Describe the enhancement requested
In Arrow's cpp/src/arrow/compute/kernels/scalar_cast_string.cc, there are several cast kernels registered for cases where the input and output types are the same, for example:
|
// Fixed -> Fixed |
|
template <typename O, typename I> |
|
enable_if_t<std::is_same<I, FixedSizeBinaryType>::value && |
|
std::is_same<O, FixedSizeBinaryType>::value, |
|
Status> |
|
BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { |
|
const CastOptions& options = checked_cast<const CastState&>(*ctx->state()).options; |
|
const int32_t in_width = batch[0].type()->byte_width(); |
|
const int32_t out_width = |
|
checked_cast<const FixedSizeBinaryType&>(*options.to_type).byte_width(); |
|
if (in_width != out_width) { |
|
return Status::Invalid("Failed casting from ", batch[0].type()->ToString(), " to ", |
|
options.to_type.ToString(), ": widths must match"); |
|
} |
|
return ZeroCopyCastExec(ctx, batch, out); |
|
} |
|
template <typename O, typename I> |
|
enable_if_t<is_binary_view_like_type<I>::value && is_binary_view_like_type<O>::value, |
|
Status> |
|
BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { |
|
const CastOptions& options = checked_cast<const CastState&>(*ctx->state()).options; |
|
const ArraySpan& input = batch[0].array; |
|
|
|
if constexpr (!I::is_utf8 && O::is_utf8) { |
|
if (!options.allow_invalid_utf8) { |
|
InitializeUTF8(); |
|
ArraySpanVisitor<I> visitor; |
|
Utf8Validator validator; |
|
RETURN_NOT_OK(visitor.Visit(input, &validator)); |
|
} |
|
} |
|
|
|
void AddBinaryToFixedSizeBinaryCast(CastFunction* func) { |
|
AddBinaryToFixedSizeBinaryCast<StringType>(func); |
|
AddBinaryToFixedSizeBinaryCast<StringViewType>(func); |
|
AddBinaryToFixedSizeBinaryCast<BinaryType>(func); |
|
AddBinaryToFixedSizeBinaryCast<BinaryViewType>(func); |
|
AddBinaryToFixedSizeBinaryCast<LargeStringType>(func); |
|
AddBinaryToFixedSizeBinaryCast<LargeBinaryType>(func); |
|
AddBinaryToFixedSizeBinaryCast<FixedSizeBinaryType>(func); |
|
} |
However, casts between identical types appear to be handled earlier in cast.cc:
|
if (args[0].type() && args[0].type()->Equals(*cast_options->to_type)) { |
|
// Nested types might differ in field names but still be considered equal, |
|
// so we can only return non-nested types as-is. |
|
if (!is_nested(args[0].type()->id())) { |
|
return args[0]; |
|
} else if (args[0].is_array()) { |
|
ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayData> array, |
|
::arrow::internal::GetArrayView( |
|
args[0].array(), cast_options->to_type.owned_type)); |
|
return Datum(array); |
|
} else if (args[0].is_chunked_array()) { |
|
ARROW_ASSIGN_OR_RAISE( |
|
std::shared_ptr<ChunkedArray> array, |
|
args[0].chunked_array()->View(cast_options->to_type.owned_type)); |
|
return Datum(array); |
|
} |
|
} |
|
|
|
Result<std::shared_ptr<CastFunction>> result = |
|
GetCastFunction(*cast_options->to_type); |
|
if (!result.ok()) { |
|
Status s = result.status(); |
|
return s.WithMessage(s.message(), " from ", *args[0].type()); |
|
} |
|
return (*result)->Execute(args, options, ctx); |
|
} |
|
}; |
As a result, it seems these kernels may never be invoked.
Component(s)
C++
Describe the enhancement requested
In Arrow's cpp/src/arrow/compute/kernels/scalar_cast_string.cc, there are several cast kernels registered for cases where the input and output types are the same, for example:
arrow/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
Lines 643 to 658 in 73742e8
arrow/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
Lines 472 to 487 in 73742e8
arrow/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
Lines 772 to 780 in 73742e8
However, casts between identical types appear to be handled earlier in cast.cc:
arrow/cpp/src/arrow/compute/cast.cc
Lines 101 to 127 in 73742e8
As a result, it seems these kernels may never be invoked.
Component(s)
C++