@@ -188,10 +188,34 @@ pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
188
188
Decimal256 ( _, _) => {
189
189
pack_numeric_to_dictionary :: < K , Decimal256Type > ( array, dict_value_type, cast_options)
190
190
}
191
- Utf8 => pack_byte_to_dictionary :: < K , GenericStringType < i32 > > ( array, cast_options) ,
192
- LargeUtf8 => pack_byte_to_dictionary :: < K , GenericStringType < i64 > > ( array, cast_options) ,
193
- Binary => pack_byte_to_dictionary :: < K , GenericBinaryType < i32 > > ( array, cast_options) ,
194
- LargeBinary => pack_byte_to_dictionary :: < K , GenericBinaryType < i64 > > ( array, cast_options) ,
191
+ Utf8 => {
192
+ // If the input is a view type, we can avoid casting (thus copying) the data
193
+ if array. data_type ( ) == & DataType :: Utf8View {
194
+ return string_view_to_dictionary :: < K , i32 > ( array) ;
195
+ }
196
+ pack_byte_to_dictionary :: < K , GenericStringType < i32 > > ( array, cast_options)
197
+ }
198
+ LargeUtf8 => {
199
+ // If the input is a view type, we can avoid casting (thus copying) the data
200
+ if array. data_type ( ) == & DataType :: Utf8View {
201
+ return string_view_to_dictionary :: < K , i64 > ( array) ;
202
+ }
203
+ pack_byte_to_dictionary :: < K , GenericStringType < i64 > > ( array, cast_options)
204
+ }
205
+ Binary => {
206
+ // If the input is a view type, we can avoid casting (thus copying) the data
207
+ if array. data_type ( ) == & DataType :: BinaryView {
208
+ return binary_view_to_dictionary :: < K , i32 > ( array) ;
209
+ }
210
+ pack_byte_to_dictionary :: < K , GenericBinaryType < i32 > > ( array, cast_options)
211
+ }
212
+ LargeBinary => {
213
+ // If the input is a view type, we can avoid casting (thus copying) the data
214
+ if array. data_type ( ) == & DataType :: BinaryView {
215
+ return binary_view_to_dictionary :: < K , i64 > ( array) ;
216
+ }
217
+ pack_byte_to_dictionary :: < K , GenericBinaryType < i64 > > ( array, cast_options)
218
+ }
195
219
_ => Err ( ArrowError :: CastError ( format ! (
196
220
"Unsupported output type for dictionary packing: {dict_value_type:?}"
197
221
) ) ) ,
@@ -226,6 +250,58 @@ where
226
250
Ok ( Arc :: new ( b. finish ( ) ) )
227
251
}
228
252
253
+ pub ( crate ) fn string_view_to_dictionary < K , O : OffsetSizeTrait > (
254
+ array : & dyn Array ,
255
+ ) -> Result < ArrayRef , ArrowError >
256
+ where
257
+ K : ArrowDictionaryKeyType ,
258
+ {
259
+ let mut b = GenericByteDictionaryBuilder :: < K , GenericStringType < O > > :: with_capacity (
260
+ array. len ( ) ,
261
+ 1024 ,
262
+ 1024 ,
263
+ ) ;
264
+ let string_view = array. as_any ( ) . downcast_ref :: < StringViewArray > ( ) . unwrap ( ) ;
265
+ for v in string_view. iter ( ) {
266
+ match v {
267
+ Some ( v) => {
268
+ b. append ( v) ?;
269
+ }
270
+ None => {
271
+ b. append_null ( ) ;
272
+ }
273
+ }
274
+ }
275
+
276
+ Ok ( Arc :: new ( b. finish ( ) ) )
277
+ }
278
+
279
+ pub ( crate ) fn binary_view_to_dictionary < K , O : OffsetSizeTrait > (
280
+ array : & dyn Array ,
281
+ ) -> Result < ArrayRef , ArrowError >
282
+ where
283
+ K : ArrowDictionaryKeyType ,
284
+ {
285
+ let mut b = GenericByteDictionaryBuilder :: < K , GenericBinaryType < O > > :: with_capacity (
286
+ array. len ( ) ,
287
+ 1024 ,
288
+ 1024 ,
289
+ ) ;
290
+ let binary_view = array. as_any ( ) . downcast_ref :: < BinaryViewArray > ( ) . unwrap ( ) ;
291
+ for v in binary_view. iter ( ) {
292
+ match v {
293
+ Some ( v) => {
294
+ b. append ( v) ?;
295
+ }
296
+ None => {
297
+ b. append_null ( ) ;
298
+ }
299
+ }
300
+ }
301
+
302
+ Ok ( Arc :: new ( b. finish ( ) ) )
303
+ }
304
+
229
305
// Packs the data as a GenericByteDictionaryBuilder, if possible, with the
230
306
// key types of K
231
307
pub ( crate ) fn pack_byte_to_dictionary < K , T > (
0 commit comments