@@ -293,6 +293,155 @@ impl ScalarValue {
293
293
self . to_array_of_size ( 1 )
294
294
}
295
295
296
+ /// Converts an iterator of references [`ScalarValue`] into an [`ArrayRef`]
297
+ /// corresponding to those values. For example,
298
+ ///
299
+ /// Returns an error if the iterator is empty or if the
300
+ /// [`ScalarValue`]s are not all the same type
301
+ ///
302
+ /// Example
303
+ /// ```
304
+ /// use datafusion::scalar::ScalarValue;
305
+ /// use arrow::array::{ArrayRef, BooleanArray};
306
+ ///
307
+ /// let scalars = vec![
308
+ /// ScalarValue::Boolean(Some(true)),
309
+ /// ScalarValue::Boolean(None),
310
+ /// ScalarValue::Boolean(Some(false)),
311
+ /// ];
312
+ ///
313
+ /// // Build an Array from the list of ScalarValues
314
+ /// let array = ScalarValue::iter_to_array(scalars.iter())
315
+ /// .unwrap();
316
+ ///
317
+ /// let expected: ArrayRef = std::sync::Arc::new(
318
+ /// BooleanArray::from(vec![
319
+ /// Some(true),
320
+ /// None,
321
+ /// Some(false)
322
+ /// ]
323
+ /// ));
324
+ ///
325
+ /// assert_eq!(&array, &expected);
326
+ /// ```
327
+ pub fn iter_to_array < ' a > (
328
+ scalars : impl IntoIterator < Item = & ' a ScalarValue > ,
329
+ ) -> Result < ArrayRef > {
330
+ let mut scalars = scalars. into_iter ( ) . peekable ( ) ;
331
+
332
+ // figure out the type based on the first element
333
+ let data_type = match scalars. peek ( ) {
334
+ None => {
335
+ return Err ( DataFusionError :: Internal (
336
+ "Empty iterator passed to ScalarValue::iter_to_array" . to_string ( ) ,
337
+ ) )
338
+ }
339
+ Some ( sv) => sv. get_datatype ( ) ,
340
+ } ;
341
+
342
+ /// Creates an array of $ARRAY_TY by unpacking values of
343
+ /// SCALAR_TY for primitive types
344
+ macro_rules! build_array_primitive {
345
+ ( $ARRAY_TY: ident, $SCALAR_TY: ident) => { {
346
+ {
347
+ let values = scalars
348
+ . map( |sv| {
349
+ if let ScalarValue :: $SCALAR_TY( v) = sv {
350
+ Ok ( * v)
351
+ } else {
352
+ Err ( DataFusionError :: Internal ( format!(
353
+ "Inconsistent types in ScalarValue::iter_to_array. \
354
+ Expected {:?}, got {:?}",
355
+ data_type, sv
356
+ ) ) )
357
+ }
358
+ } )
359
+ . collect:: <Result <Vec <_>>>( ) ?;
360
+
361
+ let array: $ARRAY_TY = values. iter( ) . collect( ) ;
362
+ Arc :: new( array)
363
+ }
364
+ } } ;
365
+ }
366
+
367
+ /// Creates an array of $ARRAY_TY by unpacking values of
368
+ /// SCALAR_TY for "string-like" types.
369
+ macro_rules! build_array_string {
370
+ ( $ARRAY_TY: ident, $SCALAR_TY: ident) => { {
371
+ {
372
+ let values = scalars
373
+ . map( |sv| {
374
+ if let ScalarValue :: $SCALAR_TY( v) = sv {
375
+ Ok ( v)
376
+ } else {
377
+ Err ( DataFusionError :: Internal ( format!(
378
+ "Inconsistent types in ScalarValue::iter_to_array. \
379
+ Expected {:?}, got {:?}",
380
+ data_type, sv
381
+ ) ) )
382
+ }
383
+ } )
384
+ . collect:: <Result <Vec <_>>>( ) ?;
385
+
386
+ // it is annoying that one can not create
387
+ // StringArray et al directly from iter of &String,
388
+ // requiring this map to &str
389
+ let values = values. iter( ) . map( |s| s. as_ref( ) ) ;
390
+
391
+ let array: $ARRAY_TY = values. collect( ) ;
392
+ Arc :: new( array)
393
+ }
394
+ } } ;
395
+ }
396
+
397
+ let array: ArrayRef = match & data_type {
398
+ DataType :: Boolean => build_array_primitive ! ( BooleanArray , Boolean ) ,
399
+ DataType :: Float32 => build_array_primitive ! ( Float32Array , Float32 ) ,
400
+ DataType :: Float64 => build_array_primitive ! ( Float64Array , Float64 ) ,
401
+ DataType :: Int8 => build_array_primitive ! ( Int8Array , Int8 ) ,
402
+ DataType :: Int16 => build_array_primitive ! ( Int16Array , Int16 ) ,
403
+ DataType :: Int32 => build_array_primitive ! ( Int32Array , Int32 ) ,
404
+ DataType :: Int64 => build_array_primitive ! ( Int64Array , Int64 ) ,
405
+ DataType :: UInt8 => build_array_primitive ! ( UInt8Array , UInt8 ) ,
406
+ DataType :: UInt16 => build_array_primitive ! ( UInt16Array , UInt16 ) ,
407
+ DataType :: UInt32 => build_array_primitive ! ( UInt32Array , UInt32 ) ,
408
+ DataType :: UInt64 => build_array_primitive ! ( UInt64Array , UInt64 ) ,
409
+ DataType :: Utf8 => build_array_string ! ( StringArray , Utf8 ) ,
410
+ DataType :: LargeUtf8 => build_array_string ! ( LargeStringArray , LargeUtf8 ) ,
411
+ DataType :: Binary => build_array_string ! ( BinaryArray , Binary ) ,
412
+ DataType :: LargeBinary => build_array_string ! ( LargeBinaryArray , LargeBinary ) ,
413
+ DataType :: Date32 => build_array_primitive ! ( Date32Array , Date32 ) ,
414
+ DataType :: Date64 => build_array_primitive ! ( Date64Array , Date64 ) ,
415
+ DataType :: Timestamp ( TimeUnit :: Second , None ) => {
416
+ build_array_primitive ! ( TimestampSecondArray , TimestampSecond )
417
+ }
418
+ DataType :: Timestamp ( TimeUnit :: Millisecond , None ) => {
419
+ build_array_primitive ! ( TimestampMillisecondArray , TimestampMillisecond )
420
+ }
421
+ DataType :: Timestamp ( TimeUnit :: Microsecond , None ) => {
422
+ build_array_primitive ! ( TimestampMicrosecondArray , TimestampMicrosecond )
423
+ }
424
+ DataType :: Timestamp ( TimeUnit :: Nanosecond , None ) => {
425
+ build_array_primitive ! ( TimestampNanosecondArray , TimestampNanosecond )
426
+ }
427
+ DataType :: Interval ( IntervalUnit :: DayTime ) => {
428
+ build_array_primitive ! ( IntervalDayTimeArray , IntervalDayTime )
429
+ }
430
+ DataType :: Interval ( IntervalUnit :: YearMonth ) => {
431
+ build_array_primitive ! ( IntervalYearMonthArray , IntervalYearMonth )
432
+ }
433
+ _ => {
434
+ return Err ( DataFusionError :: Internal ( format ! (
435
+ "Unsupported creation of {:?} array from ScalarValue {:?}" ,
436
+ data_type,
437
+ scalars. peek( )
438
+ ) ) )
439
+ }
440
+ } ;
441
+
442
+ Ok ( array)
443
+ }
444
+
296
445
/// Converts a scalar value into an array of `size` rows.
297
446
pub fn to_array_of_size ( & self , size : usize ) -> ArrayRef {
298
447
match self {
@@ -609,6 +758,12 @@ impl From<u64> for ScalarValue {
609
758
}
610
759
}
611
760
761
+ impl From < & str > for ScalarValue {
762
+ fn from ( value : & str ) -> Self {
763
+ ScalarValue :: Utf8 ( Some ( value. to_string ( ) ) )
764
+ }
765
+ }
766
+
612
767
macro_rules! impl_try_from {
613
768
( $SCALAR: ident, $NATIVE: ident) => {
614
769
impl TryFrom <ScalarValue > for $NATIVE {
@@ -940,4 +1095,139 @@ mod tests {
940
1095
assert ! ( prim_array. is_null( 1 ) ) ;
941
1096
assert_eq ! ( prim_array. value( 2 ) , 101 ) ;
942
1097
}
1098
+
1099
+ /// Creates array directly and via ScalarValue and ensures they are the same
1100
+ macro_rules! check_scalar_iter {
1101
+ ( $SCALAR_T: ident, $ARRAYTYPE: ident, $INPUT: expr) => { {
1102
+ let scalars: Vec <_> =
1103
+ $INPUT. iter( ) . map( |v| ScalarValue :: $SCALAR_T( * v) ) . collect( ) ;
1104
+
1105
+ let array = ScalarValue :: iter_to_array( scalars. iter( ) ) . unwrap( ) ;
1106
+
1107
+ let expected: ArrayRef = Arc :: new( $ARRAYTYPE:: from( $INPUT) ) ;
1108
+
1109
+ assert_eq!( & array, & expected) ;
1110
+ } } ;
1111
+ }
1112
+
1113
+ /// Creates array directly and via ScalarValue and ensures they
1114
+ /// are the same, for string arrays
1115
+ macro_rules! check_scalar_iter_string {
1116
+ ( $SCALAR_T: ident, $ARRAYTYPE: ident, $INPUT: expr) => { {
1117
+ let scalars: Vec <_> = $INPUT
1118
+ . iter( )
1119
+ . map( |v| ScalarValue :: $SCALAR_T( v. map( |v| v. to_string( ) ) ) )
1120
+ . collect( ) ;
1121
+
1122
+ let array = ScalarValue :: iter_to_array( scalars. iter( ) ) . unwrap( ) ;
1123
+
1124
+ let expected: ArrayRef = Arc :: new( $ARRAYTYPE:: from( $INPUT) ) ;
1125
+
1126
+ assert_eq!( & array, & expected) ;
1127
+ } } ;
1128
+ }
1129
+
1130
+ /// Creates array directly and via ScalarValue and ensures they
1131
+ /// are the same, for binary arrays
1132
+ macro_rules! check_scalar_iter_binary {
1133
+ ( $SCALAR_T: ident, $ARRAYTYPE: ident, $INPUT: expr) => { {
1134
+ let scalars: Vec <_> = $INPUT
1135
+ . iter( )
1136
+ . map( |v| ScalarValue :: $SCALAR_T( v. map( |v| v. to_vec( ) ) ) )
1137
+ . collect( ) ;
1138
+
1139
+ let array = ScalarValue :: iter_to_array( scalars. iter( ) ) . unwrap( ) ;
1140
+
1141
+ let expected: $ARRAYTYPE =
1142
+ $INPUT. iter( ) . map( |v| v. map( |v| v. to_vec( ) ) ) . collect( ) ;
1143
+
1144
+ let expected: ArrayRef = Arc :: new( expected) ;
1145
+
1146
+ assert_eq!( & array, & expected) ;
1147
+ } } ;
1148
+ }
1149
+
1150
+ #[ test]
1151
+ fn scalar_iter_to_array_boolean ( ) {
1152
+ check_scalar_iter ! ( Boolean , BooleanArray , vec![ Some ( true ) , None , Some ( false ) ] ) ;
1153
+ check_scalar_iter ! ( Float32 , Float32Array , vec![ Some ( 1.9 ) , None , Some ( -2.1 ) ] ) ;
1154
+ check_scalar_iter ! ( Float64 , Float64Array , vec![ Some ( 1.9 ) , None , Some ( -2.1 ) ] ) ;
1155
+
1156
+ check_scalar_iter ! ( Int8 , Int8Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1157
+ check_scalar_iter ! ( Int16 , Int16Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1158
+ check_scalar_iter ! ( Int32 , Int32Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1159
+ check_scalar_iter ! ( Int64 , Int64Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1160
+
1161
+ check_scalar_iter ! ( UInt8 , UInt8Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1162
+ check_scalar_iter ! ( UInt16 , UInt16Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1163
+ check_scalar_iter ! ( UInt32 , UInt32Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1164
+ check_scalar_iter ! ( UInt64 , UInt64Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1165
+
1166
+ check_scalar_iter ! (
1167
+ TimestampSecond ,
1168
+ TimestampSecondArray ,
1169
+ vec![ Some ( 1 ) , None , Some ( 3 ) ]
1170
+ ) ;
1171
+ check_scalar_iter ! (
1172
+ TimestampMillisecond ,
1173
+ TimestampMillisecondArray ,
1174
+ vec![ Some ( 1 ) , None , Some ( 3 ) ]
1175
+ ) ;
1176
+ check_scalar_iter ! (
1177
+ TimestampMicrosecond ,
1178
+ TimestampMicrosecondArray ,
1179
+ vec![ Some ( 1 ) , None , Some ( 3 ) ]
1180
+ ) ;
1181
+ check_scalar_iter ! (
1182
+ TimestampNanosecond ,
1183
+ TimestampNanosecondArray ,
1184
+ vec![ Some ( 1 ) , None , Some ( 3 ) ]
1185
+ ) ;
1186
+
1187
+ check_scalar_iter_string ! (
1188
+ Utf8 ,
1189
+ StringArray ,
1190
+ vec![ Some ( "foo" ) , None , Some ( "bar" ) ]
1191
+ ) ;
1192
+ check_scalar_iter_string ! (
1193
+ LargeUtf8 ,
1194
+ LargeStringArray ,
1195
+ vec![ Some ( "foo" ) , None , Some ( "bar" ) ]
1196
+ ) ;
1197
+ check_scalar_iter_binary ! (
1198
+ Binary ,
1199
+ BinaryArray ,
1200
+ vec![ Some ( b"foo" ) , None , Some ( b"bar" ) ]
1201
+ ) ;
1202
+ check_scalar_iter_binary ! (
1203
+ LargeBinary ,
1204
+ LargeBinaryArray ,
1205
+ vec![ Some ( b"foo" ) , None , Some ( b"bar" ) ]
1206
+ ) ;
1207
+ }
1208
+
1209
+ #[ test]
1210
+ fn scalar_iter_to_array_empty ( ) {
1211
+ let scalars = vec ! [ ] as Vec < ScalarValue > ;
1212
+
1213
+ let result = ScalarValue :: iter_to_array ( scalars. iter ( ) ) . unwrap_err ( ) ;
1214
+ assert ! (
1215
+ result
1216
+ . to_string( )
1217
+ . contains( "Empty iterator passed to ScalarValue::iter_to_array" ) ,
1218
+ "{}" ,
1219
+ result
1220
+ ) ;
1221
+ }
1222
+
1223
+ #[ test]
1224
+ fn scalar_iter_to_array_mismatched_types ( ) {
1225
+ use ScalarValue :: * ;
1226
+ // If the scalar values are not all the correct type, error here
1227
+ let scalars: Vec < ScalarValue > = vec ! [ Boolean ( Some ( true ) ) , Int32 ( Some ( 5 ) ) ] ;
1228
+
1229
+ let result = ScalarValue :: iter_to_array ( scalars. iter ( ) ) . unwrap_err ( ) ;
1230
+ assert ! ( result. to_string( ) . contains( "Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)" ) ,
1231
+ "{}" , result) ;
1232
+ }
943
1233
}
0 commit comments