Skip to content

Commit 9fdc4fe

Browse files
alambDandandan
andauthored
Function to create ArrayRef from an iterator of ScalarValues (apache#381)
* Function to create `ArrayRef` from an iterator of ScalarValues * Apply suggestions from code review Co-authored-by: Daniël Heres <[email protected]> * Update datafusion/src/scalar.rs Co-authored-by: Daniël Heres <[email protected]> * Fix up code references Co-authored-by: Daniël Heres <[email protected]>
1 parent 0aea0df commit 9fdc4fe

File tree

1 file changed

+290
-0
lines changed

1 file changed

+290
-0
lines changed

datafusion/src/scalar.rs

+290
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,155 @@ impl ScalarValue {
293293
self.to_array_of_size(1)
294294
}
295295

296+
/// Converts an iterator of references [`ScalarValue`] into an [`ArrayRef`]
297+
/// corresponding to those values. For example,
298+
///
299+
/// Returns an error if the iterator is empty or if the
300+
/// [`ScalarValue`]s are not all the same type
301+
///
302+
/// Example
303+
/// ```
304+
/// use datafusion::scalar::ScalarValue;
305+
/// use arrow::array::{ArrayRef, BooleanArray};
306+
///
307+
/// let scalars = vec![
308+
/// ScalarValue::Boolean(Some(true)),
309+
/// ScalarValue::Boolean(None),
310+
/// ScalarValue::Boolean(Some(false)),
311+
/// ];
312+
///
313+
/// // Build an Array from the list of ScalarValues
314+
/// let array = ScalarValue::iter_to_array(scalars.iter())
315+
/// .unwrap();
316+
///
317+
/// let expected: ArrayRef = std::sync::Arc::new(
318+
/// BooleanArray::from(vec![
319+
/// Some(true),
320+
/// None,
321+
/// Some(false)
322+
/// ]
323+
/// ));
324+
///
325+
/// assert_eq!(&array, &expected);
326+
/// ```
327+
pub fn iter_to_array<'a>(
328+
scalars: impl IntoIterator<Item = &'a ScalarValue>,
329+
) -> Result<ArrayRef> {
330+
let mut scalars = scalars.into_iter().peekable();
331+
332+
// figure out the type based on the first element
333+
let data_type = match scalars.peek() {
334+
None => {
335+
return Err(DataFusionError::Internal(
336+
"Empty iterator passed to ScalarValue::iter_to_array".to_string(),
337+
))
338+
}
339+
Some(sv) => sv.get_datatype(),
340+
};
341+
342+
/// Creates an array of $ARRAY_TY by unpacking values of
343+
/// SCALAR_TY for primitive types
344+
macro_rules! build_array_primitive {
345+
($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
346+
{
347+
let values = scalars
348+
.map(|sv| {
349+
if let ScalarValue::$SCALAR_TY(v) = sv {
350+
Ok(*v)
351+
} else {
352+
Err(DataFusionError::Internal(format!(
353+
"Inconsistent types in ScalarValue::iter_to_array. \
354+
Expected {:?}, got {:?}",
355+
data_type, sv
356+
)))
357+
}
358+
})
359+
.collect::<Result<Vec<_>>>()?;
360+
361+
let array: $ARRAY_TY = values.iter().collect();
362+
Arc::new(array)
363+
}
364+
}};
365+
}
366+
367+
/// Creates an array of $ARRAY_TY by unpacking values of
368+
/// SCALAR_TY for "string-like" types.
369+
macro_rules! build_array_string {
370+
($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
371+
{
372+
let values = scalars
373+
.map(|sv| {
374+
if let ScalarValue::$SCALAR_TY(v) = sv {
375+
Ok(v)
376+
} else {
377+
Err(DataFusionError::Internal(format!(
378+
"Inconsistent types in ScalarValue::iter_to_array. \
379+
Expected {:?}, got {:?}",
380+
data_type, sv
381+
)))
382+
}
383+
})
384+
.collect::<Result<Vec<_>>>()?;
385+
386+
// it is annoying that one can not create
387+
// StringArray et al directly from iter of &String,
388+
// requiring this map to &str
389+
let values = values.iter().map(|s| s.as_ref());
390+
391+
let array: $ARRAY_TY = values.collect();
392+
Arc::new(array)
393+
}
394+
}};
395+
}
396+
397+
let array: ArrayRef = match &data_type {
398+
DataType::Boolean => build_array_primitive!(BooleanArray, Boolean),
399+
DataType::Float32 => build_array_primitive!(Float32Array, Float32),
400+
DataType::Float64 => build_array_primitive!(Float64Array, Float64),
401+
DataType::Int8 => build_array_primitive!(Int8Array, Int8),
402+
DataType::Int16 => build_array_primitive!(Int16Array, Int16),
403+
DataType::Int32 => build_array_primitive!(Int32Array, Int32),
404+
DataType::Int64 => build_array_primitive!(Int64Array, Int64),
405+
DataType::UInt8 => build_array_primitive!(UInt8Array, UInt8),
406+
DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
407+
DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
408+
DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
409+
DataType::Utf8 => build_array_string!(StringArray, Utf8),
410+
DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
411+
DataType::Binary => build_array_string!(BinaryArray, Binary),
412+
DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary),
413+
DataType::Date32 => build_array_primitive!(Date32Array, Date32),
414+
DataType::Date64 => build_array_primitive!(Date64Array, Date64),
415+
DataType::Timestamp(TimeUnit::Second, None) => {
416+
build_array_primitive!(TimestampSecondArray, TimestampSecond)
417+
}
418+
DataType::Timestamp(TimeUnit::Millisecond, None) => {
419+
build_array_primitive!(TimestampMillisecondArray, TimestampMillisecond)
420+
}
421+
DataType::Timestamp(TimeUnit::Microsecond, None) => {
422+
build_array_primitive!(TimestampMicrosecondArray, TimestampMicrosecond)
423+
}
424+
DataType::Timestamp(TimeUnit::Nanosecond, None) => {
425+
build_array_primitive!(TimestampNanosecondArray, TimestampNanosecond)
426+
}
427+
DataType::Interval(IntervalUnit::DayTime) => {
428+
build_array_primitive!(IntervalDayTimeArray, IntervalDayTime)
429+
}
430+
DataType::Interval(IntervalUnit::YearMonth) => {
431+
build_array_primitive!(IntervalYearMonthArray, IntervalYearMonth)
432+
}
433+
_ => {
434+
return Err(DataFusionError::Internal(format!(
435+
"Unsupported creation of {:?} array from ScalarValue {:?}",
436+
data_type,
437+
scalars.peek()
438+
)))
439+
}
440+
};
441+
442+
Ok(array)
443+
}
444+
296445
/// Converts a scalar value into an array of `size` rows.
297446
pub fn to_array_of_size(&self, size: usize) -> ArrayRef {
298447
match self {
@@ -609,6 +758,12 @@ impl From<u64> for ScalarValue {
609758
}
610759
}
611760

761+
impl From<&str> for ScalarValue {
762+
fn from(value: &str) -> Self {
763+
ScalarValue::Utf8(Some(value.to_string()))
764+
}
765+
}
766+
612767
macro_rules! impl_try_from {
613768
($SCALAR:ident, $NATIVE:ident) => {
614769
impl TryFrom<ScalarValue> for $NATIVE {
@@ -940,4 +1095,139 @@ mod tests {
9401095
assert!(prim_array.is_null(1));
9411096
assert_eq!(prim_array.value(2), 101);
9421097
}
1098+
1099+
/// Creates array directly and via ScalarValue and ensures they are the same
1100+
macro_rules! check_scalar_iter {
1101+
($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
1102+
let scalars: Vec<_> =
1103+
$INPUT.iter().map(|v| ScalarValue::$SCALAR_T(*v)).collect();
1104+
1105+
let array = ScalarValue::iter_to_array(scalars.iter()).unwrap();
1106+
1107+
let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
1108+
1109+
assert_eq!(&array, &expected);
1110+
}};
1111+
}
1112+
1113+
/// Creates array directly and via ScalarValue and ensures they
1114+
/// are the same, for string arrays
1115+
macro_rules! check_scalar_iter_string {
1116+
($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
1117+
let scalars: Vec<_> = $INPUT
1118+
.iter()
1119+
.map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_string())))
1120+
.collect();
1121+
1122+
let array = ScalarValue::iter_to_array(scalars.iter()).unwrap();
1123+
1124+
let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
1125+
1126+
assert_eq!(&array, &expected);
1127+
}};
1128+
}
1129+
1130+
/// Creates array directly and via ScalarValue and ensures they
1131+
/// are the same, for binary arrays
1132+
macro_rules! check_scalar_iter_binary {
1133+
($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
1134+
let scalars: Vec<_> = $INPUT
1135+
.iter()
1136+
.map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_vec())))
1137+
.collect();
1138+
1139+
let array = ScalarValue::iter_to_array(scalars.iter()).unwrap();
1140+
1141+
let expected: $ARRAYTYPE =
1142+
$INPUT.iter().map(|v| v.map(|v| v.to_vec())).collect();
1143+
1144+
let expected: ArrayRef = Arc::new(expected);
1145+
1146+
assert_eq!(&array, &expected);
1147+
}};
1148+
}
1149+
1150+
#[test]
1151+
fn scalar_iter_to_array_boolean() {
1152+
check_scalar_iter!(Boolean, BooleanArray, vec![Some(true), None, Some(false)]);
1153+
check_scalar_iter!(Float32, Float32Array, vec![Some(1.9), None, Some(-2.1)]);
1154+
check_scalar_iter!(Float64, Float64Array, vec![Some(1.9), None, Some(-2.1)]);
1155+
1156+
check_scalar_iter!(Int8, Int8Array, vec![Some(1), None, Some(3)]);
1157+
check_scalar_iter!(Int16, Int16Array, vec![Some(1), None, Some(3)]);
1158+
check_scalar_iter!(Int32, Int32Array, vec![Some(1), None, Some(3)]);
1159+
check_scalar_iter!(Int64, Int64Array, vec![Some(1), None, Some(3)]);
1160+
1161+
check_scalar_iter!(UInt8, UInt8Array, vec![Some(1), None, Some(3)]);
1162+
check_scalar_iter!(UInt16, UInt16Array, vec![Some(1), None, Some(3)]);
1163+
check_scalar_iter!(UInt32, UInt32Array, vec![Some(1), None, Some(3)]);
1164+
check_scalar_iter!(UInt64, UInt64Array, vec![Some(1), None, Some(3)]);
1165+
1166+
check_scalar_iter!(
1167+
TimestampSecond,
1168+
TimestampSecondArray,
1169+
vec![Some(1), None, Some(3)]
1170+
);
1171+
check_scalar_iter!(
1172+
TimestampMillisecond,
1173+
TimestampMillisecondArray,
1174+
vec![Some(1), None, Some(3)]
1175+
);
1176+
check_scalar_iter!(
1177+
TimestampMicrosecond,
1178+
TimestampMicrosecondArray,
1179+
vec![Some(1), None, Some(3)]
1180+
);
1181+
check_scalar_iter!(
1182+
TimestampNanosecond,
1183+
TimestampNanosecondArray,
1184+
vec![Some(1), None, Some(3)]
1185+
);
1186+
1187+
check_scalar_iter_string!(
1188+
Utf8,
1189+
StringArray,
1190+
vec![Some("foo"), None, Some("bar")]
1191+
);
1192+
check_scalar_iter_string!(
1193+
LargeUtf8,
1194+
LargeStringArray,
1195+
vec![Some("foo"), None, Some("bar")]
1196+
);
1197+
check_scalar_iter_binary!(
1198+
Binary,
1199+
BinaryArray,
1200+
vec![Some(b"foo"), None, Some(b"bar")]
1201+
);
1202+
check_scalar_iter_binary!(
1203+
LargeBinary,
1204+
LargeBinaryArray,
1205+
vec![Some(b"foo"), None, Some(b"bar")]
1206+
);
1207+
}
1208+
1209+
#[test]
1210+
fn scalar_iter_to_array_empty() {
1211+
let scalars = vec![] as Vec<ScalarValue>;
1212+
1213+
let result = ScalarValue::iter_to_array(scalars.iter()).unwrap_err();
1214+
assert!(
1215+
result
1216+
.to_string()
1217+
.contains("Empty iterator passed to ScalarValue::iter_to_array"),
1218+
"{}",
1219+
result
1220+
);
1221+
}
1222+
1223+
#[test]
1224+
fn scalar_iter_to_array_mismatched_types() {
1225+
use ScalarValue::*;
1226+
// If the scalar values are not all the correct type, error here
1227+
let scalars: Vec<ScalarValue> = vec![Boolean(Some(true)), Int32(Some(5))];
1228+
1229+
let result = ScalarValue::iter_to_array(scalars.iter()).unwrap_err();
1230+
assert!(result.to_string().contains("Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)"),
1231+
"{}", result);
1232+
}
9431233
}

0 commit comments

Comments
 (0)