Skip to content

Commit 79ac655

Browse files
committed
improve comments.
1 parent 7838369 commit 79ac655

File tree

1 file changed

+21
-10
lines changed

1 file changed

+21
-10
lines changed

datafusion/functions-aggregate/src/median.rs

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -241,11 +241,9 @@ impl<T: ArrowNumericType> Accumulator for MedianAccumulator<T> {
241241
///
242242
/// For calculating the accurate medians of groups, we need to store all values
243243
/// of groups before final evaluation.
244-
/// And values in each group will be stored in a `Vec<T>`, so the total group values
244+
/// So values in each group will be stored in a `Vec<T>`, so the total group values
245245
/// will be actually organized as a `Vec<Vec<T>>`.
246246
///
247-
/// In partial aggregation stage, the `values`
248-
///
249247
#[derive(Debug)]
250248
struct MedianGroupsAccumulator<T: ArrowNumericType + Send> {
251249
data_type: DataType,
@@ -272,7 +270,7 @@ impl<T: ArrowNumericType + Send> GroupsAccumulator for MedianGroupsAccumulator<T
272270
assert_eq!(values.len(), 1, "single argument to update_batch");
273271
let values = values[0].as_primitive::<T>();
274272

275-
// increment counts, update sums
273+
// Push the `not nulls + not filtered` row into its group
276274
self.group_values.resize(total_num_groups, Vec::new());
277275
accumulate(
278276
group_indices,
@@ -296,30 +294,43 @@ impl<T: ArrowNumericType + Send> GroupsAccumulator for MedianGroupsAccumulator<T
296294
) -> Result<()> {
297295
assert_eq!(values.len(), 1, "one argument to merge_batch");
298296

299-
// The merged values should be organized like as a `non-nullable ListArray` like:
297+
// The merged values should be organized like as a `ListArray` which is nullable,
298+
// but `values` in it is `non-nullable`(`values` with nulls usually generated
299+
// from `convert_to_state`).
300+
//
301+
// Following is the possible and impossible input `values`:
300302
//
303+
// # Possible values
301304
// ```text
302305
// group 0: [1, 2, 3]
303-
// group 1: [4, 5]
306+
// group 1: null (list array is nullable)
304307
// group 2: [6, 7, 8]
305308
// ...
306309
// group n: [...]
307310
// ```
308311
//
312+
// # Impossible values
313+
// ```text
314+
// group x: [1, 2, null] (values in list array is non-nullable)
315+
// ```
316+
//
309317
let input_group_values = values[0].as_list::<i32>();
310-
assert!(input_group_values.null_count() == 0);
311318

312319
// Ensure group values big enough
313320
self.group_values.resize(total_num_groups, Vec::new());
314321

315322
// Extend values to related groups
323+
// TODO: avoid using iterator of the `ListArray`, this will lead to
324+
// many calls of `slice` of its `values` array, and `slice` is not
325+
// so efficient.
316326
group_indices
317327
.iter()
318328
.zip(input_group_values.iter())
319329
.for_each(|(&group_index, values_opt)| {
320-
let values = values_opt.unwrap();
321-
let values = values.as_primitive::<T>();
322-
self.group_values[group_index].extend(values.values().iter());
330+
if let Some(values) = values_opt {
331+
let values = values.as_primitive::<T>();
332+
self.group_values[group_index].extend(values.values().iter());
333+
}
323334
});
324335

325336
Ok(())

0 commit comments

Comments
 (0)