Skip to content

Commit df685a5

Browse files
committed
improve comments.
1 parent 37bfe01 commit df685a5

File tree

1 file changed

+21
-10
lines changed

1 file changed

+21
-10
lines changed

datafusion/functions-aggregate/src/median.rs

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -242,11 +242,9 @@ impl<T: ArrowNumericType> Accumulator for MedianAccumulator<T> {
242242
///
243243
/// For calculating the accurate medians of groups, we need to store all values
244244
/// of groups before final evaluation.
245-
/// And values in each group will be stored in a `Vec<T>`, so the total group values
245+
/// So values in each group will be stored in a `Vec<T>`, so the total group values
246246
/// will be actually organized as a `Vec<Vec<T>>`.
247247
///
248-
/// In partial aggregation stage, the `values`
249-
///
250248
#[derive(Debug)]
251249
struct MedianGroupsAccumulator<T: ArrowNumericType + Send> {
252250
data_type: DataType,
@@ -273,7 +271,7 @@ impl<T: ArrowNumericType + Send> GroupsAccumulator for MedianGroupsAccumulator<T
273271
assert_eq!(values.len(), 1, "single argument to update_batch");
274272
let values = values[0].as_primitive::<T>();
275273

276-
// increment counts, update sums
274+
// Push the `not nulls + not filtered` row into its group
277275
self.group_values.resize(total_num_groups, Vec::new());
278276
accumulate(
279277
group_indices,
@@ -297,30 +295,43 @@ impl<T: ArrowNumericType + Send> GroupsAccumulator for MedianGroupsAccumulator<T
297295
) -> Result<()> {
298296
assert_eq!(values.len(), 1, "one argument to merge_batch");
299297

300-
// The merged values should be organized like as a `non-nullable ListArray` like:
298+
// The merged values should be organized like as a `ListArray` which is nullable,
299+
// but `values` in it is `non-nullable`(`values` with nulls usually generated
300+
// from `convert_to_state`).
301+
//
302+
// Following is the possible and impossible input `values`:
301303
//
304+
// # Possible values
302305
// ```text
303306
// group 0: [1, 2, 3]
304-
// group 1: [4, 5]
307+
// group 1: null (list array is nullable)
305308
// group 2: [6, 7, 8]
306309
// ...
307310
// group n: [...]
308311
// ```
309312
//
313+
// # Impossible values
314+
// ```text
315+
// group x: [1, 2, null] (values in list array is non-nullable)
316+
// ```
317+
//
310318
let input_group_values = values[0].as_list::<i32>();
311-
assert!(input_group_values.null_count() == 0);
312319

313320
// Ensure group values big enough
314321
self.group_values.resize(total_num_groups, Vec::new());
315322

316323
// Extend values to related groups
324+
// TODO: avoid using iterator of the `ListArray`, this will lead to
325+
// many calls of `slice` of its `values` array, and `slice` is not
326+
// so efficient.
317327
group_indices
318328
.iter()
319329
.zip(input_group_values.iter())
320330
.for_each(|(&group_index, values_opt)| {
321-
let values = values_opt.unwrap();
322-
let values = values.as_primitive::<T>();
323-
self.group_values[group_index].extend(values.values().iter());
331+
if let Some(values) = values_opt {
332+
let values = values.as_primitive::<T>();
333+
self.group_values[group_index].extend(values.values().iter());
334+
}
324335
});
325336

326337
Ok(())

0 commit comments

Comments
 (0)