@@ -242,11 +242,9 @@ impl<T: ArrowNumericType> Accumulator for MedianAccumulator<T> {
242
242
///
243
243
/// For calculating the accurate medians of groups, we need to store all values
244
244
/// of groups before final evaluation.
245
- /// And values in each group will be stored in a `Vec<T>`, so the total group values
245
+ /// So values in each group will be stored in a `Vec<T>`, so the total group values
246
246
/// will be actually organized as a `Vec<Vec<T>>`.
247
247
///
248
- /// In partial aggregation stage, the `values`
249
- ///
250
248
#[ derive( Debug ) ]
251
249
struct MedianGroupsAccumulator < T : ArrowNumericType + Send > {
252
250
data_type : DataType ,
@@ -273,7 +271,7 @@ impl<T: ArrowNumericType + Send> GroupsAccumulator for MedianGroupsAccumulator<T
273
271
assert_eq ! ( values. len( ) , 1 , "single argument to update_batch" ) ;
274
272
let values = values[ 0 ] . as_primitive :: < T > ( ) ;
275
273
276
- // increment counts, update sums
274
+ // Push the `not nulls + not filtered` row into its group
277
275
self . group_values . resize ( total_num_groups, Vec :: new ( ) ) ;
278
276
accumulate (
279
277
group_indices,
@@ -297,30 +295,43 @@ impl<T: ArrowNumericType + Send> GroupsAccumulator for MedianGroupsAccumulator<T
297
295
) -> Result < ( ) > {
298
296
assert_eq ! ( values. len( ) , 1 , "one argument to merge_batch" ) ;
299
297
300
- // The merged values should be organized like as a `non-nullable ListArray` like:
298
+ // The merged values should be organized like as a `ListArray` which is nullable,
299
+ // but `values` in it is `non-nullable`(`values` with nulls usually generated
300
+ // from `convert_to_state`).
301
+ //
302
+ // Following is the possible and impossible input `values`:
301
303
//
304
+ // # Possible values
302
305
// ```text
303
306
// group 0: [1, 2, 3]
304
- // group 1: [4, 5]
307
+ // group 1: null (list array is nullable)
305
308
// group 2: [6, 7, 8]
306
309
// ...
307
310
// group n: [...]
308
311
// ```
309
312
//
313
+ // # Impossible values
314
+ // ```text
315
+ // group x: [1, 2, null] (values in list array is non-nullable)
316
+ // ```
317
+ //
310
318
let input_group_values = values[ 0 ] . as_list :: < i32 > ( ) ;
311
- assert ! ( input_group_values. null_count( ) == 0 ) ;
312
319
313
320
// Ensure group values big enough
314
321
self . group_values . resize ( total_num_groups, Vec :: new ( ) ) ;
315
322
316
323
// Extend values to related groups
324
+ // TODO: avoid using iterator of the `ListArray`, this will lead to
325
+ // many calls of `slice` of its `values` array, and `slice` is not
326
+ // so efficient.
317
327
group_indices
318
328
. iter ( )
319
329
. zip ( input_group_values. iter ( ) )
320
330
. for_each ( |( & group_index, values_opt) | {
321
- let values = values_opt. unwrap ( ) ;
322
- let values = values. as_primitive :: < T > ( ) ;
323
- self . group_values [ group_index] . extend ( values. values ( ) . iter ( ) ) ;
331
+ if let Some ( values) = values_opt {
332
+ let values = values. as_primitive :: < T > ( ) ;
333
+ self . group_values [ group_index] . extend ( values. values ( ) . iter ( ) ) ;
334
+ }
324
335
} ) ;
325
336
326
337
Ok ( ( ) )
0 commit comments