@@ -241,11 +241,9 @@ impl<T: ArrowNumericType> Accumulator for MedianAccumulator<T> {
241
241
///
242
242
/// For calculating the accurate medians of groups, we need to store all values
243
243
/// of groups before final evaluation.
244
- /// And values in each group will be stored in a `Vec<T>`, so the total group values
244
+ /// So values in each group will be stored in a `Vec<T>`, so the total group values
245
245
/// will be actually organized as a `Vec<Vec<T>>`.
246
246
///
247
- /// In partial aggregation stage, the `values`
248
- ///
249
247
#[ derive( Debug ) ]
250
248
struct MedianGroupsAccumulator < T : ArrowNumericType + Send > {
251
249
data_type : DataType ,
@@ -272,7 +270,7 @@ impl<T: ArrowNumericType + Send> GroupsAccumulator for MedianGroupsAccumulator<T
272
270
assert_eq ! ( values. len( ) , 1 , "single argument to update_batch" ) ;
273
271
let values = values[ 0 ] . as_primitive :: < T > ( ) ;
274
272
275
- // increment counts, update sums
273
+ // Push the `not nulls + not filtered` row into its group
276
274
self . group_values . resize ( total_num_groups, Vec :: new ( ) ) ;
277
275
accumulate (
278
276
group_indices,
@@ -296,30 +294,43 @@ impl<T: ArrowNumericType + Send> GroupsAccumulator for MedianGroupsAccumulator<T
296
294
) -> Result < ( ) > {
297
295
assert_eq ! ( values. len( ) , 1 , "one argument to merge_batch" ) ;
298
296
299
- // The merged values should be organized like as a `non-nullable ListArray` like:
297
+ // The merged values should be organized like as a `ListArray` which is nullable,
298
+ // but `values` in it is `non-nullable`(`values` with nulls usually generated
299
+ // from `convert_to_state`).
300
+ //
301
+ // Following is the possible and impossible input `values`:
300
302
//
303
+ // # Possible values
301
304
// ```text
302
305
// group 0: [1, 2, 3]
303
- // group 1: [4, 5]
306
+ // group 1: null (list array is nullable)
304
307
// group 2: [6, 7, 8]
305
308
// ...
306
309
// group n: [...]
307
310
// ```
308
311
//
312
+ // # Impossible values
313
+ // ```text
314
+ // group x: [1, 2, null] (values in list array is non-nullable)
315
+ // ```
316
+ //
309
317
let input_group_values = values[ 0 ] . as_list :: < i32 > ( ) ;
310
- assert ! ( input_group_values. null_count( ) == 0 ) ;
311
318
312
319
// Ensure group values big enough
313
320
self . group_values . resize ( total_num_groups, Vec :: new ( ) ) ;
314
321
315
322
// Extend values to related groups
323
+ // TODO: avoid using iterator of the `ListArray`, this will lead to
324
+ // many calls of `slice` of its `values` array, and `slice` is not
325
+ // so efficient.
316
326
group_indices
317
327
. iter ( )
318
328
. zip ( input_group_values. iter ( ) )
319
329
. for_each ( |( & group_index, values_opt) | {
320
- let values = values_opt. unwrap ( ) ;
321
- let values = values. as_primitive :: < T > ( ) ;
322
- self . group_values [ group_index] . extend ( values. values ( ) . iter ( ) ) ;
330
+ if let Some ( values) = values_opt {
331
+ let values = values. as_primitive :: < T > ( ) ;
332
+ self . group_values [ group_index] . extend ( values. values ( ) . iter ( ) ) ;
333
+ }
323
334
} ) ;
324
335
325
336
Ok ( ( ) )
0 commit comments