Skip to content

Commit 7ca7456

Browse files
authored
Handle nulls in approx_percentile_cont (#11721)
* Respect nulls in approx_percentile_cont
* Compile
* Remove check
* Adapt comment
1 parent cd786e2 commit 7ca7456

File tree

2 files changed

+14
-3
lines changed

2 files changed

+14
-3
lines changed

datafusion/functions-aggregate/src/approx_percentile_cont.rs

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,8 @@ use std::any::Any;
 use std::fmt::{Debug, Formatter};
 use std::sync::Arc;
 
-use arrow::array::RecordBatch;
+use arrow::array::{Array, RecordBatch};
+use arrow::compute::{filter, is_not_null};
 use arrow::{
     array::{
         ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array,
@@ -393,8 +394,12 @@ impl Accumulator for ApproxPercentileAccumulator {
     }
 
     fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion_common::Result<()> {
-        let values = &values[0];
-        let sorted_values = &arrow::compute::sort(values, None)?;
+        // Remove any nulls before computing the percentile
+        let mut values = Arc::clone(&values[0]);
+        if values.nulls().is_some() {
+            values = filter(&values, &is_not_null(&values)?)?;
+        }
+        let sorted_values = &arrow::compute::sort(&values, None)?;
         let sorted_values = ApproxPercentileAccumulator::convert_to_float(sorted_values)?;
         self.digest = self.digest.merge_sorted_f64(&sorted_values);
         Ok(())

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1237,6 +1237,12 @@ SELECT (ABS(1 - CAST(approx_percentile_cont(c11, 0.9) AS DOUBLE) / 0.834) < 0.05
 ----
 true
 
+# percentile_cont_with_nulls
+query I
+SELECT APPROX_PERCENTILE_CONT(v, 0.5) FROM (VALUES (1), (2), (3), (NULL), (NULL), (NULL)) as t (v);
+----
+2
+
 # csv_query_cube_avg
 query TIR
 SELECT c1, c2, AVG(c3) FROM aggregate_test_100 GROUP BY CUBE (c1, c2) ORDER BY c1, c2

0 commit comments

Comments
 (0)