Skip to content

Commit a2b9ab8

Browse files
authored
Minor: add with_estimated_selectivity to Precision (#8177)
* Minor: add apply_filter to Precision
* fix: use inexact
* Rename to with_estimated_selectivity
1 parent c14a765 commit a2b9ab8

File tree

2 files changed

+17
-17
lines changed

2 files changed

+17
-17
lines changed

datafusion/common/src/stats.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,15 @@ impl Precision<usize> {
151151
(_, _) => Precision::Absent,
152152
}
153153
}
154+
155+
/// Return the estimate of applying a filter with estimated selectivity
156+
/// `selectivity` to this Precision: the value is multiplied by
157+
/// `selectivity` (`1.0` keeps everything, `0.5` keeps half) and rounded up.
158+
/// The result is converted with `to_inexact`, so it is never reported as exact.
159+
pub fn with_estimated_selectivity(self, selectivity: f64) -> Self {
160+
self.map(|v| ((v as f64 * selectivity).ceil()) as usize)
161+
.to_inexact()
162+
}
154163
}
155164

156165
impl Precision<ScalarValue> {

datafusion/physical-plan/src/filter.rs

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -200,15 +200,12 @@ impl ExecutionPlan for FilterExec {
200200
// assume filter selects 20% of rows if we cannot do anything smarter
201201
// tracking issue for making this configurable:
202202
// https://github.com/apache/arrow-datafusion/issues/8133
203-
let selectivity = 0.2_f32;
204-
let mut stats = input_stats.into_inexact();
205-
if let Precision::Inexact(n) = stats.num_rows {
206-
stats.num_rows = Precision::Inexact((selectivity * n as f32) as usize);
207-
}
208-
if let Precision::Inexact(n) = stats.total_byte_size {
209-
stats.total_byte_size =
210-
Precision::Inexact((selectivity * n as f32) as usize);
211-
}
203+
let selectivity = 0.2_f64;
204+
let mut stats = input_stats.clone().into_inexact();
205+
stats.num_rows = stats.num_rows.with_estimated_selectivity(selectivity);
206+
stats.total_byte_size = stats
207+
.total_byte_size
208+
.with_estimated_selectivity(selectivity);
212209
return Ok(stats);
213210
}
214211

@@ -222,14 +219,8 @@ impl ExecutionPlan for FilterExec {
222219

223220
// Estimate (inexact) selectivity of predicate
224221
let selectivity = analysis_ctx.selectivity.unwrap_or(1.0);
225-
let num_rows = match num_rows.get_value() {
226-
Some(nr) => Precision::Inexact((*nr as f64 * selectivity).ceil() as usize),
227-
None => Precision::Absent,
228-
};
229-
let total_byte_size = match total_byte_size.get_value() {
230-
Some(tbs) => Precision::Inexact((*tbs as f64 * selectivity).ceil() as usize),
231-
None => Precision::Absent,
232-
};
222+
let num_rows = num_rows.with_estimated_selectivity(selectivity);
223+
let total_byte_size = total_byte_size.with_estimated_selectivity(selectivity);
233224

234225
let column_statistics = collect_new_statistics(
235226
&input_stats.column_statistics,

0 commit comments

Comments
 (0)