Skip to content

Commit 695cedc

Browse files
authored
Correctness integration test for parquet filter pushdown (#3976)
* parquet filter pushdown correctness tests * Do not run tests on windows * Drop shared file after tests are over * Rework to be single threaded
1 parent 60f3ef6 commit 695cedc

File tree

5 files changed

+494
-12
lines changed

5 files changed

+494
-12
lines changed

datafusion/core/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ csv = "1.1.6"
106106
ctor = "0.1.22"
107107
doc-comment = "0.3"
108108
env_logger = "0.9"
109+
parquet-test-utils = { path = "../../parquet-test-utils" }
109110
rstest = "0.15.0"
110111
test-utils = { path = "../../test-utils" }
111112

datafusion/core/src/physical_plan/file_format/parquet.rs

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -911,7 +911,6 @@ mod tests {
911911
use crate::datasource::listing::{FileRange, PartitionedFile};
912912
use crate::datasource::object_store::ObjectStoreUrl;
913913
use crate::execution::options::CsvReadOptions;
914-
use crate::physical_plan::metrics::MetricValue;
915914
use crate::prelude::{ParquetReadOptions, SessionConfig, SessionContext};
916915
use crate::test::object_store::local_unpartitioned_file;
917916
use crate::{
@@ -1742,15 +1741,8 @@ mod tests {
17421741
///
17431742
/// Panics if no such metric.
17441743
fn get_value(metrics: &MetricsSet, metric_name: &str) -> usize {
1745-
let sum = metrics.sum(|m| match m.value() {
1746-
MetricValue::Count { name, .. } if name == metric_name => true,
1747-
MetricValue::Time { name, .. } if name == metric_name => true,
1748-
_ => false,
1749-
});
1750-
1751-
match sum {
1752-
Some(MetricValue::Count { count, .. }) => count.value(),
1753-
Some(MetricValue::Time { time, .. }) => time.value(),
1744+
match metrics.sum_by_name(metric_name) {
1745+
Some(v) => v.as_usize(),
17541746
_ => {
17551747
panic!(
17561748
"Expected metric not found. Looking for '{}' in\n\n{:#?}",

datafusion/core/src/physical_plan/metrics/mod.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,23 @@ impl MetricsSet {
241241
Some(accum)
242242
}
243243

244+
/// Returns the sum of all the metrics with the specified name
245+
/// in this set (only named `Count`, `Time` and `Gauge` metrics are considered).
246+
pub fn sum_by_name(&self, metric_name: &str) -> Option<MetricValue> {
247+
self.sum(|m| match m.value() {
248+
MetricValue::Count { name, .. } => name == metric_name,
249+
MetricValue::Time { name, .. } => name == metric_name,
250+
MetricValue::OutputRows(_) => false,
251+
MetricValue::ElapsedCompute(_) => false,
252+
MetricValue::SpillCount(_) => false,
253+
MetricValue::SpilledBytes(_) => false,
254+
MetricValue::CurrentMemoryUsage(_) => false,
255+
MetricValue::Gauge { name, .. } => name == metric_name,
256+
MetricValue::StartTimestamp(_) => false,
257+
MetricValue::EndTimestamp(_) => false,
258+
})
259+
}
260+
244261
/// Returns a new derived `MetricsSet` where all metrics
245262
/// that had the same name and partition=`Some(..)` have been
246263
/// aggregated together. The resulting `MetricsSet` has all

0 commit comments

Comments
 (0)