Skip to content

Commit 7c1c794

Browse files
suremarc, alamb, NGA-TRAN
authored
feat: Determine ordering of file groups (#9593)
* add statistics to PartitionedFile
* just dump work for now
* working test case
* fix jumbled rebase
* forgot to annotate #[test]
* more refactoring
* add a link
* refactor again
* whitespace
* format debug log
* remove useless itertools
* refactor test
* fix bug
* use sort_file_groups in ListingTable
* move check into a better place
* refactor test a bit
* more testing
* more testing
* better error message
* fix log msg
* fix again
* add sqllogictest and fixes
* fix test
* Update datafusion/core/src/datasource/listing/mod.rs Co-authored-by: Andrew Lamb <[email protected]>
* Update datafusion/core/src/datasource/physical_plan/file_scan_config.rs Co-authored-by: Andrew Lamb <[email protected]>
* more unit tests
* rename to split_groups_by_statistics
* only use groups if there's <= target_partitions
* refactor a bit, no need for projected_schema
* fix reverse order
* save work for now
* lots of test cases in new slt
* remove output check
* fix
* fix last test
* comment on params
* clippy
* revert parquet.slt
* no need to pass projection separately
* Update datafusion/core/src/datasource/listing/mod.rs Co-authored-by: Nga Tran <[email protected]>
* update comment on in
* fix test?
* un-fix?
* add fix back in?
* move indices_sorted_by_min to MinMaxStatistics
* move MinMaxStatistics to its own module
* fix license
* add feature flag
* update config

---------

Co-authored-by: Andrew Lamb <[email protected]>
Co-authored-by: Nga Tran <[email protected]>
1 parent 1e0c760 commit 7c1c794

File tree

21 files changed

+1018
-19
lines changed

21 files changed

+1018
-19
lines changed

datafusion/common/src/config.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,11 @@ config_namespace! {
297297

298298
/// Should DataFusion support recursive CTEs
299299
pub enable_recursive_ctes: bool, default = true
300+
301+
/// Attempt to eliminate sorts by packing & sorting files with non-overlapping
302+
/// statistics into the same file groups.
303+
/// Currently experimental
304+
pub split_file_groups_by_statistics: bool, default = false
300305
}
301306
}
302307

datafusion/core/src/datasource/file_format/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ pub(crate) mod test_util {
150150
object_meta: meta,
151151
partition_values: vec![],
152152
range: None,
153+
statistics: None,
153154
extensions: None,
154155
}]];
155156

datafusion/core/src/datasource/listing/helpers.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,7 @@ pub async fn pruned_partition_list<'a>(
373373
object_meta,
374374
partition_values: partition_values.clone(),
375375
range: None,
376+
statistics: None,
376377
extensions: None,
377378
})
378379
}));

datafusion/core/src/datasource/listing/mod.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ mod url;
2424

2525
use crate::error::Result;
2626
use chrono::TimeZone;
27-
use datafusion_common::ScalarValue;
27+
use datafusion_common::{ScalarValue, Statistics};
2828
use futures::Stream;
2929
use object_store::{path::Path, ObjectMeta};
3030
use std::pin::Pin;
@@ -67,6 +67,11 @@ pub struct PartitionedFile {
6767
pub partition_values: Vec<ScalarValue>,
6868
/// An optional file range for a more fine-grained parallel execution
6969
pub range: Option<FileRange>,
70+
/// Optional statistics that describe the data in this file if known.
71+
///
72+
/// DataFusion relies on these statistics for planning (in particular to sort file groups),
73+
/// so if they are incorrect, incorrect answers may result.
74+
pub statistics: Option<Statistics>,
7075
/// An optional field for user defined per object metadata
7176
pub extensions: Option<Arc<dyn std::any::Any + Send + Sync>>,
7277
}
@@ -83,6 +88,7 @@ impl PartitionedFile {
8388
},
8489
partition_values: vec![],
8590
range: None,
91+
statistics: None,
8692
extensions: None,
8793
}
8894
}
@@ -98,7 +104,8 @@ impl PartitionedFile {
98104
version: None,
99105
},
100106
partition_values: vec![],
101-
range: None,
107+
range: Some(FileRange { start, end }),
108+
statistics: None,
102109
extensions: None,
103110
}
104111
.with_range(start, end)
@@ -128,6 +135,7 @@ impl From<ObjectMeta> for PartitionedFile {
128135
object_meta,
129136
partition_values: vec![],
130137
range: None,
138+
statistics: None,
131139
extensions: None,
132140
}
133141
}

datafusion/core/src/datasource/listing/table.rs

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -739,16 +739,43 @@ impl TableProvider for ListingTable {
739739
filters: &[Expr],
740740
limit: Option<usize>,
741741
) -> Result<Arc<dyn ExecutionPlan>> {
742-
let (partitioned_file_lists, statistics) =
742+
let (mut partitioned_file_lists, statistics) =
743743
self.list_files_for_scan(state, filters, limit).await?;
744744

745+
let projected_schema = project_schema(&self.schema(), projection)?;
746+
745747
// if no files need to be read, return an `EmptyExec`
746748
if partitioned_file_lists.is_empty() {
747-
let schema = self.schema();
748-
let projected_schema = project_schema(&schema, projection)?;
749749
return Ok(Arc::new(EmptyExec::new(projected_schema)));
750750
}
751751

752+
let output_ordering = self.try_create_output_ordering()?;
753+
match state
754+
.config_options()
755+
.execution
756+
.split_file_groups_by_statistics
757+
.then(|| {
758+
output_ordering.first().map(|output_ordering| {
759+
FileScanConfig::split_groups_by_statistics(
760+
&self.table_schema,
761+
&partitioned_file_lists,
762+
output_ordering,
763+
)
764+
})
765+
})
766+
.flatten()
767+
{
768+
Some(Err(e)) => log::debug!("failed to split file groups by statistics: {e}"),
769+
Some(Ok(new_groups)) => {
770+
if new_groups.len() <= self.options.target_partitions {
771+
partitioned_file_lists = new_groups;
772+
} else {
773+
log::debug!("attempted to split file groups by statistics, but there were more file groups than target_partitions; falling back to unordered")
774+
}
775+
}
776+
None => {} // no ordering required
777+
};
778+
752779
// extract types of partition columns
753780
let table_partition_cols = self
754781
.options
@@ -772,6 +799,7 @@ impl TableProvider for ListingTable {
772799
} else {
773800
return Ok(Arc::new(EmptyExec::new(Arc::new(Schema::empty()))));
774801
};
802+
775803
// create the execution plan
776804
self.options
777805
.format
@@ -784,7 +812,7 @@ impl TableProvider for ListingTable {
784812
statistics,
785813
projection: projection.cloned(),
786814
limit,
787-
output_ordering: self.try_create_output_ordering()?,
815+
output_ordering,
788816
table_partition_cols,
789817
},
790818
filters.as_ref(),
@@ -937,10 +965,11 @@ impl ListingTable {
937965
// collect the statistics if required by the config
938966
let files = file_list
939967
.map(|part_file| async {
940-
let part_file = part_file?;
968+
let mut part_file = part_file?;
941969
if self.options.collect_stat {
942970
let statistics =
943971
self.do_collect_statistics(ctx, &store, &part_file).await?;
972+
part_file.statistics = Some(statistics.clone());
944973
Ok((part_file, statistics)) as Result<(PartitionedFile, Statistics)>
945974
} else {
946975
Ok((part_file, Statistics::new_unknown(&self.file_schema)))

0 commit comments

Comments
 (0)