Skip to content

Commit 328c305

Browse files
committed
release: adobe.4
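Includes a bugfix for the file range removal workaround: the loop over each file group previously iterated over `0..group.len()-1`, so the last file in every group was skipped; it now covers `0..group.len()`.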
1 parent 7e112ea commit 328c305

File tree

3 files changed

+19
-29
lines changed

3 files changed

+19
-29
lines changed

Cargo.lock

Lines changed: 15 additions & 15 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ classifiers = [
4545
]
4646
dependencies = ["pyarrow>=11.0.0", "typing-extensions;python_version<'3.13'"]
4747
#dynamic = ["version"]
48-
version = "47.0.0+adobe.3"
48+
version = "47.0.0+adobe.4"
4949

5050
[project.urls]
5151
homepage = "https://datafusion.apache.org/python"

src/dataframe.rs

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -883,13 +883,15 @@ impl DistributedPlan {
883883
// if any file has a range defined (even when the range actually covers the entire file).
884884
// The EnforceDistribution optimizer rule adds ranges for both full and partial files,
885885
// so this tries to revert that in order to trigger a repartition when no files are actually split.
886+
// TODO: check whether EnforceDistribution is still adding redundant ranges and remove this
887+
// workaround if no longer needed.
886888
if let Some(file_scan) =
887889
exec.data_source().as_any().downcast_ref::<FileScanConfig>()
888890
{
889891
let mut range_free_file_scan = file_scan.clone();
890892
let mut total_size: usize = 0;
891893
for group in range_free_file_scan.file_groups.iter_mut() {
892-
for group_idx in 0..group.len()-1 {
894+
for group_idx in 0..group.len() {
893895
let file = group.index_mut(group_idx);
894896
if let Some(range) = &file.range {
895897
total_size += (range.end - range.start) as usize;
@@ -900,19 +902,7 @@ impl DistributedPlan {
900902
} else {
901903
total_size += file.object_meta.size as usize;
902904
}
903-
904905
}
905-
// for file in group.iter_mut() {
906-
// if let Some(range) = &file.range {
907-
// total_size += (range.end - range.start) as usize;
908-
// if range.start == 0 && range.end == file.object_meta.size as i64
909-
// {
910-
// file.range = None; // remove redundant range
911-
// }
912-
// } else {
913-
// total_size += file.object_meta.size;
914-
// }
915-
// }
916906
}
917907
let min_size_buckets = max(1, total_size.div_ceil(self.min_size));
918908
let partitions = min(min_size_buckets, desired_parallelism);

0 commit comments

Comments
 (0)