-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Implement tree explain for DataSourceExec
#15029
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
6329ebd
Implement tree explain for DataSourceExec
alamb 1b767c7
Merge remote-tracking branch 'apache/main' into alamb/less_details
alamb cc8de02
improve test
alamb a6c8dbe
Apply suggestions from code review
alamb 67cc5bc
Merge remote-tracking branch 'apache/main' into alamb/less_details
alamb dce545b
fmt
alamb File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -425,25 +425,17 @@ impl DataSource for MemorySourceConfig { | |
} | ||
} | ||
DisplayFormatType::TreeRender => { | ||
let partition_sizes: Vec<_> = | ||
self.partitions.iter().map(|b| b.len()).collect(); | ||
writeln!(f, "partition_sizes={:?}", partition_sizes)?; | ||
|
||
if let Some(output_ordering) = self.sort_information.first() { | ||
writeln!(f, "output_ordering={}", output_ordering)?; | ||
} | ||
|
||
let eq_properties = self.eq_properties(); | ||
let constraints = eq_properties.constraints(); | ||
if !constraints.is_empty() { | ||
writeln!(f, "constraints={}", constraints)?; | ||
} | ||
|
||
if let Some(limit) = self.fetch { | ||
writeln!(f, "fetch={}", limit)?; | ||
} | ||
|
||
write!(f, "partitions={}", partition_sizes.len()) | ||
let total_rows = self.partitions.iter().map(|b| b.len()).sum::<usize>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Likewise, the previous version is too verbose I think |
||
let total_bytes: usize = self | ||
.partitions | ||
.iter() | ||
.flatten() | ||
.map(|batch| batch.get_array_memory_size()) | ||
.sum(); | ||
writeln!(f, "format=memory")?; | ||
alamb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
writeln!(f, "rows={total_rows}")?; | ||
writeln!(f, "bytes={total_bytes}")?; | ||
Ok(()) | ||
} | ||
} | ||
} | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,10 +54,36 @@ STORED AS PARQUET | |
LOCATION 'test_files/scratch/explain_tree/table2.parquet'; | ||
|
||
|
||
# table3: Memoru | ||
# table3: Memory | ||
statement ok | ||
CREATE TABLE table3 as select * from table1; | ||
|
||
# table4: JSON | ||
query I | ||
COPY (SELECT * from table1) | ||
TO 'test_files/scratch/explain_tree/table4.json' | ||
---- | ||
3 | ||
|
||
statement ok | ||
CREATE EXTERNAL TABLE table4 | ||
STORED AS JSON | ||
LOCATION 'test_files/scratch/explain_tree/table4.json'; | ||
|
||
# table5: ARROW | ||
query I | ||
COPY (SELECT * from table1) | ||
TO 'test_files/scratch/explain_tree/table5.arrow' | ||
---- | ||
3 | ||
|
||
statement ok | ||
CREATE EXTERNAL TABLE table5 | ||
STORED AS ARROW | ||
LOCATION 'test_files/scratch/explain_tree/table5.arrow'; | ||
|
||
|
||
|
||
######## Begin Queries ######## | ||
|
||
# Filter | ||
|
@@ -83,7 +109,10 @@ physical_plan | |
12)└─────────────┬─────────────┘ | ||
13)┌─────────────┴─────────────┐ | ||
14)│ DataSourceExec │ | ||
15)└───────────────────────────┘ | ||
15)│ -------------------- │ | ||
16)│ files: 1 │ | ||
17)│ format: csv │ | ||
18)└───────────────────────────┘ | ||
|
||
# Aggregate | ||
query TT | ||
|
@@ -110,7 +139,10 @@ physical_plan | |
15)└─────────────┬─────────────┘ | ||
16)┌─────────────┴─────────────┐ | ||
17)│ DataSourceExec │ | ||
18)└───────────────────────────┘ | ||
18)│ -------------------- │ | ||
19)│ files: 1 │ | ||
20)│ format: csv │ | ||
21)└───────────────────────────┘ | ||
|
||
# 2 Joins | ||
query TT | ||
|
@@ -139,7 +171,10 @@ physical_plan | |
15)└─────────────┬─────────────┘└─────────────┬─────────────┘ | ||
16)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ | ||
17)│ DataSourceExec ││ DataSourceExec │ | ||
18)└───────────────────────────┘└───────────────────────────┘ | ||
18)│ -------------------- ││ -------------------- │ | ||
19)│ files: 1 ││ files: 1 │ | ||
20)│ format: csv ││ format: parquet │ | ||
21)└───────────────────────────┘└───────────────────────────┘ | ||
|
||
# 3 Joins | ||
query TT | ||
|
@@ -175,18 +210,22 @@ physical_plan | |
13)┌─────────────┴─────────────┐┌─────────────┴─────────────┐┌─────────────┴─────────────┐ | ||
14)│ CoalesceBatchesExec ││ CoalesceBatchesExec ││ DataSourceExec │ | ||
15)│ ││ ││ -------------------- │ | ||
16)│ ││ ││ partition_sizes: [1] │ | ||
17)│ ││ ││ partitions: 1 │ | ||
18)└─────────────┬─────────────┘└─────────────┬─────────────┘└───────────────────────────┘ | ||
19)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ | ||
20)│ RepartitionExec ││ RepartitionExec │ | ||
21)└─────────────┬─────────────┘└─────────────┬─────────────┘ | ||
22)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ | ||
23)│ RepartitionExec ││ RepartitionExec │ | ||
24)└─────────────┬─────────────┘└─────────────┬─────────────┘ | ||
25)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ | ||
26)│ DataSourceExec ││ DataSourceExec │ | ||
27)└───────────────────────────┘└───────────────────────────┘ | ||
16)│ ││ ││ bytes: 1560 │ | ||
17)│ ││ ││ format: memory │ | ||
18)│ ││ ││ rows: 1 │ | ||
19)└─────────────┬─────────────┘└─────────────┬─────────────┘└───────────────────────────┘ | ||
20)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ | ||
21)│ RepartitionExec ││ RepartitionExec │ | ||
22)└─────────────┬─────────────┘└─────────────┬─────────────┘ | ||
23)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ | ||
24)│ RepartitionExec ││ RepartitionExec │ | ||
25)└─────────────┬─────────────┘└─────────────┬─────────────┘ | ||
26)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ | ||
27)│ DataSourceExec ││ DataSourceExec │ | ||
28)│ -------------------- ││ -------------------- │ | ||
29)│ files: 1 ││ files: 1 │ | ||
30)│ format: csv ││ format: parquet │ | ||
31)└───────────────────────────┘└───────────────────────────┘ | ||
|
||
# Long Filter (demonstrate what happens with wrapping) | ||
query TT | ||
|
@@ -213,9 +252,153 @@ physical_plan | |
12)└─────────────┬─────────────┘ | ||
13)┌─────────────┴─────────────┐ | ||
14)│ DataSourceExec │ | ||
15)└───────────────────────────┘ | ||
15)│ -------------------- │ | ||
16)│ files: 1 │ | ||
17)│ format: csv │ | ||
18)└───────────────────────────┘ | ||
|
||
# Query with filter on csv | ||
query TT | ||
explain SELECT int_col FROM table1 WHERE string_col != 'foo'; | ||
---- | ||
logical_plan | ||
01)Projection: table1.int_col | ||
02)--Filter: table1.string_col != Utf8("foo") | ||
03)----TableScan: table1 projection=[int_col, string_col], partial_filters=[table1.string_col != Utf8("foo")] | ||
physical_plan | ||
01)┌───────────────────────────┐ | ||
02)│ CoalesceBatchesExec │ | ||
03)└─────────────┬─────────────┘ | ||
04)┌─────────────┴─────────────┐ | ||
05)│ FilterExec │ | ||
06)│ -------------------- │ | ||
07)│ predicate: │ | ||
08)│ string_col@1 != foo │ | ||
09)└─────────────┬─────────────┘ | ||
10)┌─────────────┴─────────────┐ | ||
11)│ RepartitionExec │ | ||
12)└─────────────┬─────────────┘ | ||
13)┌─────────────┴─────────────┐ | ||
14)│ DataSourceExec │ | ||
15)│ -------------------- │ | ||
16)│ files: 1 │ | ||
17)│ format: csv │ | ||
18)└───────────────────────────┘ | ||
|
||
|
||
# Query with filter on parquet | ||
query TT | ||
explain SELECT int_col FROM table2 WHERE string_col != 'foo'; | ||
---- | ||
logical_plan | ||
01)Projection: table2.int_col | ||
02)--Filter: table2.string_col != Utf8View("foo") | ||
03)----TableScan: table2 projection=[int_col, string_col], partial_filters=[table2.string_col != Utf8View("foo")] | ||
physical_plan | ||
01)┌───────────────────────────┐ | ||
02)│ CoalesceBatchesExec │ | ||
03)└─────────────┬─────────────┘ | ||
04)┌─────────────┴─────────────┐ | ||
05)│ FilterExec │ | ||
06)│ -------------------- │ | ||
07)│ predicate: │ | ||
08)│ string_col@1 != foo │ | ||
09)└─────────────┬─────────────┘ | ||
10)┌─────────────┴─────────────┐ | ||
11)│ RepartitionExec │ | ||
12)└─────────────┬─────────────┘ | ||
13)┌─────────────┴─────────────┐ | ||
14)│ DataSourceExec │ | ||
15)│ -------------------- │ | ||
16)│ files: 1 │ | ||
17)│ format: parquet │ | ||
18)│ │ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't know why there is an extra newline here 🤔 |
||
19)│ predicate: │ | ||
20)│ string_col@1 != foo │ | ||
21)└───────────────────────────┘ | ||
|
||
# Query with filter on memory | ||
query TT | ||
explain SELECT int_col FROM table3 WHERE string_col != 'foo'; | ||
---- | ||
logical_plan | ||
01)Projection: table3.int_col | ||
02)--Filter: table3.string_col != Utf8("foo") | ||
03)----TableScan: table3 projection=[int_col, string_col] | ||
physical_plan | ||
01)┌───────────────────────────┐ | ||
02)│ CoalesceBatchesExec │ | ||
03)└─────────────┬─────────────┘ | ||
04)┌─────────────┴─────────────┐ | ||
05)│ FilterExec │ | ||
06)│ -------------------- │ | ||
07)│ predicate: │ | ||
08)│ string_col@1 != foo │ | ||
09)└─────────────┬─────────────┘ | ||
10)┌─────────────┴─────────────┐ | ||
11)│ DataSourceExec │ | ||
12)│ -------------------- │ | ||
13)│ bytes: 1560 │ | ||
14)│ format: memory │ | ||
15)│ rows: 1 │ | ||
16)└───────────────────────────┘ | ||
|
||
# Query with filter on json | ||
query TT | ||
explain SELECT int_col FROM table4 WHERE string_col != 'foo'; | ||
---- | ||
logical_plan | ||
01)Projection: table4.int_col | ||
02)--Filter: table4.string_col != Utf8("foo") | ||
03)----TableScan: table4 projection=[int_col, string_col], partial_filters=[table4.string_col != Utf8("foo")] | ||
physical_plan | ||
01)┌───────────────────────────┐ | ||
02)│ CoalesceBatchesExec │ | ||
03)└─────────────┬─────────────┘ | ||
04)┌─────────────┴─────────────┐ | ||
05)│ FilterExec │ | ||
06)│ -------------------- │ | ||
07)│ predicate: │ | ||
08)│ string_col@1 != foo │ | ||
09)└─────────────┬─────────────┘ | ||
10)┌─────────────┴─────────────┐ | ||
11)│ RepartitionExec │ | ||
12)└─────────────┬─────────────┘ | ||
13)┌─────────────┴─────────────┐ | ||
14)│ DataSourceExec │ | ||
15)│ -------------------- │ | ||
16)│ files: 1 │ | ||
17)│ format: json │ | ||
18)└───────────────────────────┘ | ||
|
||
# Query with filter on arrow | ||
query TT | ||
explain SELECT int_col FROM table5 WHERE string_col != 'foo'; | ||
---- | ||
logical_plan | ||
01)Projection: table5.int_col | ||
02)--Filter: table5.string_col != Utf8("foo") | ||
03)----TableScan: table5 projection=[int_col, string_col], partial_filters=[table5.string_col != Utf8("foo")] | ||
physical_plan | ||
01)┌───────────────────────────┐ | ||
02)│ CoalesceBatchesExec │ | ||
03)└─────────────┬─────────────┘ | ||
04)┌─────────────┴─────────────┐ | ||
05)│ FilterExec │ | ||
06)│ -------------------- │ | ||
07)│ predicate: │ | ||
08)│ string_col@1 != foo │ | ||
09)└─────────────┬─────────────┘ | ||
10)┌─────────────┴─────────────┐ | ||
11)│ RepartitionExec │ | ||
12)└─────────────┬─────────────┘ | ||
13)┌─────────────┴─────────────┐ | ||
14)│ DataSourceExec │ | ||
15)│ -------------------- │ | ||
16)│ files: 1 │ | ||
17)│ format: arrow │ | ||
18)└───────────────────────────┘ | ||
|
||
# cleanup | ||
statement ok | ||
drop table table1; | ||
|
@@ -225,3 +408,9 @@ drop table table2; | |
|
||
statement ok | ||
drop table table3; | ||
|
||
statement ok | ||
drop table table4; | ||
|
||
statement ok | ||
drop table table5; |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Per the description of TreeRender:
datafusion/datafusion/physical-plan/src/display.rs
Lines 48 to 74 in 3dc212c
TreeRender mode should have only the most relevant details for understanding the high level plan