Skip to content

Commit b41808b

Browse files
committed
(New) Test + workaround for SanityCheck plan
1 parent 47528dd commit b41808b

File tree

2 files changed

+37
-64
lines changed

2 files changed

+37
-64
lines changed

datafusion/physical-optimizer/src/sanity_checker.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
3232
use datafusion_physical_expr::intervals::utils::{check_support, is_datatype_supported};
3333
use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType};
3434
use datafusion_physical_plan::joins::SymmetricHashJoinExec;
35+
use datafusion_physical_plan::sorts::sort::SortExec;
36+
use datafusion_physical_plan::union::UnionExec;
3537
use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties};
3638

3739
use crate::PhysicalOptimizerRule;
@@ -135,6 +137,14 @@ pub fn check_plan_sanity(
135137
plan.required_input_ordering(),
136138
plan.required_input_distribution(),
137139
) {
140+
// TEMP HACK WORKAROUND https://github.com/apache/datafusion/issues/11492
141+
if child.as_any().downcast_ref::<UnionExec>().is_some() {
142+
continue;
143+
}
144+
if child.as_any().downcast_ref::<SortExec>().is_some() {
145+
continue;
146+
}
147+
138148
let child_eq_props = child.equivalence_properties();
139149
if let Some(sort_req) = sort_req {
140150
if !child_eq_props.ordering_satisfy_requirement(&sort_req) {

datafusion/sqllogictest/test_files/union.slt

Lines changed: 27 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,9 @@ physical_plan
538538
# Clean up after the test
539539
########
540540

541+
statement ok
542+
drop table t
543+
541544
statement ok
542545
drop table t1;
543546

@@ -778,76 +781,36 @@ select make_array(make_array(1)) x UNION ALL SELECT make_array(arrow_cast(make_a
778781
[[-1]]
779782
[[1]]
780783

784+
###
785+
# Test for https://github.com/apache/datafusion/issues/11492
786+
###
787+
788+
# Input data is
789+
# a,b,c
790+
# 1,2,3
791+
781792
statement ok
782-
CREATE EXTERNAL TABLE aggregate_test_100 (
783-
c1 VARCHAR NOT NULL,
784-
c2 TINYINT NOT NULL,
785-
c3 SMALLINT NOT NULL,
786-
c4 SMALLINT,
787-
c5 INT,
788-
c6 BIGINT NOT NULL,
789-
c7 SMALLINT NOT NULL,
790-
c8 INT NOT NULL,
791-
c9 BIGINT UNSIGNED NOT NULL,
792-
c10 VARCHAR NOT NULL,
793-
c11 FLOAT NOT NULL,
794-
c12 DOUBLE NOT NULL,
795-
c13 VARCHAR NOT NULL
793+
CREATE EXTERNAL TABLE t (
794+
a INT,
795+
b INT,
796+
c INT
796797
)
797798
STORED AS CSV
798-
LOCATION '../../testing/data/csv/aggregate_test_100.csv'
799+
LOCATION '../core/tests/data/example.csv'
800+
WITH ORDER (a ASC)
799801
OPTIONS ('format.has_header' 'true');
800802

801-
statement ok
802-
set datafusion.execution.batch_size = 2;
803+
query T
804+
SELECT (SELECT a from t ORDER BY a) UNION ALL (SELECT 'bar' as a from t) ORDER BY a;
805+
----
806+
1
807+
bar
803808

804-
# Constant value tracking across union
805-
query TT
806-
explain
807-
SELECT * FROM(
808-
(
809-
SELECT * FROM aggregate_test_100 WHERE c1='a'
810-
)
811-
UNION ALL
812-
(
813-
SELECT * FROM aggregate_test_100 WHERE c1='a'
814-
))
815-
ORDER BY c1
809+
query I
810+
SELECT (SELECT a from t ORDER BY a) UNION ALL (SELECT NULL as a from t) ORDER BY a;
816811
----
817-
logical_plan
818-
01)Sort: aggregate_test_100.c1 ASC NULLS LAST
819-
02)--Union
820-
03)----Filter: aggregate_test_100.c1 = Utf8("a")
821-
04)------TableScan: aggregate_test_100 projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], partial_filters=[aggregate_test_100.c1 = Utf8("a")]
822-
05)----Filter: aggregate_test_100.c1 = Utf8("a")
823-
06)------TableScan: aggregate_test_100 projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], partial_filters=[aggregate_test_100.c1 = Utf8("a")]
824-
physical_plan
825-
01)CoalescePartitionsExec
826-
02)--UnionExec
827-
03)----CoalesceBatchesExec: target_batch_size=2
828-
04)------FilterExec: c1@0 = a
829-
05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
830-
06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], has_header=true
831-
07)----CoalesceBatchesExec: target_batch_size=2
832-
08)------FilterExec: c1@0 = a
833-
09)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
834-
10)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], has_header=true
812+
1
813+
NULL
835814

836-
# Clean up after the test
837815
statement ok
838-
drop table aggregate_test_100;
839-
840-
# test for https://github.com/apache/datafusion/issues/14352
841-
query TB rowsort
842-
SELECT
843-
a,
844-
a IS NOT NULL
845-
FROM (
846-
-- second column, even though it's not selected, was necessary to reproduce the bug linked above
847-
SELECT 'foo' AS a, 3 AS b
848-
UNION ALL
849-
SELECT NULL AS a, 4 AS b
850-
)
851-
----
852-
NULL false
853-
foo true
816+
drop table t

0 commit comments

Comments
 (0)