Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -506,10 +506,20 @@ private RexNode rewriteInExists(RexSubQuery e, Set<CorrelationId> variablesSet,
}
switch (logic) {
case TRUE:
builder.join(JoinRelType.SEMI, builder.and(conditions), variablesSet);
if (!variablesSet.isEmpty()) {
// This is correlated. Use SemiJoin.
builder.join(JoinRelType.SEMI, builder.and(conditions), variablesSet);
} else {
builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet);
}
return builder.literal(true);
case FALSE:
builder.join(JoinRelType.ANTI, builder.and(conditions), variablesSet);
if (!variablesSet.isEmpty()) {
// This is correlated. Use AntiJoin.
builder.join(JoinRelType.ANTI, builder.and(conditions), variablesSet);
} else {
builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet);
}
return builder.literal(false);
}
builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet);
Expand Down
11 changes: 11 additions & 0 deletions ql/src/test/queries/clientpositive/subquery_in.q
Original file line number Diff line number Diff line change
Expand Up @@ -305,4 +305,15 @@ select * from part where p_size IN (select max(p_size) from part p where p.p_typ
explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name);
select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name);

-- Two uncorrelated subqueries to which the join reordering rule can be applied.
create table big (col1 string, col2 int, col3 double);
create table mid (col1 string, col2 int, col3 double);
create table small (col1 string, col2 int, col3 double);

alter table big update statistics set('numRows'='123456', 'rawDataSize'='1234567');
alter table mid update statistics set('numRows'='12345', 'rawDataSize'='123456');
alter table small update statistics set('numRows'='1234', 'rawDataSize'='12345');

explain cbo
select col1 from small where col2 in (select col2 from mid) and col3 in (select col3 from big);

168 changes: 90 additions & 78 deletions ql/src/test/results/clientpositive/llap/groupby_groupingset_bug.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -219,97 +219,109 @@ POSTHOOK: Input: default@x1_store_sales@ss_sold_date_sk=2
Plan optimized by CBO.

Vertex dependency in root stage
Map 1 <- Map 4 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Map 1 <- Reducer 3 (BROADCAST_EDGE)
Map 4 <- Map 1 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE)
Map 7 <- Reducer 9 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (SIMPLE_EDGE)
Reducer 5 <- Map 4 (SIMPLE_EDGE)
Reducer 7 <- Map 6 (SIMPLE_EDGE)
Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
Reducer 9 <- Map 8 (SIMPLE_EDGE)

Stage-0
Fetch Operator
limit:100
Stage-1
Reducer 3 vectorized, llap
File Output Operator [FS_149]
Limit [LIM_148] (rows=2 width=8)
Reducer 6 vectorized, llap
File Output Operator [FS_145]
Limit [LIM_144] (rows=2 width=8)
Number of rows:100
Select Operator [SEL_147] (rows=2 width=8)
Select Operator [SEL_143] (rows=2 width=8)
Output:["_col0"]
<-Reducer 2 [SIMPLE_EDGE] vectorized, llap
SHUFFLE [RS_146]
Select Operator [SEL_145] (rows=2 width=8)
<-Reducer 5 [SIMPLE_EDGE] vectorized, llap
SHUFFLE [RS_142]
Select Operator [SEL_141] (rows=2 width=8)
Output:["_col0"]
Top N Key Operator [TNK_144] (rows=2 width=12)
Top N Key Operator [TNK_140] (rows=2 width=12)
keys:_col1,top n:100
Group By Operator [GBY_143] (rows=2 width=12)
Group By Operator [GBY_139] (rows=2 width=12)
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_38]
<-Map 4 [SIMPLE_EDGE] vectorized, llap
SHUFFLE [RS_138]
PartitionCols:_col0
Group By Operator [GBY_37] (rows=10 width=12)
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
Select Operator [SEL_36] (rows=5185194 width=228)
Output:["_col0"]
Map Join Operator [MAPJOIN_107] (rows=5185194 width=228)
Conds:SEL_24._col1=RS_123._col1(Inner),Output:["_col0","_col2","_col3"],residual filter predicates:{(_col2 > _col3)}
<-Reducer 5 [BROADCAST_EDGE] vectorized, llap
BROADCAST [RS_123]
PartitionCols:_col1
Select Operator [SEL_122] (rows=1 width=197)
Group By Operator [GBY_137] (rows=2 width=12)
Output:["_col0","_col1"],aggregations:["count()"],keys:_col8
Map Join Operator [MAPJOIN_136] (rows=5185194 width=4)
Conds:MAPJOIN_135._col6=RS_129._col0(Inner),Output:["_col8"]
<-Map 7 [BROADCAST_EDGE] vectorized, llap
BROADCAST [RS_129]
PartitionCols:_col0
Map Join Operator [MAPJOIN_128] (rows=28 width=8)
Conds:SEL_127._col1=RS_125._col0(Inner),Output:["_col0","_col1"]
<-Reducer 9 [BROADCAST_EDGE] vectorized, llap
BROADCAST [RS_125]
PartitionCols:_col0
Group By Operator [GBY_124] (rows=2 width=4)
Output:["_col0"],keys:KEY._col0
<-Map 8 [SIMPLE_EDGE] vectorized, llap
SHUFFLE [RS_123]
PartitionCols:_col0
Group By Operator [GBY_122] (rows=2 width=4)
Output:["_col0"],keys:d_month_seq
Select Operator [SEL_121] (rows=4 width=12)
Output:["d_month_seq"]
Filter Operator [FIL_120] (rows=4 width=12)
predicate:((d_year = 2000) and ((d_year * d_moy) > 200000) and (d_moy = 2) and d_month_seq is not null)
TableScan [TS_17] (rows=28 width=12)
default@x1_date_dim,x1_date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"]
<-Select Operator [SEL_127] (rows=28 width=8)
Output:["_col0","_col1"]
Filter Operator [FIL_121] (rows=1 width=197)
predicate:_col1 is not null
Group By Operator [GBY_120] (rows=1 width=197)
Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0
<-Map 4 [SIMPLE_EDGE] vectorized, llap
SHUFFLE [RS_119]
PartitionCols:_col0
Group By Operator [GBY_117] (rows=1 width=197)
Output:["_col0","_col1"],aggregations:["min(i_current_price)"],keys:i_category
Filter Operator [FIL_115] (rows=18 width=197)
predicate:i_category is not null
TableScan [TS_3] (rows=18 width=201)
default@x1_item,i,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category","i_current_price"]
<-Select Operator [SEL_24] (rows=15555582 width=201)
Output:["_col0","_col1","_col2"]
Map Join Operator [MAPJOIN_106] (rows=15555582 width=201)
Conds:SEL_15._col3=RS_132._col0(Left Semi),Output:["_col3","_col5","_col6"]
<-Map 6 [BROADCAST_EDGE] vectorized, llap
SHUFFLE [RS_132]
PartitionCols:_col0
Group By Operator [GBY_130] (rows=2 width=4)
Output:["_col0"],keys:_col0
Select Operator [SEL_127] (rows=4 width=4)
Output:["_col0"]
Filter Operator [FIL_125] (rows=4 width=12)
predicate:((d_year = 2000) and ((d_year * d_moy) > 200000) and (d_moy = 2) and d_month_seq is not null)
TableScan [TS_6] (rows=28 width=8)
default@x1_date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy"]
<-Select Operator [SEL_15] (rows=15555582 width=201)
Output:["_col3","_col5","_col6"]
Map Join Operator [MAPJOIN_105] (rows=15555582 width=201)
Conds:MAPJOIN_104._col1=RS_135._col0(Inner),Output:["_col3","_col4","_col6"]
<-Reducer 7 [BROADCAST_EDGE] vectorized, llap
BROADCAST [RS_135]
PartitionCols:_col0
Select Operator [SEL_134]
Output:["_col0","_col1"]
<-Map Join Operator [MAPJOIN_104] (rows=1111113 width=201)
Conds:SEL_2._col0=RS_118._col0(Inner),Output:["_col1","_col3","_col4"]
<-Map 4 [BROADCAST_EDGE] vectorized, llap
SHUFFLE [RS_118]
PartitionCols:_col0
Select Operator [SEL_116] (rows=18 width=201)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_114] (rows=18 width=201)
predicate:(i_item_sk is not null and i_category is not null and i_current_price is not null)
Please refer to the previous TableScan [TS_3]
<-Select Operator [SEL_2] (rows=123457 width=8)
Output:["_col0","_col1"]
Filter Operator [FIL_59] (rows=123457 width=8)
predicate:ss_item_sk is not null
TableScan [TS_0] (rows=123457 width=8)
default@x1_store_sales,s,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk"]
Filter Operator [FIL_126] (rows=28 width=8)
predicate:(d_date_sk is not null and d_month_seq is not null)
TableScan [TS_14] (rows=28 width=8)
default@x1_date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"]
Dynamic Partitioning Event Operator [EVENT_132] (rows=1 width=4)
Group By Operator [GBY_131] (rows=1 width=4)
Output:["_col0"],keys:_col0
Select Operator [SEL_130] (rows=28 width=4)
Output:["_col0"]
Please refer to the previous Map Join Operator [MAPJOIN_128]
<-Map Join Operator [MAPJOIN_135] (rows=370371 width=4)
Conds:RS_31._col0=SEL_134._col0(Inner),Output:["_col6"]
<-Map 1 [BROADCAST_EDGE] llap
BROADCAST [RS_31]
PartitionCols:_col0
Map Join Operator [MAPJOIN_103] (rows=6 width=228)
Conds:SEL_2._col1=RS_117._col1(Inner),Output:["_col0","_col2","_col3"],residual filter predicates:{(_col2 > _col3)}
<-Reducer 3 [BROADCAST_EDGE] vectorized, llap
BROADCAST [RS_117]
PartitionCols:_col1
Select Operator [SEL_116] (rows=1 width=197)
Output:["_col0","_col1"]
Filter Operator [FIL_115] (rows=1 width=197)
predicate:_col1 is not null
Group By Operator [GBY_114] (rows=1 width=197)
Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0
<-Map 2 [SIMPLE_EDGE] vectorized, llap
SHUFFLE [RS_113]
PartitionCols:_col0
Group By Operator [GBY_112] (rows=1 width=197)
Output:["_col0","_col1"],aggregations:["min(i_current_price)"],keys:i_category
Filter Operator [FIL_111] (rows=18 width=197)
predicate:i_category is not null
TableScan [TS_3] (rows=18 width=197)
default@x1_item,j,Tbl:COMPLETE,Col:COMPLETE,Output:["i_category","i_current_price"]
<-Select Operator [SEL_2] (rows=18 width=201)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_58] (rows=18 width=201)
predicate:(i_item_sk is not null and i_category is not null and i_current_price is not null)
TableScan [TS_0] (rows=18 width=201)
default@x1_item,i,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category","i_current_price"]
<-Select Operator [SEL_134] (rows=123457 width=8)
Output:["_col0","_col1"]
Filter Operator [FIL_133] (rows=123457 width=8)
predicate:ss_item_sk is not null
TableScan [TS_11] (rows=123457 width=8)
default@x1_store_sales,s,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk"]

PREHOOK: query: select count(*) cnt
from
Expand Down
Loading
Loading