Skip to content

Commit

Permalink
Fix bug with explain and Add more message about join type.
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangyue-hashdata committed Dec 5, 2024
1 parent 274d8aa commit 98dac6d
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 19 deletions.
25 changes: 11 additions & 14 deletions src/backend/commands/explain.c
Original file line number Diff line number Diff line change
Expand Up @@ -2296,8 +2296,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
/* fall through to print additional fields the same as SeqScan */
/* FALLTHROUGH */
case T_SeqScan:
show_pushdown_runtime_filter_info("Rows Removed by Pushdown Runtime Filter",
planstate, es);
if (gp_enable_runtime_filter_pushdown && IsA(planstate, SeqScanState))
show_pushdown_runtime_filter_info("Rows Removed by Pushdown Runtime Filter",
planstate, es);
/* FALLTHROUGH */
case T_ValuesScan:
case T_CteScan:
Expand Down Expand Up @@ -4144,25 +4145,21 @@ show_instrumentation_count(const char *qlabel, int which,
}

/*
* If it's EXPLAIN ANALYZE, show instrumentation information with pushdown runtime filter
* If it's EXPLAIN ANALYZE, show instrumentation information with pushdown
* runtime filter.
*/
static void
show_pushdown_runtime_filter_info(const char *qlabel, PlanState *planstate, ExplainState *es)
show_pushdown_runtime_filter_info(const char *qlabel,
PlanState *planstate,
ExplainState *es)
{
double nfiltered;

Assert(IsA(planstate, SeqScanState));
Assert(gp_enable_runtime_filter_pushdown && IsA(planstate, SeqScanState));

if (!es->analyze || !planstate->instrument)
return;

nfiltered = planstate->instrument->nfilteredPRF;

/* In text mode, suppress zero counts; they're not interesting enough */
if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT)
{
ExplainPropertyFloat(qlabel, NULL, nfiltered, 0, es);
}
if (planstate->instrument->prf_work)
ExplainPropertyFloat(qlabel, NULL, planstate->instrument->nfilteredPRF, 0, es);
}

/*
Expand Down
3 changes: 3 additions & 0 deletions src/backend/commands/explain_gp.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ typedef struct CdbExplain_StatInst
double nloops; /* # of run cycles for this node */
double nfiltered1;
double nfiltered2;
bool prf_work;
double nfilteredPRF;
double execmemused; /* executor memory used (bytes) */
double workmemused; /* work_mem actually used (bytes) */
Expand Down Expand Up @@ -887,6 +888,7 @@ cdbexplain_collectStatsFromNode(PlanState *planstate, CdbExplain_SendStatCtx *ct
si->nloops = instr->nloops;
si->nfiltered1 = instr->nfiltered1;
si->nfiltered2 = instr->nfiltered2;
si->prf_work = instr->prf_work;
si->nfilteredPRF = instr->nfilteredPRF;
si->workmemused = instr->workmemused;
si->workmemwanted = instr->workmemwanted;
Expand Down Expand Up @@ -1218,6 +1220,7 @@ cdbexplain_depositStatsToNode(PlanState *planstate, CdbExplain_RecvStatCtx *ctx)
instr->nloops = nodeAcc->nsimax->nloops;
instr->nfiltered1 = nodeAcc->nsimax->nfiltered1;
instr->nfiltered2 = nodeAcc->nsimax->nfiltered2;
instr->prf_work = nodeAcc->nsimax->prf_work;
instr->nfilteredPRF = nodeAcc->nsimax->nfilteredPRF;
instr->execmemused = nodeAcc->nsimax->execmemused;
instr->workmemused = nodeAcc->nsimax->workmemused;
Expand Down
16 changes: 11 additions & 5 deletions src/backend/executor/nodeHashjoin.c
Original file line number Diff line number Diff line change
Expand Up @@ -2195,13 +2195,19 @@ CreateRuntimeFilter(HashJoinState* hjstate)
List *targets;

/*
* Only applicable for inner, right and semi join,
* A build-side Bloom filter tells us if a row is definitely not in the build
* side. This allows us to early-eliminate rows or early-accept rows depending
* on the type of join.
* Left Outer Join and Full Outer Join output all rows, so a build-side Bloom
* filter would only allow us to early-output. Left Antijoin outputs only if
* there is no match, so again early output. We don't implement early output
* for now.
* So it's only applicable for inner, right and semi join.
*/
jointype = hjstate->js.jointype;
if (jointype != JOIN_INNER
&& jointype != JOIN_RIGHT
&& jointype != JOIN_SEMI
)
if (jointype != JOIN_INNER &&
jointype != JOIN_RIGHT &&
jointype != JOIN_SEMI)
return;

hstate = castNode(HashState, innerPlanState(hjstate));
Expand Down
8 changes: 8 additions & 0 deletions src/backend/executor/nodeSeqscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,14 @@ PassByBloomFilter(SeqScanState *node, TupleTableSlot *slot)
ListCell *lc;
bloom_filter *blm_filter;

/*
* Mark that the pushdown runtime filter is actually taking effect.
*/
if (node->ss.ps.instrument &&
!node->ss.ps.instrument->prf_work &&
list_length(node->filters))
node->ss.ps.instrument->prf_work = true;

foreach (lc, node->filters)
{
sk = lfirst(lc);
Expand Down
1 change: 1 addition & 0 deletions src/include/executor/instrument.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ typedef struct Instrumentation
bool need_bufusage; /* true if we need buffer usage data */
bool need_walusage; /* true if we need WAL usage data */
bool async_mode; /* true if node is in async mode */
bool prf_work; /* true if pushdown runtime filters really work */
/* Info about current plan cycle: */
bool running; /* true if we've completed first tuple */
instr_time starttime; /* Start time of current iteration of node */
Expand Down
53 changes: 53 additions & 0 deletions src/test/regress/expected/gp_runtime_filter.out
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,59 @@ SELECT * FROM t1, t2 WHERE t1.c2 = t2.c2;
Optimizer: Postgres query optimizer
(15 rows)

RESET gp_enable_runtime_filter_pushdown;
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
-- case 3: bug fix with explain
DROP TABLE IF EXISTS test_tablesample;
NOTICE: table "test_tablesample" does not exist, skipping
CREATE TABLE test_tablesample (dist int, id int, name text) WITH (fillfactor=10) DISTRIBUTED BY (dist);
INSERT INTO test_tablesample SELECT 0, i, repeat(i::text, 875) FROM generate_series(0, 9) s(i) ORDER BY i;
INSERT INTO test_tablesample SELECT 3, i, repeat(i::text, 875) FROM generate_series(10, 19) s(i) ORDER BY i;
INSERT INTO test_tablesample SELECT 5, i, repeat(i::text, 875) FROM generate_series(20, 29) s(i) ORDER BY i;
SET gp_enable_runtime_filter_pushdown TO on;
EXPLAIN (COSTS OFF) SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (2);
QUERY PLAN
--------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Sample Scan on test_tablesample
Sampling: system ('50'::real) REPEATABLE ('2'::double precision)
Optimizer: Postgres query optimizer
(4 rows)

RESET gp_enable_runtime_filter_pushdown;
DROP TABLE IF EXISTS test_tablesample;
-- case 4: show debug info only when gp_enable_runtime_filter_pushdown is on
DROP TABLE IF EXISTS t1;
NOTICE: table "t1" does not exist, skipping
DROP TABLE IF EXISTS t2;
NOTICE: table "t2" does not exist, skipping
CREATE TABLE t1(c1 int, c2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Cloudberry Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE t2(c1 int, c2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Cloudberry Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO t1 SELECT GENERATE_SERIES(1, 1000), GENERATE_SERIES(1, 1000);
INSERT INTO t2 SELECT * FROM t1;
SET gp_enable_runtime_filter_pushdown TO on;
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT count(t1.c2) FROM t1, t2 WHERE t1.c1 = t2.c1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------
Finalize Aggregate (actual rows=1 loops=1)
-> Gather Motion 3:1 (slice1; segments: 3) (actual rows=3 loops=1)
-> Partial Aggregate (actual rows=1 loops=1)
-> Hash Join (actual rows=340 loops=1)
Hash Cond: (t1.c1 = t2.c1)
Extra Text: (seg2) Hash chain length 1.0 avg, 1 max, using 340 of 524288 buckets.
-> Seq Scan on t1 (actual rows=340 loops=1)
Rows Removed by Pushdown Runtime Filter: 0
-> Hash (actual rows=340 loops=1)
Buckets: 524288 Batches: 1 Memory Usage: 4108kB
-> Seq Scan on t2 (actual rows=340 loops=1)
Optimizer: Postgres query optimizer
(12 rows)

RESET gp_enable_runtime_filter_pushdown;
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
Expand Down
28 changes: 28 additions & 0 deletions src/test/regress/sql/gp_runtime_filter.sql
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,34 @@ RESET gp_enable_runtime_filter_pushdown;
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;

-- case 3: bug fix with explain
DROP TABLE IF EXISTS test_tablesample;
CREATE TABLE test_tablesample (dist int, id int, name text) WITH (fillfactor=10) DISTRIBUTED BY (dist);
INSERT INTO test_tablesample SELECT 0, i, repeat(i::text, 875) FROM generate_series(0, 9) s(i) ORDER BY i;
INSERT INTO test_tablesample SELECT 3, i, repeat(i::text, 875) FROM generate_series(10, 19) s(i) ORDER BY i;
INSERT INTO test_tablesample SELECT 5, i, repeat(i::text, 875) FROM generate_series(20, 29) s(i) ORDER BY i;

SET gp_enable_runtime_filter_pushdown TO on;
EXPLAIN (COSTS OFF) SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (2);
RESET gp_enable_runtime_filter_pushdown;

DROP TABLE IF EXISTS test_tablesample;

-- case 4: show debug info only when gp_enable_runtime_filter_pushdown is on
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
CREATE TABLE t1(c1 int, c2 int);
CREATE TABLE t2(c1 int, c2 int);
INSERT INTO t1 SELECT GENERATE_SERIES(1, 1000), GENERATE_SERIES(1, 1000);
INSERT INTO t2 SELECT * FROM t1;

SET gp_enable_runtime_filter_pushdown TO on;
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT count(t1.c2) FROM t1, t2 WHERE t1.c1 = t2.c1;
RESET gp_enable_runtime_filter_pushdown;

DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;

-- Clean up: reset guc
SET gp_enable_runtime_filter TO off;
SET optimizer TO default;

0 comments on commit 98dac6d

Please sign in to comment.