Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions regress/expected/cypher_match.out
Original file line number Diff line number Diff line change
Expand Up @@ -3834,6 +3834,130 @@ NOTICE: graph "issue_2193" has been dropped

(1 row)

--
-- Issue 2378: OPTIONAL MATCH may incorrectly drop null-preserving outer
-- rows when its WHERE clause contains a correlated sub-pattern predicate.
--
-- Cypher OPTIONAL MATCH semantics: the WHERE applies to the optional
-- binding; when no right-hand row survives the predicate, the outer row
-- is still emitted with NULLs in the optional columns. Before the fix,
-- a WHERE containing EXISTS { ... } or COUNT { ... } was attached as an
-- outer filter on the transformed subquery, so it ran after the LATERAL
-- LEFT JOIN produced null-preserving rows and then incorrectly dropped
-- them when the predicate evaluated NULL/false on the nulled side.
--
SELECT create_graph('issue_2378');
NOTICE: graph "issue_2378" has been created
create_graph
--------------

(1 row)

SELECT * FROM cypher('issue_2378', $$
CREATE (a:Person {name: 'Alice'}),
(b:Person {name: 'Bob'}),
(c:Person {name: 'Charlie'}),
(a)-[:KNOWS]->(b),
(a)-[:KNOWS]->(c)
$$) AS (v agtype);
v
---
(0 rows)

-- Correlated EXISTS referencing the optional variable (friend).
-- Neither Bob nor Charlie knows anyone, so for every outer p the
-- predicate fails on all optional matches; expect one row per person
-- with friend = NULL.
SELECT * FROM cypher('issue_2378', $$
MATCH (p:Person)
OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person)
WHERE EXISTS { (friend)-[:KNOWS]->(:Person) }
RETURN p.name AS name, friend.name AS friend
ORDER BY name
$$) AS (name agtype, friend agtype);
name | friend
-----------+--------
"Alice" |
"Bob" |
"Charlie" |
(3 rows)

-- Correlated EXISTS referencing the outer variable (p).
-- Alice knows someone so her optional matches pass; Bob and Charlie
-- don't, so they are emitted with NULL friend.
SELECT * FROM cypher('issue_2378', $$
MATCH (p:Person)
OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person)
WHERE EXISTS { (p)-[:KNOWS]->(:Person) }
RETURN p.name AS name, friend.name AS friend
ORDER BY name, friend
$$) AS (name agtype, friend agtype);
name | friend
-----------+-----------
"Alice" | "Bob"
"Alice" | "Charlie"
"Bob" |
"Charlie" |
(4 rows)

-- Non-correlated EXISTS (was already working; kept as a regression guard).
SELECT * FROM cypher('issue_2378', $$
MATCH (p:Person)
OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person)
WHERE EXISTS { MATCH (x:Person) RETURN x }
RETURN p.name AS name, friend.name AS friend
ORDER BY name, friend
$$) AS (name agtype, friend agtype);
name | friend
-----------+-----------
"Alice" | "Bob"
"Alice" | "Charlie"
"Bob" |
"Charlie" |
(4 rows)

-- Plain scalar predicate on the optional variable (was already working).
SELECT * FROM cypher('issue_2378', $$
MATCH (p:Person)
OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person)
WHERE friend.name = 'Bob'
RETURN p.name AS name, friend.name AS friend
ORDER BY name
$$) AS (name agtype, friend agtype);
name | friend
-----------+--------
"Alice" | "Bob"
"Bob" |
"Charlie" |
(3 rows)

-- Constant-false WHERE on the optional side (was already working).
SELECT * FROM cypher('issue_2378', $$
MATCH (p:Person)
OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person)
WHERE false
RETURN p.name AS name, friend.name AS friend
ORDER BY name
$$) AS (name agtype, friend agtype);
name | friend
-----------+--------
"Alice" |
"Bob" |
"Charlie" |
(3 rows)

SELECT drop_graph('issue_2378', true);
NOTICE: drop cascades to 4 other objects
DETAIL: drop cascades to table issue_2378._ag_label_vertex
drop cascades to table issue_2378._ag_label_edge
drop cascades to table issue_2378."Person"
drop cascades to table issue_2378."KNOWS"
NOTICE: graph "issue_2378" has been dropped
drop_graph
------------

(1 row)

--
-- Clean up
--
Expand Down
73 changes: 73 additions & 0 deletions regress/sql/cypher_match.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1616,6 +1616,79 @@ $$) AS (result agtype);

SELECT drop_graph('issue_2193', true);

--
-- Issue 2378: OPTIONAL MATCH may incorrectly drop null-preserving outer
-- rows when its WHERE clause contains a correlated sub-pattern predicate.
--
-- Cypher OPTIONAL MATCH semantics: the WHERE applies to the optional
-- binding; when no right-hand row survives the predicate, the outer row
-- is still emitted with NULLs in the optional columns. Before the fix,
-- a WHERE containing EXISTS { ... } or COUNT { ... } was attached as an
-- outer filter on the transformed subquery, so it ran after the LATERAL
-- LEFT JOIN produced null-preserving rows and then incorrectly dropped
-- them when the predicate evaluated NULL/false on the nulled side.
--
SELECT create_graph('issue_2378');
SELECT * FROM cypher('issue_2378', $$
CREATE (a:Person {name: 'Alice'}),
(b:Person {name: 'Bob'}),
(c:Person {name: 'Charlie'}),
(a)-[:KNOWS]->(b),
(a)-[:KNOWS]->(c)
$$) AS (v agtype);

-- Correlated EXISTS referencing the optional variable (friend).
-- Neither Bob nor Charlie knows anyone, so for every outer p the
-- predicate fails on all optional matches; expect one row per person
-- with friend = NULL.
SELECT * FROM cypher('issue_2378', $$
MATCH (p:Person)
OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person)
WHERE EXISTS { (friend)-[:KNOWS]->(:Person) }
RETURN p.name AS name, friend.name AS friend
ORDER BY name
$$) AS (name agtype, friend agtype);

-- Correlated EXISTS referencing the outer variable (p).
-- Alice knows someone so her optional matches pass; Bob and Charlie
-- don't, so they are emitted with NULL friend.
SELECT * FROM cypher('issue_2378', $$
MATCH (p:Person)
OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person)
WHERE EXISTS { (p)-[:KNOWS]->(:Person) }
RETURN p.name AS name, friend.name AS friend
ORDER BY name, friend
$$) AS (name agtype, friend agtype);

-- Non-correlated EXISTS (was already working; kept as a regression guard).
SELECT * FROM cypher('issue_2378', $$
MATCH (p:Person)
OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person)
WHERE EXISTS { MATCH (x:Person) RETURN x }
RETURN p.name AS name, friend.name AS friend
ORDER BY name, friend
$$) AS (name agtype, friend agtype);

-- Plain scalar predicate on the optional variable (was already working).
SELECT * FROM cypher('issue_2378', $$
MATCH (p:Person)
OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person)
WHERE friend.name = 'Bob'
RETURN p.name AS name, friend.name AS friend
ORDER BY name
$$) AS (name agtype, friend agtype);

-- Constant-false WHERE on the optional side (was already working).
SELECT * FROM cypher('issue_2378', $$
MATCH (p:Person)
OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person)
WHERE false
RETURN p.name AS name, friend.name AS friend
ORDER BY name
$$) AS (name agtype, friend agtype);

SELECT drop_graph('issue_2378', true);

--
-- Clean up
--
Expand Down
63 changes: 62 additions & 1 deletion src/backend/parser/cypher_clause.c
Original file line number Diff line number Diff line change
Expand Up @@ -2640,6 +2640,7 @@ static Query *transform_cypher_match(cypher_parsestate *cpstate,
cypher_match *match_self = (cypher_match*) clause->self;
Node *where = match_self->where;


/*
* Check label validity early unless the predecessor clause chain
* contains a data-modifying operation (CREATE, SET, DELETE, MERGE).
Expand All @@ -2655,7 +2656,23 @@ static Query *transform_cypher_match(cypher_parsestate *cpstate,
match_self->where = make_false_where_clause(false);
}

if (has_list_comp_or_subquery((Node *)match_self->where, NULL))
/*
* For a non-optional MATCH with a list comprehension or subquery in
* its WHERE clause, transform the match pattern as a subquery and
* then apply the WHERE as an outer filter. This keeps the parent's
* namespace available to the subquery-bearing predicate.
*
* This rewrite is NOT safe for OPTIONAL MATCH: wrapping the WHERE
* around the transformed clause turns it into a post-filter on the
* LATERAL LEFT JOIN produced by transform_cypher_optional_match_clause,
* which incorrectly drops the null-preserving outer rows that the
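* LEFT JOIN generates when no right-hand match exists. For the
* optional case we fall through to the normal transform; there,
* transform_cypher_optional_match_clause detaches the WHERE before
* transforming the right-hand side and re-attaches it as the ON
* condition of the LEFT JOIN, which scopes the predicate to the
* optional binding while preserving null-filled outer rows
* (issue #2378).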
*/
if (!match_self->optional &&
has_list_comp_or_subquery((Node *)match_self->where, NULL))
{
match_self->where = NULL;
return transform_cypher_clause_with_where(cpstate,
Expand Down Expand Up @@ -2794,10 +2811,28 @@ static RangeTblEntry *transform_cypher_optional_match_clause(cypher_parsestate *
List *res_colnames = NIL, *res_colvars = NIL;
Alias *l_alias, *r_alias;
ParseNamespaceItem *jnsitem;
cypher_match *match_self = (cypher_match *) clause->self;
Node *saved_where = match_self->where;
int i = 0;

j->jointype = JOIN_LEFT;

/*
* If the OPTIONAL MATCH carries a WHERE clause, temporarily detach
* it so that the recursive right-hand transform does NOT try to
* apply it inside the inner subquery. We re-apply the predicate
* below as a LEFT JOIN ON condition, which is the only placement
* that both (a) scopes the predicate to the optional binding and
* (b) preserves null-filled outer rows when the predicate fails.
* Without this, a WHERE that contains a sub-pattern predicate
* (e.g. EXISTS {...} referencing the optional variable) either
* gets silently dropped during the inner transform (namespace
* mismatch re-binds the variable in a fresh scope) or gets pulled
* up by the containing wrapper and filters out the null-preserving
* rows. See issue #2378.
*/
match_self->where = NULL;

l_alias = makeAlias(PREV_CYPHER_CLAUSE_ALIAS, NIL);
r_alias = makeAlias(CYPHER_OPT_RIGHT_ALIAS, NIL);

Expand All @@ -2819,6 +2854,32 @@ static RangeTblEntry *transform_cypher_optional_match_clause(cypher_parsestate *
j->rarg = transform_clause_for_join(cpstate, clause, &r_rte,
&r_nsitem, r_alias);

/*
 * Add the right-side nsitem to the namespace so that the re-attached
 * WHERE below can resolve variables newly bound by the optional pattern.
 */
pstate->p_namespace = lappend(pstate->p_namespace, r_nsitem);

/*
* Now that both sides are visible in the namespace, re-attach the
* OPTIONAL MATCH's WHERE predicate as the LEFT JOIN's ON clause.
* PostgreSQL correctly preserves left rows whose right side fails
* an ON condition (LEFT JOIN semantics), which is exactly what
* Cypher OPTIONAL MATCH ... WHERE requires: if the WHERE filters
* out all matches for a given outer row, that outer row is still
* emitted with nulls in the optional columns.
*/
if (saved_where != NULL)
{
Node *where_qual;

where_qual = transform_cypher_expr(cpstate, saved_where,
EXPR_KIND_WHERE);
where_qual = coerce_to_boolean(pstate, where_qual, "WHERE");
j->quals = where_qual;
}

/* restore the WHERE on the node so we don't mutate caller state */
match_self->where = saved_where;

/*
* Since this is a left join, we need to mark j->rarg as it may potentially
* emit NULL. The jindex argument holds rtindex of the join's RTE, which is
Expand Down
Loading