Skip to content

Commit 48e0a64

Browse files
Fix bug when querying datanode with 'first' filter (#218)
1 parent 9864735 commit 48e0a64

File tree

3 files changed

+93
-14
lines changed

3 files changed

+93
-14
lines changed

bin/setup_psqlgraph.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@
66
from psqlgraph import create_all, Node, Edge
77

88

9-
def try_drop_test_data(user, database, root_user="postgres", host=""):
9+
def try_drop_test_data(user, password, database, root_user="postgres", host=""):
1010
print("Dropping old test data")
1111

1212
engine = create_engine(
13-
"postgres://{user}@{host}/postgres".format(user=root_user, host=host)
13+
"postgres://{user}:{pwd}@{host}/postgres".format(
14+
user=root_user, pwd=password, host=host
15+
)
1416
)
1517

1618
conn = engine.connect()
@@ -40,10 +42,12 @@ def setup_database(
4042
print("Setting up test database")
4143

4244
if not no_drop:
43-
try_drop_test_data(user, database, host=host)
45+
try_drop_test_data(user, password, database, host=host)
4446

4547
engine = create_engine(
46-
"postgres://{user}@{host}/postgres".format(user=root_user, host=host)
48+
"postgres://{user}:{pwd}@{host}/postgres".format(
49+
user=root_user, pwd=password, host=host
50+
)
4751
)
4852
conn = engine.connect()
4953
conn.execute("commit")

peregrine/resources/submission/graphql/node.py

+13-10
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ def query_with_args(classes, args, info):
464464
of_types = [
465465
psqlgraph.Node.get_subclass(label) for label in set(args.get("of_type", []))
466466
]
467-
rv = []
467+
all_items = []
468468
for cls in classes:
469469
if not of_types or cls in of_types:
470470
q = get_authorized_query(cls)
@@ -473,14 +473,10 @@ def query_with_args(classes, args, info):
473473
q.entity()._props["project_id"].astext == args["project_id"]
474474
)
475475

476-
rv.extend(apply_query_args(q, args, info).all())
477-
# apply_arg_limit() applied the limit to individual query results, but we
478-
# are concatenating several query results so we need to apply it again
479-
limit = args.get("first", DEFAULT_LIMIT)
480-
if limit > 0:
481-
return rv[:limit]
482-
else:
483-
return rv
476+
q = apply_query_args(q, args, info)
477+
all_items.extend(q.all())
478+
479+
return all_items
484480

485481

486482
def query_node_with_args(args, info):
@@ -1065,7 +1061,14 @@ def instantiate_graphene(t):
10651061

10661062

10671063
def resolve_datanode(self, info, **args):
1068-
"""The root query for the :class:`DataNode` node interface.
1064+
"""
1065+
The root query for the :class:`DataNode` node interface.
1066+
1067+
NOTE: A limitation of `datanode` is that the `first` and `offset` filters are not applied
1068+
properly. Example: If there are 4 file nodes, a `datanode` query with limit=10 returns up
1069+
to 4*10 = 40 items. `query_with_args()` concatenates the results of all file node queries
1070+
_after_ the filters are applied by `apply_query_args()`. Applying limits/offsets at the end
1071+
of `query_with_args()` causes inconsistent and incomplete results.
10691072
10701073
:returns:
10711074
A list of graphene object classes.

tests/graphql/test_graphql.py

+72
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import json
33
import os
44
import random
5+
import uuid
56

67
import pytest
78
from flask import g
@@ -1655,6 +1656,77 @@ def test_datanode(graphql_client, client, submitter, pg_driver_clean, cgci_blgsp
16551656
assert j1 == j2
16561657

16571658

1659+
def test_datanode_query_all(
    graphql_client, client, submitter, pg_driver_clean, cgci_blgsp
):
    """
    Regression test for a bug where querying all datanode objects does not return all objects
    because "limit" and "offset" are not applied correctly.
    Mitigated by datanode returning the first <limit> items for each file node.
    """
    post_example_entities_together(client, pg_driver_clean, submitter)
    utils.put_entity_from_file(client, "read_group.json", submitter)

    def _make_files(model_cls, count):
        # Build `count` file records keyed by a fresh object_id each.
        created = {}
        for _ in range(count):
            oid = str(uuid.uuid4())
            created[oid] = model_cls(
                f"sub_id_{oid}", project_id="CGCI-BLGSP", object_id=oid
            )
        return created

    # submit 20 SubmittedUnalignedReads and 25 SubmittedAlignedReads records
    n_unaligned = 20
    n_aligned = 25
    unaligned_files = _make_files(models.SubmittedUnalignedReads, n_unaligned)
    aligned_files = _make_files(models.SubmittedAlignedReads, n_aligned)

    with pg_driver_clean.session_scope() as session:
        read_group = pg_driver_clean.nodes(models.ReadGroup).one()
        read_group.submitted_unaligned_reads_files = unaligned_files.values()
        read_group.submitted_aligned_reads_files = aligned_files.values()
        session.merge(read_group)

    def check_results(results):
        # Every submitted record must appear exactly once, per type.
        print("Datanode query result:", results)
        assert len(results) == n_unaligned + n_aligned

        sur = [e for e in results if e["type"] == "submitted_unaligned_reads"]
        assert len(sur) == n_unaligned
        assert unaligned_files.keys() == {e["object_id"] for e in sur}

        sar = [e for e in results if e["type"] == "submitted_aligned_reads"]
        assert len(sar) == n_aligned
        assert aligned_files.keys() == {e["object_id"] for e in sar}

    # query all the `datanode` records using `limit` and `offset`
    page_size = 10
    offset = 0
    collected = []
    while True:
        query_txt = "{datanode (first: %s, offset: %s) {object_id type}}" % (
            page_size,
            offset,
        )
        resp = graphql_client(query_txt).json
        page = resp.get("data", {}).get("datanode", [])
        if not len(page):
            break
        # "page_size * 2" because datanode returns the first <limit> items for each file node
        assert len(page) <= page_size * 2
        collected.extend(page)
        offset += page_size
    check_results(collected)

    # query all the `datanode` records using `limit = 0` (query all records at once)
    query_txt = "{datanode (first: 0) {object_id type}}"
    resp = graphql_client(query_txt).json
    check_results(resp.get("data", {}).get("datanode", []))
1728+
1729+
16581730
def test_boolean_filter(client, submitter, pg_driver_clean, cgci_blgsp):
16591731
post_example_entities_together(client, pg_driver_clean, submitter)
16601732

0 commit comments

Comments (0)