@@ -2,6 +2,7 @@
 import json
 import os
 import random
+import uuid
 
 import pytest
 from flask import g
@@ -1655,6 +1656,77 @@ def test_datanode(graphql_client, client, submitter, pg_driver_clean, cgci_blgsp
     assert j1 == j2
 
 
+def test_datanode_query_all(
+    graphql_client, client, submitter, pg_driver_clean, cgci_blgsp
+):
+    """
+    Regression test for a bug where querying all datanode objects did not
+    return every object because "limit" and "offset" were not applied
+    correctly. Mitigated by having datanode return the first <limit> items
+    for each file node.
+    """
+    post_example_entities_together(client, pg_driver_clean, submitter)
+    utils.put_entity_from_file(client, "read_group.json", submitter)
+
+    # submit 20 SubmittedUnalignedReads and 25 SubmittedAlignedReads records
+    n_type_1 = 20
+    n_type_2 = 25
+    files_type_1 = {}
+    files_type_2 = {}
+    for _ in range(n_type_1):
+        unique_id = str(uuid.uuid4())
+        files_type_1[unique_id] = models.SubmittedUnalignedReads(
+            f"sub_id_{unique_id}", project_id="CGCI-BLGSP", object_id=unique_id
+        )
+    for _ in range(n_type_2):
+        unique_id = str(uuid.uuid4())
+        files_type_2[unique_id] = models.SubmittedAlignedReads(
+            f"sub_id_{unique_id}", project_id="CGCI-BLGSP", object_id=unique_id
+        )
+
+    with pg_driver_clean.session_scope() as s:
+        rg = pg_driver_clean.nodes(models.ReadGroup).one()
+        rg.submitted_unaligned_reads_files = files_type_1.values()
+        rg.submitted_aligned_reads_files = files_type_2.values()
+        s.merge(rg)
+
+    def check_results(results):
+        print("Datanode query result:", results)
+        assert len(results) == n_type_1 + n_type_2
+
+        sur_res = [e for e in results if e["type"] == "submitted_unaligned_reads"]
+        assert len(sur_res) == n_type_1
+        assert files_type_1.keys() == {e["object_id"] for e in sur_res}
+
+        sar_res = [e for e in results if e["type"] == "submitted_aligned_reads"]
+        assert len(sar_res) == n_type_2
+        assert files_type_2.keys() == {e["object_id"] for e in sar_res}
+
+    # query all the `datanode` records using `limit` and `offset`
+    chunk_size = 10
+    offset = 0
+    results = []
+    while True:
+        query_txt = "{datanode (first: %s, offset: %s) {object_id type}}" % (
+            chunk_size,
+            offset,
+        )
+        resp = graphql_client(query_txt).json
+        data = resp.get("data", {}).get("datanode", [])
+        if not data:
+            break
+        # "chunk_size * 2" because datanode returns the first <limit> items
+        # for each file node
+        assert len(data) <= chunk_size * 2
+        results += data
+        offset += chunk_size
+    check_results(results)
+
+    # query all the `datanode` records using `limit = 0` (query all records at once)
+    query_txt = "{datanode (first: 0) {object_id type}}"
+    resp = graphql_client(query_txt).json
+    results = resp.get("data", {}).get("datanode", [])
+    check_results(results)
+
+
 def test_boolean_filter(client, submitter, pg_driver_clean, cgci_blgsp):
     post_example_entities_together(client, pg_driver_clean, submitter)
 
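For context on the `chunk_size * 2` assertion in the pagination loop: per the docstring, the mitigation applies `first` (and, implied by the loop terminating after 45 records, `offset`) to each file node type separately rather than to the combined result set, so one page can hold up to `first` records per type. A minimal sketch of that behavior, under those assumptions (`paginate_datanode` and `nodes_by_type` are hypothetical names for illustration, not the actual resolver):

```python
# Hypothetical sketch of per-node-type pagination as exercised by the test;
# "paginate_datanode" and "nodes_by_type" are illustrative names, not the
# real resolver in the service's GraphQL layer.
def paginate_datanode(nodes_by_type, first, offset):
    page = []
    for nodes in nodes_by_type.values():
        # "first" and "offset" are applied to each file node type
        # independently; first == 0 means "no limit".
        page.extend(nodes[offset : offset + first] if first else nodes[offset:])
    return page
```

Under this model, the test's data (20 `submitted_unaligned_reads` plus 25 `submitted_aligned_reads`) queried with `first: 10` yields pages of at most 10 * 2 = 20 records, and offsets 0, 10, 20, ... return all 45 records before an empty page breaks the loop, which is what `check_results` verifies.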