Skip to content

Commit

Permalink
Add relations to PFB-based verbatim manifest for AnVIL (#6066)
Browse files Browse the repository at this point in the history
  • Loading branch information
nadove-ucsc committed Jan 30, 2025
1 parent b308aa7 commit 6439a73
Show file tree
Hide file tree
Showing 3 changed files with 221 additions and 23 deletions.
49 changes: 49 additions & 0 deletions src/azul/plugins/metadata/anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
from more_itertools import (
one,
)
from more_itertools.more import (
always_iterable,
)

from azul import (
JSON,
Expand Down Expand Up @@ -63,6 +66,7 @@
from azul.types import (
AnyMutableJSON,
MutableJSON,
MutableJSONs,
)


Expand Down Expand Up @@ -342,6 +346,15 @@ def recurse(mapping: MetadataPlugin._FieldMapping, path: FieldPath):
for table in anvil_schema['tables']
}

# For every table declared in the schema, the outgoing relationships of that
# table as (referenced table name, foreign key column name) pairs. A table
# that is not the source of any relationship maps to an empty list.
foreign_keys_by_table = {
    table_name: [
        (relationship['to']['table'], relationship['from']['column'])
        for relationship in anvil_schema['relationships']
        if relationship['from']['table'] == table_name
    ]
    for table_name in (table['name'] for table in anvil_schema['tables'])
}

def verbatim_pfb_entity_id(self, replica: JSON) -> str:
replica_type = replica['replica_type']
try:
Expand All @@ -354,6 +367,42 @@ def verbatim_pfb_entity_id(self, replica: JSON) -> str:
else:
return replica['contents'][primary_key]

def verbatim_pfb_relations(self,
                           replica: JSON
                           ) -> list[tuple[str, str]]:
    """
    Return the relations of the given replica as a list of
    (referenced table name, foreign key value) pairs.

    Replica types that have an entry in `foreign_keys_by_table` yield one
    pair per foreign key value found in the replica's contents. The
    `duos_dataset_registration` type yields a single relation to
    `anvil_dataset`. Any other replica type is delegated to the
    superclass.
    """
    replica_type = replica['replica_type']
    row = replica['contents']
    try:
        foreign_keys = self.foreign_keys_by_table[replica_type]
    except KeyError:
        # Not a table with an entry in `foreign_keys_by_table`
        if replica_type == 'duos_dataset_registration':
            return [('anvil_dataset', row['dataset_id'])]
        return super().verbatim_pfb_relations(replica)
    relations = []
    for referenced_table, column in foreign_keys:
        # A foreign key column may hold a single value or several;
        # `always_iterable` lets both cases be iterated the same way.
        for key in always_iterable(row[column]):
            relations.append((referenced_table, key))
    return relations

def verbatim_pfb_links(self, replica_type: str) -> MutableJSONs:
    """
    Return the link declarations for entities of the given replica type.

    Each link is a dictionary with the keys `dst`, `name` and
    `multiplicity`. For `duos_dataset_registration` a single, unnamed
    one-to-one link to `anvil_dataset` is returned. For any other type,
    one many-to-one link is returned per schema relationship that
    originates from the table of that name.
    """
    if replica_type == 'duos_dataset_registration':
        return [{
            'dst': 'anvil_dataset',
            'name': '',
            'multiplicity': 'ONE_TO_ONE'
        }]
    links = []
    for relationship in anvil_schema['relationships']:
        if relationship['from']['table'] == replica_type:
            links.append({
                'dst': relationship['to']['table'],
                'name': relationship['name'],
                # Each link is between a foreign key and a primary key.
                # Primary keys are unique within their own table, but
                # multiple rows in other tables can reference them.
                'multiplicity': 'MANY_TO_ONE',
            })
    return links

def verbatim_pfb_schema(self,
replicas: list[JSON]
) -> list[JSON]:
Expand Down
14 changes: 8 additions & 6 deletions test/integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1036,23 +1036,25 @@ def _check_terra_pfb_manifest(self, _catalog: CatalogName, response: bytes):
# field contains an entity, the schema check therefore extends to
# the various entity types.
fastavro.validate(record, record_schema)
object_ = cast(MutableJSON, record['object'])
if 0 == next(num_records):
# PFB requires a special `Metadata` entity to occur first. It is
# used to declare the relations between entity types, thereby
# expressing additional constraints on the `relations` field.
#
# FIXME: We don't currently declare relations
# https://github.com/DataBiosphere/azul/issues/6066
#
# For now, we just check the `name` and the absence of an `id`.
self.assertEqual('Metadata', record['name'])
self.assertIsNone(record['id'])
nodes = cast(MutableJSONs, object_['nodes'])
for node in nodes:
for link in node['links']:
self.assertIn(link['dst'], entity_types)
# The following is redundant given the schema validation above but
# we'll leave it in for illustration.
fields = entity_types[record['name']]['fields']
fields_present = set(record['object'].keys())
fields_present = set(object_.keys())
fields_expected = set(f['name'] for f in fields)
self.assertEqual(fields_present, fields_expected)
for relation in cast(MutableJSONs, record['relations']):
self.assertIn(relation['dst_name'], entity_types)
# We expect to observe the special `Metadata` entity record and at least
# one additional entity record
self.assertGreater(next(num_records), 1)
Expand Down
Loading

0 comments on commit 6439a73

Please sign in to comment.