Skip to content

Commit

Permalink
build!: update vicc normalizers to pull in gks coding changes (#432)
Browse files: browse the repository at this point in the history
close #423
  • Loading branch information
korikuzma committed Mar 9, 2025
1 parent 6550d7c commit 3a1f3ea
Show file tree
Hide file tree
Showing 9 changed files with 227 additions and 155 deletions.
10 changes: 5 additions & 5 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ verify_ssl = true
name = "pypi"

[packages]
"ga4gh.vrs" = "~=2.0.0a10"
gene-normalizer = {version = "~=0.4.1", extras = ["etl"]}
variation-normalizer = "~=0.10.0"
disease-normalizer = {version = "~=0.5.0", extras = ["etl"]}
thera-py = {version = "~=0.6.0", extras = ["etl"]}
"ga4gh.vrs" = "==2.0.0a13"
gene-normalizer = {version = "~=0.7.0", extras = ["etl"]}
variation-normalizer = "~=0.12.1"
disease-normalizer = {version = "~=0.8.0", extras = ["etl"]}
thera-py = {version = "~=0.8.0", extras = ["etl"]}
civicpy = "~=3.1"
requests = "*"
pydantic = "==2.*"
Expand Down
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ dependencies = [
"ga4gh.vrs==2.0.0a13",
"ga4gh.cat_vrs~=0.2.1",
"ga4gh.va_spec~=0.2.0a0",
"gene-normalizer[etl]~=0.6.0",
"variation-normalizer~=0.12.0",
"disease-normalizer[etl]~=0.7.0",
"thera-py[etl]~=0.7.1",
"gene-normalizer[etl]~=0.7.0",
"variation-normalizer~=0.12.1",
"disease-normalizer[etl]~=0.8.0",
"thera-py[etl]~=0.8.0",
"civicpy~=3.1",
"requests",
"pydantic==2.*",
Expand Down
5 changes: 4 additions & 1 deletion src/metakb/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,10 @@ def _add_mappings_and_exts_to_obj(obj: dict, obj_keys: list[str]) -> None:
extensions = mapping.get("extensions") or []
for ext in extensions:
if ext["name"] == NormalizerExtensionName.PRIORITY and ext["value"]:
normalizer_id = mapping["coding"]["code"]
if mapping["coding"]["id"].startswith("MONDO"):
normalizer_id = mapping["coding"]["code"]
else:
normalizer_id = mapping["coding"]["id"]
obj["normalizer_id"] = normalizer_id
obj_keys.append("normalizer_id:$normalizer_id")
break
Expand Down
23 changes: 17 additions & 6 deletions src/metakb/transformers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,7 @@ def _update_mapping(
mapping: ConceptMapping,
normalized_id: str,
normalizer_label: str,
match_on_coding_id: bool = True,
) -> Extension:
"""Update ``mapping`` to include extension on whether ``mapping`` contains
code that matches the merged record's primary identifier.
Expand All @@ -572,10 +573,16 @@ def _update_mapping(
matches normalized merged identifier.
:param normalized_id: Concept ID from normalized record
:param normalizer_label: Label from normalized record
:param match_on_coding_id: Whether to match on ``coding.id`` or
``coding.code`` (MONDO is represented differently)
:return: ConceptMapping with normalizer extension added as well as label (
if mapping id matches normalized merged id)
"""
is_priority = normalized_id == mapping.coding.code.root
is_priority = (
normalized_id == mapping.coding.id
if match_on_coding_id
else normalized_id == mapping.coding.code.root.lower()
)

merged_id_ext = Extension(
name=NormalizerExtensionName.PRIORITY.value, value=is_priority
Expand All @@ -599,25 +606,29 @@ def _update_mapping(

normalizer_mappings = normalizer_resp_obj.mappings or []
for mapping in normalizer_mappings:
if normalized_id == mapping.coding.code.root:
if normalized_id == mapping.coding.id:
mappings.append(
_update_mapping(mapping, normalized_id, normalizer_label)
)
else:
mapping_code_lower = mapping.coding.code.root.lower()
if (
is_disease
and mapping_code_lower.startswith(
and mapping.coding.code.root.lower().startswith(
DiseaseNamespacePrefix.MONDO.value
)
) or (
is_gene
and mapping_code_lower.startswith(
and mapping.coding.id.startswith(
(GeneNamespacePrefix.NCBI.value, GeneNamespacePrefix.HGNC.value)
)
):
mappings.append(
_update_mapping(mapping, normalized_id, normalizer_label)
_update_mapping(
mapping,
normalized_id,
normalizer_label,
match_on_coding_id=is_gene,
)
)
return mappings

Expand Down
6 changes: 3 additions & 3 deletions src/metakb/transformers/civic.py
Original file line number Diff line number Diff line change
Expand Up @@ -858,8 +858,8 @@ def _get_ncbi_concept_mapping(ncbigene_id: str, gene: dict) -> ConceptMapping:

civic_ncbi_annotation_match = False
for mapping in mappings:
if mapping.coding.code.root.startswith("ncbigene:"):
if mapping.coding.code.root == ncbigene:
if mapping.coding.id.startswith("ncbigene:"):
if mapping.coding.id == ncbigene:
mapping.extensions.append(
Extension(name="civic_annotation", value=True)
)
Expand All @@ -869,7 +869,7 @@ def _get_ncbi_concept_mapping(ncbigene_id: str, gene: dict) -> ConceptMapping:
_logger.debug(
"CIViC NCBI gene and Gene Normalizer mismatch: %s vs %s",
ncbigene,
mapping.coding.code.root,
mapping.coding.id,
)

if not civic_ncbi_annotation_match:
Expand Down
Loading

0 comments on commit 3a1f3ea

Please sign in to comment.