Skip to content

Commit 14d113e

Browse files
authored
fix!: use correct representation of Coding object in mappings (#457)
close #456 * bump disease normalizer version to pull in updated mappings representations * correct `Coding` representation * `system` MUST use `iriReference`, not a free-text label * `code` MUST use syntax defined by the `system` * `id` will use record `concept_id` * Removes `SYSTEM_URI_TO_NAMESPACE` mapping (since it's no longer needed)
1 parent 3783333 commit 14d113e

File tree

5 files changed

+713
-496
lines changed

5 files changed

+713
-496
lines changed

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@ dependencies = [
3030
"click",
3131
"boto3",
3232
"ga4gh.vrs==2.0.0a13",
33-
"disease-normalizer~=0.7.0",
33+
"disease-normalizer~=0.8.0",
3434
]
3535
dynamic = ["version"]
3636

3737
[project.optional-dependencies]
3838
etl = [
39-
"disease-normalizer[etl]~=0.7.0",
39+
#"disease-normalizer[etl]~=0.7.0",
4040
"owlready2",
4141
"rdflib",
4242
"wikibaseintegrator>=0.12.0",

src/therapy/query.py

Lines changed: 23 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@
77
from typing import Any, TypeVar
88

99
from botocore.exceptions import ClientError
10-
from disease.schemas import NAMESPACE_TO_SYSTEM_URI as DISEASE_NAMESPACE_TO_SYSTEM_URI
11-
from disease.schemas import NamespacePrefix as DiseaseNamespacePrefix
10+
from disease.query import get_concept_mapping as get_disease_concept_mapping
1211
from ga4gh.core.models import (
1312
Coding,
1413
ConceptMapping,
@@ -23,7 +22,6 @@
2322
from therapy.database import AbstractDatabase
2423
from therapy.schemas import (
2524
NAMESPACE_TO_SYSTEM_URI,
26-
SYSTEM_URI_TO_NAMESPACE,
2725
BaseNormalizationService,
2826
HasIndication,
2927
MatchesNormalized,
@@ -365,7 +363,7 @@ def _add_merged_meta(self, response: NormalizationService) -> NormalizationServi
365363

366364
sources = []
367365
for m in therapy.mappings or []:
368-
ns = SYSTEM_URI_TO_NAMESPACE.get(m.coding.system)
366+
ns = m.coding.id.split(":")[0]
369367
if ns in PREFIX_LOOKUP:
370368
sources.append(PREFIX_LOOKUP[ns])
371369

@@ -399,39 +397,42 @@ def _add_therapy(
399397
:return: completed response object ready to return to user
400398
"""
401399

402-
def _create_concept_mapping(
400+
def _get_concept_mapping(
403401
concept_id: str,
404402
relation: Relation,
405-
ns_to_system_uri: dict[str, str],
406-
ns_prefix: NamespacePrefix | DiseaseNamespacePrefix,
407403
) -> ConceptMapping:
408-
"""Create concept mapping for therapy or disease identifier
404+
"""Create concept mapping for identifier
409405
410-
``system`` will use OBO Foundry persistent URL (PURL), source homepage, or
411-
namespace prefix, in that order of preference, if available.
406+
``system`` will use system prefix URL, OBO Foundry persistent URL (PURL), or
407+
source homepage, in that order of preference.
412408
413409
:param concept_id: Concept identifier represented as a curie
414410
:param relation: SKOS mapping relationship
415-
:param ns_to_system_uri: Dictionary containing mapping from namespace to
416-
system URI
417-
:param ns_prefix: Namespace prefix enum
418-
:return: Concept mapping for therapy or disease identifier
411+
:raises ValueError: If source of concept ID is not a valid
412+
``NamespacePrefix``
413+
:return: Concept mapping for identifier
419414
"""
420-
source = concept_id.split(":")[0]
415+
source, source_code = concept_id.split(":")
421416

422417
try:
423-
source = ns_prefix(source)
418+
source = NamespacePrefix(source)
424419
except ValueError:
425420
try:
426-
source = ns_prefix(source.upper())
421+
source = NamespacePrefix(source.upper())
427422
except ValueError as e:
428423
err_msg = f"Namespace prefix not supported: {source}"
429424
raise ValueError(err_msg) from e
430425

431-
system = ns_to_system_uri.get(source, source)
426+
if source == NamespacePrefix.CHEBI:
427+
source_code = concept_id
432428

433429
return ConceptMapping(
434-
coding=Coding(code=code(concept_id), system=system), relation=relation
430+
coding=Coding(
431+
id=concept_id,
432+
code=code(source_code),
433+
system=NAMESPACE_TO_SYSTEM_URI[source],
434+
),
435+
relation=relation,
435436
)
436437

437438
therapy_obj = MappableConcept(
@@ -443,20 +444,16 @@ def _create_concept_mapping(
443444

444445
# mappings
445446
mappings = [
446-
_create_concept_mapping(
447+
_get_concept_mapping(
447448
concept_id=record["concept_id"],
448449
relation=Relation.EXACT_MATCH,
449-
ns_to_system_uri=NAMESPACE_TO_SYSTEM_URI,
450-
ns_prefix=NamespacePrefix,
451450
)
452451
]
453452
source_ids = record.get("xrefs", []) + record.get("associated_with", [])
454453
mappings.extend(
455-
_create_concept_mapping(
454+
_get_concept_mapping(
456455
concept_id=source_id,
457456
relation=Relation.RELATED_MATCH,
458-
ns_to_system_uri=NAMESPACE_TO_SYSTEM_URI,
459-
ns_prefix=NamespacePrefix,
460457
)
461458
for source_id in source_ids
462459
)
@@ -490,11 +487,9 @@ def _create_concept_mapping(
490487

491488
if indication.normalized_disease_id:
492489
mappings = [
493-
_create_concept_mapping(
490+
get_disease_concept_mapping(
494491
concept_id=indication.normalized_disease_id,
495492
relation=Relation.RELATED_MATCH,
496-
ns_to_system_uri=DISEASE_NAMESPACE_TO_SYSTEM_URI,
497-
ns_prefix=DiseaseNamespacePrefix,
498493
)
499494
]
500495
else:

src/therapy/schemas.py

Lines changed: 38 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from datetime import datetime
44
from enum import Enum, IntEnum
5+
from types import MappingProxyType
56
from typing import Any, Literal
67

78
from ga4gh.core.models import MappableConcept
@@ -258,37 +259,39 @@ class NamespacePrefix(Enum):
258259
WIKIDATA = "wikidata"
259260

260261

261-
# Source to URI. Will use OBO Foundry persistent URL (PURL) or source homepage
262-
NAMESPACE_TO_SYSTEM_URI: dict[NamespacePrefix, str] = {
263-
NamespacePrefix.ATC: "https://www.who.int/tools/atc-ddd-toolkit/atc-classification/",
264-
NamespacePrefix.CHEBI: "http://purl.obolibrary.org/obo/chebi.owl",
265-
NamespacePrefix.CHEMBL: "https://www.ebi.ac.uk/chembl/",
266-
NamespacePrefix.CHEMIDPLUS: "https://pubchem.ncbi.nlm.nih.gov/source/ChemIDplus",
267-
NamespacePrefix.CASREGISTRY: "https://pubchem.ncbi.nlm.nih.gov/source/ChemIDplus",
268-
NamespacePrefix.CVX: "https://www2a.cdc.gov/vaccines/iis/iisstandards/vaccines.asp?rpt=cvx",
269-
NamespacePrefix.DRUGBANK: "https://go.drugbank.com",
270-
NamespacePrefix.DRUGCENTRAL: "https://drugcentral.org",
271-
NamespacePrefix.DRUGSATFDA_ANDA: "https://www.fda.gov/drugs/types-applications/abbreviated-new-drug-application-anda",
272-
NamespacePrefix.DRUGSATFDA_NDA: "https://www.fda.gov/drugs/types-applications/new-drug-application-nda",
273-
NamespacePrefix.HEMONC: "https://hemonc.org",
274-
NamespacePrefix.INCHIKEY: "https://www.chemspider.com",
275-
NamespacePrefix.IUPHAR_LIGAND: "https://www.guidetopharmacology.org/GRAC/LigandListForward",
276-
NamespacePrefix.GUIDETOPHARMACOLOGY: "https://www.guidetopharmacology.org/GRAC/LigandListForward",
277-
NamespacePrefix.MMSL: "https://www.nlm.nih.gov/research/umls/rxnorm/sourcereleasedocs/mmsl.html",
278-
NamespacePrefix.MSH: "https://id.nlm.nih.gov/mesh/",
279-
NamespacePrefix.NCIT: "http://purl.obolibrary.org/obo/ncit.owl",
280-
NamespacePrefix.NDC: "https://dps.fda.gov/ndc",
281-
NamespacePrefix.PUBCHEMCOMPOUND: "https://pubchem.ncbi.nlm.nih.gov/docs/compounds",
282-
NamespacePrefix.PUBCHEMSUBSTANCE: "https://pubchem.ncbi.nlm.nih.gov/docs/substances",
283-
NamespacePrefix.RXNORM: "https://www.nlm.nih.gov/research/umls/rxnorm/index.html",
284-
NamespacePrefix.SPL: "https://www.fda.gov/industry/fda-data-standards-advisory-board/structured-product-labeling-resources",
285-
NamespacePrefix.UMLS: "https://www.nlm.nih.gov/research/umls/index.html",
286-
NamespacePrefix.UNII: "https://precision.fda.gov/uniisearch",
287-
NamespacePrefix.UNIPROT: "https://www.uniprot.org",
288-
NamespacePrefix.USP: "https://www.usp.org/health-quality-safety/compendial-nomenclature",
289-
NamespacePrefix.VANDF: "https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/VANDF",
290-
NamespacePrefix.WIKIDATA: "https://www.wikidata.org",
291-
}
262+
# Source to URI. Will use system prefix URL, OBO Foundry persistent URL (PURL) or source homepage
263+
NAMESPACE_TO_SYSTEM_URI: MappingProxyType[NamespacePrefix, str] = MappingProxyType(
264+
{
265+
NamespacePrefix.ATC: "https://atcddd.fhi.no/atc_ddd_index/?code=",
266+
NamespacePrefix.CHEBI: "https://www.ebi.ac.uk/chebi/searchId.do?chebiId=",
267+
NamespacePrefix.CHEMBL: "https://www.ebi.ac.uk/chembl/explore/compound/",
268+
NamespacePrefix.CHEMIDPLUS: "https://commonchemistry.cas.org/detail?cas_rn=",
269+
NamespacePrefix.CASREGISTRY: "https://commonchemistry.cas.org/detail?cas_rn=",
270+
NamespacePrefix.CVX: "https://www2a.cdc.gov/vaccines/iis/iisstandards/vaccines.asp?rpt=cvx",
271+
NamespacePrefix.DRUGBANK: "https://go.drugbank.com/drugs/",
272+
NamespacePrefix.DRUGCENTRAL: "https://drugcentral.org/drugcard/",
273+
NamespacePrefix.DRUGSATFDA_ANDA: "https://www.accessdata.fda.gov/scripts/cder/daf/index.cfm?event=overview.process&ApplNo=",
274+
NamespacePrefix.DRUGSATFDA_NDA: "https://www.accessdata.fda.gov/scripts/cder/daf/index.cfm?event=overview.process&ApplNo=",
275+
NamespacePrefix.HEMONC: "https://hemonc.org",
276+
NamespacePrefix.INCHIKEY: "https://www.chemspider.com",
277+
NamespacePrefix.IUPHAR_LIGAND: "https://www.guidetopharmacology.org/GRAC/LigandDisplayForward?ligandId=",
278+
NamespacePrefix.GUIDETOPHARMACOLOGY: "https://www.guidetopharmacology.org/GRAC/LigandDisplayForward?ligandId=",
279+
NamespacePrefix.MMSL: "https://www.nlm.nih.gov/research/umls/rxnorm/sourcereleasedocs/mmsl.html",
280+
NamespacePrefix.MSH: "https://id.nlm.nih.gov/mesh/",
281+
NamespacePrefix.NCIT: "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=",
282+
NamespacePrefix.NDC: "https://dps.fda.gov/ndc/searchresult?selection=finished_product&content=PRODUCTNDC&type=",
283+
NamespacePrefix.PUBCHEMCOMPOUND: "https://pubchem.ncbi.nlm.nih.gov/compound/",
284+
NamespacePrefix.PUBCHEMSUBSTANCE: "https://pubchem.ncbi.nlm.nih.gov/substance/",
285+
NamespacePrefix.RXNORM: "https://mor.nlm.nih.gov/RxNav/search?searchBy=RXCUI&searchTerm=",
286+
NamespacePrefix.SPL: "https://www.fda.gov/industry/fda-data-standards-advisory-board/structured-product-labeling-resources",
287+
NamespacePrefix.UMLS: "https://uts.nlm.nih.gov/uts/umls/concept/",
288+
NamespacePrefix.UNII: "https://precision.fda.gov/uniisearch/srs/unii/",
289+
NamespacePrefix.UNIPROT: "http://purl.uniprot.org/uniprot/",
290+
NamespacePrefix.USP: "https://www.usp.org/health-quality-safety/compendial-nomenclature",
291+
NamespacePrefix.VANDF: "https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/VANDF",
292+
NamespacePrefix.WIKIDATA: "https://www.wikidata.org/wiki/",
293+
}
294+
)
292295

293296
# URI to source
294297
SYSTEM_URI_TO_NAMESPACE = {
@@ -540,21 +543,21 @@ class NormalizationService(BaseNormalizationService):
540543
{
541544
"coding": {
542545
"code": "2555",
543-
"system": "https://www.nlm.nih.gov/research/umls/rxnorm/index.html",
546+
"system": "https://mor.nlm.nih.gov/RxNav/search?searchBy=RXCUI&searchTerm=",
544547
},
545548
"relation": "exactMatch",
546549
},
547550
{
548551
"coding": {
549552
"code": "C376",
550-
"system": "http://purl.obolibrary.org/obo/ncit.owl",
553+
"system": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=",
551554
},
552555
"relation": "relatedMatch",
553556
},
554557
{
555558
"coding": {
556559
"code": "15663-27-1",
557-
"system": "https://pubchem.ncbi.nlm.nih.gov/source/ChemIDplus",
560+
"system": "https://commonchemistry.cas.org/detail?cas_rn=",
558561
},
559562
"relation": "relatedMatch",
560563
},
@@ -843,7 +846,7 @@ class SearchService(BaseModel):
843846
"data_license": "CC0 1.0",
844847
"data_license_url": "https://creativecommons.org/publicdomain/zero/1.0/",
845848
"version": "5.1.10",
846-
"data_url": "https://go.drugbank.com/releases/latest#open-data",
849+
"data_url": "https://go.drugbank.com/releases/latest#open-data",
847850
"rdp_url": "http://reusabledata.org/drugbank.html",
848851
"data_license_attributes": {
849852
"non_commercial": False,

tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def pytest_collection_modifyitems(items):
2121
When creating new test modules, be sure to add them here.
2222
"""
2323
MODULE_ORDER = [ # noqa: N806
24+
"test_schemas",
2425
"test_chembl",
2526
"test_chemidplus",
2627
"test_drugbank",

0 commit comments

Comments
 (0)