@@ -83,188 +83,75 @@ def __init__(self, db_url: str | None = None) -> None:
83
83
self .therapy_query_handler = TherapyQueryHandler (create_therapy_db (db_url ))
84
84
85
85
async def normalize_variation(
    self, query: str
) -> Allele | CopyNumberChange | CopyNumberCount | None:
    """Attempt to normalize a variation query.

    :param query: Variation query to normalize. An empty query is never sent
        to the Variation Normalizer.
    :raises TokenRetrievalError: If AWS credentials are expired
    :return: A normalized variation, if available. ``None`` when the query is
        empty, when the normalizer response carries no variation, or when the
        normalizer raises anything other than ``TokenRetrievalError``.
    """
    # Nothing to look up; skip the awaited normalizer call entirely.
    if not query:
        return None

    try:
        variation_norm_resp = (
            await self.variation_normalizer.normalize_handler.normalize(query)
        )
        if variation_norm_resp and variation_norm_resp.variation:
            return variation_norm_resp.variation
    except TokenRetrievalError as e:
        # Credential failures are not recoverable here; log and propagate.
        _logger.error(e)
        raise
    except Exception as e:
        # Best effort: any other normalizer failure is logged and reported to
        # the caller as "no normalized variation".
        _logger.error(
            "Variation Normalizer raised an exception using query %s: %s",
            query,
            e,
        )

    return None
116
110
117
- def normalize_gene (
118
- self , queries : list [str ]
119
- ) -> tuple [NormalizedGene | None , str | None ]:
120
- """Normalize gene queries.
121
-
122
- Given a collection of terms, return the normalized concept with the highest
123
- match (see the
124
- `Gene Normalizer docs <https://gene-normalizer.readthedocs.io/latest/usage.html#match-types>`_ for
125
- more details on match types, and how queries are resolved).
111
def normalize_gene(self, query: str) -> tuple[NormalizedGene | None, str | None]:
    """Attempt to normalize a gene query.

    >>> from metakb.normalizers import ViccNormalizers
    >>> v = ViccNormalizers()
    >>> v.normalize_gene("BRAF")[1]
    'hgnc:1097'

    :param query: Gene query to normalize
    :raises TokenRetrievalError: If AWS credentials are expired
    :return: Gene normalization response and normalized gene ID. The response is
        ``None`` if the Gene Normalizer raised an exception; the ID is ``None``
        if the query did not match a gene.
    """
    # "gene" names both the concept (for logging) and the response attribute
    # that holds the normalized gene.
    return self._normalize_concept(query, self.gene_query_handler, "gene")
149
124
150
- try :
151
- gene_norm_resp = self .gene_query_handler .normalize (query_str )
152
- except TokenRetrievalError as e :
153
- _logger .error (e )
154
- raise e
155
- except Exception as e :
156
- _logger .error (
157
- "Gene Normalizer raised an exception using query %s: %s" ,
158
- query_str ,
159
- e ,
160
- )
161
- else :
162
- if gene_norm_resp .match_type > highest_match :
163
- highest_match = gene_norm_resp .match_type
164
- normalized_gene_id = gene_norm_resp .gene .primaryCode .root
165
- if highest_match == 100 :
166
- break
167
- return gene_norm_resp , normalized_gene_id
168
-
169
- def normalize_disease (
170
- self , queries : list [str ]
171
- ) -> tuple [NormalizedDisease | None , str | None ]:
172
- """Normalize disease queries.
125
def normalize_disease(
    self, query: str
) -> tuple[NormalizedDisease | None, str | None]:
    """Attempt to normalize a disease query.

    >>> from metakb.normalizers import ViccNormalizers
    >>> v = ViccNormalizers()
    >>> v.normalize_disease("von hippel-lindau syndrome")[1]
    'ncit:C3105'

    :param query: Disease query to normalize
    :raises TokenRetrievalError: If AWS credentials are expired
    :return: Disease normalization response and normalized disease ID. The
        response is ``None`` if the Disease Normalizer raised an exception; the
        ID is ``None`` if the query did not match a disease.
    """
    # "disease" names both the concept (for logging) and the response attribute
    # that holds the normalized disease.
    return self._normalize_concept(query, self.disease_query_handler, "disease")
223
141
224
- Given a collection of terms, return the normalized concept with the highest
225
- match.
142
def normalize_therapy(
    self, query: str
) -> tuple[NormalizedTherapy | None, str | None]:
    """Attempt to normalize a therapy query.

    >>> from metakb.normalizers import ViccNormalizers
    >>> v = ViccNormalizers()
    >>> v.normalize_therapy("VAZALORE")[1]
    'rxcui:1191'

    :param query: Therapy query to normalize
    :raises TokenRetrievalError: If AWS credentials are expired
    :return: Therapy normalization response and normalized therapy ID. The
        response is ``None`` if the Therapy Normalizer raised an exception; the
        ID is ``None`` if the query did not match a therapy.
    """
    # "therapy" names both the concept (for logging) and the response attribute
    # that holds the normalized therapy.
    return self._normalize_concept(query, self.therapy_query_handler, "therapy")
268
155
269
156
@staticmethod
270
157
def get_regulatory_approval_extension (
@@ -331,6 +218,41 @@ def get_regulatory_approval_extension(
331
218
332
219
return regulatory_approval_extension
333
220
221
@staticmethod
def _normalize_concept(
    query: str,
    query_handler: GeneQueryHandler | DiseaseQueryHandler | TherapyQueryHandler,
    concept_name: str,
) -> tuple[
    NormalizedGene | NormalizedDisease | NormalizedTherapy | None, str | None
]:
    """Attempt to normalize a concept.

    :param query: Query to normalize
    :param query_handler: Query handler for normalizer
    :param concept_name: Name of concept (gene, disease, therapy). Also used as
        the name of the attribute on the normalizer response that holds the
        normalized concept.
    :raises TokenRetrievalError: If AWS credentials are expired
    :return: Normalizer response and normalized ID. The response is ``None`` if
        the normalizer raised an exception other than ``TokenRetrievalError``;
        the ID is ``None`` unless the response reports a match.
    """
    try:
        normalizer_resp = query_handler.normalize(query)
    except TokenRetrievalError as e:
        # Credential failures are not recoverable here; log and propagate.
        _logger.error(e)
        raise
    except Exception as e:
        # Best effort: any other normalizer failure is logged and reported to
        # the caller as "no response, no ID".
        _logger.error(
            "%s Normalizer raised an exception using query %s: %s",
            concept_name.capitalize(),
            query,
            e,
        )
        return None, None

    normalized_id = None
    # A falsy match_type means there is no concept to read an ID from.
    if normalizer_resp.match_type:
        normalized_id = getattr(normalizer_resp, concept_name).primaryCode.root

    return normalizer_resp, normalized_id
255
+
334
256
335
257
class NormalizerName (str , Enum ):
336
258
"""Constrain normalizer CLI options."""
0 commit comments