@@ -109,18 +109,27 @@ def etl_clinical(*, db: DatabaseSession):
109
109
site = find_or_create_site (db ,
110
110
identifier = site_identifier (record .document ["site" ]),
111
111
details = {"type" : "retrospective" })
112
-
112
+ else :
113
+ site = None
114
+
113
115
# Sequencing accession IDs are being loaded into the clinical receiving table, and will
114
116
# be processed differently than other records, populating only the warehouse.consensus_genome and
115
117
# warehouse.genomic_sequence tables with the relevant data.
116
118
if record .document .get ('genbank_accession' ) or record .document .get ('gisaid_accession' ):
119
+ if record .document ['pathogen' ] == 'flu-a' :
120
+ record .document ['organism' ] = record .document ['pathogen' ] + '::' + record .document ['subtype' ]
121
+ else :
122
+ record .document ['organism' ] = record .document ['pathogen' ]
117
123
# Find the matching organism within the warehouse for the reference organism
118
124
organism_name_map = {
119
125
'rsv-a' : 'RSV.A' ,
120
126
'rsv-b' : 'RSV.B' ,
121
- 'hcov19' : 'Human_coronavirus.2019'
127
+ 'hcov19' : 'Human_coronavirus.2019' ,
128
+ 'flu-a::h1n1' : 'Influenza.A.H1N1' ,
129
+ 'flu-a::h3n2' : 'Influenza.A.H3N2' ,
130
+ 'flu-b' : 'Influenza.B'
122
131
}
123
- organism = find_organism (db , organism_name_map [record .document ['pathogen ' ]])
132
+ organism = find_organism (db , organism_name_map [record .document ['organism ' ]])
124
133
125
134
assert organism , f"No organism found with name «{ record .document ['pathogen' ]} »"
126
135
@@ -142,7 +151,7 @@ def etl_clinical(*, db: DatabaseSession):
142
151
# by the FHIR ETL. When time allows, SCH and KP should follow suit.
143
152
# Since KP2023 and KP samples both have KaiserPermanente as their site in id3c,
144
153
# use the ndjson document's site to distinguish KP vs KP2023 samples
145
- elif site .identifier == 'RetrospectivePHSKC' or record .document ["site" ].upper () == 'KP2023' :
154
+ elif site and ( site .identifier == 'RetrospectivePHSKC' or record .document ["site" ].upper () == 'KP2023' ) :
146
155
fhir_bundle = generate_fhir_bundle (db , record .document , site .identifier )
147
156
insert_fhir_bundle (db , fhir_bundle )
148
157
@@ -204,8 +213,6 @@ def upsert_genome(db: DatabaseSession, sample: MinimalSampleRecord, organism: Or
204
213
insert into warehouse.consensus_genome (sample_id, organism_id)
205
214
values (%(sample_id)s, %(organism_id)s)
206
215
207
- on conflict (sample_id, organism_id, sequence_read_set_id) do nothing
208
-
209
216
returning consensus_genome_id as id, sample_id, organism_id
210
217
""" , data )
211
218
@@ -222,7 +229,7 @@ def upsert_genomic_sequence(db: DatabaseSession, genome: GenomeRecord, details:
222
229
"""
223
230
Upsert genomic sequence given a *genome* record and *details*.
224
231
"""
225
- sequence_identifier = details ['sequence_identifier' ]
232
+ sequence_identifier = details ['sequence_identifier' ] + '-' + details . get ( 'segment' , '' )
226
233
LOG .info (f"Upserting genomic sequence «{ sequence_identifier } »" )
227
234
228
235
data = {
@@ -253,8 +260,8 @@ def upsert_genomic_sequence(db: DatabaseSession, genome: GenomeRecord, details:
253
260
returning genomic_sequence_id as id, identifier, segment, seq, consensus_genome_id
254
261
""" , data )
255
262
256
- assert genomic_sequence .consensus_genome_id == genome .id , \
257
- "Provided sequence identifier was not unique, matched a sequence linked to another consensus genome!"
263
+ # assert genomic_sequence.consensus_genome_id == genome.id, \
264
+ # "Provided sequence identifier was not unique, matched a sequence linked to another consensus genome!"
258
265
assert genomic_sequence .id , "Upsert affected no rows!"
259
266
260
267
LOG .info (f"Upserted genomic sequence { genomic_sequence .id } »" )
0 commit comments