1
- """ test_anndata.py
2
- verify basic AnnData validation works as expected
1
+ """test_anndata.py
2
+ verify basic AnnData validation works as expected
3
3
"""
4
4
5
5
import unittest
@@ -25,6 +25,7 @@ class TestAnnDataIngestor(unittest.TestCase):
25
25
def setup_class (self ):
26
26
filepath_valid = "../tests/data/anndata/trimmed_compliant_pbmc3K.h5ad"
27
27
filepath_invalid = "../tests/data/anndata/bad.h5"
28
+ filepath_layers = "../tests/data/anndata/compliant_liver_layers_counts.h5ad"
28
29
filepath_dup_feature = "../tests/data/anndata/dup_feature.h5ad"
29
30
filepath_dup_cell = "../tests/data/anndata/dup_cell.h5ad"
30
31
filepath_nan = "../tests/data/anndata/nan_value.h5ad"
@@ -34,6 +35,7 @@ def setup_class(self):
34
35
self .study_file_id = "dec0dedfeed0000000000000"
35
36
self .valid_args = [filepath_valid , self .study_id , self .study_file_id ]
36
37
self .invalid_args = [filepath_invalid , self .study_id , self .study_file_id ]
38
+ self .layers_args = [filepath_layers , self .study_id , self .study_file_id ]
37
39
self .dup_feature_args = [
38
40
filepath_dup_feature ,
39
41
self .study_id ,
@@ -44,7 +46,7 @@ def setup_class(self):
44
46
self .synthetic_args = [filepath_synthetic , self .study_id , self .study_file_id ]
45
47
self .boolean_args = [filepath_boolean , self .study_id , self .study_file_id ]
46
48
self .cluster_name = 'X_tsne'
47
- self .valid_kwargs = {'obsm_keys' : [self .cluster_name ]}
49
+ self .valid_kwargs = {'obsm_keys' : [self .cluster_name ], 'raw_location' : '.raw' }
48
50
self .anndata_ingest = AnnDataIngestor (* self .valid_args , ** self .valid_kwargs )
49
51
self .cluster_filename = f"h5ad_frag.cluster.{ self .cluster_name } .tsv"
50
52
self .metadata_filename = "h5ad_frag.metadata.tsv"
@@ -158,7 +160,9 @@ def test_generate_metadata_file(self):
158
160
"library_preparation_protocol__ontology_label\n " ,
159
161
]
160
162
self .assertEqual (
161
- expected_names , name_line , 'did not get expected headers from metadata body'
163
+ expected_names ,
164
+ name_line ,
165
+ 'did not get expected headers from metadata body' ,
162
166
)
163
167
type_line = metadata_body .readline ().split ("\t " )
164
168
expected_types = [
@@ -180,43 +184,71 @@ def test_generate_metadata_file(self):
180
184
"GROUP\n " ,
181
185
]
182
186
self .assertEqual (
183
- expected_types , type_line , 'did not get expected types from metadata body'
187
+ expected_types ,
188
+ type_line ,
189
+ 'did not get expected types from metadata body' ,
184
190
)
185
191
186
192
def test_generate_metadata_with_boolean (self ):
187
193
boolean_ingest = AnnDataIngestor (* self .boolean_args , ** self .valid_kwargs )
188
194
adata = boolean_ingest .obtain_adata ()
189
195
boolean_filename = "h5ad_frag.metadata_boolean.tsv"
190
- boolean_ingest .generate_metadata_file (
191
- adata , boolean_filename
192
- )
196
+ boolean_ingest .generate_metadata_file (adata , boolean_filename )
193
197
self .assertEqual (
194
- 'bool' , adata .obs ['is_primary_data' ].dtype .name ,
195
- 'did not correctly get "bool" dtype for "is_primary_data"'
198
+ 'bool' ,
199
+ adata .obs ['is_primary_data' ].dtype .name ,
200
+ 'did not correctly get "bool" dtype for "is_primary_data"' ,
196
201
)
197
202
compressed_file = boolean_filename + ".gz"
198
203
with gzip .open (compressed_file , "rt" , encoding = "utf-8-sig" ) as metadata_body :
199
204
name_line = metadata_body .readline ().split ("\t " )
200
205
expected_headers = [
201
- 'NAME' , 'donor_id' , 'biosample_id' , 'sex' , 'species' , 'species__ontology_label' ,
202
- 'library_preparation_protocol' , 'library_preparation_protocol__ontology_label' , 'organ' ,
203
- 'organ__ontology_label' , 'disease' , 'disease__ontology_label' , "is_primary_data\n "
206
+ 'NAME' ,
207
+ 'donor_id' ,
208
+ 'biosample_id' ,
209
+ 'sex' ,
210
+ 'species' ,
211
+ 'species__ontology_label' ,
212
+ 'library_preparation_protocol' ,
213
+ 'library_preparation_protocol__ontology_label' ,
214
+ 'organ' ,
215
+ 'organ__ontology_label' ,
216
+ 'disease' ,
217
+ 'disease__ontology_label' ,
218
+ "is_primary_data\n " ,
204
219
]
205
220
self .assertEqual (
206
- expected_headers , name_line , 'did not get expected headers from metadata body'
221
+ expected_headers ,
222
+ name_line ,
223
+ 'did not get expected headers from metadata body' ,
207
224
)
208
225
expected_types = [
209
- 'TYPE' , 'GROUP' , 'GROUP' , 'GROUP' , 'GROUP' , 'GROUP' , 'GROUP' , 'GROUP' , 'GROUP' , 'GROUP' , 'GROUP' ,
210
- 'GROUP' , "GROUP\n "
226
+ 'TYPE' ,
227
+ 'GROUP' ,
228
+ 'GROUP' ,
229
+ 'GROUP' ,
230
+ 'GROUP' ,
231
+ 'GROUP' ,
232
+ 'GROUP' ,
233
+ 'GROUP' ,
234
+ 'GROUP' ,
235
+ 'GROUP' ,
236
+ 'GROUP' ,
237
+ 'GROUP' ,
238
+ "GROUP\n " ,
211
239
]
212
240
type_line = metadata_body .readline ().split ("\t " )
213
241
self .assertEqual (
214
- expected_types , type_line , 'did not get expected types from metadata body'
242
+ expected_types ,
243
+ type_line ,
244
+ 'did not get expected types from metadata body' ,
215
245
)
216
246
for line in metadata_body .readlines ():
217
247
is_primary_data = line .split ("\t " )[12 ].strip ()
218
248
self .assertEqual (
219
- "False" , is_primary_data , 'did not correctly read boolean value as string from data'
249
+ "False" ,
250
+ is_primary_data ,
251
+ 'did not correctly read boolean value as string from data' ,
220
252
)
221
253
222
254
def test_gene_id_indexed_generate_processed_matrix (self ):
@@ -248,14 +280,16 @@ def test_gene_id_indexed_generate_processed_matrix(self):
248
280
filtered_adata.write('indexed_by_gene_id.h5ad')
249
281
"""
250
282
indexed_by_geneid = AnnDataIngestor (
251
- "../tests/data/anndata/indexed_by_gene_id.h5ad" , self .study_id , self .study_file_id
283
+ "../tests/data/anndata/indexed_by_gene_id.h5ad" ,
284
+ self .study_id ,
285
+ self .study_file_id ,
252
286
)
253
287
adata = indexed_by_geneid .obtain_adata ()
254
288
self .anndata_ingest .generate_processed_matrix (adata )
255
289
256
- now = time .time () # current time (ms since epoch)
290
+ now = time .time () # current time (seconds since epoch)
257
291
expected_features_fp = 'h5ad_frag.features.processed.tsv.gz'
258
- mtime = os .path .getmtime (expected_features_fp ) # modified time (ms since epoch)
292
+ mtime = os .path .getmtime (expected_features_fp ) # modified time (seconds since epoch)
259
293
self .assertTrue (abs (now - mtime ) < 1000 )
260
294
261
295
with gzip .open (expected_features_fp , 'rt' ) as f :
@@ -269,14 +303,18 @@ def test_gene_id_indexed_generate_processed_matrix(self):
269
303
def test_check_if_indexed_by_gene_id (self ):
270
304
# check var.index.name
271
305
feature_name = AnnDataIngestor (
272
- "../tests/data/anndata/indexed_by_gene_id.h5ad" , self .study_id , self .study_file_id
306
+ "../tests/data/anndata/indexed_by_gene_id.h5ad" ,
307
+ self .study_id ,
308
+ self .study_file_id ,
273
309
)
274
310
adata = feature_name .obtain_adata ()
275
311
self .assertTrue (feature_name .check_ensembl_index (adata ))
276
312
277
313
# check data inspection
278
314
data_inspect = AnnDataIngestor (
279
- "../tests/data/anndata/cellxgene.human_liver_b_cells.h5ad" , self .study_id , self .study_file_id
315
+ "../tests/data/anndata/cellxgene.human_liver_b_cells.h5ad" ,
316
+ self .study_id ,
317
+ self .study_file_id ,
280
318
)
281
319
liver_adata = data_inspect .obtain_adata ()
282
320
self .assertTrue (data_inspect .check_ensembl_index (liver_adata ))
@@ -318,8 +356,17 @@ def test_create_raw_cells_arrays(self):
318
356
self .assertEqual ('h5ad_frag.matrix.raw.mtx.gz Cells' , data_array ['name' ])
319
357
self .assertEqual (2638 , len (data_array ['values' ]))
320
358
321
-
322
359
def test_ingest_raw_cells (self ):
323
360
with patch ('anndata_.bypass_mongo_writes' , return_value = False ):
324
361
self .anndata_ingest .ingest_raw_cells ()
325
362
self .assertEqual (1 , self .anndata_ingest .models_processed )
363
+
364
+ def test_validate_raw_location (self ):
365
+ result = self .anndata_ingest .validate_raw_location ()
366
+ self .assertTrue (result )
367
+
368
+ def test_invalid_raw_location (self ):
369
+ self .invalid_kwargs = {'obsm_keys' : [self .cluster_name ], 'raw_location' : 'foo' }
370
+ self .anndata_ingest = AnnDataIngestor (* self .layers_args , ** self .invalid_kwargs )
371
+ result = self .anndata_ingest .validate_raw_location ()
372
+ self .assertFalse (result )
0 commit comments