Skip to content

Commit 8de8eca

Browse files
authored
Merge pull request #394 from broadinstitute/jb-raw-location-bugfix
Making raw_location optional parameter, adding test coverage (SCP-5984)
2 parents efcd1f2 + 8f2a341 commit 8de8eca

File tree

6 files changed

+183
-2
lines changed

6 files changed

+183
-2
lines changed

ingest/cli_parser.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,6 @@ def create_parser():
282282

283283
parser_differential_expression.add_argument(
284284
"--raw-location",
285-
required=True,
286285
help="location of raw counts. '.raw' for raw slot, "
287286
"else adata.layers key value",
288287
)
594 Bytes
Binary file not shown.
41 Bytes
Binary file not shown.
261 Bytes
Binary file not shown.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1744144302 # validation cache key
1+
1744734811 # validation cache key

tests/test_ingest.py

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from unittest.mock import patch, MagicMock
3737
from test_dense import mock_load_r_files
3838
import os
39+
import glob
3940

4041
from pymongo.errors import AutoReconnect, BulkWriteError
4142
from test_expression_files import mock_expression_load
@@ -91,6 +92,13 @@ def execute_ingest(args, mock_storage_client, mock_storage_blob):
9192
parsed_args = create_parser().parse_args(args)
9293
validate_arguments(parsed_args)
9394
arguments = vars(parsed_args)
95+
if "differential_expression" in arguments:
96+
# DE may use metadata or cluster file for annots BUT
97+
# IngestPipeline initialization assumes a "cell_metadata_file"
98+
arguments["cell_metadata_file"] = arguments["annotation_file"]
99+
# IngestPipeline initialization expects "name" and not "cluster_name"
100+
arguments["name"] = arguments["cluster_name"]
101+
94102

95103
ingest = IngestPipeline(**arguments)
96104

@@ -797,6 +805,180 @@ def test_extract_processed_matrix_from_anndata(self):
797805
except:
798806
print(f"Error while deleting file : {file}")
799807

808+
def test_execute_de_dense(self):
    """Run differential expression end-to-end on a dense matrix.

    Invokes the ``differential_expression`` subcommand without
    ``--raw-location`` and checks that exactly one status is returned,
    that it is 0, and that the expected per-label wilcoxon output file
    was written.

    Output files are removed in a ``finally`` clause so a failing
    assertion (or a failing pipeline run) does not leave stale DE
    results behind for later test runs.
    """
    args = [
        "--study-id",
        "5d276a50421aa9117c982845",
        "--study-file-id",
        "5dd5ae25421aa910a723a337",
        "differential_expression",
        "--annotation-file",
        "../tests/data/differential_expression/de_dense_metadata.tsv",
        "--annotation-name",
        "cell_type__ontology_label",
        "--annotation-type",
        "group",
        "--annotation-scope",
        "study",
        "--de-type",
        "rest",
        "--cluster-file",
        "../tests/data/differential_expression/de_dense_cluster.tsv",
        "--cluster-name",
        "dense_de_integration",
        "--matrix-file-path",
        "../tests/data/differential_expression/de_dense_matrix.tsv",
        "--matrix-file-type",
        "dense",
        "--study-accession",
        "SCP123",
        "--differential-expression",
    ]

    # NOTE(review): the assertion glob below is relative to the current
    # working directory while cleanup goes through "../tests/"; these
    # presumably coincide because the suite runs from tests/ -- confirm.
    output_wildcard_match = (
        "../tests/dense_de_integration--cell_type__ontology_label*.tsv"
    )

    try:
        # Only the status list is inspected; the other returned values
        # are not needed here.
        _, _, status, _ = self.execute_ingest(args)

        self.assertEqual(len(status), 1)
        self.assertEqual(status[0], 0)

        expected_file = "dense_de_integration--cell_type__ontology_label--cholinergic_neuron--study--wilcoxon.tsv"
        expected_output_match = (
            "dense_de_integration--cell_type__ontology_label--*--study--wilcoxon.tsv"
        )

        files = glob.glob(expected_output_match)

        self.assertIn(
            expected_file, files, "Expected filename not in found files list"
        )
    finally:
        # clean up DE outputs (best effort: report and continue on failure)
        for file in glob.glob(output_wildcard_match):
            try:
                os.remove(file)
            except OSError:
                print(f"Error while deleting file : {file}")
863+
864+
def test_execute_de_sparse(self):
    """Run differential expression end-to-end on a sparse (MTX) matrix.

    Invokes the ``differential_expression`` subcommand with matrix,
    gene and barcode files and without ``--raw-location``, then checks
    that exactly one status is returned, that it is 0, and that the
    expected per-label wilcoxon output file was written.

    Output files are removed in a ``finally`` clause so a failing
    assertion (or a failing pipeline run) does not leave stale DE
    results behind for later test runs.
    """
    args = [
        "--study-id",
        "5d276a50421aa9117c982845",
        "--study-file-id",
        "5dd5ae25421aa910a723a337",
        "differential_expression",
        "--annotation-file",
        "../tests/data/differential_expression/sparse/sparsemini_metadata.txt",
        "--annotation-name",
        "cell_type__ontology_label",
        "--annotation-type",
        "group",
        "--annotation-scope",
        "study",
        "--de-type",
        "rest",
        "--cluster-file",
        "../tests/data/differential_expression/sparse/sparsemini_cluster.txt",
        "--cluster-name",
        "sparse_de_integration",
        "--matrix-file-path",
        "../tests/data/differential_expression/sparse/sparsemini_matrix.mtx",
        "--gene-file",
        "../tests/data/differential_expression/sparse/sparsemini_dup_gene_name.tsv",
        "--barcode-file",
        "../tests/data/differential_expression/sparse/sparsemini_barcodes.tsv",
        "--matrix-file-type",
        "mtx",
        "--study-accession",
        "SCP123",
        "--differential-expression",
    ]

    # NOTE(review): the assertion glob below is relative to the current
    # working directory while cleanup goes through "../tests/"; these
    # presumably coincide because the suite runs from tests/ -- confirm.
    output_wildcard_match = (
        "../tests/sparse_de_integration--cell_type__ontology_label*.tsv"
    )

    try:
        # Only the status list is inspected; the other returned values
        # are not needed here.
        _, _, status, _ = self.execute_ingest(args)

        self.assertEqual(len(status), 1)
        self.assertEqual(status[0], 0)

        expected_file = "sparse_de_integration--cell_type__ontology_label--fibroblast--study--wilcoxon.tsv"
        expected_output_match = (
            "sparse_de_integration--cell_type__ontology_label--*--study--wilcoxon.tsv"
        )

        files = glob.glob(expected_output_match)

        self.assertIn(
            expected_file, files, "Expected filename not in found files list"
        )
    finally:
        # clean up DE outputs (best effort: report and continue on failure)
        for file in glob.glob(output_wildcard_match):
            try:
                os.remove(file)
            except OSError:
                print(f"Error while deleting file : {file}")
923+
924+
def test_execute_de_anndata(self):
    """Run differential expression end-to-end on an AnnData (h5ad) file.

    Invokes the ``differential_expression`` subcommand with
    ``--raw-location .raw``, then checks that exactly one status is
    returned, that it is 0, and that the expected per-label wilcoxon
    output file was written.

    Output files are removed in a ``finally`` clause so a failing
    assertion (or a failing pipeline run) does not leave stale DE
    results behind for later test runs.
    """
    args = [
        "--study-id",
        "5d276a50421aa9117c982845",
        "--study-file-id",
        "5dd5ae25421aa910a723a337",
        "differential_expression",
        "--annotation-file",
        "../tests/data/anndata/compliant_liver_h5ad_frag.metadata.tsv.gz",
        "--annotation-name",
        "cell_type__ontology_label",
        "--annotation-type",
        "group",
        "--annotation-scope",
        "study",
        "--de-type",
        "rest",
        "--cluster-file",
        "../tests/data/anndata/compliant_liver_h5ad_frag.cluster.X_umap.tsv.gz",
        "--cluster-name",
        "umap",
        "--matrix-file-path",
        "../tests/data/anndata/compliant_liver.h5ad",
        "--matrix-file-type",
        "h5ad",
        "--raw-location",
        ".raw",
        "--study-accession",
        "SCP123",
        "--differential-expression",
    ]

    # NOTE(review): the assertion glob below is relative to the current
    # working directory while cleanup goes through "../tests/"; these
    # presumably coincide because the suite runs from tests/ -- confirm.
    output_wildcard_match = "../tests/umap--cell_type__ontology_label*.tsv"

    try:
        # Only the status list is inspected; the other returned values
        # are not needed here.
        _, _, status, _ = self.execute_ingest(args)

        self.assertEqual(len(status), 1)
        self.assertEqual(status[0], 0)

        expected_file = "umap--cell_type__ontology_label--plasma_cell--study--wilcoxon.tsv"
        expected_output_match = (
            "umap--cell_type__ontology_label--*--study--wilcoxon.tsv"
        )

        files = glob.glob(expected_output_match)

        self.assertIn(
            expected_file, files, "Expected filename not in found files list"
        )
    finally:
        # clean up DE outputs (best effort: report and continue on failure)
        for file in glob.glob(output_wildcard_match):
            try:
                os.remove(file)
            except OSError:
                print(f"Error while deleting file : {file}")
981+
800982
def test_get_action_from_args(self):
801983
args = [
802984
"--study-id",

0 commit comments

Comments
 (0)