Skip to content

Release 1.41.3 #395

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion ingest/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,6 @@ def create_parser():

parser_differential_expression.add_argument(
"--raw-location",
required=True,
help="location of raw counts. '.raw' for raw slot, "
"else adata.layers key value",
)
Expand Down
Binary file modified ingest/validation/ontologies/cl.min.tsv.gz
Binary file not shown.
Binary file modified ingest/validation/ontologies/efo.min.tsv.gz
Binary file not shown.
Binary file modified ingest/validation/ontologies/uberon.min.tsv.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion ingest/validation/ontologies/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1744144302 # validation cache key
1744734811 # validation cache key
182 changes: 182 additions & 0 deletions tests/test_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from unittest.mock import patch, MagicMock
from test_dense import mock_load_r_files
import os
import glob

from pymongo.errors import AutoReconnect, BulkWriteError
from test_expression_files import mock_expression_load
Expand Down Expand Up @@ -91,6 +92,13 @@ def execute_ingest(args, mock_storage_client, mock_storage_blob):
parsed_args = create_parser().parse_args(args)
validate_arguments(parsed_args)
arguments = vars(parsed_args)
if "differential_expression" in arguments:
# DE may use metadata or cluster file for annots BUT
# IngestPipeline initialization assumes a "cell_metadata_file"
arguments["cell_metadata_file"] = arguments["annotation_file"]
# IngestPipeline initialization expects "name" and not "cluster_name"
arguments["name"] = arguments["cluster_name"]


ingest = IngestPipeline(**arguments)

Expand Down Expand Up @@ -797,6 +805,180 @@ def test_extract_processed_matrix_from_anndata(self):
except:
print(f"Error while deleting file : {file}")

def test_execute_de_dense(self):
    """Run the differential_expression subcommand on a dense matrix.

    Checks that ingest reports exactly one status and that it is 0, then
    checks that the per-group DE result file for "cholinergic_neuron" is
    among the files matching the expected naming pattern.

    DE output files are removed afterwards even when the run or an
    assertion fails, so stale outputs cannot satisfy a later run.
    """
    args = [
        "--study-id",
        "5d276a50421aa9117c982845",
        "--study-file-id",
        "5dd5ae25421aa910a723a337",
        "differential_expression",
        "--annotation-file",
        "../tests/data/differential_expression/de_dense_metadata.tsv",
        "--annotation-name",
        "cell_type__ontology_label",
        "--annotation-type",
        "group",
        "--annotation-scope",
        "study",
        "--de-type",
        "rest",
        "--cluster-file",
        "../tests/data/differential_expression/de_dense_cluster.tsv",
        "--cluster-name",
        "dense_de_integration",
        "--matrix-file-path",
        "../tests/data/differential_expression/de_dense_matrix.tsv",
        "--matrix-file-type",
        "dense",
        "--study-accession",
        "SCP123",
        "--differential-expression",
    ]

    expected_file = "dense_de_integration--cell_type__ontology_label--cholinergic_neuron--study--wilcoxon.tsv"
    expected_output_match = (
        "dense_de_integration--cell_type__ontology_label--*--study--wilcoxon.tsv"
    )
    # NOTE(review): the assertion globs relative to the working directory
    # while cleanup globs under ../tests/ -- these only coincide when the
    # suite is run from the tests directory; confirm.
    output_wildcard_match = (
        "../tests/dense_de_integration--cell_type__ontology_label*.tsv"
    )

    try:
        # Only the status list is inspected; the other returned values
        # are not needed by this test.
        _, _, status, _ = self.execute_ingest(args)

        self.assertEqual(len(status), 1)
        self.assertEqual(status[0], 0)

        files = glob.glob(expected_output_match)

        self.assertIn(
            expected_file, files, "Expected filename not in found files list"
        )
    finally:
        # clean up DE outputs (best effort: report and continue on failure)
        for file in glob.glob(output_wildcard_match):
            try:
                os.remove(file)
            except OSError:
                print(f"Error while deleting file : {file}")

def test_execute_de_sparse(self):
    """Run the differential_expression subcommand on an MTX (sparse) matrix.

    Checks that ingest reports exactly one status and that it is 0, then
    checks that the per-group DE result file for "fibroblast" is among the
    files matching the expected naming pattern.

    DE output files are removed afterwards even when the run or an
    assertion fails, so stale outputs cannot satisfy a later run.
    """
    args = [
        "--study-id",
        "5d276a50421aa9117c982845",
        "--study-file-id",
        "5dd5ae25421aa910a723a337",
        "differential_expression",
        "--annotation-file",
        "../tests/data/differential_expression/sparse/sparsemini_metadata.txt",
        "--annotation-name",
        "cell_type__ontology_label",
        "--annotation-type",
        "group",
        "--annotation-scope",
        "study",
        "--de-type",
        "rest",
        "--cluster-file",
        "../tests/data/differential_expression/sparse/sparsemini_cluster.txt",
        "--cluster-name",
        "sparse_de_integration",
        "--matrix-file-path",
        "../tests/data/differential_expression/sparse/sparsemini_matrix.mtx",
        "--gene-file",
        "../tests/data/differential_expression/sparse/sparsemini_dup_gene_name.tsv",
        "--barcode-file",
        "../tests/data/differential_expression/sparse/sparsemini_barcodes.tsv",
        "--matrix-file-type",
        "mtx",
        "--study-accession",
        "SCP123",
        "--differential-expression",
    ]

    expected_file = "sparse_de_integration--cell_type__ontology_label--fibroblast--study--wilcoxon.tsv"
    expected_output_match = (
        "sparse_de_integration--cell_type__ontology_label--*--study--wilcoxon.tsv"
    )
    # NOTE(review): the assertion globs relative to the working directory
    # while cleanup globs under ../tests/ -- these only coincide when the
    # suite is run from the tests directory; confirm.
    output_wildcard_match = (
        "../tests/sparse_de_integration--cell_type__ontology_label*.tsv"
    )

    try:
        # Only the status list is inspected; the other returned values
        # are not needed by this test.
        _, _, status, _ = self.execute_ingest(args)

        self.assertEqual(len(status), 1)
        self.assertEqual(status[0], 0)

        files = glob.glob(expected_output_match)

        self.assertIn(
            expected_file, files, "Expected filename not in found files list"
        )
    finally:
        # clean up DE outputs (best effort: report and continue on failure)
        for file in glob.glob(output_wildcard_match):
            try:
                os.remove(file)
            except OSError:
                print(f"Error while deleting file : {file}")

def test_execute_de_anndata(self):
    """Run the differential_expression subcommand on an h5ad (AnnData) file.

    Passes "--raw-location .raw" explicitly. Checks that ingest reports
    exactly one status and that it is 0, then checks that the per-group DE
    result file for "plasma_cell" is among the files matching the expected
    naming pattern.

    DE output files are removed afterwards even when the run or an
    assertion fails, so stale outputs cannot satisfy a later run.
    """
    args = [
        "--study-id",
        "5d276a50421aa9117c982845",
        "--study-file-id",
        "5dd5ae25421aa910a723a337",
        "differential_expression",
        "--annotation-file",
        "../tests/data/anndata/compliant_liver_h5ad_frag.metadata.tsv.gz",
        "--annotation-name",
        "cell_type__ontology_label",
        "--annotation-type",
        "group",
        "--annotation-scope",
        "study",
        "--de-type",
        "rest",
        "--cluster-file",
        "../tests/data/anndata/compliant_liver_h5ad_frag.cluster.X_umap.tsv.gz",
        "--cluster-name",
        "umap",
        "--matrix-file-path",
        "../tests/data/anndata/compliant_liver.h5ad",
        "--matrix-file-type",
        "h5ad",
        "--raw-location",
        ".raw",
        "--study-accession",
        "SCP123",
        "--differential-expression",
    ]

    expected_file = "umap--cell_type__ontology_label--plasma_cell--study--wilcoxon.tsv"
    expected_output_match = (
        "umap--cell_type__ontology_label--*--study--wilcoxon.tsv"
    )
    # NOTE(review): the assertion globs relative to the working directory
    # while cleanup globs under ../tests/ -- these only coincide when the
    # suite is run from the tests directory; confirm.
    output_wildcard_match = "../tests/umap--cell_type__ontology_label*.tsv"

    try:
        # Only the status list is inspected; the other returned values
        # are not needed by this test.
        _, _, status, _ = self.execute_ingest(args)

        self.assertEqual(len(status), 1)
        self.assertEqual(status[0], 0)

        files = glob.glob(expected_output_match)

        self.assertIn(
            expected_file, files, "Expected filename not in found files list"
        )
    finally:
        # clean up DE outputs (best effort: report and continue on failure)
        for file in glob.glob(output_wildcard_match):
            try:
                os.remove(file)
            except OSError:
                print(f"Error while deleting file : {file}")

def test_get_action_from_args(self):
args = [
"--study-id",
Expand Down