Skip to content

feat: sample type validation excluded projects #1138

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion v03_pipeline/lib/misc/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from v03_pipeline.lib.model import (
DatasetType,
Env,
ReferenceGenome,
SampleType,
)
Expand Down Expand Up @@ -151,12 +152,18 @@ def _validate_field(

def validate_sample_type(
mt: hl.MatrixTable,
coding_and_noncoding_variants_ht: hl.Table,
reference_genome: ReferenceGenome,
sample_type: SampleType,
project_guids: list[str],
coding_and_noncoding_variants_ht: hl.Table,
sample_type_match_threshold: float = SAMPLE_TYPE_MATCH_THRESHOLD,
**_: Any,
) -> None:
if all(
project_guid in Env.SAMPLE_TYPE_VALIDATION_EXCLUDED_PROJECTS
for project_guid in project_guids
):
return
coding_variants_ht = coding_and_noncoding_variants_ht.filter(
coding_and_noncoding_variants_ht.coding,
)
Expand Down
29 changes: 24 additions & 5 deletions v03_pipeline/lib/misc/validation_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import unittest
from unittest.mock import patch

import hail as hl

Expand Down Expand Up @@ -325,19 +326,21 @@ def test_validate_sample_type(self) -> None:
self.assertIsNone(
validate_sample_type(
mt,
coding_and_noncoding_variants_ht,
ReferenceGenome.GRCh38,
SampleType.WGS,
['project_a'],
coding_and_noncoding_variants_ht,
),
)
self.assertRaisesRegex(
SeqrValidationError,
'specified as WES but appears to be WGS',
validate_sample_type,
mt,
coding_and_noncoding_variants_ht,
ReferenceGenome.GRCh38,
SampleType.WES,
['project_a'],
coding_and_noncoding_variants_ht,
)

# has coding, but not noncoding now.
Expand Down Expand Up @@ -381,19 +384,21 @@ def test_validate_sample_type(self) -> None:
self.assertIsNone(
validate_sample_type(
mt,
coding_and_noncoding_variants_ht,
ReferenceGenome.GRCh38,
SampleType.WES,
['project_a'],
coding_and_noncoding_variants_ht,
),
)
self.assertRaisesRegex(
SeqrValidationError,
'specified as WGS but appears to be WES',
validate_sample_type,
mt,
coding_and_noncoding_variants_ht,
ReferenceGenome.GRCh38,
SampleType.WGS,
['project_a'],
coding_and_noncoding_variants_ht,
)

# has noncoding, but not coding now.
Expand Down Expand Up @@ -439,7 +444,21 @@ def test_validate_sample_type(self) -> None:
'contains noncoding variants but is missing common coding variants',
validate_sample_type,
mt,
coding_and_noncoding_variants_ht,
ReferenceGenome.GRCh38,
SampleType.WGS,
['project_a'],
coding_and_noncoding_variants_ht,
)

# Validation is skipped (returns None) when every project is in the excluded list.
with patch('v03_pipeline.lib.misc.validation.Env') as mock_env:
mock_env.SAMPLE_TYPE_VALIDATION_EXCLUDED_PROJECTS = ['project_a']
self.assertIsNone(
validate_sample_type(
mt,
ReferenceGenome.GRCh38,
SampleType.WGS,
['project_a'],
coding_and_noncoding_variants_ht,
),
)
8 changes: 8 additions & 0 deletions v03_pipeline/lib/model/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@
GCLOUD_ZONE = os.environ.get('GCLOUD_ZONE')
GCLOUD_REGION = os.environ.get('GCLOUD_REGION')
PIPELINE_RUNNER_APP_VERSION = os.environ.get('PIPELINE_RUNNER_APP_VERSION', 'latest')
# Project guids read from a comma-separated environment variable.
# Whitespace around each entry is stripped and empty entries are dropped, so
# values such as 'a, b,' parse to ('a', 'b'); an unset or empty variable
# yields an empty tuple.
SAMPLE_TYPE_VALIDATION_EXCLUDED_PROJECTS = tuple(
    project_guid
    for project_guid in (
        raw.strip()
        for raw in os.environ.get(
            'SAMPLE_TYPE_VALIDATION_EXCLUDED_PROJECTS',
            '',
        ).split(',')
    )
    if project_guid
)
SEQR_APP_HAIL_SEARCH_DATA_DIR = os.environ.get('SEQR_APP_HAIL_SEARCH_DATA_DIR')
SEQR_APP_REFERENCE_DATASETS_DIR = os.environ.get('SEQR_APP_REFERENCE_DATASETS_DIR')

Expand All @@ -79,6 +84,9 @@ class Env:
PIPELINE_RUNNER_APP_VERSION: str = PIPELINE_RUNNER_APP_VERSION
PRIVATE_REFERENCE_DATASETS_DIR: str = PRIVATE_REFERENCE_DATASETS_DIR
REFERENCE_DATASETS_DIR: str = REFERENCE_DATASETS_DIR
# Variable-length tuple of project guids; validate_sample_type returns early
# when every project guid it is given appears in this tuple.
SAMPLE_TYPE_VALIDATION_EXCLUDED_PROJECTS: tuple[str, ...] = (
SAMPLE_TYPE_VALIDATION_EXCLUDED_PROJECTS
)
SEQR_APP_HAIL_SEARCH_DATA_DIR: str | None = SEQR_APP_HAIL_SEARCH_DATA_DIR
SEQR_APP_REFERENCE_DATASETS_DIR: str | None = SEQR_APP_REFERENCE_DATASETS_DIR
VEP_REFERENCE_DATASETS_DIR: str = VEP_REFERENCE_DATASETS_DIR