Skip to content
Merged

Dev #872

1 change: 1 addition & 0 deletions v03_pipeline/lib/model/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class Sex(Enum):
class PipelineVersion(Enum):
    """Identifiers for pipeline versions.

    Each member's value is the version's string form.
    """

    V02 = 'v02'
    V03 = 'v03'
    V3_1 = 'v3.1'


class ReferenceGenome(Enum):
Expand Down
30 changes: 17 additions & 13 deletions v03_pipeline/lib/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
)


def _pipeline_prefix(
    root: str,
    reference_genome: ReferenceGenome,
    dataset_type: DatasetType,
) -> str:
    """Return the versioned base directory shared by pipeline paths.

    The result is ``<root>/<version>/<reference genome>/<dataset type>``,
    where ``<version>`` is the value of ``PipelineVersion.V3_1``.

    Args:
        root: Root directory (or bucket URI) the path is built under.
        reference_genome: Enum member whose ``.value`` names the genome build.
        dataset_type: Enum member whose ``.value`` names the dataset type.

    Returns:
        The joined path string.
    """
    return os.path.join(
        root,
        PipelineVersion.V3_1.value,
        reference_genome.value,
        dataset_type.value,
    )
Expand Down Expand Up @@ -62,15 +62,17 @@ def cached_reference_dataset_query_path(
def family_table_path(
    reference_genome: ReferenceGenome,
    dataset_type: DatasetType,
    sample_type: SampleType,
    family_guid: str,
) -> str:
    """Return the path of a family's Hail table.

    The path is rooted at ``Env.HAIL_SEARCH_DATA`` and has the form
    ``<prefix>/families/<sample type>/<family_guid>.ht``, where ``<prefix>``
    comes from ``_pipeline_prefix``.

    Args:
        reference_genome: Genome build used for the prefix.
        dataset_type: Dataset type used for the prefix.
        sample_type: Enum member whose ``.value`` becomes a path component.
        family_guid: Family identifier used as the table's file stem.

    Returns:
        The joined path string.
    """
    return os.path.join(
        _pipeline_prefix(
            Env.HAIL_SEARCH_DATA,
            reference_genome,
            dataset_type,
        ),
        'families',
        sample_type.value,
        f'{family_guid}.ht',
    )

Expand All @@ -81,7 +83,7 @@ def imputed_sex_path(
callset_path: str,
) -> str:
return os.path.join(
_v03_pipeline_prefix(
_pipeline_prefix(
Env.LOADING_DATASETS,
reference_genome,
dataset_type,
Expand All @@ -97,7 +99,7 @@ def imported_callset_path(
callset_path: str,
) -> str:
return os.path.join(
_v03_pipeline_prefix(
_pipeline_prefix(
Env.LOADING_DATASETS,
reference_genome,
dataset_type,
Expand Down Expand Up @@ -125,15 +127,17 @@ def metadata_for_run_path(
def project_table_path(
    reference_genome: ReferenceGenome,
    dataset_type: DatasetType,
    sample_type: SampleType,
    project_guid: str,
) -> str:
    """Return the path of a project's Hail table.

    The path is rooted at ``Env.HAIL_SEARCH_DATA`` and has the form
    ``<prefix>/projects/<sample type>/<project_guid>.ht``, where ``<prefix>``
    comes from ``_pipeline_prefix``.

    Args:
        reference_genome: Genome build used for the prefix.
        dataset_type: Dataset type used for the prefix.
        sample_type: Enum member whose ``.value`` becomes a path component.
        project_guid: Project identifier used as the table's file stem.

    Returns:
        The joined path string.
    """
    return os.path.join(
        _pipeline_prefix(
            Env.HAIL_SEARCH_DATA,
            reference_genome,
            dataset_type,
        ),
        'projects',
        sample_type.value,
        f'{project_guid}.ht',
    )

Expand All @@ -144,7 +148,7 @@ def relatedness_check_table_path(
callset_path: str,
) -> str:
return os.path.join(
_v03_pipeline_prefix(
_pipeline_prefix(
Env.LOADING_DATASETS,
reference_genome,
dataset_type,
Expand All @@ -161,7 +165,7 @@ def remapped_and_subsetted_callset_path(
project_guid: str,
) -> str:
return os.path.join(
_v03_pipeline_prefix(
_pipeline_prefix(
Env.LOADING_DATASETS,
reference_genome,
dataset_type,
Expand All @@ -177,7 +181,7 @@ def lookup_table_path(
dataset_type: DatasetType,
) -> str:
return os.path.join(
_v03_pipeline_prefix(
_pipeline_prefix(
Env.HAIL_SEARCH_DATA,
reference_genome,
dataset_type,
Expand All @@ -191,7 +195,7 @@ def runs_path(
dataset_type: DatasetType,
) -> str:
return os.path.join(
_v03_pipeline_prefix(
_pipeline_prefix(
Env.HAIL_SEARCH_DATA,
reference_genome,
dataset_type,
Expand All @@ -206,7 +210,7 @@ def sex_check_table_path(
callset_path: str,
) -> str:
return os.path.join(
_v03_pipeline_prefix(
_pipeline_prefix(
Env.LOADING_DATASETS,
reference_genome,
dataset_type,
Expand Down Expand Up @@ -260,7 +264,7 @@ def variant_annotations_table_path(
dataset_type: DatasetType,
) -> str:
return os.path.join(
_v03_pipeline_prefix(
_pipeline_prefix(
Env.HAIL_SEARCH_DATA,
reference_genome,
dataset_type,
Expand All @@ -274,7 +278,7 @@ def variant_annotations_vcf_path(
dataset_type: DatasetType,
) -> str:
return os.path.join(
_v03_pipeline_prefix(
_pipeline_prefix(
Env.HAIL_SEARCH_DATA,
reference_genome,
dataset_type,
Expand Down
29 changes: 16 additions & 13 deletions v03_pipeline/lib/paths_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,21 @@ def test_family_table_path(self) -> None:
family_table_path(
ReferenceGenome.GRCh37,
DatasetType.SNV_INDEL,
SampleType.WES,
'franklin',
),
'/hail-search-data/v03/GRCh37/SNV_INDEL/families/franklin.ht',
'/hail-search-data/v3.1/GRCh37/SNV_INDEL/families/WES/franklin.ht',
)
with patch('v03_pipeline.lib.paths.Env') as mock_env:
mock_env.HAIL_SEARCH_DATA = 'gs://seqr-datasets/'
self.assertEqual(
family_table_path(
ReferenceGenome.GRCh37,
DatasetType.SNV_INDEL,
SampleType.WES,
'franklin',
),
'gs://seqr-datasets/v03/GRCh37/SNV_INDEL/families/franklin.ht',
'gs://seqr-datasets/v3.1/GRCh37/SNV_INDEL/families/WES/franklin.ht',
)

def test_valid_filters_path(self) -> None:
Expand Down Expand Up @@ -82,9 +84,10 @@ def test_project_table_path(self) -> None:
project_table_path(
ReferenceGenome.GRCh38,
DatasetType.MITO,
SampleType.WES,
'R0652_pipeline_test',
),
'/hail-search-data/v03/GRCh38/MITO/projects/R0652_pipeline_test.ht',
'/hail-search-data/v3.1/GRCh38/MITO/projects/WES/R0652_pipeline_test.ht',
)

def test_valid_reference_dataset_collection_path(self) -> None:
Expand Down Expand Up @@ -113,7 +116,7 @@ def test_lookup_table_path(self) -> None:
ReferenceGenome.GRCh37,
DatasetType.SV,
),
'/hail-search-data/v03/GRCh37/SV/lookup.ht',
'/hail-search-data/v3.1/GRCh37/SV/lookup.ht',
)

def test_sex_check_table_path(self) -> None:
Expand All @@ -123,7 +126,7 @@ def test_sex_check_table_path(self) -> None:
DatasetType.SNV_INDEL,
'gs://abc.efg/callset.vcf.gz',
),
'/seqr-loading-temp/v03/GRCh38/SNV_INDEL/sex_check/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.ht',
'/seqr-loading-temp/v3.1/GRCh38/SNV_INDEL/sex_check/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.ht',
)

def test_relatedness_check_table_path(self) -> None:
Expand All @@ -133,7 +136,7 @@ def test_relatedness_check_table_path(self) -> None:
DatasetType.SNV_INDEL,
'gs://abc.efg/callset.vcf.gz',
),
'/seqr-loading-temp/v03/GRCh38/SNV_INDEL/relatedness_check/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.ht',
'/seqr-loading-temp/v3.1/GRCh38/SNV_INDEL/relatedness_check/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.ht',
)

def test_metadata_for_run_path(self) -> None:
Expand All @@ -143,7 +146,7 @@ def test_metadata_for_run_path(self) -> None:
DatasetType.SNV_INDEL,
'manual__2023-06-26T18:30:09.349671+00:00',
),
'/hail-search-data/v03/GRCh38/SNV_INDEL/runs/manual__2023-06-26T18:30:09.349671+00:00/metadata.json',
'/hail-search-data/v3.1/GRCh38/SNV_INDEL/runs/manual__2023-06-26T18:30:09.349671+00:00/metadata.json',
)

def test_variant_annotations_table_path(self) -> None:
Expand All @@ -152,7 +155,7 @@ def test_variant_annotations_table_path(self) -> None:
ReferenceGenome.GRCh38,
DatasetType.GCNV,
),
'/hail-search-data/v03/GRCh38/GCNV/annotations.ht',
'/hail-search-data/v3.1/GRCh38/GCNV/annotations.ht',
)

def test_remapped_and_subsetted_callset_path(self) -> None:
Expand All @@ -163,7 +166,7 @@ def test_remapped_and_subsetted_callset_path(self) -> None:
'gs://abc.efg/callset.vcf.gz',
'R0111_tgg_bblanken_wes',
),
'/seqr-loading-temp/v03/GRCh38/GCNV/remapped_and_subsetted_callsets/R0111_tgg_bblanken_wes/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.mt',
'/seqr-loading-temp/v3.1/GRCh38/GCNV/remapped_and_subsetted_callsets/R0111_tgg_bblanken_wes/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.mt',
)
self.assertEqual(
remapped_and_subsetted_callset_path(
Expand All @@ -172,7 +175,7 @@ def test_remapped_and_subsetted_callset_path(self) -> None:
'gs://abc.efg/callset/*.vcf.gz',
'R0111_tgg_bblanken_wes',
),
'/seqr-loading-temp/v03/GRCh38/GCNV/remapped_and_subsetted_callsets/R0111_tgg_bblanken_wes/bce53ccdb49a5ed2513044e1d0c6224e3ffcc323f770dc807d9175fd3c70a050.mt',
'/seqr-loading-temp/v3.1/GRCh38/GCNV/remapped_and_subsetted_callsets/R0111_tgg_bblanken_wes/bce53ccdb49a5ed2513044e1d0c6224e3ffcc323f770dc807d9175fd3c70a050.mt',
)

def test_imported_callset_path(self) -> None:
Expand All @@ -182,7 +185,7 @@ def test_imported_callset_path(self) -> None:
DatasetType.SNV_INDEL,
'gs://abc.efg/callset.vcf.gz',
),
'/seqr-loading-temp/v03/GRCh38/SNV_INDEL/imported_callsets/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.mt',
'/seqr-loading-temp/v3.1/GRCh38/SNV_INDEL/imported_callsets/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.mt',
)

def test_imputed_sex_path(self) -> None:
Expand All @@ -192,7 +195,7 @@ def test_imputed_sex_path(self) -> None:
DatasetType.SNV_INDEL,
'gs://abc.efg/callset.vcf.gz',
),
'/seqr-loading-temp/v03/GRCh38/SNV_INDEL/imputed_sex/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.tsv',
'/seqr-loading-temp/v3.1/GRCh38/SNV_INDEL/imputed_sex/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.tsv',
)

def test_new_variants_table_path(self) -> None:
Expand All @@ -202,5 +205,5 @@ def test_new_variants_table_path(self) -> None:
DatasetType.SNV_INDEL,
'manual__2023-06-26T18:30:09.349671+00:00',
),
'/hail-search-data/v03/GRCh38/SNV_INDEL/runs/manual__2023-06-26T18:30:09.349671+00:00/new_variants.ht',
'/hail-search-data/v3.1/GRCh38/SNV_INDEL/runs/manual__2023-06-26T18:30:09.349671+00:00/new_variants.ht',
)
3 changes: 3 additions & 0 deletions v03_pipeline/lib/tasks/base/base_update_project_table.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
import hail as hl
import luigi

from v03_pipeline.lib.model import SampleType
from v03_pipeline.lib.paths import project_table_path
from v03_pipeline.lib.tasks.base.base_update import BaseUpdateTask
from v03_pipeline.lib.tasks.files import GCSorLocalTarget


class BaseUpdateProjectTableTask(BaseUpdateTask):
sample_type = luigi.EnumParameter(enum=SampleType)
project_guid = luigi.Parameter()

def output(self) -> luigi.Target:
    """Return the target for this task's project table.

    The location is computed by ``project_table_path`` from the task's
    reference genome, dataset type, sample type and project guid.
    """
    # reference_genome and dataset_type are not declared in the visible part
    # of this class; presumably inherited from BaseUpdateTask -- confirm.
    return GCSorLocalTarget(
        project_table_path(
            self.reference_genome,
            self.dataset_type,
            self.sample_type,
            self.project_guid,
        ),
    )
Expand Down
3 changes: 3 additions & 0 deletions v03_pipeline/lib/tasks/delete_family_table.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
import luigi

from v03_pipeline.lib.model import SampleType
from v03_pipeline.lib.paths import family_table_path
from v03_pipeline.lib.tasks.base.base_delete_table import BaseDeleteTableTask
from v03_pipeline.lib.tasks.files import GCSorLocalTarget


class DeleteFamilyTableTask(BaseDeleteTableTask):
    """Task whose output is a single family's table.

    The table is identified by reference genome, dataset type, sample type
    and family guid. The deletion behaviour itself is not defined here;
    presumably it lives in ``BaseDeleteTableTask`` -- confirm against that
    class.
    """

    # Sample type of the family table; becomes a component of the table path.
    sample_type = luigi.EnumParameter(enum=SampleType)
    # Identifier of the family whose table this task targets.
    family_guid = luigi.Parameter()

    def output(self) -> luigi.Target:
        """Return the target located at this family's table path."""
        # reference_genome and dataset_type are not declared in this class;
        # presumably inherited from the base task -- confirm.
        return GCSorLocalTarget(
            family_table_path(
                self.reference_genome,
                self.dataset_type,
                self.sample_type,
                self.family_guid,
            ),
        )
5 changes: 4 additions & 1 deletion v03_pipeline/lib/tasks/delete_family_table_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import hail as hl
import luigi.worker

from v03_pipeline.lib.model import DatasetType, ReferenceGenome
from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
from v03_pipeline.lib.paths import family_table_path
from v03_pipeline.lib.tasks.delete_family_table import DeleteFamilyTableTask
from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase
Expand Down Expand Up @@ -41,6 +41,7 @@ def setUp(self) -> None:
family_table_path(
ReferenceGenome.GRCh38,
DatasetType.SNV_INDEL,
SampleType.WES,
'abc_1',
),
)
Expand All @@ -50,6 +51,7 @@ def test_delete_family_table_task(self) -> None:
task = DeleteFamilyTableTask(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WES,
family_guid='abc_1',
)
worker.add(task)
Expand All @@ -60,6 +62,7 @@ def test_delete_family_table_task(self) -> None:
family_table_path(
ReferenceGenome.GRCh38,
DatasetType.SNV_INDEL,
SampleType.WES,
'abc_1',
),
).exists(),
Expand Down
19 changes: 11 additions & 8 deletions v03_pipeline/lib/tasks/delete_family_tables.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import luigi

from v03_pipeline.lib.model import SampleType
from v03_pipeline.lib.tasks.base.base_hail_table import BaseHailTableTask
from v03_pipeline.lib.tasks.delete_family_table import DeleteFamilyTableTask

Expand All @@ -18,12 +19,14 @@ def complete(self) -> bool:
)

def run(self):
    """Queue one family-table deletion per (sample type, family) pair.

    A ``DeleteFamilyTableTask`` is added to
    ``self.dynamic_delete_family_table_tasks`` for every member of
    ``SampleType`` combined with every guid in ``self.family_guids``; the
    populated collection is then yielded.
    """
    # Family table paths include the sample type, so every sample type is
    # covered for each family.
    for sample_type in SampleType:
        for family_guid in self.family_guids:
            self.dynamic_delete_family_table_tasks.add(
                DeleteFamilyTableTask(
                    reference_genome=self.reference_genome,
                    dataset_type=self.dataset_type,
                    sample_type=sample_type,
                    family_guid=family_guid,
                ),
            )
    # Yielded once, after both loops (placement inferred from the flattened
    # diff -- confirm against the real file).
    yield self.dynamic_delete_family_table_tasks
Loading
Loading