Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/encoded/schemas/changelogs/file.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
## Changelog for file.json

### Schema version 29
* Removed *mapped_run_type* and *mapped_read_length* from bam files derived from fastq files that were sequenced on Pacific Biosciences or Nanopore platforms.

### Minor changes since schema version 28
* Added *element enrichments* to the enum list for *file_format_type*
* Added *curated binding sites*, *curated SNVs*, *dsQTLs*, *eQTLs* and *PWMs* to the enum list for *output_type*.
Expand Down
2 changes: 1 addition & 1 deletion src/encoded/schemas/file.json
Original file line number Diff line number Diff line change
Expand Up @@ -1079,7 +1079,7 @@
"permission": "import_items"
},
"schema_version": {
"default": "28"
"default": "29"
},
"accession": {
"accessionType": "FF"
Expand Down
37 changes: 37 additions & 0 deletions src/encoded/tests/fixtures/schemas/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -1927,6 +1927,43 @@ def file_27(testapp, lab, award, experiment):
return item


@pytest.fixture
def file_28_fastq_nanopore(testapp, lab, award, experiment, base_replicate, platform4):
    """Post a fastq reads file tied to ``platform4`` and return the stored item.

    The returned value is the first entry of the ``@graph`` list in the POST
    response, so dependants can read its ``uuid``.
    NOTE(review): the fixture name implies ``platform4`` is a Nanopore
    platform -- confirm against the platform fixtures.
    """
    payload = {
        'dataset': experiment['@id'],
        'file_format': 'fastq',
        'md5sum': '15dd66b6f21515393507f4ebfa55e77c',
        'replicate': base_replicate['@id'],
        'output_type': 'reads',
        'file_size': 800,
        'platform': platform4['uuid'],
        'lab': lab['@id'],
        'award': award['@id'],
        'status': 'in progress',
    }
    response = testapp.post_json('/file', payload)
    return response.json['@graph'][0]


@pytest.fixture
def file_28_bam_mapped_props(testapp, lab, award, experiment, file_28_fastq_nanopore):
    """Return unposted schema-version-28 bam properties with mapped_* fields.

    The dict is derived from the ``file_28_fastq_nanopore`` fixture and still
    carries ``mapped_run_type`` and ``mapped_read_length``; nothing is posted
    to the application here.
    """
    return {
        'dataset': experiment['@id'],
        'file_format': 'bam',
        'md5sum': 'eeb1325f54a0ec4911c4a3df0ed32f20',
        'output_type': 'alignments',
        'assembly': 'hg19',
        'file_size': 888328,
        'derived_from': [file_28_fastq_nanopore['uuid']],
        'lab': lab['@id'],
        'award': award['@id'],
        'mapped_run_type': 'single-ended',
        'mapped_read_length': 101,
        # avoid s3 upload codepath
        'status': 'in progress',
        'schema_version': '28',
    }


@pytest.fixture
def file_nanopore_signal(testapp, experiment, award, lab, replicate_url, platform4):
item = {
Expand Down
7 changes: 7 additions & 0 deletions src/encoded/tests/test_upgrade_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,3 +200,10 @@ def test_file_upgrade_27_to_28(upgrader, file_27):
value = upgrader.upgrade('file', file_27, current_version='27', target_version='28')
assert value['schema_version'] == '28'
assert value['output_type'] == 'exclusion list regions'


def test_file_upgrade_28_to_29(root, testapp, upgrader, registry, file_28_bam_mapped_props):
    """Upgrading a v28 bam derived from the fixture fastq drops mapped_* fields."""
    upgraded = upgrader.upgrade(
        'file',
        file_28_bam_mapped_props,
        registry=registry,
        current_version='28',
        target_version='29',
    )
    assert upgraded['schema_version'] == '29'
    # Both mapped properties must be gone after the upgrade.
    for removed_property in ('mapped_run_type', 'mapped_read_length'):
        assert removed_property not in upgraded
28 changes: 27 additions & 1 deletion src/encoded/upgrade/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,6 @@ def file_20_21(value, system):
conn = system['registry'][CONNECTION]
datasetContext = conn.get_by_uuid(value['dataset'])
assay_type = datasetContext.properties.get('assay_term_name', None)

if assay_type == 'DNase-seq' and output_type == 'enrichment':
value['output_type'] = 'FDR cut rate'
return
Expand Down Expand Up @@ -751,3 +750,30 @@ def file_27_28(value, system):
for old_term, new_term in term_pairs:
if output_type == old_term:
value['output_type'] = new_term


# Platform uuids for which a derived bam loses its mapped_* properties.
# NOTE(review): presumably the Pacific Biosciences and Nanopore platform
# records mentioned in the file schema changelog -- confirm against the
# platform inserts.
_FILE_28_29_PLATFORM_UUIDS = (
    'ced61406-dcc6-43c4-bddd-4c977cc676e8',
    'c7564b38-ab4f-4c42-a401-3de48689a998',
    'e2be5728-5744-4da4-8881-cb9526d0389e',
    '7cc06b8c-5535-4a77-b719-4c23644e767d',
    '8f1a9a8c-3392-4032-92a8-5d196c9d7810',
    '6c275b37-018d-4bf8-85f6-6e3b830524a9',
    '6ce511d5-eeb3-41fc-bea7-8c38301e88c1',
)


@upgrade_step('file', '28', '29')
def file_28_29(value, system):
    """Drop mapped_run_type / mapped_read_length from selected bam files.

    A bam file loses both properties when at least one fastq file listed in
    its ``derived_from`` has a ``platform`` found in
    ``_FILE_28_29_PLATFORM_UUIDS``. Every other file is left untouched.

    Args:
        value: The file properties being upgraded; modified in place.
        system: Upgrade context. ``system['registry'][CONNECTION]`` is read
            only when ``value`` is a bam with a non-empty ``derived_from``.

    Returns:
        None. ``value`` is mutated in place.
    """
    # https://encodedcc.atlassian.net/browse/ENCD-5950
    if value.get('file_format', '') != 'bam':
        return
    derived_from = value.get('derived_from', None)
    if not derived_from:
        return

    # Fetch the connection lazily so files taking the early returns above
    # can be upgraded even when no registry was passed to the upgrader.
    conn = system['registry'][CONNECTION]

    # Collect the platform of every fastq parent (None when it has none).
    # NOTE(review): assumes get_by_uuid always returns an item for these
    # uuids -- confirm how unknown uuids are reported.
    fastq_platforms = []
    for parent_uuid in derived_from:
        parent_properties = conn.get_by_uuid(parent_uuid).properties
        if parent_properties.get('file_format', None) == 'fastq':
            fastq_platforms.append(parent_properties.get('platform', None))

    if any(uuid in _FILE_28_29_PLATFORM_UUIDS for uuid in fastq_platforms):
        # pop() with a default tolerates bams that never had the property.
        value.pop('mapped_read_length', None)
        value.pop('mapped_run_type', None)