Skip to content

Commit

Permalink
Merge pull request #48 from cmbi/rebuttal
Browse files Browse the repository at this point in the history
Rebuttal
  • Loading branch information
laurensvdwiel authored Apr 19, 2019
2 parents f7797f0 + 29bc725 commit d3196d2
Show file tree
Hide file tree
Showing 15 changed files with 392 additions and 107 deletions.
2 changes: 1 addition & 1 deletion metadome/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from flask_debugtoolbar import DebugToolbarExtension

_VERSION = '1.0.0 - alpha'
_VERSION = '1.0.1'

# for using the Flask debug toolbar throughout the application
toolbar = DebugToolbarExtension()
Expand Down
1 change: 1 addition & 0 deletions metadome/default_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
GENCODE_HG_ANNOTATION_FILE_GFF3 = DATA_DIR+"Gencode/gencode.v19.annotation.gff3"
GENCODE_HG_TRANSCRIPTION_FILE = DATA_DIR+"Gencode/gencode.v19.pc_transcripts.fa"
GENCODE_HG_TRANSLATION_FILE = DATA_DIR+"Gencode/gencode.v19.pc_translations.fa"
GENCODE_REFSEQ_FILE = DATA_DIR+"Gencode/gencode.v19.metadata.RefSeq"
GENCODE_SWISSPROT_FILE = DATA_DIR+"Gencode/gencode.v19.metadata.SwissProt"
GENCODE_BASIC_FILE = DATA_DIR+"Gencode/ucsc.gencode.v19.wgEncodeGencodeBasic.txt"

Expand Down
21 changes: 21 additions & 0 deletions metadome/domain/repositories.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,27 @@ class MalformedAARegionException(Exception):

class GeneRepository:

@staticmethod
def retrieve_gene_names_for_multiple_transcript_ids(_transcript_ids):
    """Look up the gene name belonging to each of the given Gencode transcripts.

    Queries all Gene entries whose gencode_transcription_id is contained in
    `_transcript_ids` and returns them as
    {gencode_transcription_id: gene_name}. Transcript ids without a matching
    Gene entry are absent from the result.

    Raises RecoverableError when the query fails with one of the
    SQLAlchemy / psycopg errors caught below; any other exception is
    logged with its traceback and re-raised unchanged.
    """
    # Every call works on its own scoped session
    session = db.create_scoped_session()

    try:
        matching_genes = session.query(Gene).filter(
            Gene.gencode_transcription_id.in_(_transcript_ids)
        ).all()
        # Built inside the try block so attribute access on the Gene
        # objects is covered by the same error handling as the query
        return {
            gene.gencode_transcription_id: gene.gene_name
            for gene in matching_genes
        }
    except (AlchemyResourceClosedError, AlchemyOperationalError, PsycopOperationalError) as e:
        raise RecoverableError(str(e))
    except:
        _log.error(traceback.format_exc())
        raise
    finally:
        # Always remove the session so the loaded items are cleared and
        # memory usage stays low
        session.remove()

@staticmethod
def retrieve_transcript_id_for_multiple_gene_ids(_gene_ids):
"""Retrieves all gencode transcripts for multiple Gene objects as {gene_id: gencode_transcription_id}"""
Expand Down
36 changes: 34 additions & 2 deletions metadome/domain/wrappers/gencode.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from metadome.default_settings import GENCODE_HG_TRANSLATION_FILE,\
GENCODE_SWISSPROT_FILE, GENCODE_HG_TRANSCRIPTION_FILE,\
GENCODE_HG_ANNOTATION_FILE_GFF3, GENCODE_BASIC_FILE
GENCODE_HG_ANNOTATION_FILE_GFF3, GENCODE_BASIC_FILE, GENCODE_REFSEQ_FILE
from metadome.domain.parsers import gff3
from Bio.Seq import translate
import urllib
Expand Down Expand Up @@ -383,4 +383,36 @@ def retrieve_all_protein_coding_gene_names():
# add the gene name to the set
gene_names.add(tokens[5])

return list(gene_names)
return list(gene_names)

def retrieve_refseq_identifiers_for_transcript(gencode_id):
    """Retrieve the RefSeq identifiers listed for a Gencode transcript.

    Scans the tab-separated file at GENCODE_REFSEQ_FILE for lines whose
    first column is exactly `gencode_id` and groups the remaining columns
    of those lines by their two-letter prefix.

    Args:
        gencode_id: Gencode transcript identifier, spelled exactly as in
            the first column of the file.

    Returns:
        A dict with the keys 'NP', 'NM' and 'NR'. Each maps to the list of
        identifiers starting with that prefix, in file order. All three
        lists are empty when no line matches.
    """
    result = {'NP': [], 'NM': [], 'NR': []}

    with open(GENCODE_REFSEQ_FILE) as gencode_refseq:
        # Stream the file line by line; this function is called once per
        # transcript, so loading the whole file each time is wasteful
        for line in gencode_refseq:
            # Cheap substring pre-filter before splitting the line
            if gencode_id not in line:
                continue

            tokens = line.split('\t')

            # Only accept the line if the first column is exactly the
            # transcript we are looking for; this rejects longer ids that
            # merely contain gencode_id
            if tokens[0] != gencode_id:
                continue

            for token in tokens[1:]:
                token = token.strip()
                if not token:
                    # Empty column, nothing to record
                    continue

                prefix = token[:2]
                if prefix in result:
                    result[prefix].append(token)
                else:
                    _log.warning(
                        "When retrieving matching RefSeq ids for %s unexpected token: %s",
                        gencode_id, token)

    return result
8 changes: 7 additions & 1 deletion metadome/presentation/api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from metadome.controllers.job import (create_visualization_job_if_needed,
get_visualization_status,
retrieve_visualization)
from metadome.domain.wrappers.gencode import retrieve_refseq_identifiers_for_transcript


_log = logging.getLogger(__name__)
Expand All @@ -30,7 +31,7 @@ def get_transcript_ids_for_gene(gene_name):
_log.debug('get_transcript_ids_for_gene')
# retrieve the transcript ids for this gene
trancripts = GeneRepository.retrieve_all_transcript_ids(gene_name)

# check if there was any return value
if len(trancripts) > 0:
message = "Retrieved transcripts for gene '"+trancripts[0].gene_name+"'"
Expand All @@ -39,9 +40,14 @@ def get_transcript_ids_for_gene(gene_name):

transcript_results = []
for t in trancripts:
# retrieve matching refseq identifiers for this transcript
refseq_ids = retrieve_refseq_identifiers_for_transcript(t.gencode_transcription_id)
refseq_nm_numbers = ", ".join(nm_number for nm_number in refseq_ids['NM'])

transcript_entry = {}
transcript_entry['aa_length'] = t.sequence_length
transcript_entry['gencode_id'] = t.gencode_transcription_id
transcript_entry['refseq_nm_numbers'] = refseq_nm_numbers
transcript_entry['has_protein_data'] = not t.protein_id is None
transcript_results.append(transcript_entry)

Expand Down
4 changes: 2 additions & 2 deletions metadome/presentation/web/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ def about():
def method():
return render_template('method.html')

@bp.route('/help', methods=['GET'])
@bp.route('/faq', methods=['GET'])
def help_page():
return render_template('help.html')
return render_template('faq.html')

@bp.route('/visualization_error/<transcript_id>/', methods=['GET'])
def visualization_error(transcript_id):
Expand Down
Loading

0 comments on commit d3196d2

Please sign in to comment.