diff --git a/metadome/__init__.py b/metadome/__init__.py index 31b547e..d961ff2 100644 --- a/metadome/__init__.py +++ b/metadome/__init__.py @@ -1,7 +1,7 @@ import logging from flask_debugtoolbar import DebugToolbarExtension -_VERSION = '1.0.0 - alpha' +_VERSION = '1.0.1' # for using the Flask debug toolbar throughout the application toolbar = DebugToolbarExtension() diff --git a/metadome/default_settings.py b/metadome/default_settings.py index a2cd7c9..f4118e6 100755 --- a/metadome/default_settings.py +++ b/metadome/default_settings.py @@ -48,6 +48,7 @@ GENCODE_HG_ANNOTATION_FILE_GFF3 = DATA_DIR+"Gencode/gencode.v19.annotation.gff3" GENCODE_HG_TRANSCRIPTION_FILE = DATA_DIR+"Gencode/gencode.v19.pc_transcripts.fa" GENCODE_HG_TRANSLATION_FILE = DATA_DIR+"Gencode/gencode.v19.pc_translations.fa" +GENCODE_REFSEQ_FILE = DATA_DIR+"Gencode/gencode.v19.metadata.RefSeq" GENCODE_SWISSPROT_FILE = DATA_DIR+"Gencode/gencode.v19.metadata.SwissProt" GENCODE_BASIC_FILE = DATA_DIR+"Gencode/ucsc.gencode.v19.wgEncodeGencodeBasic.txt" diff --git a/metadome/domain/repositories.py b/metadome/domain/repositories.py index dee95de..a7ef320 100644 --- a/metadome/domain/repositories.py +++ b/metadome/domain/repositories.py @@ -27,6 +27,27 @@ class MalformedAARegionException(Exception): class GeneRepository: + @staticmethod + def retrieve_gene_names_for_multiple_transcript_ids(_transcript_ids): + """Retrieves all gene names for a given set of gencode transcripts + based on multiple Gene objects as {gencode_transcription_id: gene_name}""" + # Open as session + _session = db.create_scoped_session() + + try: + _gene_name_per_gencode_transcription_id = {} + for gene in _session.query(Gene).filter(Gene.gencode_transcription_id.in_(_transcript_ids)).all(): + _gene_name_per_gencode_transcription_id[gene.gencode_transcription_id] = gene.gene_name + return _gene_name_per_gencode_transcription_id + except (AlchemyResourceClosedError, AlchemyOperationalError, PsycopOperationalError) as e: + raise 
RecoverableError(str(e)) + except: + _log.error(traceback.format_exc()) + raise + finally: + # Close this session, thus all items are cleared and memory usage is kept at a minimum + _session.remove() + @staticmethod def retrieve_transcript_id_for_multiple_gene_ids(_gene_ids): """Retrieves all gencode transcripts for multiple Gene objects as {gene_id: gencode_transcription_id}""" diff --git a/metadome/domain/wrappers/gencode.py b/metadome/domain/wrappers/gencode.py index 6e7bda4..23afacc 100644 --- a/metadome/domain/wrappers/gencode.py +++ b/metadome/domain/wrappers/gencode.py @@ -1,7 +1,7 @@ import logging from metadome.default_settings import GENCODE_HG_TRANSLATION_FILE,\ GENCODE_SWISSPROT_FILE, GENCODE_HG_TRANSCRIPTION_FILE,\ - GENCODE_HG_ANNOTATION_FILE_GFF3, GENCODE_BASIC_FILE + GENCODE_HG_ANNOTATION_FILE_GFF3, GENCODE_BASIC_FILE, GENCODE_REFSEQ_FILE from metadome.domain.parsers import gff3 from Bio.Seq import translate import urllib @@ -383,4 +383,36 @@ def retrieve_all_protein_coding_gene_names(): # add the gene name to the set gene_names.add(tokens[5]) - return list(gene_names) \ No newline at end of file + return list(gene_names) + +def retrieve_refseq_identifiers_for_transcript(gencode_id): + """Retrieves the refseq identifiers for a Gencode transcript""" + result = {} + result['NP'] = [] + result['NM'] = [] + result['NR'] = [] + with open(GENCODE_REFSEQ_FILE) as gencode_refseq: + # read the lines in the file + lines = gencode_refseq.readlines() + for line in lines: + # check if the unique identifier is on the current line + if gencode_id in line: + #Add the result to hits + tokens = line.split('\t') + + # Only add the translation to the translation list if the gene_name exactly matches the one we are looking for + if gencode_id == tokens[0]: + # add the results + for token in tokens[1:]: + token = token.strip() + if token.startswith('NP'): + result['NP'].append(token) + elif token.startswith('NM'): + result['NM'].append(token) + elif 
token.startswith('NR'): + result['NR'].append(token) + elif len(token) == 0: + continue + else: + _log.warning("When retrieving matching RefSeq ids for "+gencode_id+" unexpected token: "+token) + return result \ No newline at end of file diff --git a/metadome/presentation/api/routes.py b/metadome/presentation/api/routes.py index 0d4aac7..d2b7ee3 100755 --- a/metadome/presentation/api/routes.py +++ b/metadome/presentation/api/routes.py @@ -14,6 +14,7 @@ from metadome.controllers.job import (create_visualization_job_if_needed, get_visualization_status, retrieve_visualization) +from metadome.domain.wrappers.gencode import retrieve_refseq_identifiers_for_transcript _log = logging.getLogger(__name__) @@ -30,7 +31,7 @@ def get_transcript_ids_for_gene(gene_name): _log.debug('get_transcript_ids_for_gene') # retrieve the transcript ids for this gene trancripts = GeneRepository.retrieve_all_transcript_ids(gene_name) - + # check if there was any return value if len(trancripts) > 0: message = "Retrieved transcripts for gene '"+trancripts[0].gene_name+"'" @@ -39,9 +40,14 @@ def get_transcript_ids_for_gene(gene_name): transcript_results = [] for t in trancripts: + # retrieve matching refseq identifiers for this transcript + refseq_ids = retrieve_refseq_identifiers_for_transcript(t.gencode_transcription_id) + refseq_nm_numbers = ", ".join(nm_number for nm_number in refseq_ids['NM']) + transcript_entry = {} transcript_entry['aa_length'] = t.sequence_length transcript_entry['gencode_id'] = t.gencode_transcription_id + transcript_entry['refseq_nm_numbers'] = refseq_nm_numbers transcript_entry['has_protein_data'] = not t.protein_id is None transcript_results.append(transcript_entry) diff --git a/metadome/presentation/web/routes.py b/metadome/presentation/web/routes.py index 58937eb..70b25ef 100644 --- a/metadome/presentation/web/routes.py +++ b/metadome/presentation/web/routes.py @@ -61,9 +61,9 @@ def about(): def method(): return render_template('method.html') -@bp.route('/help', 
methods=['GET']) +@bp.route('/faq', methods=['GET']) def help_page(): - return render_template('help.html') + return render_template('faq.html') @bp.route('/visualization_error//', methods=['GET']) def visualization_error(transcript_id): diff --git a/metadome/presentation/web/static/js/dashboard/visualization.js b/metadome/presentation/web/static/js/dashboard/visualization.js index 58ddc3b..0c13d68 100644 --- a/metadome/presentation/web/static/js/dashboard/visualization.js +++ b/metadome/presentation/web/static/js/dashboard/visualization.js @@ -83,6 +83,9 @@ var metadomain_graph_visible = true; // indicates if clinvar variants are annotated in the schematic protein representation var clinvar_variants_visible = false; +//indicates if homologous clinvar variants are annotated in the schematic protein representation +var homologous_clinvar_variants_visible = false; + // indicates the various colors to indicate the tolerance var toleranceColorGradient = [ { offset : "0%", @@ -160,7 +163,8 @@ var positionTip = d3.tip() var positionTip_str = ""; positionTip_str += "Position: p." + d.values[0].protein_pos + " " + d.values[0].cdna_pos + "
"; positionTip_str += "Codon: " + d.values[0].ref_codon + "
"; - positionTip_str += "Residue: " + d.values[0].ref_aa_triplet; + positionTip_str += "Residue: " + d.values[0].ref_aa_triplet + "
"; + positionTip_str += "Tolerance score (dn/ds): "+ (Math.round((d.values[0].sw_dn_ds)*100)/100) +' ('+tolerance_rating(d.values[0].sw_dn_ds) +')'; if (d.values[0].domains.length > 0){ positionTip_str += "
In domain(s): "; var n_domains_at_position = d.values[0].domains.length; @@ -257,10 +261,11 @@ function createGraph(obj) { } // Draw all individual user interface elements based on the data - annotateDomains(domain_data, positional_annotation); + annotateDomains(domain_data, positional_annotation, domain_metadomain_coverage); createToleranceGraph(dataGroup); createToleranceGraphLegend(); - drawMetaDomainLandscape(domain_data, dataGroup); + drawMetaDomainLandscape(domain_data, dataGroup, domain_metadomain_coverage, obj.transcript_id); + createMetaDomainLegend(); // Add schematic protein overview as a custom Axis createSchematicProtein(domain_metadomain_coverage, dataGroup, obj.transcript_id); @@ -272,7 +277,7 @@ function createGraph(obj) { toggleToleranceLandscapeOrMetadomainLandscape(); } -function drawMetaDomainLandscape(domain_data, data){ +function drawMetaDomainLandscape(domain_data, data, domain_metadomain_coverage, transcript_id){ // get all possible domain ids for (var i = 0; i < domain_data.length; i++){ if (domain_data[i].metadomain){ @@ -351,6 +356,10 @@ function drawMetaDomainLandscape(domain_data, data){ }) .style("clip-path", "url(#clip)") .style("fill", "green") + .on("click", function(d) { + // Call this method found in dashboard.js + createPositionalInformation(domain_metadomain_coverage, transcript_id, d) + }) .on("mouseover", function(d) { if (metadomain_graph_visible){ var normal_missense_variant_count = 0; @@ -418,6 +427,10 @@ function drawMetaDomainLandscape(domain_data, data){ }) .style("clip-path", "url(#clip)") .style("fill", "red") + .on("click", function(d) { + // Call this method found in dashboard.js + createPositionalInformation(domain_metadomain_coverage, transcript_id, d) + }) .on("mouseover", function(d) { if (metadomain_graph_visible){ var pathogenic_missense_variant_count = 0; @@ -679,7 +692,7 @@ function createSchematicProtein(domain_metadomain_coverage, groupedTolerance, tr } // Draw the domain annotation -function 
annotateDomains(protDomain, tolerance_data) { +function annotateDomains(protDomain, tolerance_data, domain_metadomain_coverage) { // append domain view var domains = main_svg.append("g") .attr("class", "domains") @@ -758,7 +771,7 @@ function annotateDomains(protDomain, tolerance_data) { // if there is any, add meta-domain details if (d.metadomain){ - document.getElementById("domain_information_overlay_content").innerHTML += 'This domain has '+d.meta_domain_alignment_depth+' homologous occurrences throughout the human genome.'; + document.getElementById("domain_information_overlay_content").innerHTML += 'This domain has '+domain_metadomain_coverage[d.ID]+' homologous occurrences throughout the human genome.'; } document.getElementById("domain_information_overlay_content").innerHTML += ''; @@ -879,11 +892,6 @@ function createToleranceGraphLegend() { .attr("transform", "translate(" + main_marginLegend.left + "," + main_marginLegend.top + ")") .style("fill", "url(#legendGradient)"); - var context = main_svg.append("g") - .attr("class", "context") - .attr("id", "zoom_landscape") - .attr("transform", "translate(" + main_marginContext.left + "," + main_marginContext.top + ")"); - // append legend text main_svg.append("text") .attr("text-anchor", "middle") @@ -921,6 +929,108 @@ function createToleranceGraphLegend() { .style('user-select', 'none'); } +//Draw the legend for the MetaDomain landscape +function createMetaDomainLegend(){ + // append colors + main_svg.append("rect") + .attr("width", 70) + .attr("height", 20) + .attr("x", 0) + .attr("y", 20) + .attr("dy", 35) + .attr("class", "label legendMetaDomainRect") + .style("fill", "green"); + + // append colors + main_svg.append("rect") + .attr("width", 70) + .attr("height", 20) + .attr("x", 0) + .attr("y", 105) + .attr("dy", 35) + .attr("class", "label legendMetaDomainRect") + .style("fill", "red"); + + // append colors + main_svg.append("rect") + .attr("width", 70) + .attr("height", 20) + .attr("x", 0) + .attr("y", 190) 
+ .attr("dy", 35) + .attr("class", "label legendMetaDomainRect") + .style("fill", "black"); + + // append legend text + main_svg.append("text") + .attr("x", 0) + .attr("y", 20) + .attr("dy", 35) + .attr("class", "label legendMetaDomainText") + .text("gnomAD") + .style("font-size", "12px") + .style('pointer-events', 'none') + .style('user-select', 'none'); + main_svg.append("text") + .attr("x", 0) + .attr("y", 35) + .attr("dy", 35) + .attr("class", "label legendMetaDomainText") + .text("missense in") + .style("font-size", "12px") + .style('pointer-events', 'none') + .style('user-select', 'none'); + main_svg.append("text") + .attr("x", 0) + .attr("y", 50) + .attr("dy", 35) + .attr("class", "label legendMetaDomainText") + .text("homologues") + .style("font-size", "12px") + .style('pointer-events', 'none') + .style('user-select', 'none'); + + // append legend text + main_svg.append("text") + .attr("x", 0) + .attr("y", 105) + .attr("dy", 35) + .attr("class", "label legendMetaDomainText") + .text("ClinVar") + .style("font-size", "12px") + .style('pointer-events', 'none') + .style('user-select', 'none'); + main_svg.append("text") + .attr("x", 0) + .attr("y", 120) + .attr("dy", 35) + .attr("class", "label legendMetaDomainText") + .text("missense in") + .style("font-size", "12px") + .style('pointer-events', 'none') + .style('user-select', 'none'); + main_svg.append("text") + .attr("x", 0) + .attr("y", 135) + .attr("dy", 35) + .attr("class", "label legendMetaDomainText") + .text("homologues") + .style("font-size", "12px") + .style('pointer-events', 'none') + .style('user-select', 'none'); + + // append legend text + main_svg.append("text") + .attr("x", 0) + .attr("y", 190) + .attr("dy", 35) + .attr("class", "label legendMetaDomainText") + .text("no alignment") + .style("font-size", "12px") + .style('pointer-events', 'none') + .style('user-select', 'none'); +} + /******************************************************************************* * Interactive behaviour functions 
******************************************************************************/ @@ -929,25 +1039,31 @@ function createToleranceGraphLegend() { function toggleToleranceLandscapeOrMetadomainLandscape(){ // get the tolerance graph var tolerance_graph = d3.select("#tolerance_graph"); - var legend = d3.select("#legendGradientRect"); - var legend_text = d3.selectAll(".legendGradientText"); + var tolerance_legend = d3.select("#legendGradientRect"); + var tolerance_legend_text = d3.selectAll(".legendGradientText"); // Get the metadomain graph var metadomain_graph = d3.select("#metadomain_graph"); + var meta_legend = d3.selectAll(".legendMetaDomainRect"); + var meta_legend_text = d3.selectAll(".legendMetaDomainText"); switch($('input[name=landscape_checkbox]:checked', '#checkbox_for_landscape').val()){ case "metadomain_landscape": tolerance_graph.style("opacity", 0); - legend.style("opacity", 0); - legend_text.style("opacity", 0); + tolerance_legend.style("opacity", 0); + tolerance_legend_text.style("opacity", 0); metadomain_graph.style("opacity", 1); + meta_legend_text.style("opacity", 1); + meta_legend.style("opacity", 1); metadomain_graph_visible = true; break; case "tolerance_landscape": tolerance_graph.style("opacity", 1); - legend.style("opacity", 1); - legend_text.style("opacity", 1); + tolerance_legend.style("opacity", 1); + tolerance_legend_text.style("opacity", 1); metadomain_graph.style("opacity", 0); + meta_legend_text.style("opacity", 0); + meta_legend.style("opacity", 0); metadomain_graph_visible = false; break; default: @@ -962,12 +1078,15 @@ function draw_position_schematic_protein(d, element){ if (d.values[0].ClinVar != null) { pathogenic_missense_variant_count += d.values[0].ClinVar.length; } - + } + + var homologous_pathogenic_missense_variant_count = 0; + if (homologous_clinvar_variants_visible){ // count pathogenic variants linked via meta-domain relationships if (d.values[0].domains != null){ meta_domain_ids.forEach(domain_id => { if 
(d.values[0].hasOwnProperty('domains') && d.values[0].domains[domain_id] != null){ - pathogenic_missense_variant_count = Math.max(d.values[0].domains[domain_id].pathogenic_missense_variant_count, pathogenic_missense_variant_count); + homologous_pathogenic_missense_variant_count = d.values[0].domains[domain_id].pathogenic_missense_variant_count; } }); } @@ -985,6 +1104,13 @@ function draw_position_schematic_protein(d, element){ return 'red'; } + // if containing pathogenic variants, display it as red + if (homologous_pathogenic_missense_variant_count > 0){ + d3.select(element).style("fill-opacity", 0.7); + return 'red'; + } + + else{ d3.select(element).style("fill-opacity", 0.2); return "grey"; @@ -1001,6 +1127,17 @@ function toggleClinvarVariantsInProtein(clinvar_checkbox){ }); } +function toggleHomologousClinvarVariantsInProtein(clinvar_checkbox){ + var focusAxis = d3.select("#tolerance_axis"); + + homologous_clinvar_variants_visible = clinvar_checkbox.checked; + + focusAxis.selectAll(".toleranceAxisTick").style("fill", function(d, i) { + return draw_position_schematic_protein(d, this); + }); +} + + // Rescale the landscape for zooming or brushing purposes function rescaleLandscape(){ var focus = d3.select("#tolerance_graph"); @@ -1133,4 +1270,28 @@ function tolerance_color(score) { } else { return toleranceColorGradient[8].color; } +} + +//the color coding for specific tolerance scores +//color #f29e2e indicates the average dn/ds tolerance score over all genes +function tolerance_rating(score) { + if (score <= 0.175) { + return ''; + } else if (score <= 0.35) { + return '' ; + } else if (score <= 0.525) { + return '' ; + } else if (score <= 0.7) { + return '' ; + } else if (score <= 0.875) { + return '' ; + } else if (score <= 1.025) { + return '' ; + } else if (score <= 1.2) { + return '' ; + } else if (score <= 1.375) { + return '' ; + } else { + return '' ; + } } \ No newline at end of file diff --git a/metadome/presentation/web/templates/about.html 
b/metadome/presentation/web/templates/about.html index 4f997dd..6e5b953 100644 --- a/metadome/presentation/web/templates/about.html +++ b/metadome/presentation/web/templates/about.html @@ -46,6 +46,14 @@

Mission

found across homologous domain positions. We hope that these new insights can be used directly in a publication and/or for intelligent design of further experiments.

+ +

Tutorial

+

+ To get more familiar with using MetaDome and all of its functionality + we strongly advise you to + start the tour. If you feel that any questions are not addressed in the + FAQ below, please contact us. +

Method

@@ -102,6 +110,50 @@

Licensing

href="http://creativecommons.org/licenses/by-nc-sa/4.0/" target="_blank">CC ANS 4.0

+ +

Privacy

+

+ This section outlines the ways in which the MetaDome website handles information about users. + This should not be read as a legal document, but as a description of how we handle information + that could be considered sensitive. +

+
Sensitive data
+

+ MetaDome does not save any data that a user may provide. The visualization + of MetaDome occurs client-side (in your local browser) and is not + accessible to us. This means we can not know which genes, transcripts, + domains, and positions you are interested in. +

+ +
Personalized tracking
+

+ MetaDome makes use of Google Analytics (GA) to help us analyze how users + navigate the various site-pages. + GA uses a single-pixel "web bug" image, which is served from every + page, a JavaScript script that collects information about each request, + and cookies that maintain information about your usage of the site between + visits. You can read more about how GA works on the GA + website, + which includes a + detailed description of how traffic is tracked and analyzed. +

+ We use the information generated by GA purely to assess the usefulness + and popularity of different features of the site. We have configured GA + to not provide the ability to track individual users' usage of the site. + However, GA does provide a high-level anonymized overview of the traffic that passes + through the site, including such information as the approximate geographical + location of users, how often and for how long they visited the site, etc. +

+ We will never (and will not allow any third party to) use the + information gathered by GA to track or to collect any Personally + Identifiable Information of visitors to our site. + Neither will we link, or seek to link, an IP address + with the identity of a computer user. +

+ If you have any concerns about our use of Google Analytics, please + feel free to contact us. +

Acknowledgments

diff --git a/metadome/presentation/web/templates/base.html b/metadome/presentation/web/templates/base.html index b624a54..148ce0a 100644 --- a/metadome/presentation/web/templates/base.html +++ b/metadome/presentation/web/templates/base.html @@ -10,7 +10,7 @@ -{% block title %}{% endblock %} +{% block title %}MetaDome web server{% endblock %} @@ -71,7 +71,7 @@ + +
+ +
+