diff --git a/metadome/__init__.py b/metadome/__init__.py
index 31b547e..d961ff2 100644
--- a/metadome/__init__.py
+++ b/metadome/__init__.py
@@ -1,7 +1,7 @@
import logging
from flask_debugtoolbar import DebugToolbarExtension
-_VERSION = '1.0.0 - alpha'
+_VERSION = '1.0.1'
# for using the Flask debug toolbar throughout the application
toolbar = DebugToolbarExtension()
diff --git a/metadome/default_settings.py b/metadome/default_settings.py
index a2cd7c9..f4118e6 100755
--- a/metadome/default_settings.py
+++ b/metadome/default_settings.py
@@ -48,6 +48,7 @@
GENCODE_HG_ANNOTATION_FILE_GFF3 = DATA_DIR+"Gencode/gencode.v19.annotation.gff3"
GENCODE_HG_TRANSCRIPTION_FILE = DATA_DIR+"Gencode/gencode.v19.pc_transcripts.fa"
GENCODE_HG_TRANSLATION_FILE = DATA_DIR+"Gencode/gencode.v19.pc_translations.fa"
+GENCODE_REFSEQ_FILE = DATA_DIR+"Gencode/gencode.v19.metadata.RefSeq"
GENCODE_SWISSPROT_FILE = DATA_DIR+"Gencode/gencode.v19.metadata.SwissProt"
GENCODE_BASIC_FILE = DATA_DIR+"Gencode/ucsc.gencode.v19.wgEncodeGencodeBasic.txt"
diff --git a/metadome/domain/repositories.py b/metadome/domain/repositories.py
index dee95de..a7ef320 100644
--- a/metadome/domain/repositories.py
+++ b/metadome/domain/repositories.py
@@ -27,6 +27,27 @@ class MalformedAARegionException(Exception):
class GeneRepository:
+    @staticmethod
+    def retrieve_gene_names_for_multiple_transcript_ids(_transcript_ids):
+        """Look up the gene name for each of the given gencode transcripts.
+
+        Queries every Gene whose gencode_transcription_id is contained in
+        `_transcript_ids` and returns the result as
+        {gencode_transcription_id: gene_name}.
+
+        The caught resource-closed / operational errors are re-raised as
+        RecoverableError; any other exception is logged and propagated.
+        """
+        # Open a scoped session for the duration of this lookup
+        _session = db.create_scoped_session()
+
+        try:
+            matching_genes = _session.query(Gene).filter(
+                Gene.gencode_transcription_id.in_(_transcript_ids)).all()
+            return {
+                gene.gencode_transcription_id: gene.gene_name
+                for gene in matching_genes
+            }
+        except (AlchemyResourceClosedError, AlchemyOperationalError, PsycopOperationalError) as e:
+            raise RecoverableError(str(e))
+        except:
+            _log.error(traceback.format_exc())
+            raise
+        finally:
+            # Always release the session so its items are cleared and memory usage stays low
+            _session.remove()
+
@staticmethod
def retrieve_transcript_id_for_multiple_gene_ids(_gene_ids):
"""Retrieves all gencode transcripts for multiple Gene objects as {gene_id: gencode_transcription_id}"""
diff --git a/metadome/domain/wrappers/gencode.py b/metadome/domain/wrappers/gencode.py
index 6e7bda4..23afacc 100644
--- a/metadome/domain/wrappers/gencode.py
+++ b/metadome/domain/wrappers/gencode.py
@@ -1,7 +1,7 @@
import logging
from metadome.default_settings import GENCODE_HG_TRANSLATION_FILE,\
GENCODE_SWISSPROT_FILE, GENCODE_HG_TRANSCRIPTION_FILE,\
- GENCODE_HG_ANNOTATION_FILE_GFF3, GENCODE_BASIC_FILE
+ GENCODE_HG_ANNOTATION_FILE_GFF3, GENCODE_BASIC_FILE, GENCODE_REFSEQ_FILE
from metadome.domain.parsers import gff3
from Bio.Seq import translate
import urllib
@@ -383,4 +383,36 @@ def retrieve_all_protein_coding_gene_names():
# add the gene name to the set
gene_names.add(tokens[5])
- return list(gene_names)
\ No newline at end of file
+ return list(gene_names)
+
+def retrieve_refseq_identifiers_for_transcript(gencode_id):
+    """Retrieves the RefSeq identifiers for a Gencode transcript.
+
+    Scans the tab-separated GENCODE_REFSEQ_FILE for rows whose first column
+    equals `gencode_id` exactly and groups the remaining columns by their
+    two-letter RefSeq prefix.
+
+    Returns a dict with the keys 'NP', 'NM' and 'NR', each holding a
+    (possibly empty) list of identifiers in the order they occur in the file.
+    Tokens with any other prefix are reported via a log warning and skipped.
+    """
+    result = {'NP': [], 'NM': [], 'NR': []}
+    with open(GENCODE_REFSEQ_FILE) as gencode_refseq:
+        # stream the file line by line instead of loading it fully into memory
+        for line in gencode_refseq:
+            # cheap substring pre-filter before paying for the split
+            if gencode_id not in line:
+                continue
+
+            tokens = line.split('\t')
+            # only accept rows whose transcript id matches exactly,
+            # not rows that merely contain the id somewhere
+            if tokens[0] != gencode_id:
+                continue
+
+            for token in tokens[1:]:
+                token = token.strip()
+                if not token:
+                    # empty column: nothing to record
+                    continue
+                prefix = token[:2]
+                if prefix in result:
+                    result[prefix].append(token)
+                else:
+                    _log.warning("When retrieving matching RefSeq ids for "+gencode_id+" unexpected token: "+token)
+    return result
\ No newline at end of file
diff --git a/metadome/presentation/api/routes.py b/metadome/presentation/api/routes.py
index 0d4aac7..d2b7ee3 100755
--- a/metadome/presentation/api/routes.py
+++ b/metadome/presentation/api/routes.py
@@ -14,6 +14,7 @@
from metadome.controllers.job import (create_visualization_job_if_needed,
get_visualization_status,
retrieve_visualization)
+from metadome.domain.wrappers.gencode import retrieve_refseq_identifiers_for_transcript
_log = logging.getLogger(__name__)
@@ -30,7 +31,7 @@ def get_transcript_ids_for_gene(gene_name):
_log.debug('get_transcript_ids_for_gene')
# retrieve the transcript ids for this gene
trancripts = GeneRepository.retrieve_all_transcript_ids(gene_name)
-
+
# check if there was any return value
if len(trancripts) > 0:
message = "Retrieved transcripts for gene '"+trancripts[0].gene_name+"'"
@@ -39,9 +40,14 @@ def get_transcript_ids_for_gene(gene_name):
transcript_results = []
for t in trancripts:
+ # retrieve matching refseq identifiers for this transcript
+ refseq_ids = retrieve_refseq_identifiers_for_transcript(t.gencode_transcription_id)
+ refseq_nm_numbers = ", ".join(nm_number for nm_number in refseq_ids['NM'])
+
transcript_entry = {}
transcript_entry['aa_length'] = t.sequence_length
transcript_entry['gencode_id'] = t.gencode_transcription_id
+ transcript_entry['refseq_nm_numbers'] = refseq_nm_numbers
transcript_entry['has_protein_data'] = not t.protein_id is None
transcript_results.append(transcript_entry)
diff --git a/metadome/presentation/web/routes.py b/metadome/presentation/web/routes.py
index 58937eb..70b25ef 100644
--- a/metadome/presentation/web/routes.py
+++ b/metadome/presentation/web/routes.py
@@ -61,9 +61,9 @@ def about():
def method():
return render_template('method.html')
-@bp.route('/help', methods=['GET'])
+@bp.route('/faq', methods=['GET'])
def help_page():
- return render_template('help.html')
+ return render_template('faq.html')
@bp.route('/visualization_error//', methods=['GET'])
def visualization_error(transcript_id):
diff --git a/metadome/presentation/web/static/js/dashboard/visualization.js b/metadome/presentation/web/static/js/dashboard/visualization.js
index 58ddc3b..0c13d68 100644
--- a/metadome/presentation/web/static/js/dashboard/visualization.js
+++ b/metadome/presentation/web/static/js/dashboard/visualization.js
@@ -83,6 +83,9 @@ var metadomain_graph_visible = true;
// indicates if clinvar variants are annotated in the schematic protein representation
var clinvar_variants_visible = false;
+//indicates if homologous clinvar variants are annotated in the schematic protein representation
+var homologous_clinvar_variants_visible = false;
+
// indicates the various colors to indicate the tolerance
var toleranceColorGradient = [ {
offset : "0%",
@@ -160,7 +163,8 @@ var positionTip = d3.tip()
var positionTip_str = "";
positionTip_str += "Position: p." + d.values[0].protein_pos + " " + d.values[0].cdna_pos + "";
positionTip_str += "Codon: " + d.values[0].ref_codon + "";
- positionTip_str += "Residue: " + d.values[0].ref_aa_triplet;
+ positionTip_str += "Residue: " + d.values[0].ref_aa_triplet + "";
+ positionTip_str += "Tolerance score (dn/ds): "+ (Math.round((d.values[0].sw_dn_ds)*100)/100) +' ('+tolerance_rating(d.values[0].sw_dn_ds) +')';
if (d.values[0].domains.length > 0){
positionTip_str += " In domain(s): ";
var n_domains_at_position = d.values[0].domains.length;
@@ -257,10 +261,11 @@ function createGraph(obj) {
}
// Draw all individual user interface elements based on the data
- annotateDomains(domain_data, positional_annotation);
+ annotateDomains(domain_data, positional_annotation, domain_metadomain_coverage);
createToleranceGraph(dataGroup);
createToleranceGraphLegend();
- drawMetaDomainLandscape(domain_data, dataGroup);
+ drawMetaDomainLandscape(domain_data, dataGroup, domain_metadomain_coverage, obj.transcript_id);
+ createMetaDomainLegend();
// Add schematic protein overview as a custom Axis
createSchematicProtein(domain_metadomain_coverage, dataGroup, obj.transcript_id);
@@ -272,7 +277,7 @@ function createGraph(obj) {
toggleToleranceLandscapeOrMetadomainLandscape();
}
-function drawMetaDomainLandscape(domain_data, data){
+function drawMetaDomainLandscape(domain_data, data, domain_metadomain_coverage, transcript_id){
// get all possible domain ids
for (var i = 0; i < domain_data.length; i++){
if (domain_data[i].metadomain){
@@ -351,6 +356,10 @@ function drawMetaDomainLandscape(domain_data, data){
})
.style("clip-path", "url(#clip)")
.style("fill", "green")
+ .on("click", function(d) {
+ // Call this method found in dashboard.js
+ createPositionalInformation(domain_metadomain_coverage, transcript_id, d)
+ })
.on("mouseover", function(d) {
if (metadomain_graph_visible){
var normal_missense_variant_count = 0;
@@ -418,6 +427,10 @@ function drawMetaDomainLandscape(domain_data, data){
})
.style("clip-path", "url(#clip)")
.style("fill", "red")
+ .on("click", function(d) {
+ // Call this method found in dashboard.js
+ createPositionalInformation(domain_metadomain_coverage, transcript_id, d)
+ })
.on("mouseover", function(d) {
if (metadomain_graph_visible){
var pathogenic_missense_variant_count = 0;
@@ -679,7 +692,7 @@ function createSchematicProtein(domain_metadomain_coverage, groupedTolerance, tr
}
// Draw the domain annotation
-function annotateDomains(protDomain, tolerance_data) {
+function annotateDomains(protDomain, tolerance_data, domain_metadomain_coverage) {
// append domain view
var domains = main_svg.append("g")
.attr("class", "domains")
@@ -758,7 +771,7 @@ function annotateDomains(protDomain, tolerance_data) {
// if there is any, add meta-domain details
if (d.metadomain){
- document.getElementById("domain_information_overlay_content").innerHTML += 'This domain has '+d.meta_domain_alignment_depth+' homologous occurrences throughout the human genome.';
+ document.getElementById("domain_information_overlay_content").innerHTML += 'This domain has '+domain_metadomain_coverage[d.ID]+' homologous occurrences throughout the human genome.';
}
document.getElementById("domain_information_overlay_content").innerHTML += '';
@@ -879,11 +892,6 @@ function createToleranceGraphLegend() {
.attr("transform", "translate(" + main_marginLegend.left + "," + main_marginLegend.top + ")")
.style("fill", "url(#legendGradient)");
- var context = main_svg.append("g")
- .attr("class", "context")
- .attr("id", "zoom_landscape")
- .attr("transform", "translate(" + main_marginContext.left + "," + main_marginContext.top + ")");
-
// append legend text
main_svg.append("text")
.attr("text-anchor", "middle")
@@ -921,6 +929,108 @@ function createToleranceGraphLegend() {
.style('user-select', 'none');
}
+// Draw the legend for the MetaDomain landscape
+function createMetaDomainLegend(){
+    // one colored swatch per legend entry: [y position, fill color]
+    var swatches = [
+        [20, "green"],
+        [105, "red"],
+        [190, "black"]
+    ];
+
+    // one line of legend text per row: [y position, label]
+    var captions = [
+        [20, "gnomAD"],
+        [35, "missense in"],
+        [50, "homologues"],
+        [105, "ClinVar"],
+        [120, "missense in"],
+        [135, "homologues"],
+        [190, "no alignment"]
+    ];
+
+    // append colors (all rects first, so the text is appended after them)
+    swatches.forEach(function(swatch) {
+        main_svg.append("rect")
+            .attr("width", 70)
+            .attr("height", 20)
+            .attr("x", 0)
+            .attr("y", swatch[0])
+            .attr("dy", 35)
+            .attr("class", "label legendMetaDomainRect")
+            .style("fill", swatch[1]);
+    });
+
+    // append legend text
+    captions.forEach(function(caption) {
+        main_svg.append("text")
+            .attr("x", 0)
+            .attr("y", caption[0])
+            .attr("dy", 35)
+            .attr("class", "label legendMetaDomainText")
+            .text(caption[1])
+            .style("font-size", "12px")
+            .style('pointer-events', 'none')
+            .style('user-select', 'none');
+    });
+}
+
/*******************************************************************************
* Interactive behaviour functions
******************************************************************************/
@@ -929,25 +1039,31 @@ function createToleranceGraphLegend() {
function toggleToleranceLandscapeOrMetadomainLandscape(){
// get the tolerance graph
var tolerance_graph = d3.select("#tolerance_graph");
- var legend = d3.select("#legendGradientRect");
- var legend_text = d3.selectAll(".legendGradientText");
+ var tolerance_legend = d3.select("#legendGradientRect");
+ var tolerance_legend_text = d3.selectAll(".legendGradientText");
// Get the metadomain graph
var metadomain_graph = d3.select("#metadomain_graph");
+ var meta_legend = d3.selectAll(".legendMetaDomainRect");
+ var meta_legend_text = d3.selectAll(".legendMetaDomainText");
switch($('input[name=landscape_checkbox]:checked', '#checkbox_for_landscape').val()){
case "metadomain_landscape":
tolerance_graph.style("opacity", 0);
- legend.style("opacity", 0);
- legend_text.style("opacity", 0);
+ tolerance_legend.style("opacity", 0);
+ tolerance_legend_text.style("opacity", 0);
metadomain_graph.style("opacity", 1);
+ meta_legend_text.style("opacity", 1);
+ meta_legend.style("opacity", 1);
metadomain_graph_visible = true;
break;
case "tolerance_landscape":
tolerance_graph.style("opacity", 1);
- legend.style("opacity", 1);
- legend_text.style("opacity", 1);
+ tolerance_legend.style("opacity", 1);
+ tolerance_legend_text.style("opacity", 1);
metadomain_graph.style("opacity", 0);
+ meta_legend_text.style("opacity", 0);
+ meta_legend.style("opacity", 0);
metadomain_graph_visible = false;
break;
default:
@@ -962,12 +1078,15 @@ function draw_position_schematic_protein(d, element){
if (d.values[0].ClinVar != null) {
pathogenic_missense_variant_count += d.values[0].ClinVar.length;
}
-
+ }
+
+ var homologous_pathogenic_missense_variant_count = 0;
+ if (homologous_clinvar_variants_visible){
// count pathogenic variants linked via meta-domain relationships
if (d.values[0].domains != null){
meta_domain_ids.forEach(domain_id => {
if (d.values[0].hasOwnProperty('domains') && d.values[0].domains[domain_id] != null){
- pathogenic_missense_variant_count = Math.max(d.values[0].domains[domain_id].pathogenic_missense_variant_count, pathogenic_missense_variant_count);
+ homologous_pathogenic_missense_variant_count = d.values[0].domains[domain_id].pathogenic_missense_variant_count;
}
});
}
@@ -985,6 +1104,13 @@ function draw_position_schematic_protein(d, element){
return 'red';
}
+ // if containing pathogenic variants, display it as red
+ if (homologous_pathogenic_missense_variant_count > 0){
+ d3.select(element).style("fill-opacity", 0.7);
+ return 'red';
+ }
+
+
else{
d3.select(element).style("fill-opacity", 0.2);
return "grey";
@@ -1001,6 +1127,17 @@ function toggleClinvarVariantsInProtein(clinvar_checkbox){
});
}
+// Show or hide homologous ClinVar variants in the schematic protein,
+// following the checked state of the given checkbox
+function toggleHomologousClinvarVariantsInProtein(clinvar_checkbox){
+    // store the checkbox state; draw_position_schematic_protein reads this flag
+    homologous_clinvar_variants_visible = clinvar_checkbox.checked;
+
+    // recompute the fill of every tick on the tolerance axis
+    d3.select("#tolerance_axis")
+        .selectAll(".toleranceAxisTick")
+        .style("fill", function(d) {
+            return draw_position_schematic_protein(d, this);
+        });
+}
+
+
// Rescale the landscape for zooming or brushing purposes
function rescaleLandscape(){
var focus = d3.select("#tolerance_graph");
@@ -1133,4 +1270,28 @@ function tolerance_color(score) {
} else {
return toleranceColorGradient[8].color;
}
+}
+
+// Returns the rating text for a dn/ds tolerance score; the position tooltip
+// appends it in parentheses after the rounded score.
+// The score is compared against ascending upper bounds; the first bound that
+// is not exceeded selects the returned string.
+// NOTE(review): every branch returns an empty string in this view - the
+// literals look like markup that was lost on the way here; confirm the
+// intended rating labels against the repository before relying on this.
+// NOTE(review): the original header mentioned color #f29e2e as the average
+// dn/ds score over all genes; presumably that belongs to the color coding in
+// tolerance_color - confirm.
+function tolerance_rating(score) {
+ if (score <= 0.175) {
+ return '';
+ } else if (score <= 0.35) {
+ return '' ;
+ } else if (score <= 0.525) {
+ return '' ;
+ } else if (score <= 0.7) {
+ return '' ;
+ } else if (score <= 0.875) {
+ return '' ;
+ } else if (score <= 1.025) {
+ return '' ;
+ } else if (score <= 1.2) {
+ return '' ;
+ } else if (score <= 1.375) {
+ return '' ;
+ } else {
+ return '' ;
+ }
}
\ No newline at end of file
diff --git a/metadome/presentation/web/templates/about.html b/metadome/presentation/web/templates/about.html
index 4f997dd..6e5b953 100644
--- a/metadome/presentation/web/templates/about.html
+++ b/metadome/presentation/web/templates/about.html
@@ -46,6 +46,14 @@
Mission
found across homologous domain positions. We hope that these
new insights can be used directly in a publication and/or for
intelligent design of further experiments.
+
+
Tutorial
+
+ To get more familiar with using MetaDome and all of its functionality
+ we strongly advise you to
+ start the tour. If you feel that any questions are not addressed in the
+ FAQ below, please contact us.
+
Method
@@ -102,6 +110,50 @@
Licensing
href="http://creativecommons.org/licenses/by-nc-sa/4.0/"
target="_blank">CC ANS 4.0
+
+
Privacy
+
+ This section outlines the ways in which the MetaDome website handles information about users.
+ This should not be read as a legal document, but as a description of how we handle information
+ that could be considered sensitive.
+
+
Sensitive data
+
+ MetaDome does not save any data that a user may provide. The visualization
+ of MetaDome occurs client-side (in your local browser) and is not
+ accessible to us. This means we cannot know which genes, transcripts,
+ domains, and positions you are interested in.
+
+
+
Personalized tracking
+
+ MetaDome makes use of Google Analytics (GA) to help us analyze how users
+ navigate the various site-pages.
+ GA uses a single-pixel "web bug" image, which is served from every
+ page, a javascript script that collects information about each request,
+ and cookies that maintain information about your usage of the site between
+ visits. You can read more about how GA works on the GA
+ website,
+ which includes a
+ detailed description of how traffic is tracked and analysed.
+
+ We use the information generated by GA purely to assess the usefulness
+ and popularity of different features of the site. We have configured GA
+ to not provide the ability to track individual users' usage of the site.
+ However, GA does provide a high-level anonymized overview of the traffic that passes
+ through the site, including such information as the approximate geographical
+ location of users, how often and for how long they visited the site, etc.
+
+ We will never (and will not allow any third party to) use the
+ information generated by GA to track or to collect any Personally
+ Identifiable Information of visitors to our site.
+ Neither will we link, or seek to link, an IP address
+ with the identity of a computer user.
+
+ If you have any concerns about our use of Google Analytics, please
+ feel free to contact us.
+
Acknowledgments
diff --git a/metadome/presentation/web/templates/base.html b/metadome/presentation/web/templates/base.html
index b624a54..148ce0a 100644
--- a/metadome/presentation/web/templates/base.html
+++ b/metadome/presentation/web/templates/base.html
@@ -10,7 +10,7 @@
-{% block title %}{% endblock %}
+{% block title %}MetaDome web server{% endblock %}
@@ -71,7 +71,7 @@