From f7160f30d12e503e673f3c4da1a06e9fb85f9a00 Mon Sep 17 00:00:00 2001
From: Jorrit Boekel
Date: Mon, 3 Feb 2025 13:49:34 +0100
Subject: [PATCH] Add tables of PSM stats to report overview part

---
 assets/report.html     | 46 ++++++++++++++++++++++++++++++++++++++++++
 bin/qc_psms.R          |  9 +++++----
 bin/report_tables.py   | 22 ++++++++++++++++++++
 workflows/reporting.nf |  4 ++--
 4 files changed, 75 insertions(+), 6 deletions(-)

diff --git a/assets/report.html b/assets/report.html
index c3ec199..86cf1ea 100644
--- a/assets/report.html
+++ b/assets/report.html
@@ -98,6 +98,52 @@
{{ ftitle }}
{% endfor %} +

PSMs

+
+
+
IDs
+ + + + + + + + + {% for plate, scans, psms, pc in psmtables.ids %} + + + + + + + {% endfor %} + +
Sample/plateScansPSMs% ID
{{ plate }} {{ scans }} {{ psms }} {{ pc }}
+
+
+
Missed cleavages
+ + + + + + + + + {% for plate, mc, psms, pc in psmtables.miscleav %} + + + + + + + {% endfor %} + +
Sample/plate# missed cleavages# PSMs% PSMs
{{ plate }} {{ mc }} {{ psms }} {{ pc }}
+
+
+ {% if ptmtables.summary %}

PTMs

diff --git a/bin/qc_psms.R b/bin/qc_psms.R
index edd115b..59e2bdc 100755
--- a/bin/qc_psms.R
+++ b/bin/qc_psms.R
@@ -116,8 +116,8 @@ p = ggplotly(ggp, width=400, height=vert_height) %>%
   layout(legend = list(orientation = 'h', x = 0, y = 1.1, xanchor='left', yanchor='bottom'))
 # Work around since plotly does not honor above legend.title=element_blank call
 p$x$layout$legend$title$text = ''
-
 htmlwidgets::saveWidget(p, 'amount_psms.html', selfcontained=F)
+write.table(amount_id, 'psms_ids.txt', row.names=F, quote=F, sep='\t')
 
 # Missing isobaric values
 if (length(grep('plex', names(feats)))) {
@@ -145,16 +145,16 @@ if (length(grep('plex', names(feats)))) {
 
 # Missed cleavages
 mcl = aggregate(get(scancol)~get(xcol)+get(miscleavcol), feats, length)
-colnames(mcl) = c(xcol, 'missed_cleavage', 'nrscan')
+colnames(mcl) = c(xcol, 'missed_cleavage', 'nrpsms')
 mcl_am = subset(merge(mcl, amount_psms, by=xcol), missed_cleavage %in% c(0,1,2))
-mcl_am$percent = mcl_am$nrscan / mcl_am$"PSMs IDed" * 100
+mcl_am$percent = mcl_am$nrpsms / mcl_am$"PSMs IDed" * 100
 mc_text_y = max(mcl_am$percent) * 2/6
 mcl_am$missed_cleavage = as.factor(mcl_am$missed_cleavage)
 
 mcplot = ggplot(mcl_am) +
   geom_bar(aes(x=.data[[ xcol ]], y=percent, fill=missed_cleavage, group=missed_cleavage), position='dodge', stat='identity') +
   # 0.9 is the default dodge (90% of 1, 1 used bc all same value) but when not spec -> no dodge at all?
-  geom_text(position=position_dodge(width=0.9), aes(x=.data[[xcol]], y=mc_text_y, group=missed_cleavage, label=glue('{nrscan} PSMs')), colour="black", size=4, inherit.aes=T) +
+  geom_text(position=position_dodge(width=0.9), aes(x=.data[[xcol]], y=mc_text_y, group=missed_cleavage, label=glue('{nrpsms} PSMs')), colour="black", size=4, inherit.aes=T) +
   ylim(c(0, 100)) + ylab('% of PSMs') +
   theme_bw() +
   theme(axis.title.x=element_text(size=15), axis.title.y=element_blank(), axis.text=element_text(size=10), axis.text.y=element_text(angle=90), legend.position="top", legend.text=element_text(size=10), legend.title=element_blank()) +
@@ -163,6 +163,7 @@ p = ggplotly(mcplot, width=400, height=vert_height) %>%
   layout(legend = list(orientation = 'h', x = 0, y = 1.1, xanchor='left', yanchor='bottom'))
 p$x$layout$legend$title$text = ''
 htmlwidgets::saveWidget(p, 'missed_cleavages.html', selfcontained=F)
+write.table(mcl_am, 'miscleav.txt', row.names=F, quote=F, sep='\t')
 
 
 # Now the per-fraction or per-file stats
diff --git a/bin/report_tables.py b/bin/report_tables.py
index 560c9bf..b9e3db0 100755
--- a/bin/report_tables.py
+++ b/bin/report_tables.py
@@ -263,6 +263,27 @@ def get_plotly_html(fn):
         summary_fields = [x for x in summary_field_order if x in fields]
         break
 
+# PSM tables
+psmtables = {'ids': [], 'miscleav': []}
+with open('psmids') as fp:
+    head = next(fp).strip().split()
+    plates = defaultdict(defaultdict)
+    for line in fp:
+        lnmap = {head[ix]: x for ix, x in enumerate(line.strip().split('\t'))}
+        if lnmap['name'] == 'MS2 scans':
+            plates[lnmap['plateID']]['scans'] = lnmap['count']
+        elif lnmap['name'] == 'PSMs IDed':
+            plates[lnmap['plateID']].update({'psms': lnmap['count'], 'pc': lnmap['labeltext']})
+psmtables['ids'] = [[p, nms['scans'], nms['psms'], nms['pc']] for p, nms in plates.items()]
+
+with open('miscleav') as fp:
+    head = next(fp).strip().split()
+    plates = defaultdict()
+    for line in fp:
+        lnmap = {head[ix]: x for ix, x in enumerate(line.strip().split('\t'))}
+        print(lnmap)
+        psmtables['miscleav'].append([lnmap['plateID'], lnmap['missed_cleavage'], lnmap['nrpsms'], lnmap['IDed']])
+
 # Overlap
 overlap = defaultdict(dict)
 
@@ -315,6 +336,7 @@ def get_plotly_html(fn):
         deqmsplots=deqmsplots,
         deqmscomps=deqmscomps,
         tabletitles=tabletitles,
+        psmtables=psmtables,
         summary_fields=summary_fields,
         summary_table=summary_table,
         overlap=overlap,
diff --git a/workflows/reporting.nf b/workflows/reporting.nf
index b8e339c..08a84ed 100644
--- a/workflows/reporting.nf
+++ b/workflows/reporting.nf
@@ -79,7 +79,7 @@ process PSMQC {
   tuple path('psms'), path('filescans'), path('platescans'), val(mzmls), val(fractionation), val(has_newmzmls), val(has_oldmzmls), val(search_engine)
 
   output:
-  tuple path('platescans'), path('amount_psms_files'), path("psmplothtml"), path('psmtable__summary.txt')
+  tuple path('platescans'), path('amount_psms_files'), path("psmplothtml"), path('psmtable__summary.txt'), path('psms_ids.txt'), path('miscleav.txt')
 
   script:
   """
@@ -151,7 +151,7 @@ process summaryReport {
 
 
   input:
-  tuple path(platescans), path(plotlibs), path('psmplots'), path(psm_summary), path(featplots), path(feat_summaries), path(feat_overlaps), path('ptmplots'), path(ptmfiles), path('warnings*')
+  tuple path(platescans), path(plotlibs), path('psmplots'), path(psm_summary), path('psmids'), path('miscleav'), path(featplots), path(feat_summaries), path(feat_overlaps), path('ptmplots'), path(ptmfiles), path('warnings*')
 
   output:
   tuple path('report_groovy_template.html'), path('libs.js')