From f7160f30d12e503e673f3c4da1a06e9fb85f9a00 Mon Sep 17 00:00:00 2001
From: Jorrit Boekel <jorrit.boekel@scilifelab.se>
Date: Mon, 3 Feb 2025 13:49:34 +0100
Subject: [PATCH] Add tables of PSM stats to report overview part

---
 assets/report.html     | 46 ++++++++++++++++++++++++++++++++++++++++++
 bin/qc_psms.R          |  9 +++++----
 bin/report_tables.py   | 22 ++++++++++++++++++++
 workflows/reporting.nf |  4 ++--
 4 files changed, 75 insertions(+), 6 deletions(-)
diff --git a/assets/report.html b/assets/report.html
index c3ec199..86cf1ea 100644
--- a/assets/report.html
+++ b/assets/report.html
@@ -98,6 +98,52 @@ <h5 class="title is-5">{{ ftitle }}</h5>
         {% endfor %}
       </div>
 
+      <h4 class="title is-4">PSMs</h4> <!-- PSM stats tables; rows are supplied as psmtables by bin/report_tables.py -->
+      <div class="columns">
+        <div class="column">
+          <h5 class="title is-5">IDs</h5> <!-- one row per entry in psmtables.ids: plate, scans, PSMs, percent ID -->
+          <table class="table is-striped is-narrow is-hoverable is-size-7">
+            <thead>
+              <th>Sample/plate</th>
+              <th>Scans</th>
+              <th>PSMs</th>
+              <th>% ID</th>
+            </thead>
+            <tbody>
+              {% for plate, scans, psms, pc in psmtables.ids %}
+              <tr>
+                <td> {{ plate }} </td>
+                <td> {{ scans }} </td>
+                <td> {{ psms }} </td>
+                <td> {{ pc }} </td>
+              </tr>
+              {% endfor %}
+            </tbody>
+          </table>
+        </div>
+        <div class="column">
+          <h5 class="title is-5">Missed cleavages</h5> <!-- one row per entry in psmtables.miscleav: plate, missed cleavages, PSMs, percent -->
+          <table class="table is-striped is-narrow is-hoverable is-size-7">
+            <thead>
+              <th>Sample/plate</th>
+              <th># missed cleavages</th>
+              <th># PSMs</th>
+              <th>% PSMs</th>
+            </thead>
+            <tbody>
+              {% for plate, mc, psms, pc in psmtables.miscleav %}
+              <tr>
+                <td> {{ plate }} </td>
+                <td> {{ mc }} </td>
+                <td> {{ psms }} </td>
+                <td> {{ pc }} </td>
+              </tr>
+              {% endfor %}
+            </tbody>
+          </table>
+        </div>
+      </div>
+
       {% if ptmtables.summary %}
       <h4 class="title is-4">PTMs</h4>
       <div class="columns">
diff --git a/bin/qc_psms.R b/bin/qc_psms.R
index edd115b..59e2bdc 100755
--- a/bin/qc_psms.R
+++ b/bin/qc_psms.R
@@ -116,8 +116,8 @@ p = ggplotly(ggp, width=400, height=vert_height) %>%
         layout(legend = list(orientation = 'h', x = 0, y = 1.1, xanchor='left', yanchor='bottom'))
 # Work around since plotly does not honor above legend.title=element_blank call
 p$x$layout$legend$title$text = ''
-
 htmlwidgets::saveWidget(p, 'amount_psms.html', selfcontained=F)
+write.table(amount_id, 'psms_ids.txt', row.names=F, quote=F, sep='\t') # tab-separated dump of amount_id; declared as a PSMQC output in workflows/reporting.nf
 
 # Missing isobaric values
 if (length(grep('plex', names(feats)))) {
@@ -145,16 +145,16 @@ if (length(grep('plex', names(feats)))) {
 
 # Missed cleavages
 mcl = aggregate(get(scancol)~get(xcol)+get(miscleavcol), feats, length)
-colnames(mcl) = c(xcol, 'missed_cleavage', 'nrscan')
+colnames(mcl) = c(xcol, 'missed_cleavage', 'nrpsms') # nrpsms = row count in feats per (xcol, missed cleavage) group; bin/report_tables.py reads this column by name
 mcl_am = subset(merge(mcl, amount_psms, by=xcol), missed_cleavage %in% c(0,1,2))
-mcl_am$percent = mcl_am$nrscan / mcl_am$"PSMs IDed" * 100
+mcl_am$percent = mcl_am$nrpsms / mcl_am$"PSMs IDed" * 100 # share of the xcol group's "PSMs IDed" total, in percent
 mc_text_y = max(mcl_am$percent) * 2/6
 mcl_am$missed_cleavage = as.factor(mcl_am$missed_cleavage)
 
 mcplot = ggplot(mcl_am) +
     geom_bar(aes(x=.data[[ xcol ]], y=percent, fill=missed_cleavage, group=missed_cleavage), position='dodge', stat='identity') +
     # 0.9 is the default dodge (90% of 1, 1 used bc all same value) but when not spec -> no dodge at all?
-    geom_text(position=position_dodge(width=0.9), aes(x=.data[[xcol]], y=mc_text_y, group=missed_cleavage, label=glue('{nrscan} PSMs')), colour="black", size=4, inherit.aes=T) +
+    geom_text(position=position_dodge(width=0.9), aes(x=.data[[xcol]], y=mc_text_y, group=missed_cleavage, label=glue('{nrpsms} PSMs')), colour="black", size=4, inherit.aes=T) + # text label: PSM count for each xcol/missed-cleavage group
     ylim(c(0, 100)) + ylab('% of PSMs') +
     theme_bw() +
     theme(axis.title.x=element_text(size=15), axis.title.y=element_blank(), axis.text=element_text(size=10), axis.text.y=element_text(angle=90), legend.position="top", legend.text=element_text(size=10), legend.title=element_blank()) +
@@ -163,6 +163,7 @@ p = ggplotly(mcplot, width=400, height=vert_height) %>%
         layout(legend = list(orientation = 'h', x = 0, y = 1.1, xanchor='left', yanchor='bottom'))
 p$x$layout$legend$title$text = ''
 htmlwidgets::saveWidget(p, 'missed_cleavages.html', selfcontained=F)
+write.table(mcl_am, 'miscleav.txt', row.names=F, quote=F, sep='\t') # tab-separated dump of mcl_am; declared as a PSMQC output in workflows/reporting.nf
 
 
 # Now the per-fraction or per-file stats
diff --git a/bin/report_tables.py b/bin/report_tables.py
index 560c9bf..b9e3db0 100755
--- a/bin/report_tables.py
+++ b/bin/report_tables.py
@@ -263,6 +263,27 @@ def get_plotly_html(fn):
     summary_fields = [x for x in summary_field_order if x in fields]
     break
 
+# PSM tables (IDs and missed cleavages per plate) from the qc_psms.R tab-separated exports
+psmtables = {'ids': [], 'miscleav': []}
+with open('psmids') as fp:
+    head = next(fp).rstrip('\n').split('\t')
+    plates = defaultdict(dict)
+    for line in fp:
+        lnmap = dict(zip(head, line.rstrip('\n').split('\t')))
+        if lnmap['name'] == 'MS2 scans':
+            plates[lnmap['plateID']]['scans'] = lnmap['count']
+        elif lnmap['name'] == 'PSMs IDed':
+            plates[lnmap['plateID']].update({'psms': lnmap['count'], 'pc': lnmap['labeltext']})
+psmtables['ids'] = [[p, nms['scans'], nms['psms'], nms['pc']] for p, nms in plates.items()]
+
+with open('miscleav') as fp:
+    # Split the header on tabs like the data rows: the 'PSMs IDed' column name
+    # contains a space, so a whitespace split would shift every later column
+    head = next(fp).rstrip('\n').split('\t')
+    for line in fp:
+        lnmap = dict(zip(head, line.rstrip('\n').split('\t')))
+        psmtables['miscleav'].append([lnmap['plateID'], lnmap['missed_cleavage'], lnmap['nrpsms'], lnmap['percent']])
+
 
 # Overlap
 overlap = defaultdict(dict)
@@ -315,6 +336,7 @@ def get_plotly_html(fn):
         deqmsplots=deqmsplots,
         deqmscomps=deqmscomps,
         tabletitles=tabletitles,
+        psmtables=psmtables,  # rendered as the "PSMs" tables in assets/report.html
         summary_fields=summary_fields,
         summary_table=summary_table,
         overlap=overlap,
diff --git a/workflows/reporting.nf b/workflows/reporting.nf
index b8e339c..08a84ed 100644
--- a/workflows/reporting.nf
+++ b/workflows/reporting.nf
@@ -79,7 +79,7 @@ process PSMQC {
   tuple path('psms'), path('filescans'), path('platescans'), val(mzmls), val(fractionation), val(has_newmzmls), val(has_oldmzmls), val(search_engine)
 
   output:
-  tuple path('platescans'), path('amount_psms_files'), path("psmplothtml"), path('psmtable__summary.txt')
+  tuple path('platescans'), path('amount_psms_files'), path("psmplothtml"), path('psmtable__summary.txt'), path('psms_ids.txt'), path('miscleav.txt') // psms_ids.txt and miscleav.txt are written by bin/qc_psms.R
 
   script:
   """
@@ -151,7 +151,7 @@ process summaryReport {
 
 
   input:
-  tuple path(platescans), path(plotlibs), path('psmplots'), path(psm_summary), path(featplots), path(feat_summaries), path(feat_overlaps), path('ptmplots'), path(ptmfiles), path('warnings*')
+  tuple path(platescans), path(plotlibs), path('psmplots'), path(psm_summary), path('psmids'), path('miscleav'), path(featplots), path(feat_summaries), path(feat_overlaps), path('ptmplots'), path(ptmfiles), path('warnings*') // 'psmids' and 'miscleav' are the fixed names opened by bin/report_tables.py
   
   output:
   tuple path('report_groovy_template.html'), path('libs.js')