diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 2109619..2599d16 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -34,7 +34,7 @@ This Code of Conduct applies both within project spaces and in public spaces whe ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on the [Gitter channel](https://gitter.im/nf-core/Lobby). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. +The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. diff --git a/assets/report.html b/assets/report.html index 45d896b..c9b751c 100644 --- a/assets/report.html +++ b/assets/report.html @@ -150,6 +150,17 @@
Site count
{% endif %} + {% if warnings|length %} +

Warnings

+
+ +
+ {% endif %} +
diff --git a/bin/qc_protein.R b/bin/qc_protein.R index cb6441e..01b9df4 100755 --- a/bin/qc_protein.R +++ b/bin/qc_protein.R @@ -280,7 +280,6 @@ if (length(precursorcols) > 1) { parea$Set = sub('_MS1.*', '', parea$Set) parea$Set = sub('^X([^a-zA-Z])', '\\1', parea$Set) parea = parea[complete.cases(parea$ms1), ] - print(head(parea[c('Set', 'ms1')])) ggp = ggplot(parea) + geom_boxplot(aes(Set, ms1)) + scale_y_log10() + coord_flip() + ylab("Intensity") + theme_bw() + theme(axis.title=element_text(size=15), axis.text=element_text(size=10), axis.title.y=element_blank()) } else { diff --git a/bin/qc_psms.R b/bin/qc_psms.R index 60dcc99..95483c5 100755 --- a/bin/qc_psms.R +++ b/bin/qc_psms.R @@ -5,7 +5,6 @@ library(tidyr) library(glue) library(stringr) -# library(reshape2) args = commandArgs(trailingOnly=TRUE) has_fractions = args[1] == TRUE newmzmlfn = ifelse(args[2] == 'FALSE', '', args[2]) @@ -96,10 +95,8 @@ if (length(grep('plex', names(feats)))) { # Missed cleavages mcl = aggregate(get(scancol)~get(xcol)+get(miscleavcol), feats, length) colnames(mcl) = c(xcol, 'missed_cleavage', 'nrscan') -#mcl$nrscan = as.factor(mcl$nrscan) mcl_am = subset(merge(mcl, amount_psms, by=xcol), missed_cleavage %in% c(0,1,2)) mcl_am$percent = mcl_am$nrscan / mcl_am$"PSMs IDed" * 100 -# mcl_am$textycoord = ifelse(mcl_am$missed_cleavage!=0, mcl_am$percent, 10) mc_text_y = max(mcl_am$percent) * 2/6 mcl_am$missed_cleavage = as.factor(mcl_am$missed_cleavage) @@ -120,8 +117,6 @@ htmlwidgets::saveWidget(p, 'missed_cleavages.html', selfcontained=F) # Now the per-fraction or per-file stats if (has_fractions) { xcol = 'Fraction' - # + plateID' - # plateID not necessary because we take subfeats? } else { xcol = filenamecol } diff --git a/bin/report_tables.py b/bin/report_tables.py index 758a273..40861ba 100755 --- a/bin/report_tables.py +++ b/bin/report_tables.py @@ -4,7 +4,6 @@ import os import sys import shutil -import tomllib import argparse from glob import glob from collections import defaultdict @@ -271,6 +270,15 @@ def get_plotly_html(fn): overlap[feattype] = False +warnings = [] +for fn in glob('warnings*'): + if os.path.exists(fn): + with open(fn) as fp: + for line in fp: + if warn := line.strip(): + warnings.append(warn) + + # Write to template with open('report_groovy_template.html', 'w') as fp: fp.write(template.render(reportdate=date, @@ -293,4 +301,5 @@ def get_plotly_html(fn): ptmplots=ptmplots, ptmtables=ptmtables, ptmtitles=ptm_headers, + warnings=warnings, )) diff --git a/main.nf b/main.nf index a88fbb1..f8ef6c3 100644 --- a/main.nf +++ b/main.nf @@ -19,69 +19,6 @@ include { REPORTING } from './workflows/reporting.nf' ---------------------------------------------------------------------------------------- */ - -def multifile_format(fileparam) { - if (!fileparam) { - return false - } - sum_fn = file(fileparam) - if (!(sum_fn instanceof List)) { - sum_fn = [sum_fn] - } - return sum_fn.join(', ') -} - - -def create_workflow_summary(summary) { - - def yaml_file = workDir.resolve('workflow_summary_mqc.yaml') - yaml_file.text = """ - id: 'lehtiolab-ddamsproteomics-summary' - description: " - this information is collected when the pipeline is started." - section_name: 'lehtiolab/ddamsproteomics Workflow Summary' - section_href: 'https://github.com/lehtiolab/ddamsproteomics' - plot_type: 'html' - data: | -
-${summary.collect { k,v -> "
$k
${v ?: 'N/A'}
" }.join("\n")} -
- """.stripIndent() - - return yaml_file -} - - -/* - * Parse software version numbers - */ -process get_software_versions { - - publishDir "${params.outdir}", mode: 'copy', overwrite: true - - output: - file 'software_versions.yaml' into software_versions_qc - - script: - noms1 = params.noms1quant || params.noquant - """ - echo $workflow.manifest.version > v_pipeline.txt - echo $workflow.nextflow.version > v_nextflow.txt - echo 2023.01.1202 > v_msgf.txt - ${!noms1 && !params.hardklor ? 'dinosaur | head -n2 | grep Dinosaur > v_dino.txt || true' : ''} - ${!noms1 && params.hardklor ? 'hardklor | head -n1 > v_hk.txt || true' : ''} - kronik | head -n2 | tr -cd '[:digit:],\\.' > v_kr.txt || true - #luciphor2 |& grep Version > v_luci.txt # incorrect version from binary (2014), echo below - echo Version: 2020_04_03 > v_luci.txt # deprecate when binary is correct - echo 3.5 > v_perco.txt - msstitch --version > v_mss.txt - echo 2.9.1 > v_openms.txt - Rscript <(echo "packageVersion('DEqMS')") > v_deqms.txt - scrape_software_versions.py > software_versions.yaml - """ -} - - - process createTargetDecoyFasta { label 'msstitch' @@ -108,12 +45,6 @@ def fr_or_file(it, length) { return it.size() > length ? it[length] : "${file(it[0]).baseName}.${file(it[0]).extension}" } -def plate_or_no(it, length) { - return it.size() > 3 ? it[3] : "no_plate" -} - - - // Parse mzML input to get files and sample names etc // get setname, sample name (baseName), input mzML file. @@ -156,7 +87,6 @@ process isobaricQuant { activationtype = [auto: 'auto', any: 'any', hcd:'beam-type collision-induced dissociation', cid:'Collision-induced dissociation', etd:'Electron transfer dissociation'][params.activation] plextype = isobtype ? isobtype.replaceFirst(/[0-9]+plex/, "") : 'false' massshift = [tmt:0.0013, itraq:0.00125, false:0][plextype] - //(is_stripped, parsed_infile) = stripchars_infile(infile) """ ${isobtype ? "IsobaricAnalyzer -type $isobtype -in \"${infile}\" -out \"${infile.baseName}.consensusXML\" -extraction:select_activation \"$activationtype\" -extraction:reporter_mass_shift $massshift -extraction:min_precursor_intensity 1.0 -extraction:keep_unannotated_precursor true -quantification:isotope_correction true" : ''} """ @@ -165,12 +95,6 @@ process isobaricQuant { process dinosaur { -container 'lehtiolab/ddamsproteomics:2.18' -// Biocontainers arent working with dinosaur tests, FIXME create auto build of it - -// container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? -// 'https://depot.galaxyproject.org/singularity/dinosaur:1.2.0--0' : -// 'quay.io/biocontainers/dinosaur:1.2.0--0'}" input: tuple val(sample), path(infile) @@ -686,28 +610,13 @@ if (params.sampletable) { // .map { it -> [it[2].replaceAll('[ ]+$', '').replaceAll('^[ ]+', ''), file(it[0]).baseName.replaceAll(regex_specialchars, '_'), file(it[0]), it[1], plate_or_no(it, 3), fr_or_file(it, 4)] } mzml_in -// FIXME clean out // Prepare mzml files (sort, collect) for processes that need all of them .toList() .map { it.sort( {a, b -> a.sample <=> b.sample}) } // sort on sample for consistent .sh script in -resume .map { it -> [it.collect() { it.setname }, it.collect() { it.mzmlfile }, it.collect() { it.plate }, it.collect() { it.fraction } ] } // lists: [sets], [mzmlfiles], [plates], [fractions] - //.tap { mzmlfiles_psmqc } // FIXME this prevents completion of the pipeline somehow .map { it -> it[0..2] } // remove basenames, fractions .set { mzmlfiles_all_sort } - -/* -// FIXME this needed still? - if (!is_rerun) { - mzml_in - .count() - .subscribe { println "$it mzML files in analysis" } - .into { mzmlcount_psm; mzmlcount_percolator } - } else { - Channel.value(0).into { mzmlcount_psm; mzmlcount_percolator } - } -*/ - // Spec lookup prep if needed do_quant = false if (is_rerun) { @@ -816,10 +725,9 @@ if (params.sampletable) { | set { specquant_lookups } } - tdb = Channel.fromPath(params.tdb) createTargetDecoyFasta(tdb) - // bothdbs.into { psmdbs; fdrdbs; ptmdbs } + if (!is_rerun) { search_mods = [params.mods ? params.mods : false, params.ptms ?: false, @@ -902,8 +810,6 @@ if (params.sampletable) { } else { totalproteome_ch = Channel.empty() } -println(params.genes) -println(params.onlypeptides) if (params.genes) { totalprot_col = 'Gene Name' } else if (params.onlypeptides) { @@ -939,9 +845,11 @@ println(params.onlypeptides) do_ms1, !params.onlypeptides ) - ptm_ch = PTMANALYSIS.out + ptm_ch = PTMANALYSIS.out.ptms + ptmwarn_ch = PTMANALYSIS.out.warnings } else { ptm_ch = Channel.empty() + ptmwarn_ch = Channel.empty() } splitpsms_ch @@ -1036,92 +944,13 @@ println(params.onlypeptides) params.proteinconflvl, all_setnames, ptm_ch, + psmwarnings_ch + .concat(MSGFPERCO.out.warnings) + .concat(ptmwarn_ch) + .toList().toList() + .filter { it[0] } + .ifEmpty(nofile), ) - - -/* - - // QC for PSMs - - // FIXME allplates could go as file into R QC directly? - if (fractionation) { - countMS2sPerPlate.out.allplates - .splitText() - .map { it -> it.trim() } - .toList() - .map { it -> [it] } - .set { allplates_split } - countMS2sPerPlate.out.counted - .combine(allplates_split) - .set { scans_result } - } else { - countMS2sPerPlate.out.counted - | map { it -> [it[0], 'NA', ['noplates']] } - | set { scans_result } - //| into { scans_platecount; scans_result } - } - psm_result // From createPSMTable - .filter { it[0] == 'target' } - .combine(scans_result) - .map { it -> [it[0], it[1], it[2], it[3], it[4].unique()] } - .set { targetpsm_result } - - - - -mzmlfiles_all_count -// Count lookup can be spectra if needed quant - .merge(countlookup) - .set { specfilein } - - -process countMS2perFile { - - input: - set val(setnames), file(mzmlfiles), val(platenames), file(speclookup) from specfilein - -/// The following is maybe OK? - -if (complementary_run) { - oldnewsets - .splitText() - .map { it -> it.trim() } - .toList() - .set { allsetnames } - cleaned_psms - .flatMap { it -> [['target', it[0]], ['decoy', it[1]]] } - .set { td_oldpsms } -} else { - mzml_in - .map { it -> it[2] } - .unique() - .toList() - .set { allsetnames } - - // if not using this youll have a combine on an open channel without - // anything from complement cleaner. Will not run createPTMLookup then - cleaned_ptmpsms = Channel.value('NA') -} - -/// until here - we can investigate later - -// Set names are first item in input lists, collect them for PSM tables and QC purposes -// FIXME just reuse channels -allsetnames - .into { setnames_featqc; setnames_psms; setnames_psmqc; setnames_ptms } - - - -if (!params.quantlookup) { - ptm_lookup_old - .concat(ptm_lookup_new) -// PTM lookup needs not to have quant, can be spectra - .set { ptm_lookup_in } -} - -*/ - -// publishDir "${params.outdir}", mode: 'copy', overwrite: true, saveAs: {["target_psmlookup.sql", "decoy_psmlookup.sql", "target_psmtable.txt", "decoy_psmtable.txt"].contains(it) && (is_rerun || !no_psms) ? it : null} target_psmtable.map { it[1] } .concat(psmtables_ch | filter { it[0] == 'decoy' } | map { it[1] }) diff --git a/nextflow.config b/nextflow.config index 488eec3..b436d5b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,18 +11,11 @@ nextflow.enable.configProcessNamesValidation = false params { help = false outdir = './results' - igenomes_base = "./iGenomes" tracedir = "${params.outdir}/pipeline_info" clusterOptions = false - awsqueue = false - awsregion = 'eu-west-1' external_config_version = 'master' name = false - email = false - plaintext_email = false - - mzmls = false input = false tdb = false mods = false @@ -226,6 +219,8 @@ process { container = containers['openms'][engine] } withName: dinosaur { + // Biocontainers arent working with dinosaur tests + // this container is our own DDA container with conda container = containers['dinosaur'][engine] } withName: hardklor { diff --git a/workflows/ptms.nf b/workflows/ptms.nf index da8994a..e030a12 100644 --- a/workflows/ptms.nf +++ b/workflows/ptms.nf @@ -31,7 +31,7 @@ process luciphorPTMLocalizationScoring { output: tuple val(setname), path('luciphor.out'), path('all_scores.debug'), emit: ptms optional true - tuple val(setname), path('warnings'), emit: warnings optional true + path('warnings'), emit: warnings optional true script: @@ -300,8 +300,7 @@ workflow PTMANALYSIS { | luciphorPTMLocalizationScoring luciphorPTMLocalizationScoring.out.ptms - | join(luciphorPTMLocalizationScoring.out.warnings, remainder: true) - // setname, null, warnings || setname, psms, scores, null || empty + // setname, psms, scores || empty | map { [it[0], it[1] ?: nofile, it[1] ? it[2] : nofile] } | join(psms) | map { it + [ptm_minscore_high, file(msgfmodfile), locptms, stab_ptms, get_non_ptms(it[0], setisobaric, othermods), ]} @@ -364,9 +363,10 @@ workflow PTMANALYSIS { } emit: - createPTMTable.out.allpsms + ptms = createPTMTable.out.allpsms | combine(do_proteingroup ? addMasterProteinsGenes.out : mergePTMPeps.out) -} + warnings = luciphorPTMLocalizationScoring.out.warnings + | concat(createPTMTable.out.warnings) diff --git a/workflows/reporting.nf b/workflows/reporting.nf index 7e47a88..3ecded6 100644 --- a/workflows/reporting.nf +++ b/workflows/reporting.nf @@ -151,18 +151,18 @@ process summaryReport { container params.report_container input: - tuple path('platescans'), path(plotlibs), path('psmplots'), path(psm_summary), path(featplots), path(feat_summaries), path(feat_overlaps), path('ptmplots'), path(ptmfiles) + tuple path(platescans), path(plotlibs), path('psmplots'), path(psm_summary), path(featplots), path(feat_summaries), path(feat_overlaps), path('ptmplots'), path(ptmfiles), path('warnings*') output: tuple path('report_groovy_template.html'), path('libs.js') script: + has_plates = platescans.size() """ # xargs removes trailing whitespace - plates=\$(cut -f1 platescans | sort -u | tr '\n' ' ' | xargs) report_tables.py --version "${workflow.manifest.version}" --doi "${workflow.manifest.doi}" \ --templatedir "$baseDir/assets" \ - --plates \$plates + ${has_plates ? "--plates \$(cut -f1 $platescans | sort -u | tr '\n' ' ' | xargs)" :''} """ } @@ -182,6 +182,7 @@ workflow REPORTING { prot_gene_conflvl setnames ptms + warnings main: nofile = "${baseDir}/assets/NO__FILE" @@ -214,7 +215,6 @@ workflow REPORTING { // Only pick one PTM table for reporting nr of sites etc ptms | filter { it[1].name.contains('_not_adjusted') } -|view() | PTMQC | ifEmpty([nofile, nofile]) | set { ptmqc } @@ -222,6 +222,7 @@ workflow REPORTING { PSMQC.out | combine(feat_qc_ch) | combine(ptmqc) + | combine(warnings) | summaryReport emit: