Skip to content

Commit

Permalink
Remove channels feature, to remove channels which are empty instead o…
Browse files Browse the repository at this point in the history
…f --keepnapsms
  • Loading branch information
glormph committed Jan 31, 2025
1 parent 3f423a1 commit f1bee20
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 5 deletions.
32 changes: 27 additions & 5 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

include { paramsSummaryMap } from 'plugin/nf-schema'

include { msgf_info_map; listify; stripchars_infile; get_regex_specialchars; read_header } from './modules.nf'
include { msgf_info_map; get_complement_field_nr; listify; stripchars_infile; get_regex_specialchars; read_header } from './modules.nf'
include { MSGFPERCO } from './workflows/msgf_perco.nf'
include { SAGEPERCO } from './workflows/sage_perco.nf'
include { PTMANALYSIS } from './workflows/ptms.nf'
Expand Down Expand Up @@ -340,18 +340,27 @@ process splitPSMs {
container params.__containers[tag][workflow.containerEngine]

input:
tuple val(td), path('psms'), val(setnames)
tuple val(td), path('psms'), val(setnames), val(remove_channels)

output:
tuple val(td), path({listify(setnames).collect { "${it}.tsv" }}) optional true

script:
"""
msstitch split -i psms --splitcol bioset
${td == 'target' ?
remove_channels.collect {
setch -> setch[1].collect {
ch -> "colnum=${get_complement_field_nr("${setch[0]}.tsv", ch)} && \
cut -f \$colnum ${setch[0]}.tsv > tmprm && mv tmprm ${setch[0]}.tsv"
}.join(' && ')
}.join(' && ')
: ''}
"""
}



process splitTotalProteomePSMs {

tag 'msstitch'
Expand Down Expand Up @@ -454,13 +463,20 @@ process sampleTableCheckClean {
container params.__containers[tag][workflow.containerEngine]

input:
tuple path('sampletable'), val(do_deqms)
tuple path('sampletable'), val(do_deqms), val(remove_channels)

output:
tuple path('clean_sampletable'), path('sampletable_no_special_chars')

script:
"""
# Remove empty channels
${remove_channels.collect {
setch -> setch[1].collect {
ch -> "grep -v '^${ch}\t${setch[0]}' sampletable > tmpst && mv tmpst sampletable"
}.join(' && ')
}.join(' && ')
}
# First add NO__GROUP marker for no-samplegroups clean sampletable from special chars
awk -v FS="\\t" -v OFS="\\t" \'{if (NF==3) print \$1,\$2,\$3,"NO__GROUP"; else print}\' sampletable > clean_sampletable
# Check if there are samplegroups at all
Expand Down Expand Up @@ -633,6 +649,12 @@ workflow {
}.collectEntries() {
x-> [x[0], x[2..-1]]
} : [:]
// Remove channels from specific sets if those are empty: --remove_channels 'setA:126:127 setB:131'
rmch = params.remove_channels ? params.remove_channels.tokenize(' ') : false
remove_channels_psmtable = rmch ? rmch.collect { y -> y.tokenize(':')
}.collect { x -> [x[0], x[1..-1].collect { ch -> "${setisobaric[x[0]]}_${ch}" } ] } : [:]
remove_channels_sampletable = rmch ? rmch.collect { y -> y.tokenize(':')
}.collect { x -> [x[0], x[1..-1]] } : [:]

do_ms1 = !params.noquant && !params.noms1quant
do_normalize = (!params.noquant && (params.mediannormalize || params.deqms) && params.isobaric)
Expand Down Expand Up @@ -854,7 +876,7 @@ workflow {
psmtables_ch
| filter { it[0] == 'decoy' }
| concat(target_psmtable)
| map { [it[0], it[1], all_setnames] }
| map { [it[0], it[1], all_setnames, remove_channels_psmtable] }
| splitPSMs
| map{ it -> [it[0], listify(it[1]).collect() { it.baseName.replaceFirst(/\.tsv$/, "") }, it[1]]} // get setname from {setname}.tsv
| transpose
Expand Down Expand Up @@ -953,7 +975,7 @@ workflow {

if (params.sampletable) {
Channel.fromPath(params.sampletable)
| map { [it, params.deqms] }
| map { [it, params.deqms, remove_channels_sampletable] }
| sampleTableCheckClean
| set { sampletable_ch }
} else {
Expand Down
4 changes: 4 additions & 0 deletions modules.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ def get_field_nr_multi(fn, fieldnames) {
return "\$(head -n1 ${fn} | tr '\\t' '\\n' | grep -En '(${fieldnames.join('|')})' | cut -f 1 -d':' | tr '\\n' ',' | sed 's/\\,\$//')"
}

def get_complement_field_nr(fn, fieldname) {
  /*
   * Build a shell command substitution which, when the generated script runs,
   * expands to the comma separated, 1-based numbers of every column in the
   * header (first line) of the tab separated file `fn` EXCEPT the column whose
   * name is exactly `fieldname`, e.g.: 1,2,5,9
   * The result is suitable as the argument to `cut -f`.
   *
   * fn:        path to the tab separated file, interpolated into the command as-is
   * fieldname: header name of the column to leave out
   *
   * Returns the command as a string; nothing is executed here. If no header
   * field equals `fieldname`, the command lists every column number.
   */
  // grep flags: -v keeps the header fields that do NOT match, -n prefixes each
  // with its line number (= column number after the tr), -x only accepts whole
  // line matches and -F takes the name as a literal string instead of a regex,
  // so names containing regex metacharacters (. + [ etc) cannot accidentally
  // match and thereby drop other columns. -e guards against a leading dash.
  return "\$(head -n1 ${fn} | tr '\\t' '\\n' | grep -vnxF -e '${fieldname}' | cut -f 1 -d':' | tr '\\n' ',' | sed 's/\\,\$//')"
}

def parse_isotype(isobtype) {
return ['tmt16plex', 'tmt18plex'].any { it == isobtype } ? 'tmtpro' : isobtype
Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ params {
phospho = false
maxvarmods = 2
isobaric = false
remove_channels = false
instrument = 'qe' // Default instrument is Q-Exactive
prectol = '10.0ppm'
iso_err = '-1,2'
Expand Down
3 changes: 3 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,9 @@
},
"msgf": {
"type": "boolean"
},
"remove_channels": {
"type": "boolean"
}
}
}
1 change: 1 addition & 0 deletions tests/tmt16_fast.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@ $NXFCMD --name ${name} --outdir ${resultsdir} \
--locptms Phospho \
--psmconflvl 0.2 --pepconflvl 0.2 \
--deqms --keepnapsmsquant --genes \
--remove_channels '0set-A:127C:128N' \
--hirief https://github.com/nf-core/test-datasets/raw/6defbf8a92a46b0ac48bb05f9ad96b62716b4a5d/testdata/formatted_known_peptides_ENSUniRefseq_TMT_predpi_20150825.txt
# FIXME cannot run with carbamyl +43 -> -261 and Phospho, luciprep crash \

0 comments on commit f1bee20

Please sign in to comment.