Skip to content

Commit

Permalink
Configure multiqc to extract and display gfastats results of each tan…
Browse files Browse the repository at this point in the history
…tan, windowmasker, repeatmasker.
  • Loading branch information
U13bs1125 committed Jul 8, 2024
1 parent c6638d3 commit b34fae6
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 4 deletions.
6 changes: 3 additions & 3 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ process {
]

withName: 'GFSTTANTAN' {
ext.prefix = { "${meta.id}.tantan" }
ext.prefix = { "${meta.id}_tantan" }
}

withName: 'GFSTWINDOWMASK' {
ext.prefix = { "${meta.id}.windowmaskr" }
ext.prefix = { "${meta.id}_windowmasker" }
}

withName: 'GFSTREPEATMOD' {
ext.prefix = { "${meta.id}.repeatmoder" }
ext.prefix = { "${meta.id}_repeatmodeler" }
}

withName: 'WINDOWMASKER_USTAT' {
Expand Down
79 changes: 79 additions & 0 deletions modules/local/custommodule.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
//
// CUSTOMMODULE: build MultiQC custom-content tables from gfastats assembly summaries.
//
// Inputs (each a collected list of `*.assembly_summary` files):
//   assemt - summaries of the tantan-masked assemblies
//   assemw - summaries of the windowmasker-masked assemblies
//   assemr - summaries of the repeatmasker-masked assemblies
//
// Output:
//   tsv - three `*_mqc.tsv` files (tantanmqc, windowmqc, repeatmaskmqc), each a
//         commented YAML header followed by one tab-separated row per summary:
//         id, total scaffold length, total contig length, soft-masked bases.
//
process CUSTOMMODULE {
    label 'process_single'
    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/jq:1.6':
        'biocontainers/jq:1.6' }"

    input:
    path (assemt)
    path (assemw)
    path (assemr)

    output:
    path "*_mqc.tsv", emit: tsv

    when:
    task.ext.when == null || task.ext.when

    script:
    // Bash variables are escaped so that Groovy leaves them for the shell; the three
    // input lists are interpolated by Groovy as space-separated file names.
    // Doubled backslashes reach printf/awk as single-backslash escapes (tab, newline).
    """
    # write_mqc_table TOOL OUTFILE SUMMARY...
    # Writes the MultiQC header for TOOL to OUTFILE, then one row per SUMMARY file.
    write_mqc_table() {
        local tool="\$1"
        local out="\$2"
        shift 2

        {
            printf "# id: '%s repeat summary'\\n" "\$tool"
            printf "# section_name: '%s repeat masking summary statistics'\\n" "\$tool"
            printf "# format: 'tsv'\\n"
            printf "# plot_type: 'heatmap'\\n"
            printf "# description: 'This plot shows a brief summary of each genomes whose repeats has been masked'\\n"
            printf "# pconfig:\\n"
            # The plot options are indented so that YAML nests them under pconfig
            # instead of repeating the top-level id key.
            printf "#     id: '%s repeat summary'\\n" "\$tool"
            printf "#     title: '%s repeat summary'\\n" "\$tool"
            printf "#     ylab: ''\\n"
            printf 'id\\tTotal scaffold length\\tTotal contig length\\tsoft masked bases\\n'
        } > "\$out"

        local summary
        for summary in "\$@"
        do
            # Single pass per file: the last matching line wins for each metric, and a
            # metric that is absent yields an empty cell rather than a shifted row.
            # The sample name is passed as data, never as a printf format string.
            awk -v sample="\$(basename "\$summary" .assembly_summary)" '
                /Total scaffold length/ { scaffold = \$4 }
                /Total contig length/   { contig   = \$4 }
                /soft-masked bases/     { masked   = \$4 }
                END { printf "%s\\t%s\\t%s\\t%s\\n", sample, scaffold, contig, masked }
            ' "\$summary" >> "\$out"
        done
    }

    write_mqc_table tantan       tantanmqc_mqc.tsv     $assemt
    write_mqc_table windowmasker windowmqc_mqc.tsv     $assemw
    write_mqc_table repeatmasker repeatmaskmqc_mqc.tsv $assemr
    """
}
13 changes: 12 additions & 1 deletion workflows/pairgenomealignmask.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ include { REPEATMODELER_REPEATMODELER } from '../modules/nf-core/repeatmodeler/r
include { REPEATMODELER_MASKER } from '../modules/nf-core/repeatmodeler/repeatmasker/main'
include { REPEATMODELER_BUILDDATABASE } from '../modules/nf-core/repeatmodeler/builddatabase/main'
include { TANTAN } from '../modules/local/tantan.nf'
include { CUSTOMMODULE } from '../modules/local/custommodule.nf'
include { GFASTATS as GFSTTANTAN } from '../modules/nf-core/gfastats/main'
include { GFASTATS as GFSTREPEATMOD } from '../modules/nf-core/gfastats/main'
include { GFASTATS as GFSTWINDOWMASK } from '../modules/nf-core/gfastats/main'
Expand Down Expand Up @@ -98,7 +99,17 @@ workflow PAIRGENOMEALIGNMASK {
WINDOWMASKER_USTAT.out.intervals
)

ch_multiqc_files = ch_multiqc_files.mix(WINDOWMASKER_MKCOUNTS.out.counts.collect{it[1]})
//
// MODULE: CUSTOMMODULE
//
CUSTOMMODULE (
GFSTTANTAN.out.assembly_summary.collect{it[1]},
GFSTWINDOWMASK.out.assembly_summary.collect{it[1]},
GFSTREPEATMOD.out.assembly_summary.collect{it[1]}
)
ch_multiqc_files = ch_multiqc_files.mix(CUSTOMMODULE.out.tsv)


ch_versions = ch_versions.mix(WINDOWMASKER_MKCOUNTS.out.versions.first())

//
Expand Down

0 comments on commit b34fae6

Please sign in to comment.