From b34fae69d12f85daf6747bccd98051bb89cdea41 Mon Sep 17 00:00:00 2001
From: Mohammed Mahdi
Date: Mon, 8 Jul 2024 16:24:20 +0900
Subject: [PATCH] Configure multiqc to extract and display gfastats results of each tantan, windowmasker, repeatmasker.

---
Review notes (this region is ignored when the patch is applied):
- modules/local/custommodule.nf was revised: the three copy-pasted table
  writers are now one shell function, pconfig children are nested, and the
  unused `args` local is gone. The blob id on its `index` line predates
  this revision.
- Context-line whitespace was reconstructed; apply with
  `git apply --ignore-whitespace` if the hunks are rejected.

 conf/modules.config              |  6 ++--
 modules/local/custommodule.nf    | 60 ++++++++++++++++++++++++++++++++
 workflows/pairgenomealignmask.nf | 13 ++++++-
 3 files changed, 75 insertions(+), 4 deletions(-)
 create mode 100644 modules/local/custommodule.nf

diff --git a/conf/modules.config b/conf/modules.config
index 44d04e4..09ebd1e 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -19,15 +19,15 @@ process {
         ]
 
     withName: 'GFSTTANTAN' {
-        ext.prefix = { "${meta.id}.tantan" }
+        ext.prefix = { "${meta.id}_tantan" }
     }
 
     withName: 'GFSTWINDOWMASK' {
-        ext.prefix = { "${meta.id}.windowmaskr" }
+        ext.prefix = { "${meta.id}_windowmasker" }
     }
 
     withName: 'GFSTREPEATMOD' {
-        ext.prefix = { "${meta.id}.repeatmoder" }
+        ext.prefix = { "${meta.id}_repeatmodeler" }
     }
 
     withName: 'WINDOWMASKER_USTAT' {
diff --git a/modules/local/custommodule.nf b/modules/local/custommodule.nf
new file mode 100644
index 0000000..0a03a8d
--- /dev/null
+++ b/modules/local/custommodule.nf
@@ -0,0 +1,60 @@
+// Builds one MultiQC custom-content TSV per repeat-masking tool from the
+// gfastats `.assembly_summary` files collected by the workflow.
+process CUSTOMMODULE {
+    label 'process_single'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/jq:1.6':
+        'biocontainers/jq:1.6' }"
+
+    input:
+    path (assemt) // summaries collected from GFSTTANTAN
+    path (assemw) // summaries collected from GFSTWINDOWMASK
+    path (assemr) // summaries collected from GFSTREPEATMOD
+
+    output:
+    path "*_mqc.tsv", emit: tsv
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    # Write one MultiQC custom-content table.
+    #   \$1   tool name used in the section id and titles
+    #   \$2   output TSV file
+    #   rest  gfastats .assembly_summary files, one table row each
+    write_mqc_table() {
+        local tool="\$1"
+        local out="\$2"
+        shift 2
+
+        {
+            echo "# id: '\${tool} repeat summary'"
+            echo "# section_name: '\${tool} repeat masking summary statistics'"
+            echo "# format: 'tsv'"
+            echo "# plot_type: 'heatmap'"
+            echo "# description: 'This plot shows a brief summary of each genome whose repeats have been masked'"
+            echo "# pconfig:"
+            # Children of pconfig must be indented deeper than the top-level keys,
+            # otherwise the header YAML has an empty pconfig and a duplicate id.
+            echo "#    id: '\${tool} repeat summary'"
+            echo "#    title: '\${tool} repeat summary'"
+            echo "#    ylab: ''"
+            printf 'id\\tTotal scaffold length\\tTotal contig length\\tsoft masked bases\\n'
+            for summary in "\$@"
+            do
+                # '%s' keeps a '%' in a file name from being read as a format directive.
+                printf '%s\\t' "\$(basename "\$summary" .assembly_summary)"
+                grep 'Total scaffold length' "\$summary" | tail -n 1 | awk '{print \$4}' | tr '\\n' '\\t'
+                grep 'Total contig length' "\$summary" | tail -n 1 | awk '{print \$4}' | tr '\\n' '\\t'
+                grep 'soft-masked bases' "\$summary" | tail -n 1 | awk '{print \$4}'
+            done
+        } > "\$out"
+    }
+
+    write_mqc_table tantan tantanmqc_mqc.tsv $assemt
+    write_mqc_table windowmasker windowmqc_mqc.tsv $assemw
+    write_mqc_table repeatmasker repeatmaskmqc_mqc.tsv $assemr
+    """
+}
diff --git a/workflows/pairgenomealignmask.nf b/workflows/pairgenomealignmask.nf
index bb7de88..aa16e21 100644
--- a/workflows/pairgenomealignmask.nf
+++ b/workflows/pairgenomealignmask.nf
@@ -10,6 +10,7 @@ include { REPEATMODELER_REPEATMODELER } from '../modules/nf-core/repeatmodeler/r
 include { REPEATMODELER_MASKER } from '../modules/nf-core/repeatmodeler/repeatmasker/main'
 include { REPEATMODELER_BUILDDATABASE } from '../modules/nf-core/repeatmodeler/builddatabase/main'
 include { TANTAN } from '../modules/local/tantan.nf'
+include { CUSTOMMODULE } from '../modules/local/custommodule.nf'
 include { GFASTATS as GFSTTANTAN } from '../modules/nf-core/gfastats/main'
 include { GFASTATS as GFSTREPEATMOD } from '../modules/nf-core/gfastats/main'
 include { GFASTATS as GFSTWINDOWMASK } from '../modules/nf-core/gfastats/main'
@@ -98,7 +99,17 @@ workflow PAIRGENOMEALIGNMASK {
         WINDOWMASKER_USTAT.out.intervals
     )
 
-    ch_multiqc_files = ch_multiqc_files.mix(WINDOWMASKER_MKCOUNTS.out.counts.collect{it[1]})
+    //
+    // MODULE: CUSTOMMODULE
+    //
+    CUSTOMMODULE (
+        GFSTTANTAN.out.assembly_summary.collect{it[1]},
+        GFSTWINDOWMASK.out.assembly_summary.collect{it[1]},
+        GFSTREPEATMOD.out.assembly_summary.collect{it[1]}
+    )
+    ch_multiqc_files = ch_multiqc_files.mix(CUSTOMMODULE.out.tsv)
+
+
     ch_versions = ch_versions.mix(WINDOWMASKER_MKCOUNTS.out.versions.first())
 
     //