Merge pull request #64 from nf-core/dev

Prepare for release
nf-core · May 3, 2022 · 429087d · 429087d
2 parents 82f90ec + acea6fe
commit 429087d
Show file tree

Hide file tree

Showing 7 changed files with 56 additions and 17 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,12 +3,14 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## v1.0.0 - [02/24/2022]
+## v1.0.0 - [05/03/2022]
 
-- bump version and ready for release.
+- bump version for release.
+- update the README.md.
 
-## v1.0dev - [01/25/2022]
+## v1.0dev - [04/25/2022]
 
+- add `totalLinks` parameter for prepare_circos.
 - add filters to chromosome names for `hipeak`.
 - add parameter `anchor_peaks`.
 - Update `MAPS` for new version of `VGAM`.
@@ -48,7 +50,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - change version number output from txt to yml file.
 - update citation.md
 - change the juicer_tools download on fly
-- merge the reviewer comments from [#2](https://github.com/nf-core/hicar/pull/2/)
+- resolve questions about installation of R packages
 - remove juicer_tools bin file
 - remove install_packages.r file
 - rename the parepare_circos to circos_prepare
@@ -58,11 +60,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### change on [11/02/2021]
 
-- merge the reviewer comments from [#1](https://github.com/nf-core/hicar/pull/1/)
+- add module to convert pair file to bam for visualization
+- decrease the memory cost for differential analysis
+- add module to create `circos` plot
+- add module `igv`
+- add module `juicer`
+- update QC documentation
+- update the memory cost and add ignore `errorStrategy` for `bedtools` sort
+- improve memory cost for modules `trackviewer`, `juicer` and `prepare_counts`
+- handle multiple errors for `MAPS`
+- update the module to prepare the `macs_gsize`
+- fix multiple typos in documentation
 - change the filename from design.csv to test_samplesheet.csv
 - change the filename from samplesheet.csv to test_full_samplesheet.csv
 - use nf-core repository URL
-- update the documentation of README.md
 - update the multiqc_config.yaml file format
 - remove the regexp check for replicate in schema_input.json
 - update output.md

diff --git a/README.md b/README.md
@@ -3,7 +3,7 @@
 [![GitHub Actions CI Status](https://github.com/nf-core/hicar/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/hicar/actions?query=workflow%3A%22nf-core+CI%22)
 [![GitHub Actions Linting Status](https://github.com/nf-core/hicar/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/hicar/actions?query=workflow%3A%22nf-core+linting%22)
 [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/hicar/results)
-[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.5618247-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.5618247)
+[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.6499091-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.6499091)
 
 [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg?labelColor=000000)](https://www.nextflow.io/)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
@@ -32,9 +32,9 @@ On release, automated continuous integration tests run the pipeline on a full-si
 4. Filter reads ([`pairtools`](https://pairtools.readthedocs.io/en/latest/))
 5. Quality analysis ([`pairsqc`](https://github.com/4dn-dcic/pairsqc))
 6. Call peaks for ATAC reads (R2 reads) ([`MACS2`](https://macs3-project.github.io/MACS/)) and/or call peaks for R1 reads.
-7. Find TADs and loops ([`MAPS`](https://github.com/ijuric/MAPS))
+7. Find genomic interaction loops ([`MAPS`](https://github.com/ijuric/MAPS))
 8. Differential analysis ([`edgeR`](https://bioconductor.org/packages/edgeR/))
-9. Annotation TADs and loops ([`ChIPpeakAnno`](https://bioconductor.org/packages/ChIPpeakAnno/))
+9. Annotate genomic interaction loops ([`ChIPpeakAnno`](https://bioconductor.org/packages/ChIPpeakAnno/))
 10. Create cooler files ([`cooler`](https://cooler.readthedocs.io/en/latest/index.html)), .hic files ([`Juicer_tools`](https://github.com/aidenlab/juicer/wiki)), and circos files ([`circos`](http://circos.ca/)) for visualization.
 11. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
 
@@ -96,7 +96,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `#
 
 ## Citations
 
-If you use nf-core/hicar for your analysis, please cite it using the following doi: [10.5281/zenodo.5618247](https://doi.org/10.5281/zenodo.5618247)
+If you use nf-core/hicar for your analysis, please cite it using the following doi: [10.5281/zenodo.6499091](https://doi.org/10.5281/zenodo.6499091)
 
 An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
 

diff --git a/conf/modules.config b/conf/modules.config
@@ -717,6 +717,7 @@ process {
         ]
     }
     withName: 'CIRCOS_PREPARE' {
+        ext.args    = '--totalLinks 24000'// if you increase this number, please also try to increase the number in housekeeping.conf
         publishDir  = [
             path: { "${params.outdir}/circos" },
             mode: params.publish_dir_mode,

diff --git a/modules/local/bioc/diffhicar.nf b/modules/local/bioc/diffhicar.nf
@@ -184,6 +184,7 @@ process DIFFHICAR {
             write.csv(res.s, fname(name, "csv", "edgeR.DEtable", name, "padj0.05.lfc1"), row.names = FALSE)
             ## Volcano plot
             res\$qvalue <- -10*log10(res\$PValue)
+            res.s\$qvalue <- -10*log10(res.s\$PValue)
             pdf(fname(name, "pdf", "Volcano-plot", name))
             plot(x=res\$logFC, y=res\$qvalue,
                 main = paste("Volcano plot for", name),

diff --git a/modules/local/circos/circos_prepare.nf b/modules/local/circos/circos_prepare.nf
@@ -17,6 +17,7 @@ process CIRCOS_PREPARE {
     path "versions.yml"                             , emit: versions
 
     script:
+    def args = task.ext.args ?: ''
     """
     #!/usr/bin/env Rscript
 
@@ -43,6 +44,28 @@ process CIRCOS_PREPARE {
     gtf <- "$gtf"
     ucscname <- "$ucscname"
     outfolder <- "${meta.id}"
+    totalLinks <- 1e4 # number of links kept when --totalLinks is not supplied
+
+    ## split the module arguments (task.ext.args) into whitespace separated tokens
+    args <- strsplit("${args}", "\\\\s+")[[1]]
+    ## Minimal command line parser.
+    ##   options: named list; each element is c(long flag, short flag, mode).
+    ##   args: character vector of tokens.
+    ## Returns a list with the same names as `options`: TRUE for a "logical"
+    ## flag that is present, the token following the flag coerced to `mode`
+    ## for other modes, and NULL when the flag is absent.
+    parse_args <- function(options, args){
+        lapply(options, function(.ele){
+            if(any(.ele[-3] %in% args)){
+                if(.ele[3]=="logical"){
+                    TRUE
+                }else{
+                    id <- which(args %in% .ele[-3])[1]
+                    x <- args[id+1]
+                    mode(x) <- .ele[3]
+                    x
+                }
+            }
+        })
+    }
+    ## the element name is the key used to read the value back from `opt`,
+    ## so it must be "totalLinks" for the lookup below to find it
+    option_list <- list("totalLinks"=c("--totalLinks", "-n", "numeric"))
+    opt <- parse_args(option_list, args)
+    ## keep the default when the flag is absent or its value is missing/not numeric
+    if(!is.null(opt[["totalLinks"]]) && is.finite(opt[["totalLinks"]])){
+        totalLinks <- opt[["totalLinks"]]
+    }
 
     dir.create(outfolder, showWarnings = FALSE)
 
@@ -57,17 +80,18 @@ process CIRCOS_PREPARE {
         pe <- import(interaction, format="BEDPE")
     }
     seqlevelsStyle(first(pe)) <- seqlevelsStyle(second(pe)) <- "UCSC"
-    pes <- pe[order(mcols(pe)\$score, decreasing=TRUE)]
+    ## rank interactions by score (highest first) and drop duplicated pairs
+    pes <- unique(pe[order(mcols(pe)\$score, decreasing=TRUE)])
-    pes_cis <- pes[seqnames(first(pe))==seqnames(second(pe))]
-    pes_trans <- pes[seqnames(first(pe))!=seqnames(second(pe))]
+    ## classify on `pes` itself: after sorting and deduplication its elements
+    ## no longer line up with those of `pe`
+    pes_cis <- pes[seqnames(first(pes))==seqnames(second(pes))]
+    pes_trans <- pes[seqnames(first(pes))!=seqnames(second(pes))]
-    if(length(pes_cis)>0){ # keep top 10K events for plot
-        pes <- pes_cis[seq.int(min(1e4, length(pes_cis)))]
+    if(length(pes_cis)>0){ # keep at most the top `totalLinks` cis events for plot
+        pes <- pes_cis[seq_len(min(totalLinks, length(pes_cis)))]
     }else{
         stop("No data available for plot")
     }
     if(length(pes_trans)>0){
-        pes <- sort(c(pes,
-                    pes_trans[seq.int(min(1e4, length(pes_trans)))])) ## keep top 10K links only. otherwise hard to plot.
+        ## when trans links exist, keep at most totalLinks/2 cis and totalLinks/2
+        ## trans links; more links are hard to plot. Each subset is bounded by
+        ## its own length.
+        halfLinks <- floor(totalLinks/2)
+        pes <- sort(c(pes[seq_len(min(halfLinks, length(pes)))],
+                    pes_trans[seq_len(min(halfLinks, length(pes_trans)))]))
     }
     out <- as.data.frame(pes)
     scores <- sqrt(range(mcols(pe)\$score)/10)

diff --git a/modules/local/hipeak/diff_hipeak.nf b/modules/local/hipeak/diff_hipeak.nf
@@ -140,7 +140,9 @@ process DIFF_HIPEAK {
     peaks\$ID <- seq_along(peaks)
     peaks.s <- split(peaks, paste(seqnames(first(peaks)), seqnames(second(peaks)), sep="___"))
     try_res <- try({cnts <- bplapply(file.path("pairs", pc), countByOverlaps, peaks=peaks.s, sep="___", BPPARAM = param)})
-    if(inherits(try_res, "try-error")){
+    ## `cnts` is only assigned when bplapply succeeded, so test the try() result
+    ## before reading it
+    bp_failed <- inherits(try_res, "try-error")
+    if(!bp_failed){
+        sizeFactor <- vapply(cnts, FUN=function(.ele) .ele\$total,
+                            FUN.VALUE = numeric(1))
+        ## all-zero totals are treated as a sign that bplapply did not work
+        bp_failed <- all(sizeFactor==0)
+    }
+    if(bp_failed){ # fall back to serial counting
         cnts <- lapply(file.path("pairs", pc), countByOverlaps, peaks=peaks.s, sep="___")
+        ## refresh the totals so they describe the serial counts
+        sizeFactor <- vapply(cnts, FUN=function(.ele) .ele\$total,
+                            FUN.VALUE = numeric(1))
     }
     h5closeAll()

diff --git a/nextflow.config b/nextflow.config
@@ -233,7 +233,7 @@ manifest {
     description     = 'This pipeline analyses data for HiCAR data, a robust and sensitive multi-omic co-assay for simultaneous measurement of transcriptome, chromatin accessibility and cis-regulatory chromatin contacts.'
     mainScript      = 'main.nf'
     nextflowVersion = '!>=21.10.3'
-    version         = '1.0.0'
+    version = '1.0.0'
 }
 
 // Load modules.config for DSL2 module specific options