From 1179eaa959f9304328ea4a37a78eae2baf59b3f0 Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Fri, 22 Apr 2022 08:40:07 -0400 Subject: [PATCH 01/10] add totalLinks parameter for prepare_circos --- CHANGELOG.md | 1 + conf/modules.config | 1 + modules/local/bioc/diffhicar.nf | 1 + modules/local/circos/circos_prepare.nf | 34 ++++++++++++++++++++++---- modules/local/hipeak/diff_hipeak.nf | 1 + 5 files changed, 33 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6e118b5..01459a63 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v1.0dev - [01/25/2022] +- add `totalLinks` parameter for prepare_circos. - add filters to chromosome names for `hipeak`. - add parameter `anchor_peaks`. - Update `MAPS` for new version of `VGAM`. diff --git a/conf/modules.config b/conf/modules.config index cd8aa8f8..68af3bef 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -717,6 +717,7 @@ process { ] } withName: 'CIRCOS_PREPARE' { + ext.args = '--totalLinks 24000'// if you increase this number, please also try to increase the number in housekeeping.conf publishDir = [ path: { "${params.outdir}/circos" }, mode: params.publish_dir_mode, diff --git a/modules/local/bioc/diffhicar.nf b/modules/local/bioc/diffhicar.nf index 75945fde..e60ec709 100644 --- a/modules/local/bioc/diffhicar.nf +++ b/modules/local/bioc/diffhicar.nf @@ -184,6 +184,7 @@ process DIFFHICAR { write.csv(res.s, fname(name, "csv", "edgeR.DEtable", name, "padj0.05.lfc1"), row.names = FALSE) ## Volcano plot res\$qvalue <- -10*log10(res\$PValue) + res.s\$qvalue <- -10*log10(res.s\$PValue) pdf(fname(name, "pdf", "Volcano-plot", name)) plot(x=res\$logFC, y=res\$qvalue, main = paste("Volcano plot for", name), diff --git a/modules/local/circos/circos_prepare.nf b/modules/local/circos/circos_prepare.nf index a2da7299..add2a645 100644 --- a/modules/local/circos/circos_prepare.nf +++ 
b/modules/local/circos/circos_prepare.nf @@ -17,6 +17,7 @@ process CIRCOS_PREPARE { path "versions.yml" , emit: versions script: + def args = task.ext.args ?: '' """ #!/usr/bin/env Rscript @@ -43,6 +44,28 @@ process CIRCOS_PREPARE { gtf <- "$gtf" ucscname <- "$ucscname" outfolder <- "${meta.id}" + totalLinks <- 1e4 + + args <- strsplit("${args}", "\\\\s+")[[1]] + parse_args <- function(options, args){ + out <- lapply(options, function(.ele){ + if(any(.ele[-3] %in% args)){ + if(.ele[3]=="logical"){ + TRUE + }else{ + id <- which(args %in% .ele[-3])[1] + x <- args[id+1] + mode(x) <- .ele[3] + x + } + } + }) + } + option_list <- list("totalLinks"=c("--totalLinks", "-n", "numeric")) # the list name is the key looked up in opt below + opt <- parse_args(option_list, args) + if(!is.null(opt[["totalLinks"]])){ + totalLinks <- opt[["totalLinks"]] + } dir.create(outfolder, showWarnings = FALSE) @@ -57,17 +80,18 @@ process CIRCOS_PREPARE { pe <- import(interaction, format="BEDPE") } seqlevelsStyle(first(pe)) <- seqlevelsStyle(second(pe)) <- "UCSC" - pes <- pe[order(mcols(pe)\$score, decreasing=TRUE)] + pes <- unique(pe[order(mcols(pe)\$score, decreasing=TRUE)]) - pes_cis <- pes[seqnames(first(pe))==seqnames(second(pe))] - pes_trans <- pes[seqnames(first(pe))!=seqnames(second(pe))] + pes_cis <- pes[seqnames(first(pes))==seqnames(second(pes))] # mask must come from pes (sorted, de-duplicated), not pe + pes_trans <- pes[seqnames(first(pes))!=seqnames(second(pes))] - if(length(pes_cis)>0){ # keep top 10K events for plot - pes <- pes_cis[seq.int(min(1e4, length(pes_cis)))] + if(length(pes_cis)>0){ # keep top events for plot, default 24K + pes <- pes_cis[seq.int(min(totalLinks, length(pes_cis)))] }else{ stop("No data available for plot") } if(length(pes_trans)>0){ - pes <- sort(c(pes, - pes_trans[seq.int(min(1e4, length(pes_trans)))])) ## keep top 10K links only. otherwise hard to plot. + ## keep top 24K links only. otherwise hard to plot. 
+ pes <- sort(c(pes[seq.int(min(floor(totalLinks/2), length(pes)))], + pes_trans[seq.int(min(floor(totalLinks/2), length(pes_trans)))])) } out <- as.data.frame(pes) scores <- sqrt(range(mcols(pe)\$score)/10) diff --git a/modules/local/hipeak/diff_hipeak.nf b/modules/local/hipeak/diff_hipeak.nf index fd0c9e53..6d098e36 100644 --- a/modules/local/hipeak/diff_hipeak.nf +++ b/modules/local/hipeak/diff_hipeak.nf @@ -145,6 +145,7 @@ process DIFF_HIPEAK { } h5closeAll() rm(peaks.s) + saveRDS(cnts, "cnts.rds") samples <- sub("(_REP\\\\d+)\\\\.(.*?)h5\$", "\\\\1", pc) sizeFactor <- vapply(cnts, FUN=function(.ele) .ele\$total, FUN.VALUE = numeric(1)) From 4f990c873fca154a8dda034eb018e831395c51ef Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Fri, 22 Apr 2022 10:14:05 -0400 Subject: [PATCH 02/10] fix the issue if bplapply does not work in diff_hipeak. --- modules/local/hipeak/diff_hipeak.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/local/hipeak/diff_hipeak.nf b/modules/local/hipeak/diff_hipeak.nf index 6d098e36..982bc8b2 100644 --- a/modules/local/hipeak/diff_hipeak.nf +++ b/modules/local/hipeak/diff_hipeak.nf @@ -140,12 +140,13 @@ process DIFF_HIPEAK { peaks\$ID <- seq_along(peaks) peaks.s <- split(peaks, paste(seqnames(first(peaks)), seqnames(second(peaks)), sep="___")) try_res <- try({cnts <- bplapply(file.path("pairs", pc), countByOverlaps, peaks=peaks.s, sep="___", BPPARAM = param)}) - if(inherits(try_res, "try-error")){ + if(inherits(try_res, "try-error") || + all(vapply(cnts, FUN=function(.ele) .ele\$total, + FUN.VALUE = numeric(1))==0)){ # short-circuit: cnts is only read when bplapply succeeded; all-zero totals mean it did not really work cnts <- lapply(file.path("pairs", pc), countByOverlaps, peaks=peaks.s, sep="___") } h5closeAll() rm(peaks.s) - saveRDS(cnts, "cnts.rds") samples <- sub("(_REP\\\\d+)\\\\.(.*?)h5\$", "\\\\1", pc) sizeFactor <- vapply(cnts, FUN=function(.ele) .ele\$total, FUN.VALUE = numeric(1)) From 
217a9cc1e0f940f45cf8be2477628ccb09f44aed Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Wed, 27 Apr 2022 14:42:19 -0400 Subject: [PATCH 03/10] prepare for release --- CHANGELOG.md | 4 ++-- README.md | 4 ++-- nextflow.config | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01459a63..a807a28e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,11 +3,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0.0 - [02/24/2022] +## v1.0.0 - [04/27/2022] - bump version and ready for release. -## v1.0dev - [01/25/2022] +## v1.0dev - [04/25/2022] - add `totalLinks` parameter for prepare_circos. - add filters to chromosome names for `hipeak`. diff --git a/README.md b/README.md index 74973c93..3e72b9cd 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![GitHub Actions CI Status](https://github.com/nf-core/hicar/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/hicar/actions?query=workflow%3A%22nf-core+CI%22) [![GitHub Actions Linting Status](https://github.com/nf-core/hicar/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/hicar/actions?query=workflow%3A%22nf-core+linting%22) [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/hicar/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.5618247-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.5618247) +[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.6499091-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.6499091) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg?labelColor=000000)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) @@ 
-96,7 +96,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations -If you use nf-core/hicar for your analysis, please cite it using the following doi: [10.5281/zenodo.5618247](https://doi.org/10.5281/zenodo.5618247) +If you use nf-core/hicar for your analysis, please cite it using the following doi: [10.5281/zenodo.6499091](https://doi.org/10.5281/zenodo.6499091) An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. diff --git a/nextflow.config b/nextflow.config index 71c62bc2..43835968 100644 --- a/nextflow.config +++ b/nextflow.config @@ -233,7 +233,7 @@ manifest { description = 'This pipeline analyses data for HiCAR data, a robust and sensitive multi-omic co-assay for simultaneous measurement of transcriptome, chromatin accessibility and cis-regulatory chromatin contacts.' mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' - version = '1.0.0' + version = '1.0.0' } // Load modules.config for DSL2 module specific options From e41187c61c4297bf89611ea40f93538fc112ae8c Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 28 Apr 2022 08:39:36 +0200 Subject: [PATCH 04/10] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a807a28e..0698e8e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0.0 - [04/27/2022] +## v1.0.0 - [04/28/2022] - bump version and ready for release. From 7cdae3636a0f1efb8561d69ab694ebe7dfa7be98 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Fri, 29 Apr 2022 08:22:08 +0200 Subject: [PATCH 05/10] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0698e8e8..aed8d32f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0.0 - [04/28/2022] +## v1.0.0 - [04/29/2022] - bump version and ready for release. From 6440282643dedd17a4c70591cdeb473b9f54ca81 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 29 Apr 2022 09:21:33 +0200 Subject: [PATCH 06/10] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aed8d32f..a1292abe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v1.0.0 - [04/29/2022] -- bump version and ready for release. +- bump version for release. ## v1.0dev - [04/25/2022] From c1e4aa9960c2a33d8ccd346765c2d681682b42c7 Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Mon, 2 May 2022 09:11:47 -0400 Subject: [PATCH 07/10] update CHANGELOG to trigger change in dev. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a1292abe..ef85a32d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0.0 - [04/29/2022] +## v1.0.0 - [05/02/2022] - bump version for release. From 2b94e25ca28ce26a5481fb7e955f90c1ddf9fa8e Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Tue, 3 May 2022 14:31:14 +0200 Subject: [PATCH 08/10] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ef85a32d..c76c128a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0.0 - [05/02/2022] +## v1.0.0 - [05/03/2022] - bump version for release. From 301686ef22e3ec8b15604030d3840fc833a81c3e Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Tue, 3 May 2022 09:23:58 -0400 Subject: [PATCH 09/10] update changelogs to clean up the sentences with 'merging comments' --- CHANGELOG.md | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c76c128a..6dfd2362 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,7 +49,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - change version number output from txt to yml file. 
- update citation.md - change the juicer_tools download on fly -- merge the reviewer comments from [#2](https://github.com/nf-core/hicar/pull/2/) +- resolve questions about installation of R packages - remove juicer_tools bin file - remove install_packages.r file - rename the parepare_circos to circos_prepare @@ -59,11 +59,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### change on [11/02/2021] -- merge the reviewer comments from [#1](https://github.com/nf-core/hicar/pull/1/) +- add module to convert pair file to bam for visualization +- decrease the memory cost for differential analysis +- add module to create `circos` plot +- add module `igv` +- add module `juicer` +- update QC documentation +- update the memory cost and add ignore `errorStrategy` for `bedtools` sort +- improve memory cost for modules `trackviewer`, `juicer` and `prepare_counts` +- handle multiple errors for `MAPS` +- update the module to prepare the `macs_gsize` +- fix multiple typos in documentation - change the filename from design.csv to test_samplesheet.csv - change the filename from samplesheet.csv to test_full_samplesheet.csv - use nf-core repository URL -- update the documentation of README.md - update the multiqc_config.yaml file format - remove the regrexp check for replicate in schema_input.json - update output.md From 9516795f72533e2966cfe80949dcde71b242f91b Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Tue, 3 May 2022 09:33:47 -0400 Subject: [PATCH 10/10] update README and CHANGELOG --- CHANGELOG.md | 1 + README.md | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6dfd2362..e1dac46f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v1.0.0 - [05/03/2022] - bump version for release. +- update the README.md. 
## v1.0dev - [04/25/2022] diff --git a/README.md b/README.md index 3e72b9cd..ced4d695 100644 --- a/README.md +++ b/README.md @@ -32,9 +32,9 @@ On release, automated continuous integration tests run the pipeline on a full-si 4. Filter reads ([`pairtools`](https://pairtools.readthedocs.io/en/latest/)) 5. Quality analysis ([`pairsqc`](https://github.com/4dn-dcic/pairsqc)) 6. Call peaks for ATAC reads (R2 reads) ([`MACS2`](https://macs3-project.github.io/MACS/)) and/or call peaks for R1 reads. -7. Find TADs and loops ([`MAPS`](https://github.com/ijuric/MAPS)) +7. Find genomic interaction loops ([`MAPS`](https://github.com/ijuric/MAPS)) 8. Differential analysis ([`edgeR`](https://bioconductor.org/packages/edgeR/)) -9. Annotation TADs and loops ([`ChIPpeakAnno`](https://bioconductor.org/packages/ChIPpeakAnno/)) +9. Annotate genomic interaction loops ([`ChIPpeakAnno`](https://bioconductor.org/packages/ChIPpeakAnno/)) 10. Create cooler files ([`cooler`](https://cooler.readthedocs.io/en/latest/index.html), .hic files [`Juicer_tools`](https://github.com/aidenlab/juicer/wiki), and circos files [`circos`](http://circos.ca/)) for visualization. 11. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))