diff --git a/CHANGELOG.md b/CHANGELOG.md index c6e118b5..e1dac46f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,12 +3,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0.0 - [02/24/2022] +## v1.0.0 - [05/03/2022] -- bump version and ready for release. +- bump version for release. +- update the README.md. -## v1.0dev - [01/25/2022] +## v1.0dev - [04/25/2022] +- add `totalLinks` parameter for `circos_prepare`. - add filters to chromosome names for `hipeak`. - add parameter `anchor_peaks`. - Update `MAPS` for new version of `VGAM`. @@ -48,7 +50,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - change version number output from txt to yml file. - update citation.md - change the juicer_tools download on fly -- merge the reviewer comments from [#2](https://github.com/nf-core/hicar/pull/2/) +- resolve questions about installation of R packages - remove juicer_tools bin file - remove install_packages.r file - rename the parepare_circos to circos_prepare @@ -58,11 +60,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### change on [11/02/2021] -- merge the reviewer comments from [#1](https://github.com/nf-core/hicar/pull/1/) +- add module to convert pair file to bam for visualization +- decrease the memory cost for differential analysis +- add module to create `circos` plot +- add module `igv` +- add module `juicer` +- update QC documentation +- update the memory cost and add ignore `errorStrategy` for `bedtools` sort +- improve memory cost for modules `trackviewer`, `juicer` and `prepare_counts` +- handle multiple errors for `MAPS` +- update the module to prepare the `macs_gsize` +- fix multiple typos in documentation - change the filename from design.csv to test_samplesheet.csv - change the filename from samplesheet.csv to test_full_samplesheet.csv - use nf-core 
repository URL -- update the documentation of README.md - update the multiqc_config.yaml file format - remove the regrexp check for replicate in schema_input.json - update output.md diff --git a/README.md b/README.md index 74973c93..ced4d695 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![GitHub Actions CI Status](https://github.com/nf-core/hicar/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/hicar/actions?query=workflow%3A%22nf-core+CI%22) [![GitHub Actions Linting Status](https://github.com/nf-core/hicar/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/hicar/actions?query=workflow%3A%22nf-core+linting%22) [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/hicar/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.5618247-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.5618247) +[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.6499091-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.6499091) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg?labelColor=000000)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) @@ -32,9 +32,9 @@ On release, automated continuous integration tests run the pipeline on a full-si 4. Filter reads ([`pairtools`](https://pairtools.readthedocs.io/en/latest/)) 5. Quality analysis ([`pairsqc`](https://github.com/4dn-dcic/pairsqc)) 6. Call peaks for ATAC reads (R2 reads) ([`MACS2`](https://macs3-project.github.io/MACS/)) and/or call peaks for R1 reads. -7. Find TADs and loops ([`MAPS`](https://github.com/ijuric/MAPS)) +7. Find genomic interaction loops ([`MAPS`](https://github.com/ijuric/MAPS)) 8. Differential analysis ([`edgeR`](https://bioconductor.org/packages/edgeR/)) -9. 
Annotation TADs and loops ([`ChIPpeakAnno`](https://bioconductor.org/packages/ChIPpeakAnno/)) +9. Annotate genomic interaction loops ([`ChIPpeakAnno`](https://bioconductor.org/packages/ChIPpeakAnno/)) 10. Create cooler files ([`cooler`](https://cooler.readthedocs.io/en/latest/index.html), .hic files [`Juicer_tools`](https://github.com/aidenlab/juicer/wiki), and circos files [`circos`](http://circos.ca/)) for visualization. 11. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) @@ -96,7 +96,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations -If you use nf-core/hicar for your analysis, please cite it using the following doi: [10.5281/zenodo.5618247](https://doi.org/10.5281/zenodo.5618247) +If you use nf-core/hicar for your analysis, please cite it using the following doi: [10.5281/zenodo.6499091](https://doi.org/10.5281/zenodo.6499091) An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. 
diff --git a/conf/modules.config b/conf/modules.config index cd8aa8f8..68af3bef 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -717,6 +717,7 @@ process { ] } withName: 'CIRCOS_PREPARE' { + ext.args = '--totalLinks 24000'// if you increase this number, please also try to increase the number in housekeeping.conf publishDir = [ path: { "${params.outdir}/circos" }, mode: params.publish_dir_mode, diff --git a/modules/local/bioc/diffhicar.nf b/modules/local/bioc/diffhicar.nf index 75945fde..e60ec709 100644 --- a/modules/local/bioc/diffhicar.nf +++ b/modules/local/bioc/diffhicar.nf @@ -184,6 +184,7 @@ process DIFFHICAR { write.csv(res.s, fname(name, "csv", "edgeR.DEtable", name, "padj0.05.lfc1"), row.names = FALSE) ## Volcano plot res\$qvalue <- -10*log10(res\$PValue) + res.s\$qvalue <- -10*log10(res.s\$PValue) pdf(fname(name, "pdf", "Volcano-plot", name)) plot(x=res\$logFC, y=res\$qvalue, main = paste("Volcano plot for", name), diff --git a/modules/local/circos/circos_prepare.nf b/modules/local/circos/circos_prepare.nf index a2da7299..add2a645 100644 --- a/modules/local/circos/circos_prepare.nf +++ b/modules/local/circos/circos_prepare.nf @@ -17,6 +17,7 @@ process CIRCOS_PREPARE { path "versions.yml" , emit: versions script: + def args = task.ext.args ?: '' """ #!/usr/bin/env Rscript @@ -43,6 +44,28 @@ process CIRCOS_PREPARE { gtf <- "$gtf" ucscname <- "$ucscname" outfolder <- "${meta.id}" + totalLinks <- 1e4 + + args <- strsplit("${args}", "\\\\s+")[[1]] + parse_args <- function(options, args){ + out <- lapply(options, function(.ele){ + if(any(.ele[-3] %in% args)){ + if(.ele[3]=="logical"){ + TRUE + }else{ + id <- which(args %in% .ele[-3])[1] + x <- args[id+1] + mode(x) <- .ele[3] + x + } + } + }) + } + option_list <- list("totalLinks"=c("--totalLinks", "-n", "numeric")) # element name is the key looked up in opt below + opt <- parse_args(option_list, args) + if(!is.null(opt[["totalLinks"]])){ + totalLinks <- opt[["totalLinks"]] + } dir.create(outfolder, showWarnings = FALSE) @@ -57,17 +80,18 @@ process 
CIRCOS_PREPARE { pe <- import(interaction, format="BEDPE") } seqlevelsStyle(first(pe)) <- seqlevelsStyle(second(pe)) <- "UCSC" - pes <- pe[order(mcols(pe)\$score, decreasing=TRUE)] - pes_cis <- pes[seqnames(first(pe))==seqnames(second(pe))] - pes_trans <- pes[seqnames(first(pe))!=seqnames(second(pe))] + pes <- unique(pe[order(mcols(pe)\$score, decreasing=TRUE)]) + pes_cis <- pes[seqnames(first(pes))==seqnames(second(pes))] + pes_trans <- pes[seqnames(first(pes))!=seqnames(second(pes))] - if(length(pes_cis)>0){ # keep top 10K events for plot - pes <- pes_cis[seq.int(min(1e4, length(pes_cis)))] + if(length(pes_cis)>0){ # keep the top totalLinks cis events for plot + pes <- pes_cis[seq_len(min(totalLinks, length(pes_cis)))] }else{ stop("No data available for plot") } if(length(pes_trans)>0){ - pes <- sort(c(pes, - pes_trans[seq.int(min(1e4, length(pes_trans)))])) ## keep top 10K links only. otherwise hard to plot. + ## keep at most totalLinks links (half cis, half trans). otherwise hard to plot. + pes <- sort(c(pes[seq_len(min(floor(totalLinks/2), length(pes)))], + pes_trans[seq_len(min(floor(totalLinks/2), length(pes_trans)))])) } out <- as.data.frame(pes) scores <- sqrt(range(mcols(pe)\$score)/10) diff --git a/modules/local/hipeak/diff_hipeak.nf b/modules/local/hipeak/diff_hipeak.nf index fd0c9e53..982bc8b2 100644 --- a/modules/local/hipeak/diff_hipeak.nf +++ b/modules/local/hipeak/diff_hipeak.nf @@ -140,7 +140,9 @@ process DIFF_HIPEAK { peaks\$ID <- seq_along(peaks) peaks.s <- split(peaks, paste(seqnames(first(peaks)), seqnames(second(peaks)), sep="___")) try_res <- try({cnts <- bplapply(file.path("pairs", pc), countByOverlaps, peaks=peaks.s, sep="___", BPPARAM = param)}) - if(inherits(try_res, "try-error")){ + bp_failed <- inherits(try_res, "try-error") + if(!bp_failed) sizeFactor <- vapply(cnts, FUN=function(.ele) .ele\$total, FUN.VALUE = numeric(1)) + if(bp_failed || all(sizeFactor==0)){ # rerun serially when bplapply errored or every total is 0 cnts <- lapply(file.path("pairs", pc), countByOverlaps, peaks=peaks.s, sep="___") } h5closeAll() diff --git a/nextflow.config b/nextflow.config index 71c62bc2..43835968 100644 --- a/nextflow.config +++ b/nextflow.config @@ 
-233,7 +233,7 @@ manifest { description = 'This pipeline analyses data for HiCAR data, a robust and sensitive multi-omic co-assay for simultaneous measurement of transcriptome, chromatin accessibility and cis-regulatory chromatin contacts.' mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' - version = '1.0.0' + version = '1.0.0' } // Load modules.config for DSL2 module specific options