Skip to content

Commit

Permalink
Merge pull request #45 from m-jahn/dev
Browse files Browse the repository at this point in the history
fix: formatting issues and examples for CRAN
  • Loading branch information
showteeth authored Jan 24, 2025
2 parents 8465fd8 + 6430c5f commit 6cd38ab
Show file tree
Hide file tree
Showing 19 changed files with 390 additions and 322 deletions.
16 changes: 8 additions & 8 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@ Authors@R: c(
comment = c(ORCID = "0000-0002-3913-153X"))
)
Maintainer: Yabing Song <[email protected]>
Description: The goal of 'ggcoverage' is to visualize coverage tracks from
genomics, transcriptomics or proteomics data. It contains functions to
load data from BAM, BigWig, BedGraph, txt, or xlsx files, create
genome/protein coverage plots, and add various annotations including
base and amino acid composition, GC content, copy number variation
(CNV), genes, transcripts, ideograms, peak highlights, HiC contact
maps, contact links and protein features. It is based on and
integrates well with 'ggplot2'.
Description: Visualize coverage tracks from genomics, transcriptomics
or proteomics data. The package contains functions to load data from 'BAM',
'BigWig', 'BedGraph', 'txt', or 'xlsx' files, create genome/protein coverage
plots, and add various annotations including base and amino acid
composition, GC content, copy number variation (CNV), genes, transcripts,
ideograms, peak highlights, HiC contact maps, contact links and protein
features. It is based on and integrates well with 'ggplot2'.
License: MIT + file LICENSE
URL: https://showteeth.github.io/ggcoverage/,
https://github.com/showteeth/ggcoverage
Expand Down Expand Up @@ -51,6 +50,7 @@ Suggests:
HiCBricks,
htmltools,
knitr,
openxlsx,
rmarkdown
VignetteBuilder:
knitr
Expand Down
58 changes: 46 additions & 12 deletions R/ConsensusPeak.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,21 @@
#' @export
#'
#' @examples
#' # library(ggcoverage)
#' # peak.file <- system.file("extdata", "ChIP-seq", "consensus.peak", package = "ggcoverage")
#' # peak.df <- GetConsensusPeak(peak.file = peak.file)
GetConsensusPeak <- function(peak.file, peak.folder = NULL, mspc.path = NULL, rep.type = c("bio", "tec"), stringency.threshold = 1e-8,
weak.threshold = 1e-4, gamma = 1e-8, alpha = 0.05, min.overlap.num = 1,
multiple.intersections = c("Lowest", "Highest"), parallelism.degree = 1) {
#' peak_file <- system.file("extdata", "ChIP-seq", "consensus.peak", package = "ggcoverage")
#' peak_df <- GetConsensusPeak(peak.file = peak_file)
#' head(peak_df)
#'
GetConsensusPeak <- function(peak.file,
peak.folder = NULL,
mspc.path = NULL,
rep.type = c("bio", "tec"),
stringency.threshold = 1e-8,
weak.threshold = 1e-4,
gamma = 1e-8,
alpha = 0.05,
min.overlap.num = 1,
multiple.intersections = c("Lowest", "Highest"),
parallelism.degree = 1) {
# check parameters
rep.type <- match.arg(arg = rep.type)
multiple.intersections <- match.arg(arg = multiple.intersections)
Expand All @@ -39,7 +48,9 @@ GetConsensusPeak <- function(peak.file, peak.folder = NULL, mspc.path = NULL, re
stop("Peak file number is less than or equal to one!")
} else if (length(peak.file) == 1) {
# read file directly, do not get consensus peaks
consensus.peak.df <- read.table(file = peak.file, sep = "\t", header = FALSE)
consensus.peak.df <- read.table(file = peak.file,
sep = "\t",
header = FALSE)
consensus.peak.df <- consensus.peak.df[, 1:5]
colnames(consensus.peak.df) <- c("chr", "start", "stop", "name", "score")
} else {
Expand All @@ -62,9 +73,26 @@ GetConsensusPeak <- function(peak.file, peak.folder = NULL, mspc.path = NULL, re

# full command
mspc.cmd <- paste(
mspc.path, input.para, "-r", rep.type, "-s", stringency.threshold,
"-w", weak.threshold, "-g", gamma, "-a", alpha, "-c", min.overlap.num, "-m", multiple.intersections,
"-d", parallelism.degree, "-o", out.folder
mspc.path,
input.para,
"-r",
rep.type,
"-s",
stringency.threshold,
"-w",
weak.threshold,
"-g",
gamma,
"-a",
alpha,
"-c",
min.overlap.num,
"-m",
multiple.intersections,
"-d",
parallelism.degree,
"-o",
out.folder
)
# change language information
full.mspc.cmd <- paste0("export LC_ALL=en_US.UTF-8;", mspc.cmd)
Expand All @@ -78,11 +106,17 @@ GetConsensusPeak <- function(peak.file, peak.folder = NULL, mspc.path = NULL, re
# obtain results
if (!file.exists(file.path(out.folder, "ConsensusPeaks.bed"))) {
out.base <- basename(out.folder)
all.tmp.dirs <- sort(dir(path = dirname(out.folder), pattern = out.base, full.names = TRUE))
all.tmp.dirs <- sort(dir(
path = dirname(out.folder),
pattern = out.base,
full.names = TRUE
))
out.folder <- all.tmp.dirs[length(all.tmp.dirs)]
}
consensus.peak.file <- file.path(out.folder, "ConsensusPeaks.bed")
consensus.peak.df <- read.table(file = consensus.peak.file, sep = "\t", header = TRUE)
consensus.peak.df <- read.table(file = consensus.peak.file,
sep = "\t",
header = TRUE)
}
return(consensus.peak.df)
}
52 changes: 27 additions & 25 deletions R/geom_cnv.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,35 +22,37 @@
#' @export
#'
#' @examples
#' \dontrun{
#' library("BSgenome.Hsapiens.UCSC.hg19")
#' \donttest{
#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) {
#' library("BSgenome.Hsapiens.UCSC.hg19")
#'
#' # load track data
#' track_file <-
#' system.file("extdata", "DNA-seq", "SRR054616.bw", package = "ggcoverage")
#' track_df <- LoadTrackFile(
#' track.file = track_file,
#' format = "bw",
#' region = "4:1-160000000"
#' )
#' track_df$seqnames <- paste0("chr", track_df$seqnames)
#' # load track data
#' track_file <-
#' system.file("extdata", "DNA-seq", "SRR054616.bw", package = "ggcoverage")
#' track_df <- LoadTrackFile(
#' track.file = track_file,
#' format = "bw",
#' region = "4:1-160000000"
#' )
#' track_df$seqnames <- paste0("chr", track_df$seqnames)
#'
#' # read CNV data
#' cnv_file <-
#' system.file("extdata", "DNA-seq", "SRR054616_copynumber.txt", package = "ggcoverage")
#' cnv_df <- read.table(file = cnv_file, sep = "\t", header = TRUE)
#' # read CNV data
#' cnv_file <-
#' system.file("extdata", "DNA-seq", "SRR054616_copynumber.txt", package = "ggcoverage")
#' cnv_df <- read.table(file = cnv_file, sep = "\t", header = TRUE)
#'
#' # plot coverage, GC content, CNV
#' basic_coverage <- ggcoverage(
#' data = track_df,
#' color = "grey",
#' mark.region = NULL,
#' range.position = "out"
#' )
#' # plot coverage, GC content, CNV
#' basic_coverage <- ggcoverage(
#' data = track_df,
#' color = "grey",
#' mark.region = NULL,
#' range.position = "out"
#' )
#'
#' basic_coverage +
#' geom_gc(bs.fa.seq = BSgenome.Hsapiens.UCSC.hg19) +
#' geom_cnv(cnv.df = cnv_df, bin.col = 3, cn.col = 4)
#' basic_coverage +
#' geom_gc(bs.fa.seq = BSgenome.Hsapiens.UCSC.hg19) +
#' geom_cnv(cnv.df = cnv_df, bin.col = 3, cn.col = 4)
#' }
#' }
geom_cnv <- function(cnv.df, bin.col = 3, cn.col = 4, ref.cn = 2,
bin.point.color = "grey", bin.point.alpha = 0.6, cn.line.color = "red",
Expand Down
42 changes: 22 additions & 20 deletions R/geom_gc.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,29 +20,31 @@
#' @export
#'
#' @examples
#' \dontrun{
#' library("BSgenome.Hsapiens.UCSC.hg19")
#' \donttest{
#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) {
#' library("BSgenome.Hsapiens.UCSC.hg19")
#'
#' # load track data
#' track_file <-
#' system.file("extdata", "DNA-seq", "SRR054616.bw", package = "ggcoverage")
#' track_df <- LoadTrackFile(
#' track.file = track_file,
#' format = "bw",
#' region = "4:1-160000000"
#' )
#' track_df$seqnames <- paste0("chr", track_df$seqnames)
#' # load track data
#' track_file <-
#' system.file("extdata", "DNA-seq", "SRR054616.bw", package = "ggcoverage")
#' track_df <- LoadTrackFile(
#' track.file = track_file,
#' format = "bw",
#' region = "4:1-160000000"
#' )
#' track_df$seqnames <- paste0("chr", track_df$seqnames)
#'
#' # plot coverage and GC content
#' basic_coverage <- ggcoverage(
#' data = track_df,
#' color = "grey",
#' mark.region = NULL,
#' range.position = "out"
#' )
#' # plot coverage and GC content
#' basic_coverage <- ggcoverage(
#' data = track_df,
#' color = "grey",
#' mark.region = NULL,
#' range.position = "out"
#' )
#'
#' basic_coverage +
#' geom_gc(bs.fa.seq = BSgenome.Hsapiens.UCSC.hg19)
#' basic_coverage +
#' geom_gc(bs.fa.seq = BSgenome.Hsapiens.UCSC.hg19)
#' }
#' }
geom_gc <- function(fa.file = NULL, bs.fa.seq = NULL, chr.split = "[[:space:]]", guide.line = NULL,
line.color = "black", guide.line.color = "red", guide.line.type = "dashed",
Expand Down
65 changes: 33 additions & 32 deletions R/geom_ideogram.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,43 +35,44 @@
#' @export
#'
#' @examples
#' \dontrun{
#' # note that you need to have package 'ggbio' installed
#' library(ggbio)
#' \donttest{
#' if (requireNamespace("ggbio", quietly = TRUE)) {
#' library(ggbio)
#'
#' # load metadata
#' meta_file <-
#' system.file("extdata", "RNA-seq", "meta_info.csv", package = "ggcoverage")
#' sample_meta <- read.csv(meta_file)
#' # load metadata
#' meta_file <-
#' system.file("extdata", "RNA-seq", "meta_info.csv", package = "ggcoverage")
#' sample_meta <- read.csv(meta_file)
#'
#' # track folder
#' track_folder <-
#' system.file("extdata", "RNA-seq", package = "ggcoverage")
#' # load bigwig file
#' track_df <- LoadTrackFile(
#' track.folder = track_folder,
#' format = "bw",
#' region = "chr14:21,677,306-21,737,601",
#' extend = 2000,
#' meta.info = sample_meta
#' )
#' # track folder
#' track_folder <-
#' system.file("extdata", "RNA-seq", package = "ggcoverage")
#' # load bigwig file
#' track_df <- LoadTrackFile(
#' track.folder = track_folder,
#' format = "bw",
#' region = "chr14:21,677,306-21,737,601",
#' extend = 2000,
#' meta.info = sample_meta
#' )
#'
#' # gene annotation
#' gtf_file <-
#' system.file("extdata", "used_hg19.gtf", package = "ggcoverage")
#' gtf_gr <- rtracklayer::import.gff(con = gtf_file, format = "gtf")
#' # gene annotation
#' gtf_file <-
#' system.file("extdata", "used_hg19.gtf", package = "ggcoverage")
#' gtf_gr <- rtracklayer::import.gff(con = gtf_file, format = "gtf")
#'
#' # coverage plot + ideogram
#' basic_coverage <- ggcoverage(
#' data = track_df,
#' plot.type = "facet",
#' range.position = "in",
#' facet.y.scale = "fixed"
#' )
#' # coverage plot + ideogram
#' basic_coverage <- ggcoverage(
#' data = track_df,
#' plot.type = "facet",
#' range.position = "in",
#' facet.y.scale = "fixed"
#' )
#'
#' basic_coverage +
#' geom_gene(gtf.gr = gtf_gr) +
#' geom_ideogram(genome = "hg19", plot.space = 0)
#' basic_coverage +
#' geom_gene(gtf.gr = gtf_gr) +
#' geom_ideogram(genome = "hg19", plot.space = 0)
#' }
#' }
#'
geom_ideogram <- function(genome = "hg19", mark.color = "red", mark.alpha = 0.7, mark.line.size = 1,
Expand Down
46 changes: 24 additions & 22 deletions R/geom_protein.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,31 +33,33 @@
#' @export
#'
#' @examples
#' \dontrun{
#' library(ggplot2)
#' library(openxlsx)
#' \donttest{
#' if (requireNamespace("openxlsx", quietly = TRUE)) {
#' library(ggplot2)
#' library(openxlsx)
#'
#' # import coverage dataframe with function from openxlsx
#' coverage.file <- system.file(
#' "extdata", "Proteomics", "MS_BSA_coverage.xlsx",
#' package = "ggcoverage"
#' )
#' coverage.df <- read.xlsx(coverage.file)
#' head(coverage.df)
#' # import coverage dataframe with function from openxlsx
#' coverage.file <- system.file(
#' "extdata", "Proteomics", "MS_BSA_coverage.xlsx",
#' package = "ggcoverage"
#' )
#' coverage.df <- read.xlsx(coverage.file)
#' head(coverage.df)
#'
#' # get fasta file
#' fasta.file <- system.file(
#' "extdata", "Proteomics", "MS_BSA_coverage.fasta",
#' package = "ggcoverage"
#' )
#' # get fasta file
#' fasta.file <- system.file(
#' "extdata", "Proteomics", "MS_BSA_coverage.fasta",
#' package = "ggcoverage"
#' )
#'
#' protein.id <- "sp|P02769|ALBU_BOVIN"
#' ggplot() +
#' geom_protein(
#' coverage.df = coverage.df,
#' fasta.file = fasta.file,
#' protein.id = protein.id
#' )
#' protein.id <- "sp|P02769|ALBU_BOVIN"
#' ggplot() +
#' geom_protein(
#' coverage.df = coverage.df,
#' fasta.file = fasta.file,
#' protein.id = protein.id
#' )
#' }
#' }
geom_protein <- function(coverage.df, fasta.file, protein.id, XCorr.threshold = 2,
confidence = "High", contaminant = NULL, remove.na = TRUE,
Expand Down
Loading

0 comments on commit 6cd38ab

Please sign in to comment.