Skip to content

Commit afc2b17

Browse files
committed
feat: improved arrow style for genomic features, closes showteeth#28
1 parent 9fa5b1d commit afc2b17

20 files changed

+453
-358
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,5 @@ inst/doc
22
.Rhistory
33
.Rproj*
44
.Rproj.user
5+
vignettes/*.html
6+
vignettes/png/*

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ export(FormatTrack)
1414
export(GetConsensusPeak)
1515
export(GetPlotData)
1616
export(LoadTrackFile)
17+
export(geom_arrows)
1718
export(geom_base)
1819
export(geom_cnv)
1920
export(geom_coverage)
@@ -125,6 +126,7 @@ importFrom(ggplot2,unit)
125126
importFrom(ggrepel,geom_text_repel)
126127
importFrom(grDevices,col2rgb)
127128
importFrom(grDevices,colorRampPalette)
129+
importFrom(grDevices,grey)
128130
importFrom(gridExtra,tableGrob)
129131
importFrom(gridExtra,ttheme_default)
130132
importFrom(magrittr,"%>%")

R/geom_gene.R

Lines changed: 121 additions & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,16 @@
66
#' and tight (place non-overlap genes in one line). Default: loose.
77
#' @param gene.size The line size of gene. Default: 1.
88
#' @param utr.size The line size of UTR. Default: 2.
9-
#' @param exon.size The line size of exon. Default: 4.
10-
#' @param arrow.size The line size of arrow. Default: 1.
9+
#' @param exon.size The line size of exon. Default: 3.
10+
#' @param arrow.size The line size of arrow. Default: 1.5.
11+
#' @param arrow.gap The gap distance between intermittent arrows. Default: NULL.
12+
#' Set arrow.num and arrow.gap to NULL to suppress intermittent arrows.
13+
#' @param arrow.num Total number of intermittent arrows over whole region. Default: 50.
14+
#' Set arrow.num and arrow.gap to NULL to suppress intermittent arrows.
1115
#' @param color.by Color the line by. Default: strand.
1216
#' @param fill.color Color used for \code{color.by}.
13-
#' Default: darkblue for - (minus strand), darkgreen for + (plus strand).
17+
#' Default: blue for - (minus strand), green for + (plus strand).
1418
#' @param show.utr Logical value, whether to show UTR. Default: TRUE.
15-
#' @param arrow.gap The gap distance between arrow. Default: NULL.
16-
#' @param arrow.num Total arrow num of whole region. Default: 50.
17-
#' @param arrow.length The length of arrow. Default: 0.06.
1819
#' @param label.size The size of gene label. Default: 3.
1920
#' @param label.vjust The vjust of gene label. Default: 2.
2021
#' @param plot.space Top and bottom margin. Default: 0.1.
@@ -32,44 +33,77 @@
3233
#' @export
3334
#'
3435
#' @examples
35-
#' # library(ggcoverage)
36-
#' # library(utils)
37-
#' # library(rtracklayer)
38-
#' # meta.file <- system.file("extdata", "RNA-seq", "meta_info.csv", package = "ggcoverage")
39-
#' # sample.meta <- utils::read.csv(meta.file)
36+
#' library(ggcoverage)
37+
#' library(utils)
38+
#' library(rtracklayer)
39+
#'
40+
#' # load metadata
41+
#' meta_file <- system.file("extdata", "RNA-seq", "meta_info.csv", package = "ggcoverage")
42+
#' sample_meta <- read.csv(meta_file)
43+
#'
4044
#' # track folder
41-
#' # track.folder <- system.file("extdata", "RNA-seq", package = "ggcoverage")
45+
#' track_folder <- system.file("extdata", "RNA-seq", package = "ggcoverage")
46+
#'
4247
#' # load bigwig file
43-
#' # track.df <- LoadTrackFile(
44-
#' # track.folder = track.folder, format = "bw",
45-
#' # meta.info = sample.meta
46-
#' # )
47-
#' # gtf.file <- system.file("extdata", "used_hg19.gtf", package = "ggcoverage")
48-
#' # gtf.gr <- rtracklayer::import.gff(con = gtf.file, format = "gtf")
49-
#' # basic.coverage <- ggcoverage(data = track.df, color = "auto", range.position = "out")
50-
#' # basic.coverage + geom_gene(gtf.gr = gtf.gr)
51-
geom_gene <- function(gtf.gr, overlap.gene.gap = 0.1, overlap.style = "loose", gene.size = 1,
52-
utr.size = 2, exon.size = 4, arrow.size = 1, color.by = "strand",
53-
fill.color = c("-" = "darkblue", "+" = "darkgreen"), show.utr = TRUE,
54-
arrow.gap = NULL, arrow.num = 50, arrow.length = 0.06,
55-
label.size = 3, label.vjust = 2, plot.space = 0.1, plot.height = 0.2) {
56-
structure(list(
57-
gtf.gr = gtf.gr,
58-
overlap.gene.gap = overlap.gene.gap, overlap.style = overlap.style, gene.size = gene.size,
59-
utr.size = utr.size, exon.size = exon.size, arrow.size = arrow.size, color.by = color.by,
60-
fill.color = fill.color, show.utr = show.utr, arrow.gap = arrow.gap, arrow.num = arrow.num,
61-
arrow.length = arrow.length, label.size = label.size, label.vjust = label.vjust,
62-
plot.space = plot.space, plot.height = plot.height
63-
),
64-
class = "gene"
48+
#' track_df <- LoadTrackFile(
49+
#' track.folder = track_folder,
50+
#' format = "bw",
51+
#' region = "chr14:21,677,306-21,737,601",
52+
#' extend = 2000,
53+
#' meta.info = sample_meta
54+
#' )
55+
#'
56+
#' # load GTF file
57+
#' gtf_file <- system.file("extdata", "used_hg19.gtf", package = "ggcoverage")
58+
#' gtf_gr <- rtracklayer::import.gff(con = gtf_file, format = "gtf")
59+
#'
60+
#' # plot coverage and gene annotation
61+
#' basic.coverage <- ggcoverage(data = track_df, range.position = "out")
62+
#' basic.coverage +
63+
#' geom_gene(gtf.gr = gtf_gr)
64+
geom_gene <- function(gtf.gr,
65+
overlap.gene.gap = 0.1,
66+
overlap.style = "loose",
67+
gene.size = 1,
68+
utr.size = 2,
69+
exon.size = 3,
70+
arrow.size = 1.5,
71+
arrow.gap = NULL,
72+
arrow.num = 50,
73+
color.by = "strand",
74+
fill.color = c("-" = "cornflowerblue",
75+
"+" = "darkolivegreen3"),
76+
show.utr = FALSE,
77+
label.size = 3,
78+
label.vjust = 2,
79+
plot.space = 0.1,
80+
plot.height = 0.2) {
81+
structure(
82+
list(
83+
gtf.gr = gtf.gr,
84+
overlap.gene.gap = overlap.gene.gap,
85+
overlap.style = overlap.style,
86+
gene.size = gene.size,
87+
utr.size = utr.size,
88+
exon.size = exon.size,
89+
arrow.size = arrow.size,
90+
arrow.gap = arrow.gap,
91+
arrow.num = arrow.num,
92+
color.by = color.by,
93+
fill.color = fill.color,
94+
show.utr = show.utr,
95+
label.size = label.size,
96+
label.vjust = label.vjust,
97+
plot.space = plot.space,
98+
plot.height = plot.height
99+
),
100+
class = "gene"
65101
)
66102
}
67103

68104
#' @export
69105
ggplot_add.gene <- function(object, plot, object_name) {
70106
# get plot data
71-
# track.data <- plot$layers[[1]]$data
72-
# get plot data, plot data should contain bins
73107
if ("patchwork" %in% class(plot)) {
74108
track.data <- plot[[1]]$layers[[1]]$data
75109
} else {
@@ -78,9 +112,7 @@ ggplot_add.gene <- function(object, plot, object_name) {
78112
# prepare plot range
79113
# the plot region are not normal, so start is minimum value
80114
plot.range.chr <- track.data[1, "seqnames"]
81-
# plot.range.start <- track.data[1, "start"]
82115
plot.range.start <- min(track.data[, "start"])
83-
# plot.range.end <- track.data[nrow(track.data), "end"]
84116
plot.range.end <- max(track.data[, "end"])
85117
plot.range.gr <- GenomicRanges::GRanges(
86118
seqnames = plot.range.chr,
@@ -99,15 +131,12 @@ ggplot_add.gene <- function(object, plot, object_name) {
99131
show.utr <- object$show.utr
100132
arrow.gap <- object$arrow.gap
101133
arrow.num <- object$arrow.num
102-
arrow.length <- object$arrow.length
103134
label.size <- object$label.size
104135
label.vjust <- object$label.vjust
105136
plot.space <- object$plot.space
106137
plot.height <- object$plot.height
107138

108-
# process
109139
# get gene in region
110-
# gtf.gr <- rtracklayer::import.gff(gtf.file,format = 'gtf')
111140
gtf.df.used <- IRanges::subsetByOverlaps(x = gtf.gr, ranges = plot.range.gr) %>% as.data.frame()
112141
# check information
113142
used.gtf.columns <- c("seqnames", "start", "end", "strand", "type", "gene_name")
@@ -160,152 +189,77 @@ ggplot_add.gene <- function(object, plot, object_name) {
160189
gene.info.used.utr$start <- as.numeric(gene.info.used.utr$start)
161190
gene.info.used.utr$end <- as.numeric(gene.info.used.utr$end)
162191
# change UTR
163-
if (nrow(gene.info.used.utr) == 0) {
164-
warning("No UTR detected in provided GTF!")
192+
if (show.utr & nrow(gene.info.used.utr) == 0) {
193+
warning("No UTR detected in provided GTF, omitting plotting UTRs.")
165194
show.utr <- FALSE
166195
}
167-
# create plot without arrow
196+
# plot genomic features with arrow at the end
168197
if (show.utr) {
169198
# substract UTR from exon
170199
gene.exon.utr <- SplitExonUTR(exon.df = gene.info.used.exon, utr.df = gene.info.used.utr)
171200
gene.info.used.exon <- gene.exon.utr$exon
172201
gene.info.used.utr <- gene.exon.utr$utr
173-
gene.plot <- ggplot() +
174-
geom_segment(
175-
data = gene.info.used.gene,
176-
mapping = aes_string(
177-
x = "start",
178-
y = "group",
179-
xend = "end",
180-
yend = "group",
181-
color = color.by
182-
),
183-
show.legend = FALSE,
184-
size = gene.size
185-
) +
186-
geom_segment(
187-
data = gene.info.used.utr,
188-
mapping = aes_string(
189-
x = "start",
190-
y = "group",
191-
xend = "end",
192-
yend = "group",
193-
color = color.by
194-
),
195-
show.legend = FALSE,
196-
size = utr.size
197-
) +
198-
geom_segment(
199-
data = gene.info.used.exon,
200-
mapping = aes_string(
201-
x = "start",
202-
y = "group",
203-
xend = "end",
204-
yend = "group",
205-
color = color.by
206-
),
207-
show.legend = FALSE,
208-
size = exon.size
209-
)
210-
} else {
211-
gene.plot <- ggplot() +
212-
geom_segment(
213-
data = gene.info.used.gene,
214-
mapping = aes_string(
215-
x = "start",
216-
y = "group",
217-
xend = "end",
218-
yend = "group",
219-
color = color.by
220-
),
221-
show.legend = FALSE,
222-
size = gene.size
223-
) +
224-
geom_segment(
225-
data = gene.info.used.exon,
226-
mapping = aes_string(
227-
x = "start",
228-
y = "group",
229-
xend = "end",
230-
yend = "group",
231-
color = color.by
232-
),
233-
show.legend = FALSE,
234-
size = exon.size
235-
)
202+
}
203+
gene.plot <- ggplot() +
204+
geom_arrows(gene.info.used.gene, color.by, gene.size, arrow.size) +
205+
geom_arrows(gene.info.used.exon, color.by, exon.size, arrow.size)
206+
if (show.utr) {
207+
gene.plot <- gene.plot +
208+
geom_arrows(gene.info.used.utr, color.by, utr.size, arrow.size)
236209
}
237210

238-
if (is.null(arrow.gap)) {
239-
if (is.null(arrow.num)) {
240-
stop("Please provide either arrow.num or arrow.gap!")
241-
} else {
211+
if (!is.null(arrow.gap) || !is.null(arrow.num)) {
212+
if (!is.null(arrow.num)) {
242213
arrow.gap <- (plot.range.end - plot.range.start) / arrow.num
243214
}
244-
}
245-
arrow.list <- list()
246-
# create arrow based on gene
247-
for (i in 1:nrow(gene.info.used.gene)) {
248-
gene.seq <- as.character(gene.info.used.gene[i, "seqnames"])
249-
gene.start <- as.numeric(gene.info.used.gene[i, "start"])
250-
gene.end <- as.numeric(gene.info.used.gene[i, "end"])
251-
gene.strand <- as.character(gene.info.used.gene[i, "strand"])
252-
gene.type <- as.character(gene.info.used.gene[i, "type"])
253-
gene.gene_type <- as.character(gene.info.used.gene[i, "gene_type"])
254-
gene.name <- as.character(gene.info.used.gene[i, "gene_name"])
255-
gene.group <- as.numeric(gene.info.used.gene[i, "group"])
256-
gene.gap <- gene.end - gene.start
257-
if (gene.gap <= arrow.gap) {
258-
# create only one arrow
259-
arrow.pos <- floor((gene.end + gene.start) / 2)
260-
arrow.list[[gene.name]] <- c(
261-
gene.seq, arrow.pos, arrow.pos + 1, gene.strand,
262-
gene.type, gene.gene_type, gene.name, gene.group
263-
)
264-
} else {
265-
gene.arrow.num <- floor(gene.gap / arrow.gap)
266-
gene.arrow.start <- (arrow.gap * 0:gene.arrow.num) + gene.start
267-
gene.arrow.end <- gene.arrow.start + 1
268-
for (grn in 1:length(gene.arrow.start)) {
269-
arrow.list[[paste(gene.name, grn, sep = "_")]] <-
270-
c(
271-
gene.seq, gene.arrow.start[grn], gene.arrow.end[grn], gene.strand,
272-
gene.type, gene.gene_type, gene.name, gene.group
273-
)
215+
arrow.list <- list()
216+
# create arrow based on gene
217+
for (i in 1:nrow(gene.info.used.gene)) {
218+
gene.seq <- as.character(gene.info.used.gene[i, "seqnames"])
219+
gene.start <- as.numeric(gene.info.used.gene[i, "start"])
220+
gene.end <- as.numeric(gene.info.used.gene[i, "end"])
221+
gene.strand <- as.character(gene.info.used.gene[i, "strand"])
222+
gene.type <- as.character(gene.info.used.gene[i, "type"])
223+
gene.gene_type <- as.character(gene.info.used.gene[i, "gene_type"])
224+
gene.name <- as.character(gene.info.used.gene[i, "gene_name"])
225+
gene.group <- as.numeric(gene.info.used.gene[i, "group"])
226+
gene.gap <- gene.end - gene.start
227+
if (gene.gap <= arrow.gap) {
228+
# create only one arrow
229+
arrow.pos <- floor((gene.end + gene.start) / 2)
230+
arrow.list[[gene.name]] <- c(
231+
gene.seq, arrow.pos, arrow.pos + 1, gene.strand,
232+
gene.type, gene.gene_type, gene.name, gene.group
233+
)
234+
} else {
235+
gene.arrow.num <- floor(gene.gap / arrow.gap)
236+
gene.arrow.start <- (arrow.gap * 0:gene.arrow.num) + gene.start
237+
gene.arrow.end <- gene.arrow.start + 1
238+
for (grn in 1:length(gene.arrow.start)) {
239+
arrow.list[[paste(gene.name, grn, sep = "_")]] <-
240+
c(
241+
gene.seq, gene.arrow.start[grn], gene.arrow.end[grn], gene.strand,
242+
gene.type, gene.gene_type, gene.name, gene.group
243+
)
244+
}
274245
}
275246
}
247+
arrow.df <- do.call(rbind, arrow.list) %>% as.data.frame()
248+
colnames(arrow.df) <- c("seqnames", "start", "end", "strand", "type", "gene_type", "gene_name", "group")
249+
arrow.df$start <- as.numeric(arrow.df$start)
250+
arrow.df$end <- as.numeric(arrow.df$end)
251+
arrow.df$group <- as.numeric(arrow.df$group)
252+
gene.plot <- gene.plot +
253+
geom_arrows(arrow.df, color.by, gene.size / 2, arrow.size, 35, TRUE)
276254
}
277-
arrow.df <- do.call(rbind, arrow.list) %>% as.data.frame()
278-
colnames(arrow.df) <- c("seqnames", "start", "end", "strand", "type", "gene_type", "gene_name", "group")
279-
arrow.df$start <- as.numeric(arrow.df$start)
280-
arrow.df$end <- as.numeric(arrow.df$end)
281-
arrow.df$group <- as.numeric(arrow.df$group)
282-
283-
gene.arrow.plot <- gene.plot + geom_segment(
284-
data = arrow.df,
285-
mapping = aes_string(
286-
x = "start",
287-
y = "group",
288-
xend = "end",
289-
yend = "group",
290-
color = color.by
291-
),
292-
arrow = arrow(
293-
ends = ifelse(arrow.df$strand == "-", "first", "last"),
294-
type = "open",
295-
angle = 45,
296-
length = unit(x = arrow.length, units = "inches")
297-
),
298-
show.legend = FALSE,
299-
size = arrow.size
300-
)
301255

302256
label.df <- data.frame(
303257
pos = (gene.info.used.gene$start + gene.info.used.gene$end) / 2,
304258
group = gene.info.used.gene$group,
305259
gene = gene.info.used.gene$gene_name
306260
)
307261

308-
gene.final.plot <- gene.arrow.plot +
262+
gene.final.plot <- gene.plot +
309263
geom_text(
310264
data = label.df,
311265
mapping = aes_string(x = "pos", y = "group", label = "gene"),
@@ -317,6 +271,7 @@ ggplot_add.gene <- function(object, plot, object_name) {
317271
fill.color = fill.color, x.range = c(plot.range.start, plot.range.end),
318272
margin.len = plot.space
319273
)
274+
320275
# assemble plot
321276
patchwork::wrap_plots(plot + theme(plot.margin = margin(t = plot.space, b = plot.space)),
322277
gene.final.plot,

0 commit comments

Comments
 (0)