-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathplot_markers.R
executable file
·80 lines (67 loc) · 3.88 KB
/
plot_markers.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env Rscript
##############################################################
# script: plot_markers.R
# author: Jia-Xing Yue (GitHub ID: yjx1217)
# last edited: 2018.05.27
# description: plot polymorphic markers along the genome
# example: Rscript --vanilla plot_markers.R --genome_fai genome.fa.fai --marker_table SNP.markers.txt.gz (--centromere_gff ref.centromere.gff) --output_prefix output_prefix
##############################################################
library(ggplot2)
library(optparse)
library(scales)
option_list <- list(
make_option(c("--genome_fai"), type = "character", default = "genome.fa.fai",
help = "reference genome index file generated by samtools faidx [default=%default]", metavar = "character"),
make_option(c("--marker_table"), type = "character", default = NULL,
help = "input marker table file [default=%default]", metavar = "character"),
make_option(c("--output_prefix"), type = "character", default = "output_prefix",
help = "the prefix for the pdf plot [default=%default]", metavar = "character"),
make_option(c("--centromere_gff"), type = "character", default = NULL,
help = "the gff file for centromere", metavar = "character")
);
opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)
genome_fai <- read.table(opt$genome_fai, header = FALSE, sep = "\t")
colnames(genome_fai) <- c("chr", "length", "offset", "linebases", "linewidth")
marker_table <- read.table(opt$marker_table, header = TRUE, sep = "\t")
colnames(marker_table) <- c("ref_chr", "ref_start", "ref_end", "ref_allele", "query_alelle", "query_chr", "query_start", "query_end", "relative_orientation", "marker_type")
if (!is.null(opt$centromere_gff)) {
centromere_gff <- read.table(opt$centromere_gff, header = FALSE, sep = "\t")
colnames(centromere_gff) <- c("chr", "ref", "type", "start", "end", "score", "strand", "phase", "info")
}
# re-order the chromosomes
chr_list <- as.vector(unique(genome_fai$chr))
genome_fai$chr <-factor(chr_list, levels = rev(chr_list))
marker_colors <- c(SNP = "red", INDEL = "blue")
# plot
line_thickness_adjust <- 8
if (!is.null(opt$centromere)) {
ggplot(data=genome_fai, aes(x = chr, y = length)) +
geom_bar(stat = "identity", width = 0.5, fill = "grey90", color = "black") +
geom_segment(data = marker_table, aes(x = ref_chr, xend = ref_chr, y = ref_start - line_thickness_adjust, yend = ref_end + line_thickness_adjust, color = marker_type),
linewidth = 4) +
geom_point(data = centromere_gff, aes(x = chr, y = (start + end) / 2),
fill = "white", shape = 21, size = 5) +
scale_color_manual(values = marker_colors, name = "Marker type") +
scale_fill_manual(values = marker_colors, name = "Marker type") +
scale_x_discrete(name = "Chromosome") +
scale_y_continuous(name = "Size (bp)", labels = comma_format()) +
coord_flip() +
ggtitle(opt$output_prefix) +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
} else {
ggplot(data=genome_fai, aes(x = chr, y = length)) +
geom_bar(stat = "identity", width = 0.5, fill = "grey90", color = "black") +
geom_segment(data = marker_table, aes(x = ref_chr, xend = ref_chr, y = ref_start - line_thickness_adjust, yend = ref_end + line_thickness_adjust, color = marker_type),
linewidth = 4) +
scale_color_manual(values = marker_colors, name = "Marker type") +
scale_fill_manual(values = marker_colors, name = "Marker type") +
scale_x_discrete(name = "Chromosome") +
scale_y_continuous(name = "Size (bp)", labels = comma_format()) +
coord_flip() +
ggtitle(opt$output_prefix) +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
}
ggsave(filename = paste0(opt$output_prefix, ".pdf"), device = "pdf", width = 8, height = 6, limitsize = FALSE)