-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathassess_CNV_significance_for_FREEC.R
executable file
·100 lines (86 loc) · 3.67 KB
/
assess_CNV_significance_for_FREEC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env Rscript
##############################################################
# script: assess_CNV_significance_for_FREEC.R
# author: Jia-Xing Yue (GitHub ID: yjx1217)
# last edited: 2018.05.27
# description: plot CNV for FREEC
# example: Rscript --vanilla --slave assess_CNV_significance_for_FREEC.R --cnv input.bam_CNV.txt --ratio input.bam_ratio.txt --output output.CNV_significance_test.txt --genome_fai genome.fa.fai
##############################################################
library("optparse")
option_list <- list(
make_option(c("--cnv"), type = "character", default = NULL,
help = "name of the bam_CNV.txt file generated by FREEC", metavar = "character"),
make_option(c("--ratio"), type = "character", default = NULL,
help = "name of the bam_ratio.txt file generated by FREEC", metavar = "character"),
make_option(c("--output"), type = "character", default = NULL,
help = "the output file name", metavar = "character"),
make_option(c("--genome_fai"), type = "character", default = NULL,
help = "path to the genome.fai file", metavar = "character")
);
opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)
cnv_data <- read.table(opt$cnv, header = FALSE, sep = "\t")
colnames(cnv_data) <- c("chr", "start", "end", "copy_number", "status")
cnv_data$MWU_test_p_value <- NA
cnv_data$KS_test_p_value <- NA
ratio_data <- read.table(opt$ratio, header = TRUE, sep = "\t")
ratio_data$Ratio[which(ratio_data$Ratio == -1)] <- NA
ratio_data_filtered <- subset(ratio_data, !is.na(ratio_data$Ratio))
genome_fai <- read.table(opt$genome_fai, header = FALSE, sep = "\t")
colnames(genome_fai) <- c("chr", "length", "offset", "linebases", "linewidth")
# re-order the chromosomes
chr_list <- as.vector(unique(genome_fai$chr))
cnv_data$chr <- factor(cnv_data$chr, levels = chr_list)
if ((nrow(cnv_data) > 0) & (nrow(ratio_data_filtered) > 0)) {
# print(paste0("pass checkpoint"))
background <- vector()
test <- as.list(1:nrow(cnv_data))
for (i in 1:nrow(cnv_data)) {
test[[i]] <- vector()
}
# define background and test sets
for (j in 1:nrow(ratio_data_filtered)) {
j_chr <- ratio_data_filtered$Chromosome[j]
j_start <- ratio_data_filtered$Start[j]
j_value <- ratio_data_filtered$Ratio[j]
flag <- 0
for (i in 1:nrow(cnv_data)) {
i_chr <- cnv_data$chr[i]
i_start <- cnv_data$start[i]
i_end <- cnv_data$end[i]
if (j_chr == i_chr) {
if ((j_start >= i_start) & (j_start <= i_end)) {
test[[i]] <- append(test[[i]], j_value)
flag <- 1
break
}
}
}
if (flag == 0) {
background <- append(background, j_value)
}
}
# loop through the CNV candidates
MWU_test <- function(test, background) {
resultw <- try(wilcox.test(test, background), silent = TRUE)
if (class(resultw)=="try-error") {
return(list("statistic" = NA, "parameter" = NA, "p.value" = NA, "null.value" = NA, "alternative" = NA, "method" = NA,"data.name" = NA))
} else {
return(resultw)
}
}
KS_test <- function(test, background){
resultks <- try(ks.test(test, background), silent = TRUE)
if (class(resultks)=="try-error") {
return(list("statistic" = NA, "p.value" = NA, "alternative" = NA, "method" = NA, "data.name" = NA))
} else {
return(resultks)
}
}
for (i in 1:nrow(cnv_data)) {
cnv_data$MWU_test_p_value[i] <- MWU_test(test[[i]], background)$p.value
cnv_data$KS_test_p_value[i] <- KS_test(test[[i]], background)$p.value
}
}
cnv_data_sorted <- cnv_data[order(cnv_data$chr),]
write.table(cnv_data_sorted, opt$output, sep = "\t", row.names = FALSE, quote = FALSE)