-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path07_CTCF_Threshold_mm.Rmd
125 lines (103 loc) · 3.96 KB
/
07_CTCF_Threshold_mm.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
---
title: "CTCF threshold analysis"
author: "Mikhail Dozmorov"
date: "`r Sys.Date()`"
output:
pdf_document:
toc: no
html_document:
theme: cerulean
toc: yes
---
```{r setup, echo=FALSE, message=FALSE, warning=FALSE}
# Set up the environment
library(knitr)
opts_chunk$set(cache.path='cache/', fig.path='img/', cache=F, tidy=T, fig.keep='high', echo=F, dpi=100, warnings=F, message=F, comment=NA, warning=F, results='as.is', fig.width = 10, fig.height = 6) #out.width=700,
library(pander)
panderOptions('table.split.table', Inf)
set.seed(1)
```
# Libraries
```{r libraries}
library(tidyverse)
library(readxl)
library(writexl)
library(cowplot)
library(stringr)
library("ggsci")
library(scales)
# scales::show_col(pal_lancet("lanonc")(8))
mycols = pal_lancet("lanonc")(8)
# Color palette for the heatmap, https://www.nceas.ucsb.edu/~frazier/RSpatialGuides/colorPaletteCheatsheet.pdf
col3 <- colorRampPalette(c('blue', 'white', 'red'))(20)
# col3 <- colorRampPalette(c('blue', 'gray', 'yellow'))(20)
# col3 <- colorRampPalette(c('green', 'black', 'red'))(20)
# col3 <- colorRamps::green2red(n = 20)
library(GenomicRanges)
```
# Settings
```{r settings}
dir_data <- "/Users/mdozmorov/Documents/Data/GoogleDrive/CTCF.dev"
# Project folder path
dir_project <- "/Users/mdozmorov/Documents/Work/GitHub/CTCF.dev"
# Input files
fileNameIn1 <- file.path(dir_project, "RData", "mm10.MA0139.1.bed")
fileNameIn2 <- file.path(dir_project, "data", "mm10-CTCF.bed")
# Output files
fileNameOut1 <- file.path(dir_project, "results", "Figure_mouse_pvalues_threshold.svg")
```
# Load data
```{r data}
# Read in data
mtx <- read_tsv(fileNameIn1, col_names = FALSE, col_types = c("cddcdcdd"))
gr_meme <- GRanges(seqnames = mtx$X1, ranges = IRanges(start = mtx$X2, end = mtx$X3), strand = mtx$X6)
gr_meme$pval <- mtx$X8
gr_meme$qval <- mtx$X9
mtx1 <- read_tsv(fileNameIn2, col_names = FALSE, col_types = c("cddccc"))
gr_encode <- GRanges(seqnames = mtx1$X1, ranges = IRanges(start = mtx1$X2, end = mtx1$X3))
```
## Summary statistics
```{r}
print("Summary of full MEME p-value distribution")
summary(gr_meme$pval)
print(paste("Length of full MEME CTCF set:", length(gr_meme)))
print(paste("Length of ENCODE SCREEN CTCF set:", length(gr_encode)))
```
## Scanning the threshold
```{r}
# thresholds <- seq(from = 1E-8, to = 1E-4, length.out = 10) %>% rev()
thresholds <- c(1.00E-4, 5.00E-4, 1.00E-5, 5.00E-5, 1.00E-6, 5.00E-6, 1.00E-7, 5.00E-7, 1.00E-8)
# formatC(thresholds, format = "e")
gr_meme_pos_list <- vector(mode = "numeric", length = length(thresholds))
gr_meme_neg_list <- vector(mode = "numeric", length = length(thresholds))
for (i in 1:length(thresholds)) {
pval_cutoff <- thresholds[i]
gr_meme_cutoff <- gr_meme[gr_meme$pval < pval_cutoff]
gr_meme_overlaps <- countOverlaps(gr_meme_cutoff, gr_encode)
gr_meme_pos_list[i] <- sum(gr_meme_overlaps > 0) / length(gr_meme_overlaps)
gr_meme_neg_list[i] <- sum(gr_meme_overlaps == 0) / length(gr_meme_overlaps)
}
gr_summary <- data.frame(Threshold = thresholds, TP = gr_meme_pos_list, FP = gr_meme_neg_list)
mtx_to_plot <- pivot_longer(gr_summary, cols = c("TP", "FP"), names_to = "Type")
p <- ggplot(mtx_to_plot, aes(x = Threshold, y = value, color = Type)) +
geom_line() +
scale_x_continuous(breaks = thresholds, labels = scientific) +
scale_y_continuous(breaks = seq(0, 1, 0.2)) +
scale_color_manual(values = mycols[1:2]) +
coord_trans(x ="log10") +
geom_vline(xintercept = 1E-6, linetype = "dashed", color = "black") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
# axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
plot.title = element_text(hjust = 0.5),
legend.text = element_text(size = 6)) +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
xlab("FIMO p-value threshold") +
ylab("FP/TP rate")
# ggsave(filename = fileNameOut1, width = 4.5, height = 1.5)
dev.off()
svg(filename = fileNameOut1, width = 4.5, height = 1.5)
print(p)
dev.off()
```