Skip to content

Commit

Permalink
Merge pull request #29 from MPUSP/dev
Browse files Browse the repository at this point in the history
feat: various fixes for improved tiling function
  • Loading branch information
m-jahn authored Feb 21, 2024
2 parents d41f5e4 + da7135c commit eaea7ac
Show file tree
Hide file tree
Showing 8 changed files with 123 additions and 95 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ logs/**
test/**
.snakemake
.snakemake/**
.vscode/**
1 change: 1 addition & 0 deletions .test/config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,4 @@ filter_guides:

report:
show_examples: 20
show_genomic_range: [0, 50000]
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ This table lists all parameters that can be used to run the workflow.
| export_as_gff | logical | export result table also as `.gff` file | `False` |
| REPORT | | | |
| show_examples | numeric | number of genes to show guide position | `10` |
| show_genomic_range | numeric | genome start and end pos to show tiling guides | `[0, 50000]` |

### Target type

Expand Down
1 change: 1 addition & 0 deletions config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,4 @@ filter_guides:

report:
show_examples: 20
show_genomic_range: [0, 50000]
3 changes: 2 additions & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ configfile: "config/config.yml"


# container definition (optional)
container: "oras://ghcr.io/MPUSP/snakemake-crispr-guides:1.1.0"
container: "oras://ghcr.io/MPUSP/snakemake-crispr-guides:1.2.0"


# target rule
Expand Down Expand Up @@ -138,6 +138,7 @@ rule report_html:
"results/filter_guides/guideRNAs_{guideset}.csv",
guideset=config["design_guides"]["target_type"],
),
list_tx=rules.design_guides.output.list_tx,
params:
config["report"],
output:
Expand Down
191 changes: 101 additions & 90 deletions workflow/notebooks/report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ target_type <- snakemake@config$design_guides$target_type
tss_window <- snakemake@config$design_guides$tss_window
tiling_window <- snakemake@config$design_guides$tiling_window
n_examples <- snakemake@config$report$show_examples
show_genomic_range <- snakemake@config$report$show_genomic_range
# custom ggplot2 theme that is reused for all later plots
custom_colors <- c("#E7298A", "#66A61E", "#E6AB02", "#7570B3", "#B3B3B3", "#1B9E77", "#D95F02", "#A6761D")
Expand Down Expand Up @@ -101,7 +102,7 @@ list_fontpars <- list(face = "plain", size = 14)
# default figure size
figwidth <- 7.0
figheight <- 4.5
figheight2 <- 1 + (n_examples * 0.75)
figheight2 <- 1.1 + (n_examples * 0.75)
# function to export an image as svg and png
save_plot <- function(pl, path = "../figures/", width = 6, height = 6) {
Expand Down Expand Up @@ -141,10 +142,19 @@ Only the head of the table is shown (first 10 rows).
*NOTE: tables are only rendered in HTML output but not PDF.*

```{r, echo = FALSE}
input_tx <- snakemake@input$list_tx
list_tx <- read_csv(input_tx, show_col_types = FALSE)
for (tg in target_type) {
input_file <- paste0("results/filter_guides/guideRNAs_", tg, ".csv")
assign(paste0("df_", tg), read_csv(input_file, show_col_types = FALSE))
}
if ("df_intergenic" %in% ls()) {
figheight3 <- 1.1 + (length(unique(df_intergenic$seqnames)) * 0.75)
} else {
figheight3 <- figheight2
}
```

### Targets (genes) {-}
Expand Down Expand Up @@ -458,7 +468,7 @@ for (tg in target_type) {
if (tg == "intergenic") {
titletext <- "Distribution of all guide RNAs over the tiling windows"
df_guides <- df_guides %>%
mutate(dist_to_tss = start - (tiling_window * (tx_window - 1)))
mutate(dist_to_tss = start - tiling_window * tx_window)
}
if (tg == "target") {
titletext <- "Distribution of all guide RNAs over the TSS window"
Expand Down Expand Up @@ -519,116 +529,117 @@ print("No data with guide RNAs for no-target controls available.")
- The number of example genes/regions is controlled by parameter `show_examples` in the config file

```{r, echo = FALSE}
# function to draw guide RNAs
draw_guide_location <- function(df, n_examples) {
df <- filter(df, tx_name %in% unique(df$tx_name)[1:n_examples])
if ("tss_strand" %in% colnames(df)) {
df %>%
mutate(tx_name = paste0(tx_name, " (", tss_strand, ")") %>%
factor(., unique(.))) %>%
mutate(
xstart = case_when(
tss_strand == "+" & strand == "+" ~ start - tss_pos,
tss_strand == "+" & strand == "-" ~ end - tss_pos,
tss_strand == "-" & strand == "-" ~ -1 * (end - tss_pos),
tss_strand == "-" & strand == "+" ~ -1 * (start - tss_pos),
),
xend = case_when(
tss_strand == "+" & strand == "+" ~ end - tss_pos,
tss_strand == "+" & strand == "-" ~ start - tss_pos,
tss_strand == "-" & strand == "-" ~ -1 * (start - tss_pos),
tss_strand == "-" & strand == "+" ~ -1 * (end - tss_pos),
),
y = ifelse(strand == tss_strand, 0.25, -0.25)
) %>%
ggplot(aes(x = xstart, y = y, xend = xend, yend = y)) +
geom_segment(
x = tss_window[1], y = 0, xend = tss_window[2], yend = 0,
linewidth = 2, lineend = "square", linejoin = "mitre", color = grey(0.7)
) +
geom_segment(aes(x = 0, y = 0, xend = tx_width, yend = 0),
linewidth = 1.33, color = grey(0.3), lineend = "square", linejoin = "mitre", arrow = arrow(
angle = 30, length = unit(0.02, "inches"),
type = "closed"
)
) +
geom_segment(aes(color = ifelse(strand == tss_strand, "+", "-")),
linewidth = 1.5, lineend = "square", linejoin = "mitre", arrow = arrow(
angle = 30, length = unit(0.01, "inches"),
type = "closed"
)
) +
facet_wrap(~tx_name, ncol = 1) +
coord_cartesian(ylim = c(-0.5, 0.5), xlim = tss_window) +
labs(
x = "distance to TSS [nt]", y = "",
title = paste0("Guide RNA location in TSS window, for the first ", n_examples, " genes"),
subtitle = paste0(
"Colored arrows: guide RNAs, gray line: TSS window, black arrow: transcript.\n",
"Note that genes on the '-' strand are shown as reverse-complement"
)
) +
custom_theme(legend.position = 0) +
scale_color_manual(values = custom_colors[1:2]) +
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
} else {
# for no-gene targeting guides, the situation is much simpler
df %>%
mutate(
start = start - (tiling_window * (tx_window - 1)),
end = end - (tiling_window * (tx_window - 1)),
xstart = ifelse(strand == "+", start, end),
xend = ifelse(strand == "+", end, start),
y = ifelse(strand == "+", 0.25, -0.25)
) %>%
ggplot(aes(x = xstart, y = y, xend = xend, yend = y)) +
geom_segment(
x = 0, y = 0, xend = tiling_window, yend = 0,
linewidth = 2, lineend = "square", linejoin = "mitre", color = grey(0.7)
) +
geom_segment(aes(color = strand),
linewidth = 1.5, lineend = "square", linejoin = "mitre", arrow = arrow(
angle = 30, length = unit(0.01, "inches"),
type = "closed"
)
) +
facet_wrap(~tx_name, ncol = 1) +
coord_cartesian(ylim = c(-0.5, 0.5), xlim = c(0, tiling_window)) +
labs(
x = "position in tile [nt]", y = "",
title = paste0("Guide RNA location in genomic tiles, for the first ", n_examples, " tiles"),
subtitle = "Colored arrows: guide RNAs, gray line: genomic window."
) +
custom_theme(legend.position = 0) +
scale_color_manual(values = custom_colors[1:2]) +
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
}
# function to draw guide RNAs for genes/targets
draw_guide_target <- function(df, n_examples) {
df %>%
filter(tx_name %in% unique(df$tx_name)[1:n_examples]) %>%
mutate(tx_name = paste0(tx_name, " (", tss_strand, ")") %>%
factor(., unique(.))) %>%
mutate(
xstart = case_when(
tss_strand == "+" & strand == "+" ~ start - tss_pos,
tss_strand == "+" & strand == "-" ~ end - tss_pos,
tss_strand == "-" & strand == "-" ~ -1 * (end - tss_pos),
tss_strand == "-" & strand == "+" ~ -1 * (start - tss_pos),
),
xend = case_when(
tss_strand == "+" & strand == "+" ~ end - tss_pos,
tss_strand == "+" & strand == "-" ~ start - tss_pos,
tss_strand == "-" & strand == "-" ~ -1 * (start - tss_pos),
tss_strand == "-" & strand == "+" ~ -1 * (end - tss_pos),
),
y = ifelse(strand == tss_strand, 0.25, -0.25)
) %>%
ggplot(aes(x = xstart, y = y, xend = xend, yend = y)) +
geom_segment(
x = tss_window[1], y = 0, xend = tss_window[2], yend = 0,
linewidth = 2, lineend = "square", linejoin = "mitre", color = grey(0.7)
) +
geom_segment(aes(x = 0, y = 0, xend = tx_width, yend = 0),
linewidth = 1.33, color = grey(0.3), lineend = "square", linejoin = "mitre", arrow = arrow(
angle = 30, length = unit(0.02, "inches"),
type = "closed"
)
) +
geom_segment(aes(color = ifelse(strand == tss_strand, "+", "-")),
linewidth = 1.5, lineend = "square", linejoin = "mitre", arrow = arrow(
angle = 30, length = unit(0.01, "inches"),
type = "closed"
)
) +
facet_wrap(~tx_name, ncol = 1) +
coord_cartesian(ylim = c(-0.5, 0.5), xlim = tss_window) +
labs(
x = "distance to TSS [nt]", y = "",
title = paste0("Guide RNA location in TSS window, for the first ", n_examples, " genes"),
subtitle = paste0(
"Colored arrows: guide RNAs, gray line: TSS window, black arrow: transcript.\n",
"Note that genes on the '-' strand are shown as reverse-complement"
)
) +
custom_theme(legend.position = 0) +
scale_color_manual(values = custom_colors[1:2]) +
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
}
# for no-gene targeting guides, we plot an overview about the genomic region(s)
draw_guide_intergenic <- function(
df, n_examples, show_genomic_range = c(0, 50000)) {
df %>%
filter(seqnames %in% unique(df$seqnames)[1:n_examples]) %>%
mutate(
xstart = ifelse(strand == "+", start, end),
xend = ifelse(strand == "+", end, start),
y = ifelse(strand == "+", 0.25, -0.25)
) %>%
ggplot(aes(x = xstart, y = y, xend = xend, yend = y)) +
geom_segment(
data = list_tx,
aes(x = start, y = ifelse(strand == "+", 0.125, -0.125), xend = end, yend = ifelse(strand == "+", 0.125, -0.125)),
linewidth = 2, lineend = "square", linejoin = "mitre", color = grey(0.7)
) +
geom_segment(aes(color = strand),
linewidth = 1.5, lineend = "square", linejoin = "mitre", arrow = arrow(
angle = 30, length = unit(0.01, "inches"),
type = "closed"
)
) +
facet_wrap(~seqnames, ncol = 1) +
coord_cartesian(ylim = c(-0.5, 0.5), xlim = show_genomic_range) +
labs(
x = "position [nt]", y = "",
title = paste0("Guide RNA location per chromosome (max. ", n_examples, "shown)"),
subtitle = "Colored arrows: guide RNAs, gray lines: annotated genes."
) +
custom_theme(legend.position = 0) +
scale_color_manual(values = custom_colors[1:2]) +
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
}
```

### Targets (genes) {-}

```{r, echo = FALSE, warning = FALSE, fig.width = figwidth, fig.height = figheight2}
if ("target" %in% target_type) {
draw_guide_location(df_target, n_examples)
draw_guide_target(df_target, n_examples)
} else {
print("No data with guide RNAs targeting genes etc available.")
}
```

### Intergenic regions {-}

```{r, echo = FALSE, warning = FALSE, fig.width = figwidth, fig.height = figheight2}
```{r, echo = FALSE, warning = FALSE, fig.width = figwidth, fig.height = figheight3}
if ("intergenic" %in% target_type) {
draw_guide_location(df_intergenic, n_examples)
draw_guide_intergenic(df_intergenic, n_examples, show_genomic_range)
} else {
print("No data with guide RNAs targeting intergenic regions available.")
}
```

### No-target controls (NTC) {-}

```{r, echo = FALSE, warning = FALSE, fig.width = figwidth, fig.height = figheight2}
```{r, echo = FALSE, warning = FALSE, fig.width = figwidth, fig.height = figheight}
print("No data with guide RNAs for no-target controls available.")
```

Expand Down
4 changes: 2 additions & 2 deletions workflow/scripts/design_guides.R
Original file line number Diff line number Diff line change
Expand Up @@ -182,11 +182,11 @@ if ("intergenic" %in% target_type) {
# a similar procedure for benchmarking intergenic guides is applied as
# as for targets (e.g. genes)
# first, to filter intergenic regions by score later on, we apply
# a tiling approach: guides are grouped into bins of 1000 nt width
# a tiling approach: guides are grouped into bins of for example 1000 nt width
# for later filtering
list_intergenic$tx_window <- as_tibble(list_intergenic) %>%
group_by(seqnames) %>%
mutate(tx_window = cut_interval(start, length = tiling_window, labels = FALSE)) %>%
mutate(tx_window = floor(start / tiling_window)) %>%
pull(tx_window)
list_intergenic$tx_name <- with(
list_intergenic,
Expand Down
16 changes: 14 additions & 2 deletions workflow/scripts/filter_guides.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,18 @@ if (strands != "both") {
if (strands == "coding") {
if (target_type == "target") {
filter_strand <- strand(list_guides) != list_guides$tss_strand
} else {
} else if (target_type == "intergenic") {
filter_strand <- strand(list_guides) == "+"
} else if (target_type == "ntc") {
filter_strand <- rep(TRUE, length(list_guides))
}
} else if (strands == "template") {
if (target_type == "target") {
filter_strand <- strand(list_guides) == list_guides$tss_strand
} else {
} else if (target_type == "intergenic") {
filter_strand <- strand(list_guides) == "-"
} else if (target_type == "ntc") {
filter_strand <- rep(TRUE, length(list_guides))
}
} else {
stop("parameter 'strands' must be one of 'coding', 'template', or 'both'")
Expand Down Expand Up @@ -280,6 +284,14 @@ if (!is.null(fiveprime_linker) || !is.null(threeprime_linker)) {
)
}

# check if guides remain, otherwise throw warning
if (!nrow(df_guides)) {
warn_no_guides <- "the final list of guide RNAs after filtering is 0, omitting export."
messages <- append(messages, warn_no_guides)
warning(warn_no_guides)
export_as_gff <- FALSE
}

# export results as CSV table and GFF file (optional)
write_csv(df_guides, output_csv)
if (export_as_gff) {
Expand Down

0 comments on commit eaea7ac

Please sign in to comment.