# ideas/long_UTR.R -- exploratory "long UTR" idea
#
# Recovered from a git patch whose text had been collapsed onto one line.
# Patch metadata, quoted verbatim for provenance:
#   commit:  c5ae81583f19cb75aa2e651d23b9c4e297b19bda
#   author:  Adrian Sven Geissler
#   date:    Fri, 6 Nov 2020 15:02:24 +0100
#   subject: [PATCH] long UTR adress idea
#   file:    ideas/long_UTR.R (new file, index 00000000..f987e375)
#
# What the script does:
#   1. Matches every upshift position against a combined reference set
#      (merged genes plus selected features) with distance_matching().
#   2. Plots the per-upshift minimum distance (up to 500) as a histogram.
#   3. Counts the upshifts that have no match within a distance of 50.
#
# NOTE(review): the columns `x`, `distance` and `antisense` used below are
# presumably produced by distance_matching(); its definition lives in
# scripts/distance_matching.R and is not visible here -- confirm there.

# Setup ----
# NOTE(review): dplyr, stringr and ggplot2 are presumably attached by
# analysis/00_load.R -- confirm.
source("analysis/00_load.R")
source("scripts/distance_matching.R")

# These load() calls are expected to provide `nicolas` and `merging`.
load("data/01_nicolas.rda")
load("analysis/02_merging.rda")

# Match upshifts against genes and selected features ----
cmp <- nicolas$upshifts %>%
  # Represent each upshift as a single-position interval at `pos`.
  transmute(id, start = pos, end = pos, strand) %>%
  distance_matching(
    bind_rows(
      # Merged genes, with their full start/end range.
      select(merging$merged_genes, id = merged_id, start, end, strand),
      # Features whose type is missing or contains "3'", reduced to one
      # coordinate: `start` on the "+" strand, `end` otherwise
      # (named `five`, presumably the feature's 5' end).
      nicolas$all.features %>%
        filter(is.na(type) | str_detect(type, "3'")) %>%
        mutate(five = ifelse(strand == "+", start, end)) %>%
        select(id = locus, start = five, end = five, strand)
    )
  ) %>%
  # Keep only the matches that are not flagged as antisense.
  filter(!antisense)

# Distribution of the closest match per upshift ----
cmp %>%
  group_by(x) %>%
  summarise(distance = min(distance)) %>%
  filter(distance <= 500) %>%
  ggplot(aes(distance)) +
  # bins = 30 is the ggplot2 default; stating it avoids the stat_bin() message.
  geom_histogram(bins = 30)

# Upshifts without a close match ----
# `foo`: distinct `x` values that have at least one match within distance 50.
foo <- cmp %>%
  filter(distance <= 50) %>%
  distinct(x)

# Number of distinct `x` values with no match within distance 50.
cmp %>%
  anti_join(foo, by = "x") %>%
  distinct(x) %>%
  nrow()