Skip to content

Commit 86e83ef

Browse files
author
RubyFore
committed
creation of bladderpdo branch and CNV-segfile-annotation.R
Creating a new branch to work in for the bladder PDO data, as well as a .R file (executable at the command line with `Rscript`) that performs CNV annotation from a segfile. It produces a .csv that is ready for further processing in Python for inclusion in `coderdata`.
1 parent e65634b commit 86e83ef

11 files changed

+59
-0
lines changed

build/bladderpdo/00_createBladderPDOSampleFile.py

Whitespace-only changes.

build/bladderpdo/01_createBladderPDOOmicsFiles.py

Whitespace-only changes.

build/bladderpdo/02_createBladderPDODrugsFile.py

Whitespace-only changes.

build/bladderpdo/03_createBladderPDOExperimentFile.py

Whitespace-only changes.
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/usr/bin/env Rscript
# Command-line arguments (everything after the script name). The rest of the
# script reads args[1] as the input segfile and args[2] as the output .csv.
args <- commandArgs(trailingOnly = TRUE)

# Install BiocManager only when it is missing. requireNamespace() checks for
# the package without attaching it, and an explicit CRAN mirror is supplied
# because non-interactive Rscript sessions may have none configured.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager", repos = "https://cloud.r-project.org")
}

# Install the Bioconductor packages this script loads, but only those that are
# not already available, so that a normal run does not contact the network or
# trigger package upgrades. The Bioconductor release stays pinned to 3.20.
bioc_pkgs <- c("GenomicRanges", "Homo.sapiens")
bioc_available <- vapply(bioc_pkgs, requireNamespace, logical(1),
                         quietly = TRUE)
missing_pkgs <- bioc_pkgs[!bioc_available]
if (length(missing_pkgs) > 0) {
  BiocManager::install(missing_pkgs, version = "3.20",
                       update = FALSE, ask = FALSE)
}
8+
9+
library(GenomicRanges)
10+
library(Homo.sapiens)
11+
12+
13+
# Group one metadata column of `query` by the `subject` ranges it overlaps.
#
# query:   ranges object whose mcols() contain `column`
# subject: ranges object to group by
# column:  name of the metadata column of `query` to collect
# ...:     forwarded unchanged to findOverlaps()
#
# Returns the splitAsList() result: one element per range in `subject` (in
# subject order), holding the `column` values of every `query` range that
# overlaps it. Subject ranges without any overlap get an empty element.
splitColumnByOverlap <- function(query, subject, column = "ENTREZID", ...) {
  hits <- findOverlaps(query, subject, ...)

  # Declare one factor level per subject range so that ranges with no hit
  # still appear in the output.
  subject_groups <- factor(subjectHits(hits),
                           levels = seq_len(subjectLength(hits)))

  overlapping_values <- mcols(query)[[column]][queryHits(hits)]
  splitAsList(overlapping_values, subject_groups)
}
22+
23+
# ---- Annotate segfile CNV segments with Entrez gene IDs ----
# Reads the segfile named by args[1], maps every segment to the genes it
# overlaps, averages seg.mean per (gene, sample ID) and writes the result to
# the .csv path given by args[2].
if (length(args) < 2) {
  stop("Usage: Rscript CNV-segfile-annotation.R <segfile.csv> <output.csv>",
       call. = FALSE)
}
segfile <- read.csv(args[1])

# Fail with a clear message if a column used below is absent; `segfile$col`
# would otherwise silently evaluate to NULL.
required_cols <- c("ID", "chrom", "loc.start", "loc.end", "seg.mean")
missing_cols <- setdiff(required_cols, names(segfile))
if (length(missing_cols) > 0) {
  stop("segfile is missing required column(s): ",
       paste(missing_cols, collapse = ", "), call. = FALSE)
}

# create genomic ranges object from segfile for use with findOverlaps()
# Copy-number segments are unstranded, so the strand is "*". Deriving it from
# the sign of loc.start labelled every segment "+", and because findOverlaps()
# is strand-aware by default, genes on the minus strand were never matched.
# Any existing "chr" prefix is stripped first so it is not duplicated.
gr <- GRanges(seqnames = Rle(paste0("chr", sub("^chr", "", segfile$chrom))),
              ranges = IRanges(segfile$loc.start, end = segfile$loc.end),
              strand = rep("*", nrow(segfile)),
              score = segfile$seg.mean,
              ID = segfile$ID)

# create genes GRanges obj from database
gene_ranges <- genes(Homo.sapiens, columns = c("ENTREZID"))

# one list element per segfile row, holding the ENTREZIDs overlapping it
geneHitsByRow <- splitColumnByOverlap(gene_ranges, gr, "ENTREZID")

# Build the annotated rows for segment `i`: one row per overlapping gene,
# each carrying that segment's score and ID. Segments with no gene hit yield
# a single NA row, which is removed by the complete.cases() filter below.
annotate_segment <- function(i) {
  gene_hits <- geneHitsByRow[[i]]
  if (length(gene_hits) > 0) {
    data.frame(ENTREZID = as.matrix(gene_hits),
               rep(mcols(gr[i, ]), length(gene_hits)))
  } else {
    data.frame(ENTREZID = NA, mcols(gr)[i, ])
  }
}
# seq_along() keeps this correct when the segfile has no rows
matrixresults <- lapply(seq_along(geneHitsByRow), annotate_segment)

# concatenate annotated matrices (NULL when there were no segments at all)
allCNV <- do.call(rbind, matrixresults)

# drop NAs
if (is.null(allCNV)) {
  completeAllCNV <- NULL
} else {
  completeAllCNV <- allCNV[complete.cases(allCNV), ]
}

if (is.null(completeAllCNV) || nrow(completeAllCNV) == 0) {
  # aggregate() stops with "no rows to aggregate" on empty input, so emit a
  # header-only table with the same columns as the normal output instead.
  aggregatedAllCNV <- data.frame(ENTREZID = character(0),
                                 ID = character(0),
                                 score = numeric(0))
} else {
  # aggregate scores for the same genes (that came from different regions)
  # for the same patient ID
  aggregatedAllCNV <- aggregate(completeAllCNV$score,
                                by = list(ENTREZID = completeAllCNV$ENTREZID,
                                          ID = completeAllCNV$ID),
                                FUN = mean)
  # aggregate() names the summarised column "x"; restore the original name
  names(aggregatedAllCNV)[names(aggregatedAllCNV) == "x"] <- "score"
}

# write results to csv for further processing in Python
write.csv(aggregatedAllCNV, args[2], row.names = FALSE, quote = FALSE)

build/bladderpdo/README.md

Whitespace-only changes.

build/bladderpdo/build_drugs.sh

Whitespace-only changes.

build/bladderpdo/build_exp.sh

Whitespace-only changes.

build/bladderpdo/build_omics.sh

Whitespace-only changes.

build/bladderpdo/build_samples.sh

Whitespace-only changes.

0 commit comments

Comments
 (0)