-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIntake_correlation_HCC.R
executable file
·93 lines (66 loc) · 3.54 KB
/
Intake_correlation_HCC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Partial correlation between alcohol intake of HCC controls and spectral features
# New version of Liver CC alcohol model.R
hcpos <- read_csv("EPIC liver cancer 2016 RP POS feature table.csv") %>% slice(-(83:84))
hcneg <- read_csv("EPIC liver cancer 2016 RP NEG feature table.csv")
intakecor_hcc <- function(dat, pos = T, matchvec = NULL) {
# Feature tables
library(tidyverse)
if(nrow(dat) == 4321) ionmode <- "Pos" else ionmode <- "Neg"
names <- dat$Compound
# Subset samples and add feature names
dat <- dat %>% select(contains("LivCan_")) %>% t
colnames(dat) <- paste(names, 1:ncol(dat), sep = "@")
# Split sample ID and order by second part
id <- data.frame(val = rownames(dat)) %>% separate(val, sep = "_", into=c("id1", "id2"), convert = T)
ints <- cbind(id, dat) %>% arrange(id2) %>% select(-(1:2))
# Metadata from Laura. Split ID to put in same order as ints
library(haven)
meta <- read_sas("newdataset_missings1.sas7bdat") %>% select(-starts_with("Untarg_")) %>%
separate(Id_Bma, sep = "_", into=c("id1", "id2"), convert = T) %>% arrange(id2)
# Get vector of controls for subsetting
controls <- meta$Cncr_Caco_Live == 0
ints0 <- ints[controls, ]
#Convert variables of interest to factors
varlist <- c("Country", "Sex", "Smoke_Stat", "Batch_Rppos", "Batch_Rpneg")
meta <- meta %>% mutate_at(vars(varlist), as.factor)
#Partial correlation between alcohol intake and each feature
#Get residuals from lm on alcohol intake: BMI, smoke, sex, batch, country, CC status
# Subsetting: controls only (with suffix 0), matched features only
meta0 <- meta[controls, ]
filt <- colSums(ints0 > 1) > 65
# Subset and log transform matrix
ints0 <- ints[controls, filt]
# Get median intensity and numbers of detections (untransformed)
md <- tibble(medint = apply(ints0, 2, median),
detect = apply(ints0, 2, function(x) sum(x > 1)))
logints <- log2(ints0)
# Subset original feature table for extraction of feature data later
dat <- dat[controls, filt]
namesfilt <- names[filt]
print(paste("Testing", ncol(logints), "features..."))
# correlation test food intake with intensities of selected features
lcor <- apply(logints, 2, function(x) cor.test(log2(meta0$Qe_Alc + 1)[x > 0], x[x > 0]) )
partialcor <- function(x) {
# Get residuals on log transformed alcohol intake and each feature
mod1 <- lm(log2(Qe_Alc + 1) ~ Country + Sex + Bmi_C + Smoke_Stat, data = meta0[x > 0, ])
mod2 <- lm(x[x > 0] ~ Country + Sex + Bmi_C + Batch_Rppos + Smoke_Stat, data = meta0[x > 0, ])
# Correlate residuals
cor.test(residuals(mod1), residuals(mod2))
}
# Applying the partialcor function across the pos and neg matrices columnwise (argument=2)
pcor <- apply(logints, 2, partialcor)
library(broom)
rcor <- map_dfr(lcor, tidy, .id = "feat") %>% separate(feat, sep = "@", into = c("Mass", "RT", "feat"))
pcor <- map_dfr(pcor, tidy, .id = "feat") %>%
mutate(p.valBH = p.adjust(p.value, method = "BH")) %>% select(estimate, p.value, p.valBH)
# Final partial correlation is called estimate1
output <- bind_cols(rcor, pcor, md) %>% bind_cols(data.frame(t(ints0))) %>%
arrange(desc(estimate1))
}
alcpos.hcc <- intakecor_hcc(hcpos)
alcneg.hcc0 <- intakecor_hcc(hcneg)
# Get the subset of HCC features matched with significant alcohol CS features (from Intake_correlation_new.R)
f1 <- matched_pos_filt$HCC.feat
f2 <- matched_neg_filt$HCC.feat
alcpos.hcc1 <- intakecor_hcc(matchvec = f1)
alcneg.hcc1 <- intakecor_hcc(pos = F, matchvec = f2)