-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathukbb_prs_subsample.py
67 lines (55 loc) · 2.67 KB
/
ukbb_prs_subsample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
import util
import numpy as np
import pandas as pd
from argparse import ArgumentParser
def iter_sample_var_effect(pheno,connectomes,var,regressors,p_out,n_sample=450,n_iter=1000,std=True):
clean_regressors = [r[2:-1] if r[0]=='C' else r for r in regressors]
p = pheno[clean_regressors + [var,'PRS_eth','PI']].copy()
p = p[p['PI']=='UKBB']
p = p[(p.PRS_eth == 'WB') | (p.PRS_eth == 'EUR')]
mask = util.mask_var(p,var)
p = p[mask]
conn = connectomes.to_numpy()[pheno.index.isin(p.index)]
betas = np.zeros((n_iter,2080))
mtds = []
for i in range(n_iter):
sample_subs = np.random.choice(p.index,n_sample,replace=False)
summary = util.variable_effect(p[p.index.isin(sample_subs)],
var,
regressors,
conn[p.index.isin(sample_subs)],
std=std)
bbb = summary['betas_std'].to_numpy()
betas[i,:] = bbb
rank = pd.qcut(np.abs(bbb),10,labels=False)
decile = np.abs(bbb)[rank[rank==9]]
mtds.append(np.mean(decile))
if not p_out is None:
np.save(os.path.join(p_out,f'betas_{var}_{n_sample}_{n_iter}.npy'),betas)
np.save(os.path.join(p_out,f'mtds_{var}_{n_sample}_{n_iter}.npy'),mtds)
return betas, mtds
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--path_pheno",help="path to phenotype .csv file",
default='/home/harveyaa/projects/def-pbellec/harveyaa/data/pheno_26-01-22.csv')
parser.add_argument("--path_connectomes",help="path to connectomes .csv file",
default='/home/harveyaa/projects/def-pbellec/harveyaa/data/connectomes_01-12-21.csv')
parser.add_argument("--path_out",help="path to output directory")
parser.add_argument("--prs",help="which prs")
parser.add_argument("--n_iter",help="number of iterations",type=int,default=1000)
parser.add_argument("--n_sample",help="number of subjects in sample",type=int,default=450)
args = parser.parse_args()
#############
# LOAD DATA #
#############
pheno = pd.read_csv(args.path_pheno,index_col=0)
connectomes = pd.read_csv(args.path_connectomes,index_col=0)
regressors_mc = ['AGE','C(SEX)','FD_scrubbed', 'C(SITE)', 'mean_conn']
iter_sample_var_effect(pheno,
connectomes,
args.prs,
regressors_mc,
args.path_out,
n_iter=args.n_iter,
n_sample=args.n_sample)