-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgenerate_null_model.py
87 lines (74 loc) · 3.31 KB
/
generate_null_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from cnvfc import stats
import numpy as np
import pandas as pd
from argparse import ArgumentParser
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--case",help="case to run",dest='case')
parser.add_argument("--path_pheno",help="path to phenotype .csv file",dest='path_pheno')
parser.add_argument("--path_connectomes",help="path to connectomes .csv file",dest='path_connectomes')
parser.add_argument("--path_out",help="path to output directory",dest='path_out')
args = parser.parse_args()
n_iter = 5000
case = args.case
path_pheno = args.path_pheno
path_connectomes = args.path_connectomes
path_out = args.path_out
pheno = pd.read_csv(path_pheno, index_col=0)
connectomes = pd.read_csv(path_connectomes,index_col=0)
group = 'CNV_name'
regressors_str_mc = ' + '.join(['AGE','C(SEX)', 'FD_scrubbed', 'C(SITE)','mean_conn'])
regressors_str_nomc = ' + '.join(['AGE','C(SEX)', 'FD_scrubbed', 'C(SITE)'])
#RENAME PHENO TO GROUP IPC
CNV_name = []
for name in pheno['CNV_name']:
if (name=='SZ_ds'):
CNV_name.append('SZ')
elif (name=='BIP_ds'):
CNV_name.append('BIP')
elif (name=='ADHD_ds'):
CNV_name.append('ADHD')
elif (name=='Autism'):
CNV_name.append('ASD')
else:
CNV_name.append(name)
pheno['CNV_name'] = CNV_name
#SELECT CONTROLS BASED ON CASE
ipc = ['ADHD','ASD','BIP','SZ']
control = 'non_carriers'
if (case in ipc):
control = 'CON_IPC'
if case == 'IBD':
control = 'no_IBD'
group = 'IBD_str'
case = 'IBD_K50_K51'
cases = ['IBD','DEL1q21_1','DEL2q13','DEL13q12_12','DEL15q11_2','DEL16p11_2','DEL17p12','DEL22q11_2','TAR_dup',
'DUP1q21_1','DUP2q13','DUP13q12_12','DUP15q11_2','DUP15q13_3_CHRNA7','DUP16p11_2','DUP16p13_11','DUP22q11_2',
'SZ','BIP','ASD','ADHD']
df_pi = pheno.groupby('PI').sum()[cases]
mask_pi = (df_pi > 0)
if case in ipc:
mask_case = pheno[case].to_numpy(dtype=bool)
mask_con = pheno[control].to_numpy(dtype=bool)
mask = mask_case + mask_con
elif case == 'IBD_K50_K51':
mask_case = (pheno['IBD_str'] == 'IBD_K50_K51').to_numpy(dtype=bool)
mask_con = (pheno['IBD_str'] == 'no_IBD').to_numpy(dtype=bool)
mask = mask_case + mask_con
else:
mask_case = pheno[case].to_numpy(dtype=bool)
pi_list = df_pi[mask_pi[case]].index.to_list()
mask_con = np.array((pheno['PI'].isin(pi_list))&(pheno['non_carriers']==1))
mask = mask_case + mask_con
#DO MEAN CORRECTED
betas_mc = stats.permutation_glm(pheno[mask], connectomes.to_numpy()[mask], group, case, control,regressors=regressors_str_mc, n_iter=n_iter, stand=False)
if case == 'IBD_K50_K51':
np.save(path_out + 'IBD_null_model_mc.npy'.format(case), betas_mc)
else:
np.save(path_out + '{}_null_model_mc.npy'.format(case), betas_mc)
#DO NOMC
betas_mc = stats.permutation_glm(pheno[mask], connectomes.to_numpy()[mask], group, case, control,regressors=regressors_str_nomc, n_iter=n_iter, stand=False)
if case == 'IBD_K50_K51':
np.save(path_out + 'IBD_null_model_nomc.npy'.format(case), betas_mc)
else:
np.save(path_out + '{}_null_model_nomc.npy'.format(case), betas_mc)