Skip to content

Commit b23d407

Browse files
authored
Merge pull request #4 from sapiris/master
update for pip
2 parents a77dd5a + 0b44173 commit b23d407

File tree

5 files changed: 45 additions and 21 deletions.

5 files changed: 45 additions and 21 deletions.

grim/conf/minimal-configuration.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"gamma": 1e-7,
1212
"delta": 0.4999999
1313
},
14+
"UNK_priors": "SR",
1415
"FULL_LOCI": "ABCQR",
1516
"loci_map": {
1617
"A": 1,
@@ -19,6 +20,7 @@
1920
"DQB1": 4,
2021
"DRB1": 5
2122
},
23+
2224
"factor_missing_data": 0.0001,
2325
"Plan_B_Matrix": [
2426
[[1, 2, 3, 4, 5]],

grim/grim.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,13 @@ def impute(conf_file = ""):
4646
project_dir_in_file = os.path.dirname(os.path.realpath(__file__)) + '/'
4747
runfile.run_impute(conf_file, project_dir_graph, project_dir_in_file)
4848

49-
def impute_instance(config):
49+
def impute_instance(config, graph, count_by_prob= None):
50+
imputation = Imputation(graph, config, count_by_prob)
51+
return imputation
52+
53+
def graph_instance(config):
5054
graph = Graph(config)
5155
graph.build_graph(config["node_file"], config["top_links_file"], config["edges_file"])
52-
imputation = Imputation(graph, config)
53-
return imputation
56+
return graph
5457

5558

grim/imputation/graph_generation/generate_neo4j_multi_hpf.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import sys
1313
import os
14+
1415
#sys.path.insert(0, os.path.join(".."))
1516

1617

@@ -168,10 +169,10 @@ def labels_for_grap(conf, full_loci,csvdir):
168169

169170
nodes_plan_b = list(set(nodes_plan_b))
170171
all_combo_list = list(dict.fromkeys(nodes_plan_a + nodes_plan_b + top_nodes_plan_b))
171-
with open(csvdir + '/nodes_for_plan_a.txt', 'w') as f:
172+
with open(csvdir + 'nodes_for_plan_a.txt', 'w') as f:
172173
for item in nodes_plan_a:
173174
f.write("%s\n" % item)
174-
with open(csvdir + '/nodes_for_plan_b.txt', 'w') as f:
175+
with open(csvdir + 'nodes_for_plan_b.txt', 'w') as f:
175176
for item in (nodes_plan_b + top_nodes_plan_b):
176177
f.write("%s\n" % item)
177178
#pickle.dump(nodes_plan_a, open(csvdir + '/nodes_for_plan_a.pkl', "wb"))
@@ -197,8 +198,8 @@ def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop
197198
# Configure
198199
##############################################################################
199200
# set output directory and create it if it doesn't exist
200-
csvdir = "output/csv"
201-
pathlib.Path(csvdir).mkdir(parents=True, exist_ok=True)
201+
#csvdir = "output/csv"
202+
202203

203204

204205
# Input file
@@ -218,6 +219,11 @@ def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop
218219
with open(configuration_file) as f:
219220
conf = json.load(f)
220221

222+
csvdir = conf.get("graph_files_path")
223+
pathlib.Path(csvdir).mkdir(parents=True, exist_ok=True)
224+
if csvdir[-1] != '/':
225+
csvdir += '/'
226+
221227
pops = conf.get("populations")
222228
if em_pop:
223229
pops = em_pop
@@ -227,16 +233,19 @@ def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop
227233
if freq_file == "default":
228234
freq_file = os.path.dirname(os.path.realpath(__file__)) + '/output/hpf.csv'
229235
dict_count_of_pop = {}
230-
#if em:
231-
for pop in pops:
232-
dict_count_of_pop[pop] = freq_trim
233-
"""else:
234-
project_dir = "../../"
235-
pop_ratio_dir = project_dir + conf.get("pops_count_file", 'imputation/graph_generation/output/pop_ratio.txt')
236+
237+
pop_ratio_dir = conf.get("pops_count_file", os.path.dirname(os.path.realpath(__file__)) + '/imputation/graph_generation/output/pop_ratio.txt')
238+
path = pathlib.Path(pop_ratio_dir)
239+
240+
241+
if em or not path.is_file():
242+
for pop in pops:
243+
dict_count_of_pop[pop] = freq_trim
244+
else:
236245
with open(pop_ratio_dir) as f_count:
237246
for line in f_count:
238247
pop, count_pop, ratio = line.strip().split(',')
239-
dict_count_of_pop[pop] = freq_trim / float(count_pop)"""
248+
dict_count_of_pop[pop] = freq_trim / float(count_pop)
240249

241250

242251
# Display the configurations we are using
@@ -360,7 +369,7 @@ def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop
360369
# #### Build Nodes file
361370

362371
header = ['haplotypeId:ID(HAPLOTYPE)', 'name', 'loci:LABEL', 'frequency:DOUBLE[]']
363-
node_file = csvdir + '/nodes.csv'
372+
node_file = csvdir + conf.get("node_csv_file")
364373
with open(node_file, mode='w') as csvfile:
365374
csv_writer = csv.writer(csvfile)
366375
csv_writer.writerow(header)
@@ -373,7 +382,7 @@ def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop
373382
# #### Build Edges File
374383

375384
edgeheader = [':START_ID(HAPLOTYPE)', ':END_ID(HAPLOTYPE)', 'CP:DOUBLE[]', ':TYPE']
376-
edge_file = csvdir + '/edges.csv'
385+
edge_file = csvdir + conf.get("edges_csv_file")
377386
with open(edge_file, mode='w') as csvfile:
378387
csv_writer = csv.writer(csvfile)
379388
csv_writer.writerow(edgeheader)
@@ -396,7 +405,7 @@ def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop
396405
# #### Generate Top Links file
397406

398407
topheader = [':START_ID(HAPLOTYPE)', ':END_ID(HAPLOTYPE)', ':TYPE']
399-
top_links_file = csvdir + '/top_links.csv'
408+
top_links_file = csvdir + conf.get("top_links_csv_file")
400409
with open(top_links_file, mode='w') as csvfile:
401410
csv_writer = csv.writer(csvfile)
402411
csv_writer.writerow(topheader)
@@ -412,7 +421,7 @@ def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop
412421
# #### Generate Info Node file
413422

414423
infonode_header = ['INFO_NODE_ID:ID(INFO_NODE)', 'populations:STRING[]', 'INFO_NODE:LABEL']
415-
top_links_file = csvdir + '/info_node.csv'
424+
top_links_file = csvdir + conf.get("info_node_csv_file")
416425
with open(top_links_file, mode='w') as csvfile:
417426
csv_writer = csv.writer(csvfile)
418427
csv_writer.writerow(infonode_header)

grim/imputation/imputegl/impute.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ def __init__(self, net=None,config=None, count_by_prob=None, verbose=False):
110110
self.populations = config["pops"]
111111
self.netGraph = net
112112
self.priorMatrix = np.ones((len(self.populations), len(self.populations)))
113+
self.unk_priors = config["UNK_priors"]
113114

114115
# For plan b
115116
#self.full_loci = config["full_loci"]
@@ -1419,7 +1420,11 @@ def call_comp_phase_prob(self, epsilon, n, phases, chr, MUUG_output, planb):
14191420
# no plan b
14201421
for level in range(2):
14211422
if level == 1:
1422-
self.priorMatrix = np.ones((len(self.populations), len(self.populations))) ####
1423+
if self.unk_priors == "MR":
1424+
self.priorMatrix = np.ones((len(self.populations), len(self.populations)))
1425+
else:
1426+
self.priorMatrix = np.identity(len(self.populations))
1427+
#self.priorMatrix = np.ones((len(self.populations), len(self.populations))) ####
14231428
if planb and len(res['Haps']) == 0:
14241429
self.plan = 'b'
14251430
epsilon = 1e-14
@@ -1615,7 +1620,10 @@ def update_prob_by_priority(self, res, race1, race2, priority):
16151620

16161621
def impute_one(self, subject_id, gl, binary, race1, race2, priority, epsilon, n, MUUG_output, haps_output, planb, em):#em
16171622
clean_gl = clean_up_gl(gl)
1618-
self.priorMatrix = np.ones((len(self.populations), len(self.populations)))
1623+
if self.unk_priors == "MR":
1624+
self.priorMatrix = np.ones((len(self.populations), len(self.populations)))
1625+
else:
1626+
self.priorMatrix = np.identity(len(self.populations))
16191627
to_calc_prior_matrix = False
16201628
if race1 or race2:
16211629
race1 = race1.split(';')

grim/validation/runfile.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ def run_impute(conf_file = "../conf/minimal-configuration.json", project_dir_gra
6767
"max_haplotypes_number_in_phase": json_conf.get("max_haplotypes_number_in_phase",100 ),
6868
"bin_imputation_input_file": project_dir_in_file + json_conf.get("bin_imputation_in_file", "None"),
6969
"nodes_for_plan_A": json_conf.get("Plan_A_Matrix", []),
70-
"save_mode": json_conf.get("save_space_mode", False)
70+
"save_mode": json_conf.get("save_space_mode", False),
71+
"UNK_priors" : json_conf.get("UNK_priors", "MR")
7172

7273
}
7374

@@ -76,6 +77,7 @@ def run_impute(conf_file = "../conf/minimal-configuration.json", project_dir_gra
7677
print("Performing imputation based on:")
7778
print("\tPopulation: {}".format(config["pops"]))
7879
print("\tPriority: {}".format(config["priority"]))
80+
print("\tPriority: {}".format(config["UNK_priors"]))
7981
print("\tEpsilon: {}".format(config["epsilon"]))
8082
print("\tPlan B: {}".format(config["planb"]))
8183
print("\tNumber of Results: {}".format(config["number_of_results"]))

0 commit comments

Comments
 (0)