
Commit d24780b

Merge pull request #36 from pbashyal-nmdp/version_0.0.14
Added Example File
2 parents a94bce0 + 4a68087 commit d24780b

8 files changed: +68 -44 lines changed
README.md

Lines changed: 25 additions & 29 deletions
@@ -20,22 +20,19 @@ pip install py-graph-imputation
 
 #### Get Frequency Data and Subject Data and Configuration File
 
-For an example, copy the folders to your working directory:
-- https://github.com/nmdp-bioinformatics/py-graph-imputation/tree/master/data
-- https://github.com/nmdp-bioinformatics/py-graph-imputation/tree/master/conf
+For an example, get [example-conf-data.zip](https://github.com/nmdp-bioinformatics/py-graph-imputation/tree/master/example-conf-data.zip)
 
-so it appears as:
+Unzip the folder so it appears as:
 
 ```
-.
-├── conf
-│   ├── README.md
-│   └── minimal-configuration.json
-├── data
-│   ├── freqs
-│   │   └── CAU.freqs.gz
-│   └── subjects
-│       └── donor.csv
+conf
+|-- README.md
+`-- minimal-configuration.json
+data
+|-- freqs
+|   `-- CAU.freqs.gz
+`-- subjects
+    `-- donor.csv
 ```
 
 #### Modify the configuration.json to suit your need

@@ -59,9 +56,9 @@ Writing hpf File: output/hpf.csv
 This will produce the files which will be used for graph generation:
 
 ```
-├── output
-│   ├── hpf.csv # CSV file of Haplotype, Populatio, Freq
-│   └── pop_counts_file.txt # Size of each population
+output
+|-- hpf.csv # CSV file of Haplotype, Populatio, Freq
+`-- pop_counts_file.txt # Size of each population
 ```
 
 #### Generate the Graph (nodes and edges) files

@@ -81,13 +78,12 @@ Performing graph generation based on following configuration:
 This will produce the following files:
 
 ```
-├── output
-│   ├── csv
-│   │   ├── edges.csv
-│   │   ├── info_node.csv
-│   │   ├── nodes.csv
-│   │   └── top_links.csv
-
+output
+`-- csv
+    |-- edges.csv
+    |-- info_node.csv
+    |-- nodes.csv
+    `-- top_links.csv
 ```
 
 #### Produce Imputation Results for Subjects

@@ -133,12 +129,12 @@ This will produce files in `output` directory as:
 
 ```
 ├── output
-   ├── don.miss # Cases that failed imputation (e.g. incorrect typing etc.)
-   ├── don.pmug # Phased imputation as PMUG GL String
-   ├── don.pmug.pops # Population for Phased Imputation
-   ├── don.problem # List of errors
-   ├── don.umug # Unphased imputation as UMUG GL String
-   ├── don.umug.pops # Population for Phased Imputation
+│ ├── don.miss # Cases that failed imputation (e.g. incorrect typing etc.)
+│ ├── don.pmug # Phased imputation as PMUG GL String
+│ ├── don.pmug.pops # Population for Phased Imputation
+│ ├── don.problem # List of errors
+│ ├── don.umug # Unphased imputation as UMUG GL String
+│ ├── don.umug.pops # Population for Phased Imputation
 ```
 
 
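The README change above replaces the copy-these-folders instructions with a single downloadable archive. As a minimal sketch of that fetch-and-unzip step in Python, assuming a raw-download URL for the zip (the README only links the file, so the exact URL below is an assumption):

```
# Sketch: download and unpack the example data referenced in the README.
# The URL is assumed (GitHub "raw" link for example-conf-data.zip); adjust as needed.
import io
import urllib.request
import zipfile

URL = (
    "https://github.com/nmdp-bioinformatics/py-graph-imputation/"
    "raw/master/example-conf-data.zip"
)

with urllib.request.urlopen(URL) as resp:
    archive = zipfile.ZipFile(io.BytesIO(resp.read()))

archive.extractall(".")  # produces conf/ and data/ in the working directory
```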

example-conf-data.zip

19.7 KB
Binary file not shown.

graph_generation/generate_neo4j_multi_hpf.py

Lines changed: 1 addition & 0 deletions
@@ -38,6 +38,7 @@
 # 26M edges (3.6B)and 107M top links (200MB).
 # FULL_LOCI = 'ABCQR'
 
+
 ##############################################################################
 # functions
 ##############################################################################

grim/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -26,4 +26,4 @@
 """Top-level package for py-grim."""
 
 __organization__ = "NMDP/CIBMTR Bioinformatics"
-__version__ = "0.0.13"
+__version__ = "0.1.0"

grim/imputation/impute.py

Lines changed: 2 additions & 2 deletions
@@ -949,7 +949,7 @@ def open_phases(self, haps, N_Loc, gl_string):
         optionDict = {} # set()
         if len(fq) == 0:
             _list = []
-            for (gen, name) in self.cypher.loc_map.items():
+            for gen, name in self.cypher.loc_map.items():
                 count = 0
                 for i in range(len(hap_list[0])):
                     if hap_list[0][i].split("*", 1)[0] == gen:

@@ -2017,7 +2017,7 @@ def impute_file(self, config, planb=None, em_mr=False, em=False): ##em
         problem = open(config["imputation_out_problem_file"], "w")
 
         with f as lines:
-            for (i, name_gl) in enumerate(lines):
+            for i, name_gl in enumerate(lines):
                 try:
                     name_gl = name_gl.rstrip() # remove trailing whitespace
                     if "," in name_gl:
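Both impute.py edits only drop redundant parentheses around the loop targets; the unpacking itself is unchanged. A tiny standalone illustration (the dictionary contents here are made up, not the repo's loc_map):

```
# The parenthesized and bare unpacking forms are equivalent; Black prefers the bare form.
loc_map = {"A": "HLA-A", "B": "HLA-B"}  # hypothetical contents

with_parens = [(gen, name) for (gen, name) in loc_map.items()]
without_parens = [(gen, name) for gen, name in loc_map.items()]
assert with_parens == without_parens
```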

grim/imputation/networkx_graph.py

Lines changed: 37 additions & 10 deletions
@@ -50,13 +50,20 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile):
            if not self.nodes_plan_a or row[2] in self.nodes_plan_a:
                self.Vertices.append(row[1])
                vertex_id = len(self.Vertices) - 1
-               self.Vertices_attributes[row[1]] = (row[2], list(map(float, row[3].split(";"))), vertex_id)
+               self.Vertices_attributes[row[1]] = (
+                   row[2],
+                   list(map(float, row[3].split(";"))),
+                   vertex_id,
+               )
 
            if not self.nodes_plan_b or row[2] in self.nodes_plan_b:
                self.Whole_Vertices.append(row[1])
                vertex_id = len(self.Whole_Vertices) - 1
                self.Whole_Vertices_attributes[row[1]] = (
-                   row[2], list(map(float, row[3].split(";"))), vertex_id)
+                   row[2],
+                   list(map(float, row[3].split(";"))),
+                   vertex_id,
+               )
 
            nodesDict[row[0]] = row[1]
 

@@ -70,7 +77,10 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile):
            node2_id = row[1]
            node1 = nodesDict[node1_id]
            node2 = nodesDict[node2_id]
-           if node1 in self.Vertices_attributes and node2 in self.Vertices_attributes:
+           if (
+               node1 in self.Vertices_attributes
+               and node2 in self.Vertices_attributes
+           ):
                node1_label = self.Vertices_attributes[node1][0]
                if node1_label == self.full_loci:
                    self.Edges.append([node2_id, node1_id])

@@ -145,7 +155,9 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile):
        del sorted_indices
 
        # Create a list of the first appearance of a number in the 0 column in the matrix
-       unique_values, first_occurrences_indices = np.unique(self.Edges[:, 0], return_index=True)
+       unique_values, first_occurrences_indices = np.unique(
+           self.Edges[:, 0], return_index=True
+       )
 
        j = 0
        for i in range(0, self.Vertices.shape[0]):

@@ -162,7 +174,9 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile):
        del unique_values, first_occurrences_indices
 
        # Create a list of the first appearance of a number in the 0 column in the matrix
-       unique_values, first_occurrences_indices = np.unique(self.Whole_Edges[:, 0], return_index=True)
+       unique_values, first_occurrences_indices = np.unique(
+           self.Whole_Edges[:, 0], return_index=True
+       )
 
        j = 0
        for i in range(0, self.Whole_Vertices.shape[0]):

@@ -182,7 +196,9 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile):
        self.Whole_Neighbors_start.append(int(len(self.Whole_Vertices)))
 
        self.Neighbors_start = np.array(self.Neighbors_start, dtype=np.uint32)
-       self.Whole_Neighbors_start = np.array(self.Whole_Neighbors_start, dtype=np.uint32)
+       self.Whole_Neighbors_start = np.array(
+           self.Whole_Neighbors_start, dtype=np.uint32
+       )
 
        # Take the first column out of the Edges arrays
        ### Do the following to massive save of memory

@@ -249,7 +265,13 @@ def adjs_query(self, alleleList):
            allele_id = self.Vertices_attributes[allele][2]
            # Find the neighbors of the allele
            allele_neighbors = self.Vertices[
-               self.Edges[range(self.Neighbors_start[allele_id], self.Neighbors_start[allele_id + 1])]]
+               self.Edges[
+                   range(
+                       self.Neighbors_start[allele_id],
+                       self.Neighbors_start[allele_id + 1],
+                   )
+               ]
+           ]
            # The frequencies of the neighbors to the dictionary
            for adj in allele_neighbors:
                adjDict[adj] = self.Vertices_attributes[adj][1]

@@ -271,9 +293,14 @@ def adjs_query_by_color(self, alleleList, labelA, labelB):
 
            if connector in self.Whole_Vertices_attributes:
                connector_id = self.Whole_Vertices_attributes[connector]
-               alleles = self.Whole_Vertices[self.Whole_Edges[range(self.Whole_Neighbors_start[connector_id],
-                                                                    self.Whole_Neighbors_start[
-                                                                        connector_id + 1])]]
+               alleles = self.Whole_Vertices[
+                   self.Whole_Edges[
+                       range(
+                           self.Whole_Neighbors_start[connector_id],
+                           self.Whole_Neighbors_start[connector_id + 1],
+                       )
+                   ]
+               ]
 
                for adj in alleles:
                    adjDict[adj] = self.Whole_Vertices_attributes[adj][1]
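The reformatted lookups above all follow the same CSR-style (compressed sparse row) access pattern: Neighbors_start[i] .. Neighbors_start[i + 1] delimits vertex i's block in the flat edge array, and that block then indexes back into the vertex array. A toy NumPy sketch of the pattern with invented data (a slice is used where the diff uses range(...); both select the same elements):

```
# Toy CSR-style neighbor lookup; the arrays here are illustrative, not the repo's data.
import numpy as np

vertices = np.array(["A*01:01", "B*07:02", "C*07:01", "A*02:01"])
# edges[k] holds the vertex index of the k-th neighbor, grouped by source vertex
edges = np.array([1, 2, 0, 3, 0, 1], dtype=np.uint32)
# neighbors_start[i]:neighbors_start[i + 1] is vertex i's block in `edges`
neighbors_start = np.array([0, 2, 4, 5, 6], dtype=np.uint32)

allele_id = 0  # neighbors of vertices[0]
neighbors = vertices[edges[neighbors_start[allele_id]:neighbors_start[allele_id + 1]]]
print(neighbors)  # ['B*07:02' 'C*07:01']
```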

setup.cfg

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.0.13
+current_version = 0.1.0
 commit = True
 tag = True
 
setup.py

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@
 
 setup(
     name="py-graph-imputation",
-    version="0.0.13",
+    version="0.1.0",
     author="Pradeep Bashyal",
     author_email="[email protected]",
     python_requires=">=3.8",
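The version bump touches three files (grim/__init__.py, setup.cfg, setup.py); the [bumpversion] block in setup.cfg, with commit = True and tag = True, suggests the bump2version tool manages the bump. A minimal post-install sanity check, assuming the package imports as grim (as its __init__.py indicates):

```
# Verify that the installed package reports the bumped version.
import grim

assert grim.__version__ == "0.1.0", grim.__version__
print(grim.__organization__, grim.__version__)
```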
