
Commit d24780b

Merge pull request #36 from pbashyal-nmdp/version_0.0.14
Added Example File
2 parents a94bce0 + 4a68087 commit d24780b

8 files changed: +68 -44 lines changed
README.md

Lines changed: 25 additions & 29 deletions
@@ -20,22 +20,19 @@ pip install py-graph-imputation
 
 #### Get Frequency Data and Subject Data and Configuration File
 
-For an example, copy the folders to your working directory:
-- https://github.com/nmdp-bioinformatics/py-graph-imputation/tree/master/data
-- https://github.com/nmdp-bioinformatics/py-graph-imputation/tree/master/conf
+For an example, get [example-conf-data.zip](https://github.com/nmdp-bioinformatics/py-graph-imputation/tree/master/example-conf-data.zip)
 
-so it appears as:
+Unzip the folder so it appears as:
 
 ```
-.
-├── conf
-│   ├── README.md
-│   └── minimal-configuration.json
-├── data
-│   ├── freqs
-│   │   └── CAU.freqs.gz
-│   └── subjects
-│       └── donor.csv
+conf
+|-- README.md
+`-- minimal-configuration.json
+data
+|-- freqs
+|   `-- CAU.freqs.gz
+`-- subjects
+    `-- donor.csv
 ```
 
 #### Modify the configuration.json to suit your need

@@ -59,9 +56,9 @@ Writing hpf File: output/hpf.csv
 This will produce the files which will be used for graph generation:
 
 ```
-├── output
-│   ├── hpf.csv # CSV file of Haplotype, Populatio, Freq
-│   └── pop_counts_file.txt # Size of each population
+output
+|-- hpf.csv # CSV file of Haplotype, Populatio, Freq
+`-- pop_counts_file.txt # Size of each population
 ```
 
 #### Generate the Graph (nodes and edges) files

@@ -81,13 +78,12 @@ Performing graph generation based on following configuration:
 This will produce the following files:
 
 ```
-├── output
-│   ├── csv
-│   │   ├── edges.csv
-│   │   ├── info_node.csv
-│   │   ├── nodes.csv
-│   │   └── top_links.csv
-
+output
+`-- csv
+    |-- edges.csv
+    |-- info_node.csv
+    |-- nodes.csv
+    `-- top_links.csv
 ```
 
 #### Produce Imputation Results for Subjects

@@ -133,12 +129,12 @@ This will produce files in `output` directory as:
 
 ```
 ├── output
-   ├── don.miss # Cases that failed imputation (e.g. incorrect typing etc.)
-   ├── don.pmug # Phased imputation as PMUG GL String
-   ├── don.pmug.pops # Population for Phased Imputation
-   ├── don.problem # List of errors
-   ├── don.umug # Unphased imputation as UMUG GL String
-   ├── don.umug.pops # Population for Phased Imputation
+│ ├── don.miss # Cases that failed imputation (e.g. incorrect typing etc.)
+│ ├── don.pmug # Phased imputation as PMUG GL String
+│ ├── don.pmug.pops # Population for Phased Imputation
+│ ├── don.problem # List of errors
+│ ├── don.umug # Unphased imputation as UMUG GL String
+│ ├── don.umug.pops # Population for Phased Imputation
 ```
 
 
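The README change above replaces the copy-these-folders instructions with a single downloadable archive. As a minimal sketch of that fetch-and-unzip step in Python, assuming a raw-download URL for the zip (the README only links the file, so the exact URL below is an assumption):

```
# Sketch: download and unpack the example data referenced in the README.
# The URL is assumed (GitHub "raw" link for example-conf-data.zip); adjust as needed.
import io
import urllib.request
import zipfile

URL = (
    "https://github.com/nmdp-bioinformatics/py-graph-imputation/"
    "raw/master/example-conf-data.zip"
)

with urllib.request.urlopen(URL) as resp:
    archive = zipfile.ZipFile(io.BytesIO(resp.read()))

archive.extractall(".")  # produces conf/ and data/ in the working directory
```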

example-conf-data.zip

19.7 KB
Binary file not shown.

graph_generation/generate_neo4j_multi_hpf.py

Lines changed: 1 addition & 0 deletions
@@ -38,6 +38,7 @@
 # 26M edges (3.6B)and 107M top links (200MB).
 # FULL_LOCI = 'ABCQR'
 
+
 ##############################################################################
 # functions
 ##############################################################################

grim/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -26,4 +26,4 @@
 """Top-level package for py-grim."""
 
 __organization__ = "NMDP/CIBMTR Bioinformatics"
-__version__ = "0.0.13"
+__version__ = "0.1.0"

grim/imputation/impute.py

Lines changed: 2 additions & 2 deletions
@@ -949,7 +949,7 @@ def open_phases(self, haps, N_Loc, gl_string):
         optionDict = {} # set()
         if len(fq) == 0:
             _list = []
-            for (gen, name) in self.cypher.loc_map.items():
+            for gen, name in self.cypher.loc_map.items():
                 count = 0
                 for i in range(len(hap_list[0])):
                     if hap_list[0][i].split("*", 1)[0] == gen:

@@ -2017,7 +2017,7 @@ def impute_file(self, config, planb=None, em_mr=False, em=False): ##em
         problem = open(config["imputation_out_problem_file"], "w")
 
         with f as lines:
-            for (i, name_gl) in enumerate(lines):
+            for i, name_gl in enumerate(lines):
                 try:
                     name_gl = name_gl.rstrip() # remove trailing whitespace
                     if "," in name_gl:
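Both impute.py edits only drop redundant parentheses around the loop targets; the unpacking itself is unchanged. A tiny standalone illustration (the dictionary contents here are made up, not the repo's loc_map):

```
# The parenthesized and bare unpacking forms are equivalent; Black prefers the bare form.
loc_map = {"A": "HLA-A", "B": "HLA-B"}  # hypothetical contents

with_parens = [(gen, name) for (gen, name) in loc_map.items()]
without_parens = [(gen, name) for gen, name in loc_map.items()]
assert with_parens == without_parens
```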

grim/imputation/networkx_graph.py

Lines changed: 37 additions & 10 deletions
@@ -50,13 +50,20 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile):
            if not self.nodes_plan_a or row[2] in self.nodes_plan_a:
                self.Vertices.append(row[1])
                vertex_id = len(self.Vertices) - 1
-               self.Vertices_attributes[row[1]] = (row[2], list(map(float, row[3].split(";"))), vertex_id)
+               self.Vertices_attributes[row[1]] = (
+                   row[2],
+                   list(map(float, row[3].split(";"))),
+                   vertex_id,
+               )
 
            if not self.nodes_plan_b or row[2] in self.nodes_plan_b:
                self.Whole_Vertices.append(row[1])
                vertex_id = len(self.Whole_Vertices) - 1
                self.Whole_Vertices_attributes[row[1]] = (
-                   row[2], list(map(float, row[3].split(";"))), vertex_id)
+                   row[2],
+                   list(map(float, row[3].split(";"))),
+                   vertex_id,
+               )
 
            nodesDict[row[0]] = row[1]
 

@@ -70,7 +77,10 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile):
            node2_id = row[1]
            node1 = nodesDict[node1_id]
            node2 = nodesDict[node2_id]
-           if node1 in self.Vertices_attributes and node2 in self.Vertices_attributes:
+           if (
+               node1 in self.Vertices_attributes
+               and node2 in self.Vertices_attributes
+           ):
                node1_label = self.Vertices_attributes[node1][0]
                if node1_label == self.full_loci:
                    self.Edges.append([node2_id, node1_id])

@@ -145,7 +155,9 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile):
        del sorted_indices
 
        # Create a list of the first appearance of a number in the 0 column in the matrix
-       unique_values, first_occurrences_indices = np.unique(self.Edges[:, 0], return_index=True)
+       unique_values, first_occurrences_indices = np.unique(
+           self.Edges[:, 0], return_index=True
+       )
 
        j = 0
        for i in range(0, self.Vertices.shape[0]):

@@ -162,7 +174,9 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile):
        del unique_values, first_occurrences_indices
 
        # Create a list of the first appearance of a number in the 0 column in the matrix
-       unique_values, first_occurrences_indices = np.unique(self.Whole_Edges[:, 0], return_index=True)
+       unique_values, first_occurrences_indices = np.unique(
+           self.Whole_Edges[:, 0], return_index=True
+       )
 
        j = 0
        for i in range(0, self.Whole_Vertices.shape[0]):

@@ -182,7 +196,9 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile):
        self.Whole_Neighbors_start.append(int(len(self.Whole_Vertices)))
 
        self.Neighbors_start = np.array(self.Neighbors_start, dtype=np.uint32)
-       self.Whole_Neighbors_start = np.array(self.Whole_Neighbors_start, dtype=np.uint32)
+       self.Whole_Neighbors_start = np.array(
+           self.Whole_Neighbors_start, dtype=np.uint32
+       )
 
        # Take the first column out of the Edges arrays
        ### Do the following to massive save of memory

@@ -249,7 +265,13 @@ def adjs_query(self, alleleList):
            allele_id = self.Vertices_attributes[allele][2]
            # Find the neighbors of the allele
            allele_neighbors = self.Vertices[
-               self.Edges[range(self.Neighbors_start[allele_id], self.Neighbors_start[allele_id + 1])]]
+               self.Edges[
+                   range(
+                       self.Neighbors_start[allele_id],
+                       self.Neighbors_start[allele_id + 1],
+                   )
+               ]
+           ]
            # The frequencies of the neighbors to the dictionary
            for adj in allele_neighbors:
                adjDict[adj] = self.Vertices_attributes[adj][1]

@@ -271,9 +293,14 @@ def adjs_query_by_color(self, alleleList, labelA, labelB):
 
            if connector in self.Whole_Vertices_attributes:
                connector_id = self.Whole_Vertices_attributes[connector]
-               alleles = self.Whole_Vertices[self.Whole_Edges[range(self.Whole_Neighbors_start[connector_id],
-                                                                    self.Whole_Neighbors_start[
-                                                                        connector_id + 1])]]
+               alleles = self.Whole_Vertices[
+                   self.Whole_Edges[
+                       range(
+                           self.Whole_Neighbors_start[connector_id],
+                           self.Whole_Neighbors_start[connector_id + 1],
+                       )
+                   ]
+               ]
 
                for adj in alleles:
                    adjDict[adj] = self.Whole_Vertices_attributes[adj][1]
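The reformatted lookups above all follow the same CSR-style (compressed sparse row) access pattern: Neighbors_start[i] .. Neighbors_start[i + 1] delimits vertex i's block in the flat edge array, and that block then indexes back into the vertex array. A toy NumPy sketch of the pattern with invented data (a slice is used where the diff uses range(...); both select the same elements):

```
# Toy CSR-style neighbor lookup; the arrays here are illustrative, not the repo's data.
import numpy as np

vertices = np.array(["A*01:01", "B*07:02", "C*07:01", "A*02:01"])
# edges[k] holds the vertex index of the k-th neighbor, grouped by source vertex
edges = np.array([1, 2, 0, 3, 0, 1], dtype=np.uint32)
# neighbors_start[i]:neighbors_start[i + 1] is vertex i's block in `edges`
neighbors_start = np.array([0, 2, 4, 5, 6], dtype=np.uint32)

allele_id = 0  # neighbors of vertices[0]
neighbors = vertices[edges[neighbors_start[allele_id]:neighbors_start[allele_id + 1]]]
print(neighbors)  # ['B*07:02' 'C*07:01']
```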

setup.cfg

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.0.13
+current_version = 0.1.0
 commit = True
 tag = True
 
setup.py

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@
 
 setup(
     name="py-graph-imputation",
-    version="0.0.13",
+    version="0.1.0",
     author="Pradeep Bashyal",
     author_email="[email protected]",
     python_requires=">=3.8",
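The version bump touches three files (grim/__init__.py, setup.cfg, setup.py); the [bumpversion] block in setup.cfg, with commit = True and tag = True, suggests the bump2version tool manages the bump. A minimal post-install sanity check, assuming the package imports as grim (as its __init__.py indicates):

```
# Verify that the installed package reports the bumped version.
import grim

assert grim.__version__ == "0.1.0", grim.__version__
print(grim.__organization__, grim.__version__)
```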
