[nomenclature._GPCRdbJSON2DF] new method to be used inside _GPCRdb_web_lookup
gph82 · Nov 15, 2024 · 247cb4e · 247cb4e
1 parent d46a993
commit 247cb4e
Showing 1 changed file with 33 additions and 20 deletions.
diff --git a/mdciao/nomenclature/nomenclature.py b/mdciao/nomenclature/nomenclature.py
@@ -371,32 +371,13 @@ def _GPCRdb_web_lookup(url, verbose=True,
     UniProt_name = url.split("/")[-1]
     a = _requests.get(url, timeout=timeout)
 
-    return_fields = ["protein_segment",
-                     "AAresSeq",
-                     "display_generic_number"]
-    pop_fields = ["sequence_number", "amino_acid", "alternative_generic_numbers"]
-    # TODO use _url2json here
     if verbose:
         print("done!")
     if a.text == '[]':
         DFout = ValueError('Contacted %s url successfully (no 404),\n'
                            'but Uniprot name %s yields nothing' % (url, UniProt_name))
     else:
-        df = _DataFrame(a.json())
-        mydict = df.T.to_dict()
-        for key, val in mydict.items():
-            try:
-                val["AAresSeq"] = '%s%s' % (val["amino_acid"], val["sequence_number"])
-                if "alternative_generic_numbers" in val.keys():
-                    for idict in val["alternative_generic_numbers"]:
-                        # print(key, idict["scheme"], idict["label"])
-                        val[idict["scheme"]] = idict["label"]
-            except IndexError:
-                pass
-
-        DFout = _DataFrame.from_dict(mydict, orient="index").replace({_np.nan: None})
-        return_fields += [key for key in DFout.keys() if key not in return_fields + pop_fields]
-        DFout = DFout[return_fields]
+        DFout = _GPCRdbJSON2DF(a.json())
         print("Please cite the following reference to the GPCRdb:")
         lit = Literature()
         print(_format_cite(lit.site_GPCRdb))
@@ -407,6 +388,38 @@ def _GPCRdb_web_lookup(url, verbose=True,
 
     return DFout
 
+def _GPCRdbJSON2DF(jlist : list):
+    r"""
+    Turn a list of per-residue dictionaries into a table with one row per dictionary.
+
+    Each row gets an "AAresSeq" entry built by concatenating its
+    "amino_acid" and "sequence_number" values. Every entry of its
+    "alternative_generic_numbers" is stored under a key of its own,
+    named after the entry's "scheme" and holding the entry's "label".
+
+    Parameters
+    ----------
+    jlist : list
+        A list of dictionaries, typically coming from
+        * requests.get(url).json() where url is a GPCRdb url
+        * json.load(f) where f is an open file handle of a json file
+
+    Returns
+    -------
+    DFout : the object built by `_DataFrame.from_dict`
+        Its columns are "protein_segment", "AAresSeq" and
+        "display_generic_number", in that order, followed by every
+        other column except "sequence_number", "amino_acid" and
+        "alternative_generic_numbers". NaN values are replaced
+        with None.
+    """
+    # Assumes _DataFrame is pandas.DataFrame (one row per dictionary,
+    # one column per key) -- TODO confirm against the module's imports
+    df = _DataFrame(jlist)
+    # Columns that always lead the output, in this order
+    return_fields = ["protein_segment",
+                     "AAresSeq",
+                     "display_generic_number"]
+    # Columns left out of the output: their contents are folded into
+    # "AAresSeq" and the per-scheme keys inside the loop below
+    pop_fields = ["sequence_number", "amino_acid", "alternative_generic_numbers"]
+    # Row-wise view of the table: {row index: {column: value}}
+    mydict = df.T.to_dict()
+    for key, val in mydict.items():
+        try:
+            val["AAresSeq"] = '%s%s' % (val["amino_acid"], val["sequence_number"])
+            if "alternative_generic_numbers" in val.keys():
+                for idict in val["alternative_generic_numbers"]:
+                    # Store each alternative numbering under its scheme's name
+                    val[idict["scheme"]] = idict["label"]
+        # NOTE(review): the lookups above are by key, which would raise
+        # KeyError rather than IndexError -- unclear what this guards, confirm intent
+        except IndexError:
+            pass
+
+    # Rebuild the table from the augmented rows, using None instead of NaN
+    DFout = _DataFrame.from_dict(mydict, orient="index").replace({_np.nan: None})
+    # Append every remaining column that is neither a leading nor a left-out one
+    return_fields += [key for key in DFout.keys() if key not in return_fields + pop_fields]
+    DFout = DFout[return_fields]
+    return DFout
 
 def _md_load_rcsb(PDB,
                   web_address="https://files.rcsb.org/download",