Skip to content

Commit

Permalink
[nomenclature._GPCRdbJSON2DF] new method to be used inside _GPCRdb_we…
Browse files Browse the repository at this point in the history
…b_lookup
  • Loading branch information
gph82 committed Nov 15, 2024
1 parent d46a993 commit 247cb4e
Showing 1 changed file with 33 additions and 20 deletions.
53 changes: 33 additions & 20 deletions mdciao/nomenclature/nomenclature.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,32 +371,13 @@ def _GPCRdb_web_lookup(url, verbose=True,
UniProt_name = url.split("/")[-1]
a = _requests.get(url, timeout=timeout)

return_fields = ["protein_segment",
"AAresSeq",
"display_generic_number"]
pop_fields = ["sequence_number", "amino_acid", "alternative_generic_numbers"]
# TODO use _url2json here
if verbose:
print("done!")
if a.text == '[]':
DFout = ValueError('Contacted %s url successfully (no 404),\n'
'but Uniprot name %s yields nothing' % (url, UniProt_name))
else:
df = _DataFrame(a.json())
mydict = df.T.to_dict()
for key, val in mydict.items():
try:
val["AAresSeq"] = '%s%s' % (val["amino_acid"], val["sequence_number"])
if "alternative_generic_numbers" in val.keys():
for idict in val["alternative_generic_numbers"]:
# print(key, idict["scheme"], idict["label"])
val[idict["scheme"]] = idict["label"]
except IndexError:
pass

DFout = _DataFrame.from_dict(mydict, orient="index").replace({_np.nan: None})
return_fields += [key for key in DFout.keys() if key not in return_fields + pop_fields]
DFout = DFout[return_fields]
DFout = _GPCRdbJSON2DF(a.json())
print("Please cite the following reference to the GPCRdb:")
lit = Literature()
print(_format_cite(lit.site_GPCRdb))
Expand All @@ -407,6 +388,38 @@ def _GPCRdb_web_lookup(url, verbose=True,

return DFout

def _GPCRdbJSON2DF(jlist : list):
    r"""
    Turn a list of GPCRdb residue dictionaries into a :obj:`~pandas.DataFrame`.

    Every row gets an extra "AAresSeq" entry, built by joining the row's
    "amino_acid" and "sequence_number" values (e.g. "M" and 1 give "M1").
    Every item of a row's "alternative_generic_numbers" list is expanded
    into its own column, named after the item's "scheme" and holding the
    item's "label".

    Parameters
    ----------
    jlist : list
        A list of dictionaries, typically coming from
         * requests.get(url).json() where url is a GPCRdb url
         * json.load(f) where f is an open file handle of a json file

    Returns
    -------
    DFout : :obj:`~pandas.DataFrame`
        One row per item of `jlist`. The first columns are always
        "protein_segment", "AAresSeq" and "display_generic_number",
        followed by any other column found in the data, except
        "sequence_number", "amino_acid" and "alternative_generic_numbers",
        which are dropped. NaN values are replaced with None.
        If `jlist` is empty, an empty DataFrame holding only the
        three leading columns is returned.

    Raises
    ------
    KeyError
        If a row has no "amino_acid" or "sequence_number" entry, or if
        "protein_segment" or "display_generic_number" are absent
        from all rows.
    """
    return_fields = ["protein_segment",
                     "AAresSeq",
                     "display_generic_number"]
    pop_fields = ["sequence_number", "amino_acid", "alternative_generic_numbers"]

    # Going through a DataFrame first gives every row the same set of keys
    # (fields that a row lacks are filled with NaN)
    rows = _DataFrame(jlist).T.to_dict()
    if not rows:
        # Without rows there are no columns to select from, and indexing
        # with return_fields below would fail with an opaque KeyError
        return _DataFrame(columns=return_fields)

    for row in rows.values():
        row["AAresSeq"] = '%s%s' % (row["amino_acid"], row["sequence_number"])
        alt_numbers = row.get("alternative_generic_numbers")
        # Rows without alternative numbers carry NaN (or None) here,
        # which cannot be iterated over, so only expand actual sequences
        if isinstance(alt_numbers, (list, tuple)):
            for idict in alt_numbers:
                row[idict["scheme"]] = idict["label"]

    DFout = _DataFrame.from_dict(rows, orient="index").replace({_np.nan: None})
    known_fields = return_fields + pop_fields
    return_fields += [key for key in DFout.keys() if key not in known_fields]
    return DFout[return_fields]

def _md_load_rcsb(PDB,
web_address="https://files.rcsb.org/download",
Expand Down

0 comments on commit 247cb4e

Please sign in to comment.