-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetUniprot.py
executable file
·44 lines (40 loc) · 1.52 KB
/
getUniprot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import requests
import sys
import json
def _go_term_value(properties):
    """Return the value of the ``GoTerm`` property (e.g. ``"C:cytosol"``).

    Falls back to the first property's value when no property is keyed
    ``GoTerm``, and returns None when there are no properties at all.
    """
    for prop in properties or []:
        if prop.get("key") == "GoTerm":
            return prop.get("value")
    if properties:
        return properties[0].get("value")
    return None


def getUniprot(uniprot, *, session=None, timeout=30):
    """Fetch a UniProtKB entry as JSON and extract a few summary fields.

    Args:
        uniprot: Accession inserted into the REST URL (e.g. ``"P0ACH5"``).
        session: Optional object exposing ``get(url, timeout=...)`` whose
            result has a ``text`` attribute (for instance a
            ``requests.Session``, to reuse connections across many lookups).
            When None, the module-level ``requests`` API is used.
        timeout: Seconds passed to ``get`` so a stalled connection cannot
            block forever.

    Returns:
        A 7-tuple, in this order:
        ``(organism, go_terms_c, go_terms_p, go_terms_f, protein_name,
        go_ids, genes)`` where

        * ``organism`` is ``response["organism"]["scientificName"]``;
        * ``go_terms_c`` / ``go_terms_p`` / ``go_terms_f`` are lists of
          ``(label, go_id)`` tuples for GO terms whose aspect prefix is
          ``C`` (cellular component), ``P`` (biological process) and
          ``F`` (molecular function) respectively;
        * ``protein_name`` is the recommended full name, else the first
          submitted name, else ``"?"``;
        * ``go_ids`` is the set of GO identifiers with the ``GO:`` prefix
          removed;
        * ``genes`` is the set of ``geneName`` values.

    Raises:
        KeyError: If the decoded payload has no ``organism`` /
            ``scientificName`` (presumably the case for error payloads
            returned for unknown accessions -- not verified here).
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    url = "https://rest.uniprot.org/uniprotkb/{}?format=json".format(uniprot)
    http = requests if session is None else session
    response = json.loads(http.get(url, timeout=timeout).text)

    go_terms_c = []  # cellular component
    go_terms_f = []  # molecular function
    go_terms_p = []  # biological process
    buckets = {"C": go_terms_c, "F": go_terms_f, "P": go_terms_p}
    go_ids = set()
    genes = set()

    for item in response.get("uniProtKBCrossReferences", []):
        if item.get("database") != "GO":
            continue
        # "GO:0005829" -> "0005829"
        go_id = item["id"].split(":", 1)[-1]
        go_ids.add(go_id)

        term = _go_term_value(item.get("properties"))
        if not term:
            continue
        # Split on the FIRST colon only: the label itself may contain colons
        # (e.g. "F:NADH:ubiquinone reductase activity").
        aspect, sep, label = term.partition(":")
        bucket = buckets.get(aspect)
        if sep and bucket is not None:
            bucket.append((label, go_id))

    # Handle each gene on its own so one entry without a ``geneName`` does
    # not discard the named genes that follow it.
    skipped_gene = False
    for gene in response.get("genes") or []:
        try:
            genes.add(gene["geneName"]["value"])
        except (KeyError, TypeError):
            skipped_gene = True
    if skipped_gene:
        print("could not add genes properly")

    organism = response["organism"]["scientificName"]

    description = response.get("proteinDescription") or {}
    try:
        protein_name = description["recommendedName"]["fullName"]["value"]
    except (KeyError, TypeError):
        try:
            # No recommended name: use the first submitted name if present.
            protein_name = description["submissionNames"][0]["fullName"]["value"]
        except (KeyError, IndexError, TypeError):
            print("Could not find protein name!")
            protein_name = "?"

    return organism, go_terms_c, go_terms_p, go_terms_f, protein_name, go_ids, genes
if __name__ == "__main__":
    # Look up every accession given on the command line; with no arguments,
    # fall back to the original demo accession so a bare run prints the same
    # thing it always did.
    for accession in sys.argv[1:] or ["P0ACH5"]:
        print(getUniprot(accession))