Skip to content

Commit

Permalink
🚚✨ move uniprot code to module, creating user-facing function
Browse files Browse the repository at this point in the history
  • Loading branch information
enryH committed Dec 4, 2024
1 parent 834b54e commit 794c1af
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 40 deletions.
44 changes: 44 additions & 0 deletions acore/io/uniprot/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Uniprot API user functions for fetching annotations for UniProt IDs and providing
the results as a pandas.DataFrame."""

import pandas as pd

from .uniprot import (
check_id_mapping_results_ready,
get_id_mapping_results_link,
get_id_mapping_results_search,
submit_id_mapping,
)


# function for outside usage
def fetch_annotations(
    ids: pd.Index | list,
    fields: str = "accession,go_p,go_c,go_f",
) -> pd.DataFrame:
    """Fetch annotations for UniProt IDs.

    Combines several calls to the API of UniProt's knowledgebase (KB): an ID
    mapping job is submitted, its readiness is checked, and the results are
    requested as tab-separated text and parsed into a DataFrame.

    Parameters
    ----------
    ids : pd.Index | list
        Iterable of UniProt IDs to fetch annotations for.
    fields : str, optional
        Comma-separated fields to fetch, by default "accession,go_p,go_c,go_f".
        See for available fields: https://www.uniprot.org/help/return_fields

    Returns
    -------
    pd.DataFrame
        DataFrame with annotations of the UniProt IDs. The column names are
        taken from the first line of the response; the remaining lines are
        split on tabs and used as rows.

    Raises
    ------
    RuntimeError
        If the ID mapping job is reported as not having results.
    """
    job_id = submit_id_mapping(from_db="UniProtKB_AC-ID", to_db="UniProtKB", ids=ids)

    # Guard clause: without it the function would fall through and fail with
    # an UnboundLocalError because no DataFrame was ever built.
    if not check_id_mapping_results_ready(job_id):
        raise RuntimeError(f"UniProt ID mapping job {job_id!r} returned no results.")

    link = get_id_mapping_results_link(job_id)
    # add fields to the link to get more information
    # From and Entry (accession) are the same for UniProt IDs.
    results = get_id_mapping_results_search(f"{link}?fields={fields}&format=tsv")
    # The first line of the TSV response holds the column names.
    header = results.pop(0).split("\t")
    rows = [line.split("\t") for line in results]
    return pd.DataFrame(rows, columns=header)
3 changes: 3 additions & 0 deletions acore/io/uniprot.py → acore/io/uniprot/uniprot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from urllib.parse import parse_qs, urlencode, urlparse
from xml.etree import ElementTree

import pandas as pd
import requests
from requests.adapters import HTTPAdapter, Retry

Expand Down Expand Up @@ -176,6 +177,8 @@ def get_id_mapping_results_stream(url):
return decode_results(request, file_format, compressed)




if __name__ == "__main__":
# id mapping is used to create a link to a query (you can see the json in the browser)
# UniProtKB is the knowledgebase integrating all kinds of other databases
Expand Down
41 changes: 1 addition & 40 deletions docs/api_examples/enrichment_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import acore
import acore.differential_regulation
import acore.enrichment_analysis
from acore.io.uniprot import fetch_annotations

dsp_pandas.format.set_pandas_options(max_colwidth=15)

Expand Down Expand Up @@ -105,46 +106,6 @@
#

# %%
from acore.io.uniprot import (
check_id_mapping_results_ready,
get_id_mapping_results_link,
get_id_mapping_results_search,
submit_id_mapping,
)


def fetch_annotations(
    ids: pd.Index | list,
    fields: str = "accession,go_p,go_c,go_f",
) -> pd.DataFrame:
    """Fetch annotations for UniProt IDs.

    Combines several calls to the API of UniProt's knowledgebase (KB): an ID
    mapping job is submitted, its readiness is checked, and the results are
    requested as tab-separated text and parsed into a DataFrame.

    Parameters
    ----------
    ids : pd.Index | list
        Iterable of UniProt IDs to fetch annotations for.
    fields : str, optional
        Comma-separated fields to fetch, by default "accession,go_p,go_c,go_f".
        See for available fields: https://www.uniprot.org/help/return_fields

    Returns
    -------
    pd.DataFrame
        DataFrame with annotations of the UniProt IDs. The column names are
        taken from the first line of the response; the remaining lines are
        split on tabs and used as rows.

    Raises
    ------
    RuntimeError
        If the ID mapping job is reported as not having results.
    """
    job_id = submit_id_mapping(from_db="UniProtKB_AC-ID", to_db="UniProtKB", ids=ids)

    # Guard clause: without it the function would fall through and fail with
    # an UnboundLocalError because no DataFrame was ever built.
    if not check_id_mapping_results_ready(job_id):
        raise RuntimeError(f"UniProt ID mapping job {job_id!r} returned no results.")

    link = get_id_mapping_results_link(job_id)
    # add fields to the link to get more information
    # From and Entry (accession) are the same for UniProt IDs.
    results = get_id_mapping_results_search(f"{link}?fields={fields}&format=tsv")
    # The first line of the TSV response holds the column names.
    header = results.pop(0).split("\t")
    rows = [line.split("\t") for line in results]
    return pd.DataFrame(rows, columns=header)


fname_annotations = "downloaded/annotations.csv"
fname = Path(fname_annotations)
try:
Expand Down

0 comments on commit 794c1af

Please sign in to comment.