🚚✨ move uniprot code to module, creating user-facing function

Multiomics-Analytics-Group · Dec 4, 2024 · 794c1af · 794c1af
1 parent 834b54e
commit 794c1af
Show file tree

Hide file tree

Showing 3 changed files with 48 additions and 40 deletions.
diff --git a/acore/io/uniprot/__init__.py b/acore/io/uniprot/__init__.py
@@ -0,0 +1,44 @@
+"""Uniprot API user functions for fetching annotations for UniProt IDs and providing 
+the results as a pandas.DataFrame."""
+
+import pandas as pd
+
+from .uniprot import (
+    check_id_mapping_results_ready,
+    get_id_mapping_results_link,
+    get_id_mapping_results_search,
+    submit_id_mapping,
+)
+
+
+# function for outside usage
+def fetch_annotations(ids: pd.Index | list) -> pd.DataFrame:
+    """Fetch annotations for UniProt IDs. Combines several calls to the API of UniProt's
+    knowledgebase (KB).
+
+    Parameters
+    ----------
+    ids : pd.Index | list
+        Iterable of UniProt IDs. The fields accession, go_p, go_c and go_f are fetched.
+        See https://www.uniprot.org/help/return_fields for the available fields.
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with annotations of the UniProt IDs.
+
+    Raises
+    ------
+    RuntimeError
+        If the ID mapping job does not report its results as ready.
+    """
+    job_id = submit_id_mapping(from_db="UniProtKB_AC-ID", to_db="UniProtKB", ids=ids)
+    # Fail with a clear message; `results` used to be unbound on this path.
+    if not check_id_mapping_results_ready(job_id):
+        raise RuntimeError(f"UniProt ID mapping job {job_id} has no results ready.")
+    link = get_id_mapping_results_link(job_id)
+    # Add fields to the link; From and Entry (accession) are the same for UniProt IDs.
+    url = link + "?fields=accession,go_p,go_c,go_f&format=tsv"
+    results = get_id_mapping_results_search(url)
+    header = results.pop(0).split("\t")  # first TSV line holds the column names
+    return pd.DataFrame([line.split("\t") for line in results], columns=header)
diff --git a/acore/io/uniprot.py → acore/io/uniprot/uniprot.py b/acore/io/uniprot.py → acore/io/uniprot/uniprot.py
@@ -10,6 +10,7 @@
 from urllib.parse import parse_qs, urlencode, urlparse
 from xml.etree import ElementTree
 
+import pandas as pd
 import requests
 from requests.adapters import HTTPAdapter, Retry
 
@@ -176,6 +177,8 @@ def get_id_mapping_results_stream(url):
     return decode_results(request, file_format, compressed)
 
 
+
+
 if __name__ == "__main__":
     # id mapping is used to create a link to a query (you can see the json in the browser)
     # UniProtKB is the knowleadgebase integrating all kind of other databases

diff --git a/docs/api_examples/enrichment_analysis.py b/docs/api_examples/enrichment_analysis.py
@@ -24,6 +24,7 @@
 import acore
 import acore.differential_regulation
 import acore.enrichment_analysis
+from acore.io.uniprot import fetch_annotations
 
 dsp_pandas.format.set_pandas_options(max_colwidth=15)
 
@@ -105,46 +106,6 @@
 #
 
 # %%
-from acore.io.uniprot import (
-    check_id_mapping_results_ready,
-    get_id_mapping_results_link,
-    get_id_mapping_results_search,
-    submit_id_mapping,
-)
-
-
-def fetch_annotations(ids: pd.Index | list) -> pd.DataFrame:
-    """Fetch annotations for UniProt IDs. Combines several calls to the API of UniProt's
-    knowledgebase (KB).
-
-    Parameters
-    ----------
-    ids : pd.Index | list
-        Iterable of UniProt IDs. Fetches annotations as speecified by the specified fields.
-    fields : str, optional
-        Fields to fetch, by default "accession,go_p,go_c. See for availble fields:
-        https://www.uniprot.org/help/return_fields
-
-    Returns
-    -------
-    pd.DataFrame
-        DataFrame with annotations of the UniProt IDs.
-    """
-    job_id = submit_id_mapping(from_db="UniProtKB_AC-ID", to_db="UniProtKB", ids=ids)
-
-    if check_id_mapping_results_ready(job_id):
-        link = get_id_mapping_results_link(job_id)
-        # add fields to the link to get more information
-        # From and Entry (accession) are the same for UniProt IDs.
-        results = get_id_mapping_results_search(
-            link + "?fields=accession,go_p,go_c,go_f&format=tsv"
-        )
-    header = results.pop(0).split("\t")
-    results = [line.split("\t") for line in results]
-    df = pd.DataFrame(results, columns=header)
-    return df
-
-
 fname_annotations = "downloaded/annotations.csv"
 fname = Path(fname_annotations)
 try: