token based login for flash scicat (#347)

* token based login for flash scicat * add test file for metadata * add requests-mock pytest fixture * fix the merge conflict * fix version problems * option for user to add token from main interface * fix the lint error * fix the kwds
OpenCOMPES · Feb 25, 2024 · 8bdf418 · 8bdf418
1 parent 23da8fa
commit 8bdf418
Show file tree

Hide file tree

Showing 5 changed files with 132 additions and 52 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -57,6 +57,8 @@ mypy = ">=1.6.0"
 types-pyyaml = ">=6.0.12.12"
 types-requests = ">=2.31.0.9"
 pyfakefs = ">=5.3.0"
+requests-mock = "^1.11.0"
+
 
 [tool.poetry.group.docs]
 optional = true

diff --git a/sed/loader/flash/loader.py b/sed/loader/flash/loader.py
@@ -750,6 +750,7 @@ def parquet_handler(
         load_parquet: bool = False,
         save_parquet: bool = False,
         force_recreate: bool = False,
+        **kwds,
     ) -> Tuple[dd.DataFrame, dd.DataFrame]:
         """
         Handles loading and saving of parquet files based on the provided parameters.
@@ -835,13 +836,14 @@ def parquet_handler(
 
         return dataframe_electron, dataframe_pulse
 
-    def parse_metadata(self) -> dict:
+    def parse_metadata(self, scicat_token: str = None, **kwds) -> dict:
         """Uses the MetadataRetriever class to fetch metadata from scicat for each run.
 
         Returns:
             dict: Metadata dictionary
+            scicat_token (str, optional): The scicat token to use for fetching metadata
         """
-        metadata_retriever = MetadataRetriever(self._config["metadata"])
+        metadata_retriever = MetadataRetriever(self._config["metadata"], scicat_token)
         metadata = metadata_retriever.get_metadata(
             beamtime_id=self._config["core"]["beamtime_id"],
             runs=self.runs,
@@ -924,7 +926,7 @@ def read_dataframe(
 
         df, df_timed = self.parquet_handler(data_parquet_dir, **kwds)
 
-        metadata = self.parse_metadata() if collect_metadata else {}
+        metadata = self.parse_metadata(**kwds) if collect_metadata else {}
         print(f"loading complete in {time.time() - t0: .2f} s")
 
         return df, df_timed, metadata

diff --git a/sed/loader/flash/metadata.py b/sed/loader/flash/metadata.py
@@ -15,17 +15,23 @@ class MetadataRetriever:
     on beamtime and run IDs.
     """
 
-    def __init__(self, metadata_config: Dict) -> None:
+    def __init__(self, metadata_config: Dict, scicat_token: str = None) -> None:
         """
         Initializes the MetadataRetriever class.
 
         Args:
             metadata_config (dict): Takes a dict containing
-            at least url, username and password
+            at least url, and optionally token for the scicat instance.
+            scicat_token (str, optional): The token to use for fetching metadata.
         """
-        self.url = metadata_config["scicat_url"]
-        self.username = metadata_config["scicat_username"]
-        self.password = metadata_config["scicat_password"]
+        self.token = metadata_config.get("scicat_token", None)
+        if scicat_token:
+            self.token = scicat_token
+        self.url = metadata_config.get("scicat_url", None)
+
+        if not self.token or not self.url:
+            raise ValueError("No URL or token provided for fetching metadata from scicat.")
+
         self.headers = {
             "Content-Type": "application/json",
             "Accept": "application/json",
@@ -80,9 +86,16 @@ def _get_metadata_per_run(self, pid: str) -> Dict:
         Raises:
             Exception: If the request to retrieve metadata fails.
         """
+        headers2 = dict(self.headers)
+        headers2["Authorization"] = f"Bearer {self.token}"
         try:
             # Create the dataset URL using the PID
-            dataset_response = requests.get(self._create_dataset_url_by_PID(pid), timeout=10)
+            dataset_response = requests.get(
+                self._create_dataset_url_by_PID(pid),
+                params={"access_token": self.token},
+                headers=headers2,
+                timeout=10,
+            )
             dataset_response.raise_for_status()  # Raise HTTPError if request fails
             # If the dataset request is successful, return the retrieved metadata
             # as a JSON object
@@ -105,37 +118,9 @@ def _create_dataset_url_by_PID(self, pid: str) -> str:  # pylint: disable=invali
         Raises:
             Exception: If the token request fails.
         """
-        npid = ("/" + pid).replace(
+        npid = pid.replace(
             "/",
             "%2F",
         )  # Replace slashes in the PID with URL-encoded slashes
-        url = f"{self.url}/RawDatasets/{npid}?access_token={self._get_token()}"
+        url = f"{self.url}/Datasets/{npid}"
         return url
-
-    def _get_token(self) -> str:
-        """
-        Retrieves the access token for authentication.
-
-        Returns:
-            str: The access token.
-
-        Raises:
-            Exception: If the token request fails.
-        """
-        try:
-            token_url = f"{self.url}/Users/login"
-            # Send a POST request to the token URL with the username and password
-            token_response = requests.post(
-                token_url,
-                headers=self.headers,
-                json={"username": self.username, "password": self.password},
-                timeout=10,
-            )
-            token_response.raise_for_status()
-            # If the token request is successful, return the access token from the response
-            return token_response.json()["id"]
-
-            # Otherwise issue warning
-        except requests.exceptions.RequestException as exception:
-            warnings.warn(f"Failed to retrieve authentication token: {str(exception)}")
-            return ""  # Return an empty string if token retrieval fails