Skip to content

Commit

Permalink
token based login for flash scicat (#347)
Browse files Browse the repository at this point in the history
* token based login for flash scicat

* add test file for metadata

* add requests-mock pytest fixture

* fix the merge conflict

* fix version problems

* option for user to add token from main interface

* fix the lint error

* fix the kwds
  • Loading branch information
zain-sohail authored Feb 25, 2024
1 parent 23da8fa commit 8bdf418
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 52 deletions.
61 changes: 48 additions & 13 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ mypy = ">=1.6.0"
types-pyyaml = ">=6.0.12.12"
types-requests = ">=2.31.0.9"
pyfakefs = ">=5.3.0"
requests-mock = "^1.11.0"


[tool.poetry.group.docs]
optional = true
Expand Down
8 changes: 5 additions & 3 deletions sed/loader/flash/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,7 @@ def parquet_handler(
load_parquet: bool = False,
save_parquet: bool = False,
force_recreate: bool = False,
**kwds,
) -> Tuple[dd.DataFrame, dd.DataFrame]:
"""
Handles loading and saving of parquet files based on the provided parameters.
Expand Down Expand Up @@ -835,13 +836,14 @@ def parquet_handler(

return dataframe_electron, dataframe_pulse

def parse_metadata(self) -> dict:
def parse_metadata(self, scicat_token: str = None, **kwds) -> dict:
"""Uses the MetadataRetriever class to fetch metadata from scicat for each run.
Returns:
dict: Metadata dictionary
scicat_token (str, optional): The scicat token to use for fetching metadata
"""
metadata_retriever = MetadataRetriever(self._config["metadata"])
metadata_retriever = MetadataRetriever(self._config["metadata"], scicat_token)
metadata = metadata_retriever.get_metadata(
beamtime_id=self._config["core"]["beamtime_id"],
runs=self.runs,
Expand Down Expand Up @@ -924,7 +926,7 @@ def read_dataframe(

df, df_timed = self.parquet_handler(data_parquet_dir, **kwds)

metadata = self.parse_metadata() if collect_metadata else {}
metadata = self.parse_metadata(**kwds) if collect_metadata else {}
print(f"loading complete in {time.time() - t0: .2f} s")

return df, df_timed, metadata
Expand Down
57 changes: 21 additions & 36 deletions sed/loader/flash/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,23 @@ class MetadataRetriever:
on beamtime and run IDs.
"""

def __init__(self, metadata_config: Dict) -> None:
def __init__(self, metadata_config: Dict, scicat_token: str = None) -> None:
"""
Initializes the MetadataRetriever class.
Args:
metadata_config (dict): Takes a dict containing
at least url, username and password
at least url, and optionally token for the scicat instance.
scicat_token (str, optional): The token to use for fetching metadata.
"""
self.url = metadata_config["scicat_url"]
self.username = metadata_config["scicat_username"]
self.password = metadata_config["scicat_password"]
self.token = metadata_config.get("scicat_token", None)
if scicat_token:
self.token = scicat_token
self.url = metadata_config.get("scicat_url", None)

if not self.token or not self.url:
raise ValueError("No URL or token provided for fetching metadata from scicat.")

self.headers = {
"Content-Type": "application/json",
"Accept": "application/json",
Expand Down Expand Up @@ -80,9 +86,16 @@ def _get_metadata_per_run(self, pid: str) -> Dict:
Raises:
Exception: If the request to retrieve metadata fails.
"""
headers2 = dict(self.headers)
headers2["Authorization"] = f"Bearer {self.token}"
try:
# Create the dataset URL using the PID
dataset_response = requests.get(self._create_dataset_url_by_PID(pid), timeout=10)
dataset_response = requests.get(
self._create_dataset_url_by_PID(pid),
params={"access_token": self.token},
headers=headers2,
timeout=10,
)
dataset_response.raise_for_status() # Raise HTTPError if request fails
# If the dataset request is successful, return the retrieved metadata
# as a JSON object
Expand All @@ -105,37 +118,9 @@ def _create_dataset_url_by_PID(self, pid: str) -> str: # pylint: disable=invali
Raises:
Exception: If the token request fails.
"""
npid = ("/" + pid).replace(
npid = pid.replace(
"/",
"%2F",
) # Replace slashes in the PID with URL-encoded slashes
url = f"{self.url}/RawDatasets/{npid}?access_token={self._get_token()}"
url = f"{self.url}/Datasets/{npid}"
return url

def _get_token(self) -> str:
    """
    Log in to scicat and return the resulting access token.

    Posts the configured username and password as JSON to
    ``<self.url>/Users/login`` and reads the ``id`` field of the JSON reply.

    Returns:
        str: The access token, or an empty string when the login request
        fails (a warning is issued in that case instead of raising).
    """
    login_endpoint = f"{self.url}/Users/login"
    credentials = {"username": self.username, "password": self.password}

    try:
        # The whole exchange stays inside the try so that any
        # requests-level failure ends up on the warning path below.
        reply = requests.post(
            login_endpoint,
            headers=self.headers,
            json=credentials,
            timeout=10,
        )
        reply.raise_for_status()
        payload = reply.json()
        return payload["id"]
    except requests.exceptions.RequestException as error:
        # Best effort: report the problem and hand back an empty token.
        warnings.warn(f"Failed to retrieve authentication token: {error!s}")
        return ""
Loading

0 comments on commit 8bdf418

Please sign in to comment.