Skip to content

Commit 4a6ec53

Browse files
committed
Added metadata retrieval from the beamtime folder
1 parent ec2160f commit 4a6ec53

File tree

3 files changed

+127
-2
lines changed

3 files changed

+127
-2
lines changed

src/sed/core/config_model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class PathsModel(BaseModel):
2626

2727
raw: DirectoryPath
2828
processed: Optional[Union[DirectoryPath, NewPath]] = None
29+
meta: Optional[Union[DirectoryPath, NewPath]] = None
2930

3031

3132
class CopyToolModel(BaseModel):

src/sed/loader/cfel/loader.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,10 @@ def __init__(self, config: dict, verbose: bool = True) -> None:
5656
set_verbosity(logger, self._verbose)
5757

5858
self.instrument: str = self._config["core"].get("instrument", "hextof") # default is hextof
59+
self.beamtime_dir: str = None
5960
self.raw_dir: str = None
6061
self.processed_dir: str = None
62+
self.meta_dir: str = None
6163

6264
@property
6365
def verbose(self) -> bool:
@@ -94,9 +96,14 @@ def _initialize_dirs(self) -> None:
9496
# Only raw_dir is necessary, processed_dir can be based on raw_dir, if not provided
9597
if "paths" in self._config["core"]:
9698
raw_dir = Path(self._config["core"]["paths"].get("raw", ""))
99+
print(raw_dir)
97100
processed_dir = Path(
98101
self._config["core"]["paths"].get("processed", raw_dir.joinpath("processed")),
99102
)
103+
meta_dir = Path(
104+
self._config["core"]["paths"].get("meta", raw_dir.joinpath("meta")),
105+
)
106+
beamtime_dir = Path(raw_dir).parent
100107

101108
else:
102109
try:
@@ -130,11 +137,14 @@ def _initialize_dirs(self) -> None:
130137
raw_dir = raw_paths[0].resolve()
131138

132139
processed_dir = beamtime_dir.joinpath("processed")
140+
meta_dir = beamtime_dir.joinpath("meta/fabtrack/")
133141

134142
processed_dir.mkdir(parents=True, exist_ok=True)
135143

144+
self.beamtime_dir = str(beamtime_dir)
136145
self.raw_dir = str(raw_dir)
137146
self.processed_dir = str(processed_dir)
147+
self.meta_dir = str(meta_dir)
138148

139149
@property
140150
def available_runs(self) -> list[int]:
@@ -209,7 +219,7 @@ def get_files_from_run_id( # type: ignore[override]
209219
# Return the list of found files
210220
return [str(file.resolve()) for file in files]
211221

212-
def parse_metadata(self, token: str = None) -> dict:
222+
def parse_scicat_metadata(self, token: str = None) -> dict:
213223
"""Uses the MetadataRetriever class to fetch metadata from scicat for each run.
214224
215225
Returns:
@@ -225,6 +235,23 @@ def parse_metadata(self, token: str = None) -> dict:
225235

226236
return metadata
227237

238+
def parse_local_metadata(self) -> dict:
    """Collect metadata for the loaded runs from the local beamtime folder.

    Builds a MetadataRetriever from the "metadata" section of the config and
    delegates to its ``get_local_metadata`` method, passing the beamtime id from
    the config together with the beamtime/meta directories, the runs and the
    metadata already stored on this loader.

    Returns:
        dict: Metadata dictionary
    """
    retriever = MetadataRetriever(self._config["metadata"])
    return retriever.get_local_metadata(
        beamtime_id=self._config["core"]["beamtime_id"],
        beamtime_dir=self.beamtime_dir,
        meta_dir=self.meta_dir,
        runs=self.runs,
        metadata=self.metadata,
    )
254+
228255
def get_count_rate(
229256
self,
230257
fids: Sequence[int] = None, # noqa: ARG002
@@ -403,7 +430,12 @@ def read_dataframe(
403430
filter_timed_by_electron=filter_timed_by_electron,
404431
)
405432

406-
self.metadata.update(self.parse_metadata(token) if collect_metadata else {})
433+
if len(self.parse_scicat_metadata(token)) == 0:
434+
print("No SciCat metadata available, checking local folder")
435+
self.metadata.update(self.parse_local_metadata())
436+
else:
437+
print("Metadata taken from SciCat")
438+
self.metadata.update(self.parse_scicat_metadata(token) if collect_metadata else {})
407439
self.metadata.update(bh.metadata)
408440

409441
print(f"loading complete in {time.time() - t0: .2f} s")

src/sed/loader/flash/metadata.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from __future__ import annotations
66

77
import requests
8+
import json
9+
import yaml
810

911
from sed.core.config import read_env_var
1012
from sed.core.config import save_env_var
@@ -144,3 +146,93 @@ def _create_new_dataset_url(self, pid: str) -> str:
144146
def _reformat_pid(self, pid: str) -> str:
145147
"""SciCat adds a pid-prefix + "/" but at DESY prefix = "" """
146148
return (pid).replace("/", "%2F")
149+
150+
def get_local_metadata(
    self,
    beamtime_id: str,
    beamtime_dir: str,
    meta_dir: str,
    runs: list,
    metadata: dict = None,
) -> dict:
    """
    Retrieves metadata for a given beamtime ID and list of runs from the local
    beamtime folder instead of SciCat.

    The general beamtime metadata is read from the json file in ``beamtime_dir`` and
    merged into ``metadata``. The per-run metadata is read from one yaml file per run
    in ``meta_dir``; its ``_data`` section is stored under ``"scientificMetadata"``.

    Args:
        beamtime_id (str): The ID of the beamtime.
        beamtime_dir (str): Folder containing ``beamtime-metadata-{beamtime_id}.json``.
        meta_dir (str): Folder containing the per-run ``{run}_1.yaml`` files.
        runs (list): A list of run IDs.
        metadata (dict, optional): The existing metadata dictionary. It is updated
            in place. Defaults to None.

    Returns:
        dict: The updated metadata dictionary.
    """
    if metadata is None:
        metadata = {}

    metadata.update(self._get_beamtime_metadata(beamtime_dir, beamtime_id))

    # None means "no run processed": "scientificMetadata" is then left untouched
    # instead of failing on an unbound variable.
    scientific_metadata = None
    for run in runs:
        logger.debug(f"Retrieving metadata for PID: {run}")
        run_metadata = self._get_local_metadata_per_run(meta_dir, run)
        # An empty yaml file parses to None and a file may lack the "_data" section;
        # fall back to an empty dictionary instead of raising.
        # TODO: Not correct for multiple runs - only the last run's data is kept.
        if isinstance(run_metadata, dict):
            scientific_metadata = run_metadata.get("_data", {})
        else:
            scientific_metadata = {}

    if scientific_metadata is not None:
        metadata.update({"scientificMetadata": scientific_metadata})

    logger.debug(f"Retrieved metadata with {len(metadata)} entries")
    return metadata
187+
188+
def _get_beamtime_metadata(
    self,
    beamtime_dir: str,
    beamtime_id: str,
) -> dict:
    """
    Retrieves general metadata for a given beamtime ID from the
    beamtime-metadata-{beamtime_id}.json file.

    Args:
        beamtime_dir (str): Folder containing the beamtime metadata json file.
        beamtime_id (str): The ID of the beamtime.

    Returns:
        dict: The parsed content of the json file, or an empty dictionary if the
        file could not be read or parsed (a warning is logged in that case).
    """
    try:
        # The context manager closes the file handle even if parsing fails.
        with open(
            f"{beamtime_dir}/beamtime-metadata-{beamtime_id}.json",
            "r",
            encoding="utf-8",
        ) as f:
            return json.load(f)

    # OSError: missing/unreadable file; ValueError: invalid json or undecodable bytes
    except (OSError, ValueError) as exception:
        logger.warning(f"Failed to retrieve metadata for beamtime ID {beamtime_id}: {str(exception)}")
        return {}  # Return an empty dictionary for this beamtime ID
214+
215+
216+
def _get_local_metadata_per_run(self, meta_dir: str, run: str) -> dict:
    """
    Retrieves metadata for a specific run from its yaml file in the local meta folder.

    Args:
        meta_dir (str): Folder containing the per-run yaml files.
        run (str): The run ID; the file ``{run}_1.yaml`` is read.

    Returns:
        dict: The parsed yaml content, or ``{"_data": {}}`` if the file is empty or
        could not be read or parsed (a warning is logged on failure).
    """
    run = str(run)
    try:
        with open(f"{meta_dir}/{run}_1.yaml", "r") as stream:
            # Reported through the module logger so it follows the configured verbosity
            logger.info("Getting metadata from local folder")
            run_metadata = yaml.safe_load(stream)

    # OSError: missing/unreadable file; ValueError: undecodable bytes;
    # yaml.YAMLError: malformed yaml
    except (OSError, ValueError, yaml.YAMLError) as exception:
        logger.warning(f"Failed to retrieve metadata for PID {run}: {str(exception)}")
        return {"_data": {}}  # Return an empty dictionary for this run

    # An empty yaml document parses to None; keep the documented dict contract.
    if run_metadata is None:
        return {"_data": {}}
    return run_metadata

0 commit comments

Comments
 (0)