Merge pull request #487 from OpenCOMPES/pydantic-model
Pydantic model
rettigl authored Oct 22, 2024
2 parents d56d6d9 + fea015a commit 68b2eaf
Showing 40 changed files with 1,096 additions and 596 deletions.
2 changes: 2 additions & 0 deletions .cspell/custom-dictionary.txt
@@ -126,6 +126,7 @@ ftype
fwhm
genindex
getgid
+getgrgid
getmtime
gpfs
griddata
@@ -290,6 +291,7 @@ ptargs
pullrequest
pval
pyarrow
+pydantic
pyenv
pygments
pynxtools
134 changes: 133 additions & 1 deletion poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -38,6 +38,7 @@ tqdm = ">=4.62.3"
xarray = ">=0.20.2"
joblib = ">=1.2.0"
pyarrow = ">=14.0.1, <17.0"
+pydantic = ">=2.8.2"
jupyter = {version = ">=1.0.0", optional = true}
ipykernel = {version = ">=6.9.1", optional = true}
jupyterlab = {version = "^3.4.0", optional = true}
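The new dependency underpins the PR's titular change: config sections get validated by pydantic models rather than consumed as bare dicts. A minimal sketch of how the nested dataframe section could look as a pydantic v2 model; field names follow the keys used in this diff, while model names and default values are illustrative, not necessarily those added by the PR:

    from pydantic import BaseModel, ConfigDict

    class DataframeColumnsModel(BaseModel):
        # Column-name mapping; keys mirror config["dataframe"]["columns"] as used below.
        model_config = ConfigDict(extra="allow")  # tolerate loader-specific extra columns
        x: str = "X"
        y: str = "Y"
        tof: str = "t"
        adc: str = "ADC"
        bias: str = "sampleBias"        # illustrative default
        delay: str = "delay"
        energy: str = "energy"
        corrected_tof: str = "tm"       # illustrative default
        sector_id: str = "dldSectorID"  # illustrative default

    class DataframeModel(BaseModel):
        columns: DataframeColumnsModel = DataframeColumnsModel()
        tof_binwidth: float = 4.125e-12  # illustrative default
        tof_binning: int = 1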
61 changes: 30 additions & 31 deletions sed/calibrator/delay.py
@@ -51,10 +51,10 @@ def __init__(
self._verbose = verbose
set_verbosity(logger, self._verbose)

-self.adc_column: str = self._config["dataframe"].get("adc_column", None)
-self.delay_column: str = self._config["dataframe"]["delay_column"]
-self.corrected_delay_column = self._config["dataframe"].get(
-"corrected_delay_column",
+self.adc_column: str = config["dataframe"]["columns"]["adc"]
+self.delay_column: str = config["dataframe"]["columns"]["delay"]
+self.corrected_delay_column = self._config["dataframe"]["columns"].get(
+"corrected_delay",
self.delay_column,
)
self.calibration: dict[str, Any] = self._config["delay"].get("calibration", {})
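The pattern above repeats across the calibrators: flat per-column keys such as adc_column move under a nested columns mapping. Schematically, with illustrative values:

    # old, removed:  config["dataframe"]["adc_column"]
    # new, this PR:  config["dataframe"]["columns"]["adc"]
    config = {"dataframe": {"columns": {"adc": "ADC", "delay": "delay"}}}
    adc_column = config["dataframe"]["columns"]["adc"]  # -> "ADC"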
@@ -102,9 +102,9 @@ def append_delay_axis(
df (pd.DataFrame | dask.dataframe.DataFrame): The dataframe where
to apply the delay calibration to.
adc_column (str, optional): Source column for delay calibration.
-Defaults to config["dataframe"]["adc_column"].
+Defaults to config["dataframe"]["columns"]["adc"].
delay_column (str, optional): Destination column for delay calibration.
-Defaults to config["dataframe"]["delay_column"].
+Defaults to config["dataframe"]["columns"]["delay"].
calibration (dict, optional): Calibration dictionary with parameters for
delay calibration.
adc_range (tuple | list | np.ndarray, optional): The range of used
@@ -146,7 +146,7 @@ def append_delay_axis(
or datafile is not None
):
calibration = {}
calibration["creation_date"] = datetime.now().timestamp()
calibration["creation_date"] = datetime.now()
if adc_range is not None:
calibration["adc_range"] = adc_range
if delay_range is not None:
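Alongside the key migration, creation_date changes from a float Unix timestamp to a datetime object, so later reporting can call strftime directly. A minimal sketch of the before and after:

    from datetime import datetime

    creation_date = datetime.now()  # new: stored as a datetime object
    datestring = creation_date.strftime("%m/%d/%Y, %H:%M:%S")
    # old equivalent, when a float timestamp was stored:
    # datestring = datetime.fromtimestamp(creation_date_ts).strftime("%m/%d/%Y, %H:%M:%S")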
@@ -158,9 +158,7 @@ def append_delay_axis(
else:
# report usage of loaded parameters
if "creation_date" in calibration and not suppress_output:
-datestring = datetime.fromtimestamp(calibration["creation_date"]).strftime(
-"%m/%d/%Y, %H:%M:%S",
-)
+datestring = calibration["creation_date"].strftime("%m/%d/%Y, %H:%M:%S")
logger.info(f"Using delay calibration parameters generated on {datestring}")

if adc_column is None:
@@ -212,7 +210,7 @@ def append_delay_axis(
)
if not suppress_output:
logger.info(f"Converted delay_range (ps) = {calibration['delay_range']}")
calibration["creation_date"] = datetime.now().timestamp()
calibration["creation_date"] = datetime.now()

if "delay_range" in calibration.keys():
df[delay_column] = calibration["delay_range"][0] + (
@@ -285,9 +283,10 @@ def add_offsets(
# pylint:disable=duplicate-code
# use passed parameters, overwrite config
offsets = {}
offsets["creation_date"] = datetime.now().timestamp()
offsets["creation_date"] = datetime.now()
# column-based offsets
if columns is not None:
offsets["columns"] = {}
if weights is None:
weights = 1
if isinstance(weights, (int, float, np.integer, np.floating)):
@@ -314,7 +313,7 @@ def add_offsets(

# store in offsets dictionary
for col, weight, pmean, red in zip(columns, weights, preserve_mean, reductions):
-offsets[col] = {
+offsets["columns"][col] = {
"weight": weight,
"preserve_mean": pmean,
"reduction": red,
@@ -330,9 +329,7 @@ def add_offsets(
offsets["flip_delay_axis"] = flip_delay_axis

elif "creation_date" in offsets and not suppress_output:
-datestring = datetime.fromtimestamp(offsets["creation_date"]).strftime(
-"%m/%d/%Y, %H:%M:%S",
-)
+datestring = offsets["creation_date"].strftime("%m/%d/%Y, %H:%M:%S")
logger.info(f"Using delay offset parameters generated on {datestring}")

if len(offsets) > 0:
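Per-column offset parameters thus move under a dedicated "columns" key instead of sitting beside scalar entries such as "constant" and "flip_delay_axis". The resulting dictionary shape, with an illustrative column name:

    from datetime import datetime

    offsets = {
        "creation_date": datetime.now(),
        "constant": 0.0,
        "flip_delay_axis": False,
        "columns": {
            "bam": {"weight": 1, "preserve_mean": True, "reduction": None},
        },
    }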
@@ -359,21 +356,23 @@ def add_offsets(
f"Invalid value for flip_delay_axis in config: {flip_delay_axis}.",
)
log_str += f"\n Flip delay axis: {flip_delay_axis}"
-else:
-columns.append(k)
-try:
-weight = v["weight"]
-except KeyError:
-weight = 1
-weights.append(weight)
-pm = v.get("preserve_mean", False)
-preserve_mean.append(pm)
-red = v.get("reduction", None)
-reductions.append(red)
-log_str += (
-f"\n    Column[{k}]: Weight={weight}, Preserve Mean: {pm}, "
-f"Reductions: {red}."
-)
+elif k == "columns":
+for column_name, column_dict in offsets["columns"].items():
+columns.append(column_name)
+weight = column_dict.get("weight", 1)
+if not isinstance(weight, (int, float, np.integer, np.floating)):
+raise TypeError(
+f"Invalid type for weight of column {column_name}: {type(weight)}",
+)
+weights.append(weight)
+pm = column_dict.get("preserve_mean", False)
+preserve_mean.append(pm)
+red = column_dict.get("reduction", None)
+reductions.append(red)
+log_str += (
+f"\n    Column[{column_name}]: Weight={weight}, Preserve Mean: {pm}, "
+f"Reductions: {red}."
+)

if not suppress_output:
logger.info(log_str)
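Note that the rewritten branch also validates weights eagerly instead of accepting whatever the config holds; sketched with an illustrative column name:

    offsets = {"columns": {"bam": {"weight": "1"}}}  # weight mistyped as a string
    # parsing this in add_offsets now raises:
    # TypeError: Invalid type for weight of column bam: <class 'str'>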
111 changes: 51 additions & 60 deletions sed/calibrator/energy.py
@@ -107,12 +107,12 @@ def __init__(
self.peaks: np.ndarray = np.asarray([])
self.calibration: dict[str, Any] = self._config["energy"].get("calibration", {})

-self.tof_column = self._config["dataframe"]["tof_column"]
-self.tof_ns_column = self._config["dataframe"].get("tof_ns_column", None)
-self.corrected_tof_column = self._config["dataframe"]["corrected_tof_column"]
-self.energy_column = self._config["dataframe"]["energy_column"]
-self.x_column = self._config["dataframe"]["x_column"]
-self.y_column = self._config["dataframe"]["y_column"]
+self.tof_column = self._config["dataframe"]["columns"]["tof"]
+self.tof_ns_column = self._config["dataframe"]["columns"].get("tof_ns", None)
+self.corrected_tof_column = self._config["dataframe"]["columns"]["corrected_tof"]
+self.energy_column = self._config["dataframe"]["columns"]["energy"]
+self.x_column = self._config["dataframe"]["columns"]["x"]
+self.y_column = self._config["dataframe"]["columns"]["y"]
self.binwidth: float = self._config["dataframe"]["tof_binwidth"]
self.binning: int = self._config["dataframe"]["tof_binning"]
self.x_width = self._config["energy"]["x_width"]
@@ -121,7 +121,7 @@ def __init__(
self.tof_fermi = self._config["energy"]["tof_fermi"] / self.binning
self.color_clip = self._config["energy"]["color_clip"]
self.sector_delays = self._config["dataframe"].get("sector_delays", None)
-self.sector_id_column = self._config["dataframe"].get("sector_id_column", None)
+self.sector_id_column = self._config["dataframe"]["columns"].get("sector_id", None)
self.offsets: dict[str, Any] = self._config["energy"].get("offsets", {})
self.correction: dict[str, Any] = self._config["energy"].get("correction", {})

@@ -217,7 +217,7 @@ def bin_data(
Args:
data_files (list[str]): list of file names to bin
axes (list[str], optional): bin axes. Defaults to
config["dataframe"]["tof_column"].
config["dataframe"]["columns"]["tof"].
bins (list[int], optional): number of bins.
Defaults to config["energy"]["bins"].
ranges (Sequence[tuple[float, float]], optional): bin ranges.
@@ -612,7 +612,7 @@ def calibrate(
else:
raise NotImplementedError()

self.calibration["creation_date"] = datetime.now().timestamp()
self.calibration["creation_date"] = datetime.now()
return self.calibration

def view(
@@ -802,9 +802,9 @@ def append_energy_axis(
df (pd.DataFrame | dask.dataframe.DataFrame):
Dataframe to apply the energy axis calibration to.
tof_column (str, optional): Label of the source column.
-Defaults to config["dataframe"]["tof_column"].
+Defaults to config["dataframe"]["columns"]["tof"].
energy_column (str, optional): Label of the destination column.
-Defaults to config["dataframe"]["energy_column"].
+Defaults to config["dataframe"]["columns"]["energy"].
calibration (dict, optional): Calibration dictionary. If provided,
overrides calibration from class or config.
Defaults to self.calibration or config["energy"]["calibration"].
@@ -843,12 +843,10 @@ def append_energy_axis(
if len(kwds) > 0:
for key, value in kwds.items():
calibration[key] = value
calibration["creation_date"] = datetime.now().timestamp()
calibration["creation_date"] = datetime.now()

elif "creation_date" in calibration and not suppress_output:
-datestring = datetime.fromtimestamp(calibration["creation_date"]).strftime(
-"%m/%d/%Y, %H:%M:%S",
-)
+datestring = calibration["creation_date"].strftime("%m/%d/%Y, %H:%M:%S")
logger.info(f"Using energy calibration parameters generated on {datestring}")

# try to determine calibration type if not provided
@@ -915,17 +913,17 @@ def append_energy_axis(
df[energy_column] = df[energy_column] + scale_sign * bias_voltage
if not suppress_output:
logger.debug(f"Shifted energy column by constant bias value: {bias_voltage}.")
-elif self._config["dataframe"]["bias_column"] in df.columns:
+elif self._config["dataframe"]["columns"]["bias"] in df.columns:
df = dfops.offset_by_other_columns(
df=df,
target_column=energy_column,
-offset_columns=self._config["dataframe"]["bias_column"],
+offset_columns=self._config["dataframe"]["columns"]["bias"],
weights=scale_sign,
)
if not suppress_output:
logger.debug(
"Shifted energy column by bias column: "
f"{self._config['dataframe']['bias_column']}.",
f"{self._config['dataframe']['columns']['bias']}.",
)
else:
logger.warning(
@@ -948,9 +946,9 @@ def append_tof_ns_axis(
Args:
df (pd.DataFrame | dask.dataframe.DataFrame): Dataframe to convert.
tof_column (str, optional): Name of the column containing the
-time-of-flight steps. Defaults to config["dataframe"]["tof_column"].
+time-of-flight steps. Defaults to config["dataframe"]["columns"]["tof"].
tof_ns_column (str, optional): Name of the column to store the
-time-of-flight in nanoseconds. Defaults to config["dataframe"]["tof_ns_column"].
+time-of-flight in nanoseconds. Defaults to config["dataframe"]["columns"]["tof_ns"].
binwidth (float, optional): Time-of-flight binwidth in ns.
Defaults to config["energy"]["tof_binwidth"].
binning (int, optional): Time-of-flight binning factor.
@@ -1202,7 +1200,7 @@ def common_apply_func(apply: bool):  # noqa: ARG001
self.correction["amplitude"] = correction["amplitude"]
self.correction["center"] = correction["center"]
self.correction["correction_type"] = correction["correction_type"]
self.correction["creation_date"] = datetime.now().timestamp()
self.correction["creation_date"] = datetime.now()
amplitude_slider.close()
x_center_slider.close()
y_center_slider.close()
@@ -1381,9 +1379,9 @@ def apply_energy_correction(
df (pd.DataFrame | dask.dataframe.DataFrame): The dataframe where
to apply the energy correction to.
tof_column (str, optional): Name of the source column to convert.
-Defaults to config["dataframe"]["tof_column"].
+Defaults to config["dataframe"]["columns"]["tof"].
new_tof_column (str, optional): Name of the destination column to convert.
-Defaults to config["dataframe"]["corrected_tof_column"].
+Defaults to config["dataframe"]["columns"]["corrected_tof"].
correction_type (str, optional): Type of correction to apply to the TOF
axis. Valid values are:
@@ -1440,12 +1438,10 @@ def apply_energy_correction(
for key, value in kwds.items():
correction[key] = value

correction["creation_date"] = datetime.now().timestamp()
correction["creation_date"] = datetime.now()

elif "creation_date" in correction and not suppress_output:
-datestring = datetime.fromtimestamp(correction["creation_date"]).strftime(
-"%m/%d/%Y, %H:%M:%S",
-)
+datestring = correction["creation_date"].strftime("%m/%d/%Y, %H:%M:%S")
logger.info(f"Using energy correction parameters generated on {datestring}")

missing_keys = {"correction_type", "center", "amplitude"} - set(correction.keys())
@@ -1494,9 +1490,9 @@ def align_dld_sectors(
Args:
df (dask.dataframe.DataFrame): Dataframe to use.
tof_column (str, optional): Name of the column containing the time-of-flight values.
-Defaults to config["dataframe"]["tof_column"].
+Defaults to config["dataframe"]["columns"]["tof"].
sector_id_column (str, optional): Name of the column containing the sector id values.
-Defaults to config["dataframe"]["sector_id_column"].
+Defaults to config["dataframe"]["columns"]["sector_id"].
sector_delays (np.ndarray, optional): Array containing the sector delays. Defaults to
config["dataframe"]["sector_delays"].
@@ -1592,9 +1588,10 @@ def add_offsets(
# pylint:disable=duplicate-code
# use passed parameters, overwrite config
offsets = {}
offsets["creation_date"] = datetime.now().timestamp()
offsets["creation_date"] = datetime.now()
# column-based offsets
if columns is not None:
offsets["columns"] = {}
if isinstance(columns, str):
columns = [columns]

@@ -1623,7 +1620,7 @@ def add_offsets(

# store in offsets dictionary
for col, weight, pmean, red in zip(columns, weights, preserve_mean, reductions):
-offsets[col] = {
+offsets["columns"][col] = {
"weight": weight,
"preserve_mean": pmean,
"reduction": red,
@@ -1636,9 +1633,7 @@ def add_offsets(
raise TypeError(f"Invalid type for constant: {type(constant)}")

elif "creation_date" in offsets and not suppress_output:
-datestring = datetime.fromtimestamp(offsets["creation_date"]).strftime(
-"%m/%d/%Y, %H:%M:%S",
-)
+datestring = offsets["creation_date"].strftime("%m/%d/%Y, %H:%M:%S")
logger.info(f"Using energy offset parameters generated on {datestring}")

if len(offsets) > 0:
@@ -1652,35 +1647,31 @@ def add_offsets(
for k, v in offsets.items():
if k == "creation_date":
continue
if k == "constant":
elif k == "constant":
# flip sign if binding energy scale
constant = v * scale_sign
log_str += f"\n Constant: {constant}"
-else:
-columns.append(k)
-try:
-weight = v["weight"]
-except KeyError:
-weight = 1
-if not isinstance(weight, (int, float, np.integer, np.floating)):
-raise TypeError(f"Invalid type for weight of column {k}: {type(weight)}")
-# flip sign if binding energy scale
-weight = weight * scale_sign
-weights.append(weight)
-pm = v.get("preserve_mean", False)
-if str(pm).lower() in ["false", "0", "no"]:
-pm = False
-elif str(pm).lower() in ["true", "1", "yes"]:
-pm = True
-preserve_mean.append(pm)
-red = v.get("reduction", None)
-if str(red).lower() in ["none", "null"]:
-red = None
-reductions.append(red)
-log_str += (
-f"\n    Column[{k}]: Weight={weight}, Preserve Mean: {pm}, "
-f"Reductions: {red}."
-)
+elif k == "columns":
+for column_name, column_dict in offsets["columns"].items():
+columns.append(column_name)
+weight = column_dict.get("weight", 1)
+if not isinstance(weight, (int, float, np.integer, np.floating)):
+raise TypeError(
+f"Invalid type for weight of column {column_name}: {type(weight)}",
+)
+# flip sign if binding energy scale
+weight = weight * scale_sign
+weights.append(weight)
+pm = column_dict.get("preserve_mean", False)
+preserve_mean.append(pm)
+red = column_dict.get("reduction", None)
+if str(red).lower() in ["none", "null"]:
+red = None
+reductions.append(red)
+log_str += (
+f"\n    Column[{column_name}]: Weight={weight}, Preserve Mean: {pm}, "
+f"Reductions: {red}."
+)

if not suppress_output:
logger.info(log_str)
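As in the delay calibrator, per-column parameters now live under offsets["columns"]; the energy variant additionally flips the weight sign on a binding-energy scale. Schematically, with an illustrative column name:

    scale_sign = -1  # binding-energy scale; +1 for kinetic energy
    offsets = {"columns": {"monochromatorPhotonEnergy": {"weight": 1, "preserve_mean": True}}}
    weight = offsets["columns"]["monochromatorPhotonEnergy"]["weight"] * scale_sign  # -> -1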
Diffs for the remaining 35 changed files are not shown here.
