diff --git a/sed/core/metadata.py b/sed/core/metadata.py index 803f5c3d..155bdfce 100644 --- a/sed/core/metadata.py +++ b/sed/core/metadata.py @@ -1,6 +1,6 @@ """This is a metadata handler class from the sed package - """ +import json from copy import deepcopy from typing import Any from typing import Dict @@ -9,7 +9,8 @@ class MetaHandler: - """[summary]""" + """This class provides methods to manipulate metadata dictionaries, + and give a nice representation of them.""" def __init__(self, meta: Dict = None) -> None: self._m = deepcopy(meta) if meta is not None else {} @@ -18,13 +19,35 @@ def __getitem__(self, val: Any) -> None: return self._m[val] def __repr__(self) -> str: - # TODO: #35 add pretty print, possibly to HTML - return str(self._m) + return json.dumps(self._m, default=str, indent=4) + + def _format_attributes(self, attributes, indent=0): + INDENT_FACTOR = 20 + html = "" + for key, value in attributes.items(): + # Format key + formatted_key = key.replace("_", " ").title() + formatted_key = f"{formatted_key}" + + html += f"
" + if isinstance(value, dict): + html += f"
{formatted_key} [{key}]" + html += self._format_attributes(value, indent + 1) + html += "
" + elif hasattr(value, "shape"): + html += f"{formatted_key} [{key}]: {value.shape}" + else: + html += f"{formatted_key} [{key}]: {value}" + html += "
" + return html + + def _repr_html_(self) -> str: + html = self._format_attributes(self._m) + return html @property - def metadata(self) -> dict: + def metadata(self) -> Dict: """Property returning the metadata dict. - Returns: dict: Dictionary of metadata. """ @@ -85,83 +108,17 @@ def add( f"Please choose between overwrite,append or raise.", ) - def add_processing(self, method: str, **kwds: Any) -> None: - """docstring - - Args: - - Returns: - - """ - # TODO: #36 Add processing metadata validation tests - self._m["processing"][method] = kwds - - def from_nexus(self, val: Any) -> None: - """docstring - - Args: - - Returns: - - """ - raise NotImplementedError() - - def to_nexus(self, val: Any) -> None: - """docstring - - Args: - - Returns: - - """ - raise NotImplementedError() - - def from_json(self, val: Any) -> None: - """docstring - - Args: - - Returns: - - """ - raise NotImplementedError() - - def to_json(self, val: Any) -> None: - """docstring - - Args: - - Returns: - - """ - raise NotImplementedError() - - def from_dict(self, val: Any) -> None: - """docstring - - Args: - - Returns: - - """ - raise NotImplementedError() - - def to_dict(self, val: Any) -> None: - """docstring - - Args: - - Returns: - - """ - raise NotImplementedError() - class DuplicateEntryError(Exception): - """[summary]""" + """Exception raised when attempting to add a duplicate entry to the metadata container. + + Attributes: + message -- explanation of the error + """ + def __init__(self, message: str = "An entry already exists in metadata"): + self.message = message + super().__init__(self.message) -if __name__ == "__main__": - m = MetaHandler() - m.add({"start": 0, "stop": 1}, name="test") - print(m) + def __str__(self): + return f"{self.__class__.__name__}: {self.message}" diff --git a/sed/core/processor.py b/sed/core/processor.py index 089e0f39..ee085739 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -175,13 +175,38 @@ def __init__( def __repr__(self): if self._dataframe is None: - df_str = "Data Frame: No Data loaded" + df_str = "Dataframe: No Data loaded" else: df_str = self._dataframe.__repr__() - attributes_str = f"Metadata: {self._attributes.metadata}" - pretty_str = df_str + "\n" + attributes_str + pretty_str = df_str + "\n" + "Metadata: " + "\n" + self._attributes.__repr__() return pretty_str + def _repr_html_(self): + html = "
" + + if self._dataframe is None: + df_html = "Dataframe: No Data loaded" + else: + df_html = self._dataframe._repr_html_() + + html += f"
Dataframe{df_html}
" + + # Add expandable section for attributes + html += "
Metadata" + html += "
" + html += self._attributes._repr_html_() + html += "
" + + html += "
" + + return html + + ## Suggestion: + # @property + # def overview_panel(self): + # """Provides an overview panel with plots of different data attributes.""" + # self.view_event_histogram(dfpid=2, backend="matplotlib") + @property def dataframe(self) -> Union[pd.DataFrame, ddf.DataFrame]: """Accessor to the underlying dataframe. @@ -238,13 +263,13 @@ def timed_dataframe(self, timed_dataframe: Union[pd.DataFrame, ddf.DataFrame]): self._timed_dataframe = timed_dataframe @property - def attributes(self) -> dict: + def attributes(self) -> MetaHandler: """Accessor to the metadata dict. Returns: - dict: The metadata dict. + MetaHandler: The metadata object """ - return self._attributes.metadata + return self._attributes def add_attribute(self, attributes: dict, name: str, **kwds): """Function to add element to the attributes dict. diff --git a/tests/test_metadata.py b/tests/test_metadata.py new file mode 100644 index 00000000..fbe979a4 --- /dev/null +++ b/tests/test_metadata.py @@ -0,0 +1,75 @@ +import json +from typing import Any +from typing import Dict + +import numpy as np +import pytest + +from sed.core.metadata import DuplicateEntryError +from sed.core.metadata import MetaHandler + +metadata: Dict[Any, Any] = {} +metadata["entry_title"] = "Title" +# sample +metadata["sample"] = {} +metadata["sample"]["size"] = np.array([1, 2, 3]) +metadata["sample"]["name"] = "Sample Name" + + +@pytest.fixture +def meta_handler(): + # Create a MetaHandler instance + return MetaHandler(meta=metadata) + + +def test_add_entry_overwrite(meta_handler): + # Add a new entry to metadata with 'overwrite' policy + new_entry = {"sample": "Sample Name"} + meta_handler.add(new_entry, "sample", duplicate_policy="overwrite") + assert "sample" in meta_handler.metadata + assert meta_handler.metadata["sample"] == new_entry + + +def test_add_entry_raise(meta_handler): + # Attempt to add a duplicate entry with 'raise' policy + with pytest.raises(DuplicateEntryError): + meta_handler.add({}, "entry_title", duplicate_policy="raise") + + +def test_add_entry_append(meta_handler): + # Add a new entry to metadata with 'append' policy + new_entry = {"sample": "Sample Name"} + meta_handler.add(new_entry, "sample", duplicate_policy="append") + assert "sample" in meta_handler.metadata + assert "sample_1" in meta_handler.metadata + assert meta_handler.metadata["sample_1"] == new_entry + + +def test_add_entry_merge(meta_handler): + # Add a new entry to metadata with 'merge' policy + entry_to_merge = {"name": "Name", "type": "type"} + meta_handler.add(entry_to_merge, "sample", duplicate_policy="merge") + print(meta_handler.metadata) + assert "sample" in meta_handler.metadata + assert "name" in meta_handler.metadata["sample"] + assert "type" in meta_handler.metadata["sample"] + + +def test_repr(meta_handler): + # Test the __repr__ method + assert repr(meta_handler) == json.dumps(metadata, default=str, indent=4) + + +def test_repr_html(meta_handler): + # Test the _repr_html_ method + html = meta_handler._format_attributes(metadata) + assert meta_handler._repr_html_() == html + + html_test = "
Entry Title [entry_title]: Title
" + html_test += ( + "
Sample [sample]" + ) + html_test += "
Size [size]: (3,)
" + html_test += "
Name [name]: Sample Name" + html_test += "
" + assert html == html_test diff --git a/tests/test_processor.py b/tests/test_processor.py index 3d39f6e5..5471410a 100644 --- a/tests/test_processor.py +++ b/tests/test_processor.py @@ -189,11 +189,11 @@ def test_attributes_setters() -> None: processor.dataframe["X"].compute(), processor.dataframe["Y"].compute(), ) - processor_metadata = processor.attributes + processor_metadata = processor.attributes.metadata assert isinstance(processor_metadata, dict) assert "test" in processor_metadata.keys() processor.add_attribute({"key2": 5}, name="test2") - assert processor.attributes["test2"]["key2"] == 5 + assert processor_metadata["test2"]["key2"] == 5 assert processor.config["core"]["loader"] == "mpes" assert len(processor.files) == 2 @@ -398,7 +398,7 @@ def test_pose_adjustment_save_load() -> None: processor.apply_momentum_correction() assert "Xm" in processor.dataframe.columns assert "Ym" in processor.dataframe.columns - assert "momentum_correction" in processor.attributes + assert "momentum_correction" in processor.attributes.metadata os.remove("sed_config_pose_adjustments.yaml") @@ -609,7 +609,10 @@ def test_energy_calibration_workflow(energy_scale: str, calibration_method: str) processor.add_energy_offset(constant=1) processor.append_energy_axis(preview=False) assert "energy" in processor.dataframe.columns - assert processor.attributes["energy_calibration"]["calibration"]["energy_scale"] == energy_scale + assert ( + processor.attributes.metadata["energy_calibration"]["calibration"]["energy_scale"] + == energy_scale + ) os.remove(f"sed_config_energy_calibration_{energy_scale}-{calibration_method}.yaml") energy1 = processor.dataframe["energy"].compute().values @@ -743,11 +746,14 @@ def test_delay_calibration_workflow() -> None: processor.calibrate_delay_axis() assert "delay" in processor.dataframe.columns assert ( - processor.attributes["delay_calibration"]["calibration"]["creation_date"] + processor.attributes.metadata["delay_calibration"]["calibration"]["creation_date"] == creation_date_calibration ) processor.add_delay_offset(preview=True) - assert processor.attributes["delay_offset"]["offsets"]["creation_date"] == creation_date_offsets + assert ( + processor.attributes.metadata["delay_offset"]["offsets"]["creation_date"] + == creation_date_offsets + ) np.testing.assert_allclose(expected, processor.dataframe["delay"].compute()) os.remove("sed_config_delay_calibration.yaml") @@ -819,9 +825,12 @@ def test_add_time_stamped_data() -> None: res = processor.dataframe["time_stamped_data"].compute().values assert res[0] == 0 assert res[-1] == 1 - assert processor.attributes["time_stamped_data"][0] == "time_stamped_data" - np.testing.assert_array_equal(processor.attributes["time_stamped_data"][1], time_stamps) - np.testing.assert_array_equal(processor.attributes["time_stamped_data"][2], data) + assert processor.attributes.metadata["time_stamped_data"][0] == "time_stamped_data" + np.testing.assert_array_equal( + processor.attributes.metadata["time_stamped_data"][1], + time_stamps, + ) + np.testing.assert_array_equal(processor.attributes.metadata["time_stamped_data"][2], data) def test_event_histogram() -> None: