Skip to content

Commit

Permalink
Html representation of processor and metadata in notebooks (#395)
Browse files Browse the repository at this point in the history
* basics for repr for metadata and processor class

* basics for repr for metadata and processor class

* metadata pretty html representation

* fix linting/test errors

* idea for plots

* apply some fixes

* remove yaml dump

* put back metadata property

* add tests for metadata
  • Loading branch information
zain-sohail authored May 13, 2024
1 parent b294ee3 commit ed38cb1
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 97 deletions.
121 changes: 39 additions & 82 deletions sed/core/metadata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""This is a metadata handler class from the sed package
"""
import json
from copy import deepcopy
from typing import Any
from typing import Dict
Expand All @@ -9,7 +9,8 @@


class MetaHandler:
"""[summary]"""
"""This class provides methods to manipulate metadata dictionaries,
and give a nice representation of them."""

def __init__(self, meta: Dict = None) -> None:
    """Set up the handler with its own private copy of ``meta``.

    Args:
        meta (Dict, optional): Initial metadata. When ``None`` (the
            default) the handler starts with an empty dictionary.
    """
    if meta is None:
        self._m = {}
    else:
        # Deep copy so the handler never shares nested objects with the caller.
        self._m = deepcopy(meta)
Expand All @@ -18,13 +19,35 @@ def __getitem__(self, val: Any) -> None:
return self._m[val]

def __repr__(self) -> str:
# TODO: #35 add pretty print, possibly to HTML
return str(self._m)
return json.dumps(self._m, default=str, indent=4)

def _format_attributes(self, attributes, indent=0):
    """Render a (possibly nested) metadata dictionary as an HTML fragment.

    Every entry becomes one ``<div>`` whose left padding is
    ``indent * 20`` pixels. The entry shows a title-cased, bold version
    of the key followed by the raw key in square brackets. Nested
    dictionaries are wrapped in a collapsible ``<details>`` element and
    rendered recursively one level deeper. Values that expose a ``shape``
    attribute are summarised by that shape. Anything else is shown
    through its ``str`` form.

    Keys and values are HTML-escaped before insertion, so metadata text
    containing ``<``, ``>``, ``&`` or quotes is displayed literally
    instead of being interpreted as markup. Non-string keys are converted
    with ``str`` first.

    Args:
        attributes (dict): Mapping to render.
        indent (int, optional): Current nesting depth. Defaults to 0.

    Returns:
        str: The HTML fragment; an empty string for an empty mapping.
    """
    # Imported here so the method does not rely on a module-level import.
    from html import escape

    INDENT_FACTOR = 20
    opening_div = f"<div style='padding-left: {indent * INDENT_FACTOR}px;'>"
    pieces = []
    for key, value in attributes.items():
        key_text = str(key)  # tolerate non-string keys such as ints
        title = escape(key_text.replace("_", " ").title())
        label = f"<b>{title}</b> [{escape(key_text)}]"

        if isinstance(value, dict):
            nested = self._format_attributes(value, indent + 1)
            content = f"<details><summary>{label}</summary>{nested}</details>"
        elif hasattr(value, "shape"):
            # Array-like values: show only the shape, not the data.
            content = f"{label}: {escape(str(value.shape))}"
        else:
            content = f"{label}: {escape(str(value))}"
        pieces.append(f"{opening_div}{content}</div>")
    return "".join(pieces)

def _repr_html_(self) -> str:
    """Return the HTML produced by ``_format_attributes`` for the stored metadata (``self._m``)."""
    return self._format_attributes(self._m)

@property
def metadata(self) -> dict:
def metadata(self) -> Dict:
"""Property returning the metadata dict.
Returns:
dict: Dictionary of metadata.
"""
Expand Down Expand Up @@ -85,83 +108,17 @@ def add(
f"Please choose between overwrite,append or raise.",
)

def add_processing(self, method: str, **kwds: Any) -> None:
    """Record the keyword arguments of a processing step.

    The arguments are stored as a dictionary under
    ``self._m["processing"][method]``, replacing whatever was stored for
    the same ``method`` before. The ``"processing"`` section is created
    on first use, so the call also works on a handler that has no
    processing metadata yet.

    Args:
        method (str): Key under which the step is recorded.
        **kwds (Any): Keyword arguments to store for this step.
    """
    # TODO: #36 Add processing metadata validation tests
    self._m.setdefault("processing", {})[method] = kwds

def from_nexus(self, val: Any) -> None:
    """Unimplemented placeholder (presumably for reading metadata from NeXus; intent not visible here).

    Args:
        val (Any): Currently ignored.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError

def to_nexus(self, val: Any) -> None:
    """Unimplemented placeholder (presumably for writing metadata to NeXus; intent not visible here).

    Args:
        val (Any): Currently ignored.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError

def from_json(self, val: Any) -> None:
    """Unimplemented placeholder (presumably for reading metadata from JSON; intent not visible here).

    Args:
        val (Any): Currently ignored.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError

def to_json(self, val: Any) -> None:
    """Unimplemented placeholder (presumably for writing metadata as JSON; intent not visible here).

    Args:
        val (Any): Currently ignored.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError

def from_dict(self, val: Any) -> None:
    """Unimplemented placeholder (presumably for loading metadata from a dict; intent not visible here).

    Args:
        val (Any): Currently ignored.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError

def to_dict(self, val: Any) -> None:
    """Unimplemented placeholder (presumably for exporting metadata as a dict; intent not visible here).

    Args:
        val (Any): Currently ignored.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError


class DuplicateEntryError(Exception):
"""[summary]"""
"""Exception raised when attempting to add a duplicate entry to the metadata container.
Attributes:
message -- explanation of the error
"""

def __init__(self, message: str = "An entry already exists in metadata"):
self.message = message
super().__init__(self.message)

if __name__ == "__main__":
m = MetaHandler()
m.add({"start": 0, "stop": 1}, name="test")
print(m)
def __str__(self):
    """Return ``"<class name>: <message>"`` for this exception."""
    class_name = self.__class__.__name__
    return "{}: {}".format(class_name, self.message)
37 changes: 31 additions & 6 deletions sed/core/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,13 +175,38 @@ def __init__(

def __repr__(self):
if self._dataframe is None:
df_str = "Data Frame: No Data loaded"
df_str = "Dataframe: No Data loaded"
else:
df_str = self._dataframe.__repr__()
attributes_str = f"Metadata: {self._attributes.metadata}"
pretty_str = df_str + "\n" + attributes_str
pretty_str = df_str + "\n" + "Metadata: " + "\n" + self._attributes.__repr__()
return pretty_str

def _repr_html_(self):
    """Build the HTML representation: two collapsible sections in one ``<div>``.

    The first ``<details>`` section holds the dataframe's own
    ``_repr_html_`` output, or a fixed placeholder text when no
    dataframe is set. The second holds the ``_repr_html_`` output of
    ``self._attributes``, indented by 10 pixels.

    Returns:
        str: The combined HTML fragment.
    """
    frame = self._dataframe
    df_html = "Dataframe: No Data loaded" if frame is None else frame._repr_html_()

    # Expandable section for the attributes.
    meta_html = self._attributes._repr_html_()

    sections = [
        "<div>",
        f"<details><summary>Dataframe</summary>{df_html}</details>",
        "<details><summary>Metadata</summary>",
        "<div style='padding-left: 10px;'>",
        meta_html,
        "</div></details>",
        "</div>",
    ]
    return "".join(sections)

## Suggestion:
# @property
# def overview_panel(self):
# """Provides an overview panel with plots of different data attributes."""
# self.view_event_histogram(dfpid=2, backend="matplotlib")

@property
def dataframe(self) -> Union[pd.DataFrame, ddf.DataFrame]:
"""Accessor to the underlying dataframe.
Expand Down Expand Up @@ -238,13 +263,13 @@ def timed_dataframe(self, timed_dataframe: Union[pd.DataFrame, ddf.DataFrame]):
self._timed_dataframe = timed_dataframe

@property
def attributes(self) -> dict:
def attributes(self) -> MetaHandler:
"""Accessor to the metadata dict.
Returns:
dict: The metadata dict.
MetaHandler: The metadata object
"""
return self._attributes.metadata
return self._attributes

def add_attribute(self, attributes: dict, name: str, **kwds):
"""Function to add element to the attributes dict.
Expand Down
75 changes: 75 additions & 0 deletions tests/test_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import json
from typing import Any
from typing import Dict

import numpy as np
import pytest

from sed.core.metadata import DuplicateEntryError
from sed.core.metadata import MetaHandler

# Shared input for the tests below: one scalar entry plus a nested "sample" section.
metadata: Dict[Any, Any] = {
    "entry_title": "Title",
    # sample
    "sample": {
        "size": np.array([1, 2, 3]),
        "name": "Sample Name",
    },
}


@pytest.fixture
def meta_handler():
    """Provide a MetaHandler constructed from the module-level ``metadata`` dict."""
    handler = MetaHandler(meta=metadata)
    return handler


def test_add_entry_overwrite(meta_handler):
    """Adding under an existing name with the 'overwrite' policy stores the new entry."""
    replacement = {"sample": "Sample Name"}
    meta_handler.add(replacement, "sample", duplicate_policy="overwrite")

    assert "sample" in meta_handler.metadata
    assert meta_handler.metadata["sample"] == replacement


def test_add_entry_raise(meta_handler):
    """Adding under an existing name with the 'raise' policy raises DuplicateEntryError."""
    duplicate_name = "entry_title"
    with pytest.raises(DuplicateEntryError):
        meta_handler.add({}, duplicate_name, duplicate_policy="raise")


def test_add_entry_append(meta_handler):
    """With the 'append' policy the original stays and the new entry lands under ``sample_1``."""
    appended = {"sample": "Sample Name"}
    meta_handler.add(appended, "sample", duplicate_policy="append")

    for expected_key in ("sample", "sample_1"):
        assert expected_key in meta_handler.metadata
    assert meta_handler.metadata["sample_1"] == appended


def test_add_entry_merge(meta_handler):
    """With the 'merge' policy the keys of the new entry appear inside the existing one."""
    entry_to_merge = {"name": "Name", "type": "type"}
    meta_handler.add(entry_to_merge, "sample", duplicate_policy="merge")

    # The leftover debugging print was dropped; pytest shows values on failure.
    assert "sample" in meta_handler.metadata
    assert "name" in meta_handler.metadata["sample"]
    assert "type" in meta_handler.metadata["sample"]


def test_repr(meta_handler):
    """``repr`` of the handler equals the indented JSON dump of the input metadata."""
    expected = json.dumps(metadata, default=str, indent=4)
    assert repr(meta_handler) == expected


def test_repr_html(meta_handler):
    """``_repr_html_`` matches ``_format_attributes`` and the expected literal markup."""
    rendered = meta_handler._format_attributes(metadata)
    assert meta_handler._repr_html_() == rendered

    # Expected markup, one fragment per line of the original construction.
    expected_fragments = [
        "<div style='padding-left: 0px;'><b>Entry Title</b> [entry_title]: Title</div>",
        "<div style='padding-left: 0px;'><details><summary><b>Sample</b> [sample]</summary>",
        "<div style='padding-left: 20px;'><b>Size</b> [size]: (3,)</div>",
        "<div style='padding-left: 20px;'><b>Name</b> [name]: Sample Name",
        "</div></details></div>",
    ]
    assert rendered == "".join(expected_fragments)
27 changes: 18 additions & 9 deletions tests/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,11 @@ def test_attributes_setters() -> None:
processor.dataframe["X"].compute(),
processor.dataframe["Y"].compute(),
)
processor_metadata = processor.attributes
processor_metadata = processor.attributes.metadata
assert isinstance(processor_metadata, dict)
assert "test" in processor_metadata.keys()
processor.add_attribute({"key2": 5}, name="test2")
assert processor.attributes["test2"]["key2"] == 5
assert processor_metadata["test2"]["key2"] == 5
assert processor.config["core"]["loader"] == "mpes"
assert len(processor.files) == 2

Expand Down Expand Up @@ -398,7 +398,7 @@ def test_pose_adjustment_save_load() -> None:
processor.apply_momentum_correction()
assert "Xm" in processor.dataframe.columns
assert "Ym" in processor.dataframe.columns
assert "momentum_correction" in processor.attributes
assert "momentum_correction" in processor.attributes.metadata
os.remove("sed_config_pose_adjustments.yaml")


Expand Down Expand Up @@ -609,7 +609,10 @@ def test_energy_calibration_workflow(energy_scale: str, calibration_method: str)
processor.add_energy_offset(constant=1)
processor.append_energy_axis(preview=False)
assert "energy" in processor.dataframe.columns
assert processor.attributes["energy_calibration"]["calibration"]["energy_scale"] == energy_scale
assert (
processor.attributes.metadata["energy_calibration"]["calibration"]["energy_scale"]
== energy_scale
)
os.remove(f"sed_config_energy_calibration_{energy_scale}-{calibration_method}.yaml")

energy1 = processor.dataframe["energy"].compute().values
Expand Down Expand Up @@ -743,11 +746,14 @@ def test_delay_calibration_workflow() -> None:
processor.calibrate_delay_axis()
assert "delay" in processor.dataframe.columns
assert (
processor.attributes["delay_calibration"]["calibration"]["creation_date"]
processor.attributes.metadata["delay_calibration"]["calibration"]["creation_date"]
== creation_date_calibration
)
processor.add_delay_offset(preview=True)
assert processor.attributes["delay_offset"]["offsets"]["creation_date"] == creation_date_offsets
assert (
processor.attributes.metadata["delay_offset"]["offsets"]["creation_date"]
== creation_date_offsets
)
np.testing.assert_allclose(expected, processor.dataframe["delay"].compute())
os.remove("sed_config_delay_calibration.yaml")

Expand Down Expand Up @@ -819,9 +825,12 @@ def test_add_time_stamped_data() -> None:
res = processor.dataframe["time_stamped_data"].compute().values
assert res[0] == 0
assert res[-1] == 1
assert processor.attributes["time_stamped_data"][0] == "time_stamped_data"
np.testing.assert_array_equal(processor.attributes["time_stamped_data"][1], time_stamps)
np.testing.assert_array_equal(processor.attributes["time_stamped_data"][2], data)
assert processor.attributes.metadata["time_stamped_data"][0] == "time_stamped_data"
np.testing.assert_array_equal(
processor.attributes.metadata["time_stamped_data"][1],
time_stamps,
)
np.testing.assert_array_equal(processor.attributes.metadata["time_stamped_data"][2], data)


def test_event_histogram() -> None:
Expand Down

0 comments on commit ed38cb1

Please sign in to comment.