Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Html representation of processor and metadata in notebooks #395

Merged
merged 9 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 39 additions & 82 deletions sed/core/metadata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""This is a metadata handler class from the sed package

"""
import json
from copy import deepcopy
from typing import Any
from typing import Dict
Expand All @@ -9,7 +9,8 @@


class MetaHandler:
"""[summary]"""
"""This class provides methods to manipulate metadata dictionaries,
and give a nice representation of them."""

def __init__(self, meta: Dict = None) -> None:
    """Initialize the handler with an optional metadata dictionary.

    Args:
        meta (Dict, optional): Initial metadata. It is deep-copied, so later
            changes to the caller's dictionary do not affect the handler.
            Defaults to None, which starts from an empty dictionary.
    """
    if meta is None:
        self._m = {}
    else:
        self._m = deepcopy(meta)
Expand All @@ -18,13 +19,35 @@ def __getitem__(self, val: Any) -> None:
return self._m[val]

def __repr__(self) -> str:
# TODO: #35 add pretty print, possibly to HTML
return str(self._m)
return json.dumps(self._m, default=str, indent=4)

def _format_attributes(self, attributes, indent=0):
    """Render a (possibly nested) metadata dictionary as an HTML fragment.

    Every entry becomes a ``<div>`` indented according to its nesting depth.
    Nested dictionaries are rendered recursively inside a collapsible
    ``<details>`` element, values exposing a ``shape`` attribute are
    summarized by that shape, and everything else is shown via ``str()``.

    Args:
        attributes (dict): Dictionary to render.
        indent (int, optional): Current nesting depth. Defaults to 0.

    Returns:
        str: The HTML fragment; an empty string for an empty dictionary.
    """
    # Imported locally under a distinct name so the block stays
    # self-contained and does not clash with local variables named ``html``.
    from html import escape

    INDENT_FACTOR = 20
    padding = indent * INDENT_FACTOR
    parts = []
    for key, value in attributes.items():
        # Keys are not guaranteed to be strings; normalize before formatting.
        key_text = str(key)
        raw_key = escape(key_text)
        pretty_key = escape(key_text.replace("_", " ").title())
        formatted_key = f"<b>{pretty_key}</b>"

        parts.append(f"<div style='padding-left: {padding}px;'>")
        if isinstance(value, dict):
            parts.append(f"<details><summary>{formatted_key} [{raw_key}]</summary>")
            parts.append(self._format_attributes(value, indent + 1))
            parts.append("</details>")
        elif hasattr(value, "shape"):
            # Show only the shape instead of dumping the full contents.
            parts.append(f"{formatted_key} [{raw_key}]: {escape(str(value.shape))}")
        else:
            # Escape so characters such as '<' or '&' in the value are
            # displayed literally rather than interpreted as markup.
            parts.append(f"{formatted_key} [{raw_key}]: {escape(str(value))}")
        parts.append("</div>")
    return "".join(parts)

def _repr_html_(self) -> str:
    """Return the HTML representation of the stored metadata.

    Returns:
        str: The result of formatting the internal metadata dictionary with
        ``_format_attributes``.
    """
    return self._format_attributes(self._m)

@property
def metadata(self) -> dict:
def metadata(self) -> Dict:
"""Property returning the metadata dict.

Returns:
dict: Dictionary of metadata.
"""
Expand Down Expand Up @@ -85,83 +108,17 @@ def add(
f"Please choose between overwrite,append or raise.",
)

def add_processing(self, method: str, **kwds: Any) -> None:
"""docstring

Args:

Returns:

"""
# TODO: #36 Add processing metadata validation tests
self._m["processing"][method] = kwds

def from_nexus(self, val: Any) -> None:
"""docstring

Args:

Returns:

"""
raise NotImplementedError()

def to_nexus(self, val: Any) -> None:
"""docstring

Args:

Returns:

"""
raise NotImplementedError()

def from_json(self, val: Any) -> None:
"""docstring

Args:

Returns:

"""
raise NotImplementedError()

def to_json(self, val: Any) -> None:
"""docstring

Args:

Returns:

"""
raise NotImplementedError()

def from_dict(self, val: Any) -> None:
"""docstring

Args:

Returns:

"""
raise NotImplementedError()

def to_dict(self, val: Any) -> None:
"""docstring

Args:

Returns:

"""
raise NotImplementedError()


class DuplicateEntryError(Exception):
"""[summary]"""
"""Exception raised when attempting to add a duplicate entry to the metadata container.

Attributes:
message -- explanation of the error
"""

def __init__(self, message: str = "An entry already exists in metadata"):
self.message = message
super().__init__(self.message)

if __name__ == "__main__":
m = MetaHandler()
m.add({"start": 0, "stop": 1}, name="test")
print(m)
def __str__(self):
return f"{self.__class__.__name__}: {self.message}"
37 changes: 31 additions & 6 deletions sed/core/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,13 +175,38 @@ def __init__(

def __repr__(self):
if self._dataframe is None:
df_str = "Data Frame: No Data loaded"
df_str = "Dataframe: No Data loaded"
else:
df_str = self._dataframe.__repr__()
attributes_str = f"Metadata: {self._attributes.metadata}"
pretty_str = df_str + "\n" + attributes_str
pretty_str = df_str + "\n" + "Metadata: " + "\n" + self._attributes.__repr__()
return pretty_str

def _repr_html_(self):
    """Return an HTML representation with collapsible dataframe and metadata.

    The dataframe section shows a placeholder text when no dataframe is set,
    otherwise the dataframe's own HTML representation. The metadata section
    embeds the HTML representation of the attributes object.

    Returns:
        str: The assembled HTML fragment.
    """
    frame = self._dataframe
    df_html = "Dataframe: No Data loaded" if frame is None else frame._repr_html_()

    sections = [
        "<div>",
        f"<details><summary>Dataframe</summary>{df_html}</details>",
        # Expandable section holding the metadata rendering
        "<details><summary>Metadata</summary>",
        "<div style='padding-left: 10px;'>",
        self._attributes._repr_html_(),
        "</div></details>",
        "</div>",
    ]
    return "".join(sections)

## Suggestion:
# @property
# def overview_panel(self):
# """Provides an overview panel with plots of different data attributes."""
# self.view_event_histogram(dfpid=2, backend="matplotlib")

@property
def dataframe(self) -> Union[pd.DataFrame, ddf.DataFrame]:
"""Accessor to the underlying dataframe.
Expand Down Expand Up @@ -238,13 +263,13 @@ def timed_dataframe(self, timed_dataframe: Union[pd.DataFrame, ddf.DataFrame]):
self._timed_dataframe = timed_dataframe

@property
def attributes(self) -> dict:
def attributes(self) -> MetaHandler:
"""Accessor to the metadata dict.

Returns:
dict: The metadata dict.
MetaHandler: The metadata object
"""
return self._attributes.metadata
return self._attributes

def add_attribute(self, attributes: dict, name: str, **kwds):
"""Function to add element to the attributes dict.
Expand Down
75 changes: 75 additions & 0 deletions tests/test_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import json
from typing import Any
from typing import Dict

import numpy as np
import pytest

from sed.core.metadata import DuplicateEntryError
from sed.core.metadata import MetaHandler

# Shared reference metadata used by the tests in this module.
metadata: Dict[Any, Any] = {
    "entry_title": "Title",
    # sample
    "sample": {
        "size": np.array([1, 2, 3]),
        "name": "Sample Name",
    },
}


@pytest.fixture
def meta_handler():
    """Provide a MetaHandler built from the module-level ``metadata`` dict."""
    handler = MetaHandler(meta=metadata)
    return handler


def test_add_entry_overwrite(meta_handler):
    """Adding to an existing name with the 'overwrite' policy stores the new entry."""
    replacement = {"sample": "Sample Name"}
    meta_handler.add(replacement, "sample", duplicate_policy="overwrite")
    stored = meta_handler.metadata
    assert "sample" in stored
    assert stored["sample"] == replacement


def test_add_entry_raise(meta_handler):
    """Adding to an existing name with the 'raise' policy raises DuplicateEntryError."""
    existing_name = "entry_title"
    with pytest.raises(DuplicateEntryError):
        meta_handler.add({}, existing_name, duplicate_policy="raise")


def test_add_entry_append(meta_handler):
    """Adding to an existing name with the 'append' policy keeps the old entry
    and stores the new one under ``sample_1``."""
    appended = {"sample": "Sample Name"}
    meta_handler.add(appended, "sample", duplicate_policy="append")
    stored = meta_handler.metadata
    assert "sample" in stored
    assert "sample_1" in stored
    assert stored["sample_1"] == appended


def test_add_entry_merge(meta_handler):
    """Adding to an existing name with the 'merge' policy leaves both the
    ``name`` and ``type`` keys present in the ``sample`` entry."""
    extra_fields = {"name": "Name", "type": "type"}
    meta_handler.add(extra_fields, "sample", duplicate_policy="merge")
    print(meta_handler.metadata)
    assert "sample" in meta_handler.metadata
    sample_entry = meta_handler.metadata["sample"]
    assert "name" in sample_entry
    assert "type" in sample_entry


def test_repr(meta_handler):
    """``repr`` of the handler equals the indented JSON dump of the metadata."""
    expected = json.dumps(metadata, default=str, indent=4)
    assert repr(meta_handler) == expected


def test_repr_html(meta_handler):
    """``_repr_html_`` matches ``_format_attributes`` and the expected markup."""
    rendered = meta_handler._format_attributes(metadata)
    assert meta_handler._repr_html_() == rendered

    # Expected markup, one fragment per rendered element.
    expected = "".join(
        [
            "<div style='padding-left: 0px;'><b>Entry Title</b> [entry_title]: Title</div>",
            "<div style='padding-left: 0px;'><details><summary><b>Sample</b> [sample]</summary>",
            "<div style='padding-left: 20px;'><b>Size</b> [size]: (3,)</div>",
            "<div style='padding-left: 20px;'><b>Name</b> [name]: Sample Name",
            "</div></details></div>",
        ],
    )
    assert rendered == expected
27 changes: 18 additions & 9 deletions tests/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,11 @@ def test_attributes_setters() -> None:
processor.dataframe["X"].compute(),
processor.dataframe["Y"].compute(),
)
processor_metadata = processor.attributes
processor_metadata = processor.attributes.metadata
assert isinstance(processor_metadata, dict)
assert "test" in processor_metadata.keys()
processor.add_attribute({"key2": 5}, name="test2")
assert processor.attributes["test2"]["key2"] == 5
assert processor_metadata["test2"]["key2"] == 5
assert processor.config["core"]["loader"] == "mpes"
assert len(processor.files) == 2

Expand Down Expand Up @@ -398,7 +398,7 @@ def test_pose_adjustment_save_load() -> None:
processor.apply_momentum_correction()
assert "Xm" in processor.dataframe.columns
assert "Ym" in processor.dataframe.columns
assert "momentum_correction" in processor.attributes
assert "momentum_correction" in processor.attributes.metadata
os.remove("sed_config_pose_adjustments.yaml")


Expand Down Expand Up @@ -609,7 +609,10 @@ def test_energy_calibration_workflow(energy_scale: str, calibration_method: str)
processor.add_energy_offset(constant=1)
processor.append_energy_axis(preview=False)
assert "energy" in processor.dataframe.columns
assert processor.attributes["energy_calibration"]["calibration"]["energy_scale"] == energy_scale
assert (
processor.attributes.metadata["energy_calibration"]["calibration"]["energy_scale"]
== energy_scale
)
os.remove(f"sed_config_energy_calibration_{energy_scale}-{calibration_method}.yaml")

energy1 = processor.dataframe["energy"].compute().values
Expand Down Expand Up @@ -743,11 +746,14 @@ def test_delay_calibration_workflow() -> None:
processor.calibrate_delay_axis()
assert "delay" in processor.dataframe.columns
assert (
processor.attributes["delay_calibration"]["calibration"]["creation_date"]
processor.attributes.metadata["delay_calibration"]["calibration"]["creation_date"]
== creation_date_calibration
)
processor.add_delay_offset(preview=True)
assert processor.attributes["delay_offset"]["offsets"]["creation_date"] == creation_date_offsets
assert (
processor.attributes.metadata["delay_offset"]["offsets"]["creation_date"]
== creation_date_offsets
)
np.testing.assert_allclose(expected, processor.dataframe["delay"].compute())
os.remove("sed_config_delay_calibration.yaml")

Expand Down Expand Up @@ -819,9 +825,12 @@ def test_add_time_stamped_data() -> None:
res = processor.dataframe["time_stamped_data"].compute().values
assert res[0] == 0
assert res[-1] == 1
assert processor.attributes["time_stamped_data"][0] == "time_stamped_data"
np.testing.assert_array_equal(processor.attributes["time_stamped_data"][1], time_stamps)
np.testing.assert_array_equal(processor.attributes["time_stamped_data"][2], data)
assert processor.attributes.metadata["time_stamped_data"][0] == "time_stamped_data"
np.testing.assert_array_equal(
processor.attributes.metadata["time_stamped_data"][1],
time_stamps,
)
np.testing.assert_array_equal(processor.attributes.metadata["time_stamped_data"][2], data)


def test_event_histogram() -> None:
Expand Down