Skip to content

Commit ed38cb1

Browse files
authored
Html representation of processor and metadata in notebooks (#395)
* basics for repr for metadata and processor class
* basics for repr for metadata and processor class
* metadata pretty html representation
* fix linting/test errors
* idea for plots
* apply some fixes
* remove yaml dump
* put back metadata property
* add tests for metadata
1 parent b294ee3 commit ed38cb1

File tree

4 files changed

+163
-97
lines changed

4 files changed

+163
-97
lines changed

sed/core/metadata.py

Lines changed: 39 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""This is a metadata handler class from the sed package
2-
32
"""
3+
import json
44
from copy import deepcopy
55
from typing import Any
66
from typing import Dict
@@ -9,7 +9,8 @@
99

1010

1111
class MetaHandler:
12-
"""[summary]"""
12+
"""This class provides methods to manipulate metadata dictionaries,
13+
and give a nice representation of them."""
1314

1415
def __init__(self, meta: Dict = None) -> None:
1516
self._m = deepcopy(meta) if meta is not None else {}
@@ -18,13 +19,35 @@ def __getitem__(self, val: Any) -> None:
1819
return self._m[val]
1920

2021
def __repr__(self) -> str:
21-
# TODO: #35 add pretty print, possibly to HTML
22-
return str(self._m)
22+
return json.dumps(self._m, default=str, indent=4)
23+
24+
def _format_attributes(self, attributes, indent=0):
25+
INDENT_FACTOR = 20
26+
html = ""
27+
for key, value in attributes.items():
28+
# Format key
29+
formatted_key = key.replace("_", " ").title()
30+
formatted_key = f"<b>{formatted_key}</b>"
31+
32+
html += f"<div style='padding-left: {indent * INDENT_FACTOR}px;'>"
33+
if isinstance(value, dict):
34+
html += f"<details><summary>{formatted_key} [{key}]</summary>"
35+
html += self._format_attributes(value, indent + 1)
36+
html += "</details>"
37+
elif hasattr(value, "shape"):
38+
html += f"{formatted_key} [{key}]: {value.shape}"
39+
else:
40+
html += f"{formatted_key} [{key}]: {value}"
41+
html += "</div>"
42+
return html
43+
44+
def _repr_html_(self) -> str:
45+
html = self._format_attributes(self._m)
46+
return html
2347

2448
@property
25-
def metadata(self) -> dict:
49+
def metadata(self) -> Dict:
2650
"""Property returning the metadata dict.
27-
2851
Returns:
2952
dict: Dictionary of metadata.
3053
"""
@@ -85,83 +108,17 @@ def add(
85108
f"Please choose between overwrite,append or raise.",
86109
)
87110

88-
def add_processing(self, method: str, **kwds: Any) -> None:
89-
"""docstring
90-
91-
Args:
92-
93-
Returns:
94-
95-
"""
96-
# TODO: #36 Add processing metadata validation tests
97-
self._m["processing"][method] = kwds
98-
99-
def from_nexus(self, val: Any) -> None:
100-
"""docstring
101-
102-
Args:
103-
104-
Returns:
105-
106-
"""
107-
raise NotImplementedError()
108-
109-
def to_nexus(self, val: Any) -> None:
110-
"""docstring
111-
112-
Args:
113-
114-
Returns:
115-
116-
"""
117-
raise NotImplementedError()
118-
119-
def from_json(self, val: Any) -> None:
120-
"""docstring
121-
122-
Args:
123-
124-
Returns:
125-
126-
"""
127-
raise NotImplementedError()
128-
129-
def to_json(self, val: Any) -> None:
130-
"""docstring
131-
132-
Args:
133-
134-
Returns:
135-
136-
"""
137-
raise NotImplementedError()
138-
139-
def from_dict(self, val: Any) -> None:
140-
"""docstring
141-
142-
Args:
143-
144-
Returns:
145-
146-
"""
147-
raise NotImplementedError()
148-
149-
def to_dict(self, val: Any) -> None:
150-
"""docstring
151-
152-
Args:
153-
154-
Returns:
155-
156-
"""
157-
raise NotImplementedError()
158-
159111

160112
class DuplicateEntryError(Exception):
161-
"""[summary]"""
113+
"""Exception raised when attempting to add a duplicate entry to the metadata container.
114+
115+
Attributes:
116+
message -- explanation of the error
117+
"""
162118

119+
def __init__(self, message: str = "An entry already exists in metadata"):
120+
self.message = message
121+
super().__init__(self.message)
163122

164-
if __name__ == "__main__":
165-
m = MetaHandler()
166-
m.add({"start": 0, "stop": 1}, name="test")
167-
print(m)
123+
def __str__(self):
124+
return f"{self.__class__.__name__}: {self.message}"

sed/core/processor.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -175,13 +175,38 @@ def __init__(
175175

176176
def __repr__(self):
177177
if self._dataframe is None:
178-
df_str = "Data Frame: No Data loaded"
178+
df_str = "Dataframe: No Data loaded"
179179
else:
180180
df_str = self._dataframe.__repr__()
181-
attributes_str = f"Metadata: {self._attributes.metadata}"
182-
pretty_str = df_str + "\n" + attributes_str
181+
pretty_str = df_str + "\n" + "Metadata: " + "\n" + self._attributes.__repr__()
183182
return pretty_str
184183

184+
def _repr_html_(self):
185+
html = "<div>"
186+
187+
if self._dataframe is None:
188+
df_html = "Dataframe: No Data loaded"
189+
else:
190+
df_html = self._dataframe._repr_html_()
191+
192+
html += f"<details><summary>Dataframe</summary>{df_html}</details>"
193+
194+
# Add expandable section for attributes
195+
html += "<details><summary>Metadata</summary>"
196+
html += "<div style='padding-left: 10px;'>"
197+
html += self._attributes._repr_html_()
198+
html += "</div></details>"
199+
200+
html += "</div>"
201+
202+
return html
203+
204+
## Suggestion:
205+
# @property
206+
# def overview_panel(self):
207+
# """Provides an overview panel with plots of different data attributes."""
208+
# self.view_event_histogram(dfpid=2, backend="matplotlib")
209+
185210
@property
186211
def dataframe(self) -> Union[pd.DataFrame, ddf.DataFrame]:
187212
"""Accessor to the underlying dataframe.
@@ -238,13 +263,13 @@ def timed_dataframe(self, timed_dataframe: Union[pd.DataFrame, ddf.DataFrame]):
238263
self._timed_dataframe = timed_dataframe
239264

240265
@property
241-
def attributes(self) -> dict:
266+
def attributes(self) -> MetaHandler:
242267
"""Accessor to the metadata dict.
243268
244269
Returns:
245-
dict: The metadata dict.
270+
MetaHandler: The metadata object
246271
"""
247-
return self._attributes.metadata
272+
return self._attributes
248273

249274
def add_attribute(self, attributes: dict, name: str, **kwds):
250275
"""Function to add element to the attributes dict.

tests/test_metadata.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import json
2+
from typing import Any
3+
from typing import Dict
4+
5+
import numpy as np
6+
import pytest
7+
8+
from sed.core.metadata import DuplicateEntryError
9+
from sed.core.metadata import MetaHandler
10+
11+
metadata: Dict[Any, Any] = {}
12+
metadata["entry_title"] = "Title"
13+
# sample
14+
metadata["sample"] = {}
15+
metadata["sample"]["size"] = np.array([1, 2, 3])
16+
metadata["sample"]["name"] = "Sample Name"
17+
18+
19+
@pytest.fixture
20+
def meta_handler():
21+
# Create a MetaHandler instance
22+
return MetaHandler(meta=metadata)
23+
24+
25+
def test_add_entry_overwrite(meta_handler):
26+
# Add a new entry to metadata with 'overwrite' policy
27+
new_entry = {"sample": "Sample Name"}
28+
meta_handler.add(new_entry, "sample", duplicate_policy="overwrite")
29+
assert "sample" in meta_handler.metadata
30+
assert meta_handler.metadata["sample"] == new_entry
31+
32+
33+
def test_add_entry_raise(meta_handler):
34+
# Attempt to add a duplicate entry with 'raise' policy
35+
with pytest.raises(DuplicateEntryError):
36+
meta_handler.add({}, "entry_title", duplicate_policy="raise")
37+
38+
39+
def test_add_entry_append(meta_handler):
40+
# Add a new entry to metadata with 'append' policy
41+
new_entry = {"sample": "Sample Name"}
42+
meta_handler.add(new_entry, "sample", duplicate_policy="append")
43+
assert "sample" in meta_handler.metadata
44+
assert "sample_1" in meta_handler.metadata
45+
assert meta_handler.metadata["sample_1"] == new_entry
46+
47+
48+
def test_add_entry_merge(meta_handler):
49+
# Add a new entry to metadata with 'merge' policy
50+
entry_to_merge = {"name": "Name", "type": "type"}
51+
meta_handler.add(entry_to_merge, "sample", duplicate_policy="merge")
52+
print(meta_handler.metadata)
53+
assert "sample" in meta_handler.metadata
54+
assert "name" in meta_handler.metadata["sample"]
55+
assert "type" in meta_handler.metadata["sample"]
56+
57+
58+
def test_repr(meta_handler):
59+
# Test the __repr__ method
60+
assert repr(meta_handler) == json.dumps(metadata, default=str, indent=4)
61+
62+
63+
def test_repr_html(meta_handler):
64+
# Test the _repr_html_ method
65+
html = meta_handler._format_attributes(metadata)
66+
assert meta_handler._repr_html_() == html
67+
68+
html_test = "<div style='padding-left: 0px;'><b>Entry Title</b> [entry_title]: Title</div>"
69+
html_test += (
70+
"<div style='padding-left: 0px;'><details><summary><b>Sample</b> [sample]</summary>"
71+
)
72+
html_test += "<div style='padding-left: 20px;'><b>Size</b> [size]: (3,)</div>"
73+
html_test += "<div style='padding-left: 20px;'><b>Name</b> [name]: Sample Name"
74+
html_test += "</div></details></div>"
75+
assert html == html_test

tests/test_processor.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -189,11 +189,11 @@ def test_attributes_setters() -> None:
189189
processor.dataframe["X"].compute(),
190190
processor.dataframe["Y"].compute(),
191191
)
192-
processor_metadata = processor.attributes
192+
processor_metadata = processor.attributes.metadata
193193
assert isinstance(processor_metadata, dict)
194194
assert "test" in processor_metadata.keys()
195195
processor.add_attribute({"key2": 5}, name="test2")
196-
assert processor.attributes["test2"]["key2"] == 5
196+
assert processor_metadata["test2"]["key2"] == 5
197197
assert processor.config["core"]["loader"] == "mpes"
198198
assert len(processor.files) == 2
199199

@@ -398,7 +398,7 @@ def test_pose_adjustment_save_load() -> None:
398398
processor.apply_momentum_correction()
399399
assert "Xm" in processor.dataframe.columns
400400
assert "Ym" in processor.dataframe.columns
401-
assert "momentum_correction" in processor.attributes
401+
assert "momentum_correction" in processor.attributes.metadata
402402
os.remove("sed_config_pose_adjustments.yaml")
403403

404404

@@ -609,7 +609,10 @@ def test_energy_calibration_workflow(energy_scale: str, calibration_method: str)
609609
processor.add_energy_offset(constant=1)
610610
processor.append_energy_axis(preview=False)
611611
assert "energy" in processor.dataframe.columns
612-
assert processor.attributes["energy_calibration"]["calibration"]["energy_scale"] == energy_scale
612+
assert (
613+
processor.attributes.metadata["energy_calibration"]["calibration"]["energy_scale"]
614+
== energy_scale
615+
)
613616
os.remove(f"sed_config_energy_calibration_{energy_scale}-{calibration_method}.yaml")
614617

615618
energy1 = processor.dataframe["energy"].compute().values
@@ -743,11 +746,14 @@ def test_delay_calibration_workflow() -> None:
743746
processor.calibrate_delay_axis()
744747
assert "delay" in processor.dataframe.columns
745748
assert (
746-
processor.attributes["delay_calibration"]["calibration"]["creation_date"]
749+
processor.attributes.metadata["delay_calibration"]["calibration"]["creation_date"]
747750
== creation_date_calibration
748751
)
749752
processor.add_delay_offset(preview=True)
750-
assert processor.attributes["delay_offset"]["offsets"]["creation_date"] == creation_date_offsets
753+
assert (
754+
processor.attributes.metadata["delay_offset"]["offsets"]["creation_date"]
755+
== creation_date_offsets
756+
)
751757
np.testing.assert_allclose(expected, processor.dataframe["delay"].compute())
752758
os.remove("sed_config_delay_calibration.yaml")
753759

@@ -819,9 +825,12 @@ def test_add_time_stamped_data() -> None:
819825
res = processor.dataframe["time_stamped_data"].compute().values
820826
assert res[0] == 0
821827
assert res[-1] == 1
822-
assert processor.attributes["time_stamped_data"][0] == "time_stamped_data"
823-
np.testing.assert_array_equal(processor.attributes["time_stamped_data"][1], time_stamps)
824-
np.testing.assert_array_equal(processor.attributes["time_stamped_data"][2], data)
828+
assert processor.attributes.metadata["time_stamped_data"][0] == "time_stamped_data"
829+
np.testing.assert_array_equal(
830+
processor.attributes.metadata["time_stamped_data"][1],
831+
time_stamps,
832+
)
833+
np.testing.assert_array_equal(processor.attributes.metadata["time_stamped_data"][2], data)
825834

826835

827836
def test_event_histogram() -> None:

0 commit comments

Comments
 (0)