Skip to content

add function to concatenate HTML files, such as reports #13223

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/changes/devel/13223.newfeature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add :func:`mne.report.concatenate_reports` to concatenate multiple HTML files, such as saved :class:`mne.Report` files, into a single HTML file, by `Roy Eric Wieske`_.
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ channels:
dependencies:
- python >=3.10
- antio >=0.5.0
- beautifulsoup4
- darkdetect
- decorator
- defusedxml
Expand Down
88 changes: 88 additions & 0 deletions mne/report/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from shutil import copyfile

import numpy as np
from bs4 import BeautifulSoup, Comment, Tag

from .. import __version__ as MNE_VERSION
from .._fiff.meas_info import Info, read_info
Expand Down Expand Up @@ -675,6 +676,93 @@ def open_report(fname, **params):
return report


def concatenate_reports(html_files, output_file):
    """Concatenate multiple HTML files into one.

    Each input file is parsed, its table of contents (an element with
    ``id="toc"`` or, failing that, the first element with class ``toc``) is
    dropped, and the remaining content is merged:

    - ``<script>``, ``<link>`` and ``<style>`` tags from every ``<head>`` are
      collected into a single ``<head>``; tags whose serialized markup is
      identical to one already collected (from any input file) are skipped.
    - The children of every ``<body>`` are moved into a ``<section>`` that
      starts with an ``<hr>`` and a ``START <file name>`` comment. The sections
      are appended to a single ``<body>`` in input order.
    - ``id`` attributes that were already used earlier in the combined body are
      removed, so the first occurrence of each ID wins.

    Parameters
    ----------
    html_files : list of str or Path
        List of paths to the HTML files to be concatenated.
    output_file : str or Path
        Path to the output HTML file.

    Returns
    -------
    final_html : BeautifulSoup
        A BeautifulSoup object representing the combined HTML content.
    """
    combined_head = BeautifulSoup("<head></head>", "lxml").head
    combined_body = BeautifulSoup("<body></body>", "lxml").body
    used_ids = set()
    # Shared across *all* input files: the serialized tag includes its name,
    # so a single set is enough to de-duplicate scripts, links and styles.
    seen_head_tags = set()

    for file in html_files:
        file = Path(file)
        with open(file, encoding="utf-8") as f:
            soup = BeautifulSoup(f, "lxml")

        toc = soup.find(id="toc") or soup.find(class_="toc")
        if toc:
            toc.decompose()

        # handle head
        if soup.head:
            # find_all() returns a new list, so moving tags out of soup.head
            # via append() below does not disturb the iteration.
            for tag in soup.head.find_all(["script", "link", "style"], recursive=True):
                tag_str = str(tag)
                if tag_str in seen_head_tags:
                    continue
                seen_head_tags.add(tag_str)
                combined_head.append(tag)

        # handle body
        if soup.body:
            section = soup.new_tag("section")
            section.append(soup.new_tag("hr"))
            section.append(Comment(f"START {file.name}"))

            # Iterate over a snapshot: section.append() moves each node out of
            # soup.body.contents, and mutating that list while looping over it
            # would silently skip every other child.
            for node in list(soup.body.contents):
                if isinstance(node, Tag):
                    # Check the element itself as well as all its descendants.
                    for element in [node, *node.find_all(True)]:
                        id_ = element.get("id")
                        if not id_:
                            continue
                        if id_ in used_ids:
                            del element["id"]
                        else:
                            used_ids.add(id_)

                section.append(node)

            combined_body.append(section)

    # create final HTML
    final_html = BeautifulSoup("<html></html>", "lxml")
    final_html.html.append(combined_head)
    final_html.html.append(combined_body)

    output_file = Path(output_file)
    with output_file.open("w", encoding="utf-8") as f:
        f.write(final_html.prettify())

    return final_html


###############################################################################
# HTML scan renderer

Expand Down
45 changes: 45 additions & 0 deletions mne/report/tests/test_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import numpy as np
import pytest
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt

from mne import (
Expand All @@ -33,6 +34,7 @@
from mne.report.report import (
_ALLOWED_IMAGE_FORMATS,
CONTENT_ORDER,
concatenate_reports,
)
from mne.utils import Bunch, _record_warnings
from mne.utils._testing import assert_object_equal
Expand Down Expand Up @@ -634,6 +636,49 @@ def test_open_report(tmp_path):
assert h5io.read_hdf5(hdf5, title="companion") == "test"


def test_concatenate_reports(tmp_path, sample_meg_dir):
    """Test that concatenate_reports merges two saved reports into one file."""
    raw_path = sample_meg_dir / "sample_audvis_raw.fif"
    raw = read_raw_fif(raw_path, preload=True)
    raw.set_annotations(None)
    raw.crop(0, 20)

    # ``tmp_path`` is already a ``pathlib.Path``; it must not be used as a
    # context manager (that support was removed from pathlib in Python 3.13).
    file1 = tmp_path / "report1.html"
    file2 = tmp_path / "report2.html"
    output_file = tmp_path / "combined.html"

    # Report 1 with custom content
    report1 = Report(title="Report eeg_preprocessing #1")
    report1.add_html(
        "<div class='custom-note'>This is report one</div>", title="Note 1"
    )
    report1.add_raw(raw, title="Raw data", psd=False)
    report1.save(file1, overwrite=True, open_browser=False)

    # Report 2 with different custom content
    report2 = Report(title="Report eeg_preprocessing #2")
    report2.add_html(
        "<div class='custom-note'>This is report two</div>", title="Note 2"
    )
    report2.add_raw(raw, title="Raw data", psd=False)
    report2.save(file2, overwrite=True, open_browser=False)

    combined = concatenate_reports([file1, file2], output_file)

    assert isinstance(combined, BeautifulSoup)
    assert output_file.exists()

    with open(output_file, encoding="utf-8") as f:
        out_html = BeautifulSoup(f, "lxml")

    assert out_html.head is not None
    assert out_html.body is not None
    # ``string=`` replaces the deprecated ``text=`` keyword of find().
    assert out_html.find(string=lambda t: "This is report one" in t) is not None
    assert out_html.find(string=lambda t: "This is report two" in t) is not None


def test_remove():
"""Test removing figures from a report."""
r = Report()
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ full = ["mne[full-no-qt]", "PyQt6 != 6.6.0", "PyQt6-Qt6 != 6.6.0, != 6.7.0"]
# and mne[full-pyside6], which will install PySide6 instead of PyQt6.
full-no-qt = [
"antio >= 0.5.0",
"beautifulsoup4",
"darkdetect",
"defusedxml",
"dipy",
Expand Down
Loading