Skip to content

Commit d2895ee

Browse files
committed
libzim StringProvider support both bytes and str for content
1 parent 7e19b92 commit d2895ee

File tree

4 files changed

+48
-4
lines changed

4 files changed

+48
-4
lines changed

src/zimscraperlib/zim/items.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import re
1010
import tempfile
1111
import urllib.parse
12-
from typing import Any, Optional
12+
from typing import Any, Optional, Union
1313

1414
import libzim.writer # pyright: ignore
1515

@@ -72,7 +72,7 @@ class StaticItem(Item):
7272

7373
def __init__(
7474
self,
75-
content: Optional[str] = None,
75+
content: Optional[Union[str, bytes]] = None,
7676
fileobj: Optional[io.IOBase] = None,
7777
filepath: Optional[pathlib.Path] = None,
7878
path: Optional[str] = None,
@@ -95,6 +95,8 @@ def get_contentprovider(self) -> libzim.writer.ContentProvider:
9595
# content was set manually
9696
content = getattr(self, "content", None)
9797
if content is not None:
98+
if not isinstance(content, (str, bytes)):
99+
raise AttributeError(f"Unexpected type for content: {type(content)}")
98100
return StringProvider(content=content, ref=self)
99101

100102
# using a file-like object

src/zimscraperlib/zim/providers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def __init__(
3131

3232

3333
class StringProvider(libzim.writer.StringProvider):
34-
def __init__(self, content: str, ref: Optional[object] = None):
34+
def __init__(self, content: Union[str, bytes], ref: Optional[object] = None):
3535
super().__init__(content)
3636
self.ref = ref
3737

tests/zim/conftest.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,26 @@ def html_str():
2424
"""
2525

2626

27+
@pytest.fixture(scope="function")
28+
def html_str_cn():
29+
"""sample HTML content with Chinese characters"""
30+
return """<html>
31+
<body>
32+
<ul>
33+
<li><a href="download/toto.pdf">PDF doc in 汉字</a></li>
34+
<li><a href="download/toto.txt">text file</a></li>
35+
<li><a href="dest.html">HTML link</a></li>
36+
<li><a href="no-extension">no ext link</a></li>
37+
<li><a href="http://www.example.com/index/sample.html">external link</a></li>
38+
<li><a href="mailto:[email protected]">e-mail link</a></li>
39+
<li><a media="">no href link</a></li>
40+
<object data="download/toto.jpg" width="300" height="200"></object>
41+
<script src="assets/js/bootstrap/bootsrap.css?v=20190101"></script>
42+
</body>
43+
</html>
44+
"""
45+
46+
2747
@pytest.fixture(scope="function")
2848
def html_file(tmp_path, html_str):
2949
fpath = tmp_path / "test.html"

tests/zim/test_zim_creator.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def get_contentprovider(self):
4545
return FileLikeProvider(self.fileobj)
4646

4747

48-
def test_zim_creator(tmp_path, png_image, html_file, html_str):
48+
def test_zim_creator(tmp_path, png_image, html_file, html_str: str, html_str_cn: str):
4949
fpath = tmp_path / "test.zim"
5050
main_path = "welcome"
5151
tags = ";".join(["toto", "tata"])
@@ -56,6 +56,13 @@ def test_zim_creator(tmp_path, png_image, html_file, html_str):
5656
) as creator:
5757
# verbatim HTML from string
5858
creator.add_item_for("welcome", "wel", content=html_str, is_front=True)
59+
# verbatim HTML from bytes
60+
creator.add_item_for(
61+
"welcome1", "wel1", content=html_str.encode(), is_front=True
62+
)
63+
creator.add_item_for(
64+
"welcome2", "wel2", content=html_str_cn.encode("gb2312"), is_front=True
65+
)
5966
# verbatim HTML from file
6067
creator.add_item_for("welcome3", "wel3", fpath=html_file)
6168
creator.add_item_for("welcome4", "wel4", fpath=html_file)
@@ -98,6 +105,8 @@ def test_zim_creator(tmp_path, png_image, html_file, html_str):
98105

99106
# ensure non-rewritten articles have not been rewritten
100107
assert bytes(reader.get_item("welcome").content).decode(UTF8) == html_str
108+
assert bytes(reader.get_item("welcome1").content).decode(UTF8) == html_str
109+
assert bytes(reader.get_item("welcome2").content).decode("gb2312") == html_str_cn
101110
assert bytes(reader.get_item("welcome3").content).decode(UTF8) == html_str
102111

103112
# ensure illustration is present and correct
@@ -180,6 +189,19 @@ def test_add_item_for_delete_fail(tmp_path, png_image):
180189
assert reader.get_item("index")
181190

182191

192+
def test_add_item_for_unsupported_content_type(tmp_path):
193+
fpath = tmp_path / "test.zim"
194+
# test with incorrect content type
195+
with Creator(fpath, "welcome").config_dev_metadata() as creator:
196+
with pytest.raises(RuntimeError):
197+
creator.add_item_for(
198+
path="welcome",
199+
title="hello",
200+
mimetype="text/plain",
201+
content=123, # pyright: ignore[reportArgumentType]
202+
)
203+
204+
183205
def test_compression(tmp_path):
184206
fpath = tmp_path / "test.zim"
185207
with Creator(

0 commit comments

Comments
 (0)