Skip to content

Commit dcb07b3

Browse files
authored
Merge pull request #97 from FledgeXu/config
Introduce `config_metadata`
2 parents 6f93bcc + 1b71a96 commit dcb07b3

File tree

4 files changed

+271
-96
lines changed

4 files changed

+271
-96
lines changed

src/zimscraperlib/constants.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# -*- coding: utf-8 -*-
33
# vim: ai ts=4 sts=4 et sw=4 nu
44

5+
import base64
56
import pathlib
67

78
ROOT_DIR = pathlib.Path(__file__).parent
@@ -17,3 +18,30 @@
1718

1819
# list of mimetypes for which articles using them should default to FRONT_ARTICLE
1920
FRONT_ARTICLE_MIMETYPES = ["text/html"]
21+
22+
# list of mandatory metadata keys of a ZIM file.
23+
MANDATORY_ZIM_METADATA_KEYS = [
24+
"Name",
25+
"Title",
26+
"Creator",
27+
"Publisher",
28+
"Date",
29+
"Description",
30+
"Language",
31+
"Illustration_48x48@1",
32+
]
33+
34+
DEFAULT_DEV_ZIM_METADATA = {
35+
"Name": "Test Name",
36+
"Title": "Test Title",
37+
"Creator": "Test Creator",
38+
"Publisher": "Test Publisher",
39+
"Date": "2023-01-01",
40+
"Description": "Test Description",
41+
"Language": "fra",
42+
"Illustration_48x48_at_1": base64.b64decode(
43+
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAAGXRFWHRTb2Z0d2FyZQBB"
44+
"ZG9iZSBJbWFnZVJlYWR5ccllPAAAAA9JREFUeNpi+P//P0CAAQAF/gL+Lc6J7gAAAABJ"
45+
"RU5ErkJggg=="
46+
),
47+
}

src/zimscraperlib/zim/creator.py

Lines changed: 102 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,15 @@
2222
import pathlib
2323
import re
2424
import weakref
25-
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
25+
from typing import Any, Callable, Iterable, Optional, Tuple, Union
2626

2727
import libzim.writer
2828

29-
from ..constants import FRONT_ARTICLE_MIMETYPES
29+
from ..constants import (
30+
DEFAULT_DEV_ZIM_METADATA,
31+
FRONT_ARTICLE_MIMETYPES,
32+
MANDATORY_ZIM_METADATA_KEYS,
33+
)
3034
from ..filesystem import delete_callback, get_content_mimetype, get_file_mimetype
3135
from ..types import get_mime_for_name
3236
from .items import StaticItem
@@ -80,28 +84,16 @@ class Creator(libzim.writer.Creator):
8084
def __init__(
8185
self,
8286
filename: pathlib.Path,
83-
main_path: str = None,
84-
language: Optional[Union[str, List[str]]] = "eng",
87+
main_path: str,
8588
compression: Optional[str] = None,
8689
workaround_nocancel: Optional[bool] = True,
8790
ignore_duplicates: Optional[bool] = False,
88-
**metadata: Dict[str, Union[str, datetime.date, datetime.datetime]]
8991
):
9092
super().__init__(filename=filename)
93+
self._metadata = dict()
9194
self.can_finish = True
9295

93-
if main_path:
94-
self.main_path = main_path
95-
96-
if language:
97-
if not isinstance(language, list):
98-
language = language.split(",")
99-
self.config_indexing(True, language[0])
100-
ld = {"Language": ",".join(language)}
101-
if metadata:
102-
metadata.update(ld)
103-
else:
104-
metadata = ld
96+
self.set_mainpath(main_path)
10597

10698
if compression:
10799
self.config_compression(
@@ -110,26 +102,108 @@ def __init__(
110102
else compression
111103
)
112104

113-
if metadata:
114-
self.metadata = metadata
115-
116105
self.workaround_nocancel = workaround_nocancel
117106
self.ignore_duplicates = ignore_duplicates
118107

119108
def start(self):
109+
if not all(
110+
[
111+
key in self._metadata.keys() and self._metadata.get(key, None)
112+
for key in MANDATORY_ZIM_METADATA_KEYS
113+
]
114+
):
115+
raise ValueError("Mandatory metadata are not all set.")
116+
117+
for name, value in self._metadata.items():
118+
if value:
119+
self._validate_metadata(name, value)
120+
120121
super().__enter__()
121122

122-
if getattr(self, "main_path", None):
123-
self.set_mainpath(self.main_path)
123+
self.add_illustration(48, self._metadata["Illustration_48x48@1"])
124+
del self._metadata["Illustration_48x48@1"]
125+
for name, value in self._metadata.items():
126+
if value:
127+
self.add_metadata(name, value)
124128

125-
if getattr(self, "metadata", None):
126-
self.update_metadata(**self.metadata)
127129
return self
128130

129-
def update_metadata(self, **kwargs):
130-
if kwargs:
131-
for name, value in kwargs.items():
132-
self.add_metadata(name, value)
131+
def _validate_metadata(self, name, value):
132+
if name == "Counter":
133+
raise ValueError("You do not need to set Counter.")
134+
135+
if name == "Description" and len(value) > 80:
136+
raise ValueError("Description is too long.")
137+
138+
if name == "LongDescription" and len(value) > 4000:
139+
raise ValueError("LongDescription is too long.")
140+
141+
def config_metadata(
142+
self,
143+
*,
144+
Name: str,
145+
Language: str,
146+
Title: str,
147+
Description: str,
148+
LongDescription: Optional[str] = None,
149+
Creator: str,
150+
Publisher: str,
151+
Date: Union[datetime.datetime, datetime.date, str],
152+
Illustration_48x48_at_1: bytes,
153+
Tags: Optional[Union[Iterable[str], str]] = None,
154+
Scraper: Optional[str] = None,
155+
Flavour: Optional[str] = None,
156+
Source: Optional[str] = None,
157+
License: Optional[str] = None,
158+
Relation: Optional[str] = None,
159+
**extras: str,
160+
):
161+
"""
162+
A chaining function which configures the metadata of the Creator class.
163+
You must set all mandatory metadata in this phase.
164+
165+
Parameters:
166+
check out: https://wiki.openzim.org/wiki/Metadata
167+
All the extra metadata must be plain text.
168+
169+
Returns:
170+
Self
171+
"""
172+
self._metadata.update(
173+
{
174+
"Name": Name,
175+
"Title": Title,
176+
"Creator": Creator,
177+
"Publisher": Publisher,
178+
"Date": Date,
179+
"Description": Description,
180+
"Language": Language,
181+
"License": License,
182+
"LongDescription": LongDescription,
183+
"Tags": Tags,
184+
"Relation": Relation,
185+
"Flavour": Flavour,
186+
"Source": Source,
187+
"Scraper": Scraper,
188+
"Illustration_48x48@1": Illustration_48x48_at_1,
189+
}
190+
)
191+
self._metadata.update(extras)
192+
language = self._metadata.get("Language", "").split(",")
193+
self.config_indexing(True, language[0])
194+
195+
return self
196+
197+
def config_dev_metadata(self, **extras: str):
198+
"""
199+
A test helper. It sets the default test metadata for a Creator instance; any `extras` given override or extend those defaults.
200+
201+
Returns:
202+
Self
203+
"""
204+
devel_default_metadata = DEFAULT_DEV_ZIM_METADATA.copy()
205+
devel_default_metadata.update(extras)
206+
return self.config_metadata(**devel_default_metadata)
133207

134208
def add_item_for(
135209
self,
@@ -254,9 +328,6 @@ def add_redirect(
254328
self.can_finish = False # pragma: no cover
255329
raise
256330

257-
def add_default_illustration(self, content: bytes):
258-
self.add_illustration(48, content)
259-
260331
def finish(self, exc_type=None, exc_val=None, exc_tb=None):
261332
"""Triggers finalization of ZIM creation and create final ZIM file."""
262333
if not getattr(self, "can_finish", False):

src/zimscraperlib/zim/filesystem.py

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ def make_zim_file(
125125
source: str = None,
126126
flavour: str = None,
127127
scraper: str = None,
128+
long_description: str = None,
128129
without_fulltext_index: bool = False,
129130
redirects: Sequence[Tuple[str, str, str]] = None,
130131
redirects_file: pathlib.Path = None,
@@ -149,39 +150,37 @@ def make_zim_file(
149150
if not favicon_path.exists() or not favicon_path.is_file():
150151
raise IOError(f"Incorrect favicon: {favicon} ({favicon_path})")
151152

152-
zim_file = Creator(
153-
filename=fpath,
154-
main_path=main_page,
155-
index_language="" if without_fulltext_index else language,
153+
with open(favicon_path, "rb") as fh:
154+
favicon_data = fh.read()
155+
156+
zim_file = Creator(filename=fpath, main_path=main_page).config_metadata(
156157
**{
157158
k: v
158159
for k, v in {
159160
# (somewhat) mandatory
160-
"name": name,
161-
"title": title,
162-
"description": description,
163-
"date": date or datetime.date.today(),
164-
"language": language,
165-
"creator": creator,
166-
"publisher": publisher,
161+
"Name": name,
162+
"Title": title,
163+
"Description": description,
164+
"Date": date or datetime.date.today(),
165+
"Language": language,
166+
"Creator": creator,
167+
"Publisher": publisher,
167168
# optional
168-
"tags": ";".join(tags) if tags else None,
169-
"source": source,
170-
"flavour": flavour,
171-
"scraper": scraper,
169+
"Tags": ";".join(tags) if tags else None,
170+
"Source": source,
171+
"Flavour": flavour,
172+
"Scraper": scraper,
173+
"LongDescription": long_description,
174+
"Illustration_48x48_at_1": favicon_data,
172175
}.items()
173176
if v is not None
174-
},
177+
}
175178
)
176179

177180
zim_file.start()
178181
try:
179182
logger.debug(f"Preparing zimfile at {zim_file.filename}")
180183

181-
# add favicon as illustration
182-
with open(favicon_path, "rb") as fh:
183-
zim_file.add_default_illustration(fh.read())
184-
185184
# recursively add content from build_dir
186185
logger.debug(f"Recursively adding files from {build_dir}")
187186
add_to_zim(build_dir, zim_file, build_dir)

0 commit comments

Comments
 (0)