Skip to content

Commit fa7b421

Browse files
authored
Merge pull request #98 from openzim/api-updates
API updates
2 parents dcb07b3 + b132f5c commit fa7b421

File tree

18 files changed

+402
-112
lines changed

18 files changed

+402
-112
lines changed

CHANGELOG.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,37 @@ All notable changes to this project are documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) (as of version 1.5.0).
77

8+
## [Unreleased]
9+
10+
⚠️ Warning: this release introduces several API changes to `zim.creator.Creator` and `zim.filesystem.make_zim_file`
11+
12+
### Added
13+
14+
- `zim.creator.Creator.config_metadata` method (returning Self) exposing all mandatory Metadata, all standard ones and allowing extra text metadata.
15+
- `zim.creator.Creator.config_dev_metadata` method setting stub metadata for all mandatory ones (allowing overrides)
16+
- `zim.metadata` module with a list of per-metadata validation functions
17+
- `zim.creator.Creator.validate_metadata` (called on `start`) to verify metadata respects the spec (and its recommendations)
18+
- `zim.filesystem.make_zim_file` accepts a new optional `long_description` param.
19+
- `i18n.is_valid_iso_639_3` to check ISO-639-3 codes
20+
- `image.probing.is_valid_image` to check Image format and size
21+
22+
### Changed
23+
24+
- `zim.creator.Creator` `main_path` argument now mandatory
25+
- `zim.creator.Creator.start` now fails on missing required or invalid metadata
26+
- `zim.creator.Creator.add_metadata` now enforces validation checks
27+
- `zim.filesystem.make_zim_file` renamed its `favicon` param to `illustration`
28+
- `zim.creator.Creator.config_indexing` `language` argument now optional when `indexing=False`
29+
- `zim.creator.Creator.config_indexing` now validates `language` is ISO-639-3 when `indexing=True`
30+
31+
### Removed
32+
33+
- `zim.creator.Creator.update_metadata`. See `.config_metadata()` instead
34+
- `zim.creator.Creator` `language` argument. See `.config_metadata()` instead
35+
- `zim.creator.Creator` keyword arguments. See `.config_metadata()` instead
36+
- `zim.creator.Creator.add_default_illustration`. See `.config_metadata()` instead
37+
- `zim.archive.Archive.media_counter` (deprecated in `2.0.0`)
38+
839
## [2.1.0] - 2023-03-06
940

1041
## Added

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ colorthief==0.2.1
44
python-resize-image>=1.1.19,<1.2
55
Babel>=2.9,<3.0
66
file-magic>=0.4.0,<0.5
7-
libzim>=2.1.0,<3.0
7+
libzim>=3.0.0,<3.1
88
beautifulsoup4>=4.9.3,<4.10
99
lxml>=4.6.3,<4.10
1010
optimize-images>=1.3.6,<1.6

src/zimscraperlib/constants.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import base64
66
import pathlib
7+
import re
78

89
ROOT_DIR = pathlib.Path(__file__).parent
910
NAME = pathlib.Path(__file__).parent.name
@@ -39,9 +40,17 @@
3940
"Date": "2023-01-01",
4041
"Description": "Test Description",
4142
"Language": "fra",
43+
# blank 48x48 transparent PNG
4244
"Illustration_48x48_at_1": base64.b64decode(
43-
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAAGXRFWHRTb2Z0d2FyZQBB"
44-
"ZG9iZSBJbWFnZVJlYWR5ccllPAAAAA9JREFUeNpi+P//P0CAAQAF/gL+Lc6J7gAAAABJ"
45-
"RU5ErkJggg=="
45+
"iVBORw0KGgoAAAANSUhEUgAAADAAAAAwAQMAAABtzGvEAAAAGXRFWHRTb2Z0d2FyZQBB"
46+
"ZG9iZSBJbWFnZVJlYWR5ccllPAAAAANQTFRFR3BMgvrS0gAAAAF0Uk5TAEDm2GYAAAAN"
47+
"SURBVBjTY2AYBdQEAAFQAAGn4toWAAAAAElFTkSuQmCC"
4648
),
4749
}
50+
51+
MAXIMUM_DESCRIPTION_METADATA_LENGTH = 80
52+
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH = 4000
53+
54+
ILLUSTRATIONS_METADATA_RE = re.compile(
55+
r"^Illustration_(?P<height>\d+)x(?P<width>\d+)@(?P<scale>\d+)$"
56+
)

src/zimscraperlib/i18n.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,3 +179,8 @@ def get_language_details(query: str, failsafe: Optional[bool] = False) -> Dict:
179179
}
180180
)
181181
return iso_data
182+
183+
184+
def is_valid_iso_639_3(code: str) -> bool:
    """Tell whether code is, as given, the ISO-639-3 code of a language

    code is looked up through get_language_details() in failsafe mode and
    is accepted only if the `iso-639-3` entry of the details found equals
    code exactly."""
    details = get_language_details(code, failsafe=True)
    # a falsy lookup result is treated as having no `iso-639-3` entry
    found = details.get("iso-639-3") if details else None
    return found == code

src/zimscraperlib/image/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# flake8: noqa
66
from .convertion import convert_image
77
from .optimization import optimize_image
8+
from .probing import is_valid_image
89
from .transformation import resize_image
910

10-
__all__ = ["convert_image", "optimize_image", "resize_image"]
11+
__all__ = ["convert_image", "is_valid_image", "optimize_image", "resize_image"]

src/zimscraperlib/image/probing.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,22 @@ def format_for(src: Union[pathlib.Path, io.BytesIO], from_suffix: bool = True) -
6060

6161
init_pil()
6262
return ext_fmt_map[src.suffix] # might raise KeyError on unknown extension
63+
64+
65+
def is_valid_image(
    image: Union[pathlib.Path, io.IOBase, bytes],
    imformat: str,
    size: Optional[Tuple[int, int]] = None,
) -> bool:
    """Whether image is a valid image of format imformat, optionally of requested size

    image: path to an image file, a binary file-like object or raw image bytes
    imformat: format name (ex: "PNG") compared to the `format` PIL reports
    size: if set, tuple that must equal the `size` PIL reports for the image

    Never raises: returns False if image cannot be opened by PIL, is of
    another format or is not of the requested size."""
    if isinstance(image, bytes):
        image = io.BytesIO(image)
    try:
        # context manager so the file handle PIL opens for a path is released
        with PIL.Image.open(image) as img:
            if img.format != imformat:
                return False
            return not size or img.size == size
    except Exception:
        # deliberate best-effort: any failure to open/parse means "not valid"
        return False

src/zimscraperlib/zim/archive.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
- direct access to search results and number of results
1111
- public Entry access by Id"""
1212

13-
import warnings
1413
from typing import Dict, Iterable, List, Optional
1514

1615
import libzim.reader
@@ -107,20 +106,3 @@ def counters(self) -> Dict[str, int]:
107106
return parseMimetypeCounter(self.get_text_metadata("Counter"))
108107
except RuntimeError: # pragma: no cover (no ZIM avail to test itl)
109108
return {} # pragma: no cover
110-
111-
@property
112-
def article_counter(self) -> int:
113-
warnings.warn(
114-
"Archive.article_counter now deprecated. "
115-
"Use Archive.article_count instead",
116-
DeprecationWarning,
117-
)
118-
return self.article_count
119-
120-
@property
121-
def media_counter(self) -> int:
122-
warnings.warn(
123-
"Archive.media_counter now deprecated. " "Use Archive.media_count instead",
124-
DeprecationWarning,
125-
)
126-
return self.media_count

src/zimscraperlib/zim/creator.py

Lines changed: 61 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,21 @@
3232
MANDATORY_ZIM_METADATA_KEYS,
3333
)
3434
from ..filesystem import delete_callback, get_content_mimetype, get_file_mimetype
35+
from ..i18n import is_valid_iso_639_3
3536
from ..types import get_mime_for_name
3637
from .items import StaticItem
38+
from .metadata import (
39+
validate_counter,
40+
validate_date,
41+
validate_description,
42+
validate_illustrations,
43+
validate_language,
44+
validate_longdescription,
45+
validate_required_values,
46+
validate_standard_str_types,
47+
validate_tags,
48+
validate_title,
49+
)
3750

3851
DUPLICATE_EXC_STR = re.compile(
3952
r"^Impossible to add(.+)"
@@ -91,6 +104,7 @@ def __init__(
91104
):
92105
super().__init__(filename=filename)
93106
self._metadata = dict()
107+
self.__indexing_configured = False
94108
self.can_finish = True
95109

96110
self.set_mainpath(main_path)
@@ -105,18 +119,28 @@ def __init__(
105119
self.workaround_nocancel = workaround_nocancel
106120
self.ignore_duplicates = ignore_duplicates
107121

122+
def config_indexing(self, indexing: bool, language: Optional[str] = None):
    """Toggle full-text and title indexing of entries

    language: ISO-639-3 code of the language to index content with.
    If not set, the first code of the Language metadata is used
    (or "" if there is none).

    Raises ValueError if indexing is requested and language is not a valid
    ISO-639-3 code.
    Returns self so that calls can be chained."""
    if not language:
        # Language metadata may list several comma-separated codes:
        # use the first one only, as start() does
        language = (self._metadata.get("Language") or "").split(",")[0]
    if indexing and not is_valid_iso_639_3(language):
        raise ValueError("Not a valid ISO-639-3 language code")
    super().config_indexing(indexing, language)
    # lets start() know it must not configure indexing on its own
    self.__indexing_configured = True
    return self
132+
108133
def start(self):
109-
if not all(
110-
[
111-
key in self._metadata.keys() and self._metadata.get(key, None)
112-
for key in MANDATORY_ZIM_METADATA_KEYS
113-
]
114-
):
134+
if not all([self._metadata.get(key) for key in MANDATORY_ZIM_METADATA_KEYS]):
115135
raise ValueError("Mandatory metadata are not all set.")
116136

117137
for name, value in self._metadata.items():
118138
if value:
119-
self._validate_metadata(name, value)
139+
self.validate_metadata(name, value)
140+
141+
language = self._metadata.get("Language", "").split(",")
142+
if language[0] and not self.__indexing_configured:
143+
self.config_indexing(True, language[0])
120144

121145
super().__enter__()
122146

@@ -128,15 +152,36 @@ def start(self):
128152

129153
return self
130154

131-
def _validate_metadata(self, name, value):
132-
if name == "Counter":
133-
raise ValueError("You do not need to set Counter.")
155+
def validate_metadata(
    self,
    name: str,
    value: Union[bytes, str, datetime.datetime, datetime.date, Iterable[str]],
):
    """Check the value given for metadata name against the openZIM Metadata spec

    Recommendations of the spec are enforced as well.
    Every validator is called in turn with (name, value): the two generic
    ones first, then the per-metadata ones.
    See https://wiki.openzim.org/wiki/Metadata"""
    # order matters: kept identical to the historical call sequence
    validators = (
        validate_required_values,
        validate_standard_str_types,
        validate_title,
        validate_date,
        validate_language,
        validate_counter,
        validate_description,
        validate_longdescription,
        validate_tags,
        validate_illustrations,
    )
    for validator in validators:
        validator(name, value)
176+
177+
def add_metadata(
    self,
    name: str,
    content: Union[str, bytes, datetime.date, datetime.datetime],
    mimetype: str = "text/plain;charset=UTF-8",
):
    """Validate content for metadata name, then add it through the parent class

    validate_metadata() runs first, so the parent's add_metadata() is only
    reached if validation did not raise."""
    self.validate_metadata(name, content)
    super().add_metadata(name, content, mimetype)
140185

141186
def config_metadata(
142187
self,
@@ -158,17 +203,7 @@ def config_metadata(
158203
Relation: Optional[str] = None,
159204
**extras: str,
160205
):
161-
"""
162-
A chaining functions which configures the metadata of the Creator class.
163-
You must set all mandatory metadata in this phase.
164-
165-
Parameters:
166-
check out: https://wiki.openzim.org/wiki/Metadata
167-
all the extra metadata must be plain text.
168-
169-
Returns:
170-
Self
171-
"""
206+
"""Sets all mandatory Metadata as well as standard and any other text ones"""
172207
self._metadata.update(
173208
{
174209
"Name": Name,
@@ -189,18 +224,10 @@ def config_metadata(
189224
}
190225
)
191226
self._metadata.update(extras)
192-
language = self._metadata.get("Language", "").split(",")
193-
self.config_indexing(True, language[0])
194-
195227
return self
196228

197229
def config_dev_metadata(self, **extras: str):
    """Configure metadata from the development defaults, overridden by extras

    A new dict is built from DEFAULT_DEV_ZIM_METADATA (left untouched) and
    extras, then handed to config_metadata() whose return value is returned."""
    dev_metadata = {**DEFAULT_DEV_ZIM_METADATA, **extras}
    return self.config_metadata(**dev_metadata)

src/zimscraperlib/zim/filesystem.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def make_zim_file(
114114
fpath: pathlib.Path,
115115
name: str,
116116
main_page: str,
117-
favicon: str,
117+
illustration: str,
118118
title: str,
119119
description: str,
120120
date: datetime.date = None,
@@ -135,7 +135,7 @@ def make_zim_file(
135135
"""Creates a zimwriterfs-like ZIM file at {fpath} from {build_dir}
136136
137137
main_page: path of item to serve as main page
138-
favicon: relative path to favicon file in build_dir
138+
illustration: relative path to illustration file in build_dir
139139
tags: list of str tags to add to meta
140140
redirects: list of (src, dst, title) tuple to create redirects from
141141
rewrite_links controls whether to rewrite HTML/CSS content
@@ -146,12 +146,12 @@ def make_zim_file(
146146
if not build_dir.exists() or not build_dir.is_dir():
147147
raise IOError(f"Incorrect build_dir: {build_dir}")
148148

149-
favicon_path = build_dir / favicon
150-
if not favicon_path.exists() or not favicon_path.is_file():
151-
raise IOError(f"Incorrect favicon: {favicon} ({favicon_path})")
149+
illustration_path = build_dir / illustration
150+
if not illustration_path.exists() or not illustration_path.is_file():
151+
raise IOError(f"Incorrect illustration: {illustration} ({illustration_path})")
152152

153-
with open(favicon_path, "rb") as fh:
154-
favicon_data = fh.read()
153+
with open(illustration_path, "rb") as fh:
154+
illustration_data = fh.read()
155155

156156
zim_file = Creator(filename=fpath, main_path=main_page).config_metadata(
157157
**{
@@ -171,7 +171,7 @@ def make_zim_file(
171171
"Flavour": flavour,
172172
"Scraper": scraper,
173173
"LongDescription": long_description,
174-
"Illustration_48x48_at_1": favicon_data,
174+
"Illustration_48x48_at_1": illustration_data,
175175
}.items()
176176
if v is not None
177177
}

0 commit comments

Comments
 (0)