2222import pathlib
2323import re
2424import weakref
25+ from collections .abc import Iterable as IterableT
2526from typing import Any , Callable , Iterable , Optional , Tuple , Union
2627
2728import libzim .writer
2829
2930from ..constants import (
3031 DEFAULT_DEV_ZIM_METADATA ,
3132 FRONT_ARTICLE_MIMETYPES ,
33+ ILLUSTRATIONS_METADATA_RE ,
3234 MANDATORY_ZIM_METADATA_KEYS ,
35+ MAXIMUM_DESCRIPTION_METADATA_LENGTH ,
36+ MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH ,
3337)
3438from ..filesystem import delete_callback , get_content_mimetype , get_file_mimetype
39+ from ..i18n import is_valid_iso_639_3
40+ from ..image .probing import is_valid_image
3541from ..types import get_mime_for_name
3642from .items import StaticItem
3743
@@ -91,6 +97,7 @@ def __init__(
9197 ):
9298 super ().__init__ (filename = filename )
9399 self ._metadata = dict ()
100+ self .__indexing_configured = False
94101 self .can_finish = True
95102
96103 self .set_mainpath (main_path )
@@ -105,18 +112,28 @@ def __init__(
105112 self .workaround_nocancel = workaround_nocancel
106113 self .ignore_duplicates = ignore_duplicates
107114
def config_indexing(self, indexing: bool, language: Optional[str] = None):
    """Toggle full-text and title indexing of entries.

    Parameters:
        indexing: whether entries should be indexed.
        language: ISO-639-3 code handed to the indexer. When omitted, the
            first code of the Language metadata is used ("" if not set).

    Returns:
        Self, so that calls can be chained.

    Raises:
        ValueError: indexing is requested with an invalid ISO-639-3 code.
    """
    if not language:
        # Language metadata may be missing, None or a comma-separated list
        # of codes: only its first code is used, as start() does
        language = (self._metadata.get("Language") or "").split(",")[0]
    if indexing and not is_valid_iso_639_3(language):
        raise ValueError("Not a valid ISO-639-3 language code")
    super().config_indexing(indexing, language)
    # remembered so start() does not override an explicit configuration
    self.__indexing_configured = True
    return self
125+
108126 def start (self ):
109- if not all (
110- [
111- key in self ._metadata .keys () and self ._metadata .get (key , None )
112- for key in MANDATORY_ZIM_METADATA_KEYS
113- ]
114- ):
127+ if not all ([self ._metadata .get (key ) for key in MANDATORY_ZIM_METADATA_KEYS ]):
115128 raise ValueError ("Mandatory metadata are not all set." )
116129
117130 for name , value in self ._metadata .items ():
118131 if value :
119- self ._validate_metadata (name , value )
132+ self .validate_metadata (name , value )
133+
134+ language = self ._metadata .get ("Language" , "" ).split ("," )
135+ if language [0 ] and not self .__indexing_configured :
136+ self .config_indexing (True , language [0 ])
120137
121138 super ().__enter__ ()
122139
@@ -128,15 +145,97 @@ def start(self):
128145
129146 return self
130147
131- def _validate_metadata (self , name , value ):
def validate_metadata(
    self,
    name: str,
    value: Union[bytes, str, datetime.datetime, datetime.date, Iterable[str]],
):
    """Ensure the metadata value for name conforms to the openZIM Metadata spec.

    Also enforces recommendations.
    See https://wiki.openzim.org/wiki/Metadata

    Parameters:
        name: metadata name (Title, Date, Language, Illustration_48x48@1...).
        value: candidate value for that metadata.

    Raises:
        ValueError: the value is missing, of the wrong type, too long or
            otherwise invalid for this metadata name.
    """

    # the spec doesn't require any value but empty strings are not useful
    if name in MANDATORY_ZIM_METADATA_KEYS and not value:
        raise ValueError(f"Missing value for {name}")

    # mandatory and standard text metadata must be plain strings
    if name in (
        "Name",
        "Title",
        "Creator",
        "Publisher",
        "Description",
        "LongDescription",
        "License",
        "Relation",
        "Flavour",
        "Source",
        "Scraper",
    ) and not isinstance(value, str):
        raise ValueError(f"Invalid type for {name}")

    if name == "Title" and len(value) > 30:
        raise ValueError(f"{name} is too long.")

    if name == "Date":
        if not isinstance(value, (datetime.datetime, datetime.date, str)):
            raise ValueError(f"Invalid type for {name}.")
        if isinstance(value, str):
            match = re.match(
                r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})", value
            )
            # report a non-matching string explicitly instead of leaking an
            # AttributeError on None into the error message
            if match is None:
                raise ValueError(f"Invalid {name} format: expected YYYY-MM-DD")
            try:
                # building the date rejects impossible ones (month 13, day 32...)
                datetime.date(**{k: int(v) for k, v in match.groupdict().items()})
            except ValueError as exc:
                raise ValueError(f"Invalid {name} format: {exc}") from exc

    if name == "Language":
        # Language can hold several comma-separated codes (start() indexes
        # using the first one): every code must be valid
        codes = value.split(",") if isinstance(value, str) else [value]
        if not all(is_valid_iso_639_3(code) for code in codes):
            raise ValueError(f"{value} is not ISO-639-3.")

    if name == "Counter":
        raise ValueError(f"{name} cannot be set. libzim sets it.")

    if name == "Description" and len(value) > MAXIMUM_DESCRIPTION_METADATA_LENGTH:
        raise ValueError(f"{name} is too long.")

    if (
        name == "LongDescription"
        and len(value) > MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH
    ):
        raise ValueError(f"{name} is too long.")

    if name == "Tags" and (
        not isinstance(value, IterableT)
        or not all(isinstance(tag, str) for tag in value)
    ):
        raise ValueError(f"Invalid type(s) for {name}")

    if name.startswith("Illustration_"):
        match = ILLUSTRATIONS_METADATA_RE.match(name)
        # names not matching the illustration pattern are left unchecked
        if match:
            width, height = match.group("width"), match.group("height")
            if not is_valid_image(
                image=value,
                imformat="PNG",
                size=(int(width), int(height)),
            ):
                raise ValueError(f"{name} is not a {width}x{height} PNG Image")
230+
def add_metadata(
    self,
    name: str,
    content: Union[str, bytes, datetime.date, datetime.datetime],
    mimetype: str = "text/plain;charset=UTF-8",
):
    """Validate a metadata entry, then add it through the parent class.

    Parameters:
        name: metadata name.
        content: metadata value, checked with validate_metadata first.
        mimetype: mimetype forwarded as-is to the parent's add_metadata.

    Raises:
        ValueError: validate_metadata rejected content for this name; the
            parent's add_metadata is not called in that case.
    """
    self.validate_metadata(name=name, value=content)
    super().add_metadata(name, content, mimetype)
140239
141240 def config_metadata (
142241 self ,
@@ -158,17 +257,7 @@ def config_metadata(
158257 Relation : Optional [str ] = None ,
159258 ** extras : str ,
160259 ):
161- """
162- A chaining functions which configures the metadata of the Creator class.
163- You must set all mandatory metadata in this phase.
164-
165- Parameters:
166- check out: https://wiki.openzim.org/wiki/Metadata
167- all the extra metadata must be plain text.
168-
169- Returns:
170- Self
171- """
260+ """Sets all mandatory Metadata as well as standard and any other text ones"""
172261 self ._metadata .update (
173262 {
174263 "Name" : Name ,
@@ -189,18 +278,10 @@ def config_metadata(
189278 }
190279 )
191280 self ._metadata .update (extras )
192- language = self ._metadata .get ("Language" , "" ).split ("," )
193- self .config_indexing (True , language [0 ])
194-
195281 return self
196282
def config_dev_metadata(self, **extras: str):
    """Configure metadata from the development defaults.

    Keywords given in extras override the matching entries of
    DEFAULT_DEV_ZIM_METADATA (which is left untouched); the merged values
    are passed to config_metadata, whose result is returned.
    """
    merged_metadata = {**DEFAULT_DEV_ZIM_METADATA, **extras}
    return self.config_metadata(**merged_metadata)
0 commit comments