From 3892adcb583dd3500871e0390d9339b50b009ec9 Mon Sep 17 00:00:00 2001 From: Gabriel Sousa Date: Fri, 22 Nov 2024 11:09:35 -0300 Subject: [PATCH] ResourcesBot: Refactor modules Major ResourcesBot refactoring: have all modules in their own subfolder, standardize also the constructors so we have a standard interface for them (extend abstract class LanguagePostProcessor). Implement dynamic loading. Directly extract information for the command line options from the module classes themselves. Code is now significantly more beautiful :) Co-authored-by: holybiber Co-authored-by: Josua Kowalzik --- pywikitools/resourcesbot/bot.py | 436 ++++++++++++------ .../resourcesbot/consistency_checks.py | 177 ------- pywikitools/resourcesbot/modules/__init__.py | 0 .../modules/consistency_checks.py | 286 ++++++++++++ .../resourcesbot/{ => modules}/export_html.py | 149 ++++-- .../resourcesbot/{ => modules}/export_pdf.py | 84 +++- .../{ => modules}/export_repository.py | 74 ++- .../resourcesbot/modules/post_processing.py | 68 +++ .../resourcesbot/modules/write_lists.py | 261 +++++++++++ .../resourcesbot/modules/write_report.py | 410 ++++++++++++++++ .../{ => modules}/write_sidebar_messages.py | 75 ++- .../{ => modules}/write_summary.py | 21 +- pywikitools/resourcesbot/post_processing.py | 34 -- pywikitools/resourcesbot/write_lists.py | 167 ------- pywikitools/resourcesbot/write_report.py | 269 ----------- pywikitools/test/test_consistency_checks.py | 7 +- pywikitools/test/test_resourcesbot.py | 166 ++++--- pywikitools/test/test_write_lists.py | 160 +++++-- pywikitools/test/test_write_report.py | 106 +++-- .../test/test_write_sidebar_messages.py | 61 ++- pywikitools/test/test_write_summary.py | 15 +- resourcesbot.py | 122 +++-- 22 files changed, 2056 insertions(+), 1092 deletions(-) mode change 100644 => 100755 pywikitools/resourcesbot/bot.py delete mode 100644 pywikitools/resourcesbot/consistency_checks.py create mode 100644 pywikitools/resourcesbot/modules/__init__.py create mode 
100644 pywikitools/resourcesbot/modules/consistency_checks.py rename pywikitools/resourcesbot/{ => modules}/export_html.py (61%) rename pywikitools/resourcesbot/{ => modules}/export_pdf.py (55%) rename pywikitools/resourcesbot/{ => modules}/export_repository.py (50%) create mode 100644 pywikitools/resourcesbot/modules/post_processing.py create mode 100644 pywikitools/resourcesbot/modules/write_lists.py create mode 100644 pywikitools/resourcesbot/modules/write_report.py rename pywikitools/resourcesbot/{ => modules}/write_sidebar_messages.py (52%) rename pywikitools/resourcesbot/{ => modules}/write_summary.py (94%) delete mode 100644 pywikitools/resourcesbot/post_processing.py delete mode 100644 pywikitools/resourcesbot/write_lists.py delete mode 100644 pywikitools/resourcesbot/write_report.py diff --git a/pywikitools/resourcesbot/bot.py b/pywikitools/resourcesbot/bot.py old mode 100644 new mode 100755 index 8f4d22f..b67f20c --- a/pywikitools/resourcesbot/bot.py +++ b/pywikitools/resourcesbot/bot.py @@ -1,47 +1,91 @@ +import importlib +import inspect +import json +import logging import os import re -import logging -import json from configparser import ConfigParser -from typing import Final, List, Optional, Dict, Tuple +from typing import Callable, Dict, Final, List, Optional, Tuple + import pywikibot -from pywikitools.family import Family +from pywikitools.family import Family from pywikitools.fortraininglib import ForTrainingLib from pywikitools.pdftools.metadata import check_metadata from pywikitools.resourcesbot.changes import ChangeLog -from pywikitools.resourcesbot.consistency_checks import ConsistencyCheck -from pywikitools.resourcesbot.export_html import ExportHTML -from pywikitools.resourcesbot.export_pdf import ExportPDF -from pywikitools.resourcesbot.export_repository import ExportRepository -from pywikitools.resourcesbot.write_lists import WriteList -from pywikitools.resourcesbot.data_structures import FileInfo, WorksheetInfo, LanguageInfo, \ - 
DataStructureEncoder, json_decode -from pywikitools.resourcesbot.write_report import WriteReport -from pywikitools.resourcesbot.write_sidebar_messages import WriteSidebarMessages -from pywikitools.resourcesbot.write_summary import WriteSummary +from pywikitools.resourcesbot.data_structures import ( + DataStructureEncoder, + FileInfo, + LanguageInfo, + WorksheetInfo, + json_decode, +) +from pywikitools.resourcesbot.modules.post_processing import LanguagePostProcessor +from pywikitools.resourcesbot.modules.write_summary import WriteSummary + +AVAILABLE_MODULES: Final[List[str]] = [ + "consistency_checks", + "export_html", + "export_pdf", + "export_repository", + "write_lists", + "write_report", + "write_sidebar_messages", +] + + +def load_module(module_name: str) -> Callable: + """Load the post-processing module from modules/ and return it + + Raises RuntimeError if module can't be found""" + module_name = f"pywikitools.resourcesbot.modules.{module_name}" + try: + module = importlib.import_module(module_name) + except ModuleNotFoundError: + raise RuntimeError(f"Resourcesbot module {module_name} not found") + + # Find the class that inherits from LanguagePostProcessor + for _, obj in inspect.getmembers(module, inspect.isclass): + if issubclass(obj, LanguagePostProcessor) and obj is not LanguagePostProcessor: + return obj + + raise RuntimeError(f"Couldn't load module {module_name}. 
Giving up") class ResourcesBot: """Contains all the logic of our bot""" - def __init__(self, config: ConfigParser, limit_to_lang: Optional[str] = None, rewrite: Optional[str] = None, - read_from_cache: bool = False): + def __init__( + self, + config: ConfigParser, + read_from_cache: bool = False, + limit_to_lang: Optional[str] = None, + modules: list[str] = AVAILABLE_MODULES, + rewrite: Optional[str] = None, + ): """ Args: - limit_to_lang: limit processing to one language (string with a language code) - rewrite: force rewriting of selected component (even if there are no changes) - Possible values e.g. "json", "list", "report" - or "all" to rewrite everything - read_from_cache: Read from json cache from the mediawiki system (don't query individual worksheets) + read_from_cache: + Read from JSON cache from the mediawiki system + (don't query individual worksheets). + limit_to_lang: + limit processing to one language (string with a language code) + modules: + specify which post-processing modules should be executed """ + self.modules = modules # read-only list of download file types self._file_types: Final[List[str]] = ["pdf", "odt", "odg", "printPdf"] self._config = config - self.logger = logging.getLogger('pywikitools.resourcesbot') - - if not self._config.has_option('resourcesbot', 'site') or \ - not self._config.has_option('resourcesbot', 'username'): - raise RuntimeError("Missing connection settings for resourcesbot in config.ini") + self.logger = logging.getLogger("pywikitools.resourcesbot") + + # Initial check for mandatory configuration parameters in config.ini + if not self._config.has_option( + "resourcesbot", "site" + ) or not self._config.has_option("resourcesbot", "username"): + raise RuntimeError( + "Missing connection settings for resourcesbot in config.ini" + ) if not self._config.has_option("Paths", "temp"): self.logger.warning("Missing path for temporary files in config.ini") self._config.set("Paths", "temp", os.path.abspath(os.getcwd()) + "/temp/") 
@@ -49,55 +93,77 @@ def __init__(self, config: ConfigParser, limit_to_lang: Optional[str] = None, re os.makedirs(self._config.get("Paths", "temp")) family = Family() - code = self._config.get('resourcesbot', 'site') - self.site: pywikibot.site.APISite = pywikibot.Site(code=code, fam=family, - user=self._config.get('resourcesbot', 'username')) - # Set throttle to 0 to speed up write operations (otherwise pywikibot would wait up to 10s after each write) + code = self._config.get("resourcesbot", "site") + self.site: pywikibot.site.APISite = pywikibot.Site( + code=code, fam=family, user=self._config.get("resourcesbot", "username") + ) + + # Set throttle to 0 to speed up write operations + # (otherwise pywikibot would wait up to 10s after each writing) self.site.throttle.setDelays(delay=0, writedelay=0, absolute=True) - self.fortraininglib: ForTrainingLib = ForTrainingLib(family.base_url(code, ''), - family.scriptpath(code)) + self.fortraininglib: ForTrainingLib = ForTrainingLib( + family.base_url(code, ""), family.scriptpath(code) + ) - self._limit_to_lang: Optional[str] = limit_to_lang self._read_from_cache: bool = read_from_cache - self._rewrite: str = rewrite if rewrite is not None else "" # "" instead of None makes life a bit easier + self._limit_to_lang: Optional[str] = limit_to_lang + self._rewrite: Optional[str] = rewrite if self._limit_to_lang is not None: - self.logger.info(f"Parameter lang is set, limiting processing to language {limit_to_lang}") + self.logger.info( + f"Parameter lang is set, limiting processing " + f"to language {limit_to_lang}" + ) if self._read_from_cache: self.logger.info("Parameter --read-from-cache is set, reading from JSON...") if self._rewrite != "": self.logger.info(f"Parameter rewrite is set to {rewrite}") - # Stores details on all languages: language code -> information about all worksheets in that language + # Stores details on all languages: + # language code -> information about all worksheets + # in that language self._result: 
Dict[str, LanguageInfo] = {} - # Changes since the last run (will be filled after gathering of all information is done) + # Changes since the last run (will be filled after + # gathering of all information is done) self._changelog: Dict[str, ChangeLog] = {} def run(self): if self._read_from_cache: try: - language_list: List[str] = [] # List of languages to be read from cache + # List of languages to be read from cache + language_list: List[str] = [] if self._limit_to_lang is None: page = pywikibot.Page(self.site, "4training:languages.json") if not page.exists(): - raise RuntimeError("Couldn't load list of languages from 4training:languages.json") + raise RuntimeError( + "Couldn't load list of languages " + "from 4training:languages.json" + ) language_list = json.loads(page.text) assert isinstance(language_list, list) else: language_list.append(self._limit_to_lang) - language_list.append("en") # We need the English infos for LanguagePostProcessors - - for lang in language_list: # Now we read the details for each language - self.logger.info(f"Reading details for language {lang} from cache...") + # We need the English infos for LanguagePostProcessors + language_list.append("en") + + # Now we read the details for each language + for lang in language_list: + self.logger.info( + f"Reading details for " f"language {lang} from cache..." + ) page = pywikibot.Page(self.site, f"4training:{lang}.json") if not page.exists(): - raise RuntimeError(f"Couldn't load from cache for language {lang}") + raise RuntimeError( + f"Couldn't load from cache for language {lang}" + ) language_info = json.loads(page.text, object_hook=json_decode) assert isinstance(language_info, LanguageInfo) assert language_info.language_code == lang self._result[lang] = language_info except AssertionError: - raise RuntimeError("Unexpected error while parsing JSON data from cache.") + raise RuntimeError( + "Unexpected error while parsing JSON data from cache." 
+ ) else: self._result["en"] = LanguageInfo("en", "English") @@ -105,8 +171,10 @@ def run(self): # Gather all data (this takes quite some time!) self._query_translations(worksheet) - # That shouldn't be necessary but for some reasons the script sometimes failed with WARNING from pywikibot: - # "No user is logged in on site 4training:en" -> better check and try to log in if necessary + # That shouldn't be necessary, but for some reason the script sometimes + # failed with a WARNING from pywikibot: + # "No user is logged in on site 4training:en"-> better check and try + # to log in if necessary if not self.site.logged_in(): self.logger.info("We're not logged in. Trying to log in...") self.site.login() @@ -115,135 +183,200 @@ def run(self): raise RuntimeError("Login with pywikibot failed.") # Find out what has been changed since our last run - for lang, language_info in self._result.items(): - self._changelog[lang] = self._sync_and_compare(language_info) if not self._read_from_cache else ChangeLog() + if not self._read_from_cache: + for lang, language_info in self._result.items(): + self._changelog[lang] = self._sync_and_compare(language_info) + else: + for lang, language_info in self._result.items(): + self._changelog[lang] = ChangeLog() + if not self._read_from_cache and self._limit_to_lang is None: self._save_languages_list() - self._save_number_of_languages() # TODO move this to a GlobalPostProcessor + # TODO: move _save_number_of_languages + # to a GlobalPostProcessor + self._save_number_of_languages() # Run all LanguagePostProcessors - write_list = WriteList(self.fortraininglib, self.site, - self._config.get("resourcesbot", "username", fallback=""), - self._config.get("resourcesbot", "password", fallback=""), - force_rewrite=(self._rewrite == "all") or (self._rewrite == "list")) - write_report = WriteReport(self.fortraininglib, self.site, - force_rewrite=(self._rewrite == "all") or (self._rewrite == "report")) - write_sidebar = 
WriteSidebarMessages(self.fortraininglib, self.site, - force_rewrite=(self._rewrite == "all") or (self._rewrite == "sidebar")) - consistency_check = ConsistencyCheck(self.fortraininglib) - export_html = ExportHTML(self.fortraininglib, self._config.get("Paths", "htmlexport", fallback=""), - force_rewrite=(self._rewrite == "all") or (self._rewrite == "html")) - export_pdf = ExportPDF(self.fortraininglib, self._config.get("Paths", "pdfexport", fallback=""), - force_rewrite=(self._rewrite == "all") or (self._rewrite == "pdf")) - export_repository = ExportRepository(self._config.get("Paths", "htmlexport", fallback="")) assert "en" in self._result assert "en" in self._changelog - self.logger.info(f"Starting post-processing for languages {list(self._result.keys())}") - for lang in self._result: - consistency_check.run(self._result[lang], self._result["en"], ChangeLog(), ChangeLog()) - export_html.run(self._result[lang], self._result["en"], self._changelog[lang], ChangeLog()) - export_pdf.run(self._result[lang], self._result["en"], self._changelog[lang], ChangeLog()) - export_repository.run(self._result[lang], self._result["en"], self._changelog[lang], ChangeLog()) - write_list.run(self._result[lang], self._result["en"], self._changelog[lang], ChangeLog()) - write_report.run(self._result[lang], self._result["en"], self._changelog[lang], self._changelog["en"]) - write_sidebar.run(self._result[lang], self._result["en"], self._changelog[lang], ChangeLog()) + + self.logger.info( + f"Starting post-processing for languages {list(self._result.keys())}" + ) + + self.logger.info(f"Modules specified for execution: {self.modules}") + + for selected_module in self.modules: + module = load_module(selected_module)( + self.fortraininglib, self._config, self.site + ) + for lang in self._result: + module.run( + self._result[lang], + self._result["en"], + ChangeLog(), + ChangeLog(), + force_rewrite=(self._rewrite == "all") + or (self._rewrite == module.abbreviation()), + ) # Now run all 
GlobalPostProcessors if not self._limit_to_lang: - write_summary = WriteSummary(self.site, - force_rewrite=(self._rewrite == "all") or (self._rewrite == "summary")) - write_summary.run(self._result, self._changelog) + write_summary = WriteSummary(self.site) + write_summary.run( + self._result, + self._changelog, + force_rewrite=(self._rewrite == "all") or (self._rewrite == "summary"), + ) def get_english_version(self, page_source: str) -> Tuple[str, int]: """ - Extract version of an English worksheet - @return Tuple of version string and the number of the translation unit where it is stored + Extract the version of an English worksheet + @return Tuple of version string and the number of the translation unit where + it is stored """ - handler = re.search(r"\{\{Version\|*?\s*([^<]+)", page_source) + handler = re.search( + r"\{\{Version\|*?\s*([^<]+)", + page_source, + ) if handler: - return (handler.group(2), int(handler.group(1))) + return handler.group(2), int(handler.group(1)) self.logger.warning("Couldn't retrieve version from English worksheet!") - return ("", 0) + return "", 0 - def _query_translated_file(self, worksheet: WorksheetInfo, english_file_info: FileInfo) -> None: + def _query_translated_file( + self, worksheet: WorksheetInfo, english_file_info: FileInfo + ) -> None: """ - Query the name of the translated file and see if it is valid. If yes, go ahead and see if such a file exists + Query the name of the translated file and see if it is valid. If yes, go ahead + and see if such a file exists """ if english_file_info.translation_unit is None: - self.logger.warning(f"Internal error: translation unit is None in {english_file_info}, ignoring.") + self.logger.warning( + f"Internal error: translation unit is None in {english_file_info}, ignoring." 
+ ) return - file_name = self.fortraininglib.get_translated_unit(worksheet.page, worksheet.language_code, - english_file_info.translation_unit) + file_name = self.fortraininglib.get_translated_unit( + worksheet.page, worksheet.language_code, english_file_info.translation_unit + ) warning: str = "" if file_name is None: warning = "does not exist" - elif (file_name == '-') or (file_name == '.'): + elif (file_name == "-") or (file_name == "."): warning = f"is placeholder: {file_name}" elif file_name == english_file_info.get_file_name(): warning = "is identical with English original" if warning != "": # TODO fill that translation unit with "-" - if not worksheet.progress.is_unfinished(): # No need to write warnings if the translation is unfinished - self.logger.warning(f"Warning: translation {worksheet.page}/{english_file_info.translation_unit}/" - f"{worksheet.language_code} (for {english_file_info.file_type} file) {warning}") + if ( + not worksheet.progress.is_unfinished() + ): # No need to write warnings if the translation is unfinished + self.logger.warning( + f"Warning: translation {worksheet.page}/{english_file_info.translation_unit}/" + f"{worksheet.language_code} (for {english_file_info.file_type} file) {warning}" + ) return - assert file_name is not None # Make mypy happy in the next line + assert file_name is not None # Make mypy happy in the next line self._add_file_type(worksheet, english_file_info.file_type, file_name) - def _add_file_type(self, worksheet: WorksheetInfo, file_type: str, file_name: str, unit: Optional[int] = None): - """Try to add details on this translated file to worksheet - warn if it doesn't exist.""" + def _add_file_type( + self, + worksheet: WorksheetInfo, + file_type: str, + file_name: str, + unit: Optional[int] = None, + ): + """Try to add details on this translated file to worksheet - warn if it + doesn't exist. 
+ """ try: file_page = pywikibot.FilePage(self.site, file_name) if file_page.exists(): metadata = None if file_type == "pdf": - # If it's a PDF, we try to analyze the metadata and save it also in our data structure - temp_file = os.path.join(self._config.get("Paths", "temp"), file_name) + # If it's a PDF, we try to analyze the metadata and save it also in + # our data structure + temp_file = os.path.join( + self._config.get("Paths", "temp"), file_name + ) if file_page.download(temp_file): - metadata = check_metadata(self.fortraininglib, temp_file, worksheet) + metadata = check_metadata( + self.fortraininglib, temp_file, worksheet + ) if not metadata.correct: - self.logger.warning(f"{file_name} metadata is incorrect: {metadata.warnings}") + self.logger.warning( + f"{file_name} metadata is incorrect: {metadata.warnings}" + ) if not metadata.pdf1a: self.logger.info(f"{file_name} is not PDF/1A") if metadata.only_docinfo: - self.logger.info(f"{file_name} uses only outdated DocInfo in PDF metadata") + self.logger.info( + f"{file_name} uses only outdated DocInfo in PDF metadata" + ) os.remove(temp_file) else: - self.logger.warning(f"Downloading {file_name} failed. Couldn't analyze PDF metadata") - worksheet.add_file_info(file_type=file_type, from_pywikibot=file_page.latest_file_info, - unit=unit, metadata=metadata) + self.logger.warning( + f"Downloading {file_name} failed. Couldn't analyze PDF metadata" + ) + worksheet.add_file_info( + file_type=file_type, + from_pywikibot=file_page.latest_file_info, + unit=unit, + metadata=metadata, + ) else: - self.logger.warning(f"Page {worksheet.page}/{worksheet.language_code}: Couldn't find {file_name}.") + self.logger.warning( + f"Page {worksheet.page}/{worksheet.language_code}: Couldn't find {file_name}." 
+ ) except (ValueError, pywikibot.exceptions.Error) as err: self.logger.warning(f"Exception thrown for {file_type} file: {err}") - def _add_english_file_infos(self, page_source: str, worksheet: WorksheetInfo) -> None: + def _add_english_file_infos( + self, page_source: str, worksheet: WorksheetInfo + ) -> None: """ Finds out the names of the English downloadable files (originals) and adds them to worksheet """ for file_type in self._file_types: - handler = re.search(r"\{\{" + file_type[0].upper() + file_type[1:] + - r"Download\|*?\s*([^<]+)", page_source) + handler = re.search( + r"\{\{" + + file_type[0].upper() + + file_type[1:] + + r"Download\|*?\s*([^<]+)", + page_source, + ) if handler: - self._add_file_type(worksheet, file_type, handler.group(2), int(handler.group(1))) + self._add_file_type( + worksheet, file_type, handler.group(2), int(handler.group(1)) + ) def _query_translations(self, page: str): """ - Go through one worksheet, check all existing translations and gather information into self._result + Go through one worksheet, check all existing translations and gather + information into self._result @param: page: Name of the worksheet """ - # This is querying more data than necessary when self._limit_to_lang is set. But to save time we'd need to find - # a different API call that is only requesting progress for one particular language... for now it's okay - available_translations = self.fortraininglib.list_page_translations(page, include_unfinished=True) + # This is querying more data than necessary when self._limit_to_lang is set. + # But to save time we'd need to find a different API call that is only + # requesting progress for one particular language... 
for now it's okay + available_translations = self.fortraininglib.list_page_translations( + page, include_unfinished=True + ) english_title = self.fortraininglib.get_translated_title(page, "en") page_source = self.fortraininglib.get_page_source(page) if english_title is None or page_source is None: self.logger.error(f"Couldn't get English page {page}, skipping.") return version, version_unit = self.get_english_version(page_source) - english_page_info: WorksheetInfo = WorksheetInfo(page, "en", english_title, available_translations["en"], - version, version_unit) + english_page_info: WorksheetInfo = WorksheetInfo( + page, + "en", + english_title, + available_translations["en"], + version, + version_unit, + ) self._add_english_file_infos(page_source, english_page_info) self._result["en"].add_worksheet_info(page, english_page_info) @@ -251,45 +384,62 @@ def _query_translations(self, page: str): for lang, progress in available_translations.items(): if (self._limit_to_lang is not None) and (self._limit_to_lang != lang): continue - if lang == "en": # We saved information on the English originals already, don't do that again + # We saved information on the English originals already, don't do that again + if lang == "en": continue translated_title = self.fortraininglib.get_translated_title(page, lang) if translated_title is None: # apparently this translation doesn't exist if not progress.is_unfinished(): - self.logger.warning(f"Language {lang}: Title of {page} not translated, skipping.") + self.logger.warning( + f"Language {lang}: Title of {page} not translated, skipping." 
+ ) continue - translated_version = self.fortraininglib.get_translated_unit(page, lang, version_unit) + translated_version = self.fortraininglib.get_translated_unit( + page, lang, version_unit + ) if translated_version is None: if not progress.is_unfinished(): - self.logger.warning(f"Language {lang}: Version of {page} not translated, skipping.") + self.logger.warning( + f"Language {lang}: Version of {page} not translated, skipping." + ) continue if progress.is_unfinished(): - self.logger.info(f"Ignoring translation {page}/{lang} - ({progress} translation units translated)") + self.logger.info( + f"Ignoring translation {page}/{lang} - ({progress} translation units translated)" + ) else: finished_translations.append(lang) - page_info = WorksheetInfo(page, lang, translated_title, progress, translated_version) + page_info = WorksheetInfo( + page, lang, translated_title, progress, translated_version + ) if not page_info.has_same_version(english_page_info): - self.logger.warning(f"Language {lang}: {translated_title} has version {translated_version}" - f" - {english_title} has version {version}") + self.logger.warning( + f"Language {lang}: {translated_title} has version {translated_version}" + f" - {english_title} has version {version}" + ) for file_info in english_page_info.get_file_infos().values(): self._query_translated_file(page_info, file_info) if lang not in self._result: - language_name = self.fortraininglib.get_language_name(lang, 'en') or "" + language_name = self.fortraininglib.get_language_name(lang, "en") or "" self._result[lang] = LanguageInfo(lang, language_name) self._result[lang].add_worksheet_info(page, page_info) - self.logger.info(f"Worksheet {page} is translated into: {finished_translations}, " - f"ignored {set(available_translations.keys()) - set(finished_translations)}") + self.logger.info( + f"Worksheet {page} is translated into: {finished_translations}, " + f"ignored {set(available_translations.keys()) - set(finished_translations)}" + ) def 
_sync_and_compare(self, language_info: LanguageInfo) -> ChangeLog: """ - Synchronize our generated data on this language with our "database" and return the changes. + Synchronize our generated data on this language with our "database" and + return the changes. - The "database" is the JSON representation of LanguageInfo and is stored in a mediawiki page. + The "database" is the JSON representation of LanguageInfo and is stored in a + mediawiki page. @param lang language code @return comparison to what was previously stored in our database @@ -299,11 +449,14 @@ def _sync_and_compare(self, language_info: LanguageInfo) -> ChangeLog: old_language_info: LanguageInfo = LanguageInfo(lang, language_info.english_name) rewrite_json: bool = (self._rewrite == "all") or (self._rewrite == "json") - # Reading data structure from our mediawiki, stored in e.g. https://www.4training.net/4training:de.json + # Reading data structure from our mediawiki, + # stored in e.g. https://www.4training.net/4training:de.json page = pywikibot.Page(self.site, f"4training:{lang}.json") if not page.exists(): # There doesn't seem to be any information on this language stored yet! - self.logger.warning(f"{page.full_url()} doesn't seem to exist yet. Creating...") + self.logger.warning( + f"{page.full_url()} doesn't seem to exist yet. Creating..." 
+ ) page.text = encoded_json page.save("Created JSON data structure") rewrite_json = False @@ -337,9 +490,11 @@ def _sync_and_compare(self, language_info: LanguageInfo) -> ChangeLog: def _save_languages_list(self): """ Save a list of language codes of all our languages to the mediawiki server - We want this list so that the bot can be run with --read-from-cache for all languages + We want this list so that the bot can be run with --read-from-cache + for all languages - The list is stored to https://www.4training.net/4training:languages.json in alphabetical order + The list is stored to https://www.4training.net/4training:languages.json + in alphabetical order """ language_list = list(self._result) language_list.sort() @@ -352,7 +507,8 @@ def _save_languages_list(self): else: previous_json = page.text - # TODO compare language_list and json.loads(previous_json) to find out if a new language was added + # TODO compare language_list and json.loads(previous_json) to find out if a new + # language was added if previous_json != encoded_json: page.text = encoded_json page.save("Updated list of languages") @@ -360,9 +516,11 @@ def _save_languages_list(self): def _save_number_of_languages(self): """ - Count number of languages we have and save them to https://www.4training.net/MediaWiki:Numberoflanguages + Count number of languages we have and save them to + https://www.4training.net/MediaWiki:Numberoflanguages Language variants (any language code containing a "-") are not counted extra. - TODO: Discuss how we want to count in some edge cases, e.g. count pt-br always extra as we have a + TODO: Discuss how we want to count in some edge cases, e.g. count pt-br always + extra as we have a separate page for Brazilian Portuguese? 
@param language_list: List of language codes """ @@ -372,7 +530,9 @@ def _save_number_of_languages(self): if "-" not in lang: number_of_languages += 1 else: - self.logger.debug(f"Not counting {lang} into the number of languages we have") + self.logger.debug( + f"Not counting {lang} into the number of languages we have" + ) self.logger.info(f"Number of languages: {number_of_languages}") previous_number_of_languages: int = 0 @@ -380,12 +540,18 @@ def _save_number_of_languages(self): if page.exists(): previous_number_of_languages = int(page.text) else: - self.logger.warning("MediaWiki:Numberoflanguages doesn't seem to exist yet. Creating...") + self.logger.warning( + "MediaWiki:Numberoflanguages doesn't seem to exist yet. Creating..." + ) if previous_number_of_languages != number_of_languages: try: page.text = number_of_languages page.save("Updated number of languages") - self.logger.info(f"Updated MediaWiki:Numberoflanguages to {number_of_languages}") + self.logger.info( + f"Updated MediaWiki:Numberoflanguages to {number_of_languages}" + ) except pywikibot.exceptions.PageSaveRelatedError as err: - self.logger.warning(f"Error while trying to update MediaWiki:Numberoflanguages: {err}") + self.logger.warning( + f"Error while trying to update MediaWiki:Numberoflanguages: {err}" + ) diff --git a/pywikitools/resourcesbot/consistency_checks.py b/pywikitools/resourcesbot/consistency_checks.py deleted file mode 100644 index 4a7b2f8..0000000 --- a/pywikitools/resourcesbot/consistency_checks.py +++ /dev/null @@ -1,177 +0,0 @@ -""" -Contains consistency checks specifically for 4training.net -""" - -import logging -import re -from typing import Final, Optional, Tuple, Union -from pywikitools.fortraininglib import ForTrainingLib -from pywikitools.lang.translated_page import TranslationUnit -from pywikitools.resourcesbot.data_structures import LanguageInfo, WorksheetInfo -from pywikitools.resourcesbot.post_processing import LanguagePostProcessor - - -class 
ConsistencyCheck(LanguagePostProcessor): - """ - Post-processing plugin: Check whether some translation units with the same English definition - also have the same translation in the specified language - - This is completely 4training.net-specific. - Next step: Write the results to some meaningful place on 4training.net - so that translators can access them and correct inconsistencies - """ - TITLE: Final[str] = "Page display title" - - def __init__(self, fortraininglib: ForTrainingLib): - self.fortraininglib = fortraininglib - self.logger = logging.getLogger("pywikitools.resourcesbot.consistency_checks") - - def extract_link(self, text: str) -> Tuple[str, str]: - """ - Search in text for a mediawiki link of the form [[Destination|Title]]. - This function will only look at the first link it finds in the text, any other will be ignored. - @return a tuple (destination, title). In case no link was found both strings will be empty. - """ - match = re.search(r"\[\[([^|]+)\|([^\]]+)\]\]", text) - if not match: - return "", "" - return match.group(1), match.group(2) - - def load_translation_unit(self, language_info: LanguageInfo, page: str, - identifier: Union[int, str]) -> Optional[TranslationUnit]: - """ - Try to load a translation unit - - If we request the title of a worksheet, let's first try to see if it's already in language_info. - Then we don't need to make an API query. 
- Otherwise we try to load the translation unit from the mediawiki system - """ - if isinstance(identifier, int): - content = self.fortraininglib.get_translated_unit(page, language_info.language_code, identifier) - if content is None: - self.logger.info(f"Couldn't load {page}/{identifier}/{language_info.language_code}") - return None - # Leaving definition parameter empty because we don't have it and don't need it - return TranslationUnit(f"{page}/{identifier}", language_info.language_code, "", content) - - elif identifier == self.TITLE: - worksheet_info: Optional[WorksheetInfo] = language_info.get_worksheet(page) - if worksheet_info is not None: - return TranslationUnit(f"{page}/Page display title", - language_info.language_code, page, worksheet_info.title) - content = self.fortraininglib.get_translated_title(page, language_info.language_code) - if content is None: - self.logger.info(f"Couldn't load {page}/{identifier}/{language_info.language_code}") - return None - return TranslationUnit(f"{page}/Page display title", - language_info.language_code, page, content) - - else: - raise LookupError(f"Invalid unit name {page}/{identifier}/{language_info.language_code}") - - def should_be_equal(self, base: Optional[TranslationUnit], other: Optional[TranslationUnit]) -> bool: - """returns True if checks pass: base and other are the same (or not existing)""" - if base is None or other is None: - return True - if other.get_translation() == base.get_translation(): - self.logger.debug(f"Consistency check passed: {base.get_translation()} == {other.get_translation()}") - return True - self.logger.warning(f"Consistency check failed: {other.get_translation()} is not equal to " - f"{base.get_translation()}. 
Check {base.get_name()} and {other.get_name()}") - return False - - def should_start_with(self, base: Optional[TranslationUnit], other: Optional[TranslationUnit]) -> bool: - """returns True if checks pass: other starts with base (or not existing)""" - if base is None or other is None: - return True - if other.get_translation().startswith(base.get_translation()): - self.logger.debug(f"Consistency check passed: " - f"{other.get_translation()} starts with {base.get_translation()}.") - return True - self.logger.warning(f"Consistency check failed: {other.get_translation()} does not start with " - f"{base.get_translation()}. Check {base.get_name()} and {other.get_name()}") - return False - - def check_bible_reading_hints_titles(self, language_info: LanguageInfo) -> bool: - """Titles of the different Bible Reading Hints variants should start the same""" - ret1 = self.should_start_with( - self.load_translation_unit(language_info, "Bible_Reading_Hints", self.TITLE), - self.load_translation_unit(language_info, "Bible_Reading_Hints_(Seven_Stories_full_of_Hope)", self.TITLE)) - ret2 = self.should_start_with( - self.load_translation_unit(language_info, "Bible_Reading_Hints", self.TITLE), - self.load_translation_unit(language_info, "Bible_Reading_Hints_(Starting_with_the_Creation)", self.TITLE)) - return ret1 and ret2 - - def check_bible_reading_hints_links(self, language_info: LanguageInfo) -> bool: - """Check whether the link titles in https://www.4training.net/Bible_Reading_Hints - are identical with the titles of the destination pages""" - ret1 = True - ret2 = True - link_unit = self.load_translation_unit(language_info, "Bible_Reading_Hints", 2) - if link_unit is not None: - _, title = self.extract_link(link_unit.get_translation()) - link_unit.set_translation(title) - ret1 = self.should_be_equal(link_unit, self.load_translation_unit( - language_info, "Bible_Reading_Hints_(Seven_Stories_full_of_Hope)", self.TITLE)) - link_unit = self.load_translation_unit(language_info, 
"Bible_Reading_Hints", 3) - if link_unit is not None: - _, title = self.extract_link(link_unit.get_translation()) - link_unit.set_translation(title) - ret2 = self.should_be_equal(link_unit, self.load_translation_unit( - language_info, "Bible_Reading_Hints_(Starting_with_the_Creation)", self.TITLE)) - return ret1 and ret2 - - def check_gods_story_titles(self, language_info: LanguageInfo) -> bool: - """Titles of the two different variants of God's Story should start the same""" - ret1 = self.should_start_with( - self.load_translation_unit(language_info, "God's_Story", self.TITLE), - self.load_translation_unit(language_info, "God's_Story_(first_and_last_sacrifice)", self.TITLE)) - ret2 = self.should_start_with( - self.load_translation_unit(language_info, "God's_Story", self.TITLE), - self.load_translation_unit(language_info, "God's_Story_(five_fingers)", self.TITLE)) - return ret1 and ret2 - - def check_who_do_i_need_to_forgive(self, language_info: LanguageInfo) -> bool: - """Should both be 'God, who do I need to forgive?'""" - return self.should_be_equal( - self.load_translation_unit(language_info, "How_to_Continue_After_a_Prayer_Time", 11), - self.load_translation_unit(language_info, "Forgiving_Step_by_Step", 34)) - - def check_book_of_acts(self, language_info: LanguageInfo) -> bool: - """Name of the book of Acts should be the same in different Bible Reading Hints variants""" - t24 = self.load_translation_unit(language_info, "Template:BibleReadingHints", 24) - t26 = self.load_translation_unit(language_info, "Template:BibleReadingHints", 26) - if t24 is None or (len(t24.get_translation()) <= 3) or t26 is None or (len(t26.get_translation()) <= 3): - return True - # Text is e.g. "2. Apostelgeschichte" / "3. 
Apostelgeschichte" -> remove first three characters - t24.set_translation(t24.get_translation()[3:]) - t26.set_translation(t26.get_translation()[3:]) - return self.should_be_equal(t24, t26) - - def run(self, language_info: LanguageInfo, _english_info, _changes, _english_changes): - checks_passed: int = 0 - checks_passed += int(self.check_bible_reading_hints_titles(language_info)) - checks_passed += int(self.check_gods_story_titles(language_info)) - checks_passed += int(self.check_who_do_i_need_to_forgive(language_info)) - checks_passed += int(self.check_bible_reading_hints_links(language_info)) - checks_passed += int(self.check_book_of_acts(language_info)) - self.logger.info(f"Consistency checks for {language_info.english_name}: {checks_passed}/5 passed") - - -""" -TODO implement more consistency checks: -- Each list item of the Seven Stories full of Hope should start with a number (in the target language of course...) - e.g. Translations:Bible Reading Hints (Seven Stories full of Hope)/7/id starts with "1." - -- Check each link: Is the title the same as the title of the destination page? 
- -More consistency checks that currently can't be automated: - -Head-Heart-Hands questions should be the same on -https://www.4training.net/Time_with_God -https://www.4training.net/Template:BibleReadingHints --> not automated because the first uses "me", the latter "we" - -Many title from the Three-Thirds-Process and from Template:BibleReadingHints should be the same --> needs to be checked manually -""" diff --git a/pywikitools/resourcesbot/modules/__init__.py b/pywikitools/resourcesbot/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pywikitools/resourcesbot/modules/consistency_checks.py b/pywikitools/resourcesbot/modules/consistency_checks.py new file mode 100644 index 0000000..9d298a7 --- /dev/null +++ b/pywikitools/resourcesbot/modules/consistency_checks.py @@ -0,0 +1,286 @@ +""" +Contains consistency checks specifically for 4training.net +""" + +import logging +import re +from configparser import ConfigParser +from typing import Final, Optional, Tuple, Union + +import pywikibot.site + +from pywikitools.fortraininglib import ForTrainingLib +from pywikitools.lang.translated_page import TranslationUnit +from pywikitools.resourcesbot.data_structures import LanguageInfo, WorksheetInfo +from pywikitools.resourcesbot.modules.post_processing import LanguagePostProcessor + + +class ConsistencyCheck(LanguagePostProcessor): + """ + Post-processing plugin: Check whether some translation units with the same English + definition also have the same translation in the specified language + + This is completely 4training.net-specific. 
+ Next step: Write the results to some meaningful place on 4training.net + so that translators can access them and correct inconsistencies + """ + + TITLE: Final[str] = "Page display title" + + @classmethod + def help_summary(cls) -> str: + return "Check consistency of translations" + + @classmethod + def abbreviation(cls) -> str: + return "check" + + @classmethod + def can_be_rewritten(cls) -> bool: + return False + + def __init__( + self, + fortraininglib: ForTrainingLib, + config: ConfigParser = None, + site: pywikibot.site.APISite = None + ): + super().__init__(fortraininglib, config, site) + self.logger = logging.getLogger( + "pywikitools.resourcesbot.modules.consistency_checks" + ) + + def extract_link(self, text: str) -> Tuple[str, str]: + """ + Search in text for a mediawiki link of the form [[Destination|Title]]. + This function will only look at the first link it finds in the text, any other + will be ignored. + @return a tuple (destination, title). In case no link was found both strings + will be empty. + """ + match = re.search(r"\[\[([^|]+)\|([^\]]+)\]\]", text) + if not match: + return "", "" + return match.group(1), match.group(2) + + def load_translation_unit( + self, language_info: LanguageInfo, page: str, identifier: Union[int, str] + ) -> Optional[TranslationUnit]: + """ + Try to load a translation unit + + If we request the title of a worksheet, let's first try to see if it's already + in language_info. Then we don't need to make an API query. 
+ Otherwise we try to load the translation unit from the mediawiki system + """ + if isinstance(identifier, int): + content = self.fortraininglib.get_translated_unit( + page, language_info.language_code, identifier + ) + if content is None: + self.logger.info( + f"Couldn't load {page}/{identifier}/{language_info.language_code}" + ) + return None + # Leaving definition parameter empty because we don't have it and + # don't need it + return TranslationUnit( + f"{page}/{identifier}", language_info.language_code, "", content + ) + + elif identifier == self.TITLE: + worksheet_info: Optional[WorksheetInfo] = language_info.get_worksheet(page) + if worksheet_info is not None: + return TranslationUnit( + f"{page}/Page display title", + language_info.language_code, + page, + worksheet_info.title, + ) + content = self.fortraininglib.get_translated_title( + page, language_info.language_code + ) + if content is None: + self.logger.info( + f"Couldn't load {page}/{identifier}/{language_info.language_code}" + ) + return None + return TranslationUnit( + f"{page}/Page display title", language_info.language_code, page, content + ) + + else: + raise LookupError( + f"Invalid unit name {page}/{identifier}/{language_info.language_code}" + ) + + def should_be_equal( + self, base: Optional[TranslationUnit], other: Optional[TranslationUnit] + ) -> bool: + """returns True if checks pass: base and other are the same (or not existing)""" + if base is None or other is None: + return True + if other.get_translation() == base.get_translation(): + self.logger.debug( + f"Consistency check passed: {base.get_translation()} == {other.get_translation()}" + ) + return True + self.logger.warning( + f"Consistency check failed: {other.get_translation()} is not equal to " + f"{base.get_translation()}. 
Check {base.get_name()} and {other.get_name()}" + ) + return False + + def should_start_with( + self, base: Optional[TranslationUnit], other: Optional[TranslationUnit] + ) -> bool: + """returns True if checks pass: other starts with base (or not existing)""" + if base is None or other is None: + return True + if other.get_translation().startswith(base.get_translation()): + self.logger.debug( + f"Consistency check passed: " + f"{other.get_translation()} starts with {base.get_translation()}." + ) + return True + self.logger.warning( + f"Consistency check failed: {other.get_translation()} does not start with " + f"{base.get_translation()}. Check {base.get_name()} and {other.get_name()}" + ) + return False + + def check_bible_reading_hints_titles(self, language_info: LanguageInfo) -> bool: + """Titles of the different Bible Reading Hints variants should start the same""" + ret1 = self.should_start_with( + self.load_translation_unit( + language_info, "Bible_Reading_Hints", self.TITLE + ), + self.load_translation_unit( + language_info, + "Bible_Reading_Hints_(Seven_Stories_full_of_Hope)", + self.TITLE, + ), + ) + ret2 = self.should_start_with( + self.load_translation_unit( + language_info, "Bible_Reading_Hints", self.TITLE + ), + self.load_translation_unit( + language_info, + "Bible_Reading_Hints_(Starting_with_the_Creation)", + self.TITLE, + ), + ) + return ret1 and ret2 + + def check_bible_reading_hints_links(self, language_info: LanguageInfo) -> bool: + """Check whether the link titles in https://www.4training.net/Bible_Reading_Hints + are identical with the titles of the destination pages""" + ret1 = True + ret2 = True + link_unit = self.load_translation_unit(language_info, "Bible_Reading_Hints", 2) + if link_unit is not None: + _, title = self.extract_link(link_unit.get_translation()) + link_unit.set_translation(title) + ret1 = self.should_be_equal( + link_unit, + self.load_translation_unit( + language_info, + "Bible_Reading_Hints_(Seven_Stories_full_of_Hope)", + 
self.TITLE, + ), + ) + link_unit = self.load_translation_unit(language_info, "Bible_Reading_Hints", 3) + if link_unit is not None: + _, title = self.extract_link(link_unit.get_translation()) + link_unit.set_translation(title) + ret2 = self.should_be_equal( + link_unit, + self.load_translation_unit( + language_info, + "Bible_Reading_Hints_(Starting_with_the_Creation)", + self.TITLE, + ), + ) + return ret1 and ret2 + + def check_gods_story_titles(self, language_info: LanguageInfo) -> bool: + """Titles of the two different variants of God's Story should start the same""" + ret1 = self.should_start_with( + self.load_translation_unit(language_info, "God's_Story", self.TITLE), + self.load_translation_unit( + language_info, "God's_Story_(first_and_last_sacrifice)", self.TITLE + ), + ) + ret2 = self.should_start_with( + self.load_translation_unit(language_info, "God's_Story", self.TITLE), + self.load_translation_unit( + language_info, "God's_Story_(five_fingers)", self.TITLE + ), + ) + return ret1 and ret2 + + def check_who_do_i_need_to_forgive(self, language_info: LanguageInfo) -> bool: + """Should both be 'God, who do I need to forgive?'""" + return self.should_be_equal( + self.load_translation_unit( + language_info, "How_to_Continue_After_a_Prayer_Time", 11 + ), + self.load_translation_unit(language_info, "Forgiving_Step_by_Step", 34), + ) + + def check_book_of_acts(self, language_info: LanguageInfo) -> bool: + """The name of the book of Acts should be the same in different + Bible Reading Hints variants""" + t24 = self.load_translation_unit( + language_info, "Template:BibleReadingHints", 24 + ) + t26 = self.load_translation_unit( + language_info, "Template:BibleReadingHints", 26 + ) + if ( + t24 is None + or (len(t24.get_translation()) <= 3) + or t26 is None + or (len(t26.get_translation()) <= 3) + ): + return True + # Text is e.g. "2. Apostelgeschichte" / "3. 
Apostelgeschichte" -> remove first three characters + t24.set_translation(t24.get_translation()[3:]) + t26.set_translation(t26.get_translation()[3:]) + return self.should_be_equal(t24, t26) + + def run( + self, language_info: LanguageInfo, _english_info, _changes, _english_changes, + *, force_rewrite: bool = False + ): + checks_passed: int = 0 + checks_passed += int(self.check_bible_reading_hints_titles(language_info)) + checks_passed += int(self.check_gods_story_titles(language_info)) + checks_passed += int(self.check_who_do_i_need_to_forgive(language_info)) + checks_passed += int(self.check_bible_reading_hints_links(language_info)) + checks_passed += int(self.check_book_of_acts(language_info)) + self.logger.info( + f"Consistency checks for {language_info.english_name}: {checks_passed}/5 passed" + ) + + +""" +TODO implement more consistency checks: +- Each list item of the Seven Stories full of Hope should start with a number +(in the target language of course...) + e.g. Translations:Bible Reading Hints (Seven Stories full of Hope)/7/id starts with "1." + +- Check each link: Is the title the same as the title of the destination page? 
+ +More consistency checks that currently can't be automated: + +Head-Heart-Hands questions should be the same on +https://www.4training.net/Time_with_God +https://www.4training.net/Template:BibleReadingHints +-> not automated because the first uses "me", the latter "we" + +Many title from the Three-Thirds-Process and from Template:BibleReadingHints should +be the same +-> needs to be checked manually +""" diff --git a/pywikitools/resourcesbot/export_html.py b/pywikitools/resourcesbot/modules/export_html.py similarity index 61% rename from pywikitools/resourcesbot/export_html.py rename to pywikitools/resourcesbot/modules/export_html.py index a502169..6cd1707 100644 --- a/pywikitools/resourcesbot/export_html.py +++ b/pywikitools/resourcesbot/modules/export_html.py @@ -2,14 +2,21 @@ import json import logging import os -import requests +from configparser import ConfigParser from typing import Any, Dict, Final, Optional, Set +import pywikibot +import requests + from pywikitools.fortraininglib import ForTrainingLib from pywikitools.htmltools.beautify_html import BeautifyHTML from pywikitools.resourcesbot.changes import ChangeLog -from pywikitools.resourcesbot.data_structures import FileInfo, LanguageInfo, WorksheetInfo -from pywikitools.resourcesbot.post_processing import LanguagePostProcessor +from pywikitools.resourcesbot.data_structures import ( + FileInfo, + LanguageInfo, + WorksheetInfo, +) +from pywikitools.resourcesbot.modules.post_processing import LanguagePostProcessor class CustomBeautifyHTML(BeautifyHTML): @@ -17,13 +24,14 @@ class CustomBeautifyHTML(BeautifyHTML): Class to collect all images used in the generated HTML files TODO do something about links to worksheets that are not translated yet """ + def __init__(self, change_hrefs: Dict[str, str], file_collector: Set[str]): super().__init__(img_src_base="files/", change_hrefs=change_hrefs) self.file_collector = file_collector def img_rewrite_handler(self, element): super().img_rewrite_handler(element) - 
self.file_collector.add(element['src'][6:]) # Remove leading "files/" + self.file_collector.add(element["src"][6:]) # Remove leading "files/" def make_html_name(title: str) -> str: @@ -35,29 +43,48 @@ class ExportHTML(LanguagePostProcessor): Export all finished worksheets of this language as HTML into a folder This is a step towards having a git repo with this content always up-to-date """ - def __init__(self, fortraininglib: ForTrainingLib, folder: str, *, force_rewrite: bool = False): - """ - Args: - folder: base directory for export; subdirectories will be created for each language - force_rewrite: rewrite even if there were no (relevant) changes - """ - self._base_folder: str = folder - self._force_rewrite: Final[bool] = force_rewrite - self.fortraininglib: Final[ForTrainingLib] = fortraininglib - self.logger: Final[logging.Logger] = logging.getLogger('pywikitools.resourcesbot.export_html') + @classmethod + def help_summary(cls) -> str: + return "Exports finished worksheets of a language to HTML" + + @classmethod + def abbreviation(cls) -> str: + return "html" + + @classmethod + def can_be_rewritten(cls) -> bool: + return True + + def __init__( + self, + fortraininglib: ForTrainingLib, + config: ConfigParser, + site: pywikibot.site.APISite + ): + super().__init__(fortraininglib, config, site) + self._base_folder: str = self._config.get("Paths", "htmlexport", fallback="") + self.logger: Final[logging.Logger] = logging.getLogger( + "pywikitools.resourcesbot.modules.export_html" + ) if self._base_folder != "": try: - os.makedirs(folder, exist_ok=True) + os.makedirs(self._base_folder, exist_ok=True) except OSError as err: - self.logger.warning(f"Error creating directories for HTML export: {err}. Won't export HTML files.") + self.logger.warning( + f"Error creating directories for HTML export: {err}. " + f"Won't export HTML files." + ) self._base_folder = "" else: - self.logger.warning("Missing htmlexport path in config.ini. 
Won't export HTML files.") + self.logger.warning( + "Missing htmlexport path in config.ini. Won't export HTML files." + ) def has_relevant_change(self, worksheet: str, changes: ChangeLog) -> bool: """ Is there a relevant change for worksheet? - TODO: Define what exactly we consider relevant (for re-generating that worksheet's HTML) + TODO: Define what exactly we consider relevant + (for re-generating that worksheet's HTML) """ for change_item in changes: if change_item.worksheet == worksheet: @@ -68,12 +95,14 @@ def has_relevant_change(self, worksheet: str, changes: ChangeLog) -> bool: def download_file(self, files_folder: str, filename: str) -> bool: """Download a file from the mediawiki server - If a file already exists locally, we don't download it again because usually those - files (graphics) don't change. - TODO: Implement a way to force re-downloading of files (in case a file was updated in the mediawiki system). + If a file already exists locally, we don't download it again because + usually those files (graphics) don't change. + TODO: Implement a way to force re-downloading of files + (in case a file was updated in the mediawiki system). Two possible ways: - an extra flag (e.g. --force-rewrite-files) - - by getting the time stamp of the file in the mediawiki system, comparing it with the last + - by getting the time stamp of the file in the mediawiki system, + comparing it with the last modified timestamp of the local file and download again if the first is newer (would require adjustments of get_file_url() to also request timestamp) @@ -81,7 +110,9 @@ def download_file(self, files_folder: str, filename: str) -> bool: """ file_path = os.path.join(files_folder, filename) if os.path.isfile(file_path): - self.logger.info(f"File {file_path} already exists locally, not downloading.") + self.logger.info( + f"File {file_path} already exists locally, not downloading." 
+ ) return False else: url = self.fortraininglib.get_file_url(filename) @@ -90,19 +121,30 @@ def download_file(self, files_folder: str, filename: str) -> bool: return False response = requests.get(url, allow_redirects=True) - with open(file_path, 'wb') as fh: + with open(file_path, "wb") as fh: fh.write(response.content) self.logger.info(f"Successfully downloaded and saved {file_path}") return True - def run(self, language_info: LanguageInfo, english_info: LanguageInfo, changes: ChangeLog, _english_changes): + def run( + self, + language_info: LanguageInfo, + english_info: LanguageInfo, + changes: ChangeLog, + _english_changes, + *, + force_rewrite: bool = False + ): if self._base_folder == "": return - # Remove worksheets that aren't finished - don't change the language_info object we got + # Remove worksheets that aren't finished - don't change the + # language_info object we got lang_info: LanguageInfo = copy.deepcopy(language_info) - del language_info # prevent accidental usage of the wrong object + del language_info # prevent accidental usage of the wrong object for worksheet in list(lang_info.worksheets.keys()): - if not lang_info.worksheets[worksheet].show_in_list(english_info.worksheets[worksheet]): + if not lang_info.worksheets[worksheet].show_in_list( + english_info.worksheets[worksheet] + ): del lang_info.worksheets[worksheet] lang_code = lang_info.language_code @@ -117,36 +159,47 @@ def run(self, language_info: LanguageInfo, english_info: LanguageInfo, changes: if not os.path.isdir(structure_folder): os.makedirs(structure_folder) except OSError as err: - self.logger.warning(f"Error creating directories for HTML export: {err}. " - f"Won't export HTML files for language {lang_code}.") + self.logger.warning( + f"Error creating directories for HTML export: {err}. " + f"Won't export HTML files for language {lang_code}." 
+ ) return - change_hrefs: Dict[str, str] = {} # Dictionary to set correct targets for links in the HTML files + change_hrefs: Dict[str, str] = ( + {} + ) # Dictionary to set correct targets for links in the HTML files for worksheet, info in lang_info.worksheets.items(): - # Most link can stay the same but we need to add them to change_hrefs, otherwise links are removed + # Most links can stay the same, but we need to add them to change_hrefs; + # otherwise links are removed change_hrefs[f"/{worksheet}/{lang_code}"] = f"/{worksheet}/{lang_code}" - if lang_code == 'en': # English links normally don't have /en at the end + if lang_code == "en": # English links normally don't have /en at the end change_hrefs[f"/{worksheet}"] = f"/{worksheet}/en" file_collector: Set[str] = set() - beautifyhtml = CustomBeautifyHTML(change_hrefs=change_hrefs, file_collector=file_collector) + beautifyhtml = CustomBeautifyHTML( + change_hrefs=change_hrefs, file_collector=file_collector + ) - html_counter: int = 0 # Counting exported HTML files - file_counter: int = 0 # Counting downloaded files (images) + html_counter: int = 0 # Counting exported HTML files + file_counter: int = 0 # Counting downloaded files (images) # Download all worksheets and save the transformed HTML for worksheet, info in lang_info.worksheets.items(): # As elsewhere, we ignore outdated / unfinished translations - if self._force_rewrite or self.has_relevant_change(worksheet, changes): + if force_rewrite or self.has_relevant_change(worksheet, changes): content = self.fortraininglib.get_page_html(f"{worksheet}/{lang_code}") if content is None: - self.logger.warning(f"Couldn't get content of {worksheet}/{lang_code}. Skipping") + self.logger.warning( + f"Couldn't get content of {worksheet}/{lang_code}. Skipping" + ) continue html_counter += 1 filename = make_html_name(info.title) with open(os.path.join(folder, filename), "w") as f: self.logger.info(f"Exporting HTML to {filename}") - content = f"

<h1>{info.title}</h1>

" + beautifyhtml.process_html(content) + content = f"

<h1>{info.title}</h1>

" + beautifyhtml.process_html( + content + ) f.write(content) # Download all images we came across in the previous step @@ -155,16 +208,19 @@ def run(self, language_info: LanguageInfo, english_info: LanguageInfo, changes: file_counter += 1 # Write contents.json - # TODO define specifications for contents.json (similar to language jsons?) - for now just a simple structure - if self._force_rewrite or html_counter > 0: + # TODO define specifications for contents.json + # (similar to language jsons?) - for now just a simple structure + if force_rewrite or html_counter > 0: encoded_json = StructureEncoder().encode(lang_info) pretty_printed_json = json.dumps(json.loads(encoded_json), indent=4) with open(os.path.join(structure_folder, "contents.json"), "w") as f: self.logger.info("Exporting contents.json") f.write(pretty_printed_json) - self.logger.info(f"ExportHTML {lang_code}: " - f"Downloaded {html_counter} HTML files, {file_counter} images") + self.logger.info( + f"ExportHTML {lang_code}: " + f"Downloaded {html_counter} HTML files, {file_counter} images" + ) class StructureEncoder(json.JSONEncoder): @@ -172,13 +228,14 @@ class StructureEncoder(json.JSONEncoder): Serializes all information needed for the app into a JSON string. 
This is similar to DataStructureEncoder but removes some stuff we don't need """ + def default(self, o): if isinstance(o, LanguageInfo): # Don't include unfinished / outdated worksheets return { "language_code": o.language_code, "english_name": o.english_name, - "worksheets": list(o.worksheets.values()) + "worksheets": list(o.worksheets.values()), } if isinstance(o, WorksheetInfo): worksheet_json: Dict[str, Any] = { @@ -189,8 +246,8 @@ def default(self, o): } pdf_info: Optional[FileInfo] = o.get_file_type_info("pdf") if pdf_info: - pos: int = pdf_info.url.rfind('/') + pos: int = pdf_info.url.rfind("/") if pos > -1: - worksheet_json["pdf"] = pdf_info.url[pos+1:] + worksheet_json["pdf"] = pdf_info.url[pos + 1:] return worksheet_json return super().default(o) diff --git a/pywikitools/resourcesbot/export_pdf.py b/pywikitools/resourcesbot/modules/export_pdf.py similarity index 55% rename from pywikitools/resourcesbot/export_pdf.py rename to pywikitools/resourcesbot/modules/export_pdf.py index 947ab80..25a052e 100644 --- a/pywikitools/resourcesbot/export_pdf.py +++ b/pywikitools/resourcesbot/modules/export_pdf.py @@ -1,13 +1,16 @@ import copy import logging import os -import requests +from configparser import ConfigParser from typing import Final, Optional +import pywikibot +import requests + from pywikitools.fortraininglib import ForTrainingLib from pywikitools.resourcesbot.changes import ChangeLog from pywikitools.resourcesbot.data_structures import FileInfo, LanguageInfo -from pywikitools.resourcesbot.post_processing import LanguagePostProcessor +from pywikitools.resourcesbot.modules.post_processing import LanguagePostProcessor class ExportPDF(LanguagePostProcessor): @@ -15,24 +18,42 @@ class ExportPDF(LanguagePostProcessor): Export all PDF files of this language into a folder This is a step towards having a git repo with this content always up-to-date """ - def __init__(self, fortraininglib: ForTrainingLib, folder: str, *, force_rewrite: bool = False): - """ - Args: 
- folder: base directory for export; subdirectories will be created for each language - force_rewrite: rewrite even if there were no (relevant) changes - """ - self._base_folder: str = folder - self._force_rewrite: Final[bool] = force_rewrite - self.fortraininglib: Final[ForTrainingLib] = fortraininglib - self.logger: Final[logging.Logger] = logging.getLogger('pywikitools.resourcesbot.export_pdf') + @classmethod + def help_summary(cls) -> str: + return "Export PDF files of a language" + + @classmethod + def abbreviation(cls) -> str: + return "pdf" + + @classmethod + def can_be_rewritten(cls) -> bool: + return True + + def __init__( + self, + fortraininglib: ForTrainingLib, + config: ConfigParser, + site: pywikibot.site.APISite = None + ): + super().__init__(fortraininglib, config, site) + self._base_folder: str = self._config.get("Paths", "pdfexport", fallback="") + self.logger: Final[logging.Logger] = logging.getLogger( + "pywikitools.resourcesbot.modules.export_pdf" + ) if self._base_folder != "": try: - os.makedirs(folder, exist_ok=True) + os.makedirs(self._base_folder, exist_ok=True) except OSError as err: - self.logger.warning(f"Error creating directories for PDF export: {err}. Won't export PDF files.") + self.logger.warning( + f"Error creating directories for PDF export: {err}." + f" Won't export PDF files." + ) self._base_folder = "" else: - self.logger.warning("Missing pdfexport path in config.ini. Won't export PDF files.") + self.logger.warning( + "Missing pdfexport path in config.ini. Won't export PDF files." 
+ ) def has_relevant_change(self, worksheet: str, changes: ChangeLog) -> bool: """ @@ -46,14 +67,25 @@ def has_relevant_change(self, worksheet: str, changes: ChangeLog) -> bool: return True return False - def run(self, language_info: LanguageInfo, english_info: LanguageInfo, changes: ChangeLog, _english_changes): + def run( + self, + language_info: LanguageInfo, + english_info: LanguageInfo, + changes: ChangeLog, + _english_changes, + *, + force_rewrite: bool = False + ): if self._base_folder == "": return - # Remove worksheets that aren't finished - don't change the language_info object we got + # Remove worksheets that aren't finished - don't change the language_info + # object we got lang_info: LanguageInfo = copy.deepcopy(language_info) - del language_info # prevent accidental usage of the wrong object + del language_info # prevent accidental usage of the wrong object for worksheet in list(lang_info.worksheets.keys()): - if not lang_info.worksheets[worksheet].show_in_list(english_info.worksheets[worksheet]): + if not lang_info.worksheets[worksheet].show_in_list( + english_info.worksheets[worksheet] + ): del lang_info.worksheets[worksheet] lang_code = lang_info.language_code @@ -62,22 +94,24 @@ def run(self, language_info: LanguageInfo, english_info: LanguageInfo, changes: try: os.makedirs(folder, exist_ok=True) except OSError as err: - self.logger.warning(f"Error creating directories for PDF export: {err}. " - f"Won't export PDF files for language {lang_code}.") + self.logger.warning( + f"Error creating directories for PDF export: {err}. " + f"Won't export PDF files for language {lang_code}." 
+ ) return - file_counter: int = 0 # Counting downloaded PDF files + file_counter: int = 0 # Counting downloaded PDF files # Download and save all PDF files for worksheet, info in lang_info.worksheets.items(): - pdf_info: Optional[FileInfo] = info.get_file_type_info('pdf') + pdf_info: Optional[FileInfo] = info.get_file_type_info("pdf") if pdf_info is None: continue # As elsewhere, we ignore outdated / unfinished translations - if self._force_rewrite or self.has_relevant_change(worksheet, changes): + if force_rewrite or self.has_relevant_change(worksheet, changes): response = requests.get(pdf_info.url, allow_redirects=True) file_path = os.path.join(folder, pdf_info.get_file_name()) - with open(file_path, 'wb') as fh: + with open(file_path, "wb") as fh: fh.write(response.content) file_counter += 1 self.logger.info(f"Successfully downloaded and saved {file_path}") diff --git a/pywikitools/resourcesbot/export_repository.py b/pywikitools/resourcesbot/modules/export_repository.py similarity index 50% rename from pywikitools/resourcesbot/export_repository.py rename to pywikitools/resourcesbot/modules/export_repository.py index c23f8e3..30809db 100644 --- a/pywikitools/resourcesbot/export_repository.py +++ b/pywikitools/resourcesbot/modules/export_repository.py @@ -1,35 +1,65 @@ import logging import os +from configparser import ConfigParser from typing import Final + from git import Actor, Repo from git.exc import GitError +import pywikibot +from pywikitools.fortraininglib import ForTrainingLib from pywikitools.resourcesbot.data_structures import LanguageInfo -from pywikitools.resourcesbot.post_processing import LanguagePostProcessor +from pywikitools.resourcesbot.modules.post_processing import LanguagePostProcessor class ExportRepository(LanguagePostProcessor): """ - Export the html files (result of ExportHTML) to a git repository. + Export the HTML files (result of ExportHTML) to a git repository. Needs to run after ExportHTML. 
""" - def __init__(self, base_folder: str): - """ - Args: - folder: export base directory (repositories will be in subdirectories for each language) - repo: the address of the remote repository we're filling TODO - Currently we assume that origin is correctly set up in the folder and we just need to push - """ - self._base_folder: Final[str] = base_folder - self.logger: Final[logging.Logger] = logging.getLogger('pywikitools.resourcesbot.export_repository') + @classmethod + def help_summary(cls) -> str: + return "Exports the HTML files into a git repo" + + @classmethod + def abbreviation(cls) -> str: + return "repository" + + @classmethod + def can_be_rewritten(cls) -> bool: + return False + + def __init__( + self, + fortraininglib: ForTrainingLib, + config: ConfigParser, + site: pywikibot.site.APISite = None, + ): + # TODO Currently we assume that origin is correctly set up in the folder, + # and we just need to push + + super().__init__(fortraininglib, config, site) + self._base_folder: str = self._config.get("Paths", "htmlexport", fallback="") + self.logger: Final[logging.Logger] = logging.getLogger( + "pywikitools.resourcesbot.modules.export_repository" + ) if self._base_folder == "": - self.logger.warning("Missing htmlexport path in config.ini. Won't export to repository") - self._author: Final[Actor] = Actor("ExportRepository", "samuel@holydevelopers.net") + self.logger.warning( + "Missing htmlexport path in config.ini. Won't export to repository" + ) + self._author: Final[Actor] = Actor( + "ExportRepository", "samuel@holydevelopers.net" + ) - def run(self, language_info: LanguageInfo, _english_info, _changes, _english_changes): - """Pushing all changes in the local repository (created by ExportHTML) to the remote repository + def run( + self, language_info: LanguageInfo, _english_info, _changes, _english_changes, + *, force_rewrite: bool = False + ): + """Pushing all changes in the local repository (created by ExportHTML) to the + remote repository. 
- Currently we're ignoring the changes parameter and just check for changes in the git repository + Currently, we're ignoring the change parameter and just check for changes + in the git repository """ # Make sure we have a valid repository if self._base_folder == "": @@ -61,15 +91,21 @@ def run(self, language_info: LanguageInfo, _english_info, _changes, _english_cha repo.index.remove(item.a_path) deleted += 1 else: - self.logger.warning(f"Unsupported change_type {item.change_type} in git diff, ignoring.") + self.logger.warning( + f"Unsupported change_type {item.change_type} in git diff, ignoring." + ) if repo.is_dirty(): # Commiting and pushing to remote - commit_message = f"Update: {untracked} new, {modified} modified, {deleted} deleted." + commit_message = ( + f"Update: {untracked} new, {modified} modified, {deleted} deleted." + ) self.logger.warning(f"Pushing to git repository. {commit_message}") # TODO add details to the commit message repo.index.commit(f"{commit_message}", author=self._author) result = repo.remotes.origin.push() self.logger.info(f"Pushed to remote, result: {result[0].summary}") else: - self.logger.info(f"ExportRepository {language_info.language_code}: No changes.") + self.logger.info( + f"ExportRepository {language_info.language_code}: No changes." + ) diff --git a/pywikitools/resourcesbot/modules/post_processing.py b/pywikitools/resourcesbot/modules/post_processing.py new file mode 100644 index 0000000..d3408c5 --- /dev/null +++ b/pywikitools/resourcesbot/modules/post_processing.py @@ -0,0 +1,68 @@ +""" +Base classes for all functionality doing useful stuff with the data gathered previously. + +If the functionality looks only at one language at a time, implement LanguagePostProcessor. +If the functionality needs to look at everything, implement GlobalPostProcessor. 
# Processing order: the resourcesbot first calls every LanguagePostProcessor for
# each language and afterwards every GlobalPostProcessor.
from abc import ABC, abstractmethod
from configparser import ConfigParser
from typing import Dict, Final, Optional

import pywikibot

from pywikitools.fortraininglib import ForTrainingLib
from pywikitools.resourcesbot.changes import ChangeLog
from pywikitools.resourcesbot.data_structures import LanguageInfo


class LanguagePostProcessor(ABC):
    """Base class for all functionality doing useful stuff with the data on one language.

    We include information on English as well because several post-processors
    need it as reference.

    Every module describes itself through three class methods (help text,
    abbreviation, whether a forced rewrite is supported) and does its work
    in run().
    """

    @classmethod
    @abstractmethod
    def help_summary(cls) -> str:
        """Used for the resourcesbot.py -h help command"""
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def abbreviation(cls) -> str:
        """Abbreviation to be used for this module with the -m and --rewrite options"""
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def can_be_rewritten(cls) -> bool:
        """Is the force_rewrite flag available for this module?"""
        raise NotImplementedError

    def __init__(
        self,
        fortraininglib: ForTrainingLib,
        config: Optional[ConfigParser] = None,
        site: Optional[pywikibot.site.APISite] = None,
    ):
        """
        Args:
            fortraininglib: library object, stored as self.fortraininglib
            config: parsed configuration, stored as self._config. If omitted, an
                empty ConfigParser is used so that lookups with a ``fallback``
                value keep working instead of failing on None.
            site: pywikibot site object, stored as self._site (may be None)
        """
        self.fortraininglib: Final[ForTrainingLib] = fortraininglib
        # Never store None here: subclasses call self._config.get(..., fallback=...)
        self._config: Final[ConfigParser] = (
            config if config is not None else ConfigParser()
        )
        self._site: Final[Optional[pywikibot.site.APISite]] = site

    @abstractmethod
    def run(
        self,
        language_info: LanguageInfo,
        english_info: LanguageInfo,
        changes: ChangeLog,
        english_changes: ChangeLog,
        *,
        force_rewrite: bool = False
    ):
        """Entry point

        Args:
            language_info: data on the language to be processed
            english_info: data on English (used as reference)
            changes: change log for the language to be processed
            english_changes: change log for English
            force_rewrite: keyword-only flag; see can_be_rewritten()
        """
        raise NotImplementedError


class GlobalPostProcessor(ABC):
    """Base class for all functionality doing useful stuff with the data on all languages"""

    @abstractmethod
    def run(self, language_data: Dict[str, LanguageInfo], changes: Dict[str, ChangeLog]):
        """Entry point

        Args:
            language_data: maps a string key to the LanguageInfo of a language
            changes: maps a string key to the ChangeLog of a language
        """
        raise NotImplementedError
import logging
import re
from configparser import ConfigParser
from typing import Final, Optional, Tuple

import pywikibot

from pywikitools.fortraininglib import ForTrainingLib
from pywikitools.resourcesbot.changes import ChangeLog, ChangeType
from pywikitools.resourcesbot.data_structures import FileInfo, LanguageInfo
from pywikitools.resourcesbot.modules.post_processing import LanguagePostProcessor


class WriteList(LanguagePostProcessor):
    """
    Write/update the list of available training resources for languages.

    We only show worksheets that have a PDF file (to ensure good quality)

    This class can be re-used to call run() several times
    """

    @classmethod
    def help_summary(cls) -> str:
        return "Write list of available training resources for languages"

    @classmethod
    def abbreviation(cls) -> str:
        return "list"

    @classmethod
    def can_be_rewritten(cls) -> bool:
        return True

    def __init__(
        self,
        fortraininglib: ForTrainingLib,
        config: ConfigParser,
        site: pywikibot.site.APISite
    ):
        """
        Args:
            fortraininglib: library object (passed on to the base class)
            config: configuration; user name and password are read from
                section "resourcesbot" (both fall back to an empty string)
            site: pywikibot site object used to read and save wiki pages
        """
        super().__init__(fortraininglib, config, site)
        self._username: Final[str] = config.get("resourcesbot", "username", fallback="")
        self._password: Final[str] = config.get("resourcesbot", "password", fallback="")
        self.logger: Final[logging.Logger] = logging.getLogger(
            "pywikitools.resourcesbot.modules.write_lists"
        )

        if self._username == "" or self._password == "":
            # Note the trailing space: the two literals are concatenated
            self.logger.warning(
                "Missing user name and/or password in config. "
                "Won't mark pages for translation."
            )

    def needs_rewrite(self, language_info: LanguageInfo, changes: ChangeLog) -> bool:
        """Determine whether the list of available training resources needs
        to be rewritten.

        That is the case if a PDF or a worksheet was added / updated / deleted,
        or if an ODT file was added / deleted for a worksheet that has a PDF.
        The decision is logged.
        """
        lang = language_info.language_code
        needs_rewrite = False
        for change_item in changes:
            if change_item.change_type in [
                ChangeType.UPDATED_PDF,
                ChangeType.NEW_PDF,
                ChangeType.DELETED_PDF,
                ChangeType.NEW_WORKSHEET,
                ChangeType.DELETED_WORKSHEET,
            ]:
                needs_rewrite = True
            if (
                change_item.change_type in [ChangeType.NEW_ODT, ChangeType.DELETED_ODT]
            ) and language_info.worksheet_has_type(change_item.worksheet, "pdf"):
                needs_rewrite = True

        if needs_rewrite:
            self.logger.info(
                f"List of available training resources in language {lang} needs "
                f"to be re-written."
            )
        else:
            self.logger.info(
                f"List of available training resources in language {lang} doesn't "
                f"need to be re-written."
            )

        return needs_rewrite

    def _create_file_mediawiki(self, file_info: Optional[FileInfo]) -> str:
        """
        Return string with mediawiki code to display a downloadable file

        Example: [[File:pdficon_small.png|link={{filepath:Gebet.pdf}}]]
        @return empty string if file_info is None
        """
        if file_info is None:
            return ""
        # The file name is everything after the last slash of the URL
        file_name: str = file_info.url
        pos: int = file_name.rfind("/")
        if pos > -1:
            file_name = file_name[pos + 1:]
        else:
            self.logger.warning(f"Couldn't find / in {file_name}")
        return (
            f" [[File:{file_info.file_type.lower()}icon_small.png|"
            + r"link={{filepath:"
            + file_name
            + r"}}]]"
        )

    def create_mediawiki(
        self, language_info: LanguageInfo, english_info: LanguageInfo
    ) -> str:
        """
        Create the mediawiki string for the list of available training resources

        Every listed worksheet produces one output line consisting of these
        parts (shown here on separate lines for readability):
            * [[God's_Story_(five_fingers)/de|{{int:sidebar-godsstory-fivefingers}}]]
            [[File:pdficon_small.png|link={{filepath:Gottes_Geschichte_(fünf_Finger).pdf}}]]
            [[File:printpdficon_small.png|link={{filepath:Gottes_Geschichte_(fünf_Finger).pdf}}]]
            [[File:odticon_small.png|link={{filepath:Gottes_Geschichte_(fünf_Finger).odt}}]]
        """
        content: str = ""
        for worksheet, worksheet_info in language_info.worksheets.items():
            if worksheet_info.show_in_list(english_info.worksheets[worksheet]):
                content += f"* [[{worksheet}/{language_info.language_code}|"
                content += (
                    "{{int:" + self.fortraininglib.title_to_message(worksheet) + "}}]]"
                )
                content += self._create_file_mediawiki(
                    worksheet_info.get_file_type_info("pdf")
                )
                content += self._create_file_mediawiki(
                    worksheet_info.get_file_type_info("printPdf")
                )
                content += self._create_file_mediawiki(
                    worksheet_info.get_file_type_info("odt")
                )
                content += "\n"
                if worksheet_info.progress.translated < worksheet_info.progress.total:
                    self.logger.warning(
                        f"Worksheet {worksheet}/{language_info.language_code} "
                        f"is not fully translated!"
                    )

        self.logger.debug(content)
        return content

    def _find_resources_list(self, page_content: str, language: str) -> Tuple[int, int]:
        """Find the exact positions of the existing list of available training
        resources in the page.
        @param page_content: mediawiki "source" of the language info page that we're
                             searching through
        @param language: The language name (as in LanguageInfo.english_name)
        @return Tuple of start and end position. (0, 0) indicates we couldn't find it
        """
        # The language name is matched literally: escape everything that has a
        # special meaning in a regular expression, not just brackets
        # (an example with brackets would be the language "Turkish (secular)").
        # NOTE(review): the two adjacent lazy \s*? groups look as if something
        # between them got lost - confirm against the upstream file.
        match = re.search(
            f"Available training resources in {re.escape(language)}\\s*?\\s*?==",
            page_content,
        )
        if not match:
            return 0, 0
        list_start = 0
        list_end = 0
        # Find all following list entries: must start with *
        pattern = re.compile(r"^\*.*$", re.MULTILINE)
        for m in pattern.finditer(page_content, match.end()):
            if list_start == 0:
                list_start = m.start()
            else:
                # Make sure there is no other line in between: We only want to find
                # lines directly following each other
                if m.start() > (list_end + 1):
                    self.logger.info(
                        f"Looks like there is another list later in page {language}. "
                        f"Ignoring it."
                    )
                    break
            list_end = m.end()
            self.logger.debug(
                f"Matching line: start={m.start()}, end={m.end()}, {m.group(0)}"
            )
        return list_start, list_end

    def run(
        self,
        language_info: LanguageInfo,
        english_info: LanguageInfo,
        changes: ChangeLog,
        _english_changes,
        *,
        force_rewrite: bool = False
    ) -> None:
        """Update the list of resources on the language information page.

        Does nothing if neither force_rewrite is set nor needs_rewrite() reports
        a relevant change. The page is only saved if its content would change;
        it is marked for translation if user name and password are configured.
        """
        if not force_rewrite and not self.needs_rewrite(language_info, changes):
            return

        # Saving this to the language information page, e.g. https://www.4training.net/German
        language = language_info.english_name
        if language == "":
            self.logger.warning(
                f"English language name of {language_info.language_code} missing! "
                f"Skipping WriteList"
            )
            return
        self.logger.debug(f"Writing list of available resources in {language}...")
        page = pywikibot.Page(self._site, language)
        if not page.exists():
            self.logger.warning(f"Language information page {language} doesn't exist!")
            return
        if page.isRedirectPage():
            self.logger.info(
                f"Language information page {language} is a redirect. Following the "
                f"redirect..."
            )
            page = page.getRedirectTarget()
            if not page.exists():
                self.logger.warning(
                    f"Redirect target for language {language} doesn't exist!"
                )
                return
            language = page.title()

        list_start, list_end = self._find_resources_list(page.text, language)
        if (list_start == 0) or (list_end == 0):
            self.logger.warning(
                f"Couldn't find list of available training resources in {language}! "
                f"Doing nothing."
            )
            self.logger.info(page.text)
            return
        self.logger.debug(
            f"Found existing list of available training resources "
            f"@{list_start}-{list_end}. Replacing..."
        )
        # create_mediawiki() ends with a newline, so we skip the one character
        # following the old list when appending the rest of the page
        new_page_content = page.text[0:list_start] + self.create_mediawiki(
            language_info, english_info
        )
        new_page_content += page.text[list_end + 1:]
        self.logger.debug(new_page_content)

        # Save page and mark it for translation if necessary
        if page.text.strip() == new_page_content.strip():
            return
        page.text = new_page_content
        page.save(
            "Updated list of available training resources"
        )  # TODO write list of changes here in the save message
        if self._username != "" and self._password != "":
            self.fortraininglib.mark_for_translation(
                page.title(), self._username, self._password
            )
            self.logger.info(
                f"Updated language information page {language} and marked it "
                f"for translation."
            )
        else:
            self.logger.info(
                f"Updated language information page {language}. Couldn't mark it "
                f"for translation."
            )
+ ) diff --git a/pywikitools/resourcesbot/modules/write_report.py b/pywikitools/resourcesbot/modules/write_report.py new file mode 100644 index 0000000..6dac4cf --- /dev/null +++ b/pywikitools/resourcesbot/modules/write_report.py @@ -0,0 +1,410 @@ +import logging +import re +from configparser import ConfigParser +from enum import Enum +from typing import Final, Optional + +import pywikibot + +from pywikitools.fortraininglib import ForTrainingLib +from pywikitools.resourcesbot.changes import ChangeLog +from pywikitools.resourcesbot.data_structures import LanguageInfo, WorksheetInfo +from pywikitools.resourcesbot.modules.post_processing import LanguagePostProcessor + + +class Color(Enum): + GREEN = "green" + ORANGE = "orange" + RED = "red" + GREY = "grey" + + def __str__(self) -> str: + return self.value + + +class WriteReport(LanguagePostProcessor): + """ + Write/update status reports for all languages (for translators and translation + coordinators). + + Every language report has a table with the translation status of all worksheets: + Which worksheet is translated? + Is the translation 100% complete? + Is it the same version as the English original? + Do we have ODT and PDF files for download? + To help interpret the results, we use colors (green / orange / red) for each cell. 
+ """ + @classmethod + def help_summary(cls) -> str: + return "Write status report for a language" + + @classmethod + def abbreviation(cls) -> str: + return "report" + + @classmethod + def can_be_rewritten(cls) -> bool: + return False + + def __init__( + self, + fortraininglib: ForTrainingLib, + config: ConfigParser, + site: pywikibot.site.APISite + ): + """ + Args: + site: our pywikibot object to be able to write to the mediawiki system + """ + super().__init__(fortraininglib, config, site) + self.logger: Final[logging.Logger] = logging.getLogger( + "pywikitools.resourcesbot.modules.write_report" + ) + + def run( + self, + language_info: LanguageInfo, + english_info: LanguageInfo, + changes: ChangeLog, + english_changes: ChangeLog, + *, + force_rewrite: bool = False + ): + """Entry function + + We run everything, and don't look at whether we have changes because we need to + look at all CorrectBot reports and according to them, may need to rewrite + the report even if changes and english_changes are empty + """ + # We don't need a report for English as it is the source language + if language_info.language_code == "en": + return + + # Don't write reports for language variants (except Brazilian Portuguese) + # TODO: this should go somewhere else + if ( + "-" in language_info.language_code + and language_info.language_code != "pt-br" + ): + return + self.save_language_report(language_info, english_info) + + def create_correctbot_mediawiki(self, worksheet: str, language_code: str) -> str: + """Check Correctbot report status for one worksheet + + Returns: + mediawiki string to fill one cell (for one worksheet in the CorrectBot + column) + """ + page = f"{worksheet}/{language_code}" + worksheet_page = pywikibot.Page(self._site, page) + if not worksheet_page.exists(): + self.logger.warning(f"Couldn't access page {page}") + return f'| style="background-color:{Color.RED}" | ERROR\n' + + correctbot_page = pywikibot.Page(self._site, f"CorrectBot:{page}") + if not 
correctbot_page.exists(): + return f'| style="background-color:{Color.RED}" | Missing\n' + + # Analyze the result of the last CorrectBot run (from edit summary) + correctbot_summary = correctbot_page.latest_revision["comment"] + match = re.match( + r"^(\d+) corrections, (\d+) suggestions, (\d+) warnings$", + correctbot_summary, + ) + if not match: + # Somehow the edit summary is not as we expect it + self.logger.warning( + f"Couldn't parse edit summary '{correctbot_summary}' in page" + f" CorrectBot:{page}" + ) + return ( + f'| style="background-color:{Color.RED}" | Invalid. Please run ' + f"CorrectBot again.\n" + ) + + if int(match.group(3)) > 0: + # CorrectBot gave warnings - something is definitely not okay + return ( + f'| style="background-color:{Color.RED}" | ' + f"[[CorrectBot:{page}|{match.group(3)} warnings]]\n" + ) + + report_link = ( + f"[[CorrectBot:{page}|{match.group(1)} corrections," + f" {match.group(2)} suggestions]]" + ) + if correctbot_page.editTime() > worksheet_page.editTime(): + # Perfect: CorrectBot report is newer than the latest change on the + # worksheet page + return f'| style="background-color:{Color.GREEN}" | {report_link}\n' + + # we don't know if still everything is okay or whether there were problems + # introduced since the + # last time CorrectBot ran, so we suggest to re-run CorrectBot + return ( + f'| style="background-color:{Color.ORANGE}" | ' + f'' + f" {report_link}\n" + ) + + def save_language_report( + self, language_info: LanguageInfo, english_info: LanguageInfo + ): + """ + Create language report and save it if it's different from the previous report + + Example: https://www.4training.net/4training:German + Args: + language_info: The language we want to write the report for + english_info: We need the details of the English original worksheets as well + """ + if language_info.english_name == "": + self.logger.warning( + f"English name of language {language_info.language_code} empty! 
" + f"Skipping WriteReport" + ) + return + page_url = f"4training:{language_info.english_name}" + page = pywikibot.Page(self._site, page_url) + report = self.create_mediawiki(language_info, english_info) + if not page.exists(): + self.logger.warning( + f"Language report page {page_url} doesn't exist, creating..." + ) + page.text = report + page.save("Created language report") + else: + if page.text.strip() != report.strip(): + page.text = report + page.save( + "Updated language report" + ) # TODO write human-readable changes here in the save message + self.logger.info( + f"Updated language report for {language_info.english_name}" + ) + + def create_mediawiki( + self, language_info: LanguageInfo, english_info: LanguageInfo + ) -> str: + """Build mediawiki code for the complete report page""" + content: str = "__NOEDITSECTION__" + content += self.create_worksheet_overview(language_info, english_info) + content += ( + "Check also the mediawiki [https://www.4training.net/Special:LanguageStats" + ) + content += ( + f"?language={language_info.language_code}&x=D Language Statistics for" + f" {language_info.english_name}]" + ) + return content + + def create_worksheet_overview( + self, language_info: LanguageInfo, english_info: LanguageInfo + ) -> str: + """Create mediawiki code to display the whole worksheet overview table + + Args: + language_info: all information on the language we're writing this report for + english_info: LanguageInfo for English - needed because we need English + WorksheetInfos + Returns: + string with mediawiki code for a whole paragraph with the complete table + """ + content: str = "== Worksheets ==\n" + content += '{| class="wikitable" style="width:100%"\n|-\n' + content += ( + f"! [[{language_info.english_name}#Available_training_resources_in_" + f"{language_info.english_name}|" + ) + content += ( + 'Listed?]]\n! Worksheet\n! Translation\n! Progress\n! colspan="2" | ' + "PDF\n! ODT\n! Version\n" + ) + content += "! 
CorrectBot\n" + for page, en_worksheet in english_info.worksheets.items(): + lang_worksheet = ( + language_info.worksheets[page] + if page in language_info.worksheets + else None + ) + content += self.create_worksheet_line( + language_info.language_code, en_worksheet, lang_worksheet + ) + content += "|}\n" + return content + + def _note(self, en_worksheet: WorksheetInfo) -> str: + """Helper function to add a quick note for certain worksheets + + Currently we only use this for the Bible Reading Hints (by including a template) + Returns: + string with mediawiki code or an empty string (for all other worksheets) + """ + if en_worksheet.title == "Bible Reading Hints": + return " {{4training:ReportNote-BibleReadingHints}}" + return "" + + def create_worksheet_line( + self, + language_code: str, + en_worksheet: WorksheetInfo, + lang_worksheet: Optional[WorksheetInfo], + ) -> str: + """Create mediawiki code with the report for one worksheet + (one line of the overview) + + Args: + language_code: Which language we're writing this report line for + (we can't use worksheet_info.language_code because + worksheet_info may be None) + en_worksheet: WorksheetInfo for the English original + lang_worksheet: WorksheetInfo for the translation if it exists, + otherwise None + Returns: + string with mediawiki code for one line of our table + """ + # column 1: Is this worksheet listed in the language overview page? 
+ if lang_worksheet is not None and lang_worksheet.show_in_list(en_worksheet): + content = '| style="text-align:center" | ✓\n' + else: + content = '| style="text-align:center" | -\n' + + # column 2: Link to English worksheet + content += f"| [[{en_worksheet.title}]]\n" + + # column 3: Link to translated worksheet (if existing) + if lang_worksheet is not None: + content += ( + f"| [[{en_worksheet.title}/{language_code}|{lang_worksheet.title}" + f"]]{self._note(en_worksheet)}\n" + ) + else: + content += "| -\n" + + # column 8: Version information (we need to process this here because + # version_color is needed for other columns) + version_color = Color.RED + if lang_worksheet is None: + version_content = f'| style="background-color:{Color.RED}" | -\n' + elif lang_worksheet.has_same_version(en_worksheet): + version_color = Color.GREEN + version_content = ( + f'| style="background-color:{Color.GREEN}" | {en_worksheet.version}\n' + ) + else: + version_content = f'| style="background-color:{Color.RED}" ' + version_content += ( + f"| {lang_worksheet.version} (Original: {en_worksheet.version})\n" + ) + + # column 4: Translation progress + translated_unit_count: int = ( + lang_worksheet.progress.translated if lang_worksheet is not None else 0 + ) + progress: int = round(translated_unit_count / en_worksheet.progress.total * 100) + if lang_worksheet is None: + progress_color = Color.RED + elif progress == 100 and version_color == Color.GREEN: + progress_color = Color.GREEN + elif lang_worksheet.show_in_list(en_worksheet) and progress < 100: + # This produces a warning in the line for this language in WriteSummary, + # so make it red + progress_color = Color.RED + else: + progress_color = Color.ORANGE + + # in case the worksheet doesn't exist, the whole line will be red + color_css = ( + f";background-color:{progress_color}" if lang_worksheet is not None else "" + ) + content += f'| style="text-align:right{color_css}" ' + # Add a link to translation view showing either 
untranslated units + # (progress < 100%) or translated units + content += ( + f"| [{self.fortraininglib.index_url}?title=Special:Translate&group=page-" + f"{en_worksheet.page}" + ) + content += f"&action=page&filter={'' if progress == 100 else '!'}translated" + content += f"&language={language_code} {progress}%]\n" + + # column 5: Link to translated PDF file (if existing) + if ( + lang_worksheet is not None + and (file_info := lang_worksheet.get_file_type_info("pdf")) is not None + ): + pdf_color = Color.GREEN if version_color == Color.GREEN else Color.ORANGE + if file_info.metadata is not None and not file_info.metadata.correct: + pdf_color = Color.ORANGE + if file_info.metadata.version != lang_worksheet.version: + # TODO: Is this the right place to log this warning? + self.logger.warning( + f"{lang_worksheet.page}/{lang_worksheet.language_code} " + f"has version " + f"{lang_worksheet.version} but PDF has version " + f"{file_info.metadata.version}!" + ) + pdf_color = Color.RED + content += f'| style="background-color:{pdf_color}" ' + content += f"| [[File:{lang_worksheet.get_file_type_name('pdf')}]]\n" + + # column 6: PDF metadata details + if file_info.metadata is not None: + content += ( + f'| style="background-color:{pdf_color}" | ' + f"{file_info.metadata.to_html()}\n" + ) + else: + content += f'| style="background-color:{Color.GREY} | ?\n' + else: + pdf_color = Color.RED + content += ( + f'| colspan="2" style="background-color:{Color.RED}; ' + f'text-align:center" | -\n' + ) + + # column 7: Link to translated ODT/ODG file (if existing) + if lang_worksheet is not None and ( + lang_worksheet.has_file_type("odt") or lang_worksheet.has_file_type("odg") + ): + od_color = Color.GREEN if version_color == Color.GREEN else Color.ORANGE + content += f'| style="background-color:{od_color}" ' + od_file = lang_worksheet.get_file_type_name("odt") + if od_file == "": + od_file = lang_worksheet.get_file_type_name("odg") + content += f"| [[File:{od_file}]]\n" + else: + 
od_color = Color.RED + content += ( + f'| style="background-color:{Color.RED}; text-align:center" | -\n' + ) + + # Now we append content for column 8: version information + content += version_content + + # column 9: CorrectBot status (do we have an up-to-date report?) + if lang_worksheet is not None: + content += self.create_correctbot_mediawiki( + lang_worksheet.page, lang_worksheet.language_code + ) + else: + content += "| -\n" + + # Determine the line color (for the first two cells) + line_color = Color.RED + if ( + version_color == Color.GREEN + or progress_color != Color.RED + or od_color != Color.RED + or pdf_color != Color.RED + ): + line_color = Color.ORANGE + if ( + version_color == Color.GREEN + and progress_color == Color.GREEN + and od_color == Color.GREEN + and pdf_color == Color.GREEN + ): + line_color = Color.GREEN + content = f'|- style="background-color:{line_color}"\n' + content + return content diff --git a/pywikitools/resourcesbot/write_sidebar_messages.py b/pywikitools/resourcesbot/modules/write_sidebar_messages.py similarity index 52% rename from pywikitools/resourcesbot/write_sidebar_messages.py rename to pywikitools/resourcesbot/modules/write_sidebar_messages.py index 51fd8a0..4451cd5 100644 --- a/pywikitools/resourcesbot/write_sidebar_messages.py +++ b/pywikitools/resourcesbot/modules/write_sidebar_messages.py @@ -1,39 +1,59 @@ import logging +from configparser import ConfigParser from typing import Final import pywikibot + +from pywikitools.fortraininglib import ForTrainingLib from pywikitools.resourcesbot.changes import ChangeLog, ChangeType -from pywikitools.resourcesbot.post_processing import LanguagePostProcessor from pywikitools.resourcesbot.data_structures import LanguageInfo, WorksheetInfo -from pywikitools.fortraininglib import ForTrainingLib +from pywikitools.resourcesbot.modules.post_processing import LanguagePostProcessor class WriteSidebarMessages(LanguagePostProcessor): """ - Write/update the system messages for the sidebar
with the translated titles of worksheets. - These are used when displaying the whole website in another language (changing the "user interface language") + Write/update the system messages for the sidebar with the translated + titles of worksheets. + These are used when displaying the whole website in another language + (changing the "user interface language") - E.g. write German headline of "Hearing from God" + E.g., write German headline of "Hearing from God" to https://www.4training.net/MediaWiki:Sidebar-hearingfromgod/de - More information on system messages: https://www.mediawiki.org/wiki/Help:System_message + More information on system messages: + https://www.mediawiki.org/wiki/Help:System_message This class can be re-used to call run() several times """ - def __init__(self, fortraininglib: ForTrainingLib, site: pywikibot.site.APISite, *, - force_rewrite: bool = False): - """ - Args: - force_rewrite rewrite even if there were no (relevant) changes - """ - self.fortraininglib: Final[ForTrainingLib] = fortraininglib - self._site: Final[pywikibot.site.APISite] = site - self._force_rewrite: Final[bool] = force_rewrite - self.logger: Final[logging.Logger] = logging.getLogger('pywikitools.resourcesbot.write_sidebar_messages') + @classmethod + def help_summary(cls) -> str: + return "Write the system messages for sidebar with translated titles" + + @classmethod + def abbreviation(cls) -> str: + return "sidebar" + + @classmethod + def can_be_rewritten(cls) -> bool: + return True + + def __init__( + self, + fortraininglib: ForTrainingLib, + config: ConfigParser, + site: pywikibot.site.APISite + ): + super().__init__(fortraininglib, config, site) + self.logger: Final[logging.Logger] = logging.getLogger( + "pywikitools.resourcesbot.modules.write_sidebar_messages" + ) def save_worksheet_title(self, worksheet: WorksheetInfo): """Save system message with the title of the given worksheet.""" - title = 
f"MediaWiki:{self.fortraininglib.title_to_message(worksheet.page).capitalize()}" + title = ( + f"MediaWiki:" + f"{self.fortraininglib.title_to_message(worksheet.page).capitalize()}" + ) if worksheet.language_code != "en": title += f"/{worksheet.language_code}" self.logger.debug(f"save_worksheet_title(): title = {title}") @@ -51,19 +71,30 @@ def save_worksheet_title(self, worksheet: WorksheetInfo): def has_relevant_change(worksheet: str, changes: ChangeLog) -> bool: """ Is there a relevant change for our worksheet? - Relevant is a change indicating that the translated title might have changed (new / updated worksheet) + Relevant is a change indicating that the translated title might have changed + (new / updated worksheet) """ for change_item in changes: if change_item.worksheet == worksheet: - if change_item.change_type == ChangeType.NEW_WORKSHEET or \ - change_item.change_type == ChangeType.UPDATED_WORKSHEET: + if ( + change_item.change_type == ChangeType.NEW_WORKSHEET + or change_item.change_type == ChangeType.UPDATED_WORKSHEET + ): return True return False - def run(self, language_info: LanguageInfo, _english_info, changes: ChangeLog, _english_changes) -> None: + def run( + self, + language_info: LanguageInfo, + _english_info, + changes: ChangeLog, + _english_changes, + *, + force_rewrite: bool = False + ) -> None: """Our entry function""" for worksheet in language_info.worksheets.values(): if worksheet.title == "": continue - if self._force_rewrite or self.has_relevant_change(worksheet.page, changes): + if force_rewrite or self.has_relevant_change(worksheet.page, changes): self.save_worksheet_title(worksheet) diff --git a/pywikitools/resourcesbot/write_summary.py b/pywikitools/resourcesbot/modules/write_summary.py similarity index 94% rename from pywikitools/resourcesbot/write_summary.py rename to pywikitools/resourcesbot/modules/write_summary.py index fce0a50..4be0779 100644 --- a/pywikitools/resourcesbot/write_summary.py +++ 
b/pywikitools/resourcesbot/modules/write_summary.py @@ -4,7 +4,7 @@ import pywikibot from pywikitools.resourcesbot.changes import ChangeLog from pywikitools.resourcesbot.data_structures import LanguageInfo -from pywikitools.resourcesbot.post_processing import GlobalPostProcessor +from pywikitools.resourcesbot.modules.post_processing import GlobalPostProcessor class WriteSummary(GlobalPostProcessor): @@ -18,24 +18,29 @@ class WriteSummary(GlobalPostProcessor): This is a summary of all the language reports written by WriteReport. It will be written to https://www.4training.net/4training:Summary - see also there for more explanations """ - def __init__(self, site: pywikibot.site.APISite, *, force_rewrite: bool = False): + def __init__(self, site: pywikibot.site.APISite): """ Args: site: our pywikibot object to be able to write to the mediawiki system - force_rewrite: rewrite report even if there were no (relevant) changes """ self._site: Final[pywikibot.site.APISite] = site - self._force_rewrite: Final[bool] = force_rewrite - self.logger: Final[logging.Logger] = logging.getLogger('pywikitools.resourcesbot.write_summary') + self.logger: Final[logging.Logger] = logging.getLogger( + 'pywikitools.resourcesbot.modules.write_summary' + ) self.total_stats: Counter = Counter() # Summing up statistics for all languages - def run(self, language_data: Dict[str, LanguageInfo], changes: Dict[str, ChangeLog]): - """Entry function""" + def run(self, language_data: Dict[str, LanguageInfo], changes: Dict[str, ChangeLog], + *, force_rewrite: bool = False) -> None: + """Entry function + + Args: + force_rewrite: rewrite report even if there were no (relevant) changes + """ has_changes = False for change_log in changes.values(): if not change_log.is_empty(): has_changes = True - if self._force_rewrite or has_changes: + if force_rewrite or has_changes: self.save_summary(language_data) def save_summary(self, language_data: Dict[str, LanguageInfo]): diff --git 
a/pywikitools/resourcesbot/post_processing.py b/pywikitools/resourcesbot/post_processing.py deleted file mode 100644 index 7fb3f36..0000000 --- a/pywikitools/resourcesbot/post_processing.py +++ /dev/null @@ -1,34 +0,0 @@ -""" -Base classes for all functionality doing useful stuff with the data gathered previously. - -If the functionality looks only at one language at a time, implement LanguagePostProcessor. -If the functionality needs to look at everything, implement GlobalPostProcessor. -The resourcesbot will first call any LanguagePostProcessors for each language and -afterwards call any GlobalPostProcessor -""" -from abc import ABC, abstractmethod -from typing import Dict -from pywikitools.resourcesbot.changes import ChangeLog -from pywikitools.resourcesbot.data_structures import LanguageInfo - - -class LanguagePostProcessor(ABC): - """Base class for all functionality doing useful stuff with the data on one language. - - We include information on English as well because several post-processors need it as reference - """ - - @abstractmethod - def run(self, language_info: LanguageInfo, english_info: LanguageInfo, - changes: ChangeLog, english_changes: ChangeLog): - """Entry point""" - pass - - -class GlobalPostProcessor(ABC): - """Base class for all functionality doing useful stuff with the data on all languages""" - - @abstractmethod - def run(self, language_data: Dict[str, LanguageInfo], changes: Dict[str, ChangeLog]): - """Entry point""" - pass diff --git a/pywikitools/resourcesbot/write_lists.py b/pywikitools/resourcesbot/write_lists.py deleted file mode 100644 index 645f1a1..0000000 --- a/pywikitools/resourcesbot/write_lists.py +++ /dev/null @@ -1,167 +0,0 @@ -import re -import logging -from typing import Final, Optional, Tuple - -import pywikibot -from pywikitools.resourcesbot.changes import ChangeLog, ChangeType -from pywikitools.resourcesbot.post_processing import LanguagePostProcessor -from pywikitools.resourcesbot.data_structures import FileInfo, 
LanguageInfo -from pywikitools.fortraininglib import ForTrainingLib - - -class WriteList(LanguagePostProcessor): - """ - Write/update the list of available training resources for languages. - - We only show worksheets that have a PDF file (to ensure good quality) - - This class can be re-used to call run() several times - """ - def __init__(self, fortraininglib: ForTrainingLib, site: pywikibot.site.APISite, - user_name: str, password: str, *, force_rewrite: bool = False): - """ - Arguments user_name and password are necessary to mark page for translation in case of changes. - In case they're empty we won't try to mark pages for translation - Args: - force_rewrite rewrite even if there were no (relevant) changes - """ - self.fortraininglib: Final[ForTrainingLib] = fortraininglib - self._site: Final[pywikibot.site.APISite] = site - self._user_name: Final[str] = user_name - self._password: Final[str] = password - self._force_rewrite: Final[bool] = force_rewrite - self.logger: Final[logging.Logger] = logging.getLogger('pywikitools.resourcesbot.write_lists') - if user_name == "" or password == "": - self.logger.warning("Missing user name and/or password in config. 
Won't mark pages for translation.") - - def needs_rewrite(self, language_info: LanguageInfo, changes: ChangeLog) -> bool: - """Determine whether the list of available training resources needs to be rewritten.""" - lang = language_info.language_code - needs_rewrite = self._force_rewrite - for change_item in changes: - if change_item.change_type in [ChangeType.UPDATED_PDF, ChangeType.NEW_PDF, ChangeType.DELETED_PDF, - ChangeType.NEW_WORKSHEET, ChangeType.DELETED_WORKSHEET]: - needs_rewrite = True - if (change_item.change_type in [ChangeType.NEW_ODT, ChangeType.DELETED_ODT]) \ - and language_info.worksheet_has_type(change_item.worksheet, "pdf"): - needs_rewrite = True - - if needs_rewrite: - self.logger.info(f"List of available training resources in language {lang} needs to be re-written.") - else: - self.logger.info(f"List of available training resources in language {lang} doesn't need to be re-written.") - - return needs_rewrite - - def _create_file_mediawiki(self, file_info: Optional[FileInfo]) -> str: - """ - Return string with mediawiki code to display a downloadable file - - Example: [[File:pdficon_small.png|link={{filepath:Gebet.pdf}}]] - @return empty string if file_info is None - """ - if file_info is None: - return "" - file_name: str = file_info.url - pos: int = file_name.rfind('/') - if pos > -1: - file_name = file_name[pos+1:] - else: - self.logger.warning(f"Couldn't find / in {file_name}") - return f" [[File:{file_info.file_type.lower()}icon_small.png|" + r"link={{filepath:" + file_name + r"}}]]" - - def create_mediawiki(self, language_info: LanguageInfo, english_info: LanguageInfo) -> str: - """ - Create the mediawiki string for the list of available training resources - - Output should look like the following line: - * [[God's_Story_(five_fingers)/de|{{int:sidebar-godsstory-fivefingers}}]] \ - [[File:pdficon_small.png|link={{filepath:Gottes_Geschichte_(fünf_Finger).pdf}}]] \ - 
[[File:printpdficon_small.png|link={{filepath:Gottes_Geschichte_(fünf_Finger).pdf}}]] \ - [[File:odticon_small.png|link={{filepath:Gottes_Geschichte_(fünf_Finger).odt}}]] - """ - content: str = '' - for worksheet, worksheet_info in language_info.worksheets.items(): - if worksheet_info.show_in_list(english_info.worksheets[worksheet]): - content += f"* [[{worksheet}/{language_info.language_code}|" - content += "{{int:" + self.fortraininglib.title_to_message(worksheet) + "}}]]" - content += self._create_file_mediawiki(worksheet_info.get_file_type_info("pdf")) - content += self._create_file_mediawiki(worksheet_info.get_file_type_info("printPdf")) - content += self._create_file_mediawiki(worksheet_info.get_file_type_info("odt")) - content += "\n" - if worksheet_info.progress.translated < worksheet_info.progress.total: - self.logger.warning(f"Worksheet {worksheet}/{language_info.language_code} is not fully translated!") - - self.logger.debug(content) - return content - - def _find_resources_list(self, page_content: str, language: str) -> Tuple[int, int]: - """Find the exact positions of the existing list of available training resources in the page - @param page_content: mediawiki "source" of the language info page that we're searching through - @param language: The language name (as in LanguageInfo.english_name) - @return Tuple of start and end position. 
(0, 0) indicates we couldn't find it - """ - language_re = language.replace('(', r'\(') # if language name contains brackets, we need to escape them - language_re = language_re.replace(')', r'\)') # Example would be language Turkish (secular)- - match = re.search(f"Available training resources in {language_re}\\s*?\\s*?==", page_content) - if not match: - return 0, 0 - list_start = 0 - list_end = 0 - # Find all following list entries: must start with * - pattern = re.compile(r'^\*.*$', re.MULTILINE) - for m in pattern.finditer(page_content, match.end()): - if list_start == 0: - list_start = m.start() - else: - # Make sure there is no other line in between: We only want to find lines directly following each other - if m.start() > (list_end + 1): - self.logger.info(f"Looks like there is another list later in page {language}. Ignoring it.") - break - list_end = m.end() - self.logger.debug(f"Matching line: start={m.start()}, end={m.end()}, {m.group(0)}") - return list_start, list_end - - def run(self, language_info: LanguageInfo, english_info: LanguageInfo, - changes: ChangeLog, _english_changes) -> None: - if not self.needs_rewrite(language_info, changes): - return - - # Saving this to the language information page, e.g. https://www.4training.net/German - language = language_info.english_name - if language == "": - self.logger.warning(f"English language name of {language_info.language_code} missing! Skipping WriteList") - return - self.logger.debug(f"Writing list of available resources in {language}...") - page = pywikibot.Page(self._site, language) - if not page.exists(): - self.logger.warning(f"Language information page {language} doesn't exist!") - return - if page.isRedirectPage(): - self.logger.info(f"Language information page {language} is a redirect. 
Following the redirect...") - page = page.getRedirectTarget() - if not page.exists(): - self.logger.warning(f"Redirect target for language {language} doesn't exist!") - return - language = page.title() - - list_start, list_end = self._find_resources_list(page.text, language) - if (list_start == 0) or (list_end == 0): - self.logger.warning(f"Couldn't find list of available training resources in {language}! Doing nothing.") - self.logger.info(page.text) - return - self.logger.debug(f"Found existing list of available training resources @{list_start}-{list_end}. Replacing...") - new_page_content = page.text[0:list_start] + self.create_mediawiki(language_info, english_info) - new_page_content += page.text[list_end+1:] - self.logger.debug(new_page_content) - - # Save page and mark it for translation if necessary - if page.text.strip() == new_page_content.strip(): - return - page.text = new_page_content - page.save("Updated list of available training resources") # TODO write list of changes here in the save message - if self._user_name != '' and self._password != '': - self.fortraininglib.mark_for_translation(page.title(), self._user_name, self._password) - self.logger.info(f"Updated language information page {language} and marked it for translation.") - else: - self.logger.info(f"Updated language information page {language}. 
Couldn't mark it for translation.") diff --git a/pywikitools/resourcesbot/write_report.py b/pywikitools/resourcesbot/write_report.py deleted file mode 100644 index 35f81a8..0000000 --- a/pywikitools/resourcesbot/write_report.py +++ /dev/null @@ -1,269 +0,0 @@ -from enum import Enum -import logging -import re -from typing import Final, Optional -import pywikibot -from pywikitools.fortraininglib import ForTrainingLib -from pywikitools.resourcesbot.changes import ChangeLog -from pywikitools.resourcesbot.data_structures import LanguageInfo, WorksheetInfo -from pywikitools.resourcesbot.post_processing import LanguagePostProcessor - - -class Color(Enum): - GREEN = "green" - ORANGE = "orange" - RED = "red" - GREY = "grey" - - def __str__(self) -> str: - return self.value - - -class WriteReport(LanguagePostProcessor): - """ - Write/update status reports for all languages (for translators and translation coordinators). - - Every language report has a table with the translation status of all worksheets: - Which worksheet is translated? Is the translation 100% complete? Is it the same version as the English original? - Do we have ODT and PDF files for download? - To help interpreting the results, we use colors (green / orange / red) for each cell. 
- """ - def __init__(self, fortraininglib: ForTrainingLib, site: pywikibot.site.APISite, *, force_rewrite: bool = False): - """ - Args: - site: our pywikibot object to be able to write to the mediawiki system - force_rewrite: is ignored as we need to check CorrectBot reports anyway - """ - self.fortraininglib: Final[ForTrainingLib] = fortraininglib - self._site: Final[pywikibot.site.APISite] = site - self.logger: Final[logging.Logger] = logging.getLogger('pywikitools.resourcesbot.write_report') - - def run(self, language_info: LanguageInfo, english_info: LanguageInfo, - changes: ChangeLog, english_changes: ChangeLog): - """Entry function - - We run everything and don't look whether we have changes because we need to look at all CorrectBot reports - and according to them may need to rewrite the report even if changes and english_changes are empty - """ - if language_info.language_code == "en": # We don't need a report for English as it is the source language - return - if "-" in language_info.language_code and language_info.language_code != "pt-br": - # Don't write reports for language variants - # (except Brazilian Portuguese) TODO this should go somewhere else - return - self.save_language_report(language_info, english_info) - - def create_correctbot_mediawiki(self, worksheet: str, language_code: str) -> str: - """Check Correctbot report status for one worksheet - - Returns: - mediawiki string to fill one cell (for one worksheet in the CorrectBot column) - """ - page = f"{worksheet}/{language_code}" - worksheet_page = pywikibot.Page(self._site, page) - if not worksheet_page.exists(): - self.logger.warning(f"Couldn't access page {page}") - return f'| style="background-color:{Color.RED}" | ERROR\n' - - correctbot_page = pywikibot.Page(self._site, f"CorrectBot:{page}") - if not correctbot_page.exists(): - return f'| style="background-color:{Color.RED}" | Missing\n' - - # Analyze the result of the last CorrectBot run (from edit summary) - correctbot_summary = 
correctbot_page.latest_revision["comment"] - match = re.match(r"^(\d+) corrections, (\d+) suggestions, (\d+) warnings$", correctbot_summary) - if not match: - # Somehow the edit summary is not as we expect it - self.logger.warning(f"Couldn't parse edit summary '{correctbot_summary}' in page CorrectBot:{page}") - return f'| style="background-color:{Color.RED}" | Invalid. Please run CorrectBot again.\n' - - if int(match.group(3)) > 0: - # CorrectBot gave warnings - something is definitely not okay - return f'| style="background-color:{Color.RED}" | [[CorrectBot:{page}|{match.group(3)} warnings]]\n' - - report_link = f'[[CorrectBot:{page}|{match.group(1)} corrections, {match.group(2)} suggestions]]' - if correctbot_page.editTime() > worksheet_page.editTime(): - # Perfect: CorrectBot report is newer than latest change on the worksheet page - return f'| style="background-color:{Color.GREEN}" | {report_link}\n' - - # we don't know if still everything is okay or whether there were problems introduced since the - # last time CorrectBot ran, so we suggest to re-run CorrectBot - return f'| style="background-color:{Color.ORANGE}" | ' \ - f' {report_link}\n' - - def save_language_report(self, language_info: LanguageInfo, english_info: LanguageInfo): - """ - Create language report and save it if it's different from the previous report - - Example: https://www.4training.net/4training:German - Args: - language_info: The language we want to write the report for - english_info: We need the details of the English original worksheets as well - """ - if language_info.english_name == "": - self.logger.warning(f"English name of language {language_info.language_code} empty! 
Skipping WriteReport") - return - page_url = f"4training:{language_info.english_name}" - page = pywikibot.Page(self._site, page_url) - report = self.create_mediawiki(language_info, english_info) - if not page.exists(): - self.logger.warning(f"Language report page {page_url} doesn't exist, creating...") - page.text = report - page.save("Created language report") - else: - if page.text.strip() != report.strip(): - page.text = report - page.save("Updated language report") # TODO write human-readable changes here in the save message - self.logger.info(f"Updated language report for {language_info.english_name}") - - def create_mediawiki(self, language_info: LanguageInfo, english_info: LanguageInfo) -> str: - """Build mediawiki code for the complete report page""" - content: str = "__NOEDITSECTION__" - content += self.create_worksheet_overview(language_info, english_info) - content += "Check also the mediawiki [https://www.4training.net/Special:LanguageStats" - content += f"?language={language_info.language_code}&x=D Language Statistics for {language_info.english_name}]" - return content - - def create_worksheet_overview(self, language_info: LanguageInfo, english_info: LanguageInfo) -> str: - """Create mediawiki code to display the whole worksheet overview table - - Args: - language_info: all information on the language we're writing this report for - english_info: LanguageInfo for English - needed because we need English WorksheetInfos - Returns: - string with mediawiki code for a whole paragraph with the complete table - """ - content: str = "== Worksheets ==\n" - content += '{| class="wikitable" style="width:100%"\n|-\n' - content += f"! [[{language_info.english_name}#Available_training_resources_in_{language_info.english_name}|" - content += "Listed?]]\n! Worksheet\n! Translation\n! Progress\n! colspan=\"2\" | PDF\n! ODT\n! Version\n" - content += "! 
CorrectBot\n" - for page, en_worksheet in english_info.worksheets.items(): - lang_worksheet = language_info.worksheets[page] if page in language_info.worksheets else None - content += self.create_worksheet_line(language_info.language_code, en_worksheet, lang_worksheet) - content += "|}\n" - return content - - def _note(self, en_worksheet: WorksheetInfo) -> str: - """Helper function to add a quick note for certain worksheets - - Currently we only use this for the Bible Reading Hints (by including a template) - Returns: - string with mediawiki code or an empty string (for all other worksheets) - """ - if en_worksheet.title == "Bible Reading Hints": - return " {{4training:ReportNote-BibleReadingHints}}" - return "" - - def create_worksheet_line(self, language_code: str, - en_worksheet: WorksheetInfo, lang_worksheet: Optional[WorksheetInfo]) -> str: - """Create mediawiki code with report for one worksheet (one line of the overview) - - Args: - language_code: Which language we're writing this report line for - (we can't use worksheet_info.language_code because worksheet_info may be None) - en_worksheet: WorksheetInfo for the English original - lang_worksheet: WorksheetInfo for the translation if it exists, otherwise None - Returns: - string with mediawiki code for one line of our table - """ - # column 1: Is this worksheet listed in the language overview page? 
- if lang_worksheet is not None and lang_worksheet.show_in_list(en_worksheet): - content = "| style=\"text-align:center\" | ✓\n" - else: - content = "| style=\"text-align:center\" | -\n" - - # column 2: Link to English worksheet - content += f"| [[{en_worksheet.title}]]\n" - - # column 3: Link to translated worksheet (if existing) - if lang_worksheet is not None: - content += f"| [[{en_worksheet.title}/{language_code}|{lang_worksheet.title}]]{self._note(en_worksheet)}\n" - else: - content += "| -\n" - - # column 8: Version information (we need to process this here because version_color is needed for other columns) - version_color = Color.RED - if lang_worksheet is None: - version_content = f'| style="background-color:{Color.RED}" | -\n' - elif lang_worksheet.has_same_version(en_worksheet): - version_color = Color.GREEN - version_content = f'| style="background-color:{Color.GREEN}" | {en_worksheet.version}\n' - else: - version_content = f'| style="background-color:{Color.RED}" ' - version_content += f"| {lang_worksheet.version} (Original: {en_worksheet.version})\n" - - # column 4: Translation progress - translated_unit_count: int = lang_worksheet.progress.translated if lang_worksheet is not None else 0 - progress: int = round(translated_unit_count / en_worksheet.progress.total * 100) - if lang_worksheet is None: - progress_color = Color.RED - elif progress == 100 and version_color == Color.GREEN: - progress_color = Color.GREEN - elif lang_worksheet.show_in_list(en_worksheet) and progress < 100: - # This produces a warning in the line for this language in WriteSummary, so make it red - progress_color = Color.RED - else: - progress_color = Color.ORANGE - - # in case the worksheet doesn't exist, the whole line will be red - color_css = f";background-color:{progress_color}" if lang_worksheet is not None else "" - content += f'| style="text-align:right{color_css}" ' - # Add link to translation view, showing either untranslated units (progress < 100%) or translated units 
- content += f"| [{self.fortraininglib.index_url}?title=Special:Translate&group=page-{en_worksheet.page}" - content += f"&action=page&filter={'' if progress == 100 else '!'}translated" - content += f"&language={language_code} {progress}%]\n" - - # column 5: Link to translated PDF file (if existing) - if lang_worksheet is not None and (file_info := lang_worksheet.get_file_type_info("pdf")) is not None: - pdf_color = Color.GREEN if version_color == Color.GREEN else Color.ORANGE - if file_info.metadata is not None and not file_info.metadata.correct: - pdf_color = Color.ORANGE - if file_info.metadata.version != lang_worksheet.version: - # TODO: Is this the right place to log this warning? - self.logger.warning(f"{lang_worksheet.page}/{lang_worksheet.language_code} has version " - f"{lang_worksheet.version} but PDF has version {file_info.metadata.version}!") - pdf_color = Color.RED - content += f'| style="background-color:{pdf_color}" ' - content += f"| [[File:{lang_worksheet.get_file_type_name('pdf')}]]\n" - - # column 6: PDF metadata details - if file_info.metadata is not None: - content += f'| style="background-color:{pdf_color}" | {file_info.metadata.to_html()}\n' - else: - content += f'| style="background-color:{Color.GREY} | ?\n' - else: - pdf_color = Color.RED - content += f'| colspan="2" style="background-color:{Color.RED}; text-align:center" | -\n' - - # column 7: Link to translated ODT/ODG file (if existing) - if lang_worksheet is not None and (lang_worksheet.has_file_type("odt") or lang_worksheet.has_file_type("odg")): - od_color = Color.GREEN if version_color == Color.GREEN else Color.ORANGE - content += f'| style="background-color:{od_color}" ' - od_file = lang_worksheet.get_file_type_name('odt') - if od_file == "": - od_file = lang_worksheet.get_file_type_name('odg') - content += f"| [[File:{od_file}]]\n" - else: - od_color = Color.RED - content += f'| style="background-color:{Color.RED}; text-align:center" | -\n' - - # Now we append content for column 7: 
version information - content += version_content - - # column 9: CorrectBot status (do we have an up-to-date report?) - if lang_worksheet is not None: - content += self.create_correctbot_mediawiki(lang_worksheet.page, lang_worksheet.language_code) - else: - content += "| -\n" - - # Determine the line color (for the first two cells) - line_color = Color.RED - if version_color == Color.GREEN or progress_color != Color.RED or \ - od_color != Color.RED or pdf_color != Color.RED: - line_color = Color.ORANGE - if version_color == Color.GREEN and progress_color == Color.GREEN and \ - od_color == Color.GREEN and pdf_color == Color.GREEN: - line_color = Color.GREEN - content = f'|- style="background-color:{line_color}"\n' + content - return content diff --git a/pywikitools/test/test_consistency_checks.py b/pywikitools/test/test_consistency_checks.py index 22c6f02..a8285d7 100644 --- a/pywikitools/test/test_consistency_checks.py +++ b/pywikitools/test/test_consistency_checks.py @@ -2,7 +2,7 @@ from pywikitools.fortraininglib import ForTrainingLib from pywikitools.resourcesbot.changes import ChangeLog -from pywikitools.resourcesbot.consistency_checks import ConsistencyCheck +from pywikitools.resourcesbot.modules.consistency_checks import ConsistencyCheck from pywikitools.resourcesbot.data_structures import LanguageInfo @@ -28,7 +28,10 @@ def test_everything_in_english(self): """All consistency checks should pass in English""" cc = ConsistencyCheck(self.fortraininglib) language_info = LanguageInfo("en", "English") - with self.assertLogs("pywikitools.resourcesbot.consistency_checks", level="INFO") as logs: + with self.assertLogs( + "pywikitools.resourcesbot.modules.consistency_checks", + level="INFO" + ) as logs: cc.run(language_info, LanguageInfo("en", "English"), ChangeLog(), ChangeLog()) self.assertIn("Consistency checks for English: 5/5 passed", logs.output[0]) diff --git a/pywikitools/test/test_resourcesbot.py b/pywikitools/test/test_resourcesbot.py index 6f4b87b..bef12d6 
100644 --- a/pywikitools/test/test_resourcesbot.py +++ b/pywikitools/test/test_resourcesbot.py @@ -4,12 +4,13 @@ Currently we have only little test coverage... TODO: Find ways to run meaningful tests that don't take too long... """ + +import unittest from configparser import ConfigParser from datetime import datetime from os.path import abspath, dirname, join from typing import Dict -import unittest -from unittest.mock import patch, Mock +from unittest.mock import Mock, patch import pywikibot @@ -27,13 +28,22 @@ class TestResourcesBot(unittest.TestCase): """ - We mock pywikibot because otherwise we would need to provide a valid user-config.py (and because it saves time) + We mock pywikibot because otherwise we would need to provide a valid user-config.py + (and because it saves time) """ def setUp(self): self.config = ConfigParser() - self.config.read_dict({"resourcesbot": {"site": "test", "username": "TestBotName"}, - "Paths": {"logs": "~/", "temp": "~/temp/"}}) # Fill this to prevent warnings + self.config.read_dict( + { + "resourcesbot": {"site": "test", "username": "TestBotName", + "password": "test"}, + "Paths": {"logs": "~/", "temp": "~/temp/", + "htmlexport": "~/htmlexport/", + "pdfexport": "~/pdfexport/" + }, + } + ) # Fill this to prevent warnings self.bot = ResourcesBot(self.config) def tearDown(self): @@ -44,12 +54,18 @@ def tearDown(self): def test_add_english_file_infos(self, mock_filepage): mock_filepage.return_value.exists.return_value = True mock_filepage.return_value.latest_file_info.url = TEST_URL - mock_filepage.return_value.latest_file_info.timestamp = datetime.fromisoformat(TEST_TIME) + mock_filepage.return_value.latest_file_info.timestamp = datetime.fromisoformat( + TEST_TIME + ) mock_filepage.return_value.download.return_value = False progress = TranslationProgress(**TEST_PROGRESS) - worksheet_info = WorksheetInfo("Hearing_from_God", "en", "Hearing from God", progress, "1.2") - with self.assertLogs("pywikitools.resourcesbot", 
level="WARNING"): # warning for not checking PDF metadata + worksheet_info = WorksheetInfo( + "Hearing_from_God", "en", "Hearing from God", progress, "1.2" + ) + with self.assertLogs( + "pywikitools.resourcesbot", level="WARNING" + ): # warning for not checking PDF metadata self.bot._add_english_file_infos(HEARING_FROM_GOD, worksheet_info) self.assertTrue(worksheet_info.has_file_type("pdf")) self.assertTrue(worksheet_info.has_file_type("odt")) @@ -64,12 +80,18 @@ def test_add_english_file_infos(self, mock_filepage): def test_add_file_type(self, mock_filepage, mock_os): # Testing with reading metadata from a real PDF that is in our repo mock_filepage.return_value.exists.return_value = True - mock_os.path.join.return_value = join(dirname(abspath(__file__)), "data", "Gottes_Reden_wahrnehmen.pdf") + mock_os.path.join.return_value = join( + dirname(abspath(__file__)), "data", "Gottes_Reden_wahrnehmen.pdf" + ) mock_filepage.return_value.download.return_value = True - mock_filepage.return_value.latest_file_info.url = "https://www.4training.net/test/Gottes_Reden_wahrnehmen.pdf" + mock_filepage.return_value.latest_file_info.url = ( + "https://www.4training.net/test/Gottes_Reden_wahrnehmen.pdf" + ) mock_filepage.return_value.latest_file_info.timestamp = datetime(1970, 1, 1) progress = TranslationProgress(**TEST_PROGRESS) - worksheet_info = WorksheetInfo("Hearing_from_God", "de", "Gottes Reden wahrnehmen", progress, "1.2") + worksheet_info = WorksheetInfo( + "Hearing_from_God", "de", "Gottes Reden wahrnehmen", progress, "1.2" + ) self.bot._add_file_type(worksheet_info, "pdf", "Gottes_Reden_wahrnehmen.pdf") self.assertTrue(worksheet_info.has_file_type("pdf")) pdf_info = worksheet_info.get_file_type_info("pdf") @@ -80,7 +102,9 @@ def test_add_file_type(self, mock_filepage, mock_os): def test_add_file_type_not_existing(self, mock_filepage): mock_filepage.return_value.exists.return_value = False progress = TranslationProgress(**TEST_PROGRESS) - worksheet_info = 
WorksheetInfo("Hearing_from_God", "en", "Hearing from God", progress, "1.2") + worksheet_info = WorksheetInfo( + "Hearing_from_God", "en", "Hearing from God", progress, "1.2" + ) with self.assertLogs("pywikitools.resourcesbot", level="WARNING"): self.bot._add_file_type(worksheet_info, "pdf", "Hearing_from_God.pdf") self.assertFalse(worksheet_info.has_file_type("pdf")) @@ -89,14 +113,18 @@ def test_add_file_type_not_existing(self, mock_filepage): def test_add_file_type_exception(self, mock_filepage): mock_filepage.side_effect = pywikibot.exceptions.Error("Test error") progress = TranslationProgress(**TEST_PROGRESS) - worksheet_info = WorksheetInfo("Hearing_from_God", "en", "Hearing from God", progress, "1.2") + worksheet_info = WorksheetInfo( + "Hearing_from_God", "en", "Hearing from God", progress, "1.2" + ) with self.assertLogs("pywikitools.resourcesbot", level="WARNING"): self.bot._add_file_type(worksheet_info, "pdf", "Hearing_from_God.pdf") self.assertFalse(worksheet_info.has_file_type("pdf")) mock_filepage.side_effect = ValueError("Invalid extension") progress = TranslationProgress(**TEST_PROGRESS) - worksheet_info = WorksheetInfo("Hearing_from_God", "en", "Hearing from God", progress, "1.2") + worksheet_info = WorksheetInfo( + "Hearing_from_God", "en", "Hearing from God", progress, "1.2" + ) with self.assertLogs("pywikitools.resourcesbot", level="WARNING"): self.bot._add_file_type(worksheet_info, "pdf", "Hearing_from_God") self.assertFalse(worksheet_info.has_file_type("pdf")) @@ -106,7 +134,9 @@ def test_get_english_version(self): self.assertEqual(version, "1.2") self.assertEqual(version_unit, 55) with self.assertLogs("pywikitools.resourcesbot", level="WARNING"): - version, version_unit = self.bot.get_english_version("Some mediawiki content...") + version, version_unit = self.bot.get_english_version( + "Some mediawiki content..." 
+ ) self.assertEqual(version, "") self.assertEqual(version_unit, 0) @@ -117,74 +147,98 @@ def json_test_loader(site, page: str): if page == "4training:languages.json": result.text = '["en", "ru"]' elif page == "4training:en.json": - with open(join(dirname(abspath(__file__)), "data", "en.json"), 'r') as f: + with open(join(dirname(abspath(__file__)), "data", "en.json"), "r") as f: result.text = f.read() elif page == "4training:ru.json": - with open(join(dirname(abspath(__file__)), "data", "ru.json"), 'r') as f: + with open(join(dirname(abspath(__file__)), "data", "ru.json"), "r") as f: result.text = f.read() return result @patch("pywikibot.Site", autospec=True) @patch("pywikibot.Page", autospec=True) - @patch("pywikitools.resourcesbot.bot.WriteSummary", autospec=True) - @patch("pywikitools.resourcesbot.bot.WriteReport", autospec=True) - @patch("pywikitools.resourcesbot.bot.WriteList", autospec=True) - @patch("pywikitools.resourcesbot.bot.WriteSidebarMessages", autospec=True) - @patch("pywikitools.resourcesbot.bot.ExportRepository", autospec=True) - @patch("pywikitools.resourcesbot.bot.ExportHTML", autospec=True) - @patch("pywikitools.resourcesbot.bot.ExportPDF", autospec=True) - @patch("pywikitools.resourcesbot.bot.ConsistencyCheck", autospec=True) - def test_run_with_cache(self, mock_consistency_check, mock_export_pdf, mock_export_html, mock_export_repository, - mock_write_sidebar_messages, mock_write_list, mock_write_report, mock_write_summary, - mock_pywikibot_page, mock_pywikibot_site): + @patch("pywikitools.resourcesbot.modules.write_summary.WriteSummary.run", autospec=True) + @patch("pywikitools.resourcesbot.modules.write_report.WriteReport.run", autospec=True) + @patch("pywikitools.resourcesbot.modules.write_lists.WriteList.run", autospec=True) + @patch("pywikitools.resourcesbot.modules.write_sidebar_messages.WriteSidebarMessages.run", autospec=True) + @patch("pywikitools.resourcesbot.modules.export_repository.ExportRepository.run", autospec=True) + 
@patch("pywikitools.resourcesbot.modules.export_html.ExportHTML.run", autospec=True) + @patch("pywikitools.resourcesbot.modules.export_pdf.ExportPDF.run", autospec=True) + @patch("pywikitools.resourcesbot.modules.consistency_checks.ConsistencyCheck.run", autospec=True) + def test_run_with_cache( + self, + mock_consistency_check, + mock_export_pdf, + mock_export_html, + mock_export_repository, + mock_write_sidebar_messages, + mock_write_list, + mock_write_report, + mock_write_summary, + mock_pywikibot_page, + mock_pywikibot_site, + ): mock_pywikibot_page.side_effect = self.json_test_loader mock_pywikibot_site.return_value.logged_in.return_value = True - bot = ResourcesBot(self.config, read_from_cache=True) + bot = ResourcesBot(config=self.config, read_from_cache=True) bot.run() # run() function of each LanguagePostProcessor should get called 2x (for English and Russian) - self.assertEqual(mock_consistency_check.return_value.run.call_count, 2) - self.assertEqual(mock_export_pdf.return_value.run.call_count, 2) - self.assertEqual(mock_export_html.return_value.run.call_count, 2) - self.assertEqual(mock_export_repository.return_value.run.call_count, 2) - self.assertEqual(mock_write_sidebar_messages.return_value.run.call_count, 2) - self.assertEqual(mock_write_list.return_value.run.call_count, 2) - self.assertEqual(mock_write_report.return_value.run.call_count, 2) - mock_write_summary.return_value.run.assert_called_once() + self.assertEqual(mock_consistency_check.call_count, 2) + self.assertEqual(mock_export_pdf.call_count, 2) + self.assertEqual(mock_export_html.call_count, 2) + self.assertEqual(mock_export_repository.call_count, 2) + self.assertEqual(mock_write_sidebar_messages.call_count, 2) + self.assertEqual(mock_write_list.call_count, 2) + self.assertEqual(mock_write_report.call_count, 2) + mock_write_summary.assert_called_once() self.assertIn("en", bot._result) self.assertIn("ru", bot._result) self.assertEqual(len(bot._result), 2) - 
self.assertTrue(bot._changelog["en"].is_empty()) # ChangeLogs must be empty because we read data from cache + self.assertTrue( + bot._changelog["en"].is_empty() + ) # ChangeLogs must be empty because we read data from cache self.assertTrue(bot._changelog["ru"].is_empty()) self.assertEqual(len(bot._changelog), 2) @patch("pywikibot.Site", autospec=True) @patch("pywikibot.Page", autospec=True) - @patch("pywikitools.resourcesbot.bot.WriteSummary", autospec=True) - @patch("pywikitools.resourcesbot.bot.WriteReport", autospec=True) - @patch("pywikitools.resourcesbot.bot.WriteList", autospec=True) - @patch("pywikitools.resourcesbot.bot.WriteSidebarMessages", autospec=True) - @patch("pywikitools.resourcesbot.bot.ExportRepository", autospec=True) - @patch("pywikitools.resourcesbot.bot.ExportHTML", autospec=True) - @patch("pywikitools.resourcesbot.bot.ExportPDF", autospec=True) - @patch("pywikitools.resourcesbot.bot.ConsistencyCheck", autospec=True) - def test_rewrite_options(self, mock_consistency_check, mock_export_pdf, mock_export_html, mock_export_repository, - mock_write_sidebar_messages, mock_write_list, mock_write_report, mock_write_summary, - mock_pywikibot_page, mock_pywikibot_site): + @patch("pywikitools.resourcesbot.modules.write_summary.WriteSummary.run", autospec=True) + @patch("pywikitools.resourcesbot.modules.write_report.WriteReport.run", autospec=True) + @patch("pywikitools.resourcesbot.modules.write_lists.WriteList.run", autospec=True) + @patch("pywikitools.resourcesbot.modules.write_sidebar_messages.WriteSidebarMessages.run", autospec=True) + @patch("pywikitools.resourcesbot.modules.export_repository.ExportRepository.run", autospec=True) + @patch("pywikitools.resourcesbot.modules.export_html.ExportHTML.run", autospec=True) + @patch("pywikitools.resourcesbot.modules.export_pdf.ExportPDF.run", autospec=True) + @patch("pywikitools.resourcesbot.modules.consistency_checks.ConsistencyCheck.run", autospec=True) + def test_rewrite_options( + self, + 
mock_consistency_check, + mock_export_pdf, + mock_export_html, + mock_export_repository, + mock_write_sidebar_messages, + mock_write_list, + mock_write_report, + mock_write_summary, + mock_pywikibot_page, + mock_pywikibot_site, + ): mock_pywikibot_page.side_effect = self.json_test_loader mock_pywikibot_site.return_value.logged_in.return_value = True - # Expected results: rewrite option -> post-processor that should get initialized with force_rewrite=True + # Expected results: rewrite option -> post-processor that should get initialized + # with force_rewrite=True rewrite_check: Dict[str, Mock] = { "summary": mock_write_summary, "list": mock_write_list, "report": mock_write_report, "html": mock_export_html, - "sidebar": mock_write_sidebar_messages + "sidebar": mock_write_sidebar_messages, } for rewrite_option, mocked_component in rewrite_check.items(): - # Component selected with rewrite option should have force_rewrite=True, the others not - bot = ResourcesBot(self.config, read_from_cache=True, rewrite=rewrite_option) + # Component selected with the rewrite option should have force_rewrite=True, + # the others not + bot = ResourcesBot(config=self.config, read_from_cache=True, rewrite=rewrite_option) bot.run() self.assertTrue(mocked_component.call_args.kwargs.get("force_rewrite")) for other_mock in rewrite_check.values(): @@ -192,7 +246,7 @@ def test_rewrite_options(self, mock_consistency_check, mock_export_pdf, mock_exp self.assertFalse(other_mock.call_args.kwargs.get("force_rewrite")) # "all" components should get called with force_rewrite=True - bot = ResourcesBot(self.config, read_from_cache=True, rewrite="all") + bot = ResourcesBot(config=self.config, read_from_cache=True, rewrite='all') bot.run() for mocked_component in rewrite_check.values(): self.assertTrue(mocked_component.call_args.kwargs.get("force_rewrite")) @@ -203,10 +257,8 @@ def test_rewrite_options(self, mock_consistency_check, mock_export_pdf, mock_exp for mocked_component in 
rewrite_check.values(): self.assertFalse(mocked_component.call_args.kwargs.get("force_rewrite")) - # TODO: Check correctness of rewrite="json" as well - # TODO: test_run_with_limit_lang -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/pywikitools/test/test_write_lists.py b/pywikitools/test/test_write_lists.py index 9c0df40..31e5ad8 100644 --- a/pywikitools/test/test_write_lists.py +++ b/pywikitools/test/test_write_lists.py @@ -1,35 +1,44 @@ import json -from os.path import abspath, dirname, join import unittest +from configparser import ConfigParser +from os.path import abspath, dirname, join from unittest.mock import patch + from pywikitools.fortraininglib import ForTrainingLib from pywikitools.resourcesbot.changes import ChangeLog, ChangeType - from pywikitools.resourcesbot.data_structures import FileInfo, LanguageInfo, json_decode -from pywikitools.resourcesbot.write_lists import WriteList +from pywikitools.resourcesbot.modules.write_lists import WriteList from pywikitools.test.test_data_structures import TEST_URL class TestWriteList(unittest.TestCase): @classmethod def setUpClass(self): - with self.assertLogs('pywikitools.resourcesbot.write_lists', level="WARNING"): - self.write_list = WriteList(ForTrainingLib("https://test.4training.net"), None, "", "") - with open(join(dirname(abspath(__file__)), "data", "ru.json"), 'r') as f: + self.config = ConfigParser() + self.config["resourcesbot"] = {"username": "", "password": ""} + with self.assertLogs( + "pywikitools.resourcesbot.modules.write_lists", level="WARNING" + ): + self.write_list = WriteList( + ForTrainingLib("https://test.4training.net"), self.config, None + ) + with open(join(dirname(abspath(__file__)), "data", "ru.json"), "r") as f: self.language_info: LanguageInfo = json.load(f, object_hook=json_decode) - # Create a pseudo English LanguageInfo - enough for our testing purposes (version is always the same) + # Create a pseudo English LanguageInfo - enough for our 
testing purposes + # (version is always the same) self.english_info = LanguageInfo("en", "English") for worksheet, info in self.language_info.worksheets.items(): self.english_info.add_worksheet_info(worksheet, info) - with open(join(dirname(abspath(__file__)), "data", "Russian_resources_list.mediawiki"), 'r') as f: + with open( + join( + dirname(abspath(__file__)), "data", "Russian_resources_list.mediawiki" + ), + "r", + ) as f: self.expected_output: str = f.read() - def test_force_rewrite(self): - with self.assertLogs('pywikitools.resourcesbot.write_lists', level="WARNING"): - write_list = WriteList(ForTrainingLib("https://test.4training.net"), None, "", "", force_rewrite=True) - self.assertTrue(write_list.needs_rewrite(LanguageInfo("ru", "Russian"), ChangeLog())) - self.assertFalse(self.write_list.needs_rewrite(LanguageInfo("ru", "Russian"), ChangeLog())) + # TODO def test_force_rewrite(self): def test_needs_rewrite(self): change_log = ChangeLog() @@ -50,41 +59,69 @@ def test_needs_rewrite(self): self.assertTrue(self.write_list.needs_rewrite(self.language_info, change_log)) def test_create_file_mediawiki(self): - pdf_mediawiki = r" [[File:pdficon_small.png|link={{filepath:Gottes_Reden_wahrnehmen.pdf}}]]" + pdf_mediawiki = ( + r" [[File:pdficon_small.png|link={{filepath:Gottes_Reden_wahrnehmen.pdf}}]]" + ) file_info = FileInfo("pdf", TEST_URL, "2018-12-23T13:11:23+00:00") # Should return empty string if file_info is None self.assertEqual(self.write_list._create_file_mediawiki(None), "") # Test a "normal" call - self.assertEqual(self.write_list._create_file_mediawiki(file_info), pdf_mediawiki) + self.assertEqual( + self.write_list._create_file_mediawiki(file_info), pdf_mediawiki + ) # Test for robust handling if URL is just a filename - file_info = FileInfo("pdf", "Gottes_Reden_wahrnehmen.pdf", "2018-12-23T13:11:23+00:00") - with self.assertLogs('pywikitools.resourcesbot.write_lists', level="WARNING"): - 
self.assertEqual(self.write_list._create_file_mediawiki(file_info), pdf_mediawiki) + file_info = FileInfo( + "pdf", "Gottes_Reden_wahrnehmen.pdf", "2018-12-23T13:11:23+00:00" + ) + with self.assertLogs( + "pywikitools.resourcesbot.modules.write_lists", level="WARNING" + ): + self.assertEqual( + self.write_list._create_file_mediawiki(file_info), pdf_mediawiki + ) def test_create_mediawiki(self): """Test creation of the list of available resources for a language""" # Compare with expected result - self.assertEqual(self.write_list.create_mediawiki(self.language_info, self.english_info), - self.expected_output) + self.assertEqual( + self.write_list.create_mediawiki(self.language_info, self.english_info), + self.expected_output, + ) def test_find_resources_list(self): - page_content1 = "Some text\n== Available training resources in Turkish (secular) ==\n" + page_content1 = ( + "Some text\n== Available training resources in" + " Turkish (secular) ==\n" + ) resources_list = "* List item 1\n* List item 2" page_content = page_content1 + resources_list # There is no list for German in page_content - self.assertEqual(self.write_list._find_resources_list(page_content, "German"), (0, 0)) + self.assertEqual( + self.write_list._find_resources_list(page_content, "German"), (0, 0) + ) # There is no list in page_content1 - self.assertEqual(self.write_list._find_resources_list(page_content1, "Turkish (secular)"), (0, 0)) + self.assertEqual( + self.write_list._find_resources_list(page_content1, "Turkish (secular)"), + (0, 0), + ) # Now he should find the list of available training resources. 
# Testing the special case of a language name with brackets at the same time - pos_start, pos_end = self.write_list._find_resources_list(page_content, "Turkish (secular)") + pos_start, pos_end = self.write_list._find_resources_list( + page_content, "Turkish (secular)" + ) self.assertEqual(page_content[pos_start:pos_end], resources_list) # If there is another list later in the page (with other lines in between), that other list should be ignored - with self.assertLogs('pywikitools.resourcesbot.write_lists', level="INFO"): - page_with_two_lists = page_content + "\n== Turkish (another variant) ==\n" + resources_list - pos_start2, pos_end2 = self.write_list._find_resources_list(page_with_two_lists, "Turkish (secular)") + with self.assertLogs( + "pywikitools.resourcesbot.modules.write_lists", level="INFO" + ): + page_with_two_lists = ( + page_content + "\n== Turkish (another variant) ==\n" + resources_list + ) + pos_start2, pos_end2 = self.write_list._find_resources_list( + page_with_two_lists, "Turkish (secular)" + ) self.assertEqual(pos_start, pos_start2) self.assertEqual(pos_end, pos_end2) @@ -95,51 +132,84 @@ def test_run_edge_cases(self, mock_page): self.write_list.run(self.language_info, self.english_info, changes, ChangeLog()) mock_page.assert_not_called() - # run() should warn and directly return if the language name is missing in LanguageInfo + # run() should warn and directly return if the language name is missing + # in LanguageInfo problematic_language_info = LanguageInfo("de", "") - changes.add_change("Prayer", ChangeType.NEW_WORKSHEET) # we need a relevant change - with self.assertLogs('pywikitools.resourcesbot.write_lists', level="WARNING"): - self.write_list.run(problematic_language_info, self.english_info, changes, ChangeLog()) + changes.add_change( + "Prayer", ChangeType.NEW_WORKSHEET + ) # we need a relevant change + with self.assertLogs( + "pywikitools.resourcesbot.modules.write_lists", level="WARNING" + ): + self.write_list.run( + 
problematic_language_info, self.english_info, changes, ChangeLog() + ) mock_page.return_value.exists.assert_not_called() # run() should warn and return if there is no language information page # (has the same name as LanguageInfo.english_name) not_existing_language_info = LanguageInfo("none", "NotExisting") mock_page.return_value.exists.return_value = False - with self.assertLogs('pywikitools.resourcesbot.write_lists', level="WARNING"): - self.write_list.run(not_existing_language_info, self.english_info, changes, ChangeLog()) + with self.assertLogs( + "pywikitools.resourcesbot.modules.write_lists", level="WARNING" + ): + self.write_list.run( + not_existing_language_info, self.english_info, changes, ChangeLog() + ) mock_page.return_value.exists.assert_called_once() mock_page.return_value.isRedirectPage.assert_not_called() - # run() should warn and return if language information page is redirect but the redirect target doesn't exist + # run() should warn and return if language information page is redirect but the + # redirect target doesn't exist mock_page.return_value.exists.return_value = True mock_page.return_value.isRedirectPage.return_value = True - mock_page.return_value.getRedirectTarget.return_value.exists.return_value = False - with self.assertLogs('pywikitools.resourcesbot.write_lists', level="WARNING"): - self.write_list.run(not_existing_language_info, self.english_info, changes, ChangeLog()) + mock_page.return_value.getRedirectTarget.return_value.exists.return_value = ( + False + ) + with self.assertLogs( + "pywikitools.resourcesbot.modules.write_lists", level="WARNING" + ): + self.write_list.run( + not_existing_language_info, self.english_info, changes, ChangeLog() + ) mock_page.return_value.text.assert_not_called() - # run() should warn and return if we can't find section for available resources in that language - mock_page.return_value.text = "== Available training resources in German ==\n* List" - with 
self.assertLogs('pywikitools.resourcesbot.write_lists', level="WARNING"): - self.write_list.run(self.language_info, self.english_info, changes, ChangeLog()) + # run() should warn and return if we can't find section for available resources + # in that language + mock_page.return_value.text = ( + "== Available training resources " + "in German ==\n* List" + ) + with self.assertLogs( + "pywikitools.resourcesbot.modules.write_lists", level="WARNING" + ): + self.write_list.run( + self.language_info, self.english_info, changes, ChangeLog() + ) mock_page.return_value.save.assert_not_called() @patch("pywikibot.Page") def test_run(self, mock_page): changes = ChangeLog() - changes.add_change("Prayer", ChangeType.NEW_WORKSHEET) # we need a relevant change + changes.add_change( + "Prayer", ChangeType.NEW_WORKSHEET + ) # we need a relevant change mock_page.return_value.exists.return_value = True mock_page.return_value.isRedirectPage.return_value = False # run() should update list of available training resources - page_content1 = "Some text\n== Available training resources in Russian ==\n" + page_content1 = ( + "Some text\n== Available training resources" + " in Russian ==\n" + ) resources_list = "* List\n* List" mock_page.return_value.text = page_content1 + resources_list self.write_list.run(self.language_info, self.english_info, changes, ChangeLog()) mock_page.return_value.save.assert_called_once() - self.assertEqual(mock_page.return_value.text, page_content1 + self.expected_output) + self.assertEqual( + mock_page.return_value.text, page_content1 + self.expected_output + ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/pywikitools/test/test_write_report.py b/pywikitools/test/test_write_report.py index a3cf44a..cbb3249 100644 --- a/pywikitools/test/test_write_report.py +++ b/pywikitools/test/test_write_report.py @@ -1,21 +1,22 @@ -from datetime import datetime import json -from os.path import abspath, dirname, join import unittest +from 
configparser import ConfigParser +from datetime import datetime +from os.path import abspath, dirname, join from unittest.mock import Mock, patch from pywikitools.fortraininglib import ForTrainingLib from pywikitools.resourcesbot.changes import ChangeLog - from pywikitools.resourcesbot.data_structures import LanguageInfo, json_decode -from pywikitools.resourcesbot.write_report import WriteReport +from pywikitools.resourcesbot.modules.write_report import WriteReport class TestWriteReport(unittest.TestCase): def setUp(self): - with open(join(dirname(abspath(__file__)), "data", "ru.json"), 'r') as f: + self.config = ConfigParser() + with open(join(dirname(abspath(__file__)), "data", "ru.json"), "r") as f: self.language_info = json.load(f, object_hook=json_decode) - with open(join(dirname(abspath(__file__)), "data", "en.json"), 'r') as f: + with open(join(dirname(abspath(__file__)), "data", "en.json"), "r") as f: self.english_info = json.load(f, object_hook=json_decode) self.fortraininglib = ForTrainingLib("https://test.4training.net") @@ -23,7 +24,7 @@ def setUp(self): def mock_pywikibot_pages(site, page: str): """Test all different cases in create_correctbot_mediawiki""" result = Mock() - if page == "Healing/ru": # Original page doesn't exist (serious error) + if page == "Healing/ru": # Original page doesn't exist (serious error) result.exists = lambda: False return result if page == "CorrectBot:Prayer/ru": # CorrectBot report missing @@ -36,43 +37,73 @@ def mock_pywikibot_pages(site, page: str): if page.startswith("CorrectBot"): # defaults for the correctbot_page - result.latest_revision = {"comment": "2 corrections, 1 suggestions, 0 warnings"} + result.latest_revision = { + "comment": "2 corrections, 1 suggestions, 0 warnings" + } result.editTime = lambda: datetime(2022, 1, 2) - if page == "CorrectBot:Hearing_from_God/ru": # CorrectBot report contains warnings - result.latest_revision = {"comment": "0 corrections, 5 suggestions, 2 warnings"} - elif page == 
"CorrectBot:Bible_Reading_Hints/ru": # weird CorrectBot edit message + if ( + page == "CorrectBot:Hearing_from_God/ru" + ): # CorrectBot report contains warnings + result.latest_revision = { + "comment": "0 corrections, 5 suggestions, 2 warnings" + } + elif ( + page == "CorrectBot:Bible_Reading_Hints/ru" + ): # weird CorrectBot edit message result.latest_revision = {"comment": "invalid"} - elif page == "CorrectBot:Time_with_God/ru": # outdated CorrectBot report + elif page == "CorrectBot:Time_with_God/ru": # outdated CorrectBot report result.editTime = lambda: datetime(2021, 1, 1) return result @patch("pywikibot.Page", autospec=True) def test_created_mediawiki(self, mock_page): - # Compare mediawiki output with the content in data/ru_worksheet_overview.mediawiki - write_report = WriteReport(self.fortraininglib, None) + # Compare mediawiki output with the content in + # data/ru_worksheet_overview.mediawiki + write_report = WriteReport(self.fortraininglib, self.config, None) mock_page.side_effect = self.mock_pywikibot_pages - with open(join(dirname(abspath(__file__)), "data", "ru_worksheet_overview.mediawiki"), 'r') as f: + with open( + join(dirname(abspath(__file__)), "data", "ru_worksheet_overview.mediawiki"), + "r", + ) as f: expected_mediawiki = f.read() - with self.assertLogs("pywikitools.resourcesbot.write_report", level="WARNING"): - self.assertEqual(write_report.create_worksheet_overview(self.language_info, self.english_info), - expected_mediawiki) - self.assertIn(expected_mediawiki, write_report.create_mediawiki(self.language_info, self.english_info)) - - @patch("pywikitools.resourcesbot.write_report.WriteReport.create_mediawiki") # don't go into create_mediawiki() + with self.assertLogs( + "pywikitools.resourcesbot.modules.write_report", level="WARNING" + ): + self.assertEqual( + write_report.create_worksheet_overview( + self.language_info, self.english_info + ), + expected_mediawiki, + ) + self.assertIn( + expected_mediawiki, + 
write_report.create_mediawiki( + self.language_info, self.english_info + ), + ) + + @patch( + "pywikitools.resourcesbot.modules.write_report.WriteReport" ".create_mediawiki" + ) # don't go into create_mediawiki() @patch("pywikibot.Page") def test_save_language_report(self, mock_page, mock_create_mediawiki): - write_report = WriteReport(self.fortraininglib, None) - # When there is no proper language name, save_language_report() should directly exit - with self.assertLogs("pywikitools.resourcesbot.write_report", level="WARNING"): + write_report = WriteReport(self.fortraininglib, self.config, None) + # When there is no proper language name, save_language_report() + # should directly exit + with self.assertLogs( + "pywikitools.resourcesbot.modules.write_report", level="WARNING" + ): write_report.save_language_report(LanguageInfo("de", ""), self.english_info) mock_page.return_value.exists.assert_not_called() # Language report should get created if it doesn't exist mock_page.return_value.exists.return_value = False - with self.assertLogs("pywikitools.resourcesbot.write_report", level="WARNING"): + with self.assertLogs( + "pywikitools.resourcesbot.modules.write_report", level="WARNING" + ): write_report.save_language_report(self.language_info, self.english_info) mock_page.return_value.save.assert_called_with("Created language report") @@ -82,18 +113,29 @@ def test_save_language_report(self, mock_page, mock_create_mediawiki): write_report.save_language_report(self.language_info, self.english_info) mock_page.return_value.save.assert_called_with("Updated language report") - @patch("pywikitools.resourcesbot.write_report.WriteReport.save_language_report") + @patch( + "pywikitools.resourcesbot.modules.write_report.WriteReport" + ".save_language_report" + ) def test_run(self, mock_save): - write_report = WriteReport(self.fortraininglib, None) + write_report = WriteReport(self.fortraininglib, self.config, None) # save_language_report() shouldn't get called when we have a language 
variant - write_report.run(LanguageInfo("de-test", "Deutsch (Test)"), self.english_info, ChangeLog(), ChangeLog()) + write_report.run( + LanguageInfo("de-test", "Deutsch (Test)"), + self.english_info, + ChangeLog(), + ChangeLog(), + ) mock_save.assert_not_called() - # save_language_report() should be called once (for Russian) and force_rewrite should be ignored - write_report = WriteReport(self.fortraininglib, None, force_rewrite=False) - write_report.run(self.language_info, self.english_info, ChangeLog(), ChangeLog()) + # save_language_report() should be called once (for Russian) and force_rewrite + # should be ignored + write_report = WriteReport(self.fortraininglib, self.config, None) + write_report.run( + self.language_info, self.english_info, ChangeLog(), ChangeLog() + ) mock_save.assert_called_once() -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/pywikitools/test/test_write_sidebar_messages.py b/pywikitools/test/test_write_sidebar_messages.py index edb492c..586ea50 100644 --- a/pywikitools/test/test_write_sidebar_messages.py +++ b/pywikitools/test/test_write_sidebar_messages.py @@ -1,21 +1,34 @@ import unittest +from configparser import ConfigParser from unittest.mock import patch + from pywikitools.fortraininglib import ForTrainingLib from pywikitools.resourcesbot.changes import ChangeLog, ChangeType - -from pywikitools.resourcesbot.data_structures import LanguageInfo, TranslationProgress, WorksheetInfo -from pywikitools.resourcesbot.write_sidebar_messages import WriteSidebarMessages +from pywikitools.resourcesbot.data_structures import ( + LanguageInfo, + TranslationProgress, + WorksheetInfo, +) +from pywikitools.resourcesbot.modules.write_sidebar_messages import WriteSidebarMessages from pywikitools.test.test_data_structures import TEST_PROGRESS class TestWriteSidebarMessages(unittest.TestCase): def setUp(self): - self.worksheet = WorksheetInfo("Hearing_from_God", "de", "Gottes Reden wahrnehmen", - 
TranslationProgress(**TEST_PROGRESS), "1.2") + self.config = ConfigParser() + self.worksheet = WorksheetInfo( + "Hearing_from_God", + "de", + "Gottes Reden wahrnehmen", + TranslationProgress(**TEST_PROGRESS), + "1.2", + ) self.language_info = LanguageInfo("de", "German") self.language_info.add_worksheet_info("Hearing_from_God", self.worksheet) - self.write_sidebar_messages = WriteSidebarMessages(ForTrainingLib("https://test.4training.net"), None) + self.write_sidebar_messages = WriteSidebarMessages( + ForTrainingLib("https://test.4training.net"), self.config, None + ) @patch("pywikibot.Page") def test_save_worksheet_title(self, mock_page): @@ -39,16 +52,27 @@ def test_save_worksheet_title(self, mock_page): # Check that we're writing to the correct system message mock_page.assert_called_with(None, "MediaWiki:Sidebar-hearingfromgod/de") - en_worksheet = WorksheetInfo("Hearing_from_God", "en", "Hearing from God", - TranslationProgress(**TEST_PROGRESS), "1.2") + en_worksheet = WorksheetInfo( + "Hearing_from_God", + "en", + "Hearing from God", + TranslationProgress(**TEST_PROGRESS), + "1.2", + ) self.write_sidebar_messages.save_worksheet_title(en_worksheet) - # Note: It's not MediaWiki:Sidebar-hearingfromgod/en as English is our source language + # Note: It's not MediaWiki:Sidebar-hearingfromgod/en as + # English is our source language mock_page.assert_called_with(None, "MediaWiki:Sidebar-hearingfromgod") - @patch("pywikitools.resourcesbot.write_sidebar_messages.WriteSidebarMessages.save_worksheet_title") + @patch( + "pywikitools.resourcesbot.modules.write_sidebar_messages.WriteSidebarMessages" + ".save_worksheet_title" + ) def test_run(self, mock_save): # save_worksheet_title() shouldn't get called when there are no changes - self.write_sidebar_messages.run(self.language_info, None, ChangeLog(), ChangeLog()) + self.write_sidebar_messages.run( + self.language_info, None, ChangeLog(), ChangeLog() + ) mock_save.assert_not_called() # save_worksheet_title() should get 
called when there is a change @@ -60,14 +84,19 @@ def test_run(self, mock_save): # save_worksheet_title() shouldn't get called when there change is irrelevant irrelevant_changes = ChangeLog() irrelevant_changes.add_change("Hearing_from_God", ChangeType.NEW_PDF) - self.write_sidebar_messages.run(self.language_info, None, irrelevant_changes, ChangeLog()) + self.write_sidebar_messages.run( + self.language_info, None, irrelevant_changes, ChangeLog() + ) mock_save.assert_called_once() - # save_worksheet_title() should be called when we have force_rewrite (even if there are no changes) - write_sidebar_messages = WriteSidebarMessages(None, None, force_rewrite=True) - write_sidebar_messages.run(self.language_info, None, ChangeLog(), ChangeLog()) + # save_worksheet_title() should be called when we have force_rewrite + # (even if there are no changes) + write_sidebar_messages = WriteSidebarMessages( + fortraininglib=None, config=self.config, site=None + ) + write_sidebar_messages.run(self.language_info, None, ChangeLog(), ChangeLog(), force_rewrite=True) self.assertEqual(mock_save.call_count, 2) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/pywikitools/test/test_write_summary.py b/pywikitools/test/test_write_summary.py index 7b3a36f..9730a0d 100644 --- a/pywikitools/test/test_write_summary.py +++ b/pywikitools/test/test_write_summary.py @@ -6,7 +6,7 @@ from pywikitools.resourcesbot.changes import ChangeLog, ChangeType from pywikitools.resourcesbot.data_structures import LanguageInfo, json_decode -from pywikitools.resourcesbot.write_summary import WriteSummary +from pywikitools.resourcesbot.modules.write_summary import WriteSummary class TestWriteSummary(unittest.TestCase): @@ -31,13 +31,15 @@ def test_created_mediawiki(self): @patch("pywikibot.Page") def test_save_summary(self, mock_page): # When English LanguageInfo is missing, save_summary() should directly exit - with self.assertLogs("pywikitools.resourcesbot.write_summary", 
level="WARNING"): + with self.assertLogs("pywikitools.resourcesbot.modules.write_summary", + level="WARNING"): self.write_summary.save_summary({}) mock_page.return_value.exists.assert_not_called() # Summary report should get created if it doesn't exist mock_page.return_value.exists.return_value = False - with self.assertLogs("pywikitools.resourcesbot.write_summary", level="WARNING"): + with self.assertLogs("pywikitools.resourcesbot.modules.write_summary", + level="WARNING"): self.write_summary.save_summary(self.language_data) mock_page.return_value.save.assert_called_with("Created summary report") @@ -47,7 +49,8 @@ def test_save_summary(self, mock_page): self.write_summary.save_summary(self.language_data) mock_page.return_value.save.assert_called_with("Updated summary report") - @patch("pywikitools.resourcesbot.write_summary.WriteSummary.save_summary") + @patch("pywikitools.resourcesbot.modules.write_summary.WriteSummary" + ".save_summary") def test_run(self, mock_save): # save_summary() shouldn't get called when there are no changes self.write_summary.run(self.language_data, self.empty_change_log) @@ -61,8 +64,8 @@ def test_run(self, mock_save): mock_save.assert_called_once() # save_summary() should be called when we have force_rewrite (even if there are no changes) - write_summary = WriteSummary(None, force_rewrite=True) - write_summary.run(self.language_data, self.empty_change_log) + write_summary = WriteSummary(None) + write_summary.run(self.language_data, self.empty_change_log, force_rewrite=True) self.assertEqual(mock_save.call_count, 2) diff --git a/resourcesbot.py b/resourcesbot.py index cc04a67..e6e11a7 100644 --- a/resourcesbot.py +++ b/resourcesbot.py @@ -1,7 +1,7 @@ """ -The ResourcesBot scans through the resources and all their translations, retrieving also information -on PDF/ODT files. It checks if new translations were added and does many helpful things then -like updating language overview pages where necessary. 
+The ResourcesBot scans through the resources and all their translations, retrieving also +information on PDF/ODT files. It checks if new translations were added and does many +helpful things then like updating language overview pages where necessary. It is supposed to run daily as a cronjob. Main steps: @@ -26,11 +26,12 @@ --lang LANGUAGECODE: only look at this one language (significantly faster) -l, --loglevel: change logging level (standard: warning; other options: debug, info) --rewrite: Force rewriting of one component or all - --read-from-cache: Read from the JSON structure instead of querying the current status of all worksheets + --read-from-cache: Read from the JSON structure instead of querying the current + status of all worksheets Logging: - If configured in config.ini (see config.example.ini), output will be logged to three different files - in three different verbosity levels (WARNING, INFO, DEBUG) + If configured in config.ini (see config.example.ini), output will be logged to three + different files in three different verbosity levels (WARNING, INFO, DEBUG) Reports: We write language reports into the folder specified in config.ini @@ -53,67 +54,120 @@ This is only the wrapper script, all main logic is in resourcesbot/bot.py """ + import argparse -from configparser import ConfigParser import logging import os import sys import traceback -from typing import List +from configparser import ConfigParser +from typing import Dict, List -from pywikitools.resourcesbot.bot import ResourcesBot +from pywikitools.resourcesbot.bot import AVAILABLE_MODULES, ResourcesBot, load_module def parse_arguments() -> ResourcesBot: """ Parses command-line arguments. + @return: ResourcesBot instance """ - description = 'Update list of available training resources in the language information pages' - epilog = 'Refer to https://datahub.io/core/language-codes/r/0.html for language codes.' 
- log_levels: List[str] = ['debug', 'info', 'warning', 'error'] - rewrite_options: List[str] = ['all', 'json', 'list', 'report', 'summary', 'html', 'pdf', 'sidebar'] - - parser = argparse.ArgumentParser(prog='python3 resourcesbot.py', description=description, epilog=epilog) - parser.add_argument('--lang', help='run script for only one language') - parser.add_argument('-l', '--loglevel', choices=log_levels, default="warning", help='set loglevel for the script') - parser.add_argument('--read-from-cache', action='store_true', help='Read results from json cache from the server') - parser.add_argument('--rewrite', choices=rewrite_options, help='Force rewriting of one component or all') + parser = argparse.ArgumentParser( + prog="python | python3 resourcesbot.py", + description="Update list of available training resources in the" + " language information pages.", + formatter_class=argparse.RawTextHelpFormatter, + ) + + log_levels: List[str] = ["debug", "info", "warning", "error"] + rewrite_options: List[str] = ["all", "json", "summary"] + modules: Dict[str, str] = {} # abbreviation -> full name + modules_help = "Select the modules to be run. 
Available options are:\n" + # Read module information from the module classes + for selected_module in AVAILABLE_MODULES: + module = load_module(selected_module) + modules_help += f" - {module.abbreviation()}: {module.help_summary()}\n" + modules[module.abbreviation()] = selected_module + if module.can_be_rewritten(): + rewrite_options.append(module.abbreviation()) + modules_help += "Default: run all modules" + + parser.add_argument( + "--read-from-cache", + action="store_true", + help="Read results from json cache from the server", + ) + parser.add_argument("--lang", help="Process only one language (ISO 639-1 code)") + parser.add_argument("-m", nargs="+", choices=modules.keys(), help=modules_help) + parser.add_argument( + "--rewrite", + choices=rewrite_options, + help="Force rewriting of one component or all.", + ) + parser.add_argument( + "-l", + "--loglevel", + choices=log_levels, + default="warning", + help="Set loglevel for the script", + ) args = parser.parse_args() limit_to_lang = None if args.lang is not None: limit_to_lang = str(args.lang) + config = ConfigParser() - config.read(os.path.dirname(os.path.abspath(__file__)) + '/config.ini') + config.read(os.path.dirname(os.path.abspath(__file__)) + "/config.ini") + numeric_level = getattr(logging, args.loglevel.upper(), None) assert isinstance(numeric_level, int) set_loglevel(config, numeric_level) - return ResourcesBot(config, limit_to_lang=limit_to_lang, rewrite=args.rewrite, - read_from_cache=args.read_from_cache) + + # Map abbreviations to full module names + if args.m is None: + run_modules = AVAILABLE_MODULES + else: + run_modules = [modules[abbr] for abbr in args.m] + + return ResourcesBot( + config=config, + read_from_cache=args.read_from_cache, + limit_to_lang=limit_to_lang, + modules=run_modules, + rewrite=args.rewrite + ) def set_loglevel(config: ConfigParser, loglevel: int): """ Setting up logging to three log files and to stdout. 
- The file paths for the three log files (for each log level WARNING, INFO and DEBUG) are - configured in the config.ini - @param loglevel: logging.WARNING is standard, logging.INFO for more details, logging.DEBUG for a lot of output + The file paths for the three log files (for each log level + WARNING, INFO and DEBUG) are configured in the config.ini. + + @param loglevel: logging.WARNING is standard, logging.INFO for details, + logging.DEBUG for a lot of output. + + @param config: A config set of parameters to be used in this function. """ root = logging.getLogger() root.setLevel(logging.DEBUG) + # The following is necessary so that debug messages go to debuglogfile - logging.getLogger('pywikitools.resourcesbot').setLevel(logging.DEBUG) + logging.getLogger("pywikitools.resourcesbot").setLevel(logging.DEBUG) sh = logging.StreamHandler(sys.stdout) sh.setLevel(loglevel) - fformatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s') + fformatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") sh.setFormatter(fformatter) root.addHandler(sh) - log_path = config.get('Paths', 'logs', fallback='') - if log_path == '': - root.warning('No log directory specified in configuration. Using current working directory') + log_path = config.get("Paths", "logs", fallback="") + if log_path == "": + root.warning( + "No log directory specified in configuration." 
+ " Using current working directory" + ) # Logging output to files with different verbosity if config.has_option("resourcesbot", "logfile"): fh = logging.FileHandler(f"{log_path}{config['resourcesbot']['logfile']}") @@ -121,12 +175,16 @@ def set_loglevel(config: ConfigParser, loglevel: int): fh.setFormatter(fformatter) root.addHandler(fh) if config.has_option("resourcesbot", "infologfile"): - fh_info = logging.FileHandler(f"{log_path}{config['resourcesbot']['infologfile']}") + fh_info = logging.FileHandler( + f"{log_path}{config['resourcesbot']['infologfile']}" + ) fh_info.setLevel(logging.INFO) fh_info.setFormatter(fformatter) root.addHandler(fh_info) if config.has_option("resourcesbot", "debuglogfile"): - fh_debug = logging.FileHandler(f"{log_path}{config['resourcesbot']['debuglogfile']}") + fh_debug = logging.FileHandler( + f"{log_path}{config['resourcesbot']['debuglogfile']}" + ) fh_debug.setLevel(logging.DEBUG) fh_debug.setFormatter(fformatter) root.addHandler(fh_debug)