diff --git a/HISTORY.md b/HISTORY.md
index 2813f31b..48bdaa99 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,6 +1,32 @@
 ## History / Changelog
 
 
+### 1.9.0
+
+Extraction:
+- add markdown as explicit output (#550)
+- improve recall preset (#571)
+- speedup for readability-lxml (#547)
+- add global options object for extraction and use it in CLI (#552)
+- fix: better encoding detection (#548)
+- recall: fix for lists inside tables with @mikhainin (#534)
+- add symbol to preserve vertical spacing in Markdown (#499)
+- fix: table cell separators in non-XML output (#563)
+- slightly better accuracy and execution speed overall
+
+Metadata:
+- add file creation date (date extraction, JSON & XML-TEI) (#561)
+- fix: empty content in meta tag by @felipehertzer (#545)
+
+Maintenance:
+- restructure and simplify code (#543, #556)
+- CLI & downloads: revamp and use global options (#565)
+- eval: review code, add guidelines and small benchmark (#542)
+- fix: raise error if config file does not exist (#554)
+- deprecate `process_record()` (#549)
+- docs: convert readme to markdown and update info (#564, #578)
+
+
 ### 1.8.1
 
 Maintenance:
diff --git a/setup.py b/setup.py
index 97379f1d..a9c20186 100644
--- a/setup.py
+++ b/setup.py
@@ -19,9 +19,6 @@ def get_long_description():
     "Return the README"
     with open("README.md", "r", encoding="utf-8") as filehandle:
         long_description = filehandle.read()
-    # long_description += "\n\n"
-    # with open("CHANGELOG.md", encoding="utf8") as f:
-    #     long_description += f.read()
     return long_description
 
 
diff --git a/trafilatura/__init__.py b/trafilatura/__init__.py
index f0fecc8b..164ea561 100644
--- a/trafilatura/__init__.py
+++ b/trafilatura/__init__.py
@@ -9,7 +9,7 @@
 __author__ = 'Adrien Barbaresi and contributors'
 __license__ = "Apache-2.0"
 __copyright__ = 'Copyright 2019-2024, Adrien Barbaresi'
-__version__ = '1.8.1'
+__version__ = '1.9.0'
 
 
 import logging