Skip to content

Commit

Permalink
tests on Python 3.10+: fix errors and warnings
Browse files: browse the repository at this point in the history
  • Loading branch information
adbar committed Nov 18, 2021
1 parent 8af3939 commit bf79dee
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 5 deletions.
2 changes: 1 addition & 1 deletion tests/cli_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def test_parser():
args = cli.parse_args(testargs)
assert e.type == SystemExit
assert e.value.code == 0
assert re.match(r'Trafilatura [0-9]\.[0-9]\.[0-9] - Python [0-9]\.[0-9]\.[0-9]', f.getvalue())
assert re.match(r'Trafilatura [0-9]\.[0-9]\.[0-9] - Python [0-9]\.[0-9]+\.[0-9]', f.getvalue())


def test_climain():
Expand Down
9 changes: 5 additions & 4 deletions trafilatura/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,8 +385,9 @@ def recover_wild_text(tree, result_body, potential_tags=TAG_CATALOG, deduplicate
etree.strip_tags(search_tree, 'a', 'ref', 'span')
else:
etree.strip_tags(search_tree, 'span')
processed_elems = [handle_textelem(element, potential_tags, deduplicate, config) for element in search_tree.iter('blockquote', 'code', 'div', 'p', 'pre', 'q', 'quote', 'table')]
result_body.extend(list(filter(None.__ne__, processed_elems)))
result_body.extend(e for e in
[handle_textelem(element, potential_tags, deduplicate, config) for element in search_tree.iter('blockquote', 'code', 'div', 'p', 'pre', 'q', 'quote', 'table')]
if e is not None)
return result_body


Expand Down Expand Up @@ -498,9 +499,9 @@ def extract_content(tree, favor_precision=False, favor_recall=False, include_tab
##etree.strip_tags(subtree, 'lb') # BoingBoing-Bug
# extract content
# list(filter(None.__ne__, processed_elems))
result_body.extend([e for e in
result_body.extend(e for e in
[handle_textelem(e, potential_tags, deduplicate, config) for e in subtree.xpath('.//*')]
if e is not None])
if e is not None)
# remove trailing titles
while len(result_body) > 0 and result_body[-1].tag in HEADINGS:
result_body[-1].getparent().remove(result_body[-1])
Expand Down

0 comments on commit bf79dee

Please sign in to comment.