-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathci_spellcheck_format.py
61 lines (47 loc) · 1.41 KB
/
ci_spellcheck_format.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import sys
import pathlib
# Marker that opens each per-page section of the spell-check log.  The leading
# newline anchors the match to the beginning of a line.
BLOCK_MARKER = "\n> Processing: content/"

# Horizontal rule printed around each word list (80 dashes); it is not a typo.
SEPARATOR_LINE = "-" * 80

# Prefixes of bookkeeping lines that must not be reported as typos.
IGNORED_PREFIXES = (
    "<htmlcontent> content/",
    "Misspelled words:",
    "!!!Spelling check failed!!!",
)


#
# process file
#
def parse_spellcheck_log(text: str) -> "dict[str, list[str]]":
    """Collect the reported words of every page section found in *text*.

    A section starts at a line beginning with ``> Processing: content/`` and
    runs up to the next such line (or the end of the text).  Anything before
    the first section header is ignored.

    Args:
        text: Full contents of the spell-check log.

    Returns:
        A dict mapping each page path (the third whitespace-separated token of
        the section header) to the sorted list of remaining lines in that
        section.  Sections without any remaining line are omitted.  If a page
        path occurs in several sections, the last one with words wins.
    """
    result = {}
    # Prepend a newline so that a section header sitting on the very first
    # line of the log (no preceding newline) is matched as well.
    sections = ("\n" + text).split(BLOCK_MARKER)[1:]
    for section in sections:
        # Re-attach the marker so the header line is parsed in its full form.
        lines = [line for line in (BLOCK_MARKER + section).splitlines() if line]
        if len(lines) < 2:
            # Header only: nothing was reported for this page.
            continue
        # Header looks like "> Processing: content/<page>"; token 2 is the path.
        article_path = lines[0].split()[2]
        words = [
            line
            for line in lines[1:]
            if line != SEPARATOR_LINE and not line.startswith(IGNORED_PREFIXES)
        ]
        if words:
            result[article_path] = sorted(words)
    return result


#
# output as markdown table
#
def render_markdown_table(result: "dict[str, list[str]]") -> str:
    """Render *result* as a two-column markdown table (page, typos).

    The two header rows are always present, even when *result* is empty.  The
    returned string has no trailing newline.
    """
    rows = [
        "| 📖 Page | ❌ Typo(s) |",
        "|---------|-------------|",
    ]
    rows.extend(
        f"|`{page}`| {', '.join(words)} |" for page, words in result.items()
    )
    return "\n".join(rows)


def main(argv=None) -> int:
    """Read the log named by the first argument and print the markdown table.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.  Only the first argument is used; extra
            arguments are ignored.

    Returns:
        The process exit status: 0 on success, 2 when no log path was given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        prog = pathlib.Path(sys.argv[0]).name
        print(f"usage: {prog} <spellcheck-log>", file=sys.stderr)
        return 2
    text = pathlib.Path(args[0]).read_text(encoding="utf-8")
    print(render_markdown_table(parse_spellcheck_log(text)))
    return 0


if __name__ == "__main__":
    sys.exit(main())