-
Notifications
You must be signed in to change notification settings - Fork 400
/
Copy pathlint-style.py
executable file
·277 lines (245 loc) · 12.6 KB
/
lint-style.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
#!/usr/bin/env python3
"""
Lint a file or files from mathlib for style.
Sample usage:
$ ./scripts/lint-style.py $(find Mathlib -name '*.lean')
which will lint all of the Lean files in the specified directories.
The resulting error output will contain one line for each style error
encountered that isn't in the list of allowed / ignored style exceptions.
Paths with no errors will not appear in the output, and the script will
exit with successful return code if there are no errors encountered in
any provided paths.
Paths emitted in the output will match the paths provided on the
command line for any files containing errors -- in particular, linting
a relative path (like ``Mathlib/Foo/Bar.lean``) will produce errors
that contain the relative path, whilst linting absolute paths (like
``/root/mathlib4/Mathlib/Foo/Bar.lean``) will produce errors with the
absolute path.
The linters in this script are gradually being rewritten in Lean.
Do not add new linters here; please write them in Lean instead.
To run all style linters, run `lake exe lint-style`.
"""
# TODO: This is adapted from the linter for mathlib3. It should be rewritten in Lean.
from pathlib import Path
import sys
import re
import shutil
ERR_IBY = 11 # isolated by
ERR_IWH = 22 # isolated where
ERR_SEM = 13 # the substring " ;"
ERR_CLN = 16 # line starts with a colon
ERR_IND = 17 # second line not correctly indented
ERR_ARR = 18 # space after "←"
ERR_NSP = 20 # non-terminal simp
exceptions = []
new_exceptions = False
def annotate_comments(enumerate_lines):
"""
Take a list of tuples of enumerated lines of the form
(line_number, line, ...)
and return a list of
(line_number, line, ..., True/False)
where lines have True attached when they are in comments.
"""
nesting_depth = 0 # We're in a comment when `nesting_depth > 0`.
starts_in_comment = False # Whether we're in a comment when starting the line.
for line_nr, line, *rem in enumerate_lines:
# We assume multiline comments do not begin or end within single-line comments.
if line == "\n" or line.lstrip().startswith("--"):
yield line_nr, line, *rem, True
continue
# We assume that "/-/" and "-/-" never occur outside of "--" comments.
# We assume that we do not encounter "... -/ <term> /- ...".
# We also don't account for "/-" and "-/" appearing in strings.
starts_in_comment = (nesting_depth > 0)
nesting_depth = nesting_depth + line.count("/-") - line.count("-/")
in_comment = (starts_in_comment or line.lstrip().startswith("/-")) and \
(nesting_depth > 0 or line.rstrip().endswith("-/"))
yield line_nr, line, *rem, in_comment
def annotate_strings(enumerate_lines):
"""
Take a list of tuples of enumerated lines of the form
(line_number, line, ...)
and return a list of
(line_number, line, ..., True/False)
where lines have True attached when they are in strings.
"""
in_string = False
in_comment = False
for line_nr, line, *rem in enumerate_lines:
# ignore comment markers inside string literals
if not in_string:
if "/-" in line:
in_comment = True
if "-/" in line:
in_comment = False
# ignore quotes inside comments
if not in_comment:
# crude heuristic: if the number of non-escaped quote signs is odd,
# we're starting / ending a string literal
if line.count("\"") - line.count("\\\"") % 2 == 1:
in_string = not in_string
# if there are quote signs in this line,
# a string literal probably begins and / or ends here,
# so we skip this line
if line.count("\"") > 0:
yield line_nr, line, *rem, True
continue
if in_string:
yield line_nr, line, *rem, True
continue
yield line_nr, line, *rem, False
def four_spaces_in_second_line(lines, path):
# TODO: also fix the space for all lines before ":=", right now we only fix the line after
# the first line break
errors = []
# We never alter the first line, as it does not occur as next_line in the iteration over the
# zipped lines below, hence we add it here
newlines = [lines[0]]
annotated_lines = list(annotate_comments(lines))
for (_, line, is_comment), (next_line_nr, next_line, _) in zip(annotated_lines,
annotated_lines[1:]):
# Check if the current line matches "(lemma|theorem) .* :"
new_next_line = next_line
if (not is_comment) and re.search(r"^(protected )?(def|lemma|theorem) (?!.*:=).*(where)?$",
line):
# Calculate the number of spaces before the first non-space character in the next line
stripped_next_line = next_line.lstrip()
if not (next_line == '\n' or next_line.startswith("#") or stripped_next_line.startswith("--")):
num_spaces = len(next_line) - len(stripped_next_line)
# The match with "| " could potentially match with a different usage of the same
# symbol, e.g. some sort of norm. In that case a space is not necessary, so
# looking for "| " should be enough.
if stripped_next_line.startswith("| ") or line.endswith("where\n"):
# Check and fix if the number of leading space is not 2
if num_spaces != 2:
errors += [(ERR_IND, next_line_nr, path)]
new_next_line = ' ' * 2 + stripped_next_line
# Check and fix if the number of leading spaces is not 4
else:
if num_spaces != 4:
errors += [(ERR_IND, next_line_nr, path)]
new_next_line = ' ' * 4 + stripped_next_line
newlines.append((next_line_nr, new_next_line))
return errors, newlines
flexible_tactics = ["rfl", "ring", "aesop", "norm_num", "positivity", "abel", "omega", "linarith", "nlinarith"]
def nonterminal_simp_check(lines, path):
errors = []
newlines = []
annotated_lines = list(annotate_comments(lines))
for (line_nr, line, is_comment), (_, next_line, _) in zip(annotated_lines,
annotated_lines[1:]):
# Check if the current line matches whitespace followed by "simp"
new_line = line
# TODO it would be better to use a regex like r"^\s*simp( \[.*\])?( at .*)?$" and thereby
# catch all possible simp invocations. Adding this will require more initial cleanup or
# nolint.
if (not is_comment) and re.search(r"^\s*simp$", line):
# Calculate the number of spaces before the first non-space character in the line
num_spaces = len(line) - len(line.lstrip())
# Calculate the number of spaces before the first non-space character in the next line
stripped_next_line = next_line.lstrip()
if not (next_line == '\n' or next_line.startswith("#") or stripped_next_line.startswith("--") or any(f in next_line for f in flexible_tactics)):
num_next_spaces = len(next_line) - len(stripped_next_line)
# Check if the number of leading spaces is the same
if num_spaces == num_next_spaces:
# If so, the simp is nonterminal
errors += [(ERR_NSP, line_nr, path)]
new_line = line.replace("simp", "simp?")
newlines.append((line_nr, new_line))
newlines.append(lines[-1])
return errors, newlines
def isolated_by_dot_semicolon_check(lines, path):
errors = []
newlines = []
for line_nr, line in lines:
if line.strip() == "by":
# We excuse those "by"s following a comma or ", fun ... =>", since generally hanging "by"s
# should not be used in the second or later arguments of a tuple/anonymous constructor
# See https://github.com/leanprover-community/mathlib4/pull/3825#discussion_r1186702599
prev_line = lines[line_nr - 2][1].rstrip()
if not prev_line.endswith(",") and not re.search(", fun [^,]* (=>|↦)$", prev_line):
errors += [(ERR_IBY, line_nr, path)]
elif line.lstrip().startswith("by "):
# We also error if the previous line ends on := and the current line starts with "by ".
prev_line = newlines[-1][1].rstrip()
if prev_line.endswith(":="):
# If the previous line is short enough, we can suggest an auto-fix.
# Future: error also if it is not: currently, mathlib contains about 30 such
# instances which are not obvious to fix.
if len(prev_line) <= 97:
errors += [(ERR_IBY, line_nr, path)]
newlines[-1] = (line_nr - 1, prev_line + " by\n")
indent = " " * (len(line) - len(line.lstrip()))
line = f"{indent}{line.lstrip()[3:]}"
elif line.lstrip() == "where":
errors += [(ERR_IWH, line_nr, path)]
if " ;" in line:
errors += [(ERR_SEM, line_nr, path)]
line = line.replace(" ;", ";")
if line.lstrip().startswith(":"):
errors += [(ERR_CLN, line_nr, path)]
newlines.append((line_nr, line))
return errors, newlines
def left_arrow_check(lines, path):
errors = []
newlines = []
for line_nr, line, is_comment, in_string in annotate_strings(annotate_comments(lines)):
if is_comment or in_string:
newlines.append((line_nr, line))
continue
# Allow "←" to be followed by "%" or "`", but not by "`(" or "``(" (since "`()" and "``()"
# are used for syntax quotations). Otherwise, insert a space after "←".
new_line = re.sub(r'←(?:(?=``?\()|(?![%`]))(\S)', r'← \1', line)
if new_line != line:
errors += [(ERR_ARR, line_nr, path)]
newlines.append((line_nr, new_line))
return errors, newlines
def output_message(path, line_nr, code, msg):
# We are outputting for github. We duplicate path, line_nr and code,
# so that they are also visible in the plaintext output.
print(f"::error file={path},line={line_nr},code={code}::{path}:{line_nr} {code}: {msg}")
def format_errors(errors):
global new_exceptions
for errno, line_nr, path in errors:
if (errno, path.resolve(), None) in exceptions:
continue
new_exceptions = True
if errno == ERR_IBY:
output_message(path, line_nr, "ERR_IBY", "Line is an isolated 'by'")
if errno == ERR_IWH:
output_message(path, line_nr, "ERR_IWH", "Line is an isolated where")
if errno == ERR_SEM:
output_message(path, line_nr, "ERR_SEM", "Line contains a space before a semicolon")
if errno == ERR_CLN:
output_message(path, line_nr, "ERR_CLN", "Put : and := before line breaks, not after")
if errno == ERR_IND:
output_message(path, line_nr, "ERR_IND", "If the theorem/def statement requires multiple lines, indent it correctly (4 spaces or 2 for `|`)")
if errno == ERR_ARR:
output_message(path, line_nr, "ERR_ARR", "Missing space after '←'.")
if errno == ERR_NSP:
output_message(path, line_nr, "ERR_NSP", "Non-terminal simp. Replace with `simp?` and use the suggested output")
def lint(path, fix=False):
global new_exceptions
with path.open(encoding="utf-8", newline="") as f:
# We enumerate the lines so that we can report line numbers in the error messages correctly
# we will modify lines as we go, so we need to keep track of the original line numbers
lines = f.readlines()
enum_lines = enumerate(lines, 1)
newlines = enum_lines
for error_check in [four_spaces_in_second_line,
isolated_by_dot_semicolon_check,
left_arrow_check,
nonterminal_simp_check]:
errs, newlines = error_check(newlines, path)
format_errors(errs)
# if we haven't been asked to fix errors, or there are no errors or no fixes, we're done
if fix and new_exceptions and enum_lines != newlines:
path.with_name(path.name + '.bak').write_text("".join(l for _,l in newlines), encoding = "utf8")
shutil.move(path.with_name(path.name + '.bak'), path)
fix = "--fix" in sys.argv
argv = (arg for arg in sys.argv[1:] if arg != "--fix")
for filename in argv:
lint(Path(filename), fix=fix)
if new_exceptions:
exit(1)