lint: Use zulint as wrapper for running different linters.

amanagr · timabbott · commit 7f39201e791c · 2020-01-14T16:00:24.000-08:00
diff --git a/requirements.txt b/requirements.txt
@@ -7,3 +7,4 @@ pytest
 -e ./zulip
 -e ./zulip_bots
 -e ./zulip_botserver
+-e git+https://github.com/zulip/zulint@aaed679f1ad38b230090eadd3870b7682500f60c#egg=zulint==1.0.0
diff --git a/tools/custom_check.py b/tools/custom_check.py
@@ -1,126 +1,36 @@
 from __future__ import print_function
 from __future__ import absolute_import
 
-import os
-import re
-import traceback
-
-from server_lib.printer import print_err, colors
-
-from typing import cast, Any, Callable, Dict, List, Optional, Tuple
-
-def build_custom_checkers(by_lang):
-    # type: (Dict[str, List[str]]) -> Tuple[Callable[[], bool], Callable[[], bool]]
-    RuleList = List[Dict[str, Any]]
-
-    def custom_check_file(fn, identifier, rules, skip_rules=None, max_length=None):
-        # type: (str, str, RuleList, Optional[Any], Optional[int]) -> bool
-        failed = False
-        color = next(colors)
-
-        line_tups = []
-        for i, line in enumerate(open(fn)):
-            line_newline_stripped = line.strip('\n')
-            line_fully_stripped = line_newline_stripped.strip()
-            skip = False
-            for rule in skip_rules or []:
-                if re.match(rule, line):
-                    skip = True
-            if line_fully_stripped.endswith('  # nolint'):
-                continue
-            if skip:
-                continue
-            tup = (i, line, line_newline_stripped, line_fully_stripped)
-            line_tups.append(tup)
-
-        rules_to_apply = []
-        fn_dirname = os.path.dirname(fn)
-        for rule in rules:
-            exclude_list = rule.get('exclude', set())
-            if fn in exclude_list or fn_dirname in exclude_list:
-                continue
-            if rule.get("include_only"):
-                found = False
-                for item in rule.get("include_only", set()):
-                    if item in fn:
-                        found = True
-                if not found:
-                    continue
-            rules_to_apply.append(rule)
-
-        for rule in rules_to_apply:
-            exclude_lines = {
-                line for
-                (exclude_fn, line) in rule.get('exclude_line', set())
-                if exclude_fn == fn
-            }
-
-            pattern = rule['pattern']
-            for (i, line, line_newline_stripped, line_fully_stripped) in line_tups:
-                if line_fully_stripped in exclude_lines:
-                    exclude_lines.remove(line_fully_stripped)
-                    continue
-                try:
-                    line_to_check = line_fully_stripped
-                    if rule.get('strip') is not None:
-                        if rule['strip'] == '\n':
-                            line_to_check = line_newline_stripped
-                        else:
-                            raise Exception("Invalid strip rule")
-                    if re.search(pattern, line_to_check):
-                        print_err(identifier, color, '{} at {} line {}:'.format(
-                            rule['description'], fn, i+1))
-                        print_err(identifier, color, line)
-                        failed = True
-                except Exception:
-                    print("Exception with %s at %s line %s" % (rule['pattern'], fn, i+1))
-                    traceback.print_exc()
-
-            if exclude_lines:
-                print('Please remove exclusions for file %s: %s' % (fn, exclude_lines))
-
-        lastLine = None
-        for (i, line, line_newline_stripped, line_fully_stripped) in line_tups:
-            if isinstance(line, bytes):
-                line_length = len(line.decode("utf-8"))
-            else:
-                line_length = len(line)
-            if (max_length is not None and line_length > max_length and
-                '# type' not in line and 'test' not in fn and 'example' not in fn and
-                not re.match("\[[ A-Za-z0-9_:,&()-]*\]: http.*", line) and
-                not re.match("`\{\{ external_api_uri_subdomain \}\}[^`]+`", line) and
-                    "#ignorelongline" not in line and 'migrations' not in fn):
-                print("Line too long (%s) at %s line %s: %s" % (len(line), fn, i+1, line_newline_stripped))
-                failed = True
-            lastLine = line
-
-        if lastLine and ('\n' not in lastLine):
-            print("No newline at the end of file.  Fix with `sed -i '$a\\' %s`" % (fn,))
-            failed = True
-
-        return failed
-
-    whitespace_rules = [
-        # This linter should be first since bash_rules depends on it.
-        {'pattern': '\s+$',
-         'strip': '\n',
-         'description': 'Fix trailing whitespace'},
-        {'pattern': '\t',
-         'strip': '\n',
-         'description': 'Fix tab-based whitespace'},
-    ]  # type: RuleList
-    markdown_whitespace_rules = list([rule for rule in whitespace_rules if rule['pattern'] != '\s+$']) + [
-        # Two spaces trailing a line with other content is okay--it's a markdown line break.
-        # This rule finds one space trailing a non-space, three or more trailing spaces, and
-        # spaces on an empty line.
-        {'pattern': '((?<!\s)\s$)|(\s\s\s+$)|(^\s+$)',
-         'strip': '\n',
-         'description': 'Fix trailing whitespace'},
-        {'pattern': '^#+[A-Za-z0-9]',
-         'strip': '\n',
-         'description': 'Missing space after # in heading'},
-    ]  # type: RuleList
-    python_rules = cast(RuleList, [
+from typing import cast, Any, Dict, List, Tuple
+from zulint.custom_rules import RuleList
+
+Rule = List[Dict[str, Any]]
+
+whitespace_rules = [
+    # This linter should be first since bash_rules depends on it.
+    {'pattern': '\s+$',
+     'strip': '\n',
+     'description': 'Fix trailing whitespace'},
+    {'pattern': '\t',
+     'strip': '\n',
+     'description': 'Fix tab-based whitespace'},
+]  # type: Rule
+
+markdown_whitespace_rules = list([rule for rule in whitespace_rules if rule['pattern'] != '\s+$']) + [
+    # Two spaces trailing a line with other content is okay--it's a markdown line break.
+    # This rule finds one space trailing a non-space, three or more trailing spaces, and
+    # spaces on an empty line.
+    {'pattern': '((?<!\s)\s$)|(\s\s\s+$)|(^\s+$)',
+     'strip': '\n',
+     'description': 'Fix trailing whitespace'},
+    {'pattern': '^#+[A-Za-z0-9]',
+     'strip': '\n',
+     'description': 'Missing space after # in heading'},
+]  # type: Rule
+
+python_rules = RuleList(
+    langs=['py'],
+    rules=cast(Rule, [
         {'pattern': '".*"%\([a-z_].*\)?$',
          'description': 'Missing space around "%"'},
         {'pattern': "'.*'%\([a-z_].*\)?$",
@@ -186,84 +96,68 @@ def custom_check_file(fn, identifier, rules, skip_rules=None, max_length=None):
          'bad_lines': ['class TestSomeBot(DefaultTests, BotTestCase):'],
          'good_lines': ['class TestSomeBot(BotTestCase, DefaultTests):'],
          'description': 'Bot test cases should inherit from BotTestCase before DefaultTests.'},
-    ]) + whitespace_rules
-    bash_rules = [
+    ]) + whitespace_rules,
+    max_length=140,
+)
+
+bash_rules = RuleList(
+    langs=['sh'],
+    rules=cast(Rule, [
         {'pattern': '#!.*sh [-xe]',
          'description': 'Fix shebang line with proper call to /usr/bin/env for Bash path, change -x|-e switches'
                         ' to set -x|set -e'},
-    ] + whitespace_rules[0:1]  # type: RuleList
-    prose_style_rules = [
-        {'pattern': '[^\/\#\-\"]([jJ]avascript)',  # exclude usage in hrefs/divs
-         'description': "javascript should be spelled JavaScript"},
-        {'pattern': '[^\/\-\.\"\'\_\=\>]([gG]ithub)[^\.\-\_\"\<]',  # exclude usage in hrefs/divs
-         'description': "github should be spelled GitHub"},
-        {'pattern': '[oO]rganisation',  # exclude usage in hrefs/divs
-         'description': "Organization is spelled with a z"},
-        {'pattern': '!!! warning',
-         'description': "!!! warning is invalid; it's spelled '!!! warn'"},
-        {'pattern': '[^-_]botserver(?!rc)|bot server',
-         'description': "Use Botserver instead of botserver or Botserver."},
-    ]  # type: RuleList
-    json_rules = []  # type: RuleList # fix newlines at ends of files
-    # It is okay that json_rules is empty, because the empty list
-    # ensures we'll still check JSON files for whitespace.
-    markdown_rules = markdown_whitespace_rules + prose_style_rules + [
+    ]) + whitespace_rules[0:1],
+)
+
+
+json_rules = RuleList(
+    langs=['json'],
+    # Here, we don't check tab-based whitespace, because the tab-based
+    # whitespace rule flags a lot of third-party JSON fixtures
+    # under zerver/webhooks that we want preserved verbatim.  So
+    # we just include the trailing whitespace rule and a modified
+    # version of the tab-based whitespace rule (we can't just use
+    # exclude in whitespace_rules, since we only want to ignore
+    # JSON files with tab-based whitespace, not webhook code).
+    rules= whitespace_rules[0:1],
+)
+
+prose_style_rules = [
+    {'pattern': '[^\/\#\-\"]([jJ]avascript)',  # exclude usage in hrefs/divs
+     'description': "javascript should be spelled JavaScript"},
+    {'pattern': '[^\/\-\.\"\'\_\=\>]([gG]ithub)[^\.\-\_\"\<]',  # exclude usage in hrefs/divs
+     'description': "github should be spelled GitHub"},
+    {'pattern': '[oO]rganisation',  # exclude usage in hrefs/divs
+     'description': "Organization is spelled with a z"},
+    {'pattern': '!!! warning',
+     'description': "!!! warning is invalid; it's spelled '!!! warn'"},
+    {'pattern': '[^-_]botserver(?!rc)|bot server',
+     'description': "Use Botserver instead of botserver or Botserver."},
+]  # type: Rule
+
+markdown_docs_length_exclude = {
+    "zulip_bots/zulip_bots/bots/converter/doc.md",
+    "tools/server_lib/README.md",
+}
+
+markdown_rules = RuleList(
+    langs=['md'],
+    rules=markdown_whitespace_rules + prose_style_rules + cast(Rule, [
         {'pattern': '\[(?P<url>[^\]]+)\]\((?P=url)\)',
          'description': 'Linkified markdown URLs should use cleaner <http://example.com> syntax.'}
-    ]
-    help_markdown_rules = markdown_rules + [
-        {'pattern': '[a-z][.][A-Z]',
-         'description': "Likely missing space after end of sentence"},
-        {'pattern': '[rR]ealm',
-         'description': "Realms are referred to as Organizations in user-facing docs."},
-    ]
-    txt_rules = whitespace_rules
-
-    def check_custom_checks_py():
-        # type: () -> bool
-        failed = False
-
-        for fn in by_lang['py']:
-            if 'custom_check.py' in fn:
-                continue
-            if custom_check_file(fn, 'py', python_rules, max_length=140):
-                failed = True
-        return failed
-
-    def check_custom_checks_nonpy():
-        # type: () -> bool
-        failed = False
-
-        for fn in by_lang['sh']:
-            if custom_check_file(fn, 'sh', bash_rules):
-                failed = True
-
-        for fn in by_lang['json']:
-            if custom_check_file(fn, 'json', json_rules):
-                failed = True
-
-        markdown_docs_length_exclude = {
-            "zulip_bots/zulip_bots/bots/converter/doc.md",
-            "tools/server_lib/README.md",
-        }
-        for fn in by_lang['md']:
-            max_length = None
-            if fn not in markdown_docs_length_exclude:
-                max_length = 120
-            rules = markdown_rules
-            if fn.startswith("templates/zerver/help"):
-                rules = help_markdown_rules
-            if custom_check_file(fn, 'md', rules, max_length=max_length):
-                failed = True
-
-        for fn in by_lang['txt'] + by_lang['text']:
-            if custom_check_file(fn, 'txt', txt_rules):
-                failed = True
-
-        for fn in by_lang['yaml']:
-            if custom_check_file(fn, 'yaml', txt_rules):
-                failed = True
-
-        return failed
-
-    return (check_custom_checks_py, check_custom_checks_nonpy)
+    ]),
+    max_length=120,
+    length_exclude=markdown_docs_length_exclude,
+)
+
+txt_rules = RuleList(
+    langs=['txt'],
+    rules=whitespace_rules,
+)
+
+non_py_rules = [
+    json_rules,
+    markdown_rules,
+    bash_rules,
+    txt_rules,
+]
diff --git a/tools/lint b/tools/lint
@@ -1,39 +1,54 @@
 #! /usr/bin/env python
 
-from pep8 import check_pep8
-from custom_check import build_custom_checkers
-from server_lib import lister
+from __future__ import print_function
+from __future__ import absolute_import
+import argparse
+
+from zulint.command import add_default_linter_arguments, LinterConfig
 
-import sys
-import optparse
-from typing import cast, Callable, Dict, Iterator, List
+from custom_check import python_rules, non_py_rules
+from pep8 import check_pep8
 
 EXCLUDED_FILES = [
     # This is an external file that doesn't comply with our codestyle
     'zulip/integrations/perforce/git_p4.py',
 ]
 
-def lint_all(args, options):
-
-    by_lang = cast(Dict[str, List[str]],
-                   lister.list_files(args, modified_only=options.modified,
-                                     ftypes=['py', 'sh', 'js', 'pp', 'css', 'handlebars',
-                                             'html', 'json', 'md', 'txt', 'text', 'yaml'],
-                                     use_shebang=True, group_by_ftype=True, exclude=EXCLUDED_FILES))
-    check_custom_checks_py, check_custom_checks_nonpy = build_custom_checkers(by_lang)
-    return any([check_pep8(by_lang['py']),
-                check_custom_checks_py(),
-                check_custom_checks_nonpy()])
-
 def run():
     # type: () -> None
-    parser = optparse.OptionParser()
-    parser.add_option('--modified', '-m',
-                      action='store_true',
-                      help='Only check modified files')
-    (options, args) = parser.parse_args()
-    failed = lint_all(args, options)
-    sys.exit(1 if failed else 0)
+    parser = argparse.ArgumentParser()
+    add_default_linter_arguments(parser)
+    args = parser.parse_args()
+
+    linter_config = LinterConfig(args)
+
+    by_lang = linter_config.list_files(file_types=['py', 'sh', 'json', 'md', 'txt'],
+                                       exclude=EXCLUDED_FILES)
+
+    @linter_config.lint
+    def custom_py():
+        # type: () -> int
+        """Runs custom checks for python files (config: tools/linter_lib/custom_check.py)"""
+        failed = python_rules.check(by_lang, verbose=args.verbose)
+        return 1 if failed else 0
+
+    @linter_config.lint
+    def custom_nonpy():
+        # type: () -> int
+        """Runs custom checks for non-python files (config: tools/linter_lib/custom_check.py)"""
+        failed = False
+        for rule in non_py_rules:
+            failed = failed or rule.check(by_lang, verbose=args.verbose)
+        return 1 if failed else 0
+
+    @linter_config.lint
+    def pep8():
+        # type: () -> int
+        """Standard Python style linter on 50% of files (config: tools/linter_lib/pep8.py)"""
+        failed = check_pep8(by_lang['py'])
+        return 1 if failed else 0
+
+    linter_config.do_lint()
 
 if __name__ == '__main__':
     run()