calmjs · metatoaster · Aug 7, 2018 · Jul 17, 2018 · Aug 7, 2018 · Aug 7, 2018
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -6,7 +6,20 @@ Changelog
 
 - Correct the implementation of line continuation in strings.  This also
   meant a change in the minify unparser so that it will continue to
-  remove the line continuation sequences.
+  remove the line continuation sequences.  [
+  `#16 <https://github.com/calmjs/calmjs.parse/issues/16>`_
+  ]
+
+- Correct the implementation of ASI (automatic semicolon insertion) by
+  introducing a dedicated token type for automatically inserted
+  semicolons.  An inserted semicolon can no longer be parsed as an
+  empty statement, nor be accepted by productions where ASI must not
+  apply, so invalid source that previously parsed successfully due to
+  this issue is now rejected.  [
+  `#18 <https://github.com/calmjs/calmjs.parse/issues/18>`_
+  `rspivak/slimit#29 <https://github.com/rspivak/slimit/issues/29>`_
+  `rspivak/slimit#101 <https://github.com/rspivak/slimit/issues/101>`_
+  ]
 
 1.0.1 - 2018-04-19
 ------------------

diff --git a/README.rst b/README.rst
@@ -186,15 +186,18 @@ immediate access to the parsing feature.  It may be used like so:
     ... console.log(main('world'));
     ... '''
     >>> program = es5(program_source)
-    >>> program  # for a simple repr-like nested view of the ast
+    >>> # for a simple repr-like nested view of the ast
+    >>> program  # equivalent to repr(program)
     <ES5Program @3:1 ?children=[
       <VarStatement @3:1 ?children=[
         <VarDecl @3:5 identifier=<Identifier ...>, initializer=<FuncExpr ...>>
       ]>,
       <ExprStatement @7:1 expr=<FunctionCall @7:1 args=<Arguments ...>,
         identifier=<DotAccessor ...>>>
     ]>
-    >>> print(program)  # automatic reconstruction of ast into source
+    >>> # automatic reconstruction of ast into source, without having to
+    >>> # call something like `.to_ecma()`
+    >>> print(program)  # equivalent to str(program)
     var main = function(greet) {
       var hello = "hello " + greet;
       return hello;
@@ -503,7 +506,7 @@ Object assignments from a given script file:
 .. code:: python
 
     >>> from calmjs.parse import es5
-    >>> from calmjs.parse.asttypes import Object, VarDecl
+    >>> from calmjs.parse.asttypes import Object, VarDecl, FunctionCall
     >>> from calmjs.parse.walkers import Walker
     >>> walker = Walker()
     >>> declarations = es5(u'''
@@ -514,12 +517,17 @@ Object assignments from a given script file:
     ...         v: "value"
     ...     }
     ... };
+    ... foo({foo: "bar"});
     ... function bar() {
     ...     var t = {
     ...         foo: "bar",
-    ...     }
+    ...     };
+    ...     return t;
     ... }
+    ... foo.bar = bar;
+    ... foo.bar();
     ... ''')
+    >>> # print out the object nodes that were part of some assignments
     >>> for node in walker.filter(declarations, lambda node: (
     ...         isinstance(node, VarDecl) and
     ...         isinstance(node.initializer, Object))):
@@ -534,6 +542,13 @@ Object assignments from a given script file:
     {
       foo: "bar"
     }
+    >>> # print out all function calls
+    >>> for node in walker.filter(declarations, lambda node: (
+    ...         isinstance(node, FunctionCall))):
+    ...     print(node.identifier)
+    ...
+    foo
+    foo.bar
 
 Further details and example usage can be consulted from the various
 docstrings found within the module.

diff --git a/src/calmjs/parse/lexers/es5.py b/src/calmjs/parse/lexers/es5.py
@@ -220,7 +220,8 @@ def token(self):
                 return self.cur_token
 
     def auto_semi(self, token):
-        if token is None or token.type == 'RBRACE' or self._is_prev_token_lt():
+        if token is None or (token.type not in ('SEMI', 'AUTOSEMI') and (
+                token.type == 'RBRACE' or self._is_prev_token_lt())):
             if token:
                 self.next_tokens.append(token)
             return self._create_semi_token(token)
@@ -303,7 +304,7 @@ def lookup_colno(self, lineno, lexpos):
 
     def _create_semi_token(self, orig_token):
         token = AutoLexToken()
-        token.type = 'SEMI'
+        token.type = 'AUTOSEMI'
         token.value = ';'
         if orig_token is not None:
             token.lineno = orig_token.lineno
@@ -352,6 +353,7 @@ def next(self):
     tokens = (
         # Punctuators
         'PERIOD', 'COMMA', 'SEMI', 'COLON',     # . , ; :
+        'AUTOSEMI',                             # autogenerated ;
         'PLUS', 'MINUS', 'MULT', 'DIV', 'MOD',  # + - * / %
         'BAND', 'BOR', 'BXOR', 'BNOT',          # & | ^ ~
         'CONDOP',                               # conditional operator ?

diff --git a/src/calmjs/parse/parsers/es5.py b/src/calmjs/parse/parsers/es5.py
@@ -100,18 +100,6 @@ def __init__(self, lex_optimize=True, lextab=lextab,
         # over again.
         self._error_tokens = {}
 
-    def _has_been_seen_before(self, token):
-        if token is None:
-            return False
-        key = token.type, token.value, token.lineno, token.lexpos
-        return key in self._error_tokens
-
-    def _mark_as_seen(self, token):
-        if token is None:
-            return
-        key = token.type, token.value, token.lineno, token.lexpos
-        self._error_tokens[key] = True
-
     def _raise_syntax_error(self, token):
         tokens = [format_lex_token(t) for t in [
             self.lexer.valid_prev_token,
@@ -141,22 +129,11 @@ def parse(self, text, debug=False):
     def p_empty(self, p):
         """empty :"""
 
-    def p_auto_semi(self, p):
-        """auto_semi : error"""
-
     def p_error(self, token):
-        # https://github.com/rspivak/slimit/issues/29
-        if self._has_been_seen_before(token):
-            self._raise_syntax_error(token)
-
-        if token is None or token.type != 'SEMI':
-            next_token = self.lexer.auto_semi(token)
-            if next_token is not None:
-                # https://github.com/rspivak/slimit/issues/29
-                self._mark_as_seen(token)
-                self.parser.errok()
-                return next_token
-
+        next_token = self.lexer.auto_semi(token)
+        if next_token is not None:
+            self.parser.errok()
+            return next_token
         self._raise_syntax_error(token)
 
     # Comment rules
@@ -1094,7 +1071,7 @@ def p_expr_nobf(self, p):
     # 12.2 Variable Statement
     def p_variable_statement(self, p):
         """variable_statement : VAR variable_declaration_list SEMI
-                              | VAR variable_declaration_list auto_semi
+                              | VAR variable_declaration_list AUTOSEMI
         """
         p[0] = self.asttypes.VarStatement(p[2])
         p[0].setpos(p)
@@ -1162,7 +1139,7 @@ def p_empty_statement(self, p):
     # 12.4 Expression Statement
     def p_expr_statement(self, p):
         """expr_statement : expr_nobf SEMI
-                          | expr_nobf auto_semi
+                          | expr_nobf AUTOSEMI
         """
         # In 12.4, expression statements cannot start with either the
         # 'function' keyword or '{'.  However, the lexing and production
@@ -1200,7 +1177,7 @@ def p_iteration_statement_1(self, p):
         """
         iteration_statement \
             : DO statement WHILE LPAREN expr RPAREN SEMI
-            | DO statement WHILE LPAREN expr RPAREN auto_semi
+            | DO statement WHILE LPAREN expr RPAREN AUTOSEMI
         """
         p[0] = self.asttypes.DoWhile(predicate=p[5], statement=p[2])
         p[0].setpos(p)
@@ -1287,44 +1264,44 @@ def p_expr_noin_opt(self, p):
     # 12.7 The continue Statement
     def p_continue_statement_1(self, p):
         """continue_statement : CONTINUE SEMI
-                              | CONTINUE auto_semi
+                              | CONTINUE AUTOSEMI
         """
         p[0] = self.asttypes.Continue()
         p[0].setpos(p)
 
     def p_continue_statement_2(self, p):
         """continue_statement : CONTINUE identifier SEMI
-                              | CONTINUE identifier auto_semi
+                              | CONTINUE identifier AUTOSEMI
         """
         p[0] = self.asttypes.Continue(p[2])
         p[0].setpos(p)
 
     # 12.8 The break Statement
     def p_break_statement_1(self, p):
         """break_statement : BREAK SEMI
-                           | BREAK auto_semi
+                           | BREAK AUTOSEMI
         """
         p[0] = self.asttypes.Break()
         p[0].setpos(p)
 
     def p_break_statement_2(self, p):
         """break_statement : BREAK identifier SEMI
-                           | BREAK identifier auto_semi
+                           | BREAK identifier AUTOSEMI
         """
         p[0] = self.asttypes.Break(p[2])
         p[0].setpos(p)
 
     # 12.9 The return Statement
     def p_return_statement_1(self, p):
         """return_statement : RETURN SEMI
-                            | RETURN auto_semi
+                            | RETURN AUTOSEMI
         """
         p[0] = self.asttypes.Return()
         p[0].setpos(p)
 
     def p_return_statement_2(self, p):
         """return_statement : RETURN expr SEMI
-                            | RETURN expr auto_semi
+                            | RETURN expr AUTOSEMI
         """
         p[0] = self.asttypes.Return(expr=p[2])
         p[0].setpos(p)
@@ -1396,7 +1373,7 @@ def p_labelled_statement(self, p):
     # 12.13 The throw Statement
     def p_throw_statement(self, p):
         """throw_statement : THROW expr SEMI
-                           | THROW expr auto_semi
+                           | THROW expr AUTOSEMI
         """
         p[0] = self.asttypes.Throw(expr=p[2])
         p[0].setpos(p)
@@ -1430,7 +1407,7 @@ def p_finally(self, p):
     # 12.15 The debugger statement
     def p_debugger_statement(self, p):
         """debugger_statement : DEBUGGER SEMI
-                              | DEBUGGER auto_semi
+                              | DEBUGGER AUTOSEMI
         """
         p[0] = self.asttypes.Debugger(p[1])
         p[0].setpos(p)

diff --git a/src/calmjs/parse/tests/test_es5_parser.py b/src/calmjs/parse/tests/test_es5_parser.py
@@ -114,7 +114,7 @@ def test_that_parsing_eventually_stops(self):
             parser.parse(text)
         self.assertEqual(
             str(e.exception),
-            "Unexpected ',' at 2:1 between '\\n' at 1:7 and 'b' at 2:3")
+            "Unexpected ',' at 2:1 after '\\n' at 1:7")
 
     def test_bare_start(self):
         text = textwrap.dedent("""
@@ -237,6 +237,65 @@ def test_read(self):
         node = read(stream)
         self.assertEqual(node.sourcepath, 'somefile.js')
 
+    # 7.9.2
+    def test_asi_empty_if_parse_fail(self):
+        text = "if (true)"
+        parser = Parser()
+        with self.assertRaises(ECMASyntaxError) as e:
+            parser.parse(text)
+        self.assertEqual(
+            str(e.exception),
+            "Unexpected end of input after ')' at 1:9")
+
+    def test_asi_empty_if_parse_fail_inside_block(self):
+        # https://github.com/rspivak/slimit/issues/101
+        text = textwrap.dedent("""
+        function foo(args) {
+            if (true)
+        }
+        """).strip()
+        parser = Parser()
+        with self.assertRaises(ECMASyntaxError) as e:
+            parser.parse(text)
+        self.assertEqual(
+            str(e.exception),
+            r"Unexpected '}' at 3:1 after '\n' at 2:14")
+
+    def test_asi_for_truncated_fail(self):
+        text = textwrap.dedent("""
+        for (a; b
+        )
+        """).strip()
+        parser = Parser()
+        with self.assertRaises(ECMASyntaxError) as e:
+            parser.parse(text)
+        self.assertEqual(
+            str(e.exception),
+            r"Unexpected ')' at 2:1 after '\n' at 1:10")
+
+    def test_asi_for_bare_fail(self):
+        text = textwrap.dedent("""
+        for (a; b; c)
+        """).strip()
+        parser = Parser()
+        with self.assertRaises(ECMASyntaxError) as e:
+            parser.parse(text)
+        self.assertEqual(
+            str(e.exception),
+            "Unexpected end of input after ')' at 1:13")
+
+    def test_asi_omitted_if_else_fail(self):
+        text = textwrap.dedent("""
+        if (a > b)
+        else c = d
+        """).strip()
+        parser = Parser()
+        with self.assertRaises(ECMASyntaxError) as e:
+            parser.parse(text)
+        self.assertEqual(
+            str(e.exception),
+            r"Unexpected 'else' at 2:1 after '\n' at 1:11")
+
 
 repr_walker = ReprWalker()