Skip to content

Correction to the ASI generation #18

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 7, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,20 @@ Changelog

- Correct the implementation of line continuation in strings. This also
meant a change in the minify unparser so that it will continue to
remove the line continuation sequences.
remove the line continuation sequences. [
`#16 <https://github.com/calmjs/calmjs.parse/issues/16>`_
]

- Correct the implementation of ASI (automatic semicolon insertion) by
introducing a dedicated token type for automatically inserted
semicolons. This prevents ASI from producing an empty statement and
distinguishes it from statements that should not have ASI applied,
so that source text which was previously parsed successfully by
mistake because of this issue is now rejected. [
`#18 <https://github.com/calmjs/calmjs.parse/issues/18>`_
`rspivak/slimit#29 <https://github.com/rspivak/slimit/issues/29>`_
`rspivak/slimit#101 <https://github.com/rspivak/slimit/issues/101>`_
]

1.0.1 - 2018-04-19
------------------
Expand Down
23 changes: 19 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -186,15 +186,18 @@ immediate access to the parsing feature. It may be used like so:
... console.log(main('world'));
... '''
>>> program = es5(program_source)
>>> program # for a simple repr-like nested view of the ast
>>> # for a simple repr-like nested view of the ast
>>> program # equivalent to repr(program)
<ES5Program @3:1 ?children=[
<VarStatement @3:1 ?children=[
<VarDecl @3:5 identifier=<Identifier ...>, initializer=<FuncExpr ...>>
]>,
<ExprStatement @7:1 expr=<FunctionCall @7:1 args=<Arguments ...>,
identifier=<DotAccessor ...>>>
]>
>>> print(program) # automatic reconstruction of ast into source
>>> # automatic reconstruction of ast into source, without having to
>>> # call something like `.to_ecma()`
>>> print(program) # equivalent to str(program)
var main = function(greet) {
var hello = "hello " + greet;
return hello;
Expand Down Expand Up @@ -503,7 +506,7 @@ Object assignments from a given script file:
.. code:: python

>>> from calmjs.parse import es5
>>> from calmjs.parse.asttypes import Object, VarDecl
>>> from calmjs.parse.asttypes import Object, VarDecl, FunctionCall
>>> from calmjs.parse.walkers import Walker
>>> walker = Walker()
>>> declarations = es5(u'''
Expand All @@ -514,12 +517,17 @@ Object assignments from a given script file:
... v: "value"
... }
... };
... foo({foo: "bar"});
... function bar() {
... var t = {
... foo: "bar",
... }
... };
... return t;
... }
... foo.bar = bar;
... foo.bar();
... ''')
>>> # print out the object nodes that were part of some assignments
>>> for node in walker.filter(declarations, lambda node: (
... isinstance(node, VarDecl) and
... isinstance(node.initializer, Object))):
Expand All @@ -534,6 +542,13 @@ Object assignments from a given script file:
{
foo: "bar"
}
>>> # print out all function calls
>>> for node in walker.filter(declarations, lambda node: (
... isinstance(node, FunctionCall))):
... print(node.identifier)
...
foo
foo.bar

Further details and example usage can be found in the various
docstrings within the module.
Expand Down
6 changes: 4 additions & 2 deletions src/calmjs/parse/lexers/es5.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,8 @@ def token(self):
return self.cur_token

def auto_semi(self, token):
if token is None or token.type == 'RBRACE' or self._is_prev_token_lt():
if token is None or (token.type not in ('SEMI', 'AUTOSEMI') and (
token.type == 'RBRACE' or self._is_prev_token_lt())):
if token:
self.next_tokens.append(token)
return self._create_semi_token(token)
Expand Down Expand Up @@ -303,7 +304,7 @@ def lookup_colno(self, lineno, lexpos):

def _create_semi_token(self, orig_token):
token = AutoLexToken()
token.type = 'SEMI'
token.type = 'AUTOSEMI'
token.value = ';'
if orig_token is not None:
token.lineno = orig_token.lineno
Expand Down Expand Up @@ -352,6 +353,7 @@ def next(self):
tokens = (
# Punctuators
'PERIOD', 'COMMA', 'SEMI', 'COLON', # . , ; :
'AUTOSEMI', # autogenerated ;
'PLUS', 'MINUS', 'MULT', 'DIV', 'MOD', # + - * / %
'BAND', 'BOR', 'BXOR', 'BNOT', # & | ^ ~
'CONDOP', # conditional operator ?
Expand Down
53 changes: 15 additions & 38 deletions src/calmjs/parse/parsers/es5.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,18 +100,6 @@ def __init__(self, lex_optimize=True, lextab=lextab,
# over again.
self._error_tokens = {}

def _has_been_seen_before(self, token):
if token is None:
return False
key = token.type, token.value, token.lineno, token.lexpos
return key in self._error_tokens

def _mark_as_seen(self, token):
if token is None:
return
key = token.type, token.value, token.lineno, token.lexpos
self._error_tokens[key] = True

def _raise_syntax_error(self, token):
tokens = [format_lex_token(t) for t in [
self.lexer.valid_prev_token,
Expand Down Expand Up @@ -141,22 +129,11 @@ def parse(self, text, debug=False):
def p_empty(self, p):
"""empty :"""

def p_auto_semi(self, p):
"""auto_semi : error"""

def p_error(self, token):
# https://github.com/rspivak/slimit/issues/29
if self._has_been_seen_before(token):
self._raise_syntax_error(token)

if token is None or token.type != 'SEMI':
next_token = self.lexer.auto_semi(token)
if next_token is not None:
# https://github.com/rspivak/slimit/issues/29
self._mark_as_seen(token)
self.parser.errok()
return next_token

next_token = self.lexer.auto_semi(token)
if next_token is not None:
self.parser.errok()
return next_token
self._raise_syntax_error(token)

# Comment rules
Expand Down Expand Up @@ -1094,7 +1071,7 @@ def p_expr_nobf(self, p):
# 12.2 Variable Statement
def p_variable_statement(self, p):
"""variable_statement : VAR variable_declaration_list SEMI
| VAR variable_declaration_list auto_semi
| VAR variable_declaration_list AUTOSEMI
"""
p[0] = self.asttypes.VarStatement(p[2])
p[0].setpos(p)
Expand Down Expand Up @@ -1162,7 +1139,7 @@ def p_empty_statement(self, p):
# 12.4 Expression Statement
def p_expr_statement(self, p):
"""expr_statement : expr_nobf SEMI
| expr_nobf auto_semi
| expr_nobf AUTOSEMI
"""
# In 12.4, expression statements cannot start with either the
# 'function' keyword or '{'. However, the lexing and production
Expand Down Expand Up @@ -1200,7 +1177,7 @@ def p_iteration_statement_1(self, p):
"""
iteration_statement \
: DO statement WHILE LPAREN expr RPAREN SEMI
| DO statement WHILE LPAREN expr RPAREN auto_semi
| DO statement WHILE LPAREN expr RPAREN AUTOSEMI
"""
p[0] = self.asttypes.DoWhile(predicate=p[5], statement=p[2])
p[0].setpos(p)
Expand Down Expand Up @@ -1287,44 +1264,44 @@ def p_expr_noin_opt(self, p):
# 12.7 The continue Statement
def p_continue_statement_1(self, p):
"""continue_statement : CONTINUE SEMI
| CONTINUE auto_semi
| CONTINUE AUTOSEMI
"""
p[0] = self.asttypes.Continue()
p[0].setpos(p)

def p_continue_statement_2(self, p):
"""continue_statement : CONTINUE identifier SEMI
| CONTINUE identifier auto_semi
| CONTINUE identifier AUTOSEMI
"""
p[0] = self.asttypes.Continue(p[2])
p[0].setpos(p)

# 12.8 The break Statement
def p_break_statement_1(self, p):
"""break_statement : BREAK SEMI
| BREAK auto_semi
| BREAK AUTOSEMI
"""
p[0] = self.asttypes.Break()
p[0].setpos(p)

def p_break_statement_2(self, p):
"""break_statement : BREAK identifier SEMI
| BREAK identifier auto_semi
| BREAK identifier AUTOSEMI
"""
p[0] = self.asttypes.Break(p[2])
p[0].setpos(p)

# 12.9 The return Statement
def p_return_statement_1(self, p):
"""return_statement : RETURN SEMI
| RETURN auto_semi
| RETURN AUTOSEMI
"""
p[0] = self.asttypes.Return()
p[0].setpos(p)

def p_return_statement_2(self, p):
"""return_statement : RETURN expr SEMI
| RETURN expr auto_semi
| RETURN expr AUTOSEMI
"""
p[0] = self.asttypes.Return(expr=p[2])
p[0].setpos(p)
Expand Down Expand Up @@ -1396,7 +1373,7 @@ def p_labelled_statement(self, p):
# 12.13 The throw Statement
def p_throw_statement(self, p):
"""throw_statement : THROW expr SEMI
| THROW expr auto_semi
| THROW expr AUTOSEMI
"""
p[0] = self.asttypes.Throw(expr=p[2])
p[0].setpos(p)
Expand Down Expand Up @@ -1430,7 +1407,7 @@ def p_finally(self, p):
# 12.15 The debugger statement
def p_debugger_statement(self, p):
"""debugger_statement : DEBUGGER SEMI
| DEBUGGER auto_semi
| DEBUGGER AUTOSEMI
"""
p[0] = self.asttypes.Debugger(p[1])
p[0].setpos(p)
Expand Down
61 changes: 60 additions & 1 deletion src/calmjs/parse/tests/test_es5_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def test_that_parsing_eventually_stops(self):
parser.parse(text)
self.assertEqual(
str(e.exception),
"Unexpected ',' at 2:1 between '\\n' at 1:7 and 'b' at 2:3")
"Unexpected ',' at 2:1 after '\\n' at 1:7")

def test_bare_start(self):
text = textwrap.dedent("""
Expand Down Expand Up @@ -237,6 +237,65 @@ def test_read(self):
node = read(stream)
self.assertEqual(node.sourcepath, 'somefile.js')

# 7.9.2
def test_asi_empty_if_parse_fail(self):
# An `if` header with no statement after it is incomplete input; the
# parser is expected to raise ECMASyntaxError at end of input instead
# of accepting the text.
text = "if (true)"
parser = Parser()
with self.assertRaises(ECMASyntaxError) as e:
parser.parse(text)
self.assertEqual(
str(e.exception),
"Unexpected end of input after ')' at 1:9")

def test_asi_empty_if_parse_fail_inside_block(self):
# Same incomplete `if` as the bare case, but nested in a function
# body: the closing brace that follows the `if` header is expected to
# be reported as the unexpected token.
# https://github.com/rspivak/slimit/issues/101
text = textwrap.dedent("""
function foo(args) {
if (true)
}
""").strip()
parser = Parser()
with self.assertRaises(ECMASyntaxError) as e:
parser.parse(text)
self.assertEqual(
str(e.exception),
r"Unexpected '}' at 3:1 after '\n' at 2:14")

def test_asi_for_truncated_fail(self):
# A `for` header containing only one explicit semicolon, with the
# closing parenthesis on the next line: the test expects the `)` to be
# reported as unexpected rather than the header being accepted.
text = textwrap.dedent("""
for (a; b
)
""").strip()
parser = Parser()
with self.assertRaises(ECMASyntaxError) as e:
parser.parse(text)
self.assertEqual(
str(e.exception),
r"Unexpected ')' at 2:1 after '\n' at 1:10")

def test_asi_for_bare_fail(self):
# A complete `for` header with no loop body: the parser is expected to
# raise ECMASyntaxError at end of input.
text = textwrap.dedent("""
for (a; b; c)
""").strip()
parser = Parser()
with self.assertRaises(ECMASyntaxError) as e:
parser.parse(text)
self.assertEqual(
str(e.exception),
"Unexpected end of input after ')' at 1:13")

def test_asi_omitted_if_else_fail(self):
# An `if` header followed directly by `else` on the next line, with no
# statement in between: the `else` keyword is expected to be reported
# as the unexpected token.
text = textwrap.dedent("""
if (a > b)
else c = d
""").strip()
parser = Parser()
with self.assertRaises(ECMASyntaxError) as e:
parser.parse(text)
self.assertEqual(
str(e.exception),
r"Unexpected 'else' at 2:1 after '\n' at 1:11")


repr_walker = ReprWalker()

Expand Down