Skip to content

Commit 5c28a08

Browse files
Merge branch 'main' into zihang/update-syntax
2 parents 437ec4d + 7337845 commit 5c28a08

File tree

10 files changed

+103
-272
lines changed

10 files changed

+103
-272
lines changed

encoding/decoding_test.mbt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -758,7 +758,7 @@ test "lossy decoding UTF16BE which is `HI+!LO`" {
758758
}
759759

760760
///|
761-
test "decode_lossy_to/UTF16BE" {
761+
test "decode_lossy_to/UTF16BE_2" {
762762
let src = b"\xD8\x00\x00\x48"
763763
assert_eq(src, b"\xd8\x00\x00\x48")
764764
let decoder = @encoding.decoder(UTF16BE)

json5/lex_main.mbt

Lines changed: 71 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -18,106 +18,101 @@ fn lex_value(
1818
allow_rbracket? : Bool = false,
1919
) -> Token raise ParseError {
2020
for {
21-
match read_char(ctx) {
22-
Some('\t' | '\u000B' | '\u000C' | ' ' | '\n' | '\r') => continue
23-
Some('/') => {
24-
lex_comment(ctx)
21+
let view = try! ctx.input[ctx.offset:]
22+
lexmatch view with longest {
23+
(
24+
"[\t\u000B\u000C \n\r\u00A0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF]",
25+
rest
26+
) => {
27+
ctx.offset = rest.start_offset()
2528
continue
2629
}
27-
Some('{') => return LBrace
28-
Some('[') => return LBracket
29-
Some(']') =>
30+
("//" "[^\n\r\u2028\u2029]*", rest) => {
31+
ctx.offset = rest.start_offset()
32+
continue
33+
}
34+
("/[*]" "([*][^/]?|[^*])*" "[*]/", rest) => {
35+
ctx.offset = rest.start_offset()
36+
continue
37+
}
38+
("/[*]" "([*][^/]?|[^*])*") => parse_error(InvalidEof)
39+
("[{]", rest) => {
40+
ctx.offset = rest.start_offset()
41+
return LBrace
42+
}
43+
("\[", rest) => {
44+
ctx.offset = rest.start_offset()
45+
return LBracket
46+
}
47+
("\]", rest) =>
3048
if allow_rbracket {
49+
ctx.offset = rest.start_offset()
3150
return RBracket
3251
} else {
33-
invalid_char(ctx, shift=-1)
52+
invalid_char(ctx)
3453
}
35-
Some('n') => {
36-
lex_assert_char(ctx, 'u')
37-
lex_assert_char(ctx, 'l')
38-
lex_assert_char(ctx, 'l')
54+
("null", rest) => {
55+
ctx.offset = rest.start_offset()
3956
return Null
4057
}
41-
Some('t') => {
42-
lex_assert_char(ctx, 'r')
43-
lex_assert_char(ctx, 'u')
44-
lex_assert_char(ctx, 'e')
58+
("true", rest) => {
59+
ctx.offset = rest.start_offset()
4560
return True
4661
}
47-
Some('f') => {
48-
lex_assert_char(ctx, 'a')
49-
lex_assert_char(ctx, 'l')
50-
lex_assert_char(ctx, 's')
51-
lex_assert_char(ctx, 'e')
62+
("false", rest) => {
63+
ctx.offset = rest.start_offset()
5264
return False
5365
}
54-
Some('-' | '+' as c) =>
55-
match read_char(ctx) {
56-
Some('I') => {
57-
lex_infinity(ctx)
58-
return Number(
59-
if c == '-' {
60-
@double.neg_infinity
61-
} else {
62-
@double.infinity
63-
},
64-
)
65-
}
66-
Some('N') => {
67-
lex_assert_char(ctx, 'a')
68-
lex_assert_char(ctx, 'N')
69-
return Number(@double.not_a_number)
70-
}
71-
Some('0') => {
72-
let n = lex_zero(ctx, neg=c == '-', start=ctx.offset - 2)
73-
return Number(n)
74-
}
75-
Some('.') => {
76-
let n = lex_decimal_point_leading(ctx, start=ctx.offset - 2)
77-
return Number(n)
78-
}
79-
Some(c2) => {
80-
if c2 >= '1' && c2 <= '9' {
81-
let n = lex_decimal_integer(ctx, start=ctx.offset - 2)
82-
return Number(n)
83-
}
84-
invalid_char(ctx, shift=-1)
85-
}
86-
None => parse_error(InvalidEof)
87-
}
88-
Some('.') => {
89-
let n = lex_decimal_point_leading(ctx, start=ctx.offset - 1)
66+
("\+?0[xX]", rest) => {
67+
ctx.offset = rest.start_offset()
68+
let n = lex_hexadecimal(ctx, neg=false)
9069
return Number(n)
9170
}
92-
Some('0') => {
93-
let n = lex_zero(ctx, neg=false, start=ctx.offset - 1)
71+
("-0[xX]", rest) => {
72+
ctx.offset = rest.start_offset()
73+
let n = lex_hexadecimal(ctx, neg=true)
9474
return Number(n)
9575
}
96-
Some('1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') => {
97-
let n = lex_decimal_integer(ctx, start=ctx.offset - 1)
76+
(
77+
"[+\-]?((0|[1-9][0-9]*)(\.[0-9]*)?|\.[0-9]+)([eE][+\-]?[0-9]+)?" as lit,
78+
rest
79+
) => {
80+
ctx.offset = rest.start_offset()
81+
let n = lex_number_end(
82+
ctx,
83+
lit.start_offset(),
84+
lit.start_offset() + lit.length(),
85+
)
9886
return Number(n)
9987
}
100-
Some('I') => {
101-
lex_infinity(ctx)
88+
("\+?Infinity", rest) => {
89+
ctx.offset = rest.start_offset()
10290
return Number(@double.infinity)
10391
}
104-
Some('N') => {
105-
lex_assert_char(ctx, 'a')
106-
lex_assert_char(ctx, 'N')
92+
("-Infinity", rest) => {
93+
ctx.offset = rest.start_offset()
94+
return Number(@double.neg_infinity)
95+
}
96+
("[+\-]?NaN", rest) => {
97+
ctx.offset = rest.start_offset()
10798
return Number(@double.not_a_number)
10899
}
109-
Some('"') => {
110-
let n = lex_string(ctx, '"')
111-
return String(n)
100+
("\"", rest) => {
101+
ctx.offset = rest.start_offset()
102+
let s = lex_string(ctx, '"')
103+
return String(s)
112104
}
113-
Some('\'') => return String(lex_string(ctx, '\''))
114-
Some(c) => {
115-
if c > '\u{7f}' && non_ascii_whitespace.contains(c) {
116-
continue
117-
}
118-
invalid_char(ctx, shift=-1)
105+
("'", rest) => {
106+
ctx.offset = rest.start_offset()
107+
let s = lex_string(ctx, '\'')
108+
return String(s)
109+
}
110+
(".", rest) => {
111+
ctx.offset = rest.start_offset()
112+
no_valid_token(ctx)
119113
}
120-
None => parse_error(InvalidEof)
114+
"" => parse_error(InvalidEof)
115+
_ => panic()
121116
}
122117
}
123118
}

json5/lex_misc.mbt

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -95,17 +95,6 @@ fn lex_assert_char(ctx : ParseContext, c : Char) -> Unit raise ParseError {
9595
}
9696
}
9797

98-
///|
99-
fn lex_infinity(ctx : ParseContext) -> Unit raise ParseError {
100-
lex_assert_char(ctx, 'n')
101-
lex_assert_char(ctx, 'f')
102-
lex_assert_char(ctx, 'i')
103-
lex_assert_char(ctx, 'n')
104-
lex_assert_char(ctx, 'i')
105-
lex_assert_char(ctx, 't')
106-
lex_assert_char(ctx, 'y')
107-
}
108-
10998
///|
11099
fn lex_comment(ctx : ParseContext) -> Unit raise ParseError {
111100
match read_char(ctx) {

json5/lex_number.mbt

Lines changed: 4 additions & 162 deletions
Original file line numberDiff line numberDiff line change
@@ -12,165 +12,11 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
///|
16-
fn lex_decimal_integer(
17-
ctx : ParseContext,
18-
start~ : Int,
19-
) -> Double raise ParseError {
20-
for {
21-
match read_char(ctx) {
22-
Some('.') => return lex_decimal_point(ctx, start~)
23-
Some('e' | 'E') => return lex_decimal_exponent(ctx, start~)
24-
Some(c) => {
25-
if c >= '0' && c <= '9' {
26-
continue
27-
}
28-
ctx.offset -= 1
29-
return lex_number_end(ctx, start, ctx.offset)
30-
}
31-
None => return lex_number_end(ctx, start, ctx.offset)
32-
}
33-
}
34-
}
35-
36-
///|
37-
fn lex_decimal_point_leading(
38-
ctx : ParseContext,
39-
start~ : Int,
40-
) -> Double raise ParseError {
41-
match read_char(ctx) {
42-
Some(c) => {
43-
if c >= '0' && c <= '9' {
44-
return lex_decimal_fraction(ctx, start~)
45-
}
46-
ctx.offset -= 1
47-
invalid_char(ctx)
48-
}
49-
None => parse_error(InvalidEof)
50-
}
51-
}
52-
53-
///|
54-
fn lex_decimal_point(
55-
ctx : ParseContext,
56-
start~ : Int,
57-
) -> Double raise ParseError {
58-
match read_char(ctx) {
59-
Some('e' | 'E') => return lex_decimal_exponent(ctx, start~)
60-
Some(c) => {
61-
if c >= '0' && c <= '9' {
62-
return lex_decimal_fraction(ctx, start~)
63-
}
64-
ctx.offset -= 1
65-
return lex_number_end(ctx, start, ctx.offset)
66-
}
67-
None => return lex_number_end(ctx, start, ctx.offset)
68-
}
69-
}
70-
71-
///|
72-
fn lex_decimal_fraction(
73-
ctx : ParseContext,
74-
start~ : Int,
75-
) -> Double raise ParseError {
76-
for {
77-
match read_char(ctx) {
78-
Some('e' | 'E') => return lex_decimal_exponent(ctx, start~)
79-
Some(c) => {
80-
if c >= '0' && c <= '9' {
81-
continue
82-
}
83-
ctx.offset -= 1
84-
return lex_number_end(ctx, start, ctx.offset)
85-
}
86-
None => return lex_number_end(ctx, start, ctx.offset)
87-
}
88-
}
89-
}
90-
91-
///|
92-
fn lex_decimal_exponent(
93-
ctx : ParseContext,
94-
start~ : Int,
95-
) -> Double raise ParseError {
96-
match read_char(ctx) {
97-
Some('+') | Some('-') => return lex_decimal_exponent_sign(ctx, start~)
98-
Some(c) => {
99-
if c >= '0' && c <= '9' {
100-
return lex_decimal_exponent_integer(ctx, start~)
101-
}
102-
ctx.offset -= 1
103-
invalid_char(ctx)
104-
}
105-
None => parse_error(InvalidEof)
106-
}
107-
}
108-
109-
///|
110-
fn lex_decimal_exponent_sign(
111-
ctx : ParseContext,
112-
start~ : Int,
113-
) -> Double raise ParseError {
114-
match read_char(ctx) {
115-
Some(c) => {
116-
if c >= '0' && c <= '9' {
117-
return lex_decimal_exponent_integer(ctx, start~)
118-
}
119-
ctx.offset -= 1
120-
invalid_char(ctx)
121-
}
122-
None => parse_error(InvalidEof)
123-
}
124-
}
125-
126-
///|
127-
fn lex_decimal_exponent_integer(
128-
ctx : ParseContext,
129-
start~ : Int,
130-
) -> Double raise ParseError {
131-
for {
132-
match read_char(ctx) {
133-
Some(c) => {
134-
if c >= '0' && c <= '9' {
135-
continue
136-
}
137-
ctx.offset -= 1
138-
return lex_number_end(ctx, start, ctx.offset)
139-
}
140-
None => return lex_number_end(ctx, start, ctx.offset)
141-
}
142-
}
143-
}
144-
145-
///|
146-
fn lex_zero(
147-
ctx : ParseContext,
148-
neg~ : Bool,
149-
start~ : Int,
150-
) -> Double raise ParseError {
151-
match read_char(ctx) {
152-
Some('.') => return lex_decimal_point(ctx, start~)
153-
Some('e' | 'E') => return lex_decimal_exponent(ctx, start~)
154-
Some('x' | 'X') => return lex_hexadecimal(ctx, neg~)
155-
Some(c) => {
156-
if c >= '0' && c <= '9' {
157-
ctx.offset -= 1
158-
invalid_char(ctx)
159-
}
160-
ctx.offset -= 1
161-
return lex_number_end(ctx, start, ctx.offset)
162-
}
163-
None => return lex_number_end(ctx, start, ctx.offset)
164-
}
165-
}
166-
16715
///|
16816
fn lex_hexadecimal(ctx : ParseContext, neg~ : Bool) -> Double raise ParseError {
16917
match read_char(ctx) {
17018
Some(c) => {
171-
if (c >= '0' && c <= '9') ||
172-
(c >= 'a' && c <= 'f') ||
173-
(c >= 'A' && c <= 'F') {
19+
if c is ('0'..='9' | 'a'..='f' | 'A'..='F') {
17420
let n = lex_hexadecimal_integer(ctx, hex_digit_to_int(c))
17521
return if neg { -n } else { n }
17622
}
@@ -186,9 +32,7 @@ fn lex_hexadecimal_integer(ctx : ParseContext, n : Int) -> Double {
18632
for n = n.to_double() {
18733
match read_char(ctx) {
18834
Some(c) => {
189-
if (c >= '0' && c <= '9') ||
190-
(c >= 'a' && c <= 'f') ||
191-
(c >= 'A' && c <= 'F') {
35+
if c is ('0'..='9' | 'a'..='f' | 'A'..='F') {
19236
continue n * 16.0 + hex_digit_to_int(c).to_double()
19337
}
19438
ctx.offset -= 1
@@ -206,9 +50,7 @@ fn lex_number_end(
20650
end : Int,
20751
) -> Double raise ParseError {
20852
let s = try! ctx.input[start:end].to_string()
209-
match (try? @strconv.parse_double(s)) {
210-
Ok(d) => d
211-
Err(_) =>
212-
parse_error(InvalidNumber(offset_to_position(ctx.input, start), s))
53+
@strconv.parse_double(s) catch {
54+
_ => parse_error(InvalidNumber(offset_to_position(ctx.input, start), s))
21355
}
21456
}

0 commit comments

Comments
 (0)