Skip to content

Commit cdd8f91

Browse files
committed
Reject option 9: digit separators in floats (before decimal point)
Per WG17 decision, options 9-11 (float digit separators) are rejected for consistency. This commit implements the rejection of option 9, which allowed digit separators before the decimal point (e.g., "1_0.0").

Changes:
- Modified skip_underscore_in_number() to return a (char, bool) tuple tracking whether an underscore was encountered
- Updated number_token() to track a had_separator flag throughout parsing
- Added a check before the decimal point: reject if separators were used
- Error type: ParseBigInt (cannot parse as float with separators)

Tests:
- Added 3 tests verifying option 9 rejection via number_chars and atoms
- All tests now pass with proper error handling
- Integer separators (1_000) still work correctly
- Floats without separators (10.0) still work correctly

Behavior:
- BEFORE: number_chars(N, "1_0.0") → N = 10.0 (unexpected)
- AFTER: number_chars(N, "1_0.0") → error(syntax_error(...))

This addresses UWN's feedback that option 9 should be rejected for consistency, since options 10 and 11 (separators after the decimal point and in exponents) are also not supported.
1 parent 47fbf5f commit cdd8f91

File tree

2 files changed

+46
-14
lines changed

2 files changed

+46
-14
lines changed

src/parser/lexer.rs

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -720,21 +720,24 @@ impl<'a, R: CharRead> Lexer<'a, R> {
720720
Ok((offset, OrderedFloat(n)))
721721
}
722722

723-
fn skip_underscore_in_number(&mut self) -> Result<char, ParserError> {
723+
fn skip_underscore_in_number(&mut self) -> Result<(char, bool), ParserError> {
724724
let mut c = self.lookahead_char()?;
725+
let had_underscore;
725726

726727
if c == '_' {
728+
had_underscore = true;
727729
self.skip_char(c);
728730
self.scan_for_layout()?;
729731
c = self.lookahead_char()?;
730732

731733
if decimal_digit_char!(c) {
732-
Ok(c)
734+
Ok((c, had_underscore))
733735
} else {
734736
Err(ParserError::ParseBigInt(self.line_num, self.col_num))
735737
}
736738
} else {
737-
Ok(c)
739+
had_underscore = false;
740+
Ok((c, had_underscore))
738741
}
739742
}
740743

@@ -764,18 +767,30 @@ impl<'a, R: CharRead> Lexer<'a, R> {
764767

765768
fn number_token(&mut self, leading_c: char) -> Result<NumberToken, ParserError> {
766769
let mut token = String::with_capacity(16);
770+
let mut had_separator = false;
767771

768772
self.skip_char(leading_c);
769773
token.push(leading_c);
770-
let mut c = try_nt!(token, self.skip_underscore_in_number());
774+
775+
let result = try_nt!(token, self.skip_underscore_in_number());
776+
let mut c = result.0;
777+
had_separator |= result.1;
771778

772779
while decimal_digit_char!(c) {
773780
token.push(c);
774781
self.skip_char(c);
775-
c = try_nt!(token, self.skip_underscore_in_number());
782+
let result = try_nt!(token, self.skip_underscore_in_number());
783+
c = result.0;
784+
had_separator |= result.1;
776785
}
777786

778787
if decimal_point_char!(c) {
788+
// Reject option 9: digit separators in float (before decimal point)
789+
// Per WG17 decision, options 9-11 (float digit separators) are rejected
790+
if had_separator {
791+
return Err(ParserError::ParseBigInt(self.line_num, self.col_num));
792+
}
793+
779794
self.skip_char(c);
780795

781796
if self.reader.peek_char().is_none() {

src/tests/digit_separators.pl

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -128,12 +128,29 @@
128128
N =:= 1000
129129
)).
130130

131-
% TODO: This should fail (float digit separators are undecided - options 9-11)
132-
% Currently it succeeds but per UWN it should be rejected for consistency
133-
% test("number_chars rejects float with separator", (
134-
% catch(
135-
% (number_chars(_, "1_0.0"), fail),
136-
% error(syntax_error(_), _),
137-
% true
138-
% )
139-
% )).
131+
% Option 9: Reject digit separators in float (before decimal point)
132+
test("number_chars rejects float with separator before decimal", (
133+
catch(
134+
(number_chars(_, "1_0.0"), fail),
135+
error(syntax_error(_), _),
136+
true
137+
)
138+
)).
139+
140+
test("number_chars rejects float separator via atom_chars", (
141+
catch(
142+
(atom_chars('1_0.0', Cs), number_chars(_, Cs), fail),
143+
error(syntax_error(_), _),
144+
true
145+
)
146+
)).
147+
148+
test("direct literal rejects float with separator before decimal", (
149+
% Can't test direct literal syntax in test file since it fails at parse time
150+
% This test verifies the error occurs via atom -> number conversion
151+
catch(
152+
(atom_chars(A, ['1','_','0','.','0']), number_chars(_, A), fail),
153+
error(_,_),
154+
true
155+
)
156+
)).

0 commit comments

Comments
 (0)