Reject option 9: digit separators in floats (before decimal point)

jjtolton · jjtolton · commit cdd8f913c5bf · 2025-10-24T19:09:26.000-04:00
Per WG17 decision, options 9-11 (float digit separators) are rejected
for consistency. This commit implements rejection of option 9 which
allowed digit separators before the decimal point (e.g., "1_0.0").

Changes:
- Modified skip_underscore_in_number() to return (char, bool) tuple
  tracking whether an underscore was encountered
- Updated number_token() to track had_separator flag throughout parsing
- Added check before decimal point: reject if separators were used
- Error type: ParseBigInt (cannot parse as float with separators)

Tests:
- Added 3 tests verifying option 9 rejection via number_chars and atoms
- All tests now pass with proper error handling
- Integer separators (1_000) still work correctly
- Floats without separators (10.0) still work correctly

Behavior:
- BEFORE: number_chars(N, "1_0.0") → N = 10.0 (unexpected)
- AFTER:  number_chars(N, "1_0.0") → error(syntax_error(...))

This addresses UWN's feedback that option 9 should be rejected for
consistency since options 10 and 11 (separators after decimal point
and in exponents) are also not supported.
diff --git a/src/parser/lexer.rs b/src/parser/lexer.rs
@@ -720,21 +720,24 @@ impl<'a, R: CharRead> Lexer<'a, R> {
         Ok((offset, OrderedFloat(n)))
     }
 
-    fn skip_underscore_in_number(&mut self) -> Result<char, ParserError> {
+    fn skip_underscore_in_number(&mut self) -> Result<(char, bool), ParserError> {
         let mut c = self.lookahead_char()?;
+        let had_underscore;
 
         if c == '_' {
+            had_underscore = true;
             self.skip_char(c);
             self.scan_for_layout()?;
             c = self.lookahead_char()?;
 
             if decimal_digit_char!(c) {
-                Ok(c)
+                Ok((c, had_underscore))
             } else {
                 Err(ParserError::ParseBigInt(self.line_num, self.col_num))
             }
         } else {
-            Ok(c)
+            had_underscore = false;
+            Ok((c, had_underscore))
         }
     }
 
@@ -764,18 +767,30 @@ impl<'a, R: CharRead> Lexer<'a, R> {
 
     fn number_token(&mut self, leading_c: char) -> Result<NumberToken, ParserError> {
         let mut token = String::with_capacity(16);
+        let mut had_separator = false;
 
         self.skip_char(leading_c);
         token.push(leading_c);
-        let mut c = try_nt!(token, self.skip_underscore_in_number());
+
+        let result = try_nt!(token, self.skip_underscore_in_number());
+        let mut c = result.0;
+        had_separator |= result.1;
 
         while decimal_digit_char!(c) {
             token.push(c);
             self.skip_char(c);
-            c = try_nt!(token, self.skip_underscore_in_number());
+            let result = try_nt!(token, self.skip_underscore_in_number());
+            c = result.0;
+            had_separator |= result.1;
         }
 
         if decimal_point_char!(c) {
+            // Reject option 9: digit separators in float (before decimal point)
+            // Per WG17 decision, options 9-11 (float digit separators) are rejected
+            if had_separator {
+                return Err(ParserError::ParseBigInt(self.line_num, self.col_num));
+            }
+
             self.skip_char(c);
 
             if self.reader.peek_char().is_none() {
diff --git a/src/tests/digit_separators.pl b/src/tests/digit_separators.pl
@@ -128,12 +128,29 @@
     N =:= 1000
 )).
 
-% TODO: This should fail (float digit separators are undecided - options 9-11)
-% Currently it succeeds but per UWN it should be rejected for consistency
-% test("number_chars rejects float with separator", (
-%     catch(
-%         (number_chars(_, "1_0.0"), fail),
-%         error(syntax_error(_), _),
-%         true
-%     )
-% )).
+% Option 9: Reject digit separators in float (before decimal point)
+test("number_chars rejects float with separator before decimal", (
+    catch(
+        (number_chars(_, "1_0.0"), fail),
+        error(syntax_error(_), _),
+        true
+    )
+)).
+
+test("number_chars rejects float separator via atom_chars", (
+    catch(
+        (atom_chars('1_0.0', Cs), number_chars(_, Cs), fail),
+        error(syntax_error(_), _),
+        true
+    )
+)).
+
+test("direct literal rejects float with separator before decimal", (
+    % A literal 1_0.0 cannot appear here (this file would fail to load), so the
+    % text is built at run time: atom -> chars -> number_chars/2 (needs a list).
+    catch(
+        (atom_chars(A, ['1','_','0','.','0']), atom_chars(A, Cs), number_chars(_, Cs), fail),
+        error(syntax_error(_), _),
+        true
+    )
+)).