Skip to content

Commit 8b9a8ca

Browse files
author
Leonidas Loucas
committed
Add handling for NumericUnderscores extension in numeric literals haskell-suite#455
1 parent d7414ac commit 8b9a8ca

File tree

2 files changed

+80
-53
lines changed

2 files changed

+80
-53
lines changed

src/Language/Haskell/Exts/Extension.hs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,9 @@ data KnownExtension =
563563
-- | HexFloatLiterals syntax ex 0xFF.FFp-12
564564
| HexFloatLiterals
565565

566+
-- | NumericUnderscores num literal syntax ex 1_000_000 or 0xF_F.F_Fp-12 or 0b11_11_11 or 1_000e+23
567+
| NumericUnderscores
568+
566569
deriving (Show, Read, Eq, Ord, Enum, Bounded, Data, Typeable)
567570

568571
-- | Certain extensions imply other extensions, and this function

src/Language/Haskell/Exts/InternalLexer.hs

Lines changed: 77 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -660,20 +660,20 @@ lexStdToken = do
660660

661661
'0':c:d:_ | toLower c == 'o' && isOctDigit d -> do
662662
discard 2
663-
(n, str) <- lexOctal
663+
(n, str) <- lexOctal $ numUnderscoresEnabled exts
664664
con <- intHash
665665
return (con (n, '0':c:str))
666666
| toLower c == 'b' && isBinDigit d && BinaryLiterals `elem` exts -> do
667667
discard 2
668-
(n, str) <- lexBinary
668+
(n, str) <- lexBinary $ numUnderscoresEnabled exts
669669
con <- intHash
670670
return (con (n, '0':c:str))
671671
| toLower c == 'x' && isHexDigit d && HexFloatLiterals `elem` exts -> do
672672
discard 2
673-
lexHexadecimalFloat c
673+
lexHexadecimalFloat (numUnderscoresEnabled exts) c
674674
| toLower c == 'x' && isHexDigit d -> do
675675
discard 2
676-
(n, str) <- lexHexadecimal
676+
(n, str) <- lexHexadecimal $ numUnderscoresEnabled exts
677677
con <- intHash
678678
return (con (n, '0':c:str))
679679

@@ -806,7 +806,7 @@ lexStdToken = do
806806
return $ LabelVarId ident
807807

808808

809-
c:_ | isDigit c -> lexDecimalOrFloat
809+
c:_ | isDigit c -> lexDecimalOrFloat $ numUnderscoresEnabled exts
810810

811811
| isUpper c -> lexConIdOrQual ""
812812

@@ -1012,73 +1012,73 @@ lexRawPragma = lexRawPragmaAux
10121012
rpr' <- lexRawPragma
10131013
return $ rpr ++ '#':rpr'
10141014

1015-
lexDecimalOrFloat :: Lex a Token
1016-
lexDecimalOrFloat = do
1017-
ds <- lexWhile isDigit
1015+
lexDecimalOrFloat :: NumericUnderscoresAllowed -> Lex a Token
1016+
lexDecimalOrFloat underAllowed = do
1017+
(n, raw) <- lexHandleUnderAllowed underAllowed isDigit
10181018
rest <- getInput
10191019
exts <- getExtensionsL
10201020
case rest of
10211021
('.':d:_) | isDigit d -> do
10221022
discard 1
1023-
frac <- lexWhile isDigit
1024-
let num = parseInteger 10 (ds ++ frac)
1023+
(frac, fracRaw) <- lexHandleUnderAllowed underAllowed isDigit
1024+
let num = parseInteger 10 (n ++ frac)
10251025
decimals = toInteger (length frac)
10261026
(exponent, estr) <- do
10271027
rest2 <- getInput
10281028
case rest2 of
1029-
'e':_ -> lexExponent
1030-
'E':_ -> lexExponent
1029+
'e':_ -> lexExponent underAllowed
1030+
'E':_ -> lexExponent underAllowed
10311031
_ -> return (0,"")
10321032
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
1033-
return $ con ((num%1) * 10^^(exponent - decimals), ds ++ '.':frac ++ estr)
1033+
return $ con ((num%1) * 10^^(exponent - decimals), raw ++ '.':fracRaw ++ estr)
10341034
e:_ | toLower e == 'e' -> do
1035-
(exponent, estr) <- lexExponent
1035+
(exponent, estr) <- lexExponent underAllowed
10361036
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
1037-
return $ con ((parseInteger 10 ds%1) * 10^^exponent, ds ++ estr)
1038-
'#':'#':_ | MagicHash `elem` exts -> discard 2 >> return (WordTokHash (parseInteger 10 ds, ds))
1039-
'#':_ | MagicHash `elem` exts -> discard 1 >> return (IntTokHash (parseInteger 10 ds, ds))
1040-
_ -> return (IntTok (parseInteger 10 ds, ds))
1037+
return $ con ((parseInteger 10 n%1) * 10^^exponent, raw ++ estr)
1038+
'#':'#':_ | MagicHash `elem` exts -> discard 2 >> return (WordTokHash (parseInteger 10 n, raw))
1039+
'#':_ | MagicHash `elem` exts -> discard 1 >> return (IntTokHash (parseInteger 10 n, raw))
1040+
_ -> return (IntTok (parseInteger 10 n, raw))
10411041

1042-
lexExponent :: Lex a (Integer, String)
1043-
lexExponent = do
1042+
lexExponent :: NumericUnderscoresAllowed -> Lex a (Integer, String)
1043+
lexExponent underAllowed = do
10441044
(e:r) <- getInput
10451045
discard 1 -- discard ex notation
10461046
case r of
10471047
'+':d:_ | isDigit d -> do
10481048
discard 1
1049-
(n, str) <- lexDecimal
1049+
(n, str) <- lexDecimal underAllowed
10501050
return (n, e:'+':str)
10511051
'-':d:_ | isDigit d -> do
10521052
discard 1
1053-
(n, str) <- lexDecimal
1053+
(n, str) <- lexDecimal underAllowed
10541054
return (negate n, e:'-':str)
1055-
d:_ | isDigit d -> lexDecimal >>= \(n,str) -> return (n, e:str)
1055+
d:_ | isDigit d -> lexDecimal underAllowed >>= \(n,str) -> return (n, e:str)
10561056
_ -> fail "Float with missing exponent"
10571057

1058-
lexHexadecimalFloat :: Char -> Lex a Token
1059-
lexHexadecimalFloat c = do
1060-
ds <- lexWhile isHexDigit
1058+
lexHexadecimalFloat :: NumericUnderscoresAllowed -> Char -> Lex a Token
1059+
lexHexadecimalFloat underAllowed c = do
1060+
(n, raw) <- lexHandleUnderAllowed underAllowed isHexDigit
10611061
rest <- getInput
10621062
exts <- getExtensionsL
10631063
case rest of
10641064
('.':d:_) | isHexDigit d -> do
10651065
discard 1
1066-
frac <- lexWhile isHexDigit
1067-
let num = parseInteger 16 ds
1066+
(frac, fracRaw) <- lexHandleUnderAllowed underAllowed isHexDigit
1067+
let num = parseInteger 16 n
10681068
numFrac = parseFrac frac
10691069
(exponent, estr) <- do
10701070
rest2 <- getInput
10711071
case rest2 of
1072-
'p':_ -> lexExponent
1073-
'P':_ -> lexExponent
1072+
'p':_ -> lexExponent underAllowed
1073+
'P':_ -> lexExponent underAllowed
10741074
_ -> return (0,"")
10751075
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
1076-
return $ con (((num%1) + numFrac) * 2^^(exponent), '0':c:ds ++ '.':frac ++ estr)
1076+
return $ con (((num%1) + numFrac) * 2^^(exponent), '0':c:raw ++ '.':fracRaw ++ estr)
10771077
e:_ | toLower e == 'p' -> do
1078-
(exponent, estr) <- lexExponent
1078+
(exponent, estr) <- lexExponent underAllowed
10791079
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
1080-
return $ con (((parseInteger 16 ds)%1) * 2^^exponent, '0':c:ds ++ estr)
1081-
_ -> return (IntTok (parseInteger 16 ds, '0':c:ds))
1080+
return $ con (((parseInteger 16 n)%1) * 2^^exponent, '0':c:raw ++ estr)
1081+
_ -> return (IntTok (parseInteger 16 n, '0':c:raw))
10821082
where
10831083
parseFrac :: String -> Rational
10841084
parseFrac ds =
@@ -1281,16 +1281,16 @@ lexEscape = do
12811281

12821282
'o':c:_ | isOctDigit c -> do
12831283
discard 1
1284-
(n, raw) <- lexOctal
1284+
(n, raw) <- lexOctal NoUnderscoresAllowedInNumeric
12851285
n' <- checkChar n
12861286
return (n', 'o':raw)
12871287
'x':c:_ | isHexDigit c -> do
12881288
discard 1
1289-
(n, raw) <- lexHexadecimal
1289+
(n, raw) <- lexHexadecimal NoUnderscoresAllowedInNumeric
12901290
n' <- checkChar n
12911291
return (n', 'x':raw)
12921292
c:_ | isDigit c -> do
1293-
(n, raw) <- lexDecimal
1293+
(n, raw) <- lexDecimal NoUnderscoresAllowedInNumeric
12941294
n' <- checkChar n
12951295
return (n', raw)
12961296

@@ -1307,28 +1307,28 @@ lexEscape = do
13071307
cntrl _ = fail "Illegal control character"
13081308

13091309
-- assumes at least one octal digit
1310-
lexOctal :: Lex a (Integer, String)
1311-
lexOctal = do
1312-
ds <- lexWhile isOctDigit
1313-
return (parseInteger 8 ds, ds)
1310+
lexOctal :: NumericUnderscoresAllowed -> Lex a (Integer, String)
1311+
lexOctal underAllowed = do
1312+
(n, raw) <- lexHandleUnderAllowed underAllowed isOctDigit
1313+
return (parseInteger 8 n, raw)
13141314

13151315
-- assumes at least one binary digit
1316-
lexBinary :: Lex a (Integer, String)
1317-
lexBinary = do
1318-
ds <- lexWhile isBinDigit
1319-
return (parseInteger 2 ds, ds)
1316+
lexBinary :: NumericUnderscoresAllowed -> Lex a (Integer, String)
1317+
lexBinary underAllowed = do
1318+
(n, raw) <- lexHandleUnderAllowed underAllowed isBinDigit
1319+
return (parseInteger 2 n, raw)
13201320

13211321
-- assumes at least one hexadecimal digit
1322-
lexHexadecimal :: Lex a (Integer, String)
1323-
lexHexadecimal = do
1324-
ds <- lexWhile isHexDigit
1325-
return (parseInteger 16 ds, ds)
1322+
lexHexadecimal :: NumericUnderscoresAllowed -> Lex a (Integer, String)
1323+
lexHexadecimal underAllowed = do
1324+
(n, raw) <- lexHandleUnderAllowed underAllowed isHexDigit
1325+
return (parseInteger 16 n, raw)
13261326

13271327
-- assumes at least one decimal digit
1328-
lexDecimal :: Lex a (Integer, String)
1329-
lexDecimal = do
1330-
ds <- lexWhile isDigit
1331-
return (parseInteger 10 ds, ds)
1328+
lexDecimal :: NumericUnderscoresAllowed -> Lex a (Integer, String)
1329+
lexDecimal underAllowed = do
1330+
(n, raw) <- lexHandleUnderAllowed underAllowed isDigit
1331+
return (parseInteger 10 n, raw)
13321332

13331333
-- Stolen from Hugs's Prelude
13341334
parseInteger :: Integer -> String -> Integer
@@ -1341,6 +1341,30 @@ flagKW t =
13411341
exts <- getExtensionsL
13421342
when (NondecreasingIndentation `elem` exts) flagDo
13431343

1344+
data NumericUnderscoresAllowed = UnderscoresAllowedInNumeric | NoUnderscoresAllowedInNumeric
1345+
deriving Show
1346+
1347+
numUnderscoresEnabled :: [KnownExtension] -> NumericUnderscoresAllowed
1348+
numUnderscoresEnabled exts = if (NumericUnderscores `elem` exts)
1349+
then UnderscoresAllowedInNumeric
1350+
else NoUnderscoresAllowedInNumeric
1351+
1352+
lexHandleUnderAllowed :: NumericUnderscoresAllowed -> (Char -> Bool) -> Lex a (String, String)
1353+
lexHandleUnderAllowed NoUnderscoresAllowedInNumeric p = do
1354+
ds <- lexWhile p
1355+
pure (ds, ds)
1356+
lexHandleUnderAllowed UnderscoresAllowedInNumeric p = do
1357+
s <- getInput
1358+
case s of
1359+
c:_ | p c -> do
1360+
raw <- lexWhile (\ic -> p ic || ic == '_')
1361+
if (not $ null raw) && last raw == '_'
1362+
then fail $ "lexHandleUnderAllowed: numeric must not end with _: " <> show raw
1363+
else pure (filter (/= '_') raw, raw)
1364+
c:_ -> fail $ "lexHandleUnderAllowed: numeric must start with proper digit: " <> show c
1365+
_ -> fail $ "lexHandleUnderAllowed: token stream exhausted"
1366+
1367+
13441368
-- | Selects ASCII binary digits, i.e. @\'0\'@..@\'1\'@.
13451369
isBinDigit :: Char -> Bool
13461370
isBinDigit c = c >= '0' && c <= '1'

0 commit comments

Comments
 (0)