@@ -660,20 +660,20 @@ lexStdToken = do
660
660
661
661
' 0' : c: d: _ | toLower c == ' o' && isOctDigit d -> do
662
662
discard 2
663
- (n, str) <- lexOctal
663
+ (n, str) <- lexOctal $ numUnderscoresEnabled exts
664
664
con <- intHash
665
665
return (con (n, ' 0' : c: str))
666
666
| toLower c == ' b' && isBinDigit d && BinaryLiterals `elem` exts -> do
667
667
discard 2
668
- (n, str) <- lexBinary
668
+ (n, str) <- lexBinary $ numUnderscoresEnabled exts
669
669
con <- intHash
670
670
return (con (n, ' 0' : c: str))
671
671
| toLower c == ' x' && isHexDigit d && HexFloatLiterals `elem` exts -> do
672
672
discard 2
673
- lexHexadecimalFloat c
673
+ lexHexadecimalFloat (numUnderscoresEnabled exts) c
674
674
| toLower c == ' x' && isHexDigit d -> do
675
675
discard 2
676
- (n, str) <- lexHexadecimal
676
+ (n, str) <- lexHexadecimal $ numUnderscoresEnabled exts
677
677
con <- intHash
678
678
return (con (n, ' 0' : c: str))
679
679
@@ -806,7 +806,7 @@ lexStdToken = do
806
806
return $ LabelVarId ident
807
807
808
808
809
- c: _ | isDigit c -> lexDecimalOrFloat
809
+ c: _ | isDigit c -> lexDecimalOrFloat $ numUnderscoresEnabled exts
810
810
811
811
| isUpper c -> lexConIdOrQual " "
812
812
@@ -1012,73 +1012,73 @@ lexRawPragma = lexRawPragmaAux
1012
1012
rpr' <- lexRawPragma
1013
1013
return $ rpr ++ ' #' : rpr'
1014
1014
1015
- lexDecimalOrFloat :: Lex a Token
1016
- lexDecimalOrFloat = do
1017
- ds <- lexWhile isDigit
1015
+ lexDecimalOrFloat :: NumericUnderscoresAllowed -> Lex a Token
1016
+ lexDecimalOrFloat underAllowed = do
1017
+ (n, raw) <- lexHandleUnderAllowed underAllowed isDigit
1018
1018
rest <- getInput
1019
1019
exts <- getExtensionsL
1020
1020
case rest of
1021
1021
(' .' : d: _) | isDigit d -> do
1022
1022
discard 1
1023
- frac <- lexWhile isDigit
1024
- let num = parseInteger 10 (ds ++ frac)
1023
+ ( frac, fracRaw) <- lexHandleUnderAllowed underAllowed isDigit
1024
+ let num = parseInteger 10 (n ++ frac)
1025
1025
decimals = toInteger (length frac)
1026
1026
(exponent , estr) <- do
1027
1027
rest2 <- getInput
1028
1028
case rest2 of
1029
- ' e' : _ -> lexExponent
1030
- ' E' : _ -> lexExponent
1029
+ ' e' : _ -> lexExponent underAllowed
1030
+ ' E' : _ -> lexExponent underAllowed
1031
1031
_ -> return (0 ," " )
1032
1032
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash )
1033
- return $ con ((num% 1 ) * 10 ^^ (exponent - decimals), ds ++ ' .' : frac ++ estr)
1033
+ return $ con ((num% 1 ) * 10 ^^ (exponent - decimals), raw ++ ' .' : fracRaw ++ estr)
1034
1034
e: _ | toLower e == ' e' -> do
1035
- (exponent , estr) <- lexExponent
1035
+ (exponent , estr) <- lexExponent underAllowed
1036
1036
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash )
1037
- return $ con ((parseInteger 10 ds % 1 ) * 10 ^^ exponent , ds ++ estr)
1038
- ' #' : ' #' : _ | MagicHash `elem` exts -> discard 2 >> return (WordTokHash (parseInteger 10 ds, ds ))
1039
- ' #' : _ | MagicHash `elem` exts -> discard 1 >> return (IntTokHash (parseInteger 10 ds, ds ))
1040
- _ -> return (IntTok (parseInteger 10 ds, ds ))
1037
+ return $ con ((parseInteger 10 n % 1 ) * 10 ^^ exponent , raw ++ estr)
1038
+ ' #' : ' #' : _ | MagicHash `elem` exts -> discard 2 >> return (WordTokHash (parseInteger 10 n, raw ))
1039
+ ' #' : _ | MagicHash `elem` exts -> discard 1 >> return (IntTokHash (parseInteger 10 n, raw ))
1040
+ _ -> return (IntTok (parseInteger 10 n, raw ))
1041
1041
1042
- lexExponent :: Lex a (Integer , String )
1043
- lexExponent = do
1042
+ lexExponent :: NumericUnderscoresAllowed -> Lex a (Integer , String )
1043
+ lexExponent underAllowed = do
1044
1044
(e: r) <- getInput
1045
1045
discard 1 -- discard ex notation
1046
1046
case r of
1047
1047
' +' : d: _ | isDigit d -> do
1048
1048
discard 1
1049
- (n, str) <- lexDecimal
1049
+ (n, str) <- lexDecimal underAllowed
1050
1050
return (n, e: ' +' : str)
1051
1051
' -' : d: _ | isDigit d -> do
1052
1052
discard 1
1053
- (n, str) <- lexDecimal
1053
+ (n, str) <- lexDecimal underAllowed
1054
1054
return (negate n, e: ' -' : str)
1055
- d: _ | isDigit d -> lexDecimal >>= \ (n,str) -> return (n, e: str)
1055
+ d: _ | isDigit d -> lexDecimal underAllowed >>= \ (n,str) -> return (n, e: str)
1056
1056
_ -> fail " Float with missing exponent"
1057
1057
1058
- lexHexadecimalFloat :: Char -> Lex a Token
1059
- lexHexadecimalFloat c = do
1060
- ds <- lexWhile isHexDigit
1058
+ lexHexadecimalFloat :: NumericUnderscoresAllowed -> Char -> Lex a Token
1059
+ lexHexadecimalFloat underAllowed c = do
1060
+ (n, raw) <- lexHandleUnderAllowed underAllowed isHexDigit
1061
1061
rest <- getInput
1062
1062
exts <- getExtensionsL
1063
1063
case rest of
1064
1064
(' .' : d: _) | isHexDigit d -> do
1065
1065
discard 1
1066
- frac <- lexWhile isHexDigit
1067
- let num = parseInteger 16 ds
1066
+ ( frac, fracRaw) <- lexHandleUnderAllowed underAllowed isHexDigit
1067
+ let num = parseInteger 16 n
1068
1068
numFrac = parseFrac frac
1069
1069
(exponent , estr) <- do
1070
1070
rest2 <- getInput
1071
1071
case rest2 of
1072
- ' p' : _ -> lexExponent
1073
- ' P' : _ -> lexExponent
1072
+ ' p' : _ -> lexExponent underAllowed
1073
+ ' P' : _ -> lexExponent underAllowed
1074
1074
_ -> return (0 ," " )
1075
1075
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash )
1076
- return $ con (((num% 1 ) + numFrac) * 2 ^^ (exponent ), ' 0' : c: ds ++ ' .' : frac ++ estr)
1076
+ return $ con (((num% 1 ) + numFrac) * 2 ^^ (exponent ), ' 0' : c: raw ++ ' .' : fracRaw ++ estr)
1077
1077
e: _ | toLower e == ' p' -> do
1078
- (exponent , estr) <- lexExponent
1078
+ (exponent , estr) <- lexExponent underAllowed
1079
1079
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash )
1080
- return $ con (((parseInteger 16 ds )% 1 ) * 2 ^^ exponent , ' 0' : c: ds ++ estr)
1081
- _ -> return (IntTok (parseInteger 16 ds , ' 0' : c: ds ))
1080
+ return $ con (((parseInteger 16 n )% 1 ) * 2 ^^ exponent , ' 0' : c: raw ++ estr)
1081
+ _ -> return (IntTok (parseInteger 16 n , ' 0' : c: raw ))
1082
1082
where
1083
1083
parseFrac :: String -> Rational
1084
1084
parseFrac ds =
@@ -1281,16 +1281,16 @@ lexEscape = do
1281
1281
1282
1282
' o' : c: _ | isOctDigit c -> do
1283
1283
discard 1
1284
- (n, raw) <- lexOctal
1284
+ (n, raw) <- lexOctal NoUnderscoresAllowedInNumeric
1285
1285
n' <- checkChar n
1286
1286
return (n', ' o' : raw)
1287
1287
' x' : c: _ | isHexDigit c -> do
1288
1288
discard 1
1289
- (n, raw) <- lexHexadecimal
1289
+ (n, raw) <- lexHexadecimal NoUnderscoresAllowedInNumeric
1290
1290
n' <- checkChar n
1291
1291
return (n', ' x' : raw)
1292
1292
c: _ | isDigit c -> do
1293
- (n, raw) <- lexDecimal
1293
+ (n, raw) <- lexDecimal NoUnderscoresAllowedInNumeric
1294
1294
n' <- checkChar n
1295
1295
return (n', raw)
1296
1296
@@ -1307,28 +1307,28 @@ lexEscape = do
1307
1307
cntrl _ = fail " Illegal control character"
1308
1308
1309
1309
-- assumes at least one octal digit
1310
- lexOctal :: Lex a (Integer , String )
1311
- lexOctal = do
1312
- ds <- lexWhile isOctDigit
1313
- return (parseInteger 8 ds, ds )
1310
+ lexOctal :: NumericUnderscoresAllowed -> Lex a (Integer , String )
1311
+ lexOctal underAllowed = do
1312
+ (n, raw) <- lexHandleUnderAllowed underAllowed isOctDigit
1313
+ return (parseInteger 8 n, raw )
1314
1314
1315
1315
-- assumes at least one binary digit
1316
- lexBinary :: Lex a (Integer , String )
1317
- lexBinary = do
1318
- ds <- lexWhile isBinDigit
1319
- return (parseInteger 2 ds, ds )
1316
+ lexBinary :: NumericUnderscoresAllowed -> Lex a (Integer , String )
1317
+ lexBinary underAllowed = do
1318
+ (n, raw) <- lexHandleUnderAllowed underAllowed isBinDigit
1319
+ return (parseInteger 2 n, raw )
1320
1320
1321
1321
-- assumes at least one hexadecimal digit
1322
- lexHexadecimal :: Lex a (Integer , String )
1323
- lexHexadecimal = do
1324
- ds <- lexWhile isHexDigit
1325
- return (parseInteger 16 ds, ds )
1322
+ lexHexadecimal :: NumericUnderscoresAllowed -> Lex a (Integer , String )
1323
+ lexHexadecimal underAllowed = do
1324
+ (n, raw) <- lexHandleUnderAllowed underAllowed isHexDigit
1325
+ return (parseInteger 16 n, raw )
1326
1326
1327
1327
-- assumes at least one decimal digit
1328
- lexDecimal :: Lex a (Integer , String )
1329
- lexDecimal = do
1330
- ds <- lexWhile isDigit
1331
- return (parseInteger 10 ds, ds )
1328
+ lexDecimal :: NumericUnderscoresAllowed -> Lex a (Integer , String )
1329
+ lexDecimal underAllowed = do
1330
+ (n, raw) <- lexHandleUnderAllowed underAllowed isDigit
1331
+ return (parseInteger 10 n, raw )
1332
1332
1333
1333
-- Stolen from Hugs's Prelude
1334
1334
parseInteger :: Integer -> String -> Integer
@@ -1341,6 +1341,30 @@ flagKW t =
1341
1341
exts <- getExtensionsL
1342
1342
when (NondecreasingIndentation `elem` exts) flagDo
1343
1343
1344
+ data NumericUnderscoresAllowed = UnderscoresAllowedInNumeric | NoUnderscoresAllowedInNumeric
1345
+ deriving Show
1346
+
1347
+ numUnderscoresEnabled :: [KnownExtension ] -> NumericUnderscoresAllowed
1348
+ numUnderscoresEnabled exts = if (NumericUnderscores `elem` exts)
1349
+ then UnderscoresAllowedInNumeric
1350
+ else NoUnderscoresAllowedInNumeric
1351
+
1352
+ lexHandleUnderAllowed :: NumericUnderscoresAllowed -> (Char -> Bool ) -> Lex a (String , String )
1353
+ lexHandleUnderAllowed NoUnderscoresAllowedInNumeric p = do
1354
+ ds <- lexWhile p
1355
+ pure (ds, ds)
1356
+ lexHandleUnderAllowed UnderscoresAllowedInNumeric p = do
1357
+ s <- getInput
1358
+ case s of
1359
+ c: _ | p c -> do
1360
+ raw <- lexWhile (\ ic -> p ic || ic == ' _' )
1361
+ if (not $ null raw) && last raw == ' _'
1362
+ then fail $ " lexHandleUnderAllowed: numeric must not end with _: " <> show raw
1363
+ else pure (filter (/= ' _' ) raw, raw)
1364
+ c: _ -> fail $ " lexHandleUnderAllowed: numeric must start with proper digit: " <> show c
1365
+ _ -> fail $ " lexHandleUnderAllowed: token stream exhausted"
1366
+
1367
+
1344
1368
-- | Selects ASCII binary digits, i.e. @\'0\'@..@\'1\'@.
1345
1369
isBinDigit :: Char -> Bool
1346
1370
isBinDigit c = c >= ' 0' && c <= ' 1'
0 commit comments