XXX: avoid useless checking in from_token_lit

nnethercote · nnethercote · commit 0473c67c29a4 · 2024-01-23T14:45:06.000+11:00
XXX: already done by parser

XXX: only need to worry about warnings, hence the str-escape.rs test
change, to cover byte-string and c-string literals
diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs
@@ -48,6 +48,9 @@ impl LitKind {
             return Err(LitError::InvalidSuffix);
         }
 
+        // For byte/char/string literals, chars and escapes have already been
+        // checked in the lexer (in `cook_lexer_literal`). So we can assume all
+        // chars and escapes are valid here.
         Ok(match kind {
             token::Bool => {
                 assert!(symbol.is_bool_lit());
@@ -56,12 +59,12 @@ impl LitKind {
             token::Byte => {
                 return unescape_byte(symbol.as_str())
                     .map(LitKind::Byte)
-                    .map_err(|_| LitError::LexerError);
+                    .map_err(|_| panic!("failed to unescape byte literal"));
             }
             token::Char => {
                 return unescape_char(symbol.as_str())
                     .map(LitKind::Char)
-                    .map_err(|_| LitError::LexerError);
+                    .map_err(|_| panic!("failed to unescape char literal"));
             }
 
             // There are some valid suffixes for integer and float literals,
@@ -77,113 +80,69 @@ impl LitKind {
                 let s = symbol.as_str();
                 // Vanilla strings are so common we optimize for the common case where no chars
                 // requiring special behaviour are present.
-                let symbol = if s.contains(['\\', '\r']) {
+                let symbol = if s.contains('\\') {
                     let mut buf = String::with_capacity(s.len());
-                    let mut error = Ok(());
                     // Force-inlining here is aggressive but the closure is
-                    // called on every char in the string, so it can be
-                    // hot in programs with many long strings.
+                    // called on every char in the string, so it can be hot in
+                    // programs with many long strings containing escapes.
                     unescape_literal(
                         s,
                         Mode::Str,
                         &mut #[inline(always)]
-                        |_, unescaped_char| match unescaped_char {
+                        |_, c| match c {
                             Ok(c) => buf.push(c),
                             Err(err) => {
-                                if err.is_fatal() {
-                                    error = Err(LitError::LexerError);
-                                }
+                                assert!(!err.is_fatal(), "failed to unescape string literal")
                             }
                         },
                     );
-                    error?;
                     Symbol::intern(&buf)
                 } else {
                     symbol
                 };
                 LitKind::Str(symbol, ast::StrStyle::Cooked)
             }
             token::StrRaw(n) => {
-                // Raw strings have no escapes, so we only need to check for invalid chars, and we
-                // can reuse the symbol on success.
-                let mut error = Ok(());
-                unescape_literal(symbol.as_str(), Mode::RawStr, &mut |_, unescaped_char| {
-                    match unescaped_char {
-                        Ok(_) => {}
-                        Err(err) => {
-                            if err.is_fatal() {
-                                error = Err(LitError::LexerError);
-                            }
-                        }
-                    }
-                });
-                error?;
+                // Raw strings have no escapes so no work is needed here.
                 LitKind::Str(symbol, ast::StrStyle::Raw(n))
             }
             token::ByteStr => {
                 let s = symbol.as_str();
                 let mut buf = Vec::with_capacity(s.len());
-                let mut error = Ok(());
                 unescape_literal(s, Mode::ByteStr, &mut |_, c| match c {
                     Ok(c) => buf.push(byte_from_char(c)),
                     Err(err) => {
-                        if err.is_fatal() {
-                            error = Err(LitError::LexerError);
-                        }
+                        assert!(!err.is_fatal(), "failed to unescape string literal")
                     }
                 });
-                error?;
                 LitKind::ByteStr(buf.into(), StrStyle::Cooked)
             }
             token::ByteStrRaw(n) => {
-                // Raw strings have no escapes, so we only need to check for invalid chars, and we
-                // can convert the symbol directly to a `Lrc<u8>` on success.
-                let s = symbol.as_str();
-                let mut error = Ok(());
-                unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c {
-                    Ok(_) => {}
-                    Err(err) => {
-                        if err.is_fatal() {
-                            error = Err(LitError::LexerError);
-                        }
-                    }
-                });
-                LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n))
+                // Raw strings have no escapes so we can convert the symbol
+                // directly to a `Lrc<u8>`.
+                let buf = symbol.as_str().to_owned().into_bytes();
+                LitKind::ByteStr(buf.into(), StrStyle::Raw(n))
             }
             token::CStr => {
                 let s = symbol.as_str();
                 let mut buf = Vec::with_capacity(s.len());
-                let mut error = Ok(());
                 unescape_c_string(s, Mode::CStr, &mut |_span, c| match c {
                     Ok(CStrUnit::Byte(b)) => buf.push(b),
                     Ok(CStrUnit::Char(c)) => {
                         buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
                     }
                     Err(err) => {
-                        if err.is_fatal() {
-                            error = Err(LitError::LexerError);
-                        }
+                        assert!(!err.is_fatal(), "failed to unescape C string literal")
                     }
                 });
-                error?;
                 buf.push(0);
                 LitKind::CStr(buf.into(), StrStyle::Cooked)
             }
             token::CStrRaw(n) => {
-                // Raw strings have no escapes, so we only need to check for invalid chars, and we
-                // can convert the symbol directly to a `Lrc<u8>` on success.
-                let s = symbol.as_str();
-                let mut error = Ok(());
-                unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c {
-                    Ok(_) => {}
-                    Err(err) => {
-                        if err.is_fatal() {
-                            error = Err(LitError::LexerError);
-                        }
-                    }
-                });
-                error?;
-                let mut buf = s.to_owned().into_bytes();
+                // Raw strings have no escapes so we can convert the symbol
+                // directly to a `Lrc<u8>` after appending the terminating NUL
+                // char.
+                let mut buf = symbol.as_str().to_owned().into_bytes();
                 buf.push(0);
                 LitKind::CStr(buf.into(), StrStyle::Raw(n))
             }
diff --git a/tests/ui/str/str-escape.rs b/tests/ui/str/str-escape.rs
@@ -1,5 +1,6 @@
 // check-pass
 // ignore-tidy-tab
+// edition: 2021
 
 fn main() {
     let s = "\
@@ -8,11 +9,11 @@ fn main() {
     //~^^^ WARNING multiple lines skipped by escaped newline
     assert_eq!(s, "");
 
-    let s = "foo\
+    let s = c"foo\
              bar
              ";
     //~^^^ WARNING whitespace symbol '\u{a0}' is not skipped
-    assert_eq!(s, "foo           bar\n             ");
+    assert_eq!(s, c"foo           bar\n             ");
 
     let s = "a\
  b";
@@ -22,10 +23,10 @@ fn main() {
 	b";
     assert_eq!(s, "ab");
 
-    let s = "a\
+    let s = b"a\
     b";
     //~^^ WARNING whitespace symbol '\u{c}' is not skipped
     // '\x0c' is ASCII whitespace, but it may not need skipped
     // discussion: https://github.com/rust-lang/rust/pull/108403
-    assert_eq!(s, "a\x0cb");
+    assert_eq!(s, b"a\x0cb");
 }
diff --git a/tests/ui/str/str-escape.stderr b/tests/ui/str/str-escape.stderr
@@ -1,5 +1,5 @@
 warning: multiple lines skipped by escaped newline
-  --> $DIR/str-escape.rs:5:14
+  --> $DIR/str-escape.rs:6:14
    |
 LL |       let s = "\
    |  ______________^
@@ -8,20 +8,20 @@ LL | |              ";
    | |_____________^ skipping everything up to and including this point
 
 warning: whitespace symbol '\u{a0}' is not skipped
-  --> $DIR/str-escape.rs:11:17
+  --> $DIR/str-escape.rs:12:18
    |
-LL |       let s = "foo\
-   |  _________________^
+LL |       let s = c"foo\
+   |  __________________^
 LL | |              bar
    | |   ^ whitespace symbol '\u{a0}' is not skipped
    | |___|
    | 
 
 warning: whitespace symbol '\u{c}' is not skipped
-  --> $DIR/str-escape.rs:25:15
+  --> $DIR/str-escape.rs:26:16
    |
-LL |       let s = "a\
-   |  _______________^
+LL |       let s = b"a\
+   |  ________________^
 LL | |     b";
    | |    ^- whitespace symbol '\u{c}' is not skipped
    | |____|