Skip to content

Commit 0473c67

Browse files
committed
Avoid useless checking in `from_token_lit`
The checking is already done by the parser, so only warnings need to be considered here; hence the `str-escape.rs` test change, which extends coverage to byte-string and C-string literals.
1 parent 0011fac commit 0473c67

File tree

3 files changed

+33
-73
lines changed

3 files changed

+33
-73
lines changed

compiler/rustc_ast/src/util/literal.rs

+21-62
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ impl LitKind {
4848
return Err(LitError::InvalidSuffix);
4949
}
5050

51+
// For byte/char/string literals, chars and escapes have already been
52+
// checked in the lexer (in `cook_lexer_literal`). So we can assume all
53+
// chars and escapes are valid here.
5154
Ok(match kind {
5255
token::Bool => {
5356
assert!(symbol.is_bool_lit());
@@ -56,12 +59,12 @@ impl LitKind {
5659
token::Byte => {
5760
return unescape_byte(symbol.as_str())
5861
.map(LitKind::Byte)
59-
.map_err(|_| LitError::LexerError);
62+
.map_err(|_| panic!("failed to unescape byte literal"));
6063
}
6164
token::Char => {
6265
return unescape_char(symbol.as_str())
6366
.map(LitKind::Char)
64-
.map_err(|_| LitError::LexerError);
67+
.map_err(|_| panic!("failed to unescape char literal"));
6568
}
6669

6770
// There are some valid suffixes for integer and float literals,
@@ -77,113 +80,69 @@ impl LitKind {
7780
let s = symbol.as_str();
7881
// Vanilla strings are so common we optimize for the common case where no chars
7982
// requiring special behaviour are present.
80-
let symbol = if s.contains(['\\', '\r']) {
83+
let symbol = if s.contains('\\') {
8184
let mut buf = String::with_capacity(s.len());
82-
let mut error = Ok(());
8385
// Force-inlining here is aggressive but the closure is
84-
// called on every char in the string, so it can be
85-
// hot in programs with many long strings.
86+
// called on every char in the string, so it can be hot in
87+
// programs with many long strings containing escapes.
8688
unescape_literal(
8789
s,
8890
Mode::Str,
8991
&mut #[inline(always)]
90-
|_, unescaped_char| match unescaped_char {
92+
|_, c| match c {
9193
Ok(c) => buf.push(c),
9294
Err(err) => {
93-
if err.is_fatal() {
94-
error = Err(LitError::LexerError);
95-
}
95+
assert!(!err.is_fatal(), "failed to unescape string literal")
9696
}
9797
},
9898
);
99-
error?;
10099
Symbol::intern(&buf)
101100
} else {
102101
symbol
103102
};
104103
LitKind::Str(symbol, ast::StrStyle::Cooked)
105104
}
106105
token::StrRaw(n) => {
107-
// Raw strings have no escapes, so we only need to check for invalid chars, and we
108-
// can reuse the symbol on success.
109-
let mut error = Ok(());
110-
unescape_literal(symbol.as_str(), Mode::RawStr, &mut |_, unescaped_char| {
111-
match unescaped_char {
112-
Ok(_) => {}
113-
Err(err) => {
114-
if err.is_fatal() {
115-
error = Err(LitError::LexerError);
116-
}
117-
}
118-
}
119-
});
120-
error?;
106+
// Raw strings have no escapes so no work is needed here.
121107
LitKind::Str(symbol, ast::StrStyle::Raw(n))
122108
}
123109
token::ByteStr => {
124110
let s = symbol.as_str();
125111
let mut buf = Vec::with_capacity(s.len());
126-
let mut error = Ok(());
127112
unescape_literal(s, Mode::ByteStr, &mut |_, c| match c {
128113
Ok(c) => buf.push(byte_from_char(c)),
129114
Err(err) => {
130-
if err.is_fatal() {
131-
error = Err(LitError::LexerError);
132-
}
115+
assert!(!err.is_fatal(), "failed to unescape string literal")
133116
}
134117
});
135-
error?;
136118
LitKind::ByteStr(buf.into(), StrStyle::Cooked)
137119
}
138120
token::ByteStrRaw(n) => {
139-
// Raw strings have no escapes, so we only need to check for invalid chars, and we
140-
// can convert the symbol directly to a `Lrc<u8>` on success.
141-
let s = symbol.as_str();
142-
let mut error = Ok(());
143-
unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c {
144-
Ok(_) => {}
145-
Err(err) => {
146-
if err.is_fatal() {
147-
error = Err(LitError::LexerError);
148-
}
149-
}
150-
});
151-
LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n))
121+
// Raw strings have no escapes so we can convert the symbol
122+
// directly to a `Lrc<[u8]>`.
123+
let buf = symbol.as_str().to_owned().into_bytes();
124+
LitKind::ByteStr(buf.into(), StrStyle::Raw(n))
152125
}
153126
token::CStr => {
154127
let s = symbol.as_str();
155128
let mut buf = Vec::with_capacity(s.len());
156-
let mut error = Ok(());
157129
unescape_c_string(s, Mode::CStr, &mut |_span, c| match c {
158130
Ok(CStrUnit::Byte(b)) => buf.push(b),
159131
Ok(CStrUnit::Char(c)) => {
160132
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
161133
}
162134
Err(err) => {
163-
if err.is_fatal() {
164-
error = Err(LitError::LexerError);
165-
}
135+
assert!(!err.is_fatal(), "failed to unescape C string literal")
166136
}
167137
});
168-
error?;
169138
buf.push(0);
170139
LitKind::CStr(buf.into(), StrStyle::Cooked)
171140
}
172141
token::CStrRaw(n) => {
173-
// Raw strings have no escapes, so we only need to check for invalid chars, and we
174-
// can convert the symbol directly to a `Lrc<u8>` on success.
175-
let s = symbol.as_str();
176-
let mut error = Ok(());
177-
unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c {
178-
Ok(_) => {}
179-
Err(err) => {
180-
if err.is_fatal() {
181-
error = Err(LitError::LexerError);
182-
}
183-
}
184-
});
185-
error?;
186-
let mut buf = s.to_owned().into_bytes();
142+
// Raw strings have no escapes so we can convert the symbol
143+
// directly to a `Lrc<[u8]>` after appending the terminating NUL
144+
// char.
145+
let mut buf = symbol.as_str().to_owned().into_bytes();
187146
buf.push(0);
188147
LitKind::CStr(buf.into(), StrStyle::Raw(n))
189148
}

tests/ui/str/str-escape.rs

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// check-pass
22
// ignore-tidy-tab
3+
// edition: 2021
34

45
fn main() {
56
let s = "\
@@ -8,11 +9,11 @@ fn main() {
89
//~^^^ WARNING multiple lines skipped by escaped newline
910
assert_eq!(s, "");
1011

11-
let s = "foo\
12+
let s = c"foo\
1213
  bar
1314
";
1415
//~^^^ WARNING whitespace symbol '\u{a0}' is not skipped
15-
assert_eq!(s, "foo  bar\n ");
16+
assert_eq!(s, c"foo  bar\n ");
1617

1718
let s = "a\
1819
b";
@@ -22,10 +23,10 @@ fn main() {
2223
b";
2324
assert_eq!(s, "ab");
2425

25-
let s = "a\
26+
let s = b"a\
2627
b";
2728
//~^^ WARNING whitespace symbol '\u{c}' is not skipped
2829
// '\x0c' is ASCII whitespace, but it may not need to be skipped
2930
// discussion: https://github.com/rust-lang/rust/pull/108403
30-
assert_eq!(s, "a\x0cb");
31+
assert_eq!(s, b"a\x0cb");
3132
}

tests/ui/str/str-escape.stderr

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
warning: multiple lines skipped by escaped newline
2-
--> $DIR/str-escape.rs:5:14
2+
--> $DIR/str-escape.rs:6:14
33
|
44
LL | let s = "\
55
| ______________^
@@ -8,20 +8,20 @@ LL | | ";
88
| |_____________^ skipping everything up to and including this point
99

1010
warning: whitespace symbol '\u{a0}' is not skipped
11-
--> $DIR/str-escape.rs:11:17
11+
--> $DIR/str-escape.rs:12:18
1212
|
13-
LL | let s = "foo\
14-
| _________________^
13+
LL | let s = c"foo\
14+
| __________________^
1515
LL | |   bar
1616
| | ^ whitespace symbol '\u{a0}' is not skipped
1717
| |___|
1818
|
1919

2020
warning: whitespace symbol '\u{c}' is not skipped
21-
--> $DIR/str-escape.rs:25:15
21+
--> $DIR/str-escape.rs:26:16
2222
|
23-
LL | let s = "a\
24-
| _______________^
23+
LL | let s = b"a\
24+
| ________________^
2525
LL | | b";
2626
| | ^- whitespace symbol '\u{c}' is not skipped
2727
| |____|

0 commit comments

Comments
 (0)