Skip to content

Commit 4acc5e6

Browse files
committed
Don't rebuild raw strings when unescaping.
Raw strings don't have escape sequences, so for them "unescaping" just means checking for invalid chars like bare CR. Which means there is no need to rebuild them one char or byte at a time while unescaping, because the unescaped version will be the same. This commit removes that rebuilding. Also, the commit changes things so that "unescaping" is unconditional for raw strings and raw byte strings. That's simpler and they're rare enough that the perf effect is negligible.
1 parent 028b6d1 commit 4acc5e6

File tree

1 file changed

+30
-43
lines changed

1 file changed

+30
-43
lines changed

compiler/rustc_ast/src/util/literal.rs

+30-43
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ impl LitKind {
7777
// new symbol because the string in the LitKind is different to the
7878
// string in the token.
7979
let s = symbol.as_str();
80+
// Vanilla strings are so common we optimize for the common case where no chars
81+
// requiring special behaviour are present.
8082
let symbol = if s.contains(['\\', '\r']) {
8183
let mut buf = String::with_capacity(s.len());
8284
let mut error = Ok(());
@@ -104,27 +106,20 @@ impl LitKind {
104106
LitKind::Str(symbol, ast::StrStyle::Cooked)
105107
}
106108
token::StrRaw(n) => {
107-
// Ditto.
108-
let s = symbol.as_str();
109-
let symbol =
110-
if s.contains('\r') {
111-
let mut buf = String::with_capacity(s.len());
112-
let mut error = Ok(());
113-
unescape_literal(s, Mode::RawStr, &mut |_, unescaped_char| {
114-
match unescaped_char {
115-
Ok(c) => buf.push(c),
116-
Err(err) => {
117-
if err.is_fatal() {
118-
error = Err(LitError::LexerError);
119-
}
120-
}
109+
// Raw strings have no escapes, so we only need to check for invalid chars, and we
110+
// can reuse the symbol on success.
111+
let mut error = Ok(());
112+
unescape_literal(symbol.as_str(), Mode::RawStr, &mut |_, unescaped_char| {
113+
match unescaped_char {
114+
Ok(_) => {}
115+
Err(err) => {
116+
if err.is_fatal() {
117+
error = Err(LitError::LexerError);
121118
}
122-
});
123-
error?;
124-
Symbol::intern(&buf)
125-
} else {
126-
symbol
127-
};
119+
}
120+
}
121+
});
122+
error?;
128123
LitKind::Str(symbol, ast::StrStyle::Raw(n))
129124
}
130125
token::ByteStr => {
@@ -143,25 +138,19 @@ impl LitKind {
143138
LitKind::ByteStr(buf.into(), StrStyle::Cooked)
144139
}
145140
token::ByteStrRaw(n) => {
141+
// Raw strings have no escapes, so we only need to check for invalid chars, and we
142+
// can convert the symbol directly to a `Lrc<[u8]>` on success.
146143
let s = symbol.as_str();
147-
let bytes = if s.contains('\r') {
148-
let mut buf = Vec::with_capacity(s.len());
149-
let mut error = Ok(());
150-
unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c {
151-
Ok(c) => buf.push(byte_from_char(c)),
152-
Err(err) => {
153-
if err.is_fatal() {
154-
error = Err(LitError::LexerError);
155-
}
144+
let mut error = Ok(());
145+
unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c {
146+
Ok(_) => {}
147+
Err(err) => {
148+
if err.is_fatal() {
149+
error = Err(LitError::LexerError);
156150
}
157-
});
158-
error?;
159-
buf
160-
} else {
161-
symbol.to_string().into_bytes()
162-
};
163-
164-
LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
151+
}
152+
});
153+
LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n))
165154
}
166155
token::CStr => {
167156
let s = symbol.as_str();
@@ -187,25 +176,23 @@ impl LitKind {
187176
LitKind::CStr(buf.into(), StrStyle::Cooked)
188177
}
189178
token::CStrRaw(n) => {
179+
// Raw strings have no escapes, so we only need to check for invalid chars, and we
180+
// can convert the symbol directly to a `Lrc<[u8]>` on success.
190181
let s = symbol.as_str();
191-
let mut buf = Vec::with_capacity(s.len());
192182
let mut error = Ok(());
193183
unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
194184
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
195185
error = Err(LitError::NulInCStr(span));
196186
}
197-
Ok(CStrUnit::Byte(b)) => buf.push(b),
198-
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
199-
Ok(CStrUnit::Char(c)) => {
200-
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
201-
}
187+
Ok(_) => {}
202188
Err(err) => {
203189
if err.is_fatal() {
204190
error = Err(LitError::LexerError);
205191
}
206192
}
207193
});
208194
error?;
195+
let mut buf = s.to_owned().into_bytes();
209196
buf.push(0);
210197
LitKind::CStr(buf.into(), StrStyle::Raw(n))
211198
}

0 commit comments

Comments
 (0)