Skip to content

Commit d463274

Browse files
committed
Translate CRLF -> LF in raw (byte) strings
1 parent 49d62e8 commit d463274

File tree

3 files changed

+18
-34
lines changed

src/libsyntax/parse/lexer/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,7 +1346,7 @@ impl<'a> StringReader<'a> {
13461346

13471347
fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
13481348
self.with_str_from_to(content_start, content_end, |lit: &str| {
1349-
unescape::unescape_raw_str(lit, &mut |range, c| {
1349+
unescape::unescape_raw_str(lit, unescape::Mode::Str, &mut |range, c| {
13501350
if let Err(err) = c {
13511351
emit_unescape_error(
13521352
&self.sess.span_diagnostic,
@@ -1363,7 +1363,7 @@ impl<'a> StringReader<'a> {
13631363

13641364
fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
13651365
self.with_str_from_to(content_start, content_end, |lit: &str| {
1366-
unescape::unescape_raw_byte_str(lit, &mut |range, c| {
1366+
unescape::unescape_raw_str(lit, unescape::Mode::ByteStr, &mut |range, c| {
13671367
if let Err(err) = c {
13681368
emit_unescape_error(
13691369
&self.sess.span_diagnostic,

src/libsyntax/parse/literal.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use crate::ast::{self, Lit, LitKind};
44
use crate::parse::parser::Parser;
55
use crate::parse::PResult;
66
use crate::parse::token::{self, Token, TokenKind};
7-
use crate::parse::unescape::{unescape_str, unescape_byte_str, unescape_raw_str};
7+
use crate::parse::unescape::{self, unescape_str, unescape_byte_str, unescape_raw_str};
88
use crate::parse::unescape::{unescape_char, unescape_byte};
99
use crate::print::pprust;
1010
use crate::symbol::{kw, sym, Symbol};
@@ -144,7 +144,7 @@ impl LitKind {
144144
let symbol = if s.contains('\r') {
145145
let mut buf = String::with_capacity(s.len());
146146
let mut error = Ok(());
147-
unescape_raw_str(&s, &mut |_, unescaped_char| {
147+
unescape_raw_str(&s, unescape::Mode::Str, &mut |_, unescaped_char| {
148148
match unescaped_char {
149149
Ok(c) => buf.push(c),
150150
Err(_) => error = Err(LitError::LexerError),

src/libsyntax/parse/unescape.rs

Lines changed: 14 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -71,44 +71,28 @@ where
7171
/// sequence of characters or errors.
7272
/// NOTE: Raw strings do not perform any explicit character escaping; here we
7373
/// only translate CRLF to LF and report errors on bare CR (and, in byte-string mode, on non-ASCII chars).
74-
pub(crate) fn unescape_raw_str<F>(literal_text: &str, callback: &mut F)
74+
pub(crate) fn unescape_raw_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
7575
where
7676
F: FnMut(Range<usize>, Result<char, EscapeError>),
7777
{
7878
let mut byte_offset: usize = 0;
7979

8080
let mut chars = literal_text.chars().peekable();
8181
while let Some(curr) = chars.next() {
82-
let result = match (curr, chars.peek()) {
83-
('\r', Some('\n')) => Ok(curr),
84-
('\r', _) => Err(EscapeError::BareCarriageReturn),
85-
_ => Ok(curr),
82+
let (result, scanned) = match (curr, chars.peek()) {
83+
('\r', Some('\n')) => {
84+
chars.next();
85+
(Ok('\n'), [Some('\r'), Some('\n')])
86+
},
87+
('\r', _) =>
88+
(Err(EscapeError::BareCarriageReturn), [Some('\r'), None]),
89+
(c, _) if mode.is_bytes() && c > '\x7F' =>
90+
(Err(EscapeError::NonAsciiCharInByteString), [Some(c), None]),
91+
(c, _) => (Ok(c), [Some(c), None]),
8692
};
87-
callback(byte_offset..(byte_offset + curr.len_utf8()), result);
88-
byte_offset += curr.len_utf8();
89-
}
90-
}
91-
92-
/// Takes a contents of a string literal (without quotes) and produces a
93-
/// sequence of characters or errors.
94-
/// NOTE: Raw strings do not perform any explicit character escaping, here we
95-
/// only translate CRLF to LF and produce errors on bare CR.
96-
pub(crate) fn unescape_raw_byte_str<F>(literal_text: &str, callback: &mut F)
97-
where
98-
F: FnMut(Range<usize>, Result<char, EscapeError>),
99-
{
100-
let mut byte_offset: usize = 0;
101-
102-
let mut chars = literal_text.chars().peekable();
103-
while let Some(curr) = chars.next() {
104-
let result = match (curr, chars.peek()) {
105-
('\r', Some('\n')) => Ok(curr),
106-
('\r', _) => Err(EscapeError::BareCarriageReturn),
107-
(c, _) if c > '\x7F' => Err(EscapeError::NonAsciiCharInByteString),
108-
_ => Ok(curr),
109-
};
110-
callback(byte_offset..(byte_offset + curr.len_utf8()), result);
111-
byte_offset += curr.len_utf8();
93+
let len_utf8: usize = scanned.iter().filter_map(|&x| x).map(char::len_utf8).sum();
94+
callback(byte_offset..(byte_offset + len_utf8), result);
95+
byte_offset += len_utf8;
11296
}
11397
}
11498

0 commit comments

Comments
 (0)