Skip to content

[beta] Revert "Auto merge of #62948 - matklad:failable-file-loading, r=petro… #65273

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/librustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
@@ -268,6 +268,7 @@ impl Cursor<'_> {
loop {
match self.nth_char(0) {
'\n' => break,
'\r' if self.nth_char(1) == '\n' => break,
EOF_CHAR if self.is_eof() => break,
_ => {
self.bump();
@@ -440,6 +441,7 @@ impl Cursor<'_> {
match self.nth_char(0) {
'/' if !first => break,
'\n' if self.nth_char(1) != '\'' => break,
'\r' if self.nth_char(1) == '\n' => break,
EOF_CHAR if self.is_eof() => break,
'\'' => {
self.bump();
36 changes: 28 additions & 8 deletions src/librustc_lexer/src/unescape.rs
Original file line number Diff line number Diff line change
@@ -128,7 +128,11 @@ fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<ch
if first_char != '\\' {
return match first_char {
'\t' | '\n' => Err(EscapeError::EscapeOnlyChar),
'\r' => Err(EscapeError::BareCarriageReturn),
'\r' => Err(if chars.clone().next() == Some('\n') {
EscapeError::EscapeOnlyChar
} else {
EscapeError::BareCarriageReturn
}),
'\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar),
'"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar),
_ => {
@@ -240,15 +244,27 @@ where

let unescaped_char = match first_char {
'\\' => {
let second_char = chars.clone().next();
match second_char {
Some('\n') => {
let (second_char, third_char) = {
let mut chars = chars.clone();
(chars.next(), chars.next())
};
match (second_char, third_char) {
(Some('\n'), _) | (Some('\r'), Some('\n')) => {
skip_ascii_whitespace(&mut chars);
continue;
}
_ => scan_escape(first_char, &mut chars, mode),
}
}
'\r' => {
let second_char = chars.clone().next();
if second_char == Some('\n') {
chars.next();
Ok('\n')
} else {
scan_escape(first_char, &mut chars, mode)
}
}
'\n' => Ok('\n'),
'\t' => Ok('\t'),
_ => scan_escape(first_char, &mut chars, mode),
@@ -282,11 +298,15 @@ where
while let Some(curr) = chars.next() {
let start = initial_len - chars.as_str().len() - curr.len_utf8();

let result = match curr {
'\r' => Err(EscapeError::BareCarriageReturnInRawString),
c if mode.is_bytes() && !c.is_ascii() =>
let result = match (curr, chars.clone().next()) {
('\r', Some('\n')) => {
chars.next();
Ok('\n')
},
('\r', _) => Err(EscapeError::BareCarriageReturnInRawString),
(c, _) if mode.is_bytes() && !c.is_ascii() =>
Err(EscapeError::NonAsciiCharInByteString),
c => Ok(c),
(c, _) => Ok(c),
};
let end = initial_len - chars.as_str().len();

11 changes: 8 additions & 3 deletions src/librustc_lexer/src/unescape/tests.rs
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@ fn test_unescape_char_bad() {
check(r"\", EscapeError::LoneSlash);

check("\n", EscapeError::EscapeOnlyChar);
check("\r\n", EscapeError::EscapeOnlyChar);
check("\t", EscapeError::EscapeOnlyChar);
check("'", EscapeError::EscapeOnlyChar);
check("\r", EscapeError::BareCarriageReturn);
@@ -30,7 +31,6 @@ fn test_unescape_char_bad() {
check(r"\v", EscapeError::InvalidEscape);
check(r"\💩", EscapeError::InvalidEscape);
check(r"\●", EscapeError::InvalidEscape);
check("\\\r", EscapeError::InvalidEscape);

check(r"\x", EscapeError::TooShortHexEscape);
check(r"\x0", EscapeError::TooShortHexEscape);
@@ -116,9 +116,10 @@ fn test_unescape_str_good() {

check("foo", "foo");
check("", "");
check(" \t\n", " \t\n");
check(" \t\n\r\n", " \t\n\n");

check("hello \\\n world", "hello world");
check("hello \\\r\n world", "hello world");
check("thread's", "thread's")
}

@@ -133,6 +134,7 @@ fn test_unescape_byte_bad() {
check(r"\", EscapeError::LoneSlash);

check("\n", EscapeError::EscapeOnlyChar);
check("\r\n", EscapeError::EscapeOnlyChar);
check("\t", EscapeError::EscapeOnlyChar);
check("'", EscapeError::EscapeOnlyChar);
check("\r", EscapeError::BareCarriageReturn);
@@ -236,9 +238,10 @@ fn test_unescape_byte_str_good() {

check("foo", b"foo");
check("", b"");
check(" \t\n", b" \t\n");
check(" \t\n\r\n", b" \t\n\n");

check("hello \\\n world", b"hello world");
check("hello \\\r\n world", b"hello world");
check("thread's", b"thread's")
}

@@ -250,6 +253,7 @@ fn test_unescape_raw_str() {
assert_eq!(unescaped, expected);
}

check("\r\n", &[(0..2, Ok('\n'))]);
check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
check("\rx", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString)), (1..2, Ok('x'))]);
}
@@ -262,6 +266,7 @@ fn test_unescape_raw_byte_str() {
assert_eq!(unescaped, expected);
}

check("\r\n", &[(0..2, Ok(byte_from_char('\n')))]);
check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByteString))]);
check(
81 changes: 66 additions & 15 deletions src/libsyntax/parse/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -8,7 +8,9 @@ use syntax_pos::{BytePos, Pos, Span};
use rustc_lexer::Base;
use rustc_lexer::unescape;

use std::borrow::Cow;
use std::char;
use std::iter;
use std::convert::TryInto;
use rustc_data_structures::sync::Lrc;
use log::debug;
@@ -179,7 +181,18 @@ impl<'a> StringReader<'a> {
let string = self.str_from(start);
// comments with only more "/"s are not doc comments
let tok = if is_doc_comment(string) {
self.forbid_bare_cr(start, string, "bare CR not allowed in doc-comment");
let mut idx = 0;
loop {
idx = match string[idx..].find('\r') {
None => break,
Some(it) => idx + it + 1
};
if string[idx..].chars().next() != Some('\n') {
self.err_span_(start + BytePos(idx as u32 - 1),
start + BytePos(idx as u32),
"bare CR not allowed in doc-comment");
}
}
token::DocComment(Symbol::intern(string))
} else {
token::Comment
@@ -204,10 +217,15 @@ impl<'a> StringReader<'a> {
}

let tok = if is_doc_comment {
self.forbid_bare_cr(start,
string,
"bare CR not allowed in block doc-comment");
token::DocComment(Symbol::intern(string))
let has_cr = string.contains('\r');
let string = if has_cr {
self.translate_crlf(start,
string,
"bare CR not allowed in block doc-comment")
} else {
string.into()
};
token::DocComment(Symbol::intern(&string[..]))
} else {
token::Comment
};
@@ -473,16 +491,49 @@ impl<'a> StringReader<'a> {
&self.src[self.src_index(start)..self.src_index(end)]
}

fn forbid_bare_cr(&self, start: BytePos, s: &str, errmsg: &str) {
let mut idx = 0;
loop {
idx = match s[idx..].find('\r') {
None => break,
Some(it) => idx + it + 1
};
self.err_span_(start + BytePos(idx as u32 - 1),
start + BytePos(idx as u32),
errmsg);
/// Converts CRLF to LF in the given string, raising an error on bare CR.
fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
let mut chars = s.char_indices().peekable();
while let Some((i, ch)) = chars.next() {
if ch == '\r' {
if let Some((lf_idx, '\n')) = chars.peek() {
return translate_crlf_(self, start, s, *lf_idx, chars, errmsg).into();
}
let pos = start + BytePos(i as u32);
let end_pos = start + BytePos((i + ch.len_utf8()) as u32);
self.err_span_(pos, end_pos, errmsg);
}
}
return s.into();

fn translate_crlf_(rdr: &StringReader<'_>,
start: BytePos,
s: &str,
mut j: usize,
mut chars: iter::Peekable<impl Iterator<Item = (usize, char)>>,
errmsg: &str)
-> String {
let mut buf = String::with_capacity(s.len());
// Skip first CR
buf.push_str(&s[.. j - 1]);
while let Some((i, ch)) = chars.next() {
if ch == '\r' {
if j < i {
buf.push_str(&s[j..i]);
}
let next = i + ch.len_utf8();
j = next;
if chars.peek().map(|(_, ch)| *ch) != Some('\n') {
let pos = start + BytePos(i as u32);
let end_pos = start + BytePos(next as u32);
rdr.err_span_(pos, end_pos, errmsg);
}
}
}
if j < s.len() {
buf.push_str(&s[j..]);
}
buf
}
}

56 changes: 0 additions & 56 deletions src/libsyntax_pos/lib.rs
Original file line number Diff line number Diff line change
@@ -1064,7 +1064,6 @@ impl SourceFile {
mut src: String,
start_pos: BytePos) -> Result<SourceFile, OffsetOverflowError> {
remove_bom(&mut src);
normalize_newlines(&mut src);

let src_hash = {
let mut hasher: StableHasher<u128> = StableHasher::new();
@@ -1232,61 +1231,6 @@ fn remove_bom(src: &mut String) {
}
}


/// Replaces `\r\n` with `\n` in-place in `src`.
///
/// Returns error if there's a lone `\r` in the string
fn normalize_newlines(src: &mut String) {
if !src.as_bytes().contains(&b'\r') {
return;
}

// We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding.
// While we *can* call `as_mut_vec` and do surgery on the live string
// directly, let's rather steal the contents of `src`. This makes the code
// safe even if a panic occurs.

let mut buf = std::mem::replace(src, String::new()).into_bytes();
let mut gap_len = 0;
let mut tail = buf.as_mut_slice();
loop {
let idx = match find_crlf(&tail[gap_len..]) {
None => tail.len(),
Some(idx) => idx + gap_len,
};
tail.copy_within(gap_len..idx, 0);
tail = &mut tail[idx - gap_len..];
if tail.len() == gap_len {
break;
}
gap_len += 1;
}

// Account for removed `\r`.
// After `set_len`, `buf` is guaranteed to contain utf-8 again.
let new_len = buf.len() - gap_len;
unsafe {
buf.set_len(new_len);
*src = String::from_utf8_unchecked(buf);
}

fn find_crlf(src: &[u8]) -> Option<usize> {
let mut search_idx = 0;
while let Some(idx) = find_cr(&src[search_idx..]) {
if src[search_idx..].get(idx + 1) != Some(&b'\n') {
search_idx += idx + 1;
continue;
}
return Some(search_idx + idx);
}
None
}

fn find_cr(src: &[u8]) -> Option<usize> {
src.iter().position(|&b| b == b'\r')
}
}

// _____________________________________________________________________________
// Pos, BytePos, CharPos
//
20 changes: 0 additions & 20 deletions src/libsyntax_pos/tests.rs
Original file line number Diff line number Diff line change
@@ -16,23 +16,3 @@ fn test_lookup_line() {
assert_eq!(lookup_line(lines, BytePos(28)), 2);
assert_eq!(lookup_line(lines, BytePos(29)), 2);
}

#[test]
fn test_normalize_newlines() {
fn check(before: &str, after: &str) {
let mut actual = before.to_string();
normalize_newlines(&mut actual);
assert_eq!(actual.as_str(), after);
}
check("", "");
check("\n", "\n");
check("\r", "\r");
check("\r\r", "\r\r");
check("\r\n", "\n");
check("hello world", "hello world");
check("hello\nworld", "hello\nworld");
check("hello\r\nworld", "hello\nworld");
check("\r\nhello\r\nworld\r\n", "\nhello\nworld\n");
check("\r\r\n", "\r\n");
check("hello\rworld", "hello\rworld");
}