Skip to content

Commit ec81025

Browse files
committed
Point at invalid utf-8 span on user's source code
```
error: couldn't read `$DIR/not-utf8-bin-file.rs`: stream did not contain valid UTF-8
  --> $DIR/not-utf8-2.rs:6:5
   |
LL |     include!("not-utf8-bin-file.rs");
   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
   |
note: `[193]` is not valid utf-8
  --> $DIR/not-utf8-bin-file.rs:2:14
   |
LL |     let _ = "�|�␂!5�cc␕␂��";
   |              ^
   = note: this error originates in the macro `include` (in Nightly builds, run with -Z macro-backtrace for more info)
```

When we attempt to load a Rust source code file, if there is an OS file failure we try reading the file as bytes. If that succeeds we try to turn it into UTF-8. If *that* fails, we provide additional context about *where* the file has the first invalid UTF-8 character.

Fix #76869.
1 parent 27f3361 commit ec81025

18 files changed

+112
-15
lines changed

Diff for: compiler/rustc_builtin_macros/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#![feature(proc_macro_internals)]
1717
#![feature(proc_macro_quote)]
1818
#![feature(rustdoc_internals)]
19+
#![feature(string_from_utf8_lossy_owned)]
1920
#![feature(try_blocks)]
2021
#![warn(unreachable_pub)]
2122
// tidy-alphabetical-end

Diff for: compiler/rustc_builtin_macros/src/source_util.rs

+29-3
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,34 @@ pub(crate) fn expand_include_str(
210210
MacEager::expr(cx.expr_str(cx.with_def_site_ctxt(bsp), interned_src))
211211
}
212212
Err(_) => {
213-
let guar = cx.dcx().span_err(sp, format!("`{path}` wasn't a utf-8 file"));
214-
DummyResult::any(sp, guar)
213+
let mut err = cx.dcx().struct_span_err(sp, format!("`{path}` wasn't a utf-8 file"));
214+
let path = PathBuf::from(path.as_str());
215+
if let Ok(contents) = std::fs::read(&path)
216+
&& let Err(utf8err) = String::from_utf8(contents)
217+
{
218+
let start = utf8err.utf8_error().valid_up_to();
219+
let note = format!("invalid utf-8 at byte `{start}`");
220+
let msg = if let Some(len) = utf8err.utf8_error().error_len() {
221+
format!(
222+
"`{:?}` is not valid utf-8",
223+
&utf8err.as_bytes()[start..start + len]
224+
)
225+
} else {
226+
note.clone()
227+
};
228+
let contents = utf8err.into_utf8_lossy();
229+
let source = cx.source_map().new_source_file(path.into(), contents);
230+
let span = Span::with_root_ctxt(
231+
source.normalized_byte_pos(start as u32),
232+
source.normalized_byte_pos(start as u32),
233+
);
234+
if span.is_dummy() {
235+
err.note(note);
236+
} else {
237+
err.span_note(span, msg);
238+
}
239+
}
240+
DummyResult::any(sp, err.emit())
215241
}
216242
},
217243
Err(dummy) => dummy,
@@ -273,7 +299,7 @@ fn load_binary_file(
273299
.and_then(|path| path.into_os_string().into_string().ok());
274300

275301
if let Some(new_path) = new_path {
276-
err.span_suggestion(
302+
err.span_suggestion_verbose(
277303
path_span,
278304
"there is a file with the same name in a different directory",
279305
format!("\"{}\"", new_path.replace('\\', "/").escape_debug()),

Diff for: compiler/rustc_parse/src/lib.rs

+30-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#![feature(if_let_guard)]
1212
#![feature(iter_intersperse)]
1313
#![feature(let_chains)]
14+
#![feature(string_from_utf8_lossy_owned)]
1415
#![warn(unreachable_pub)]
1516
// tidy-alphabetical-end
1617

@@ -74,8 +75,36 @@ pub fn new_parser_from_file<'a>(
7475
sp: Option<Span>,
7576
) -> Result<Parser<'a>, Vec<Diag<'a>>> {
7677
let source_file = psess.source_map().load_file(path).unwrap_or_else(|e| {
77-
let msg = format!("couldn't read {}: {}", path.display(), e);
78+
let msg = format!("couldn't read `{}`: {}", path.display(), e);
7879
let mut err = psess.dcx().struct_fatal(msg);
80+
if let Ok(contents) = std::fs::read(path)
81+
&& let Err(utf8err) = String::from_utf8(contents)
82+
{
83+
// The file exists, but it wasn't valid UTF-8.
84+
let start = utf8err.utf8_error().valid_up_to();
85+
let note = format!("invalid utf-8 at byte `{start}`");
86+
let msg = if let Some(len) = utf8err.utf8_error().error_len() {
87+
format!("`{:?}` is not valid utf-8", &utf8err.as_bytes()[start..start + len])
88+
} else {
89+
note.clone()
90+
};
91+
let contents = utf8err.into_utf8_lossy();
92+
let source = psess.source_map().new_source_file(path.to_owned().into(), contents);
93+
let span = Span::with_root_ctxt(
94+
source.normalized_byte_pos(start as u32),
95+
source.normalized_byte_pos(start as u32),
96+
);
97+
if span.is_dummy() {
98+
err.note(note);
99+
} else {
100+
if sp.is_some() {
101+
err.span_note(span, msg);
102+
} else {
103+
err.span(span);
104+
err.span_label(span, msg);
105+
}
106+
}
107+
}
79108
if let Some(sp) = sp {
80109
err.span(sp);
81110
}

Diff for: src/tools/compiletest/src/errors.rs

+2
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ pub fn load_errors(testfile: &Path, revision: Option<&str>) -> Vec<Error> {
101101

102102
rdr.lines()
103103
.enumerate()
104+
// We want to ignore utf-8 failures in tests during collection of annotations.
105+
.filter(|(_, line)| line.is_ok())
104106
.filter_map(|(line_num, line)| {
105107
parse_expected(last_nonfollow_error, line_num + 1, &line.unwrap(), revision).map(
106108
|(which, error)| {

Diff for: src/tools/tidy/src/tests_revision_unpaired_stdout_stderr.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ pub fn check(tests_path: impl AsRef<Path>, bad: &mut bool) {
5858

5959
let mut expected_revisions = BTreeSet::new();
6060

61-
let contents = std::fs::read_to_string(test).unwrap();
61+
let Ok(contents) = std::fs::read_to_string(test) else { continue };
6262

6363
// Collect directives.
6464
iter_header(&contents, &mut |HeaderLine { revision, directive, .. }| {

Diff for: tests/ui/macros/not-utf8-2.rs

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
//@ error-pattern: did not contain valid UTF-8
2+
//@ reference: input.encoding.utf8
3+
//@ reference: input.encoding.invalid
4+
5+
fn foo() {
6+
include!("not-utf8-bin-file.rs");
7+
}

Diff for: tests/ui/macros/not-utf8-2.stderr

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
error: couldn't read `$DIR/not-utf8-bin-file.rs`: stream did not contain valid UTF-8
2+
--> $DIR/not-utf8-2.rs:6:5
3+
|
4+
LL | include!("not-utf8-bin-file.rs");
5+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6+
|
7+
note: `[193]` is not valid utf-8
8+
--> $DIR/not-utf8-bin-file.rs:2:14
9+
|
10+
LL | let _ = "�|�␂!5�cc␕␂��";
11+
| ^
12+
= note: this error originates in the macro `include` (in Nightly builds, run with -Z macro-backtrace for more info)
13+
14+
error: aborting due to 1 previous error
15+

Diff for: tests/ui/macros/not-utf8-bin-file.rs

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
fn main() {
2+
let _ = "Á|Õ!5¢ccŒÓ";
3+
//~^ ERROR stream did not contain valid UTF-8
4+
}

Diff for: tests/ui/macros/not-utf8-bin-file.stderr

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
error: couldn't read `$DIR/not-utf8-bin-file.rs`: stream did not contain valid UTF-8
2+
--> $DIR/not-utf8-bin-file.rs:2:14
3+
|
4+
LL | let _ = "�|�␂!5�cc␕␂��";
5+
| ^ `[193]` is not valid utf-8
6+
7+
error: aborting due to 1 previous error
8+

Diff for: tests/ui/macros/not-utf8.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@
33
//@ reference: input.encoding.invalid
44

55
fn foo() {
6-
include!("not-utf8.bin")
6+
include!("not-utf8.bin");
77
}

Diff for: tests/ui/macros/not-utf8.stderr

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
1-
error: couldn't read $DIR/not-utf8.bin: stream did not contain valid UTF-8
1+
error: couldn't read `$DIR/not-utf8.bin`: stream did not contain valid UTF-8
22
--> $DIR/not-utf8.rs:6:5
33
|
4-
LL | include!("not-utf8.bin")
4+
LL | include!("not-utf8.bin");
55
| ^^^^^^^^^^^^^^^^^^^^^^^^
66
|
7+
note: `[193]` is not valid utf-8
8+
--> $DIR/not-utf8.bin:1:1
9+
|
10+
LL | �|�␂!5�cc␕␂�Ӻi��WWj�ȥ�'�}�␒�J�ȉ��W�␞O�@����␜w�V���LO����␔[ ␃_�'���SQ�~ذ��ų&��- ��lN~��!@␌ _#���kQ��h�␝�:�...
11+
| ^
712
= note: this error originates in the macro `include` (in Nightly builds, run with -Z macro-backtrace for more info)
813

914
error: aborting due to 1 previous error

Diff for: tests/ui/modules/path-no-file-name.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//@ normalize-stderr: "\.:.*\(" -> ".: $$ACCESS_DENIED_MSG ("
1+
//@ normalize-stderr: "\.`:.*\(" -> ".`: $$ACCESS_DENIED_MSG ("
22
//@ normalize-stderr: "os error \d+" -> "os error $$ACCESS_DENIED_CODE"
33

44
#[path = "."]

Diff for: tests/ui/modules/path-no-file-name.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
error: couldn't read $DIR/.: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
1+
error: couldn't read `$DIR/.`: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
22
--> $DIR/path-no-file-name.rs:5:1
33
|
44
LL | mod m;

Diff for: tests/ui/parser/issues/issue-5806.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//@ normalize-stderr: "parser:.*\(" -> "parser: $$ACCESS_DENIED_MSG ("
1+
//@ normalize-stderr: "parser`:.*\(" -> "parser`: $$ACCESS_DENIED_MSG ("
22
//@ normalize-stderr: "os error \d+" -> "os error $$ACCESS_DENIED_CODE"
33

44
#[path = "../parser"]

Diff for: tests/ui/parser/issues/issue-5806.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
error: couldn't read $DIR/../parser: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
1+
error: couldn't read `$DIR/../parser`: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
22
--> $DIR/issue-5806.rs:5:1
33
|
44
LL | mod foo;

Diff for: tests/ui/parser/mod_file_with_path_attr.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//@ normalize-stderr: "not_a_real_file.rs:.*\(" -> "not_a_real_file.rs: $$FILE_NOT_FOUND_MSG ("
1+
//@ normalize-stderr: "not_a_real_file.rs`:.*\(" -> "not_a_real_file.rs`: $$FILE_NOT_FOUND_MSG ("
22

33
#[path = "not_a_real_file.rs"]
44
mod m; //~ ERROR not_a_real_file.rs

Diff for: tests/ui/parser/mod_file_with_path_attr.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
error: couldn't read $DIR/not_a_real_file.rs: $FILE_NOT_FOUND_MSG (os error 2)
1+
error: couldn't read `$DIR/not_a_real_file.rs`: $FILE_NOT_FOUND_MSG (os error 2)
22
--> $DIR/mod_file_with_path_attr.rs:4:1
33
|
44
LL | mod m;

Diff for: tests/ui/unpretty/staged-api-invalid-path-108697.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
error: couldn't read $DIR/lol: No such file or directory (os error 2)
1+
error: couldn't read `$DIR/lol`: No such file or directory (os error 2)
22
--> $DIR/staged-api-invalid-path-108697.rs:8:1
33
|
44
LL | mod foo;

0 commit comments

Comments
 (0)