Skip to content

Commit 89f273f

Browse files
committed
Replace ASCII control chars with Unicode Control Pictures
``` error: bare CR not allowed in doc-comment --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:3:32 | LL | /// doc comment with bare CR: '␍' | ^ ```
1 parent 5753b30 commit 89f273f

14 files changed

+81
-31
lines changed

compiler/rustc_errors/src/emitter.rs

+54-15
Original file line numberDiff line numberDiff line change
@@ -677,10 +677,7 @@ impl HumanEmitter {
677677
.skip(left)
678678
.take_while(|ch| {
679679
// Make sure that the trimming on the right will fall within the terminal width.
680-
// FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char`
681-
// is. For now, just accept that sometimes the code line will be longer than
682-
// desired.
683-
let next = unicode_width::UnicodeWidthChar::width(*ch).unwrap_or(1);
680+
let next = char_width(*ch);
684681
if taken + next > right - left {
685682
return false;
686683
}
@@ -742,11 +739,7 @@ impl HumanEmitter {
742739
let left = margin.left(source_string.len());
743740

744741
// Account for unicode characters of width !=0 that were removed.
745-
let left = source_string
746-
.chars()
747-
.take(left)
748-
.map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1))
749-
.sum();
742+
let left = source_string.chars().take(left).map(|ch| char_width(ch)).sum();
750743

751744
self.draw_line(
752745
buffer,
@@ -2039,7 +2032,7 @@ impl HumanEmitter {
20392032
let sub_len: usize =
20402033
if is_whitespace_addition { &part.snippet } else { part.snippet.trim() }
20412034
.chars()
2042-
.map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1))
2035+
.map(|ch| char_width(ch))
20432036
.sum();
20442037

20452038
let offset: isize = offsets
@@ -2076,11 +2069,8 @@ impl HumanEmitter {
20762069
}
20772070

20782071
// length of the code after substitution
2079-
let full_sub_len = part
2080-
.snippet
2081-
.chars()
2082-
.map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1))
2083-
.sum::<usize>() as isize;
2072+
let full_sub_len =
2073+
part.snippet.chars().map(|ch| char_width(ch)).sum::<usize>() as isize;
20842074

20852075
// length of the code to be substituted
20862076
let snippet_len = span_end_pos as isize - span_start_pos as isize;
@@ -2580,6 +2570,40 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
25802570
('\u{2068}', ""),
25812571
('\u{202C}', ""),
25822572
('\u{2069}', ""),
2573+
// In terminals without Unicode support the following will be garbled, but in *all* terminals
2574+
// the underlying codepoint will be as well. We could gate this replacement behind a "unicode
2575+
// support" gate.
2576+
('\u{0000}', "␀"),
2577+
('\u{0001}', "␁"),
2578+
('\u{0002}', "␂"),
2579+
('\u{0003}', "␃"),
2580+
('\u{0004}', "␄"),
2581+
('\u{0005}', "␅"),
2582+
('\u{0006}', "␆"),
2583+
('\u{0007}', "␇"),
2584+
('\u{0008}', "␈"),
2585+
('\u{000B}', "␋"),
2586+
('\u{000C}', "␌"),
2587+
('\u{000D}', "␍"),
2588+
('\u{000E}', "␎"),
2589+
('\u{000F}', "␏"),
2590+
('\u{0010}', "␐"),
2591+
('\u{0011}', "␑"),
2592+
('\u{0012}', "␒"),
2593+
('\u{0013}', "␓"),
2594+
('\u{0014}', "␔"),
2595+
('\u{0015}', "␕"),
2596+
('\u{0016}', "␖"),
2597+
('\u{0017}', "␗"),
2598+
('\u{0018}', "␘"),
2599+
('\u{0019}', "␙"),
2600+
('\u{001A}', "␚"),
2601+
('\u{001B}', "␛"),
2602+
('\u{001C}', "␜"),
2603+
('\u{001D}', "␝"),
2604+
('\u{001E}', "␞"),
2605+
('\u{001F}', "␟"),
2606+
('\u{007F}', "␡"),
25832607
];
25842608

25852609
fn normalize_whitespace(str: &str) -> String {
@@ -2590,6 +2614,21 @@ fn normalize_whitespace(str: &str) -> String {
25902614
s
25912615
}
25922616

2617+
fn char_width(ch: char) -> usize {
2618+
// FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char` is. For now,
2619+
// just accept that sometimes the code line will be longer than desired.
2620+
match ch {
2621+
'\t' => 4,
2622+
'\u{0000}' | '\u{0001}' | '\u{0002}' | '\u{0003}' | '\u{0004}' | '\u{0005}'
2623+
| '\u{0006}' | '\u{0007}' | '\u{0008}' | '\u{000B}' | '\u{000C}' | '\u{000D}'
2624+
| '\u{000E}' | '\u{000F}' | '\u{0010}' | '\u{0011}' | '\u{0012}' | '\u{0013}'
2625+
| '\u{0014}' | '\u{0015}' | '\u{0016}' | '\u{0017}' | '\u{0018}' | '\u{0019}'
2626+
| '\u{001A}' | '\u{001B}' | '\u{001C}' | '\u{001D}' | '\u{001E}' | '\u{001F}'
2627+
| '\u{007F}' => 1,
2628+
_ => unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1),
2629+
}
2630+
}
2631+
25932632
fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {
25942633
buffer.puts(line, col, "| ", Style::LineNumber);
25952634
}

tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,31 @@
11
error: bare CR not allowed in doc-comment
22
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:3:32
33
|
4-
LL | /// doc comment with bare CR: ''
4+
LL | /// doc comment with bare CR: '␍'
55
| ^
66

77
error: bare CR not allowed in block doc-comment
88
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:7:38
99
|
10-
LL | /** block doc comment with bare CR: '' */
10+
LL | /** block doc comment with bare CR: '␍' */
1111
| ^
1212

1313
error: bare CR not allowed in doc-comment
1414
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:12:36
1515
|
16-
LL | //! doc comment with bare CR: ''
16+
LL | //! doc comment with bare CR: '␍'
1717
| ^
1818

1919
error: bare CR not allowed in block doc-comment
2020
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:15:42
2121
|
22-
LL | /*! block doc comment with bare CR: '' */
22+
LL | /*! block doc comment with bare CR: '␍' */
2323
| ^
2424

2525
error: bare CR not allowed in string, use `\r` instead
2626
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:19:18
2727
|
28-
LL | let _s = "foobar";
28+
LL | let _s = "foo␍bar";
2929
| ^
3030
|
3131
help: escape the character
@@ -36,13 +36,13 @@ LL | let _s = "foo\rbar";
3636
error: bare CR not allowed in raw string
3737
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:22:19
3838
|
39-
LL | let _s = r"barfoo";
39+
LL | let _s = r"bar␍foo";
4040
| ^
4141

4242
error: unknown character escape: `\r`
4343
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:25:19
4444
|
45-
LL | let _s = "foo\bar";
45+
LL | let _s = "foo\␍bar";
4646
| ^ unknown character escape
4747
|
4848
= help: this is an isolated carriage return; consider checking your editor and version control settings

tests/ui/parser/bad-char-literals.rs

112 Bytes
Binary file not shown.

tests/ui/parser/bad-char-literals.stderr

+14-3
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,27 @@ LL | '\n';
2525
error: character constant must be escaped: `\r`
2626
--> $DIR/bad-char-literals.rs:15:6
2727
|
28-
LL | '';
28+
LL | '␍';
2929
| ^
3030
|
3131
help: escape the character
3232
|
3333
LL | '\r';
3434
| ++
3535

36+
error: character literal may only contain one codepoint
37+
--> $DIR/bad-char-literals.rs:18:5
38+
|
39+
LL | '-␀-';
40+
| ^^^^
41+
|
42+
help: if you meant to write a string literal, use double quotes
43+
|
44+
LL | "-␀-";
45+
| ~ ~
46+
3647
error: character constant must be escaped: `\t`
37-
--> $DIR/bad-char-literals.rs:18:6
48+
--> $DIR/bad-char-literals.rs:21:6
3849
|
3950
LL | ' ';
4051
| ^^^^
@@ -44,5 +55,5 @@ help: escape the character
4455
LL | '\t';
4556
| ++
4657

47-
error: aborting due to 4 previous errors
58+
error: aborting due to 5 previous errors
4859

148 Bytes
Binary file not shown.
32 Bytes
Binary file not shown.
28 Bytes
Binary file not shown.

tests/ui/parser/raw/raw-byte-string-literals.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
error: bare CR not allowed in raw string
22
--> $DIR/raw-byte-string-literals.rs:4:9
33
|
4-
LL | br"a";
4+
LL | br"a␍";
55
| ^
66

77
error: non-ASCII character in raw byte string literal

tests/ui/parser/several-carriage-returns-in-doc-comment.stderr

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
error: bare CR not allowed in doc-comment
22
--> $DIR/several-carriage-returns-in-doc-comment.rs:6:12
33
|
4-
LL | /// This doc comment contains three isolated `\r` symbols
4+
LL | /// This do␍c comment contains ␍three isolated `\r`␍ symbols
55
| ^
66

77
error: bare CR not allowed in doc-comment
88
--> $DIR/several-carriage-returns-in-doc-comment.rs:6:32
99
|
10-
LL | /// This doc comment contains three isolated `\r` symbols
10+
LL | /// This do␍c comment contains ␍three isolated `\r`␍ symbols
1111
| ^
1212

1313
error: bare CR not allowed in doc-comment
1414
--> $DIR/several-carriage-returns-in-doc-comment.rs:6:52
1515
|
16-
LL | /// This doc comment contains three isolated `\r` symbols
16+
LL | /// This do␍c comment contains ␍three isolated `\r`␍ symbols
1717
| ^
1818

1919
error: aborting due to 3 previous errors

tests/ui/parser/trailing-carriage-return-in-string.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
error: unknown character escape: `\r`
22
--> $DIR/trailing-carriage-return-in-string.rs:10:25
33
|
4-
LL | let bad = "This is \ a test";
4+
LL | let bad = "This is \␍ a test";
55
| ^ unknown character escape
66
|
77
= help: this is an isolated carriage return; consider checking your editor and version control settings
388 Bytes
Binary file not shown.
336 Bytes
Binary file not shown.
Binary file not shown.

tests/ui/str/str-escape.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ warning: whitespace symbol '\u{c}' is not skipped
2222
|
2323
LL | let s = b"a\
2424
| ________________^
25-
LL | | b";
25+
LL | | b";
2626
| | ^- whitespace symbol '\u{c}' is not skipped
2727
| |____|
2828
|

0 commit comments

Comments
 (0)