Skip to content

Commit db14a17

Browse files
committed Nov 1, 2021
Auto merge of #90462 - pietroalbini:bidi-master, r=nikomatsakis,pietroalbini
[master] Fix CVE-2021-42574

This PR implements new lints to mitigate the impact of [CVE-2021-42574], caused by the presence of bidirectional-override Unicode codepoints in the compiled source code. [See the advisory][advisory] for more information about the vulnerability. The changes in this PR will be released in tomorrow's nightly release.

[CVE-2021-42574]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-42574
[advisory]: https://blog.rust-lang.org/2021/11/01/cve-2021-42574.html
2 parents ff0e148 + cdd3b86 commit db14a17

File tree

13 files changed

+543
-10
lines changed

13 files changed

+543
-10
lines changed
 

‎Cargo.lock

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4259,6 +4259,7 @@ dependencies = [
42594259
"rustc_span",
42604260
"tracing",
42614261
"unicode-normalization",
4262+
"unicode-width",
42624263
]
42634264

42644265
[[package]]

‎RELEASES.md

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,11 @@
1+
Version 1.56.1 (2021-11-01)
2+
===========================
3+
4+
- New lints to detect the presence of bidirectional-override Unicode
5+
codepoints in the compiled source code ([CVE-2021-42574])
6+
7+
[CVE-2021-42574]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-42574
8+
19
Version 1.56.0 (2021-10-21)
210
========================
311

‎compiler/rustc_errors/src/emitter.rs

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2063,8 +2063,26 @@ fn num_decimal_digits(num: usize) -> usize {
20632063
MAX_DIGITS
20642064
}
20652065

2066+
// We replace some characters so the CLI output is always consistent and underlines aligned.
2067+
const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
2068+
('\t', " "), // We do our own tab replacement
2069+
('\u{202A}', ""), // The following unicode text flow control characters are inconsistently
2070+
('\u{202B}', ""), // supported across CLIs and can cause confusion due to the bytes on disk
2071+
('\u{202D}', ""), // not corresponding to the visible source code, so we replace them always.
2072+
('\u{202E}', ""),
2073+
('\u{2066}', ""),
2074+
('\u{2067}', ""),
2075+
('\u{2068}', ""),
2076+
('\u{202C}', ""),
2077+
('\u{2069}', ""),
2078+
];
2079+
20662080
fn replace_tabs(str: &str) -> String {
2067-
str.replace('\t', " ")
2081+
let mut s = str.to_string();
2082+
for (c, replacement) in OUTPUT_REPLACEMENTS {
2083+
s = s.replace(*c, replacement);
2084+
}
2085+
s
20682086
}
20692087

20702088
fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {

‎compiler/rustc_lint/src/context.rs

Lines changed: 38 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
1717
use self::TargetLint::*;
1818

19+
use crate::hidden_unicode_codepoints::UNICODE_TEXT_FLOW_CHARS;
1920
use crate::levels::{is_known_lint_tool, LintLevelsBuilder};
2021
use crate::passes::{EarlyLintPassObject, LateLintPassObject};
2122
use rustc_ast as ast;
@@ -39,7 +40,7 @@ use rustc_session::lint::{BuiltinLintDiagnostics, ExternDepSpec};
3940
use rustc_session::lint::{FutureIncompatibleInfo, Level, Lint, LintBuffer, LintId};
4041
use rustc_session::Session;
4142
use rustc_span::lev_distance::find_best_match_for_name;
42-
use rustc_span::{symbol::Symbol, MultiSpan, Span, DUMMY_SP};
43+
use rustc_span::{symbol::Symbol, BytePos, MultiSpan, Span, DUMMY_SP};
4344
use rustc_target::abi;
4445
use tracing::debug;
4546

@@ -597,6 +598,42 @@ pub trait LintContext: Sized {
597598
// Now, set up surrounding context.
598599
let sess = self.sess();
599600
match diagnostic {
601+
BuiltinLintDiagnostics::UnicodeTextFlow(span, content) => {
602+
let spans: Vec<_> = content
603+
.char_indices()
604+
.filter_map(|(i, c)| {
605+
UNICODE_TEXT_FLOW_CHARS.contains(&c).then(|| {
606+
let lo = span.lo() + BytePos(2 + i as u32);
607+
(c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
608+
})
609+
})
610+
.collect();
611+
let (an, s) = match spans.len() {
612+
1 => ("an ", ""),
613+
_ => ("", "s"),
614+
};
615+
db.span_label(span, &format!(
616+
"this comment contains {}invisible unicode text flow control codepoint{}",
617+
an,
618+
s,
619+
));
620+
for (c, span) in &spans {
621+
db.span_label(*span, format!("{:?}", c));
622+
}
623+
db.note(
624+
"these kind of unicode codepoints change the way text flows on \
625+
applications that support them, but can cause confusion because they \
626+
change the order of characters on the screen",
627+
);
628+
if !spans.is_empty() {
629+
db.multipart_suggestion_with_style(
630+
"if their presence wasn't intentional, you can remove them",
631+
spans.into_iter().map(|(_, span)| (span, "".to_string())).collect(),
632+
Applicability::MachineApplicable,
633+
SuggestionStyle::HideCodeAlways,
634+
);
635+
}
636+
},
600637
BuiltinLintDiagnostics::Normal => (),
601638
BuiltinLintDiagnostics::BareTraitObject(span, is_global) => {
602639
let (sugg, app) = match sess.source_map().span_to_snippet(span) {

compiler/rustc_lint/src/hidden_unicode_codepoints.rs

Lines changed: 161 additions & 0 deletions
This file contains bidirectional or hidden Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,161 @@
1+
use crate::{EarlyContext, EarlyLintPass, LintContext};
2+
use rustc_ast as ast;
3+
use rustc_errors::{Applicability, SuggestionStyle};
4+
use rustc_span::{BytePos, Span, Symbol};
5+
6+
declare_lint! {
7+
/// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the
8+
/// visual representation of text on screen in a way that does not correspond to their
9+
/// in-memory representation.
10+
///
11+
/// ### Explanation
12+
///
13+
/// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`,
14+
/// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change
15+
/// its direction on software that supports these codepoints. This makes the text "abc" display
16+
/// as "cba" on screen. By leveraging software that supports these, people can write specially
17+
/// crafted literals that make the surrounding code seem like it's performing one action, when
18+
/// in reality it is performing another. Because of this, we proactively lint against their
19+
/// presence to avoid surprises.
20+
///
21+
/// ### Example
22+
///
23+
/// ```rust,compile_fail
24+
/// #![deny(text_direction_codepoint_in_literal)]
25+
/// fn main() {
26+
/// println!("{:?}", '‮');
27+
/// }
28+
/// ```
29+
///
30+
/// {{produces}}
31+
///
32+
pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
33+
Deny,
34+
"detect special Unicode codepoints that affect the visual representation of text on screen, \
35+
changing the direction in which text flows",
36+
}
37+
38+
declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]);
39+
40+
crate const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
41+
'\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}', '\u{202C}',
42+
'\u{2069}',
43+
];
44+
45+
impl HiddenUnicodeCodepoints {
46+
fn lint_text_direction_codepoint(
47+
&self,
48+
cx: &EarlyContext<'_>,
49+
text: Symbol,
50+
span: Span,
51+
padding: u32,
52+
point_at_inner_spans: bool,
53+
label: &str,
54+
) {
55+
// Obtain the `Span`s for each of the forbidden chars.
56+
let spans: Vec<_> = text
57+
.as_str()
58+
.char_indices()
59+
.filter_map(|(i, c)| {
60+
UNICODE_TEXT_FLOW_CHARS.contains(&c).then(|| {
61+
let lo = span.lo() + BytePos(i as u32 + padding);
62+
(c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
63+
})
64+
})
65+
.collect();
66+
67+
cx.struct_span_lint(TEXT_DIRECTION_CODEPOINT_IN_LITERAL, span, |lint| {
68+
let mut err = lint.build(&format!(
69+
"unicode codepoint changing visible direction of text present in {}",
70+
label
71+
));
72+
let (an, s) = match spans.len() {
73+
1 => ("an ", ""),
74+
_ => ("", "s"),
75+
};
76+
err.span_label(
77+
span,
78+
&format!(
79+
"this {} contains {}invisible unicode text flow control codepoint{}",
80+
label, an, s,
81+
),
82+
);
83+
if point_at_inner_spans {
84+
for (c, span) in &spans {
85+
err.span_label(*span, format!("{:?}", c));
86+
}
87+
}
88+
err.note(
89+
"these kind of unicode codepoints change the way text flows on applications that \
90+
support them, but can cause confusion because they change the order of \
91+
characters on the screen",
92+
);
93+
if point_at_inner_spans && !spans.is_empty() {
94+
err.multipart_suggestion_with_style(
95+
"if their presence wasn't intentional, you can remove them",
96+
spans.iter().map(|(_, span)| (*span, "".to_string())).collect(),
97+
Applicability::MachineApplicable,
98+
SuggestionStyle::HideCodeAlways,
99+
);
100+
err.multipart_suggestion(
101+
"if you want to keep them but make them visible in your source code, you can \
102+
escape them",
103+
spans
104+
.into_iter()
105+
.map(|(c, span)| {
106+
let c = format!("{:?}", c);
107+
(span, c[1..c.len() - 1].to_string())
108+
})
109+
.collect(),
110+
Applicability::MachineApplicable,
111+
);
112+
} else {
113+
// FIXME: in other suggestions we've reversed the inner spans of doc comments. We
114+
// should do the same here to provide the same good suggestions as we do for
115+
// literals above.
116+
err.note("if their presence wasn't intentional, you can remove them");
117+
err.note(&format!(
118+
"if you want to keep them but make them visible in your source code, you can \
119+
escape them: {}",
120+
spans
121+
.into_iter()
122+
.map(|(c, _)| { format!("{:?}", c) })
123+
.collect::<Vec<String>>()
124+
.join(", "),
125+
));
126+
}
127+
err.emit();
128+
});
129+
}
130+
}
131+
impl EarlyLintPass for HiddenUnicodeCodepoints {
132+
fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) {
133+
if let ast::AttrKind::DocComment(_, comment) = attr.kind {
134+
if comment.as_str().contains(UNICODE_TEXT_FLOW_CHARS) {
135+
self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment");
136+
}
137+
}
138+
}
139+
140+
fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) {
141+
// byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
142+
let (text, span, padding) = match &expr.kind {
143+
ast::ExprKind::Lit(ast::Lit { token, kind, span }) => {
144+
let text = token.symbol;
145+
if !text.as_str().contains(UNICODE_TEXT_FLOW_CHARS) {
146+
return;
147+
}
148+
let padding = match kind {
149+
// account for `"` or `'`
150+
ast::LitKind::Str(_, ast::StrStyle::Cooked) | ast::LitKind::Char(_) => 1,
151+
// account for `r###"`
152+
ast::LitKind::Str(_, ast::StrStyle::Raw(val)) => *val as u32 + 2,
153+
_ => return,
154+
};
155+
(text, span, padding)
156+
}
157+
_ => return,
158+
};
159+
self.lint_text_direction_codepoint(cx, text, *span, padding, true, "literal");
160+
}
161+
}

‎compiler/rustc_lint/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@ pub mod builtin;
4848
mod context;
4949
mod early;
5050
mod enum_intrinsics_non_enums;
51+
pub mod hidden_unicode_codepoints;
5152
mod internal;
5253
mod late;
5354
mod levels;
@@ -78,6 +79,7 @@ use rustc_span::Span;
7879
use array_into_iter::ArrayIntoIter;
7980
use builtin::*;
8081
use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums;
82+
use hidden_unicode_codepoints::*;
8183
use internal::*;
8284
use methods::*;
8385
use non_ascii_idents::*;
@@ -129,6 +131,7 @@ macro_rules! early_lint_passes {
129131
DeprecatedAttr: DeprecatedAttr::new(),
130132
WhileTrue: WhileTrue,
131133
NonAsciiIdents: NonAsciiIdents,
134+
HiddenUnicodeCodepoints: HiddenUnicodeCodepoints,
132135
IncompleteFeatures: IncompleteFeatures,
133136
RedundantSemicolons: RedundantSemicolons,
134137
UnusedDocComment: UnusedDocComment,

‎compiler/rustc_lint_defs/src/builtin.rs

Lines changed: 28 additions & 0 deletions
This file contains bidirectional or hidden Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change
@@ -3518,6 +3518,34 @@ declare_lint! {
35183518
@feature_gate = sym::non_exhaustive_omitted_patterns_lint;
35193519
}
35203520

3521+
declare_lint! {
3522+
/// The `text_direction_codepoint_in_comment` lint detects Unicode codepoints in comments that
3523+
/// change the visual representation of text on screen in a way that does not correspond to
3524+
/// their in-memory representation.
3525+
///
3526+
/// ### Example
3527+
///
3528+
/// ```rust,compile_fail
3529+
/// #![deny(text_direction_codepoint_in_comment)]
3530+
/// fn main() {
3531+
/// println!("{:?}"); // '‮');
3532+
/// }
3533+
/// ```
3534+
///
3535+
/// {{produces}}
3536+
///
3537+
/// ### Explanation
3538+
///
3539+
/// Unicode allows changing the visual flow of text on screen in order to support scripts that
3540+
/// are written right-to-left, but a specially crafted comment can make code that will be
3541+
/// compiled appear to be part of a comment, depending on the software used to read the code.
3542+
/// To avoid potential problems or confusion, such as in CVE-2021-42574, by default we deny
3543+
/// their use.
3544+
pub TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
3545+
Deny,
3546+
"invisible directionality-changing codepoints in comment"
3547+
}
3548+
35213549
declare_lint! {
35223550
/// The `deref_into_dyn_supertrait` lint is output whenever there is a use of the
35233551
/// `Deref` implementation with a `dyn SuperTrait` type as `Output`.

‎compiler/rustc_lint_defs/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -306,6 +306,7 @@ pub enum BuiltinLintDiagnostics {
306306
TrailingMacro(bool, Ident),
307307
BreakWithLabelAndLoop(Span),
308308
NamedAsmLabel(String),
309+
UnicodeTextFlow(Span, String),
309310
}
310311

311312
/// Lints that are buffered up early on in the `Session` before the

‎compiler/rustc_parse/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,3 +18,4 @@ rustc_session = { path = "../rustc_session" }
1818
rustc_span = { path = "../rustc_span" }
1919
rustc_ast = { path = "../rustc_ast" }
2020
unicode-normalization = "0.1.11"
21+
unicode-width = "0.1.4"

‎compiler/rustc_parse/src/lexer/mod.rs

Lines changed: 37 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,9 @@ use rustc_ast::tokenstream::{Spacing, TokenStream};
44
use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PResult};
55
use rustc_lexer::unescape::{self, Mode};
66
use rustc_lexer::{Base, DocStyle, RawStrError};
7-
use rustc_session::lint::builtin::RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX;
7+
use rustc_session::lint::builtin::{
8+
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
9+
};
810
use rustc_session::lint::BuiltinLintDiagnostics;
911
use rustc_session::parse::ParseSess;
1012
use rustc_span::symbol::{sym, Symbol};
@@ -129,14 +131,41 @@ impl<'a> StringReader<'a> {
129131
.struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
130132
}
131133

134+
/// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
135+
/// complain about it.
136+
fn lint_unicode_text_flow(&self, start: BytePos) {
137+
// Opening delimiter of the length 2 is not included into the comment text.
138+
let content_start = start + BytePos(2);
139+
let content = self.str_from(content_start);
140+
let span = self.mk_sp(start, self.pos);
141+
const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
142+
'\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}',
143+
'\u{202C}', '\u{2069}',
144+
];
145+
if content.contains(UNICODE_TEXT_FLOW_CHARS) {
146+
self.sess.buffer_lint_with_diagnostic(
147+
&TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
148+
span,
149+
ast::CRATE_NODE_ID,
150+
"unicode codepoint changing visible direction of text present in comment",
151+
BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()),
152+
);
153+
}
154+
}
155+
132156
/// Turns simple `rustc_lexer::TokenKind` enum into a rich
133157
/// `rustc_ast::TokenKind`. This turns strings into interned
134158
/// symbols and runs additional validation.
135159
fn cook_lexer_token(&self, token: rustc_lexer::TokenKind, start: BytePos) -> Option<TokenKind> {
136160
Some(match token {
137161
rustc_lexer::TokenKind::LineComment { doc_style } => {
138162
// Skip non-doc comments
139-
let doc_style = doc_style?;
163+
let doc_style = if let Some(doc_style) = doc_style {
164+
doc_style
165+
} else {
166+
self.lint_unicode_text_flow(start);
167+
return None;
168+
};
140169

141170
// Opening delimiter of the length 3 is not included into the symbol.
142171
let content_start = start + BytePos(3);
@@ -158,7 +187,12 @@ impl<'a> StringReader<'a> {
158187
}
159188

160189
// Skip non-doc comments
161-
let doc_style = doc_style?;
190+
let doc_style = if let Some(doc_style) = doc_style {
191+
doc_style
192+
} else {
193+
self.lint_unicode_text_flow(start);
194+
return None;
195+
};
162196

163197
// Opening delimiter of the length 3 and closing delimiter of the length 2
164198
// are not included into the symbol.

‎compiler/rustc_parse/src/lexer/unescape_error_reporting.rs

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -187,12 +187,17 @@ pub(crate) fn emit_unescape_error(
187187
assert!(mode.is_bytes());
188188
let (c, span) = last_char();
189189
let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant");
190-
err.span_label(span, "byte constant must be ASCII");
190+
let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
191+
format!(" but is {:?}", c)
192+
} else {
193+
String::new()
194+
};
195+
err.span_label(span, &format!("byte constant must be ASCII{}", postfix));
191196
if (c as u32) <= 0xFF {
192197
err.span_suggestion(
193198
span,
194199
&format!(
195-
"if you meant to use the unicode code point for '{}', use a \\xHH escape",
200+
"if you meant to use the unicode code point for {:?}, use a \\xHH escape",
196201
c
197202
),
198203
format!("\\x{:X}", c as u32),
@@ -206,7 +211,7 @@ pub(crate) fn emit_unescape_error(
206211
err.span_suggestion(
207212
span,
208213
&format!(
209-
"if you meant to use the UTF-8 encoding of '{}', use \\xHH escapes",
214+
"if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes",
210215
c
211216
),
212217
utf8.as_bytes()
@@ -220,10 +225,15 @@ pub(crate) fn emit_unescape_error(
220225
}
221226
EscapeError::NonAsciiCharInByteString => {
222227
assert!(mode.is_bytes());
223-
let (_c, span) = last_char();
228+
let (c, span) = last_char();
229+
let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
230+
format!(" but is {:?}", c)
231+
} else {
232+
String::new()
233+
};
224234
handler
225235
.struct_span_err(span, "raw byte string must be ASCII")
226-
.span_label(span, "must be ASCII")
236+
.span_label(span, &format!("must be ASCII{}", postfix))
227237
.emit();
228238
}
229239
EscapeError::OutOfRangeHexEscape => {
Lines changed: 39 additions & 0 deletions
This file contains bidirectional or hidden Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
fn main() {
2+
// if access_level != "us‫e‪r" { // Check if admin
3+
//~^ ERROR unicode codepoint changing visible direction of text present in comment
4+
println!("us\u{202B}e\u{202A}r");
5+
println!("{:?}", r#"us\u{202B}e\u{202A}r"#);
6+
println!("{:?}", b"us\u{202B}e\u{202A}r");
7+
//~^ ERROR unicode escape in byte string
8+
//~| ERROR unicode escape in byte string
9+
println!("{:?}", br##"us\u{202B}e\u{202A}r"##);
10+
11+
println!("{:?}", "/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only ");
12+
//~^ ERROR unicode codepoint changing visible direction of text present in literal
13+
14+
println!("{:?}", r##"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "##);
15+
//~^ ERROR unicode codepoint changing visible direction of text present in literal
16+
println!("{:?}", b"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only ");
17+
//~^ ERROR non-ASCII character in byte constant
18+
//~| ERROR non-ASCII character in byte constant
19+
//~| ERROR non-ASCII character in byte constant
20+
//~| ERROR non-ASCII character in byte constant
21+
println!("{:?}", br##"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "##);
22+
//~^ ERROR raw byte string must be ASCII
23+
//~| ERROR raw byte string must be ASCII
24+
//~| ERROR raw byte string must be ASCII
25+
//~| ERROR raw byte string must be ASCII
26+
println!("{:?}", '‮');
27+
//~^ ERROR unicode codepoint changing visible direction of text present in literal
28+
}
29+
30+
//"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only */"
31+
//~^ ERROR unicode codepoint changing visible direction of text present in comment
32+
33+
/** '‮'); */fn foo() {}
34+
//~^ ERROR unicode codepoint changing visible direction of text present in doc comment
35+
36+
/**
37+
*
38+
* '‮'); */fn bar() {}
39+
//~^^^ ERROR unicode codepoint changing visible direction of text present in doc comment
Lines changed: 192 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,192 @@
1+
error: unicode escape in byte string
2+
--> $DIR/unicode-control-codepoints.rs:6:26
3+
|
4+
LL | println!("{:?}", b"us\u{202B}e\u{202A}r");
5+
| ^^^^^^^^ unicode escape in byte string
6+
|
7+
= help: unicode escape sequences cannot be used as a byte or in a byte string
8+
9+
error: unicode escape in byte string
10+
--> $DIR/unicode-control-codepoints.rs:6:35
11+
|
12+
LL | println!("{:?}", b"us\u{202B}e\u{202A}r");
13+
| ^^^^^^^^ unicode escape in byte string
14+
|
15+
= help: unicode escape sequences cannot be used as a byte or in a byte string
16+
17+
error: non-ASCII character in byte constant
18+
--> $DIR/unicode-control-codepoints.rs:16:26
19+
|
20+
LL | println!("{:?}", b"/* } if isAdmin begin admins only ");
21+
| ^ byte constant must be ASCII but is '\u{202e}'
22+
|
23+
help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes
24+
|
25+
LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only ");
26+
| ~~~~~~~~~~~~
27+
28+
error: non-ASCII character in byte constant
29+
--> $DIR/unicode-control-codepoints.rs:16:30
30+
|
31+
LL | println!("{:?}", b"/* } if isAdmin begin admins only ");
32+
| ^ byte constant must be ASCII but is '\u{2066}'
33+
|
34+
help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes
35+
|
36+
LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only ");
37+
| ~~~~~~~~~~~~
38+
39+
error: non-ASCII character in byte constant
40+
--> $DIR/unicode-control-codepoints.rs:16:41
41+
|
42+
LL | println!("{:?}", b"/* } if isAdmin begin admins only ");
43+
| ^ byte constant must be ASCII but is '\u{2069}'
44+
|
45+
help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes
46+
|
47+
LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only ");
48+
| ~~~~~~~~~~~~
49+
50+
error: non-ASCII character in byte constant
51+
--> $DIR/unicode-control-codepoints.rs:16:43
52+
|
53+
LL | println!("{:?}", b"/* } if isAdmin begin admins only ");
54+
| ^ byte constant must be ASCII but is '\u{2066}'
55+
|
56+
help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes
57+
|
58+
LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only ");
59+
| ~~~~~~~~~~~~
60+
61+
error: raw byte string must be ASCII
62+
--> $DIR/unicode-control-codepoints.rs:21:29
63+
|
64+
LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##);
65+
| ^ must be ASCII but is '\u{202e}'
66+
67+
error: raw byte string must be ASCII
68+
--> $DIR/unicode-control-codepoints.rs:21:33
69+
|
70+
LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##);
71+
| ^ must be ASCII but is '\u{2066}'
72+
73+
error: raw byte string must be ASCII
74+
--> $DIR/unicode-control-codepoints.rs:21:44
75+
|
76+
LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##);
77+
| ^ must be ASCII but is '\u{2069}'
78+
79+
error: raw byte string must be ASCII
80+
--> $DIR/unicode-control-codepoints.rs:21:46
81+
|
82+
LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##);
83+
| ^ must be ASCII but is '\u{2066}'
84+
85+
error: unicode codepoint changing visible direction of text present in comment
86+
--> $DIR/unicode-control-codepoints.rs:2:5
87+
|
88+
LL | // if access_level != "user" { // Check if admin
89+
| ^^^^^^^^^^^^^^^^^^^^^^^^^--^^^^^^^^^^^^^^^^^^^^^
90+
| | ||
91+
| | |'\u{202a}'
92+
| | '\u{202b}'
93+
| this comment contains invisible unicode text flow control codepoints
94+
|
95+
= note: `#[deny(text_direction_codepoint_in_comment)]` on by default
96+
= note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
97+
= help: if their presence wasn't intentional, you can remove them
98+
99+
error: unicode codepoint changing visible direction of text present in comment
100+
--> $DIR/unicode-control-codepoints.rs:30:1
101+
|
102+
LL | //"/* } if isAdmin begin admins only */"
103+
| ^^^^^-^^-^^^^^^^^^--^^^^^^^^^^^^^^^^^^^^^
104+
| | | | ||
105+
| | | | |'\u{2066}'
106+
| | | | '\u{2069}'
107+
| | | '\u{2066}'
108+
| | '\u{202e}'
109+
| this comment contains invisible unicode text flow control codepoints
110+
|
111+
= note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
112+
= help: if their presence wasn't intentional, you can remove them
113+
114+
error: unicode codepoint changing visible direction of text present in literal
115+
--> $DIR/unicode-control-codepoints.rs:11:22
116+
|
117+
LL | println!("{:?}", "/* } if isAdmin begin admins only ");
118+
| ^^^-^^-^^^^^^^^^--^^^^^^^^^^^^^^^^^^^
119+
| | | | ||
120+
| | | | |'\u{2066}'
121+
| | | | '\u{2069}'
122+
| | | '\u{2066}'
123+
| | '\u{202e}'
124+
| this literal contains invisible unicode text flow control codepoints
125+
|
126+
= note: `#[deny(text_direction_codepoint_in_literal)]` on by default
127+
= note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
128+
= help: if their presence wasn't intentional, you can remove them
129+
help: if you want to keep them but make them visible in your source code, you can escape them
130+
|
131+
LL | println!("{:?}", "/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} begin admins only ");
132+
| ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~
133+
134+
error: unicode codepoint changing visible direction of text present in literal
135+
--> $DIR/unicode-control-codepoints.rs:14:22
136+
|
137+
LL | println!("{:?}", r##"/* } if isAdmin begin admins only "##);
138+
| ^^^^^^-^^-^^^^^^^^^--^^^^^^^^^^^^^^^^^^^^^
139+
| | | | ||
140+
| | | | |'\u{2066}'
141+
| | | | '\u{2069}'
142+
| | | '\u{2066}'
143+
| | '\u{202e}'
144+
| this literal contains invisible unicode text flow control codepoints
145+
|
146+
= note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
147+
= help: if their presence wasn't intentional, you can remove them
148+
help: if you want to keep them but make them visible in your source code, you can escape them
149+
|
150+
LL | println!("{:?}", r##"/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} begin admins only "##);
151+
| ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~
152+
153+
error: unicode codepoint changing visible direction of text present in literal
154+
--> $DIR/unicode-control-codepoints.rs:26:22
155+
|
156+
LL | println!("{:?}", '');
157+
| ^-
158+
| ||
159+
| |'\u{202e}'
160+
| this literal contains an invisible unicode text flow control codepoint
161+
|
162+
= note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
163+
= help: if their presence wasn't intentional, you can remove them
164+
help: if you want to keep them but make them visible in your source code, you can escape them
165+
|
166+
LL | println!("{:?}", '\u{202e}');
167+
| ~~~~~~~~
168+
169+
error: unicode codepoint changing visible direction of text present in doc comment
170+
--> $DIR/unicode-control-codepoints.rs:33:1
171+
|
172+
LL | /** ''); */fn foo() {}
173+
| ^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint
174+
|
175+
= note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
176+
= note: if their presence wasn't intentional, you can remove them
177+
= note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
178+
179+
error: unicode codepoint changing visible direction of text present in doc comment
180+
--> $DIR/unicode-control-codepoints.rs:36:1
181+
|
182+
LL | / /**
183+
LL | | *
184+
LL | | * ''); */fn bar() {}
185+
| |___________^ this doc comment contains an invisible unicode text flow control codepoint
186+
|
187+
= note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
188+
= note: if their presence wasn't intentional, you can remove them
189+
= note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
190+
191+
error: aborting due to 17 previous errors
192+

0 commit comments

Comments
 (0)
Please sign in to comment.