|
| 1 | +use clippy_utils::diagnostics::span_lint_and_then; |
| 2 | +use rustc_ast::ast::{Expr, ExprKind}; |
| 3 | +use rustc_ast::token::{Lit, LitKind}; |
| 4 | +use rustc_errors::Applicability; |
| 5 | +use rustc_lint::{EarlyContext, EarlyLintPass}; |
| 6 | +use rustc_middle::lint::in_external_macro; |
| 7 | +use rustc_session::{declare_lint_pass, declare_tool_lint}; |
| 8 | +use rustc_span::Span; |
| 9 | +use std::fmt::Write; |
| 10 | + |
| 11 | +declare_clippy_lint! { |
| 12 | + /// ### What it does |
| 13 | + /// Checks for `\0` escapes in string and byte literals that look like octal |
| 14 | + /// character escapes in C. |
| 15 | + /// |
| 16 | + /// ### Why is this bad? |
| 17 | + /// |
| 18 | + /// C and other languages support octal character escapes in strings, where |
| 19 | + /// a backslash is followed by up to three octal digits. For example, `\033` |
| 20 | + /// stands for the ASCII character 27 (ESC). Rust does not support this |
| 21 | + /// notation, but has the escape code `\0` which stands for a null |
| 22 | + /// byte/character, and any following digits do not form part of the escape |
| 23 | + /// sequence. Therefore, `\033` is not a compiler error but the result may |
| 24 | + /// be surprising. |
| 25 | + /// |
| 26 | + /// ### Known problems |
| 27 | + /// The actual meaning can be the intended one. `\x00` can be used in these |
| 28 | + /// cases to be unambiguous. |
| 29 | + /// |
| 30 | + /// The lint does not trigger for format strings in `print!()`, `write!()` |
| 31 | + /// and friends since the string is already preprocessed when Clippy lints |
| 32 | + /// can see it. |
| 33 | + /// |
| 34 | + /// ### Example |
| 35 | + /// ```rust |
| 36 | + /// // Bad |
| 37 | + /// let one = "\033[1m Bold? \033[0m"; // \033 intended as escape |
| 38 | + /// let two = "\033\0"; // \033 intended as null-3-3 |
| 39 | + /// |
| 40 | + /// // Good |
| 41 | + /// let one = "\x1b[1mWill this be bold?\x1b[0m"; |
| 42 | + /// let two = "\x0033\x00"; |
| 43 | + /// ``` |
| 44 | + #[clippy::version = "1.58.0"] |
| 45 | + pub OCTAL_ESCAPES, |
| 46 | + suspicious, |
| 47 | + "string escape sequences looking like octal characters" |
| 48 | +} |
| 49 | + |
| 50 | +declare_lint_pass!(OctalEscapes => [OCTAL_ESCAPES]); |
| 51 | + |
| 52 | +impl EarlyLintPass for OctalEscapes { |
| 53 | + fn check_expr(&mut self, cx: &EarlyContext<'tcx>, expr: &Expr) { |
| 54 | + if in_external_macro(cx.sess, expr.span) { |
| 55 | + return; |
| 56 | + } |
| 57 | + |
| 58 | + if let ExprKind::Lit(lit) = &expr.kind { |
| 59 | + if matches!(lit.token.kind, LitKind::Str) { |
| 60 | + check_lit(cx, &lit.token, lit.span, true); |
| 61 | + } else if matches!(lit.token.kind, LitKind::ByteStr) { |
| 62 | + check_lit(cx, &lit.token, lit.span, false); |
| 63 | + } |
| 64 | + } |
| 65 | + } |
| 66 | +} |
| 67 | + |
| 68 | +fn check_lit(cx: &EarlyContext<'tcx>, lit: &Lit, span: Span, is_string: bool) { |
| 69 | + let contents = lit.symbol.as_str(); |
| 70 | + let mut iter = contents.char_indices().peekable(); |
| 71 | + let mut found = vec![]; |
| 72 | + |
| 73 | + // go through the string, looking for \0[0-7][0-7]? |
| 74 | + while let Some((from, ch)) = iter.next() { |
| 75 | + if ch == '\\' { |
| 76 | + if let Some((_, '0')) = iter.next() { |
| 77 | + // collect up to two further octal digits |
| 78 | + if let Some((mut to, '0'..='7')) = iter.next() { |
| 79 | + if let Some((_, '0'..='7')) = iter.peek() { |
| 80 | + to += 1; |
| 81 | + } |
| 82 | + found.push((from, to + 1)); |
| 83 | + } |
| 84 | + } |
| 85 | + } |
| 86 | + } |
| 87 | + |
| 88 | + if found.is_empty() { |
| 89 | + return; |
| 90 | + } |
| 91 | + |
| 92 | + // construct two suggestion strings, one with \x escapes with octal meaning |
| 93 | + // as in C, and one with \x00 for null bytes. |
| 94 | + let mut suggest_1 = if is_string { "\"" } else { "b\"" }.to_string(); |
| 95 | + let mut suggest_2 = suggest_1.clone(); |
| 96 | + let mut index = 0; |
| 97 | + for (from, to) in found { |
| 98 | + suggest_1.push_str(&contents[index..from]); |
| 99 | + suggest_2.push_str(&contents[index..from]); |
| 100 | + |
| 101 | + // construct a replacement escape |
| 102 | + // the maximum value is \077, or \x3f, so u8 is sufficient here |
| 103 | + if let Ok(n) = u8::from_str_radix(&contents[from + 1..to], 8) { |
| 104 | + write!(&mut suggest_1, "\\x{:02x}", n).unwrap(); |
| 105 | + } |
| 106 | + |
| 107 | + // append the null byte as \x00 and the following digits literally |
| 108 | + suggest_2.push_str("\\x00"); |
| 109 | + suggest_2.push_str(&contents[from + 2..to]); |
| 110 | + |
| 111 | + index = to; |
| 112 | + } |
| 113 | + suggest_1.push_str(&contents[index..]); |
| 114 | + suggest_1.push('"'); |
| 115 | + suggest_2.push_str(&contents[index..]); |
| 116 | + suggest_2.push('"'); |
| 117 | + |
| 118 | + span_lint_and_then( |
| 119 | + cx, |
| 120 | + OCTAL_ESCAPES, |
| 121 | + span, |
| 122 | + &format!( |
| 123 | + "octal-looking escape in {} literal", |
| 124 | + if is_string { "string" } else { "byte string" } |
| 125 | + ), |
| 126 | + |diag| { |
| 127 | + diag.help(&format!( |
| 128 | + "octal escapes are not supported, `\\0` is always a null {}", |
| 129 | + if is_string { "character" } else { "byte" } |
| 130 | + )); |
| 131 | + // suggestion 1: equivalent hex escape |
| 132 | + diag.span_suggestion( |
| 133 | + span, |
| 134 | + "if an octal escape was intended, use the hexadecimal representation instead", |
| 135 | + suggest_1, |
| 136 | + Applicability::MaybeIncorrect, |
| 137 | + ); |
| 138 | + // suggestion 2: unambiguous null byte |
| 139 | + diag.span_suggestion( |
| 140 | + span, |
| 141 | + &format!( |
| 142 | + "if the null {} is intended, disambiguate using", |
| 143 | + if is_string { "character" } else { "byte" } |
| 144 | + ), |
| 145 | + suggest_2, |
| 146 | + Applicability::MaybeIncorrect, |
| 147 | + ); |
| 148 | + }, |
| 149 | + ); |
| 150 | +} |
0 commit comments