Skip to content

Tweak confusable idents checking #75349

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 29 additions & 61 deletions src/librustc_lint/non_ascii_idents.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{EarlyContext, EarlyLintPass, LintContext};
use rustc_ast::ast;
use rustc_data_structures::fx::FxHashMap;
use rustc_span::symbol::SymbolStr;
use rustc_span::symbol::Symbol;

declare_lint! {
pub NON_ASCII_IDENTS,
Expand Down Expand Up @@ -39,7 +39,6 @@ impl EarlyLintPass for NonAsciiIdents {
use rustc_span::Span;
use std::collections::BTreeMap;
use unicode_security::GeneralSecurityProfile;
use utils::CowBoxSymStr;

let check_non_ascii_idents = cx.builder.lint_level(NON_ASCII_IDENTS).0 != Level::Allow;
let check_uncommon_codepoints =
Expand All @@ -58,6 +57,12 @@ impl EarlyLintPass for NonAsciiIdents {

let mut has_non_ascii_idents = false;
let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();

// Sort by `Span` so that error messages make sense with respect to the
// order of identifier locations in the code.
let mut symbols: Vec<_> = symbols.iter().collect();
symbols.sort_by_key(|k| k.1);

for (symbol, &sp) in symbols.iter() {
let symbol_str = symbol.as_str();
if symbol_str.is_ascii() {
Expand All @@ -77,33 +82,34 @@ impl EarlyLintPass for NonAsciiIdents {
}

if has_non_ascii_idents && check_confusable_idents {
let mut skeleton_map: FxHashMap<CowBoxSymStr, (SymbolStr, Span, bool)> =
let mut skeleton_map: FxHashMap<Symbol, (Symbol, Span, bool)> =
FxHashMap::with_capacity_and_hasher(symbols.len(), Default::default());
let mut str_buf = String::new();
for (symbol, &sp) in symbols.iter() {
fn calc_skeleton(symbol_str: &SymbolStr, buffer: &mut String) -> CowBoxSymStr {
use std::mem::replace;
use unicode_security::confusable_detection::skeleton;
buffer.clear();
buffer.extend(skeleton(symbol_str));
if *symbol_str == *buffer {
CowBoxSymStr::Interned(symbol_str.clone())
} else {
let owned = replace(buffer, String::new());
CowBoxSymStr::Owned(owned.into_boxed_str())
}
}
let mut skeleton_buf = String::new();

for (&symbol, &sp) in symbols.iter() {
use unicode_security::confusable_detection::skeleton;

let symbol_str = symbol.as_str();
let is_ascii = symbol_str.is_ascii();
let skeleton = calc_skeleton(&symbol_str, &mut str_buf);

// Get the skeleton as a `Symbol`.
skeleton_buf.clear();
skeleton_buf.extend(skeleton(&symbol_str));
let skeleton_sym = if *symbol_str == *skeleton_buf {
symbol
} else {
Symbol::intern(&skeleton_buf)
};

skeleton_map
.entry(skeleton)
.and_modify(|(existing_symbolstr, existing_span, existing_is_ascii)| {
.entry(skeleton_sym)
.and_modify(|(existing_symbol, existing_span, existing_is_ascii)| {
if !*existing_is_ascii || !is_ascii {
cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
lint.build(&format!(
"identifier pair considered confusable between `{}` and `{}`",
existing_symbolstr, symbol_str
existing_symbol.as_str(),
symbol.as_str()
))
.span_label(
*existing_span,
Expand All @@ -113,12 +119,12 @@ impl EarlyLintPass for NonAsciiIdents {
});
}
if *existing_is_ascii && !is_ascii {
*existing_symbolstr = symbol_str.clone();
*existing_symbol = symbol;
*existing_span = sp;
*existing_is_ascii = is_ascii;
}
})
.or_insert((symbol_str, sp, is_ascii));
.or_insert((symbol, sp, is_ascii));
}
}

Expand Down Expand Up @@ -232,41 +238,3 @@ impl EarlyLintPass for NonAsciiIdents {
}
}
}

mod utils {
use rustc_span::symbol::SymbolStr;
use std::hash::{Hash, Hasher};
use std::ops::Deref;

pub(super) enum CowBoxSymStr {
Interned(SymbolStr),
Owned(Box<str>),
}

impl Deref for CowBoxSymStr {
type Target = str;

fn deref(&self) -> &str {
match self {
CowBoxSymStr::Interned(interned) => interned,
CowBoxSymStr::Owned(ref owned) => owned,
}
}
}

impl Hash for CowBoxSymStr {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
Hash::hash(&**self, state)
}
}

impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
#[inline]
fn eq(&self, other: &CowBoxSymStr) -> bool {
PartialEq::eq(&**self, &**other)
}
}

impl Eq for CowBoxSymStr {}
}
3 changes: 1 addition & 2 deletions src/librustc_session/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use rustc_span::hygiene::ExpnId;
use rustc_span::source_map::{FilePathMapping, SourceMap};
use rustc_span::{MultiSpan, Span, Symbol};

use std::collections::BTreeMap;
use std::path::PathBuf;
use std::str;

Expand Down Expand Up @@ -64,7 +63,7 @@ impl GatedSpans {
#[derive(Default)]
pub struct SymbolGallery {
/// All symbols occurred and their first occurrence span.
pub symbols: Lock<BTreeMap<Symbol, Span>>,
pub symbols: Lock<FxHashMap<Symbol, Span>>,
}

impl SymbolGallery {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
#![allow(uncommon_codepoints, non_upper_case_globals)]

const s: usize = 42;
const s_s: usize = 42;

fn main() {
let s = "rust"; //~ ERROR identifier pair considered confusable
let s_s = "rust2"; //~ ERROR identifier pair considered confusable
not_affected();
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
error: identifier pair considered confusable between `s` and `s`
--> $DIR/lint-confusable-idents.rs:8:9
--> $DIR/lint-confusable-idents.rs:9:9
|
LL | const s: usize = 42;
| -- this is where the previous identifier occurred
Expand All @@ -13,5 +13,14 @@ note: the lint level is defined here
LL | #![deny(confusable_idents)]
| ^^^^^^^^^^^^^^^^^

error: aborting due to previous error
error: identifier pair considered confusable between `s_s` and `s_s`
--> $DIR/lint-confusable-idents.rs:10:9
|
LL | const s_s: usize = 42;
| --- this is where the previous identifier occurred
...
LL | let s_s = "rust2";
| ^^^^^

error: aborting due to 2 previous errors