Skip to content

Commit 6caa533

Browse files
authored
Rollup merge of #93325 - tmiasko:lev, r=davidtwco
Introduce a limit to Levenshtein distance computation

Incorporate distance limit from `find_best_match_for_name` directly into Levenshtein distance computation. Use the string size difference as a lower bound on the distance and exit early when it exceeds the specified limit. After finding a candidate within a limit, lower the limit further to restrict the search space.
2 parents bc26f97 + 6236882 commit 6caa533

File tree

5 files changed

+60
-42
lines changed

5 files changed

+60
-42
lines changed

compiler/rustc_parse/src/parser/item.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ impl<'a> Parser<'a> {
423423
// Maybe the user misspelled `macro_rules` (issue #91227)
424424
if self.token.is_ident()
425425
&& path.segments.len() == 1
426-
&& lev_distance("macro_rules", &path.segments[0].ident.to_string()) <= 3
426+
&& lev_distance("macro_rules", &path.segments[0].ident.to_string(), 3).is_some()
427427
{
428428
err.span_suggestion(
429429
path.span,

compiler/rustc_span/src/lev_distance.rs

+36-32
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,21 @@ use std::cmp;
1111
mod tests;
1212

1313
/// Finds the Levenshtein distance between two strings.
14-
pub fn lev_distance(a: &str, b: &str) -> usize {
15-
// cases which don't require further computation
16-
if a.is_empty() {
17-
return b.chars().count();
18-
} else if b.is_empty() {
19-
return a.chars().count();
14+
///
15+
/// Returns None if the distance exceeds the limit.
16+
pub fn lev_distance(a: &str, b: &str, limit: usize) -> Option<usize> {
17+
let n = a.chars().count();
18+
let m = b.chars().count();
19+
let min_dist = if n < m { m - n } else { n - m };
20+
21+
if min_dist > limit {
22+
return None;
23+
}
24+
if n == 0 || m == 0 {
25+
return (min_dist <= limit).then_some(min_dist);
2026
}
2127

22-
let mut dcol: Vec<_> = (0..=b.len()).collect();
23-
let mut t_last = 0;
28+
let mut dcol: Vec<_> = (0..=m).collect();
2429

2530
for (i, sc) in a.chars().enumerate() {
2631
let mut current = i;
@@ -35,10 +40,10 @@ pub fn lev_distance(a: &str, b: &str) -> usize {
3540
dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1;
3641
}
3742
current = next;
38-
t_last = j;
3943
}
4044
}
41-
dcol[t_last + 1]
45+
46+
(dcol[m] <= limit).then_some(dcol[m])
4247
}
4348

4449
/// Finds the best match for a given word in the given iterator.
@@ -51,39 +56,38 @@ pub fn lev_distance(a: &str, b: &str) -> usize {
5156
/// on an edge case with a lower(upper)case letters mismatch.
5257
#[cold]
5358
pub fn find_best_match_for_name(
54-
name_vec: &[Symbol],
59+
candidates: &[Symbol],
5560
lookup: Symbol,
5661
dist: Option<usize>,
5762
) -> Option<Symbol> {
5863
let lookup = lookup.as_str();
59-
let max_dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3);
64+
let lookup_uppercase = lookup.to_uppercase();
6065

6166
// Priority of matches:
6267
// 1. Exact case insensitive match
6368
// 2. Levenshtein distance match
6469
// 3. Sorted word match
65-
if let Some(case_insensitive_match) =
66-
name_vec.iter().find(|candidate| candidate.as_str().to_uppercase() == lookup.to_uppercase())
67-
{
68-
return Some(*case_insensitive_match);
70+
if let Some(c) = candidates.iter().find(|c| c.as_str().to_uppercase() == lookup_uppercase) {
71+
return Some(*c);
6972
}
70-
let levenshtein_match = name_vec
71-
.iter()
72-
.filter_map(|&name| {
73-
let dist = lev_distance(lookup, name.as_str());
74-
if dist <= max_dist { Some((name, dist)) } else { None }
75-
})
76-
// Here we are collecting the next structure:
77-
// (levenshtein_match, levenshtein_distance)
78-
.fold(None, |result, (candidate, dist)| match result {
79-
None => Some((candidate, dist)),
80-
Some((c, d)) => Some(if dist < d { (candidate, dist) } else { (c, d) }),
81-
});
82-
if levenshtein_match.is_some() {
83-
levenshtein_match.map(|(candidate, _)| candidate)
84-
} else {
85-
find_match_by_sorted_words(name_vec, lookup)
73+
74+
let mut dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3);
75+
let mut best = None;
76+
for c in candidates {
77+
match lev_distance(lookup, c.as_str(), dist) {
78+
Some(0) => return Some(*c),
79+
Some(d) => {
80+
dist = d - 1;
81+
best = Some(*c);
82+
}
83+
None => {}
84+
}
8685
}
86+
if best.is_some() {
87+
return best;
88+
}
89+
90+
find_match_by_sorted_words(candidates, lookup)
8791
}
8892

8993
fn find_match_by_sorted_words(iter_names: &[Symbol], lookup: &str) -> Option<Symbol> {

compiler/rustc_span/src/lev_distance/tests.rs

+15-7
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,26 @@ fn test_lev_distance() {
55
use std::char::{from_u32, MAX};
66
// Test bytelength agnosticity
77
for c in (0..MAX as u32).filter_map(from_u32).map(|i| i.to_string()) {
8-
assert_eq!(lev_distance(&c[..], &c[..]), 0);
8+
assert_eq!(lev_distance(&c[..], &c[..], usize::MAX), Some(0));
99
}
1010

1111
let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
1212
let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
1313
let c = "Mary häd ä little lämb\n\nLittle lämb\n";
14-
assert_eq!(lev_distance(a, b), 1);
15-
assert_eq!(lev_distance(b, a), 1);
16-
assert_eq!(lev_distance(a, c), 2);
17-
assert_eq!(lev_distance(c, a), 2);
18-
assert_eq!(lev_distance(b, c), 1);
19-
assert_eq!(lev_distance(c, b), 1);
14+
assert_eq!(lev_distance(a, b, usize::MAX), Some(1));
15+
assert_eq!(lev_distance(b, a, usize::MAX), Some(1));
16+
assert_eq!(lev_distance(a, c, usize::MAX), Some(2));
17+
assert_eq!(lev_distance(c, a, usize::MAX), Some(2));
18+
assert_eq!(lev_distance(b, c, usize::MAX), Some(1));
19+
assert_eq!(lev_distance(c, b, usize::MAX), Some(1));
20+
}
21+
22+
#[test]
23+
fn test_lev_distance_limit() {
24+
assert_eq!(lev_distance("abc", "abcd", 1), Some(1));
25+
assert_eq!(lev_distance("abc", "abcd", 0), None);
26+
assert_eq!(lev_distance("abc", "xyz", 3), Some(3));
27+
assert_eq!(lev_distance("abc", "xyz", 2), None);
2028
}
2129

2230
#[test]

compiler/rustc_span/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
1616
#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
1717
#![feature(array_windows)]
18+
#![feature(bool_to_option)]
1819
#![feature(crate_visibility_modifier)]
1920
#![feature(if_let_guard)]
2021
#![feature(negative_impls)]

compiler/rustc_typeck/src/check/method/probe.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -1904,8 +1904,13 @@ impl<'a, 'tcx> ProbeContext<'a, 'tcx> {
19041904
.associated_items(def_id)
19051905
.in_definition_order()
19061906
.filter(|x| {
1907-
let dist = lev_distance(name.as_str(), x.name.as_str());
1908-
x.kind.namespace() == Namespace::ValueNS && dist > 0 && dist <= max_dist
1907+
if x.kind.namespace() != Namespace::ValueNS {
1908+
return false;
1909+
}
1910+
match lev_distance(name.as_str(), x.name.as_str(), max_dist) {
1911+
Some(d) => d > 0,
1912+
None => false,
1913+
}
19091914
})
19101915
.copied()
19111916
.collect()

0 commit comments

Comments
 (0)