@@ -11,16 +11,21 @@ use std::cmp;
11
11
mod tests;
12
12
13
13
/// Finds the Levenshtein distance between two strings.
14
- pub fn lev_distance ( a : & str , b : & str ) -> usize {
15
- // cases which don't require further computation
16
- if a. is_empty ( ) {
17
- return b. chars ( ) . count ( ) ;
18
- } else if b. is_empty ( ) {
19
- return a. chars ( ) . count ( ) ;
14
+ ///
15
+ /// Returns None if the distance exceeds the limit.
16
+ pub fn lev_distance ( a : & str , b : & str , limit : usize ) -> Option < usize > {
17
+ let n = a. chars ( ) . count ( ) ;
18
+ let m = b. chars ( ) . count ( ) ;
19
+ let min_dist = if n < m { m - n } else { n - m } ;
20
+
21
+ if min_dist > limit {
22
+ return None ;
23
+ }
24
+ if n == 0 || m == 0 {
25
+ return ( min_dist <= limit) . then_some ( min_dist) ;
20
26
}
21
27
22
- let mut dcol: Vec < _ > = ( 0 ..=b. len ( ) ) . collect ( ) ;
23
- let mut t_last = 0 ;
28
+ let mut dcol: Vec < _ > = ( 0 ..=m) . collect ( ) ;
24
29
25
30
for ( i, sc) in a. chars ( ) . enumerate ( ) {
26
31
let mut current = i;
@@ -35,10 +40,10 @@ pub fn lev_distance(a: &str, b: &str) -> usize {
35
40
dcol[ j + 1 ] = cmp:: min ( dcol[ j + 1 ] , dcol[ j] ) + 1 ;
36
41
}
37
42
current = next;
38
- t_last = j;
39
43
}
40
44
}
41
- dcol[ t_last + 1 ]
45
+
46
+ ( dcol[ m] <= limit) . then_some ( dcol[ m] )
42
47
}
43
48
44
49
/// Finds the best match for a given word in the given iterator.
@@ -51,39 +56,38 @@ pub fn lev_distance(a: &str, b: &str) -> usize {
51
56
/// on an edge case with a lower(upper)case letters mismatch.
52
57
#[ cold]
53
58
pub fn find_best_match_for_name (
54
- name_vec : & [ Symbol ] ,
59
+ candidates : & [ Symbol ] ,
55
60
lookup : Symbol ,
56
61
dist : Option < usize > ,
57
62
) -> Option < Symbol > {
58
63
let lookup = lookup. as_str ( ) ;
59
- let max_dist = dist . unwrap_or_else ( || cmp :: max ( lookup. len ( ) , 3 ) / 3 ) ;
64
+ let lookup_uppercase = lookup. to_uppercase ( ) ;
60
65
61
66
// Priority of matches:
62
67
// 1. Exact case insensitive match
63
68
// 2. Levenshtein distance match
64
69
// 3. Sorted word match
65
- if let Some ( case_insensitive_match) =
66
- name_vec. iter ( ) . find ( |candidate| candidate. as_str ( ) . to_uppercase ( ) == lookup. to_uppercase ( ) )
67
- {
68
- return Some ( * case_insensitive_match) ;
70
+ if let Some ( c) = candidates. iter ( ) . find ( |c| c. as_str ( ) . to_uppercase ( ) == lookup_uppercase) {
71
+ return Some ( * c) ;
69
72
}
70
- let levenshtein_match = name_vec
71
- . iter ( )
72
- . filter_map ( |& name| {
73
- let dist = lev_distance ( lookup, name. as_str ( ) ) ;
74
- if dist <= max_dist { Some ( ( name, dist) ) } else { None }
75
- } )
76
- // Here we are collecting the next structure:
77
- // (levenshtein_match, levenshtein_distance)
78
- . fold ( None , |result, ( candidate, dist) | match result {
79
- None => Some ( ( candidate, dist) ) ,
80
- Some ( ( c, d) ) => Some ( if dist < d { ( candidate, dist) } else { ( c, d) } ) ,
81
- } ) ;
82
- if levenshtein_match. is_some ( ) {
83
- levenshtein_match. map ( |( candidate, _) | candidate)
84
- } else {
85
- find_match_by_sorted_words ( name_vec, lookup)
73
+
74
+ let mut dist = dist. unwrap_or_else ( || cmp:: max ( lookup. len ( ) , 3 ) / 3 ) ;
75
+ let mut best = None ;
76
+ for c in candidates {
77
+ match lev_distance ( lookup, c. as_str ( ) , dist) {
78
+ Some ( 0 ) => return Some ( * c) ,
79
+ Some ( d) => {
80
+ dist = d - 1 ;
81
+ best = Some ( * c) ;
82
+ }
83
+ None => { }
84
+ }
86
85
}
86
+ if best. is_some ( ) {
87
+ return best;
88
+ }
89
+
90
+ find_match_by_sorted_words ( candidates, lookup)
87
91
}
88
92
89
93
fn find_match_by_sorted_words ( iter_names : & [ Symbol ] , lookup : & str ) -> Option < Symbol > {
0 commit comments