Skip to content

Commit 8b48683

Browse files
committed
feat(scanner): added IgnoreCaseNormalized comparison marker.
1 parent 7e9640a commit 8b48683

File tree

2 files changed

+82
-0
lines changed

2 files changed

+82
-0
lines changed

src/input.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,49 @@ fn test_ignore_case() {
633633
assert_eq!(IC::compare("ßẞ", "ẞß"), true);
634634
}
635635

636+
/**
Marker type that selects case-insensitive, Unicode-normalized string comparison.

Two strings compare equal under this marker when they produce the same sequence of characters after an NFD (canonical decomposition) transform followed by conversion to lower-case.

Note that this *does not* take any locale information into account.  It is only as correct as a call to `char::to_lowercase`.

The enum has no variants, so it can never be instantiated; it is only ever named as a type.  It is only compiled when the `unicode-normalization` feature is enabled.
*/
#[cfg(feature = "unicode-normalization")]
#[derive(Debug)]
pub enum IgnoreCaseNormalized {}
646+
647+
#[cfg(feature = "unicode-normalization")]
impl StrCompare for IgnoreCaseNormalized {
    /**
    Returns `true` if `a` and `b` are equal after NFD decomposition followed by lower-casing.

    The two transformed character sequences are compared lazily, one character at a time; the comparison ends at the first mismatch or as soon as one sequence runs out before the other.
    */
    fn compare(a: &str, b: &str) -> bool {
        use unicode_normalization::UnicodeNormalization;

        // `char::to_lowercase` yields an iterator because one character may
        // lower-case to several, hence `flat_map` rather than `map`.
        let a_folded = a.nfd().flat_map(char::to_lowercase);
        let b_folded = b.nfd().flat_map(char::to_lowercase);

        // `Iterator::eq` is true only when both sequences have the same length
        // and are element-wise equal, which is what the lock-step walk needs.
        a_folded.eq(b_folded)
    }
}
663+
664+
#[cfg(feature = "unicode-normalization")]
#[cfg(test)]
#[test]
fn test_ignore_case_normalized() {
    use self::IgnoreCaseNormalized as ICN;

    // Plain ASCII, differing only in case.
    assert_eq!(ICN::compare("hi", "hi"), true);
    assert_eq!(ICN::compare("Hi", "hI"), true);
    assert_eq!(ICN::compare("hI", "Hi"), true);

    // Precomposed vs. decomposed accents, in both argument orders and mixed case.
    assert_eq!(ICN::compare("café", "cafe\u{301}"), true);
    assert_eq!(ICN::compare("cafe\u{301}", "café"), true);
    assert_eq!(ICN::compare("CafÉ", "CafE\u{301}"), true);
    assert_eq!(ICN::compare("CAFÉ", "cafe\u{301}"), true);

    // Empty strings are equal to each other and to nothing else.
    assert_eq!(ICN::compare("", ""), true);
    assert_eq!(ICN::compare("", "hi"), false);
    assert_eq!(ICN::compare("hi", ""), false);

    // Mismatches must be rejected: differing characters, and one string being
    // a strict prefix of the other (in either argument position).
    assert_eq!(ICN::compare("hi", "ho"), false);
    assert_eq!(ICN::compare("hi", "hi!"), false);
    assert_eq!(ICN::compare("hi!", "hi"), false);

    // Normalization must not discard combining marks: an accented letter is
    // not equal to its unaccented base letter.
    assert_eq!(ICN::compare("cafe", "caf\u{e9}"), false);
    assert_eq!(ICN::compare("cafe\u{301}", "cafe"), false);
}
678+
636679
/**
637680
Marker type used to do ASCII case-insensitive string comparisons.
638681

tests/case_match.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ use scan_rules::ScanError as SE;
1414
use scan_rules::ScanErrorKind as SEK;
1515
use scan_rules::input::{StrCursor, ExactCompare, IgnoreCase, IgnoreAsciiCase};
1616

17+
#[cfg(feature="unicode-normalization")]
18+
use scan_rules::input::IgnoreCaseNormalized;
19+
1720
#[test]
1821
fn test_case_match() {
1922
let inp = "UPPERCASE lowercase mIxeDcAsE TitleCase";
@@ -109,6 +112,42 @@ fn test_case_match() {
109112
);
110113
}
111114

115+
#[cfg(feature = "unicode-normalization")]
#[test]
fn test_case_match_normalized() {
    // Every scan below runs over this same input with an
    // `IgnoreCaseNormalized` cursor and is expected to succeed.
    let input = "UPPERCASE lowercase mIxeDcAsE TitleCase";

    // Literals spelled exactly like the input.
    assert_match!(
        scan!(StrCursor::<IgnoreCaseNormalized>::new(input);
            ("UPPERCASE", "lowercase", "mIxeDcAsE", "TitleCase") => ()),
        Ok(())
    );

    // Case differs in the first word only.
    assert_match!(
        scan!(StrCursor::<IgnoreCaseNormalized>::new(input);
            ("UPPERCaSE", "lowercase", "mIxeDcAsE", "TitleCase") => ()),
        Ok(())
    );

    // Case differs in the second word only.
    assert_match!(
        scan!(StrCursor::<IgnoreCaseNormalized>::new(input);
            ("UPPERCASE", "lowerCase", "mIxeDcAsE", "TitleCase") => ()),
        Ok(())
    );

    // Case differs in the third word only.
    assert_match!(
        scan!(StrCursor::<IgnoreCaseNormalized>::new(input);
            ("UPPERCASE", "lowercase", "mIxEdcAsE", "TitleCase") => ()),
        Ok(())
    );

    // Case differs in the fourth word only.
    assert_match!(
        scan!(StrCursor::<IgnoreCaseNormalized>::new(input);
            ("UPPERCASE", "lowercase", "mIxeDcAsE", "TitLecAse") => ()),
        Ok(())
    );
}
150+
112151
/**
113152
Make sure the "official" API style for new code works.
114153
*/

0 commit comments

Comments
 (0)