Skip to content

Commit 7e9640a

Browse files
committed
feat(scanner): added Normalized input comparison marker.
1 parent 8e8c781 commit 7e9640a

File tree

4 files changed

+89
-1
lines changed

4 files changed

+89
-1
lines changed

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ exclude = [
1919

2020
[features]
2121
default = []
22-
all = ["arrays-32", "tuples-16", "regex"]
22+
all = ["arrays-32", "tuples-16", "regex", "unicode-normalization"]
2323
"tuples-16" = []
2424
"arrays-32" = []
2525

@@ -29,6 +29,7 @@ lazy_static = "0.1.14"
2929
strcursor = "0.2.2"
3030

3131
regex = { version = "0.1.41", optional = true }
32+
unicode-normalization = { version = "0.1.2", optional = true }
3233

3334
[dev-dependencies]
3435
bitflags = "0.4.0"

src/input.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,43 @@ impl StrCompare for IgnoreAsciiCase {
650650
}
651651
}
652652

653+
/**
Marker type used to do normalized string comparisons.

Specifically, this type compares strings based on the result of an NFD (canonical decomposition) transform of each side, so the same text written in composed or decomposed form compares as equal.
*/
#[cfg(feature="unicode-normalization")]
#[derive(Debug)]
pub enum Normalized {}

#[cfg(feature="unicode-normalization")]
impl StrCompare for Normalized {
    /**
    Returns `true` if `a` and `b` produce exactly the same sequence of characters once both have been put through an NFD transform.
    */
    fn compare(a: &str, b: &str) -> bool {
        use unicode_normalization::UnicodeNormalization;

        // Both sides are decomposed lazily; `Iterator::eq` walks them in
        // lock-step and stops at the first differing character or as soon as
        // one side runs out before the other.
        let lhs = a.nfd();
        let rhs = b.nfd();
        lhs.eq(rhs)
    }
}
678+
679+
#[cfg(feature="unicode-normalization")]
#[cfg(test)]
#[test]
fn test_normalized() {
    use self::Normalized as N;

    // Identical and canonically-equivalent inputs must compare equal, in
    // either argument order.
    assert_eq!(N::compare("hi", "hi"), true);
    assert_eq!(N::compare("café", "cafe\u{301}"), true);
    assert_eq!(N::compare("cafe\u{301}", "café"), true);
    assert_eq!(N::compare("", ""), true);

    // Negative cases: without these, a `compare` that always returned `true`
    // would pass this test.
    assert_eq!(N::compare("hi", "ho"), false);
    // Missing combining mark: the decomposed forms differ in length.
    assert_eq!(N::compare("cafe", "caf\u{e9}"), false);
    assert_eq!(N::compare("caf\u{e9}", "cafe"), false);
    // One side is a strict prefix of the other.
    assert_eq!(N::compare("", "a"), false);
    assert_eq!(N::compare("hi", "hi!"), false);
}
689+
653690
fn slice_non_space(s: &str) -> Option<usize> {
654691
use ::util::TableUtil;
655692
use ::unicode::property::White_Space_table as WS;

src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ The following [optional features](http://doc.crates.io/manifest.html#the-feature
7070
7171
* `tuples-16`: implement scanning for tuples of up to 16 elements. The default is up to 4 elements.
7272
73+
* `unicode-normalization`: include support for `Normalized` and `IgnoreCaseNormalized` cursor types. Adds a dependency on the `unicode-normalization` crate.
74+
7375
## Important Notes
7476
7577
* There are no default scanners for `&str` or `String`; if you want a string, you should pick an appropriate abstract scanner from the [`scanner`](scanner/index.html) module.
@@ -270,6 +272,7 @@ A scanning pattern is made up of one or more pattern terms, separated by commas.
270272
extern crate itertools;
271273
extern crate strcursor;
272274
#[cfg(feature="regex")] extern crate regex;
275+
#[cfg(feature="unicode-normalization")] extern crate unicode_normalization;
273276

274277
#[macro_use] mod macros;
275278

tests/cursor_markers.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,3 +139,50 @@ fn test_non_space() {
139139
Ok(())
140140
);
141141
}
142+
143+
#[cfg(feature="unicode-normalization")]
#[test]
fn test_normalized() {
    use scan_rules::ScanError as SE;
    use scan_rules::ScanErrorKind as SEK;

    type Cursor<'a> = StrCursor<'a, input::Normalized, input::IgnoreSpace, input::Wordish>;

    // All literals below spell "café bäbe"; they differ only in whether the
    // accented letters are precomposed (U+00E9, U+00E4) or written as a base
    // letter plus a combining mark (U+0301, U+0308).  The forms are written
    // with explicit escapes because the difference is invisible when rendered
    // and is easily destroyed by tools that normalize text.
    //
    // NOTE(review): the exact forms were reconstructed from the expected
    // results (mismatches at byte offsets 0 and 6) -- confirm against the
    // original bytes.
    let inp = "caf\u{e9} b\u{e4}be";

    // Without the `Normalized` marker: only the literal that is spelled the
    // same way as the input is expected to match.
    assert_match!(
        scan!(inp;
            ("caf\u{e9} b\u{e4}be") => ()),
        Ok(())
    );

    // First word decomposed: expected to fail at the start of the input.
    assert_match!(
        scan!(inp;
            ("cafe\u{301} b\u{e4}be") => ()),
        Err(SE { ref at, kind: SEK::LiteralMismatch, .. }) if at.offset() == 0
    );

    // Second word decomposed: expected to fail at the second word, which
    // starts at byte 6 because the precomposed "é" occupies two bytes.
    assert_match!(
        scan!(inp;
            ("caf\u{e9} ba\u{308}be") => ()),
        Err(SE { ref at, kind: SEK::LiteralMismatch, .. }) if at.offset() == 6
    );

    // With the `Normalized` marker: all three spellings are expected to match.
    assert_match!(
        scan!(Cursor::new(inp);
            ("caf\u{e9} b\u{e4}be") => ()),
        Ok(())
    );

    assert_match!(
        scan!(Cursor::new(inp);
            ("cafe\u{301} b\u{e4}be") => ()),
        Ok(())
    );

    assert_match!(
        scan!(Cursor::new(inp);
            ("caf\u{e9} ba\u{308}be") => ()),
        Ok(())
    );
}

0 commit comments

Comments
 (0)