Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit bb5ea62

Browse files
committed Mar 5, 2016
feat(scanner): added HorSpace and Newline.
1 parent 4743165 commit bb5ea62

File tree

3 files changed

+218
-2
lines changed

3 files changed

+218
-2
lines changed
 

‎src/scanner/misc.rs‎

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,107 @@ fn test_hex() {
108108
assert_match!(Hex::<i32>::scan_from("BadCafé"), Ok((0xbadcaf, 6)));
109109
}
110110

111+
/**
Scans a sequence of horizontal (non-newline) space characters into a string.

This *will not* match an empty sequence; there must be at least one space character for the scan to succeed.

The match ends at the first character that is either not whitespace, or is one of the newline characters U+000A through U+000D, U+0085, U+2028, or U+2029.

The `Output` parameter selects the type the matched text is returned as; it defaults to `&str`.
*/
116+
pub struct HorSpace<'a, Output=&'a str>(PhantomData<(&'a (), Output)>);
117+
118+
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
119+
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for HorSpace<'a, &'a str> {
    type Output = &'a str;

    // Returns the leading run of horizontal whitespace as a borrowed slice, paired with
    // the offset at which the unmatched remainder of the input begins.
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let input = s.as_str();
        let end = match match_hor_space(input) {
            Some(end) => end,
            // Intended message: "expected a space" (left out for now because of
            // rust-lang/rust#26448).
            None => return Err(ScanError::syntax_no_message()),
        };

        let matched = &input[..end];
        let rest = &input[end..];
        Ok((matched.into(), input.subslice_offset_stable(rest).unwrap()))
    }

    // Always `false`. NOTE(review): presumably because leading whitespace is exactly
    // what this scanner consumes; confirm against the `ScanFromStr` documentation.
    fn wants_leading_junk_stripped() -> bool {
        false
    }
}
138+
139+
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
140+
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for HorSpace<'a, String> {
    type Output = String;

    // Returns the leading run of horizontal whitespace as an owned `String`, paired with
    // the offset at which the unmatched remainder of the input begins.
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let input = s.as_str();
        let end = match match_hor_space(input) {
            Some(end) => end,
            // Intended message: "expected a space" (left out for now because of
            // rust-lang/rust#26448).
            None => return Err(ScanError::syntax_no_message()),
        };

        let matched = &input[..end];
        let rest = &input[end..];
        Ok((matched.into(), input.subslice_offset_stable(rest).unwrap()))
    }

    // Always `false`. NOTE(review): presumably because leading whitespace is exactly
    // what this scanner consumes; confirm against the `ScanFromStr` documentation.
    fn wants_leading_junk_stripped() -> bool {
        false
    }
}
159+
160+
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
161+
#[cfg(not(str_into_output_extra_broken))]
162+
impl<'a, Output> ScanFromStr<'a> for HorSpace<'a, Output>
163+
where &'a str: Into<Output> {
164+
type Output = Output;
165+
166+
fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
167+
let s = s.as_str();
168+
match match_hor_space(s) {
169+
Some(b) => {
170+
let word = &s[..b];
171+
let tail = &s[b..];
172+
Ok((word.into(), s.subslice_offset_stable(tail).unwrap()))
173+
},
174+
// None => Err(ScanError::syntax("expected a space")),
175+
None => Err(ScanError::syntax_no_message()),
176+
}
177+
}
178+
179+
fn wants_leading_junk_stripped() -> bool { false }
180+
}
181+
182+
fn match_hor_space(s: &str) -> Option<usize> {
183+
use ::util::TableUtil;
184+
use ::unicode::property::White_Space_table as WS;
185+
186+
s.char_indices()
187+
.take_while(|&(_, c)| match c {
188+
'\x0a'...'\x0d' | '\u{85}' | '\u{2028}' | '\u{2029}' => false,
189+
c => WS.span_table_contains(&c)
190+
})
191+
.map(|(i, c)| i + c.len_utf8())
192+
.last()
193+
}
194+
195+
#[cfg(test)]
#[test]
fn test_hor_space() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    // Empty input and input that starts with a non-space character never match.
    assert_match!(HorSpace::<&str>::scan_from(""), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(HorSpace::<&str>::scan_from("a"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(HorSpace::<&str>::scan_from("0"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));

    // Plain horizontal whitespace.
    assert_match!(HorSpace::<&str>::scan_from(" "), Ok((" ", 1)));
    assert_match!(HorSpace::<&str>::scan_from("\t"), Ok(("\t", 1)));

    // Every newline character is rejected, not only CR and LF.
    assert_match!(HorSpace::<&str>::scan_from("\r"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(HorSpace::<&str>::scan_from("\n"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(HorSpace::<&str>::scan_from("\r\n"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(HorSpace::<&str>::scan_from("\x0b"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(HorSpace::<&str>::scan_from("\x0c"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(HorSpace::<&str>::scan_from("\u{85}"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(HorSpace::<&str>::scan_from("\u{2028}"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(HorSpace::<&str>::scan_from("\u{2029}"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));

    // The match stops at the first newline; the offset equals the byte length of the
    // matched text (space, tab, space = 3 bytes).
    assert_match!(HorSpace::<&str>::scan_from(" \t \n \t\t "), Ok((" \t ", 3)));
}
211+
111212
/**
112213
Scans a single identifier into a string.
113214
@@ -305,6 +406,111 @@ fn test_line() {
305406
assert_match!(Line::<&str>::scan_from("abc\rdef"), Ok(("abc", 4)));
306407
}
307408

409+
/**
Scans a single newline into a string.

This *will not* match an empty sequence, and will not match more than one newline.

A carriage return immediately followed by a line feed is matched as one newline.  Otherwise, each of U+000A, U+000B, U+000C, U+000D, U+0085, U+2028, and U+2029 is matched as a newline on its own.

The `Output` parameter selects the type the matched text is returned as; it defaults to `&str`.
*/
414+
pub struct Newline<'a, Output=&'a str>(PhantomData<(&'a (), Output)>);
415+
416+
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
417+
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Newline<'a, &'a str> {
    type Output = &'a str;

    // Returns the single newline sequence at the start of the input as a borrowed slice,
    // paired with the offset at which the unmatched remainder of the input begins.
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let input = s.as_str();
        let end = match match_newline(input) {
            Some(end) => end,
            // No descriptive message for now because of rust-lang/rust#26448.
            None => return Err(ScanError::syntax_no_message()),
        };

        let matched = &input[..end];
        let rest = &input[end..];
        Ok((matched.into(), input.subslice_offset_stable(rest).unwrap()))
    }

    // Always `false`. NOTE(review): presumably because stripping leading whitespace
    // would also remove the newline this scanner looks for; confirm against the
    // `ScanFromStr` documentation.
    fn wants_leading_junk_stripped() -> bool {
        false
    }
}
435+
436+
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
437+
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Newline<'a, String> {
    type Output = String;

    // Returns the single newline sequence at the start of the input as an owned
    // `String`, paired with the offset at which the unmatched remainder begins.
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let input = s.as_str();
        let end = match match_newline(input) {
            Some(end) => end,
            // No descriptive message for now because of rust-lang/rust#26448.
            None => return Err(ScanError::syntax_no_message()),
        };

        let matched = &input[..end];
        let rest = &input[end..];
        Ok((matched.into(), input.subslice_offset_stable(rest).unwrap()))
    }

    // Always `false`. NOTE(review): presumably because stripping leading whitespace
    // would also remove the newline this scanner looks for; confirm against the
    // `ScanFromStr` documentation.
    fn wants_leading_junk_stripped() -> bool {
        false
    }
}
455+
456+
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
457+
#[cfg(not(str_into_output_extra_broken))]
458+
impl<'a, Output> ScanFromStr<'a> for Newline<'a, Output>
459+
where &'a str: Into<Output> {
460+
type Output = Output;
461+
fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
462+
let s = s.as_str();
463+
match match_newline(s) {
464+
Some(b) => {
465+
let word = &s[..b];
466+
let tail = &s[b..];
467+
Ok((word.into(), s.subslice_offset_stable(tail).unwrap()))
468+
},
469+
// None => Err(ScanError::syntax("expected at least one non-space character")),
470+
None => Err(ScanError::syntax_no_message())
471+
}
472+
}
473+
474+
fn wants_leading_junk_stripped() -> bool { false }
475+
}
476+
477+
/**
Matches a single newline sequence at the very start of `s`.

Returns the length in bytes of the matched sequence, or `None` if `s` is empty or does not begin with a newline.  At most one newline is matched; anything after it is ignored.

The recognised sequences are: LF (U+000A), VT (U+000B), FF (U+000C), CR (U+000D), CR immediately followed by LF (matched as a single unit), NEL (U+0085), LS (U+2028), and PS (U+2029).
*/
fn match_newline(s: &str) -> Option<usize> {
    // See: <http://www.unicode.org/reports/tr18/#RL1.6>.
    let mut chars = s.chars();

    let first = match chars.next() {
        Some(c) => c,
        None => return None,
    };

    match first {
        // CR LF is one two-byte newline; a CR followed by anything else stands alone.
        '\x0d' => {
            if chars.next() == Some('\x0a') {
                Some(2)
            } else {
                Some(1)
            }
        },
        // Every other newline is a single character; the non-ASCII ones occupy more
        // than one byte, so report the encoded length rather than a fixed 1.
        '\x0a' | '\x0b' | '\x0c' | '\u{85}' | '\u{2028}' | '\u{2029}' => Some(first.len_utf8()),
        _ => None,
    }
}
499+
500+
#[cfg(test)]
#[test]
fn test_newline() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    // Empty input and input that does not start with a newline never match.
    assert_match!(Newline::<&str>::scan_from(""), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(Newline::<&str>::scan_from("x"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));

    // CR, LF and CR LF; only the first newline is consumed.
    assert_match!(Newline::<&str>::scan_from("\rx"), Ok(("\r", 1)));
    assert_match!(Newline::<&str>::scan_from("\nx"), Ok(("\n", 1)));
    assert_match!(Newline::<&str>::scan_from("\r\nx"), Ok(("\r\n", 2)));
    assert_match!(Newline::<&str>::scan_from("\n\rx"), Ok(("\n", 1)));

    // A CR at the very end of the input has no LF to pair with.
    assert_match!(Newline::<&str>::scan_from("\r"), Ok(("\r", 1)));

    // The remaining single-character newlines; the offset is the UTF-8 byte length.
    assert_match!(Newline::<&str>::scan_from("\x0bx"), Ok(("\x0b", 1)));
    assert_match!(Newline::<&str>::scan_from("\x0cx"), Ok(("\x0c", 1)));
    assert_match!(Newline::<&str>::scan_from("\u{85}x"), Ok(("\u{85}", 2)));
    assert_match!(Newline::<&str>::scan_from("\u{2028}x"), Ok(("\u{2028}", 3)));
    assert_match!(Newline::<&str>::scan_from("\u{2029}x"), Ok(("\u{2029}", 3)));
}
513+
308514
/**
309515
Scans a sequence of non-space characters into a string.
310516

‎src/scanner/mod.rs‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ Functions ending in `_a` are a shorthand for the common case of wrapping a runti
5757
It is also where implementations for existing standard and external types are kept, though these do not appear in the documentation.
5858
*/
5959
pub use self::misc::{
60-
Everything, NonSpace, Space,
60+
Everything, HorSpace, Newline, NonSpace, Space,
6161
Ident, Line, Number, Word, Wordish,
6262
Inferred, KeyValuePair, QuotedString,
6363
Binary, Octal, Hex,

‎tests/scan_space.rs‎

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ or distributed except according to those terms.
1010
#[macro_use] extern crate scan_rules;
1111
#[macro_use] mod util;
1212

13-
use scan_rules::scanner::{Space, max_width_a};
13+
use scan_rules::scanner::{HorSpace, Newline, Space, max_width_a};
1414

1515
#[test]
1616
fn test_scan_space() {
@@ -35,4 +35,14 @@ fn test_scan_space() {
3535
scan!(inp; (let a <| max_width_a::<Space>(3), let b: Space, "x", ..tail) => (a, b, tail)),
3636
Ok((" \t", " \n ", "\r\n y z \t\r "))
3737
);
38+
39+
assert_match!(
40+
scan!(inp; (
41+
let a: HorSpace, let b: Newline, let c: HorSpace, "x",
42+
let d: Newline, "y",
43+
let e: Space, "z",
44+
let f: Space
45+
) => (a, b, c, d, e, f)),
46+
Ok((" \t ", "\n", " ", "\r\n", " ", " \t\r "))
47+
);
3848
}

0 commit comments

Comments
 (0)
Please sign in to comment.