19
19
20
20
use cmp;
21
21
use fmt;
22
+ use slice:: memchr;
22
23
use usize;
23
24
24
25
// Pattern
@@ -241,25 +242,66 @@ pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
241
242
242
243
/// Associated type for `<char as Pattern<'a>>::Searcher`.
///
/// Carries the state of a forward search for a single `char` inside a
/// string slice.
#[derive(Clone, Debug)]
pub struct CharSearcher<'a> {
    /// The string being searched.
    haystack: &'a str,
    /// Byte offset at which the forward search resumes.
    ///
    /// Invariant: `finger` must be a valid UTF-8 byte index of `haystack`.
    finger: usize,
    /// The `char` being searched for.
    needle: char,
    /// The needle as a single byte; only populated for ASCII chars.
    ///
    /// Invariant: must be an ASCII byte (no high bit).
    single_byte: Option<u8>,
}
245
254
246
255
unsafe impl < ' a > Searcher < ' a > for CharSearcher < ' a > {
247
256
#[ inline]
248
257
fn haystack ( & self ) -> & ' a str {
249
- unimplemented ! ( ) ;
258
+ self . haystack
250
259
}
251
260
#[ inline]
252
261
fn next ( & mut self ) -> SearchStep {
253
- unimplemented ! ( ) ;
262
+ let old_finger = self . finger ;
263
+ let slice = unsafe { self . haystack . get_unchecked ( old_finger..) } ;
264
+ let mut iter = slice. chars ( ) ;
265
+ let old_len = iter. iter . len ( ) ;
266
+ if let Some ( ch) = iter. next ( ) {
267
+ // add byte offset of current character
268
+ // without recalculating
269
+ self . finger += iter. iter . len ( ) - old_len;
270
+ if ch == self . needle {
271
+ SearchStep :: Match ( old_finger, self . finger )
272
+ } else {
273
+ SearchStep :: Reject ( old_finger, self . finger )
274
+ }
275
+ } else {
276
+ SearchStep :: Done
277
+ }
254
278
}
255
279
#[ inline]
256
280
fn next_match ( & mut self ) -> Option < ( usize , usize ) > {
257
- unimplemented ! ( ) ;
258
- }
259
- #[ inline]
260
- fn next_reject ( & mut self ) -> Option < ( usize , usize ) > {
261
- unimplemented ! ( ) ;
281
+ if let Some ( byte) = self . single_byte {
282
+ let old_finger = self . finger ;
283
+ let slice = unsafe { self . haystack . get_unchecked ( old_finger..) } ;
284
+ let bytes = slice. as_bytes ( ) ;
285
+ if let Some ( index) = memchr:: memchr ( byte, bytes) {
286
+ // index is the index of a valid ASCII byte,
287
+ // so we can add one to it
288
+ self . finger += index + 1 ;
289
+ Some ( ( index, self . finger ) )
290
+ } else {
291
+ None
292
+ }
293
+ } else {
294
+ loop {
295
+ match self . next ( ) {
296
+ SearchStep :: Match ( a, b) => break Some ( ( a, b) ) ,
297
+ SearchStep :: Done => break None ,
298
+ _ => continue ,
299
+ }
300
+ }
301
+ }
262
302
}
303
+
304
+ // let next_reject use the default implementation from the Searcher trait
263
305
}
264
306
265
307
unsafe impl < ' a > ReverseSearcher < ' a > for CharSearcher < ' a > {
@@ -271,10 +313,8 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
271
313
fn next_match_back ( & mut self ) -> Option < ( usize , usize ) > {
272
314
unimplemented ! ( ) ;
273
315
}
274
- #[ inline]
275
- fn next_reject_back ( & mut self ) -> Option < ( usize , usize ) > {
276
- unimplemented ! ( ) ;
277
- }
316
+
317
+ // let next_reject_back use the default implementation from the ReverseSearcher trait
278
318
}
279
319
280
320
// Marker impl with an empty body: declares that `CharSearcher` satisfies
// `DoubleEndedSearcher`, a trait whose supertrait is `ReverseSearcher`.
impl < ' a > DoubleEndedSearcher < ' a > for CharSearcher < ' a > { }
@@ -285,7 +325,19 @@ impl<'a> Pattern<'a> for char {
285
325
286
326
#[ inline]
287
327
fn into_searcher ( self , haystack : & ' a str ) -> Self :: Searcher {
288
- CharSearcher ( haystack)
328
+ let single_byte = if self . len_utf8 ( ) == 1 {
329
+ let mut storage = [ 0 ] ;
330
+ self . encode_utf8 ( & mut storage) ;
331
+ Some ( storage[ 0 ] )
332
+ } else {
333
+ None
334
+ } ;
335
+ CharSearcher {
336
+ haystack,
337
+ finger : 0 ,
338
+ needle : self ,
339
+ single_byte,
340
+ }
289
341
}
290
342
291
343
#[ inline]
0 commit comments