10
10
use core:: borrow:: { Borrow , BorrowMut } ;
11
11
use core:: iter:: FusedIterator ;
12
12
use core:: mem;
13
+ use core:: mem:: MaybeUninit ;
13
14
use core:: ptr;
14
15
use core:: str:: pattern:: { DoubleEndedSearcher , Pattern , ReverseSearcher , Searcher } ;
15
16
use core:: unicode:: conversions;
@@ -366,14 +367,9 @@ impl str {
366
367
without modifying the original"]
367
368
#[ stable( feature = "unicode_case_mapping" , since = "1.2.0" ) ]
368
369
pub fn to_lowercase ( & self ) -> String {
369
- let out = convert_while_ascii ( self . as_bytes ( ) , u8:: to_ascii_lowercase) ;
370
+ let ( mut s , rest ) = convert_while_ascii ( self , u8:: to_ascii_lowercase) ;
370
371
371
- // Safety: we know this is a valid char boundary since
372
- // out.len() is only progressed if ascii bytes are found
373
- let rest = unsafe { self . get_unchecked ( out. len ( ) ..) } ;
374
-
375
- // Safety: We have written only valid ASCII to our vec
376
- let mut s = unsafe { String :: from_utf8_unchecked ( out) } ;
372
+ let prefix_len = s. len ( ) ;
377
373
378
374
for ( i, c) in rest. char_indices ( ) {
379
375
if c == 'Σ' {
@@ -382,8 +378,7 @@ impl str {
382
378
// in `SpecialCasing.txt`,
383
379
// so hard-code it rather than have a generic "condition" mechanism.
384
380
// See https://github.com/rust-lang/rust/issues/26035
385
- let out_len = self . len ( ) - rest. len ( ) ;
386
- let sigma_lowercase = map_uppercase_sigma ( & self , i + out_len) ;
381
+ let sigma_lowercase = map_uppercase_sigma ( self , prefix_len + i) ;
387
382
s. push ( sigma_lowercase) ;
388
383
} else {
389
384
match conversions:: to_lower ( c) {
@@ -459,14 +454,7 @@ impl str {
459
454
without modifying the original"]
460
455
#[ stable( feature = "unicode_case_mapping" , since = "1.2.0" ) ]
461
456
pub fn to_uppercase ( & self ) -> String {
462
- let out = convert_while_ascii ( self . as_bytes ( ) , u8:: to_ascii_uppercase) ;
463
-
464
- // Safety: we know this is a valid char boundary since
465
- // out.len() is only progressed if ascii bytes are found
466
- let rest = unsafe { self . get_unchecked ( out. len ( ) ..) } ;
467
-
468
- // Safety: We have written only valid ASCII to our vec
469
- let mut s = unsafe { String :: from_utf8_unchecked ( out) } ;
457
+ let ( mut s, rest) = convert_while_ascii ( self , u8:: to_ascii_uppercase) ;
470
458
471
459
for c in rest. chars ( ) {
472
460
match conversions:: to_upper ( c) {
@@ -615,50 +603,86 @@ pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box<str> {
615
603
unsafe { Box :: from_raw ( Box :: into_raw ( v) as * mut str ) }
616
604
}
617
605
618
- /// Converts the bytes while the bytes are still ascii.
606
+ /// Converts leading ascii bytes in `s` by calling the `convert` function.
607
+ ///
619
608
/// For better average performance, this happens in chunks of `2*size_of::<usize>()`.
620
- /// Returns a vec with the converted bytes.
609
+ ///
610
+ /// Returns a tuple of the converted prefix and the remainder starting from
611
+ /// the first non-ascii character.
621
612
#[ inline]
622
613
#[ cfg( not( test) ) ]
623
614
#[ cfg( not( no_global_oom_handling) ) ]
624
// Converts the leading run of ASCII bytes of `s` with `convert` and splits
// the input at the first non-ASCII byte.
//
// Returns `(converted_prefix, rest)`, where `rest` is the untouched remainder
// of `s` beginning at the first non-ASCII byte (empty if `s` is all ASCII).
//
// `convert` must map every ASCII byte to an ASCII byte: the converted prefix
// is turned into a `String` without re-validation.
fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
    const USIZE_SIZE: usize = mem::size_of::<usize>();
    const MAGIC_UNROLL: usize = 2;
    const N: usize = USIZE_SIZE * MAGIC_UNROLL;

    let bytes = s.as_bytes();
    let mut out: Vec<u8> = Vec::with_capacity(bytes.len());

    // `Vec::with_capacity` guarantees *at least* the requested capacity, so
    // the spare capacity may be longer than the input. Clamp it to the input
    // length so input and output can be walked in lock-step without assuming
    // that the two lengths happen to be equal.
    let spare = &mut out.spare_capacity_mut()[..bytes.len()];

    // Number of leading bytes that have been converted and written to `spare`.
    let mut prefix_len = 0_usize;

    // Process the input in fixed-size chunks to enable auto-vectorization.
    let mut is_ascii = [false; N];
    for (chunk, out_chunk) in bytes.chunks_exact(N).zip(spare.chunks_exact_mut(N)) {
        for j in 0..N {
            is_ascii[j] = chunk[j] <= 127;
        }

        // Auto-vectorization for this check is a bit fragile; summing and
        // comparing against the chunk size gives the best result.
        // The sum is at most `N`, which fits in a `u8`.
        if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N {
            // The chunk contains a non-ASCII byte; its ASCII head is handled
            // byte by byte below.
            break;
        }

        for j in 0..N {
            out_chunk[j] = MaybeUninit::new(convert(&chunk[j]));
        }

        prefix_len += N;
    }

    // Handle what is left (a short tail, or the chunk that contained the
    // first non-ASCII byte) one byte at a time.
    let tail_start = prefix_len;
    for (byte, slot) in bytes[tail_start..].iter().zip(spare[tail_start..].iter_mut()) {
        if *byte > 127 {
            break;
        }
        *slot = MaybeUninit::new(convert(byte));
        prefix_len += 1;
    }

    // SAFETY: exactly the first `prefix_len` bytes of the buffer were
    // initialized above, and `prefix_len <= bytes.len() <= out.capacity()`.
    unsafe {
        out.set_len(prefix_len);
    }

    // SAFETY: only conversions of ASCII bytes were written to `out`, and
    // `convert` is required to keep ASCII bytes ASCII.
    let ascii_string = unsafe { String::from_utf8_unchecked(out) };

    // SAFETY: only leading ASCII (single-byte) characters were skipped, so
    // `prefix_len` is in bounds and lies on a char boundary of `s`.
    let rest = unsafe { s.get_unchecked(prefix_len..) };

    (ascii_string, rest)
}
0 commit comments