17
17
//! Note: Because the term "leading byte" can sometimes be ambiguous (for
18
18
//! example, it could also refer to the first byte of a slice), we'll often use
19
19
//! the term "non-continuation byte" to refer to these bytes in the code.
20
+ use core:: intrinsics:: unlikely;
20
21
22
// Size of a `usize` in bytes on the target.
const USIZE_SIZE: usize = core::mem::size_of::<usize>();
// Number of `usize` words grouped together by the inner loop of
// `do_count_chars` (the const argument it passes to `as_chunks`). It also
// scales the length threshold that `count_chars` uses to pick an
// implementation, and `CHUNK_SIZE` must be a multiple of it.
const UNROLL_INNER: usize = 4;
24
+
25
+ #[ inline]
21
26
pub ( super ) fn count_chars ( s : & str ) -> usize {
27
+ if s. len ( ) < USIZE_SIZE * UNROLL_INNER {
28
+ // Avoid entering the optimized implementation for strings where the
29
+ // difference is not likely to matter, or where it might even be slower.
30
+ // That said, a ton of thought was not spent on the particular threshold
31
+ // here, beyond "this value seems to make sense".
32
+ char_count_general_case ( s. as_bytes ( ) )
33
+ } else {
34
+ do_count_chars ( s)
35
+ }
36
+ }
37
+
38
+ fn do_count_chars ( s : & str ) -> usize {
22
39
// For correctness, `CHUNK_SIZE` must be:
40
+ //
23
41
// - Less than or equal to 255, otherwise we'll overflow bytes in `counts`.
24
42
// - A multiple of `UNROLL_INNER`, otherwise our `break` inside the
25
43
//   `body.chunks(CHUNK_SIZE)` loop is incorrect.
26
44
//
27
45
// For performance, `CHUNK_SIZE` should be:
28
- // - Relatively cheap to `% ` against.
46
+ // - Relatively cheap to `/ ` against (so some simple sum of powers of two) .
29
47
// - Large enough to avoid paying for the cost of the `sum_bytes_in_usize`
30
48
// too often.
31
49
const CHUNK_SIZE : usize = 192 ;
32
- const UNROLL_INNER : usize = 4 ;
33
50
34
- // Check the properties of `CHUNK_SIZE` / `UNROLL_INNER` that are required
51
+ // Check the properties of `CHUNK_SIZE` and `UNROLL_INNER` that are required
35
52
// for correctness.
36
- const _: [ ( ) ; 1 ] = [ ( ) ; ( CHUNK_SIZE < 256 && ( CHUNK_SIZE % UNROLL_INNER ) == 0 ) as usize ] ;
53
+ const _: ( ) = assert ! ( CHUNK_SIZE < 256 ) ;
54
+ const _: ( ) = assert ! ( CHUNK_SIZE % UNROLL_INNER == 0 ) ;
55
+
37
56
// SAFETY: transmuting `[u8]` to `[usize]` is safe except for size
38
57
// differences which are handled by `align_to`.
39
58
let ( head, body, tail) = unsafe { s. as_bytes ( ) . align_to :: < usize > ( ) } ;
40
59
60
+ // This should be quite rare, and basically exists to handle the degenerate
61
+ // cases where align_to fails (as well as miri under symbolic alignment
62
+ // mode).
63
+ //
64
+ // The `unlikely` helps discourage LLVM from inlining the body, which is
65
+ // nice, as we would rather not mark the `char_count_general_case` function
66
+ // as cold.
67
+ if unlikely ( body. is_empty ( ) || head. len ( ) > USIZE_SIZE || tail. len ( ) > USIZE_SIZE ) {
68
+ return char_count_general_case ( s. as_bytes ( ) ) ;
69
+ }
70
+
41
71
let mut total = char_count_general_case ( head) + char_count_general_case ( tail) ;
42
72
// Split `body` into `CHUNK_SIZE` chunks to reduce the frequency with which
43
73
// we call `sum_bytes_in_usize`.
44
74
for chunk in body. chunks ( CHUNK_SIZE ) {
45
75
// We accumulate intermediate sums in `counts`, where each byte contains
46
76
// a subset of the sum of this chunk, like a `[u8; size_of::<usize>()]`.
47
77
let mut counts = 0 ;
48
- let unrolled_chunks = chunk. array_chunks :: < UNROLL_INNER > ( ) ;
49
- // If there's a remainder (know can only happen for the last item in
50
- // `chunks`, because `CHUNK_SIZE % UNROLL == 0`), then we need to
51
- // account for that (although we don't use it to later).
52
- let remainder = unrolled_chunks. remainder ( ) ;
78
+
79
+ let ( unrolled_chunks, remainder) = chunk. as_chunks :: < UNROLL_INNER > ( ) ;
53
80
for unrolled in unrolled_chunks {
54
81
for & word in unrolled {
55
82
// Because `CHUNK_SIZE` is < 256, this addition can't cause the
@@ -85,8 +112,8 @@ pub(super) fn count_chars(s: &str) -> usize {
85
112
// true)
86
113
#[inline]
fn contains_non_continuation_byte(w: usize) -> usize {
    // A single set bit in the lowest position of every byte of a `usize`. The
    // `u64` literal is truncated by the cast on targets with a narrower
    // `usize`.
    const LSB: usize = 0x0101_0101_0101_0101u64 as usize;
    // A byte is a continuation byte only when its top two bits are `10`, so it
    // is a non-continuation byte when bit 7 is clear or bit 6 is set. The
    // shifts move those two bits into the lowest position of each byte, and
    // the mask discards whatever was shifted in from neighbouring bytes.
    ((!w >> 7) | (w >> 6)) & LSB
}
91
118
92
119
// Morally equivalent to `values.to_ne_bytes().into_iter().sum::<usize>()`, but
@@ -97,20 +124,13 @@ fn sum_bytes_in_usize(values: usize) -> usize {
97
124
const SKIP_BYTES : usize = 0x00ff_00ff_00ff_00ff_u64 as usize ;
98
125
99
126
let pair_sum: usize = ( values & SKIP_BYTES ) + ( ( values >> 8 ) & SKIP_BYTES ) ;
100
- pair_sum. wrapping_mul ( LSB_SHORTS ) >> ( ( core :: mem :: size_of :: < usize > ( ) - 2 ) * 8 )
127
+ pair_sum. wrapping_mul ( LSB_SHORTS ) >> ( ( USIZE_SIZE - 2 ) * 8 )
101
128
}
102
129
103
130
// This is the most direct implementation of the concept of "count the number of
104
131
// bytes in the string which are not continuation bytes", and is used for the
105
132
// head and tail of the input string (the first and last item in the tuple
106
133
// returned by `slice::align_to`).
107
134
fn char_count_general_case ( s : & [ u8 ] ) -> usize {
108
- const CONT_MASK_U8 : u8 = 0b0011_1111 ;
109
- const TAG_CONT_U8 : u8 = 0b1000_0000 ;
110
- let mut leads = 0 ;
111
- for & byte in s {
112
- let is_lead = ( byte & !CONT_MASK_U8 ) != TAG_CONT_U8 ;
113
- leads += is_lead as usize ;
114
- }
115
- leads
135
+ s. iter ( ) . filter ( |& & byte| !super :: validations:: utf8_is_cont_byte ( byte) ) . count ( )
116
136
}
0 commit comments