Skip to content

Commit 2712fbe

Browse files
authored
Rollup merge of rust-lang#52116 - Pazzaz:match-str-case, r=SimonSapin
Handle array manually in str case conversion methods

Avoiding the overhead incurred from `String.extend(char.to_lowercase())` showed a notable performance improvement when I benchmarked it.

I tested on these strings:
```rust
ALL_LOWER: "loremipsumdolorsitametduosensibusmnesarchumabcdefgh"
ALL_UPPER: "LOREMIPSUMDOLORSITAMETDUOSENSIBUSMNESARCHUMABCDEFGH"
REALISTIC_UPPER: "LOREM IPSUM DOLOR SIT AMET, DUO SENSIBUS MNESARCHUM"
SIGMAS: "ΣΣΣΣΣ ΣΣΣΣΣ ΣΣΣΣΣ ΣΣΣ ΣΣΣΣ, ΣΣΣ ΣΣΣΣΣΣΣΣ ΣΣΣΣΣΣΣΣΣΣ"
WORD_UPPER: "Lorem Ipsum Dolor Sit Amet, Duo Sensibus Mnesarchum"
```
the performance improvements of `to_lowercase()` were
```
running 10 tests
test tests::all_lower ... bench: 1,752 ns/iter (+/- 49)
test tests::all_lower_new ... bench: 1,266 ns/iter (+/- 15) -28%
test tests::all_upper ... bench: 1,832 ns/iter (+/- 39)
test tests::all_upper_new ... bench: 1,337 ns/iter (+/- 18) -27%
test tests::realistic_upper ... bench: 1,993 ns/iter (+/- 14)
test tests::realistic_upper_new ... bench: 1,445 ns/iter (+/- 22) -27%
test tests::sigmas ... bench: 1,342 ns/iter (+/- 39)
test tests::sigmas_new ... bench: 1,226 ns/iter (+/- 16) -9%
test tests::word_upper ... bench: 1,899 ns/iter (+/- 12)
test tests::word_upper_new ... bench: 1,381 ns/iter (+/- 26) -27%
```
and of `to_uppercase()`
```
running 10 tests
test tests::all_lower ... bench: 1,813 ns/iter (+/- 20)
test tests::all_lower_new ... bench: 1,321 ns/iter (+/- 16) -27%
test tests::all_upper ... bench: 1,629 ns/iter (+/- 22)
test tests::all_upper_new ... bench: 1,241 ns/iter (+/- 9) -24%
test tests::realistic_upper ... bench: 1,670 ns/iter (+/- 24)
test tests::realistic_upper_new ... bench: 1,241 ns/iter (+/- 17) -26%
test tests::sigmas ... bench: 2,053 ns/iter (+/- 20)
test tests::sigmas_new ... bench: 1,753 ns/iter (+/- 23) -15%
test tests::word_upper ... bench: 1,873 ns/iter (+/- 30)
test tests::word_upper_new ... bench: 1,412 ns/iter (+/- 25) -25%
```
I gave up on the more advanced method from rust-lang#52061 as it wasn't always a clear improvement and would help in even fewer cases if this PR was merged.
2 parents 29ee654 + ad7621d commit 2712fbe

File tree

2 files changed

+30
-2
lines changed

2 files changed

+30
-2
lines changed

src/liballoc/str.rs

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
4545
use core::mem;
4646
use core::ptr;
4747
use core::iter::FusedIterator;
48+
use core::unicode::conversions;
4849

4950
use borrow::{Borrow, ToOwned};
5051
use boxed::Box;
@@ -369,7 +370,18 @@ impl str {
369370
// See https://github.com/rust-lang/rust/issues/26035
370371
map_uppercase_sigma(self, i, &mut s)
371372
} else {
372-
s.extend(c.to_lowercase());
373+
match conversions::to_lower(c) {
374+
[a, '\0', _] => s.push(a),
375+
[a, b, '\0'] => {
376+
s.push(a);
377+
s.push(b);
378+
}
379+
[a, b, c] => {
380+
s.push(a);
381+
s.push(b);
382+
s.push(c);
383+
}
384+
}
373385
}
374386
}
375387
return s;
@@ -423,7 +435,20 @@ impl str {
423435
#[stable(feature = "unicode_case_mapping", since = "1.2.0")]
424436
pub fn to_uppercase(&self) -> String {
425437
let mut s = String::with_capacity(self.len());
426-
s.extend(self.chars().flat_map(|c| c.to_uppercase()));
438+
for c in self[..].chars() {
439+
match conversions::to_upper(c) {
440+
[a, '\0', _] => s.push(a),
441+
[a, b, '\0'] => {
442+
s.push(a);
443+
s.push(b);
444+
}
445+
[a, b, c] => {
446+
s.push(a);
447+
s.push(b);
448+
s.push(c);
449+
}
450+
}
451+
}
427452
return s;
428453
}
429454

src/libcore/unicode/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ pub(crate) mod version;
2020
pub mod derived_property {
2121
pub use unicode::tables::derived_property::{Case_Ignorable, Cased};
2222
}
23+
pub mod conversions {
24+
pub use unicode::tables::conversions::{to_lower, to_upper};
25+
}
2326

2427
// For use in libsyntax
2528
pub mod property {

0 commit comments

Comments
 (0)