Skip to content

Commit 8f3fb6a

Browse files
committed
Auto merge of rust-lang#145479 - Kmeakin:km/hardcode-char-is-control, r=joboet
Hard-code `char::is_control`. Split off from rust-lang#145219. According to https://www.unicode.org/policies/stability_policy.html#Property_Value, the set of codepoints in `Cc` will never change. So we can hard-code the patterns to match against instead of using a table. This doesn't change the generated assembly, since the lookup table is small enough that [LLVM is able to inline the whole search](https://godbolt.org/z/bG8dM37YG). But this does reduce the chance of regressions if LLVM's heuristics change in the future, and means less generated Rust code checked in to `unicode_data.rs`.
2 parents d05edd6 + 23d055e commit 8f3fb6a

File tree

3 files changed

+5
-27
lines changed

3 files changed

+5
-27
lines changed

core/src/char/methods.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -950,7 +950,11 @@ impl char {
950950
#[stable(feature = "rust1", since = "1.0.0")]
951951
#[inline]
952952
pub fn is_control(self) -> bool {
953-
unicode::Cc(self)
953+
// According to
954+
// https://www.unicode.org/policies/stability_policy.html#Property_Value,
955+
// the set of codepoints in `Cc` will never change.
956+
// So we can just hard-code the patterns to match against instead of using a table.
957+
matches!(self, '\0'..='\x1f' | '\x7f'..='\u{9f}')
954958
}
955959

956960
/// Returns `true` if this `char` has the `Grapheme_Extend` property.

core/src/unicode/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ pub use unicode_data::conversions;
1010

1111
#[rustfmt::skip]
1212
pub(crate) use unicode_data::alphabetic::lookup as Alphabetic;
13-
pub(crate) use unicode_data::cc::lookup as Cc;
1413
pub(crate) use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
1514
pub(crate) use unicode_data::lowercase::lookup as Lowercase;
1615
pub(crate) use unicode_data::n::lookup as N;

core/src/unicode/unicode_data.rs

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -358,31 +358,6 @@ pub mod cased {
358358
}
359359
}
360360

361-
#[rustfmt::skip]
362-
pub mod cc {
363-
use super::ShortOffsetRunHeader;
364-
365-
static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 1] = [
366-
ShortOffsetRunHeader::new(0, 1114272),
367-
];
368-
static OFFSETS: [u8; 5] = [
369-
0, 32, 95, 33, 0,
370-
];
371-
pub fn lookup(c: char) -> bool {
372-
const {
373-
assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32);
374-
let mut i = 0;
375-
while i < SHORT_OFFSET_RUNS.len() {
376-
assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len());
377-
i += 1;
378-
}
379-
}
380-
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`
381-
// and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`.
382-
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
383-
}
384-
}
385-
386361
#[rustfmt::skip]
387362
pub mod grapheme_extend {
388363
use super::ShortOffsetRunHeader;

0 commit comments

Comments
 (0)