|
1 | 1 | ///! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually!
|
2 | 2 | // Alphabetic : 1723 bytes, 142707 codepoints in 755 ranges (U+0000AA - U+0323B0) using skiplist
|
3 | 3 | // Case_Ignorable : 1043 bytes, 2744 codepoints in 447 ranges (U+0000A8 - U+0E01F0) using skiplist
|
4 |
| -// Cc : 7 bytes, 32 codepoints in 1 ranges (U+000080 - U+0000A0) using skiplist |
| 4 | +// Cc : 0 bytes, 32 codepoints in 1 ranges (U+000080 - U+0000A0) using match |
5 | 5 | // Grapheme_Extend : 887 bytes, 2193 codepoints in 375 ranges (U+000300 - U+0E01F0) using skiplist
|
6 | 6 | // Lowercase : 933 bytes, 2543 codepoints in 674 ranges (U+0000AA - U+01E944) using bitset
|
7 |
| -// Lt : 33 bytes, 31 codepoints in 10 ranges (U+0001C5 - U+001FFD) using skiplist |
| 7 | +// Lt : 0 bytes, 31 codepoints in 10 ranges (U+0001C5 - U+001FFD) using match |
8 | 8 | // N : 455 bytes, 1901 codepoints in 143 ranges (U+0000B2 - U+01FBFA) using skiplist
|
9 | 9 | // Uppercase : 797 bytes, 1952 codepoints in 655 ranges (U+0000C0 - U+01F18A) using bitset
|
10 |
| -// White_Space : 256 bytes, 19 codepoints in 8 ranges (U+000085 - U+003001) using cascading |
| 10 | +// White_Space : 0 bytes, 19 codepoints in 8 ranges (U+000085 - U+003001) using match |
11 | 11 | // to_lower : 11484 bytes
|
12 | 12 | // to_upper : 13432 bytes
|
13 |
| -// Total : 31050 bytes |
| 13 | +// Total : 30754 bytes |
14 | 14 |
|
15 | 15 | #[inline(always)]
|
16 | 16 | const fn bitset_search<
|
@@ -340,33 +340,13 @@ pub mod case_ignorable {
|
340 | 340 |
|
#[rustfmt::skip]
pub mod cc {
    /// Reports whether the non-ASCII character `c` has the `Cc` (control) property.
    ///
    /// The table covers a single range of 32 code points, `U+0080..=U+009F`.
    /// No lookup table is needed, so the check is a plain range test and the
    /// function is usable in `const` contexts.
    ///
    /// ASCII input is outside this function's contract and trips the debug
    /// assertion below; ASCII control characters are presumably classified
    /// by the caller before reaching this point.
    #[inline]
    pub const fn lookup(c: char) -> bool {
        debug_assert!(!c.is_ascii());
        matches!(c as u32, 0x80..=0x9f)
    }
}
|
372 | 352 |
|
@@ -554,34 +534,22 @@ pub mod lowercase {
|
554 | 534 |
|
#[rustfmt::skip]
pub mod lt {
    /// Reports whether the non-ASCII character `c` has the `Lt` (titlecase
    /// letter) property.
    ///
    /// The table holds 31 code points in 10 ranges, from `U+01C5` up to and
    /// including `U+1FFC`. The set is small enough to be spelled out as a
    /// pattern instead of a skiplist, which also makes the function usable in
    /// `const` contexts.
    ///
    /// ASCII input is outside this function's contract and trips the debug
    /// assertion below.
    #[inline]
    pub const fn lookup(c: char) -> bool {
        debug_assert!(!c.is_ascii());
        matches!(
            c as u32,
            // Four isolated digraph titlecase forms.
            0x1c5 | 0x1c8 | 0x1cb | 0x1f2
                // Three runs of eight code points each.
                | 0x1f88..=0x1f8f
                | 0x1f98..=0x1f9f
                | 0x1fa8..=0x1faf
                // Three isolated code points.
                | 0x1fbc | 0x1fcc | 0x1ffc
        )
    }
}
|
587 | 555 |
|
@@ -743,25 +711,18 @@ pub mod uppercase {
|
743 | 711 |
|
744 | 712 | #[rustfmt::skip]
|
745 | 713 | pub mod white_space {
|
/// Reports whether the non-ASCII character `c` has the `White_Space` property.
///
/// The table holds 19 code points in 8 ranges: `U+0085`, `U+00A0`, `U+1680`,
/// `U+2000..=U+200A`, `U+2028..=U+2029`, `U+202F`, `U+205F` and `U+3000`.
/// They are matched directly, so no lookup table is needed.
///
/// ASCII input is outside this function's contract and trips the debug
/// assertion below; ASCII whitespace is presumably classified by the caller.
#[inline]
pub const fn lookup(c: char) -> bool {
    debug_assert!(!c.is_ascii());
    matches!(
        c as u32,
        0x85 | 0xa0 | 0x1680
            | 0x2000..=0x200a
            | 0x2028..=0x2029
            | 0x202f | 0x205f | 0x3000
    )
}
|
|
0 commit comments