43
43
from itertools import batched
44
44
from typing import Callable , Iterable
45
45
46
- UNICODE_VERSION = "16 .0.0"
46
+ UNICODE_VERSION = "17 .0.0"
47
47
"""The version of the Unicode data files to download."""
48
48
49
49
NUM_CODEPOINTS = 0x110000
@@ -178,7 +178,9 @@ class WidthState(enum.IntEnum):
178
178
(if set, should also set 3rd and 4th)
179
179
- 6th bit: if 4th is set but this one is not, then this is a ZWJ ligature state
180
180
where no ZWJ has been encountered yet; encountering one flips this on
181
- - Seventh bit: is VS1 (if CJK) or is VS2 (not CJK)
181
+ - 7th bit:
182
+ - CJK mode: is VS1 or VS3
183
+ - Not CJK: is VS2
182
184
"""
183
185
184
186
# BASIC WIDTHS
@@ -275,8 +277,8 @@ class WidthState(enum.IntEnum):
275
277
276
278
# VARIATION SELECTORS
277
279
278
- VARIATION_SELECTOR_1_OR_2 = 0b0000_0010_0000_0000
279
- "\\ uFE00 if CJK, or \\ uFE01 otherwise"
280
+ VARIATION_SELECTOR_1_2_OR_3 = 0b0000_0010_0000_0000
281
+ "\\ uFE00 or \\ uFE02 if CJK, or \\ uFE01 otherwise"
280
282
281
283
# Text presentation sequences (not CJK)
282
284
VARIATION_SELECTOR_15 = 0b0100_0000_0000_0000
@@ -373,7 +375,7 @@ def width_alone(self) -> int:
373
375
| WidthState .COMBINING_LONG_SOLIDUS_OVERLAY
374
376
| WidthState .VARIATION_SELECTOR_15
375
377
| WidthState .VARIATION_SELECTOR_16
376
- | WidthState .VARIATION_SELECTOR_1_OR_2
378
+ | WidthState .VARIATION_SELECTOR_1_2_OR_3
377
379
):
378
380
return 0
379
381
case (
@@ -657,11 +659,12 @@ def load_width_maps() -> tuple[list[WidthState], list[WidthState]]:
657
659
ea [cp ] = width
658
660
659
661
# East-Asian only
660
- ea [0xFE00 ] = WidthState .VARIATION_SELECTOR_1_OR_2
661
662
ea [0x0338 ] = WidthState .COMBINING_LONG_SOLIDUS_OVERLAY
663
+ ea [0xFE00 ] = WidthState .VARIATION_SELECTOR_1_2_OR_3
664
+ ea [0xFE02 ] = WidthState .VARIATION_SELECTOR_1_2_OR_3
662
665
663
666
# Not East Asian only
664
- not_ea [0xFE01 ] = WidthState .VARIATION_SELECTOR_1_OR_2
667
+ not_ea [0xFE01 ] = WidthState .VARIATION_SELECTOR_1_2_OR_3
665
668
not_ea [0xFE0E ] = WidthState .VARIATION_SELECTOR_15
666
669
667
670
return (not_ea , ea )
@@ -759,7 +762,7 @@ def load_solidus_transparent(
759
762
num_chars = len (ccc_above_1 )
760
763
761
764
for cp in ccc_above_1 :
762
- if cp not in [0xFE00 , 0xFE0F ]:
765
+ if cp not in [0xFE00 , 0xFE02 , 0xFE0F ]:
763
766
assert (
764
767
cjk_width_map [cp ].table_width () != CharWidthInTable .SPECIAL
765
768
), f"U+{ cp :X} "
@@ -1317,14 +1320,14 @@ def lookup_fns(
1317
1320
1318
1321
if is_cjk :
1319
1322
s += """
1320
- if c == '\\ u{FE00}' {
1321
- return (0, next_info.set_vs1_2 ());
1323
+ if matches!(c, '\\ u{FE00}' | ' \\ u{FE02}') {
1324
+ return (0, next_info.set_vs1_2_3 ());
1322
1325
}
1323
1326
"""
1324
1327
else :
1325
1328
s += """
1326
1329
if c == '\\ u{FE01}' {
1327
- return (0, next_info.set_vs1_2 ());
1330
+ return (0, next_info.set_vs1_2_3 ());
1328
1331
}
1329
1332
if c == '\\ u{FE0E}' {
1330
1333
return (0, next_info.set_text_presentation());
@@ -1345,7 +1348,7 @@ def lookup_fns(
1345
1348
1346
1349
s += """, WidthInfo::DEFAULT);
1347
1350
} else {
1348
- next_info = next_info.unset_vs1_2 ();
1351
+ next_info = next_info.unset_vs1_2_3 ();
1349
1352
}
1350
1353
}
1351
1354
if next_info.is_ligature_transparent() {
@@ -1655,7 +1658,7 @@ def emit_module(
1655
1658
self.0
1656
1659
| WidthInfo::VARIATION_SELECTOR_16.0
1657
1660
& !WidthInfo::VARIATION_SELECTOR_15.0
1658
- & !WidthInfo::VARIATION_SELECTOR_1_OR_2 .0,
1661
+ & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0,
1659
1662
)
1660
1663
}} else {{
1661
1664
Self::VARIATION_SELECTOR_16
@@ -1683,7 +1686,7 @@ def emit_module(
1683
1686
self.0
1684
1687
| WidthInfo::VARIATION_SELECTOR_15.0
1685
1688
& !WidthInfo::VARIATION_SELECTOR_16.0
1686
- & !WidthInfo::VARIATION_SELECTOR_1_OR_2 .0,
1689
+ & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0,
1687
1690
)
1688
1691
}} else {{
1689
1692
Self(WidthInfo::VARIATION_SELECTOR_15.0)
@@ -1697,26 +1700,26 @@ def emit_module(
1697
1700
1698
1701
/// Has 7th bit set
1699
1702
fn is_vs1_2(self) -> bool {{
1700
- (self.0 & WidthInfo::VARIATION_SELECTOR_1_OR_2 .0) == WidthInfo::VARIATION_SELECTOR_1_OR_2 .0
1703
+ (self.0 & WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0) == WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0
1701
1704
}}
1702
1705
1703
1706
/// Set 7th bit
1704
- fn set_vs1_2 (self) -> Self {{
1707
+ fn set_vs1_2_3 (self) -> Self {{
1705
1708
if (self.0 & LIGATURE_TRANSPARENT_MASK) == LIGATURE_TRANSPARENT_MASK {{
1706
1709
Self(
1707
1710
self.0
1708
- | WidthInfo::VARIATION_SELECTOR_1_OR_2 .0
1711
+ | WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0
1709
1712
& !WidthInfo::VARIATION_SELECTOR_15.0
1710
1713
& !WidthInfo::VARIATION_SELECTOR_16.0,
1711
1714
)
1712
1715
}} else {{
1713
- Self(WidthInfo::VARIATION_SELECTOR_1_OR_2 .0)
1716
+ Self(WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0)
1714
1717
}}
1715
1718
}}
1716
1719
1717
1720
/// Clear 7th bit
1718
- fn unset_vs1_2 (self) -> Self {{
1719
- Self(self.0 & !WidthInfo::VARIATION_SELECTOR_1_OR_2 .0)
1721
+ fn unset_vs1_2_3 (self) -> Self {{
1722
+ Self(self.0 & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0)
1720
1723
}}
1721
1724
}}
1722
1725
0 commit comments