43
43
from itertools import batched
44
44
from typing import Callable , Iterable
45
45
46
- UNICODE_VERSION = "16 .0.0"
46
+ UNICODE_VERSION = "17 .0.0"
47
47
"""The version of the Unicode data files to download."""
48
48
49
49
NUM_CODEPOINTS = 0x110000
@@ -178,7 +178,9 @@ class WidthState(enum.IntEnum):
178
178
(if set, should also set 3rd and 4th)
179
179
- 6th bit: if 4th is set but this one is not, then this is a ZWJ ligature state
180
180
where no ZWJ has been encountered yet; encountering one flips this on
181
- - Seventh bit: is VS1 (if CJK) or is VS2 (not CJK)
181
+ - 7th bit:
182
+ - CJK mode: is VS1 or VS3
183
+ - Not CJK: is VS2
182
184
"""
183
185
184
186
# BASIC WIDTHS
@@ -275,8 +277,8 @@ class WidthState(enum.IntEnum):
275
277
276
278
# VARIATION SELECTORS
277
279
278
- VARIATION_SELECTOR_1_OR_2 = 0b0000_0010_0000_0000
279
- "\\ uFE00 if CJK, or \\ uFE01 otherwise"
280
+ VARIATION_SELECTOR_1_2_OR_3 = 0b0000_0010_0000_0000
281
+ "\\ uFE00 or \\ uFE02 if CJK, or \\ uFE01 otherwise"
280
282
281
283
# Text presentation sequences (not CJK)
282
284
VARIATION_SELECTOR_15 = 0b0100_0000_0000_0000
@@ -373,7 +375,7 @@ def width_alone(self) -> int:
373
375
| WidthState .COMBINING_LONG_SOLIDUS_OVERLAY
374
376
| WidthState .VARIATION_SELECTOR_15
375
377
| WidthState .VARIATION_SELECTOR_16
376
- | WidthState .VARIATION_SELECTOR_1_OR_2
378
+ | WidthState .VARIATION_SELECTOR_1_2_OR_3
377
379
):
378
380
return 0
379
381
case (
@@ -657,11 +659,12 @@ def load_width_maps() -> tuple[list[WidthState], list[WidthState]]:
657
659
ea [cp ] = width
658
660
659
661
# East Asian only
660
- ea [0xFE00 ] = WidthState .VARIATION_SELECTOR_1_OR_2
661
662
ea [0x0338 ] = WidthState .COMBINING_LONG_SOLIDUS_OVERLAY
663
+ ea [0xFE00 ] = WidthState .VARIATION_SELECTOR_1_2_OR_3
664
+ ea [0xFE02 ] = WidthState .VARIATION_SELECTOR_1_2_OR_3
662
665
663
666
# Not East Asian only
664
- not_ea [0xFE01 ] = WidthState .VARIATION_SELECTOR_1_OR_2
667
+ not_ea [0xFE01 ] = WidthState .VARIATION_SELECTOR_1_2_OR_3
665
668
not_ea [0xFE0E ] = WidthState .VARIATION_SELECTOR_15
666
669
667
670
return (not_ea , ea )
@@ -759,7 +762,7 @@ def load_solidus_transparent(
759
762
num_chars = len (ccc_above_1 )
760
763
761
764
for cp in ccc_above_1 :
762
- if cp not in [0xFE00 , 0xFE0F ]:
765
+ if cp not in [0xFE00 , 0xFE02 , 0xFE0F ]:
763
766
assert (
764
767
cjk_width_map [cp ].table_width () != CharWidthInTable .SPECIAL
765
768
), f"U+{ cp :X} "
@@ -1317,14 +1320,14 @@ def lookup_fns(
1317
1320
1318
1321
if is_cjk :
1319
1322
s += """
1320
- if c == '\\ u{FE00}' {
1321
- return (0, next_info.set_vs1_2 ());
1323
+ if matches!(c, '\\ u{FE00}' | ' \\ u{FE02}') {
1324
+ return (0, next_info.set_vs1_2_3 ());
1322
1325
}
1323
1326
"""
1324
1327
else :
1325
1328
s += """
1326
1329
if c == '\\ u{FE01}' {
1327
- return (0, next_info.set_vs1_2 ());
1330
+ return (0, next_info.set_vs1_2_3 ());
1328
1331
}
1329
1332
if c == '\\ u{FE0E}' {
1330
1333
return (0, next_info.set_text_presentation());
@@ -1337,15 +1340,15 @@ def lookup_fns(
1337
1340
}
1338
1341
} else """
1339
1342
1340
- s += """if next_info.is_vs1_2 () {
1343
+ s += """if next_info.is_vs1_2_3 () {
1341
1344
if matches!(c, '\\ u{2018}' | '\\ u{2019}' | '\\ u{201C}' | '\\ u{201D}') {
1342
1345
return ("""
1343
1346
1344
1347
s += str (2 - is_cjk )
1345
1348
1346
1349
s += """, WidthInfo::DEFAULT);
1347
1350
} else {
1348
- next_info = next_info.unset_vs1_2 ();
1351
+ next_info = next_info.unset_vs1_2_3 ();
1349
1352
}
1350
1353
}
1351
1354
if next_info.is_ligature_transparent() {
@@ -1655,7 +1658,7 @@ def emit_module(
1655
1658
self.0
1656
1659
| WidthInfo::VARIATION_SELECTOR_16.0
1657
1660
& !WidthInfo::VARIATION_SELECTOR_15.0
1658
- & !WidthInfo::VARIATION_SELECTOR_1_OR_2 .0,
1661
+ & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0,
1659
1662
)
1660
1663
}} else {{
1661
1664
Self::VARIATION_SELECTOR_16
@@ -1683,7 +1686,7 @@ def emit_module(
1683
1686
self.0
1684
1687
| WidthInfo::VARIATION_SELECTOR_15.0
1685
1688
& !WidthInfo::VARIATION_SELECTOR_16.0
1686
- & !WidthInfo::VARIATION_SELECTOR_1_OR_2 .0,
1689
+ & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0,
1687
1690
)
1688
1691
}} else {{
1689
1692
Self(WidthInfo::VARIATION_SELECTOR_15.0)
@@ -1696,27 +1699,28 @@ def emit_module(
1696
1699
}}
1697
1700
1698
1701
/// Has 7th bit set
1699
- fn is_vs1_2(self) -> bool {{
1700
- (self.0 & WidthInfo::VARIATION_SELECTOR_1_OR_2.0) == WidthInfo::VARIATION_SELECTOR_1_OR_2.0
1702
+ fn is_vs1_2_3(self) -> bool {{
1703
+ (self.0 & WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0)
1704
+ == WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0
1701
1705
}}
1702
1706
1703
1707
/// Set 7th bit
1704
- fn set_vs1_2 (self) -> Self {{
1708
+ fn set_vs1_2_3 (self) -> Self {{
1705
1709
if (self.0 & LIGATURE_TRANSPARENT_MASK) == LIGATURE_TRANSPARENT_MASK {{
1706
1710
Self(
1707
1711
self.0
1708
- | WidthInfo::VARIATION_SELECTOR_1_OR_2 .0
1712
+ | WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0
1709
1713
& !WidthInfo::VARIATION_SELECTOR_15.0
1710
1714
& !WidthInfo::VARIATION_SELECTOR_16.0,
1711
1715
)
1712
1716
}} else {{
1713
- Self(WidthInfo::VARIATION_SELECTOR_1_OR_2 .0)
1717
+ Self(WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0)
1714
1718
}}
1715
1719
}}
1716
1720
1717
1721
/// Clear 7th bit
1718
- fn unset_vs1_2 (self) -> Self {{
1719
- Self(self.0 & !WidthInfo::VARIATION_SELECTOR_1_OR_2 .0)
1722
+ fn unset_vs1_2_3 (self) -> Self {{
1723
+ Self(self.0 & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0)
1720
1724
}}
1721
1725
}}
1722
1726
0 commit comments