@@ -12,41 +12,39 @@ class GraphemeBreakProperty(IntEnum):
12
12
Other = 0
13
13
Control = 1
14
14
Extend = 2
15
- RegionalIndicator = 3
15
+ ZeroWidthJoiner = 3
16
16
Prepend = 4
17
17
HangulL = 5
18
18
HangulV = 6
19
19
HangulT = 7
20
20
HangulLV = 8
21
21
HangulLVT = 9
22
22
ExtendedPictographic = 10
23
- ZeroWidthJoiner = 11
23
+ RegionalIndicator = 11
24
24
# Indic_Conjunct_Break
25
25
ConjunctLinker = 12
26
26
LinkingConsonant = 13
27
- ExtendConjunctLinker = 14
28
27
# merged property
29
- SpacingMark = 15
30
- CR = 16
31
- LF = 17
28
+ SpacingMark = 14
29
+ CR = 15
30
+ LF = 16
32
31
33
32
# https://www.unicode.org/reports/tr35/tr35-general.html#segmentations
34
33
# https://github.com/unicode-org/cldr/blob/main/common/segments/root.xml
35
34
# https://www.unicode.org/reports/tr51/#Emoji_Properties
36
35
# Name -> GraphemeBreakProperty lookup table.
# Every enum member is reachable under its own member name; the entries
# below add further spellings (short aliases and underscore forms) that
# resolve to the same members.
GraphemeBreakPropertyMap = {
    **GraphemeBreakProperty.__members__,
    'ZWJ': GraphemeBreakProperty.ZeroWidthJoiner,
    # Hangul syllable types
    'L': GraphemeBreakProperty.HangulL,
    'V': GraphemeBreakProperty.HangulV,
    'T': GraphemeBreakProperty.HangulT,
    'LV': GraphemeBreakProperty.HangulLV,
    'LVT': GraphemeBreakProperty.HangulLVT,
    'Extended_Pictographic': GraphemeBreakProperty.ExtendedPictographic,
    'ExtPict': GraphemeBreakProperty.ExtendedPictographic,
    'Regional_Indicator': GraphemeBreakProperty.RegionalIndicator,
    'RI': GraphemeBreakProperty.RegionalIndicator,
    # Indic_Conjunct_Break names
    'Consonant': GraphemeBreakProperty.LinkingConsonant,
    'Virama': GraphemeBreakProperty.ConjunctLinker,
}
51
49
52
50
# https://www.unicode.org/reports/tr44/#Indic_Conjunct_Break
@@ -57,14 +55,15 @@ def updateIndicConjunctBreak(graphemeBreakTable):
57
55
flattenUnicodePropertyTable (indicConjunctBreak , propertyList )
58
56
defaultValue = int (GraphemeBreakProperty .Other )
59
57
extend = int (GraphemeBreakProperty .Extend )
60
- extendLinker = int (GraphemeBreakProperty .ExtendConjunctLinker )
58
+ linker = int (GraphemeBreakProperty .ConjunctLinker )
61
59
for index , conjunct in enumerate (indicConjunctBreak ):
62
60
grapheme = graphemeBreakTable [index ]
63
61
if grapheme == defaultValue :
62
+ assert conjunct != 'Virama'
64
63
grapheme = int (GraphemeBreakPropertyMap .get (conjunct , grapheme ))
65
64
elif grapheme == extend :
66
65
if conjunct == 'Virama' :
67
- grapheme = extendLinker
66
+ grapheme = linker
68
67
graphemeBreakTable [index ] = grapheme
69
68
70
69
# One slot per GraphemeBreakProperty value, from 0 up to the largest member;
# every slot starts out as 0x3ffff.
graphemeClusterBoundary = [0x3ffff] * (int(max(GraphemeBreakProperty)) + 1)
@@ -73,22 +72,21 @@ def buildGraphemeClusterBoundary():
73
72
table = graphemeClusterBoundary
74
73
75
74
notBreak = {
76
- 'Other' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ExtendLinker ' ],
75
+ 'Other' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ConjunctLinker ' ],
77
76
'CR' : ['LF' ],
78
- 'Extend' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ExtendLinker' ],
79
- 'RI' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ExtendLinker' , 'RI' ],
80
- 'Prepend' : ['Other' , 'Extend' , 'SpacingMark' , 'ZWJ' , 'ExtendLinker' , 'RI' , 'Prepend' , 'L' , 'V' , 'T' , 'LV' , 'LVT' , 'ExtPict' , 'ConjunctLinker' , 'Consonant' ],
81
- 'SpacingMark' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ExtendLinker' ],
82
- 'L' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ExtendLinker' , 'L' , 'V' , 'LV' , 'LVT' ],
83
- 'V' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ExtendLinker' , 'V' , 'T' ],
84
- 'T' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ExtendLinker' , 'T' ],
85
- 'LV' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ExtendLinker' , 'V' , 'T' ],
86
- 'LVT' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ExtendLinker' , 'T' ],
87
- 'ExtPict' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ExtendLinker' ],
88
- 'ZWJ' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ExtendLinker' , 'ExtPict' , 'Consonant' ],
89
- 'ConjunctLinker' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ExtendLinker' , 'Consonant' ],
90
- 'Consonant' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ExtendLinker' , 'ConjunctLinker' ],
91
- 'ExtendLinker' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ExtendLinker' , 'Consonant' ],
77
+ 'Extend' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ConjunctLinker' ],
78
+ 'ZWJ' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ConjunctLinker' , 'ExtPict' , 'Consonant' ],
79
+ 'Prepend' : ['Other' , 'Extend' , 'SpacingMark' , 'ZWJ' , 'ConjunctLinker' , 'RI' , 'Prepend' , 'L' , 'V' , 'T' , 'LV' , 'LVT' , 'ExtPict' , 'Consonant' ],
80
+ 'SpacingMark' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ConjunctLinker' ],
81
+ 'L' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ConjunctLinker' , 'L' , 'V' , 'LV' , 'LVT' ],
82
+ 'V' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ConjunctLinker' , 'V' , 'T' ],
83
+ 'T' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ConjunctLinker' , 'T' ],
84
+ 'LV' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ConjunctLinker' , 'V' , 'T' ],
85
+ 'LVT' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ConjunctLinker' , 'T' ],
86
+ 'ExtPict' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ConjunctLinker' ],
87
+ 'RI' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ConjunctLinker' , 'RI' ],
88
+ 'ConjunctLinker' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ConjunctLinker' , 'Consonant' ],
89
+ 'Consonant' : ['Extend' , 'SpacingMark' , 'ZWJ' , 'ConjunctLinker' ],
92
90
}
93
91
94
92
for key , row in notBreak .items ():
0 commit comments