@@ -218,22 +218,38 @@ def updateGraphemeBreakTable(headerFile, sourceFile):
218
218
valueBit , rangeList = rangeEncode ('Unicode Grapheme Break range' , graphemeBreakTable , sentinel = sentinel )
219
219
assert valueBit == 4
220
220
221
+ commonCharacterCount = 0x4000 # graphemeMap
222
+ valueBit , totalBit , data = runLengthEncode ('Unicode Grapheme Break BMP' , graphemeBreakTable [:commonCharacterCount ])
223
+ assert valueBit == 4
224
+ assert totalBit == 16
225
+ cxx_output = ['const uint16_t GraphemeBreakRLE_BMP[] = {' ]
226
+ cxx_output .extend (dumpArray (data , 20 ))
227
+ cxx_output .append ("};" )
228
+ cxx_output .append ("" )
229
+ cxx_output .append ("}" ) # namespace
230
+ cxx_output .append ("" )
231
+
221
232
config = {
222
233
'tableName' : 'GraphemeBreakTable' ,
223
234
'tableVarName' : 'CharClassify::GraphemeBreakTable' ,
224
235
'function' : """static GraphemeBreakProperty GetGraphemeBreakProperty(uint32_t ch) noexcept {
236
+ if (ch < sizeof(graphemeMap)) {
237
+ return static_cast<GraphemeBreakProperty>(graphemeMap[ch]);
238
+ }
225
239
if (ch >= maxUnicodeGraphemeBreakCharacter) {
226
240
return GraphemeBreakProperty::Other;
227
241
}
228
- """ ,
242
+
243
+ ch -= sizeof(graphemeMap);""" ,
229
244
'returnType' : 'GraphemeBreakProperty'
230
245
}
231
246
232
247
Regenerate (headerFile , "//grapheme type" , output )
233
- table , function = buildMultiStageTable ('Unicode Grapheme Break' , graphemeBreakTable , config = config , level = 3 )
248
+ table , function = buildMultiStageTable ('Unicode Grapheme Break' , graphemeBreakTable [ commonCharacterCount :] , config = config , level = 3 )
234
249
output = ['\t ' + line for line in function ]
250
+ cxx_output .extend (table )
235
251
Regenerate (headerFile , "//grapheme function" , output )
236
- Regenerate (sourceFile , "//grapheme table" , table )
252
+ Regenerate (sourceFile , "//grapheme table" , cxx_output )
237
253
238
254
if __name__ == '__main__' :
239
255
# parseSegmentationChart('Grapheme Break Chart.html')
0 commit comments