Skip to content

Commit de5c52f

Browse files
committed
Cache grapheme break property for common characters,
reuse `DiscardLastCombinedCharacter()` for `SafeSegment()`.
1 parent aceab0f commit de5c52f

File tree

4 files changed

+260
-288
lines changed

4 files changed

+260
-288
lines changed

scintilla/scripts/GenerateGraphemeBreak.py

+19-3
Original file line numberDiff line numberDiff line change
@@ -218,22 +218,38 @@ def updateGraphemeBreakTable(headerFile, sourceFile):
218218
valueBit, rangeList = rangeEncode('Unicode Grapheme Break range', graphemeBreakTable, sentinel=sentinel)
219219
assert valueBit == 4
220220

221+
commonCharacterCount = 0x4000 # graphemeMap
222+
valueBit, totalBit, data = runLengthEncode('Unicode Grapheme Break BMP', graphemeBreakTable[:commonCharacterCount])
223+
assert valueBit == 4
224+
assert totalBit == 16
225+
cxx_output = ['const uint16_t GraphemeBreakRLE_BMP[] = {']
226+
cxx_output.extend(dumpArray(data, 20))
227+
cxx_output.append("};")
228+
cxx_output.append("")
229+
cxx_output.append("}") # namespace
230+
cxx_output.append("")
231+
221232
config = {
222233
'tableName': 'GraphemeBreakTable',
223234
'tableVarName': 'CharClassify::GraphemeBreakTable',
224235
'function': """static GraphemeBreakProperty GetGraphemeBreakProperty(uint32_t ch) noexcept {
236+
if (ch < sizeof(graphemeMap)) {
237+
return static_cast<GraphemeBreakProperty>(graphemeMap[ch]);
238+
}
225239
if (ch >= maxUnicodeGraphemeBreakCharacter) {
226240
return GraphemeBreakProperty::Other;
227241
}
228-
""",
242+
243+
ch -= sizeof(graphemeMap);""",
229244
'returnType': 'GraphemeBreakProperty'
230245
}
231246

232247
Regenerate(headerFile, "//grapheme type", output)
233-
table, function = buildMultiStageTable('Unicode Grapheme Break', graphemeBreakTable, config=config, level=3)
248+
table, function = buildMultiStageTable('Unicode Grapheme Break', graphemeBreakTable[commonCharacterCount:], config=config, level=3)
234249
output = ['\t' + line for line in function]
250+
cxx_output.extend(table)
235251
Regenerate(headerFile, "//grapheme function", output)
236-
Regenerate(sourceFile, "//grapheme table", table)
252+
Regenerate(sourceFile, "//grapheme table", cxx_output)
237253

238254
if __name__ == '__main__':
239255
# parseSegmentationChart('Grapheme Break Chart.html')

0 commit comments

Comments
 (0)