57
57
nfc_qc_maybe_ranges = []
58
58
numeric_codepoints = []
59
59
60
+ # Note: each range is stored as a two-element Python list `[m, n]` that represents the half-open interval [m, n).
61
+
62
+
63
def binary_search_ranges(ranges, target):
    """Return the index of the range in *ranges* that contains *target*.

    Each element of *ranges* is a two-item sequence ``[start, end]`` that is
    treated as the half-open interval ``[start, end)``. The search halves the
    candidate window on every step, so *ranges* must be sorted in ascending
    order and must not overlap for the result to be meaningful.

    Args:
        ranges: Sorted sequence of ``[start, end]`` pairs.
        target: Value to look up.

    Returns:
        The index of the containing range, or ``-1`` when no range contains
        *target* (including when *ranges* is empty).
    """
    low = 0
    high = len(ranges) - 1
    while low <= high:
        mid = (low + high) // 2
        start, end = ranges[mid]
        if start <= target < end:
            # `end` is exclusive, so `target == end` is not a match.
            return mid
        if target < start:
            high = mid - 1
        else:
            low = mid + 1
    # Window is exhausted: target is not covered by any range.
    return -1
77
+
60
78
61
79
# Utility function to parse '<codepoint>..<codepoint>' or '<codepoint>'
62
80
def parse_codepoint_range(range_str):
    """Parse a hexadecimal code point or code point range into ``[start, end)``.

    Accepted forms:

    * ``"m"``    -> ``[m, m + 1]``
    * ``"m..n"`` -> ``[m, n + 1]`` (``n`` is inclusive in the input)

    In both cases the returned two-element list describes the half-open
    interval ``[start, end)``.

    Args:
        range_str: Text of the form ``"<hex>"`` or ``"<hex>..<hex>"``.

    Returns:
        A list ``[start_cp, end_cp]`` with ``end_cp`` exclusive.

    Raises:
        AssertionError: If *range_str* contains more than one ``".."``
            separator.
        ValueError: If a bound is not a valid hexadecimal integer (raised by
            ``int``).
    """
    bounds = range_str.split("..")
    assert len(bounds) == 1 or len(bounds) == 2, "Invalid format"
    start_cp = int(bounds[0], 16)
    # For "m" the last element is `m` itself; for "m..n" it is the inclusive
    # upper bound `n`. Adding 1 converts either into an exclusive end.
    end_cp = int(bounds[-1], 16) + 1
    return [start_cp, end_cp]
73
93
74
94
@@ -170,6 +190,24 @@ def write_decomposition():
170
190
print ("};" )
171
191
172
192
193
def write_recomposition():
    """Print the C++ ``RECOMPOSITION_MAP`` definition to standard output.

    For every code point ``cp`` in ``decomposition_map`` that is not covered
    by ``composition_exclusion_ranges``, one entry mapping the pair
    ``(d1, d2)`` to ``cp`` is printed, where ``d1`` and ``d2`` are the first
    two elements of ``decomposition_map[cp]``. When the decomposition has
    exactly one element, ``d2`` is printed as ``0``.

    Reads the module-level ``decomposition_map`` and
    ``composition_exclusion_ranges``; returns nothing.
    """
    # non const: the emitted declaration carries no `const` qualifier.
    # NOTE(review): the opening and closing lines are printed verbatim (there
    # is no .format() call on them), so the output contains a literal `{{` and
    # `}};` -- confirm that the doubled braces are intended in the C++ output.
    print("std::map<std::pair<uint32_t, uint32_t>, uint32_t> RECOMPOSITION_MAP = {{")
    print(" // clang-format off")
    for cp in decomposition_map:
        # Skip code points that fall inside a composition-exclusion range.
        if binary_search_ranges(composition_exclusion_ranges, cp) != -1:
            continue
        decomposition = decomposition_map[cp]
        d1 = decomposition[0]
        # A single-element decomposition has no second component; use 0.
        d2 = 0 if len(decomposition) == 1 else decomposition[1]
        print(" {{{{{:#06x}, {:#06x}}}, {:#06x}}},".format(d1, d2, cp))
    print(" // clang-format on")
    print("}};")
209
+
210
+
173
211
def write_ccc ():
174
212
print ("const std::map<uint32_t, int32_t> CCC_TABLE = {" )
175
213
print (" // clang-format off" )
@@ -179,19 +217,6 @@ def write_ccc():
179
217
print ("};" )
180
218
181
219
182
- # TODO: remove this
183
- # def write_composition_exclusion():
184
- # print(
185
- # "const std::array<std::pair<uint32_t, uint32_t>, {}>".format(
186
- # len(composition_exclusion_ranges)
187
- # )
188
- # )
189
- # print("NO_RECOMP_RANGES = {{")
190
- # for r in composition_exclusion_ranges:
191
- # print(" {{{:#06x}, {:#06x}}},".format(r[0], r[1]))
192
- # print("}};")
193
-
194
-
195
220
def write_alphabetic ():
196
221
print (
197
222
"const std::array<std::pair<uint32_t, uint32_t>, {}> ALPHABETIC_RANGES = {{{{" .format (
@@ -224,23 +249,6 @@ def write_numeric():
224
249
print ("}};" )
225
250
226
251
227
- def write_recomposition ():
228
- # non const.
229
- # TODO: Exclude `Composition_Exclusion`s
230
- print ("std::map<std::pair<uint32_t, uint32_t>, uint32_t> RECOMPOSITION_MAP = {{" )
231
- print (" // clang-format off" )
232
- for cp in decomposition_map :
233
- if len (decomposition_map [cp ]) == 1 :
234
- d1 = decomposition_map [cp ][0 ]
235
- d2 = 0
236
- else :
237
- d1 = decomposition_map [cp ][0 ]
238
- d2 = decomposition_map [cp ][1 ]
239
- print (" {{{{{:#06x}, {:#06x}}}, {:#06x}}}," .format (d1 , d2 , cp ))
240
- print (" // clang-format on" )
241
- print ("}};" )
242
-
243
-
244
252
def main ():
245
253
if len (sys .argv ) != 4 :
246
254
print ("too few arguments" , file = sys .stderr )
0 commit comments