Skip to content

Commit 370f1f8

Browse files
committed
exclude Composition_Exclusion and fix range parser
1 parent 3c01514 commit 370f1f8

File tree

2 files changed

+851
-1963
lines changed

2 files changed

+851
-1963
lines changed

gcc/rust/util/make-rust-unicode.py

Lines changed: 39 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -57,18 +57,38 @@
5757
nfc_qc_maybe_ranges = []
5858
numeric_codepoints = []
5959

60+
# Note: a range stored as the Python list `[m, n]` represents the half-open interval [m, n).
61+
62+
63+
def binary_search_ranges(ranges, target):
    """Locate the half-open range that contains `target`.

    Each element of `ranges` is a two-item sequence `[start, end]` that
    stands for `[start, end)`. The search bisects on those bounds, so it
    assumes `ranges` is ordered by start and non-overlapping.

    Returns the index of the range holding `target`, or -1 when none does.
    """
    lo, hi = 0, len(ranges) - 1
    while lo <= hi:
        pivot = (lo + hi) // 2
        first, stop = ranges[pivot]
        # Ranges are half-open, so the final member is `stop - 1`.
        last = stop - 1
        if target < first:
            # Target lies before this range: keep the left half.
            hi = pivot - 1
        elif target > last:
            # Target lies after this range: keep the right half.
            lo = pivot + 1
        else:
            return pivot
    return -1
77+
6078

6179
# Utility function to parse '<codepoint>..<codepoint>' or '<codepoint>'
6280
def parse_codepoint_range(range_str):
    """Parse a hexadecimal code point, or inclusive range, into `[start, end)`.

    Args:
        range_str: Text of the form ``"<codepoint>"`` or
            ``"<codepoint>..<codepoint>"``, with hexadecimal code points.

    Returns:
        A two-element list ``[start_cp, end_cp]`` where ``end_cp`` is one
        past the last code point covered:
            m    => [m, m+1)
            m..n => [m, n+1)

    Raises:
        AssertionError: If `range_str` has more than two ``..``-separated
            parts.
        ValueError: If a part is not a valid hexadecimal integer.
    """
    codepoint_range = range_str.split("..")
    # Raised explicitly instead of via `assert` so that the check is not
    # stripped when the script runs under `python -O`.
    if len(codepoint_range) not in (1, 2):
        raise AssertionError("Invalid format")
    start_cp = int(codepoint_range[0], 16)
    # For a single code point the last element is also the first one, so
    # both input forms reduce to "last inclusive code point + 1".
    last_cp = int(codepoint_range[-1], 16)
    return [start_cp, last_cp + 1]
7393

7494

@@ -170,6 +190,24 @@ def write_decomposition():
170190
print("};")
171191

172192

193+
def write_recomposition():
    """Print the C++ `RECOMPOSITION_MAP` definition to stdout.

    For every code point in `decomposition_map` that does not fall inside
    one of `composition_exclusion_ranges`, one entry is emitted that maps
    the pair (first decomposed code point, second decomposed code point) to
    the code point itself. A one-element decomposition uses 0 as the second
    member of the pair.
    """
    # The emitted map is declared without `const`.
    print("std::map<std::pair<uint32_t, uint32_t>, uint32_t> RECOMPOSITION_MAP = {{")
    print(" // clang-format off")
    entry_template = " {{{{{:#06x}, {:#06x}}}, {:#06x}}},"
    for cp in decomposition_map:
        excluded = binary_search_ranges(composition_exclusion_ranges, cp) != -1
        if excluded:
            # Code points in the exclusion ranges get no map entry.
            continue
        decomposition = decomposition_map[cp]
        d1 = decomposition[0]
        d2 = decomposition[1] if len(decomposition) != 1 else 0
        print(entry_template.format(d1, d2, cp))
    print(" // clang-format on")
    print("}};")
209+
210+
173211
def write_ccc():
174212
print("const std::map<uint32_t, int32_t> CCC_TABLE = {")
175213
print(" // clang-format off")
@@ -179,19 +217,6 @@ def write_ccc():
179217
print("};")
180218

181219

182-
# TODO: remove this
183-
# def write_composition_exclusion():
184-
# print(
185-
# "const std::array<std::pair<uint32_t, uint32_t>, {}>".format(
186-
# len(composition_exclusion_ranges)
187-
# )
188-
# )
189-
# print("NO_RECOMP_RANGES = {{")
190-
# for r in composition_exclusion_ranges:
191-
# print(" {{{:#06x}, {:#06x}}},".format(r[0], r[1]))
192-
# print("}};")
193-
194-
195220
def write_alphabetic():
196221
print(
197222
"const std::array<std::pair<uint32_t, uint32_t>, {}> ALPHABETIC_RANGES = {{{{".format(
@@ -224,23 +249,6 @@ def write_numeric():
224249
print("}};")
225250

226251

227-
def write_recomposition():
228-
# non const.
229-
# TODO: Exclude `Composition_Exclusion`s
230-
print("std::map<std::pair<uint32_t, uint32_t>, uint32_t> RECOMPOSITION_MAP = {{")
231-
print(" // clang-format off")
232-
for cp in decomposition_map:
233-
if len(decomposition_map[cp]) == 1:
234-
d1 = decomposition_map[cp][0]
235-
d2 = 0
236-
else:
237-
d1 = decomposition_map[cp][0]
238-
d2 = decomposition_map[cp][1]
239-
print(" {{{{{:#06x}, {:#06x}}}, {:#06x}}},".format(d1, d2, cp))
240-
print(" // clang-format on")
241-
print("}};")
242-
243-
244252
def main():
245253
if len(sys.argv) != 4:
246254
print("too few arguments", file=sys.stderr)

0 commit comments

Comments
 (0)