@@ -307,12 +307,114 @@ def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
307
307
format_table_content (f , data , 8 )
308
308
f .write ("\n ];\n \n " )
309
309
310
def emit_trie_lookup_range_table(f):
    """Write the Rust `BoolTrie` type and its lookup functions to `f`.

    The emitted Rust code defines:

    * `BoolTrie` -- a three-tier bitmap: `r1` holds the leaves for
      0..0x800 directly, `r2`/`r3` form a two-level trie for
      0x800..0x10000, and `r4`/`r5`/`r6` form a three-level trie for
      0x10000..0x110000.
    * `trie_range_leaf` -- tests bit `c & 63` of one 64-bit leaf.
    * `trie_lookup_range_table` -- selects the leaf for a `char` and
      tests it.

    f -- any object with a `write(str)` method.
    """
    # Every leaf is a u64 covering 64 code points, so the leaf index for
    # the directly-indexed `r1` range is `c >> 6` (0x800 >> 6 == 32 leaves),
    # the same shift the other two branches use to reach their leaves.
    f.write("""
pub struct BoolTrie {
    // 0..0x800 (corresponding to 1 and 2 byte utf-8 sequences)
    r1: [u64; 32], // leaves

    // 0x800..0x10000 (corresponding to 3 byte utf-8 sequences)
    r2: [u8; 1024], // first level
    r3: &'static [u64], // leaves

    // 0x10000..0x110000 (corresponding to 4 byte utf-8 sequences)
    r4: [u8; 272], // first level
    r5: &'static [u8], // second level
    r6: &'static [u64], // leaves
}

fn trie_range_leaf(c: usize, bitmap_chunk: u64) -> bool {
    ((bitmap_chunk >> (c & 63)) & 1) != 0
}

fn trie_lookup_range_table(c: char, r: &'static BoolTrie) -> bool {
    let c = c as usize;
    if c < 0x800 {
        trie_range_leaf(c, r.r1[c >> 6])
    } else if c < 0x10000 {
        let child = r.r2[c >> 6];
        trie_range_leaf(c, r.r3[child as usize])
    } else {
        let child = r.r4[c >> 12];
        let leaf = r.r5[((child as usize) << 6) + ((c >> 6) & 0x3f)];
        trie_range_leaf(c, r.r6[leaf as usize])
    }
}\n
""")
344
+
345
def compute_trie(rawdata, chunksize):
    """Build one trie level by deduplicating fixed-size chunks of `rawdata`.

    `rawdata` is cut into consecutive slices of `chunksize` items; trailing
    items that do not fill a whole slice are ignored.  Each distinct slice
    is stored once, in order of first appearance.

    Returns a tuple `(root, child_data)`:

    * `root[i]` is the index of the i-th slice among the distinct slices;
    * `child_data` is the concatenation of the distinct slices, so slice
      number `k` occupies `child_data[k * chunksize:(k + 1) * chunksize]`.
    """
    root = []
    childmap = {}
    child_data = []
    # Floor division keeps the chunk count an int on both Python 2 and
    # Python 3; with `/` Python 3 would hand a float to range().
    for i in range(len(rawdata) // chunksize):
        data = rawdata[i * chunksize:(i + 1) * chunksize]
        # A string key makes the (unhashable) list slice usable as a dict key.
        child = '|'.join(map(str, data))
        if child not in childmap:
            childmap[child] = len(childmap)
            child_data.extend(data)
        root.append(childmap[child])
    return (root, child_data)
357
+
358
def emit_bool_trie(f, name, t_data, is_pub=True):
    """Write a Rust `BoolTrie` constant called `name` to `f`.

    f      -- object with a `write(str)` method.
    name   -- identifier of the emitted Rust constant.
    t_data -- iterable of `(lo, hi)` code point pairs; both ends are
              inclusive, and every code point in a pair is marked as set.
    is_pub -- when true, the constant is emitted with a `pub ` prefix.

    The code point space 0..0x110000 is turned into 64-bit bitmap chunks.
    Chunks for 0..0x800 are written directly as `r1`; the remaining two
    ranges are compressed with `compute_trie` into `r2`/`r3` and
    `r4`/`r5`/`r6`.
    """
    CHUNK = 64
    rawdata = [False] * 0x110000
    for (lo, hi) in t_data:
        for cp in range(lo, hi + 1):
            rawdata[cp] = True

    # convert to bitmap chunks of 64 bits each
    # (`//` keeps counts and slice bounds integral on Python 2 and 3 alike)
    chunks = []
    for i in range(0x110000 // CHUNK):
        chunk = 0
        for j in range(CHUNK):
            if rawdata[i * CHUNK + j]:
                chunk |= 1 << j
        chunks.append(chunk)

    pub_string = ""
    if is_pub:
        pub_string = "pub "
    f.write(" %sconst %s: &'static super::BoolTrie = &super::BoolTrie {\n " % (pub_string, name))
    f.write(" r1: [\n ")
    data = ','.join('0x%016x' % chunk for chunk in chunks[0:0x800 // CHUNK])
    format_table_content(f, data, 12)
    f.write("\n ],\n ")

    # 0x800..0x10000 trie
    (r2, r3) = compute_trie(chunks[0x800 // CHUNK:0x10000 // CHUNK], 64 // CHUNK)
    f.write(" r2: [\n ")
    # 32 filler entries stand in for 0..0x800 (0x800 >> 6 == 32), so the
    # emitted lookup can index r2 with `c >> 6` without subtracting an offset.
    data = ','.join(str(node) for node in [255] * 32 + r2)
    format_table_content(f, data, 12)
    f.write("\n ],\n ")
    f.write(" r3: &[\n ")
    data = ','.join('0x%016x' % chunk for chunk in r3)
    format_table_content(f, data, 12)
    f.write("\n ],\n ")

    # 0x10000..0x110000 trie
    (mid, r6) = compute_trie(chunks[0x10000 // CHUNK:0x110000 // CHUNK], 64 // CHUNK)
    (r4, r5) = compute_trie(mid, 64)
    f.write(" r4: [\n ")
    # 16 filler entries stand in for 0..0x10000 (0x10000 >> 12 == 16), so the
    # emitted lookup can index r4 with `c >> 12` directly.
    data = ','.join(str(node) for node in [255] * 16 + r4)
    format_table_content(f, data, 12)
    f.write("\n ],\n ")
    f.write(" r5: &[\n ")
    data = ','.join(str(node) for node in r5)
    format_table_content(f, data, 12)
    f.write("\n ],\n ")
    f.write(" r6: &[\n ")
    data = ','.join('0x%016x' % chunk for chunk in r6)
    format_table_content(f, data, 12)
    f.write("\n ],\n ")

    f.write(" };\n \n ")
411
+
310
412
def emit_property_module(f, mod, tbl, emit):
    """Write a Rust module `mod` holding one trie table and one predicate
    function per property.

    f    -- object with a `write(str)` method.
    mod  -- name of the emitted Rust module.
    tbl  -- mapping from property name to its range data, passed through
            to `emit_bool_trie`.
    emit -- property names to emit; they are processed in sorted order.
    """
    f.write("pub mod %s {\n " % mod)
    for prop in sorted(emit):
        # The table constant and the predicate that queries it share the
        # property's name: `<prop>_table` and `fn <prop>(c)`.
        table_name = "%s_table" % prop
        emit_bool_trie(f, table_name, tbl[prop])
        f.write(" pub fn %s(c: char) -> bool {\n " % prop)
        f.write(" super::trie_lookup_range_table (c, %s_table)\n " % prop)
        f.write(" }\n \n ")
    f.write("}\n \n ")
318
420
@@ -402,8 +504,9 @@ def emit_norm_module(f, canon, compat, combine, norm_props):
402
504
norm_props = load_properties ("DerivedNormalizationProps.txt" ,
403
505
["Full_Composition_Exclusion" ])
404
506
405
- # bsearch_range_table is used in all the property modules below
406
- emit_bsearch_range_table (rf )
507
+ # trie_lookup_range_table is used in all the property modules below
508
+ emit_trie_lookup_range_table (rf )
509
+ # emit_bsearch_range_table(rf)
407
510
408
511
# category tables
409
512
for (name , cat , pfuns ) in ("general_category" , gencats , ["N" , "Cc" ]), \
0 commit comments