@@ -305,33 +305,13 @@ def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
305
305
format_table_content (f , data , 8 )
306
306
f .write ("\n ];\n \n " )
307
307
308
- def emit_property_module (f , mod , tbl , emit_fn ):
308
+ def emit_property_module (f , mod , tbl , emit ):
309
309
f .write ("pub mod %s {\n " % mod )
310
- keys = tbl .keys ()
311
- keys .sort ()
312
- for cat in keys :
310
+ for cat in sorted (emit ):
313
311
emit_table (f , "%s_table" % cat , tbl [cat ])
314
- if cat in emit_fn :
315
- f .write (" pub fn %s(c: char) -> bool {\n " % cat )
316
- f .write (" super::bsearch_range_table(c, %s_table)\n " % cat )
317
- f .write (" }\n \n " )
318
- f .write ("}\n \n " )
319
-
320
- def emit_regex_module (f , cats , w_data ):
321
- f .write ("pub mod regex {\n " )
322
- regex_class = "&'static [(char, char)]"
323
- class_table = "&'static [(&'static str, %s)]" % regex_class
324
-
325
- emit_table (f , "UNICODE_CLASSES" , cats , class_table ,
326
- pfun = lambda x : "(\" %s\" ,super::%s::%s_table)" % (x [0 ], x [1 ], x [0 ]))
327
-
328
- f .write (" pub const PERLD: %s = super::general_category::Nd_table;\n \n "
329
- % regex_class )
330
- f .write (" pub const PERLS: %s = super::property::White_Space_table;\n \n "
331
- % regex_class )
332
-
333
- emit_table (f , "PERLW" , w_data , regex_class )
334
-
312
+ f .write (" pub fn %s(c: char) -> bool {\n " % cat )
313
+ f .write (" super::bsearch_range_table(c, %s_table)\n " % cat )
314
+ f .write (" }\n \n " )
335
315
f .write ("}\n \n " )
336
316
337
317
def emit_conversions_module (f , lowerupper , upperlower ):
@@ -605,8 +585,7 @@ def optimize_width_table(wtable):
605
585
(canon_decomp , compat_decomp , gencats , combines ,
606
586
lowerupper , upperlower ) = load_unicode_data ("UnicodeData.txt" )
607
587
want_derived = ["XID_Start" , "XID_Continue" , "Alphabetic" , "Lowercase" , "Uppercase" ]
608
- other_derived = ["Default_Ignorable_Code_Point" ]
609
- derived = load_properties ("DerivedCoreProperties.txt" , want_derived + other_derived )
588
+ derived = load_properties ("DerivedCoreProperties.txt" , want_derived )
610
589
scripts = load_properties ("Scripts.txt" , [])
611
590
props = load_properties ("PropList.txt" ,
612
591
["White_Space" , "Join_Control" , "Noncharacter_Code_Point" ])
@@ -616,27 +595,11 @@ def optimize_width_table(wtable):
616
595
# bsearch_range_table is used in all the property modules below
617
596
emit_bsearch_range_table (rf )
618
597
619
- # all of these categories will also be available as \p{} in libregex
620
- allcats = []
598
+ # category tables
621
599
for (name , cat , pfuns ) in ("general_category" , gencats , ["N" , "Cc" ]), \
622
600
("derived_property" , derived , want_derived ), \
623
- ("script" , scripts , []), \
624
601
("property" , props , ["White_Space" ]):
625
602
emit_property_module (rf , name , cat , pfuns )
626
- allcats .extend (map (lambda x : (x , name ), cat ))
627
- allcats .sort (key = lambda c : c [0 ])
628
-
629
- # the \w regex corresponds to Alphabetic + Mark + Decimal_Number +
630
- # Connector_Punctuation + Join-Control according to UTS#18
631
- # http://www.unicode.org/reports/tr18/#Compatibility_Properties
632
- perl_words = []
633
- for cat in derived ["Alphabetic" ], gencats ["M" ], gencats ["Nd" ], \
634
- gencats ["Pc" ], props ["Join_Control" ]:
635
- perl_words .extend (ungroup_cat (cat ))
636
- perl_words = group_cat (perl_words )
637
-
638
- # emit lookup tables for \p{}, along with \d, \w, and \s for libregex
639
- emit_regex_module (rf , allcats , perl_words )
640
603
641
604
# normalizations and conversions module
642
605
emit_norm_module (rf , canon_decomp , compat_decomp , combines , norm_props )
0 commit comments