Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit d0bb7e1

Browse files
committed Nov 18, 2016
Fix fmt::Debug for strings, e.g. for Chinese characters
The problem occurred due to lines like
```
3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
```
in `UnicodeData.txt`, which the script previously interpreted as two separate characters, although together they represent the whole range. Fixes #34318.
1 parent 01d061f commit d0bb7e1

File tree

3 files changed

+276
-105
lines changed

3 files changed

+276
-105
lines changed
 

‎src/etc/char_private.py

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,16 @@
1111
# except according to those terms.
1212

1313
# This script uses the following Unicode tables:
14-
# - Categories.txt
14+
# - UnicodeData.txt
1515

16+
17+
from collections import namedtuple
18+
import csv
1619
import os
1720
import subprocess
1821

22+
NUM_CODEPOINTS=0x110000
23+
1924
def to_ranges(iter):
2025
current = None
2126
for i in iter:
@@ -28,10 +33,10 @@ def to_ranges(iter):
2833
if current is not None:
2934
yield tuple(current)
3035

31-
def get_escaped(dictionary):
32-
for i in range(0x110000):
33-
if dictionary.get(i, "Cn") in "Cc Cf Cs Co Cn Zl Zp Zs".split() and i != ord(' '):
34-
yield i
36+
def get_escaped(codepoints):
37+
for c in codepoints:
38+
if (c.class_ or "Cn") in "Cc Cf Cs Co Cn Zl Zp Zs".split() and c.value != ord(' '):
39+
yield c.value
3540

3641
def get_file(f):
3742
try:
@@ -40,10 +45,41 @@ def get_file(f):
4045
subprocess.run(["curl", "-O", f], check=True)
4146
return open(os.path.basename(f))
4247

48+
Codepoint = namedtuple('Codepoint', 'value class_')
49+
50+
def get_codepoints(f):
51+
r = csv.reader(f, delimiter=";")
52+
prev_codepoint = 0
53+
class_first = None
54+
for row in r:
55+
codepoint = int(row[0], 16)
56+
name = row[1]
57+
class_ = row[2]
58+
59+
if class_first is not None:
60+
if not name.endswith("Last>"):
61+
raise ValueError("Missing Last after First")
62+
63+
for c in range(prev_codepoint + 1, codepoint):
64+
yield Codepoint(c, class_first)
65+
66+
class_first = None
67+
if name.endswith("First>"):
68+
class_first = class_
69+
70+
yield Codepoint(codepoint, class_)
71+
prev_codepoint = codepoint
72+
73+
if class_first != None:
74+
raise ValueError("Missing Last after First")
75+
76+
for c in range(prev_codepoint + 1, NUM_CODEPOINTS):
77+
yield Codepoint(c, None)
78+
4379
def main():
44-
file = get_file("http://www.unicode.org/notes/tn36/Categories.txt")
80+
file = get_file("http://www.unicode.org/Public/UNIDATA/UnicodeData.txt")
4581

46-
dictionary = {int(line.split()[0], 16): line.split()[1] for line in file}
82+
codepoints = get_codepoints(file)
4783

4884
CUTOFF=0x10000
4985
singletons0 = []
@@ -52,7 +88,7 @@ def main():
5288
normal1 = []
5389
extra = []
5490

55-
for a, b in to_ranges(get_escaped(dictionary)):
91+
for a, b in to_ranges(get_escaped(codepoints)):
5692
if a > 2 * CUTOFF:
5793
extra.append((a, b - a))
5894
elif a == b - 1:

‎src/libcore/char_private.rs

Lines changed: 230 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
// NOTE: The following code was generated by "src/etc/char_private.py",
1212
// do not edit directly!
1313

14+
use slice::SliceExt;
15+
1416
fn check(x: u16, singletons: &[u16], normal: &[u16]) -> bool {
1517
for &s in singletons {
1618
if x == s {
@@ -42,7 +44,16 @@ pub fn is_printable(x: char) -> bool {
4244
} else if x < 0x20000 {
4345
check(lower, SINGLETONS1, NORMAL1)
4446
} else {
45-
if 0x20000 <= x && x < 0x2f800 {
47+
if 0x2a6d7 <= x && x < 0x2a700 {
48+
return false;
49+
}
50+
if 0x2b735 <= x && x < 0x2b740 {
51+
return false;
52+
}
53+
if 0x2b81e <= x && x < 0x2b820 {
54+
return false;
55+
}
56+
if 0x2cea2 <= x && x < 0x2f800 {
4657
return false;
4758
}
4859
if 0x2fa1e <= x && x < 0xe0100 {
@@ -62,10 +73,13 @@ const SINGLETONS0: &'static [u16] = &[
6273
0x38b,
6374
0x38d,
6475
0x3a2,
76+
0x530,
6577
0x557,
6678
0x558,
6779
0x560,
6880
0x588,
81+
0x58b,
82+
0x58c,
6983
0x590,
7084
0x61c,
7185
0x61d,
@@ -79,10 +93,8 @@ const SINGLETONS0: &'static [u16] = &[
7993
0x83f,
8094
0x85c,
8195
0x85d,
82-
0x8a1,
83-
0x8ff,
84-
0x978,
85-
0x980,
96+
0x8b5,
97+
0x8e2,
8698
0x984,
8799
0x98d,
88100
0x98e,
@@ -154,14 +166,11 @@ const SINGLETONS0: &'static [u16] = &[
154166
0xc0d,
155167
0xc11,
156168
0xc29,
157-
0xc34,
158169
0xc45,
159170
0xc49,
160171
0xc57,
161172
0xc64,
162173
0xc65,
163-
0xc80,
164-
0xc81,
165174
0xc84,
166175
0xc8d,
167176
0xc91,
@@ -193,6 +202,8 @@ const SINGLETONS0: &'static [u16] = &[
193202
0xdbf,
194203
0xdd5,
195204
0xdd7,
205+
0xdf0,
206+
0xdf1,
196207
0xe83,
197208
0xe85,
198209
0xe86,
@@ -245,6 +256,10 @@ const SINGLETONS0: &'static [u16] = &[
245256
0x1317,
246257
0x135b,
247258
0x135c,
259+
0x13f6,
260+
0x13f7,
261+
0x13fe,
262+
0x13ff,
248263
0x1680,
249264
0x170d,
250265
0x176d,
@@ -253,13 +268,17 @@ const SINGLETONS0: &'static [u16] = &[
253268
0x17df,
254269
0x180e,
255270
0x180f,
271+
0x191f,
256272
0x196e,
257273
0x196f,
258274
0x1a1c,
259275
0x1a1d,
260276
0x1a5f,
261277
0x1a7d,
262278
0x1a7e,
279+
0x1aae,
280+
0x1aaf,
281+
0x1cf7,
263282
0x1f16,
264283
0x1f17,
265284
0x1f1e,
@@ -285,7 +304,12 @@ const SINGLETONS0: &'static [u16] = &[
285304
0x2072,
286305
0x2073,
287306
0x208f,
288-
0x2700,
307+
0x23ff,
308+
0x2b74,
309+
0x2b75,
310+
0x2b96,
311+
0x2b97,
312+
0x2bc9,
289313
0x2c2f,
290314
0x2c5f,
291315
0x2d26,
@@ -306,8 +330,11 @@ const SINGLETONS0: &'static [u16] = &[
306330
0x318f,
307331
0x321f,
308332
0x32ff,
309-
0xa78f,
333+
0xa7af,
334+
0xa8fe,
335+
0xa8ff,
310336
0xa9ce,
337+
0xa9ff,
311338
0xaa4e,
312339
0xaa4f,
313340
0xaa5a,
@@ -317,6 +344,7 @@ const SINGLETONS0: &'static [u16] = &[
317344
0xab0f,
318345
0xab10,
319346
0xab27,
347+
0xab2f,
320348
0xabee,
321349
0xabef,
322350
0xfa6e,
@@ -350,7 +378,7 @@ const SINGLETONS1: &'static [u16] = &[
350378
0x3e,
351379
0x4e,
352380
0x4f,
353-
0x31f,
381+
0x18f,
354382
0x39e,
355383
0x49e,
356384
0x49f,
@@ -361,13 +389,59 @@ const SINGLETONS1: &'static [u16] = &[
361389
0x83d,
362390
0x83e,
363391
0x856,
392+
0x8f3,
393+
0x9d0,
394+
0x9d1,
364395
0xa04,
365396
0xa14,
366397
0xa18,
367398
0xb56,
368399
0xb57,
369400
0x10bd,
370401
0x1135,
402+
0x11ce,
403+
0x11cf,
404+
0x11e0,
405+
0x1212,
406+
0x1287,
407+
0x1289,
408+
0x128e,
409+
0x129e,
410+
0x1304,
411+
0x130d,
412+
0x130e,
413+
0x1311,
414+
0x1312,
415+
0x1329,
416+
0x1331,
417+
0x1334,
418+
0x133a,
419+
0x133b,
420+
0x1345,
421+
0x1346,
422+
0x1349,
423+
0x134a,
424+
0x134e,
425+
0x134f,
426+
0x1364,
427+
0x1365,
428+
0x145a,
429+
0x145c,
430+
0x15b6,
431+
0x15b7,
432+
0x1c09,
433+
0x1c37,
434+
0x1c90,
435+
0x1c91,
436+
0x1ca8,
437+
0x246f,
438+
0x6a5f,
439+
0x6aee,
440+
0x6aef,
441+
0x6b5a,
442+
0x6b62,
443+
0xbc9a,
444+
0xbc9b,
371445
0xd127,
372446
0xd128,
373447
0xd455,
@@ -395,6 +469,14 @@ const SINGLETONS1: &'static [u16] = &[
395469
0xd6a7,
396470
0xd7cc,
397471
0xd7cd,
472+
0xdaa0,
473+
0xe007,
474+
0xe019,
475+
0xe01a,
476+
0xe022,
477+
0xe025,
478+
0xe8c5,
479+
0xe8c6,
398480
0xee04,
399481
0xee20,
400482
0xee23,
@@ -429,31 +511,25 @@ const SINGLETONS1: &'static [u16] = &[
429511
0xeeaa,
430512
0xf0af,
431513
0xf0b0,
432-
0xf0bf,
433514
0xf0c0,
434515
0xf0d0,
435516
0xf12f,
436-
0xf336,
437-
0xf3c5,
438-
0xf43f,
439-
0xf441,
440-
0xf4f8,
441-
0xf53e,
442-
0xf53f,
517+
0xf91f,
518+
0xf931,
519+
0xf932,
520+
0xf93f,
443521
];
444522
const NORMAL0: &'static [u16] = &[
445523
0x0, 0x20,
446524
0x7f, 0x22,
447-
0x37f, 0x5,
448-
0x528, 0x9,
449-
0x58b, 0x4,
525+
0x380, 0x4,
450526
0x5c8, 0x8,
451527
0x5eb, 0x5,
452528
0x5f5, 0x11,
453529
0x7b2, 0xe,
454530
0x7fb, 0x5,
455531
0x85f, 0x41,
456-
0x8ad, 0x37,
532+
0x8be, 0x16,
457533
0x9b3, 0x3,
458534
0x9cf, 0x8,
459535
0x9d8, 0x4,
@@ -465,7 +541,8 @@ const NORMAL0: &'static [u16] = &[
465541
0xa5f, 0x7,
466542
0xa76, 0xb,
467543
0xad1, 0xf,
468-
0xaf2, 0xf,
544+
0xaf2, 0x7,
545+
0xafa, 0x7,
469546
0xb4e, 0x8,
470547
0xb58, 0x4,
471548
0xb78, 0xa,
@@ -478,21 +555,19 @@ const NORMAL0: &'static [u16] = &[
478555
0xbc3, 0x3,
479556
0xbd1, 0x6,
480557
0xbd8, 0xe,
481-
0xbfb, 0x6,
558+
0xbfb, 0x5,
482559
0xc3a, 0x3,
483560
0xc4e, 0x7,
484-
0xc5a, 0x6,
561+
0xc5b, 0x5,
485562
0xc70, 0x8,
486563
0xcce, 0x7,
487564
0xcd7, 0x7,
488-
0xcf3, 0xf,
489-
0xd4f, 0x8,
490-
0xd58, 0x8,
491-
0xd76, 0x3,
565+
0xcf3, 0xe,
566+
0xd50, 0x4,
492567
0xd97, 0x3,
493568
0xdc7, 0x3,
494569
0xdcb, 0x4,
495-
0xde0, 0x12,
570+
0xde0, 0x6,
496571
0xdf5, 0xc,
497572
0xe3b, 0x4,
498573
0xe5c, 0x25,
@@ -503,9 +578,8 @@ const NORMAL0: &'static [u16] = &[
503578
0x10c8, 0x5,
504579
0x137d, 0x3,
505580
0x139a, 0x6,
506-
0x13f5, 0xb,
507581
0x169d, 0x3,
508-
0x16f1, 0xf,
582+
0x16f9, 0x7,
509583
0x1715, 0xb,
510584
0x1737, 0x9,
511585
0x1754, 0xc,
@@ -516,7 +590,6 @@ const NORMAL0: &'static [u16] = &[
516590
0x1878, 0x8,
517591
0x18ab, 0x5,
518592
0x18f6, 0xa,
519-
0x191d, 0x3,
520593
0x192c, 0x4,
521594
0x193c, 0x4,
522595
0x1941, 0x3,
@@ -526,67 +599,63 @@ const NORMAL0: &'static [u16] = &[
526599
0x19db, 0x3,
527600
0x1a8a, 0x6,
528601
0x1a9a, 0x6,
529-
0x1aae, 0x52,
602+
0x1abf, 0x41,
530603
0x1b4c, 0x4,
531604
0x1b7d, 0x3,
532605
0x1bf4, 0x8,
533606
0x1c38, 0x3,
534607
0x1c4a, 0x3,
535-
0x1c80, 0x40,
608+
0x1c89, 0x37,
536609
0x1cc8, 0x8,
537-
0x1cf7, 0x9,
538-
0x1de7, 0x15,
610+
0x1cfa, 0x6,
611+
0x1df6, 0x5,
539612
0x1fff, 0x11,
540613
0x2028, 0x8,
541614
0x205f, 0x11,
542615
0x209d, 0x3,
543-
0x20ba, 0x16,
616+
0x20bf, 0x11,
544617
0x20f1, 0xf,
545-
0x218a, 0x6,
546-
0x23f4, 0xc,
618+
0x218c, 0x4,
547619
0x2427, 0x19,
548620
0x244b, 0x15,
549-
0x2b4d, 0x3,
550-
0x2b5a, 0xa6,
621+
0x2bba, 0x3,
622+
0x2bd2, 0x1a,
623+
0x2bf0, 0x10,
551624
0x2cf4, 0x5,
552625
0x2d28, 0x5,
553626
0x2d68, 0x7,
554627
0x2d71, 0xe,
555628
0x2d97, 0x9,
556-
0x2e3c, 0x44,
629+
0x2e45, 0x3b,
557630
0x2ef4, 0xc,
558631
0x2fd6, 0x1a,
559632
0x2ffc, 0x5,
560633
0x3100, 0x5,
561634
0x312e, 0x3,
562635
0x31bb, 0x5,
563636
0x31e4, 0xc,
564-
0x3400, 0x19c0,
565-
0x4e00, 0x5200,
637+
0x4db6, 0xa,
638+
0x9fd6, 0x2a,
566639
0xa48d, 0x3,
567640
0xa4c7, 0x9,
568641
0xa62c, 0x14,
569-
0xa698, 0x7,
570642
0xa6f8, 0x8,
571-
0xa794, 0xc,
572-
0xa7ab, 0x4d,
643+
0xa7b8, 0x3f,
573644
0xa82c, 0x4,
574645
0xa83a, 0x6,
575646
0xa878, 0x8,
576-
0xa8c5, 0x9,
647+
0xa8c6, 0x8,
577648
0xa8da, 0x6,
578-
0xa8fc, 0x4,
579649
0xa954, 0xb,
580650
0xa97d, 0x3,
581651
0xa9da, 0x4,
582-
0xa9e0, 0x20,
583652
0xaa37, 0x9,
584-
0xaa7c, 0x4,
585653
0xaac3, 0x18,
586654
0xaaf7, 0xa,
587655
0xab17, 0x9,
588-
0xab2f, 0x91,
589-
0xabfa, 0x2bb6,
656+
0xab66, 0xa,
657+
0xabfa, 0x6,
658+
0xd7a4, 0xc,
590659
0xd7c7, 0x4,
591660
0xd7fc, 0x2104,
592661
0xfada, 0x26,
@@ -596,7 +665,6 @@ const NORMAL0: &'static [u16] = &[
596665
0xfd40, 0x10,
597666
0xfdc8, 0x28,
598667
0xfe1a, 0x6,
599-
0xfe27, 0x9,
600668
0xfe6c, 0x4,
601669
0xfefd, 0x4,
602670
0xffbf, 0x3,
@@ -608,86 +676,151 @@ const NORMAL1: &'static [u16] = &[
608676
0xfb, 0x5,
609677
0x103, 0x4,
610678
0x134, 0x3,
611-
0x18b, 0x5,
612-
0x19c, 0x34,
679+
0x19c, 0x4,
680+
0x1a1, 0x2f,
613681
0x1fe, 0x82,
614682
0x29d, 0x3,
615-
0x2d1, 0x2f,
683+
0x2d1, 0xf,
684+
0x2fc, 0x4,
616685
0x324, 0xc,
617-
0x34b, 0x35,
686+
0x34b, 0x5,
687+
0x37b, 0x5,
618688
0x3c4, 0x4,
619689
0x3d6, 0x2a,
620-
0x4aa, 0x356,
690+
0x4aa, 0x6,
691+
0x4d4, 0x4,
692+
0x4fc, 0x4,
693+
0x528, 0x8,
694+
0x564, 0xb,
695+
0x570, 0x90,
696+
0x737, 0x9,
697+
0x756, 0xa,
698+
0x768, 0x98,
621699
0x839, 0x3,
622-
0x860, 0xa0,
700+
0x89f, 0x8,
701+
0x8b0, 0x30,
702+
0x8f6, 0x5,
623703
0x91c, 0x3,
624704
0x93a, 0x5,
625705
0x940, 0x40,
626-
0x9b8, 0x6,
627-
0x9c0, 0x40,
706+
0x9b8, 0x4,
628707
0xa07, 0x5,
629708
0xa34, 0x4,
630709
0xa3b, 0x4,
631710
0xa48, 0x8,
632711
0xa59, 0x7,
633-
0xa80, 0x80,
712+
0xaa0, 0x20,
713+
0xae7, 0x4,
714+
0xaf7, 0x9,
634715
0xb36, 0x3,
635716
0xb73, 0x5,
636-
0xb80, 0x80,
637-
0xc49, 0x217,
717+
0xb92, 0x7,
718+
0xb9d, 0xc,
719+
0xbb0, 0x50,
720+
0xc49, 0x37,
721+
0xcb3, 0xd,
722+
0xcf3, 0x7,
723+
0xd00, 0x160,
638724
0xe7f, 0x181,
639725
0x104e, 0x4,
640-
0x1070, 0x10,
726+
0x1070, 0xf,
641727
0x10c2, 0xe,
642728
0x10e9, 0x7,
643729
0x10fa, 0x6,
644-
0x1144, 0x3c,
645-
0x11c9, 0x7,
646-
0x11da, 0x4a6,
730+
0x1144, 0xc,
731+
0x1177, 0x9,
732+
0x11f5, 0xb,
733+
0x123f, 0x41,
734+
0x12aa, 0x6,
735+
0x12eb, 0x5,
736+
0x12fa, 0x6,
737+
0x1351, 0x6,
738+
0x1358, 0x5,
739+
0x136d, 0x3,
740+
0x1375, 0x8b,
741+
0x145e, 0x22,
742+
0x14c8, 0x8,
743+
0x14da, 0xa6,
744+
0x15de, 0x22,
745+
0x1645, 0xb,
746+
0x165a, 0x6,
747+
0x166d, 0x13,
647748
0x16b8, 0x8,
648-
0x16ca, 0x936,
649-
0x236f, 0x91,
650-
0x2463, 0xd,
651-
0x2474, 0xb8c,
652-
0x342f, 0x33d1,
653-
0x6a39, 0x4c7,
749+
0x16ca, 0x36,
750+
0x171a, 0x3,
751+
0x172c, 0x4,
752+
0x1740, 0x160,
753+
0x18f3, 0xc,
754+
0x1900, 0x1c0,
755+
0x1af9, 0x107,
756+
0x1c46, 0xa,
757+
0x1c6d, 0x3,
758+
0x1cb7, 0x349,
759+
0x239a, 0x66,
760+
0x2475, 0xb,
761+
0x2544, 0xabc,
762+
0x342f, 0xfd1,
763+
0x4647, 0x21b9,
764+
0x6a39, 0x7,
765+
0x6a6a, 0x4,
766+
0x6a70, 0x60,
767+
0x6af6, 0xa,
768+
0x6b46, 0xa,
769+
0x6b78, 0x5,
770+
0x6b90, 0x370,
654771
0x6f45, 0xb,
655772
0x6f7f, 0x10,
656-
0x6fa0, 0x4060,
657-
0xb002, 0x1ffe,
773+
0x6fa0, 0x40,
774+
0x6fe1, 0x1f,
775+
0x87ed, 0x13,
776+
0x8af3, 0x250d,
777+
0xb002, 0xbfe,
778+
0xbc6b, 0x5,
779+
0xbc7d, 0x3,
780+
0xbc89, 0x7,
781+
0xbca0, 0x1360,
658782
0xd0f6, 0xa,
659783
0xd173, 0x8,
660-
0xd1de, 0x22,
784+
0xd1e9, 0x17,
661785
0xd246, 0xba,
662786
0xd357, 0x9,
663787
0xd372, 0x8e,
664788
0xd547, 0x3,
665-
0xd800, 0x1600,
789+
0xda8c, 0xf,
790+
0xdab0, 0x550,
791+
0xe02b, 0x7d5,
792+
0xe8d7, 0x29,
793+
0xe94b, 0x5,
794+
0xe95a, 0x4,
795+
0xe960, 0x4a0,
666796
0xee3c, 0x6,
667797
0xee43, 0x4,
668798
0xee9c, 0x5,
669799
0xeebc, 0x34,
670800
0xeef2, 0x10e,
671801
0xf02c, 0x4,
672802
0xf094, 0xc,
673-
0xf0e0, 0x20,
674-
0xf10b, 0x5,
803+
0xf0f6, 0xa,
804+
0xf10d, 0x3,
675805
0xf16c, 0x4,
676-
0xf19b, 0x4b,
806+
0xf1ad, 0x39,
677807
0xf203, 0xd,
678-
0xf23b, 0x5,
808+
0xf23c, 0x4,
679809
0xf249, 0x7,
680810
0xf252, 0xae,
681-
0xf321, 0xf,
682-
0xf37d, 0x3,
683-
0xf394, 0xc,
684-
0xf3cb, 0x15,
685-
0xf3f1, 0xf,
686-
0xf4fd, 0x3,
687-
0xf544, 0xc,
688-
0xf568, 0x93,
689-
0xf641, 0x4,
690-
0xf650, 0x30,
691-
0xf6c6, 0x3a,
692-
0xf774, 0x88c,
811+
0xf6d3, 0xd,
812+
0xf6ed, 0x3,
813+
0xf6f7, 0x9,
814+
0xf774, 0xc,
815+
0xf7d5, 0x2b,
816+
0xf80c, 0x4,
817+
0xf848, 0x8,
818+
0xf85a, 0x6,
819+
0xf888, 0x8,
820+
0xf8ae, 0x62,
821+
0xf928, 0x8,
822+
0xf94c, 0x4,
823+
0xf95f, 0x21,
824+
0xf992, 0x2e,
825+
0xf9c1, 0x63f,
693826
];

‎src/libcoretest/char.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,8 @@ fn test_escape_debug() {
162162
assert_eq!(s, "~");
163163
let s = string('é');
164164
assert_eq!(s, "é");
165+
let s = string('文');
166+
assert_eq!(s, "文");
165167
let s = string('\x00');
166168
assert_eq!(s, "\\u{0}");
167169
let s = string('\x1f');

0 commit comments

Comments
 (0)
Please sign in to comment.