arielb1
diff --git a/src/etc/char_private.py b/src/etc/char_private.py
+98 -34
@@ -76,6 +76,66 @@ def get_codepoints(f):
     for c in range(prev_codepoint + 1, NUM_CODEPOINTS):
         yield Codepoint(c, None)
 
+def compress_singletons(singletons):
+    """Group values by `value >> 8`, keeping each `value & 0xff` separately.
+
+    Returns `(uppers, lowers)`: one `(high part, run length)` tuple per run of
+    consecutive values sharing a high part, plus every low byte in input order.
+    """
+    runs, low_bytes = [], []
+    for value in singletons:
+        high = value >> 8
+        low_bytes.append(value & 0xff)
+        if runs and runs[-1][0] == high:
+            runs[-1] = (high, runs[-1][1] + 1)
+        else:
+            runs.append((high, 1))
+    return runs, low_bytes
+
+def compress_normal(normal):
+    """Encode `(start, count)` ranges as alternating run lengths.
+
+    Each input range yields one list: the gap since the end of the previous
+    range (starting from 0), then `count`.  A length up to 0x7f takes one byte;
+    a longer one takes two, `0x80 | (length >> 8)` and then `length & 0xff`,
+    so the set high bit marks the two-byte form.  Both lengths must be below
+    0x8000.
+    """
+
+    def encode(length):
+        # One byte when it fits in 7 bits, else flagged high byte + low byte.
+        if length > 0x7f:
+            return [0x80 | (length >> 8), length & 0xff]
+        return [length & 0x7f]
+
+    encoded = []
+    cursor = 0  # end of the previous range (0 before the first one)
+    for start, count in normal:
+        gap = start - cursor
+        cursor = start + count
+
+        # Lengths of 0x8000 or more do not fit the two-byte form.
+        assert gap < 0x8000 and count < 0x8000
+        encoded.append(encode(gap) + encode(count))
+
+    return encoded
+
+def print_singletons(uppers, lowers, uppersname, lowersname):
+    """Print the two singleton tables as Rust consts, low bytes 8 per row."""
+    pair_rows = ["    ({:#04x}, {}),".format(hi, n) for hi, n in uppers]
+    chunks = [lowers[at:at + 8] for at in range(0, len(lowers), 8)]
+    byte_rows = ["    " + " ".join("{:#04x},".format(b) for b in c) for c in chunks]
+    print("const {}: &'static [(u8, u8)] = &[".format(uppersname))
+    print("\n".join(pair_rows + ["];"]))
+    print("const {}: &'static [u8] = &[".format(lowersname))
+    print("\n".join(byte_rows + ["];"]))
+
+def print_normal(normal, normalname):
+    """Print `normal` as a Rust `&'static [u8]` const, one entry per row."""
+    rows = ["    " + " ".join("{:#04x},".format(b) for b in entry) for entry in normal]
+    print("const {}: &'static [u8] = &[".format(normalname))
+    print("\n".join(rows + ["];"]))
+
 def main():
     file = get_file("http://www.unicode.org/Public/UNIDATA/UnicodeData.txt")
 
@@ -111,6 +171,11 @@ def main():
             else:
                 normal0.append((a, b - a))
 
+    singletons0u, singletons0l = compress_singletons(singletons0)
+    singletons1u, singletons1l = compress_singletons(singletons1)
+    normal0 = compress_normal(normal0)
+    normal1 = compress_normal(normal1)
+
     print("""\
 // Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
@@ -125,38 +190,49 @@ def main():
 // NOTE: The following code was generated by "src/etc/char_private.py",
 //       do not edit directly!
 
-use slice::SliceExt;
-
-fn check(x: u16, singletons: &[u16], normal: &[u16]) -> bool {
-    for &s in singletons {
-        if x == s {
-            return false;
-        } else if x < s {
+fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8],
+         normal: &[u8]) -> bool {
+    let xupper = (x >> 8) as u8;
+    let mut lowerstart = 0;
+    for &(upper, lowercount) in singletonuppers {
+        let lowerend = lowerstart + lowercount as usize;
+        if xupper == upper {
+            for &lower in &singletonlowers[lowerstart..lowerend] {
+                if lower == x as u8 {
+                    return false;
+                }
+            }
+        } else if xupper < upper {
             break;
         }
+        lowerstart = lowerend;
     }
-    for w in normal.chunks(2) {
-        let start = w[0];
-        let len = w[1];
-        let difference = (x as i32) - (start as i32);
-        if 0 <= difference {
-            if difference < len as i32 {
-                return false;
-            }
+
+    let mut x = x as i32;
+    let mut normal = normal.iter().cloned();
+    let mut current = true;
+    while let Some(v) = normal.next() {
+        let len = if v & 0x80 != 0 {
+            ((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
         } else {
+            v as i32
+        };
+        x -= len;
+        if x < 0 {
             break;
         }
+        current = !current;
     }
-    true
+    current
 }
 
 pub fn is_printable(x: char) -> bool {
     let x = x as u32;
     let lower = x as u16;
     if x < 0x10000 {
-        check(lower, SINGLETONS0, NORMAL0)
+        check(lower, SINGLETONS0U, SINGLETONS0L, NORMAL0)
     } else if x < 0x20000 {
-        check(lower, SINGLETONS1, NORMAL1)
+        check(lower, SINGLETONS1U, SINGLETONS1L, NORMAL1)
     } else {\
 """)
     for a, b in extra:
@@ -169,22 +245,10 @@ def main():
 }\
 """)
     print()
-    print("const SINGLETONS0: &'static [u16] = &[")
-    for s in singletons0:
-        print("    0x{:x},".format(s))
-    print("];")
-    print("const SINGLETONS1: &'static [u16] = &[")
-    for s in singletons1:
-        print("    0x{:x},".format(s))
-    print("];")
-    print("const NORMAL0: &'static [u16] = &[")
-    for a, b in normal0:
-        print("    0x{:x}, 0x{:x},".format(a, b))
-    print("];")
-    print("const NORMAL1: &'static [u16] = &[")
-    for a, b in normal1:
-        print("    0x{:x}, 0x{:x},".format(a, b))
-    print("];")
+    print_singletons(singletons0u, singletons0l, 'SINGLETONS0U', 'SINGLETONS0L')
+    print_singletons(singletons1u, singletons1l, 'SINGLETONS1U', 'SINGLETONS1L')
+    print_normal(normal0, 'NORMAL0')
+    print_normal(normal1, 'NORMAL1')
 
 if __name__ == '__main__':
     main()