
Commit c52cee1

Committed Mar 28, 2020
Auto merge of #70499 - Dylan-DPC:rollup-f9je1l8, r=Dylan-DPC

Rollup of 5 pull requests

Successful merges:

- #70418 (Add long error explanation for E0703)
- #70448 (Create output dir in rustdoc markdown render)
- #70486 (Shrink Unicode tables (even more))
- #70493 (Fix rustdoc.css CSS tab-size property)
- #70495 (Replace last mention of IRC with Discord)

Failed merges:

r? @ghost
2 parents: e768d6f + e3ccd5b · commit c52cee1

File tree: 14 files changed, +1183 −657 lines
 

‎src/libcore/unicode/mod.rs

Lines changed: 0 additions & 25 deletions
```diff
@@ -32,28 +32,3 @@ pub use unicode_data::lowercase::lookup as Lowercase;
 pub use unicode_data::n::lookup as N;
 pub use unicode_data::uppercase::lookup as Uppercase;
 pub use unicode_data::white_space::lookup as White_Space;
-
-#[inline(always)]
-fn range_search<const N: usize, const N1: usize, const N2: usize>(
-    needle: u32,
-    chunk_idx_map: &[u8; N],
-    (last_chunk_idx, last_chunk_mapping): (u16, u8),
-    bitset_chunk_idx: &[[u8; 16]; N1],
-    bitset: &[u64; N2],
-) -> bool {
-    let bucket_idx = (needle / 64) as usize;
-    let chunk_map_idx = bucket_idx / 16;
-    let chunk_piece = bucket_idx % 16;
-    let chunk_idx = if chunk_map_idx >= N {
-        if chunk_map_idx == last_chunk_idx as usize {
-            last_chunk_mapping
-        } else {
-            return false;
-        }
-    } else {
-        chunk_idx_map[chunk_map_idx]
-    };
-    let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece];
-    let word = bitset[(idx as usize)];
-    (word & (1 << (needle % 64) as u64)) != 0
-}
```
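Note: this removal does not drop functionality. As the unicode-table-generator change below shows, the generated unicode_data.rs now carries its own copy of the search routines (spliced in via `include_str!("range_search.rs")`), so libcore no longer needs this shared helper.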

‎src/libcore/unicode/unicode_data.rs

Lines changed: 443 additions & 514 deletions
Large diffs are not rendered by default.

‎src/librustc_error_codes/error_codes.rs

Lines changed: 1 addition & 1 deletion
```diff
@@ -389,6 +389,7 @@ E0698: include_str!("./error_codes/E0698.md"),
 E0699: include_str!("./error_codes/E0699.md"),
 E0700: include_str!("./error_codes/E0700.md"),
 E0701: include_str!("./error_codes/E0701.md"),
+E0703: include_str!("./error_codes/E0703.md"),
 E0704: include_str!("./error_codes/E0704.md"),
 E0705: include_str!("./error_codes/E0705.md"),
 E0706: include_str!("./error_codes/E0706.md"),
@@ -603,7 +604,6 @@ E0751: include_str!("./error_codes/E0751.md"),
 // E0694, // an unknown tool name found in scoped attributes
 E0696, // `continue` pointing to a labeled block
 // E0702, // replaced with a generic attribute input check
-E0703, // invalid ABI
 // E0707, // multiple elided lifetimes used in arguments of `async fn`
 E0708, // `async` non-`move` closures with parameters are not currently
 // supported
```
‎src/librustc_error_codes/error_codes/E0703.md (new file)

Lines changed: 17 additions & 0 deletions

```diff
@@ -0,0 +1,17 @@
+Invalid ABI (Application Binary Interface) used in the code.
+
+Erroneous code example:
+
+```compile_fail,E0703
+extern "invalid" fn foo() {} // error!
+# fn main() {}
+```
+
+At present, only a few predefined ABIs (such as Rust, C, and system) can be
+used in Rust. Verify that the ABI is predefined; for example, you can
+replace the given ABI with 'Rust'.
+
+```
+extern "Rust" fn foo() {} // ok!
+# fn main() { }
+```
```
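As a quick sanity check of the fix, any predefined ABI compiles; a minimal sketch using the "C" ABI (an additional illustration only, not part of the committed docs):

```rust
// "C" is another predefined ABI, accepted just like "Rust".
extern "C" fn bar() {}

fn main() {
    bar();
}
```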

‎src/librustdoc/html/static/rustdoc.css

Lines changed: 2 additions & 2 deletions
```diff
@@ -1082,8 +1082,8 @@ h3 > .collapse-toggle, h4 > .collapse-toggle {
 
 pre.rust {
     position: relative;
-    tab-width: 4;
-    -moz-tab-width: 4;
+    tab-size: 4;
+    -moz-tab-size: 4;
 }
 
 .search-failed {
```
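(`tab-width` and `-moz-tab-width` are not real CSS properties; the standard property is `tab-size`, with `-moz-tab-size` as the vendor-prefixed spelling for older Firefox, which is what this fix switches to.)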

‎src/librustdoc/markdown.rs

Lines changed: 6 additions & 1 deletion
```diff
@@ -1,4 +1,4 @@
-use std::fs::File;
+use std::fs::{create_dir_all, File};
 use std::io::prelude::*;
 use std::path::PathBuf;
 
@@ -40,6 +40,11 @@ pub fn render(
     diag: &rustc_errors::Handler,
     edition: Edition,
 ) -> i32 {
+    if let Err(e) = create_dir_all(&options.output) {
+        diag.struct_err(&format!("{}: {}", options.output.display(), e)).emit();
+        return 4;
+    }
+
     let mut output = options.output;
     output.push(input.file_name().unwrap());
     output.set_extension("html");
```
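Note that `std::fs::create_dir_all` succeeds when the directory already exists, so this guard only aborts (with exit code 4) on genuine I/O failures such as permission errors.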

‎src/libstd/lib.rs

Lines changed: 3 additions & 1 deletion
```diff
@@ -91,7 +91,8 @@
 //! pull-requests for your suggested changes.
 //!
 //! Contributions are appreciated! If you see a part of the docs that can be
-//! improved, submit a PR, or chat with us first on irc.mozilla.org #rust-docs.
+//! improved, submit a PR, or chat with us first on [Discord][rust-discord]
+//! #docs.
 //!
 //! # A Tour of The Rust Standard Library
 //!
@@ -194,6 +195,7 @@
 //! [multithreading]: thread/index.html
 //! [other]: #what-is-in-the-standard-library-documentation
 //! [primitive types]: ../book/ch03-02-data-types.html
+//! [rust-discord]: https://discord.gg/rust-lang
 
 #![stable(feature = "rust1", since = "1.0.0")]
 #![doc(
```

‎src/test/ui/codemap_tests/unicode.stderr

Lines changed: 1 addition & 0 deletions
```diff
@@ -8,3 +8,4 @@ LL | extern "路濫狼á́́" fn foo() {}
 
 error: aborting due to previous error
 
+For more information about this error, try `rustc --explain E0703`.
```

‎src/test/ui/parser/issue-8537.stderr

Lines changed: 1 addition & 0 deletions
```diff
@@ -8,3 +8,4 @@ LL | "invalid-ab_isize"
 
 error: aborting due to previous error
 
+For more information about this error, try `rustc --explain E0703`.
```

‎src/tools/unicode-table-generator/src/main.rs

Lines changed: 182 additions & 4 deletions
```diff
@@ -1,9 +1,83 @@
+//! This implements the core logic of the compression scheme used to compactly
+//! encode Unicode properties.
+//!
+//! We have two primary goals with the encoding: we want to be compact, because
+//! these tables often end up in ~every Rust program (especially the
+//! grapheme_extend table, used for str debugging), including those for embedded
+//! targets (where space is important). We also want to be relatively fast,
+//! though this is more of a nice to have rather than a key design constraint.
+//! It is expected that libraries/applications which are performance-sensitive
+//! to Unicode property lookups are extremely rare, and those that care may find
+//! the tradeoff of the raw bitsets worth it. For most applications, a
+//! relatively fast but much smaller (and as such less cache-impacting, etc.)
+//! data set is likely preferable.
+//!
+//! We have two separate encoding schemes: a skiplist-like approach, and a
+//! compressed bitset. The datasets we consider mostly use the skiplist (it's
+//! smaller) but the lowercase and uppercase sets are sufficiently sparse for
+//! the bitset to be worthwhile -- for those sets the bitset is a 2x size win.
+//! Since the bitset is also faster, this seems an obvious choice. (As a
+//! historical note, the bitset was also the prior implementation, so its
+//! relative complexity had already been paid).
+//!
+//! ## The bitset
+//!
+//! The primary idea is that we 'flatten' the Unicode ranges into an enormous
+//! bitset. To represent any arbitrary codepoint in a raw bitset, we would need
+//! over 17 kilobytes of data per character set -- way too much for our
+//! purposes.
+//!
+//! First, the raw bitset (one bit for every valid `char`, from 0 to 0x10FFFF,
+//! not skipping the small 'gap') is associated into words (u64) and
+//! deduplicated. On random data, this would be useless; on our data, this is
+//! incredibly beneficial -- our data sets have (far) less than 256 unique
+//! words.
+//!
+//! This gives us an array that maps `u8 -> word`; the current algorithm does
+//! not handle the case of more than 256 unique words, but we are relatively far
+//! from coming that close.
+//!
+//! With that scheme, we now have a single byte for every 64 codepoints.
+//!
+//! We further chunk these by some constant N (between 1 and 64 per group,
+//! dynamically chosen for smallest size), and again deduplicate and store in an
+//! array (u8 -> [u8; N]).
+//!
+//! The bytes of this array map into the words from the bitset above, but we
+//! apply another trick here: some of these words are similar enough that they
+//! can be represented by some function of another word. The particular
+//! functions chosen are rotation, inversion, and shifting (right).
+//!
+//! ## The skiplist
+//!
+//! The skip list arose out of the desire for an even smaller encoding than the
+//! bitset -- and was the answer to the question "what is the smallest
+//! representation we can imagine?". However, it is not necessarily the
+//! smallest, and if you have a better proposal, please do suggest it!
+//!
+//! This is a relatively straightforward encoding. First, we break up all the
+//! ranges in the input data into offsets from each other, essentially a gap
+//! encoding. In practice, most gaps are small -- less than u8::MAX -- so we
+//! store those directly. We make use of the larger gaps (which are nicely
+//! interspersed already) throughout the dataset to index this data set.
+//!
+//! In particular, each run of small gaps (terminating in a large gap) is
+//! indexed in a separate dataset. That data set stores an index into the
+//! primary offset list and a prefix sum of that offset list. These are packed
+//! into a single u32 (11 bits for the offset, 21 bits for the prefix sum).
+//!
+//! Lookup proceeds via a binary search in the index and then a straightforward
+//! linear scan (adding up the offsets) until we reach the needle, and then the
+//! index of that offset is utilized as the answer to whether we're in the set
+//! or not.
+
 use std::collections::{BTreeMap, HashMap};
 use std::ops::Range;
 use ucd_parse::Codepoints;
 
 mod case_mapping;
 mod raw_emitter;
+mod skiplist;
 mod unicode_download;
 
 use raw_emitter::{emit_codepoints, RawEmitter};
@@ -152,9 +226,17 @@ fn main() {
         std::process::exit(1);
     });
 
+    // Optional test path, which is a Rust source file testing that the unicode
+    // property lookups are correct.
+    let test_path = std::env::args().nth(2);
+
     let unicode_data = load_data();
     let ranges_by_property = &unicode_data.ranges;
 
+    if let Some(path) = test_path {
+        std::fs::write(&path, generate_tests(&write_location, &ranges_by_property)).unwrap();
+    }
+
     let mut total_bytes = 0;
     let mut modules = Vec::new();
     for (property, ranges) in ranges_by_property {
@@ -163,7 +245,16 @@
         emit_codepoints(&mut emitter, &ranges);
 
         modules.push((property.to_lowercase().to_string(), emitter.file));
-        println!("{:15}: {} bytes, {} codepoints", property, emitter.bytes_used, datapoints,);
+        println!(
+            "{:15}: {} bytes, {} codepoints in {} ranges ({} - {}) using {}",
+            property,
+            emitter.bytes_used,
+            datapoints,
+            ranges.len(),
+            ranges.first().unwrap().start,
+            ranges.last().unwrap().end,
+            emitter.desc,
+        );
         total_bytes += emitter.bytes_used;
     }
 
@@ -173,7 +264,10 @@
         "///! This file is generated by src/tools/unicode-table-generator; do not edit manually!\n",
     );
 
-    table_file.push_str("use super::range_search;\n\n");
+    // Include the range search function
+    table_file.push('\n');
+    table_file.push_str(include_str!("range_search.rs"));
+    table_file.push('\n');
 
     table_file.push_str(&version());
 
@@ -236,26 +330,110 @@ fn fmt_list<V: std::fmt::Debug>(values: impl IntoIterator<Item = V>) -> String {
     out
 }
 
+fn generate_tests(data_path: &str, ranges: &[(&str, Vec<Range<u32>>)]) -> String {
+    let mut s = String::new();
+    s.push_str("#![allow(incomplete_features, unused)]\n");
+    s.push_str("#![feature(const_generics)]\n\n");
+    s.push_str("\n#[allow(unused)]\nuse std::hint;\n");
+    s.push_str(&format!("#[path = \"{}\"]\n", data_path));
+    s.push_str("mod unicode_data;\n\n");
+
+    s.push_str("\nfn main() {\n");
+
+    for (property, ranges) in ranges {
+        s.push_str(&format!(r#"    println!("Testing {}");"#, property));
+        s.push('\n');
+        s.push_str(&format!("    {}_true();\n", property.to_lowercase()));
+        s.push_str(&format!("    {}_false();\n", property.to_lowercase()));
+        let mut is_true = Vec::new();
+        let mut is_false = Vec::new();
+        for ch_num in 0..(std::char::MAX as u32) {
+            if std::char::from_u32(ch_num).is_none() {
+                continue;
+            }
+            if ranges.iter().any(|r| r.contains(&ch_num)) {
+                is_true.push(ch_num);
+            } else {
+                is_false.push(ch_num);
+            }
+        }
+
+        s.push_str(&format!("    fn {}_true() {{\n", property.to_lowercase()));
+        generate_asserts(&mut s, property, &is_true, true);
+        s.push_str("    }\n\n");
+        s.push_str(&format!("    fn {}_false() {{\n", property.to_lowercase()));
+        generate_asserts(&mut s, property, &is_false, false);
+        s.push_str("    }\n\n");
+    }
+
+    s.push_str("}");
+    s
+}
+
+fn generate_asserts(s: &mut String, property: &str, points: &[u32], truthy: bool) {
+    for range in ranges_from_set(points) {
+        if range.end == range.start + 1 {
+            s.push_str(&format!(
+                "        assert!({}unicode_data::{}::lookup({:?}), \"{}\");\n",
+                if truthy { "" } else { "!" },
+                property.to_lowercase(),
+                std::char::from_u32(range.start).unwrap(),
+                range.start,
+            ));
+        } else {
+            s.push_str(&format!("        for chn in {:?}u32 {{\n", range));
+            s.push_str(&format!(
+                "            assert!({}unicode_data::{}::lookup(std::char::from_u32(chn).unwrap()), \"{{:?}}\", chn);\n",
+                if truthy { "" } else { "!" },
+                property.to_lowercase(),
+            ));
+            s.push_str("        }\n");
+        }
+    }
+}
+
+fn ranges_from_set(set: &[u32]) -> Vec<Range<u32>> {
+    let mut ranges = set.iter().map(|e| (*e)..(*e + 1)).collect::<Vec<Range<u32>>>();
+    merge_ranges(&mut ranges);
+    ranges
+}
+
 fn merge_ranges(ranges: &mut Vec<Range<u32>>) {
     loop {
         let mut new_ranges = Vec::new();
         let mut idx_iter = 0..(ranges.len() - 1);
+        let mut should_insert_last = true;
         while let Some(idx) = idx_iter.next() {
             let cur = ranges[idx].clone();
             let next = ranges[idx + 1].clone();
             if cur.end == next.start {
-                let _ = idx_iter.next(); // skip next as we're merging it in
+                if idx_iter.next().is_none() {
+                    // We're merging the last element
+                    should_insert_last = false;
+                }
                 new_ranges.push(cur.start..next.end);
             } else {
+                // We're *not* merging the last element
+                should_insert_last = true;
                 new_ranges.push(cur);
             }
         }
-        new_ranges.push(ranges.last().unwrap().clone());
+        if should_insert_last {
+            new_ranges.push(ranges.last().unwrap().clone());
+        }
         if new_ranges.len() == ranges.len() {
             *ranges = new_ranges;
             break;
         } else {
             *ranges = new_ranges;
        }
    }
+
+    let mut last_end = None;
+    for range in ranges {
+        if let Some(last) = last_end {
+            assert!(range.start > last, "{:?}", range);
+        }
+        last_end = Some(range.end);
+    }
 }
```
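The module docs above describe the first step of the bitset scheme: bucketing codepoints into u64 words and deduplicating them into a small table indexed by a byte. A minimal, self-contained sketch of just that step (toy data and an invented helper name; the generator's real code lives in raw_emitter.rs, whose diff is not rendered here):

```rust
use std::collections::HashMap;
use std::convert::TryFrom;

// Hypothetical helper, not the generator's actual API: bucket a set of
// codepoints into 64-bit words, then deduplicate the words, returning the
// unique words plus one index byte per bucket (the `u8 -> word` map the
// docs describe).
fn dedup_words(set: &[u32]) -> (Vec<u64>, Vec<u8>) {
    let max = set.iter().max().copied().unwrap_or(0);
    let mut words = vec![0u64; (max / 64 + 1) as usize];
    for &cp in set {
        words[(cp / 64) as usize] |= 1u64 << (cp % 64);
    }
    let mut unique: Vec<u64> = Vec::new();
    let mut index_of: HashMap<u64, u8> = HashMap::new();
    let mut bucket_indices = Vec::with_capacity(words.len());
    for w in words {
        let idx = *index_of.entry(w).or_insert_with(|| {
            unique.push(w);
            // Mirrors the documented limit: at most 256 unique words.
            u8::try_from(unique.len() - 1).expect("more than 256 unique words")
        });
        bucket_indices.push(idx);
    }
    (unique, bucket_indices)
}

fn main() {
    // A sparse toy set: almost every bucket is the all-zeros word, so the
    // buckets overwhelmingly share a single canonical word.
    let (unique, buckets) = dedup_words(&[3, 700, 701, 65_000]);
    assert_eq!(unique.len(), 4); // zero word + three distinct non-zero words
    assert_eq!(buckets.len(), 65_000 / 64 + 1);
    println!("{} buckets share {} unique words", buckets.len(), unique.len());
}
```

On random data the word table would be nearly as large as the raw bitset; it is only the extreme repetition in Unicode property data that makes this pay off.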
‎src/tools/unicode-table-generator/src/range_search.rs (new file)

Lines changed: 93 additions & 0 deletions

```diff
@@ -0,0 +1,93 @@
+#[inline(always)]
+fn bitset_search<
+    const N: usize,
+    const CHUNK_SIZE: usize,
+    const N1: usize,
+    const CANONICAL: usize,
+    const CANONICALIZED: usize,
+>(
+    needle: u32,
+    chunk_idx_map: &[u8; N],
+    bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
+    bitset_canonical: &[u64; CANONICAL],
+    bitset_canonicalized: &[(u8, u8); CANONICALIZED],
+) -> bool {
+    let bucket_idx = (needle / 64) as usize;
+    let chunk_map_idx = bucket_idx / CHUNK_SIZE;
+    let chunk_piece = bucket_idx % CHUNK_SIZE;
+    let chunk_idx = if let Some(&v) = chunk_idx_map.get(chunk_map_idx) {
+        v
+    } else {
+        return false;
+    };
+    let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize;
+    let word = if let Some(word) = bitset_canonical.get(idx) {
+        *word
+    } else {
+        let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()];
+        let mut word = bitset_canonical[real_idx as usize];
+        let should_invert = mapping & (1 << 6) != 0;
+        if should_invert {
+            word = !word;
+        }
+        // Lower 6 bits
+        let quantity = mapping & ((1 << 6) - 1);
+        if mapping & (1 << 7) != 0 {
+            // shift
+            word >>= quantity as u64;
+        } else {
+            word = word.rotate_left(quantity as u32);
+        }
+        word
+    };
+    (word & (1 << (needle % 64) as u64)) != 0
+}
+
+fn decode_prefix_sum(short_offset_run_header: u32) -> u32 {
+    short_offset_run_header & ((1 << 21) - 1)
+}
+
+fn decode_length(short_offset_run_header: u32) -> usize {
+    (short_offset_run_header >> 21) as usize
+}
+
+#[inline(always)]
+fn skip_search<const SOR: usize, const OFFSETS: usize>(
+    needle: u32,
+    short_offset_runs: &[u32; SOR],
+    offsets: &[u8; OFFSETS],
+) -> bool {
+    // Note that this *cannot* be past the end of the array, as the last
+    // element is greater than std::char::MAX (the largest possible needle).
+    //
+    // So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the correct
+    // location cannot be past it, so Err(idx) != length either.
+    //
+    // This means that we can avoid bounds checking for the accesses below, too.
+    let last_idx =
+        match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header << 11) {
+            Ok(idx) => idx + 1,
+            Err(idx) => idx,
+        };
+
+    let mut offset_idx = decode_length(short_offset_runs[last_idx]);
+    let length = if let Some(next) = short_offset_runs.get(last_idx + 1) {
+        decode_length(*next) - offset_idx
+    } else {
+        offsets.len() - offset_idx
+    };
+    let prev =
+        last_idx.checked_sub(1).map(|prev| decode_prefix_sum(short_offset_runs[prev])).unwrap_or(0);
+
+    let total = needle - prev;
+    let mut prefix_sum = 0;
+    for _ in 0..(length - 1) {
+        let offset = offsets[offset_idx];
+        prefix_sum += offset as u32;
+        if prefix_sum > total {
+            break;
+        }
+        offset_idx += 1;
+    }
+    offset_idx % 2 == 1
+}
```
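For reference, the `mapping` byte consumed by `bitset_search` packs three fields: bit 7 selects shift-right over rotate-left, bit 6 requests inversion, and the low six bits carry the amount. A free-standing sketch of that decode step with toy values (`apply_mapping` is a hypothetical name; the real logic is inlined above):

```rust
/// Mirror of the word-reconstruction logic in `bitset_search`:
/// bit 7 = shift (vs. rotate-left), bit 6 = invert, bits 0-5 = amount.
fn apply_mapping(canonical: u64, mapping: u8) -> u64 {
    let mut word = canonical;
    if mapping & (1 << 6) != 0 {
        word = !word;
    }
    let quantity = mapping & ((1 << 6) - 1);
    if mapping & (1 << 7) != 0 {
        word >>= quantity as u64;
    } else {
        word = word.rotate_left(quantity as u32);
    }
    word
}

fn main() {
    let canonical = 0b1011u64;
    // Rotate left by 1 (bits 7 and 6 clear, amount = 1).
    assert_eq!(apply_mapping(canonical, 1), 0b10110);
    // Invert, then shift right by 62 (bits 7 and 6 set, amount = 62).
    assert_eq!(apply_mapping(canonical, 0b1100_0000 | 62), 0b11);
    println!("mapping decode ok");
}
```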

‎src/tools/unicode-table-generator/src/raw_emitter.rs

Lines changed: 328 additions & 106 deletions
Large diffs are not rendered by default.
‎src/tools/unicode-table-generator/src/skiplist.rs (new file)

Lines changed: 98 additions & 0 deletions

```diff
@@ -0,0 +1,98 @@
+use crate::fmt_list;
+use crate::raw_emitter::RawEmitter;
+use std::convert::TryInto;
+use std::fmt::Write as _;
+use std::ops::Range;
+
+/// This will get packed into a single u32 before inserting into the data set.
+#[derive(Debug, PartialEq)]
+struct ShortOffsetRunHeader {
+    /// Note, we only allow for 21 bits here.
+    prefix_sum: u32,
+
+    /// Note, we actually only allow for 11 bits here. This should be enough --
+    /// our largest sets are around ~1400 offsets long.
+    start_idx: u16,
+}
+
+impl ShortOffsetRunHeader {
+    fn pack(&self) -> u32 {
+        assert!(self.start_idx < (1 << 11));
+        assert!(self.prefix_sum < (1 << 21));
+
+        (self.start_idx as u32) << 21 | self.prefix_sum
+    }
+}
+
+impl RawEmitter {
+    pub fn emit_skiplist(&mut self, ranges: &[Range<u32>]) {
+        let mut offsets = Vec::<u32>::new();
+        let points = ranges.iter().flat_map(|r| vec![r.start, r.end]).collect::<Vec<u32>>();
+        let mut offset = 0;
+        for pt in points {
+            let delta = pt - offset;
+            offsets.push(delta);
+            offset = pt;
+        }
+        // Guaranteed to terminate, as it's impossible to subtract a value this
+        // large from a valid char.
+        offsets.push(std::char::MAX as u32 + 1);
+        let mut coded_offsets: Vec<u8> = Vec::new();
+        let mut short_offset_runs: Vec<ShortOffsetRunHeader> = vec![];
+        let mut iter = offsets.iter().cloned();
+        let mut prefix_sum = 0;
+        loop {
+            let mut any_elements = false;
+            let mut inserted = false;
+            let start = coded_offsets.len();
+            for offset in iter.by_ref() {
+                any_elements = true;
+                prefix_sum += offset;
+                if let Ok(offset) = offset.try_into() {
+                    coded_offsets.push(offset);
+                } else {
+                    short_offset_runs.push(ShortOffsetRunHeader {
+                        start_idx: start.try_into().unwrap(),
+                        prefix_sum,
+                    });
+                    // This is just needed to maintain indices even/odd
+                    // correctly.
+                    coded_offsets.push(0);
+                    inserted = true;
+                    break;
+                }
+            }
+            if !any_elements {
+                break;
+            }
+            // We always append the huge char::MAX offset to the end which
+            // should never be able to fit into the u8 offsets.
+            assert!(inserted);
+        }
+
+        writeln!(
+            &mut self.file,
+            "static SHORT_OFFSET_RUNS: [u32; {}] = [{}];",
+            short_offset_runs.len(),
+            fmt_list(short_offset_runs.iter().map(|v| v.pack()))
+        )
+        .unwrap();
+        self.bytes_used += 4 * short_offset_runs.len();
+        writeln!(
+            &mut self.file,
+            "static OFFSETS: [u8; {}] = [{}];",
+            coded_offsets.len(),
+            fmt_list(&coded_offsets)
+        )
+        .unwrap();
+        self.bytes_used += coded_offsets.len();
+
+        writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
+        writeln!(&mut self.file, "    super::skip_search(",).unwrap();
+        writeln!(&mut self.file, "        c as u32,").unwrap();
+        writeln!(&mut self.file, "        &SHORT_OFFSET_RUNS,").unwrap();
+        writeln!(&mut self.file, "        &OFFSETS,").unwrap();
+        writeln!(&mut self.file, "    )").unwrap();
+        writeln!(&mut self.file, "}}").unwrap();
+    }
+}
```
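The `pack` layout here has to agree with `decode_prefix_sum` and `decode_length` in range_search.rs (11 bits of run index above 21 bits of prefix sum). A minimal round-trip check of that agreement (free-standing copies of the functions, toy values):

```rust
// Standalone copies of the packing/decoding logic, for illustration only.
fn pack(start_idx: u16, prefix_sum: u32) -> u32 {
    assert!(start_idx < (1 << 11));
    assert!(prefix_sum < (1 << 21));
    (start_idx as u32) << 21 | prefix_sum
}

fn decode_prefix_sum(header: u32) -> u32 {
    header & ((1 << 21) - 1)
}

fn decode_length(header: u32) -> usize {
    (header >> 21) as usize
}

fn main() {
    // 1400 matches the "~1400 offsets" bound mentioned in the doc comment.
    let header = pack(1400, 0x0F_FFFF);
    assert_eq!(decode_length(header), 1400);
    assert_eq!(decode_prefix_sum(header), 0x0F_FFFF);
    println!("header round-trip ok");
}
```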

‎src/tools/unicode-table-generator/src/unicode_download.rs

Lines changed: 8 additions & 3 deletions
```diff
@@ -11,10 +11,15 @@ static RESOURCES: &[&str] =
 
 pub fn fetch_latest() {
     let directory = Path::new(UNICODE_DIRECTORY);
+    if directory.exists() {
+        eprintln!(
+            "Not refetching unicode data, already exists, please delete {:?} to regenerate",
+            directory
+        );
+        return;
+    }
     if let Err(e) = std::fs::create_dir_all(directory) {
-        if e.kind() != std::io::ErrorKind::AlreadyExists {
-            panic!("Failed to create {:?}: {}", UNICODE_DIRECTORY, e);
-        }
+        panic!("Failed to create {:?}: {}", UNICODE_DIRECTORY, e);
     }
     let output = Command::new("curl").arg(URL_PREFIX.to_owned() + README).output().unwrap();
     if !output.status.success() {
```
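Behavioral note: previously an existing directory was tolerated (the AlreadyExists error was swallowed) and the data was fetched again over it; now an existing directory skips the download entirely, and any `create_dir_all` failure is fatal.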
