Skip to content

Commit 4fdfc59

Browse files
More CI checks
- Check formatting - Check clippy - Check MSRV - Check docs
1 parent 32659a7 commit 4fdfc59

13 files changed

+9925
-24623
lines changed

.github/workflows/rust.yml

Lines changed: 31 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,11 @@ on:
77
branches: [ master ]
88

99
env:
10+
CARGO_INCREMENTAL: 0
1011
CARGO_TERM_COLOR: always
12+
RUST_BACKTRACE: 1
13+
RUSTFLAGS: -D warnings
14+
RUSTDOCFLAGS: -D warnings --cfg docsrs
1115

1216
jobs:
1317
build:
@@ -20,27 +24,50 @@ jobs:
2024
- nightly
2125
steps:
2226
- uses: actions/checkout@v2
23-
- name: Install latest nightly
27+
- name: Install toolchain
2428
uses: actions-rs/toolchain@v1
2529
with:
2630
toolchain: ${{ matrix.rust }}
2731
override: true
2832
- name: Build
2933
run: cargo build --verbose
30-
- name: Run tests
31-
run: cargo test --verbose
34+
- name: Run tests with all features
35+
run: cargo test --all-features --verbose
3236
- name: Run tests without features
33-
run: cargo test --verbose --no-default-features
37+
run: cargo test --no-default-features --verbose
3438
- name: Package
3539
run: cargo package
3640
- name: Test package
3741
run: cd $(find target/package/ -maxdepth 1 -mindepth 1 -type d) && cargo test
3842
- name: Test package without features
3943
run: cd $(find target/package/ -maxdepth 1 -mindepth 1 -type d) && cargo test --no-default-features
44+
- name: Build docs
45+
if: matrix.rust == 'nightly'
46+
run: cargo doc --all-features --verbose
47+
- name: Check formatting
48+
if: matrix.rust == 'stable'
49+
run: cargo fmt --all --check
50+
- name: Check clippy
51+
if: matrix.rust == 'stable'
52+
run: cargo clippy --all-features --all --verbose
53+
msrv:
54+
runs-on: ubuntu-latest
55+
steps:
56+
- uses: actions/checkout@v2
57+
- name: Install msrv toolchain
58+
uses: actions-rs/toolchain@v1
59+
with:
60+
toolchain: 1.36
61+
override: true
62+
- name: Build
63+
run: cargo build --verbose --all-features
4064
regen:
4165
runs-on: ubuntu-latest
4266
steps:
4367
- uses: actions/checkout@v3
68+
- uses: actions/setup-python@v5
69+
with:
70+
python-version: '3.12'
4471
- name: Regen
4572
run: cd scripts && python3 unicode.py
4673
- name: Diff

scripts/unicode.py

File mode changed: 100644 → 100755
Lines changed: 32 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
# out-of-line and check the tables.rs and normalization_tests.rs files into git.
2121
import collections
2222
import urllib.request
23+
from itertools import batched
2324

2425
UNICODE_VERSION = "15.1.0"
2526
UCD_URL = "https://www.unicode.org/Public/%s/ucd/" % UNICODE_VERSION
@@ -100,9 +101,9 @@ def _load_unicode_data(self):
100101
self.general_category_mark = []
101102
self.general_category_public_assigned = []
102103

103-
assigned_start = 0;
104-
prev_char_int = -1;
105-
prev_name = "";
104+
assigned_start = 0
105+
prev_char_int = -1
106+
prev_name = ""
106107

107108
for line in self._fetch("UnicodeData.txt").splitlines():
108109
# See ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html
@@ -131,7 +132,7 @@ def _load_unicode_data(self):
131132
self.general_category_public_assigned.append((assigned_start, prev_char_int))
132133
assigned_start = char_int
133134
prev_char_int = char_int
134-
prev_name = name;
135+
prev_name = name
135136

136137
self.general_category_public_assigned.append((assigned_start, prev_char_int))
137138

@@ -158,16 +159,16 @@ def _load_cjk_compat_ideograph_variants(self):
158159

159160
char_int = self.name_to_char_int[description]
160161

161-
assert not char_int in self.combining_classes, "Unexpected: CJK compat variant with a combining class"
162-
assert not char_int in self.compat_decomp, "Unexpected: CJK compat variant and compatibility decomposition"
162+
assert char_int not in self.combining_classes, "Unexpected: CJK compat variant with a combining class"
163+
assert char_int not in self.compat_decomp, "Unexpected: CJK compat variant and compatibility decomposition"
163164
assert len(self.canon_decomp[char_int]) == 1, "Unexpected: CJK compat variant and non-singleton canonical decomposition"
164165
# If we ever need to handle Hangul here, we'll need to handle it separately.
165166
assert not (S_BASE <= char_int < S_BASE + S_COUNT)
166167

167168
cjk_compat_variant_parts = [int(c, 16) for c in variation_sequence.split()]
168169
for c in cjk_compat_variant_parts:
169-
assert not c in self.canon_decomp, "Unexpected: CJK compat variant is unnormalized (canon)"
170-
assert not c in self.compat_decomp, "Unexpected: CJK compat variant is unnormalized (compat)"
170+
assert c not in self.canon_decomp, "Unexpected: CJK compat variant is unnormalized (canon)"
171+
assert c not in self.compat_decomp, "Unexpected: CJK compat variant is unnormalized (compat)"
171172
self.cjk_compat_variants_fully_decomp[char_int] = cjk_compat_variant_parts
172173

173174
def _load_norm_props(self):
@@ -354,20 +355,26 @@ def is_first_and_last(first, last):
354355
return False
355356
return first[1:-8] == last[1:-7]
356357

357-
def gen_mph_data(name, d, kv_type, kv_callback):
358+
def gen_mph_data(name, d, kv_type, kv_callback, kv_row_width):
358359
(salt, keys) = minimal_perfect_hash(d)
359-
out.write("pub(crate) const %s_SALT: &[u16] = &[\n" % name.upper())
360-
for s in salt:
361-
out.write(" 0x{:x},\n".format(s))
360+
out.write(f"\npub(crate) const {name.upper()}_SALT: &[u16] = &[\n")
361+
for s_row in batched(salt, 13):
362+
out.write(" ")
363+
for s in s_row:
364+
out.write(f" 0x{s:03X},")
365+
out.write("\n")
366+
out.write("];\n")
367+
out.write(f"pub(crate) const {name.upper()}_KV: &[{kv_type}] = &[\n")
368+
for k_row in batched(keys, kv_row_width):
369+
out.write(" ")
370+
for k in k_row:
371+
out.write(f" {kv_callback(k)},")
372+
out.write("\n")
362373
out.write("];\n")
363-
out.write("pub(crate) const {}_KV: &[{}] = &[\n".format(name.upper(), kv_type))
364-
for k in keys:
365-
out.write(" {},\n".format(kv_callback(k)))
366-
out.write("];\n\n")
367374

368375
def gen_combining_class(combining_classes, out):
369376
gen_mph_data('canonical_combining_class', combining_classes, 'u32',
370-
lambda k: "0x{:X}".format(int(combining_classes[k]) | (k << 8)))
377+
lambda k: f"0x{int(combining_classes[k]) | (k << 8):07X}", 8)
371378

372379
def gen_composition_table(canon_comp, out):
373380
table = {}
@@ -376,7 +383,7 @@ def gen_composition_table(canon_comp, out):
376383
table[(c1 << 16) | c2] = c3
377384
(salt, keys) = minimal_perfect_hash(table)
378385
gen_mph_data('COMPOSITION_TABLE', table, '(u32, char)',
379-
lambda k: "(0x%s, '\\u{%s}')" % (hexify(k), hexify(table[k])))
386+
lambda k: f"(0x{k:08X}, '\\u{{{table[k]:06X}}}')", 1)
380387

381388
out.write("pub(crate) fn composition_table_astral(c1: char, c2: char) -> Option<char> {\n")
382389
out.write(" match (c1, c2) {\n")
@@ -403,7 +410,7 @@ def gen_decomposition_tables(canon_decomp, compat_decomp, cjk_compat_variants_de
403410
assert offset < 65536
404411
out.write("];\n")
405412
gen_mph_data(name + '_decomposed', table, "(u32, (u16, u16))",
406-
lambda k: "(0x{:x}, ({}, {}))".format(k, offsets[k], len(table[k])))
413+
lambda k: f"(0x{k:05X}, (0x{offsets[k]:03X}, 0x{len(table[k]):X}))", 1)
407414

408415
def gen_qc_match(prop_table, out):
409416
out.write(" match c {\n")
@@ -421,7 +428,7 @@ def gen_qc_match(prop_table, out):
421428
out.write(" }\n")
422429

423430
def gen_nfc_qc(prop_tables, out):
424-
out.write("#[inline]\n")
431+
out.write("\n#[inline]\n")
425432
out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
426433
out.write("pub fn qc_nfc(c: char) -> IsNormalized {\n")
427434
gen_qc_match(prop_tables['NFC_QC'], out)
@@ -450,13 +457,13 @@ def gen_nfkd_qc(prop_tables, out):
450457

451458
def gen_combining_mark(general_category_mark, out):
452459
gen_mph_data('combining_mark', general_category_mark, 'u32',
453-
lambda k: '0x{:04x}'.format(k))
460+
lambda k: '0x{:05X}'.format(k), 10)
454461

455462
def gen_public_assigned(general_category_public_assigned, out):
456463
# This could be done as a hash but the table is somewhat small.
457464
out.write("#[inline]\n")
458465
out.write("pub fn is_public_assigned(c: char) -> bool {\n")
459-
out.write(" match c {\n")
466+
out.write(" matches!(c,\n")
460467

461468
start = True
462469
for first, last in general_category_public_assigned:
@@ -469,12 +476,9 @@ def gen_public_assigned(general_category_public_assigned, out):
469476
out.write("'\\u{%s}'\n" % hexify(first))
470477
else:
471478
out.write("'\\u{%s}'..='\\u{%s}'\n" % (hexify(first), hexify(last)))
472-
out.write(" => true,\n")
473479

474-
out.write(" _ => false,\n")
475-
out.write(" }\n")
480+
out.write(" )\n")
476481
out.write("}\n")
477-
out.write("\n")
478482

479483
def gen_stream_safe(leading, trailing, out):
480484
# This could be done as a hash but the table is very small.
@@ -488,10 +492,9 @@ def gen_stream_safe(leading, trailing, out):
488492
out.write(" _ => 0,\n")
489493
out.write(" }\n")
490494
out.write("}\n")
491-
out.write("\n")
492495

493496
gen_mph_data('trailing_nonstarters', trailing, 'u32',
494-
lambda k: "0x{:X}".format(int(trailing[k]) | (k << 8)))
497+
lambda k: f"0x{int(trailing[k]) | (k << 8):07X}", 8)
495498

496499
def gen_tests(tests, out):
497500
out.write("""#[derive(Debug)]
@@ -585,36 +588,27 @@ def minimal_perfect_hash(d):
585588

586589
version = "(%s, %s, %s)" % tuple(UNICODE_VERSION.split("."))
587590
out.write("#[allow(unused)]\n")
588-
out.write("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n\n" % version)
591+
out.write("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n" % version)
589592

590593
gen_combining_class(data.combining_classes, out)
591-
out.write("\n")
592594

593595
gen_composition_table(data.canon_comp, out)
594-
out.write("\n")
595596

596597
gen_decomposition_tables(data.canon_fully_decomp, data.compat_fully_decomp, data.cjk_compat_variants_fully_decomp, out)
597598

598599
gen_combining_mark(data.general_category_mark, out)
599-
out.write("\n")
600600

601601
gen_public_assigned(data.general_category_public_assigned, out)
602-
out.write("\n")
603602

604603
gen_nfc_qc(data.norm_props, out)
605-
out.write("\n")
606604

607605
gen_nfkc_qc(data.norm_props, out)
608-
out.write("\n")
609606

610607
gen_nfd_qc(data.norm_props, out)
611-
out.write("\n")
612608

613609
gen_nfkd_qc(data.norm_props, out)
614-
out.write("\n")
615610

616611
gen_stream_safe(data.ss_leading, data.ss_trailing, out)
617-
out.write("\n")
618612

619613
with open("normalization_tests.rs", "w", newline = "\n") as out:
620614
out.write(PREAMBLE)

src/__test_api.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
// If you're caught using this outside this crates tests/, you get to clean up the mess.
66

77
#[cfg(not(feature = "std"))]
8-
use crate::no_std_prelude::*;
8+
use alloc::string::String;
99

1010
use crate::stream_safe::StreamSafe;
1111

src/decompose.rs

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010
use core::fmt::{self, Write};
11-
use core::iter::Fuse;
11+
use core::iter::{Fuse, FusedIterator};
1212
use core::ops::Range;
1313
use tinyvec::TinyVec;
1414

@@ -151,6 +151,8 @@ impl<I: Iterator<Item = char>> Iterator for Decompositions<I> {
151151
}
152152
}
153153

154+
impl<I: Iterator<Item = char>> FusedIterator for Decompositions<I> {}
155+
154156
impl<I: Iterator<Item = char> + Clone> fmt::Display for Decompositions<I> {
155157
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
156158
for c in self.clone() {

src/lib.rs

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -62,12 +62,7 @@ pub use crate::recompose::Recompositions;
6262
pub use crate::replace::Replacements;
6363
pub use crate::stream_safe::StreamSafe;
6464
pub use crate::tables::UNICODE_VERSION;
65-
use core::{
66-
str::Chars,
67-
option,
68-
};
69-
70-
mod no_std_prelude;
65+
use core::{option, str::Chars};
7166

7267
mod decompose;
7368
mod lookups;
@@ -169,7 +164,6 @@ impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {
169164
}
170165
}
171166

172-
173167
impl UnicodeNormalization<option::IntoIter<char>> for char {
174168
#[inline]
175169
fn nfd(self) -> Decompositions<option::IntoIter<char>> {

src/no_std_prelude.rs

Lines changed: 0 additions & 6 deletions
This file was deleted.

src/recompose.rs

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,10 @@
99
// except according to those terms.
1010

1111
use crate::decompose::Decompositions;
12-
use core::fmt::{self, Write};
12+
use core::{
13+
fmt::{self, Write},
14+
iter::FusedIterator,
15+
};
1316
use tinyvec::TinyVec;
1417

1518
#[derive(Clone)]
@@ -144,6 +147,8 @@ impl<I: Iterator<Item = char>> Iterator for Recompositions<I> {
144147
}
145148
}
146149

150+
impl<I: Iterator<Item = char> + FusedIterator> FusedIterator for Recompositions<I> {}
151+
147152
impl<I: Iterator<Item = char> + Clone> fmt::Display for Recompositions<I> {
148153
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
149154
for c in self.clone() {

src/replace.rs

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,10 @@
77
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
10-
use core::fmt::{self, Write};
10+
use core::{
11+
fmt::{self, Write},
12+
iter::FusedIterator,
13+
};
1114
use tinyvec::ArrayVec;
1215

1316
/// External iterator for replacements for a string's characters.
@@ -51,6 +54,8 @@ impl<I: Iterator<Item = char>> Iterator for Replacements<I> {
5154
}
5255
}
5356

57+
impl<I: Iterator<Item = char> + FusedIterator> FusedIterator for Replacements<I> {}
58+
5459
impl<I: Iterator<Item = char> + Clone> fmt::Display for Replacements<I> {
5560
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
5661
for c in self.clone() {

src/stream_safe.rs

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
use core::iter::FusedIterator;
2+
13
use crate::lookups::{
24
canonical_combining_class, canonical_fully_decomposed, compatibility_fully_decomposed,
35
stream_safe_trailing_nonstarters,
@@ -59,6 +61,8 @@ impl<I: Iterator<Item = char>> Iterator for StreamSafe<I> {
5961
}
6062
}
6163

64+
impl<I: Iterator<Item = char> + FusedIterator> FusedIterator for StreamSafe<I> {}
65+
6266
#[derive(Debug)]
6367
pub(crate) struct Decomposition {
6468
pub(crate) leading_nonstarters: usize,
@@ -110,7 +114,7 @@ mod tests {
110114
use crate::normalize::decompose_compatible;
111115

112116
#[cfg(not(feature = "std"))]
113-
use crate::no_std_prelude::*;
117+
use alloc::{string::String, vec::Vec};
114118

115119
use core::char;
116120

0 commit comments

Comments (0)