Skip to content

Commit 360b17e

Browse files
committed
Add AsciiSet::EMPTY and impl ops::Add for AsciiSet
In RFCs, the sets of characters to percent-encode are often defined as the union of multiple sets. This change adds an `EMPTY` constant to `AsciiSet` and implements the `Add` trait for `AsciiSet` so that sets can be combined with the `+` operator. AsciiSet now derives `Debug`, `PartialEq`, and `Eq` so that it can be used in tests.
1 parent 9404ff5 commit 360b17e

File tree

1 file changed

+32
-1
lines changed

1 file changed

+32
-1
lines changed

percent_encoding/src/lib.rs

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ use alloc::{
5151
string::String,
5252
vec::Vec,
5353
};
54-
use core::{fmt, mem, slice, str};
54+
use core::{fmt, mem, ops, slice, str};
5555

5656
/// Represents a set of characters or bytes in the ASCII range.
5757
///
@@ -66,6 +66,7 @@ use core::{fmt, mem, slice, str};
6666
/// /// https://url.spec.whatwg.org/#fragment-percent-encode-set
6767
/// const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
6868
/// ```
69+
// `Debug`, `PartialEq` and `Eq` are derived so that sets can be compared and
// printed by `assert_eq!` in tests.
#[derive(Debug, PartialEq, Eq)]
6970
pub struct AsciiSet {
7071
// Bit mask spanning the ASCII range: `ASCII_RANGE_LEN / BITS_PER_CHUNK` words of
// type `Chunk`, i.e. `ASCII_RANGE_LEN` bits in total.
// NOTE(review): presumably one bit per ASCII byte value; the membership test is
// not visible in this excerpt, so confirm the bit layout against `contains`/`add`.
mask: [Chunk; ASCII_RANGE_LEN / BITS_PER_CHUNK],
7172
}
@@ -77,6 +78,11 @@ const ASCII_RANGE_LEN: usize = 0x80;
7778
const BITS_PER_CHUNK: usize = 8 * mem::size_of::<Chunk>();
7879

7980
impl AsciiSet {
81+
/// An empty set.
82+
pub const EMPTY: AsciiSet = AsciiSet {
83+
mask: [0; ASCII_RANGE_LEN / BITS_PER_CHUNK],
84+
};
85+
8086
/// Called with UTF-8 bytes rather than code points.
8187
/// Not used for non-ASCII bytes.
8288
const fn contains(&self, byte: u8) -> bool {
@@ -102,6 +108,18 @@ impl AsciiSet {
102108
}
103109
}
104110

111+
impl ops::Add for AsciiSet {
112+
type Output = Self;
113+
114+
fn add(self, other: Self) -> Self {
115+
let mut mask = self.mask.clone();
116+
for i in 0..mask.len() {
117+
mask[i] |= other.mask[i];
118+
}
119+
AsciiSet { mask }
120+
}
121+
}
122+
105123
/// The set of 0x00 to 0x1F (C0 controls), and 0x7F (DEL).
106124
///
107125
/// Note that this includes the newline and tab characters, but not the space 0x20.
@@ -478,3 +496,16 @@ fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
478496
}
479497
}
480498
}
499+
500+
#[cfg(test)]
mod tests {
    use super::*;

    /// Combining two single-byte sets with `+` must give the same set as
    /// chaining the byte-wise `add` method on one set.
    #[test]
    fn add() {
        let chained = AsciiSet::EMPTY.add(b'A').add(b'B');

        let only_a = AsciiSet::EMPTY.add(b'A');
        let only_b = AsciiSet::EMPTY.add(b'B');

        assert_eq!(only_a + only_b, chained);
    }
}

0 commit comments

Comments
 (0)