@@ -51,7 +51,7 @@ use alloc::{
5151 string:: String ,
5252 vec:: Vec ,
5353} ;
54- use core:: { fmt, mem, slice, str} ;
54+ use core:: { fmt, mem, ops , slice, str} ;
5555
5656/// Represents a set of characters or bytes in the ASCII range.
5757///
@@ -66,6 +66,7 @@ use core::{fmt, mem, slice, str};
6666/// /// https://url.spec.whatwg.org/#fragment-percent-encode-set
6767/// const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
6868/// ```
69+ #[ derive( Debug , PartialEq , Eq ) ]
6970pub struct AsciiSet {
7071 mask : [ Chunk ; ASCII_RANGE_LEN / BITS_PER_CHUNK ] ,
7172}
@@ -77,6 +78,11 @@ const ASCII_RANGE_LEN: usize = 0x80;
7778const BITS_PER_CHUNK : usize = 8 * mem:: size_of :: < Chunk > ( ) ;
7879
7980impl AsciiSet {
81+ /// An empty set.
82+ pub const EMPTY : AsciiSet = AsciiSet {
83+ mask : [ 0 ; ASCII_RANGE_LEN / BITS_PER_CHUNK ] ,
84+ } ;
85+
8086 /// Called with UTF-8 bytes rather than code points.
8187 /// Not used for non-ASCII bytes.
8288 const fn contains ( & self , byte : u8 ) -> bool {
@@ -100,6 +106,39 @@ impl AsciiSet {
100106 mask[ byte as usize / BITS_PER_CHUNK ] &= !( 1 << ( byte as usize % BITS_PER_CHUNK ) ) ;
101107 AsciiSet { mask }
102108 }
109+
110+ /// Return the union of two sets.
111+ pub const fn union ( & self , other : Self ) -> Self {
112+ let mask = [
113+ self . mask [ 0 ] | other. mask [ 0 ] ,
114+ self . mask [ 1 ] | other. mask [ 1 ] ,
115+ self . mask [ 2 ] | other. mask [ 2 ] ,
116+ self . mask [ 3 ] | other. mask [ 3 ] ,
117+ ] ;
118+ AsciiSet { mask }
119+ }
120+
121+ /// Return the negation of the set.
122+ pub const fn complement ( & self ) -> Self {
123+ let mask = [ !self . mask [ 0 ] , !self . mask [ 1 ] , !self . mask [ 2 ] , !self . mask [ 3 ] ] ;
124+ AsciiSet { mask }
125+ }
126+ }
127+
128+ impl ops:: Add for AsciiSet {
129+ type Output = Self ;
130+
131+ fn add ( self , other : Self ) -> Self {
132+ self . union ( other)
133+ }
134+ }
135+
136+ impl ops:: Not for AsciiSet {
137+ type Output = Self ;
138+
139+ fn not ( self ) -> Self {
140+ self . complement ( )
141+ }
103142}
104143
105144/// The set of 0x00 to 0x1F (C0 controls), and 0x7F (DEL).
@@ -478,3 +517,46 @@ fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
478517 }
479518 }
480519}
520+
#[cfg(test)]
mod tests {
    use super::*;

    /// The `+` operator on two single-byte sets equals the set built by
    /// adding both bytes directly.
    #[test]
    fn add_op() {
        let only_a = AsciiSet::EMPTY.add(b'A');
        let only_b = AsciiSet::EMPTY.add(b'B');
        let both = AsciiSet::EMPTY.add(b'A').add(b'B');
        assert_eq!(only_a + only_b, both);
    }

    /// The `!` operator drops members of the original set and picks up
    /// bytes that were not in it.
    #[test]
    fn not_op() {
        let inverted = !AsciiSet::EMPTY.add(b'A').add(b'B');
        assert!(!inverted.contains(b'A'));
        assert!(inverted.contains(b'C'));
    }

    /// This test ensures that the union of two sets can be computed as a constant value,
    /// which is useful for defining sets in a modular way.
    #[test]
    fn union() {
        const LEFT: AsciiSet = AsciiSet::EMPTY.add(b'A');
        const RIGHT: AsciiSet = AsciiSet::EMPTY.add(b'B');
        const COMBINED: AsciiSet = LEFT.union(RIGHT);
        const WANTED: AsciiSet = AsciiSet::EMPTY.add(b'A').add(b'B');
        assert_eq!(COMBINED, WANTED);
    }

    /// This test ensures that the complement of a set can be computed as a constant value,
    /// which is useful for defining sets in a modular way.
    #[test]
    fn complement() {
        const MEMBERS: AsciiSet = AsciiSet::EMPTY.add(b'A').add(b'B');
        const INVERTED: AsciiSet = MEMBERS.complement();
        assert!(!INVERTED.contains(b'A'));
        assert!(!INVERTED.contains(b'B'));
        assert!(INVERTED.contains(b'C'));
    }
}
0 commit comments