2
2
3
3
use memchr;
4
4
use std:: borrow:: Cow ;
5
- use std:: collections:: HashMap ;
6
5
use std:: ops:: Range ;
7
6
8
7
#[ cfg( test) ]
@@ -66,31 +65,15 @@ impl std::error::Error for EscapeError {}
66
65
/// Escapes a `&[u8]` and replaces all xml special characters (<, >, &, ', ") with their
67
66
/// corresponding xml escaped value.
68
67
pub fn escape ( raw : & [ u8 ] ) -> Cow < [ u8 ] > {
69
- #[ inline]
70
- fn to_escape ( b : u8 ) -> bool {
71
- match b {
72
- b'<' | b'>' | b'\'' | b'&' | b'"' => true ,
73
- _ => false ,
74
- }
75
- }
76
-
77
- _escape ( raw, to_escape)
68
+ _escape ( raw, |ch| matches ! ( ch, b'<' | b'>' | b'&' | b'\'' | b'\"' ) )
78
69
}
79
70
80
71
/// Should only be used for escaping text content. In xml text content, it is allowed
81
72
/// (though not recommended) to leave the quote special characters " and ' unescaped.
82
73
/// This function escapes a `&[u8]` and replaces xml special characters (<, >, &) with
83
74
/// their corresponding xml escaped value, but does not escape quote characters.
84
75
pub fn partial_escape ( raw : & [ u8 ] ) -> Cow < [ u8 ] > {
85
- #[ inline]
86
- fn to_escape ( b : u8 ) -> bool {
87
- match b {
88
- b'<' | b'>' | b'&' => true ,
89
- _ => false ,
90
- }
91
- }
92
-
93
- _escape ( raw, to_escape)
76
+ _escape ( raw, |ch| matches ! ( ch, b'<' | b'>' | b'&' ) )
94
77
}
95
78
96
79
/// Escapes a `&[u8]` and replaces a subset of xml special characters (<, >, &, ', ") with their
@@ -130,32 +113,22 @@ fn _escape<F: Fn(u8) -> bool>(raw: &[u8], escape_chars: F) -> Cow<[u8]> {
130
113
/// Unescapes a `&[u8]`, replacing every XML escaped character ('&...;') with its corresponding
131
114
/// value
132
115
pub fn unescape ( raw : & [ u8 ] ) -> Result < Cow < [ u8 ] > , EscapeError > {
133
- do_unescape ( raw, None )
116
+ unescape_with ( raw, |_| None )
134
117
}
135
118
136
119
/// Unescapes a `&[u8]`, replacing every XML escaped character ('&...;') with its corresponding
137
- /// value, using a dictionnary of custom entities.
120
+ /// value, using a dictionary of custom entities.
138
121
///
139
122
/// # Pre-condition
140
123
///
141
- /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
142
- pub fn unescape_with < ' a > (
124
+ /// The implementation of `lookup_custom_entity` is expected to operate over UTF-8 inputs.
125
+ pub fn unescape_with < ' a , ' b > (
143
126
raw : & ' a [ u8 ] ,
144
- custom_entities : & HashMap < Vec < u8 > , Vec < u8 > > ,
145
- ) -> Result < Cow < ' a , [ u8 ] > , EscapeError > {
146
- do_unescape ( raw, Some ( custom_entities) )
147
- }
148
-
149
- /// Unescape a `&[u8]` and replaces all xml escaped characters ('&...;') into their corresponding
150
- /// value, using an optional dictionary of custom entities.
151
- ///
152
- /// # Pre-condition
153
- ///
154
- /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
155
- pub fn do_unescape < ' a > (
156
- raw : & ' a [ u8 ] ,
157
- custom_entities : Option < & HashMap < Vec < u8 > , Vec < u8 > > > ,
158
- ) -> Result < Cow < ' a , [ u8 ] > , EscapeError > {
127
+ lookup_custom_entity : impl Fn ( & ' b [ u8 ] ) -> Option < & ' b str > ,
128
+ ) -> Result < Cow < ' a , [ u8 ] > , EscapeError >
129
+ where
130
+ ' a : ' b ,
131
+ {
159
132
let mut unescaped = None ;
160
133
let mut last_end = 0 ;
161
134
let mut iter = memchr:: memchr2_iter ( b'&' , b';' , raw) ;
@@ -171,12 +144,14 @@ pub fn do_unescape<'a>(
171
144
172
145
// search for character correctness
173
146
let pat = & raw [ start + 1 ..end] ;
174
- if let Some ( s) = named_entity ( pat) {
175
- unescaped. extend_from_slice ( s. as_bytes ( ) ) ;
176
- } else if pat. starts_with ( b"#" ) {
177
- push_utf8 ( unescaped, parse_number ( & pat[ 1 ..] , start..end) ?) ;
178
- } else if let Some ( value) = custom_entities. and_then ( |hm| hm. get ( pat) ) {
179
- unescaped. extend_from_slice ( & value) ;
147
+ if pat. starts_with ( b"#" ) {
148
+ let entity = & pat[ 1 ..] ; // starts after the #
149
+ let codepoint = parse_number ( entity, start..end) ?;
150
+ push_utf8 ( unescaped, codepoint) ;
151
+ } else if let Some ( value) = named_entity ( pat) {
152
+ unescaped. extend_from_slice ( value. as_bytes ( ) ) ;
153
+ } else if let Some ( value) = lookup_custom_entity ( pat) {
154
+ unescaped. extend_from_slice ( value. as_bytes ( ) ) ;
180
155
} else {
181
156
return Err ( EscapeError :: UnrecognizedSymbol (
182
157
start + 1 ..end,
@@ -1740,18 +1715,20 @@ fn test_unescape() {
1740
1715
1741
1716
#[ test]
1742
1717
fn test_unescape_with ( ) {
1743
- let custom_entities = vec ! [ ( b"foo" . to_vec( ) , b"BAR" . to_vec( ) ) ]
1744
- . into_iter ( )
1745
- . collect ( ) ;
1746
- assert_eq ! ( & * unescape_with( b"test" , & custom_entities) . unwrap( ) , b"test" ) ;
1718
+ let custom_entities = |ent : & [ u8 ] | match ent {
1719
+ b"foo" => Some ( "BAR" ) ,
1720
+ _ => None ,
1721
+ } ;
1722
+
1723
+ assert_eq ! ( & * unescape_with( b"test" , custom_entities) . unwrap( ) , b"test" ) ;
1747
1724
assert_eq ! (
1748
- & * unescape_with( b"<test>" , & custom_entities) . unwrap( ) ,
1725
+ & * unescape_with( b"<test>" , custom_entities) . unwrap( ) ,
1749
1726
b"<test>"
1750
1727
) ;
1751
- assert_eq ! ( & * unescape_with( b"0" , & custom_entities) . unwrap( ) , b"0" ) ;
1752
- assert_eq ! ( & * unescape_with( b"0" , & custom_entities) . unwrap( ) , b"0" ) ;
1753
- assert_eq ! ( & * unescape_with( b"&foo;" , & custom_entities) . unwrap( ) , b"BAR" ) ;
1754
- assert ! ( unescape_with( b"&fop;" , & custom_entities) . is_err( ) ) ;
1728
+ assert_eq ! ( & * unescape_with( b"0" , custom_entities) . unwrap( ) , b"0" ) ;
1729
+ assert_eq ! ( & * unescape_with( b"0" , custom_entities) . unwrap( ) , b"0" ) ;
1730
+ assert_eq ! ( & * unescape_with( b"&foo;" , custom_entities) . unwrap( ) , b"BAR" ) ;
1731
+ assert ! ( unescape_with( b"&fop;" , custom_entities) . is_err( ) ) ;
1755
1732
}
1756
1733
1757
1734
#[ test]
0 commit comments