@@ -46,7 +46,7 @@ impl Decoder {
46
46
///
47
47
/// If you instead want to use XML declared encoding, use the `encoding` feature
48
48
pub fn decode_with_bom_removal < ' b > ( & self , bytes : & ' b [ u8 ] ) -> Result < Cow < ' b , str > > {
49
- let bytes = if bytes. starts_with ( b" \xEF \xBB \xBF " ) {
49
+ let bytes = if bytes. starts_with ( & [ 0xEF , 0xBB , 0xBF ] ) {
50
50
& bytes[ 3 ..]
51
51
} else {
52
52
bytes
@@ -72,13 +72,7 @@ impl Decoder {
72
72
///
73
73
/// Returns an error in case of malformed sequences in the `bytes`.
74
74
pub fn decode < ' b > ( & self , bytes : & ' b [ u8 ] ) -> Result < Cow < ' b , str > > {
75
- match self
76
- . encoding
77
- . decode_without_bom_handling_and_without_replacement ( bytes)
78
- {
79
- None => Err ( Error :: NonDecodable ( None ) ) ,
80
- Some ( s) => Ok ( s) ,
81
- }
75
+ decode ( bytes, self . encoding )
82
76
}
83
77
84
78
/// Decodes a slice with BOM removal if it is present in the `bytes` using
@@ -91,25 +85,54 @@ impl Decoder {
91
85
///
92
86
/// Returns an error in case of malformed sequences in the `bytes`.
93
87
pub fn decode_with_bom_removal < ' b > ( & self , bytes : & ' b [ u8 ] ) -> Result < Cow < ' b , str > > {
94
- self . decode ( self . remove_bom ( bytes) )
88
+ self . decode ( remove_bom ( bytes, self . encoding ) )
95
89
}
96
- /// Copied from [`Encoding::decode_with_bom_removal`]
97
- # [ inline ]
98
- fn remove_bom < ' b > ( & self , bytes : & ' b [ u8 ] ) -> & ' b [ u8 ] {
99
- if self . encoding == UTF_8 && bytes. starts_with ( b" \xEF \xBB \xBF " ) {
100
- return & bytes [ 3 .. ] ;
101
- }
102
- if self . encoding == UTF_16LE && bytes . starts_with ( b" \xFF \xFE " ) {
103
- return & bytes [ 2 .. ] ;
104
- }
105
- if self . encoding == UTF_16BE && bytes . starts_with ( b" \xFE \xFF " ) {
106
- return & bytes [ 2 .. ] ;
107
- }
90
+ }
91
+
92
+ /// Decodes the provided bytes using the specified encoding. A BOM, if present
93
+ /// in the `bytes`, is not stripped here; use `remove_bom` first to drop it.
94
+ ///
95
+ /// Returns an error in case of malformed sequences in the `bytes`.
96
+ # [ cfg ( feature = "encoding" ) ]
97
+ pub fn decode < ' b > ( bytes : & ' b [ u8 ] , encoding : & ' static Encoding ) -> Result < Cow < ' b , str > > {
98
+ encoding
99
+ . decode_without_bom_handling_and_without_replacement ( bytes )
100
+ . ok_or ( Error :: NonDecodable ( None ) )
101
+ }
108
102
109
- bytes
103
+ /// Decodes a slice whose encoding is not known in advance: the encoding is detected
104
+ /// with `detect_encoding` and its BOM is removed; if detection fails, UTF-8 is assumed.
105
+ ///
106
+ /// Returns an error in case of malformed sequences in the `bytes`.
107
+ #[ cfg( feature = "encoding" ) ]
108
+ pub fn decode_with_bom_removal < ' b > ( bytes : & ' b [ u8 ] ) -> Result < Cow < ' b , str > > {
109
+ if let Some ( encoding) = detect_encoding ( bytes) {
110
+ let bytes = remove_bom ( bytes, encoding) ;
111
+ decode ( bytes, encoding)
112
+ } else {
113
+ decode ( bytes, UTF_8 )
114
+ }
115
+ }
116
+
117
+ #[ cfg( feature = "encoding" ) ]
118
+ fn split_at_bom < ' b > ( bytes : & ' b [ u8 ] , encoding : & ' static Encoding ) -> ( & ' b [ u8 ] , & ' b [ u8 ] ) {
119
+ if encoding == UTF_8 && bytes. starts_with ( & [ 0xEF , 0xBB , 0xBF ] ) {
120
+ bytes. split_at ( 3 )
121
+ } else if encoding == UTF_16LE && bytes. starts_with ( & [ 0xFF , 0xFE ] ) {
122
+ bytes. split_at ( 2 )
123
+ } else if encoding == UTF_16BE && bytes. starts_with ( & [ 0xFE , 0xFF ] ) {
124
+ bytes. split_at ( 2 )
125
+ } else {
126
+ ( & [ ] , bytes)
110
127
}
111
128
}
112
129
130
+ #[ cfg( feature = "encoding" ) ]
131
+ fn remove_bom < ' b > ( bytes : & ' b [ u8 ] , encoding : & ' static Encoding ) -> & ' b [ u8 ] {
132
+ let ( _, bytes) = split_at_bom ( bytes, encoding) ;
133
+ bytes
134
+ }
135
+
113
136
/// This implementation is required for tests of other parts of the library
114
137
#[ cfg( test) ]
115
138
#[ cfg( feature = "serialize" ) ]
@@ -158,7 +181,7 @@ impl Decoder {
158
181
///
159
182
/// If encoding is detected, `Some` is returned, otherwise `None` is returned.
160
183
#[ cfg( feature = "encoding" ) ]
161
- pub ( crate ) fn detect_encoding ( bytes : & [ u8 ] ) -> Option < & ' static Encoding > {
184
+ pub fn detect_encoding ( bytes : & [ u8 ] ) -> Option < & ' static Encoding > {
162
185
match bytes {
163
186
// with BOM
164
187
_ if bytes. starts_with ( & [ 0xFE , 0xFF ] ) => Some ( UTF_16BE ) ,
@@ -173,3 +196,5 @@ pub(crate) fn detect_encoding(bytes: &[u8]) -> Option<&'static Encoding> {
173
196
_ => None ,
174
197
}
175
198
}
199
+
200
+ // TODO: add tests for these functions
0 commit comments