1
- // Source: https://crates.io/crates/base32 v0.4.0
2
- // License: MIT or Apache-2.0
3
- // Copyright (c) 2015 The base32 Developers
4
- // Permission is hereby granted, free of charge, to any person obtaining a copy
5
- // of this software and associated documentation files (the "Software"), to deal
6
- // in the Software without restriction, including without limitation the rights
7
- // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
- // copies of the Software, and to permit persons to whom the Software is
9
- // furnished to do so, subject to the following conditions:
10
-
11
- // The above copyright notice and this permission notice shall be included in all
12
- // copies or substantial portions of the Software.
13
-
14
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
- // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
- // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
- // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20
- // SOFTWARE.
21
- // (reference https://github.com/andreasots/base32/blob/master/LICENSE-MIT)
1
+ // This is a modification of base32 encoding to support the zbase32 alphabet.
2
+ // The original piece of software can be found at https://crates.io/crates/base32(v0.4.0)
3
+ // The original portions of this software are Copyright (c) 2015 The base32 Developers
4
+
5
+ /* This file is licensed under either of
6
+ * Apache License, Version 2.0, (LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0) or
7
+ * MIT license (LICENSE-MIT or http://opensource.org/licenses/MIT)
8
+ * at your option.
9
+ */
22
10
23
11
use crate :: prelude:: * ;
24
12
@@ -30,19 +18,29 @@ pub enum Alphabet {
30
18
/// Whether to use padding.
31
19
padding : bool
32
20
} ,
21
+ ZBase32
33
22
}
34
23
35
- /// RFC4648 base32 encoding with padding.
36
- const RFC4648_ALPHABET : & ' static [ u8 ] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567" ;
37
24
38
- /// Encode a byte slice into a base32 string.
39
- pub fn encode ( alphabet : Alphabet , data : & [ u8 ] ) -> String {
40
- let ( alphabet, padding) = match alphabet {
41
- Alphabet :: RFC4648 { padding } => ( RFC4648_ALPHABET , padding) ,
42
- } ;
25
/// Inverse zbase32 lookup table for decoding.
///
/// Indexed by `byte - b'0'` for the ASCII range `'0'..='Z'`; letters are looked up after
/// upper-casing. An entry of `-1` marks a byte that is not part of the zbase32 alphabet.
const ZBASE_INV_ALPHABET: [i8; 43] = [
    // '0'..='9'
    -1, 18, -1, 25, 26, 27, 30, 29, 7, 31,
    // ':'..='@'
    -1, -1, -1, -1, -1, -1, -1,
    // 'A'..='M'
    24, 1, 12, 3, 8, 5, 6, 28, 21, 9, 10, -1, 11,
    // 'N'..='Z'
    2, 16, 13, 14, 4, 22, 17, 19, -1, 20, 15, 0, 23,
];

/// The zbase32 alphabet: the symbol for the 5-bit value `v` is `ZBASE_ALPHABET[v]`.
const ZBASE_ALPHABET: &[u8] = b"ybndrfg8ejkmcpqxot1uwisza345h769";

/// Inverse RFC4648 lookup table for decoding.
///
/// Indexed by `byte - b'0'` for the ASCII range `'0'..='Z'`. An entry of `-1` marks a byte
/// that is not a valid symbol; the padding character `'='` maps to `0`.
const RFC4648_INV_ALPHABET: [i8; 43] = [
    // '0'..='9'
    -1, -1, 26, 27, 28, 29, 30, 31, -1, -1,
    // ':'..='@' (note '=' at offset 13)
    -1, -1, -1, 0, -1, -1, -1,
    // 'A'..='M'
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
    // 'N'..='Z'
    13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
];
43
39
44
- let mut ret = Vec :: with_capacity ( ( data. len ( ) + 3 ) / 4 * 5 ) ;
40
/// The RFC4648 base32 alphabet: the symbol for the 5-bit value `v` is `RFC4648_ALPHABET[v]`.
const RFC4648_ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
45
42
43
+ fn map_alphabet ( mut ret : Vec < u8 > , data : & [ u8 ] , alphabet : & ' static [ u8 ] ) -> Vec < u8 > {
46
44
for chunk in data. chunks ( 5 ) {
47
45
let buf = {
48
46
let mut buf = [ 0u8 ; 5 ] ;
@@ -61,6 +59,12 @@ pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
61
59
ret. push ( alphabet[ ( buf[ 4 ] & 0x1F ) as usize ] ) ;
62
60
}
63
61
62
+ ret
63
+ }
64
+
65
+
66
+ fn rfc4648_encode ( data : & [ u8 ] , padding : bool ) -> String {
67
+ let mut ret = map_alphabet ( Vec :: with_capacity ( ( data. len ( ) + 3 ) / 4 * 5 ) , data, RFC4648_ALPHABET ) ;
64
68
if data. len ( ) % 5 != 0 {
65
69
let len = ret. len ( ) ;
66
70
let num_extra = 8 - ( data. len ( ) % 5 * 8 + 4 ) / 5 ;
@@ -72,50 +76,163 @@ pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
72
76
ret. truncate ( len - num_extra) ;
73
77
}
74
78
}
79
+ String :: from_utf8 ( ret) . unwrap ( )
80
+
81
+ }
82
+
83
+ fn zbase32_encode ( data : & [ u8 ] ) -> String {
84
+ let mut ret = Vec :: with_capacity ( ( data. len ( ) + 4 ) / 5 * 8 ) ;
85
+ ret = map_alphabet ( ret, data, RFC4648_ALPHABET ) ;
86
+ ret. truncate ( ( data. len ( ) * 8 + 4 ) / 5 ) ;
87
+
88
+ // Check that our capacity calculation doesn't under-shoot in fuzzing
89
+ #[ cfg( fuzzing) ]
90
+ assert_eq ! ( ret. capacity( ) , ( data. len( ) + 4 ) / 5 * 8 ) ;
75
91
76
92
String :: from_utf8 ( ret) . unwrap ( )
77
93
}
78
94
79
- /// Inverse RFC4648 lookup table for decoding.
80
- const RFC4648_INV_ALPHABET : [ i8 ; 43 ] = [
81
- -1 , -1 , 26 , 27 , 28 , 29 , 30 , 31 , -1 , -1 , -1 , -1 , -1 , 0 , -1 , -1 , -1 , 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ,
82
- 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 , 17 , 18 , 19 , 20 , 21 , 22 , 23 , 24 , 25 ,
83
- ] ;
95
+ /// Encode a byte slice into a base32 string.
96
+ pub fn encode ( alphabet : Alphabet , data : & [ u8 ] ) -> String {
97
+ match alphabet {
98
+ Alphabet :: RFC4648 { padding } =>{
99
+ rfc4648_encode ( data, padding)
100
+ } ,
101
+ Alphabet :: ZBase32 => {
102
+ zbase32_encode ( data)
103
+ }
104
+ }
105
+ }
106
+
84
107
85
108
/// Decode a base32 string into a byte vector.
86
- pub fn decode ( alphabet : Alphabet , data : & str ) -> Option < Vec < u8 > > {
87
- let data = data. as_bytes ( ) ;
88
- let alphabet = match alphabet {
89
- Alphabet :: RFC4648 { .. } => RFC4648_INV_ALPHABET ,
90
- } ;
91
- let mut unpadded_data_length = data. len ( ) ;
92
- data. iter ( ) . rev ( ) . take ( 6 ) . for_each ( |& c| {
93
- if c != b'=' {
94
- return ;
95
- }
96
- unpadded_data_length -= 1 ;
97
- } ) ;
98
- let output_length = unpadded_data_length * 5 / 8 ;
99
- let mut ret = Vec :: with_capacity ( ( output_length + 4 ) / 5 * 5 ) ;
100
- for chunk in data. chunks ( 8 ) {
101
- let buf = {
102
- let mut buf = [ 0u8 ; 8 ] ;
103
- for ( i, & c) in chunk. iter ( ) . enumerate ( ) {
104
- match alphabet. get ( c. to_ascii_uppercase ( ) . wrapping_sub ( b'0' ) as usize ) {
105
- Some ( & -1 ) | None => return None ,
106
- Some ( & value) => buf[ i] = value as u8 ,
109
+ pub fn decode ( alphabet : Alphabet , data : & str ) -> Result < Vec < u8 > , ( ) > {
110
+ match alphabet {
111
+ Alphabet :: RFC4648 { .. } => {
112
+ let alphabet = RFC4648_INV_ALPHABET ;
113
+ let data = data. as_bytes ( ) ;
114
+ let mut unpadded_data_length = data. len ( ) ;
115
+ data. iter ( ) . rev ( ) . take ( 6 ) . for_each ( |& c| {
116
+ if c != b'=' {
117
+ return ;
118
+ }
119
+ unpadded_data_length -= 1 ;
120
+ } ) ;
121
+ let output_length = unpadded_data_length * 5 / 8 ;
122
+ let mut ret = Vec :: with_capacity ( ( output_length + 4 ) / 5 * 5 ) ;
123
+ for chunk in data. chunks ( 8 ) {
124
+ let buf = {
125
+ let mut buf = [ 0u8 ; 8 ] ;
126
+ for ( i, & c) in chunk. iter ( ) . enumerate ( ) {
127
+ match alphabet. get ( c. to_ascii_uppercase ( ) . wrapping_sub ( b'0' ) as usize ) {
128
+ Some ( & -1 ) | None => return Err ( ( ) ) ,
129
+ Some ( & value) => buf[ i] = value as u8 ,
130
+ } ;
131
+ }
132
+ buf
107
133
} ;
134
+ ret. push ( ( buf[ 0 ] << 3 ) | ( buf[ 1 ] >> 2 ) ) ;
135
+ ret. push ( ( buf[ 1 ] << 6 ) | ( buf[ 2 ] << 1 ) | ( buf[ 3 ] >> 4 ) ) ;
136
+ ret. push ( ( buf[ 3 ] << 4 ) | ( buf[ 4 ] >> 1 ) ) ;
137
+ ret. push ( ( buf[ 4 ] << 7 ) | ( buf[ 5 ] << 2 ) | ( buf[ 6 ] >> 3 ) ) ;
138
+ ret. push ( ( buf[ 6 ] << 5 ) | buf[ 7 ] ) ;
108
139
}
109
- buf
110
- } ;
111
- ret. push ( ( buf[ 0 ] << 3 ) | ( buf[ 1 ] >> 2 ) ) ;
112
- ret. push ( ( buf[ 1 ] << 6 ) | ( buf[ 2 ] << 1 ) | ( buf[ 3 ] >> 4 ) ) ;
113
- ret. push ( ( buf[ 3 ] << 4 ) | ( buf[ 4 ] >> 1 ) ) ;
114
- ret. push ( ( buf[ 4 ] << 7 ) | ( buf[ 5 ] << 2 ) | ( buf[ 6 ] >> 3 ) ) ;
115
- ret. push ( ( buf[ 6 ] << 5 ) | buf[ 7 ] ) ;
140
+ ret. truncate ( output_length) ;
141
+ Ok ( ret)
142
+
143
+ } ,
144
+ Alphabet :: ZBase32 => {
145
+ if !data. is_ascii ( ) {
146
+ return Err ( ( ) ) ;
147
+ }
148
+ let alphabet = ZBASE_INV_ALPHABET ;
149
+
150
+ let data = data. as_bytes ( ) ;
151
+ let output_length = data. len ( ) * 5 / 8 ;
152
+ if data. len ( ) > ( output_length * 8 + 4 ) / 5 {
153
+ // If the string has more charachters than are required to alphabet_encode the number of bytes
154
+ // decodable, treat the string as invalid.
155
+ return Err ( ( ) ) ;
156
+ }
157
+
158
+ let mut ret = Vec :: with_capacity ( ( data. len ( ) + 7 ) / 8 * 5 ) ;
159
+
160
+ for chunk in data. chunks ( 8 ) {
161
+ let buf = {
162
+ let mut buf = [ 0u8 ; 8 ] ;
163
+ for ( i, & c) in chunk. iter ( ) . enumerate ( ) {
164
+ match ZBASE_INV_ALPHABET . get ( c. to_ascii_uppercase ( ) . wrapping_sub ( b'0' ) as usize ) {
165
+ Some ( & -1 ) | None => return Err ( ( ) ) ,
166
+ Some ( & value) => buf[ i] = value as u8 ,
167
+ } ;
168
+ }
169
+ buf
170
+ } ;
171
+ ret. push ( ( buf[ 0 ] << 3 ) | ( buf[ 1 ] >> 2 ) ) ;
172
+ ret. push ( ( buf[ 1 ] << 6 ) | ( buf[ 2 ] << 1 ) | ( buf[ 3 ] >> 4 ) ) ;
173
+ ret. push ( ( buf[ 3 ] << 4 ) | ( buf[ 4 ] >> 1 ) ) ;
174
+ ret. push ( ( buf[ 4 ] << 7 ) | ( buf[ 5 ] << 2 ) | ( buf[ 6 ] >> 3 ) ) ;
175
+ ret. push ( ( buf[ 6 ] << 5 ) | buf[ 7 ] ) ;
176
+ }
177
+ for c in ret. drain ( output_length..) {
178
+ if c != 0 {
179
+ // If the original string had any bits set at positions outside of the encoded data,
180
+ // treat the string as invalid.
181
+ return Err ( ( ) ) ;
182
+ }
183
+ }
184
+
185
+ // Check that our capacity calculation doesn't under-shoot in fuzzing
186
+ #[ cfg( fuzzing) ]
187
+ assert_eq ! ( ret. capacity( ) , ( data. len( ) + 7 ) / 8 * 5 ) ;
188
+
189
+ Ok ( ret)
190
+ } ,
191
+ }
192
+ }
193
+
194
#[cfg(test)]
mod tests {
    use super::*;

    /// Pairs of (zbase32 text, raw bytes) used for both the encode and decode tests.
    const TEST_DATA: &[(&str, &[u8])] = &[
        ("", &[]),
        ("yy", &[0x00]),
        ("oy", &[0x80]),
        ("tqrey", &[0x8b, 0x88, 0x80]),
        ("6n9hq", &[0xf0, 0xbf, 0xc7]),
        ("4t7ye", &[0xd4, 0x7a, 0x04]),
        ("6im5sdy", &[0xf5, 0x57, 0xbb, 0x0c]),
        (
            "ybndrfg8ejkmcpqxot1uwisza345h769",
            &[
                0x00, 0x44, 0x32, 0x14, 0xc7, 0x42, 0x54, 0xb6, 0x35, 0xcf, 0x84, 0x65, 0x3a,
                0x56, 0xd7, 0xc6, 0x75, 0xbe, 0x77, 0xdf,
            ],
        ),
    ];

    /// Every byte sequence in `TEST_DATA` encodes to its zbase32 text.
    #[test]
    fn test_encode() {
        for (zbase32, data) in TEST_DATA {
            assert_eq!(encode(Alphabet::ZBase32, data), *zbase32);
        }
    }

    /// Every zbase32 text in `TEST_DATA` decodes back to its byte sequence.
    #[test]
    fn test_decode() {
        for (zbase32, data) in TEST_DATA {
            assert_eq!(decode(Alphabet::ZBase32, zbase32).unwrap(), *data);
        }
    }

    /// Inputs containing symbols outside the zbase32 alphabet must be rejected.
    #[test]
    fn test_decode_wrong() {
        const WRONG_DATA: &[&str] = &["00", "l1", "?", "="];

        for &data in WRONG_DATA {
            assert!(decode(Alphabet::ZBase32, data).is_err(), "Data shouldn't be decodable");
        }
    }
}
120
237
121
238
#[ cfg( test) ]
@@ -203,11 +320,84 @@ mod test {
203
320
204
321
/// A byte outside the RFC4648 alphabet must make padded decoding fail.
#[test]
fn invalid_chars_rfc4648() {
    assert!(decode(RFC4648 { padding: true }, ",").is_err());
}
208
325
209
326
/// A byte outside the RFC4648 alphabet must make unpadded decoding fail.
#[test]
fn invalid_chars_unpadded_rfc4648() {
    assert!(decode(RFC4648 { padding: false }, ",").is_err());
}
213
330
}
331
+
332
+ // Decodes a zbase32 string to the original bytes, failing if the string was not encoded by a
333
+ // proper zbase32 encoder.
334
+ // pub fn alphabet_decode(data: &str) -> Result<Vec<u8>, ()> {
335
+ // if !data.is_ascii() {
336
+ // return Err(());
337
+ // }
338
+
339
+ // let data = data.as_bytes();
340
+ // let output_length = data.len() * 5 / 8;
341
+ // if data.len() > (output_length * 8 + 4) / 5 {
342
+ // // If the string has more characters than are required to alphabet_encode the number of bytes
343
+ // // decodable, treat the string as invalid.
344
+ // return Err(());
345
+ // }
346
+
347
+ // let mut ret = Vec::with_capacity((data.len() + 7) / 8 * 5);
348
+
349
+ // for chunk in data.chunks(8) {
350
+ // let buf = {
351
+ // let mut buf = [0u8; 8];
352
+ // for (i, &c) in chunk.iter().enumerate() {
353
+ // match ZBASE_INV_ALPHABET.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
354
+ // Some(&-1) | None => return Err(()),
355
+ // Some(&value) => buf[i] = value as u8,
356
+ // };
357
+ // }
358
+ // buf
359
+ // };
360
+ // ret.push((buf[0] << 3) | (buf[1] >> 2));
361
+ // ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
362
+ // ret.push((buf[3] << 4) | (buf[4] >> 1));
363
+ // ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
364
+ // ret.push((buf[6] << 5) | buf[7]);
365
+ // }
366
+ // for c in ret.drain(output_length..) {
367
+ // if c != 0 {
368
+ // // If the original string had any bits set at positions outside of the encoded data,
369
+ // // treat the string as invalid.
370
+ // return Err(());
371
+ // }
372
+ // }
373
+
374
+ // // Check that our capacity calculation doesn't under-shoot in fuzzing
375
+ // #[cfg(fuzzing)]
376
+ // assert_eq!(ret.capacity(), (data.len() + 7) / 8 * 5);
377
+
378
+ // Ok(ret)
379
+ // }
380
+ //
381
+ //
382
+ //
383
+ // fn inv_map_alphabet(mut ret: Vec<u8>, data: &str, alphabet: &'static [u8]) -> Result<Vec<u8>, ()> {
384
+ // let data = data.as_bytes();
385
+ // for chunk in data.chunks(8) {
386
+ // let buf = {
387
+ // let mut buf = [0u8; 8];
388
+ // for (i, &c) in chunk.iter().enumerate() {
389
+ // match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
390
+ // Some(&-1) | None => return Err(()),
391
+ // Some(&value) => buf[i] = value as u8,
392
+ // };
393
+ // }
394
+ // buf
395
+ // };
396
+ // ret.push((buf[0] << 3) | (buf[1] >> 2));
397
+ // ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
398
+ // ret.push((buf[3] << 4) | (buf[4] >> 1));
399
+ // ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
400
+ // ret.push((buf[6] << 5) | buf[7]);
401
+ // }
402
+ // Ok(ret)
403
+ // }
0 commit comments