Skip to content

Commit ea1b296

Browse files
committed
start merging base32 and zbase32 - wip
1 parent 5da926a commit ea1b296

File tree

1 file changed

+256
-66
lines changed

1 file changed

+256
-66
lines changed

lightning/src/util/base32.rs

+256-66
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,12 @@
1-
// Source: https://crates.io/crates/base32 v0.4.0
2-
// License: MIT or Apache-2.0
3-
// Copyright (c) 2015 The base32 Developers
4-
// Permission is hereby granted, free of charge, to any person obtaining a copy
5-
// of this software and associated documentation files (the "Software"), to deal
6-
// in the Software without restriction, including without limitation the rights
7-
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8-
// copies of the Software, and to permit persons to whom the Software is
9-
// furnished to do so, subject to the following conditions:
10-
11-
// The above copyright notice and this permission notice shall be included in all
12-
// copies or substantial portions of the Software.
13-
14-
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15-
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16-
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17-
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18-
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19-
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20-
// SOFTWARE.
21-
// (reference https://github.com/andreasots/base32/blob/master/LICENSE-MIT)
1+
// This is a modification of base32 encoding to support the zbase32 alphabet.
2+
// The original piece of software can be found at https://crates.io/crates/base32 (v0.4.0)
3+
// The original portions of this software are Copyright (c) 2015 The base32 Developers
4+
5+
/* This file is licensed under either of
6+
* Apache License, Version 2.0, (LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0) or
7+
* MIT license (LICENSE-MIT or http://opensource.org/licenses/MIT)
8+
* at your option.
9+
*/
2210

2311
use crate::prelude::*;
2412

@@ -30,19 +18,29 @@ pub enum Alphabet {
3018
/// Whether to use padding.
3119
padding: bool
3220
},
21+
ZBase32
3322
}
3423

35-
/// RFC4648 base32 encoding with padding.
36-
const RFC4648_ALPHABET: &'static [u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
3724

38-
/// Encode a byte slice into a base32 string.
39-
pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
40-
let (alphabet, padding) = match alphabet {
41-
Alphabet::RFC4648 { padding } => (RFC4648_ALPHABET, padding),
42-
};
25+
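// Inverse zbase32 lookup table for decoding. The index is the upper-cased ASCII byte minus b'0'
// (covering '0' through 'Z'); -1 marks characters that are not part of the alphabet.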
26+
const ZBASE_INV_ALPHABET: [i8; 43] = [
27+
-1, 18, -1, 25, 26, 27, 30, 29, 7, 31, -1, -1, -1, -1, -1, -1, -1, 24, 1, 12, 3, 8, 5, 6, 28,
28+
21, 9, 10, -1, 11, 2, 16, 13, 14, 4, 22, 17, 19, -1, 20, 15, 0, 23,
29+
];
30+
31+
// zbase alphabet
32+
const ZBASE_ALPHABET: &'static [u8] = b"ybndrfg8ejkmcpqxot1uwisza345h769";
33+
34+
/// Inverse RFC4648 lookup table for decoding.
35+
const RFC4648_INV_ALPHABET: [i8; 43] = [
36+
-1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, 0, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
37+
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
38+
];
4339

44-
let mut ret = Vec::with_capacity((data.len() + 3) / 4 * 5);
40+
/// RFC4648 base32 alphabet (shared by the padded and unpadded encodings).
41+
const RFC4648_ALPHABET: &'static [u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
4542

43+
fn map_alphabet(mut ret: Vec<u8>, data: &[u8], alphabet: &'static [u8]) -> Vec<u8> {
4644
for chunk in data.chunks(5) {
4745
let buf = {
4846
let mut buf = [0u8; 5];
@@ -61,6 +59,12 @@ pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
6159
ret.push(alphabet[(buf[4] & 0x1F) as usize]);
6260
}
6361

62+
ret
63+
}
64+
65+
66+
fn rfc4648_encode(data: &[u8], padding: bool) -> String {
67+
let mut ret = map_alphabet(Vec::with_capacity((data.len() + 3) / 4 * 5), data, RFC4648_ALPHABET);
6468
if data.len() % 5 != 0 {
6569
let len = ret.len();
6670
let num_extra = 8 - (data.len() % 5 * 8 + 4) / 5;
@@ -72,50 +76,163 @@ pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
7276
ret.truncate(len - num_extra);
7377
}
7478
}
79+
String::from_utf8(ret).unwrap()
80+
81+
}
82+
83+
fn zbase32_encode(data: &[u8])-> String {
84+
let mut ret = Vec::with_capacity((data.len() + 4) / 5 * 8);
85+
ret = map_alphabet(ret, data, RFC4648_ALPHABET);
86+
ret.truncate((data.len() * 8 + 4) / 5);
87+
88+
// Check that our capacity calculation doesn't under-shoot in fuzzing
89+
#[cfg(fuzzing)]
90+
assert_eq!(ret.capacity(), (data.len() + 4) / 5 * 8);
7591

7692
String::from_utf8(ret).unwrap()
7793
}
7894

79-
/// Inverse RFC4648 lookup table for decoding.
80-
const RFC4648_INV_ALPHABET: [i8; 43] = [
81-
-1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, 0, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
82-
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
83-
];
95+
/// Encode a byte slice into a base32 string.
96+
pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
97+
match alphabet {
98+
Alphabet::RFC4648 { padding } =>{
99+
rfc4648_encode(data, padding)
100+
},
101+
Alphabet::ZBase32 => {
102+
zbase32_encode(data)
103+
}
104+
}
105+
}
106+
84107

85108
/// Decode a base32 string into a byte vector.
86-
pub fn decode(alphabet: Alphabet, data: &str) -> Option<Vec<u8>> {
87-
let data = data.as_bytes();
88-
let alphabet = match alphabet {
89-
Alphabet::RFC4648 { .. } => RFC4648_INV_ALPHABET,
90-
};
91-
let mut unpadded_data_length = data.len();
92-
data.iter().rev().take(6).for_each(|&c| {
93-
if c != b'=' {
94-
return;
95-
}
96-
unpadded_data_length -= 1;
97-
});
98-
let output_length = unpadded_data_length * 5 / 8;
99-
let mut ret = Vec::with_capacity((output_length + 4) / 5 * 5);
100-
for chunk in data.chunks(8) {
101-
let buf = {
102-
let mut buf = [0u8; 8];
103-
for (i, &c) in chunk.iter().enumerate() {
104-
match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
105-
Some(&-1) | None => return None,
106-
Some(&value) => buf[i] = value as u8,
109+
pub fn decode(alphabet: Alphabet, data: &str) -> Result<Vec<u8>, ()> {
110+
match alphabet {
111+
Alphabet::RFC4648 { .. } => {
112+
let alphabet = RFC4648_INV_ALPHABET;
113+
let data = data.as_bytes();
114+
let mut unpadded_data_length = data.len();
115+
data.iter().rev().take(6).for_each(|&c| {
116+
if c != b'=' {
117+
return;
118+
}
119+
unpadded_data_length -= 1;
120+
});
121+
let output_length = unpadded_data_length * 5 / 8;
122+
let mut ret = Vec::with_capacity((output_length + 4) / 5 * 5);
123+
for chunk in data.chunks(8) {
124+
let buf = {
125+
let mut buf = [0u8; 8];
126+
for (i, &c) in chunk.iter().enumerate() {
127+
match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
128+
Some(&-1) | None => return Err(()),
129+
Some(&value) => buf[i] = value as u8,
130+
};
131+
}
132+
buf
107133
};
134+
ret.push((buf[0] << 3) | (buf[1] >> 2));
135+
ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
136+
ret.push((buf[3] << 4) | (buf[4] >> 1));
137+
ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
138+
ret.push((buf[6] << 5) | buf[7]);
108139
}
109-
buf
110-
};
111-
ret.push((buf[0] << 3) | (buf[1] >> 2));
112-
ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
113-
ret.push((buf[3] << 4) | (buf[4] >> 1));
114-
ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
115-
ret.push((buf[6] << 5) | buf[7]);
140+
ret.truncate(output_length);
141+
Ok(ret)
142+
143+
},
144+
Alphabet::ZBase32 => {
145+
if !data.is_ascii() {
146+
return Err(());
147+
}
148+
let alphabet = ZBASE_INV_ALPHABET;
149+
150+
let data = data.as_bytes();
151+
let output_length = data.len() * 5 / 8;
152+
if data.len() > (output_length * 8 + 4) / 5 {
153+
// If the string has more charachters than are required to alphabet_encode the number of bytes
154+
// decodable, treat the string as invalid.
155+
return Err(());
156+
}
157+
158+
let mut ret = Vec::with_capacity((data.len() + 7) / 8 * 5);
159+
160+
for chunk in data.chunks(8) {
161+
let buf = {
162+
let mut buf = [0u8; 8];
163+
for (i, &c) in chunk.iter().enumerate() {
164+
match ZBASE_INV_ALPHABET.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
165+
Some(&-1) | None => return Err(()),
166+
Some(&value) => buf[i] = value as u8,
167+
};
168+
}
169+
buf
170+
};
171+
ret.push((buf[0] << 3) | (buf[1] >> 2));
172+
ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
173+
ret.push((buf[3] << 4) | (buf[4] >> 1));
174+
ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
175+
ret.push((buf[6] << 5) | buf[7]);
176+
}
177+
for c in ret.drain(output_length..) {
178+
if c != 0 {
179+
// If the original string had any bits set at positions outside of the encoded data,
180+
// treat the string as invalid.
181+
return Err(());
182+
}
183+
}
184+
185+
// Check that our capacity calculation doesn't under-shoot in fuzzing
186+
#[cfg(fuzzing)]
187+
assert_eq!(ret.capacity(), (data.len() + 7) / 8 * 5);
188+
189+
Ok(ret)
190+
},
191+
}
192+
}
193+
194+
#[cfg(test)]
mod tests {
	use super::*;

	/// Pairs of (zbase32 text, raw bytes) that must map onto each other in both directions.
	const TEST_DATA: &[(&str, &[u8])] = &[
		("", &[]),
		("yy", &[0x00]),
		("oy", &[0x80]),
		("tqrey", &[0x8b, 0x88, 0x80]),
		("6n9hq", &[0xf0, 0xbf, 0xc7]),
		("4t7ye", &[0xd4, 0x7a, 0x04]),
		("6im5sdy", &[0xf5, 0x57, 0xbb, 0x0c]),
		("ybndrfg8ejkmcpqxot1uwisza345h769", &[0x00, 0x44, 0x32, 0x14, 0xc7, 0x42, 0x54, 0xb6,
											0x35, 0xcf, 0x84, 0x65, 0x3a, 0x56, 0xd7, 0xc6,
											0x75, 0xbe, 0x77, 0xdf])
	];

	#[test]
	fn test_encode() {
		for &(zbase32, data) in TEST_DATA {
			assert_eq!(encode(Alphabet::ZBase32, data), zbase32);
		}
	}

	#[test]
	fn test_decode() {
		for &(zbase32, data) in TEST_DATA {
			assert_eq!(decode(Alphabet::ZBase32, zbase32).unwrap(), data);
		}
	}

	#[test]
	fn test_decode_wrong() {
		// Strings containing characters outside the zbase32 alphabet must be rejected.
		const WRONG_DATA: &[&str] = &["00", "l1", "?", "="];

		for &data in WRONG_DATA {
			// Assert on the result itself rather than on constant `true` / `false` values.
			assert!(decode(Alphabet::ZBase32, data).is_err(), "Data shouldn't be decodable");
		}
	}
}
120237

121238
#[cfg(test)]
@@ -203,11 +320,84 @@ mod test {
203320

204321
#[test]
fn invalid_chars_rfc4648() {
	// `,` is not covered by the RFC4648 lookup table, so decoding has to fail.
	assert!(decode(RFC4648 { padding: true }, ",").is_err())
}
208325

209326
#[test]
fn invalid_chars_unpadded_rfc4648() {
	// `,` is not covered by the RFC4648 lookup table, so decoding has to fail.
	assert!(decode(RFC4648 { padding: false }, ",").is_err())
}
213330
}
331+
332+
// Decodes a zbase32 string to the original bytes, failing if the string was not encoded by a
333+
// proper zbase32 encoder.
334+
// pub fn alphabet_decode(data: &str) -> Result<Vec<u8>, ()> {
335+
// if !data.is_ascii() {
336+
// return Err(());
337+
// }
338+
339+
// let data = data.as_bytes();
340+
// let output_length = data.len() * 5 / 8;
341+
// if data.len() > (output_length * 8 + 4) / 5 {
342+
// // If the string has more characters than are required to encode the number of bytes
343+
// // decodable, treat the string as invalid.
344+
// return Err(());
345+
// }
346+
347+
// let mut ret = Vec::with_capacity((data.len() + 7) / 8 * 5);
348+
349+
// for chunk in data.chunks(8) {
350+
// let buf = {
351+
// let mut buf = [0u8; 8];
352+
// for (i, &c) in chunk.iter().enumerate() {
353+
// match ZBASE_INV_ALPHABET.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
354+
// Some(&-1) | None => return Err(()),
355+
// Some(&value) => buf[i] = value as u8,
356+
// };
357+
// }
358+
// buf
359+
// };
360+
// ret.push((buf[0] << 3) | (buf[1] >> 2));
361+
// ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
362+
// ret.push((buf[3] << 4) | (buf[4] >> 1));
363+
// ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
364+
// ret.push((buf[6] << 5) | buf[7]);
365+
// }
366+
// for c in ret.drain(output_length..) {
367+
// if c != 0 {
368+
// // If the original string had any bits set at positions outside of the encoded data,
369+
// // treat the string as invalid.
370+
// return Err(());
371+
// }
372+
// }
373+
374+
// // Check that our capacity calculation doesn't under-shoot in fuzzing
375+
// #[cfg(fuzzing)]
376+
// assert_eq!(ret.capacity(), (data.len() + 7) / 8 * 5);
377+
378+
// Ok(ret)
379+
// }
380+
//
381+
//
382+
//
383+
// fn inv_map_alphabet(mut ret: Vec<u8>, data: &str, alphabet: &'static [u8]) -> Result<Vec<u8>, ()> {
384+
// let data = data.as_bytes();
385+
// for chunk in data.chunks(8) {
386+
// let buf = {
387+
// let mut buf = [0u8; 8];
388+
// for (i, &c) in chunk.iter().enumerate() {
389+
// match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
390+
// Some(&-1) | None => return Err(()),
391+
// Some(&value) => buf[i] = value as u8,
392+
// };
393+
// }
394+
// buf
395+
// };
396+
// ret.push((buf[0] << 3) | (buf[1] >> 2));
397+
// ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
398+
// ret.push((buf[3] << 4) | (buf[4] >> 1));
399+
// ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
400+
// ret.push((buf[6] << 5) | buf[7]);
401+
// }
402+
// Ok(ret)
403+
// }

0 commit comments

Comments
 (0)