start merging base32 and zbase32 - wip

jbesraa · jbesraa · commit ea1b296ce08e · 2023-05-12T15:09:18.000+03:00
diff --git a/lightning/src/util/base32.rs b/lightning/src/util/base32.rs
@@ -1,24 +1,12 @@
-// Source: https://crates.io/crates/base32 v0.4.0
-// License: MIT or Apache-2.0
-// Copyright (c) 2015 The base32 Developers
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
-
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-// (reference https://github.com/andreasots/base32/blob/master/LICENSE-MIT)
+// This is a modification of base32 encoding to support the zbase32 alphabet.
+// The original piece of software can be found at https://crates.io/crates/base32 (v0.4.0).
+// The original portions of this software are Copyright (c) 2015 The base32 Developers
+
+/* This file is licensed under either of
+ *  Apache License, Version 2.0, (LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0) or
+ *  MIT license (LICENSE-MIT or http://opensource.org/licenses/MIT)
+ * at your option.
+ */
 
 use crate::prelude::*;
 
@@ -30,19 +18,29 @@ pub enum Alphabet {
         /// Whether to use padding.
         padding: bool
     },
+    ZBase32
 }
 
-/// RFC4648 base32 encoding with padding.
-const RFC4648_ALPHABET: &'static [u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
 
-/// Encode a byte slice into a base32 string.
-pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
-    let (alphabet, padding) = match alphabet {
-        Alphabet::RFC4648 { padding } => (RFC4648_ALPHABET, padding),
-    };
+/// Inverse zbase32 table indexed by `c - b'0'` for uppercased ASCII '0'..='Z'; -1 is invalid.
+const ZBASE_INV_ALPHABET: [i8; 43] = [
+    -1, 18, -1, 25, 26, 27, 30, 29, 7, 31, -1, -1, -1, -1, -1, -1, -1,  24, 1, 12, 3, 8, 5, 6, 28,
+    21, 9, 10, -1, 11, 2, 16, 13, 14, 4, 22, 17, 19, -1, 20, 15, 0, 23,
+];
+
+/// zbase32 alphabet: the character at index `i` encodes the 5-bit value `i`.
+const ZBASE_ALPHABET: &'static [u8] = b"ybndrfg8ejkmcpqxot1uwisza345h769";
+
+/// Inverse RFC4648 table indexed by `c - b'0'` for '0'..='Z'; -1 is invalid and '=' maps to 0.
+const RFC4648_INV_ALPHABET: [i8; 43] = [
+    -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, 0, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
+    9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+];
 
-    let mut ret = Vec::with_capacity((data.len() + 3) / 4 * 5);
+/// RFC4648 base32 alphabet: the character at index `i` encodes the 5-bit value `i`.
+const RFC4648_ALPHABET: &'static [u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
 
+fn map_alphabet(mut ret: Vec<u8>, data: &[u8], alphabet: &'static [u8]) -> Vec<u8> {
     for chunk in data.chunks(5) {
         let buf = {
             let mut buf = [0u8; 5];
@@ -61,6 +59,12 @@ pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
         ret.push(alphabet[(buf[4] & 0x1F) as usize]);
     }
 
+    ret
+}
+
+
+fn rfc4648_encode(data: &[u8], padding: bool) -> String {
+    let mut ret = map_alphabet(Vec::with_capacity((data.len() + 3) / 4 * 5), data, RFC4648_ALPHABET);
     if data.len() % 5 != 0 {
         let len = ret.len();
         let num_extra = 8 - (data.len() % 5 * 8 + 4) / 5;
@@ -72,50 +76,163 @@ pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
             ret.truncate(len - num_extra);
         }
     }
+    String::from_utf8(ret).unwrap()
+
+}
+
+/// Encode `data` into a zbase32 string, truncated to the characters that carry data.
+fn zbase32_encode(data: &[u8]) -> String {
+    let mut ret = map_alphabet(Vec::with_capacity((data.len() + 4) / 5 * 8), data, ZBASE_ALPHABET);
+    // Keep only the ceil(bits / 5) characters needed to represent `data`.
+    ret.truncate((data.len() * 8 + 4) / 5);
+    // Check that our capacity calculation doesn't under-shoot in fuzzing
+    #[cfg(fuzzing)]
+    assert_eq!(ret.capacity(), (data.len() + 4) / 5 * 8);
 
     String::from_utf8(ret).unwrap()
 }
 
-/// Inverse RFC4648 lookup table for decoding.
-const RFC4648_INV_ALPHABET: [i8; 43] = [
-    -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, 0, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
-    9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
-];
+/// Encode `data` as a base32 string using the requested `alphabet`.
+///
+/// This only dispatches on the variant: [`Alphabet::RFC4648`] forwards its `padding` flag to
+/// `rfc4648_encode`, while [`Alphabet::ZBase32`] is handled by `zbase32_encode`.
+pub fn encode(alphabet: Alphabet, data: &[u8]) -> String {
+    match alphabet {
+        // Padding is a per-call choice carried by the RFC4648 variant.
+        Alphabet::RFC4648 { padding } => rfc4648_encode(data, padding),
+        Alphabet::ZBase32 => zbase32_encode(data),
+    }
+}
+
 
 /// Decode a base32 string into a byte vector.
-pub fn decode(alphabet: Alphabet, data: &str) -> Option<Vec<u8>> {
-    let data = data.as_bytes();
-    let alphabet = match alphabet {
-        Alphabet::RFC4648 { .. } => RFC4648_INV_ALPHABET,
-    };
-    let mut unpadded_data_length = data.len();
-	data.iter().rev().take(6).for_each(|&c| {
-		if c != b'=' {
-			return;
-		}
-		unpadded_data_length -= 1;
-	});
-    let output_length = unpadded_data_length * 5 / 8;
-    let mut ret = Vec::with_capacity((output_length + 4) / 5 * 5);
-    for chunk in data.chunks(8) {
-        let buf = {
-            let mut buf = [0u8; 8];
-            for (i, &c) in chunk.iter().enumerate() {
-                match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
-                    Some(&-1) | None => return None,
-                    Some(&value) => buf[i] = value as u8,
+///
+/// Returns `Err(())` if a character has no entry in the selected alphabet's lookup table or, for
+/// [`Alphabet::ZBase32`], if `data` has excess characters or bits set beyond the decoded bytes.
+pub fn decode(alphabet: Alphabet, data: &str) -> Result<Vec<u8>, ()> {
+    match alphabet {
+        Alphabet::RFC4648 { .. } => {
+            let alphabet = RFC4648_INV_ALPHABET;
+            let data = data.as_bytes();
+            let mut unpadded_data_length = data.len();
+            data.iter().rev().take(6).for_each(|&c| {
+                if c != b'=' {
+                    return;
+                }
+                unpadded_data_length -= 1;
+            });
+            let output_length = unpadded_data_length * 5 / 8;
+            let mut ret = Vec::with_capacity((output_length + 4) / 5 * 5);
+            for chunk in data.chunks(8) {
+                let buf = {
+                    let mut buf = [0u8; 8];
+                    for (i, &c) in chunk.iter().enumerate() {
+                        match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
+                            Some(&-1) | None => return Err(()),
+                            Some(&value) => buf[i] = value as u8,
+                        };
+                    }
+                    buf
                 };
+                ret.push((buf[0] << 3) | (buf[1] >> 2));
+                ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
+                ret.push((buf[3] << 4) | (buf[4] >> 1));
+                ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
+                ret.push((buf[6] << 5) | buf[7]);
             }
-            buf
-        };
-        ret.push((buf[0] << 3) | (buf[1] >> 2));
-        ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
-        ret.push((buf[3] << 4) | (buf[4] >> 1));
-        ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
-        ret.push((buf[6] << 5) | buf[7]);
+            ret.truncate(output_length);
+            Ok(ret)
+        },
+        Alphabet::ZBase32 => {
+            if !data.is_ascii() {
+                return Err(());
+            }
+            let data = data.as_bytes();
+            let output_length = data.len() * 5 / 8;
+            if data.len() > (output_length * 8 + 4) / 5 {
+                // If the string has more characters than are required to encode the number of bytes
+                // decodable, treat the string as invalid.
+                return Err(());
+            }
+
+            let mut ret = Vec::with_capacity((data.len() + 7) / 8 * 5);
+
+            for chunk in data.chunks(8) {
+                let buf = {
+                    let mut buf = [0u8; 8];
+                    for (i, &c) in chunk.iter().enumerate() {
+                        match ZBASE_INV_ALPHABET.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
+                            Some(&-1) | None => return Err(()),
+                            Some(&value) => buf[i] = value as u8,
+                        };
+                    }
+                    buf
+                };
+                ret.push((buf[0] << 3) | (buf[1] >> 2));
+                ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
+                ret.push((buf[3] << 4) | (buf[4] >> 1));
+                ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
+                ret.push((buf[6] << 5) | buf[7]);
+            }
+            for c in ret.drain(output_length..) {
+                if c != 0 {
+                    // If the original string had any bits set at positions outside of the encoded data,
+                    // treat the string as invalid.
+                    return Err(());
+                }
+            }
+
+            // Check that our capacity calculation doesn't under-shoot in fuzzing
+            #[cfg(fuzzing)]
+            assert_eq!(ret.capacity(), (data.len() + 7) / 8 * 5);
+
+            Ok(ret)
+        },
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Pairs of (zbase32 string, raw bytes) shared by the encode and decode tests.
+    const TEST_DATA: &[(&str, &[u8])] = &[
+        ("", &[]),
+        ("yy", &[0x00]),
+        ("oy", &[0x80]),
+        ("tqrey", &[0x8b, 0x88, 0x80]),
+        ("6n9hq", &[0xf0, 0xbf, 0xc7]),
+        ("4t7ye", &[0xd4, 0x7a, 0x04]),
+        ("6im5sdy", &[0xf5, 0x57, 0xbb, 0x0c]),
+        ("ybndrfg8ejkmcpqxot1uwisza345h769", &[0x00, 0x44, 0x32, 0x14, 0xc7, 0x42, 0x54, 0xb6,
+         0x35, 0xcf, 0x84, 0x65, 0x3a, 0x56, 0xd7, 0xc6,
+         0x75, 0xbe, 0x77, 0xdf])
+    ];
+
+    #[test]
+    fn test_encode() {
+        for &(zbase32, data) in TEST_DATA {
+            assert_eq!(encode(Alphabet::ZBase32, data), zbase32);
+        }
+    }
+
+    #[test]
+    fn test_decode() {
+        for &(zbase32, data) in TEST_DATA {
+            assert_eq!(decode(Alphabet::ZBase32, zbase32).unwrap(), data);
+        }
+    }
+
+    #[test]
+    fn test_decode_wrong() {
+        // Inputs that contain characters outside the zbase32 alphabet or have a length no
+        // encoder would produce; all of them must be rejected.
+        const WRONG_DATA: &[&str] = &["00", "l1", "?", "="];
+
+        for &data in WRONG_DATA {
+            assert!(decode(Alphabet::ZBase32, data).is_err(), "Data shouldn't be decodable");
+        }
     }
-    ret.truncate(output_length);
-    Some(ret)
 }
 
 #[cfg(test)]
@@ -203,11 +320,84 @@ mod test {
 
     #[test]
     fn invalid_chars_rfc4648() {
-        assert_eq!(decode(RFC4648 { padding: true }, ","), None)
+        assert!(decode(RFC4648 { padding: true }, ",").is_err()); // ',' is not a base32 character
     }
 
     #[test]
     fn invalid_chars_unpadded_rfc4648() {
-        assert_eq!(decode(RFC4648 { padding: false }, ","), None)
+        assert!(decode(RFC4648 { padding: false }, ",").is_err()); // ',' is not a base32 character
     }
 }
+
+// Decodes a zbase32 string to the original bytes, failing if the string was not encoded by a
+// proper zbase32 encoder.
+// pub fn alphabet_decode(data: &str) -> Result<Vec<u8>, ()> {
+//     if !data.is_ascii() {
+//         return Err(());
+//     }
+
+//     let data = data.as_bytes();
+//     let output_length = data.len() * 5 / 8;
+//     if data.len() > (output_length * 8 + 4) / 5 {
+//         // If the string has more charachters than are required to alphabet_encode the number of bytes
+//         // decodable, treat the string as invalid.
+//         return Err(());
+//     }
+
+//     let mut ret = Vec::with_capacity((data.len() + 7) / 8 * 5);
+
+//     for chunk in data.chunks(8) {
+//         let buf = {
+//             let mut buf = [0u8; 8];
+//             for (i, &c) in chunk.iter().enumerate() {
+//                 match ZBASE_INV_ALPHABET.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
+//                     Some(&-1) | None => return Err(()),
+//                     Some(&value) => buf[i] = value as u8,
+//                 };
+//             }
+//             buf
+//         };
+//         ret.push((buf[0] << 3) | (buf[1] >> 2));
+//         ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
+//         ret.push((buf[3] << 4) | (buf[4] >> 1));
+//         ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
+//         ret.push((buf[6] << 5) | buf[7]);
+//     }
+//     for c in ret.drain(output_length..) {
+//         if c != 0 {
+//             // If the original string had any bits set at positions outside of the encoded data,
+//             // treat the string as invalid.
+//             return Err(());
+//         }
+//     }
+
+//     // Check that our capacity calculation doesn't under-shoot in fuzzing
+//     #[cfg(fuzzing)]
+//     assert_eq!(ret.capacity(), (data.len() + 7) / 8 * 5);
+
+//     Ok(ret)
+// }
+//
+//
+//
+// fn inv_map_alphabet(mut ret: Vec<u8>, data: &str, alphabet: &'static [u8]) -> Result<Vec<u8>, ()> {
+//     let data = data.as_bytes();
+//     for chunk in data.chunks(8) {
+//         let buf = {
+//             let mut buf = [0u8; 8];
+//             for (i, &c) in chunk.iter().enumerate() {
+//                 match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
+//                     Some(&-1) | None => return Err(()),
+//                     Some(&value) => buf[i] = value as u8,
+//                 };
+//             }
+//             buf
+//         };
+//         ret.push((buf[0] << 3) | (buf[1] >> 2));
+//         ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
+//         ret.push((buf[3] << 4) | (buf[4] >> 1));
+//         ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
+//         ret.push((buf[6] << 5) | buf[7]);
+//     }
+//     Ok(ret)
+// }