amazon-ion · zslayton · Jul 6, 2022 · Jun 18, 2022 · Jun 27, 2022 · Jun 28, 2022
diff --git a/.gitignore b/.gitignore
@@ -226,7 +226,8 @@ massif.out.*
 bb.out
 bb.out.*
 
-
+# macOS folder metadata
+**/.DS_Store
 
 ### VisualStudioCode ###
 .vscode/*

diff --git a/src/binary/non_blocking/binary_buffer.rs b/src/binary/non_blocking/binary_buffer.rs
@@ -93,7 +93,7 @@ impl<A: AsRef<[u8]>> BinaryBuffer<A> {
     /// If the buffer is not empty, returns `Some(_)` containing the next byte in the buffer.
     /// Otherwise, returns `None`.
     pub fn peek_next_byte(&self) -> Option<u8> {
-        self.bytes().get(0).copied()
+        self.data.as_ref().get(self.start).copied()
     }
 
     /// If there are at least `n` bytes left in the buffer, returns `Some(_)` containing a slice
@@ -262,7 +262,7 @@ impl<A: AsRef<[u8]>> BinaryBuffer<A> {
     ///
     /// See: https://amzn.github.io/ion-docs/docs/binary.html#uint-and-int-fields
     pub fn read_uint(&mut self, length: usize) -> IonResult<DecodedUInt> {
-        if length <= mem::size_of::<usize>() {
+        if length <= mem::size_of::<u64>() {
             return self.read_small_uint(length);
         }
 
@@ -271,18 +271,13 @@ impl<A: AsRef<[u8]>> BinaryBuffer<A> {
     }
 
     /// Reads the first `length` bytes from the buffer as a `UInt`. The caller must confirm that
-    /// `length` is small enough to fit in a `usize`.
+    /// `length` is small enough to fit in a `u64`.
     #[inline]
     fn read_small_uint(&mut self, length: usize) -> IonResult<DecodedUInt> {
         let uint_bytes = self
             .peek_n_bytes(length)
             .ok_or_else(|| incomplete_data_error_raw("a UInt", self.total_consumed()))?;
-        let mut magnitude: u64 = 0;
-        for &byte in uint_bytes {
-            let byte = u64::from(byte);
-            magnitude <<= 8;
-            magnitude |= byte;
-        }
+        let magnitude = DecodedUInt::small_uint_from_slice(uint_bytes);
         self.consume(length);
         Ok(DecodedUInt::new(UInteger::U64(magnitude), length))
     }

diff --git a/src/binary/non_blocking/raw_binary_reader.rs b/src/binary/non_blocking/raw_binary_reader.rs
@@ -9,7 +9,7 @@ use crate::result::{
     decoding_error, decoding_error_raw, illegal_operation, illegal_operation_raw,
     incomplete_data_error,
 };
-use crate::types::integer::{IntAccess, UInteger};
+use crate::types::integer::IntAccess;
 use crate::types::SymbolId;
 use crate::{
     Decimal, Integer, IonResult, IonType, RawStreamItem, RawSymbolToken, StreamReader, Timestamp,
@@ -18,6 +18,7 @@ use bytes::{BigEndian, Buf, ByteOrder};
 use num_bigint::BigUint;
 use num_traits::Zero;
 use std::io::Read;
+use std::mem;
 use std::ops::Range;
 
 /// Type, offset, and length information about the serialized value over which the
@@ -475,18 +476,13 @@ impl<A: AsRef<[u8]>> RawBinaryBufferReader<A> {
 
     /// If the reader is currently positioned on a symbol value, parses that value into a `SymbolId`.
     pub fn read_symbol_id(&mut self) -> IonResult<SymbolId> {
-        let (encoded_value, mut buffer) = self.value_and_buffer(IonType::Symbol)?;
-        match buffer.read_uint(encoded_value.value_length())?.value() {
-            UInteger::U64(symbol_id) => {
-                // This will always succeed on 64-bit platforms where u64 and usize are the same.
-                if let Ok(sid) = usize::try_from(*symbol_id) {
-                    Ok(sid)
-                } else {
-                    decoding_error("found a u64 symbol ID that was too large to fit in a usize")
-                }
-            }
-            UInteger::BigUInt(symbol_id) => Self::try_symbol_id_from_big_uint(symbol_id),
+        let (_encoded_value, bytes) = self.value_and_bytes(IonType::Symbol)?;
+        if bytes.len() > mem::size_of::<usize>() {
+            return decoding_error("found a symbol Id that was too large to fit in a usize");
         }
+        let magnitude = DecodedUInt::small_uint_from_slice(bytes);
+        // This cast is lossless because `bytes` is no wider than a usize, so the magnitude fits.
+        Ok(magnitude as usize)
     }
 
     /// Tries to downgrade the provided BigUint to a SymbolId (usize).
@@ -628,6 +624,12 @@ impl<A: AsRef<[u8]>> StreamReader for RawBinaryBufferReader<A> {
         Box::new(self.annotations_iter())
     }
 
+    fn has_annotations(&self) -> bool {
+        self.encoded_value()
+            .map(|v| v.annotations_sequence_length > 0)
+            .unwrap_or(false)
+    }
+
     fn field_name(&self) -> IonResult<Self::Symbol> {
         // If the reader is parked on a value...
         self.encoded_value()
@@ -685,9 +687,12 @@ impl<A: AsRef<[u8]>> StreamReader for RawBinaryBufferReader<A> {
     }
 
     fn read_integer(&mut self) -> IonResult<Integer> {
-        let (encoded_value, mut buffer) = self.value_and_buffer(IonType::Integer)?;
-        let uint: DecodedUInt = buffer.read_uint(encoded_value.value_length())?;
-        let value: Integer = uint.into();
+        let (encoded_value, bytes) = self.value_and_bytes(IonType::Integer)?;
+        let value: Integer = if bytes.len() <= mem::size_of::<u64>() {
+            DecodedUInt::small_uint_from_slice(bytes).into()
+        } else {
+            DecodedUInt::big_uint_from_slice(bytes).into()
+        };
 
         use self::IonTypeCode::*;
         let value = match (encoded_value.header.ion_type_code, value) {
@@ -1195,7 +1200,7 @@ impl<'a, A: AsRef<[u8]>> TxReader<'a, A> {
         // Read the length of the annotations sequence
         let annotations_length = self.tx_buffer.read_var_uint()?;
 
-        // Validate that neither the annotations sequence is not empty.
+        // Validate that the annotations sequence is not empty.
         if annotations_length.value() == 0 {
             return decoding_error("found an annotations wrapper with no annotations");
         }
@@ -1204,7 +1209,7 @@ impl<'a, A: AsRef<[u8]>> TxReader<'a, A> {
         let expected_value_length = annotations_and_value_length
             - annotations_length.size_in_bytes()
             - annotations_length.value();
-        self.tx_buffer.total_consumed();
+
         if expected_value_length == 0 {
             return decoding_error("found an annotation wrapper with no value");
         }
@@ -1749,7 +1754,7 @@ mod tests {
     fn debug() -> IonResult<()> {
         let data = &[
             0xE0, 0x01, 0x00, 0xEA, // IVM
-            0xc3, 0xd2, 0x84, 0x11, // {'name': true}
+            0xc3, 0xd2, 0x84, 0x11, // ({'name': true})
         ]; // Empty string
         let mut reader = RawBinaryBufferReader::new(data);
         let item = reader.next()?;

diff --git a/src/binary/uint.rs b/src/binary/uint.rs
@@ -28,6 +28,24 @@ impl DecodedUInt {
         }
     }
 
+    /// Interprets all of the bytes in the provided slice as big-endian unsigned integer bytes.
+    /// The caller must confirm that `uint_bytes` is at most 8 bytes long; otherwise, the
+    /// leading (most significant) bytes are silently shifted out of the returned `u64`.
+    pub(crate) fn small_uint_from_slice(uint_bytes: &[u8]) -> u64 {
+        let mut magnitude: u64 = 0;
+        for &byte in uint_bytes {
+            let byte = u64::from(byte);
+            magnitude <<= 8;
+            magnitude |= byte;
+        }
+        magnitude
+    }
+
+    /// Interprets all of the bytes in the provided slice as big-endian unsigned integer bytes.
+    pub(crate) fn big_uint_from_slice(uint_bytes: &[u8]) -> BigUint {
+        BigUint::from_bytes_be(uint_bytes)
+    }
+
     /// Reads a UInt with `length` bytes from the provided data source.
     pub fn read<R: IonDataSource>(data_source: &mut R, length: usize) -> IonResult<DecodedUInt> {
         if length > MAX_UINT_SIZE_IN_BYTES {

diff --git a/src/types/integer.rs b/src/types/integer.rs
@@ -316,6 +316,48 @@ impl Zero for Integer {
     }
 }
 
+// Infallible conversion to Integer::I64 for integer types that `i64::from` accepts directly
+macro_rules! impl_integer_i64_from {
+    ($($t:ty),*) => ($(
+        impl From<$t> for Integer {
+            fn from(value: $t) -> Integer {
+                let i64_value = i64::from(value);
+                Integer::I64(i64_value)
+            }
+        }
+    )*)
+}
+impl_integer_i64_from!(u8, u16, u32, i8, i16, i32, i64);
+
+// Conversion to Integer from types that may not fit in an i64; values that don't become a BigInt
+macro_rules! impl_integer_from {
+    ($($t:ty),*) => ($(
+        impl From<$t> for Integer {
+            fn from(value: $t) -> Integer {
+                match i64::try_from(value) {
+                    Ok(i64_value) => Integer::I64(i64_value),
+                    Err(_) => Integer::BigInt(BigInt::from(value))
+                }
+            }
+        }
+    )*)
+}
+
+impl_integer_from!(isize, usize, u64);
+
+impl From<BigUint> for Integer {
+    fn from(value: BigUint) -> Self {
+        let big_int = BigInt::from(value);
+        Integer::BigInt(big_int)
+    }
+}
+
+impl From<BigInt> for Integer {
+    fn from(value: BigInt) -> Self {
+        Integer::BigInt(value)
+    }
+}
+
 impl<T> IntAccess for T
 where
     T: Element,

diff --git a/tests/element_test_vectors.rs b/tests/element_test_vectors.rs
@@ -788,6 +788,10 @@ mod non_blocking_native_element_tests {
                 "ion-tests/iontestdata/good/subfieldVarUInt15bit.ion",
                 "ion-tests/iontestdata/good/subfieldVarUInt16bit.ion",
                 "ion-tests/iontestdata/good/subfieldVarUInt32bit.ion",
+                // This test requires the reader to be able to read symbols whose ID is encoded
+                // with more than 8 bytes. Having a symbol table with more than 18 quintillion
+                // symbols is not very practical.
+                "ion-tests/iontestdata/good/typecodes/T7-large.10n",
                 // ---
                 // Requires importing shared symbol tables
                 "ion-tests/iontestdata/good/item1.10n",