Limit unit magnitude + precision to fit into 63 bits. Approach and tests ported from equivalent PR to Java impl.

marshallpierce · marshallpierce · commit 00f087b3bc02 · 2017-02-18T17:36:19.000-08:00
diff --git a/src/lib.rs b/src/lib.rs
@@ -632,9 +632,13 @@ impl<T: Counter> Histogram<T> {
         let sub_bucket_count = 1_u32 << (sub_bucket_count_magnitude as u32);
 
         if unit_magnitude + sub_bucket_count_magnitude > 63 {
-            // Cannot represent shifted sub bucket count in 64 bits.
-            // This will cause an infinite loop when calculating number of buckets
-            return Err("Cannot represent significant figures' worth of measurements beyond lowest value");
+            // sub_bucket_count, shifted left by unit_magnitude, can't be represented in a u64.
+            // Technically it still sort of works if the two magnitudes sum to 64: every value
+            // but the last one in the shifted sub_bucket_count is representable. However, the
+            // utility of such a histogram over one whose magnitudes fit in 63 bits is debatable,
+            // and it makes the logic harder to work through. Sums larger than 64 are totally
+            // broken, as leading_zero_count_base would go negative.
+            return Err("Cannot represent sigfig worth of values beyond low");
         };
 
         let sub_bucket_half_count = sub_bucket_count / 2;
@@ -1250,6 +1254,7 @@ impl<T: Counter> Histogram<T> {
 
     /// Find the number of buckets needed such that `value` is representable.
     fn buckets_to_cover(&self, value: u64) -> u8 {
+        // Shift won't overflow because sub_bucket_count_magnitude + unit_magnitude <= 63.
         // the k'th bucket can express from 0 * 2^k to sub_bucket_count * 2^k in units of 2^k
         let mut smallest_untrackable_value = (self.sub_bucket_count as u64) << self.unit_magnitude;
 
@@ -1468,5 +1473,6 @@ impl<T: Counter, F: Counter> PartialEq<Histogram<F>> for Histogram<T>
 // TODO: timestamps and tags
 // TODO: textual output
 
+#[path = "tests/tests.rs"]
 #[cfg(test)]
 mod tests;
diff --git a/src/tests/helpers.rs b/src/tests/helpers.rs
@@ -0,0 +1,8 @@
+use super::Histogram;
+
+#[cfg(test)]
+pub fn histo64(lowest_discernible_value: u64, highest_trackable_value: u64, num_significant_digits: u8)
+           -> Histogram<u64> {
+    Histogram::<u64>::new_with_bounds(lowest_discernible_value, highest_trackable_value,
+                                      num_significant_digits).unwrap()
+}
diff --git a/src/tests/index_calculation.rs b/src/tests/index_calculation.rs
@@ -0,0 +1,153 @@
+use super::Histogram;
+use tests::helpers::histo64;
+
+#[test]
+fn unit_magnitude_0_index_calculations() {
+    let h = histo64(1_u64, 1_u64 << 32, 3);
+    assert_eq!(2048, h.sub_bucket_count);
+    assert_eq!(0, h.unit_magnitude);
+    // sub_bucket_count = 2^11, so 2^11 << 22 is > the max of 2^32 for 23 buckets total
+    assert_eq!(23, h.bucket_count);
+
+    // first half of first bucket
+    assert_eq!(0, h.bucket_for(3));
+    assert_eq!(3, h.sub_bucket_for(3, 0));
+
+    // second half of first bucket
+    assert_eq!(0, h.bucket_for(1024 + 3));
+    assert_eq!(1024 + 3, h.sub_bucket_for(1024 + 3, 0));
+
+    // second bucket (top half)
+    assert_eq!(1, h.bucket_for(2048 + 3 * 2));
+    // counting by 2s, starting at halfway through the bucket
+    assert_eq!(1024 + 3, h.sub_bucket_for(2048 + 3 * 2, 1));
+
+    // third bucket (top half)
+    assert_eq!(2, h.bucket_for((2048 << 1) + 3 * 4));
+    // counting by 4s, starting at halfway through the bucket
+    assert_eq!(1024 + 3, h.sub_bucket_for((2048 << 1) + 3 * 4, 2));
+
+    // past last bucket -- not near u64::max_value(), so should still calculate ok.
+    assert_eq!(23, h.bucket_for((2048_u64 << 22) + 3 * (1 << 23)));
+    assert_eq!(1024 + 3, h.sub_bucket_for((2048_u64 << 22) + 3 * (1 << 23), 23));
+}
+
+#[test]
+fn unit_magnitude_4_index_calculations() {
+    let h = histo64(1_u64 << 12, 1_u64 << 32, 3);
+    assert_eq!(2048, h.sub_bucket_count);
+    assert_eq!(12, h.unit_magnitude);
+    // sub_bucket_count = 2^11; shifted by the unit magnitude (12, despite the "4" in this test's
+    // name) it's 2^23. 2^23 << 10 is > the max of 2^32, for 11 buckets total.
+    assert_eq!(11, h.bucket_count);
+    let unit = 1_u64 << 12;
+
+    // below lowest value
+    assert_eq!(0, h.bucket_for(3));
+    assert_eq!(0, h.sub_bucket_for(3, 0));
+
+    // first half of first bucket
+    assert_eq!(0, h.bucket_for(3 * unit));
+    assert_eq!(3, h.sub_bucket_for(3 * unit, 0));
+
+    // second half of first bucket
+    // sub_bucket_half_count's worth of units, plus 3 more
+    assert_eq!(0, h.bucket_for(unit * (1024 + 3)));
+    assert_eq!(1024 + 3, h.sub_bucket_for(unit * (1024 + 3), 0));
+
+    // second bucket (top half), bucket scale = unit << 1.
+    // Middle of bucket is (sub_bucket_half_count = 2^10) of bucket scale, = unit << 11.
+    // Add on 3 of bucket scale.
+    assert_eq!(1, h.bucket_for((unit << 11) + 3 * (unit << 1)));
+    assert_eq!(1024 + 3, h.sub_bucket_for((unit << 11) + 3 * (unit << 1), 1));
+
+    // third bucket (top half), bucket scale = unit << 2.
+    // Middle of bucket is (sub_bucket_half_count = 2^10) of bucket scale, = unit << 12.
+    // Add on 3 of bucket scale.
+    assert_eq!(2, h.bucket_for((unit << 12) + 3 * (unit << 2)));
+    assert_eq!(1024 + 3, h.sub_bucket_for((unit << 12) + 3 * (unit << 2), 2));
+
+    // past last bucket -- not near u64::max_value(), so should still calculate ok.
+    assert_eq!(11, h.bucket_for((unit << 21) + 3 * (unit << 11)));
+    assert_eq!(1024 + 3, h.sub_bucket_for((unit << 21) + 3 * (unit << 11), 11));
+}
+
+#[test]
+fn unit_magnitude_52_sub_bucket_magnitude_11_index_calculations() {
+    // maximum unit magnitude for this precision
+    let h = histo64(1_u64 << 52, u64::max_value(), 3);
+    assert_eq!(2048, h.sub_bucket_count);
+    assert_eq!(52, h.unit_magnitude);
+    // sub_bucket_count = 2^11. With unit magnitude shift, it's 2^63. 1 more bucket to (almost)
+    // reach 2^64.
+    assert_eq!(2, h.bucket_count);
+    assert_eq!(1, h.leading_zero_count_base);
+    let unit = 1_u64 << 52;
+
+    // below lowest value
+    assert_eq!(0, h.bucket_for(3));
+    assert_eq!(0, h.sub_bucket_for(3, 0));
+
+    // first half of first bucket
+    assert_eq!(0, h.bucket_for(3 * unit));
+    assert_eq!(3, h.sub_bucket_for(3 * unit, 0));
+
+    // second half of first bucket
+    // sub_bucket_half_count's worth of units, plus 3 more
+    assert_eq!(0, h.bucket_for(unit * (1024 + 3)));
+    assert_eq!(1024 + 3, h.sub_bucket_for(unit * (1024 + 3), 0));
+
+    // end of second half
+    assert_eq!(0, h.bucket_for(unit * 1024 + 1023 * unit));
+    assert_eq!(1024 + 1023, h.sub_bucket_for(unit * 1024 + 1023 * unit, 0));
+
+    // second bucket (top half), bucket scale = unit << 1.
+    // Middle of bucket is (sub_bucket_half_count = 2^10) of bucket scale, = unit << 11.
+    // Add on 3 of bucket scale.
+    assert_eq!(1, h.bucket_for((unit << 11) + 3 * (unit << 1)));
+    assert_eq!(1024 + 3, h.sub_bucket_for((unit << 11) + 3 * (unit << 1), 1));
+
+    // upper half of second bucket, last slot
+    assert_eq!(1, h.bucket_for(u64::max_value()));
+    assert_eq!(1024 + 1023, h.sub_bucket_for(u64::max_value(), 1));
+}
+
+#[test]
+fn unit_magnitude_53_sub_bucket_magnitude_11_throws() {
+    assert_eq!("Cannot represent sigfig worth of values beyond low",
+        Histogram::<u64>::new_with_bounds(1_u64 << 53, 1_u64 << 63, 3).unwrap_err());
+}
+
+#[test]
+fn unit_magnitude_55_sub_bucket_magnitude_8_ok() {
+    let h = histo64(1_u64 << 55, 1_u64 << 63, 2);
+    assert_eq!(256, h.sub_bucket_count);
+    assert_eq!(55, h.unit_magnitude);
+    // sub_bucket_count = 2^8. With unit magnitude shift, it's 2^63.
+    assert_eq!(2, h.bucket_count);
+
+    // below lowest value
+    assert_eq!(0, h.bucket_for(3));
+    assert_eq!(0, h.sub_bucket_for(3, 0));
+
+    // upper half of second bucket, last slot
+    assert_eq!(1, h.bucket_for(u64::max_value()));
+    assert_eq!(128 + 127, h.sub_bucket_for(u64::max_value(), 1));
+}
+
+#[test]
+fn unit_magnitude_62_sub_bucket_magnitude_1_ok() {
+    let h = histo64(1_u64 << 62, 1_u64 << 63, 0);
+    assert_eq!(2, h.sub_bucket_count);
+    assert_eq!(62, h.unit_magnitude);
+    // sub_bucket_count = 2^1. With unit magnitude shift, it's 2^63.
+    assert_eq!(2, h.bucket_count);
+
+    // below lowest value
+    assert_eq!(0, h.bucket_for(3));
+    assert_eq!(0, h.sub_bucket_for(3, 0));
+
+    // upper half of second bucket, last slot
+    assert_eq!(1, h.bucket_for(u64::max_value()));
+    assert_eq!(1, h.sub_bucket_for(u64::max_value(), 1));
+}
diff --git a/src/tests/init.rs b/src/tests/init.rs
@@ -1,4 +1,4 @@
-use super::Histogram;
+use tests::helpers::histo64;
 
 #[test]
 fn init_fields_smallest_possible_array() {
@@ -250,23 +250,3 @@ fn init_fields_max_value_max_unit_magnitude_max_precision() {
 
     assert_eq!(64 - 62 - 1, h.leading_zero_count_base);
 }
-
-#[test]
-fn new_err_lowest_value_too_large_for_precision() {
-    let res = Histogram::<u64>::new_with_bounds(u64::max_value() / 2, u64::max_value(), 0);
-    assert_eq!("Cannot represent significant figures' worth of measurements beyond lowest value",
-               res.unwrap_err());
-}
-
-#[test]
-fn new_err_high_not_double_low() {
-    let res = Histogram::<u64>::new_with_bounds(10, 15, 0);
-    assert_eq!("highest trackable value must be >= 2 * lowest discernible value", res.unwrap_err());
-}
-
-#[cfg(test)]
-fn histo64(lowest_discernible_value: u64, highest_trackable_value: u64, num_significant_digits: u8)
-           -> Histogram<u64> {
-    Histogram::<u64>::new_with_bounds(lowest_discernible_value, highest_trackable_value,
-                                      num_significant_digits).unwrap()
-}
diff --git a/src/tests/tests.rs b/src/tests/tests.rs
@@ -0,0 +1,14 @@
+use super::Histogram;
+
+#[path = "helpers.rs"]
+mod helpers;
+#[path = "init.rs"]
+mod init;
+#[path = "index_calculation.rs"]
+mod index_calculation;
+
+#[test]
+fn new_err_high_not_double_low() {
+    let res = Histogram::<u64>::new_with_bounds(10, 15, 0);
+    assert_eq!("highest trackable value must be >= 2 * lowest discernible value", res.unwrap_err());
+}