Skip to content

Commit 00f087b

Browse files
Limit unit magnitude + precision to fit into 63 bits. Approach and tests ported from equivalent PR to Java impl.
1 parent 6494f3c commit 00f087b

File tree

5 files changed

+185
-24
lines changed

5 files changed

+185
-24
lines changed

src/lib.rs

+9-3
Original file line numberDiff line numberDiff line change
@@ -632,9 +632,13 @@ impl<T: Counter> Histogram<T> {
632632
let sub_bucket_count = 1_u32 << (sub_bucket_count_magnitude as u32);
633633

634634
if unit_magnitude + sub_bucket_count_magnitude > 63 {
635-
// Cannot represent shifted sub bucket count in 64 bits.
636-
// This will cause an infinite loop when calculating number of buckets
637-
return Err("Cannot represent significant figures' worth of measurements beyond lowest value");
635+
// sub_bucket_count entries can't be represented, with unit_magnitude applied, in a
636+
// u64. Technically it still sort of works if their sum is 64: you can represent all
637+
// but the last number in the shifted sub_bucket_count. However, the utility of such a
638+
// histogram vs ones whose magnitude here fits in 63 bits is debatable, and it makes
639+
// it harder to work through the logic. Sums larger than 64 are totally broken as
640+
// leading_zero_count_base would go negative.
641+
return Err("Cannot represent sigfig worth of values beyond low");
638642
};
639643

640644
let sub_bucket_half_count = sub_bucket_count / 2;
@@ -1250,6 +1254,7 @@ impl<T: Counter> Histogram<T> {
12501254

12511255
/// Find the number of buckets needed such that `value` is representable.
12521256
fn buckets_to_cover(&self, value: u64) -> u8 {
1257+
// Shift won't overflow because sub_bucket_magnitude + unit_magnitude <= 63.
12531258
// the k'th bucket can express from 0 * 2^k to sub_bucket_count * 2^k in units of 2^k
12541259
let mut smallest_untrackable_value = (self.sub_bucket_count as u64) << self.unit_magnitude;
12551260

@@ -1468,5 +1473,6 @@ impl<T: Counter, F: Counter> PartialEq<Histogram<F>> for Histogram<T>
14681473
// TODO: timestamps and tags
14691474
// TODO: textual output
14701475

1476+
#[path = "tests/tests.rs"]
14711477
#[cfg(test)]
14721478
mod tests;

src/tests/helpers.rs

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
use super::Histogram;
2+
3+
#[cfg(test)]
4+
pub fn histo64(lowest_discernible_value: u64, highest_trackable_value: u64, num_significant_digits: u8)
5+
-> Histogram<u64> {
6+
Histogram::<u64>::new_with_bounds(lowest_discernible_value, highest_trackable_value,
7+
num_significant_digits).unwrap()
8+
}

src/tests/index_calculation.rs

+153
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
use super::Histogram;
2+
use tests::helpers::histo64;
3+
4+
#[test]
5+
fn unit_magnitude_0_index_calculations() {
6+
let h = histo64(1_u64, 1_u64 << 32, 3);
7+
assert_eq!(2048, h.sub_bucket_count);
8+
assert_eq!(0, h.unit_magnitude);
9+
// sub_bucket_count = 2^11, so 2^11 << 22 is > the max of 2^32 for 23 buckets total
10+
assert_eq!(23, h.bucket_count);
11+
12+
// first half of first bucket
13+
assert_eq!(0, h.bucket_for(3));
14+
assert_eq!(3, h.sub_bucket_for(3, 0));
15+
16+
// second half of first bucket
17+
assert_eq!(0, h.bucket_for(1024 + 3));
18+
assert_eq!(1024 + 3, h.sub_bucket_for(1024 + 3, 0));
19+
20+
// second bucket (top half)
21+
assert_eq!(1, h.bucket_for(2048 + 3 * 2));
22+
// counting by 2s, starting at halfway through the bucket
23+
assert_eq!(1024 + 3, h.sub_bucket_for(2048 + 3 * 2, 1));
24+
25+
// third bucket (top half)
26+
assert_eq!(2, h.bucket_for((2048 << 1) + 3 * 4));
27+
// counting by 4s, starting at halfway through the bucket
28+
assert_eq!(1024 + 3, h.sub_bucket_for((2048 << 1) + 3 * 4, 2));
29+
30+
// past last bucket -- not near u64::max_value(), so should still calculate ok.
31+
assert_eq!(23, h.bucket_for((2048_u64 << 22) + 3 * (1 << 23)));
32+
assert_eq!(1024 + 3, h.sub_bucket_for((2048_u64 << 22) + 3 * (1 << 23), 23));
33+
}
34+
35+
#[test]
36+
fn unit_magnitude_4_index_calculations() {
37+
let h = histo64(1_u64 << 12, 1_u64 << 32, 3);
38+
assert_eq!(2048, h.sub_bucket_count);
39+
assert_eq!(12, h.unit_magnitude);
40+
// sub_bucket_count = 2^11. With unit magnitude shift, it's 2^23. 2^23 << 10 is > the max of
41+
// 2^32 for 11 buckets total
42+
assert_eq!(11, h.bucket_count);
43+
let unit = 1_u64 << 12;
44+
45+
// below lowest value
46+
assert_eq!(0, h.bucket_for(3));
47+
assert_eq!(0, h.sub_bucket_for(3, 0));
48+
49+
// first half of first bucket
50+
assert_eq!(0, h.bucket_for(3 * unit));
51+
assert_eq!(3, h.sub_bucket_for(3 * unit, 0));
52+
53+
// second half of first bucket
54+
// sub_bucket_half_count's worth of units, plus 3 more
55+
assert_eq!(0, h.bucket_for(unit * (1024 + 3)));
56+
assert_eq!(1024 + 3, h.sub_bucket_for(unit * (1024 + 3), 0));
57+
58+
// second bucket (top half), bucket scale = unit << 1.
59+
// Middle of bucket is (sub_bucket_half_count = 2^10) of bucket scale, = unit << 11.
60+
// Add on 3 of bucket scale.
61+
assert_eq!(1, h.bucket_for((unit << 11) + 3 * (unit << 1)));
62+
assert_eq!(1024 + 3, h.sub_bucket_for((unit << 11) + 3 * (unit << 1), 1));
63+
64+
// third bucket (top half), bucket scale = unit << 2.
65+
// Middle of bucket is (sub_bucket_half_count = 2^10) of bucket scale, = unit << 12.
66+
// Add on 3 of bucket scale.
67+
assert_eq!(2, h.bucket_for((unit << 12) + 3 * (unit << 2)));
68+
assert_eq!(1024 + 3, h.sub_bucket_for((unit << 12) + 3 * (unit << 2), 2));
69+
70+
// past last bucket -- not near u64::max_value(), so should still calculate ok.
71+
assert_eq!(11, h.bucket_for((unit << 21) + 3 * (unit << 11)));
72+
assert_eq!(1024 + 3, h.sub_bucket_for((unit << 21) + 3 * (unit << 11), 11));
73+
}
74+
75+
#[test]
76+
fn unit_magnitude_52_sub_bucket_magnitude_11_index_calculations() {
77+
// maximum unit magnitude for this precision
78+
let h = histo64(1_u64 << 52, u64::max_value(), 3);
79+
assert_eq!(2048, h.sub_bucket_count);
80+
assert_eq!(52, h.unit_magnitude);
81+
// sub_bucket_count = 2^11. With unit magnitude shift, it's 2^63. 1 more bucket to (almost)
82+
// reach 2^64.
83+
assert_eq!(2, h.bucket_count);
84+
assert_eq!(1, h.leading_zero_count_base);
85+
let unit = 1_u64 << 52;
86+
87+
// below lowest value
88+
assert_eq!(0, h.bucket_for(3));
89+
assert_eq!(0, h.sub_bucket_for(3, 0));
90+
91+
// first half of first bucket
92+
assert_eq!(0, h.bucket_for(3 * unit));
93+
assert_eq!(3, h.sub_bucket_for(3 * unit, 0));
94+
95+
// second half of first bucket
96+
// sub_bucket_half_count's worth of units, plus 3 more
97+
assert_eq!(0, h.bucket_for(unit * (1024 + 3)));
98+
assert_eq!(1024 + 3, h.sub_bucket_for(unit * (1024 + 3), 0));
99+
100+
// end of second half
101+
assert_eq!(0, h.bucket_for(unit * 1024 + 1023 * unit));
102+
assert_eq!(1024 + 1023, h.sub_bucket_for(unit * 1024 + 1023 * unit, 0));
103+
104+
// second bucket (top half), bucket scale = unit << 1.
105+
// Middle of bucket is (sub_bucket_half_count = 2^10) of bucket scale, = unit << 11.
106+
// Add on 3 of bucket scale.
107+
assert_eq!(1, h.bucket_for((unit << 11) + 3 * (unit << 1)));
108+
assert_eq!(1024 + 3, h.sub_bucket_for((unit << 11) + 3 * (unit << 1), 1));
109+
110+
// upper half of second bucket, last slot
111+
assert_eq!(1, h.bucket_for(u64::max_value()));
112+
assert_eq!(1024 + 1023, h.sub_bucket_for(u64::max_value(), 1));
113+
}
114+
115+
#[test]
116+
fn unit_magnitude_53_sub_bucket_magnitude_11_throws() {
117+
assert_eq!("Cannot represent sigfig worth of values beyond low",
118+
Histogram::<u64>::new_with_bounds(1_u64 << 53, 1_u64 << 63, 3).unwrap_err());
119+
}
120+
121+
#[test]
122+
fn unit_magnitude_55_sub_bucket_magnitude_8_ok() {
123+
let h = histo64(1_u64 << 55, 1_u64 << 63, 2);
124+
assert_eq!(256, h.sub_bucket_count);
125+
assert_eq!(55, h.unit_magnitude);
126+
// sub_bucket_count = 2^8. With unit magnitude shift, it's 2^63.
127+
assert_eq!(2, h.bucket_count);
128+
129+
// below lowest value
130+
assert_eq!(0, h.bucket_for(3));
131+
assert_eq!(0, h.sub_bucket_for(3, 0));
132+
133+
// upper half of second bucket, last slot
134+
assert_eq!(1, h.bucket_for(u64::max_value()));
135+
assert_eq!(128 + 127, h.sub_bucket_for(u64::max_value(), 1));
136+
}
137+
138+
#[test]
139+
fn unit_magnitude_62_sub_bucket_magnitude_1_ok() {
140+
let h = histo64(1_u64 << 62, 1_u64 << 63, 0);
141+
assert_eq!(2, h.sub_bucket_count);
142+
assert_eq!(62, h.unit_magnitude);
143+
// sub_bucket_count = 2^1. With unit magnitude shift, it's 2^63.
144+
assert_eq!(2, h.bucket_count);
145+
146+
// below lowest value
147+
assert_eq!(0, h.bucket_for(3));
148+
assert_eq!(0, h.sub_bucket_for(3, 0));
149+
150+
// upper half of second bucket, last slot
151+
assert_eq!(1, h.bucket_for(u64::max_value()));
152+
assert_eq!(1, h.sub_bucket_for(u64::max_value(), 1));
153+
}

src/tests.rs renamed to src/tests/init.rs

+1-21
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use super::Histogram;
1+
use tests::helpers::histo64;
22

33
#[test]
44
fn init_fields_smallest_possible_array() {
@@ -250,23 +250,3 @@ fn init_fields_max_value_max_unit_magnitude_max_precision() {
250250

251251
assert_eq!(64 - 62 - 1, h.leading_zero_count_base);
252252
}
253-
254-
#[test]
255-
fn new_err_lowest_value_too_large_for_precision() {
256-
let res = Histogram::<u64>::new_with_bounds(u64::max_value() / 2, u64::max_value(), 0);
257-
assert_eq!("Cannot represent significant figures' worth of measurements beyond lowest value",
258-
res.unwrap_err());
259-
}
260-
261-
#[test]
262-
fn new_err_high_not_double_low() {
263-
let res = Histogram::<u64>::new_with_bounds(10, 15, 0);
264-
assert_eq!("highest trackable value must be >= 2 * lowest discernible value", res.unwrap_err());
265-
}
266-
267-
#[cfg(test)]
268-
fn histo64(lowest_discernible_value: u64, highest_trackable_value: u64, num_significant_digits: u8)
269-
-> Histogram<u64> {
270-
Histogram::<u64>::new_with_bounds(lowest_discernible_value, highest_trackable_value,
271-
num_significant_digits).unwrap()
272-
}

src/tests/tests.rs

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
use super::Histogram;
2+
3+
#[path = "helpers.rs"]
4+
mod helpers;
5+
#[path = "init.rs"]
6+
mod init;
7+
#[path = "index_calculation.rs"]
8+
mod index_calculation;
9+
10+
#[test]
11+
fn new_err_high_not_double_low() {
12+
let res = Histogram::<u64>::new_with_bounds(10, 15, 0);
13+
assert_eq!("highest trackable value must be >= 2 * lowest discernible value", res.unwrap_err());
14+
}

0 commit comments

Comments
 (0)