Skip to content

Commit 0b88329

Browse files
authored
Merge pull request #70 from jonhoo/log
Interval log support
2 parents 0290f68 + cf9ec24 commit 0b88329

24 files changed: +1956 -270 lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,3 +12,4 @@ Cargo.lock
1212

1313
.criterion
1414
perf.data*
15+
/tmp

Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,13 +24,15 @@ travis-ci = { repository = "jonhoo/hdrsample" }
2424

2525
[features]
2626
bench_private = [] # for enabling nightly-only feature(test) on the main crate to allow benchmarking private code
27-
serialization = [ "flate2" ]
27+
serialization = [ "flate2", "nom", "base64" ]
2828
default = [ "serialization" ]
2929

3030
[dependencies]
3131
num-traits = "0.1"
3232
byteorder = "1.0.0"
3333
flate2 = { version = "0.2.17", optional = true }
34+
nom = { version = "^3.2.0", optional = true }
35+
base64 = { version = "0.7.0", optional = true }
3436

3537
[dev-dependencies]
3638
rand = "0.3.15"

benches/interval_log.rs

Lines changed: 88 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,88 @@
1+
#![feature(test)]
2+
3+
extern crate hdrsample;
4+
extern crate rand;
5+
extern crate test;
6+
7+
use std::time;
8+
9+
use hdrsample::*;
10+
use hdrsample::serialization;
11+
use hdrsample::serialization::interval_log;
12+
use test::Bencher;
13+
14+
use self::rand_varint::*;
15+
16+
#[path = "../src/serialization/rand_varint.rs"]
17+
mod rand_varint;
18+
19+
#[bench]
20+
fn write_interval_log_1k_hist_10k_value(b: &mut Bencher) {
21+
let mut log = Vec::new();
22+
let mut histograms = Vec::new();
23+
let mut rng = rand::weak_rng();
24+
25+
for _ in 0..1000 {
26+
let mut h = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();
27+
28+
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(10_000) {
29+
h.record(v).unwrap();
30+
}
31+
32+
histograms.push(h);
33+
}
34+
35+
let mut serializer = serialization::V2Serializer::new();
36+
37+
b.iter(|| {
38+
log.clear();
39+
40+
let mut writer = interval_log::IntervalLogWriterBuilder::new()
41+
.begin_log_with(&mut log, &mut serializer)
42+
.unwrap();
43+
44+
let dur = time::Duration::new(5, 678_000_000);
45+
for h in histograms.iter() {
46+
writer
47+
.write_histogram(h, time::Duration::new(1, 234_000_000), dur, None)
48+
.unwrap();
49+
}
50+
})
51+
}
52+
53+
#[bench]
54+
fn parse_interval_log_1k_hist_10k_value(b: &mut Bencher) {
55+
let mut log = Vec::new();
56+
let mut histograms = Vec::new();
57+
let mut rng = rand::weak_rng();
58+
59+
for _ in 0..1000 {
60+
let mut h = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();
61+
62+
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(10_000) {
63+
h.record(v).unwrap();
64+
}
65+
66+
histograms.push(h);
67+
}
68+
69+
{
70+
let mut serializer = serialization::V2Serializer::new();
71+
let mut writer = interval_log::IntervalLogWriterBuilder::new()
72+
.begin_log_with(&mut log, &mut serializer)
73+
.unwrap();
74+
75+
let dur = time::Duration::new(5, 678_000_000);
76+
for h in histograms.iter() {
77+
writer
78+
.write_histogram(h, time::Duration::new(1, 234_000_000), dur, None)
79+
.unwrap();
80+
}
81+
}
82+
83+
b.iter(|| {
84+
let iter = interval_log::IntervalLogIterator::new(&log);
85+
86+
assert_eq!(1000, iter.count());
87+
})
88+
}

benches/record.rs

Lines changed: 25 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -5,22 +5,24 @@ extern crate rand;
55
extern crate test;
66

77
use hdrsample::*;
8-
use self::rand::Rng;
98
use self::test::Bencher;
109

10+
use self::rand_varint::*;
11+
12+
#[path = "../src/serialization/rand_varint.rs"]
13+
mod rand_varint;
14+
1115
#[bench]
1216
fn record_precalc_random_values_with_1_count_u64(b: &mut Bencher) {
1317
let mut h = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();
1418
let mut indices = Vec::<u64>::new();
15-
// TODO improve this and similar benchmarks to use a non-uniform distribution (like that used
16-
// in serialization tests) so we're not always recording in the top few buckets
1719
let mut rng = rand::weak_rng();
1820

1921
// same value approach as record_precalc_random_values_with_max_count_u64 so that they are
2022
// comparable
2123

22-
for _ in 0..1000_000 {
23-
indices.push(rng.gen());
24+
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000_000) {
25+
indices.push(v);
2426
}
2527

2628
b.iter(|| {
@@ -39,10 +41,9 @@ fn record_precalc_random_values_with_max_count_u64(b: &mut Bencher) {
3941

4042
// store values in an array and re-use so we can be sure to hit the overflow case
4143

42-
for _ in 0..1000_000 {
43-
let r = rng.gen();
44-
indices.push(r);
45-
h.record_n(r, u64::max_value()).unwrap();
44+
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000_000) {
45+
indices.push(v);
46+
h.record_n(v, u64::max_value()).unwrap();
4647
}
4748

4849
b.iter(|| {
@@ -59,8 +60,8 @@ fn record_correct_precalc_random_values_with_1_count_u64(b: &mut Bencher) {
5960
let mut indices = Vec::<u64>::new();
6061
let mut rng = rand::weak_rng();
6162

62-
for _ in 0..10_000 {
63-
indices.push(rng.gen());
63+
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(10_000) {
64+
indices.push(v);
6465
}
6566

6667
b.iter(|| {
@@ -79,8 +80,10 @@ fn record_random_values_with_1_count_u64(b: &mut Bencher) {
7980
// This should be *slower* than the benchmarks above where we pre-calculate the values
8081
// outside of the hot loop. If it isn't, then those measurements are likely spurious.
8182

82-
b.iter(|| for _ in 0..1000_000 {
83-
h.record(rng.gen()).unwrap()
83+
b.iter(|| {
84+
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000_000) {
85+
h.record(v).unwrap()
86+
}
8487
})
8588
}
8689

@@ -136,11 +139,10 @@ fn do_subtract_benchmark<F: Fn() -> Histogram<u64>>(
136139
for _ in 0..1000 {
137140
let mut h = addend_factory();
138141

139-
for _ in 0..1000 {
140-
let r = rng.gen();
141-
h.record_n(r, count_at_each_addend_value).unwrap();
142+
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000) {
143+
h.record_n(v, count_at_each_addend_value).unwrap();
142144
// ensure there's a count to subtract from
143-
accum.record_n(r, count_at_each_addend_value).unwrap();
145+
accum.record_n(v, count_at_each_addend_value).unwrap();
144146
}
145147

146148
subtrahends.push(h);
@@ -166,15 +168,16 @@ fn do_add_benchmark<F: Fn() -> Histogram<u64>>(
166168
for _ in 0..1000 {
167169
let mut h = addend_factory();
168170

169-
for _ in 0..1000 {
170-
let r = rng.gen();
171-
h.record_n(r, count_at_each_addend_value).unwrap();
171+
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000) {
172+
h.record_n(v, count_at_each_addend_value).unwrap();
172173
}
173174

174175
addends.push(h);
175176
}
176177

177-
b.iter(|| for h in addends.iter() {
178-
accum.add(h).unwrap();
178+
b.iter(|| {
179+
for h in addends.iter() {
180+
accum.add(h).unwrap();
181+
}
179182
})
180183
}

benches/serialization.rs

Lines changed: 18 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -6,11 +6,13 @@ extern crate test;
66

77
use hdrsample::*;
88
use hdrsample::serialization::*;
9-
use self::rand::distributions::range::Range;
10-
use self::rand::distributions::IndependentSample;
119
use self::test::Bencher;
12-
use std::io::{Cursor, Write};
13-
use std::fmt::Debug;
10+
use std::io::Cursor;
11+
12+
use self::rand_varint::*;
13+
14+
#[path = "../src/serialization/rand_varint.rs"]
15+
mod rand_varint;
1416

1517
#[bench]
1618
fn serialize_tiny_dense_v2(b: &mut Bencher) {
@@ -168,17 +170,18 @@ fn do_serialize_bench<S>(
168170
digits: u8,
169171
fraction_of_counts_len: f64,
170172
) where
171-
S: TestOnlyHypotheticalSerializerInterface,
173+
S: Serializer,
172174
{
173175
let mut h = Histogram::<u64>::new_with_bounds(low, high, digits).unwrap();
174176
let random_counts = (fraction_of_counts_len * h.distinct_values() as f64) as usize;
175177
let mut vec = Vec::with_capacity(random_counts);
176178

177-
let range = Range::new(low, high);
178-
179179
let mut rng = rand::weak_rng();
180-
for _ in 0..random_counts {
181-
h.record(range.ind_sample(&mut rng)).unwrap();
180+
for v in RandomVarintEncodedLengthIter::new(&mut rng)
181+
.filter(|v| v >= &low && v <= &high)
182+
.take(random_counts)
183+
{
184+
h.record(v).unwrap();
182185
}
183186

184187
b.iter(|| {
@@ -196,17 +199,18 @@ fn do_deserialize_bench<S>(
196199
digits: u8,
197200
fraction_of_counts_len: f64,
198201
) where
199-
S: TestOnlyHypotheticalSerializerInterface,
202+
S: Serializer,
200203
{
201204
let mut h = Histogram::<u64>::new_with_bounds(low, high, digits).unwrap();
202205
let random_counts = (fraction_of_counts_len * h.distinct_values() as f64) as usize;
203206
let mut vec = Vec::with_capacity(random_counts);
204207

205-
let range = Range::new(low, high);
206-
207208
let mut rng = rand::weak_rng();
208-
for _ in 0..random_counts {
209-
h.record(range.ind_sample(&mut rng)).unwrap();
209+
for v in RandomVarintEncodedLengthIter::new(&mut rng)
210+
.filter(|v| v >= &low && v <= &high)
211+
.take(random_counts)
212+
{
213+
h.record(v).unwrap();
210214
}
211215

212216
let _ = s.serialize(&h, &mut vec).unwrap();
@@ -217,5 +221,3 @@ fn do_deserialize_bench<S>(
217221
let _: Histogram<u64> = d.deserialize(&mut cursor).unwrap();
218222
});
219223
}
220-
221-
include!("../src/serialization/test_serialize_trait.rs");

examples/cli.rs

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,9 @@ use std::fmt::Display;
99
use clap::{App, Arg, SubCommand};
1010

1111
use hdrsample::{Histogram, RecordError};
12-
use hdrsample::serialization::{DeserializeError, Deserializer, V2DeflateSerializeError,
13-
V2DeflateSerializer, V2SerializeError, V2Serializer};
12+
use hdrsample::serialization::{DeserializeError, Deserializer, Serializer,
13+
V2DeflateSerializeError, V2DeflateSerializer, V2SerializeError,
14+
V2Serializer};
1415

1516
fn main() {
1617
let default_max = format!("{}", u64::max_value());

src/errors/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
//! Errors types used throughout this library
1+
//! Error types used throughout this library
22
33
/// Errors that can occur when creating a histogram.
44
#[derive(Debug, Eq, PartialEq, Clone, Copy)]

src/iterators/linear.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,16 +22,16 @@ impl<'a, T: 'a + Counter> Iter<'a, T> {
2222
value_units_per_bucket > 0,
2323
"value_units_per_bucket must be > 0"
2424
);
25+
26+
let new_lowest = hist.lowest_equivalent(value_units_per_bucket - 1);
2527
HistogramIterator::new(
2628
hist,
2729
Iter {
2830
hist,
2931
value_units_per_bucket,
3032
// won't underflow because value_units_per_bucket > 0
3133
current_step_highest_value_reporting_level: value_units_per_bucket - 1,
32-
current_step_lowest_value_reporting_level: hist.lowest_equivalent(
33-
value_units_per_bucket - 1,
34-
),
34+
current_step_lowest_value_reporting_level: new_lowest,
3535
},
3636
)
3737
}

src/iterators/log.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,16 +27,16 @@ impl<'a, T: 'a + Counter> Iter<'a, T> {
2727
"value_units_per_bucket must be > 0"
2828
);
2929
assert!(log_base > 1.0, "log_base must be > 1.0");
30+
31+
let new_lowest = hist.lowest_equivalent(value_units_in_first_bucket - 1);
3032
HistogramIterator::new(
3133
hist,
3234
Iter {
3335
hist,
3436
log_base,
3537
next_value_reporting_level: value_units_in_first_bucket as f64,
3638
current_step_highest_value_reporting_level: value_units_in_first_bucket - 1,
37-
current_step_lowest_value_reporting_level: hist.lowest_equivalent(
38-
value_units_in_first_bucket - 1,
39-
),
39+
current_step_lowest_value_reporting_level: new_lowest,
4040
},
4141
)
4242
}

src/lib.rs

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,7 @@
9494
//! use hdrsample::Histogram;
9595
//! let hist = Histogram::<u64>::new(2).unwrap();
9696
//! // ...
97-
//! println!("# of samples: {}", hist.count());
97+
//! println!("# of samples: {}", hist.len());
9898
//! println!("99.9'th percentile: {}", hist.value_at_quantile(0.999));
9999
//! ```
100100
//!
@@ -170,7 +170,6 @@
170170
//! - `DoubleHistogram`.
171171
//! - The `Recorder` feature of HdrHistogram.
172172
//! - Value shifting ("normalization").
173-
//! - Timestamps and tags.
174173
//! - Textual output methods. These seem almost orthogonal to HdrSample, though it might be
175174
//! convenient if we implemented some relevant traits (CSV, JSON, and possibly simple
176175
//! `fmt::Display`).
@@ -189,6 +188,10 @@
189188

190189
extern crate num_traits as num;
191190

191+
#[cfg(feature = "serialization")]
192+
#[macro_use]
193+
extern crate nom;
194+
192195
use std::borrow::Borrow;
193196
use std::cmp;
194197
use std::ops::{AddAssign, SubAssign};
@@ -777,7 +780,8 @@ impl<T: Counter> Histogram<T> {
777780
};
778781

779782
// Already checked that high >= 2*low
780-
h.resize(high).map_err(|_| CreationError::UsizeTypeTooSmall)?;
783+
h.resize(high)
784+
.map_err(|_| CreationError::UsizeTypeTooSmall)?;
781785
Ok(h)
782786
}
783787

@@ -1748,9 +1752,6 @@ where
17481752

17491753
// TODO: shift
17501754
// TODO: hash
1751-
// TODO: encoding/decoding
1752-
// TODO: timestamps and tags
1753-
// TODO: textual output
17541755

17551756
#[path = "tests/tests.rs"]
17561757
#[cfg(test)]

0 commit comments

Comments
 (0)