Skip to content

Commit 3061676

Browse files
author
Marshall Pierce
committed
Better comments & other PR feedback
1 parent ba76d87 commit 3061676

File tree

2 files changed

+13
-6
lines changed

2 files changed

+13
-6
lines changed

src/serialization/serialization.rs

+7-5
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@
3131
//!
3232
//! V2 + DEFLATE is significantly slower to serialize (around 10x) but only a little bit slower to
3333
//! deserialize (less than 2x). YMMV depending on the compressibility of your histogram data, the
34-
//! speed of the underlying storage medium, etc.
34+
//! speed of the underlying storage medium, etc. Naturally, you can always compress at a later time:
35+
//! there's no reason why you couldn't serialize as V2 and then later re-serialize it as V2 +
36+
//! DEFLATE on another system (perhaps as a batch job) for better archival storage density.
3537
//!
3638
//! # API
3739
//!
@@ -88,11 +90,11 @@
8890
//!
8991
//! impl Serialize for V2HistogramWrapper {
9092
//! fn serialize<S: Serializer>(&self, serializer: S) -> Result<(), ()> {
91-
//! // not optimal to not re-use the vec and serializer, but it'll work
93+
//! // Not re-using the vec and serializer is suboptimal, but it'll work
9294
//! let mut vec = Vec::new();
93-
//! // pick the format you want to use
94-
//!
95-
//! // map errors as appropriate for your use case
95+
//! // Pick the serialization format you want to use. Here, we use plain V2, but V2 +
96+
//! // DEFLATE is also available.
97+
//! // Map errors as appropriate for your use case.
9698
//! V2Serializer::new().serialize(&self.histogram, &mut vec)
9799
//! .map_err(|_| ())?;
98100
//! serializer.serialize_bytes(&vec)?;

src/serialization/v2_deflate_serializer.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ impl V2DeflateSerializer {
5858
.map_err(|e| V2DeflateSerializeError::InternalSerializationError(e))?;
5959

6060
debug_assert_eq!(self.uncompressed_buf.len(), uncompressed_len);
61+
// On randomized test histograms we get about 10% compression, but of course random data
62+
// doesn't compress well. Real-world data may compress better, so let's assume a more
63+
// optimistic 50% compression as a baseline to reserve. If we're overly optimistic, that's
64+
// still only one more allocation the first time it's needed.
65+
self.compressed_buf.reserve(self.uncompressed_buf.len() / 2);
6166

6267
self.compressed_buf.write_u32::<BigEndian>(V2_COMPRESSED_COOKIE)?;
6368
// placeholder for length
@@ -71,7 +76,7 @@ impl V2DeflateSerializer {
7176
let mut compressor = DeflateEncoder::new(&mut self.compressed_buf, Compression::Default);
7277
compressor.write_all(&self.uncompressed_buf[0..uncompressed_len])?;
7378
let _ = compressor.finish()?;
74-
};
79+
}
7580

7681
// fill in length placeholder. Won't underflow since length is always at least 8, and won't
7782
// overflow u32 as the largest array is about 6 million entries, so about 54MiB encoded (if

0 commit comments

Comments
 (0)