File tree 2 files changed +13
-6
lines changed
2 files changed +13
-6
lines changed Original file line number Diff line number Diff line change 31
31
//!
32
32
//! V2 + DEFLATE is significantly slower to serialize (around 10x) but only a little bit slower to
33
33
//! deserialize (less than 2x). YMMV depending on the compressibility of your histogram data, the
34
- //! speed of the underlying storage medium, etc.
34
+ //! speed of the underlying storage medium, etc. Naturally, you can always compress at a later time:
35
+ //! nothing stops you from serializing as V2 now and re-serializing the histogram as V2 +
36
+ //! DEFLATE on another system (perhaps as a batch job) for better archival storage density.
35
37
//!
36
38
//! # API
37
39
//!
88
90
//!
89
91
//! impl Serialize for V2HistogramWrapper {
90
92
//! fn serialize<S: Serializer>(&self, serializer: S) -> Result<(), ()> {
91
- //! // not optimal to not re-use the vec and serializer, but it'll work
93
+ //! // Creating a new vec and serializer on every call is not optimal, but it'll work
92
94
//! let mut vec = Vec::new();
93
- //! // pick the format you want to use
94
- //!
95
- //! // map errors as appropriate for your use case
95
+ //! // Pick the serialization format you want to use. Here, we use plain V2, but V2 +
96
+ //! // DEFLATE is also available.
97
+ //! // Map errors as appropriate for your use case.
96
98
//! V2Serializer::new().serialize(&self.histogram, &mut vec)
97
99
//! .map_err(|_| ())?;
98
100
//! serializer.serialize_bytes(&vec)?;
Original file line number Diff line number Diff line change @@ -58,6 +58,11 @@ impl V2DeflateSerializer {
58
58
. map_err ( |e| V2DeflateSerializeError :: InternalSerializationError ( e) ) ?;
59
59
60
60
debug_assert_eq ! ( self . uncompressed_buf. len( ) , uncompressed_len) ;
61
+ // On randomized test histograms we get about 10% compression, but of course random data
62
+ // doesn't compress well. Real-world data may compress better, so let's assume a more
63
+ // optimistic 50% compression as a baseline to reserve. If we're overly optimistic, that's
64
+ // still only one more allocation the first time it's needed.
65
+ self . compressed_buf . reserve ( self . uncompressed_buf . len ( ) / 2 ) ;
61
66
62
67
self . compressed_buf . write_u32 :: < BigEndian > ( V2_COMPRESSED_COOKIE ) ?;
63
68
// placeholder for length
@@ -71,7 +76,7 @@ impl V2DeflateSerializer {
71
76
let mut compressor = DeflateEncoder :: new ( & mut self . compressed_buf , Compression :: Default ) ;
72
77
compressor. write_all ( & self . uncompressed_buf [ 0 ..uncompressed_len] ) ?;
73
78
let _ = compressor. finish ( ) ?;
74
- } ;
79
+ }
75
80
76
81
// fill in length placeholder. Won't underflow since length is always at least 8, and won't
77
82
// overflow u32 as the largest array is about 6 million entries, so about 54MiB encoded (if
You can’t perform that action at this time.
0 commit comments