Skip to content

Commit a319d13

Browse files
committed
Small improvement to SipHasher
1 parent 7bccb82 commit a319d13

File tree

2 files changed

+118
-50
lines changed

2 files changed

+118
-50
lines changed

src/libcore/hash/sip.rs

+97-50
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
use marker::PhantomData;
1616
use ptr;
17+
use cmp;
18+
use mem;
1719

1820
/// An implementation of SipHash 1-3.
1921
///
@@ -78,45 +80,6 @@ struct State {
7880
v3: u64,
7981
}
8082

81-
// sadly, these macro definitions can't appear later,
82-
// because they're needed in the following defs;
83-
// this design could be improved.
84-
85-
macro_rules! u8to64_le {
86-
($buf:expr, $i:expr) =>
87-
($buf[0+$i] as u64 |
88-
($buf[1+$i] as u64) << 8 |
89-
($buf[2+$i] as u64) << 16 |
90-
($buf[3+$i] as u64) << 24 |
91-
($buf[4+$i] as u64) << 32 |
92-
($buf[5+$i] as u64) << 40 |
93-
($buf[6+$i] as u64) << 48 |
94-
($buf[7+$i] as u64) << 56);
95-
($buf:expr, $i:expr, $len:expr) =>
96-
({
97-
let mut t = 0;
98-
let mut out = 0;
99-
while t < $len {
100-
out |= ($buf[t+$i] as u64) << t*8;
101-
t += 1;
102-
}
103-
out
104-
});
105-
}
106-
107-
/// Load a full u64 word from a byte stream, in LE order. Use
108-
/// `copy_nonoverlapping` to let the compiler generate the most efficient way
109-
/// to load u64 from a possibly unaligned address.
110-
///
111-
/// Unsafe because: unchecked indexing at i..i+8
112-
#[inline]
113-
unsafe fn load_u64_le(buf: &[u8], i: usize) -> u64 {
114-
debug_assert!(i + 8 <= buf.len());
115-
let mut data = 0u64;
116-
ptr::copy_nonoverlapping(buf.get_unchecked(i), &mut data as *mut _ as *mut u8, 8);
117-
data.to_le()
118-
}
119-
12083
macro_rules! compress {
12184
($state:expr) => ({
12285
compress!($state.v0, $state.v1, $state.v2, $state.v3)
@@ -132,6 +95,47 @@ macro_rules! compress {
13295
});
13396
}
13497

98+
/// Load an integer of the desired type from a byte stream, in LE order. Uses
/// `copy_nonoverlapping` to let the compiler generate the most efficient way
/// to load it from a possibly unaligned address.
///
/// Unsafe because: unchecked indexing at i..i+size_of(int_ty)
macro_rules! load_int_le {
    ($buf:expr, $i:expr, $int_ty:ident) =>
    ({
       debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
       let mut data = 0 as $int_ty;
       ptr::copy_nonoverlapping($buf.get_unchecked($i),
                                &mut data as *mut _ as *mut u8,
                                mem::size_of::<$int_ty>());
       data.to_le()
    });
}

/// Load an u64 using up to 7 bytes of a byte slice.
///
/// Assembles the result from at most one u32, one u16 and one u8 load
/// (in decreasing size), so the compiler emits a handful of unaligned
/// loads instead of a byte-by-byte loop.
///
/// Unsafe because: unchecked indexing at start..start+len
#[inline]
unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
    debug_assert!(len < 8);
    let mut i = 0; // current byte index (from LSB) in the output u64
    let mut out = 0;
    if i + 3 < len {
        // At least 4 bytes remain: load them as one u32.
        out = load_int_le!(buf, start + i, u32) as u64;
        i += 4;
    }
    if i + 1 < len {
        // At least 2 bytes remain: load them as one u16, shifted into place.
        out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
        i += 2;
    }
    if i < len {
        // A single trailing byte.
        out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
        i += 1;
    }
    debug_assert_eq!(i, len);
    out
}
138+
135139
impl SipHasher {
136140
/// Creates a new `SipHasher` with the two initial keys set to 0.
137141
#[inline]
@@ -220,6 +224,37 @@ impl<S: Sip> Hasher<S> {
220224
self.state.v3 = self.k1 ^ 0x7465646279746573;
221225
self.ntail = 0;
222226
}
227+
228+
// Specialized write function that is only valid for buffers with len <= 8.
229+
// It's used to force inlining of write_u8 and write_usize, those would normally be inlined
230+
// except for composite types (that includes slices and str hashing because of delimiter).
231+
// Without this extra push the compiler is very reluctant to inline delimiter writes,
232+
// degrading performance substantially for the most common use cases.
233+
#[inline(always)]
234+
fn short_write(&mut self, msg: &[u8]) {
235+
debug_assert!(msg.len() <= 8);
236+
let length = msg.len();
237+
self.length += length;
238+
239+
let needed = 8 - self.ntail;
240+
let fill = cmp::min(length, needed);
241+
if fill == 8 {
242+
self.tail = unsafe { load_int_le!(msg, 0, u64) };
243+
} else {
244+
self.tail |= unsafe { u8to64_le(msg, 0, fill) } << (8 * self.ntail);
245+
if length < needed {
246+
self.ntail += length;
247+
return;
248+
}
249+
}
250+
self.state.v3 ^= self.tail;
251+
S::c_rounds(&mut self.state);
252+
self.state.v0 ^= self.tail;
253+
254+
// Buffered tail is now flushed, process new input.
255+
self.ntail = length - needed;
256+
self.tail = unsafe { u8to64_le(msg, needed, self.ntail) };
257+
}
223258
}
224259

225260
#[stable(feature = "rust1", since = "1.0.0")]
@@ -262,6 +297,21 @@ impl super::Hasher for SipHasher24 {
262297
}
263298

264299
impl<S: Sip> super::Hasher for Hasher<S> {
300+
// see short_write comment for explanation
301+
#[inline]
302+
fn write_usize(&mut self, i: usize) {
303+
let bytes = unsafe {
304+
::slice::from_raw_parts(&i as *const usize as *const u8, mem::size_of::<usize>())
305+
};
306+
self.short_write(bytes);
307+
}
308+
309+
// see short_write comment for explanation
310+
#[inline]
311+
fn write_u8(&mut self, i: u8) {
312+
self.short_write(&[i]);
313+
}
314+
265315
#[inline]
266316
fn write(&mut self, msg: &[u8]) {
267317
let length = msg.len();
@@ -271,19 +321,16 @@ impl<S: Sip> super::Hasher for Hasher<S> {
271321

272322
if self.ntail != 0 {
273323
needed = 8 - self.ntail;
324+
self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << 8 * self.ntail;
274325
if length < needed {
275-
self.tail |= u8to64_le!(msg, 0, length) << 8 * self.ntail;
276326
self.ntail += length;
277327
return
328+
} else {
329+
self.state.v3 ^= self.tail;
330+
S::c_rounds(&mut self.state);
331+
self.state.v0 ^= self.tail;
332+
self.ntail = 0;
278333
}
279-
280-
let m = self.tail | u8to64_le!(msg, 0, needed) << 8 * self.ntail;
281-
282-
self.state.v3 ^= m;
283-
S::c_rounds(&mut self.state);
284-
self.state.v0 ^= m;
285-
286-
self.ntail = 0;
287334
}
288335

289336
// Buffered tail is now flushed, process new input.
@@ -292,7 +339,7 @@ impl<S: Sip> super::Hasher for Hasher<S> {
292339

293340
let mut i = needed;
294341
while i < len - left {
295-
let mi = unsafe { load_u64_le(msg, i) };
342+
let mi = unsafe { load_int_le!(msg, i, u64) };
296343

297344
self.state.v3 ^= mi;
298345
S::c_rounds(&mut self.state);
@@ -301,7 +348,7 @@ impl<S: Sip> super::Hasher for Hasher<S> {
301348
i += 8;
302349
}
303350

304-
self.tail = u8to64_le!(msg, i, left);
351+
self.tail = unsafe { u8to64_le(msg, i, left) };
305352
self.ntail = left;
306353
}
307354

src/libcoretest/hash/sip.rs

+21
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use test::{Bencher, black_box};
1414

1515
use core::hash::{Hash, Hasher};
1616
use core::hash::{SipHasher, SipHasher13, SipHasher24};
17+
use core::{slice, mem};
1718

1819
// Hash just the bytes of the slice, without length prefix
1920
struct Bytes<'a>(&'a [u8]);
@@ -327,6 +328,26 @@ fn test_hash_no_concat_alias() {
327328
assert!(hash(&v) != hash(&w));
328329
}
329330

331+
// Verify that the specialized short paths (write_usize / write_u8, which go
// through short_write) produce the same hash as feeding the equivalent raw
// bytes through the general `write` path.
#[test]
fn test_write_short_works() {
    let test_usize = 0xd0c0b0a0usize;
    let mut h1 = SipHasher24::new();
    h1.write_usize(test_usize);
    h1.write(b"bytes");
    h1.write(b"string");
    h1.write_u8(0xFFu8);
    h1.write_u8(0x01u8);
    let mut h2 = SipHasher24::new();
    h2.write(unsafe {
        slice::from_raw_parts(&test_usize as *const _ as *const u8,
                              mem::size_of::<usize>())
    });
    h2.write(b"bytes");
    h2.write(b"string");
    h2.write(&[0xFFu8, 0x01u8]);
    assert_eq!(h1.finish(), h2.finish());
}
350+
330351
#[bench]
331352
fn bench_str_under_8_bytes(b: &mut Bencher) {
332353
let s = "foo";

0 commit comments

Comments
 (0)