1414
1515use marker:: PhantomData ;
1616use ptr;
17+ use cmp;
18+ use mem;
1719
1820/// An implementation of SipHash 1-3.
1921///
@@ -78,45 +80,6 @@ struct State {
7880 v3 : u64 ,
7981}
8082
// These macros must be defined before the code that uses them:
// `macro_rules!` definitions are only visible to the items that follow
// them textually, so they cannot be moved further down the file.

/// Assemble a `u64` from bytes of `$buf` in little-endian order, starting at
/// index `$i`.
///
/// * `u8to64_le!(buf, i)` reads exactly eight bytes, `buf[i]` to `buf[i + 7]`.
/// * `u8to64_le!(buf, i, len)` reads `len` bytes, `buf[i]` to
///   `buf[i + len - 1]`; the remaining high-order bytes of the result are zero.
///
/// All accesses use ordinary (bounds-checked) indexing. Every argument
/// expression is evaluated exactly once, so arguments with side effects are
/// safe to pass.
macro_rules! u8to64_le {
    ($buf:expr, $i:expr) => ({
        // Bind the arguments once instead of re-expanding them per byte.
        let bytes = &$buf;
        let base: usize = $i;
        (bytes[base] as u64)
            | (bytes[base + 1] as u64) << 8
            | (bytes[base + 2] as u64) << 16
            | (bytes[base + 3] as u64) << 24
            | (bytes[base + 4] as u64) << 32
            | (bytes[base + 5] as u64) << 40
            | (bytes[base + 6] as u64) << 48
            | (bytes[base + 7] as u64) << 56
    });
    ($buf:expr, $i:expr, $len:expr) => ({
        // Bind the arguments once; in particular the length is no longer
        // re-evaluated on every loop iteration.
        let bytes = &$buf;
        let base: usize = $i;
        let count: usize = $len;
        let mut t = 0;
        let mut out = 0u64;
        while t < count {
            // Byte `t` of the input becomes byte `t` (from the LSB) of `out`.
            out |= (bytes[base + t] as u64) << (t * 8);
            t += 1;
        }
        out
    });
}
106-
/// Reads the eight bytes of `buf` starting at offset `i` and returns them as
/// a `u64`, interpreting the bytes in little-endian order.
///
/// The bytes are transferred with `copy_nonoverlapping` instead of a typed
/// read, so the source address does not need to be aligned and the compiler
/// is free to pick the most efficient load.
///
/// Unsafe because: the access at `i..i+8` is not bounds-checked (the range is
/// only verified by a debug assertion).
#[inline]
unsafe fn load_u64_le(buf: &[u8], i: usize) -> u64 {
    debug_assert!(i + 8 <= buf.len());
    let mut word: u64 = 0;
    let src: *const u8 = buf.get_unchecked(i);
    let dst = &mut word as *mut u64 as *mut u8;
    ptr::copy_nonoverlapping(src, dst, 8);
    // `word` now holds the bytes in memory order; normalise to LE meaning.
    u64::to_le(word)
}
119-
12083macro_rules! compress {
12184 ( $state: expr) => ( {
12285 compress!( $state. v0, $state. v1, $state. v2, $state. v3)
@@ -132,6 +95,47 @@ macro_rules! compress {
13295 } ) ;
13396}
13497
/// Load an integer of the desired type from a byte stream, in LE order. Uses
/// `copy_nonoverlapping` to let the compiler generate the most efficient way
/// to load it from a possibly unaligned address.
///
/// `$buf` is the byte container, `$i` the starting offset and `$int_ty` the
/// name of the integer type to produce. `$buf` and `$i` are each evaluated
/// exactly once, regardless of whether debug assertions are enabled.
///
/// Unsafe because: unchecked indexing at i..i+size_of(int_ty). The expansion
/// performs unsafe operations, so the macro must be invoked from an unsafe
/// context.
macro_rules! load_int_le {
    ($buf:expr, $i:expr, $int_ty:ident) => ({
        // Bind the arguments once so that the debug assertion and the load
        // below observe the same values and side effects are not repeated.
        let bytes = &$buf;
        let offset: usize = $i;
        let width = mem::size_of::<$int_ty>();
        debug_assert!(offset + width <= bytes.len());
        let mut data = 0 as $int_ty;
        ptr::copy_nonoverlapping(bytes.get_unchecked(offset),
                                 &mut data as *mut _ as *mut u8,
                                 width);
        // The copy preserved memory order; convert so the value reads as LE.
        data.to_le()
    });
}
114+
115+ /// Load an u64 using up to 7 bytes of a byte slice.
116+ ///
117+ /// Unsafe because: unchecked indexing at start..start+len
118+ #[ inline]
119+ unsafe fn u8to64_le ( buf : & [ u8 ] , start : usize , len : usize ) -> u64 {
120+ debug_assert ! ( len < 8 ) ;
121+ let mut i = 0 ; // current byte index (from LSB) in the output u64
122+ let mut out = 0 ;
123+ if i + 3 < len {
124+ out = load_int_le ! ( buf, start + i, u32 ) as u64 ;
125+ i += 4 ;
126+ }
127+ if i + 1 < len {
128+ out |= ( load_int_le ! ( buf, start + i, u16 ) as u64 ) << ( i * 8 ) ;
129+ i += 2
130+ }
131+ if i < len {
132+ out |= ( * buf. get_unchecked ( start + i) as u64 ) << ( i * 8 ) ;
133+ i += 1 ;
134+ }
135+ debug_assert_eq ! ( i, len) ;
136+ out
137+ }
138+
135139impl SipHasher {
136140 /// Creates a new `SipHasher` with the two initial keys set to 0.
137141 #[ inline]
@@ -220,6 +224,37 @@ impl<S: Sip> Hasher<S> {
220224 self . state . v3 = self . k1 ^ 0x7465646279746573 ;
221225 self . ntail = 0 ;
222226 }
227+
228+ // Specialized write function that is only valid for buffers with len <= 8.
229+ // It's used to force inlining of write_u8 and write_usize, those would normally be inlined
230+ // except for composite types (that includes slices and str hashing because of delimiter).
231+ // Without this extra push the compiler is very reluctant to inline delimiter writes,
232+ // degrading performance substantially for the most common use cases.
233+ #[ inline( always) ]
234+ fn short_write ( & mut self , msg : & [ u8 ] ) {
235+ debug_assert ! ( msg. len( ) <= 8 ) ;
236+ let length = msg. len ( ) ;
237+ self . length += length;
238+
239+ let needed = 8 - self . ntail ;
240+ let fill = cmp:: min ( length, needed) ;
241+ if fill == 8 {
242+ self . tail = unsafe { load_int_le ! ( msg, 0 , u64 ) } ;
243+ } else {
244+ self . tail |= unsafe { u8to64_le ( msg, 0 , fill) } << ( 8 * self . ntail ) ;
245+ if length < needed {
246+ self . ntail += length;
247+ return ;
248+ }
249+ }
250+ self . state . v3 ^= self . tail ;
251+ S :: c_rounds ( & mut self . state ) ;
252+ self . state . v0 ^= self . tail ;
253+
254+ // Buffered tail is now flushed, process new input.
255+ self . ntail = length - needed;
256+ self . tail = unsafe { u8to64_le ( msg, needed, self . ntail ) } ;
257+ }
223258}
224259
225260#[ stable( feature = "rust1" , since = "1.0.0" ) ]
@@ -262,6 +297,21 @@ impl super::Hasher for SipHasher24 {
262297}
263298
264299impl < S : Sip > super :: Hasher for Hasher < S > {
300+ // see short_write comment for explanation
301+ #[ inline]
302+ fn write_usize ( & mut self , i : usize ) {
303+ let bytes = unsafe {
304+ :: slice:: from_raw_parts ( & i as * const usize as * const u8 , mem:: size_of :: < usize > ( ) )
305+ } ;
306+ self . short_write ( bytes) ;
307+ }
308+
309+ // see short_write comment for explanation
310+ #[ inline]
311+ fn write_u8 ( & mut self , i : u8 ) {
312+ self . short_write ( & [ i] ) ;
313+ }
314+
265315 #[ inline]
266316 fn write ( & mut self , msg : & [ u8 ] ) {
267317 let length = msg. len ( ) ;
@@ -271,19 +321,16 @@ impl<S: Sip> super::Hasher for Hasher<S> {
271321
272322 if self . ntail != 0 {
273323 needed = 8 - self . ntail ;
324+ self . tail |= unsafe { u8to64_le ( msg, 0 , cmp:: min ( length, needed) ) } << 8 * self . ntail ;
274325 if length < needed {
275- self . tail |= u8to64_le ! ( msg, 0 , length) << 8 * self . ntail ;
276326 self . ntail += length;
277327 return
328+ } else {
329+ self . state . v3 ^= self . tail ;
330+ S :: c_rounds ( & mut self . state ) ;
331+ self . state . v0 ^= self . tail ;
332+ self . ntail = 0 ;
278333 }
279-
280- let m = self . tail | u8to64_le ! ( msg, 0 , needed) << 8 * self . ntail ;
281-
282- self . state . v3 ^= m;
283- S :: c_rounds ( & mut self . state ) ;
284- self . state . v0 ^= m;
285-
286- self . ntail = 0 ;
287334 }
288335
289336 // Buffered tail is now flushed, process new input.
@@ -292,7 +339,7 @@ impl<S: Sip> super::Hasher for Hasher<S> {
292339
293340 let mut i = needed;
294341 while i < len - left {
295- let mi = unsafe { load_u64_le ( msg, i) } ;
342+ let mi = unsafe { load_int_le ! ( msg, i, u64 ) } ;
296343
297344 self . state . v3 ^= mi;
298345 S :: c_rounds ( & mut self . state ) ;
@@ -301,7 +348,7 @@ impl<S: Sip> super::Hasher for Hasher<S> {
301348 i += 8 ;
302349 }
303350
304- self . tail = u8to64_le ! ( msg, i, left) ;
351+ self . tail = unsafe { u8to64_le ( msg, i, left) } ;
305352 self . ntail = left;
306353 }
307354
0 commit comments