Skip to content

Commit 579f066

Browse files
committed
Initial cut at a SipHash implementation
I have verified that this gives the same results as the reference implementation, which can be found here (hard to find via Google): https://www.131002.net/siphash/siphash24.c Compared to the current FNV hash, this first attempt achieves so-so performance, with slowdown ranging from 4x on small inputs to 1.4x for large. bytes FNV SipHash 5 15 63 ns 8 17 70 11 19 74 40 49 124 1MB 1.2 1.7 ms
1 parent 240626f commit 579f066

File tree

3 files changed

+154
-0
lines changed

3 files changed

+154
-0
lines changed

Data/Hashable/SipHash.hs

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
{-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving #-}
2+
3+
module Data.Hashable.SipHash
4+
(
5+
LE64
6+
, fromWord64
7+
, fullBlock
8+
, lastBlock
9+
, finalize
10+
, hashByteString
11+
) where
12+
13+
#include "MachDeps.h"
14+
15+
import Data.Bits
16+
import Data.Word
17+
import Foreign.ForeignPtr
18+
import Foreign.Ptr
19+
import Data.ByteString.Internal
20+
import Foreign.Storable
21+
import Numeric
22+
23+
newtype LE64 = LE64 { fromLE64 :: Word64 }
24+
deriving (Eq)
25+
26+
instance Show LE64 where
27+
show (LE64 !v) = let s = showHex v ""
28+
in "0x" ++ replicate (16 - length s) '0' ++ s
29+
30+
fromWord64 :: Word64 -> LE64
31+
#ifndef WORDS_BIGENDIAN
32+
fromWord64 = LE64
33+
#else
34+
#error big endian support TBD
35+
#endif
36+
37+
initState :: (Word64 -> Word64 -> Word64 -> Word64 -> r)
38+
-> Word64 -> Word64
39+
-> r
40+
initState k k0 k1 = k v0 v1 v2 v3
41+
where !v0 = (k0 `xor` 0x736f6d6570736575)
42+
!v1 = (k1 `xor` 0x646f72616e646f6d)
43+
!v2 = (k0 `xor` 0x6c7967656e657261)
44+
!v3 = (k1 `xor` 0x7465646279746573)
45+
46+
sipRound :: (Word64 -> Word64 -> Word64 -> Word64 -> r)
47+
-> Word64 -> Word64 -> Word64 -> Word64 -> r
48+
sipRound k !v0 !v1 !v2 !v3 = k v0_c v1_d v2_c v3_d
49+
where v0_a = v0 + v1
50+
v2_a = v2 + v3
51+
v1_a = v1 `rotateL` 13
52+
v3_a = v3 `rotateL` 16
53+
v1_b = v1_a `xor` v0_a
54+
v3_b = v3_a `xor` v2_a
55+
v0_b = v0_a `rotateL` 32
56+
v2_b = v2_a + v1_b
57+
!v0_c = v0_b + v3_b
58+
v1_c = v1_b `rotateL` 17
59+
v3_c = v3_b `rotateL` 21
60+
!v1_d = v1_c `xor` v2_b
61+
!v3_d = v3_c `xor` v0_c
62+
!v2_c = v2_b `rotateL` 32
63+
64+
fullBlock :: Int -> LE64
65+
-> (Word64 -> Word64 -> Word64 -> Word64 -> r)
66+
-> Word64 -> Word64 -> Word64 -> Word64 -> r
67+
fullBlock c m k v0 v1 v2 v3 = runRounds c k' v0 v1 v2 (v3 `xor` fromLE64 m)
68+
where k' w0 = k $! (w0 `xor` fromLE64 m)
69+
{-# INLINE fullBlock #-}
70+
71+
runRounds :: Int
72+
-> (Word64 -> Word64 -> Word64 -> Word64 -> r)
73+
-> Word64 -> Word64 -> Word64 -> Word64 -> r
74+
runRounds !c k = go 0
75+
where go i !v0 !v1 !v2 !v3
76+
| i < c = sipRound (go (i+1)) v0 v1 v2 v3
77+
| otherwise = k v0 v1 v2 v3
78+
{-# INLINE runRounds #-}
79+
80+
lastBlock :: Int -> Int -> LE64
81+
-> (Word64 -> Word64 -> Word64 -> Word64 -> r)
82+
-> Word64 -> Word64 -> Word64 -> Word64 -> r
83+
lastBlock !c !len !m k !v0 !v1 !v2 !v3 =
84+
#ifndef WORDS_BIGENDIAN
85+
fullBlock c (LE64 m') k v0 v1 v2 v3
86+
#else
87+
#error big endian support TBD
88+
#endif
89+
where m' = fromLE64 m .|. ((fromIntegral len .&. 0xff) `shiftL` 56)
90+
{-# INLINE lastBlock #-}
91+
92+
finalize :: Int
93+
-> (Word64 -> r)
94+
-> Word64 -> Word64 -> Word64 -> Word64 -> r
95+
finalize d k v0 v1 v2 v3 = runRounds d k' v0 v1 (v2 `xor` 0xff) v3
96+
where k' w0 w1 w2 w3 = k $! w0 `xor` w1 `xor` w2 `xor` w3
97+
{-# INLINE finalize #-}
98+
99+
hashByteString :: Int -> Int -> Word64 -> Word64 -> ByteString -> Word64
100+
hashByteString !c !d k0 k1 (PS fp off len) =
101+
inlinePerformIO . withForeignPtr fp $ \basePtr ->
102+
let ptr0 = basePtr `plusPtr` off
103+
scant = len .&. 7
104+
endBlocks = ptr0 `plusPtr` (len - scant)
105+
go !ptr !v0 !v1 !v2 !v3
106+
| ptr == endBlocks = readLast ptr 0 0
107+
| otherwise = do
108+
m <- peekLE64 ptr
109+
fullBlock c m (go (ptr `plusPtr` 8)) v0 v1 v2 v3
110+
where
111+
readLast p !s !m
112+
| p == end = lastBlock c len (LE64 m)
113+
(finalize d return)
114+
v0 v1 v2 v3
115+
| otherwise = do
116+
b <- fromIntegral `fmap` peekByte p
117+
readLast (p `plusPtr` 1) (s+8) (m .|. (b `unsafeShiftL` s))
118+
where end = ptr0 `plusPtr` len
119+
in initState (go ptr0) k0 k1
120+
121+
peekByte :: Ptr Word8 -> IO Word8
122+
peekByte = peek
123+
124+
peekLE64 :: Ptr Word8 -> IO LE64
125+
#if defined(x86_64_HOST_ARCH) || defined(i386_HOST_ARCH)
126+
-- platforms on which unaligned loads are legal and usually fast
127+
peekLE64 p = LE64 `fmap` peek (castPtr p)
128+
#else
129+
peekLE64 p = do
130+
let peek8 d = fromIntegral `fmap` peekByte (p `plusPtr` d)
131+
b0 <- peek8 0
132+
b1 <- peek8 1
133+
b2 <- peek8 2
134+
b3 <- peek8 3
135+
b4 <- peek8 4
136+
b5 <- peek8 5
137+
b6 <- peek8 6
138+
b7 <- peek8 7
139+
let !w = (b7 `shiftL` 56) .|. (b6 `shiftL` 48) .|. (b5 `shiftL` 40) .|.
140+
(b4 `shiftL` 32) .|. (b3 `shiftL` 24) .|. (b2 `shiftL` 16) .|.
141+
(b1 `shiftL` 8) .|. b0
142+
return (fromWord64 w)
143+
#endif

benchmarks/Benchmarks.hs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ module Main (main) where
55
import Control.Monad.ST
66
import Criterion.Main
77
import Data.Hashable
8+
import Data.Hashable.SipHash
89
import Foreign.ForeignPtr
910
import GHC.Exts
1011
import GHC.ST (ST(..))
@@ -35,6 +36,8 @@ main = do
3536
!bs40 = B.pack [0..39]
3637
!bs1Mb = B.pack . map fromIntegral $ [0..999999::Int]
3738

39+
let sipHash = hashByteString 2 4 0x4a7330fae70f52e8 0x919ea5953a9a1ec9
40+
3841
withForeignPtr fp5 $ \ p5 ->
3942
withForeignPtr fp8 $ \ p8 ->
4043
withForeignPtr fp11 $ \ p11 ->
@@ -64,6 +67,13 @@ main = do
6467
, bench "2^20" $ whnf hash bs1Mb
6568
]
6669
]
70+
, bgroup "sipHash"
71+
[ bench "5" $ whnf sipHash bs5
72+
, bench "8" $ whnf sipHash bs8
73+
, bench "11" $ whnf sipHash bs11
74+
, bench "40" $ whnf sipHash bs40
75+
, bench "2^20" $ whnf sipHash bs1Mb
76+
]
6777
]
6878

6979
data ByteArray = BA { unBA :: !ByteArray# }

hashable.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Flag integer-gmp
2929

3030
Library
3131
Exposed-modules: Data.Hashable
32+
Data.Hashable.SipHash
3233
Build-depends: base >= 4.0 && < 5.0,
3334
bytestring >= 0.9
3435
if impl(ghc)

0 commit comments

Comments
 (0)