Use faster hash for short arrays. (#39966)

oscardssmith · mbauman · vtjnash · web-flow · commit c700781aef91 · 2021-03-16T08:39:21.000-05:00
* Use faster hash for AbstractArrays.

Since the current code already hashes all elements of small arrays (<4096 elements), this is basically a fast-path that avoids a lot of the math. It also xors the hashes, which should allow the compiler to vectorize hashing. As a result, I'm measuring about a 4x speedup for hashing a 20x20 matrix, and have not found cases where this is slower.

In addition, for all arrays we can hash only the element values (rather than each key-index => element pair), yielding a 2x speedup.

Co-authored-by: Matt Bauman <mbauman@juliacomputing.com>
Co-authored-by: Jameson Nash <vtjnash@gmail.com>
diff --git a/base/abstractarray.jl b/base/abstractarray.jl
@@ -2403,7 +2403,14 @@ function hash(A::AbstractArray, h::UInt)
     # Instead hash the tuple of firsts and lasts along each dimension
     h = hash(map(first, axes(A)), h)
     h = hash(map(last, axes(A)), h)
-    isempty(A) && return h
+
+    # For short arrays, it's not worth doing anything complicated
+    if length(A) < 8192
+        for x in A
+            h = hash(x, h)
+        end
+        return h
+    end
 
     # Goal: Hash approximately log(N) entries with a higher density of hashed elements
     # weighted towards the end and special consideration for repeated values. Colliding
@@ -2434,9 +2441,8 @@ function hash(A::AbstractArray, h::UInt)
     n = 0
     while true
         n += 1
-        # Hash the current key-index and its element
-        elt = A[keyidx]
-        h = hash(keyidx=>elt, h)
+        # Hash the element
+        h = hash(A[keyidx], h)
 
         # Skip backwards a Fibonacci number of indices -- this is a linear index operation
         linidx = key_to_linear[keyidx]