Skip to content

Commit 8c99a51

Browse files
authored
Merge pull request #2797 from Sahnvour/hashing
hash algorithm improvements
2 parents f01cb8c + 54255ee commit 8c99a51

File tree

6 files changed

+525
-113
lines changed

6 files changed

+525
-113
lines changed

std/hash.zig

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
const adler = @import("hash/adler.zig");
22
pub const Adler32 = adler.Adler32;
33

4+
const auto_hash = @import("hash/auto_hash.zig");
5+
pub const autoHash = auto_hash.autoHash;
6+
47
// pub for polynomials + generic crc32 construction
58
pub const crc = @import("hash/crc.zig");
69
pub const Crc32 = crc.Crc32;
@@ -16,18 +19,25 @@ pub const SipHash128 = siphash.SipHash128;
1619

1720
pub const murmur = @import("hash/murmur.zig");
1821
pub const Murmur2_32 = murmur.Murmur2_32;
22+
23+
1924
pub const Murmur2_64 = murmur.Murmur2_64;
2025
pub const Murmur3_32 = murmur.Murmur3_32;
2126

2227
pub const cityhash = @import("hash/cityhash.zig");
2328
pub const CityHash32 = cityhash.CityHash32;
2429
pub const CityHash64 = cityhash.CityHash64;
2530

31+
const wyhash = @import("hash/wyhash.zig");
32+
pub const Wyhash = wyhash.Wyhash;
33+
2634
// Imports every hash submodule of this package, including the newly added
// `auto_hash` and `wyhash` files. Presumably this is so that the `test`
// blocks declared inside those files are compiled and run together with
// this one — TODO confirm against the test runner's behaviour.
test "hash" {
2735
_ = @import("hash/adler.zig");
36+
_ = @import("hash/auto_hash.zig");
2837
_ = @import("hash/crc.zig");
2938
_ = @import("hash/fnv.zig");
3039
_ = @import("hash/siphash.zig");
3140
_ = @import("hash/murmur.zig");
3241
_ = @import("hash/cityhash.zig");
42+
_ = @import("hash/wyhash.zig");
3343
}

std/hash/auto_hash.zig

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
const std = @import("std");
2+
const builtin = @import("builtin");
3+
const mem = std.mem;
4+
const meta = std.meta;
5+
6+
/// Provides generic hashing for any eligible type.
/// Only hashes `key` itself, pointers are not followed.
///
/// `hasher` must expose an `update` function taking bytes; every value that
/// is hashed ends up as one or more `update` calls on it. Nothing is
/// returned: the caller reads the digest from `hasher` afterwards.
///
/// Per-type behaviour:
/// - integers are hashed through their in-memory bytes;
/// - floats, bools, enums, error values, functions and promises are first
///   converted to an integer;
/// - single/many/C pointers hash their address, slices hash their address
///   followed by their length;
/// - optionals hash their payload and feed nothing for `null`;
/// - arrays and structs are hashed element by element / field by field;
/// - vectors are hashed as raw bytes when the element bit count is a
///   multiple of 8, element by element otherwise;
/// - tagged unions hash the active tag followed by the active payload;
/// - error unions hash either the error or the payload.
///
/// Types listed in the first prong and untagged unions are rejected with a
/// compile error.
pub fn autoHash(hasher: var, key: var) void {
    const Key = @typeOf(key);
    switch (@typeInfo(Key)) {
        builtin.TypeId.NoReturn,
        builtin.TypeId.Opaque,
        builtin.TypeId.Undefined,
        builtin.TypeId.ArgTuple,
        builtin.TypeId.Void,
        builtin.TypeId.Null,
        builtin.TypeId.BoundFn,
        builtin.TypeId.ComptimeFloat,
        builtin.TypeId.ComptimeInt,
        builtin.TypeId.Type,
        builtin.TypeId.EnumLiteral,
        => @compileError("cannot hash this type"),

        // Help the optimizer see that hashing an int is easy by inlining!
        // TODO Check if the situation is better after #561 is resolved.
        builtin.TypeId.Int => @inlineCall(hasher.update, std.mem.asBytes(&key)),

        // Floats are hashed through their bit pattern, reinterpreted as an
        // unsigned integer of the same width.
        builtin.TypeId.Float => |info| autoHash(hasher, @bitCast(@IntType(false, info.bits), key)),

        builtin.TypeId.Bool => autoHash(hasher, @boolToInt(key)),
        builtin.TypeId.Enum => autoHash(hasher, @enumToInt(key)),
        builtin.TypeId.ErrorSet => autoHash(hasher, @errorToInt(key)),
        builtin.TypeId.Promise, builtin.TypeId.Fn => autoHash(hasher, @ptrToInt(key)),

        builtin.TypeId.Pointer => |info| switch (info.size) {
            builtin.TypeInfo.Pointer.Size.One,
            builtin.TypeInfo.Pointer.Size.Many,
            builtin.TypeInfo.Pointer.Size.C,
            => autoHash(hasher, @ptrToInt(key)),

            // A slice is identified by where it points and how long it is;
            // the pointed-to elements are not read.
            builtin.TypeInfo.Pointer.Size.Slice => {
                autoHash(hasher, key.ptr);
                autoHash(hasher, key.len);
            },
        },

        // `null` contributes nothing to the hasher.
        builtin.TypeId.Optional => if (key) |k| autoHash(hasher, k),

        builtin.TypeId.Array => {
            // TODO detect via a trait when Key has no padding bits to
            // hash it as an array of bytes.
            // Otherwise, hash every element.
            for (key) |element| {
                autoHash(hasher, element);
            }
        },

        builtin.TypeId.Vector => |info| {
            if (info.child.bit_count % 8 == 0) {
                // If there's no unused bits in the child type, we can just hash
                // this as an array of bytes.
                hasher.update(mem.asBytes(&key));
            } else {
                // Otherwise, hash every element.
                // TODO remove the copy to an array once field access is done.
                const array: [info.len]info.child = key;
                comptime var i: u32 = 0;
                inline while (i < info.len) : (i += 1) {
                    autoHash(hasher, array[i]);
                }
            }
        },

        builtin.TypeId.Struct => |info| {
            // TODO detect via a trait when Key has no padding bits to
            // hash it as an array of bytes.
            // Otherwise, hash every field.
            inline for (info.fields) |field| {
                // Every field is fed into the same hasher, in declaration
                // order, so the final digest depends on all of them.
                autoHash(hasher, @field(key, field.name));
            }
        },

        builtin.TypeId.Union => |info| blk: {
            if (info.tag_type) |tag_type| {
                // Hash the tag first so that two variants carrying the same
                // payload bytes still produce different input streams.
                const tag = meta.activeTag(key);
                autoHash(hasher, tag);
                inline for (info.fields) |field| {
                    const enum_field = field.enum_field.?;
                    if (enum_field.value == @enumToInt(tag)) {
                        autoHash(hasher, @field(key, enum_field.name));
                        // TODO use a labelled break when it does not crash the compiler.
                        // break :blk;
                        return;
                    }
                }
                // The active tag always matches one of the union's fields.
                unreachable;
            } else @compileError("cannot hash untagged union type: " ++ @typeName(Key) ++ ", provide your own hash function");
        },

        builtin.TypeId.ErrorUnion => blk: {
            // Hash the error when there is one, the payload otherwise.
            const payload = key catch |err| {
                autoHash(hasher, err);
                break :blk;
            };
            autoHash(hasher, payload);
        },
    }
}
111+
112+
const testing = std.testing;
113+
const Wyhash = std.hash.Wyhash;
114+
115+
/// Test helper: hashes `key` with `autoHash` into a fresh Wyhash seeded with
/// 0 and returns the resulting digest. Wyhash is an arbitrary choice; any
/// hasher would do for exercising `autoHash`.
fn testAutoHash(key: var) u64 {
    var wyhash_state = Wyhash.init(0);
    autoHash(&wyhash_state, key);
    const digest = wyhash_state.final();
    return digest;
}
121+
122+
test "autoHash slice" {
    // One of the two arrays is allocated dynamically so that the optimization
    // passes cannot merge it with the other one: the test relies on both
    // having distinct addresses.
    const allocator = std.heap.direct_allocator;
    const heap_array = try allocator.create([6]u32);
    defer allocator.destroy(heap_array);
    heap_array.* = [_]u32{ 1, 2, 3, 4, 5, 6 };
    const local_array = [_]u32{ 1, 2, 3, 4, 5, 6 };

    const whole = heap_array[0..];
    const same_content = local_array[0..];
    const prefix = heap_array[0..3];

    // Hashing is deterministic for the same slice.
    testing.expect(testAutoHash(whole) == testAutoHash(whole));
    // A slice does not hash like the pointer to the array it covers.
    testing.expect(testAutoHash(whole) != testAutoHash(heap_array));
    // Equal contents at a different address give a different hash.
    testing.expect(testAutoHash(whole) != testAutoHash(same_content));
    // Same address but a different length gives a different hash.
    testing.expect(testAutoHash(whole) != testAutoHash(prefix));
}
137+
138+
test "testAutoHash optional" {
    const some: ?u32 = 123;
    const none: ?u32 = null;

    // A non-null optional hashes exactly like its payload.
    testing.expectEqual(testAutoHash(some), testAutoHash(u32(123)));
    testing.expect(testAutoHash(some) != testAutoHash(none));
    // `null` feeds nothing to the hasher, so this is the digest of an
    // untouched hasher, asserted here to be 0.
    testing.expectEqual(testAutoHash(none), 0);
}
145+
146+
test "testAutoHash array" {
    const values = [_]u32{ 1, 2, 3 };
    const array_digest = testAutoHash(values);

    // Reference digest: the same three elements fed one after the other
    // into a hasher created the same way as in `testAutoHash`.
    var reference = Wyhash.init(0);
    autoHash(&reference, u32(1));
    autoHash(&reference, u32(2));
    autoHash(&reference, u32(3));

    testing.expectEqual(array_digest, reference.final());
}
155+
156+
test "testAutoHash struct" {
    const Foo = struct {
        a: u32 = 1,
        b: u32 = 2,
        c: u32 = 3,
    };
    const instance = Foo{};
    const struct_digest = testAutoHash(instance);

    // Reference digest: the three field values fed in declaration order
    // into a hasher created the same way as in `testAutoHash`.
    var reference = Wyhash.init(0);
    autoHash(&reference, u32(1));
    autoHash(&reference, u32(2));
    autoHash(&reference, u32(3));

    testing.expectEqual(struct_digest, reference.final());
}
170+
171+
test "testAutoHash union" {
    const Foo = union(enum) {
        A: u32,
        B: f32,
        C: u32,
    };

    const int_a = Foo{ .A = 18 };
    var mutable = Foo{ .B = 12.34 };
    const int_c = Foo{ .C = 18 };

    // Deterministic for the same value.
    testing.expect(testAutoHash(int_a) == testAutoHash(int_a));
    // A different variant with a different payload hashes differently.
    testing.expect(testAutoHash(int_a) != testAutoHash(mutable));
    // Same payload under a different tag still hashes differently.
    testing.expect(testAutoHash(int_a) != testAutoHash(int_c));

    // Once it holds the same variant and payload, the hashes agree.
    mutable = Foo{ .A = 18 };
    testing.expect(testAutoHash(int_a) == testAutoHash(mutable));
}
188+
189+
test "testAutoHash vector" {
    const base: @Vector(4, u32) = [_]u32{ 1, 2, 3, 4 };
    const last_differs: @Vector(4, u32) = [_]u32{ 1, 2, 3, 5 };
    // u31 elements are not a whole number of bytes, which takes the
    // element-by-element path of `autoHash`.
    const narrow: @Vector(4, u31) = [_]u31{ 1, 2, 3, 4 };

    testing.expect(testAutoHash(base) == testAutoHash(base));
    testing.expect(testAutoHash(base) != testAutoHash(last_differs));
    testing.expect(testAutoHash(base) != testAutoHash(narrow));
}
197+
198+
test "testAutoHash error union" {
    const Errors = error{Test};
    const Foo = struct {
        a: u32 = 1,
        b: u32 = 2,
        c: u32 = 3,
    };
    const payload = Foo{};
    const failure: Errors!Foo = Errors.Test;

    // An error union holding an error does not hash like a payload value.
    testing.expect(testAutoHash(payload) != testAutoHash(failure));
    // Two identical payload values hash the same.
    testing.expect(testAutoHash(payload) == testAutoHash(Foo{}));
    // An error union holding an error hashes like the bare error.
    testing.expect(testAutoHash(failure) == testAutoHash(Errors.Test));
}

std/hash/throughput_test.zig

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
const builtin = @import("builtin");
2+
const std = @import("std");
3+
const time = std.time;
4+
const Timer = time.Timer;
5+
const hash = std.hash;
6+
7+
// Binary size units, expressed in bytes.
const KiB = 1024;
8+
const MiB = 1024 * KiB;
9+
const GiB = 1024 * MiB;
10+
11+
// Shared generator: `benchmarkHash` uses it to fill its input block and
// `main` reseeds it when `--seed` is given. Seeded with 0 by default.
var prng = std.rand.DefaultPrng.init(0);
12+
13+
/// Describes one hash implementation to benchmark and how to construct it.
const Hash = struct {
14+
/// Hasher type; `benchmarkHash` calls `init`, `update` and `final` on it.
ty: type,
15+
/// Label printed next to the result and matched against `--filter`.
name: []const u8,
16+
/// When set, passed to `ty.init` as its single argument (checked first).
init_u8s: ?[]const u8 = null,
17+
/// When set and `init_u8s` is not, passed to `ty.init` as its single
/// argument. When neither is set, `ty.init` is called without arguments.
init_u64: ?u64 = null,
18+
};
19+
20+
// Key handed to the SipHash entries below through `init_u8s`.
const siphash_key = "0123456789abcdef";
21+
22+
// Table of the hash implementations that `main` iterates over, in order.
// Each entry selects at most one of `init_u8s` / `init_u64`; entries with
// neither are constructed with an argument-less `init`.
const hashes = [_]Hash{
23+
Hash{ .ty = hash.Wyhash, .name = "wyhash", .init_u64 = 0 },
24+
Hash{ .ty = hash.SipHash64(1, 3), .name = "siphash(1,3)", .init_u8s = siphash_key },
25+
Hash{ .ty = hash.SipHash64(2, 4), .name = "siphash(2,4)", .init_u8s = siphash_key },
26+
Hash{ .ty = hash.Fnv1a_64, .name = "fnv1a" },
27+
Hash{ .ty = hash.Crc32, .name = "crc32" },
28+
};
29+
30+
/// Outcome of one `benchmarkHash` run.
const Result = struct {
31+
/// Value returned by the hasher's `final()` once all input was consumed.
hash: u64,
32+
/// Measured speed in bytes per second (`main` converts it to MiB/s).
throughput: u64,
33+
};
34+
35+
/// Measures how fast the hasher described by `H` consumes data.
///
/// `H` is a comptime-known entry with the fields of `Hash`: the hasher is
/// built with `H.ty.init(...)`, receiving `init_u8s` if set, else `init_u64`
/// if set, else no argument. The same 8192-byte block, filled from the
/// file-level `prng`, is then fed to `update` until at least `bytes` bytes
/// have been hashed (so the amount hashed is rounded up to whole blocks).
///
/// Returns the final hash value together with the throughput in bytes per
/// second, computed from the number of bytes actually hashed. The throughput
/// is reported as 0 when the timer measured no elapsed time.
///
/// Fails with whatever error `Timer.start` returns.
pub fn benchmarkHash(comptime H: var, bytes: usize) !Result {
    var h = blk: {
        if (H.init_u8s) |init| {
            break :blk H.ty.init(init);
        }
        if (H.init_u64) |init| {
            break :blk H.ty.init(init);
        }
        break :blk H.ty.init();
    };

    var block: [8192]u8 = undefined;
    prng.random.bytes(block[0..]);

    // `offset` ends up being the number of bytes that were really hashed,
    // which exceeds `bytes` when `bytes` is not a multiple of the block size.
    var offset: usize = 0;
    var timer = try Timer.start();
    while (offset < bytes) : (offset += block.len) {
        h.update(block[0..]);
    }
    // The timer was started right before the loop, so its reading already is
    // the elapsed time; no second duration has to be subtracted from it.
    const elapsed_ns = timer.read();

    // Guard against a zero reading (empty run or coarse clock): dividing by
    // it would hand a non-finite value to @floatToInt.
    const throughput: u64 = if (elapsed_ns == 0)
        0
    else
        @floatToInt(u64, @intToFloat(f64, offset) / (@intToFloat(f64, elapsed_ns) / time.ns_per_s));

    return Result{
        .hash = h.final(),
        .throughput = throughput,
    };
}
65+
66+
/// Prints the command-line help text through `std.debug.warn`.
fn usage() void {
    const help_text =
        \\throughput_test [options]
        \\
        \\Options:
        \\ --filter [test-name]
        \\ --seed [int]
        \\ --count [int]
        \\ --help
        \\
    ;
    std.debug.warn(help_text);
}
78+
79+
/// Scales a compile-time quantity by build mode: returns `x / 64` in Debug
/// builds and `x` unchanged in every other mode.
fn mode(comptime x: comptime_int) comptime_int {
    if (builtin.mode != builtin.Mode.Debug) return x;
    return x / 64;
}
82+
83+
// TODO(#1358): Replace with builtin formatted padding when available.
84+
/// Prints `s` to `stdout`, right-aligned in a 12-character column by
/// emitting leading spaces. Strings of 12 characters or more are printed
/// as-is, without padding.
///
/// `stdout` must provide a `print` function; any error it returns is
/// propagated.
fn printPad(stdout: var, s: []const u8) !void {
    const width = 12;
    // Computing `width - s.len` unconditionally would underflow `usize` for
    // names longer than the column, so clamp the padding at zero.
    const padding: usize = if (s.len < width) width - s.len else 0;

    var i: usize = 0;
    while (i < padding) : (i += 1) {
        try stdout.print(" ");
    }
    try stdout.print("{}", s);
}
91+
92+
/// Entry point of the throughput benchmark.
///
/// Recognised arguments:
/// - `--seed [int]`: reseeds the shared `prng` (parsed as a decimal `u32`);
/// - `--filter [test-name]`: only runs hashes whose name contains the text;
/// - `--count [int]`: number of MiB to hash per implementation;
/// - `--help`: prints the usage text and returns.
/// A missing option value or an unknown argument prints the usage text and
/// exits with status 1.
///
/// For every selected entry of `hashes`, prints one line with the padded
/// name, the throughput in MiB/s and the resulting hash value.
///
/// Errors from argument parsing (including `error.Overflow` when the
/// requested count does not fit in `usize`), from the allocator, from the
/// benchmark and from writing to stdout are propagated.
pub fn main() !void {
    var stdout_file = try std.io.getStdOut();
    var stdout_out_stream = stdout_file.outStream();
    const stdout = &stdout_out_stream.stream;

    // Arguments are copied into a fixed stack buffer; no heap is used.
    var buffer: [1024]u8 = undefined;
    var fixed = std.heap.FixedBufferAllocator.init(buffer[0..]);
    const args = try std.process.argsAlloc(&fixed.allocator);

    // NOTE(review): the filter starts as an empty string rather than null,
    // presumably so that every name matches by default — confirm.
    var filter: ?[]u8 = "";
    var count: usize = mode(128 * MiB);

    var i: usize = 1;
    while (i < args.len) : (i += 1) {
        if (std.mem.eql(u8, args[i], "--seed")) {
            i += 1;
            if (i == args.len) {
                usage();
                std.os.exit(1);
            }

            const seed = try std.fmt.parseUnsigned(u32, args[i], 10);
            prng.seed(seed);
        } else if (std.mem.eql(u8, args[i], "--filter")) {
            i += 1;
            if (i == args.len) {
                usage();
                std.os.exit(1);
            }

            filter = args[i];
        } else if (std.mem.eql(u8, args[i], "--count")) {
            i += 1;
            if (i == args.len) {
                usage();
                std.os.exit(1);
            }

            // Parse and scale in `usize` with a checked multiplication: doing
            // the product in `u32` overflows as soon as 4096 MiB is requested.
            const c = try std.fmt.parseUnsigned(usize, args[i], 10);
            count = try std.math.mul(usize, c, MiB);
        } else if (std.mem.eql(u8, args[i], "--help")) {
            usage();
            return;
        } else {
            usage();
            std.os.exit(1);
        }
    }

    // `hashes` holds types, so the loop has to be unrolled at compile time.
    inline for (hashes) |H| {
        if (filter == null or std.mem.indexOf(u8, H.name, filter.?) != null) {
            const result = try benchmarkHash(H, count);
            try printPad(stdout, H.name);
            try stdout.print(": {:4} MiB/s [{:16}]\n", result.throughput / (1 * MiB), result.hash);
        }
    }
}

0 commit comments

Comments
 (0)