Skip to content

Commit 323b7ee

Browse files
committed
hs1-siv: Compute NH for each h in parallel
1 parent fa757d9 commit 323b7ee

File tree

1 file changed

+17
-19
lines changed

1 file changed

+17
-19
lines changed

hs1-siv/src/hash.rs

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -64,31 +64,29 @@ impl<P: Hs1Params> Hasher<P> {
6464
assert!(usize::from(self.bytes) <= self.block_u8().len());
6565

6666
#[inline(always)]
67-
fn nh(v1: &[u32], v2: &[u32]) -> u64 {
68-
debug_assert_eq!(v1.len(), v2.len());
69-
debug_assert_eq!(v1.len() % 4, 0);
70-
// I originally used a fancy, compact iterator chain here but the optimizer is shit
71-
// (and honestly, this is pretty compact too)
72-
let mut s = 0u64;
73-
for (x, y) in v1.chunks_exact(4).zip(v2.chunks_exact(4)) {
74-
let d = u64::from(x[3].wrapping_add(y[3]));
75-
let c = u64::from(x[2].wrapping_add(y[2]));
76-
let b = u64::from(x[1].wrapping_add(y[1]));
77-
let a = u64::from(x[0].wrapping_add(y[0]));
78-
s = s.wrapping_add(a * c).wrapping_add(b * d);
79-
}
80-
s
67+
// Computes the contribution of one pair of 4-word groups to an NH sum.
//
// Corresponding words of the two groups are added with wrapping `u32` arithmetic, each
// sum is widened to `u64`, and the result is `(a * c) + (b * d)` with a wrapping final
// addition. At the call site further down, the first argument is a 4-word chunk of
// `self.k.nh`, the second is a 4-word chunk of the message block, and the returned value
// is wrapping-added into the matching `nh` accumulator.
fn nh_step(&[ax, bx, cx, dx]: &[u32; 4], &[ay, by, cy, dy]: &[u32; 4]) -> u64 {
68+
let d = u64::from(dx.wrapping_add(dy));
69+
let c = u64::from(cx.wrapping_add(cy));
70+
let b = u64::from(bx.wrapping_add(by));
71+
let a = u64::from(ax.wrapping_add(ay));
72+
// Every factor was widened from a `u32`, so neither product can overflow a `u64`;
// only the sum of the two products needs wrapping addition.
(a * c).wrapping_add(b * d)
8173
}
8274

8375
let m_ints = &self.block;
8476

8577
let block16_count = usize::from(((self.bytes + 15) / 16).max(1));
8678

87-
self.k
88-
.nh
89-
.windows(B16::<P>::to_usize() / 4)
90-
.step_by(4)
91-
.map(|k_n_i| nh(&k_n_i[..block16_count * 4], &m_ints[..block16_count * 4]))
79+
let mut nh = Array::<u64, P::T>::default();
80+
for (i0, m_ints_i) in m_ints.chunks_exact(4).enumerate().take(block16_count) {
81+
for (nh_i, k_n_i_i) in nh.iter_mut().zip(self.k.nh.chunks_exact(4).skip(i0)) {
82+
let k_n_i_i = k_n_i_i.try_into().expect("exactly 4 elements");
83+
let m_ints_i = m_ints_i.try_into().expect("exactly 4 elements");
84+
let s = nh_step(k_n_i_i, m_ints_i);
85+
*nh_i = nh_i.wrapping_add(s);
86+
}
87+
}
88+
89+
nh.iter()
9290
.map(|nh_i| (nh_i + (u64::from(self.bytes) & mask(4))) & mask(60))
9391
.zip(self.k.poly.iter())
9492
.zip(self.h.iter_mut())

0 commit comments

Comments
 (0)