|
| 1 | +diff --git a/horcrux/src/gf2n.rs b/horcrux/src/gf2n.rs |
| 2 | +index 5bc0221..501d455 100644 |
| 3 | +--- a/horcrux/src/gf2n.rs |
| 4 | ++++ b/horcrux/src/gf2n.rs |
| 5 | +@@ -294,13 +294,9 @@ fn mul_clmul_u64<const NWORDS: usize, const A: usize, const B: usize, const C: u |
| 6 | + GF2n::<u64, NWORDS, A, B, C>::propagate_carries(words, carry) |
| 7 | + } |
| 8 | + |
| 9 | +-#[cfg(all( |
| 10 | +- feature = "clmul", |
| 11 | +- target_arch = "aarch64", |
| 12 | +- target_feature = "neon", |
| 13 | +- target_feature = "aes" |
| 14 | +-))] |
| 15 | +-fn mul_clmul_u64<const NWORDS: usize, const A: usize, const B: usize, const C: usize>( |
| 16 | ++#[cfg(all(feature = "clmul", target_arch = "aarch64"))] |
| 17 | ++#[target_feature(enable = "neon", enable = "aes")] |
| 18 | ++unsafe fn mul_clmul_u64<const NWORDS: usize, const A: usize, const B: usize, const C: usize>( |
| 19 | + x: &GF2n<u64, NWORDS, A, B, C>, |
| 20 | + y: &GF2n<u64, NWORDS, A, B, C>, |
| 21 | + ) -> GF2n<u64, NWORDS, A, B, C> { |
| 22 | +@@ -316,7 +312,7 @@ fn mul_clmul_u64<const NWORDS: usize, const A: usize, const B: usize, const C: u |
| 23 | + for j in 0..NWORDS { |
| 24 | + let yj = y.words[j]; |
| 25 | + // Safety: target_feature's "neon" and "aes" are available in this function. |
| 26 | +- let clmul: u128 = unsafe { vmull_p64(xi, yj) }; |
| 27 | ++ let clmul: u128 = vmull_p64(xi, yj); |
| 28 | + let low: u64 = clmul as u64; |
| 29 | + let high: u64 = (clmul >> 64) as u64; |
| 30 | + |
| 31 | +@@ -540,12 +536,7 @@ impl<W: Word, const NWORDS: usize, const A: usize, const B: usize, const C: usiz |
| 32 | + target_feature = "sse2", |
| 33 | + target_feature = "pclmulqdq" |
| 34 | + ), |
| 35 | +- all( |
| 36 | +- feature = "clmul", |
| 37 | +- target_arch = "aarch64", |
| 38 | +- target_feature = "neon", |
| 39 | +- target_feature = "aes" |
| 40 | +- ) |
| 41 | ++ all(feature = "clmul", target_arch = "aarch64") |
| 42 | + ))] |
| 43 | + fn propagate_carries(mut words: [W; NWORDS], carry: [W; NWORDS]) -> Self { |
| 44 | + if NWORDS == 1 { |
| 45 | +@@ -672,19 +663,11 @@ impl<W: Word, const NWORDS: usize, const A: usize, const B: usize, const C: usiz |
| 46 | + type Output = Self; |
| 47 | + |
| 48 | + fn mul(self, other: &Self) -> Self { |
| 49 | +- #[cfg(any( |
| 50 | +- all( |
| 51 | +- feature = "clmul", |
| 52 | +- target_arch = "x86_64", |
| 53 | +- target_feature = "sse2", |
| 54 | +- target_feature = "pclmulqdq" |
| 55 | +- ), |
| 56 | +- all( |
| 57 | +- feature = "clmul", |
| 58 | +- target_arch = "aarch64", |
| 59 | +- target_feature = "neon", |
| 60 | +- target_feature = "aes" |
| 61 | +- ) |
| 62 | ++ #[cfg(all( |
| 63 | ++ feature = "clmul", |
| 64 | ++ target_arch = "x86_64", |
| 65 | ++ target_feature = "sse2", |
| 66 | ++ target_feature = "pclmulqdq" |
| 67 | + ))] |
| 68 | + if W::NBITS == 64 { |
| 69 | + // Safety: W == u64 when NBITS == 64. |
| 70 | +@@ -696,6 +679,21 @@ impl<W: Word, const NWORDS: usize, const A: usize, const B: usize, const C: usiz |
| 71 | + let result: &Self = unsafe { std::mem::transmute(&tmp) }; |
| 72 | + return *result; |
| 73 | + } |
| 74 | ++ #[cfg(all(feature = "clmul", target_arch = "aarch64"))] |
| 75 | ++ if W::NBITS == 64 |
| 76 | ++ && std::arch::is_aarch64_feature_detected!("neon") |
| 77 | ++ && std::arch::is_aarch64_feature_detected!("aes") |
| 78 | ++ { |
| 79 | ++ // Safety: W == u64 when NBITS == 64. |
| 80 | ++ let x: &GF2n<u64, NWORDS, A, B, C> = unsafe { std::mem::transmute(&self) }; |
| 81 | ++ // Safety: W == u64 when NBITS == 64. |
| 82 | ++ let y: &GF2n<u64, NWORDS, A, B, C> = unsafe { std::mem::transmute(other) }; |
| 83 | + // Safety: the "neon" and "aes" target features were detected at runtime by the enclosing `if` condition. |
| 84 | ++ let tmp: GF2n<u64, NWORDS, A, B, C> = unsafe { mul_clmul_u64(x, y) }; |
| 85 | ++ // Safety: W == u64 when NBITS == 64. |
| 86 | ++ let result: &Self = unsafe { std::mem::transmute(&tmp) }; |
| 87 | ++ return *result; |
| 88 | ++ } |
| 89 | + self.mul_as_add(other) |
| 90 | + } |
| 91 | + } |
0 commit comments