Skip to content

Commit 2e64ad5

Browse files
authored
chacha20: SSE2 autodetection support (#270)
i586 targets do not have SSE2 enabled by default. This commit adds autodetection for SSE2 in addition to the existing autodetection for AVX2, and falls back to a portable implementation if SSE2 is not available.
1 parent 335f0d9 commit 2e64ad5

File tree

1 file changed

+42
-19
lines changed

1 file changed

+42
-19
lines changed

chacha20/src/backend/autodetect.rs

+42-19
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
1-
//! Autodetection support for AVX2 CPU intrinsics on x86 CPUs, with fallback
2-
//! to the SSE2 backend when it's unavailable (the `sse2` target feature is
3-
//! enabled-by-default on all x86(_64) CPUs)
1+
//! Autodetection support for AVX2 and SSE2 CPU intrinsics on x86 CPUs, with
2+
//! fallback to a portable version when they're unavailable.
43
5-
use super::{avx2, sse2};
4+
use super::{avx2, soft, sse2};
65
use crate::{rounds::Rounds, BLOCK_SIZE, IV_SIZE, KEY_SIZE};
76
use core::mem::ManuallyDrop;
87

@@ -13,75 +12,99 @@ use core::mem::ManuallyDrop;
1312
pub(crate) const BUFFER_SIZE: usize = BLOCK_SIZE * 4;
1413

1514
cpufeatures::new!(avx2_cpuid, "avx2");
15+
cpufeatures::new!(sse2_cpuid, "sse2");
1616

1717
/// The ChaCha20 core function.
1818
pub struct Core<R: Rounds> {
1919
inner: Inner<R>,
20-
token: avx2_cpuid::InitToken,
20+
avx2_token: avx2_cpuid::InitToken,
21+
sse2_token: sse2_cpuid::InitToken,
2122
}
2223

2324
union Inner<R: Rounds> {
2425
avx2: ManuallyDrop<avx2::Core<R>>,
2526
sse2: ManuallyDrop<sse2::Core<R>>,
27+
soft: ManuallyDrop<soft::Core<R>>,
2628
}
2729

2830
impl<R: Rounds> Core<R> {
2931
/// Initialize ChaCha core function with the given key size, IV, and
3032
/// number of rounds.
33+
///
34+
/// Attempts to use AVX2 if present, followed by SSE2, with fallback to a
35+
/// portable software implementation if neither is available.
3136
#[inline]
3237
pub fn new(key: &[u8; KEY_SIZE], iv: [u8; IV_SIZE]) -> Self {
33-
let (token, avx2_present) = avx2_cpuid::init_get();
38+
let (avx2_token, avx2_present) = avx2_cpuid::init_get();
39+
let (sse2_token, sse2_present) = sse2_cpuid::init_get();
3440

3541
let inner = if avx2_present {
3642
Inner {
3743
avx2: ManuallyDrop::new(avx2::Core::new(key, iv)),
3844
}
39-
} else {
45+
} else if sse2_present {
4046
Inner {
4147
sse2: ManuallyDrop::new(sse2::Core::new(key, iv)),
4248
}
49+
} else {
50+
Inner {
51+
soft: ManuallyDrop::new(soft::Core::new(key, iv)),
52+
}
4353
};
4454

45-
Self { inner, token }
55+
Self {
56+
inner,
57+
avx2_token,
58+
sse2_token,
59+
}
4660
}
4761

48-
/// Generate output, overwriting data already in the buffer
62+
/// Generate output, overwriting data already in the buffer.
4963
#[inline]
50-
pub fn generate(&self, counter: u64, output: &mut [u8]) {
51-
if self.token.get() {
64+
pub fn generate(&mut self, counter: u64, output: &mut [u8]) {
65+
if self.avx2_token.get() {
5266
unsafe { (*self.inner.avx2).generate(counter, output) }
53-
} else {
67+
} else if self.sse2_token.get() {
5468
unsafe { (*self.inner.sse2).generate(counter, output) }
69+
} else {
70+
unsafe { (*self.inner.soft).generate(counter, output) }
5571
}
5672
}
5773

58-
/// Apply generated keystream to the output buffer
74+
/// Apply generated keystream to the output buffer.
5975
#[inline]
6076
#[cfg(feature = "cipher")]
61-
pub fn apply_keystream(&self, counter: u64, output: &mut [u8]) {
62-
if self.token.get() {
77+
pub fn apply_keystream(&mut self, counter: u64, output: &mut [u8]) {
78+
if self.avx2_token.get() {
6379
unsafe { (*self.inner.avx2).apply_keystream(counter, output) }
64-
} else {
80+
} else if self.sse2_token.get() {
6581
unsafe { (*self.inner.sse2).apply_keystream(counter, output) }
82+
} else {
83+
unsafe { (*self.inner.soft).apply_keystream(counter, output) }
6684
}
6785
}
6886
}
6987

7088
impl<R: Rounds> Clone for Core<R> {
7189
fn clone(&self) -> Self {
72-
let inner = if self.token.get() {
90+
let inner = if self.avx2_token.get() {
7391
Inner {
7492
avx2: ManuallyDrop::new(unsafe { (*self.inner.avx2).clone() }),
7593
}
76-
} else {
94+
} else if self.sse2_token.get() {
7795
Inner {
7896
sse2: ManuallyDrop::new(unsafe { (*self.inner.sse2).clone() }),
7997
}
98+
} else {
99+
Inner {
100+
soft: ManuallyDrop::new(unsafe { (*self.inner.soft).clone() }),
101+
}
80102
};
81103

82104
Self {
83105
inner,
84-
token: self.token,
106+
avx2_token: self.avx2_token,
107+
sse2_token: self.sse2_token,
85108
}
86109
}
87110
}

0 commit comments

Comments
 (0)