
Commit 7818f35

arm: support aes_armv8 on Rust 1.61+ using asm!
Adds "polyfills" for the unstable ARMv8 AES intrinsics using the `asm!` macro which was stabilized in Rust 1.59. However note we also need `target_feature` stabilizations for `aes` and `neon` which occurred in Rust 1.61. Based on benchmarks this has no effect on performance, although it was necessary to place AESE/AESMC and AESD/AESIMC into a single `asm!` block in order to ensure that instructions fuse properly, as they did when using the proper intrinsics.
1 parent: 4334b85
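For illustration (not part of the diff below): caller code is unchanged by this commit; the ARMv8 backend is selected purely by the toolchain version and the `aes_armv8` cfg. A minimal usage sketch against the crate's `cipher`-trait API, assuming the crate is built on Rust 1.61+ for an `aarch64` target with `RUSTFLAGS="--cfg aes_armv8"`:

```rust
// Sketch (not from this commit): the public API is identical whether the
// software backend or the ARMv8 intrinsics backend ends up being used.
use aes::Aes128;
use aes::cipher::{BlockDecrypt, BlockEncrypt, KeyInit, generic_array::GenericArray};

fn main() {
    let key = GenericArray::from([0u8; 16]);
    let mut block = GenericArray::from([42u8; 16]);

    let cipher = Aes128::new(&key);
    let plaintext = block.clone();

    cipher.encrypt_block(&mut block); // one full AES-128 block encryption
    cipher.decrypt_block(&mut block); // and its inverse
    assert_eq!(block, plaintext);
}
```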

7 files changed (+114, -26 lines)

.github/workflows/aes.yml

Lines changed: 2 additions & 2 deletions
@@ -219,15 +219,15 @@ jobs:
           cross test --package aes --target ${{ matrix.target }}
           cross test --package aes --target ${{ matrix.target }} --features hazmat
 
-  # ARMv8 cross-compiled tests for AES intrinsics (nightly-only)
+  # ARMv8 cross-compiled tests for AES intrinsics
   armv8:
     env:
       RUSTFLAGS: "-Dwarnings --cfg aes_armv8"
     strategy:
       matrix:
         include:
           - target: aarch64-unknown-linux-gnu
-            rust: nightly
+            rust: 1.61.0 # MSRV for `aes_armv8`
     runs-on: ubuntu-latest
     # Cross mounts only current package, i.e. by default it ignores workspace's Cargo.toml
     defaults:
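To reproduce this job locally, a sketch based on the workflow above (assumes `cross` is installed; any mechanism for passing `--cfg aes_armv8` to rustc works):

```sh
# Run the same cross-compiled ARMv8 test pass that CI runs
RUSTFLAGS="-Dwarnings --cfg aes_armv8" \
    cross test --package aes --target aarch64-unknown-linux-gnu
```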

aes/src/armv8.rs

Lines changed: 1 addition & 0 deletions
@@ -14,6 +14,7 @@ pub(crate) mod hazmat;
 
 mod encdec;
 mod expand;
+mod intrinsics;
 #[cfg(test)]
 mod test_expand;

aes/src/armv8/encdec.rs

Lines changed: 14 additions & 20 deletions
@@ -4,6 +4,12 @@ use crate::{Block, Block8};
 use cipher::inout::InOut;
 use core::arch::aarch64::*;
 
+// Stable "polyfills" for unstable core::arch::aarch64 intrinsics
+// TODO(tarcieri): remove when these intrinsics have been stabilized
+use super::intrinsics::{
+    vaesdq_u8, vaesdq_u8_and_vaesimcq_u8, vaeseq_u8, vaeseq_u8_and_vaesmcq_u8,
+};
+
 /// Perform AES encryption using the given expanded keys.
 #[target_feature(enable = "aes")]
 #[target_feature(enable = "neon")]
@@ -19,11 +25,8 @@ pub(super) unsafe fn encrypt1<const N: usize>(
     let mut state = vld1q_u8(in_ptr as *const u8);
 
     for k in expanded_keys.iter().take(rounds - 1) {
-        // AES single round encryption
-        state = vaeseq_u8(state, *k);
-
-        // AES mix columns
-        state = vaesmcq_u8(state);
+        // AES single round encryption and mix columns
+        state = vaeseq_u8_and_vaesmcq_u8(state, *k);
     }
 
     // AES single round encryption
@@ -62,11 +65,8 @@ pub(super) unsafe fn encrypt8<const N: usize>(
 
     for k in expanded_keys.iter().take(rounds - 1) {
         for i in 0..8 {
-            // AES single round encryption
-            state[i] = vaeseq_u8(state[i], *k);
-
-            // AES mix columns
-            state[i] = vaesmcq_u8(state[i]);
+            // AES single round encryption and mix columns
+            state[i] = vaeseq_u8_and_vaesmcq_u8(state[i], *k);
         }
     }
 
@@ -95,11 +95,8 @@ pub(super) unsafe fn decrypt1<const N: usize>(
     let mut state = vld1q_u8(in_ptr as *const u8);
 
     for k in expanded_keys.iter().take(rounds - 1) {
-        // AES single round decryption
-        state = vaesdq_u8(state, *k);
-
-        // AES inverse mix columns
-        state = vaesimcq_u8(state);
+        // AES single round decryption and inverse mix columns
+        state = vaesdq_u8_and_vaesimcq_u8(state, *k);
     }
 
     // AES single round decryption
@@ -138,11 +135,8 @@ pub(super) unsafe fn decrypt8<const N: usize>(
 
     for k in expanded_keys.iter().take(rounds - 1) {
        for i in 0..8 {
-            // AES single round decryption
-            state[i] = vaesdq_u8(state[i], *k);
-
-            // AES inverse mix columns
-            state[i] = vaesimcq_u8(state[i]);
+            // AES single round decryption and inverse mix columns
+            state[i] = vaesdq_u8_and_vaesimcq_u8(state[i], *k);
         }
     }
aes/src/armv8/expand.rs

Lines changed: 4 additions & 0 deletions
@@ -2,6 +2,10 @@
 
 use core::{arch::aarch64::*, mem, slice};
 
+// Stable "polyfills" for unstable core::arch::aarch64 intrinsics
+// TODO(tarcieri): remove when these intrinsics have been stabilized
+use super::intrinsics::{vaeseq_u8, vaesimcq_u8};
+
 /// There are 4 AES words in a block.
 const BLOCK_WORDS: usize = 4;

aes/src/armv8/hazmat.rs

Lines changed: 3 additions & 0 deletions
@@ -7,6 +7,9 @@
 use crate::{Block, Block8};
 use core::arch::aarch64::*;
 
+// Stable "polyfills" for unstable core::arch::aarch64 intrinsics
+use super::intrinsics::{vaesdq_u8, vaeseq_u8, vaesimcq_u8, vaesmcq_u8};
+
 /// AES cipher (encrypt) round function.
 #[allow(clippy::cast_ptr_alignment)]
 #[target_feature(enable = "aes")]

aes/src/armv8/intrinsics.rs

Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
+//! Stable "polyfills" for unstable `core::arch::aarch64` intrinsics which use
+//! `asm!` internally to allow use on stable Rust.
+// TODO(tarcieri): remove when these intrinsics have been stabilized
+
+use core::arch::{aarch64::uint8x16_t, asm};
+
+/// AES single round encryption.
+#[inline(always)]
+pub(super) unsafe fn vaeseq_u8(mut data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
+    asm!(
+        "AESE {d:v}.16B, {k:v}.16B",
+        d = inout(vreg) data,
+        k = in(vreg) key,
+        options(pure, nomem, nostack, preserves_flags)
+    );
+    data
+}
+
+/// AES single round decryption.
+#[inline(always)]
+pub(super) unsafe fn vaesdq_u8(mut data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
+    asm!(
+        "AESD {d:v}.16B, {k:v}.16B",
+        d = inout(vreg) data,
+        k = in(vreg) key,
+        options(pure, nomem, nostack, preserves_flags)
+    );
+    data
+}
+
+/// AES mix columns.
+#[cfg(feature = "hazmat")]
+#[inline(always)]
+pub(super) unsafe fn vaesmcq_u8(mut data: uint8x16_t) -> uint8x16_t {
+    asm!(
+        "AESMC {d:v}.16B, {d:v}.16B",
+        d = inout(vreg) data,
+        options(pure, nomem, nostack, preserves_flags)
+    );
+    data
+}
+
+/// AES inverse mix columns.
+#[inline(always)]
+pub(super) unsafe fn vaesimcq_u8(mut data: uint8x16_t) -> uint8x16_t {
+    asm!(
+        "AESIMC {d:v}.16B, {d:v}.16B",
+        d = inout(vreg) data,
+        options(pure, nomem, nostack, preserves_flags)
+    );
+    data
+}
+
+/// AES single round encryption combined with mix columns.
+///
+/// These two instructions are combined into a single assembly block to ensure
+/// that instructions fuse properly.
+#[inline(always)]
+pub(super) unsafe fn vaeseq_u8_and_vaesmcq_u8(mut data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
+    asm!(
+        "AESE {d:v}.16B, {k:v}.16B",
+        "AESMC {d:v}.16B, {d:v}.16B",
+        d = inout(vreg) data,
+        k = in(vreg) key,
+        options(pure, nomem, nostack, preserves_flags)
+    );
+    data
+}
+
+/// AES single round decryption combined with inverse mix columns.
+///
+/// These two instructions are combined into a single assembly block to ensure
+/// that instructions fuse properly.
+#[inline(always)]
+pub(super) unsafe fn vaesdq_u8_and_vaesimcq_u8(
+    mut data: uint8x16_t,
+    key: uint8x16_t,
+) -> uint8x16_t {
+    asm!(
+        "AESD {d:v}.16B, {k:v}.16B",
+        "AESIMC {d:v}.16B, {d:v}.16B",
+        d = inout(vreg) data,
+        k = in(vreg) key,
+        options(pure, nomem, nostack, preserves_flags)
+    );
+    data
+}

aes/src/lib.rs

Lines changed: 3 additions & 4 deletions
@@ -26,11 +26,11 @@
 //! backend at the cost of decreased performance (using a modified form of
 //! the fixslicing technique called "semi-fixslicing").
 //!
-//! ## ARMv8 intrinsics (nightly-only)
+//! ## ARMv8 intrinsics (Rust 1.61+)
 //! On `aarch64` targets including `aarch64-apple-darwin` (Apple M1) and Linux
 //! targets such as `aarch64-unknown-linux-gnu` and `aarch64-unknown-linux-musl`,
 //! support for using AES intrinsics provided by the ARMv8 Cryptography Extensions
-//! is available when using the nightly compiler, and can be enabled using the
+//! is available when using Rust 1.61 or above, and can be enabled using the
 //! `aes_armv8` configuration flag.
 //!
 //! On Linux and macOS, when the `aes_armv8` flag is enabled support for AES
@@ -99,7 +99,7 @@
 //!
 //! You can modify crate using the following configuration flags:
 //!
-//! - `aes_armv8`: enable ARMv8 AES intrinsics (nightly-only).
+//! - `aes_armv8`: enable ARMv8 AES intrinsics (Rust 1.61+).
 //! - `aes_force_soft`: force software implementation.
 //! - `aes_compact`: reduce code size at the cost of slower performance
 //!   (affects only software backend).
@@ -119,7 +119,6 @@
 )]
 #![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs, rust_2018_idioms)]
-#![cfg_attr(all(aes_armv8, target_arch = "aarch64"), feature(stdsimd))]
 
 #[cfg(feature = "hazmat")]
 #[cfg_attr(docsrs, doc(cfg(feature = "hazmat")))]
