|
| 1 | +// Copyright 2019 The CryptoCorrosion Contributors |
| 2 | +// Copyright 2020 Developers of the Rand project. |
| 3 | +// |
| 4 | +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 5 | +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 6 | +// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
| 7 | +// option. This file may not be copied, modified, or distributed |
| 8 | +// except according to those terms. |
| 9 | + |
| 10 | +//! The ChaCha random number generator. |
| 11 | +
|
| 12 | +use ppv_lite86::{dispatch, dispatch_light128}; |
| 13 | + |
| 14 | +pub use ppv_lite86::Machine; |
| 15 | +use ppv_lite86::{vec128_storage, ArithOps, BitOps32, LaneWords4, MultiLane, StoreBytes, Vec4}; |
| 16 | + |
/// Size in bytes of one ChaCha output block.
pub(crate) const BLOCK: usize = 64;
/// `BLOCK` expressed as a `u64`.
pub(crate) const BLOCK64: u64 = BLOCK as u64;
/// Base-2 logarithm of the number of blocks held by one output buffer.
const LOG2_BUFBLOCKS: u64 = 2;
/// Number of blocks held by one output buffer (`1 << LOG2_BUFBLOCKS`, i.e. 4).
const BUFBLOCKS: u64 = 1 << LOG2_BUFBLOCKS;
/// Size in bytes of one output buffer, as a `u64` (`BLOCK64 * BUFBLOCKS`, i.e. 256).
pub(crate) const BUFSZ64: u64 = BLOCK64 * BUFBLOCKS;
/// Size in bytes of one output buffer, as a `usize`; this is the length of the `out` array
/// filled by `ChaCha::refill4`.
pub(crate) const BUFSZ: usize = BUFSZ64 as usize;
| 23 | + |
/// Persistent ChaCha generator state.
///
/// Only rows `b`, `c` and `d` of the working state are stored; the constant row `a` is
/// re-created from literals each time blocks are computed.
#[derive(Clone)]
pub struct ChaCha {
    /// Row `b`: filled from the first 16 key bytes by `init_chacha`.
    pub(crate) b: vec128_storage,
    /// Row `c`: filled from the last 16 key bytes by `init_chacha`.
    pub(crate) c: vec128_storage,
    /// Row `d`: words 0 and 1 are read as the 64-bit block position (low word first) by
    /// `pos64`; the nonce words are placed here by `init_chacha`.
    pub(crate) d: vec128_storage,
}
| 30 | + |
/// Working state for the round functions: the four rows `a`, `b`, `c`, `d`.
///
/// `V` is the vector type holding one row. In this file it is instantiated with a single
/// 4-word vector (one block), a 4-lane vector of such rows (four blocks processed together),
/// and `vec128_storage` (the value returned by `refill_narrow_rounds`).
#[derive(Clone)]
pub struct State<V> {
    /// Row `a`; initialised with the ChaCha constants before the rounds run.
    pub(crate) a: V,
    /// Row `b`.
    pub(crate) b: V,
    /// Row `c`.
    pub(crate) c: V,
    /// Row `d`.
    pub(crate) d: V,
}
| 38 | + |
| 39 | +#[inline(always)] |
| 40 | +pub(crate) fn round<V: ArithOps + BitOps32>(mut x: State<V>) -> State<V> { |
| 41 | + x.a += x.b; |
| 42 | + x.d = (x.d ^ x.a).rotate_each_word_right16(); |
| 43 | + x.c += x.d; |
| 44 | + x.b = (x.b ^ x.c).rotate_each_word_right20(); |
| 45 | + x.a += x.b; |
| 46 | + x.d = (x.d ^ x.a).rotate_each_word_right24(); |
| 47 | + x.c += x.d; |
| 48 | + x.b = (x.b ^ x.c).rotate_each_word_right25(); |
| 49 | + x |
| 50 | +} |
| 51 | + |
| 52 | +#[inline(always)] |
| 53 | +pub(crate) fn diagonalize<V: LaneWords4>(mut x: State<V>) -> State<V> { |
| 54 | + x.b = x.b.shuffle_lane_words3012(); |
| 55 | + x.c = x.c.shuffle_lane_words2301(); |
| 56 | + x.d = x.d.shuffle_lane_words1230(); |
| 57 | + x |
| 58 | +} |
| 59 | +#[inline(always)] |
| 60 | +pub(crate) fn undiagonalize<V: LaneWords4>(mut x: State<V>) -> State<V> { |
| 61 | + x.b = x.b.shuffle_lane_words1230(); |
| 62 | + x.c = x.c.shuffle_lane_words2301(); |
| 63 | + x.d = x.d.shuffle_lane_words3012(); |
| 64 | + x |
| 65 | +} |
| 66 | + |
| 67 | +impl ChaCha { |
| 68 | + #[inline(always)] |
| 69 | + pub fn new(key: &[u8; 32], nonce: &[u8]) -> Self { |
| 70 | + init_chacha(key, nonce) |
| 71 | + } |
| 72 | + |
| 73 | + #[inline(always)] |
| 74 | + fn pos64<M: Machine>(&self, m: M) -> u64 { |
| 75 | + let d: M::u32x4 = m.unpack(self.d); |
| 76 | + ((d.extract(1) as u64) << 32) | d.extract(0) as u64 |
| 77 | + } |
| 78 | + |
| 79 | + /// Produce 4 blocks of output, advancing the state |
| 80 | + #[inline(always)] |
| 81 | + pub fn refill4(&mut self, drounds: u32, out: &mut [u8; BUFSZ]) { |
| 82 | + refill_wide(self, drounds, out) |
| 83 | + } |
| 84 | + |
| 85 | + #[inline(always)] |
| 86 | + pub fn set_stream_param(&mut self, param: u32, value: u64) { |
| 87 | + set_stream_param(self, param, value) |
| 88 | + } |
| 89 | + |
| 90 | + #[inline(always)] |
| 91 | + pub fn get_stream_param(&self, param: u32) -> u64 { |
| 92 | + get_stream_param(self, param) |
| 93 | + } |
| 94 | +} |
| 95 | + |
| 96 | +#[inline(always)] |
| 97 | +fn refill_wide_impl<Mach: Machine>( |
| 98 | + m: Mach, state: &mut ChaCha, drounds: u32, out: &mut [u8; BUFSZ], |
| 99 | +) { |
| 100 | + let k = m.vec([0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]); |
| 101 | + let mut pos = state.pos64(m); |
| 102 | + let d0: Mach::u32x4 = m.unpack(state.d); |
| 103 | + pos += 1; |
| 104 | + let d1 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); |
| 105 | + pos += 1; |
| 106 | + let d2 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); |
| 107 | + pos += 1; |
| 108 | + let d3 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); |
| 109 | + |
| 110 | + let b = m.unpack(state.b); |
| 111 | + let c = m.unpack(state.c); |
| 112 | + let mut x = State { |
| 113 | + a: Mach::u32x4x4::from_lanes([k, k, k, k]), |
| 114 | + b: Mach::u32x4x4::from_lanes([b, b, b, b]), |
| 115 | + c: Mach::u32x4x4::from_lanes([c, c, c, c]), |
| 116 | + d: m.unpack(Mach::u32x4x4::from_lanes([d0, d1, d2, d3]).into()), |
| 117 | + }; |
| 118 | + for _ in 0..drounds { |
| 119 | + x = round(x); |
| 120 | + x = undiagonalize(round(diagonalize(x))); |
| 121 | + } |
| 122 | + let mut pos = state.pos64(m); |
| 123 | + let d0: Mach::u32x4 = m.unpack(state.d); |
| 124 | + pos += 1; |
| 125 | + let d1 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); |
| 126 | + pos += 1; |
| 127 | + let d2 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); |
| 128 | + pos += 1; |
| 129 | + let d3 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); |
| 130 | + pos += 1; |
| 131 | + let d4 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); |
| 132 | + |
| 133 | + let (a, b, c, d) = ( |
| 134 | + x.a.to_lanes(), |
| 135 | + x.b.to_lanes(), |
| 136 | + x.c.to_lanes(), |
| 137 | + x.d.to_lanes(), |
| 138 | + ); |
| 139 | + let sb = m.unpack(state.b); |
| 140 | + let sc = m.unpack(state.c); |
| 141 | + let sd = [m.unpack(state.d), d1, d2, d3]; |
| 142 | + state.d = d4.into(); |
| 143 | + let mut words = out.chunks_exact_mut(16); |
| 144 | + for ((((&a, &b), &c), &d), &sd) in a.iter().zip(&b).zip(&c).zip(&d).zip(&sd) { |
| 145 | + (a + k).write_le(words.next().unwrap()); |
| 146 | + (b + sb).write_le(words.next().unwrap()); |
| 147 | + (c + sc).write_le(words.next().unwrap()); |
| 148 | + (d + sd).write_le(words.next().unwrap()); |
| 149 | + } |
| 150 | +} |
| 151 | + |
// Four-block refill entry point used by `ChaCha::refill4`. The `dispatch!` macro (from
// `ppv_lite86`) provides the machine value `m` and its type `Mach`, which are forwarded to the
// generic `refill_wide_impl`.
// NOTE(review): presumably `dispatch!` picks the SIMD implementation for the running CPU;
// confirm against the ppv-lite86 documentation.
dispatch!(m, Mach, {
    fn refill_wide(state: &mut ChaCha, drounds: u32, out: &mut [u8; BUFSZ]) {
        refill_wide_impl(m, state, drounds, out);
    }
});
| 157 | + |
| 158 | +// Single-block, rounds-only; shared by try_apply_keystream for tails shorter than BUFSZ |
| 159 | +// and XChaCha's setup step. |
| 160 | +dispatch!(m, Mach, { |
| 161 | + fn refill_narrow_rounds(state: &mut ChaCha, drounds: u32) -> State<vec128_storage> { |
| 162 | + let k: Mach::u32x4 = m.vec([0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]); |
| 163 | + let mut x = State { |
| 164 | + a: k, |
| 165 | + b: m.unpack(state.b), |
| 166 | + c: m.unpack(state.c), |
| 167 | + d: m.unpack(state.d), |
| 168 | + }; |
| 169 | + for _ in 0..drounds { |
| 170 | + x = round(x); |
| 171 | + x = undiagonalize(round(diagonalize(x))); |
| 172 | + } |
| 173 | + State { |
| 174 | + a: x.a.into(), |
| 175 | + b: x.b.into(), |
| 176 | + c: x.c.into(), |
| 177 | + d: x.d.into(), |
| 178 | + } |
| 179 | + } |
| 180 | +}); |
| 181 | + |
| 182 | +dispatch_light128!(m, Mach, { |
| 183 | + fn set_stream_param(state: &mut ChaCha, param: u32, value: u64) { |
| 184 | + let d: Mach::u32x4 = m.unpack(state.d); |
| 185 | + state.d = d |
| 186 | + .insert((value >> 32) as u32, (param << 1) | 1) |
| 187 | + .insert(value as u32, param << 1) |
| 188 | + .into(); |
| 189 | + } |
| 190 | +}); |
| 191 | + |
| 192 | +dispatch_light128!(m, Mach, { |
| 193 | + fn get_stream_param(state: &ChaCha, param: u32) -> u64 { |
| 194 | + let d: Mach::u32x4 = m.unpack(state.d); |
| 195 | + ((d.extract((param << 1) | 1) as u64) << 32) | d.extract(param << 1) as u64 |
| 196 | + } |
| 197 | +}); |
| 198 | + |
/// Decodes a 4-byte slice as a little-endian `u32`.
///
/// # Panics
///
/// Panics if `xs` is not exactly 4 bytes long.
fn read_u32le(xs: &[u8]) -> u32 {
    assert_eq!(xs.len(), 4);
    u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]])
}
| 203 | + |
| 204 | +dispatch_light128!(m, Mach, { |
| 205 | + fn init_chacha(key: &[u8; 32], nonce: &[u8]) -> ChaCha { |
| 206 | + let ctr_nonce = [ |
| 207 | + 0, |
| 208 | + if nonce.len() == 12 { |
| 209 | + read_u32le(&nonce[0..4]) |
| 210 | + } else { |
| 211 | + 0 |
| 212 | + }, |
| 213 | + read_u32le(&nonce[nonce.len() - 8..nonce.len() - 4]), |
| 214 | + read_u32le(&nonce[nonce.len() - 4..]), |
| 215 | + ]; |
| 216 | + let key0: Mach::u32x4 = m.read_le(&key[..16]); |
| 217 | + let key1: Mach::u32x4 = m.read_le(&key[16..]); |
| 218 | + ChaCha { |
| 219 | + b: key0.into(), |
| 220 | + c: key1.into(), |
| 221 | + d: ctr_nonce.into(), |
| 222 | + } |
| 223 | + } |
| 224 | +}); |
| 225 | + |
| 226 | +dispatch_light128!(m, Mach, { |
| 227 | + fn init_chacha_x(key: &[u8; 32], nonce: &[u8; 24], rounds: u32) -> ChaCha { |
| 228 | + let key0: Mach::u32x4 = m.read_le(&key[..16]); |
| 229 | + let key1: Mach::u32x4 = m.read_le(&key[16..]); |
| 230 | + let nonce0: Mach::u32x4 = m.read_le(&nonce[..16]); |
| 231 | + let mut state = ChaCha { |
| 232 | + b: key0.into(), |
| 233 | + c: key1.into(), |
| 234 | + d: nonce0.into(), |
| 235 | + }; |
| 236 | + let x = refill_narrow_rounds(&mut state, rounds); |
| 237 | + let ctr_nonce1 = [0, 0, read_u32le(&nonce[16..20]), read_u32le(&nonce[20..24])]; |
| 238 | + state.b = x.a; |
| 239 | + state.c = x.d; |
| 240 | + state.d = ctr_nonce1.into(); |
| 241 | + state |
| 242 | + } |
| 243 | +}); |
0 commit comments