@@ -14,7 +14,7 @@ use ppv_lite86::{dispatch, dispatch_light128};
14
14
pub use ppv_lite86:: Machine ;
15
15
use ppv_lite86:: { vec128_storage, ArithOps , BitOps32 , LaneWords4 , MultiLane , StoreBytes , Vec4 } ;
16
16
17
- pub ( crate ) const BLOCK : usize = 64 ;
17
// Size of one block. This change lowers the value from 64 to 16 in the same
// diff that switches the output buffers from `u8` to `u32` elements.
// NOTE(review): presumably the unit moved from bytes to 32-bit words
// (16 * 4 = 64) -- confirm against the definition of `BUFSZ`, which is not
// visible in this hunk.
+ pub ( crate ) const BLOCK : usize = 16 ;
18
18
// `BLOCK` as a `u64`.
pub ( crate ) const BLOCK64 : u64 = BLOCK as u64 ;
19
19
// Base-2 logarithm of `BUFBLOCKS`.
const LOG2_BUFBLOCKS : u64 = 2 ;
20
20
// 1 << 2 = 4.
const BUFBLOCKS : u64 = 1 << LOG2_BUFBLOCKS ;
@@ -81,7 +81,7 @@ impl ChaCha {
81
81
82
82
/// Produce 4 blocks of output, advancing the state
///
/// Thin wrapper: forwards `self`, `drounds` and `out` unchanged to
/// `refill_wide`. With this change `out` is an array of `BUFSZ` `u32` words
/// (previously `BUFSZ` `u8` bytes).
///
/// NOTE(review): `drounds` is presumably the number of double rounds to run;
/// the code that consumes it is not visible in this hunk -- confirm.
83
83
#[ inline( always) ]
84
- pub fn refill4 ( & mut self , drounds : u32 , out : & mut [ u8 ; BUFSZ ] ) {
84
+ pub fn refill4 ( & mut self , drounds : u32 , out : & mut [ u32 ; BUFSZ ] ) {
85
85
refill_wide ( self , drounds, out)
86
86
}
87
87
@@ -114,7 +114,7 @@ impl ChaCha {
114
114
// Machine-generic body behind `refill_wide`: fills `out` and updates
// `state.d`. Only the start and the end of this function are visible here;
// the code between the two hunks (where `sb`, `a`, `b`, `c`, `d` and
// `d1`..`d4` are defined) is elided by the diff.
#[ allow( clippy:: many_single_char_names) ]
115
115
#[ inline( always) ]
116
116
fn refill_wide_impl < Mach : Machine > (
117
- m : Mach , state : & mut ChaCha , drounds : u32 , out : & mut [ u8 ; BUFSZ ] ,
117
+ m : Mach , state : & mut ChaCha , drounds : u32 , out : & mut [ u32 ; BUFSZ ] ,
118
118
) {
119
119
// Read as little-endian bytes, these four words spell the ASCII string
// "expand 32-byte k".
let k = m. vec ( [ 0x6170_7865 , 0x3320_646e , 0x7962_2d32 , 0x6b20_6574 ] ) ;
120
120
let mut pos = state. pos64 ( m) ;
@@ -159,17 +159,26 @@ fn refill_wide_impl<Mach: Machine>(
159
159
let sc = m. unpack ( state. c ) ;
160
160
// One `d` addend per output block: the stored `state.d` for block 0, then
// `d1`, `d2`, `d3` for blocks 1..3.
let sd = [ m. unpack ( state. d ) , d1, d2, d3] ;
161
161
// Store `d4` back as the new `state.d`.
state. d = d4. into ( ) ;
162
- let mut words = out. chunks_exact_mut ( 16 ) ;
163
- for ( ( ( ( & a, & b) , & c) , & d) , & sd) in a. iter ( ) . zip ( & b) . zip ( & c) . zip ( & d) . zip ( & sd) {
164
- ( a + k) . write_le ( words. next ( ) . unwrap ( ) ) ;
165
- ( b + sb) . write_le ( words. next ( ) . unwrap ( ) ) ;
166
- ( c + sc) . write_le ( words. next ( ) . unwrap ( ) ) ;
167
- ( d + sd) . write_le ( words. next ( ) . unwrap ( ) ) ;
168
- }
162
// New output path: the four blocks are written back to back, 16 words each.
// Block i occupies out[16*i .. 16*i + 16] in the order
// a[i] + k, b[i] + sb, c[i] + sc, d[i] + sd[i], four lanes apiece.
// It replaces the removed loop above, which wrote the same sums in the same
// order via `write_le` into 16-element chunks of the former `u8` buffer.
// Block 0: words 0..16.
+ out[ 0 ..4 ] . copy_from_slice ( & ( a[ 0 ] + k) . to_lanes ( ) ) ;
163
+ out[ 4 ..8 ] . copy_from_slice ( & ( b[ 0 ] + sb) . to_lanes ( ) ) ;
164
+ out[ 8 ..12 ] . copy_from_slice ( & ( c[ 0 ] + sc) . to_lanes ( ) ) ;
165
+ out[ 12 ..16 ] . copy_from_slice ( & ( d[ 0 ] + sd[ 0 ] ) . to_lanes ( ) ) ;
166
// Block 1: words 16..32.
+ out[ 16 ..20 ] . copy_from_slice ( & ( a[ 1 ] + k) . to_lanes ( ) ) ;
167
+ out[ 20 ..24 ] . copy_from_slice ( & ( b[ 1 ] + sb) . to_lanes ( ) ) ;
168
+ out[ 24 ..28 ] . copy_from_slice ( & ( c[ 1 ] + sc) . to_lanes ( ) ) ;
169
+ out[ 28 ..32 ] . copy_from_slice ( & ( d[ 1 ] + sd[ 1 ] ) . to_lanes ( ) ) ;
170
// Block 2: words 32..48.
+ out[ 32 ..36 ] . copy_from_slice ( & ( a[ 2 ] + k) . to_lanes ( ) ) ;
171
+ out[ 36 ..40 ] . copy_from_slice ( & ( b[ 2 ] + sb) . to_lanes ( ) ) ;
172
+ out[ 40 ..44 ] . copy_from_slice ( & ( c[ 2 ] + sc) . to_lanes ( ) ) ;
173
+ out[ 44 ..48 ] . copy_from_slice ( & ( d[ 2 ] + sd[ 2 ] ) . to_lanes ( ) ) ;
174
// Block 3: words 48..64.
+ out[ 48 ..52 ] . copy_from_slice ( & ( a[ 3 ] + k) . to_lanes ( ) ) ;
175
+ out[ 52 ..56 ] . copy_from_slice ( & ( b[ 3 ] + sb) . to_lanes ( ) ) ;
176
+ out[ 56 ..60 ] . copy_from_slice ( & ( c[ 3 ] + sc) . to_lanes ( ) ) ;
177
+ out[ 60 ..64 ] . copy_from_slice ( & ( d[ 3 ] + sd[ 3 ] ) . to_lanes ( ) ) ;
169
178
}
170
179
171
180
// Defines `refill_wide` through the `dispatch!` macro imported from
// `ppv_lite86`. The generated function forwards its arguments, together with
// the macro-bound machine value `m`, to `refill_wide_impl`. With this change
// `out` is an array of `u32` words instead of `u8` bytes.
// NOTE(review): presumably `dispatch!` picks the `Machine` implementation for
// the running CPU -- confirm against the ppv_lite86 documentation.
dispatch ! ( m, Mach , {
172
- fn refill_wide( state: & mut ChaCha , drounds: u32 , out: & mut [ u8 ; BUFSZ ] ) {
181
+ fn refill_wide( state: & mut ChaCha , drounds: u32 , out: & mut [ u32 ; BUFSZ ] ) {
173
182
refill_wide_impl( m, state, drounds, out) ;
174
183
}
175
184
} ) ;
0 commit comments