@@ -33,23 +33,32 @@ inline fn __add_full_block_avx2(
33
33
stack u64[28] s_state,
34
34
reg ptr u64[25] a_jagged_p,
35
35
reg u64 in inlen,
36
- reg u64 rate
37
- ) -> reg u256[7], stack u64[28], reg u64, reg u64
36
+ reg u64 rate,
37
+ #msf reg u64 ms
38
+ ) -> reg u256[7], stack u64[28], reg u64, reg u64, #msf reg u64
38
39
{
39
40
40
41
inline int i;
41
42
reg u64 j l t rate8;
43
+ reg bool loop_condition;
42
44
43
45
rate8 = rate;
44
46
rate8 >>= 3;
45
47
j = 0;
46
- while ( j < rate8 )
48
+ while { loop_condition = ( j < rate8 ); } ( loop_condition )
47
49
{
50
+ ms = #update_msf(loop_condition, ms);
51
+
48
52
t = [in + 8*j];
53
+
49
54
l = a_jagged_p[(int) j];
55
+ l = #protect(l, ms);
56
+
50
57
s_state[(int) l] = t;
51
58
j += 1;
59
+
52
60
}
61
+ ms = #update_msf(!loop_condition, ms);
53
62
54
63
//TODO: check & change to #VPBROADCAST_4u64
55
64
t = s_state[0];
@@ -63,7 +72,7 @@ inline fn __add_full_block_avx2(
63
72
in += rate;
64
73
inlen -= rate;
65
74
66
- return state, s_state, in, inlen;
75
+ return state, s_state, in, inlen, ms ;
67
76
}
68
77
69
78
@@ -74,42 +83,56 @@ inline fn __add_final_block_avx2(
74
83
reg ptr u64[25] a_jagged_p,
75
84
reg u64 in inlen,
76
85
reg u8 trail_byte,
77
- reg u64 rate
78
- ) -> reg u256[7]
86
+ reg u64 rate,
87
+ #msf reg u64 ms
88
+ ) -> reg u256[7], #msf reg u64
79
89
{
80
90
inline int i;
81
91
reg u64 j l t inlen8;
82
92
reg u8 c;
93
+ reg bool loop_condition;
83
94
84
95
s_state = __init_s_state_avx2();
85
96
86
97
inlen8 = inlen;
87
98
inlen8 >>= 3;
88
99
j = 0;
89
- while ( j < inlen8 )
100
+
101
+ while { loop_condition = (j < inlen8); } ( loop_condition )
90
102
{
103
+ ms = #update_msf(loop_condition, ms);
104
+
91
105
t = [in + 8*j];
92
106
l = a_jagged_p[(int) j];
107
+ l = #protect(l, ms);
108
+
93
109
s_state[(int) l] = t;
94
110
j += 1;
95
111
}
112
+ ms = #update_msf(!loop_condition, ms);
113
+
96
114
l = a_jagged_p[(int) j];
115
+ l = #protect(l, ms);
116
+
97
117
l <<= 3;
98
118
j <<= 3;
99
119
100
- while ( j < inlen )
120
+ while { loop_condition = ( j < inlen ); } ( loop_condition )
101
121
{
122
+ ms = #update_msf(loop_condition, ms);
102
123
c = (u8)[in + j];
103
124
s_state[u8 (int) l] = c;
104
125
j += 1;
105
126
l += 1;
106
127
}
128
+ ms = #update_msf(!loop_condition, ms);
107
129
108
130
s_state[u8 (int) l] = trail_byte;
109
131
110
132
// j = (rate-1) >> 3;
111
133
j = rate; j -= 1; j >>= 3;
112
134
l = a_jagged_p[(int) j];
135
+ l = #protect(l, ms);
113
136
l <<= 3;
114
137
// l += ((rate-1) & 0x7)
115
138
j = rate; j -= 1; j &= 0x7;
@@ -125,7 +148,7 @@ inline fn __add_final_block_avx2(
125
148
for i = 0 to 7
126
149
{ state[i] ^= s_state[u256 i]; }
127
150
128
- return state;
151
+ return state, ms ;
129
152
}
130
153
131
154
// __xtr_full_block_avx2: spills the 7 u256 state registers to a stack buffer,
// then copies (len >> 3) 64-bit words from that buffer to memory at `out`,
// using a_jagged_p[j] as the source index for output word j.
// Returns `out` advanced by `len`; the `+` version also takes and returns `ms`.
// NOTE(review): this text is a diff rendering -- `-` lines are the old version,
// `+` lines the new one, bare numbers are old/new line numbers.
// NOTE(review): `#msf`, `#update_msf` and `#protect` look like Jasmin's
// speculative-load-hardening primitives (misspeculation flag) -- confirm
// against the Jasmin compiler documentation.
@@ -134,30 +157,37 @@ inline fn __xtr_full_block_avx2(
134
157
reg u256[7] state,
135
158
reg ptr u64[25] a_jagged_p,
136
159
reg u64 out,
137
- reg u64 len
138
- ) -> reg u64
160
+ reg u64 len,
161
+ #msf reg u64 ms
162
+ ) -> reg u64, #msf reg u64
139
163
{
140
164
inline int i;
141
165
stack u64[28] s_state;
142
166
reg u64 j l t len8;
167
+ reg bool loop_condition;
143
168
144
169
// Copy the register state into the stack array, one u256 lane at a time.
for i = 0 to 7
145
170
{ s_state[u256 i] = state[i]; }
146
171
147
172
// len8 = len / 8: number of whole 64-bit words to write out.
len8 = len;
148
173
len8 >>= 3;
149
174
j = 0;
150
- while ( j < len8 )
175
// The new form stores the loop test in loop_condition so the same value can
// be handed to #update_msf inside the body and (negated) after the loop.
+ while { loop_condition = ( j < len8 ); } ( loop_condition )
151
176
{
177
+ ms = #update_msf(loop_condition, ms);
178
+
152
179
// l is loaded from the index table and used to index s_state below; the new
// version passes it through #protect with ms before that use.
l = a_jagged_p[(int) j];
180
+ l = #protect(l, ms);
181
+
153
182
t = s_state[(int) l];
154
183
[out + 8*j] = t;
155
184
j += 1;
156
185
}
186
+ ms = #update_msf(!loop_condition, ms);
157
187
158
188
// Advance the output pointer by the full byte length, not by 8*len8.
out += len;
159
189
160
- return out;
190
+ return out, ms ;
161
191
}
162
192
163
193
@@ -166,27 +196,38 @@ inline fn __xtr_bytes_avx2(
166
196
reg u256[7] state,
167
197
reg ptr u64[25] a_jagged_p,
168
198
reg u64 out,
169
- reg u64 len
199
+ reg u64 len,
200
+ #msf reg u64 ms
170
201
) -> reg u64
171
202
{
172
203
inline int i;
173
204
stack u64[28] s_state;
174
205
reg u64 j l t len8;
175
206
reg u8 c;
207
+ reg bool loop_condition;
176
208
177
209
for i = 0 to 7
178
210
{ s_state[u256 i] = state[i]; }
179
211
180
212
len8 = len;
181
213
len8 >>= 3;
182
214
j = 0;
183
- while ( j < len8 )
184
- { l = a_jagged_p[(int) j];
215
+ while { loop_condition = ( j < len8 ); } ( loop_condition )
216
+ {
217
+ ms = #update_msf(loop_condition, ms);
218
+
219
+ l = a_jagged_p[(int) j];
220
+ l = #protect(l, ms);
221
+
185
222
t = s_state[(int) l];
186
223
[out + 8*j] = t;
187
224
j += 1;
188
225
}
226
+ ms = #update_msf(!loop_condition, ms);
227
+
189
228
l = a_jagged_p[(int)j];
229
+ l = #protect(l, ms);
230
+
190
231
j <<= 3;
191
232
l <<= 3;
192
233
// __absorb_avx2: while inlen >= rate, calls __add_full_block_avx2 followed by
// __keccakf1600_avx2; afterwards calls __add_final_block_avx2 once with
// trail_byte. Returns the resulting state (the `+` version also returns ms).
// NOTE(review): this text is a diff rendering -- `-` lines are the old version,
// `+` lines the new one, bare numbers are old/new line numbers.
// NOTE(review): `#msf` / `#update_msf` look like Jasmin's speculative-load-
// hardening primitives -- confirm against the Jasmin compiler documentation.
@@ -208,65 +249,75 @@ inline fn __absorb_avx2(
208
249
reg u256[7] state,
209
250
reg u64 in inlen,
210
251
reg u8 trail_byte,
211
- reg u64 rate
212
- ) -> reg u256[7]
252
+ reg u64 rate,
253
+ #msf reg u64 ms
254
+ ) -> reg u256[7], #msf reg u64
213
255
{
214
256
stack u64[28] s_state;
215
257
reg ptr u64[25] a_jagged_p;
258
+ reg bool loop_condition;
216
259
217
260
// Index table and stack scratch buffer passed to the block-adding helpers.
a_jagged_p = KECCAK_A_JAGGED;
218
261
s_state = __init_s_state_avx2();
219
262
220
263
// intermediate blocks
221
- while ( inlen >= rate )
264
// The new form stores the loop test in loop_condition so the same value can
// be handed to #update_msf inside the body and (negated) after the loop.
+ while { loop_condition = ( inlen >= rate); } (loop_condition )
222
265
{
223
- state, s_state, in, inlen = __add_full_block_avx2(state, s_state, a_jagged_p, in, inlen, rate);
224
- state = __keccakf1600_avx2(state);
266
+ ms = #update_msf(loop_condition, ms);
267
+
268
// The helper hands back updated in/inlen, which drive the next loop test;
// ms is threaded through both calls in the new version.
+ state, s_state, in, inlen, ms = __add_full_block_avx2(state, s_state, a_jagged_p, in, inlen, rate, ms);
269
+
270
+ state, ms = __keccakf1600_avx2(state, ms);
225
271
}
272
+ ms = #update_msf(!loop_condition, ms);
226
273
227
274
// final block
228
- state = __add_final_block_avx2(state, s_state, a_jagged_p, in, inlen, trail_byte, rate);
275
+ state, ms = __add_final_block_avx2(state, s_state, a_jagged_p, in, inlen, trail_byte, rate, ms );
229
276
230
- return state;
277
+ return state, ms ;
231
278
}
232
279
233
280
234
// __squeeze_avx2: while outlen > rate, runs __keccakf1600_avx2 and writes one
// block of `rate` bytes via __xtr_full_block_avx2; then runs the permutation
// once more and writes the remaining outlen bytes via __xtr_bytes_avx2.
// Declares no return value; the `+` version takes an extra `#msf reg u64 ms`.
// NOTE(review): `-` lines are the old version, `+` lines the new one; bare
// numbers are old/new line numbers from the diff rendering.
// NOTE(review): `#msf` / `#update_msf` look like Jasmin's speculative-load-
// hardening primitives -- confirm against the Jasmin compiler documentation.
- inline fn __squeeze_avx2(reg u256[7] state, reg u64 out outlen rate)
281
+ inline fn __squeeze_avx2(reg u256[7] state, reg u64 out outlen rate, #msf reg u64 ms )
235
282
{
236
283
reg ptr u64[25] a_jagged_p;
284
+ reg bool loop_condition;
237
285
238
286
a_jagged_p = KECCAK_A_JAGGED;
239
287
240
288
// intermediate blocks
241
- while ( outlen > rate )
289
// Strict `>`: the loop stops with outlen <= rate, so the last block (even a
// full one) is written by the __xtr_bytes_avx2 call after the loop.
+ while { loop_condition = ( outlen > rate); } ( loop_condition )
242
290
{
243
- state = __keccakf1600_avx2(state);
244
- out = __xtr_full_block_avx2(state, a_jagged_p, out, rate);
291
+ ms = #update_msf(loop_condition, ms);
292
+
293
+ state, ms = __keccakf1600_avx2(state, ms);
294
+ out, ms = __xtr_full_block_avx2(state, a_jagged_p, out, rate, ms);
245
295
outlen -= rate;
246
296
}
297
+ ms = #update_msf(!loop_condition, ms);
247
298
248
- state = __keccakf1600_avx2(state);
249
- out = __xtr_bytes_avx2(state, a_jagged_p, out, outlen);
299
+ state, ms = __keccakf1600_avx2(state, ms );
300
// Unlike __xtr_full_block_avx2 above, this call's result is bound to `out`
// only; ms is passed in but not received back here.
+ out = __xtr_bytes_avx2(state, a_jagged_p, out, outlen, ms );
250
301
}
251
302
252
303
253
// __keccak1600_avx2: initialises a state with __keccak_init_avx2, absorbs
// `inlen` bytes from `in` via __absorb_avx2 (with trail_byte and rate), then
// squeezes `outlen` bytes to `out` via __squeeze_avx2. Declares no return value.
// The `+` version adds an `#msf reg u64 ms` parameter and threads it through
// both calls.
// NOTE(review): `-` lines are the old version, `+` lines the new one; bare
// numbers are old/new line numbers from the diff rendering.
- inline fn __keccak1600_avx2(reg u64 out outlen in inlen, reg u8 trail_byte, reg u64 rate)
304
+ inline fn __keccak1600_avx2(reg u64 out outlen in inlen, reg u8 trail_byte, reg u64 rate, #msf reg u64 ms )
254
305
{
255
306
reg u256[7] state;
256
307
257
308
state = __keccak_init_avx2();
258
309
259
310
// absorb
260
- state = __absorb_avx2(state, in, inlen, trail_byte, rate);
311
// ms returned by the absorb phase is the value handed to the squeeze phase.
+ state, ms = __absorb_avx2(state, in, inlen, trail_byte, rate, ms );
261
312
262
313
// squeeze
263
- __squeeze_avx2(state, out, outlen, rate);
314
+ __squeeze_avx2(state, out, outlen, rate, ms );
264
315
}
265
316
266
317
267
// _keccak1600_avx2: wrapper declared with plain `fn` (no `inline` keyword)
// that forwards all of its arguments unchanged to __keccak1600_avx2.
// The `+` version adds an `#msf reg u64 ms` parameter and forwards it as well.
// NOTE(review): `-` lines are the old version, `+` lines the new one; bare
// numbers are old/new line numbers from the diff rendering.
- fn _keccak1600_avx2(reg u64 out outlen in inlen, reg u8 trail_byte, reg u64 rate)
318
+ fn _keccak1600_avx2(reg u64 out outlen in inlen, reg u8 trail_byte, reg u64 rate, #msf reg u64 ms )
268
319
{
269
- __keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate);
320
+ __keccak1600_avx2(out, outlen, in, inlen, trail_byte, rate, ms );
270
321
}
271
322
272
323
0 commit comments