-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathZen5_Demo_Imm8.asm
343 lines (293 loc) · 10.4 KB
/
Zen5_Demo_Imm8.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
INCLUDE Zen5_Demo_Port.h
; ---------------------------------------------------------------------------
; Data segment: four zero-initialised buffers of 16 qwords (128 bytes) each.
; NOTE(review): none of the code visible in this file references memop0..3;
; presumably kept for memory-operand variants of the tests -- confirm.
; ---------------------------------------------------------------------------
.data
memop0  dq      16 dup (0)
memop1  dq      16 dup (0)
memop2  dq      16 dup (0)
memop3  dq      16 dup (0)

.code
; Placeholder kernel that expands to nothing.
; zen5_wrap passes it to test_m as the M1 argument, so the "REPEAT R1 / M1"
; part of the generated loop body emits no instructions.
;   INST - accepted but unused (test_m invokes M1 without arguments)
InstLatX64_empty_port macro INST
endm
; Latency kernel: INST ymm, ymm, ymm, imm8.
; Destination and both sources are ymm0, so every expansion consumes the
; result of the previous one.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_2ymmI82ymm_lat MACRO INST, I8
        INST    ymm0, ymm0, ymm0, I8
ENDM
; Throughput kernel: INST ymm, ymm, ymm, imm8.
; Emits eight copies of INST, one per register ymm8..ymm15; each copy only
; reads and writes its own register, so the eight copies do not depend on
; one another.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_2ymmI82ymm_port MACRO INST, I8
    FOR REGNO, <8, 9, 10, 11, 12, 13, 14, 15>
        INST    ymm&REGNO, ymm&REGNO, ymm&REGNO, I8
    ENDM
ENDM
; Latency kernel: INST zmm, zmm, zmm, imm8.
; Destination and both sources are zmm0, so every expansion consumes the
; result of the previous one.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_2zmmI82zmm_lat MACRO INST, I8
        INST    zmm0, zmm0, zmm0, I8
ENDM
; Throughput kernel: INST zmm, zmm, zmm, imm8.
; Emits eight copies of INST, one per register zmm8..zmm15; each copy only
; reads and writes its own register, so the eight copies do not depend on
; one another.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_2zmmI82zmm_port MACRO INST, I8
    FOR REGNO, <8, 9, 10, 11, 12, 13, 14, 15>
        INST    zmm&REGNO, zmm&REGNO, zmm&REGNO, I8
    ENDM
ENDM
; Latency kernel: INST ymm, ymm, xmm, imm8.
; Writes ymm0 while reading ymm0 and xmm0, so every expansion consumes the
; result of the previous one.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_xmmymmI82ymm_lat MACRO INST, I8
        INST    ymm0, ymm0, xmm0, I8
ENDM
; Throughput kernel: INST ymm, ymm, xmm, imm8.
; Emits eight copies of INST, one per register number 8..15 (ymmN as
; destination and first source, xmmN as second source); the copies do not
; share registers with one another.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_xmmymmI82ymm_port MACRO INST, I8
    FOR REGNO, <8, 9, 10, 11, 12, 13, 14, 15>
        INST    ymm&REGNO, ymm&REGNO, xmm&REGNO, I8
    ENDM
ENDM
; Latency kernel: INST zmm, zmm, xmm, imm8.
; Writes zmm0 while reading zmm0 and xmm0, so every expansion consumes the
; result of the previous one.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_xmmzmmI82zmm_lat MACRO INST, I8
        INST    zmm0, zmm0, xmm0, I8
ENDM
; Throughput kernel: INST zmm, zmm, xmm, imm8.
; Emits eight copies of INST, one per register number 8..15 (zmmN as
; destination and first source, xmmN as second source); the copies do not
; share registers with one another.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_xmmzmmI82zmm_port MACRO INST, I8
    FOR REGNO, <8, 9, 10, 11, 12, 13, 14, 15>
        INST    zmm&REGNO, zmm&REGNO, xmm&REGNO, I8
    ENDM
ENDM
; Latency kernel: INST zmm, zmm, ymm, imm8.
; Writes zmm0 while reading zmm0 and ymm0, so every expansion consumes the
; result of the previous one.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_ymmzmmI82zmm_lat MACRO INST, I8
        INST    zmm0, zmm0, ymm0, I8
ENDM
; Throughput kernel: INST zmm, zmm, ymm, imm8.
; Emits eight copies of INST, one per register number 8..15 (zmmN as
; destination and first source, ymmN as second source); the copies do not
; share registers with one another.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_ymmzmmI82zmm_port MACRO INST, I8
    FOR REGNO, <8, 9, 10, 11, 12, 13, 14, 15>
        INST    zmm&REGNO, zmm&REGNO, ymm&REGNO, I8
    ENDM
ENDM
; Latency kernel: INST xmm, ymm, imm8.
; Writes xmm0 and reads ymm0 (same register number), so every expansion
; consumes the result of the previous one.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_ymmI82xmm_lat MACRO INST, I8
        INST    xmm0, ymm0, I8
ENDM
; Throughput kernel: INST xmm, ymm, imm8.
; Emits eight copies of INST, one per register number 8..15 (xmmN as
; destination, ymmN as source); the copies do not share registers with one
; another.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_ymmI82xmm_port MACRO INST, I8
    FOR REGNO, <8, 9, 10, 11, 12, 13, 14, 15>
        INST    xmm&REGNO, ymm&REGNO, I8
    ENDM
ENDM
; Latency kernel: INST xmm, zmm, imm8.
; Writes xmm0 and reads zmm0 (same register number), so every expansion
; consumes the result of the previous one.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_zmmI82xmm_lat MACRO INST, I8
        INST    xmm0, zmm0, I8
ENDM
; Throughput kernel: INST xmm, zmm, imm8.
; Emits eight copies of INST, one per register number 8..15 (xmmN as
; destination, zmmN as source); the copies do not share registers with one
; another.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_zmmI82xmm_port MACRO INST, I8
    FOR REGNO, <8, 9, 10, 11, 12, 13, 14, 15>
        INST    xmm&REGNO, zmm&REGNO, I8
    ENDM
ENDM
; Latency kernel: INST ymm, zmm, imm8.
; Writes ymm0 and reads zmm0 (same register number), so every expansion
; consumes the result of the previous one.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_zmmI82ymm_lat MACRO INST, I8
        INST    ymm0, zmm0, I8
ENDM
; Throughput kernel: INST ymm, zmm, imm8.
; Emits eight copies of INST, one per register number 8..15 (ymmN as
; destination, zmmN as source); the copies do not share registers with one
; another.
;   INST - instruction mnemonic to emit
;   I8   - immediate operand
InstLatX64_zmmI82ymm_port MACRO INST, I8
    FOR REGNO, <8, 9, 10, 11, 12, 13, 14, 15>
        INST    ymm&REGNO, zmm&REGNO, I8
    ENDM
ENDM
; ---------------------------------------------------------------------------
; test_m FUNC, M1, M2, INST, R1, R2, I8
;
; Generates a procedure named FUNC that times a fixed-count loop with the
; time-stamp counter and returns the elapsed tick count.
;
;   FUNC - name of the procedure to generate
;   M1   - macro expanded R1 times (without arguments) at the top of the loop
;   M2   - macro expanded R2 times as "M2 INST, I8" after the M1 copies
;   INST - instruction mnemonic forwarded to M2
;   R1   - repeat count for M1
;   R2   - repeat count for M2
;   I8   - immediate operand forwarded to M2
;
; Generated procedure:
;   In:    nothing
;   Out:   rax = (TSC after loop) - (TSC before loop), 100000 iterations
;   Saved: rbx, rsi, rdi, r12-r15, xmm8-xmm15
;   Clobb: rcx, rdx, flags, xmm0 (via the *_lat kernels), upper halves of
;          the vector registers (cleared by vzeroupper)
;
; The *_port kernels overwrite registers 8..15 of the vector file.  Under the
; Microsoft x64 calling convention (which this file appears to target: rsi
; and rdi are preserved as callee-saved) xmm6-xmm15 are callee-saved, so
; xmm8-xmm15 are spilled to the stack and reloaded.  Both the spill and the
; reload sit outside the timed region and do not affect the measurement.
; NOTE(review): the procedure is declared without FRAME/unwind data, exactly
; as before; fine for a leaf routine that cannot fault, but confirm.
; ---------------------------------------------------------------------------
test_m macro FUNC, M1, M2, INST, R1, R2, I8
LOCAL looptest
FUNC proc
        push    r15
        push    r14
        push    r13
        push    r12
        push    rbx                             ;; cpuid overwrites rbx
        push    rsi
        push    rdi

        sub     rsp, 8 * 16                     ;; spill area for xmm8..xmm15
        vmovups xmmword ptr [rsp + 0 * 16], xmm8
        vmovups xmmword ptr [rsp + 1 * 16], xmm9
        vmovups xmmword ptr [rsp + 2 * 16], xmm10
        vmovups xmmword ptr [rsp + 3 * 16], xmm11
        vmovups xmmword ptr [rsp + 4 * 16], xmm12
        vmovups xmmword ptr [rsp + 5 * 16], xmm13
        vmovups xmmword ptr [rsp + 6 * 16], xmm14
        vmovups xmmword ptr [rsp + 7 * 16], xmm15

        vzeroupper                              ;; start from a clean upper state

        ;; --- start timestamp: cpuid serialises, rdtscp reads the TSC --------
        xor     eax, eax
        cpuid
        rdtscp
        shl     rdx, 32
        or      rax, rdx                        ;; rax = 64-bit TSC value
        mov     rsi, rax                        ;; rsi = start ticks

        mov     r15, 100000                     ;; r15 = iterations remaining
        align   16
looptest:
    REPEAT R1
        M1
    endm
    REPEAT R2
        M2 INST, I8
    endm
        sub     r15, 1
        jnz     looptest

        ;; --- stop timestamp -------------------------------------------------
        xor     eax, eax
        cpuid
        rdtscp
        shl     rdx, 32
        or      rax, rdx
        sub     rax, rsi                        ;; rax = elapsed ticks (return value)

        ;; --- restore callee-saved vector state ------------------------------
        vzeroupper                              ;; do not return with dirty upper halves
        vmovups xmm8,  xmmword ptr [rsp + 0 * 16]
        vmovups xmm9,  xmmword ptr [rsp + 1 * 16]
        vmovups xmm10, xmmword ptr [rsp + 2 * 16]
        vmovups xmm11, xmmword ptr [rsp + 3 * 16]
        vmovups xmm12, xmmword ptr [rsp + 4 * 16]
        vmovups xmm13, xmmword ptr [rsp + 5 * 16]
        vmovups xmm14, xmmword ptr [rsp + 6 * 16]
        vmovups xmm15, xmmword ptr [rsp + 7 * 16]
        add     rsp, 8 * 16

        pop     rdi
        pop     rsi
        pop     rbx
        pop     r12
        pop     r13
        pop     r14
        pop     r15
        ret
FUNC endp
endm
; zen5_wrap INST, OPERANDS, R1, R2, I8
; Instantiates two timing procedures for one instruction/immediate pair:
;   Zen5_<INST>_<OPERANDS>_<I8>_lat  - loop body is InstLatX64_<OPERANDS>_lat
;   Zen5_<INST>_<OPERANDS>_<I8>_tp   - loop body is InstLatX64_<OPERANDS>_port
; Both use InstLatX64_empty_port as the M1 kernel.
;   INST     - instruction mnemonic
;   OPERANDS - operand-shape token selecting the InstLatX64_* kernel pair
;   I8       - immediate operand (also becomes part of the procedure name)
;   R1, R2   - accepted but not forwarded: both test_m calls pass 1, 1
; NOTE(review): callers pass R2 = 2 for the ymm shapes, yet the value is
; ignored here -- confirm whether the hard-coded 1, 1 is intentional.
zen5_wrap macro INST, OPERANDS, R1, R2, I8
; argument order of test_m: FUNC, M1, M2, INST, R1, R2, I8
test_m Zen5_&INST&_&OPERANDS&_&I8&_lat, InstLatX64_empty_port, InstLatX64_&OPERANDS&_lat, INST, 1, 1, I8
test_m Zen5_&INST&_&OPERANDS&_&I8&_tp, InstLatX64_empty_port, InstLatX64_&OPERANDS&_port, INST, 1, 1, I8
endm
; ---------------------------------------------------------------------------
; Extract tests. Each zen5_wrap line generates a _lat and a _tp procedure.
; ---------------------------------------------------------------------------
; ymm source -> xmm destination, immediates 0 and 1
zen5_wrap vextracti128, ymmI82xmm, 1, 2, 000h
zen5_wrap vextractf128, ymmI82xmm, 1, 2, 000h
zen5_wrap vextracti128, ymmI82xmm, 1, 2, 001h
zen5_wrap vextractf128, ymmI82xmm, 1, 2, 001h
; NOTE(review): from here to the end of the extract tests the operand token
; is spelled with a lowercase "i" (ymmi82xmm, zmmi82xmm, zmmi82ymm) while the
; kernel macros are named ...I82...; this only resolves if macro names are
; matched case-insensitively. The lowercase spelling also ends up in the
; generated procedure names, so it must not be "fixed" without updating the
; callers -- confirm against the CASEMAP setting and the C/C++ declarations.
zen5_wrap vextracti32x4, ymmi82xmm, 1, 2, 000h
zen5_wrap vextractf32x4, ymmi82xmm, 1, 2, 000h
zen5_wrap vextracti32x4, ymmi82xmm, 1, 2, 001h
zen5_wrap vextractf32x4, ymmi82xmm, 1, 2, 001h
zen5_wrap vextracti64x2, ymmi82xmm, 1, 2, 000h
zen5_wrap vextractf64x2, ymmi82xmm, 1, 2, 000h
zen5_wrap vextracti64x2, ymmi82xmm, 1, 2, 001h
zen5_wrap vextractf64x2, ymmi82xmm, 1, 2, 001h
; zmm source -> xmm destination, immediates 0..3
zen5_wrap vextracti32x4, zmmi82xmm, 1, 1, 000h
zen5_wrap vextractf32x4, zmmi82xmm, 1, 1, 000h
zen5_wrap vextracti32x4, zmmi82xmm, 1, 1, 001h
zen5_wrap vextractf32x4, zmmi82xmm, 1, 1, 001h
zen5_wrap vextracti32x4, zmmi82xmm, 1, 1, 002h
zen5_wrap vextractf32x4, zmmi82xmm, 1, 1, 002h
zen5_wrap vextracti32x4, zmmi82xmm, 1, 1, 003h
zen5_wrap vextractf32x4, zmmi82xmm, 1, 1, 003h
; zmm source -> ymm destination, immediates 0 and 1
zen5_wrap vextracti32x8, zmmi82ymm, 1, 1, 000h
zen5_wrap vextractf32x8, zmmi82ymm, 1, 1, 000h
zen5_wrap vextracti32x8, zmmi82ymm, 1, 1, 001h
zen5_wrap vextractf32x8, zmmi82ymm, 1, 1, 001h
; ---------------------------------------------------------------------------
; Insert tests. Each zen5_wrap line generates a _lat and a _tp procedure.
; ---------------------------------------------------------------------------
; xmm inserted into ymm, immediates 0 and 1
zen5_wrap vinserti128, xmmymmI82ymm, 1, 2, 000h
zen5_wrap vinsertf128, xmmymmI82ymm, 1, 2, 000h
zen5_wrap vinserti128, xmmymmI82ymm, 1, 2, 001h
zen5_wrap vinsertf128, xmmymmI82ymm, 1, 2, 001h
zen5_wrap vinserti32x4, xmmymmI82ymm, 1, 2, 000h
zen5_wrap vinsertf32x4, xmmymmI82ymm, 1, 2, 000h
zen5_wrap vinserti32x4, xmmymmI82ymm, 1, 2, 001h
zen5_wrap vinsertf32x4, xmmymmI82ymm, 1, 2, 001h
zen5_wrap vinserti64x2, xmmymmI82ymm, 1, 2, 000h
zen5_wrap vinsertf64x2, xmmymmI82ymm, 1, 2, 000h
zen5_wrap vinserti64x2, xmmymmI82ymm, 1, 2, 001h
zen5_wrap vinsertf64x2, xmmymmI82ymm, 1, 2, 001h
; xmm inserted into zmm, immediates 0..3
zen5_wrap vinserti32x4, xmmzmmI82zmm, 1, 1, 000h
zen5_wrap vinsertf32x4, xmmzmmI82zmm, 1, 1, 000h
zen5_wrap vinserti32x4, xmmzmmI82zmm, 1, 1, 001h
zen5_wrap vinsertf32x4, xmmzmmI82zmm, 1, 1, 001h
zen5_wrap vinserti32x4, xmmzmmI82zmm, 1, 1, 002h
zen5_wrap vinsertf32x4, xmmzmmI82zmm, 1, 1, 002h
zen5_wrap vinserti32x4, xmmzmmI82zmm, 1, 1, 003h
zen5_wrap vinsertf32x4, xmmzmmI82zmm, 1, 1, 003h
; ymm inserted into zmm, immediates 0 and 1
zen5_wrap vinserti32x8, ymmzmmI82zmm, 1, 1, 000h
zen5_wrap vinsertf32x8, ymmzmmI82zmm, 1, 1, 000h
zen5_wrap vinserti32x8, ymmzmmI82zmm, 1, 1, 001h
zen5_wrap vinsertf32x8, ymmzmmI82zmm, 1, 1, 001h
; ---------------------------------------------------------------------------
; vperm2i128 / vperm2f128 tests (ymm, ymm, ymm, imm8).
; Each zen5_wrap line generates a _lat and a _tp procedure. Immediates are
; grouped by high nibble: 0, 1, 2, 3 and 8, each with low nibbles 0, 1, 2, 3
; and 8.
; ---------------------------------------------------------------------------
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 000h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 000h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 001h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 001h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 002h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 002h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 003h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 003h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 008h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 008h
; high nibble 1
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 010h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 010h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 011h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 011h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 012h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 012h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 013h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 013h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 018h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 018h
; high nibble 2
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 020h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 020h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 021h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 021h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 022h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 022h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 023h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 023h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 028h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 028h
; high nibble 3
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 030h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 030h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 031h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 031h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 032h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 032h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 033h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 033h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 038h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 038h
; high nibble 8
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 080h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 080h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 081h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 081h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 082h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 082h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 083h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 083h
zen5_wrap vperm2i128, 2ymmI82ymm, 1, 2, 088h
zen5_wrap vperm2f128, 2ymmI82ymm, 1, 2, 088h
; ---------------------------------------------------------------------------
; vshuf{i,f}32x4 / vshuf{i,f}64x2 tests on ymm registers, immediates 0..3.
; Each zen5_wrap line generates a _lat and a _tp procedure.
; ---------------------------------------------------------------------------
zen5_wrap vshufi32x4, 2ymmI82ymm, 1, 2, 000h
zen5_wrap vshuff32x4, 2ymmI82ymm, 1, 2, 000h
zen5_wrap vshufi32x4, 2ymmI82ymm, 1, 2, 001h
zen5_wrap vshuff32x4, 2ymmI82ymm, 1, 2, 001h
zen5_wrap vshufi32x4, 2ymmI82ymm, 1, 2, 002h
zen5_wrap vshuff32x4, 2ymmI82ymm, 1, 2, 002h
zen5_wrap vshufi32x4, 2ymmI82ymm, 1, 2, 003h
zen5_wrap vshuff32x4, 2ymmI82ymm, 1, 2, 003h
; 64x2 element variants
zen5_wrap vshufi64x2, 2ymmI82ymm, 1, 2, 000h
zen5_wrap vshuff64x2, 2ymmI82ymm, 1, 2, 000h
zen5_wrap vshufi64x2, 2ymmI82ymm, 1, 2, 001h
zen5_wrap vshuff64x2, 2ymmI82ymm, 1, 2, 001h
zen5_wrap vshufi64x2, 2ymmI82ymm, 1, 2, 002h
zen5_wrap vshuff64x2, 2ymmI82ymm, 1, 2, 002h
zen5_wrap vshufi64x2, 2ymmI82ymm, 1, 2, 003h
zen5_wrap vshuff64x2, 2ymmI82ymm, 1, 2, 003h
; ---------------------------------------------------------------------------
; vshuf{i,f}32x4 / vshuf{i,f}64x2 tests on zmm registers,
; immediates 000h, 044h, 0e4h and 0a5h.
; Each zen5_wrap line generates a _lat and a _tp procedure.
; ---------------------------------------------------------------------------
zen5_wrap vshufi32x4, 2zmmI82zmm, 1, 1, 000h
zen5_wrap vshuff32x4, 2zmmI82zmm, 1, 1, 000h
zen5_wrap vshufi32x4, 2zmmI82zmm, 1, 1, 044h
zen5_wrap vshuff32x4, 2zmmI82zmm, 1, 1, 044h
zen5_wrap vshufi32x4, 2zmmI82zmm, 1, 1, 0e4h
zen5_wrap vshuff32x4, 2zmmI82zmm, 1, 1, 0e4h
zen5_wrap vshufi32x4, 2zmmI82zmm, 1, 1, 0a5h
zen5_wrap vshuff32x4, 2zmmI82zmm, 1, 1, 0a5h
; 64x2 element variants
zen5_wrap vshufi64x2, 2zmmI82zmm, 1, 1, 000h
zen5_wrap vshuff64x2, 2zmmI82zmm, 1, 1, 000h
zen5_wrap vshufi64x2, 2zmmI82zmm, 1, 1, 044h
zen5_wrap vshuff64x2, 2zmmI82zmm, 1, 1, 044h
zen5_wrap vshufi64x2, 2zmmI82zmm, 1, 1, 0e4h
zen5_wrap vshuff64x2, 2zmmI82zmm, 1, 1, 0e4h
zen5_wrap vshufi64x2, 2zmmI82zmm, 1, 1, 0a5h
zen5_wrap vshuff64x2, 2zmmI82zmm, 1, 1, 0a5h
; end of assembly source
END