Skip to content

Commit 98dfde6

Browse files
committed
adds semantics for pmuludq and extends pmuldq to its AVX/AVX2 encodings
Previously, we only supported the SSE and SSE2 variants of pmuldq and did not support any variant of pmuludq (unsigned packed multiplication).
1 parent 4879d2d commit 98dfde6

File tree

2 files changed

+87
-8
lines changed

2 files changed

+87
-8
lines changed

plugins/x86/semantics/test.t

+32
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,38 @@ and the same for a memory operand
5454
YMM0 := high:128[YMM0].#1.#2
5555
}
5656

57+
$ mc 0x66,0x0f,0xf4,0x4f,0x0c
58+
pmuludq 0xc(%rdi), %xmm1
59+
{
60+
#0 := mem[RDI + 0xC, el]:u128
61+
#1 := pad:64[95:64[YMM1]] * pad:64[95:64[#0]]
62+
#2 := pad:64[31:0[YMM1]] * pad:64[31:0[#0]]
63+
YMM1 := high:128[YMM1].#1.#2
64+
}
65+
$ mc 0x66,0x0f,0xf4,0xcb
66+
pmuludq %xmm3, %xmm1
67+
{
68+
#0 := pad:64[95:64[YMM1]] * pad:64[95:64[YMM3]]
69+
#1 := pad:64[31:0[YMM1]] * pad:64[31:0[YMM3]]
70+
YMM1 := high:128[YMM1].#0.#1
71+
}
72+
$ mc 0xc5,0xe1,0xf4,0xcc
73+
vpmuludq %xmm4, %xmm3, %xmm1
74+
{
75+
#0 := pad:64[95:64[YMM1]] * pad:64[95:64[YMM4]]
76+
#1 := pad:64[31:0[YMM1]] * pad:64[31:0[YMM4]]
77+
YMM1 := 0.#0.#1
78+
}
79+
$ mc 0xc5,0xe5,0xf4,0xcc
80+
vpmuludq %ymm4, %ymm3, %ymm1
81+
{
82+
#0 := pad:64[223:192[YMM1]] * pad:64[223:192[YMM4]]
83+
#1 := pad:64[159:128[YMM1]] * pad:64[159:128[YMM4]]
84+
#2 := pad:64[95:64[YMM1]] * pad:64[95:64[YMM4]]
85+
#3 := pad:64[31:0[YMM1]] * pad:64[31:0[YMM4]]
86+
YMM1 := #0.#1.#2.#3
87+
}
88+
5789

5890
-----------------------------------------------------------
5991
# Testing xchgb #

plugins/x86/semantics/x86-64.lisp

+55-8
Original file line numberDiff line numberDiff line change
@@ -63,19 +63,66 @@
6363
;; pmuldq/pmuludq (signed/unsigned packed dword multiplication)
6464
;; Reference: Vol. 2B 4-370
6565
;; pmuldq, legacy (non-VEX) register-register form.
;; Delegates to the pmul macro: the dwords at bits 31:0 and 95:64 of dst
;; and src are widened to 64 bits with cast-signed, multiplied pairwise,
;; and the concatenated products are written to dst with set$.
;; The second parameter is ignored (presumably the source operand that is
;; tied to dst in the two-operand encoding -- confirm against the lifter).
(defun PMULDQrr (dst _ src)
66-
(pmul dst src))
66+
(pmul set$ cast-signed dst src))
6767

6868
;; pmuldq, legacy (non-VEX) register-memory form.
;; The second multiplicand is loaded with load-dword from base + off; the
;; remaining memory-operand parameters are ignored, so only base+offset
;; addressing is modelled here. The loaded value is bound to a variable
;; before calling pmul so that the macro does not duplicate the load.
(defun PMULDQrm (dst _ base _ _ off _)
(let ((src (load-dword (+ base off))))
70-
(pmul dst src)))
70+
(pmul set$ cast-signed dst src)))
7171

72-
(defun pmul (dst src)
72+
;; pmuludq, legacy (non-VEX) register-register form.
;; Same as the pmuldq form but the dwords at bits 31:0 and 95:64 of dst
;; and src are widened to 64 bits with cast-unsigned before multiplying;
;; the result is written to dst with set$. The second parameter is ignored.
(defun PMULUDQrr (dst _ src)
73+
(pmul set$ cast-unsigned dst src))
74+
75+
;; pmuludq, legacy (non-VEX) register-memory form.
;; Loads the second multiplicand with load-dword from base + off (the other
;; memory-operand parameters are ignored) and multiplies it with dst using
;; cast-unsigned widening; the result is written to dst with set$.
(defun PMULUDQrm (dst _ base _ _ off _)
76+
(let ((src (load-dword (+ base off))))
77+
(pmul set$ cast-unsigned dst src)))
78+
79+
;; VEX-encoded (AVX/AVX2) vpmuldq/vpmuludq.
;;
;; The VEX forms are non-destructive three-operand instructions:
;;   vpmuludq %xmm4, %xmm3, %xmm1   ; xmm1 := xmm3 * xmm4
;; so the first multiplicand is the second operand (src1), NOT the
;; destination. Using dst as a multiplicand is only valid for the legacy
;; two-operand encodings, where the first source is tied to dst.
;;
;; NOTE(review): assumes operands arrive as (dst src1 src2) for the rr
;; forms and (dst src1 base scale index disp seg) for the rm forms --
;; confirm against the lifter. The vpmuludq expectations in test.t have
;; to be regenerated: the products must read the first source register
;; (e.g. YMM3) instead of the destination (YMM1).

;; 128-bit forms: setv is used for the write-back (the existing
;; vpmuludq test shows it producing `0.#0.#1`, i.e. zeroed upper bits).
(defun VPMULDQrr (dst src1 src2)
  (pmul-into setv cast-signed dst src1 src2))

(defun VPMULDQrm (dst src1 base _ _ off _)
  (let ((src2 (load-dword (+ base off))))
    (pmul-into setv cast-signed dst src1 src2)))

(defun VPMULUDQrr (dst src1 src2)
  (pmul-into setv cast-unsigned dst src1 src2))

(defun VPMULUDQrm (dst src1 base _ _ off _)
  (let ((src2 (load-dword (+ base off))))
    (pmul-into setv cast-unsigned dst src1 src2)))

;; 256-bit forms: four 64-bit products fill the whole register.
(defun VPMULDQYrr (dst src1 src2)
  (pmuly-into cast-signed dst src1 src2))

(defun VPMULDQYrm (dst src1 base _ _ off _)
  ;; The memory operand of the ymm form is 256 bits wide. load-dword
  ;; yields only 128 bits here, which is too narrow for the extracts of
  ;; bits 159:128 and 223:192 below, so load the full width explicitly.
  (let ((src2 (load-bits 256 (+ base off))))
    (pmuly-into cast-signed dst src1 src2)))

(defun VPMULUDQYrr (dst src1 src2)
  (pmuly-into cast-unsigned dst src1 src2))

(defun VPMULUDQYrm (dst src1 base _ _ off _)
  ;; See VPMULDQYrm: the ymm memory operand needs a 256-bit load.
  (let ((src2 (load-bits 256 (+ base off))))
    (pmuly-into cast-unsigned dst src1 src2)))

;; (pmul SET CAST DST SRC) two-operand (destructive) 128-bit multiply:
;; DST is both the first multiplicand and the destination.
;; Kept with its original arity for the legacy SSE forms.
(defmacro pmul (set cast dst src)
  (declare (visibility :private))
  (pmul-into set cast dst dst src))

;; (pmul-into SET CAST DST SRC1 SRC2) multiplies the dwords at bits 31:0
;; and 95:64 of SRC1 and SRC2, each widened to 64 bits with CAST, and
;; writes the concatenation hi.lo to DST using SET.
;; SRC1/SRC2 are substituted syntactically and used twice each, so pass
;; variables or registers, not expressions with effects such as loads.
(defmacro pmul-into (set cast dst src1 src2)
  (declare (visibility :private))
  (let ((hi (* (cast 64 (extract 95 64 src1))
               (cast 64 (extract 95 64 src2))))
        (lo (* (cast 64 (extract 31 0 src1))
               (cast 64 (extract 31 0 src2)))))
    (set dst (concat hi lo))))

;; (pmuly CAST DST SRC) two-operand 256-bit multiply with DST as the
;; first multiplicand. Retained with its original arity and behaviour
;; for compatibility; the VEX defuns above use pmuly-into instead.
(defmacro pmuly (cast dst src)
  (declare (visibility :private))
  (pmuly-into cast dst dst src))

;; (pmuly-into CAST DST SRC1 SRC2) 256-bit variant: multiplies the even
;; dwords (bits 31:0, 95:64, 159:128, 223:192) of SRC1 and SRC2, each
;; widened to 64 bits with CAST, and writes the four products to DST.
(defmacro pmuly-into (cast dst src1 src2)
  (declare (visibility :private))
  (let ((w4 (* (cast 64 (extract 223 192 src1))
               (cast 64 (extract 223 192 src2))))
        (w3 (* (cast 64 (extract 159 128 src1))
               (cast 64 (extract 159 128 src2))))
        (w2 (* (cast 64 (extract 95 64 src1))
               (cast 64 (extract 95 64 src2))))
        (w1 (* (cast 64 (extract 31 0 src1))
               (cast 64 (extract 31 0 src2)))))
    (set$ dst (concat w4 w3 w2 w1))))
79126

80127

81128
;; pack{u,s}sdw/pack{u,s}swb

0 commit comments

Comments
 (0)