Skip to content

Commit 4a00bc8

Browse files
committed
feat: Implement simd_f64x2_arith for pulley
1 parent f2ab725 commit 4a00bc8

File tree

4 files changed

+51
-1
lines changed

4 files changed

+51
-1
lines changed

cranelift/codegen/src/isa/pulley_shared/lower.isle

+4
Original file line numberDiff line numberDiff line change
@@ -1157,17 +1157,20 @@
11571157

11581158
(rule (lower (has_type $F32 (fsub a b))) (pulley_fsub32 a b))
11591159
(rule (lower (has_type $F64 (fsub a b))) (pulley_fsub64 a b))
1160+
(rule (lower (has_type $F64X2 (fsub a b))) (pulley_vsubf64x2 a b))
11601161

11611162
;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11621163

11631164
(rule (lower (has_type $F32 (fmul a b))) (pulley_fmul32 a b))
11641165
(rule (lower (has_type $F64 (fmul a b))) (pulley_fmul64 a b))
1166+
(rule (lower (has_type $F64X2 (fmul a b))) (pulley_vmulf64x2 a b))
11651167

11661168
;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11671169

11681170
(rule (lower (has_type $F32 (fdiv a b))) (pulley_fdiv32 a b))
11691171
(rule (lower (has_type $F64 (fdiv a b))) (pulley_fdiv64 a b))
11701172
(rule (lower (has_type $F32X4 (fdiv a b))) (pulley_vdivf32x4 a b))
1173+
(rule (lower (has_type $F64X2 (fdiv a b))) (pulley_vdivf64x2 a b))
11711174

11721175
;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11731176

@@ -1230,6 +1233,7 @@
12301233

12311234
(rule (lower (has_type $F32 (fneg a))) (pulley_fneg32 a))
12321235
(rule (lower (has_type $F64 (fneg a))) (pulley_fneg64 a))
1236+
(rule (lower (has_type $F64X2 (fneg a))) (pulley_vnegf64x2 a))
12331237

12341238
;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12351239

crates/wast-util/src/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,6 @@ impl WastTest {
411411
"spec_testsuite/simd_f32x4_arith.wast",
412412
"spec_testsuite/simd_f32x4_cmp.wast",
413413
"spec_testsuite/simd_f32x4_pmin_pmax.wast",
414-
"spec_testsuite/simd_f64x2_arith.wast",
415414
"spec_testsuite/simd_f64x2_cmp.wast",
416415
"spec_testsuite/simd_f64x2_pmin_pmax.wast",
417416
"spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast",

pulley/src/interp.rs

+39
Original file line numberDiff line numberDiff line change
@@ -3007,6 +3007,19 @@ impl ExtendedOpVisitor for Interpreter<'_> {
30073007
ControlFlow::Continue(())
30083008
}
30093009

3010+
/// Lane-wise `f64x2` division: `dst = src1 / src2`.
///
/// Reads both source vector registers as two `f64` lanes, divides each
/// lane of `src1` by the matching lane of `src2`, and writes the pair of
/// quotients to `dst`. Each lane uses the plain `f64` `/` operator.
///
/// Written in the same in-place `iter_mut().zip(..)` shape as the sibling
/// `vsubf64x2` / `vmulf64x2` handlers rather than an index loop over a
/// zero-initialised scratch array.
fn vdivf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    let mut a = self.state[operands.src1].get_f64x2();
    let b = self.state[operands.src2].get_f64x2();
    for (a, b) in a.iter_mut().zip(b) {
        *a = *a / b;
    }
    self.state[operands.dst].set_f64x2(a);
    ControlFlow::Continue(())
}
3022+
30103023
fn fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
30113024
let a = self.state[operands.src1].get_f32();
30123025
let b = self.state[operands.src2].get_f32();
@@ -3900,6 +3913,16 @@ impl ExtendedOpVisitor for Interpreter<'_> {
39003913
ControlFlow::Continue(())
39013914
}
39023915

3916+
/// Lane-wise `f64x2` subtraction: `dst = src1 - src2`.
///
/// Each of the two `f64` lanes of `src2` is subtracted from the matching
/// lane of `src1`; the two differences are stored to `dst`.
fn vsubf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    // Pull both operands apart into their individual lanes up front.
    let [lhs_lo, lhs_hi] = self.state[operands.src1].get_f64x2();
    let [rhs_lo, rhs_hi] = self.state[operands.src2].get_f64x2();
    let difference = [lhs_lo - rhs_lo, lhs_hi - rhs_hi];
    self.state[operands.dst].set_f64x2(difference);
    ControlFlow::Continue(())
}
3925+
39033926
fn vmuli8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
39043927
let mut a = self.state[operands.src1].get_i8x16();
39053928
let b = self.state[operands.src2].get_i8x16();
@@ -3940,6 +3963,16 @@ impl ExtendedOpVisitor for Interpreter<'_> {
39403963
ControlFlow::Continue(())
39413964
}
39423965

3966+
/// Lane-wise `f64x2` multiplication: `dst = src1 * src2`.
///
/// Multiplies each `f64` lane of `src1` by the matching lane of `src2` and
/// stores the two products to `dst`.
fn vmulf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    let mut product = self.state[operands.src1].get_f64x2();
    let factors = self.state[operands.src2].get_f64x2();
    // Scale every lane in place by its counterpart from the second operand.
    product
        .iter_mut()
        .zip(factors)
        .for_each(|(lane, factor)| *lane *= factor);
    self.state[operands.dst].set_f64x2(product);
    ControlFlow::Continue(())
}
3975+
39433976
fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
39443977
let mut a = self.state[operands.src1].get_i16x8();
39453978
let b = self.state[operands.src2].get_i16x8();
@@ -4367,6 +4400,12 @@ impl ExtendedOpVisitor for Interpreter<'_> {
43674400
ControlFlow::Continue(())
43684401
}
43694402

4403+
/// Lane-wise `f64x2` negation: `dst = -src`.
///
/// Applies unary minus to each `f64` lane of `src` and stores the result
/// to `dst`.
fn vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
    let mut lanes = self.state[src].get_f64x2();
    // Flip the sign of every lane in place.
    for lane in lanes.iter_mut() {
        *lane = -*lane;
    }
    self.state[dst].set_f64x2(lanes);
    ControlFlow::Continue(())
}
4408+
43704409
fn vmin8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
43714410
let mut a = self.state[operands.src1].get_i8x16();
43724411
let b = self.state[operands.src2].get_i8x16();

pulley/src/lib.rs

+8
Original file line numberDiff line numberDiff line change
@@ -860,6 +860,8 @@ macro_rules! for_each_extended_op {
860860
fmul64 = Fmul64 { operands: BinaryOperands<FReg> };
861861
/// `dst = src1 / src2`
862862
fdiv64 = Fdiv64 { operands: BinaryOperands<FReg> };
863+
/// `dst = src1 / src2`
864+
vdivf64x2 = VDivF64x2 { operands: BinaryOperands<VReg> };
863865
/// `dst = ieee_maximum(src1, src2)`
864866
fmaximum64 = Fmaximum64 { operands: BinaryOperands<FReg> };
865867
/// `dst = ieee_minimum(src1, src2)`
@@ -1075,6 +1077,8 @@ macro_rules! for_each_extended_op {
10751077
vsubi32x4 = VSubI32x4 { operands: BinaryOperands<VReg> };
10761078
/// `dst = src1 - src2`
10771079
vsubi64x2 = VSubI64x2 { operands: BinaryOperands<VReg> };
1080+
/// `dst = src1 - src2`
1081+
vsubf64x2 = VSubF64x2 { operands: BinaryOperands<VReg> };
10781082

10791083
/// `dst = saturating_sub(src1, src2)`
10801084
vsubi8x16_sat = VSubI8x16Sat { operands: BinaryOperands<VReg> };
@@ -1093,6 +1097,8 @@ macro_rules! for_each_extended_op {
10931097
vmuli32x4 = VMulI32x4 { operands: BinaryOperands<VReg> };
10941098
/// `dst = src1 * src2`
10951099
vmuli64x2 = VMulI64x2 { operands: BinaryOperands<VReg> };
1100+
/// `dst = src1 * src2`
1101+
vmulf64x2 = VMulF64x2 { operands: BinaryOperands<VReg> };
10961102

10971103
/// `dst = signed_saturate(src1 * src2 + (1 << (Q - 1)) >> Q)`
10981104
vqmulrsi16x8 = VQmulrsI16x8 { operands: BinaryOperands<VReg> };
@@ -1183,6 +1189,8 @@ macro_rules! for_each_extended_op {
11831189
vneg32x4 = Vneg32x4 { dst: VReg, src: VReg };
11841190
/// `dst = -src`
11851191
vneg64x2 = Vneg64x2 { dst: VReg, src: VReg };
1192+
/// `dst = -src`
1193+
vnegf64x2 = VnegF64x2 { dst: VReg, src: VReg };
11861194

11871195
/// `dst = min(src1, src2)` (signed)
11881196
vmin8x16_s = Vmin8x16S { operands: BinaryOperands<VReg> };

0 commit comments

Comments
 (0)