Skip to content

Commit 035c862

Browse files
committed
pulley: Implement SIMD splat instruction
Gets a few spec tests and CLIF tests passing cc bytecodealliance#9783
1 parent b10dc29 commit 035c862

File tree

5 files changed

+66
-6
lines changed

5 files changed

+66
-6
lines changed

cranelift/codegen/src/isa/pulley_shared/lower.isle

+9
Original file line numberDiff line numberDiff line change
@@ -771,3 +771,12 @@
771771

772772
(rule 0 (lower (has_type (fits_in_32 _) (iabs a))) (pulley_xabs32 (sext32 a)))
773773
(rule 1 (lower (has_type $I64 (iabs a))) (pulley_xabs64 a))
774+
775+
;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
776+
777+
(rule (lower (has_type $I8X16 (splat a))) (pulley_vsplatx8 a))
778+
(rule (lower (has_type $I16X8 (splat a))) (pulley_vsplatx16 a))
779+
(rule (lower (has_type $I32X4 (splat a))) (pulley_vsplatx32 a))
780+
(rule (lower (has_type $I64X2 (splat a))) (pulley_vsplatx64 a))
781+
(rule (lower (has_type $F32X4 (splat a))) (pulley_vsplatf32 a))
782+
(rule (lower (has_type $F64X2 (splat a))) (pulley_vsplatf64 a))

cranelift/filetests/filetests/runtests/simd-splat.clif

+4
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ target x86_64 sse41 has_avx has_avx2
1010
set enable_multi_ret_implicit_sret
1111
target riscv64 has_v
1212
target riscv64 has_v has_c has_zcb
13+
target pulley32
14+
target pulley32be
15+
target pulley64
16+
target pulley64be
1317

1418
function %splat_i8x16(i8) -> i8x16 {
1519
block0(v0: i8):

crates/wast-util/src/lib.rs

-2
Original file line numberDiff line numberDiff line change
@@ -401,8 +401,6 @@ impl WastTest {
401401
// features in Pulley are implemented.
402402
if config.compiler == Compiler::CraneliftPulley {
403403
let unsupported = [
404-
"misc_testsuite/int-to-float-splat.wast",
405-
"misc_testsuite/issue6562.wast",
406404
"misc_testsuite/memory64/simd.wast",
407405
"misc_testsuite/simd/almost-extmul.wast",
408406
"misc_testsuite/simd/canonicalize-nan.wast",

pulley/src/interp.rs

+40-4
Original file line numberDiff line numberDiff line change
@@ -2551,7 +2551,7 @@ impl OpVisitor for Interpreter<'_> {
25512551
let mut a = self.state[operands.src1].get_i8x16();
25522552
let b = self.state[operands.src2].get_i8x16();
25532553
for (a, b) in a.iter_mut().zip(b) {
2554-
*a += b;
2554+
*a = a.wrapping_add(b);
25552555
}
25562556
self.state[operands.dst].set_i8x16(a);
25572557
ControlFlow::Continue(())
@@ -2561,7 +2561,7 @@ impl OpVisitor for Interpreter<'_> {
25612561
let mut a = self.state[operands.src1].get_i16x8();
25622562
let b = self.state[operands.src2].get_i16x8();
25632563
for (a, b) in a.iter_mut().zip(b) {
2564-
*a += b;
2564+
*a = a.wrapping_add(b);
25652565
}
25662566
self.state[operands.dst].set_i16x8(a);
25672567
ControlFlow::Continue(())
@@ -2571,7 +2571,7 @@ impl OpVisitor for Interpreter<'_> {
25712571
let mut a = self.state[operands.src1].get_i32x4();
25722572
let b = self.state[operands.src2].get_i32x4();
25732573
for (a, b) in a.iter_mut().zip(b) {
2574-
*a += b;
2574+
*a = a.wrapping_add(b);
25752575
}
25762576
self.state[operands.dst].set_i32x4(a);
25772577
ControlFlow::Continue(())
@@ -2581,7 +2581,7 @@ impl OpVisitor for Interpreter<'_> {
25812581
let mut a = self.state[operands.src1].get_i64x2();
25822582
let b = self.state[operands.src2].get_i64x2();
25832583
for (a, b) in a.iter_mut().zip(b) {
2584-
*a += b;
2584+
*a = a.wrapping_add(b);
25852585
}
25862586
self.state[operands.dst].set_i64x2(a);
25872587
ControlFlow::Continue(())
@@ -2695,6 +2695,42 @@ impl OpVisitor for Interpreter<'_> {
26952695
self.state[dst].set_u128(val);
26962696
ControlFlow::Continue(())
26972697
}
2698+
2699+
/// Splat handler for `XReg` -> `VReg` with 8-bit lanes: reads `src` as a
/// `u32`, truncates it to `u8`, and writes that byte to all 16 lanes of `dst`.
/// Always returns `ControlFlow::Continue(())`.
fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
2700+
let val = self.state[src].get_u32() as u8; // `as u8` keeps only the low 8 bits
2701+
self.state[dst].set_u8x16([val; 16]);
2702+
ControlFlow::Continue(())
2703+
}
2704+
2705+
/// Splat handler for `XReg` -> `VReg` with 16-bit lanes: reads `src` as a
/// `u32`, truncates it to `u16`, and writes that value to all 8 lanes of `dst`.
/// Always returns `ControlFlow::Continue(())`.
fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
2706+
let val = self.state[src].get_u32() as u16; // `as u16` keeps only the low 16 bits
2707+
self.state[dst].set_u16x8([val; 8]);
2708+
ControlFlow::Continue(())
2709+
}
2710+
2711+
/// Splat handler for `XReg` -> `VReg` with 32-bit lanes: reads `src` as a
/// `u32` and writes that value to all 4 lanes of `dst`.
/// Always returns `ControlFlow::Continue(())`.
fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
2712+
let val = self.state[src].get_u32();
2713+
self.state[dst].set_u32x4([val; 4]);
2714+
ControlFlow::Continue(())
2715+
}
2716+
2717+
/// Splat handler for `XReg` -> `VReg` with 64-bit lanes: reads `src` as a
/// `u64` and writes that value to both lanes of `dst`.
/// Always returns `ControlFlow::Continue(())`.
fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
2718+
let val = self.state[src].get_u64();
2719+
self.state[dst].set_u64x2([val; 2]);
2720+
ControlFlow::Continue(())
2721+
}
2722+
2723+
/// Splat handler for `FReg` -> `VReg` with `f32` lanes: reads `src` as an
/// `f32` and writes that value to all 4 lanes of `dst`.
/// Always returns `ControlFlow::Continue(())`.
fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
2724+
let val = self.state[src].get_f32();
2725+
self.state[dst].set_f32x4([val; 4]);
2726+
ControlFlow::Continue(())
2727+
}
2728+
2729+
/// Splat handler for `FReg` -> `VReg` with `f64` lanes: reads `src` as an
/// `f64` and writes that value to both lanes of `dst`.
/// Always returns `ControlFlow::Continue(())`.
fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
2730+
let val = self.state[src].get_f64();
2731+
self.state[dst].set_f64x2([val; 2]);
2732+
ControlFlow::Continue(())
2733+
}
26982734
}
26992735

27002736
impl ExtendedOpVisitor for Interpreter<'_> {

pulley/src/lib.rs

+13
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,19 @@ macro_rules! for_each_op {
609609
vshri32x4_u = VShrI32x4U { operands: BinaryOperands<VReg, VReg, XReg> };
610610
/// `dst = src1 >> src2` (unsigned)
611611
vshri64x2_u = VShrI64x2U { operands: BinaryOperands<VReg, VReg, XReg> };
612+
613+
/// `dst = splat(low8(src))`
614+
vsplatx8 = VSplatX8 { dst: VReg, src: XReg };
615+
/// `dst = splat(low16(src))`
616+
vsplatx16 = VSplatX16 { dst: VReg, src: XReg };
617+
/// `dst = splat(low32(src))`
618+
vsplatx32 = VSplatX32 { dst: VReg, src: XReg };
619+
/// `dst = splat(src)`
620+
vsplatx64 = VSplatX64 { dst: VReg, src: XReg };
621+
/// `dst = splat(low32(src))`
622+
vsplatf32 = VSplatF32 { dst: VReg, src: FReg };
623+
/// `dst = splat(src)`
624+
vsplatf64 = VSplatF64 { dst: VReg, src: FReg };
612625
}
613626
};
614627
}

0 commit comments

Comments
 (0)