Skip to content

Commit db4bd21

Browse files
authored
pulley: Implement SIMD splat instruction (#9832)
* pulley: Implement SIMD `splat` instruction

  Gets a few spec tests and CLIF tests passing.

  cc #9783

* Fix typo
1 parent d3f05ee commit db4bd21

File tree

5 files changed

+66
-6
lines changed

5 files changed

+66
-6
lines changed

cranelift/codegen/src/isa/pulley_shared/lower.isle

+9
Original file line numberDiff line numberDiff line change
@@ -799,3 +799,12 @@
799799

800800
(rule 0 (lower (has_type (fits_in_32 _) (iabs a))) (pulley_xabs32 (sext32 a)))
801801
(rule 1 (lower (has_type $I64 (iabs a))) (pulley_xabs64 a))
802+
803+
;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
804+
805+
(rule (lower (has_type $I8X16 (splat a))) (pulley_vsplatx8 a))
806+
(rule (lower (has_type $I16X8 (splat a))) (pulley_vsplatx16 a))
807+
(rule (lower (has_type $I32X4 (splat a))) (pulley_vsplatx32 a))
808+
(rule (lower (has_type $I64X2 (splat a))) (pulley_vsplatx64 a))
809+
(rule (lower (has_type $F32X4 (splat a))) (pulley_vsplatf32 a))
810+
(rule (lower (has_type $F64X2 (splat a))) (pulley_vsplatf64 a))

cranelift/filetests/filetests/runtests/simd-splat.clif

+4
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ target x86_64 sse41 has_avx has_avx2
1010
set enable_multi_ret_implicit_sret
1111
target riscv64 has_v
1212
target riscv64 has_v has_c has_zcb
13+
target pulley32
14+
target pulley32be
15+
target pulley64
16+
target pulley64be
1317

1418
function %splat_i8x16(i8) -> i8x16 {
1519
block0(v0: i8):

crates/wast-util/src/lib.rs

-2
Original file line numberDiff line numberDiff line change
@@ -401,8 +401,6 @@ impl WastTest {
401401
// features in Pulley are implemented.
402402
if config.compiler == Compiler::CraneliftPulley {
403403
let unsupported = [
404-
"misc_testsuite/int-to-float-splat.wast",
405-
"misc_testsuite/issue6562.wast",
406404
"misc_testsuite/memory64/simd.wast",
407405
"misc_testsuite/simd/almost-extmul.wast",
408406
"misc_testsuite/simd/canonicalize-nan.wast",

pulley/src/interp.rs

+40 -4
Original file line numberDiff line numberDiff line change
@@ -2574,7 +2574,7 @@ impl OpVisitor for Interpreter<'_> {
25742574
let mut a = self.state[operands.src1].get_i8x16();
25752575
let b = self.state[operands.src2].get_i8x16();
25762576
for (a, b) in a.iter_mut().zip(b) {
2577-
*a += b;
2577+
*a = a.wrapping_add(b);
25782578
}
25792579
self.state[operands.dst].set_i8x16(a);
25802580
ControlFlow::Continue(())
@@ -2584,7 +2584,7 @@ impl OpVisitor for Interpreter<'_> {
25842584
let mut a = self.state[operands.src1].get_i16x8();
25852585
let b = self.state[operands.src2].get_i16x8();
25862586
for (a, b) in a.iter_mut().zip(b) {
2587-
*a += b;
2587+
*a = a.wrapping_add(b);
25882588
}
25892589
self.state[operands.dst].set_i16x8(a);
25902590
ControlFlow::Continue(())
@@ -2594,7 +2594,7 @@ impl OpVisitor for Interpreter<'_> {
25942594
let mut a = self.state[operands.src1].get_i32x4();
25952595
let b = self.state[operands.src2].get_i32x4();
25962596
for (a, b) in a.iter_mut().zip(b) {
2597-
*a += b;
2597+
*a = a.wrapping_add(b);
25982598
}
25992599
self.state[operands.dst].set_i32x4(a);
26002600
ControlFlow::Continue(())
@@ -2604,7 +2604,7 @@ impl OpVisitor for Interpreter<'_> {
26042604
let mut a = self.state[operands.src1].get_i64x2();
26052605
let b = self.state[operands.src2].get_i64x2();
26062606
for (a, b) in a.iter_mut().zip(b) {
2607-
*a += b;
2607+
*a = a.wrapping_add(b);
26082608
}
26092609
self.state[operands.dst].set_i64x2(a);
26102610
ControlFlow::Continue(())
@@ -2718,6 +2718,42 @@ impl OpVisitor for Interpreter<'_> {
27182718
self.state[dst].set_u128(val);
27192719
ControlFlow::Continue(())
27202720
}
2721+
2722+
fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
2723+
let val = self.state[src].get_u32() as u8;
2724+
self.state[dst].set_u8x16([val; 16]);
2725+
ControlFlow::Continue(())
2726+
}
2727+
2728+
fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
2729+
let val = self.state[src].get_u32() as u16;
2730+
self.state[dst].set_u16x8([val; 8]);
2731+
ControlFlow::Continue(())
2732+
}
2733+
2734+
fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
2735+
let val = self.state[src].get_u32();
2736+
self.state[dst].set_u32x4([val; 4]);
2737+
ControlFlow::Continue(())
2738+
}
2739+
2740+
fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
2741+
let val = self.state[src].get_u64();
2742+
self.state[dst].set_u64x2([val; 2]);
2743+
ControlFlow::Continue(())
2744+
}
2745+
2746+
fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
2747+
let val = self.state[src].get_f32();
2748+
self.state[dst].set_f32x4([val; 4]);
2749+
ControlFlow::Continue(())
2750+
}
2751+
2752+
fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
2753+
let val = self.state[src].get_f64();
2754+
self.state[dst].set_f64x2([val; 2]);
2755+
ControlFlow::Continue(())
2756+
}
27212757
}
27222758

27232759
impl ExtendedOpVisitor for Interpreter<'_> {

pulley/src/lib.rs

+13
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,19 @@ macro_rules! for_each_op {
618618
vshri32x4_u = VShrI32x4U { operands: BinaryOperands<VReg, VReg, XReg> };
619619
/// `dst = src1 >> src2` (unsigned)
620620
vshri64x2_u = VShrI64x2U { operands: BinaryOperands<VReg, VReg, XReg> };
621+
622+
/// `dst = splat(low8(src))`
623+
vsplatx8 = VSplatX8 { dst: VReg, src: XReg };
624+
/// `dst = splat(low16(src))`
625+
vsplatx16 = VSplatX16 { dst: VReg, src: XReg };
626+
/// `dst = splat(low32(src))`
627+
vsplatx32 = VSplatX32 { dst: VReg, src: XReg };
628+
/// `dst = splat(src)`
629+
vsplatx64 = VSplatX64 { dst: VReg, src: XReg };
630+
/// `dst = splat(low32(src))`
631+
vsplatf32 = VSplatF32 { dst: VReg, src: FReg };
632+
/// `dst = splat(src)`
633+
vsplatf64 = VSplatF64 { dst: VReg, src: FReg };
621634
}
622635
};
623636
}

0 commit comments

Comments
 (0)