Skip to content

Commit 506996c

Browse files
committed
pulley: Initial scaffold of SIMD support
This commit fills out some of the initial infrastructure necessary for supporting the SIMD proposal to WebAssembly in the Pulley interpreter, namely 128-bit SIMD. The `VRegVal` union has been filled out with various types, endianness questions are settled, and initial implementations of a suite of opcodes are added to get a basic set of tests working throughout the backend. cc bytecodealliance#9783
1 parent 9fd2b3a commit 506996c

File tree

12 files changed

+474
-62
lines changed

12 files changed

+474
-62
lines changed

Diff for: cranelift/codegen/meta/src/pulley.rs

+44-16
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,47 @@ const OPS: &[Inst<'_>] = pulley_interpreter::for_each_op!(define);
2727
const EXTENDED_OPS: &[Inst<'_>] = pulley_interpreter::for_each_extended_op!(define);
2828

2929
enum Operand<'a> {
30-
Normal { name: &'a str, ty: &'a str },
31-
Writable { name: &'a str, ty: &'a str },
32-
TrapCode { name: &'a str, ty: &'a str },
33-
Binop { reg: &'a str },
30+
Normal {
31+
name: &'a str,
32+
ty: &'a str,
33+
},
34+
Writable {
35+
name: &'a str,
36+
ty: &'a str,
37+
},
38+
TrapCode {
39+
name: &'a str,
40+
ty: &'a str,
41+
},
42+
Binop {
43+
dst: &'a str,
44+
src1: &'a str,
45+
src2: &'a str,
46+
},
3447
}
3548

3649
impl Inst<'_> {
3750
fn operands(&self) -> impl Iterator<Item = Operand<'_>> {
3851
self.fields
3952
.iter()
4053
.map(|(name, ty)| match (*name, *ty) {
41-
("operands", "BinaryOperands < XReg >") => Operand::Binop { reg: "XReg" },
42-
("operands", "BinaryOperands < FReg >") => Operand::Binop { reg: "FReg" },
54+
("operands", binop) => {
55+
// Parse `BinaryOperands < A >` as A/A/A
56+
// Parse `BinaryOperands < A, B >` as A/B/A
57+
// Parse `BinaryOperands < A, B, C >` as A/B/C
58+
let mut parts = binop
59+
.strip_prefix("BinaryOperands <")
60+
.unwrap()
61+
.strip_suffix(">")
62+
.unwrap()
63+
.trim()
64+
.split(',')
65+
.map(|x| x.trim());
66+
let dst = parts.next().unwrap();
67+
let src1 = parts.next().unwrap_or(dst);
68+
let src2 = parts.next().unwrap_or(dst);
69+
Operand::Binop { dst, src1, src2 }
70+
}
4371
("dst", ty) => Operand::Writable { name, ty },
4472
(name, ty) => Operand::Normal { name, ty },
4573
})
@@ -109,7 +137,7 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> {
109137
pat.push_str(",");
110138
format_string.push_str(&format!(" // trap={{{name}:?}}"));
111139
}
112-
Operand::Binop { reg: _ } => {
140+
Operand::Binop { .. } => {
113141
pat.push_str("dst, src1, src2,");
114142
format_string.push_str(" {dst}, {src1}, {src2}");
115143
locals.push_str(&format!("let dst = reg_name(*dst.to_reg());\n"));
@@ -161,7 +189,7 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> {
161189
}
162190
}
163191
Operand::TrapCode { .. } => {}
164-
Operand::Binop { reg: _ } => {
192+
Operand::Binop { .. } => {
165193
pat.push_str("dst, src1, src2,");
166194
uses.push("src1");
167195
uses.push("src2");
@@ -221,7 +249,7 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> {
221249
pat.push_str(",");
222250
trap.push_str(&format!("sink.add_trap({name});\n"));
223251
}
224-
Operand::Binop { reg: _ } => {
252+
Operand::Binop { .. } => {
225253
pat.push_str("dst, src1, src2,");
226254
args.push_str(
227255
"pulley_interpreter::regs::BinaryOperands::new(dst, src1, src2),",
@@ -265,10 +293,10 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> {
265293
Operand::Writable { name, ty } => {
266294
isle.push_str(&format!("\n ({name} Writable{ty})"));
267295
}
268-
Operand::Binop { reg } => {
269-
isle.push_str(&format!("\n (dst Writable{reg})"));
270-
isle.push_str(&format!("\n (src1 {reg})"));
271-
isle.push_str(&format!("\n (src2 {reg})"));
296+
Operand::Binop { dst, src1, src2 } => {
297+
isle.push_str(&format!("\n (dst Writable{dst})"));
298+
isle.push_str(&format!("\n (src1 {src1})"));
299+
isle.push_str(&format!("\n (src2 {src2})"));
272300
}
273301
}
274302
}
@@ -303,13 +331,13 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> {
303331
assert!(result.is_none(), "{} has >1 result", inst.snake_name);
304332
result = Some(ty);
305333
}
306-
Operand::Binop { reg } => {
307-
isle.push_str(&format!("{reg} {reg}"));
334+
Operand::Binop { dst, src1, src2 } => {
335+
isle.push_str(&format!("{src1} {src2}"));
308336
rule.push_str("src1 src2");
309337
ops.push("src1");
310338
ops.push("src2");
311339
assert!(result.is_none(), "{} has >1 result", inst.snake_name);
312-
result = Some(reg);
340+
result = Some(dst);
313341
}
314342
}
315343
isle.push_str(" ");

Diff for: cranelift/codegen/src/isa/pulley_shared/abi.rs

+11-10
Original file line numberDiff line numberDiff line change
@@ -510,17 +510,18 @@ where
510510
_target_vector_bytes: u32,
511511
_isa_flags: &PulleyFlags,
512512
) -> u32 {
513+
// Spill slots are the size of a "word" or a pointer, but Pulley
514+
// registers are 8 bytes wide for integers/floats regardless of pointer size.
515+
// Calculate the number of slots necessary to store 8 bytes.
516+
let slots_for_8bytes = match P::pointer_width() {
517+
PointerWidth::PointerWidth32 => 2,
518+
PointerWidth::PointerWidth64 => 1,
519+
};
513520
match rc {
514-
// Spilling an integer or float register requires spilling 8 bytes,
515-
// and spill slots are defined in terms of "word bytes" or the size
516-
// of a pointer. That means on 32-bit pulley we need to take up two
517-
// spill slots where on 64-bit pulley we need to only take up one
518-
// spill slot for integers.
519-
RegClass::Int | RegClass::Float => match P::pointer_width() {
520-
PointerWidth::PointerWidth32 => 2,
521-
PointerWidth::PointerWidth64 => 1,
522-
},
523-
RegClass::Vector => unreachable!(),
521+
// Int/float registers are 8 bytes
522+
RegClass::Int | RegClass::Float => slots_for_8bytes,
523+
// Vector registers are 16 bytes
524+
RegClass::Vector => 2 * slots_for_8bytes,
524525
}
525526
}
526527

Diff for: cranelift/codegen/src/isa/pulley_shared/inst.isle

+10
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,16 @@
414414
(rule (pulley_fstore amode src ty flags)
415415
(SideEffectNoResult.Inst (MInst.FStore amode src ty flags)))
416416

417+
(decl pulley_vload (Amode Type MemFlags) VReg)
418+
(rule (pulley_vload amode ty flags)
419+
(let ((dst WritableVReg (temp_writable_vreg))
420+
(_ Unit (emit (MInst.VLoad dst amode ty flags))))
421+
dst))
422+
423+
(decl pulley_vstore (Amode VReg Type MemFlags) SideEffectNoResult)
424+
(rule (pulley_vstore amode src ty flags)
425+
(SideEffectNoResult.Inst (MInst.VStore amode src ty flags)))
426+
417427
(decl gen_br_table (XReg MachLabel BoxVecMachLabel) Unit)
418428
(rule (gen_br_table idx default labels)
419429
(emit (MInst.BrTable idx default labels)))

Diff for: cranelift/codegen/src/isa/pulley_shared/inst/mod.rs

+2-12
Original file line numberDiff line numberDiff line change
@@ -453,18 +453,8 @@ where
453453
}
454454

455455
fn worst_case_size() -> CodeOffset {
456-
// `BrIfXeq32 { a, b, taken, not_taken }` expands to `br_if_xeq32 a, b, taken; jump not_taken`.
457-
//
458-
// The first instruction is seven bytes long:
459-
// * 1 byte opcode
460-
// * 1 byte `a` register encoding
461-
// * 1 byte `b` register encoding
462-
// * 4 byte `taken` displacement
463-
//
464-
// And the second instruction is five bytes long:
465-
// * 1 byte opcode
466-
// * 4 byte `not_taken` displacement
467-
12
456+
// `Vconst128 { dst, imm }` is 18 bytes (opcode + dst + 16-byte imm)
457+
18
468458
}
469459

470460
fn ref_type_regclass(_settings: &settings::Flags) -> RegClass {

Diff for: cranelift/codegen/src/isa/pulley_shared/lower.isle

+32
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,11 @@
143143
(rule (lower (has_type $I64 (iadd a b)))
144144
(pulley_xadd64 a b))
145145

146+
(rule (lower (has_type $I8X16 (iadd a b))) (pulley_vaddi8x16 a b))
147+
(rule (lower (has_type $I16X8 (iadd a b))) (pulley_vaddi16x8 a b))
148+
(rule (lower (has_type $I32X4 (iadd a b))) (pulley_vaddi32x4 a b))
149+
(rule (lower (has_type $I64X2 (iadd a b))) (pulley_vaddi64x2 a b))
150+
146151
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
147152

148153
(rule (lower (has_type $I8 (isub a b)))
@@ -192,6 +197,11 @@
192197
(rule (lower (has_type $I64 (ishl a b)))
193198
(pulley_xshl64 a b))
194199

200+
(rule (lower (has_type $I8X16 (ishl a b))) (pulley_vshli8x16 a b))
201+
(rule (lower (has_type $I16X8 (ishl a b))) (pulley_vshli16x8 a b))
202+
(rule (lower (has_type $I32X4 (ishl a b))) (pulley_vshli32x4 a b))
203+
(rule (lower (has_type $I64X2 (ishl a b))) (pulley_vshli64x2 a b))
204+
195205
;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
196206

197207
(rule (lower (has_type $I32 (ushr a b)))
@@ -200,6 +210,11 @@
200210
(rule (lower (has_type $I64 (ushr a b)))
201211
(pulley_xshr64_u a b))
202212

213+
(rule (lower (has_type $I8X16 (ushr a b))) (pulley_vshri8x16_u a b))
214+
(rule (lower (has_type $I16X8 (ushr a b))) (pulley_vshri16x8_u a b))
215+
(rule (lower (has_type $I32X4 (ushr a b))) (pulley_vshri32x4_u a b))
216+
(rule (lower (has_type $I64X2 (ushr a b))) (pulley_vshri64x2_u a b))
217+
203218
;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
204219

205220
(rule (lower (has_type $I32 (sshr a b)))
@@ -208,6 +223,11 @@
208223
(rule (lower (has_type $I64 (sshr a b)))
209224
(pulley_xshr64_s a b))
210225

226+
(rule (lower (has_type $I8X16 (sshr a b))) (pulley_vshri8x16_s a b))
227+
(rule (lower (has_type $I16X8 (sshr a b))) (pulley_vshri16x8_s a b))
228+
(rule (lower (has_type $I32X4 (sshr a b))) (pulley_vshri32x4_s a b))
229+
(rule (lower (has_type $I64X2 (sshr a b))) (pulley_vshri64x2_s a b))
230+
211231
;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
212232

213233
(rule 0 (lower (has_type (fits_in_32 _) (band a b)))
@@ -414,6 +434,9 @@
414434
(rule 1 (lower (has_type $I64 (sload32 flags addr offset)))
415435
(pulley_xload (amode addr offset) $I32 flags (ExtKind.Sign64)))
416436

437+
(rule 2 (lower (has_type (ty_vec128 ty) (load flags addr offset)))
438+
(pulley_vload (amode addr offset) ty flags))
439+
417440
;;;; Rules for `store` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
418441

419442
(rule (lower (store flags src @ (value_type (ty_int ty)) addr offset))
@@ -431,6 +454,9 @@
431454
(rule (lower (istore32 flags src addr offset))
432455
(side_effect (pulley_xstore (amode addr offset) src $I32 flags)))
433456

457+
(rule 2 (lower (store flags src @ (value_type (ty_vec128 ty)) addr offset))
458+
(side_effect (pulley_vstore (amode addr offset) src ty flags)))
459+
434460
;;;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
435461

436462
(rule (lower (stack_addr stack_slot offset))
@@ -622,6 +648,8 @@
622648

623649
(rule (lower (has_type $F32 (fadd a b))) (pulley_fadd32 a b))
624650
(rule (lower (has_type $F64 (fadd a b))) (pulley_fadd64 a b))
651+
(rule (lower (has_type $F32X4 (fadd a b))) (pulley_vaddf32x4 a b))
652+
(rule (lower (has_type $F64X2 (fadd a b))) (pulley_vaddf64x2 a b))
625653

626654
;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
627655

@@ -687,3 +715,7 @@
687715

688716
(rule (lower (has_type $F32 (fabs a))) (pulley_fabs32 a))
689717
(rule (lower (has_type $F64 (fabs a))) (pulley_fabs64 a))
718+
719+
;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
720+
721+
(rule (lower (has_type (ty_vec128 _) (vconst (u128_from_constant a)))) (pulley_vconst128 a))

Diff for: crates/wast-util/src/lib.rs

-9
Original file line numberDiff line numberDiff line change
@@ -407,21 +407,14 @@ impl WastTest {
407407
"misc_testsuite/simd/almost-extmul.wast",
408408
"misc_testsuite/simd/canonicalize-nan.wast",
409409
"misc_testsuite/simd/cvt-from-uint.wast",
410-
"misc_testsuite/simd/interesting-float-splat.wast",
411410
"misc_testsuite/simd/issue4807.wast",
412411
"misc_testsuite/simd/issue6725-no-egraph-panic.wast",
413-
"misc_testsuite/simd/issue_3173_select_v128.wast",
414412
"misc_testsuite/simd/issue_3327_bnot_lowering.wast",
415413
"misc_testsuite/simd/load_splat_out_of_bounds.wast",
416414
"misc_testsuite/simd/replace-lane-preserve.wast",
417415
"misc_testsuite/simd/spillslot-size-fuzzbug.wast",
418416
"misc_testsuite/simd/unaligned-load.wast",
419417
"misc_testsuite/simd/v128-select.wast",
420-
"misc_testsuite/winch/_simd_address.wast",
421-
"misc_testsuite/winch/_simd_const.wast",
422-
"misc_testsuite/winch/_simd_load.wast",
423-
"misc_testsuite/winch/_simd_multivalue.wast",
424-
"misc_testsuite/winch/_simd_store.wast",
425418
"spec_testsuite/proposals/annotations/simd_lane.wast",
426419
"spec_testsuite/proposals/multi-memory/simd_memory-multi.wast",
427420
"spec_testsuite/proposals/relaxed-simd/i16x8_relaxed_q15mulr_s.wast",
@@ -431,7 +424,6 @@ impl WastTest {
431424
"spec_testsuite/proposals/relaxed-simd/relaxed_laneselect.wast",
432425
"spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast",
433426
"spec_testsuite/proposals/relaxed-simd/relaxed_min_max.wast",
434-
"spec_testsuite/simd_address.wast",
435427
"spec_testsuite/simd_align.wast",
436428
"spec_testsuite/simd_bit_shift.wast",
437429
"spec_testsuite/simd_bitwise.wast",
@@ -482,7 +474,6 @@ impl WastTest {
482474
"spec_testsuite/simd_load_splat.wast",
483475
"spec_testsuite/simd_load_zero.wast",
484476
"spec_testsuite/simd_splat.wast",
485-
"spec_testsuite/simd_store.wast",
486477
"spec_testsuite/simd_store16_lane.wast",
487478
"spec_testsuite/simd_store32_lane.wast",
488479
"spec_testsuite/simd_store64_lane.wast",

Diff for: pulley/src/decode.rs

+19-1
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,15 @@ impl Decode for u64 {
303303
}
304304
}
305305

306+
impl Decode for u128 {
307+
fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
308+
where
309+
T: BytecodeStream,
310+
{
311+
Ok(u128::from_le_bytes(bytecode.read()?))
312+
}
313+
}
314+
306315
impl Decode for i8 {
307316
fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
308317
where
@@ -339,6 +348,15 @@ impl Decode for i64 {
339348
}
340349
}
341350

351+
impl Decode for i128 {
352+
fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
353+
where
354+
T: BytecodeStream,
355+
{
356+
Ok(i128::from_le_bytes(bytecode.read()?))
357+
}
358+
}
359+
342360
impl Decode for XReg {
343361
fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
344362
where
@@ -404,7 +422,7 @@ impl Decode for ExtendedOpcode {
404422
}
405423
}
406424

407-
impl<R: Reg> Decode for BinaryOperands<R> {
425+
impl<D: Reg, S1: Reg, S2: Reg> Decode for BinaryOperands<D, S1, S2> {
408426
fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
409427
where
410428
T: BytecodeStream,

Diff for: pulley/src/disas.rs

+23-2
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,12 @@ impl Disas for i64 {
149149
}
150150
}
151151

152+
impl Disas for i128 {
153+
fn disas(&self, _position: usize, disas: &mut String) {
154+
write!(disas, "{self}").unwrap();
155+
}
156+
}
157+
152158
impl Disas for u8 {
153159
fn disas(&self, _position: usize, disas: &mut String) {
154160
write!(disas, "{self}").unwrap();
@@ -173,6 +179,12 @@ impl Disas for u64 {
173179
}
174180
}
175181

182+
impl Disas for u128 {
183+
fn disas(&self, _position: usize, disas: &mut String) {
184+
write!(disas, "{self}").unwrap();
185+
}
186+
}
187+
176188
impl Disas for PcRelOffset {
177189
fn disas(&self, position: usize, disas: &mut String) {
178190
let offset = isize::try_from(i32::from(*self)).unwrap();
@@ -192,9 +204,18 @@ fn disas_list<T: Disas>(position: usize, disas: &mut String, iter: impl IntoIter
192204
}
193205
}
194206

195-
impl<R: Reg + Disas> Disas for BinaryOperands<R> {
207+
impl<D, S1, S2> Disas for BinaryOperands<D, S1, S2>
208+
where
209+
D: Reg + Disas,
210+
S1: Reg + Disas,
211+
S2: Reg + Disas,
212+
{
196213
fn disas(&self, position: usize, disas: &mut String) {
197-
disas_list(position, disas, [self.dst, self.src1, self.src2])
214+
self.dst.disas(position, disas);
215+
write!(disas, ", ").unwrap();
216+
self.src1.disas(position, disas);
217+
write!(disas, ", ").unwrap();
218+
self.src2.disas(position, disas);
198219
}
199220
}
200221

0 commit comments

Comments
 (0)