bytecodealliance · alexcrichton · Dec 29, 2024 · Dec 29, 2024 · Dec 29, 2024
@@ -1789,7 +1789,7 @@ pub(crate) fn define(
             r#"
         Fixed-point multiplication of numbers in the QN format, where N + 1
         is the number bitwidth:
-        `a := signed_saturate((x * y + 1 << (Q - 1)) >> Q)`
+        `a := signed_saturate((x * y + (1 << (Q - 1))) >> Q)`
 
         Polymorphic over all integer vector types with 16- or 32-bit numbers.
         "#,

@@ -311,6 +311,10 @@
 (rule (lower (has_type $I64 (smulhi a b)))
   (pulley_xmulhi64_s a b))
 
+;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $I16X8 (sqmul_round_sat a b))) (pulley_vqmulrsi16x8 a b))
+
 ;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule 0 (lower (has_type (fits_in_32 _) (sdiv a b)))
@@ -335,7 +339,7 @@
   (pulley_xrem32_u (zext32 a) (zext32 b)))
 (rule 1 (lower (has_type $I64 (urem a b))) (pulley_xrem64_u a b))
 
-;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $I16X8 (avg_round a b))) (pulley_vavground16x8 a b))
 
@@ -1372,4 +1376,4 @@
 
 ;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule 1 (lower (has_type $I8X16 (swizzle a b))) (pulley_vswizzlei8x16 a b))
+(rule 1 (lower (has_type $I8X16 (swizzle a b))) (pulley_vswizzlei8x16 a b))
@@ -405,14 +405,12 @@ impl WastTest {
                 "misc_testsuite/simd/issue_3327_bnot_lowering.wast",
                 "misc_testsuite/simd/v128-select.wast",
                 "spec_testsuite/proposals/annotations/simd_lane.wast",
-                "spec_testsuite/proposals/relaxed-simd/i16x8_relaxed_q15mulr_s.wast",
                 "spec_testsuite/proposals/relaxed-simd/i32x4_relaxed_trunc.wast",
                 "spec_testsuite/proposals/relaxed-simd/relaxed_dot_product.wast",
                 "spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast",
                 "spec_testsuite/proposals/memory64/simd_lane.wast",
                 "spec_testsuite/proposals/memory64/relaxed_madd_nmadd.wast",
                 "spec_testsuite/proposals/memory64/relaxed_dot_product.wast",
-                "spec_testsuite/proposals/memory64/i16x8_relaxed_q15mulr_s.wast",
                 "spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
                 "spec_testsuite/simd_f32x4_arith.wast",
                 "spec_testsuite/simd_f32x4_cmp.wast",
@@ -421,7 +419,6 @@ impl WastTest {
                 "spec_testsuite/simd_f64x2_cmp.wast",
                 "spec_testsuite/simd_f64x2_pmin_pmax.wast",
                 "spec_testsuite/simd_i16x8_extadd_pairwise_i8x16.wast",
-                "spec_testsuite/simd_i16x8_q15mulr_sat_s.wast",
                 "spec_testsuite/simd_i16x8_sat_arith.wast",
                 "spec_testsuite/simd_i32x4_arith2.wast",
                 "spec_testsuite/simd_i32x4_dot_i16x8.wast",

@@ -3835,6 +3835,19 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    /// Lane-wise Q15 rounding, saturating multiply: reads the `src1` and `src2`
+    /// vectors as `i16x8` and writes the per-lane results to `dst`.
+    fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let mut lanes = self.state[operands.src1].get_i16x8();
+        let rhs = self.state[operands.src2].get_i16x8();
+        let (lo, hi) = (i32::from(i16::MIN), i32::from(i16::MAX));
+        lanes.iter_mut().zip(rhs).for_each(|(lane, y)| {
+            // Widen to i32 so the product plus the rounding bias cannot overflow.
+            let rounded = (i32::from(*lane) * i32::from(y) + (1 << 14)) >> 15;
+            // Saturate back into the i16 range before narrowing.
+            *lane = rounded.clamp(lo, hi) as i16;
+        });
+        self.state[operands.dst].set_i16x8(lanes);
+        ControlFlow::Continue(())
+    }
+
     fn xextractv8x16(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
         let a = unsafe { *self.state[src].get_u8x16().get_unchecked(usize::from(lane)) };
         self.state[dst].set_u32(u32::from(a));

@@ -1071,6 +1071,9 @@ macro_rules! for_each_extended_op {
             /// `dst = src1 * src2`
             vmuli64x2 = VMulI64x2 { operands: BinaryOperands<VReg> };
 
+            /// `dst = signed_saturate((src1 * src2 + (1 << (Q - 1))) >> Q)` with `Q = 15`
+            vqmulrsi16x8 = VQmulrsI16x8 { operands: BinaryOperands<VReg> };
+
             /// `low32(dst) = zext(src[lane])`
             xextractv8x16 = XExtractV8x16 { dst: XReg, src: VReg, lane: u8 };
             /// `low32(dst) = zext(src[lane])`