bytecodealliance · alexcrichton · Dec 30, 2024 · Dec 29, 2024 · Dec 30, 2024 · Dec 30, 2024
@@ -221,6 +221,11 @@
 (rule 1 (lower (has_type $I32X4 (iadd a b))) (pulley_vaddi32x4 a b))
 (rule 1 (lower (has_type $I64X2 (iadd a b))) (pulley_vaddi64x2 a b))
 
+;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $I16X8 (iadd_pairwise a b))) (pulley_vaddpairwisei16x8_s a b))
+(rule (lower (has_type $I32X4 (iadd_pairwise a b))) (pulley_vaddpairwisei32x4_s a b))
+
 ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule 0 (lower (has_type (ty_int (fits_in_32 _)) (isub a b))) (pulley_xsub32 a b))
@@ -1372,4 +1377,4 @@
 
 ;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule 1 (lower (has_type $I8X16 (swizzle a b))) (pulley_vswizzlei8x16 a b))
+(rule 1 (lower (has_type $I8X16 (swizzle a b))) (pulley_vswizzlei8x16 a b))
@@ -407,11 +407,9 @@ impl WastTest {
                 "spec_testsuite/proposals/annotations/simd_lane.wast",
                 "spec_testsuite/proposals/relaxed-simd/i16x8_relaxed_q15mulr_s.wast",
                 "spec_testsuite/proposals/relaxed-simd/i32x4_relaxed_trunc.wast",
-                "spec_testsuite/proposals/relaxed-simd/relaxed_dot_product.wast",
                 "spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast",
                 "spec_testsuite/proposals/memory64/simd_lane.wast",
                 "spec_testsuite/proposals/memory64/relaxed_madd_nmadd.wast",
-                "spec_testsuite/proposals/memory64/relaxed_dot_product.wast",
                 "spec_testsuite/proposals/memory64/i16x8_relaxed_q15mulr_s.wast",
                 "spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
                 "spec_testsuite/simd_f32x4_arith.wast",
@@ -420,12 +418,9 @@ impl WastTest {
                 "spec_testsuite/simd_f64x2_arith.wast",
                 "spec_testsuite/simd_f64x2_cmp.wast",
                 "spec_testsuite/simd_f64x2_pmin_pmax.wast",
-                "spec_testsuite/simd_i16x8_extadd_pairwise_i8x16.wast",
                 "spec_testsuite/simd_i16x8_q15mulr_sat_s.wast",
                 "spec_testsuite/simd_i16x8_sat_arith.wast",
                 "spec_testsuite/simd_i32x4_arith2.wast",
-                "spec_testsuite/simd_i32x4_dot_i16x8.wast",
-                "spec_testsuite/simd_i32x4_extadd_pairwise_i16x8.wast",
                 "spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast",
                 "spec_testsuite/simd_i32x4_trunc_sat_f64x2.wast",
                 "spec_testsuite/simd_i64x2_arith2.wast",

@@ -3299,6 +3299,40 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    /// Pairwise add: `dst = [a[0] + a[1], .., a[6] + a[7], b[0] + b[1], .., b[6] + b[7]]`
+    /// with `a = src1`, `b = src2`; every sum wraps on overflow.
+    fn vaddpairwisei16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_i16x8();
+        let b = self.state[operands.src2].get_i16x8();
+        // Fill a fixed-size array in place: no per-instruction heap allocation
+        // and no fallible `try_into().unwrap()` conversions on the hot path.
+        let mut result = [0i16; 8];
+        let half = result.len() / 2;
+        for i in 0..half {
+            result[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
+            result[i + half] = b[2 * i].wrapping_add(b[2 * i + 1]);
+        }
+        self.state[operands.dst].set_i16x8(result);
+        ControlFlow::Continue(())
+    }
+
+    /// Pairwise add: `dst = [a[0] + a[1], a[2] + a[3], b[0] + b[1], b[2] + b[3]]`
+    /// with `a = src1`, `b = src2`; every sum wraps on overflow.
+    fn vaddpairwisei32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_i32x4();
+        let b = self.state[operands.src2].get_i32x4();
+        // Fill a fixed-size array in place: no per-instruction heap allocation
+        // and no fallible `try_into().unwrap()` conversions on the hot path.
+        let mut result = [0i32; 4];
+        let half = result.len() / 2;
+        for i in 0..half {
+            result[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
+            result[i + half] = b[2 * i].wrapping_add(b[2 * i + 1]);
+        }
+        self.state[operands.dst].set_i32x4(result);
+        ControlFlow::Continue(())
+    }
+
     fn vshli8x16(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
         let a = self.state[operands.src1].get_i8x16();
         let b = self.state[operands.src2].get_u32();

@@ -899,6 +899,11 @@ macro_rules! for_each_extended_op {
             /// `dst = src1 + src2`
             vaddf64x2 = VAddF64x2 { operands: BinaryOperands<VReg> };
 
+            /// `dst = [src1[0] + src1[1], ..., src2[6] + src2[7]]`
+            vaddpairwisei16x8_s = VAddpairwiseI16x8S { operands: BinaryOperands<VReg> };
+            /// `dst = [src1[0] + src1[1], ..., src2[2] + src2[3]]`
+            vaddpairwisei32x4_s = VAddpairwiseI32x4S { operands: BinaryOperands<VReg> };
+
             /// `dst = src1 << src2`
             vshli8x16 = VShlI8x16 { operands: BinaryOperands<VReg, VReg, XReg> };
             /// `dst = src1 << src2`