bytecodealliance · eagr · Dec 23, 2024 · Dec 23, 2024 · Dec 24, 2024 · Dec 24, 2024
@@ -751,6 +751,33 @@
   (if-let true (floatcc_unordered cc))
   (pulley_xbxor32_s8 (lower_fcmp ty (floatcc_complement cc) a b) 1))
 
+;; Vector float comparisons.
+;;
+;; An `fcmp` whose result and operands are both 128-bit vectors is handed to
+;; `lower_vfcmp`, which dispatches on the operand lane type and the condition.
+;; NOTE(review): priority 1 presumably lets this win over the scalar `fcmp`
+;; lowering -- confirm against the neighbouring rules.
+(rule 1 (lower (has_type (ty_vec128 _)
+                         (fcmp cc lhs rhs @ (value_type (ty_vec128 ty)))))
+  (lower_vfcmp ty cc lhs rhs))
+
+;; Picks the Pulley vector-compare constructor for a float `fcmp`, keyed on
+;; the operand vector type (`$F32X4` or `$F64X2`) and the condition code.
+;; NOTE(review): no rules are present here for the `UnorderedOrLessThan`,
+;; `UnorderedOrLessThanOrEqual`, `UnorderedOrGreaterThan` and
+;; `UnorderedOrGreaterThanOrEqual` conditions -- confirm whether they can
+;; reach this lowering.
+(decl lower_vfcmp (Type FloatCC Value Value) VReg)
+
+;; Ordered / unordered tests.
+(rule (lower_vfcmp $F32X4 (FloatCC.Ordered) x y) (pulley_vordf32x4 x y))
+(rule (lower_vfcmp $F64X2 (FloatCC.Ordered) x y) (pulley_vordf64x2 x y))
+(rule (lower_vfcmp $F32X4 (FloatCC.Unordered) x y) (pulley_vunof32x4 x y))
+(rule (lower_vfcmp $F64X2 (FloatCC.Unordered) x y) (pulley_vunof64x2 x y))
+
+;; Equality and inequality, including the ordered/unordered variants.
+(rule (lower_vfcmp $F32X4 (FloatCC.Equal) x y) (pulley_veqf32x4 x y))
+(rule (lower_vfcmp $F64X2 (FloatCC.Equal) x y) (pulley_veqf64x2 x y))
+(rule (lower_vfcmp $F32X4 (FloatCC.NotEqual) x y) (pulley_vneqf32x4 x y))
+(rule (lower_vfcmp $F64X2 (FloatCC.NotEqual) x y) (pulley_vneqf64x2 x y))
+(rule (lower_vfcmp $F32X4 (FloatCC.OrderedNotEqual) x y) (pulley_vordneqf32x4 x y))
+(rule (lower_vfcmp $F64X2 (FloatCC.OrderedNotEqual) x y) (pulley_vordneqf64x2 x y))
+(rule (lower_vfcmp $F32X4 (FloatCC.UnorderedOrEqual) x y) (pulley_vunoeqf32x4 x y))
+(rule (lower_vfcmp $F64X2 (FloatCC.UnorderedOrEqual) x y) (pulley_vunoeqf64x2 x y))
+
+;; Less-than and less-than-or-equal map directly onto Pulley instructions.
+(rule (lower_vfcmp $F32X4 (FloatCC.LessThan) x y) (pulley_vltf32x4 x y))
+(rule (lower_vfcmp $F64X2 (FloatCC.LessThan) x y) (pulley_vltf64x2 x y))
+(rule (lower_vfcmp $F32X4 (FloatCC.LessThanOrEqual) x y) (pulley_vlteqf32x4 x y))
+(rule (lower_vfcmp $F64X2 (FloatCC.LessThanOrEqual) x y) (pulley_vlteqf64x2 x y))
+
+;; Greater-than forms reuse the less-than instructions with the operands
+;; swapped: `x > y` is emitted as `y < x`, and `x >= y` as `y <= x`.
+(rule (lower_vfcmp $F32X4 (FloatCC.GreaterThan) x y) (pulley_vltf32x4 y x))
+(rule (lower_vfcmp $F64X2 (FloatCC.GreaterThan) x y) (pulley_vltf64x2 y x))
+(rule (lower_vfcmp $F32X4 (FloatCC.GreaterThanOrEqual) x y) (pulley_vlteqf32x4 y x))
+(rule (lower_vfcmp $F64X2 (FloatCC.GreaterThanOrEqual) x y) (pulley_vlteqf64x2 y x))
+
 ;;;; Rules for `load` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (decl amode (Value Offset32) Amode)

@@ -401,7 +401,6 @@ impl WastTest {
         // features in Pulley are implemented.
         if config.compiler == Compiler::CraneliftPulley {
             let unsupported = [
-                "misc_testsuite/simd/canonicalize-nan.wast",
                 "misc_testsuite/simd/issue_3327_bnot_lowering.wast",
                 "misc_testsuite/simd/v128-select.wast",
                 "spec_testsuite/proposals/annotations/simd_lane.wast",
@@ -417,11 +416,7 @@ impl WastTest {
                 "spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
                 "spec_testsuite/proposals/memory64/i8x16_relaxed_swizzle.wast",
                 "spec_testsuite/simd_f32x4_arith.wast",
-                "spec_testsuite/simd_f32x4_cmp.wast",
-                "spec_testsuite/simd_f32x4_pmin_pmax.wast",
                 "spec_testsuite/simd_f64x2_arith.wast",
-                "spec_testsuite/simd_f64x2_cmp.wast",
-                "spec_testsuite/simd_f64x2_pmin_pmax.wast",
                 "spec_testsuite/simd_i16x8_arith2.wast",
                 "spec_testsuite/simd_i16x8_extadd_pairwise_i8x16.wast",
                 "spec_testsuite/simd_i16x8_q15mulr_sat_s.wast",

@@ -4207,6 +4207,214 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    /// Lane-wise "ordered" test on two `f32x4` vectors: a result lane is all
+    /// ones when neither input lane is NaN, and zero otherwise.
+    fn vordf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f32x4();
+        let rhs = self.state[operands.src2].get_f32x4();
+        let mut mask = [0u32; 4];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let ordered = !(x.is_nan() || y.is_nan());
+            *lane = if ordered { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(mask);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise "unordered" test on two `f32x4` vectors: a result lane is all
+    /// ones when either input lane is NaN, and zero otherwise.
+    fn vunof32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f32x4();
+        let rhs = self.state[operands.src2].get_f32x4();
+        let mut mask = [0u32; 4];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let unordered = x.is_nan() || y.is_nan();
+            *lane = if unordered { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(mask);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise equality on two `f32x4` vectors: a result lane is all ones
+    /// when the input lanes compare equal, and zero otherwise (so any NaN
+    /// lane yields zero).
+    fn veqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f32x4();
+        let rhs = self.state[operands.src2].get_f32x4();
+        let mut mask = [0u32; 4];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let equal = x == y;
+            *lane = if equal { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(mask);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise inequality on two `f32x4` vectors: a result lane is all ones
+    /// when the input lanes do not compare equal (which includes any NaN
+    /// lane), and zero otherwise.
+    fn vneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f32x4();
+        let rhs = self.state[operands.src2].get_f32x4();
+        let mut mask = [0u32; 4];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let differ = x != y;
+            *lane = if differ { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(mask);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise "ordered and not equal" on two `f32x4` vectors: a result
+    /// lane is all ones when neither input lane is NaN and the lanes differ,
+    /// and zero otherwise.
+    fn vordneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f32x4();
+        let rhs = self.state[operands.src2].get_f32x4();
+        let mut mask = [0u32; 4];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let ordered_and_differ = x != y && !x.is_nan() && !y.is_nan();
+            *lane = if ordered_and_differ { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(mask);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise "unordered or equal" on two `f32x4` vectors: a result lane is
+    /// all ones when either input lane is NaN or the lanes compare equal, and
+    /// zero otherwise.
+    fn vunoeqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f32x4();
+        let rhs = self.state[operands.src2].get_f32x4();
+        let mut mask = [0u32; 4];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let unordered_or_equal = x == y || x.is_nan() || y.is_nan();
+            *lane = if unordered_or_equal { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(mask);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise `src1 < src2` on two `f32x4` vectors: a result lane is all
+    /// ones when the comparison holds, and zero otherwise (so any NaN lane
+    /// yields zero).
+    fn vltf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f32x4();
+        let rhs = self.state[operands.src2].get_f32x4();
+        let mut mask = [0u32; 4];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let less = x < y;
+            *lane = if less { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(mask);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise `src1 <= src2` on two `f32x4` vectors: a result lane is all
+    /// ones when the comparison holds, and zero otherwise (so any NaN lane
+    /// yields zero).
+    fn vlteqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f32x4();
+        let rhs = self.state[operands.src2].get_f32x4();
+        let mut mask = [0u32; 4];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let less_or_equal = x <= y;
+            *lane = if less_or_equal { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(mask);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise "ordered" test on two `f64x2` vectors: a result lane is all
+    /// ones when neither input lane is NaN, and zero otherwise.
+    fn vordf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f64x2();
+        let rhs = self.state[operands.src2].get_f64x2();
+        let mut mask = [0u64; 2];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let ordered = !(x.is_nan() || y.is_nan());
+            *lane = if ordered { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(mask);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise "unordered" test on two `f64x2` vectors: a result lane is all
+    /// ones when either input lane is NaN, and zero otherwise.
+    fn vunof64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f64x2();
+        let rhs = self.state[operands.src2].get_f64x2();
+        let mut mask = [0u64; 2];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let unordered = x.is_nan() || y.is_nan();
+            *lane = if unordered { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(mask);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise equality on two `f64x2` vectors: a result lane is all ones
+    /// when the input lanes compare equal, and zero otherwise (so any NaN
+    /// lane yields zero).
+    fn veqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f64x2();
+        let rhs = self.state[operands.src2].get_f64x2();
+        let mut mask = [0u64; 2];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let equal = x == y;
+            *lane = if equal { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(mask);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise inequality on two `f64x2` vectors: a result lane is all ones
+    /// when the input lanes do not compare equal (which includes any NaN
+    /// lane), and zero otherwise.
+    fn vneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f64x2();
+        let rhs = self.state[operands.src2].get_f64x2();
+        let mut mask = [0u64; 2];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let differ = x != y;
+            *lane = if differ { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(mask);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise "ordered and not equal" on two `f64x2` vectors: a result
+    /// lane is all ones when neither input lane is NaN and the lanes differ,
+    /// and zero otherwise.
+    fn vordneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f64x2();
+        let rhs = self.state[operands.src2].get_f64x2();
+        let mut mask = [0u64; 2];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let ordered_and_differ = x != y && !x.is_nan() && !y.is_nan();
+            *lane = if ordered_and_differ { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(mask);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise "unordered or equal" on two `f64x2` vectors: a result lane is
+    /// all ones when either input lane is NaN or the lanes compare equal, and
+    /// zero otherwise.
+    fn vunoeqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f64x2();
+        let rhs = self.state[operands.src2].get_f64x2();
+        let mut mask = [0u64; 2];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let unordered_or_equal = x == y || x.is_nan() || y.is_nan();
+            *lane = if unordered_or_equal { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(mask);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise `src1 < src2` on two `f64x2` vectors: a result lane is all
+    /// ones when the comparison holds, and zero otherwise (so any NaN lane
+    /// yields zero).
+    fn vltf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f64x2();
+        let rhs = self.state[operands.src2].get_f64x2();
+        let mut mask = [0u64; 2];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let less = x < y;
+            *lane = if less { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(mask);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise `src1 <= src2` on two `f64x2` vectors: a result lane is all
+    /// ones when the comparison holds, and zero otherwise (so any NaN lane
+    /// yields zero).
+    fn vlteqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let lhs = self.state[operands.src1].get_f64x2();
+        let rhs = self.state[operands.src2].get_f64x2();
+        let mut mask = [0u64; 2];
+        for (lane, (&x, &y)) in mask.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            let less_or_equal = x <= y;
+            *lane = if less_or_equal { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(mask);
+        ControlFlow::Continue(())
+    }
+
     fn vneg8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
         let a = self.state[src].get_i8x16();
         self.state[dst].set_i8x16(a.map(|i| i.wrapping_neg()));

@@ -1144,6 +1144,39 @@ macro_rules! for_each_extended_op {
             /// `dst = src <= dst` (unsigned)
             vulteq64x2 = Vulteq64x2 { operands: BinaryOperands<VReg> };
 
+            /// `dst = !src1.is_nan() && !src2.is_nan()`
+            vordf32x4 = Vordf32x4 { operands: BinaryOperands<VReg> };
+            /// `dst = src1.is_nan() || src2.is_nan()`
+            vunof32x4 = Vunof32x4 { operands: BinaryOperands<VReg> };
+            /// `dst = src1 == src2`
+            veqf32x4 = Veqf32x4 { operands: BinaryOperands<VReg> };
+            /// `dst = src1 != src2`
+            vneqf32x4 = Vneqf32x4 { operands: BinaryOperands<VReg> };
+            /// `dst = !src1.is_nan() && !src2.is_nan() && src1 != src2`
+            vordneqf32x4 = Vordneqf32x4 { operands: BinaryOperands<VReg> };
+            /// `dst = src1.is_nan() || src2.is_nan() || src1 == src2`
+            vunoeqf32x4 = Vunoeqf32x4 { operands: BinaryOperands<VReg> };
+            /// `dst = src1 < src2`
+            vltf32x4 = Vltf32x4 { operands: BinaryOperands<VReg> };
+            /// `dst = src1 <= src2`
+            vlteqf32x4 = Vlteqf32x4 { operands: BinaryOperands<VReg> };
+            /// `dst = !src1.is_nan() && !src2.is_nan()`
+            vordf64x2 = Vordf64x2 { operands: BinaryOperands<VReg> };
+            /// `dst = src1.is_nan() || src2.is_nan()`
+            vunof64x2 = Vunof64x2 { operands: BinaryOperands<VReg> };
+            /// `dst = src1 == src2`
+            veqf64x2 = Veqf64x2 { operands: BinaryOperands<VReg> };
+            /// `dst = src1 != src2`
+            vneqf64x2 = Vneqf64x2 { operands: BinaryOperands<VReg> };
+            /// `dst = !src1.is_nan() && !src2.is_nan() && src1 != src2`
+            vordneqf64x2 = Vordneqf64x2 { operands: BinaryOperands<VReg> };
+            /// `dst = src1.is_nan() || src2.is_nan() || src1 == src2`
+            vunoeqf64x2 = Vunoeqf64x2 { operands: BinaryOperands<VReg> };
+            /// `dst = src1 < src2`
+            vltf64x2 = Vltf64x2 { operands: BinaryOperands<VReg> };
+            /// `dst = src1 <= src2`
+            vlteqf64x2 = Vlteqf64x2 { operands: BinaryOperands<VReg> };
+
             /// `dst = -src`
             vneg8x16 = Vneg8x16 { dst: VReg, src: VReg };
             /// `dst = -src`