Skip to content

Commit

Permalink
pulley: Implement SIMD splat instruction (#9832)
Browse files Browse the repository at this point in the history
* pulley: Implement SIMD `splat` instruction

Gets a few spec tests and CLIF tests passing

cc #9783

* Fix typo
  • Loading branch information
alexcrichton authored Dec 17, 2024
1 parent d3f05ee commit db4bd21
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 6 deletions.
9 changes: 9 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -799,3 +799,12 @@

(rule 0 (lower (has_type (fits_in_32 _) (iabs a))) (pulley_xabs32 (sext32 a)))
(rule 1 (lower (has_type $I64 (iabs a))) (pulley_xabs64 a))

;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; One rule per vector type: each matches a `splat` whose result has the given
;; type and lowers it to the Pulley splat constructor for that lane kind. The
;; `vsplatx*` constructors are used for the integer lane types and the
;; `vsplatf*` constructors for the float lane types. No rule priorities are
;; given; the matched types are all distinct.
(rule (lower (has_type $I8X16 (splat a))) (pulley_vsplatx8 a))
(rule (lower (has_type $I16X8 (splat a))) (pulley_vsplatx16 a))
(rule (lower (has_type $I32X4 (splat a))) (pulley_vsplatx32 a))
(rule (lower (has_type $I64X2 (splat a))) (pulley_vsplatx64 a))
(rule (lower (has_type $F32X4 (splat a))) (pulley_vsplatf32 a))
(rule (lower (has_type $F64X2 (splat a))) (pulley_vsplatf64 a))
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-splat.clif
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ target x86_64 sse41 has_avx has_avx2
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %splat_i8x16(i8) -> i8x16 {
block0(v0: i8):
Expand Down
2 changes: 0 additions & 2 deletions crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,8 +401,6 @@ impl WastTest {
// features in Pulley are implemented.
if config.compiler == Compiler::CraneliftPulley {
let unsupported = [
"misc_testsuite/int-to-float-splat.wast",
"misc_testsuite/issue6562.wast",
"misc_testsuite/memory64/simd.wast",
"misc_testsuite/simd/almost-extmul.wast",
"misc_testsuite/simd/canonicalize-nan.wast",
Expand Down
44 changes: 40 additions & 4 deletions pulley/src/interp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2574,7 +2574,7 @@ impl OpVisitor for Interpreter<'_> {
let mut a = self.state[operands.src1].get_i8x16();
let b = self.state[operands.src2].get_i8x16();
for (a, b) in a.iter_mut().zip(b) {
*a += b;
*a = a.wrapping_add(b);
}
self.state[operands.dst].set_i8x16(a);
ControlFlow::Continue(())
Expand All @@ -2584,7 +2584,7 @@ impl OpVisitor for Interpreter<'_> {
let mut a = self.state[operands.src1].get_i16x8();
let b = self.state[operands.src2].get_i16x8();
for (a, b) in a.iter_mut().zip(b) {
*a += b;
*a = a.wrapping_add(b);
}
self.state[operands.dst].set_i16x8(a);
ControlFlow::Continue(())
Expand All @@ -2594,7 +2594,7 @@ impl OpVisitor for Interpreter<'_> {
let mut a = self.state[operands.src1].get_i32x4();
let b = self.state[operands.src2].get_i32x4();
for (a, b) in a.iter_mut().zip(b) {
*a += b;
*a = a.wrapping_add(b);
}
self.state[operands.dst].set_i32x4(a);
ControlFlow::Continue(())
Expand All @@ -2604,7 +2604,7 @@ impl OpVisitor for Interpreter<'_> {
let mut a = self.state[operands.src1].get_i64x2();
let b = self.state[operands.src2].get_i64x2();
for (a, b) in a.iter_mut().zip(b) {
*a += b;
*a = a.wrapping_add(b);
}
self.state[operands.dst].set_i64x2(a);
ControlFlow::Continue(())
Expand Down Expand Up @@ -2718,6 +2718,42 @@ impl OpVisitor for Interpreter<'_> {
self.state[dst].set_u128(val);
ControlFlow::Continue(())
}

/// Broadcasts the low 8 bits of the 32-bit value read from `src` into all
/// sixteen byte lanes of `dst`.
fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
    // The narrowing cast is deliberate: only the low byte is splatted.
    let lane = self.state[src].get_u32() as u8;
    let lanes = [lane; 16];
    self.state[dst].set_u8x16(lanes);
    ControlFlow::Continue(())
}

/// Broadcasts the low 16 bits of the 32-bit value read from `src` into all
/// eight 16-bit lanes of `dst`.
fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
    // The narrowing cast is deliberate: only the low half-word is splatted.
    let lane = self.state[src].get_u32() as u16;
    let lanes = [lane; 8];
    self.state[dst].set_u16x8(lanes);
    ControlFlow::Continue(())
}

/// Broadcasts the 32-bit value read from `src` into all four 32-bit lanes
/// of `dst`.
fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
    let lane = self.state[src].get_u32();
    self.state[dst].set_u32x4([lane, lane, lane, lane]);
    ControlFlow::Continue(())
}

/// Broadcasts the 64-bit value read from `src` into both 64-bit lanes of
/// `dst`.
fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
    let lane = self.state[src].get_u64();
    self.state[dst].set_u64x2([lane, lane]);
    ControlFlow::Continue(())
}

/// Broadcasts the `f32` value read from `src` into all four `f32` lanes of
/// `dst`.
fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
    let lane = self.state[src].get_f32();
    self.state[dst].set_f32x4([lane, lane, lane, lane]);
    ControlFlow::Continue(())
}

/// Broadcasts the `f64` value read from `src` into both `f64` lanes of
/// `dst`.
fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
    let lane = self.state[src].get_f64();
    self.state[dst].set_f64x2([lane, lane]);
    ControlFlow::Continue(())
}
}

impl ExtendedOpVisitor for Interpreter<'_> {
Expand Down
13 changes: 13 additions & 0 deletions pulley/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,19 @@ macro_rules! for_each_op {
vshri32x4_u = VShrI32x4U { operands: BinaryOperands<VReg, VReg, XReg> };
/// `dst = src1 >> src2` (unsigned)
vshri64x2_u = VShrI64x2U { operands: BinaryOperands<VReg, VReg, XReg> };

/// `dst = splat(low8(src))`
vsplatx8 = VSplatX8 { dst: VReg, src: XReg };
/// `dst = splat(low16(src))`
vsplatx16 = VSplatX16 { dst: VReg, src: XReg };
/// `dst = splat(low32(src))`
vsplatx32 = VSplatX32 { dst: VReg, src: XReg };
/// `dst = splat(src)`
vsplatx64 = VSplatX64 { dst: VReg, src: XReg };
/// `dst = splat(low32(src))`
vsplatf32 = VSplatF32 { dst: VReg, src: FReg };
/// `dst = splat(src)`
vsplatf64 = VSplatF64 { dst: VReg, src: FReg };
}
};
}
Expand Down

0 comments on commit db4bd21

Please sign in to comment.