Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pulley: Finish simd proposal implementation #9935

Merged
merged 2 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1041,6 +1041,9 @@
(rule 1 (lower (has_type $F64 (select c a b)))
(pulley_fselect64 (emit_cond (lower_cond c)) a b))

(rule 2 (lower (has_type (ty_vec128 _) (select c a b)))
(pulley_vselect (emit_cond (lower_cond c)) a b))

;; Helper to emit a conditional into a register itself.
(decl emit_cond (Cond) XReg)
(rule (emit_cond (Cond.If32 reg)) reg)
Expand Down Expand Up @@ -1213,6 +1216,18 @@
(rule (lower (has_type $I64 (fcvt_to_sint_sat val @ (value_type $F64))))
(pulley_x64_from_f64_s_sat val))

(rule (lower (has_type $I32X4 (fcvt_to_sint_sat val @ (value_type $F32X4))))
(pulley_vi32x4_from_f32x4_s val))

(rule (lower (has_type $I32X4 (fcvt_to_uint_sat val @ (value_type $F32X4))))
(pulley_vi32x4_from_f32x4_u val))

(rule (lower (has_type $I64X2 (fcvt_to_sint_sat val @ (value_type $F64X2))))
(pulley_vi64x2_from_f64x2_s val))

(rule (lower (has_type $I64X2 (fcvt_to_uint_sat val @ (value_type $F64X2))))
(pulley_vi64x2_from_f64x2_u val))

;;;; Rules for `fdemote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $F32 (fdemote val @ (value_type $F64))))
Expand Down Expand Up @@ -1429,12 +1444,17 @@

(rule (lower (snarrow a @ (value_type $I16X8) b)) (pulley_vnarrow16x8_s a b))
(rule (lower (snarrow a @ (value_type $I32X4) b)) (pulley_vnarrow32x4_s a b))
(rule (lower (snarrow a @ (value_type $I64X2) b)) (pulley_vnarrow64x2_s a b))

;;;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (unarrow a @ (value_type $I16X8) b)) (pulley_vnarrow16x8_u a b))
(rule (lower (unarrow a @ (value_type $I32X4) b)) (pulley_vnarrow32x4_u a b))

;;;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (uunarrow a @ (value_type $I64X2) b)) (pulley_vunarrow64x2_u a b))

;;;; Rules for `fvpromote_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (fvpromote_low a @ (value_type $F32X4))) (pulley_vfpromotelow a))
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-arithmetic.clif
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %sadd_sat_i8x16(i8x16, i8x16) -> i8x16 {
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-avg-round.clif
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ target x86_64 skylake
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %average_rounding_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-band-splat.clif
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %band_splat_const_i8x16(i8x16) -> i8x16 {
block0(v0: i8x16):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-bitcast.clif
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ target s390x
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %bitcast_if32x4(i32x4) -> f32x4 {
block0(v0: i32x4):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ target x86_64 skylake
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %mask_from_icmp(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
Expand Down
8 changes: 8 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-bitselect.clif
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
set enable_multi_ret_implicit_sret=false
target pulley32
target pulley32be
target pulley64
target pulley64be

set opt_level=speed
target aarch64
Expand All @@ -16,6 +20,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %bitselect_i64x2(i64x2, i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2, v2: i64x2):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-bor-splat.clif
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %bor_splat_const_i8x16(i8x16) -> i8x16 {
block0(v0: i8x16):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-bxor-splat.clif
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %bxor_splat_const_i8x16(i8x16) -> i8x16 {
block0(v0: i8x16):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-ceil.clif
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ target s390x
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %ceil_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %fcvt_to_sint_sat(f32x4) -> i32x4 {
block0(v0:f32x4):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %fcvt_to_uint_sat(f32x4) -> i32x4 {
block0(v0:f32x4):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-iabs.clif
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %iabs_i8x16(i8x16) -> i8x16 {
block0(v0: i8x16):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-iadd-splat.clif
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %iadd_splat_const_i8x16(i8x16) -> i8x16 {
block0(v0: i8x16):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %iadd_swidenhigh_i32x4(i32x4, i32x4) -> i64x2 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %iadd_swidenlow_i32x4(i32x4, i32x4) -> i64x2 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %iadd_swiden_high_low_i32x4(i32x4, i32x4) -> i64x2 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %iadd_uwidenhigh_i32x4(i32x4, i32x4) -> i64x2 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %iadd_uwidenlow_i32x4(i32x4, i32x4) -> i64x2 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %iadd_uwiden_high_low_i32x4(i32x4, i32x4) -> i64x2 {
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-ifma.clif
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ target x86_64 skylake
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

;; These tests test integer fused multiply add/subtract instructions.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %insertlane_preserves_upper_bits(f64) -> i64 fast {
block0(v5: f64):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-ishl.clif
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ target x86_64 skylake
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %ishl_i8x16(i8x16, i32) -> i8x16 {
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-isub-splat.clif
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %isub_splat_reverse_i8x16(i8x16, i8) -> i8x16 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %isub_swidenhigh_i32x4(i32x4, i32x4) -> i64x2 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %isub_swidenlow_i32x4(i32x4, i32x4) -> i64x2 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %isub_uwidenhigh_i32x4(i32x4, i32x4) -> i64x2 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %isub_uwidenlow_i32x4(i32x4, i32x4) -> i64x2 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

;; shuffle

Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-logical.clif
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %bnot() -> i32 {
block0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %i64x2_make0() -> i64x2 {
block0:
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-nearest.clif
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ target s390x
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %nearest_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
Expand Down
Loading
Loading