From e7eba5256cd13d1f36fc4d5f9b45e06cfbd4ad1c Mon Sep 17 00:00:00 2001
From: Karl Meakin
Date: Sat, 3 Aug 2024 17:42:07 +0100
Subject: [PATCH] egraphs: transpose select on operation to operation on select
 when it increases sharing

Copyright (c) 2024, Arm Limited.

Signed-off-by: Karl Meakin
---
 cranelift/codegen/src/opts/selects.isle       |  46 +++++++
 cranelift/codegen/src/prelude.isle            |  12 ++
 cranelift/codegen/src/prelude_opt.isle        |   5 +
 .../filetests/filetests/egraph/select.clif    | 120 ++++++++++++++++++
 4 files changed, 183 insertions(+)

diff --git a/cranelift/codegen/src/opts/selects.isle b/cranelift/codegen/src/opts/selects.isle
index d0e06f855604..62334de7f8aa 100644
--- a/cranelift/codegen/src/opts/selects.isle
+++ b/cranelift/codegen/src/opts/selects.isle
@@ -75,3 +75,49 @@
 (rule (simplify (bor (ty_vec128 ty) (band ty (bnot ty c) y) (band ty x c))) (bitselect ty c x y))
 (rule (simplify (bor (ty_vec128 ty) (band ty y (bnot ty c)) (band ty c x))) (bitselect ty c x y))
 (rule (simplify (bor (ty_vec128 ty) (band ty y (bnot ty c)) (band ty x c))) (bitselect ty c x y))
+
+;; transpose select on operation to operation on select when it increases sharing
+;;
+;; The new inner `select` is typed by the operands it chooses between (`sel_ty`),
+;; not by the result of the original `select`: an operand's type need not equal
+;; the result type (e.g. `uextend`), and both arms must agree on the operand type.
+
+;; select(cond, unop(x), unop(y)) => unop(select(cond, x, y))
+(rule (simplify (select ty cond (unop _ op x @ (value_type sel_ty)) (unop _ op y @ (value_type sel_ty))))
+      (unop ty op (select sel_ty cond x y)))
+
+;; select(cond, binop(x, z), binop(y, z)) => binop(select(cond, x, y), z)
+(rule (simplify (select ty cond (binop _ op x @ (value_type sel_ty) z) (binop _ op y @ (value_type sel_ty) z)))
+      (binop ty op (select sel_ty cond x y) z))
+
+;; select(cond, binop(x, y), binop(x, z)) => binop(x, select(cond, y, z))
+(rule (simplify (select ty cond (binop _ op x y @ (value_type sel_ty)) (binop _ op x z @ (value_type sel_ty))))
+      (binop ty op x (select sel_ty cond y z)))
+
+;; select(cond, ternop(w, y, z), ternop(x, y, z)) => ternop(select(cond, w, x), y, z)
+(rule (simplify (select ty cond (ternop _ op w @ (value_type sel_ty) y z) (ternop _ op x @ (value_type sel_ty) y z)))
+      (ternop ty op (select sel_ty cond w x) y z))
+
+;; select(cond, ternop(w, x, z), ternop(w, y, z)) => ternop(w, select(cond, x, y), z)
+(rule (simplify (select ty cond (ternop _ op w x @ (value_type sel_ty) z) (ternop _ op w y @ (value_type sel_ty) z)))
+      (ternop ty op w (select sel_ty cond x y) z))
+
+;; select(cond, ternop(w, x, y), ternop(w, x, z)) => ternop(w, x, select(cond, y, z))
+(rule (simplify (select ty cond (ternop _ op w x y @ (value_type sel_ty)) (ternop _ op w x z @ (value_type sel_ty))))
+      (ternop ty op w x (select sel_ty cond y z)))
+
+;; select(cond, icmp(cc, x, z), icmp(cc, y, z)) => icmp(cc, select(cond, x, y), z)
+(rule (simplify (select cmp_ty cond (icmp cmp_ty cc x @ (value_type sel_ty) z) (icmp cmp_ty cc y z)))
+      (icmp cmp_ty cc (select sel_ty cond x y) z))
+
+;; select(cond, icmp(cc, x, y), icmp(cc, x, z)) => icmp(cc, x, select(cond, y, z))
+(rule (simplify (select cmp_ty cond (icmp cmp_ty cc x @ (value_type sel_ty) y) (icmp cmp_ty cc x z)))
+      (icmp cmp_ty cc x (select sel_ty cond y z)))
+
+;; select(cond, fcmp(cc, x, z), fcmp(cc, y, z)) => fcmp(cc, select(cond, x, y), z)
+(rule (simplify (select cmp_ty cond (fcmp cmp_ty cc x @ (value_type sel_ty) z) (fcmp cmp_ty cc y z)))
+      (fcmp cmp_ty cc (select sel_ty cond x y) z))
+
+;; select(cond, fcmp(cc, x, y), fcmp(cc, x, z)) => fcmp(cc, x, select(cond, y, z))
+(rule (simplify (select cmp_ty cond (fcmp cmp_ty cc x @ (value_type sel_ty) y) (fcmp cmp_ty cc x z)))
+      (fcmp cmp_ty cc x (select sel_ty cond y z)))
diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle
index 54d8428230c6..fce37fd3dfe4 100644
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -452,6 +452,18 @@
 
 ;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
+;; Match a value defined by any `Unary`-format instruction, binding its opcode and operand.
+(decl unop (Type Opcode Value) Value)
+(extractor (unop ty op x) (inst_data ty (InstructionData.Unary op x)))
+
+;; Match a value defined by any `Binary`-format instruction, binding its opcode and both operands.
+(decl binop (Type Opcode Value Value) Value)
+(extractor (binop ty op x y) (inst_data ty (InstructionData.Binary op (value_array_2 x y))))
+
+;; Match a value defined by any `Ternary`-format instruction, binding its opcode and three operands.
+(decl ternop (Type Opcode Value Value Value) Value)
+(extractor (ternop ty op x y z) (inst_data ty (InstructionData.Ternary op (value_array_3 x y z))))
+
 (decl eq (Type Value Value) Value)
 (extractor (eq ty x y) (icmp ty (IntCC.Equal) x y))
 
diff --git a/cranelift/codegen/src/prelude_opt.isle b/cranelift/codegen/src/prelude_opt.isle
index e0cd14860e88..fb484dd4878f 100644
--- a/cranelift/codegen/src/prelude_opt.isle
+++ b/cranelift/codegen/src/prelude_opt.isle
@@ -27,6 +27,11 @@
 (decl value_array_3_ctor (Value Value Value) ValueArray3)
 (extern constructor value_array_3_ctor value_array_3_ctor)
 
+;; Constructors for the opcode-generic `unop`/`binop`/`ternop` helpers.
+(rule (unop ty op x) (make_inst ty (InstructionData.Unary op x)))
+(rule (binop ty op x y) (make_inst ty (InstructionData.Binary op (value_array_2 x y))))
+(rule (ternop ty op x y z) (make_inst ty (InstructionData.Ternary op (value_array_3 x y z))))
+
 (rule (eq ty x y) (icmp ty (IntCC.Equal) x y))
 (rule (ne ty x y) (icmp ty (IntCC.NotEqual) x y))
 (rule (ult ty x y) (icmp ty (IntCC.UnsignedLessThan) x y))
diff --git a/cranelift/filetests/filetests/egraph/select.clif b/cranelift/filetests/filetests/egraph/select.clif
index 8b916e90d466..eb3b5c35cf0c 100644
--- a/cranelift/filetests/filetests/egraph/select.clif
+++ b/cranelift/filetests/filetests/egraph/select.clif
@@ -208,3 +208,123 @@ block0(v0: i32, v1: i32):
 ; check: v6 = icmp sgt v0, v1
 ; check: v8 = bmask.i64 v6
 ; check: return v8
+
+;; select(cond, unop(x), unop(y)) => unop(select(cond, x, y))
+function %transpose_unop_gvn_1(i8, i32, i32) -> i32 {
+block0(v0: i8, v1: i32, v2: i32):
+    v3 = ineg v1
+    v4 = ineg v2
+    v5 = select v0, v3, v4
+    return v5
+    ; check: v6 = select v0, v1, v2
+    ; check: v7 = ineg v6
+    ; check: return v7
+}
+
+;; select(cond, binop(x, z), binop(y, z)) => binop(select(cond, x, y), z)
+function %transpose_binop_gvn_1(i8, i32, i32, i32) -> i32 {
+block0(v0: i8, v1: i32, v2: i32, v3: i32):
+    v4 = iadd v1, v3
+    v5 = iadd v2, v3
+    v6 = select v0, v4, v5
+    return v6
+    ; check: v7 = select v0, v1, v2
+    ; check: v8 = iadd v7, v3
+    ; check: return v8
+}
+
+;; select(cond, binop(x, y), binop(x, z)) => binop(x, select(cond, y, z))
+function %transpose_binop_gvn_2(i8, i32, i32, i32) -> i32 {
+block0(v0: i8, v1: i32, v2: i32, v3: i32):
+    v4 = iadd v1, v2
+    v5 = iadd v1, v3
+    v6 = select v0, v4, v5
+    return v6
+    ; check: v7 = select v0, v2, v3
+    ; check: v8 = iadd v1, v7
+    ; check: return v8
+}
+
+;; select(cond, ternop(w, y, z), ternop(x, y, z)) => ternop(select(cond, w, x), y, z)
+function %transpose_ternop_gvn_1(i8, f32, f32, f32, f32) -> f32 {
+block0(v0: i8, v1: f32, v2: f32, v3: f32, v4: f32):
+    v5 = fma v1, v3, v4
+    v6 = fma v2, v3, v4
+    v7 = select v0, v5, v6
+    return v7
+    ; check: v8 = select v0, v1, v2
+    ; check: v9 = fma v8, v3, v4
+    ; check: return v9
+}
+
+;; select(cond, ternop(w, x, z), ternop(w, y, z)) => ternop(w, select(cond, x, y), z)
+function %transpose_ternop_gvn_2(i8, f32, f32, f32, f32) -> f32 {
+block0(v0: i8, v1: f32, v2: f32, v3: f32, v4: f32):
+    v5 = fma v1, v2, v4
+    v6 = fma v1, v3, v4
+    v7 = select v0, v5, v6
+    return v7
+    ; check: v8 = select v0, v2, v3
+    ; check: v9 = fma v1, v8, v4
+    ; check: return v9
+}
+
+;; select(cond, ternop(w, x, y), ternop(w, x, z)) => ternop(w, x, select(cond, y, z))
+function %transpose_ternop_gvn_3(i8, f32, f32, f32, f32) -> f32 {
+block0(v0: i8, v1: f32, v2: f32, v3: f32, v4: f32):
+    v5 = fma v1, v2, v3
+    v6 = fma v1, v2, v4
+    v7 = select v0, v5, v6
+    return v7
+    ; check: v8 = select v0, v3, v4
+    ; check: v9 = fma v1, v2, v8
+    ; check: return v9
+}
+
+;; select(cond, icmp(cc, x, z), icmp(cc, y, z)) => icmp(cc, select(cond, x, y), z)
+function %transpose_icmp_gvn_1(i8, i32, i32, i32) -> i8 {
+block0(v0: i8, v1: i32, v2: i32, v3: i32):
+    v5 = icmp eq v1, v3
+    v6 = icmp eq v2, v3
+    v7 = select v0, v5, v6
+    return v7
+    ; check: v8 = select v0, v1, v2
+    ; check: v9 = icmp eq v8, v3
+    ; check: return v9
+}
+
+;; select(cond, icmp(cc, x, y), icmp(cc, x, z)) => icmp(cc, x, select(cond, y, z))
+function %transpose_icmp_gvn_2(i8, i32, i32, i32) -> i8 {
+block0(v0: i8, v1: i32, v2: i32, v3: i32):
+    v5 = icmp eq v1, v2
+    v6 = icmp eq v1, v3
+    v7 = select v0, v5, v6
+    return v7
+    ; check: v8 = select v0, v2, v3
+    ; check: v9 = icmp eq v1, v8
+    ; check: return v9
+}
+
+;; select(cond, fcmp(cc, x, z), fcmp(cc, y, z)) => fcmp(cc, select(cond, x, y), z)
+function %transpose_fcmp_gvn_1(i8, f32, f32, f32) -> i8 {
+block0(v0: i8, v1: f32, v2: f32, v3: f32):
+    v5 = fcmp eq v1, v3
+    v6 = fcmp eq v2, v3
+    v7 = select v0, v5, v6
+    return v7
+    ; check: v8 = select v0, v1, v2
+    ; check: v9 = fcmp eq v8, v3
+    ; check: return v9
+}
+
+;; select(cond, fcmp(cc, x, y), fcmp(cc, x, z)) => fcmp(cc, x, select(cond, y, z))
+function %transpose_fcmp_gvn_2(i8, f32, f32, f32) -> i8 {
+block0(v0: i8, v1: f32, v2: f32, v3: f32):
+    v5 = fcmp eq v1, v2
+    v6 = fcmp eq v1, v3
+    v7 = select v0, v5, v6
+    return v7
+    ; check: v8 = select v0, v2, v3
+    ; check: v9 = fcmp eq v1, v8
+    ; check: return v9
+}