diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 23f2500043..1999a21fd9 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -1809,6 +1809,9 @@ ;; Helpers for generating `and` instructions. +(decl and_reg (Type Reg Reg) Reg) +(rule (and_reg ty x y) (alu_rrr (ALUOp.And) ty x y)) + (decl and_imm (Type Reg ImmLogic) Reg) (rule (and_imm ty x y) (alu_rr_imm_logic (ALUOp.And) ty x y)) @@ -1820,6 +1823,10 @@ (rule (eor_vec x y size) (vec_rrr (VecALUOp.Eor) x y size)) ;; Helpers for generating `bic` instructions. + +(decl bic (Type Reg Reg) Reg) +(rule (bic ty x y) (alu_rrr (ALUOp.AndNot) ty x y)) + (decl bic_vec (Reg Reg VectorSize) Reg) (rule (bic_vec x y size) (vec_rrr (VecALUOp.Bic) x y size)) @@ -1864,6 +1871,15 @@ (decl vec_cnt (Reg VectorSize) Reg) (rule (vec_cnt x size) (vec_misc (VecMisc2.Cnt) x size)) +;; Helpers for generating a `bsl` instruction. + +(decl bsl (Type Reg Reg Reg) Reg) +(rule (bsl ty c x y) + (let ((dst WritableReg (temp_writable_reg ty)) + (_1 Unit (emit (MInst.FpuMove128 dst c))) + (_2 Unit (emit (MInst.VecRRR (VecALUOp.Bsl) dst x y (vector_size ty))))) + dst)) + ;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl imm (Type u64) Reg) diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index d2bc26160c..fed903523a 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -1142,7 +1142,22 @@ (rule (lower (has_type $I8X16 (popcnt x))) (vec_cnt x (VectorSize.Size8x16))) -;;;; Rules for `fcmp` 32 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (bitselect c x y))) + (let ((tmp1 Reg (and_reg ty x c)) + (tmp2 Reg (bic ty y c))) + (orr ty tmp1 
tmp2))) + +(rule (lower (has_type (ty_vec128 ty) (bitselect c x y))) + (bsl ty c x y)) + +;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_vec128 ty) (vselect c x y))) + (bsl ty c x y)) + +;;;; Rules for `fcmp` 32 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x (splat (f32const (zero_value_f32 y)))))) (let ((rn Reg x) diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 773b5d6a03..f952785728 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -420,55 +420,7 @@ pub(crate) fn lower_insn_to_regs>( } } - Opcode::Bitselect | Opcode::Vselect => { - let ty = ty.unwrap(); - if !ty.is_vector() { - debug_assert_ne!(Opcode::Vselect, op); - let tmp = ctx.alloc_tmp(I64).only_reg().unwrap(); - let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let rcond = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); - let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); - // AND rTmp, rn, rcond - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::And, - size: OperandSize::Size64, - rd: tmp, - rn, - rm: rcond, - }); - // BIC rd, rm, rcond - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::AndNot, - size: OperandSize::Size64, - rd, - rn: rm, - rm: rcond, - }); - // ORR rd, rd, rTmp - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::Orr, - size: OperandSize::Size64, - rd, - rn: rd.to_reg(), - rm: tmp.to_reg(), - }); - } else { - let rcond = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); - let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); - let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::gen_move(rd, rcond, ty)); - - 
ctx.emit(Inst::VecRRR { - alu_op: VecALUOp::Bsl, - rd, - rn, - rm, - size: VectorSize::from_ty(ty), - }); - } - } + Opcode::Bitselect | Opcode::Vselect => implemented_in_isle(ctx), Opcode::Trueif => { let condcode = ctx.data(insn).cond_code().unwrap(); diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 27662a808a..d02b53193b 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -249,6 +249,15 @@ macro_rules! isle_prelude_methods { } } + #[inline] + fn ty_int_bool_ref_scalar_64(&mut self, ty: Type) -> Option<Type> { + if ty.bits() <= 64 && !ty.is_float() && !ty.is_vector() { + Some(ty) + } else { + None + } + } + #[inline] fn ty_32_or_64(&mut self, ty: Type) -> Option<Type> { if ty.bits() == 32 || ty.bits() == 64 { diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index c7b95c5e97..75b586d186 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -293,6 +293,11 @@ (decl fits_in_64 (Type) Type) (extern extractor fits_in_64 fits_in_64) +;; An extractor that only matches scalar booleans, integers, and references that +;; can fit in 64 bits. +(decl ty_int_bool_ref_scalar_64 (Type) Type) +(extern extractor ty_int_bool_ref_scalar_64 ty_int_bool_ref_scalar_64) + ;; An extractor that matches 32- and 64-bit types only. 
(decl ty_32_or_64 (Type) Type) (extern extractor ty_32_or_64 ty_32_or_64) diff --git a/cranelift/filetests/filetests/isa/aarch64/condops.clif b/cranelift/filetests/filetests/isa/aarch64/condops.clif index 6813b270e9..77554fbc99 100644 --- a/cranelift/filetests/filetests/isa/aarch64/condops.clif +++ b/cranelift/filetests/filetests/isa/aarch64/condops.clif @@ -37,9 +37,9 @@ block0(v0: i8, v1: i8, v2: i8): } ; block0: -; and x8, x1, x0 -; bic x0, x2, x0 -; orr x0, x0, x8 +; and w7, w1, w0 +; bic w9, w2, w0 +; orr w0, w7, w9 ; ret function %i(b1, i8, i8) -> i8 { diff --git a/cranelift/filetests/filetests/runtests/bitops.clif b/cranelift/filetests/filetests/runtests/bitops.clif index 1524e04bb5..9120f62cdc 100644 --- a/cranelift/filetests/filetests/runtests/bitops.clif +++ b/cranelift/filetests/filetests/runtests/bitops.clif @@ -1,10 +1,8 @@ test run target aarch64 -target arm target s390x ; target x86_64 TODO: Not yet implemented on x86_64 - function %bnot_band() -> b1 { block0: v1 = bconst.b1 false @@ -14,3 +12,44 @@ block0: return v4 } ; run + +function %bitselect_i8(i8, i8, i8) -> i8 { +block0(v0: i8, v1: i8, v2: i8): + v3 = bitselect v0, v1, v2 + return v3 +} + +; run: %bitselect_i8(0, 0, 0xFF) == 0xFF +; run: %bitselect_i8(0x55, 0, 0xFF) == 0xAA +; run: %bitselect_i8(0xF0, 32, 13) == 45 +; run: %bitselect_i8(0xFF, 0xFF, 0) == 0xFF + +function %bitselect_i16(i16, i16, i16) -> i16 { +block0(v0: i16, v1: i16, v2: i16): + v3 = bitselect v0, v1, v2 + return v3 +} + +; run: %bitselect_i16(0, 0, 0xFFFF) == 0xFFFF +; run: %bitselect_i16(0x5555, 0, 0xFFFF) == 0xAAAA +; run: %bitselect_i16(0xFFFF, 0xFFFF, 0) == 0xFFFF + +function %bitselect_i32(i32, i32, i32) -> i32 { +block0(v0: i32, v1: i32, v2: i32): + v3 = bitselect v0, v1, v2 + return v3 +} + +; run: %bitselect_i32(0, 0, 0xFFFFFFFF) == 0xFFFFFFFF +; run: %bitselect_i32(0x55555555, 0, 0xFFFFFFFF) == 0xAAAAAAAA +; run: %bitselect_i32(0xFFFFFFFF, 0xFFFFFFFF, 0) == 0xFFFFFFFF + +function %bitselect_i64(i64, i64, i64) -> 
i64 { +block0(v0: i64, v1: i64, v2: i64): + v3 = bitselect v0, v1, v2 + return v3 +} + +; run: %bitselect_i64(0, 0, 0xFFFFFFFFFFFFFFFF) == 0xFFFFFFFFFFFFFFFF +; run: %bitselect_i64(0x5555555555555555, 0, 0xFFFFFFFFFFFFFFFF) == 0xAAAAAAAAAAAAAAAA +; run: %bitselect_i64(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0) == 0xFFFFFFFFFFFFFFFF