Cranelift AArch64: Migrate Bitselect and Vselect to ISLE (#4139)
Copyright (c) 2022, Arm Limited.
This commit is contained in:
@@ -1809,6 +1809,9 @@
|
|||||||
|
|
||||||
;; Helpers for generating `and` instructions.
|
;; Helpers for generating `and` instructions.
|
||||||
|
|
||||||
|
(decl and_reg (Type Reg Reg) Reg)
|
||||||
|
(rule (and_reg ty x y) (alu_rrr (ALUOp.And) ty x y))
|
||||||
|
|
||||||
(decl and_imm (Type Reg ImmLogic) Reg)
|
(decl and_imm (Type Reg ImmLogic) Reg)
|
||||||
(rule (and_imm ty x y) (alu_rr_imm_logic (ALUOp.And) ty x y))
|
(rule (and_imm ty x y) (alu_rr_imm_logic (ALUOp.And) ty x y))
|
||||||
|
|
||||||
@@ -1820,6 +1823,10 @@
|
|||||||
(rule (eor_vec x y size) (vec_rrr (VecALUOp.Eor) x y size))
|
(rule (eor_vec x y size) (vec_rrr (VecALUOp.Eor) x y size))
|
||||||
|
|
||||||
;; Helpers for generating `bic` instructions.
|
;; Helpers for generating `bic` instructions.
|
||||||
|
|
||||||
|
(decl bic (Type Reg Reg) Reg)
|
||||||
|
(rule (bic ty x y) (alu_rrr (ALUOp.AndNot) ty x y))
|
||||||
|
|
||||||
(decl bic_vec (Reg Reg VectorSize) Reg)
|
(decl bic_vec (Reg Reg VectorSize) Reg)
|
||||||
(rule (bic_vec x y size) (vec_rrr (VecALUOp.Bic) x y size))
|
(rule (bic_vec x y size) (vec_rrr (VecALUOp.Bic) x y size))
|
||||||
|
|
||||||
@@ -1864,6 +1871,15 @@
|
|||||||
(decl vec_cnt (Reg VectorSize) Reg)
|
(decl vec_cnt (Reg VectorSize) Reg)
|
||||||
(rule (vec_cnt x size) (vec_misc (VecMisc2.Cnt) x size))
|
(rule (vec_cnt x size) (vec_misc (VecMisc2.Cnt) x size))
|
||||||
|
|
||||||
|
;; Helpers for generating a `bsl` instruction.
|
||||||
|
|
||||||
|
(decl bsl (Type Reg Reg Reg) Reg)
|
||||||
|
(rule (bsl ty c x y)
|
||||||
|
(let ((dst WritableReg (temp_writable_reg ty))
|
||||||
|
(_1 Unit (emit (MInst.FpuMove128 dst c)))
|
||||||
|
(_2 Unit (emit (MInst.VecRRR (VecALUOp.Bsl) dst x y (vector_size ty)))))
|
||||||
|
dst))
|
||||||
|
|
||||||
;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(decl imm (Type u64) Reg)
|
(decl imm (Type u64) Reg)
|
||||||
|
|||||||
@@ -1142,7 +1142,22 @@
|
|||||||
(rule (lower (has_type $I8X16 (popcnt x)))
|
(rule (lower (has_type $I8X16 (popcnt x)))
|
||||||
(vec_cnt x (VectorSize.Size8x16)))
|
(vec_cnt x (VectorSize.Size8x16)))
|
||||||
|
|
||||||
;;;; Rules for `fcmp` 32 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (bitselect c x y)))
|
||||||
|
(let ((tmp1 Reg (and_reg ty x c))
|
||||||
|
(tmp2 Reg (bic ty y c)))
|
||||||
|
(orr ty tmp1 tmp2)))
|
||||||
|
|
||||||
|
(rule (lower (has_type (ty_vec128 ty) (bitselect c x y)))
|
||||||
|
(bsl ty c x y))
|
||||||
|
|
||||||
|
;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(rule (lower (has_type (ty_vec128 ty) (vselect c x y)))
|
||||||
|
(bsl ty c x y))
|
||||||
|
|
||||||
|
;;;; Rules for `fcmp` 32 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x (splat (f32const (zero_value_f32 y))))))
|
(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x (splat (f32const (zero_value_f32 y))))))
|
||||||
(let ((rn Reg x)
|
(let ((rn Reg x)
|
||||||
|
|||||||
@@ -420,55 +420,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Bitselect | Opcode::Vselect => {
|
Opcode::Bitselect | Opcode::Vselect => implemented_in_isle(ctx),
|
||||||
let ty = ty.unwrap();
|
|
||||||
if !ty.is_vector() {
|
|
||||||
debug_assert_ne!(Opcode::Vselect, op);
|
|
||||||
let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
|
|
||||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
|
||||||
let rcond = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
|
||||||
let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
|
||||||
let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
|
|
||||||
// AND rTmp, rn, rcond
|
|
||||||
ctx.emit(Inst::AluRRR {
|
|
||||||
alu_op: ALUOp::And,
|
|
||||||
size: OperandSize::Size64,
|
|
||||||
rd: tmp,
|
|
||||||
rn,
|
|
||||||
rm: rcond,
|
|
||||||
});
|
|
||||||
// BIC rd, rm, rcond
|
|
||||||
ctx.emit(Inst::AluRRR {
|
|
||||||
alu_op: ALUOp::AndNot,
|
|
||||||
size: OperandSize::Size64,
|
|
||||||
rd,
|
|
||||||
rn: rm,
|
|
||||||
rm: rcond,
|
|
||||||
});
|
|
||||||
// ORR rd, rd, rTmp
|
|
||||||
ctx.emit(Inst::AluRRR {
|
|
||||||
alu_op: ALUOp::Orr,
|
|
||||||
size: OperandSize::Size64,
|
|
||||||
rd,
|
|
||||||
rn: rd.to_reg(),
|
|
||||||
rm: tmp.to_reg(),
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
let rcond = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
|
||||||
let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
|
||||||
let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
|
|
||||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
|
||||||
ctx.emit(Inst::gen_move(rd, rcond, ty));
|
|
||||||
|
|
||||||
ctx.emit(Inst::VecRRR {
|
|
||||||
alu_op: VecALUOp::Bsl,
|
|
||||||
rd,
|
|
||||||
rn,
|
|
||||||
rm,
|
|
||||||
size: VectorSize::from_ty(ty),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Opcode::Trueif => {
|
Opcode::Trueif => {
|
||||||
let condcode = ctx.data(insn).cond_code().unwrap();
|
let condcode = ctx.data(insn).cond_code().unwrap();
|
||||||
|
|||||||
@@ -249,6 +249,15 @@ macro_rules! isle_prelude_methods {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn ty_int_bool_ref_scalar_64(&mut self, ty: Type) -> Option<Type> {
|
||||||
|
if ty.bits() <= 64 && !ty.is_float() && !ty.is_vector() {
|
||||||
|
Some(ty)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn ty_32_or_64(&mut self, ty: Type) -> Option<Type> {
|
fn ty_32_or_64(&mut self, ty: Type) -> Option<Type> {
|
||||||
if ty.bits() == 32 || ty.bits() == 64 {
|
if ty.bits() == 32 || ty.bits() == 64 {
|
||||||
|
|||||||
@@ -293,6 +293,11 @@
|
|||||||
(decl fits_in_64 (Type) Type)
|
(decl fits_in_64 (Type) Type)
|
||||||
(extern extractor fits_in_64 fits_in_64)
|
(extern extractor fits_in_64 fits_in_64)
|
||||||
|
|
||||||
|
;; An extractor that only matches scalar booleans, integers, and references that
|
||||||
|
;; can fit in 64 bits.
|
||||||
|
(decl ty_int_bool_ref_scalar_64 (Type) Type)
|
||||||
|
(extern extractor ty_int_bool_ref_scalar_64 ty_int_bool_ref_scalar_64)
|
||||||
|
|
||||||
;; An extractor that matches 32- and 64-bit types only.
|
;; An extractor that matches 32- and 64-bit types only.
|
||||||
(decl ty_32_or_64 (Type) Type)
|
(decl ty_32_or_64 (Type) Type)
|
||||||
(extern extractor ty_32_or_64 ty_32_or_64)
|
(extern extractor ty_32_or_64 ty_32_or_64)
|
||||||
|
|||||||
@@ -37,9 +37,9 @@ block0(v0: i8, v1: i8, v2: i8):
|
|||||||
}
|
}
|
||||||
|
|
||||||
; block0:
|
; block0:
|
||||||
; and x8, x1, x0
|
; and w7, w1, w0
|
||||||
; bic x0, x2, x0
|
; bic w9, w2, w0
|
||||||
; orr x0, x0, x8
|
; orr w0, w7, w9
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
function %i(b1, i8, i8) -> i8 {
|
function %i(b1, i8, i8) -> i8 {
|
||||||
|
|||||||
@@ -1,10 +1,8 @@
|
|||||||
test run
|
test run
|
||||||
target aarch64
|
target aarch64
|
||||||
target arm
|
|
||||||
target s390x
|
target s390x
|
||||||
; target x86_64 TODO: Not yet implemented on x86_64
|
; target x86_64 TODO: Not yet implemented on x86_64
|
||||||
|
|
||||||
|
|
||||||
function %bnot_band() -> b1 {
|
function %bnot_band() -> b1 {
|
||||||
block0:
|
block0:
|
||||||
v1 = bconst.b1 false
|
v1 = bconst.b1 false
|
||||||
@@ -14,3 +12,44 @@ block0:
|
|||||||
return v4
|
return v4
|
||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|
||||||
|
function %bitselect_i8(i8, i8, i8) -> i8 {
|
||||||
|
block0(v0: i8, v1: i8, v2: i8):
|
||||||
|
v3 = bitselect v0, v1, v2
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; run: %bitselect_i8(0, 0, 0xFF) == 0xFF
|
||||||
|
; run: %bitselect_i8(0x55, 0, 0xFF) == 0xAA
|
||||||
|
; run: %bitselect_i8(0xF0, 32, 13) == 45
|
||||||
|
; run: %bitselect_i8(0xFF, 0xFF, 0) == 0xFF
|
||||||
|
|
||||||
|
function %bitselect_i16(i16, i16, i16) -> i16 {
|
||||||
|
block0(v0: i16, v1: i16, v2: i16):
|
||||||
|
v3 = bitselect v0, v1, v2
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; run: %bitselect_i16(0, 0, 0xFFFF) == 0xFFFF
|
||||||
|
; run: %bitselect_i16(0x5555, 0, 0xFFFF) == 0xAAAA
|
||||||
|
; run: %bitselect_i16(0xFFFF, 0xFFFF, 0) == 0xFFFF
|
||||||
|
|
||||||
|
function %bitselect_i32(i32, i32, i32) -> i32 {
|
||||||
|
block0(v0: i32, v1: i32, v2: i32):
|
||||||
|
v3 = bitselect v0, v1, v2
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; run: %bitselect_i32(0, 0, 0xFFFFFFFF) == 0xFFFFFFFF
|
||||||
|
; run: %bitselect_i32(0x55555555, 0, 0xFFFFFFFF) == 0xAAAAAAAA
|
||||||
|
; run: %bitselect_i32(0xFFFFFFFF, 0xFFFFFFFF, 0) == 0xFFFFFFFF
|
||||||
|
|
||||||
|
function %bitselect_i64(i64, i64, i64) -> i64 {
|
||||||
|
block0(v0: i64, v1: i64, v2: i64):
|
||||||
|
v3 = bitselect v0, v1, v2
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; run: %bitselect_i64(0, 0, 0xFFFFFFFFFFFFFFFF) == 0xFFFFFFFFFFFFFFFF
|
||||||
|
; run: %bitselect_i64(0x5555555555555555, 0, 0xFFFFFFFFFFFFFFFF) == 0xAAAAAAAAAAAAAAAA
|
||||||
|
; run: %bitselect_i64(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0) == 0xFFFFFFFFFFFFFFFF
|
||||||
|
|||||||
Reference in New Issue
Block a user