Implement iabs in ISLE (AArch64) (#4399)
* Implement `iabs` in ISLE (AArch64)

  Converts the existing implementation of `iabs` for AArch64 into ISLE, and fixes support for `iabs` on scalar values.

  Copyright (c) 2022 Arm Limited.

* Improve scalar `iabs` implementation.

  Also introduces the `CSNeg` instruction.

  Copyright (c) 2022 Arm Limited
This commit is contained in:
@@ -187,6 +187,13 @@
|
||||
(rn Reg)
|
||||
(rm Reg))
|
||||
|
||||
;; A conditional-select negation operation.
|
||||
(CSNeg
|
||||
(rd WritableReg)
|
||||
(cond Cond)
|
||||
(rn Reg)
|
||||
(rm Reg))
|
||||
|
||||
;; A conditional-set operation.
|
||||
(CSet
|
||||
(rd WritableReg)
|
||||
@@ -1534,11 +1541,15 @@
|
||||
(MInst.AluRRR (ALUOp.SubS) (operand_size ty) dst src1 src2)
|
||||
dst)))
|
||||
|
||||
(decl cmp_imm (OperandSize Reg Imm12) ProducesFlags)
|
||||
(rule (cmp_imm size src1 src2)
|
||||
(ProducesFlags.ProducesFlagsSideEffect
|
||||
(MInst.AluRRImm12 (ALUOp.SubS) size (writable_zero_reg)
|
||||
src1 src2)))
|
||||
|
||||
(decl cmp64_imm (Reg Imm12) ProducesFlags)
|
||||
(rule (cmp64_imm src1 src2)
|
||||
(ProducesFlags.ProducesFlagsSideEffect
|
||||
(MInst.AluRRImm12 (ALUOp.SubS) (OperandSize.Size64) (writable_zero_reg)
|
||||
src1 src2)))
|
||||
(cmp_imm (OperandSize.Size64) src1 src2))
|
||||
|
||||
;; Helper for emitting `sbc` instructions.
|
||||
(decl sbc_paired (Type Reg Reg) ConsumesFlags)
|
||||
@@ -1681,6 +1692,18 @@
|
||||
(MInst.CSel dst cond if_true if_false)
|
||||
dst)))
|
||||
|
||||
;; Helper for generating a `CSNeg` instruction.
|
||||
;;
|
||||
;; Note that this doesn't actually emit anything, instead it produces a
|
||||
;; `ConsumesFlags` instruction which must be consumed with `with_flags*`
|
||||
;; helpers.
|
||||
(decl csneg (Cond Reg Reg) ConsumesFlags)
|
||||
(rule (csneg cond if_true if_false)
|
||||
(let ((dst WritableReg (temp_writable_reg $I64)))
|
||||
(ConsumesFlags.ConsumesFlagsReturnsReg
|
||||
(MInst.CSNeg dst cond if_true if_false)
|
||||
dst)))
|
||||
|
||||
;; Helpers for generating `add` instructions.
|
||||
|
||||
(decl add (Type Reg Reg) Reg)
|
||||
@@ -1769,6 +1792,17 @@
|
||||
(decl addp (Reg Reg VectorSize) Reg)
|
||||
(rule (addp x y size) (vec_rrr (VecALUOp.Addp) x y size))
|
||||
|
||||
;; Helper for generating vector `abs` instructions.
|
||||
(decl vec_abs (Reg VectorSize) Reg)
|
||||
(rule (vec_abs x size) (vec_misc (VecMisc2.Abs) x size))
|
||||
|
||||
;; Helper for generating instruction sequences to calculate a scalar absolute
|
||||
;; value.
|
||||
(decl abs (OperandSize Reg) Reg)
|
||||
(rule (abs size x)
|
||||
(value_regs_get (with_flags (cmp_imm size x (u8_into_imm12 0))
|
||||
(csneg (Cond.Gt) x x)) 0))
|
||||
|
||||
;; Helper for generating `addv` instructions.
|
||||
(decl addv (Reg VectorSize) Reg)
|
||||
(rule (addv x size) (vec_lanes (VecLanesOp.Addv) x size))
|
||||
|
||||
@@ -331,12 +331,16 @@ pub(crate) fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
|
||||
(0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
|
||||
}
|
||||
|
||||
fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond) -> u32 {
|
||||
fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, op: u32, o2: u32) -> u32 {
|
||||
debug_assert_eq!(op & 0b1, op);
|
||||
debug_assert_eq!(o2 & 0b1, o2);
|
||||
0b100_11010100_00000_0000_00_00000_00000
|
||||
| (op << 30)
|
||||
| (machreg_to_gpr(rm) << 16)
|
||||
| (cond.bits() << 12)
|
||||
| (o2 << 10)
|
||||
| (machreg_to_gpr(rn) << 5)
|
||||
| machreg_to_gpr(rd.to_reg())
|
||||
| (cond.bits() << 12)
|
||||
}
|
||||
|
||||
fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
|
||||
@@ -348,18 +352,6 @@ fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize)
|
||||
| (cond.bits() << 12)
|
||||
}
|
||||
|
||||
fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
|
||||
0b100_11010100_11111_0000_01_11111_00000
|
||||
| machreg_to_gpr(rd.to_reg())
|
||||
| (cond.invert().bits() << 12)
|
||||
}
|
||||
|
||||
fn enc_csetm(rd: Writable<Reg>, cond: Cond) -> u32 {
|
||||
0b110_11010100_11111_0000_00_11111_00000
|
||||
| machreg_to_gpr(rd.to_reg())
|
||||
| (cond.invert().bits() << 12)
|
||||
}
|
||||
|
||||
fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
|
||||
0b0_1_1_11010010_00000_0000_10_00000_0_0000
|
||||
| size.sf_bit() << 31
|
||||
@@ -1352,15 +1344,21 @@ impl MachInstEmit for Inst {
|
||||
let rd = allocs.next_writable(rd);
|
||||
let rn = allocs.next(rn);
|
||||
let rm = allocs.next(rm);
|
||||
sink.put4(enc_csel(rd, rn, rm, cond));
|
||||
sink.put4(enc_csel(rd, rn, rm, cond, 0, 0));
|
||||
}
|
||||
&Inst::CSNeg { rd, rn, rm, cond } => {
|
||||
let rd = allocs.next_writable(rd);
|
||||
let rn = allocs.next(rn);
|
||||
let rm = allocs.next(rm);
|
||||
sink.put4(enc_csel(rd, rn, rm, cond, 1, 1));
|
||||
}
|
||||
&Inst::CSet { rd, cond } => {
|
||||
let rd = allocs.next_writable(rd);
|
||||
sink.put4(enc_cset(rd, cond));
|
||||
sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 0, 1));
|
||||
}
|
||||
&Inst::CSetm { rd, cond } => {
|
||||
let rd = allocs.next_writable(rd);
|
||||
sink.put4(enc_csetm(rd, cond));
|
||||
sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 1, 0));
|
||||
}
|
||||
&Inst::CCmpImm {
|
||||
size,
|
||||
|
||||
@@ -2129,6 +2129,16 @@ fn test_aarch64_binemit() {
|
||||
"8A218E9A",
|
||||
"csel x10, x12, x14, hs",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CSNeg {
|
||||
rd: writable_xreg(10),
|
||||
rn: xreg(12),
|
||||
rm: xreg(14),
|
||||
cond: Cond::Hs,
|
||||
},
|
||||
"8A258EDA",
|
||||
"csneg x10, x12, x14, hs",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CSet {
|
||||
rd: writable_xreg(15),
|
||||
|
||||
@@ -669,6 +669,11 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
|
||||
collector.reg_use(rn);
|
||||
collector.reg_use(rm);
|
||||
}
|
||||
&Inst::CSNeg { rd, rn, rm, .. } => {
|
||||
collector.reg_def(rd);
|
||||
collector.reg_use(rn);
|
||||
collector.reg_use(rm);
|
||||
}
|
||||
&Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => {
|
||||
collector.reg_def(rd);
|
||||
}
|
||||
@@ -1510,6 +1515,13 @@ impl Inst {
|
||||
let cond = cond.pretty_print(0, allocs);
|
||||
format!("csel {}, {}, {}, {}", rd, rn, rm, cond)
|
||||
}
|
||||
&Inst::CSNeg { rd, rn, rm, cond } => {
|
||||
let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs);
|
||||
let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs);
|
||||
let rm = pretty_print_ireg(rm, OperandSize::Size64, allocs);
|
||||
let cond = cond.pretty_print(0, allocs);
|
||||
format!("csneg {}, {}, {}, {}", rd, rn, rm, cond)
|
||||
}
|
||||
&Inst::CSet { rd, cond } => {
|
||||
let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs);
|
||||
let cond = cond.pretty_print(0, allocs);
|
||||
|
||||
@@ -156,6 +156,17 @@
|
||||
(rule (lower (has_type ty (iadd_pairwise x y)))
|
||||
(addp x y (vector_size ty)))
|
||||
|
||||
;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type ty @ (multi_lane _ _) (iabs x)))
|
||||
(vec_abs x (vector_size ty)))
|
||||
|
||||
(rule (lower (has_type $I64 (iabs x)))
|
||||
(abs (OperandSize.Size64) x))
|
||||
|
||||
(rule (lower (has_type (fits_in_32 ty) (iabs x)))
|
||||
(abs (OperandSize.Size32) (put_in_reg_sext32 x)))
|
||||
|
||||
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller
|
||||
|
||||
@@ -1941,17 +1941,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
panic!("ALU+imm and ALU+carry ops should not appear here!");
|
||||
}
|
||||
|
||||
Opcode::Iabs => {
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op: VecMisc2::Abs,
|
||||
rd,
|
||||
rn,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
Opcode::Iabs => implemented_in_isle(ctx),
|
||||
Opcode::AvgRound => {
|
||||
let ty = ty.unwrap();
|
||||
|
||||
|
||||
119
cranelift/filetests/filetests/isa/aarch64/iabs.clif
Normal file
119
cranelift/filetests/filetests/isa/aarch64/iabs.clif
Normal file
@@ -0,0 +1,119 @@
|
||||
test compile precise-output
|
||||
set unwind_info=false
|
||||
target aarch64
|
||||
|
||||
function %f1(i8x16) -> i8x16 {
|
||||
block0(v0: i8x16):
|
||||
v1 = iabs v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; block0:
|
||||
; abs v0.16b, v0.16b
|
||||
; ret
|
||||
|
||||
function %f2(i8x8) -> i8x8 {
|
||||
block0(v0: i8x8):
|
||||
v1 = iabs v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; block0:
|
||||
; abs v0.8b, v0.8b
|
||||
; ret
|
||||
|
||||
function %f3(i16x8) -> i16x8 {
|
||||
block0(v0: i16x8):
|
||||
v1 = iabs v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; block0:
|
||||
; abs v0.8h, v0.8h
|
||||
; ret
|
||||
|
||||
function %f4(i16x4) -> i16x4 {
|
||||
block0(v0: i16x4):
|
||||
v1 = iabs v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; block0:
|
||||
; abs v0.4h, v0.4h
|
||||
; ret
|
||||
|
||||
function %f5(i32x4) -> i32x4 {
|
||||
block0(v0: i32x4):
|
||||
v1 = iabs v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; block0:
|
||||
; abs v0.4s, v0.4s
|
||||
; ret
|
||||
|
||||
function %f6(i32x2) -> i32x2 {
|
||||
block0(v0: i32x2):
|
||||
v1 = iabs v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; block0:
|
||||
; abs v0.2s, v0.2s
|
||||
; ret
|
||||
|
||||
function %f7(i64x2) -> i64x2 {
|
||||
block0(v0: i64x2):
|
||||
v1 = iabs v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; block0:
|
||||
; abs v0.2d, v0.2d
|
||||
; ret
|
||||
|
||||
function %f8(i8) -> i8 {
|
||||
block0(v0: i8):
|
||||
v1 = iabs v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; block0:
|
||||
; sxtb w3, w0
|
||||
; subs wzr, w3, #0
|
||||
; csneg x0, x3, x3, gt
|
||||
; ret
|
||||
|
||||
function %f9(i16) -> i16 {
|
||||
block0(v0: i16):
|
||||
v1 = iabs v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; block0:
|
||||
; sxth w3, w0
|
||||
; subs wzr, w3, #0
|
||||
; csneg x0, x3, x3, gt
|
||||
; ret
|
||||
|
||||
function %f10(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = iabs v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; block0:
|
||||
; subs wzr, w0, #0
|
||||
; csneg x0, x0, x0, gt
|
||||
; ret
|
||||
|
||||
function %f11(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = iabs v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; block0:
|
||||
; subs xzr, x0, #0
|
||||
; csneg x0, x0, x0, gt
|
||||
; ret
|
||||
@@ -1,5 +1,7 @@
|
||||
test interpret
|
||||
; aarch64 & x86_64 only support vector iabs
|
||||
test run
|
||||
target aarch64
|
||||
; x86_64 only supports vector iabs
|
||||
|
||||
function %iabs_i8(i8) -> i8 {
|
||||
block0(v0: i8):
|
||||
|
||||
Reference in New Issue
Block a user