Implement iabs in ISLE (AArch64) (#4399)
* Implement `iabs` in ISLE (AArch64).
  Converts the existing implementation of `iabs` for AArch64 into ISLE, and fixes support for `iabs` on scalar values.
  Copyright (c) 2022 Arm Limited.
* Improve scalar `iabs` implementation.
  Also introduces the `CSNeg` instruction.
  Copyright (c) 2022 Arm Limited.
This commit is contained in:
@@ -187,6 +187,13 @@
|
|||||||
(rn Reg)
|
(rn Reg)
|
||||||
(rm Reg))
|
(rm Reg))
|
||||||
|
|
||||||
|
;; A conditional-select negation operation.
|
||||||
|
(CSNeg
|
||||||
|
(rd WritableReg)
|
||||||
|
(cond Cond)
|
||||||
|
(rn Reg)
|
||||||
|
(rm Reg))
|
||||||
|
|
||||||
;; A conditional-set operation.
|
;; A conditional-set operation.
|
||||||
(CSet
|
(CSet
|
||||||
(rd WritableReg)
|
(rd WritableReg)
|
||||||
@@ -1534,11 +1541,15 @@
|
|||||||
(MInst.AluRRR (ALUOp.SubS) (operand_size ty) dst src1 src2)
|
(MInst.AluRRR (ALUOp.SubS) (operand_size ty) dst src1 src2)
|
||||||
dst)))
|
dst)))
|
||||||
|
|
||||||
|
(decl cmp_imm (OperandSize Reg Imm12) ProducesFlags)
|
||||||
|
(rule (cmp_imm size src1 src2)
|
||||||
|
(ProducesFlags.ProducesFlagsSideEffect
|
||||||
|
(MInst.AluRRImm12 (ALUOp.SubS) size (writable_zero_reg)
|
||||||
|
src1 src2)))
|
||||||
|
|
||||||
(decl cmp64_imm (Reg Imm12) ProducesFlags)
|
(decl cmp64_imm (Reg Imm12) ProducesFlags)
|
||||||
(rule (cmp64_imm src1 src2)
|
(rule (cmp64_imm src1 src2)
|
||||||
(ProducesFlags.ProducesFlagsSideEffect
|
(cmp_imm (OperandSize.Size64) src1 src2))
|
||||||
(MInst.AluRRImm12 (ALUOp.SubS) (OperandSize.Size64) (writable_zero_reg)
|
|
||||||
src1 src2)))
|
|
||||||
|
|
||||||
;; Helper for emitting `sbc` instructions.
|
;; Helper for emitting `sbc` instructions.
|
||||||
(decl sbc_paired (Type Reg Reg) ConsumesFlags)
|
(decl sbc_paired (Type Reg Reg) ConsumesFlags)
|
||||||
@@ -1681,6 +1692,18 @@
|
|||||||
(MInst.CSel dst cond if_true if_false)
|
(MInst.CSel dst cond if_true if_false)
|
||||||
dst)))
|
dst)))
|
||||||
|
|
||||||
|
;; Helper for generating a `CSNeg` instruction.
|
||||||
|
;;
|
||||||
|
;; Note that this doesn't actually emit anything, instead it produces a
|
||||||
|
;; `ConsumesFlags` instruction which must be consumed with `with_flags*`
|
||||||
|
;; helpers.
|
||||||
|
(decl csneg (Cond Reg Reg) ConsumesFlags)
|
||||||
|
(rule (csneg cond if_true if_false)
|
||||||
|
(let ((dst WritableReg (temp_writable_reg $I64)))
|
||||||
|
(ConsumesFlags.ConsumesFlagsReturnsReg
|
||||||
|
(MInst.CSNeg dst cond if_true if_false)
|
||||||
|
dst)))
|
||||||
|
|
||||||
;; Helpers for generating `add` instructions.
|
;; Helpers for generating `add` instructions.
|
||||||
|
|
||||||
(decl add (Type Reg Reg) Reg)
|
(decl add (Type Reg Reg) Reg)
|
||||||
@@ -1769,6 +1792,17 @@
|
|||||||
(decl addp (Reg Reg VectorSize) Reg)
|
(decl addp (Reg Reg VectorSize) Reg)
|
||||||
(rule (addp x y size) (vec_rrr (VecALUOp.Addp) x y size))
|
(rule (addp x y size) (vec_rrr (VecALUOp.Addp) x y size))
|
||||||
|
|
||||||
|
;; Helper for generating vector `abs` instructions.
|
||||||
|
(decl vec_abs (Reg VectorSize) Reg)
|
||||||
|
(rule (vec_abs x size) (vec_misc (VecMisc2.Abs) x size))
|
||||||
|
|
||||||
|
;; Helper for generating instruction sequences to calculate a scalar absolute
|
||||||
|
;; value.
|
||||||
|
(decl abs (OperandSize Reg) Reg)
|
||||||
|
(rule (abs size x)
|
||||||
|
(value_regs_get (with_flags (cmp_imm size x (u8_into_imm12 0))
|
||||||
|
(csneg (Cond.Gt) x x)) 0))
|
||||||
|
|
||||||
;; Helper for generating `addv` instructions.
|
;; Helper for generating `addv` instructions.
|
||||||
(decl addv (Reg VectorSize) Reg)
|
(decl addv (Reg VectorSize) Reg)
|
||||||
(rule (addv x size) (vec_lanes (VecLanesOp.Addv) x size))
|
(rule (addv x size) (vec_lanes (VecLanesOp.Addv) x size))
|
||||||
|
|||||||
@@ -331,12 +331,16 @@ pub(crate) fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
|
|||||||
(0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
|
(0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond) -> u32 {
|
fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, op: u32, o2: u32) -> u32 {
|
||||||
|
debug_assert_eq!(op & 0b1, op);
|
||||||
|
debug_assert_eq!(o2 & 0b1, o2);
|
||||||
0b100_11010100_00000_0000_00_00000_00000
|
0b100_11010100_00000_0000_00_00000_00000
|
||||||
|
| (op << 30)
|
||||||
| (machreg_to_gpr(rm) << 16)
|
| (machreg_to_gpr(rm) << 16)
|
||||||
|
| (cond.bits() << 12)
|
||||||
|
| (o2 << 10)
|
||||||
| (machreg_to_gpr(rn) << 5)
|
| (machreg_to_gpr(rn) << 5)
|
||||||
| machreg_to_gpr(rd.to_reg())
|
| machreg_to_gpr(rd.to_reg())
|
||||||
| (cond.bits() << 12)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
|
fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
|
||||||
@@ -348,18 +352,6 @@ fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize)
|
|||||||
| (cond.bits() << 12)
|
| (cond.bits() << 12)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
|
|
||||||
0b100_11010100_11111_0000_01_11111_00000
|
|
||||||
| machreg_to_gpr(rd.to_reg())
|
|
||||||
| (cond.invert().bits() << 12)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn enc_csetm(rd: Writable<Reg>, cond: Cond) -> u32 {
|
|
||||||
0b110_11010100_11111_0000_00_11111_00000
|
|
||||||
| machreg_to_gpr(rd.to_reg())
|
|
||||||
| (cond.invert().bits() << 12)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
|
fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
|
||||||
0b0_1_1_11010010_00000_0000_10_00000_0_0000
|
0b0_1_1_11010010_00000_0000_10_00000_0_0000
|
||||||
| size.sf_bit() << 31
|
| size.sf_bit() << 31
|
||||||
@@ -1352,15 +1344,21 @@ impl MachInstEmit for Inst {
|
|||||||
let rd = allocs.next_writable(rd);
|
let rd = allocs.next_writable(rd);
|
||||||
let rn = allocs.next(rn);
|
let rn = allocs.next(rn);
|
||||||
let rm = allocs.next(rm);
|
let rm = allocs.next(rm);
|
||||||
sink.put4(enc_csel(rd, rn, rm, cond));
|
sink.put4(enc_csel(rd, rn, rm, cond, 0, 0));
|
||||||
|
}
|
||||||
|
&Inst::CSNeg { rd, rn, rm, cond } => {
|
||||||
|
let rd = allocs.next_writable(rd);
|
||||||
|
let rn = allocs.next(rn);
|
||||||
|
let rm = allocs.next(rm);
|
||||||
|
sink.put4(enc_csel(rd, rn, rm, cond, 1, 1));
|
||||||
}
|
}
|
||||||
&Inst::CSet { rd, cond } => {
|
&Inst::CSet { rd, cond } => {
|
||||||
let rd = allocs.next_writable(rd);
|
let rd = allocs.next_writable(rd);
|
||||||
sink.put4(enc_cset(rd, cond));
|
sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 0, 1));
|
||||||
}
|
}
|
||||||
&Inst::CSetm { rd, cond } => {
|
&Inst::CSetm { rd, cond } => {
|
||||||
let rd = allocs.next_writable(rd);
|
let rd = allocs.next_writable(rd);
|
||||||
sink.put4(enc_csetm(rd, cond));
|
sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 1, 0));
|
||||||
}
|
}
|
||||||
&Inst::CCmpImm {
|
&Inst::CCmpImm {
|
||||||
size,
|
size,
|
||||||
|
|||||||
@@ -2129,6 +2129,16 @@ fn test_aarch64_binemit() {
|
|||||||
"8A218E9A",
|
"8A218E9A",
|
||||||
"csel x10, x12, x14, hs",
|
"csel x10, x12, x14, hs",
|
||||||
));
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::CSNeg {
|
||||||
|
rd: writable_xreg(10),
|
||||||
|
rn: xreg(12),
|
||||||
|
rm: xreg(14),
|
||||||
|
cond: Cond::Hs,
|
||||||
|
},
|
||||||
|
"8A258EDA",
|
||||||
|
"csneg x10, x12, x14, hs",
|
||||||
|
));
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::CSet {
|
Inst::CSet {
|
||||||
rd: writable_xreg(15),
|
rd: writable_xreg(15),
|
||||||
|
|||||||
@@ -669,6 +669,11 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
|
|||||||
collector.reg_use(rn);
|
collector.reg_use(rn);
|
||||||
collector.reg_use(rm);
|
collector.reg_use(rm);
|
||||||
}
|
}
|
||||||
|
&Inst::CSNeg { rd, rn, rm, .. } => {
|
||||||
|
collector.reg_def(rd);
|
||||||
|
collector.reg_use(rn);
|
||||||
|
collector.reg_use(rm);
|
||||||
|
}
|
||||||
&Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => {
|
&Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => {
|
||||||
collector.reg_def(rd);
|
collector.reg_def(rd);
|
||||||
}
|
}
|
||||||
@@ -1510,6 +1515,13 @@ impl Inst {
|
|||||||
let cond = cond.pretty_print(0, allocs);
|
let cond = cond.pretty_print(0, allocs);
|
||||||
format!("csel {}, {}, {}, {}", rd, rn, rm, cond)
|
format!("csel {}, {}, {}, {}", rd, rn, rm, cond)
|
||||||
}
|
}
|
||||||
|
&Inst::CSNeg { rd, rn, rm, cond } => {
|
||||||
|
let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs);
|
||||||
|
let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs);
|
||||||
|
let rm = pretty_print_ireg(rm, OperandSize::Size64, allocs);
|
||||||
|
let cond = cond.pretty_print(0, allocs);
|
||||||
|
format!("csneg {}, {}, {}, {}", rd, rn, rm, cond)
|
||||||
|
}
|
||||||
&Inst::CSet { rd, cond } => {
|
&Inst::CSet { rd, cond } => {
|
||||||
let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs);
|
let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs);
|
||||||
let cond = cond.pretty_print(0, allocs);
|
let cond = cond.pretty_print(0, allocs);
|
||||||
|
|||||||
@@ -156,6 +156,17 @@
|
|||||||
(rule (lower (has_type ty (iadd_pairwise x y)))
|
(rule (lower (has_type ty (iadd_pairwise x y)))
|
||||||
(addp x y (vector_size ty)))
|
(addp x y (vector_size ty)))
|
||||||
|
|
||||||
|
;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(rule (lower (has_type ty @ (multi_lane _ _) (iabs x)))
|
||||||
|
(vec_abs x (vector_size ty)))
|
||||||
|
|
||||||
|
(rule (lower (has_type $I64 (iabs x)))
|
||||||
|
(abs (OperandSize.Size64) x))
|
||||||
|
|
||||||
|
(rule (lower (has_type (fits_in_32 ty) (iabs x)))
|
||||||
|
(abs (OperandSize.Size32) (put_in_reg_sext32 x)))
|
||||||
|
|
||||||
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
;; `i64` and smaller
|
;; `i64` and smaller
|
||||||
|
|||||||
@@ -1941,17 +1941,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
panic!("ALU+imm and ALU+carry ops should not appear here!");
|
panic!("ALU+imm and ALU+carry ops should not appear here!");
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Iabs => {
|
Opcode::Iabs => implemented_in_isle(ctx),
|
||||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
|
||||||
let ty = ty.unwrap();
|
|
||||||
ctx.emit(Inst::VecMisc {
|
|
||||||
op: VecMisc2::Abs,
|
|
||||||
rd,
|
|
||||||
rn,
|
|
||||||
size: VectorSize::from_ty(ty),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
Opcode::AvgRound => {
|
Opcode::AvgRound => {
|
||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
|
|
||||||
|
|||||||
119
cranelift/filetests/filetests/isa/aarch64/iabs.clif
Normal file
119
cranelift/filetests/filetests/isa/aarch64/iabs.clif
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
test compile precise-output
|
||||||
|
set unwind_info=false
|
||||||
|
target aarch64
|
||||||
|
|
||||||
|
function %f1(i8x16) -> i8x16 {
|
||||||
|
block0(v0: i8x16):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; abs v0.16b, v0.16b
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %f2(i8x8) -> i8x8 {
|
||||||
|
block0(v0: i8x8):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; abs v0.8b, v0.8b
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %f3(i16x8) -> i16x8 {
|
||||||
|
block0(v0: i16x8):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; abs v0.8h, v0.8h
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %f4(i16x4) -> i16x4 {
|
||||||
|
block0(v0: i16x4):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; abs v0.4h, v0.4h
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %f5(i32x4) -> i32x4 {
|
||||||
|
block0(v0: i32x4):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; abs v0.4s, v0.4s
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %f6(i32x2) -> i32x2 {
|
||||||
|
block0(v0: i32x2):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; abs v0.2s, v0.2s
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %f7(i64x2) -> i64x2 {
|
||||||
|
block0(v0: i64x2):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; abs v0.2d, v0.2d
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %f8(i8) -> i8 {
|
||||||
|
block0(v0: i8):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; sxtb w3, w0
|
||||||
|
; subs wzr, w3, #0
|
||||||
|
; csneg x0, x3, x3, gt
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %f9(i16) -> i16 {
|
||||||
|
block0(v0: i16):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; sxth w3, w0
|
||||||
|
; subs wzr, w3, #0
|
||||||
|
; csneg x0, x3, x3, gt
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %f10(i32) -> i32 {
|
||||||
|
block0(v0: i32):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; subs wzr, w0, #0
|
||||||
|
; csneg x0, x0, x0, gt
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %f11(i64) -> i64 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; subs xzr, x0, #0
|
||||||
|
; csneg x0, x0, x0, gt
|
||||||
|
; ret
|
||||||
@@ -1,5 +1,7 @@
|
|||||||
test interpret
|
test interpret
|
||||||
; aarch64 & x86_64 only support vector iabs
|
test run
|
||||||
|
target aarch64
|
||||||
|
; x86_64 only supports vector iabs
|
||||||
|
|
||||||
function %iabs_i8(i8) -> i8 {
|
function %iabs_i8(i8) -> i8 {
|
||||||
block0(v0: i8):
|
block0(v0: i8):
|
||||||
|
|||||||
Reference in New Issue
Block a user