Implement iabs in ISLE (AArch64) (#4399)

* Implement `iabs` in ISLE (AArch64)

Converts the existing implementation of `iabs` for AArch64 into ISLE,
and fixes support for `iabs` on scalar values.

Copyright (c) 2022 Arm Limited.

* Improve scalar `iabs` implementation.

Also introduces the `CSNeg` instruction.

Copyright (c) 2022 Arm Limited
This commit is contained in:
Damian Heaton
2022-07-18 19:12:34 +01:00
committed by GitHub
parent db7f9ccd2b
commit d792646677
8 changed files with 208 additions and 32 deletions

View File

@@ -187,6 +187,13 @@
(rn Reg) (rn Reg)
(rm Reg)) (rm Reg))
;; A conditional-select negation operation.
(CSNeg
(rd WritableReg)
(cond Cond)
(rn Reg)
(rm Reg))
;; A conditional-set operation. ;; A conditional-set operation.
(CSet (CSet
(rd WritableReg) (rd WritableReg)
@@ -1534,11 +1541,15 @@
(MInst.AluRRR (ALUOp.SubS) (operand_size ty) dst src1 src2) (MInst.AluRRR (ALUOp.SubS) (operand_size ty) dst src1 src2)
dst))) dst)))
;; Helper for comparing a register against a 12-bit immediate at a
;; caller-chosen operand size.
;;
;; The comparison is expressed as a flag-setting subtract (`ALUOp.SubS`) whose
;; result is written to the zero register, so only the flags are kept. The
;; instruction is wrapped in a `ProducesFlags` value rather than being used
;; for its register result.
(decl cmp_imm (OperandSize Reg Imm12) ProducesFlags)
(rule (cmp_imm size rn imm)
      (ProducesFlags.ProducesFlagsSideEffect
        (MInst.AluRRImm12 (ALUOp.SubS) size (writable_zero_reg) rn imm)))
(decl cmp64_imm (Reg Imm12) ProducesFlags) (decl cmp64_imm (Reg Imm12) ProducesFlags)
(rule (cmp64_imm src1 src2) (rule (cmp64_imm src1 src2)
(ProducesFlags.ProducesFlagsSideEffect (cmp_imm (OperandSize.Size64) src1 src2))
(MInst.AluRRImm12 (ALUOp.SubS) (OperandSize.Size64) (writable_zero_reg)
src1 src2)))
;; Helper for emitting `sbc` instructions. ;; Helper for emitting `sbc` instructions.
(decl sbc_paired (Type Reg Reg) ConsumesFlags) (decl sbc_paired (Type Reg Reg) ConsumesFlags)
@@ -1681,6 +1692,18 @@
(MInst.CSel dst cond if_true if_false) (MInst.CSel dst cond if_true if_false)
dst))) dst)))
;; Helper for generating a `CSNeg` (conditional-select negation) instruction.
;;
;; This helper does not emit anything by itself. It builds the instruction and
;; hands it back wrapped in a `ConsumesFlags` value, which must then be
;; consumed by one of the `with_flags*` helpers. The destination is a fresh
;; `$I64` temporary register.
(decl csneg (Cond Reg Reg) ConsumesFlags)
(rule (csneg cond rn rm)
      (let ((rd WritableReg (temp_writable_reg $I64)))
        (ConsumesFlags.ConsumesFlagsReturnsReg
          (MInst.CSNeg rd cond rn rm)
          rd)))
;; Helpers for generating `add` instructions. ;; Helpers for generating `add` instructions.
(decl add (Type Reg Reg) Reg) (decl add (Type Reg Reg) Reg)
@@ -1769,6 +1792,17 @@
(decl addp (Reg Reg VectorSize) Reg) (decl addp (Reg Reg VectorSize) Reg)
(rule (addp x y size) (vec_rrr (VecALUOp.Addp) x y size)) (rule (addp x y size) (vec_rrr (VecALUOp.Addp) x y size))
;; Helper for generating vector `abs` instructions: a `VecMisc2.Abs` operation
;; applied to `src` with the given vector size.
(decl vec_abs (Reg VectorSize) Reg)
(rule (vec_abs src lanes)
      (vec_misc (VecMisc2.Abs) src lanes))
;; Helper for generating the instruction sequence that computes a scalar
;; absolute value.
;;
;; The value is compared against zero at the requested operand size, then a
;; `csneg` on `Cond.Gt` selects the value itself when it is greater than zero
;; and its negation otherwise. Only the comparison takes `size`; `CSNeg`
;; carries no operand size of its own.
(decl abs (OperandSize Reg) Reg)
(rule (abs size x)
      (let ((compare ProducesFlags (cmp_imm size x (u8_into_imm12 0)))
            (select ConsumesFlags (csneg (Cond.Gt) x x))
            (result ValueRegs (with_flags compare select)))
        (value_regs_get result 0)))
;; Helper for generating `addv` instructions. ;; Helper for generating `addv` instructions.
(decl addv (Reg VectorSize) Reg) (decl addv (Reg VectorSize) Reg)
(rule (addv x size) (vec_lanes (VecLanesOp.Addv) x size)) (rule (addv x size) (vec_lanes (VecLanesOp.Addv) x size))

View File

@@ -331,12 +331,16 @@ pub(crate) fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
(0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg()) (0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
} }
fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond) -> u32 { fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, op: u32, o2: u32) -> u32 {
debug_assert_eq!(op & 0b1, op);
debug_assert_eq!(o2 & 0b1, o2);
0b100_11010100_00000_0000_00_00000_00000 0b100_11010100_00000_0000_00_00000_00000
| (op << 30)
| (machreg_to_gpr(rm) << 16) | (machreg_to_gpr(rm) << 16)
| (cond.bits() << 12)
| (o2 << 10)
| (machreg_to_gpr(rn) << 5) | (machreg_to_gpr(rn) << 5)
| machreg_to_gpr(rd.to_reg()) | machreg_to_gpr(rd.to_reg())
| (cond.bits() << 12)
} }
fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 { fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
@@ -348,18 +352,6 @@ fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize)
| (cond.bits() << 12) | (cond.bits() << 12)
} }
fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
0b100_11010100_11111_0000_01_11111_00000
| machreg_to_gpr(rd.to_reg())
| (cond.invert().bits() << 12)
}
fn enc_csetm(rd: Writable<Reg>, cond: Cond) -> u32 {
0b110_11010100_11111_0000_00_11111_00000
| machreg_to_gpr(rd.to_reg())
| (cond.invert().bits() << 12)
}
fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 { fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
0b0_1_1_11010010_00000_0000_10_00000_0_0000 0b0_1_1_11010010_00000_0000_10_00000_0_0000
| size.sf_bit() << 31 | size.sf_bit() << 31
@@ -1352,15 +1344,21 @@ impl MachInstEmit for Inst {
let rd = allocs.next_writable(rd); let rd = allocs.next_writable(rd);
let rn = allocs.next(rn); let rn = allocs.next(rn);
let rm = allocs.next(rm); let rm = allocs.next(rm);
sink.put4(enc_csel(rd, rn, rm, cond)); sink.put4(enc_csel(rd, rn, rm, cond, 0, 0));
}
&Inst::CSNeg { rd, rn, rm, cond } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let rm = allocs.next(rm);
sink.put4(enc_csel(rd, rn, rm, cond, 1, 1));
} }
&Inst::CSet { rd, cond } => { &Inst::CSet { rd, cond } => {
let rd = allocs.next_writable(rd); let rd = allocs.next_writable(rd);
sink.put4(enc_cset(rd, cond)); sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 0, 1));
} }
&Inst::CSetm { rd, cond } => { &Inst::CSetm { rd, cond } => {
let rd = allocs.next_writable(rd); let rd = allocs.next_writable(rd);
sink.put4(enc_csetm(rd, cond)); sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 1, 0));
} }
&Inst::CCmpImm { &Inst::CCmpImm {
size, size,

View File

@@ -2129,6 +2129,16 @@ fn test_aarch64_binemit() {
"8A218E9A", "8A218E9A",
"csel x10, x12, x14, hs", "csel x10, x12, x14, hs",
)); ));
insns.push((
Inst::CSNeg {
rd: writable_xreg(10),
rn: xreg(12),
rm: xreg(14),
cond: Cond::Hs,
},
"8A258EDA",
"csneg x10, x12, x14, hs",
));
insns.push(( insns.push((
Inst::CSet { Inst::CSet {
rd: writable_xreg(15), rd: writable_xreg(15),

View File

@@ -669,6 +669,11 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_use(rn); collector.reg_use(rn);
collector.reg_use(rm); collector.reg_use(rm);
} }
&Inst::CSNeg { rd, rn, rm, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
collector.reg_use(rm);
}
&Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => { &Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => {
collector.reg_def(rd); collector.reg_def(rd);
} }
@@ -1510,6 +1515,13 @@ impl Inst {
let cond = cond.pretty_print(0, allocs); let cond = cond.pretty_print(0, allocs);
format!("csel {}, {}, {}, {}", rd, rn, rm, cond) format!("csel {}, {}, {}, {}", rd, rn, rm, cond)
} }
&Inst::CSNeg { rd, rn, rm, cond } => {
let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs);
let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs);
let rm = pretty_print_ireg(rm, OperandSize::Size64, allocs);
let cond = cond.pretty_print(0, allocs);
format!("csneg {}, {}, {}, {}", rd, rn, rm, cond)
}
&Inst::CSet { rd, cond } => { &Inst::CSet { rd, cond } => {
let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs);
let cond = cond.pretty_print(0, allocs); let cond = cond.pretty_print(0, allocs);

View File

@@ -156,6 +156,17 @@
(rule (lower (has_type ty (iadd_pairwise x y))) (rule (lower (has_type ty (iadd_pairwise x y)))
(addp x y (vector_size ty))) (addp x y (vector_size ty)))
;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Vector types: a single vector `abs` over all lanes.
(rule (lower (has_type ty @ (multi_lane _ _) (iabs val)))
      (vec_abs val (vector_size ty)))

;; `i64`: compare against zero and conditionally negate at 64 bits.
(rule (lower (has_type $I64 (iabs val)))
      (abs (OperandSize.Size64) val))

;; `i32` and narrower: sign-extend the input to 32 bits first, then use the
;; 32-bit form of the scalar sequence.
(rule (lower (has_type (fits_in_32 ty) (iabs val)))
      (abs (OperandSize.Size32) (put_in_reg_sext32 val)))
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller ;; `i64` and smaller

View File

@@ -1941,17 +1941,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
panic!("ALU+imm and ALU+carry ops should not appear here!"); panic!("ALU+imm and ALU+carry ops should not appear here!");
} }
Opcode::Iabs => { Opcode::Iabs => implemented_in_isle(ctx),
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let ty = ty.unwrap();
ctx.emit(Inst::VecMisc {
op: VecMisc2::Abs,
rd,
rn,
size: VectorSize::from_ty(ty),
});
}
Opcode::AvgRound => { Opcode::AvgRound => {
let ty = ty.unwrap(); let ty = ty.unwrap();

View File

@@ -0,0 +1,119 @@
test compile precise-output
set unwind_info=false
target aarch64
function %f1(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iabs v0
return v1
}
; block0:
; abs v0.16b, v0.16b
; ret
function %f2(i8x8) -> i8x8 {
block0(v0: i8x8):
v1 = iabs v0
return v1
}
; block0:
; abs v0.8b, v0.8b
; ret
function %f3(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iabs v0
return v1
}
; block0:
; abs v0.8h, v0.8h
; ret
function %f4(i16x4) -> i16x4 {
block0(v0: i16x4):
v1 = iabs v0
return v1
}
; block0:
; abs v0.4h, v0.4h
; ret
function %f5(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iabs v0
return v1
}
; block0:
; abs v0.4s, v0.4s
; ret
function %f6(i32x2) -> i32x2 {
block0(v0: i32x2):
v1 = iabs v0
return v1
}
; block0:
; abs v0.2s, v0.2s
; ret
function %f7(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iabs v0
return v1
}
; block0:
; abs v0.2d, v0.2d
; ret
function %f8(i8) -> i8 {
block0(v0: i8):
v1 = iabs v0
return v1
}
; block0:
; sxtb w3, w0
; subs wzr, w3, #0
; csneg x0, x3, x3, gt
; ret
function %f9(i16) -> i16 {
block0(v0: i16):
v1 = iabs v0
return v1
}
; block0:
; sxth w3, w0
; subs wzr, w3, #0
; csneg x0, x3, x3, gt
; ret
function %f10(i32) -> i32 {
block0(v0: i32):
v1 = iabs v0
return v1
}
; block0:
; subs wzr, w0, #0
; csneg x0, x0, x0, gt
; ret
function %f11(i64) -> i64 {
block0(v0: i64):
v1 = iabs v0
return v1
}
; block0:
; subs xzr, x0, #0
; csneg x0, x0, x0, gt
; ret

View File

@@ -1,5 +1,7 @@
test interpret test interpret
; aarch64 & x86_64 only support vector iabs test run
target aarch64
; x86_64 only supports vector iabs
function %iabs_i8(i8) -> i8 { function %iabs_i8(i8) -> i8 {
block0(v0: i8): block0(v0: i8):