diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 9e211a4c7b..a1553000a8 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -187,6 +187,13 @@ (rn Reg) (rm Reg)) + ;; A conditional-select negation operation. + (CSNeg + (rd WritableReg) + (cond Cond) + (rn Reg) + (rm Reg)) + ;; A conditional-set operation. (CSet (rd WritableReg) @@ -1534,11 +1541,15 @@ (MInst.AluRRR (ALUOp.SubS) (operand_size ty) dst src1 src2) dst))) +(decl cmp_imm (OperandSize Reg Imm12) ProducesFlags) +(rule (cmp_imm size src1 src2) + (ProducesFlags.ProducesFlagsSideEffect + (MInst.AluRRImm12 (ALUOp.SubS) size (writable_zero_reg) + src1 src2))) + (decl cmp64_imm (Reg Imm12) ProducesFlags) (rule (cmp64_imm src1 src2) - (ProducesFlags.ProducesFlagsSideEffect - (MInst.AluRRImm12 (ALUOp.SubS) (OperandSize.Size64) (writable_zero_reg) - src1 src2))) + (cmp_imm (OperandSize.Size64) src1 src2)) ;; Helper for emitting `sbc` instructions. (decl sbc_paired (Type Reg Reg) ConsumesFlags) @@ -1681,6 +1692,18 @@ (MInst.CSel dst cond if_true if_false) dst))) +;; Helper for generating a `CSNeg` instruction. +;; +;; Note that this doesn't actually emit anything, instead it produces a +;; `ConsumesFlags` instruction which must be consumed with `with_flags*` +;; helpers. +(decl csneg (Cond Reg Reg) ConsumesFlags) +(rule (csneg cond if_true if_false) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsReturnsReg + (MInst.CSNeg dst cond if_true if_false) + dst))) + ;; Helpers for generating `add` instructions. (decl add (Type Reg Reg) Reg) @@ -1769,6 +1792,17 @@ (decl addp (Reg Reg VectorSize) Reg) (rule (addp x y size) (vec_rrr (VecALUOp.Addp) x y size)) +;; Helper for generating vector `abs` instructions. +(decl vec_abs (Reg VectorSize) Reg) +(rule (vec_abs x size) (vec_misc (VecMisc2.Abs) x size)) + +;; Helper for generating instruction sequences to calculate a scalar absolute +;; value. +(decl abs (OperandSize Reg) Reg) +(rule (abs size x) + (value_regs_get (with_flags (cmp_imm size x (u8_into_imm12 0)) + (csneg (Cond.Gt) x x)) 0)) + ;; Helper for generating `addv` instructions. (decl addv (Reg VectorSize) Reg) (rule (addv x size) (vec_lanes (VecLanesOp.Addv) x size)) diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index bfbd121b2d..ee63225e09 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -331,12 +331,16 @@ pub(crate) fn enc_adr(off: i32, rd: Writable) -> u32 { (0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg()) } -fn enc_csel(rd: Writable, rn: Reg, rm: Reg, cond: Cond) -> u32 { +fn enc_csel(rd: Writable, rn: Reg, rm: Reg, cond: Cond, op: u32, o2: u32) -> u32 { + debug_assert_eq!(op & 0b1, op); + debug_assert_eq!(o2 & 0b1, o2); 0b100_11010100_00000_0000_00_00000_00000 + | (op << 30) | (machreg_to_gpr(rm) << 16) + | (cond.bits() << 12) + | (o2 << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()) - | (cond.bits() << 12) } fn enc_fcsel(rd: Writable, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 { @@ -348,18 +352,6 @@ fn enc_fcsel(rd: Writable, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) | (cond.bits() << 12) } -fn enc_cset(rd: Writable, cond: Cond) -> u32 { - 0b100_11010100_11111_0000_01_11111_00000 - | machreg_to_gpr(rd.to_reg()) - | (cond.invert().bits() << 12) -} - -fn enc_csetm(rd: Writable, cond: Cond) -> u32 { - 0b110_11010100_11111_0000_00_11111_00000 - | machreg_to_gpr(rd.to_reg()) - | (cond.invert().bits() << 12) -} - fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 { 0b0_1_1_11010010_00000_0000_10_00000_0_0000 | size.sf_bit() << 31 @@ -1352,15 +1344,21 @@ impl MachInstEmit for Inst { let rd = allocs.next_writable(rd); let rn = allocs.next(rn); let rm = allocs.next(rm); - sink.put4(enc_csel(rd, rn, rm, cond)); + sink.put4(enc_csel(rd, rn, rm, cond, 0, 0)); + } + &Inst::CSNeg { rd, rn, rm, cond } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); + sink.put4(enc_csel(rd, rn, rm, cond, 1, 1)); } &Inst::CSet { rd, cond } => { let rd = allocs.next_writable(rd); - sink.put4(enc_cset(rd, cond)); + sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 0, 1)); } &Inst::CSetm { rd, cond } => { let rd = allocs.next_writable(rd); - sink.put4(enc_csetm(rd, cond)); + sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 1, 0)); } &Inst::CCmpImm { size, diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 66d1d8a776..2629b7835a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -2129,6 +2129,16 @@ fn test_aarch64_binemit() { "8A218E9A", "csel x10, x12, x14, hs", )); + insns.push(( + Inst::CSNeg { + rd: writable_xreg(10), + rn: xreg(12), + rm: xreg(14), + cond: Cond::Hs, + }, + "8A258EDA", + "csneg x10, x12, x14, hs", + )); insns.push(( Inst::CSet { rd: writable_xreg(15), diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index e4044f2de8..42b2959b9b 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -669,6 +669,11 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_use(rn); collector.reg_use(rm); } + &Inst::CSNeg { rd, rn, rm, .. } => { + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); + } &Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => { collector.reg_def(rd); } @@ -1510,6 +1515,13 @@ impl Inst { let cond = cond.pretty_print(0, allocs); format!("csel {}, {}, {}, {}", rd, rn, rm, cond) } + &Inst::CSNeg { rd, rn, rm, cond } => { + let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); + let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); + let rm = pretty_print_ireg(rm, OperandSize::Size64, allocs); + let cond = cond.pretty_print(0, allocs); + format!("csneg {}, {}, {}, {}", rd, rn, rm, cond) + } &Inst::CSet { rd, cond } => { let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); let cond = cond.pretty_print(0, allocs); diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index f215654b1d..7e7115e05c 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -156,6 +156,17 @@ (rule (lower (has_type ty (iadd_pairwise x y))) (addp x y (vector_size ty))) +;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type ty @ (multi_lane _ _) (iabs x))) + (vec_abs x (vector_size ty))) + +(rule (lower (has_type $I64 (iabs x))) + (abs (OperandSize.Size64) x)) + +(rule (lower (has_type (fits_in_32 ty) (iabs x))) + (abs (OperandSize.Size32) (put_in_reg_sext32 x))) + ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `i64` and smaller diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 50f01e9f23..09e4d311c4 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1941,17 +1941,7 @@ pub(crate) fn lower_insn_to_regs>( panic!("ALU+imm and ALU+carry ops should not appear here!"); } - Opcode::Iabs => { - let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let ty = ty.unwrap(); - ctx.emit(Inst::VecMisc { - op: VecMisc2::Abs, - rd, - rn, - size: VectorSize::from_ty(ty), - }); - } + Opcode::Iabs => implemented_in_isle(ctx), Opcode::AvgRound => { let ty = ty.unwrap(); diff --git a/cranelift/filetests/filetests/isa/aarch64/iabs.clif b/cranelift/filetests/filetests/isa/aarch64/iabs.clif new file mode 100644 index 0000000000..bfbf0e45b9 --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/iabs.clif @@ -0,0 +1,119 @@ +test compile precise-output +set unwind_info=false +target aarch64 + +function %f1(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iabs v0 + return v1 +} + +; block0: +; abs v0.16b, v0.16b +; ret + +function %f2(i8x8) -> i8x8 { +block0(v0: i8x8): + v1 = iabs v0 + return v1 +} + +; block0: +; abs v0.8b, v0.8b +; ret + +function %f3(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iabs v0 + return v1 +} + +; block0: +; abs v0.8h, v0.8h +; ret + +function %f4(i16x4) -> i16x4 { +block0(v0: i16x4): + v1 = iabs v0 + return v1 +} + +; block0: +; abs v0.4h, v0.4h +; ret + +function %f5(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iabs v0 + return v1 +} + +; block0: +; abs v0.4s, v0.4s +; ret + +function %f6(i32x2) -> i32x2 { +block0(v0: i32x2): + v1 = iabs v0 + return v1 +} + +; block0: +; abs v0.2s, v0.2s +; ret + +function %f7(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iabs v0 + return v1 +} + +; block0: +; abs v0.2d, v0.2d +; ret + +function %f8(i8) -> i8 { +block0(v0: i8): + v1 = iabs v0 + return v1 +} + +; block0: +; sxtb w3, w0 +; subs wzr, w3, #0 +; csneg x0, x3, x3, gt +; ret + +function %f9(i16) -> i16 { +block0(v0: i16): + v1 = iabs v0 + return v1 +} + +; block0: +; sxth w3, w0 +; subs wzr, w3, #0 +; csneg x0, x3, x3, gt +; ret + +function %f10(i32) -> i32 { +block0(v0: i32): + v1 = iabs v0 + return v1 +} + +; block0: +; subs wzr, w0, #0 +; csneg x0, x0, x0, gt +; ret + +function %f11(i64) -> i64 { +block0(v0: i64): + v1 = iabs v0 + return v1 +} + +; block0: +; subs xzr, x0, #0 +; csneg x0, x0, x0, gt +; ret diff --git a/cranelift/filetests/filetests/runtests/iabs.clif b/cranelift/filetests/filetests/runtests/iabs.clif index acf2bf8584..8781744ece 100644 --- a/cranelift/filetests/filetests/runtests/iabs.clif +++ b/cranelift/filetests/filetests/runtests/iabs.clif @@ -1,5 +1,7 @@ test interpret -; aarch64 & x86_64 only support vector iabs +test run +target aarch64 +; x86_64 only supports vector iabs function %iabs_i8(i8) -> i8 { block0(v0: i8):