From 14d1c7ee9fbd597a908c49d409ba7ac4dc08d722 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 8 Jul 2021 16:35:50 +0100 Subject: [PATCH] aarch64: Refactor lower_icmp to allow returning a different flag --- cranelift/codegen/src/isa/aarch64/lower.rs | 85 ++++++++++++------- .../codegen/src/isa/aarch64/lower_inst.rs | 30 +++---- 2 files changed, 65 insertions(+), 50 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 0984575981..858d521119 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -21,6 +21,7 @@ use crate::isa::aarch64::AArch64Backend; use super::lower_inst; use crate::data_value::DataValue; +use crate::ir::instructions::Opcode::Icmp; use log::{debug, trace}; use regalloc::{Reg, Writable}; use smallvec::SmallVec; @@ -1255,7 +1256,8 @@ pub(crate) fn maybe_input_insn_via_conv>( /// Specifies what [lower_icmp] should do when lowering #[derive(Debug, Clone, PartialEq)] pub(crate) enum IcmpOutput { - /// Only sets flags, discarding the results + /// Set flags, discarding the results. The flag to be used needs to be checked + /// in [IcmpResult] Flags, /// Materializes the results into a register. The flags set may be incorrect Register(Writable), @@ -1270,6 +1272,25 @@ impl IcmpOutput { } } +/// The output of an Icmp lowering. +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum IcmpResult { + /// The result was output into the flag corresponding with this [Cond]. Other flags are left in + /// an undefined state. + CondCode(Cond), + /// The result was materialized into the output register. + Register, +} + +impl IcmpResult { + pub fn unwrap_cond(&self) -> Cond { + match self { + IcmpResult::CondCode(c) => *c, + _ => panic!("Unwrapped flag, but IcmpResult was {:?}", self), + } + } +} + /// Lower an icmp comparision /// /// We can lower into the status flags, or materialize the result into a register @@ -1279,7 +1300,7 @@ pub(crate) fn lower_icmp>( insn: IRInst, condcode: IntCC, output: IcmpOutput, -) -> CodegenResult<()> { +) -> CodegenResult { debug!( "lower_icmp: insn {}, condcode: {}, output: {:?}", insn, condcode, output @@ -1298,7 +1319,7 @@ pub(crate) fn lower_icmp>( (false, false) => NarrowValueMode::ZeroExtend64, }; - if ty == I128 { + let out_condcode = if ty == I128 { let lhs = put_input_in_regs(ctx, inputs[0]); let rhs = put_input_in_regs(ctx, inputs[1]); @@ -1424,29 +1445,32 @@ pub(crate) fn lower_icmp>( } } } - } else if !ty.is_vector() { + cond + } else if ty.is_vector() { + assert_ne!(output, IcmpOutput::Flags); + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); + lower_vector_compare(ctx, rd, rn, rm, ty, cond)?; + cond + } else { let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode); let is_overflow = condcode == IntCC::Overflow || condcode == IntCC::NotOverflow; let is_small_type = ty == I8 || ty == I16; - let (rn, rm) = if is_overflow && is_small_type { + let (cond, rn, rm) = if is_overflow && is_small_type { // Overflow checks for non native types require additional instructions, other than // just the extend op. // - // TODO: Codegen improvements here: - // * Merge the second sxt{h,b} into the sub instruction. - // * We can especially improve codegen here if we can return a different flag out of - // this function. That way we can tell the caller to use the 'ne' flag and save - // the last 3 instructions. + // TODO: Codegen improvements: Merge the second sxt{h,b} into the following sub instruction. // // sxt{h,b} w0, w0 // sxt{h,b} w1, w1 // sub w0, w0, w1 // cmp w0, w0, sxt{h,b} - // cset w0, ne - // mov w1, #0x80000000 - // cmp w1, w0 + // + // The result of this comparison is either the EQ or NE condition code, so we need to + // signal that to the caller let extend_op = if ty == I8 { ExtendOp::SXTB @@ -1454,22 +1478,20 @@ pub(crate) fn lower_icmp>( ExtendOp::SXTH }; let tmp1 = ctx.alloc_tmp(I32).only_reg().unwrap(); - let tmp2 = ctx.alloc_tmp(I32).only_reg().unwrap(); ctx.emit(alu_inst_imm12(ALUOp::Sub32, tmp1, rn, rm)); - ctx.emit(alu_inst_imm12( - ALUOp::SubS32, - writable_zero_reg(), + + let out_cond = match condcode { + IntCC::Overflow => Cond::Ne, + IntCC::NotOverflow => Cond::Eq, + _ => unreachable!(), + }; + ( + out_cond, tmp1.to_reg(), ResultRSEImm12::RegExtend(tmp1.to_reg(), extend_op), - )); - ctx.emit(Inst::CSet { - rd: tmp2, - cond: Cond::Ne, - }); - lower_constant_u64(ctx, tmp1, 0x8000_0000); - (tmp1.to_reg(), ResultRSEImm12::Reg(tmp2.to_reg())) + ) } else { - (rn, rm) + (cond, rn, rm) }; let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64); @@ -1478,13 +1500,14 @@ pub(crate) fn lower_icmp>( if let IcmpOutput::Register(rd) = output { materialize_bool_result(ctx, insn, rd, cond); } - } else { - let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); - let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); - lower_vector_compare(ctx, rd, rn, rm, ty, cond)?; - } + cond + }; - Ok(()) + Ok(match output { + // We currently never emit a different register than what was asked for + IcmpOutput::Register(_) => IcmpResult::Register, + IcmpOutput::Flags => IcmpResult::CondCode(out_condcode), + }) } pub(crate) fn lower_fcmp_or_ffcmp_to_flags>(ctx: &mut C, insn: IRInst) { diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index ea01bd94a4..1aea3563dc 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1656,9 +1656,7 @@ pub(crate) fn lower_insn_to_regs>( maybe_input_insn_via_conv(ctx, flag_input, Opcode::Icmp, Opcode::Bint) { let condcode = ctx.data(icmp_insn).cond_code().unwrap(); - let cond = lower_condcode(condcode); - lower_icmp(ctx, icmp_insn, condcode, IcmpOutput::Flags)?; - cond + lower_icmp(ctx, icmp_insn, condcode, IcmpOutput::Flags)?.unwrap_cond() } else if let Some(fcmp_insn) = maybe_input_insn_via_conv(ctx, flag_input, Opcode::Fcmp, Opcode::Bint) { @@ -1721,11 +1719,10 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Selectif | Opcode::SelectifSpectreGuard => { let condcode = ctx.data(insn).cond_code().unwrap(); - let cond = lower_condcode(condcode); // Verification ensures that the input is always a // single-def ifcmp. let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap(); - lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::Flags)?; + let cond = lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::Flags)?.unwrap_cond(); // csel.COND rd, rn, rm let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); @@ -2042,12 +2039,10 @@ pub(crate) fn lower_insn_to_regs>( cond } else if op == Opcode::Trapif { let condcode = ctx.data(insn).cond_code().unwrap(); - let cond = lower_condcode(condcode); // Verification ensures that the input is always a single-def ifcmp. let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap(); - lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::Flags)?; - cond + lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::Flags)?.unwrap_cond() } else { let condcode = ctx.data(insn).fp_cond_code().unwrap(); let cond = lower_fp_condcode(condcode); @@ -3651,11 +3646,11 @@ pub(crate) fn lower_branch>( maybe_input_insn_via_conv(ctx, flag_input, Opcode::Icmp, Opcode::Bint) { let condcode = ctx.data(icmp_insn).cond_code().unwrap(); - let cond = lower_condcode(condcode); + let cond = + lower_icmp(ctx, icmp_insn, condcode, IcmpOutput::Flags)?.unwrap_cond(); let negated = op0 == Opcode::Brz; let cond = if negated { cond.invert() } else { cond }; - lower_icmp(ctx, icmp_insn, condcode, IcmpOutput::Flags)?; ctx.emit(Inst::CondBr { taken, not_taken, @@ -3703,32 +3698,29 @@ pub(crate) fn lower_branch>( } Opcode::BrIcmp => { let condcode = ctx.data(branches[0]).cond_code().unwrap(); - let cond = lower_condcode(condcode); - let kind = CondBrKind::Cond(cond); + let cond = lower_icmp(ctx, branches[0], condcode, IcmpOutput::Flags)?.unwrap_cond(); - lower_icmp(ctx, branches[0], condcode, IcmpOutput::Flags)?; ctx.emit(Inst::CondBr { taken, not_taken, - kind, + kind: CondBrKind::Cond(cond), }); } Opcode::Brif => { let condcode = ctx.data(branches[0]).cond_code().unwrap(); - let cond = lower_condcode(condcode); - let kind = CondBrKind::Cond(cond); let flag_input = InsnInput { insn: branches[0], input: 0, }; if let Some(ifcmp_insn) = maybe_input_insn(ctx, flag_input, Opcode::Ifcmp) { - lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::Flags)?; + let cond = + lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::Flags)?.unwrap_cond(); ctx.emit(Inst::CondBr { taken, not_taken, - kind, + kind: CondBrKind::Cond(cond), }); } else { // If the ifcmp result is actually placed in a @@ -3738,7 +3730,7 @@ pub(crate) fn lower_branch>( ctx.emit(Inst::CondBr { taken, not_taken, - kind, + kind: CondBrKind::Cond(lower_condcode(condcode)), }); } }