diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 3130b03b4c..145171c728 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -1154,12 +1154,43 @@ pub(crate) fn maybe_input_insn_via_conv>( None } -pub(crate) fn lower_icmp_or_ifcmp_to_flags>( +/// Specifies what [lower_icmp] should do when lowering +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum IcmpOutput { + /// Only sets flags, discarding the results + Flags, + /// Materializes the results into a register. The flags set may be incorrect + Register(Writable), +} + +impl IcmpOutput { + pub fn reg(&self) -> Option> { + match self { + IcmpOutput::Flags => None, + IcmpOutput::Register(reg) => Some(*reg), + } + } +} + +/// Lower an icmp comparison +/// +/// We can lower into the status flags, or materialize the result into a register +/// This is controlled by the `output` parameter. +pub(crate) fn lower_icmp>( ctx: &mut C, insn: IRInst, - is_signed: bool, -) { - debug!("lower_icmp_or_ifcmp_to_flags: insn {}", insn); + condcode: IntCC, + output: IcmpOutput, +) -> CodegenResult<()> { + debug!( + "lower_icmp: insn {}, condcode: {}, output: {:?}", + insn, condcode, output + ); + + let rd = output.reg().unwrap_or(writable_zero_reg()); + let inputs = insn_inputs(ctx, insn); + let cond = lower_condcode(condcode); + let is_signed = condcode_is_signed(condcode); let ty = ctx.input_ty(insn, 0); let bits = ty_bits(ty); let narrow_mode = match (bits <= 32, is_signed) { @@ -1168,14 +1199,126 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags>( (false, true) => NarrowValueMode::SignExtend64, (false, false) => NarrowValueMode::ZeroExtend64, }; - let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }]; - let ty = ctx.input_ty(insn, 0); - let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); - let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode); - debug!("lower_icmp_or_ifcmp_to_flags: rn = {:?} 
rm = {:?}", rn, rm); - let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64); - let rd = writable_zero_reg(); - ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm)); + + if ty == I128 { + let lhs = put_input_in_regs(ctx, inputs[0]); + let rhs = put_input_in_regs(ctx, inputs[1]); + + let tmp1 = ctx.alloc_tmp(I64).only_reg().unwrap(); + let tmp2 = ctx.alloc_tmp(I64).only_reg().unwrap(); + + match condcode { + IntCC::Equal | IntCC::NotEqual => { + // eor tmp1, lhs_lo, rhs_lo + // eor tmp2, lhs_hi, rhs_hi + // adds xzr, tmp1, tmp2 + // cset dst, {eq, ne} + + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Eor64, + rd: tmp1, + rn: lhs.regs()[0], + rm: rhs.regs()[0], + }); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Eor64, + rd: tmp2, + rn: lhs.regs()[1], + rm: rhs.regs()[1], + }); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::AddS64, + rd: writable_zero_reg(), + rn: tmp1.to_reg(), + rm: tmp2.to_reg(), + }); + + if let IcmpOutput::Register(rd) = output { + materialize_bool_result(ctx, insn, rd, cond); + } + } + IntCC::Overflow | IntCC::NotOverflow => { + // We can do a 128-bit add while throwing away the results + // and check the overflow flags at the end. + // + // adds xzr, lhs_lo, rhs_lo + // adcs xzr, lhs_hi, rhs_hi + // cset dst, {vs, vc} + + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::AddS64, + rd: writable_zero_reg(), + rn: lhs.regs()[0], + rm: rhs.regs()[0], + }); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::AdcS64, + rd: writable_zero_reg(), + rn: lhs.regs()[1], + rm: rhs.regs()[1], + }); + + if let IcmpOutput::Register(rd) = output { + materialize_bool_result(ctx, insn, rd, cond); + } + } + _ => { + // The currently generated ASM does not correctly set the flags, so we assert here + // to ensure that we don't silently lower incorrect code. 
+ assert_ne!(IcmpOutput::Flags, output, "Unable to lower icmp to flags"); + + // cmp lhs_lo, rhs_lo + // cset tmp1, low_cc + // cmp lhs_hi, rhs_hi + // cset tmp2, cond + // csel dst, tmp1, tmp2, eq + + let low_cc = match condcode { + IntCC::SignedGreaterThanOrEqual | IntCC::UnsignedGreaterThanOrEqual => Cond::Hs, + IntCC::SignedGreaterThan | IntCC::UnsignedGreaterThan => Cond::Hi, + IntCC::SignedLessThanOrEqual | IntCC::UnsignedLessThanOrEqual => Cond::Ls, + IntCC::SignedLessThan | IntCC::UnsignedLessThan => Cond::Lo, + _ => unreachable!(), + }; + + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::SubS64, + rd: writable_zero_reg(), + rn: lhs.regs()[0], + rm: rhs.regs()[0], + }); + materialize_bool_result(ctx, insn, tmp1, low_cc); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::SubS64, + rd: writable_zero_reg(), + rn: lhs.regs()[1], + rm: rhs.regs()[1], + }); + materialize_bool_result(ctx, insn, tmp2, cond); + ctx.emit(Inst::CSel { + cond: Cond::Eq, + rd, + rn: tmp1.to_reg(), + rm: tmp2.to_reg(), + }); + } + } + } else if !ty.is_vector() { + let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64); + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode); + ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm)); + + if let IcmpOutput::Register(rd) = output { + materialize_bool_result(ctx, insn, rd, cond); + } + } else { + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); + lower_vector_compare(ctx, rd, rn, rm, ty, cond)?; + } + + Ok(()) } pub(crate) fn lower_fcmp_or_ffcmp_to_flags>(ctx: &mut C, insn: IRInst) { diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index db5b021e2f..c4f732defe 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1,7 +1,7 @@ //! Lower a single Cranelift instruction into vcode. 
use crate::binemit::CodeOffset; -use crate::ir::condcodes::{FloatCC, IntCC}; +use crate::ir::condcodes::FloatCC; use crate::ir::types::*; use crate::ir::Inst as IRInst; use crate::ir::{InstructionData, Opcode, TrapCode}; @@ -1528,8 +1528,7 @@ pub(crate) fn lower_insn_to_regs>( { let condcode = ctx.data(icmp_insn).cond_code().unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode_is_signed(condcode); - lower_icmp_or_ifcmp_to_flags(ctx, icmp_insn, is_signed); + lower_icmp(ctx, icmp_insn, condcode, IcmpOutput::Flags)?; cond } else if let Some(fcmp_insn) = maybe_input_insn_via_conv(ctx, flag_input, Opcode::Fcmp, Opcode::Bint) @@ -1577,11 +1576,10 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Selectif | Opcode::SelectifSpectreGuard => { let condcode = ctx.data(insn).cond_code().unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode_is_signed(condcode); // Verification ensures that the input is always a // single-def ifcmp. let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap(); - lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed); + lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::Flags)?; // csel.COND rd, rn, rm let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); @@ -1648,14 +1646,11 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Trueif => { let condcode = ctx.data(insn).cond_code().unwrap(); - let cond = lower_condcode(condcode); - let is_signed = condcode_is_signed(condcode); // Verification ensures that the input is always a // single-def ifcmp. 
let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap(); - lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - materialize_bool_result(ctx, insn, rd, cond); + lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::Register(rd))?; } Opcode::Trueff => { @@ -1847,126 +1842,8 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Icmp => { let condcode = ctx.data(insn).cond_code().unwrap(); - let cond = lower_condcode(condcode); - let is_signed = condcode_is_signed(condcode); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let ty = ctx.input_ty(insn, 0); - let bits = ty_bits(ty); - let narrow_mode = match (bits <= 32, is_signed) { - (true, true) => NarrowValueMode::SignExtend32, - (true, false) => NarrowValueMode::ZeroExtend32, - (false, true) => NarrowValueMode::SignExtend64, - (false, false) => NarrowValueMode::ZeroExtend64, - }; - - if ty == I128 { - let lhs = put_input_in_regs(ctx, inputs[0]); - let rhs = put_input_in_regs(ctx, inputs[1]); - - let tmp1 = ctx.alloc_tmp(I64).only_reg().unwrap(); - let tmp2 = ctx.alloc_tmp(I64).only_reg().unwrap(); - - match condcode { - IntCC::Equal | IntCC::NotEqual => { - // eor tmp1, lhs_lo, rhs_lo - // eor tmp2, lhs_hi, rhs_hi - // adds xzr, tmp1, tmp2 - // cset dst, {eq, ne} - - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::Eor64, - rd: tmp1, - rn: lhs.regs()[0], - rm: rhs.regs()[0], - }); - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::Eor64, - rd: tmp2, - rn: lhs.regs()[1], - rm: rhs.regs()[1], - }); - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::AddS64, - rd: writable_zero_reg(), - rn: tmp1.to_reg(), - rm: tmp2.to_reg(), - }); - materialize_bool_result(ctx, insn, rd, cond); - } - IntCC::Overflow | IntCC::NotOverflow => { - // We can do an 128bit add while throwing away the results - // and check the overflow flags at the end. 
- // - // adds xzr, lhs_lo, rhs_lo - // adcs xzr, lhs_hi, rhs_hi - // cset dst, {vs, vc} - - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::AddS64, - rd: writable_zero_reg(), - rn: lhs.regs()[0], - rm: rhs.regs()[0], - }); - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::AdcS64, - rd: writable_zero_reg(), - rn: lhs.regs()[1], - rm: rhs.regs()[1], - }); - materialize_bool_result(ctx, insn, rd, cond); - } - _ => { - // cmp lhs_lo, rhs_lo - // cset tmp1, low_cc - // cmp lhs_hi, rhs_hi - // cset tmp2, cond - // csel dst, tmp1, tmp2, eq - - let low_cc = match condcode { - IntCC::SignedGreaterThanOrEqual | IntCC::UnsignedGreaterThanOrEqual => { - Cond::Hs - } - IntCC::SignedGreaterThan | IntCC::UnsignedGreaterThan => Cond::Hi, - IntCC::SignedLessThanOrEqual | IntCC::UnsignedLessThanOrEqual => { - Cond::Ls - } - IntCC::SignedLessThan | IntCC::UnsignedLessThan => Cond::Lo, - _ => unreachable!(), - }; - - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::SubS64, - rd: writable_zero_reg(), - rn: lhs.regs()[0], - rm: rhs.regs()[0], - }); - materialize_bool_result(ctx, insn, tmp1, low_cc); - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::SubS64, - rd: writable_zero_reg(), - rn: lhs.regs()[1], - rm: rhs.regs()[1], - }); - materialize_bool_result(ctx, insn, tmp2, cond); - ctx.emit(Inst::CSel { - cond: Cond::Eq, - rd, - rn: tmp1.to_reg(), - rm: tmp2.to_reg(), - }); - } - } - } else if !ty.is_vector() { - let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64); - let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); - let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode); - ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm)); - materialize_bool_result(ctx, insn, rd, cond); - } else { - let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); - let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); - lower_vector_compare(ctx, rd, rn, rm, ty, cond)?; - } + lower_icmp(ctx, insn, condcode, IcmpOutput::Register(rd))?; } Opcode::Fcmp => { @@ -2020,11 +1897,10 @@ pub(crate) fn 
lower_insn_to_regs>( } else if op == Opcode::Trapif { let condcode = ctx.data(insn).cond_code().unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode_is_signed(condcode); // Verification ensures that the input is always a single-def ifcmp. let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap(); - lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed); + lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::Flags)?; cond } else { let condcode = ctx.data(insn).fp_cond_code().unwrap(); @@ -3525,11 +3401,10 @@ pub(crate) fn lower_branch>( { let condcode = ctx.data(icmp_insn).cond_code().unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode_is_signed(condcode); let negated = op0 == Opcode::Brz; let cond = if negated { cond.invert() } else { cond }; - lower_icmp_or_ifcmp_to_flags(ctx, icmp_insn, is_signed); + lower_icmp(ctx, icmp_insn, condcode, IcmpOutput::Flags)?; ctx.emit(Inst::CondBr { taken, not_taken, @@ -3621,13 +3496,12 @@ pub(crate) fn lower_branch>( let cond = lower_condcode(condcode); let kind = CondBrKind::Cond(cond); - let is_signed = condcode_is_signed(condcode); let flag_input = InsnInput { insn: branches[0], input: 0, }; if let Some(ifcmp_insn) = maybe_input_insn(ctx, flag_input, Opcode::Ifcmp) { - lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed); + lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::Flags)?; ctx.emit(Inst::CondBr { taken, not_taken,