From f86ecdcb869d90965145de07b19e8b3a4c5761f0 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Fri, 26 Jun 2020 15:36:49 +0200 Subject: [PATCH] machinst x64: lower and implement div/idiv; ADD TESTS --- cranelift/codegen/src/isa/x64/inst/args.rs | 2 +- cranelift/codegen/src/isa/x64/inst/emit.rs | 130 ++++++++++++++++++++- cranelift/codegen/src/isa/x64/inst/mod.rs | 102 ++++++++++++++++ cranelift/codegen/src/isa/x64/lower.rs | 111 +++++++++++++++++- 4 files changed, 341 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index a19874a923..1bcc18ee4f 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -532,7 +532,7 @@ pub enum CC { /// <= unsigned BE = 6, - /// > unsigend + /// > unsigned NBE = 7, /// negative diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 7565d785fc..1eb402b1c3 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -395,7 +395,7 @@ fn emit_simm(sink: &mut MachBuffer, size: u8, simm32: u32) { pub(crate) fn emit( inst: &Inst, sink: &mut MachBuffer, - _flags: &settings::Flags, + flags: &settings::Flags, state: &mut EmitState, ) { match inst { @@ -516,6 +516,128 @@ pub(crate) fn emit( } } + Inst::Div { + size, + signed, + divisor, + loc, + } => { + let (prefix, rex_flags) = match size { + 2 => (LegacyPrefix::_66, RexFlags::clear_w()), + 4 => (LegacyPrefix::None, RexFlags::clear_w()), + 8 => (LegacyPrefix::None, RexFlags::set_w()), + _ => unreachable!(), + }; + + sink.add_trap(*loc, TrapCode::IntegerDivisionByZero); + + let subopcode = if *signed { 7 } else { 6 }; + match divisor { + RegMem::Reg { reg } => { + let src = int_reg_enc(*reg); + emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags) + } + RegMem::Mem { addr: src } => emit_std_enc_mem( + sink, + prefix, + 0xF7, + 1, + subopcode, + 
&src.finalize(state), + rex_flags, + ), + } + } + + Inst::SignExtendRaxRdx { size } => { + let (prefix, rex_flags) = match size { + 2 => (LegacyPrefix::_66, RexFlags::clear_w()), + 4 => (LegacyPrefix::None, RexFlags::clear_w()), + 8 => (LegacyPrefix::None, RexFlags::set_w()), + _ => unreachable!(), + }; + prefix.emit(sink); + rex_flags.emit_two_op(sink, 0, 0); + sink.put1(0x99); + } + + Inst::SignedDivOrRem { + is_div, + size, + divisor, + loc, + } => { + debug_assert!(flags.avoid_div_traps()); + + // Check if the divisor is zero, first. + let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), *divisor); + inst.emit(sink, flags, state); + + let inst = Inst::one_way_jmp( + CC::NZ, + BranchTarget::ResolvedOffset(Inst::size_of_trap() as isize), + ); + inst.emit(sink, flags, state); + + let inst = Inst::trap(*loc, TrapCode::IntegerDivisionByZero); + inst.emit(sink, flags, state); + + // Now check if the divisor is -1. + let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), *divisor); + inst.emit(sink, flags, state); + + let do_op = sink.get_label(); + // If not equal, jump to do-op. + let inst = Inst::one_way_jmp(CC::NZ, BranchTarget::Label(do_op)); + inst.emit(sink, flags, state); + + // Here, divisor == -1. + let done_label = if !*is_div { + // x % -1 = 0; put the result into the destination, $rdx. + let done_label = sink.get_label(); + + let inst = Inst::imm_r(*size == 8, 0, Writable::from_reg(regs::rdx())); + inst.emit(sink, flags, state); + + let inst = Inst::jmp_known(BranchTarget::Label(done_label)); + inst.emit(sink, flags, state); + + Some(done_label) + } else { + // Check for integer overflow. TODO(review): 0x80000000 is INT_MIN only for size == 4; confirm sizes 2 and 8. + let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0x80000000), regs::rax()); + inst.emit(sink, flags, state); + + // If not equal, jump over the trap.
+ let inst = Inst::one_way_jmp( + CC::NZ, + BranchTarget::ResolvedOffset(Inst::size_of_trap() as isize), + ); + inst.emit(sink, flags, state); + + let inst = Inst::trap(*loc, TrapCode::IntegerOverflow); + inst.emit(sink, flags, state); + + None + }; + + sink.bind_label(do_op); + + // Fill in the "high" parts: sign-extend the sign-bit of rax into rdx. + let inst = Inst::sign_extend_rax_to_rdx(*size); + inst.emit(sink, flags, state); + + let inst = Inst::div(*size, true /*signed*/, RegMem::reg(*divisor), *loc); + inst.emit(sink, flags, state); + + // The lowering takes care of moving the result back into the right register, see + // comment there. + + if let Some(done) = done_label { + sink.bind_label(done); + } + } + Inst::Imm_R { dst_is_64, simm64, @@ -1135,8 +1257,14 @@ pub(crate) fn emit( Inst::Ud2 { trap_info } => { sink.add_trap(trap_info.0, trap_info.1); + let cur_offset = sink.cur_offset(); sink.put1(0x0f); sink.put1(0x0b); + assert_eq!( + sink.cur_offset() - cur_offset, + Inst::size_of_trap(), + "invalid trap size" + ); } Inst::VirtualSPOffsetAdj { offset } => { diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index ac95450669..3791c573e0 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -49,6 +49,30 @@ pub enum Inst { dst: Writable, }, + /// Integer quotient and remainder: (div idiv) $rax $rdx (reg addr) + Div { + size: u8, // 1, 2, 4 or 8 (emit only handles 2, 4 and 8) + signed: bool, + divisor: RegMem, + loc: SourceLoc, + }, + + /// A synthetic sequence to implement the right inline checks for signed division and remainder, + /// assuming the dividend is in $rax. + /// Puts the result back into $rax if is_div, $rdx if !is_div, to mimic what the div + /// instruction does.
+ SignedDivOrRem { + is_div: bool, + size: u8, + divisor: Reg, + loc: SourceLoc, + }, + + /// Do a sign-extend based on the sign of the value in rax into rdx: (cwd cdq cqo) + SignExtendRaxRdx { + size: u8, // 2, 4 or 8 + }, + /// Constant materialization: (imm32 imm64) reg. /// Either: movl $imm32, %reg32 or movabsq $imm64, %reg32. Imm_R { @@ -250,6 +274,20 @@ impl Inst { } } + pub(crate) fn div(size: u8, signed: bool, divisor: RegMem, loc: SourceLoc) -> Inst { + debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); + Inst::Div { + size, + signed, + divisor, + loc, + } + } + pub(crate) fn sign_extend_rax_to_rdx(size: u8) -> Inst { + debug_assert!(size == 8 || size == 4 || size == 2); + Inst::SignExtendRaxRdx { size } + } + pub(crate) fn imm_r(dst_is_64: bool, simm64: u64, dst: Writable) -> Inst { debug_assert!(dst.to_reg().get_class() == RegClass::I64); if !dst_is_64 { @@ -359,6 +397,20 @@ impl Inst { Inst::Cmp_RMI_R { size, src, dst } } + pub(crate) fn trap(srcloc: SourceLoc, trap_code: TrapCode) -> Inst { + Inst::Ud2 { + trap_info: (srcloc, trap_code), + } + } + /// Returns the size of a trap instruction, which must be fixed. Asserted during codegen. + pub(crate) fn size_of_trap() -> u32 { + 2 + } + + pub(crate) fn one_way_jmp(cc: CC, dst: BranchTarget) -> Inst { + Inst::OneWayJmpCond { cc, dst } + } + pub(crate) fn setcc(cc: CC, dst: Writable) -> Inst { debug_assert!(dst.to_reg().get_class() == RegClass::I64); Inst::Setcc { cc, dst } @@ -489,6 +541,37 @@ impl ShowWithRRU for Inst { src.show_rru_sized(mb_rru, sizeLQ(*is_64)), show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)), ), + Inst::Div { + size, + signed, + divisor, + .. + } => format!( + "{} {}", + ljustify(if *signed { + "idiv".to_string() + } else { + "div".into() + }), + divisor.show_rru_sized(mb_rru, *size) + ), + Inst::SignedDivOrRem { + is_div, + size, + divisor, + ..
+ } => format!( + "s{} $rax:$rdx, {}", + if *is_div { "div " } else { "rem " }, + show_ireg_sized(*divisor, mb_rru, *size), + ), + Inst::SignExtendRaxRdx { size } => match size { + 2 => "cwd", + 4 => "cdq", + 8 => "cqo", + _ => unreachable!(), + } + .into(), Inst::XMM_Mov_RM_R { op, src, dst } => format!( "{} {}, {}", ljustify(op.to_string()), @@ -678,6 +761,20 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { src.get_regs_as_uses(collector); collector.add_mod(*dst); } + Inst::Div { divisor, .. } => { + collector.add_mod(Writable::from_reg(regs::rax())); + collector.add_mod(Writable::from_reg(regs::rdx())); + divisor.get_regs_as_uses(collector); + } + Inst::SignedDivOrRem { divisor, .. } => { + collector.add_mod(Writable::from_reg(regs::rax())); + collector.add_mod(Writable::from_reg(regs::rdx())); + collector.add_use(*divisor); + } + Inst::SignExtendRaxRdx { .. } => { + collector.add_use(regs::rax()); + collector.add_mod(Writable::from_reg(regs::rdx())); + } Inst::XMM_Mov_RM_R { src, dst, .. } => { src.get_regs_as_uses(collector); collector.add_def(*dst); @@ -852,6 +949,11 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { src.map_uses(mapper); map_mod(mapper, dst); } + Inst::Div { divisor, .. } => divisor.map_uses(mapper), + Inst::SignedDivOrRem { divisor, .. } => { + map_use(mapper, divisor); + } + Inst::SignExtendRaxRdx { .. 
} => {} Inst::XMM_Mov_RM_R { ref mut src, ref mut dst, diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index a9ff7b1e94..c29f0ebbdd 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -15,6 +15,7 @@ use crate::ir::{condcodes::IntCC, InstructionData, Opcode, TrapCode, Type}; use crate::machinst::lower::*; use crate::machinst::*; use crate::result::CodegenResult; +use crate::settings::Flags; use crate::isa::x64::abi::*; use crate::isa::x64::inst::args::*; @@ -169,7 +170,11 @@ fn emit_cmp(ctx: Ctx, insn: IRInst) { // Top-level instruction lowering entry point, for one instruction. /// Actually codegen an instruction's results into registers. -fn lower_insn_to_regs>(ctx: &mut C, insn: IRInst) -> CodegenResult<()> { +fn lower_insn_to_regs>( + ctx: &mut C, + insn: IRInst, + flags: &Flags, +) -> CodegenResult<()> { let op = ctx.data(insn).opcode(); let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn)) @@ -656,6 +661,108 @@ fn lower_insn_to_regs>(ctx: &mut C, insn: IRInst) -> Codeg } } + Opcode::Udiv | Opcode::Urem => { + let input_ty = ctx.input_ty(insn, 0); + let size = input_ty.bytes() as u8; + + let dividend = input_to_reg(ctx, inputs[0]); + let dst = output_to_reg(ctx, outputs[0]); + + let divisor = if flags.avoid_div_traps() { + let srcloc = ctx.srcloc(insn); + let divisor = input_to_reg(ctx, inputs[1]); + + // Check that divisor isn't zero, or trap otherwise. + let after_trap = BranchTarget::ResolvedOffset(Inst::size_of_trap() as isize); + ctx.emit(Inst::cmp_rmi_r(size, RegMemImm::imm(0), divisor)); + ctx.emit(Inst::one_way_jmp(CC::NZ, after_trap)); + ctx.emit(Inst::trap(srcloc, TrapCode::IntegerDivisionByZero)); + + RegMem::reg(divisor) + } else { + input_to_reg_mem(ctx, inputs[1]) + }; + + ctx.emit(Inst::gen_move( + Writable::from_reg(regs::rax()), + dividend, + input_ty, + )); + + // Fill in the "high" parts: unsigned means we put 0 in there. 
+ ctx.emit(Inst::imm_r(true, 0, Writable::from_reg(regs::rdx()))); + + // Emit the actual div. + ctx.emit(Inst::div( + size, + false, /* signed */ + divisor, + ctx.srcloc(insn), + )); + + // Move the result back into the destination reg. + if op == Opcode::Udiv { + // The quotient is in rax. + ctx.emit(Inst::gen_move(dst, regs::rax(), input_ty)); + } else { + // The remainder is in rdx. + ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty)); + } + } + + Opcode::Sdiv | Opcode::Srem => { + let input_ty = ctx.input_ty(insn, 0); + let size = input_ty.bytes() as u8; + + let dividend = input_to_reg(ctx, inputs[0]); + let dst = output_to_reg(ctx, outputs[0]); + + let srcloc = ctx.srcloc(insn); + ctx.emit(Inst::gen_move( + Writable::from_reg(regs::rax()), + dividend, + input_ty, + )); + + if flags.avoid_div_traps() { + // Lowering all the inline checks and special behavior is a bit complicated, so + // this is implemented as a vcode meta-instruction. + // + // Note it keeps the result in $rax (if is_div) or $rdx (if !is_div), so that + // regalloc is aware of the coalescing opportunity between rax/rdx and the + // destination register. + let divisor = input_to_reg(ctx, inputs[1]); + ctx.emit(Inst::SignedDivOrRem { + is_div: op == Opcode::Sdiv, + size, + divisor, + loc: srcloc, + }); + } else { + let divisor = input_to_reg_mem(ctx, inputs[1]); + + // Fill in the "high" parts: sign-extend the sign-bit of rax into rdx. + ctx.emit(Inst::sign_extend_rax_to_rdx(size)); + + // Emit the actual idiv. + ctx.emit(Inst::div( + size, + true, /* signed */ + divisor, + ctx.srcloc(insn), + )); + } + + // Move the result back into the destination reg. + if op == Opcode::Sdiv { + // The quotient is in rax. + ctx.emit(Inst::gen_move(dst, regs::rax(), input_ty)); + } else { + // The remainder is in rdx.
+ ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty)); + } + } + Opcode::IaddImm | Opcode::ImulImm | Opcode::UdivImm @@ -698,7 +805,7 @@ impl LowerBackend for X64Backend { type MInst = Inst; fn lower>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> { - lower_insn_to_regs(ctx, ir_inst) + lower_insn_to_regs(ctx, ir_inst, &self.flags) } fn lower_branch_group>(