From 9d5be00de491ed5bcd1cf418f1d2af5038e03d5f Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Thu, 2 Jul 2020 17:11:06 +0200 Subject: [PATCH] Address review comments - put the division in the synthetic instruction as well, - put the branch table check in the inst's emission code, - replace OneWayCondJmp by TrapIf vcode instruction, - add comments describing code generated by the synthetic instructions --- .../codegen/src/isa/aarch64/inst/emit.rs | 4 + cranelift/codegen/src/isa/x64/inst/args.rs | 8 +- cranelift/codegen/src/isa/x64/inst/emit.rs | 222 +++++++++++------- .../codegen/src/isa/x64/inst/emit_tests.rs | 53 ++++- cranelift/codegen/src/isa/x64/inst/mod.rs | 69 +++--- cranelift/codegen/src/isa/x64/lower.rs | 109 +++------ 6 files changed, 277 insertions(+), 188 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 9802349f85..bafe42abd0 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1591,6 +1591,10 @@ impl MachInstEmit for Inst { let jt_off = sink.cur_offset(); for &target in info.targets.iter() { let word_off = sink.cur_offset(); + // off_into_table is an addend here embedded in the label to be later patched + // at the end of codegen. The offset is initially relative to this jump table + // entry; with the extra addend, it'll be relative to the jump table's start, + // after patching. let off_into_table = word_off - jt_off; sink.use_label_at_offset( word_off, diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index a9580b216d..78187f6011 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -90,7 +90,13 @@ impl ShowWithRRU for Amode { index.show_rru(mb_rru), 1 << shift ), - Amode::RipRelative { ref target } => format!("{}(%rip)", target.show_rru(mb_rru)), + Amode::RipRelative { ref target } => format!( + "{}(%rip)", + match target { + BranchTarget::Label(label) => format!("label{}", label.get()), + BranchTarget::ResolvedOffset(offset) => offset.to_string(), + } + ), } } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index e2d3271af1..d89a712cc1 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1,6 +1,8 @@ use log::debug; use regalloc::Reg; +use std::convert::TryFrom; + use crate::binemit::Reloc; use crate::isa::x64::inst::*; @@ -284,11 +286,9 @@ fn emit_std_enc_mem( sink.put4(0); } BranchTarget::ResolvedOffset(offset) => { - assert!( - offset <= u32::max_value() as isize, - "rip-relative can't hold >= U32_MAX values" - ); - sink.put4(offset as u32); + let offset = + u32::try_from(offset).expect("rip-relative can't hold >= U32_MAX values"); + sink.put4(offset); } } } @@ -370,6 +370,16 @@ fn emit_simm(sink: &mut MachBuffer, size: u8, simm32: u32) { } } +/// Emits a one way conditional jump if CC is set (true). +fn one_way_jmp(sink: &mut MachBuffer, cc: CC, label: MachLabel) { + let cond_start = sink.cur_offset(); + let cond_disp_off = cond_start + 2; + sink.use_label_at_offset(cond_disp_off, label, LabelUse::JmpRel32); + sink.put1(0x0F); + sink.put1(0x80 + cc.get_enc()); + sink.put4(0x0); +} + /// The top-level emit function. /// /// Important! Do not add improved (shortened) encoding cases to existing @@ -580,88 +590,115 @@ pub(crate) fn emit( } Inst::SignExtendRaxRdx { size } => { - let (prefix, rex_flags) = match size { - 2 => (LegacyPrefix::_66, RexFlags::clear_w()), - 4 => (LegacyPrefix::None, RexFlags::clear_w()), - 8 => (LegacyPrefix::None, RexFlags::set_w()), + match size { + 2 => sink.put1(0x66), + 4 => {} + 8 => sink.put1(0x48), _ => unreachable!(), - }; - prefix.emit(sink); - rex_flags.emit_two_op(sink, 0, 0); + } sink.put1(0x99); } - Inst::SignedDivOrRem { + Inst::CheckedDivOrRemSeq { is_div, + is_signed, size, divisor, loc, } => { + // Generates the following code sequence: + // + // ;; check divide by zero: + // cmp 0 %divisor + // jnz $after_trap + // ud2 + // $after_trap: + // + // ;; for signed modulo/div: + // cmp -1 %divisor + // jnz $do_op + // ;; for signed modulo, result is 0 + // mov #0, %rdx + // j $done + // ;; for signed div, check for integer overflow against INT_MIN of the right size + // cmp INT_MIN, %rax + // jnz $do_op + // ud2 + // + // $do_op: + // ;; if signed + // cdq ;; sign-extend from rax into rdx + // ;; else + // mov #0, %rdx + // idiv %divisor + // + // $done: debug_assert!(flags.avoid_div_traps()); // Check if the divisor is zero, first. let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), *divisor); inst.emit(sink, flags, state); - let inst = Inst::one_way_jmp( - CC::NZ, - BranchTarget::ResolvedOffset(Inst::size_of_trap() as isize), - ); + let inst = Inst::trap_if(CC::Z, TrapCode::IntegerDivisionByZero, *loc); inst.emit(sink, flags, state); - let inst = Inst::trap(*loc, TrapCode::IntegerDivisionByZero); - inst.emit(sink, flags, state); - - // Now check if the divisor is -1. - let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), *divisor); - inst.emit(sink, flags, state); - - let do_op = sink.get_label(); - // If not equal, jump to do-op. - let inst = Inst::one_way_jmp(CC::NZ, BranchTarget::Label(do_op)); - inst.emit(sink, flags, state); - - // Here, divisor == -1. - let done_label = if !*is_div { - // x % -1 = 0; put the result into the destination, $rdx. - let done_label = sink.get_label(); - - let inst = Inst::imm_r(*size == 8, 0, Writable::from_reg(regs::rdx())); + let (do_op, done_label) = if *is_signed { + // Now check if the divisor is -1. + let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), *divisor); inst.emit(sink, flags, state); - let inst = Inst::jmp_known(BranchTarget::Label(done_label)); - inst.emit(sink, flags, state); + let do_op = sink.get_label(); - Some(done_label) + // If not equal, jump to do-op. + one_way_jmp(sink, CC::NZ, do_op); + + // Here, divisor == -1. + if !*is_div { + // x % -1 = 0; put the result into the destination, $rdx. + let done_label = sink.get_label(); + + let inst = Inst::imm_r(*size == 8, 0, Writable::from_reg(regs::rdx())); + inst.emit(sink, flags, state); + + let inst = Inst::jmp_known(BranchTarget::Label(done_label)); + inst.emit(sink, flags, state); + + (Some(do_op), Some(done_label)) + } else { + // Check for integer overflow. + let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0x80000000), regs::rax()); + inst.emit(sink, flags, state); + + // If not equal, jump over the trap. + let inst = Inst::trap_if(CC::Z, TrapCode::IntegerOverflow, *loc); + inst.emit(sink, flags, state); + + (Some(do_op), None) + } } else { - // Check for integer overflow. - let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0x80000000), regs::rax()); - inst.emit(sink, flags, state); - - // If not equal, jump over the trap. - let inst = Inst::one_way_jmp( - CC::NZ, - BranchTarget::ResolvedOffset(Inst::size_of_trap() as isize), - ); - inst.emit(sink, flags, state); - - let inst = Inst::trap(*loc, TrapCode::IntegerOverflow); - inst.emit(sink, flags, state); - - None + (None, None) }; - sink.bind_label(do_op); + if let Some(do_op) = do_op { + sink.bind_label(do_op); + } - // Fill in the "high" parts: sign-extend the sign-bit of rax into rdx. - let inst = Inst::sign_extend_rax_to_rdx(*size); + // Fill in the high parts: + if *is_signed { + // sign-extend the sign-bit of rax into rdx, for signed opcodes. + let inst = Inst::sign_extend_rax_to_rdx(*size); + inst.emit(sink, flags, state); + } else { + // zero for unsigned opcodes. + let inst = Inst::imm_r(true /* is_64 */, 0, Writable::from_reg(regs::rdx())); + inst.emit(sink, flags, state); + } + + let inst = Inst::div(*size, *is_signed, RegMem::reg(*divisor), *loc); inst.emit(sink, flags, state); - let inst = Inst::div(*size, true /*signed*/, RegMem::reg(*divisor), *loc); - inst.emit(sink, flags, state); - - // The lowering takes care of moving the result back into the right register, see - // comment there. + // Lowering takes care of moving the result back into the right register, see comment + // there. if let Some(done) = done_label { sink.bind_label(done); @@ -1173,18 +1210,6 @@ pub(crate) fn emit( sink.put4(nt_disp); } - Inst::OneWayJmpCond { cc, dst } => { - let cond_start = sink.cur_offset(); - let cond_disp_off = cond_start + 2; - if let Some(l) = dst.as_label() { - sink.use_label_at_offset(cond_disp_off, l, LabelUse::JmpRel32); - } - let dst_disp = dst.as_offset32_or_zero() as u32; - sink.put1(0x0F); - sink.put1(0x80 + cc.get_enc()); - sink.put4(dst_disp); - } - Inst::JmpUnknown { target } => { match target { RegMem::Reg { reg } => { @@ -1215,19 +1240,41 @@ pub(crate) fn emit( } } - Inst::JmpTable { + Inst::JmpTableSeq { idx, tmp1, tmp2, ref targets, + default_target, .. } => { // This sequence is *one* instruction in the vcode, and is expanded only here at // emission time, because we cannot allow the regalloc to insert spills/reloads in // the middle; we depend on hardcoded PC-rel addressing below. + // + // We don't have to worry about emitting islands, because the only label-use type has a + // maximum range of 2 GB. If we later consider using shorter-range label references, + // this will need to be revisited. // Save index in a tmp (the live range of ridx only goes to start of this // sequence; rtmp1 or rtmp2 may overwrite it). + + // We generate the following sequence: + // ;; generated by lowering: cmp #jmp_table_size, %idx + // jnb $default_target + // mov %idx, %tmp2 + // lea start_of_jump_table_offset(%rip), %tmp1 + // movzlq [%tmp1, %tmp2], %tmp2 + // addq %tmp2, %tmp1 + // j *%tmp1 + // $start_of_jump_table: + // -- jump table entries + let default_label = match default_target { + BranchTarget::Label(label) => label, + _ => unreachable!(), + }; + one_way_jmp(sink, CC::NB, *default_label); // idx unsigned >= jmp table size + let inst = Inst::gen_move(*tmp2, *idx, I64); inst.emit(sink, flags, state); @@ -1265,12 +1312,33 @@ pub(crate) fn emit( let jt_off = sink.cur_offset(); for &target in targets.iter() { let word_off = sink.cur_offset(); + // off_into_table is an addend here embedded in the label to be later patched at + // the end of codegen. The offset is initially relative to this jump table entry; + // with the extra addend, it'll be relative to the jump table's start, after + // patching. let off_into_table = word_off - jt_off; sink.use_label_at_offset(word_off, target.as_label().unwrap(), LabelUse::PCRel32); sink.put4(off_into_table); } } + Inst::TrapIf { + cc, + trap_code, + srcloc, + } => { + let else_label = sink.get_label(); + + // Jump over if the invert of CC is set (i.e. CC is not set). + one_way_jmp(sink, cc.invert(), else_label); + + // Trap! + let inst = Inst::trap(*srcloc, *trap_code); + inst.emit(sink, flags, state); + + sink.bind_label(else_label); + } + Inst::XMM_Mov_RM_R { op, src: src_e, @@ -1343,14 +1411,8 @@ pub(crate) fn emit( Inst::Ud2 { trap_info } => { sink.add_trap(trap_info.0, trap_info.1); - let cur_offset = sink.cur_offset(); sink.put1(0x0f); sink.put1(0x0b); - assert_eq!( - sink.cur_offset() - cur_offset, - Inst::size_of_trap(), - "invalid trap size" - ); } Inst::VirtualSPOffsetAdj { offset } => { diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index cbdb80aae2..78a13b8a55 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -1154,6 +1154,55 @@ fn test_x64_emit() { "imull $76543210, %esi", )); + // ======================================================== + // Div + insns.push(( + Inst::div( + 4, + true, /*signed*/ + RegMem::reg(regs::rsi()), + SourceLoc::default(), + ), + "F7FE", + "idiv %esi", + )); + insns.push(( + Inst::div( + 8, + true, /*signed*/ + RegMem::reg(regs::r15()), + SourceLoc::default(), + ), + "49F7FF", + "idiv %r15", + )); + insns.push(( + Inst::div( + 4, + false, /*signed*/ + RegMem::reg(regs::r14()), + SourceLoc::default(), + ), + "41F7F6", + "div %r14d", + )); + insns.push(( + Inst::div( + 8, + false, /*signed*/ + RegMem::reg(regs::rdi()), + SourceLoc::default(), + ), + "48F7F7", + "div %rdi", + )); + + // ======================================================== + // cdq family: SignExtendRaxRdx + insns.push((Inst::sign_extend_rax_to_rdx(2), "6699", "cwd")); + insns.push((Inst::sign_extend_rax_to_rdx(4), "99", "cdq")); + insns.push((Inst::sign_extend_rax_to_rdx(8), "4899", "cqo")); + // ======================================================== // Imm_R // @@ -1532,7 +1581,7 @@ fn test_x64_emit() { insns.push(( Inst::lea(Amode::rip_relative(BranchTarget::ResolvedOffset(0)), w_rdi), "488D3D00000000", - "lea (offset 0)(%rip), %rdi", + "lea 0(%rip), %rdi", )); insns.push(( Inst::lea( @@ -1540,7 +1589,7 @@ fn test_x64_emit() { w_r15, ), "4C8D3D39050000", - "lea (offset 1337)(%rip), %r15", + "lea 1337(%rip), %r15", )); // ======================================================== diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 2cc2574540..b682046956 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -57,12 +57,15 @@ pub enum Inst { loc: SourceLoc, }, - /// A synthetic sequence to implement the right inline checks for signed remainder and modulo, + /// A synthetic sequence to implement the right inline checks for remainder and division, /// assuming the dividend is in $rax. /// Puts the result back into $rax if is_div, $rdx if !is_div, to mimic what the div /// instruction does. - SignedDivOrRem { + /// The generated code sequence is described in the emit's function match arm for this + /// instruction. + CheckedDivOrRemSeq { is_div: bool, + is_signed: bool, size: u8, divisor: Reg, loc: SourceLoc, @@ -224,15 +227,14 @@ pub enum Inst { not_taken: BranchTarget, }, - /// A one-way conditional branch, invisible to the CFG processing; used *only* as part of - /// straight-line sequences in code to be emitted. - OneWayJmpCond { cc: CC, dst: BranchTarget }, - /// Jump-table sequence, as one compound instruction (see note in lower.rs for rationale). - JmpTable { + /// The generated code sequence is described in the emit's function match arm for this + /// instruction. + JmpTableSeq { idx: Reg, tmp1: Writable, tmp2: Writable, + default_target: BranchTarget, targets: Vec, targets_for_term: Vec, }, @@ -240,6 +242,13 @@ pub enum Inst { /// Indirect jump: jmpq (reg mem). JmpUnknown { target: RegMem }, + /// Traps if the condition code is set. + TrapIf { + cc: CC, + trap_code: TrapCode, + srcloc: SourceLoc, + }, + /// A debug trap. Hlt, @@ -411,14 +420,6 @@ impl Inst { trap_info: (srcloc, trap_code), } } - /// Returns the size of a trap instruction, which must be fixed. Asserted during codegen. - pub(crate) fn size_of_trap() -> u32 { - 2 - } - - pub(crate) fn one_way_jmp(cc: CC, dst: BranchTarget) -> Inst { - Inst::OneWayJmpCond { cc, dst } - } pub(crate) fn setcc(cc: CC, dst: Writable) -> Inst { debug_assert!(dst.to_reg().get_class() == RegClass::I64); @@ -494,6 +495,14 @@ impl Inst { pub(crate) fn jmp_unknown(target: RegMem) -> Inst { Inst::JmpUnknown { target } } + + pub(crate) fn trap_if(cc: CC, trap_code: TrapCode, srcloc: SourceLoc) -> Inst { + Inst::TrapIf { + cc, + trap_code, + srcloc, + } + } } //============================================================================= @@ -564,13 +573,15 @@ impl ShowWithRRU for Inst { }), divisor.show_rru_sized(mb_rru, *size) ), - Inst::SignedDivOrRem { + Inst::CheckedDivOrRemSeq { is_div, + is_signed, size, divisor, .. } => format!( - "s{} $rax:$rdx, {}", + "{}{} $rax:$rdx, {}", + if *is_signed { "s" } else { "u" }, if *is_div { "div " } else { "rem " }, show_ireg_sized(*divisor, mb_rru, *size), ), @@ -730,12 +741,7 @@ impl ShowWithRRU for Inst { taken.show_rru(mb_rru), not_taken.show_rru(mb_rru) ), - Inst::OneWayJmpCond { cc, dst } => format!( - "{} {}", - ljustify2("j".to_string(), cc.to_string()), - dst.show_rru(mb_rru), - ), - Inst::JmpTable { idx, .. } => { + Inst::JmpTableSeq { idx, .. } => { format!("{} {}", ljustify("br_table".into()), idx.show_rru(mb_rru)) } // @@ -744,6 +750,9 @@ impl ShowWithRRU for Inst { ljustify("jmp".to_string()), target.show_rru(mb_rru) ), + Inst::TrapIf { cc, trap_code, .. } => { + format!("j{} ; ud2 {} ;", cc.invert().to_string(), trap_code) + } Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset), Inst::Hlt => "hlt".into(), Inst::Ud2 { trap_info } => format!("ud2 {}", trap_info.1), @@ -779,7 +788,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_mod(Writable::from_reg(regs::rdx())); divisor.get_regs_as_uses(collector); } - Inst::SignedDivOrRem { divisor, .. } => { + Inst::CheckedDivOrRemSeq { divisor, .. } => { collector.add_mod(Writable::from_reg(regs::rax())); collector.add_mod(Writable::from_reg(regs::rdx())); collector.add_use(*divisor); @@ -871,7 +880,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { dest.get_regs_as_uses(collector); } - Inst::JmpTable { + Inst::JmpTableSeq { ref idx, ref tmp1, ref tmp2, @@ -886,9 +895,9 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { | Inst::EpiloguePlaceholder | Inst::JmpKnown { .. } | Inst::JmpCond { .. } - | Inst::OneWayJmpCond { .. } | Inst::Nop { .. } | Inst::JmpUnknown { .. } + | Inst::TrapIf { .. } | Inst::VirtualSPOffsetAdj { .. } | Inst::Hlt | Inst::Ud2 { .. } => { @@ -977,7 +986,7 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { map_mod(mapper, dst); } Inst::Div { divisor, .. } => divisor.map_uses(mapper), - Inst::SignedDivOrRem { divisor, .. } => { + Inst::CheckedDivOrRemSeq { divisor, .. } => { map_use(mapper, divisor); } Inst::SignExtendRaxRdx { .. } => {} @@ -1104,7 +1113,7 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { dest.map_uses(mapper); } - Inst::JmpTable { + Inst::JmpTableSeq { ref mut idx, ref mut tmp1, ref mut tmp2, @@ -1119,9 +1128,9 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { | Inst::EpiloguePlaceholder | Inst::JmpKnown { .. } | Inst::JmpCond { .. } - | Inst::OneWayJmpCond { .. } | Inst::Nop { .. } | Inst::JmpUnknown { .. } + | Inst::TrapIf { .. } | Inst::VirtualSPOffsetAdj { .. } | Inst::Ud2 { .. } | Inst::Hlt => { @@ -1182,7 +1191,7 @@ impl MachInst for Inst { taken, not_taken, } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()), - &Self::JmpTable { + &Self::JmpTableSeq { ref targets_for_term, .. } => MachTerminator::Indirect(&targets_for_term[..]), diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 30bd611153..3228c8d5ab 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -210,7 +210,8 @@ fn lower_insn_to_regs>( let rhs = input_to_reg_mem_imm(ctx, inputs[1]); let dst = output_to_reg(ctx, outputs[0]); - // TODO For add, try to commute the operands if one is an immediate. + // TODO For commutative operations (add, mul, and, or, xor), try to commute the + // operands if one is an immediate. let is_64 = int_ty_is_64(ty.unwrap()); let alu_op = match op { @@ -622,7 +623,7 @@ fn lower_insn_to_regs>( ctx.emit(inst); } - Opcode::Select | Opcode::Selectif => { + Opcode::Select | Opcode::Selectif | Opcode::SelectifSpectreGuard => { let cc = if op == Opcode::Select { // The input is a boolean value, compare it against zero. let size = ctx.input_ty(insn, 0).bytes() as u8; @@ -663,56 +664,10 @@ fn lower_insn_to_regs>( } } - Opcode::Udiv | Opcode::Urem => { - let input_ty = ctx.input_ty(insn, 0); - let size = input_ty.bytes() as u8; + Opcode::Udiv | Opcode::Urem | Opcode::Sdiv | Opcode::Srem => { + let is_div = op == Opcode::Udiv || op == Opcode::Sdiv; + let is_signed = op == Opcode::Sdiv || op == Opcode::Srem; - let dividend = input_to_reg(ctx, inputs[0]); - let dst = output_to_reg(ctx, outputs[0]); - - let divisor = if flags.avoid_div_traps() { - let srcloc = ctx.srcloc(insn); - let divisor = input_to_reg(ctx, inputs[1]); - - // Check that divisor isn't zero, or trap otherwise. - let after_trap = BranchTarget::ResolvedOffset(Inst::size_of_trap() as isize); - ctx.emit(Inst::cmp_rmi_r(size, RegMemImm::imm(0), divisor)); - ctx.emit(Inst::one_way_jmp(CC::NZ, after_trap)); - ctx.emit(Inst::trap(srcloc, TrapCode::IntegerDivisionByZero)); - - RegMem::reg(divisor) - } else { - input_to_reg_mem(ctx, inputs[1]) - }; - - ctx.emit(Inst::gen_move( - Writable::from_reg(regs::rax()), - dividend, - input_ty, - )); - - // Fill in the "high" parts: unsigned means we put 0 in there. - ctx.emit(Inst::imm_r(true, 0, Writable::from_reg(regs::rdx()))); - - // Emit the actual idiv. - ctx.emit(Inst::div( - size, - false, /* signed */ - divisor, - ctx.srcloc(insn), - )); - - // Move the result back into the destination reg. - if op == Opcode::Udiv { - // The quotient is in rax. - ctx.emit(Inst::gen_move(dst, regs::rax(), input_ty)); - } else { - // The remainder is in rdx. - ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty)); - } - } - - Opcode::Sdiv | Opcode::Srem => { let input_ty = ctx.input_ty(insn, 0); let size = input_ty.bytes() as u8; @@ -727,15 +682,17 @@ fn lower_insn_to_regs>( )); if flags.avoid_div_traps() { - // Lowering all the inline checks and special behavior is a bit complicated, so - // this is implemented as a vcode meta-instruction. + // A vcode meta-instruction is used to lower the inline checks, since they embed + // pc-relative offsets that must not change, thus requiring regalloc to not + // interfere by introducing spills and reloads. // // Note it keeps the result in $rax (if is_div) or $rdx (if !is_div), so that // regalloc is aware of the coalescing opportunity between rax/rdx and the // destination register. let divisor = input_to_reg(ctx, inputs[1]); - ctx.emit(Inst::SignedDivOrRem { - is_div: op == Opcode::Sdiv, + ctx.emit(Inst::CheckedDivOrRemSeq { + is_div, + is_signed, size, divisor, loc: srcloc, @@ -743,20 +700,25 @@ fn lower_insn_to_regs>( } else { let divisor = input_to_reg_mem(ctx, inputs[1]); - // Fill in the "high" parts: sign-extend the sign-bit of rax into rdx. - ctx.emit(Inst::sign_extend_rax_to_rdx(size)); + // Fill in the high parts: + if is_signed { + // sign-extend the sign-bit of rax into rdx, for signed opcodes. + ctx.emit(Inst::sign_extend_rax_to_rdx(size)); + } else { + // zero for unsigned opcodes. + ctx.emit(Inst::imm_r( + true, /* is_64 */ + 0, + Writable::from_reg(regs::rdx()), + )); + } // Emit the actual idiv. - ctx.emit(Inst::div( - size, - true, /* signed */ - divisor, - ctx.srcloc(insn), - )); + ctx.emit(Inst::div(size, is_signed, divisor, ctx.srcloc(insn))); } // Move the result back into the destination reg. - if op == Opcode::Sdiv { + if is_div { // The quotient is in rax. ctx.emit(Inst::gen_move(dst, regs::rax(), input_ty)); } else { @@ -914,12 +876,12 @@ impl LowerBackend for X64Backend { assert!(jt_size <= u32::max_value() as usize); let jt_size = jt_size as u32; - let idx_size = ctx.input_ty(branches[0], 0).bits(); + let idx_size_bits = ctx.input_ty(branches[0], 0).bits(); // Zero-extend to 32-bits if needed. // TODO consider factoring this out? - let idx = if idx_size < 32 { - let ext_mode = match idx_size { + let idx = if idx_size_bits < 32 { + let ext_mode = match idx_size_bits { 1 | 8 => ExtMode::BL, 16 => ExtMode::WL, _ => unreachable!(), @@ -947,12 +909,6 @@ impl LowerBackend for X64Backend { // Bounds-check (compute flags from idx - jt_size) and branch to default. ctx.emit(Inst::cmp_rmi_r(4, RegMemImm::imm(jt_size), idx)); - let default_target = BranchTarget::Label(targets[0]); - ctx.emit(Inst::OneWayJmpCond { - dst: default_target, - cc: CC::NB, // unsigned >= - }); - // Emit the compound instruction that does: // // lea $jt, %rA @@ -970,17 +926,20 @@ impl LowerBackend for X64Backend { let tmp1 = ctx.alloc_tmp(RegClass::I64, I32); let tmp2 = ctx.alloc_tmp(RegClass::I64, I32); + let targets_for_term: Vec = targets.to_vec(); + let default_target = BranchTarget::Label(targets[0]); + let jt_targets: Vec = targets .iter() .skip(1) .map(|bix| BranchTarget::Label(*bix)) .collect(); - let targets_for_term: Vec = targets.to_vec(); - ctx.emit(Inst::JmpTable { + ctx.emit(Inst::JmpTableSeq { idx, tmp1, tmp2, + default_target, targets: jt_targets, targets_for_term, });