From 35d9ab19b727c2050b7579d8e0b73c4f5303e30d Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Fri, 24 Jul 2020 17:04:34 +0200 Subject: [PATCH] Review fixes; --- cranelift/codegen/shared/src/condcodes.rs | 21 ----- .../codegen/src/isa/aarch64/inst/emit.rs | 6 +- cranelift/codegen/src/isa/aarch64/lower.rs | 20 ++++ .../codegen/src/isa/aarch64/lower_inst.rs | 16 ++-- cranelift/codegen/src/isa/x64/inst/emit.rs | 91 ++++++++++--------- cranelift/codegen/src/isa/x64/inst/mod.rs | 55 ++++++++--- cranelift/codegen/src/isa/x64/lower.rs | 70 +++++++------- cranelift/codegen/src/machinst/buffer.rs | 57 +++++++----- 8 files changed, 191 insertions(+), 145 deletions(-) diff --git a/cranelift/codegen/shared/src/condcodes.rs b/cranelift/codegen/shared/src/condcodes.rs index 2695b5b471..03ae865ce4 100644 --- a/cranelift/codegen/shared/src/condcodes.rs +++ b/cranelift/codegen/shared/src/condcodes.rs @@ -122,27 +122,6 @@ impl IntCC { } } - /// Determines whether this condcode interprets inputs as signed or - /// unsigned. See the documentation for the `icmp` instruction in - /// cranelift-codegen/meta/src/shared/instructions.rs for further insights - /// into this. - pub fn is_signed(&self) -> bool { - match self { - IntCC::Equal - | IntCC::UnsignedGreaterThanOrEqual - | IntCC::UnsignedGreaterThan - | IntCC::UnsignedLessThanOrEqual - | IntCC::UnsignedLessThan - | IntCC::NotEqual => false, - IntCC::SignedGreaterThanOrEqual - | IntCC::SignedGreaterThan - | IntCC::SignedLessThanOrEqual - | IntCC::SignedLessThan - | IntCC::Overflow - | IntCC::NotOverflow => true, - } - } - /// Get the corresponding string condition code for the IntCC object. 
pub fn to_static_str(self) -> &'static str { use self::IntCC::*; diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index aff06ee7a2..9cee1345b4 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1501,7 +1501,7 @@ impl MachInstEmit for Inst { } &Inst::Call { ref info } => { if let Some(s) = state.take_stackmap() { - sink.add_stackmap(4, s); + sink.add_stackmap(StackmapExtent::UpcomingBytes(4), s); } sink.add_reloc(info.loc, Reloc::Arm64Call, &info.dest, 0); sink.put4(enc_jump26(0b100101, 0)); @@ -1511,7 +1511,7 @@ impl MachInstEmit for Inst { } &Inst::CallInd { ref info } => { if let Some(s) = state.take_stackmap() { - sink.add_stackmap(4, s); + sink.add_stackmap(StackmapExtent::UpcomingBytes(4), s); } sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5)); if info.opcode.is_call() { @@ -1569,7 +1569,7 @@ impl MachInstEmit for Inst { let (srcloc, code) = trap_info; sink.add_trap(srcloc, code); if let Some(s) = state.take_stackmap() { - sink.add_stackmap(4, s); + sink.add_stackmap(StackmapExtent::UpcomingBytes(4), s); } sink.put4(0xd4a00000); } diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index c98cb128b7..516cbd7515 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -798,6 +798,26 @@ pub(crate) fn lower_vector_compare>( Ok(()) } +/// Determines whether this condcode interprets inputs as signed or unsigned. See the +/// documentation for the `icmp` instruction in cranelift-codegen/meta/src/shared/instructions.rs +/// for further insights into this. 
+pub(crate) fn condcode_is_signed(cc: IntCC) -> bool { + match cc { + IntCC::Equal + | IntCC::UnsignedGreaterThanOrEqual + | IntCC::UnsignedGreaterThan + | IntCC::UnsignedLessThanOrEqual + | IntCC::UnsignedLessThan + | IntCC::NotEqual => false, + IntCC::SignedGreaterThanOrEqual + | IntCC::SignedGreaterThan + | IntCC::SignedLessThanOrEqual + | IntCC::SignedLessThan + | IntCC::Overflow + | IntCC::NotOverflow => true, + } +} + //============================================================================= // Helpers for instruction lowering. diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 5b89ce1436..5db776765d 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1115,7 +1115,7 @@ pub(crate) fn lower_insn_to_regs>( { let condcode = inst_condcode(ctx.data(icmp_insn)).unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); lower_icmp_or_ifcmp_to_flags(ctx, icmp_insn, is_signed); cond } else if let Some(fcmp_insn) = @@ -1161,7 +1161,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Selectif | Opcode::SelectifSpectreGuard => { let condcode = inst_condcode(ctx.data(insn)).unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); // Verification ensures that the input is always a // single-def ifcmp. let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap(); @@ -1232,7 +1232,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Trueif => { let condcode = inst_condcode(ctx.data(insn)).unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); // Verification ensures that the input is always a // single-def ifcmp. 
let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap(); @@ -1433,7 +1433,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Icmp => { let condcode = inst_condcode(ctx.data(insn)).unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); let rd = get_output_reg(ctx, outputs[0]); let ty = ctx.input_ty(insn, 0); let bits = ty_bits(ty); @@ -1509,7 +1509,7 @@ pub(crate) fn lower_insn_to_regs>( } else if op == Opcode::Trapif { let condcode = inst_condcode(ctx.data(insn)).unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); // Verification ensures that the input is always a single-def ifcmp. let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap(); @@ -2360,7 +2360,7 @@ pub(crate) fn lower_branch>( { let condcode = inst_condcode(ctx.data(icmp_insn)).unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); let negated = op0 == Opcode::Brz; let cond = if negated { cond.invert() } else { cond }; @@ -2410,7 +2410,7 @@ pub(crate) fn lower_branch>( let cond = lower_condcode(condcode); let kind = CondBrKind::Cond(cond); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); let ty = ctx.input_ty(branches[0], 0); let bits = ty_bits(ty); let narrow_mode = match (bits <= 32, is_signed) { @@ -2451,7 +2451,7 @@ pub(crate) fn lower_branch>( let cond = lower_condcode(condcode); let kind = CondBrKind::Cond(cond); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); let flag_input = InsnInput { insn: branches[0], input: 0, diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 2d4ae85d03..7194ef92ab 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ 
-373,6 +373,26 @@ fn emit_simm(sink: &mut MachBuffer, size: u8, simm32: u32) { } } +/// A small helper to generate a signed conversion instruction. +fn emit_signed_cvt( + sink: &mut MachBuffer, + flags: &settings::Flags, + state: &mut EmitState, + src: Reg, + dst: Writable, + to_f64: bool, +) { + // Handle an unsigned int, which is the "easy" case: a signed conversion will do the + // right thing. + let op = if to_f64 { + SseOpcode::Cvtsi2sd + } else { + SseOpcode::Cvtsi2ss + }; + let inst = Inst::gpr_to_xmm(op, RegMem::reg(src), OperandSize::Size64, dst); + inst.emit(sink, flags, state); +} + /// Emits a one way conditional jump if CC is set (true). fn one_way_jmp(sink: &mut MachBuffer, cc: CC, label: MachLabel) { let cond_start = sink.cur_offset(); @@ -700,7 +720,7 @@ pub(crate) fn emit( debug_assert!(flags.avoid_div_traps()); // Check if the divisor is zero, first. - let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), *divisor); + let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), divisor.to_reg()); inst.emit(sink, flags, state); let inst = Inst::trap_if(CC::Z, TrapCode::IntegerDivisionByZero, *loc); @@ -708,7 +728,7 @@ pub(crate) fn emit( let (do_op, done_label) = if kind.is_signed() { // Now check if the divisor is -1. - let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), *divisor); + let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), divisor.to_reg()); inst.emit(sink, flags, state); let do_op = sink.get_label(); @@ -768,7 +788,7 @@ pub(crate) fn emit( inst.emit(sink, flags, state); } - let inst = Inst::div(*size, kind.is_signed(), RegMem::reg(*divisor), *loc); + let inst = Inst::div(*size, kind.is_signed(), RegMem::reg(divisor.to_reg()), *loc); inst.emit(sink, flags, state); // Lowering takes care of moving the result back into the right register, see comment @@ -1283,7 +1303,7 @@ pub(crate) fn emit( dest, loc, opcode, .. 
} => { if let Some(s) = state.take_stackmap() { - sink.add_stackmap(5, s); + sink.add_stackmap(StackmapExtent::UpcomingBytes(5), s); } sink.put1(0xE8); // The addend adjusts for the difference between the end of the instruction and the @@ -1327,7 +1347,7 @@ pub(crate) fn emit( } } if let Some(s) = state.take_stackmap() { - sink.add_stackmap_ending_at(start_offset, s); + sink.add_stackmap(StackmapExtent::StartedAtOffset(start_offset), s); } if opcode.is_call() { sink.add_call_site(*loc, *opcode); @@ -1611,13 +1631,13 @@ pub(crate) fn emit( // j done // // ;; to get the desired NaN behavior (signalling NaN transformed into a quiet NaN, the - // NaN value is returned), we add both inputs. + // ;; NaN value is returned), we add both inputs. // propagate_nan: // add{ss,sd} %lhs, %rhs_dst // j done // // do_min_max: - // min{ss,sd} %lhs, %rhs_dst + // {min,max}{ss,sd} %lhs, %rhs_dst // // done: let done = sink.get_label(); @@ -1707,8 +1727,9 @@ pub(crate) fn emit( dst_size, } => { let (prefix, opcode, dst_first) = match op { - SseOpcode::Movd => (LegacyPrefix::_66, 0x0F7E, false), - SseOpcode::Movq => (LegacyPrefix::_66, 0x0F7E, false), + // Movd and movq use the same opcode; the presence of the REX prefix (set below) + // actually determines which is used. + SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefix::_66, 0x0F7E, false), SseOpcode::Cvttss2si => (LegacyPrefix::_F3, 0x0F2C, true), SseOpcode::Cvttsd2si => (LegacyPrefix::_F2, 0x0F2C, true), _ => panic!("unexpected opcode {:?}", op), @@ -1734,8 +1755,9 @@ pub(crate) fn emit( src_size, } => { let (prefix, opcode) = match op { - SseOpcode::Movd => (LegacyPrefix::_66, 0x0F6E), - SseOpcode::Movq => (LegacyPrefix::_66, 0x0F6E), + // Movd and movq use the same opcode; the presence of the REX prefix (set below) + // actually determines which is used. 
+ SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefix::_66, 0x0F6E), SseOpcode::Cvtsi2ss => (LegacyPrefix::_F3, 0x0F2A), SseOpcode::Cvtsi2sd => (LegacyPrefix::_F2, 0x0F2A), _ => panic!("unexpected opcode {:?}", op), @@ -1781,6 +1803,9 @@ pub(crate) fn emit( tmp_gpr1, tmp_gpr2, } => { + // Note: this sequence is specific to 64-bit mode; a 32-bit mode would require a + // different sequence. + // // Emit the following sequence: // // cmp 0, %src @@ -1790,6 +1815,7 @@ pub(crate) fn emit( // cvtsi2sd/cvtsi2ss %src, %dst // j done // + // ;; handle negative: see below for an explanation of what it's doing. // handle_negative: // mov %src, %tmp_gpr1 // shr $1, %tmp_gpr1 @@ -1801,39 +1827,24 @@ pub(crate) fn emit( // // done: - // A small helper to generate a signed conversion instruction, that helps deduplicating - // code below. - let emit_signed_cvt = |sink: &mut MachBuffer, - flags: &settings::Flags, - state: &mut EmitState, - src: Reg, - dst: Writable, - to_f64: bool| { - // Handle an unsigned int, which is the "easy" case: a signed conversion will do the - // right thing. - let op = if to_f64 { - SseOpcode::Cvtsi2sd - } else { - SseOpcode::Cvtsi2ss - }; - let inst = Inst::gpr_to_xmm(op, RegMem::reg(src), OperandSize::Size64, dst); - inst.emit(sink, flags, state); - }; + assert!(src != tmp_gpr1); + assert!(src != tmp_gpr2); + assert!(tmp_gpr1 != tmp_gpr2); let handle_negative = sink.get_label(); let done = sink.get_label(); - // If x seen as a signed int is not negative, a signed-conversion will do the right + // If x seen as a signed int64 is not negative, a signed-conversion will do the right // thing. // TODO use tst src, src here. 
- let inst = Inst::cmp_rmi_r(8, RegMemImm::imm(0), *src); + let inst = Inst::cmp_rmi_r(8, RegMemImm::imm(0), src.to_reg()); inst.emit(sink, flags, state); one_way_jmp(sink, CC::L, handle_negative); - // Handle an unsigned int, which is the "easy" case: a signed conversion will do the + // Handle a positive int64, which is the "easy" case: a signed conversion will do the // right thing. - emit_signed_cvt(sink, flags, state, *src, *dst, *to_f64); + emit_signed_cvt(sink, flags, state, src.to_reg(), *dst, *to_f64); let inst = Inst::jmp_known(BranchTarget::Label(done)); inst.emit(sink, flags, state); @@ -1842,10 +1853,8 @@ pub(crate) fn emit( // Divide x by two to get it in range for the signed conversion, keep the LSB, and // scale it back up on the FP side. - if tmp_gpr1.to_reg() != *src { - let inst = Inst::gen_move(*tmp_gpr1, *src, I64); - inst.emit(sink, flags, state); - } + let inst = Inst::gen_move(*tmp_gpr1, src.to_reg(), I64); + inst.emit(sink, flags, state); // tmp_gpr1 := src >> 1 let inst = Inst::shift_r( @@ -1856,10 +1865,8 @@ pub(crate) fn emit( ); inst.emit(sink, flags, state); - if tmp_gpr2.to_reg() != *src { - let inst = Inst::gen_move(*tmp_gpr2, *src, I64); - inst.emit(sink, flags, state); - } + let inst = Inst::gen_move(*tmp_gpr2, src.to_reg(), I64); + inst.emit(sink, flags, state); let inst = Inst::alu_rmi_r( true, /* 64bits */ @@ -2306,7 +2313,7 @@ pub(crate) fn emit( Inst::Ud2 { trap_info } => { sink.add_trap(trap_info.0, trap_info.1); if let Some(s) = state.take_stackmap() { - sink.add_stackmap(2, s); + sink.add_stackmap(StackmapExtent::UpcomingBytes(2), s); } sink.put1(0x0f); sink.put1(0x0b); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 572f8c4865..0558c3f98e 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -1,5 +1,5 @@ -//! This module defines x86_64-specific machine instruction types. - +//! 
This module defines x86_64-specific machine instruction types. + #![allow(dead_code)] #![allow(non_snake_case)] #![allow(non_camel_case_types)] @@ -83,7 +83,9 @@ pub enum Inst { CheckedDivOrRemSeq { kind: DivOrRemKind, size: u8, - divisor: Reg, + /// The divisor operand. Note it's marked as modified so that it gets assigned a register + /// different from the temporary. + divisor: Writable, tmp: Option>, loc: SourceLoc, }, @@ -236,11 +238,15 @@ pub enum Inst { src_size: OperandSize, }, - /// Converts an unsigned int64 to a float64. + /// Converts an unsigned int64 to a float32/float64. CvtUint64ToFloatSeq { /// Is the target a 64-bits or 32-bits register? to_f64: bool, - src: Reg, + /// A copy of the source register, fed by lowering. It is marked as modified during + /// register allocation to make sure that the temporary registers differ from the src + /// register, since both registers are live at the same time in the generated code + /// sequence. 
+ src: Writable, dst: Writable, tmp_gpr1: Writable, tmp_gpr2: Writable, @@ -442,6 +448,27 @@ impl Inst { Inst::MulHi { size, signed, rhs } } + pub(crate) fn checked_div_or_rem_seq( + kind: DivOrRemKind, + size: u8, + divisor: Writable, + tmp: Option>, + loc: SourceLoc, + ) -> Inst { + debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); + debug_assert!(divisor.to_reg().get_class() == RegClass::I64); + debug_assert!(tmp + .map(|tmp| tmp.to_reg().get_class() == RegClass::I64) + .unwrap_or(true)); + Inst::CheckedDivOrRemSeq { + kind, + size, + divisor, + tmp, + loc, + } + } + pub(crate) fn sign_extend_rax_to_rdx(size: u8) -> Inst { debug_assert!(size == 8 || size == 4 || size == 2); Inst::SignExtendRaxRdx { size } @@ -567,12 +594,12 @@ impl Inst { pub(crate) fn cvt_u64_to_float_seq( to_f64: bool, - src: Reg, + src: Writable, tmp_gpr1: Writable, tmp_gpr2: Writable, dst: Writable, ) -> Inst { - debug_assert!(src.get_class() == RegClass::I64); + debug_assert!(src.to_reg().get_class() == RegClass::I64); debug_assert!(tmp_gpr1.to_reg().get_class() == RegClass::I64); debug_assert!(tmp_gpr2.to_reg().get_class() == RegClass::I64); debug_assert!(dst.to_reg().get_class() == RegClass::V128); @@ -972,7 +999,7 @@ impl ShowWithRRU for Inst { DivOrRemKind::SignedRem => "srem", DivOrRemKind::UnsignedRem => "urem", }, - show_ireg_sized(*divisor, mb_rru, *size), + show_ireg_sized(divisor.to_reg(), mb_rru, *size), ), Inst::SignExtendRaxRdx { size } => match size { @@ -1072,7 +1099,7 @@ impl ShowWithRRU for Inst { "u64_to_{}_seq", if *to_f64 { "f64" } else { "f32" } )), - show_ireg_sized(*src, mb_rru, 8), + show_ireg_sized(src.to_reg(), mb_rru, 8), dst.show_rru(mb_rru), ), @@ -1363,14 +1390,14 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { // the rdx register *before* the instruction, which is not too bad. 
collector.add_mod(Writable::from_reg(regs::rax())); collector.add_mod(Writable::from_reg(regs::rdx())); - collector.add_use(*divisor); + collector.add_mod(*divisor); if let Some(tmp) = tmp { collector.add_def(*tmp); } } Inst::SignExtendRaxRdx { .. } => { collector.add_use(regs::rax()); - collector.add_mod(Writable::from_reg(regs::rdx())); + collector.add_def(Writable::from_reg(regs::rdx())); } Inst::UnaryRmR { src, dst, .. } | Inst::XmmUnaryRmR { src, dst, .. } => { src.get_regs_as_uses(collector); @@ -1410,7 +1437,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { tmp_gpr2, .. } => { - collector.add_use(*src); + collector.add_mod(*src); collector.add_def(*dst); collector.add_def(*tmp_gpr1); collector.add_def(*tmp_gpr2); @@ -1603,7 +1630,7 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { Inst::Div { divisor, .. } => divisor.map_uses(mapper), Inst::MulHi { rhs, .. } => rhs.map_uses(mapper), Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => { - map_use(mapper, divisor); + map_mod(mapper, divisor); if let Some(tmp) = tmp { map_def(mapper, tmp) } @@ -1683,7 +1710,7 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { ref mut tmp_gpr2, .. } => { - map_use(mapper, src); + map_mod(mapper, src); map_def(mapper, dst); map_def(mapper, tmp_gpr1); map_def(mapper, tmp_gpr2); diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 36b659da23..9f1f4f4f1b 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -240,6 +240,21 @@ fn emit_cmp(ctx: Ctx, insn: IRInst) { ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs)); } +fn emit_fcmp(ctx: Ctx, insn: IRInst) { + // The only valid CC constructed with `from_floatcc` can be put in the flag + // register with a direct float comparison; do this here. 
+ let input_ty = ctx.input_ty(insn, 0); + let op = match input_ty { + F32 => SseOpcode::Ucomiss, + F64 => SseOpcode::Ucomisd, + _ => panic!("Bad input type to Fcmp"), + }; + let inputs = &[InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }]; + let lhs = input_to_reg(ctx, inputs[0]); + let rhs = input_to_reg_mem(ctx, inputs[1]); + ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); +} + fn make_libcall_sig(ctx: Ctx, insn: IRInst, call_conv: CallConv, ptr_ty: Type) -> Signature { let mut sig = Signature::new(call_conv); for i in 0..ctx.num_inputs(insn) { @@ -782,9 +797,15 @@ fn lower_insn_to_regs>( let dst = output_to_reg(ctx, outputs[0]); let ty = ctx.input_ty(insn, 0); let imm = match op { - // TODO could use tst src, src for IsNull - Opcode::IsNull => 0, - Opcode::IsInvalid => 0xffffffff, + Opcode::IsNull => { + // TODO could use tst src, src for IsNull + 0 + } + Opcode::IsInvalid => { + // We can do a 32-bit comparison even in 64-bits mode, as the constant is then + // sign-extended. + 0xffffffff + } _ => unreachable!(), }; ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, RegMemImm::imm(imm), src)); @@ -1026,30 +1047,7 @@ fn lower_insn_to_regs>( // Verification ensures that the input is always a single-def ffcmp. let ffcmp_insn = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap(); - { - // The only valid CC constructed with `from_floatcc` can be put in the flag - // register with a direct float comparison; do this here. 
- let input_ty = ctx.input_ty(ffcmp_insn, 0); - let op = match input_ty { - F32 => SseOpcode::Ucomiss, - F64 => SseOpcode::Ucomisd, - _ => panic!("Bad input type to Fcmp"), - }; - let inputs = &[ - InsnInput { - insn: ffcmp_insn, - input: 0, - }, - InsnInput { - insn: ffcmp_insn, - input: 1, - }, - ]; - let lhs = input_to_reg(ctx, inputs[0]); - let rhs = input_to_reg_mem(ctx, inputs[1]); - ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); - } - + emit_fcmp(ctx, ffcmp_insn); cc }; @@ -1203,11 +1201,15 @@ fn lower_insn_to_regs>( I64 => { let src = input_to_reg(ctx, inputs[0]); + + let src_copy = ctx.alloc_tmp(RegClass::I64, I64); + ctx.emit(Inst::gen_move(src_copy, src, I64)); + let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I64); let tmp_gpr2 = ctx.alloc_tmp(RegClass::I64, I64); ctx.emit(Inst::cvt_u64_to_float_seq( ty == F64, - src, + src_copy, tmp_gpr1, tmp_gpr2, dst, @@ -1729,19 +1731,23 @@ fn lower_insn_to_regs>( // regalloc is aware of the coalescing opportunity between rax/rdx and the // destination register. let divisor = input_to_reg(ctx, inputs[1]); + + let divisor_copy = ctx.alloc_tmp(RegClass::I64, I64); + ctx.emit(Inst::gen_move(divisor_copy, divisor, I64)); + let tmp = if op == Opcode::Sdiv && size == 8 { Some(ctx.alloc_tmp(RegClass::I64, I64)) } else { None }; ctx.emit(Inst::imm_r(true, 0, Writable::from_reg(regs::rdx()))); - ctx.emit(Inst::CheckedDivOrRemSeq { + ctx.emit(Inst::checked_div_or_rem_seq( kind, size, - divisor, + divisor_copy, tmp, - loc: srcloc, - }); + srcloc, + )); } else { let divisor = input_to_reg_mem(ctx, inputs[1]); diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index d78d0e325b..2d28f35285 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -258,8 +258,20 @@ impl MachLabel { } } +/// A stackmap extent, when creating a stackmap. 
+pub enum StackmapExtent { + /// The stack map starts at this instruction, and ends after the number of upcoming bytes + /// (note: this is a code offset diff). + UpcomingBytes(CodeOffset), + + /// The stack map started at the given offset and ends at the current one. This helps + /// architectures where instructions do not have a fixed length. + StartedAtOffset(CodeOffset), +} + impl MachBuffer { - /// Create a new section, known to start at `start_offset` and with a size limited to `length_limit`. + /// Create a new section, known to start at `start_offset` and with a size limited to + /// `length_limit`. pub fn new() -> MachBuffer { MachBuffer { data: SmallVec::new(), @@ -1159,32 +1171,27 @@ impl MachBuffer { } } - /// Add stackmap metadata for this program point: a set of stack offsets - /// (from SP upward) that contain live references. + /// Add stackmap metadata for this program point: a set of stack offsets (from SP upward) that + /// contain live references. /// - /// The `offset_to_fp` value is the offset from the nominal SP (at which the - /// `stack_offsets` are based) and the FP value. By subtracting - /// `offset_to_fp` from each `stack_offsets` element, one can obtain - /// live-reference offsets from FP instead. 
+ pub fn add_stackmap(&mut self, extent: StackmapExtent, stackmap: Stackmap) { + let (start, end) = match extent { + StackmapExtent::UpcomingBytes(insn_len) => { + let start_offset = self.cur_offset(); + (start_offset, start_offset + insn_len) + } + StackmapExtent::StartedAtOffset(start_offset) => { + let end_offset = self.cur_offset(); + debug_assert!(end_offset >= start_offset); + (start_offset, end_offset) + } + }; self.stackmaps.push(MachStackMap { - offset, - offset_end: offset + insn_len, - stackmap, - }); - } - - /// Add stackmap metadata for this program point: a set of stack offsets - /// (from SP upward) that contain live references. - /// - /// Method variant of `add_stackmap` that is easier to manipulate for non-fixed-sizes - /// instructions. - pub fn add_stackmap_ending_at(&mut self, start_offset: CodeOffset, stackmap: Stackmap) { - let cur_offset = self.cur_offset(); - self.stackmaps.push(MachStackMap { - offset: start_offset, - offset_end: cur_offset, + offset: start, + offset_end: end, stackmap, }); }