From 35d9ab19b727c2050b7579d8e0b73c4f5303e30d Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Fri, 24 Jul 2020 17:04:34 +0200 Subject: [PATCH] Review fixes; --- cranelift/codegen/shared/src/condcodes.rs | 21 ----- .../codegen/src/isa/aarch64/inst/emit.rs | 6 +- cranelift/codegen/src/isa/aarch64/lower.rs | 20 ++++ .../codegen/src/isa/aarch64/lower_inst.rs | 16 ++-- cranelift/codegen/src/isa/x64/inst/emit.rs | 91 ++++++++++--------- cranelift/codegen/src/isa/x64/inst/mod.rs | 55 ++++++++--- cranelift/codegen/src/isa/x64/lower.rs | 70 +++++++------- cranelift/codegen/src/machinst/buffer.rs | 57 +++++++----- 8 files changed, 191 insertions(+), 145 deletions(-) diff --git a/cranelift/codegen/shared/src/condcodes.rs b/cranelift/codegen/shared/src/condcodes.rs index 2695b5b471..03ae865ce4 100644 --- a/cranelift/codegen/shared/src/condcodes.rs +++ b/cranelift/codegen/shared/src/condcodes.rs @@ -122,27 +122,6 @@ impl IntCC { } } - /// Determines whether this condcode interprets inputs as signed or - /// unsigned. See the documentation for the `icmp` instruction in - /// cranelift-codegen/meta/src/shared/instructions.rs for further insights - /// into this. - pub fn is_signed(&self) -> bool { - match self { - IntCC::Equal - | IntCC::UnsignedGreaterThanOrEqual - | IntCC::UnsignedGreaterThan - | IntCC::UnsignedLessThanOrEqual - | IntCC::UnsignedLessThan - | IntCC::NotEqual => false, - IntCC::SignedGreaterThanOrEqual - | IntCC::SignedGreaterThan - | IntCC::SignedLessThanOrEqual - | IntCC::SignedLessThan - | IntCC::Overflow - | IntCC::NotOverflow => true, - } - } - /// Get the corresponding string condition code for the IntCC object. 
pub fn to_static_str(self) -> &'static str { use self::IntCC::*; diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index aff06ee7a2..9cee1345b4 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1501,7 +1501,7 @@ impl MachInstEmit for Inst { } &Inst::Call { ref info } => { if let Some(s) = state.take_stackmap() { - sink.add_stackmap(4, s); + sink.add_stackmap(StackmapExtent::UpcomingBytes(4), s); } sink.add_reloc(info.loc, Reloc::Arm64Call, &info.dest, 0); sink.put4(enc_jump26(0b100101, 0)); @@ -1511,7 +1511,7 @@ impl MachInstEmit for Inst { } &Inst::CallInd { ref info } => { if let Some(s) = state.take_stackmap() { - sink.add_stackmap(4, s); + sink.add_stackmap(StackmapExtent::UpcomingBytes(4), s); } sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5)); if info.opcode.is_call() { @@ -1569,7 +1569,7 @@ impl MachInstEmit for Inst { let (srcloc, code) = trap_info; sink.add_trap(srcloc, code); if let Some(s) = state.take_stackmap() { - sink.add_stackmap(4, s); + sink.add_stackmap(StackmapExtent::UpcomingBytes(4), s); } sink.put4(0xd4a00000); } diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index c98cb128b7..516cbd7515 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -798,6 +798,26 @@ pub(crate) fn lower_vector_compare>( Ok(()) } +/// Determines whether this condcode interprets inputs as signed or unsigned. See the +/// documentation for the `icmp` instruction in cranelift-codegen/meta/src/shared/instructions.rs +/// for further insights into this. 
+pub(crate) fn condcode_is_signed(cc: IntCC) -> bool { + match cc { + IntCC::Equal + | IntCC::UnsignedGreaterThanOrEqual + | IntCC::UnsignedGreaterThan + | IntCC::UnsignedLessThanOrEqual + | IntCC::UnsignedLessThan + | IntCC::NotEqual => false, + IntCC::SignedGreaterThanOrEqual + | IntCC::SignedGreaterThan + | IntCC::SignedLessThanOrEqual + | IntCC::SignedLessThan + | IntCC::Overflow + | IntCC::NotOverflow => true, + } +} + //============================================================================= // Helpers for instruction lowering. diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 5b89ce1436..5db776765d 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1115,7 +1115,7 @@ pub(crate) fn lower_insn_to_regs>( { let condcode = inst_condcode(ctx.data(icmp_insn)).unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); lower_icmp_or_ifcmp_to_flags(ctx, icmp_insn, is_signed); cond } else if let Some(fcmp_insn) = @@ -1161,7 +1161,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Selectif | Opcode::SelectifSpectreGuard => { let condcode = inst_condcode(ctx.data(insn)).unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); // Verification ensures that the input is always a // single-def ifcmp. let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap(); @@ -1232,7 +1232,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Trueif => { let condcode = inst_condcode(ctx.data(insn)).unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); // Verification ensures that the input is always a // single-def ifcmp. 
let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap(); @@ -1433,7 +1433,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Icmp => { let condcode = inst_condcode(ctx.data(insn)).unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); let rd = get_output_reg(ctx, outputs[0]); let ty = ctx.input_ty(insn, 0); let bits = ty_bits(ty); @@ -1509,7 +1509,7 @@ pub(crate) fn lower_insn_to_regs>( } else if op == Opcode::Trapif { let condcode = inst_condcode(ctx.data(insn)).unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); // Verification ensures that the input is always a single-def ifcmp. let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap(); @@ -2360,7 +2360,7 @@ pub(crate) fn lower_branch>( { let condcode = inst_condcode(ctx.data(icmp_insn)).unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); let negated = op0 == Opcode::Brz; let cond = if negated { cond.invert() } else { cond }; @@ -2410,7 +2410,7 @@ pub(crate) fn lower_branch>( let cond = lower_condcode(condcode); let kind = CondBrKind::Cond(cond); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); let ty = ctx.input_ty(branches[0], 0); let bits = ty_bits(ty); let narrow_mode = match (bits <= 32, is_signed) { @@ -2451,7 +2451,7 @@ pub(crate) fn lower_branch>( let cond = lower_condcode(condcode); let kind = CondBrKind::Cond(cond); - let is_signed = condcode.is_signed(); + let is_signed = condcode_is_signed(condcode); let flag_input = InsnInput { insn: branches[0], input: 0, diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 2d4ae85d03..7194ef92ab 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ 
-373,6 +373,26 @@ fn emit_simm(sink: &mut MachBuffer, size: u8, simm32: u32) { } } +/// A small helper to generate a signed conversion instruction. +fn emit_signed_cvt( + sink: &mut MachBuffer, + flags: &settings::Flags, + state: &mut EmitState, + src: Reg, + dst: Writable, + to_f64: bool, +) { + // Handle an unsigned int, which is the "easy" case: a signed conversion will do the + // right thing. + let op = if to_f64 { + SseOpcode::Cvtsi2sd + } else { + SseOpcode::Cvtsi2ss + }; + let inst = Inst::gpr_to_xmm(op, RegMem::reg(src), OperandSize::Size64, dst); + inst.emit(sink, flags, state); +} + /// Emits a one way conditional jump if CC is set (true). fn one_way_jmp(sink: &mut MachBuffer, cc: CC, label: MachLabel) { let cond_start = sink.cur_offset(); @@ -700,7 +720,7 @@ pub(crate) fn emit( debug_assert!(flags.avoid_div_traps()); // Check if the divisor is zero, first. - let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), *divisor); + let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), divisor.to_reg()); inst.emit(sink, flags, state); let inst = Inst::trap_if(CC::Z, TrapCode::IntegerDivisionByZero, *loc); @@ -708,7 +728,7 @@ pub(crate) fn emit( let (do_op, done_label) = if kind.is_signed() { // Now check if the divisor is -1. - let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), *divisor); + let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), divisor.to_reg()); inst.emit(sink, flags, state); let do_op = sink.get_label(); @@ -768,7 +788,7 @@ pub(crate) fn emit( inst.emit(sink, flags, state); } - let inst = Inst::div(*size, kind.is_signed(), RegMem::reg(*divisor), *loc); + let inst = Inst::div(*size, kind.is_signed(), RegMem::reg(divisor.to_reg()), *loc); inst.emit(sink, flags, state); // Lowering takes care of moving the result back into the right register, see comment @@ -1283,7 +1303,7 @@ pub(crate) fn emit( dest, loc, opcode, .. 
} => { if let Some(s) = state.take_stackmap() { - sink.add_stackmap(5, s); + sink.add_stackmap(StackmapExtent::UpcomingBytes(5), s); } sink.put1(0xE8); // The addend adjusts for the difference between the end of the instruction and the @@ -1327,7 +1347,7 @@ pub(crate) fn emit( } } if let Some(s) = state.take_stackmap() { - sink.add_stackmap_ending_at(start_offset, s); + sink.add_stackmap(StackmapExtent::StartedAtOffset(start_offset), s); } if opcode.is_call() { sink.add_call_site(*loc, *opcode); @@ -1611,13 +1631,13 @@ pub(crate) fn emit( // j done // // ;; to get the desired NaN behavior (signalling NaN transformed into a quiet NaN, the - // NaN value is returned), we add both inputs. + // ;; NaN value is returned), we add both inputs. // propagate_nan: // add{ss,sd} %lhs, %rhs_dst // j done // // do_min_max: - // min{ss,sd} %lhs, %rhs_dst + // {min,max}{ss,sd} %lhs, %rhs_dst // // done: let done = sink.get_label(); @@ -1707,8 +1727,9 @@ pub(crate) fn emit( dst_size, } => { let (prefix, opcode, dst_first) = match op { - SseOpcode::Movd => (LegacyPrefix::_66, 0x0F7E, false), - SseOpcode::Movq => (LegacyPrefix::_66, 0x0F7E, false), + // Movd and movq use the same opcode; the presence of the REX prefix (set below) + // actually determines which is used. + SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefix::_66, 0x0F7E, false), SseOpcode::Cvttss2si => (LegacyPrefix::_F3, 0x0F2C, true), SseOpcode::Cvttsd2si => (LegacyPrefix::_F2, 0x0F2C, true), _ => panic!("unexpected opcode {:?}", op), @@ -1734,8 +1755,9 @@ pub(crate) fn emit( src_size, } => { let (prefix, opcode) = match op { - SseOpcode::Movd => (LegacyPrefix::_66, 0x0F6E), - SseOpcode::Movq => (LegacyPrefix::_66, 0x0F6E), + // Movd and movq use the same opcode; the presence of the REX prefix (set below) + // actually determines which is used. 
+ SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefix::_66, 0x0F6E), SseOpcode::Cvtsi2ss => (LegacyPrefix::_F3, 0x0F2A), SseOpcode::Cvtsi2sd => (LegacyPrefix::_F2, 0x0F2A), _ => panic!("unexpected opcode {:?}", op), @@ -1781,6 +1803,9 @@ pub(crate) fn emit( tmp_gpr1, tmp_gpr2, } => { + // Note: this sequence is specific to 64-bit mode; a 32-bit mode would require a + // different sequence. + // // Emit the following sequence: // // cmp 0, %src @@ -1790,6 +1815,7 @@ pub(crate) fn emit( // cvtsi2sd/cvtsi2ss %src, %dst // j done // + // ;; handle negative: see below for an explanation of what it's doing. // handle_negative: // mov %src, %tmp_gpr1 // shr $1, %tmp_gpr1 @@ -1801,39 +1827,24 @@ pub(crate) fn emit( // // done: - // A small helper to generate a signed conversion instruction, that helps deduplicating - // code below. - let emit_signed_cvt = |sink: &mut MachBuffer, - flags: &settings::Flags, - state: &mut EmitState, - src: Reg, - dst: Writable, - to_f64: bool| { - // Handle an unsigned int, which is the "easy" case: a signed conversion will do the - // right thing. - let op = if to_f64 { - SseOpcode::Cvtsi2sd - } else { - SseOpcode::Cvtsi2ss - }; - let inst = Inst::gpr_to_xmm(op, RegMem::reg(src), OperandSize::Size64, dst); - inst.emit(sink, flags, state); - }; + assert!(src != tmp_gpr1); + assert!(src != tmp_gpr2); + assert!(tmp_gpr1 != tmp_gpr2); let handle_negative = sink.get_label(); let done = sink.get_label(); - // If x seen as a signed int is not negative, a signed-conversion will do the right + // If x seen as a signed int64 is not negative, a signed-conversion will do the right // thing. // TODO use tst src, src here. 
- let inst = Inst::cmp_rmi_r(8, RegMemImm::imm(0), *src); + let inst = Inst::cmp_rmi_r(8, RegMemImm::imm(0), src.to_reg()); inst.emit(sink, flags, state); one_way_jmp(sink, CC::L, handle_negative); - // Handle an unsigned int, which is the "easy" case: a signed conversion will do the + // Handle a positive int64, which is the "easy" case: a signed conversion will do the // right thing. - emit_signed_cvt(sink, flags, state, *src, *dst, *to_f64); + emit_signed_cvt(sink, flags, state, src.to_reg(), *dst, *to_f64); let inst = Inst::jmp_known(BranchTarget::Label(done)); inst.emit(sink, flags, state); @@ -1842,10 +1853,8 @@ pub(crate) fn emit( // Divide x by two to get it in range for the signed conversion, keep the LSB, and // scale it back up on the FP side. - if tmp_gpr1.to_reg() != *src { - let inst = Inst::gen_move(*tmp_gpr1, *src, I64); - inst.emit(sink, flags, state); - } + let inst = Inst::gen_move(*tmp_gpr1, src.to_reg(), I64); + inst.emit(sink, flags, state); // tmp_gpr1 := src >> 1 let inst = Inst::shift_r( @@ -1856,10 +1865,8 @@ pub(crate) fn emit( ); inst.emit(sink, flags, state); - if tmp_gpr2.to_reg() != *src { - let inst = Inst::gen_move(*tmp_gpr2, *src, I64); - inst.emit(sink, flags, state); - } + let inst = Inst::gen_move(*tmp_gpr2, src.to_reg(), I64); + inst.emit(sink, flags, state); let inst = Inst::alu_rmi_r( true, /* 64bits */ @@ -2306,7 +2313,7 @@ pub(crate) fn emit( Inst::Ud2 { trap_info } => { sink.add_trap(trap_info.0, trap_info.1); if let Some(s) = state.take_stackmap() { - sink.add_stackmap(2, s); + sink.add_stackmap(StackmapExtent::UpcomingBytes(2), s); } sink.put1(0x0f); sink.put1(0x0b); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 572f8c4865..0558c3f98e 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -1,5 +1,5 @@ -//! This module defines x86_64-specific machine instruction types. - +//! 
This module defines x86_64-specific machine instruction types. + #![allow(dead_code)] #![allow(non_snake_case)] #![allow(non_camel_case_types)] @@ -83,7 +83,9 @@ pub enum Inst { CheckedDivOrRemSeq { kind: DivOrRemKind, size: u8, - divisor: Reg, + /// The divisor operand. Note it's marked as modified so that it gets assigned a register + /// different from the temporary. + divisor: Writable, tmp: Option>, loc: SourceLoc, }, @@ -236,11 +238,15 @@ pub enum Inst { src_size: OperandSize, }, - /// Converts an unsigned int64 to a float64. + /// Converts an unsigned int64 to a float32/float64. CvtUint64ToFloatSeq { /// Is the target a 64-bits or 32-bits register? to_f64: bool, - src: Reg, + /// A copy of the source register, fed by lowering. It is marked as modified during + /// register allocation to make sure that the temporary registers differ from the src + /// register, since both registers are live at the same time in the generated code + /// sequence. 
+ src: Writable, dst: Writable, tmp_gpr1: Writable, tmp_gpr2: Writable, @@ -442,6 +448,27 @@ impl Inst { Inst::MulHi { size, signed, rhs } } + pub(crate) fn checked_div_or_rem_seq( + kind: DivOrRemKind, + size: u8, + divisor: Writable, + tmp: Option>, + loc: SourceLoc, + ) -> Inst { + debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); + debug_assert!(divisor.to_reg().get_class() == RegClass::I64); + debug_assert!(tmp + .map(|tmp| tmp.to_reg().get_class() == RegClass::I64) + .unwrap_or(true)); + Inst::CheckedDivOrRemSeq { + kind, + size, + divisor, + tmp, + loc, + } + } + pub(crate) fn sign_extend_rax_to_rdx(size: u8) -> Inst { debug_assert!(size == 8 || size == 4 || size == 2); Inst::SignExtendRaxRdx { size } @@ -567,12 +594,12 @@ impl Inst { pub(crate) fn cvt_u64_to_float_seq( to_f64: bool, - src: Reg, + src: Writable, tmp_gpr1: Writable, tmp_gpr2: Writable, dst: Writable, ) -> Inst { - debug_assert!(src.get_class() == RegClass::I64); + debug_assert!(src.to_reg().get_class() == RegClass::I64); debug_assert!(tmp_gpr1.to_reg().get_class() == RegClass::I64); debug_assert!(tmp_gpr2.to_reg().get_class() == RegClass::I64); debug_assert!(dst.to_reg().get_class() == RegClass::V128); @@ -972,7 +999,7 @@ impl ShowWithRRU for Inst { DivOrRemKind::SignedRem => "srem", DivOrRemKind::UnsignedRem => "urem", }, - show_ireg_sized(*divisor, mb_rru, *size), + show_ireg_sized(divisor.to_reg(), mb_rru, *size), ), Inst::SignExtendRaxRdx { size } => match size { @@ -1072,7 +1099,7 @@ impl ShowWithRRU for Inst { "u64_to_{}_seq", if *to_f64 { "f64" } else { "f32" } )), - show_ireg_sized(*src, mb_rru, 8), + show_ireg_sized(src.to_reg(), mb_rru, 8), dst.show_rru(mb_rru), ), @@ -1363,14 +1390,14 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { // the rdx register *before* the instruction, which is not too bad. 
collector.add_mod(Writable::from_reg(regs::rax())); collector.add_mod(Writable::from_reg(regs::rdx())); - collector.add_use(*divisor); + collector.add_mod(*divisor); if let Some(tmp) = tmp { collector.add_def(*tmp); } } Inst::SignExtendRaxRdx { .. } => { collector.add_use(regs::rax()); - collector.add_mod(Writable::from_reg(regs::rdx())); + collector.add_def(Writable::from_reg(regs::rdx())); } Inst::UnaryRmR { src, dst, .. } | Inst::XmmUnaryRmR { src, dst, .. } => { src.get_regs_as_uses(collector); @@ -1410,7 +1437,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { tmp_gpr2, .. } => { - collector.add_use(*src); + collector.add_mod(*src); collector.add_def(*dst); collector.add_def(*tmp_gpr1); collector.add_def(*tmp_gpr2); @@ -1603,7 +1630,7 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { Inst::Div { divisor, .. } => divisor.map_uses(mapper), Inst::MulHi { rhs, .. } => rhs.map_uses(mapper), Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => { - map_use(mapper, divisor); + map_mod(mapper, divisor); if let Some(tmp) = tmp { map_def(mapper, tmp) } @@ -1683,7 +1710,7 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { ref mut tmp_gpr2, .. } => { - map_use(mapper, src); + map_mod(mapper, src); map_def(mapper, dst); map_def(mapper, tmp_gpr1); map_def(mapper, tmp_gpr2); diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 36b659da23..9f1f4f4f1b 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -240,6 +240,21 @@ fn emit_cmp(ctx: Ctx, insn: IRInst) { ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs)); } +fn emit_fcmp(ctx: Ctx, insn: IRInst) { + // The only valid CC constructed with `from_floatcc` can be put in the flag + // register with a direct float comparison; do this here. 
+ let input_ty = ctx.input_ty(insn, 0); + let op = match input_ty { + F32 => SseOpcode::Ucomiss, + F64 => SseOpcode::Ucomisd, + _ => panic!("Bad input type to Fcmp"), + }; + let inputs = &[InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }]; + let lhs = input_to_reg(ctx, inputs[0]); + let rhs = input_to_reg_mem(ctx, inputs[1]); + ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); +} + fn make_libcall_sig(ctx: Ctx, insn: IRInst, call_conv: CallConv, ptr_ty: Type) -> Signature { let mut sig = Signature::new(call_conv); for i in 0..ctx.num_inputs(insn) { @@ -782,9 +797,15 @@ fn lower_insn_to_regs>( let dst = output_to_reg(ctx, outputs[0]); let ty = ctx.input_ty(insn, 0); let imm = match op { - // TODO could use tst src, src for IsNull - Opcode::IsNull => 0, - Opcode::IsInvalid => 0xffffffff, + Opcode::IsNull => { + // TODO could use tst src, src for IsNull + 0 + } + Opcode::IsInvalid => { + // We can do a 32-bit comparison even in 64-bits mode, as the constant is then + // sign-extended. + 0xffffffff + } _ => unreachable!(), }; ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, RegMemImm::imm(imm), src)); @@ -1026,30 +1047,7 @@ fn lower_insn_to_regs>( // Verification ensures that the input is always a single-def ffcmp. let ffcmp_insn = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap(); - { - // The only valid CC constructed with `from_floatcc` can be put in the flag - // register with a direct float comparison; do this here. 
- let input_ty = ctx.input_ty(ffcmp_insn, 0); - let op = match input_ty { - F32 => SseOpcode::Ucomiss, - F64 => SseOpcode::Ucomisd, - _ => panic!("Bad input type to Fcmp"), - }; - let inputs = &[ - InsnInput { - insn: ffcmp_insn, - input: 0, - }, - InsnInput { - insn: ffcmp_insn, - input: 1, - }, - ]; - let lhs = input_to_reg(ctx, inputs[0]); - let rhs = input_to_reg_mem(ctx, inputs[1]); - ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); - } - + emit_fcmp(ctx, ffcmp_insn); cc }; @@ -1203,11 +1201,15 @@ fn lower_insn_to_regs>( I64 => { let src = input_to_reg(ctx, inputs[0]); + + let src_copy = ctx.alloc_tmp(RegClass::I64, I64); + ctx.emit(Inst::gen_move(src_copy, src, I64)); + let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I64); let tmp_gpr2 = ctx.alloc_tmp(RegClass::I64, I64); ctx.emit(Inst::cvt_u64_to_float_seq( ty == F64, - src, + src_copy, tmp_gpr1, tmp_gpr2, dst, @@ -1729,19 +1731,23 @@ fn lower_insn_to_regs>( // regalloc is aware of the coalescing opportunity between rax/rdx and the // destination register. let divisor = input_to_reg(ctx, inputs[1]); + + let divisor_copy = ctx.alloc_tmp(RegClass::I64, I64); + ctx.emit(Inst::gen_move(divisor_copy, divisor, I64)); + let tmp = if op == Opcode::Sdiv && size == 8 { Some(ctx.alloc_tmp(RegClass::I64, I64)) } else { None }; ctx.emit(Inst::imm_r(true, 0, Writable::from_reg(regs::rdx()))); - ctx.emit(Inst::CheckedDivOrRemSeq { + ctx.emit(Inst::checked_div_or_rem_seq( kind, size, - divisor, + divisor_copy, tmp, - loc: srcloc, - }); + srcloc, + )); } else { let divisor = input_to_reg_mem(ctx, inputs[1]); diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index d78d0e325b..2d28f35285 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -258,8 +258,20 @@ impl MachLabel { } } +/// A stackmap extent, when creating a stackmap. 
+pub enum StackmapExtent { + /// The stack map starts at this instruction, and ends after the number of upcoming bytes + /// (note: this is a code offset diff). + UpcomingBytes(CodeOffset), + + /// The stack map started at the given offset and ends at the current one. This helps + /// architectures where instructions do not have a fixed length. + StartedAtOffset(CodeOffset), +} + impl MachBuffer { - /// Create a new section, known to start at `start_offset` and with a size limited to `length_limit`. + /// Create a new section, known to start at `start_offset` and with a size limited to + /// `length_limit`. pub fn new() -> MachBuffer { MachBuffer { data: SmallVec::new(), @@ -1159,32 +1171,27 @@ impl MachBuffer { } } - /// Add stackmap metadata for this program point: a set of stack offsets - /// (from SP upward) that contain live references. + /// Add stackmap metadata for this program point: a set of stack offsets (from SP upward) that + /// contain live references. /// - /// The `offset_to_fp` value is the offset from the nominal SP (at which the - /// `stack_offsets` are based) and the FP value. By subtracting - /// `offset_to_fp` from each `stack_offsets` element, one can obtain - /// live-reference offsets from FP instead. 
+ pub fn add_stackmap(&mut self, extent: StackmapExtent, stackmap: Stackmap) { + let (start, end) = match extent { + StackmapExtent::UpcomingBytes(insn_len) => { + let start_offset = self.cur_offset(); + (start_offset, start_offset + insn_len) + } + StackmapExtent::StartedAtOffset(start_offset) => { + let end_offset = self.cur_offset(); + debug_assert!(end_offset >= start_offset); + (start_offset, end_offset) + } + }; self.stackmaps.push(MachStackMap { - offset, - offset_end: offset + insn_len, - stackmap, - }); - } - - /// Add stackmap metadata for this program point: a set of stack offsets - /// (from SP upward) that contain live references. - /// - /// Method variant of `add_stackmap` that is easier to manipulate for non-fixed-sizes - /// instructions. - pub fn add_stackmap_ending_at(&mut self, start_offset: CodeOffset, stackmap: Stackmap) { - let cur_offset = self.cur_offset(); - self.stackmaps.push(MachStackMap { - offset: start_offset, - offset_end: cur_offset, + offset: start, + offset_end: end, stackmap, }); }