diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index e7f4b9a62f..3fd0a4e741 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -185,12 +185,12 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
     let from_bits = ty_bits(ty) as u8;
     let inputs = ctx.get_input(input.insn, input.input);
     let in_reg = if let Some(c) = inputs.constant {
+        // Generate constants fresh at each use to minimize long-range register pressure.
         let masked = if from_bits < 64 {
            c & ((1u64 << from_bits) - 1)
        } else {
            c
        };
-        // Generate constants fresh at each use to minimize long-range register pressure.
         let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
         for inst in Inst::gen_constant(to_reg, masked, ty, |reg_class, ty| {
             ctx.alloc_tmp(reg_class, ty)
diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs
index fcb9955cb8..4b236ee163 100644
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -1001,6 +1001,14 @@ pub enum OperandSize {
 }
 
 impl OperandSize {
+    pub(crate) fn from_bytes(num_bytes: u32) -> Self {
+        match num_bytes {
+            1 | 2 | 4 => OperandSize::Size32,
+            8 => OperandSize::Size64,
+            _ => unreachable!(),
+        }
+    }
+
     pub(crate) fn to_bytes(&self) -> u8 {
         match self {
             Self::Size32 => 4,
diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs
index 3e8b441b73..53632d03a0 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -791,7 +791,11 @@ pub(crate) fn emit(
                 // x % -1 = 0; put the result into the destination, $rdx.
                 let done_label = sink.get_label();
 
-                let inst = Inst::imm_r(*size == 8, 0, Writable::from_reg(regs::rdx()));
+                let inst = Inst::imm(
+                    OperandSize::from_bytes(*size as u32),
+                    0,
+                    Writable::from_reg(regs::rdx()),
+                );
                 inst.emit(sink, flags, state);
 
                 let inst = Inst::jmp_known(BranchTarget::Label(done_label));
@@ -803,7 +807,7 @@ pub(crate) fn emit(
                 if *size == 8 {
                     let tmp = tmp.expect("temporary for i64 sdiv");
 
-                    let inst = Inst::imm_r(true, 0x8000000000000000, tmp);
+                    let inst = Inst::imm(OperandSize::Size64, 0x8000000000000000, tmp);
                     inst.emit(sink, flags, state);
 
                     let inst = Inst::cmp_rmi_r(8, RegMemImm::reg(tmp.to_reg()), regs::rax());
@@ -839,7 +843,7 @@ pub(crate) fn emit(
                     inst.emit(sink, flags, state);
                 } else {
                     // zero for unsigned opcodes.
-                    let inst = Inst::imm_r(true /* is_64 */, 0, Writable::from_reg(regs::rdx()));
+                    let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rdx()));
                     inst.emit(sink, flags, state);
                 }
@@ -854,18 +858,30 @@ pub(crate) fn emit(
             }
         }
 
-        Inst::Imm_R {
+        Inst::Imm {
             dst_is_64,
             simm64,
             dst,
         } => {
             let enc_dst = int_reg_enc(dst.to_reg());
             if *dst_is_64 {
-                // FIXME JRS 2020Feb10: also use the 32-bit case here when
-                // possible
-                sink.put1(0x48 | ((enc_dst >> 3) & 1));
-                sink.put1(0xB8 | (enc_dst & 7));
-                sink.put8(*simm64);
+                if low32_will_sign_extend_to_64(*simm64) {
+                    // Sign-extended move imm32.
+                    emit_std_enc_enc(
+                        sink,
+                        LegacyPrefixes::None,
+                        0xC7,
+                        1,
+                        /* subopcode */ 0,
+                        enc_dst,
+                        RexFlags::set_w(),
+                    );
+                    sink.put4(*simm64 as u32);
+                } else {
+                    sink.put1(0x48 | ((enc_dst >> 3) & 1));
+                    sink.put1(0xB8 | (enc_dst & 7));
+                    sink.put8(*simm64);
+                }
             } else {
                 if ((enc_dst >> 3) & 1) == 1 {
                     sink.put1(0x41);
                 }
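// Aside on the encoding choice above: a sketch of the predicate assumed from
// its call site (the real helper lives elsewhere in emit.rs). A 64-bit
// immediate may use the 7-byte sign-extended form (REX.W C7 /0 imm32) exactly
// when truncating it to 32 bits and sign-extending back is lossless; otherwise
// the 10-byte movabsq form (REX.W B8+rd imm64) is required.
fn low32_will_sign_extend_to_64(x: u64) -> bool {
    let xs = x as i64;
    xs == ((xs << 32) >> 32)
}

fn main() {
    assert!(low32_will_sign_extend_to_64(-126i64 as u64)); // encodable as movq $-126, %reg
    assert!(!low32_will_sign_extend_to_64(0x8000_0000)); // low 32 bits would sign-extend negative
}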
@@ -2223,10 +2239,10 @@ pub(crate) fn emit(
                     // Otherwise, put INT_MAX.
                     if *dst_size == OperandSize::Size64 {
-                        let inst = Inst::imm_r(true, 0x7fffffffffffffff, *dst);
+                        let inst = Inst::imm(OperandSize::Size64, 0x7fffffffffffffff, *dst);
                         inst.emit(sink, flags, state);
                     } else {
-                        let inst = Inst::imm_r(false, 0x7fffffff, *dst);
+                        let inst = Inst::imm(OperandSize::Size32, 0x7fffffff, *dst);
                         inst.emit(sink, flags, state);
                     }
                 } else {
@@ -2248,7 +2264,7 @@ pub(crate) fn emit(
                     match *src_size {
                         OperandSize::Size32 => {
                             let cst = Ieee32::pow2(output_bits - 1).neg().bits();
-                            let inst = Inst::imm32_r_unchecked(cst as u64, *tmp_gpr);
+                            let inst = Inst::imm(OperandSize::Size32, cst as u64, *tmp_gpr);
                             inst.emit(sink, flags, state);
                         }
                         OperandSize::Size64 => {
@@ -2260,7 +2276,7 @@ pub(crate) fn emit(
                             } else {
                                 Ieee64::pow2(output_bits - 1).neg()
                             };
-                            let inst = Inst::imm_r(true, cst.bits(), *tmp_gpr);
+                            let inst = Inst::imm(OperandSize::Size64, cst.bits(), *tmp_gpr);
                             inst.emit(sink, flags, state);
                         }
                     }
@@ -2362,15 +2378,14 @@ pub(crate) fn emit(
 
                 let done = sink.get_label();
 
-                if *src_size == OperandSize::Size64 {
-                    let cst = Ieee64::pow2(dst_size.to_bits() - 1).bits();
-                    let inst = Inst::imm_r(true, cst, *tmp_gpr);
-                    inst.emit(sink, flags, state);
+                let cst = if *src_size == OperandSize::Size64 {
+                    Ieee64::pow2(dst_size.to_bits() - 1).bits()
                 } else {
-                    let cst = Ieee32::pow2(dst_size.to_bits() - 1).bits() as u64;
-                    let inst = Inst::imm32_r_unchecked(cst, *tmp_gpr);
-                    inst.emit(sink, flags, state);
-                }
+                    Ieee32::pow2(dst_size.to_bits() - 1).bits() as u64
+                };
+
+                let inst = Inst::imm(*src_size, cst, *tmp_gpr);
+                inst.emit(sink, flags, state);
 
                 let inst =
                     Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
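// For intuition, the `pow2` thresholds materialized above are ordinary
// IEEE-754 bit patterns (hand-checkable; not part of the patch):
// 2^63 as f64 has exponent field 63 + 1023 = 1086 = 0x43E and a zero mantissa;
// 2^31 as f32 has exponent field 31 + 127 = 158 = 0x9E.
fn main() {
    assert_eq!(((1u64 << 63) as f64).to_bits(), 0x43E0_0000_0000_0000);
    assert_eq!(((1u64 << 31) as f32).to_bits(), 0x4F00_0000);
}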
@@ -2454,8 +2469,8 @@ pub(crate) fn emit(
                     if *is_saturating {
                         // The input was "large" (>= 2**(width - 1)), so the only way to get an integer
                         // overflow is because the input was too large: saturate to the max value.
-                        let inst = Inst::imm_r(
-                            true,
+                        let inst = Inst::imm(
+                            OperandSize::Size64,
                             if *dst_size == OperandSize::Size64 {
                                 u64::max_value()
                             } else {
@@ -2475,7 +2490,7 @@ pub(crate) fn emit(
                     sink.bind_label(next_is_large);
 
                     if *dst_size == OperandSize::Size64 {
-                        let inst = Inst::imm_r(true, 1 << 63, *tmp_gpr);
+                        let inst = Inst::imm(OperandSize::Size64, 1 << 63, *tmp_gpr);
                         inst.emit(sink, flags, state);
 
                         let inst = Inst::alu_rmi_r(
diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
index 9f7ea65b4b..b4a3b10d8d 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -1368,43 +1368,43 @@ fn test_x64_emit() {
     // Imm_R
     //
     insns.push((
-        Inst::imm_r(false, 1234567, w_r14),
+        Inst::imm(OperandSize::Size32, 1234567, w_r14),
         "41BE87D61200",
         "movl $1234567, %r14d",
     ));
     insns.push((
-        Inst::imm_r(false, -126i64 as u64, w_r14),
+        Inst::imm(OperandSize::Size32, -126i64 as u64, w_r14),
         "41BE82FFFFFF",
         "movl $-126, %r14d",
     ));
     insns.push((
-        Inst::imm_r(true, 1234567898765, w_r14),
+        Inst::imm(OperandSize::Size64, 1234567898765, w_r14),
         "49BE8D26FB711F010000",
         "movabsq $1234567898765, %r14",
     ));
     insns.push((
-        Inst::imm_r(true, -126i64 as u64, w_r14),
-        "49BE82FFFFFFFFFFFFFF",
+        Inst::imm(OperandSize::Size64, -126i64 as u64, w_r14),
+        "49C7C682FFFFFF",
         "movabsq $-126, %r14",
     ));
     insns.push((
-        Inst::imm_r(false, 1234567, w_rcx),
+        Inst::imm(OperandSize::Size32, 1234567, w_rcx),
         "B987D61200",
         "movl $1234567, %ecx",
     ));
     insns.push((
-        Inst::imm_r(false, -126i64 as u64, w_rcx),
+        Inst::imm(OperandSize::Size32, -126i64 as u64, w_rcx),
         "B982FFFFFF",
         "movl $-126, %ecx",
     ));
     insns.push((
-        Inst::imm_r(true, 1234567898765, w_rsi),
+        Inst::imm(OperandSize::Size64, 1234567898765, w_rsi),
         "48BE8D26FB711F010000",
         "movabsq $1234567898765, %rsi",
     ));
     insns.push((
-        Inst::imm_r(true, -126i64 as u64, w_rbx),
-        "48BB82FFFFFFFFFFFFFF",
+        Inst::imm(OperandSize::Size64, -126i64 as u64, w_rbx),
+        "48C7C382FFFFFF",
        "movabsq $-126, %rbx",
     ));
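// Reading aid for the new expectations above: "49C7C682FFFFFF" decodes as
//   49        REX.W|REX.B (64-bit operand size; REX.B extends ModRM.rm)
//   C7        MOV r/m64, imm32 (the immediate is sign-extended)
//   C6        ModRM: mod=11, reg=000 (the /0 subopcode), rm=110 -> %r14 with REX.B
//   82FFFFFF  imm32, little-endian: 0xFFFFFF82 = -126
// i.e. a 7-byte encoding replacing the old 10-byte movabsq. The printed form
// still says "movabsq" because, by the look of this diff, pretty-printing keys
// off `dst_is_64` rather than off the encoding actually chosen.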
diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs
index 2eb04345d3..e2afc3019e 100644
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -107,7 +107,7 @@ pub enum Inst {
 
     /// Constant materialization: (imm32 imm64) reg.
     /// Either: movl $imm32, %reg32 or movabsq $imm64, %reg32.
-    Imm_R {
+    Imm {
         dst_is_64: bool,
         simm64: u64,
         dst: Writable<Reg>,
@@ -579,31 +579,18 @@ impl Inst {
         Inst::SignExtendData { size }
     }
 
-    pub(crate) fn imm_r(dst_is_64: bool, simm64: u64, dst: Writable<Reg>) -> Inst {
+    pub(crate) fn imm(size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
         debug_assert!(dst.to_reg().get_class() == RegClass::I64);
-        if !dst_is_64 {
-            debug_assert!(
-                low32_will_sign_extend_to_64(simm64),
-                "{} won't sign-extend to 64 bits!",
-                simm64
-            );
-        }
-        Inst::Imm_R {
+        // Try to generate a 32-bit immediate when the upper 32 bits are zero (which
+        // matches the semantics of movl).
+        let dst_is_64 = size == OperandSize::Size64 && simm64 > u32::max_value() as u64;
+        Inst::Imm {
             dst_is_64,
             simm64,
             dst,
         }
     }
 
-    pub(crate) fn imm32_r_unchecked(simm64: u64, dst: Writable<Reg>) -> Inst {
-        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
-        Inst::Imm_R {
-            dst_is_64: false,
-            simm64,
-            dst,
-        }
-    }
-
     pub(crate) fn mov_r_r(is_64: bool, src: Reg, dst: Writable<Reg>) -> Inst {
         debug_assert!(src.get_class() == RegClass::I64);
         debug_assert!(dst.to_reg().get_class() == RegClass::I64);
@@ -1424,7 +1411,7 @@ impl ShowWithRRU for Inst {
                 show_ireg_sized(dst.to_reg(), mb_rru, dst_size.to_bytes()),
             ),
 
-            Inst::Imm_R {
+            Inst::Imm {
                 dst_is_64,
                 simm64,
                 dst,
@@ -1761,7 +1748,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             src.get_regs_as_uses(collector);
             collector.add_use(*dst);
         }
-        Inst::Imm_R { dst, .. } => {
+        Inst::Imm { dst, .. } => {
             collector.add_def(*dst);
         }
         Inst::Mov_R_R { src, dst, .. } | Inst::XmmToGpr { src, dst, .. } => {
@@ -2097,7 +2084,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
             src.map_uses(mapper);
             map_use(mapper, dst);
         }
-        Inst::Imm_R { ref mut dst, .. } => map_def(mapper, dst),
+        Inst::Imm { ref mut dst, .. } => map_def(mapper, dst),
         Inst::Mov_R_R {
             ref mut src,
             ref mut dst,
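// A test-style restatement of the constructor's selection rule (hypothetical
// standalone helper mirroring the logic in `Inst::imm` above):
#[derive(Clone, Copy, PartialEq)]
enum OperandSize {
    Size32,
    Size64,
}

fn uses_64_bit_form(size: OperandSize, simm64: u64) -> bool {
    // Same condition as in `Inst::imm`.
    size == OperandSize::Size64 && simm64 > u32::max_value() as u64
}

fn main() {
    assert!(!uses_64_bit_form(OperandSize::Size64, 1234567)); // movl suffices: it zero-extends
    assert!(uses_64_bit_form(OperandSize::Size64, -126i64 as u64)); // 64-bit form, emitted compactly
    assert!(!uses_64_bit_form(OperandSize::Size32, 0xFFFF_FFFF)); // movl
}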
@@ -2407,7 +2394,57 @@ impl MachInst for Inst {
         mut alloc_tmp: F,
     ) -> SmallVec<[Self; 4]> {
         let mut ret = SmallVec::new();
-        if ty.is_int() {
+        if ty == types::F32 {
+            if value == 0 {
+                ret.push(Inst::xmm_rm_r(
+                    SseOpcode::Xorps,
+                    RegMem::reg(to_reg.to_reg()),
+                    to_reg,
+                ));
+            } else {
+                let tmp = alloc_tmp(RegClass::I64, types::I32);
+                ret.push(Inst::imm(OperandSize::Size32, value, tmp));
+
+                ret.push(Inst::gpr_to_xmm(
+                    SseOpcode::Movd,
+                    RegMem::reg(tmp.to_reg()),
+                    OperandSize::Size32,
+                    to_reg,
+                ));
+            }
+        } else if ty == types::F64 {
+            if value == 0 {
+                ret.push(Inst::xmm_rm_r(
+                    SseOpcode::Xorpd,
+                    RegMem::reg(to_reg.to_reg()),
+                    to_reg,
+                ));
+            } else {
+                let tmp = alloc_tmp(RegClass::I64, types::I64);
+                ret.push(Inst::imm(OperandSize::Size64, value, tmp));
+
+                ret.push(Inst::gpr_to_xmm(
+                    SseOpcode::Movq,
+                    RegMem::reg(tmp.to_reg()),
+                    OperandSize::Size64,
+                    to_reg,
+                ));
+            }
+        } else {
+            // Must be an integer type.
+            debug_assert!(
+                ty == types::B1
+                    || ty == types::I8
+                    || ty == types::B8
+                    || ty == types::I16
+                    || ty == types::B16
+                    || ty == types::I32
+                    || ty == types::B32
+                    || ty == types::I64
+                    || ty == types::B64
+                    || ty == types::R32
+                    || ty == types::R64
+            );
             if value == 0 {
                 ret.push(Inst::alu_rmi_r(
                     ty == types::I64,
@@ -2416,42 +2453,11 @@ impl MachInst for Inst {
                     to_reg,
                 ));
             } else {
-                let is_64 = ty == types::I64 && value > 0x7fffffff;
-                ret.push(Inst::imm_r(is_64, value, to_reg));
-            }
-        } else if value == 0 {
-            ret.push(Inst::xmm_rm_r(
-                SseOpcode::Xorps,
-                RegMem::reg(to_reg.to_reg()),
-                to_reg,
-            ));
-        } else {
-            match ty {
-                types::F32 => {
-                    let tmp = alloc_tmp(RegClass::I64, types::I32);
-                    ret.push(Inst::imm32_r_unchecked(value, tmp));
-
-                    ret.push(Inst::gpr_to_xmm(
-                        SseOpcode::Movd,
-                        RegMem::reg(tmp.to_reg()),
-                        OperandSize::Size32,
-                        to_reg,
-                    ));
-                }
-
-                types::F64 => {
-                    let tmp = alloc_tmp(RegClass::I64, types::I64);
-                    ret.push(Inst::imm_r(true, value, tmp));
-
-                    ret.push(Inst::gpr_to_xmm(
-                        SseOpcode::Movq,
-                        RegMem::reg(tmp.to_reg()),
-                        OperandSize::Size64,
-                        to_reg,
-                    ));
-                }
-
-                _ => panic!("unexpected type {:?} in gen_constant", ty),
+                ret.push(Inst::imm(
+                    OperandSize::from_bytes(ty.bytes()),
+                    value,
+                    to_reg,
+                ));
             }
         }
         ret
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index 6d48319477..fdaf0be6ed 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -29,7 +29,7 @@ type Ctx<'a> = &'a mut dyn LowerCtx<I = Inst>;
 //=============================================================================
 // Helpers for instruction lowering.
 
-fn is_int_ty(ty: Type) -> bool {
+fn is_int_or_ref_ty(ty: Type) -> bool {
     match ty {
         types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
         types::R32 => panic!("shouldn't have 32-bits refs on x64"),
@@ -79,7 +79,29 @@ fn lowerinput_to_reg(ctx: Ctx, input: LowerInput) -> Reg
 /// Put the given input into a register, and mark it as used (side-effect).
 fn put_input_in_reg(ctx: Ctx, spec: InsnInput) -> Reg {
     let input = ctx.get_input(spec.insn, spec.input);
-    lowerinput_to_reg(ctx, input)
+
+    if let Some(c) = input.constant {
+        // Generate constants fresh at each use to minimize long-range register pressure.
+        let ty = ctx.input_ty(spec.insn, spec.input);
+        let from_bits = ty_bits(ty);
+        let masked = if from_bits < 64 {
+            c & ((1u64 << from_bits) - 1)
+        } else {
+            c
+        };
+
+        let cst_copy = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
+        for inst in Inst::gen_constant(cst_copy, masked, ty, |reg_class, ty| {
+            ctx.alloc_tmp(reg_class, ty)
+        })
+        .into_iter()
+        {
+            ctx.emit(inst);
+        }
+        cst_copy.to_reg()
+    } else {
+        lowerinput_to_reg(ctx, input)
+    }
 }
 
 /// An extension specification for `extend_input_to_reg`.
@@ -185,6 +207,10 @@ fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
     }
 }
 
+/// Emits an int comparison instruction.
+///
+/// Note: make sure that there are no instructions modifying the flags between a call to this
+/// function and the use of the flags!
 fn emit_cmp(ctx: Ctx, insn: IRInst) {
     let ty = ctx.input_ty(insn, 0);
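// The masking in `put_input_in_reg` above, restated as an assumed-equivalent
// helper: constants reach lowering as 64-bit values, so a narrow type must be
// truncated to its own width before materialization.
fn mask_to_width(c: u64, from_bits: u32) -> u64 {
    if from_bits < 64 {
        c & ((1u64 << from_bits) - 1)
    } else {
        c
    }
}

fn main() {
    // An i8 constant -1 arrives as 0xFFFF_FFFF_FFFF_FFFF and becomes 0xFF.
    assert_eq!(mask_to_width(-1i64 as u64, 8), 0xFF);
    assert_eq!(mask_to_width(0x1_0000_0001, 32), 1);
}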
@@ -231,6 +257,10 @@ enum FcmpCondResult {
     InvertedEqualOrConditions(CC, CC),
 }
 
+/// Emits a float comparison instruction.
+///
+/// Note: make sure that there are no instructions modifying the flags between a call to this
+/// function and the use of the flags!
 fn emit_fcmp(ctx: Ctx, insn: IRInst, mut cond_code: FloatCC, spec: FcmpSpec) -> FcmpCondResult {
     let (flip_operands, inverted_equal) = match cond_code {
         FloatCC::LessThan
@@ -375,7 +405,10 @@ fn matches_small_constant_shift<C: LowerCtx<I = Inst>>(
     })
 }
 
-fn lower_to_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: u32) -> Amode {
+/// Lowers an instruction to one of the x86 addressing modes.
+///
+/// Note: the 32-bit offset in Cranelift has to be sign-extended, which matches x86's behavior.
+fn lower_to_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: i32) -> Amode {
     // We now either have an add that we must materialize, or some other input; as well as the
     // final offset.
     if let Some(add) = matches_input(ctx, spec, Opcode::Iadd) {
@@ -409,6 +442,16 @@ fn lower_to_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: u
                 shift_amt,
             )
         } else {
+            for i in 0..=1 {
+                if let Some(cst) = ctx.get_input(add, i).constant {
+                    let final_offset = (offset as i64).wrapping_add(cst as i64);
+                    if low32_will_sign_extend_to_64(final_offset as u64) {
+                        let base = put_input_in_reg(ctx, add_inputs[1 - i]);
+                        return Amode::imm_reg(final_offset as u32, base);
+                    }
+                }
+            }
+
             (
                 put_input_in_reg(ctx, add_inputs[0]),
                 put_input_in_reg(ctx, add_inputs[1]),
@@ -416,11 +459,11 @@ fn lower_to_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: u
             )
         };
 
-        return Amode::imm_reg_reg_shift(offset, base, index, shift);
+        return Amode::imm_reg_reg_shift(offset as u32, base, index, shift);
     }
 
     let input = put_input_in_reg(ctx, spec);
-    Amode::imm_reg(offset, input)
+    Amode::imm_reg(offset as u32, input)
 }
 
 //=============================================================================
@@ -450,12 +493,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
     match op {
         Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
-            let w64 = ctx
+            let value = ctx
                 .get_constant(insn)
                 .expect("constant value for iconst et al");
-            let dst_is_64 = w64 > 0x7fffffff;
             let dst = get_output_reg(ctx, outputs[0]);
-            ctx.emit(Inst::imm_r(dst_is_64, w64, dst));
+            for inst in Inst::gen_constant(dst, value, ty.unwrap(), |reg_class, ty| {
+                ctx.alloc_tmp(reg_class, ty)
+            }) {
+                ctx.emit(inst);
+            }
         }
 
         Opcode::Iadd
@@ -669,7 +715,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let dst = get_output_reg(ctx, outputs[0]);
             let tmp = ctx.alloc_tmp(RegClass::I64, ty);
 
-            ctx.emit(Inst::imm_r(ty == types::I64, u64::max_value(), dst));
+            ctx.emit(Inst::imm(
+                OperandSize::from_bytes(ty.bytes()),
+                u64::max_value(),
+                dst,
+            ));
 
             ctx.emit(Inst::unary_rm_r(
                 ty.bytes() as u8,
@@ -685,7 +735,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 tmp,
             ));
 
-            ctx.emit(Inst::imm_r(ty == types::I64, ty.bits() as u64 - 1, dst));
+            ctx.emit(Inst::imm(
+                OperandSize::from_bytes(ty.bytes()),
+                ty.bits() as u64 - 1,
+                dst,
+            ));
 
             ctx.emit(Inst::alu_rmi_r(
                 ty == types::I64,
@@ -710,7 +764,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let dst = get_output_reg(ctx, outputs[0]);
             let tmp = ctx.alloc_tmp(RegClass::I64, ty);
 
-            ctx.emit(Inst::imm_r(false /* 64 bits */, ty.bits() as u64, tmp));
+            ctx.emit(Inst::imm(OperandSize::Size32, ty.bits() as u64, tmp));
 
             ctx.emit(Inst::unary_rm_r(
                 ty.bytes() as u8,
@@ -762,7 +816,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             ));
 
             // mov 0x7777_7777_7777_7777, cst
-            ctx.emit(Inst::imm_r(is_64, 0x7777777777777777, cst));
+            ctx.emit(Inst::imm(OperandSize::Size64, 0x7777777777777777, cst));
 
             // andq cst, tmp1
             ctx.emit(Inst::alu_rmi_r(
@@ -846,7 +900,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             ));
 
             // mov $0x0F0F_0F0F_0F0F_0F0F, cst
-            ctx.emit(Inst::imm_r(is_64, 0x0F0F0F0F0F0F0F0F, cst));
+            ctx.emit(Inst::imm(OperandSize::Size64, 0x0F0F0F0F0F0F0F0F, cst));
 
             // and cst, dst
             ctx.emit(Inst::alu_rmi_r(
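// Sketch of the displacement-folding condition introduced in `lower_to_amode`
// above (hypothetical standalone helper): a constant `iadd` operand may be
// folded into the addressing-mode offset only while the sum still fits a
// sign-extended 32-bit displacement, mirroring x86 addressing.
fn fold_constant_addend(offset: i32, cst: u64) -> Option<i64> {
    let final_offset = (offset as i64).wrapping_add(cst as i64);
    if final_offset == (final_offset as i32) as i64 {
        Some(final_offset) // encodable as disp32
    } else {
        None // too wide: keep the add materialized in a register
    }
}

fn main() {
    assert_eq!(fold_constant_addend(-8, 16), Some(8));
    assert_eq!(fold_constant_addend(0, 1 << 40), None);
}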
@@ -857,7 +911,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 ));
 
                 // mov $0x0101_0101_0101_0101, cst
-                ctx.emit(Inst::imm_r(is_64, 0x0101010101010101, cst));
+                ctx.emit(Inst::imm(OperandSize::Size64, 0x0101010101010101, cst));
 
                 // mul cst, dst
                 ctx.emit(Inst::alu_rmi_r(
@@ -1808,7 +1862,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     | Opcode::Uload32
                     | Opcode::Sload32 => {
                         assert_eq!(inputs.len(), 1, "only one input for load operands");
-                        lower_to_amode(ctx, inputs[0], offset as u32)
+                        lower_to_amode(ctx, inputs[0], offset)
                     }
 
                     Opcode::LoadComplex
@@ -1899,7 +1953,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 let addr = match op {
                     Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
                         assert_eq!(inputs.len(), 2, "only one input for store memory operands");
-                        lower_to_amode(ctx, inputs[1], offset as u32)
+                        lower_to_amode(ctx, inputs[1], offset)
                     }
 
                     Opcode::StoreComplex
@@ -2125,23 +2179,17 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
                 if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) {
                     let cond_code = ctx.data(fcmp).fp_cond_code().unwrap();
 
-                    // we request inversion of Equal to NotEqual here: taking LHS if equal would mean
-                    // take it if both CC::NP and CC::Z are set, the conjunction of which can't be
-                    // modeled with a single cmov instruction. Instead, we'll swap LHS and RHS in the
-                    // select operation, and invert the equal to a not-equal here.
-                    let fcmp_results = emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::InvertEqual);
-
-                    let (lhs_input, rhs_input) = match fcmp_results {
-                        FcmpCondResult::InvertedEqualOrConditions(_, _) => (inputs[2], inputs[1]),
-                        FcmpCondResult::Condition(_)
-                        | FcmpCondResult::AndConditions(_, _)
-                        | FcmpCondResult::OrConditions(_, _) => (inputs[1], inputs[2]),
+                    // For equal, we flip the operands, because we can't test a conjunction of
+                    // CPU flags with a single cmove; see InvertedEqualOrConditions doc comment.
+                    let (lhs_input, rhs_input) = match cond_code {
+                        FloatCC::Equal => (inputs[2], inputs[1]),
+                        _ => (inputs[1], inputs[2]),
                     };
 
                     let ty = ctx.output_ty(insn, 0);
                     let rhs = put_input_in_reg(ctx, rhs_input);
                     let dst = get_output_reg(ctx, outputs[0]);
-                    let lhs = if is_int_ty(ty) && ty.bytes() < 4 {
+                    let lhs = if is_int_or_ref_ty(ty) && ty.bytes() < 4 {
                         // Special case: since the higher bits are undefined per CLIF semantics, we
                         // can just apply a 32-bit cmove here. Force inputs into registers, to
                         // avoid partial spilling out-of-bounds with memory accesses, though.
@@ -2151,11 +2199,22 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                         input_to_reg_mem(ctx, lhs_input)
                     };
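                    // Background (not from the patch): ucomiss/ucomisd set ZF=1 both for
                    // "equal" and for "unordered" (NaN), so FloatCC::Equal corresponds to
                    // the flag conjunction ZF=1 && PF=0, which a single cmovcc cannot test.
                    // Its inversion, "not-equal or unordered", is the disjunction
                    // ZF=0 || PF=1, and a disjunction can be lowered as two chained cmovs,
                    // hence the operand swap plus FcmpSpec::InvertEqual just below.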
+                    // We request inversion of Equal to NotEqual here: taking LHS if equal would mean
+                    // take it if both CC::NP and CC::Z are set, the conjunction of which can't be
+                    // modeled with a single cmov instruction. Instead, we'll swap LHS and RHS in the
+                    // select operation, and invert the equal to a not-equal here.
+                    let fcmp_results = emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::InvertEqual);
+
+                    if let FcmpCondResult::InvertedEqualOrConditions(_, _) = &fcmp_results {
+                        // Keep this sync'd with the lowering of the select inputs above.
+                        assert_eq!(cond_code, FloatCC::Equal);
+                    }
+
                     ctx.emit(Inst::gen_move(dst, rhs, ty));
 
                     match fcmp_results {
                         FcmpCondResult::Condition(cc) => {
-                            if is_int_ty(ty) {
+                            if is_int_or_ref_ty(ty) {
                                 let size = u8::max(ty.bytes() as u8, 4);
                                 ctx.emit(Inst::cmove(size, cc, lhs, dst));
                             } else {
@@ -2169,7 +2228,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                         }
                         FcmpCondResult::InvertedEqualOrConditions(cc1, cc2)
                         | FcmpCondResult::OrConditions(cc1, cc2) => {
-                            if is_int_ty(ty) {
+                            if is_int_or_ref_ty(ty) {
                                 let size = u8::max(ty.bytes() as u8, 4);
                                 ctx.emit(Inst::cmove(size, cc1, lhs.clone(), dst));
                                 ctx.emit(Inst::cmove(size, cc2, lhs, dst));
@@ -2180,27 +2239,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                         }
                     }
                 } else {
-                    let cc = if let Some(icmp) = matches_input(ctx, flag_input, Opcode::Icmp) {
-                        emit_cmp(ctx, icmp);
-                        let cond_code = ctx.data(icmp).cond_code().unwrap();
-                        CC::from_intcc(cond_code)
-                    } else {
-                        // The input is a boolean value, compare it against zero.
-                        let size = ctx.input_ty(insn, 0).bytes() as u8;
-                        let test = put_input_in_reg(ctx, inputs[0]);
-                        ctx.emit(Inst::cmp_rmi_r(size, RegMemImm::imm(0), test));
-                        CC::NZ
-                    };
+                    let ty = ty.unwrap();
 
-                    let rhs = put_input_in_reg(ctx, inputs[2]);
-                    let dst = get_output_reg(ctx, outputs[0]);
-                    let ty = ctx.output_ty(insn, 0);
-
-                    ctx.emit(Inst::gen_move(dst, rhs, ty));
-
-                    if is_int_ty(ty) {
-                        let mut size = ty.bytes() as u8;
-                        let lhs = if size < 4 {
+                    let mut size = ty.bytes() as u8;
+                    let lhs = if is_int_or_ref_ty(ty) {
+                        if size < 4 {
                             // Special case: since the higher bits are undefined per CLIF semantics, we
                             // can just apply a 32-bit cmove here. Force inputs into registers, to
                             // avoid partial spilling out-of-bounds with memory accesses, though.
@@ -2208,17 +2251,44 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                             RegMem::reg(put_input_in_reg(ctx, inputs[1]))
                         } else {
                             input_to_reg_mem(ctx, inputs[1])
-                        };
+                        }
+                    } else {
+                        input_to_reg_mem(ctx, inputs[1])
+                    };
+
+                    let rhs = put_input_in_reg(ctx, inputs[2]);
+                    let dst = get_output_reg(ctx, outputs[0]);
+
+                    let cc = if let Some(icmp) = matches_input(ctx, flag_input, Opcode::Icmp) {
+                        emit_cmp(ctx, icmp);
+                        let cond_code = ctx.data(icmp).cond_code().unwrap();
+                        CC::from_intcc(cond_code)
+                    } else {
+                        // The input is a boolean value, compare it against zero.
+                        let size = ctx.input_ty(insn, 0).bytes() as u8;
+                        let test = put_input_in_reg(ctx, flag_input);
+                        ctx.emit(Inst::cmp_rmi_r(size, RegMemImm::imm(0), test));
+                        CC::NZ
+                    };
+
+                    // This doesn't affect the flags.
+                    ctx.emit(Inst::gen_move(dst, rhs, ty));
+
+                    if is_int_or_ref_ty(ty) {
                         ctx.emit(Inst::cmove(size, cc, lhs, dst));
                     } else {
                         debug_assert!(ty == types::F32 || ty == types::F64);
-                        let lhs = input_to_reg_mem(ctx, inputs[1]);
                         ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst));
                     }
                 }
             }
 
             Opcode::Selectif | Opcode::SelectifSpectreGuard => {
+                let lhs = input_to_reg_mem(ctx, inputs[1]);
+                let rhs = put_input_in_reg(ctx, inputs[2]);
+                let dst = get_output_reg(ctx, outputs[0]);
+                let ty = ctx.output_ty(insn, 0);
+
                 // Verification ensures that the input is always a single-def ifcmp.
                 let cmp_insn = ctx
                     .get_input(inputs[0].insn, inputs[0].input)
@@ -2230,13 +2300,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
                 let cc = CC::from_intcc(ctx.data(insn).cond_code().unwrap());
 
-                let lhs = input_to_reg_mem(ctx, inputs[1]);
-                let rhs = put_input_in_reg(ctx, inputs[2]);
-                let dst = get_output_reg(ctx, outputs[0]);
-
-                let ty = ctx.output_ty(insn, 0);
-
-                if is_int_ty(ty) {
+                if is_int_or_ref_ty(ty) {
                     let size = ty.bytes() as u8;
                     if size == 1 {
                         // Sign-extend operands to 32, then do a cmove of size 4.
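// A behavioral model of the integer select sequence above (illustration only):
// plain `mov` does not write RFLAGS, which is why the unconditional copy of
// `rhs` may sit between the comparison and the cmov.
fn select_model(cc_holds: bool, lhs: u64, rhs: u64) -> u64 {
    let mut dst = rhs; // mov rhs, dst (flag-preserving)
    if cc_holds {
        dst = lhs; // cmovcc lhs, dst
    }
    dst
}

fn main() {
    assert_eq!(select_model(true, 1, 2), 1);
    assert_eq!(select_model(false, 1, 2), 2);
}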
@@ -2296,7 +2360,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 } else {
                     None
                 };
-                ctx.emit(Inst::imm_r(true, 0, Writable::from_reg(regs::rdx())));
+                // TODO use xor
+                ctx.emit(Inst::imm(
+                    OperandSize::Size32,
+                    0,
+                    Writable::from_reg(regs::rdx()),
+                ));
                 ctx.emit(Inst::checked_div_or_rem_seq(
                     kind,
                     size,
@@ -2308,30 +2377,24 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 let divisor = input_to_reg_mem(ctx, inputs[1]);
 
                 // Fill in the high parts:
-                if input_ty == types::I8 {
-                    if kind.is_signed() {
-                        // sign-extend the sign-bit of al into ah, for signed opcodes.
-                        ctx.emit(Inst::sign_extend_data(1));
-                    } else {
-                        ctx.emit(Inst::movzx_rm_r(
-                            ExtMode::BL,
-                            RegMem::reg(regs::rax()),
-                            Writable::from_reg(regs::rax()),
-                            /* infallible */ None,
-                        ));
-                    }
+                if kind.is_signed() {
+                    // sign-extend the sign-bit of al into ah for size 1, or rax into rdx, for
+                    // signed opcodes.
+                    ctx.emit(Inst::sign_extend_data(size));
+                } else if input_ty == types::I8 {
+                    ctx.emit(Inst::movzx_rm_r(
+                        ExtMode::BL,
+                        RegMem::reg(regs::rax()),
+                        Writable::from_reg(regs::rax()),
+                        /* infallible */ None,
+                    ));
                 } else {
-                    if kind.is_signed() {
-                        // sign-extend the sign-bit of rax into rdx, for signed opcodes.
-                        ctx.emit(Inst::sign_extend_data(size));
-                    } else {
-                        // zero for unsigned opcodes.
-                        ctx.emit(Inst::imm_r(
-                            true, /* is_64 */
-                            0,
-                            Writable::from_reg(regs::rdx()),
-                        ));
-                    }
+                    // zero for unsigned opcodes.
+                    ctx.emit(Inst::imm(
+                        OperandSize::Size64,
+                        0,
+                        Writable::from_reg(regs::rdx()),
+                    ));
                 }
 
                 // Emit the actual idiv.
@@ -2530,7 +2593,7 @@ impl LowerBackend for X64Backend {
                     }
                     FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
                 }
-            } else if is_int_ty(src_ty) || is_bool_ty(src_ty) {
+            } else if is_int_or_ref_ty(src_ty) || is_bool_ty(src_ty) {
                 let src = put_input_in_reg(
                     ctx,
                     InsnInput {
@@ -2553,7 +2616,7 @@ impl LowerBackend for X64Backend {
             Opcode::BrIcmp => {
                 let src_ty = ctx.input_ty(branches[0], 0);
 
-                if is_int_ty(src_ty) || is_bool_ty(src_ty) {
+                if is_int_or_ref_ty(src_ty) || is_bool_ty(src_ty) {
                     let lhs = put_input_in_reg(
                         ctx,
                         InsnInput {
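// Background for the division lowering above (sketch): x86 idiv/div consumes a
// double-width dividend in rdx:rax (ah:al for 8-bit), so the high half must be
// prepared first -- sign-replicated for signed division (cbw/cwd/cdq/cqo, i.e.
// `sign_extend_data`), zeroed for unsigned (here via `Inst::imm`; the TODO
// notes that an xor of rdx with itself would be a shorter encoding).
fn high_half_for_division(rax: u64, signed: bool) -> u64 {
    if signed {
        ((rax as i64) >> 63) as u64 // what cqo computes into rdx
    } else {
        0
    }
}

fn main() {
    assert_eq!(high_half_for_division(-7i64 as u64, true), u64::max_value());
    assert_eq!(high_half_for_division(7, true), 0);
    assert_eq!(high_half_for_division(7, false), 0);
}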