diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index 16e7490a09..a01e35bc0d 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -1530,7 +1530,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let src = if let Some(ext_spec) = ext_spec {
                 RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec))
             } else {
-                input_to_reg_mem(ctx, inputs[0])
+                // N.B.: explicitly put input in a reg here because the width of the instruction
+                // into which this RM op goes may not match the width of the input type (in fact,
+                // it won't for i32.popcnt), and we don't want a larger than necessary load.
+                RegMem::reg(put_input_in_reg(ctx, inputs[0]))
             };
 
             let dst = get_output_reg(ctx, outputs[0]);
diff --git a/cranelift/filetests/filetests/isa/x64/popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt.clif
new file mode 100644
index 0000000000..a06f5a27ce
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x64/popcnt.clif
@@ -0,0 +1,113 @@
+test compile
+target x86_64
+feature "experimental_x64"
+
+; TODO: test with popcnt feature available too, once new backend supports that.
+
+function %popcnt64(i64) -> i64 {
+block0(v0: i64):
+    v1 = popcnt v0
+; check: movq %rdi, %rsi
+; nextln: shrq $$1, %rsi
+; nextln: movabsq $$8608480567731124087, %rax
+; nextln: andq %rax, %rsi
+; nextln: subq %rsi, %rdi
+; nextln: shrq $$1, %rsi
+; nextln: andq %rax, %rsi
+; nextln: subq %rsi, %rdi
+; nextln: shrq $$1, %rsi
+; nextln: andq %rax, %rsi
+; nextln: subq %rsi, %rdi
+; nextln: movq %rdi, %rsi
+; nextln: shrq $$4, %rsi
+; nextln: addq %rdi, %rsi
+; nextln: movabsq $$1085102592571150095, %rdi
+; nextln: andq %rdi, %rsi
+; nextln: movabsq $$72340172838076673, %rdi
+; nextln: imulq %rdi, %rsi
+; nextln: shrq $$56, %rsi
+; nextln: movq %rsi, %rax
+    return v1
+}
+
+function %popcnt64load(i64) -> i64 {
+block0(v0: i64):
+    v1 = load.i64 v0
+    v2 = popcnt v1
+    return v2
+; check: movq 0(%rdi), %rdi
+; nextln: movq %rdi, %rsi
+; nextln: shrq $$1, %rsi
+; nextln: movabsq $$8608480567731124087, %rax
+; nextln: andq %rax, %rsi
+; nextln: subq %rsi, %rdi
+; nextln: shrq $$1, %rsi
+; nextln: andq %rax, %rsi
+; nextln: subq %rsi, %rdi
+; nextln: shrq $$1, %rsi
+; nextln: andq %rax, %rsi
+; nextln: subq %rsi, %rdi
+; nextln: movq %rdi, %rsi
+; nextln: shrq $$4, %rsi
+; nextln: addq %rdi, %rsi
+; nextln: movabsq $$1085102592571150095, %rdi
+; nextln: andq %rdi, %rsi
+; nextln: movabsq $$72340172838076673, %rdi
+; nextln: imulq %rdi, %rsi
+; nextln: shrq $$56, %rsi
+; nextln: movq %rsi, %rax
+}
+
+function %popcnt32(i32) -> i32 {
+block0(v0: i32):
+    v1 = popcnt v0
+    return v1
+; check: movq %rdi, %rsi
+; nextln: shrl $$1, %esi
+; nextln: andl $$2004318071, %esi
+; nextln: subl %esi, %edi
+; nextln: shrl $$1, %esi
+; nextln: andl $$2004318071, %esi
+; nextln: subl %esi, %edi
+; nextln: shrl $$1, %esi
+; nextln: andl $$2004318071, %esi
+; nextln: subl %esi, %edi
+; nextln: movq %rdi, %rsi
+; nextln: shrl $$4, %esi
+; nextln: addl %edi, %esi
+; nextln: andl $$252645135, %esi
+; nextln: imull $$16843009, %esi
+; nextln: shrl $$24, %esi
+; nextln: movq %rsi, %rax
+; nextln: movq %rbp, %rsp
+; nextln: popq %rbp
+; nextln: ret
+}
+
+function %popcnt32load(i64) -> i32 {
+block0(v0: i64):
+    v1 = load.i32 v0
+    v2 = popcnt v1
+    return v2
+; check: movl 0(%rdi), %edi
+; nextln: movq %rdi, %rsi
+; nextln: shrl $$1, %esi
+; nextln: andl $$2004318071, %esi
+; nextln: subl %esi, %edi
+; nextln: shrl $$1, %esi
+; nextln: andl $$2004318071, %esi
+; nextln: subl %esi, %edi
+; nextln: shrl $$1, %esi
+; nextln: andl $$2004318071, %esi
+; nextln: subl %esi, %edi
+; nextln: movq %rdi, %rsi
+; nextln: shrl $$4, %esi
+; nextln: addl %edi, %esi
+; nextln: andl $$252645135, %esi
+; nextln: imull $$16843009, %esi
+; nextln: shrl $$24, %esi
+; nextln: movq %rsi, %rax
+; nextln: movq %rbp, %rsp
+; nextln: popq %rbp
+; nextln: ret
+}