diff --git a/cranelift/codegen/src/isa/x64/encoding/rex.rs b/cranelift/codegen/src/isa/x64/encoding/rex.rs
index f48fea8306..da1c438400 100644
--- a/cranelift/codegen/src/isa/x64/encoding/rex.rs
+++ b/cranelift/codegen/src/isa/x64/encoding/rex.rs
@@ -356,46 +356,30 @@ pub(crate) fn emit_modrm_sib_disp(
     match *mem_e {
         Amode::ImmReg { simm32, base, .. } => {
             let enc_e = int_reg_enc(base);
+            let mut imm = Imm::new(simm32);
 
-            // Now the mod/rm and associated immediates.  This is
-            // significantly complicated due to the multiple special cases.
-            if simm32 == 0
-                && enc_e != regs::ENC_RSP
-                && enc_e != regs::ENC_RBP
-                && enc_e != regs::ENC_R12
-                && enc_e != regs::ENC_R13
-            {
-                // FIXME JRS 2020Feb11: those four tests can surely be
-                // replaced by a single mask-and-compare check.  We should do
-                // that because this routine is likely to be hot.
-                sink.put1(encode_modrm(0, enc_g & 7, enc_e & 7));
-            } else if simm32 == 0 && (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) {
-                sink.put1(encode_modrm(0, enc_g & 7, 4));
-                sink.put1(0x24);
-            } else if low8_will_sign_extend_to_32(simm32)
-                && enc_e != regs::ENC_RSP
-                && enc_e != regs::ENC_R12
-            {
-                sink.put1(encode_modrm(1, enc_g & 7, enc_e & 7));
-                sink.put1((simm32 & 0xFF) as u8);
-            } else if enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 {
-                sink.put1(encode_modrm(2, enc_g & 7, enc_e & 7));
-                sink.put4(simm32);
-            } else if (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12)
-                && low8_will_sign_extend_to_32(simm32)
-            {
-                // REX.B distinguishes RSP from R12
-                sink.put1(encode_modrm(1, enc_g & 7, 4));
-                sink.put1(0x24);
-                sink.put1((simm32 & 0xFF) as u8);
-            } else if enc_e == regs::ENC_R12 || enc_e == regs::ENC_RSP {
-                //.. wait for test case for RSP case
-                // REX.B distinguishes RSP from R12
-                sink.put1(encode_modrm(2, enc_g & 7, 4));
-                sink.put1(0x24);
-                sink.put4(simm32);
+            // Most base registers allow for a single ModRM byte plus an
+            // optional immediate. If rsp is the base register, however, then a
+            // SIB byte must be used.
+            let enc_e_low3 = enc_e & 7;
+            if enc_e_low3 != regs::ENC_RSP {
+                // If the base register is rbp and there's no offset then force
+                // a 1-byte zero offset since otherwise the encoding would be
+                // invalid.
+                if enc_e_low3 == regs::ENC_RBP {
+                    imm.force_immediate();
+                }
+                sink.put1(encode_modrm(imm.m0d(), enc_g & 7, enc_e & 7));
+                imm.emit(sink);
             } else {
-                unreachable!("ImmReg");
+                // Displacement from RSP is encoded with a SIB byte where
+                // the index and base are both encoded as RSP's encoding of
+                // 0b100. This special encoding means that the index register
+                // isn't used and the base is 0b100 with or without a
+                // REX-encoded 4th bit (e.g. rsp or r12)
+                sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
+                sink.put1(0b00_100_100);
+                imm.emit(sink);
             }
         }
 
@@ -409,23 +393,31 @@ pub(crate) fn emit_modrm_sib_disp(
             let enc_base = int_reg_enc(*reg_base);
             let enc_index = int_reg_enc(*reg_index);
 
-            // modrm, SIB, immediates.
-            if low8_will_sign_extend_to_32(simm32) && enc_index != regs::ENC_RSP {
-                sink.put1(encode_modrm(1, enc_g & 7, 4));
-                sink.put1(encode_sib(shift, enc_index & 7, enc_base & 7));
-                sink.put1(simm32 as u8);
-            } else if enc_index != regs::ENC_RSP {
-                sink.put1(encode_modrm(2, enc_g & 7, 4));
-                sink.put1(encode_sib(shift, enc_index & 7, enc_base & 7));
-                sink.put4(simm32);
-            } else {
-                panic!("ImmRegRegShift");
+            // Encoding of ModRM/SIB bytes don't allow the index register to
+            // ever be rsp. Note, though, that the encoding of r12, whose three
+            // lower bits match the encoding of rsp, is explicitly allowed with
+            // REX bytes so only rsp is disallowed.
+            assert!(enc_index != regs::ENC_RSP);
+
+            // If the offset is zero then there is no immediate. Note, though,
+            // that if the base register's lower three bits are `101` then an
+            // offset must be present. This is a special case in the encoding of
+            // the SIB byte and requires an explicit displacement with rbp/r13.
+            let mut imm = Imm::new(simm32);
+            if enc_base & 7 == regs::ENC_RBP {
+                imm.force_immediate();
             }
+
+            // With the above determined encode the ModRM byte, then the SIB
+            // byte, then any immediate as necessary.
+            sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
+            sink.put1(encode_sib(shift, enc_index & 7, enc_base & 7));
+            imm.emit(sink);
         }
 
         Amode::RipRelative { ref target } => {
             // RIP-relative is mod=00, rm=101.
-            sink.put1(encode_modrm(0, enc_g & 7, 0b101));
+            sink.put1(encode_modrm(0b00, enc_g & 7, 0b101));
 
             let offset = sink.cur_offset();
             sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32);
@@ -441,6 +433,52 @@ pub(crate) fn emit_modrm_sib_disp(
     }
 }
 
+enum Imm {
+    None,
+    Imm8(u8),
+    Imm32(u32),
+}
+
+impl Imm {
+    /// Classifies the 32-bit immediate `val` as how this can be encoded
+    /// with ModRM/SIB bytes.
+    fn new(val: u32) -> Imm {
+        if val == 0 {
+            Imm::None
+        } else if low8_will_sign_extend_to_32(val) {
+            Imm::Imm8(val as u8)
+        } else {
+            Imm::Imm32(val)
+        }
+    }
+
+    /// Forces `Imm::None` to become `Imm::Imm8(0)`, used for special cases
+    /// where some base registers require an immediate.
+    fn force_immediate(&mut self) {
+        if let Imm::None = self {
+            *self = Imm::Imm8(0);
+        }
+    }
+
+    /// Returns the two "mod" bits present at the upper bits of the mod/rm
+    /// byte.
+    fn m0d(&self) -> u8 {
+        match self {
+            Imm::None => 0b00,
+            Imm::Imm8(_) => 0b01,
+            Imm::Imm32(_) => 0b10,
+        }
+    }
+
+    fn emit(&self, sink: &mut MachBuffer<Inst>) {
+        match self {
+            Imm::None => {}
+            Imm::Imm8(n) => sink.put1(*n),
+            Imm::Imm32(n) => sink.put4(*n),
+        }
+    }
+}
+
 /// This is the core 'emit' function for instructions that do not reference memory.
 ///
 /// This is conceptually the same as emit_modrm_sib_enc_ge, except it is for the case where the E
@@ -473,7 +511,7 @@ pub(crate) fn emit_std_enc_enc(
 
     // Now the mod/rm byte.  The instruction we're generating doesn't access
     // memory, so there is no SIB byte or immediate -- we're done.
-    sink.put1(encode_modrm(3, enc_g & 7, enc_e & 7));
+    sink.put1(encode_modrm(0b11, enc_g & 7, enc_e & 7));
 }
 
 // These are merely wrappers for the above two functions that facilitate passing
diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif
index 169b898c70..a7a32f4421 100644
--- a/cranelift/filetests/filetests/isa/x64/branches.clif
+++ b/cranelift/filetests/filetests/isa/x64/branches.clif
@@ -343,7 +343,7 @@ block2:
 ;   movl %edi, %r10d
 ;   cmpl %r9d, %r10d
 ;   cmovbl %r10d, %r9d
-;   leaq 0xa(%rip), %rax
+;   leaq 9(%rip), %rax
 ;   movslq (%rax, %r9, 4), %rcx
 ;   addq %rcx, %rax
 ;   jmpq *%rax
@@ -353,14 +353,14 @@ block2:
 ;   addb %al, (%rax)
 ;   sbbb %al, (%rax)
 ;   addb %al, (%rax)
-; block2: ; offset 0x31
-;   jmp 0x3d
-; block3: ; offset 0x36
+; block2: ; offset 0x30
+;   jmp 0x3c
+; block3: ; offset 0x35
 ;   xorl %eax, %eax
 ;   movq %rbp, %rsp
 ;   popq %rbp
 ;   retq
-; block4: ; offset 0x3d
+; block4: ; offset 0x3c
 ;   movl $1, %eax
 ;   movq %rbp, %rsp
 ;   popq %rbp
@@ -938,7 +938,7 @@ block5(v5: i32):
 ;   movl %edi, %ecx
 ;   cmpl %eax, %ecx
 ;   cmovbl %ecx, %eax
-;   leaq 0xb(%rip), %r9
+;   leaq 0xa(%rip), %r9
 ;   movslq (%r9, %rax, 4), %r10
 ;   addq %r10, %r9
 ;   jmpq *%r9
@@ -950,20 +950,20 @@ block5(v5: i32):
 ;   addb %al, (%rax)
 ;   addb %dh, (%rdi)
 ;   addb %al, (%rax)
-; block2: ; offset 0x36
-;   jmp 0x45
-; block3: ; offset 0x3b
+; block2: ; offset 0x35
+;   jmp 0x44
+; block3: ; offset 0x3a
 ;   movl $3, %esi
-;   jmp 0x5e
-; block4: ; offset 0x45
+;   jmp 0x5d
+; block4: ; offset 0x44
 ;   movl $2, %esi
-;   jmp 0x5e
-; block5: ; offset 0x4f
+;   jmp 0x5d
+; block5: ; offset 0x4e
 ;   movl $1, %esi
-;   jmp 0x5e
-; block6: ; offset 0x59
+;   jmp 0x5d
+; block6: ; offset 0x58
 ;   movl $4, %esi
-; block7: ; offset 0x5e
+; block7: ; offset 0x5d
 ;   leal (%rdi, %rsi), %eax
 ;   movq %rbp, %rsp
 ;   popq %rbp
@@ -1026,7 +1026,7 @@ block1(v5: i32):
 ;   movl %edi, %r9d
 ;   cmpl %r8d, %r9d
 ;   cmovbl %r9d, %r8d
-;   leaq 0xa(%rip), %rdi
+;   leaq 9(%rip), %rdi
 ;   movslq (%rdi, %r8, 4), %rcx
 ;   addq %rcx, %rdi
 ;   jmpq *%rdi
@@ -1040,20 +1040,20 @@ block1(v5: i32):
 ;   addb %al, (%rax)
 ;   xorb $0, %al
 ;   addb %al, (%rax)
-; block2: ; offset 0x4f
-;   jmp 0x6f
-; block3: ; offset 0x54
+; block2: ; offset 0x4e
+;   jmp 0x6e
+; block3: ; offset 0x53
 ;   movq %r10, %rax
-;   jmp 0x6f
-; block4: ; offset 0x5c
+;   jmp 0x6e
+; block4: ; offset 0x5b
 ;   movq %r11, %rax
-;   jmp 0x6f
-; block5: ; offset 0x64
+;   jmp 0x6e
+; block5: ; offset 0x63
 ;   movq %r11, %rax
-;   jmp 0x6f
-; block6: ; offset 0x6c
+;   jmp 0x6e
+; block6: ; offset 0x6b
 ;   movq %rsi, %rax
-; block7: ; offset 0x6f
+; block7: ; offset 0x6e
 ;   movq %rbp, %rsp
 ;   popq %rbp
 ;   retq
diff --git a/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif b/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif
index 547cbaf39f..40be5e1738 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif
@@ -1415,6 +1415,7 @@ block0(v0: i8x16, v1: i32):
 ;   movq %rbp, %rsp
 ;   popq %rbp
 ;   retq
+;   addb %bh, %bh
 
 function %i8x16_shl_imm(i8x16) -> i8x16 {
 block0(v0: i8x16):
@@ -1658,7 +1659,7 @@ block0(v0: i8x16, v1: i32):
 ;   retq
 ;   addb %al, (%rax)
 ;   addb %al, (%rax)
-;   addb %bh, %bh
+;   addb %al, (%rax)
 
 function %i8x16_ushr_imm(i8x16) -> i8x16 {
 block0(v0: i8x16):
diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
index a0e4a9c279..393a87c4a2 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
@@ -365,6 +365,7 @@ block0(v0: i32):
 ;   addb %al, (%rax)
 ;   addb %al, (%rax)
 ;   addb %al, (%rax)
+;   addb %bh, %bh
 
 function %ishl_i8x16_imm(i8x16) -> i8x16 {
 block0(v0: i8x16):