diff --git a/cranelift/codegen/src/isa/x64/encoding/rex.rs b/cranelift/codegen/src/isa/x64/encoding/rex.rs index f48fea8306..da1c438400 100644 --- a/cranelift/codegen/src/isa/x64/encoding/rex.rs +++ b/cranelift/codegen/src/isa/x64/encoding/rex.rs @@ -356,46 +356,30 @@ pub(crate) fn emit_modrm_sib_disp( match *mem_e { Amode::ImmReg { simm32, base, .. } => { let enc_e = int_reg_enc(base); + let mut imm = Imm::new(simm32); - // Now the mod/rm and associated immediates. This is - // significantly complicated due to the multiple special cases. - if simm32 == 0 - && enc_e != regs::ENC_RSP - && enc_e != regs::ENC_RBP - && enc_e != regs::ENC_R12 - && enc_e != regs::ENC_R13 - { - // FIXME JRS 2020Feb11: those four tests can surely be - // replaced by a single mask-and-compare check. We should do - // that because this routine is likely to be hot. - sink.put1(encode_modrm(0, enc_g & 7, enc_e & 7)); - } else if simm32 == 0 && (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) { - sink.put1(encode_modrm(0, enc_g & 7, 4)); - sink.put1(0x24); - } else if low8_will_sign_extend_to_32(simm32) - && enc_e != regs::ENC_RSP - && enc_e != regs::ENC_R12 - { - sink.put1(encode_modrm(1, enc_g & 7, enc_e & 7)); - sink.put1((simm32 & 0xFF) as u8); - } else if enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 { - sink.put1(encode_modrm(2, enc_g & 7, enc_e & 7)); - sink.put4(simm32); - } else if (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) - && low8_will_sign_extend_to_32(simm32) - { - // REX.B distinguishes RSP from R12 - sink.put1(encode_modrm(1, enc_g & 7, 4)); - sink.put1(0x24); - sink.put1((simm32 & 0xFF) as u8); - } else if enc_e == regs::ENC_R12 || enc_e == regs::ENC_RSP { - //.. wait for test case for RSP case - // REX.B distinguishes RSP from R12 - sink.put1(encode_modrm(2, enc_g & 7, 4)); - sink.put1(0x24); - sink.put4(simm32); + // Most base registers allow for a single ModRM byte plus an + // optional immediate. If rsp is the base register, however, then a + // SIB byte must be used. + let enc_e_low3 = enc_e & 7; + if enc_e_low3 != regs::ENC_RSP { + // If the base register is rbp and there's no offset then force + // a 1-byte zero offset since otherwise the encoding would be + // invalid. + if enc_e_low3 == regs::ENC_RBP { + imm.force_immediate(); + } + sink.put1(encode_modrm(imm.m0d(), enc_g & 7, enc_e & 7)); + imm.emit(sink); } else { - unreachable!("ImmReg"); + // Displacement from RSP is encoded with a SIB byte where + // the index and base are both encoded as RSP's encoding of + // 0b100. This special encoding means that the index register + // isn't used and the base is 0b100 with or without a + // REX-encoded 4th bit (e.g. rsp or r12) + sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100)); + sink.put1(0b00_100_100); + imm.emit(sink); } } @@ -409,23 +393,31 @@ pub(crate) fn emit_modrm_sib_disp( let enc_base = int_reg_enc(*reg_base); let enc_index = int_reg_enc(*reg_index); - // modrm, SIB, immediates. - if low8_will_sign_extend_to_32(simm32) && enc_index != regs::ENC_RSP { - sink.put1(encode_modrm(1, enc_g & 7, 4)); - sink.put1(encode_sib(shift, enc_index & 7, enc_base & 7)); - sink.put1(simm32 as u8); - } else if enc_index != regs::ENC_RSP { - sink.put1(encode_modrm(2, enc_g & 7, 4)); - sink.put1(encode_sib(shift, enc_index & 7, enc_base & 7)); - sink.put4(simm32); - } else { - panic!("ImmRegRegShift"); + // Encoding of ModRM/SIB bytes don't allow the index register to + // ever be rsp. Note, though, that the encoding of r12, whose three + // lower bits match the encoding of rsp, is explicitly allowed with + // REX bytes so only rsp is disallowed. + assert!(enc_index != regs::ENC_RSP); + + // If the offset is zero then there is no immediate. Note, though, + // that if the base register's lower three bits are `101` then an + // offset must be present. This is a special case in the encoding of + // the SIB byte and requires an explicit displacement with rbp/r13. + let mut imm = Imm::new(simm32); + if enc_base & 7 == regs::ENC_RBP { + imm.force_immediate(); } + + // With the above determined encode the ModRM byte, then the SIB + // byte, then any immediate as necessary. + sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100)); + sink.put1(encode_sib(shift, enc_index & 7, enc_base & 7)); + imm.emit(sink); } Amode::RipRelative { ref target } => { // RIP-relative is mod=00, rm=101. - sink.put1(encode_modrm(0, enc_g & 7, 0b101)); + sink.put1(encode_modrm(0b00, enc_g & 7, 0b101)); let offset = sink.cur_offset(); sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32); @@ -441,6 +433,52 @@ pub(crate) fn emit_modrm_sib_disp( } } +enum Imm { + None, + Imm8(u8), + Imm32(u32), +} + +impl Imm { + /// Classifies the 32-bit immediate `val` as how this can be encoded + /// with ModRM/SIB bytes. + fn new(val: u32) -> Imm { + if val == 0 { + Imm::None + } else if low8_will_sign_extend_to_32(val) { + Imm::Imm8(val as u8) + } else { + Imm::Imm32(val) + } + } + + /// Forces `Imm::None` to become `Imm::Imm8(0)`, used for special cases + /// where some base registers require an immediate. + fn force_immediate(&mut self) { + if let Imm::None = self { + *self = Imm::Imm8(0); + } + } + + /// Returns the two "mod" bits present at the upper bits of the mod/rm + /// byte. + fn m0d(&self) -> u8 { + match self { + Imm::None => 0b00, + Imm::Imm8(_) => 0b01, + Imm::Imm32(_) => 0b10, + } + } + + fn emit(&self, sink: &mut MachBuffer) { + match self { + Imm::None => {} + Imm::Imm8(n) => sink.put1(*n), + Imm::Imm32(n) => sink.put4(*n), + } + } +} + /// This is the core 'emit' function for instructions that do not reference memory. /// /// This is conceptually the same as emit_modrm_sib_enc_ge, except it is for the case where the E @@ -473,7 +511,7 @@ pub(crate) fn emit_std_enc_enc( // Now the mod/rm byte. The instruction we're generating doesn't access // memory, so there is no SIB byte or immediate -- we're done. - sink.put1(encode_modrm(3, enc_g & 7, enc_e & 7)); + sink.put1(encode_modrm(0b11, enc_g & 7, enc_e & 7)); } // These are merely wrappers for the above two functions that facilitate passing diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif index 169b898c70..a7a32f4421 100644 --- a/cranelift/filetests/filetests/isa/x64/branches.clif +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ -343,7 +343,7 @@ block2: ; movl %edi, %r10d ; cmpl %r9d, %r10d ; cmovbl %r10d, %r9d -; leaq 0xa(%rip), %rax +; leaq 9(%rip), %rax ; movslq (%rax, %r9, 4), %rcx ; addq %rcx, %rax ; jmpq *%rax @@ -353,14 +353,14 @@ block2: ; addb %al, (%rax) ; sbbb %al, (%rax) ; addb %al, (%rax) -; block2: ; offset 0x31 -; jmp 0x3d -; block3: ; offset 0x36 +; block2: ; offset 0x30 +; jmp 0x3c +; block3: ; offset 0x35 ; xorl %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; retq -; block4: ; offset 0x3d +; block4: ; offset 0x3c ; movl $1, %eax ; movq %rbp, %rsp ; popq %rbp @@ -938,7 +938,7 @@ block5(v5: i32): ; movl %edi, %ecx ; cmpl %eax, %ecx ; cmovbl %ecx, %eax -; leaq 0xb(%rip), %r9 +; leaq 0xa(%rip), %r9 ; movslq (%r9, %rax, 4), %r10 ; addq %r10, %r9 ; jmpq *%r9 @@ -950,20 +950,20 @@ block5(v5: i32): ; addb %al, (%rax) ; addb %dh, (%rdi) ; addb %al, (%rax) -; block2: ; offset 0x36 -; jmp 0x45 -; block3: ; offset 0x3b +; block2: ; offset 0x35 +; jmp 0x44 +; block3: ; offset 0x3a ; movl $3, %esi -; jmp 0x5e -; block4: ; offset 0x45 +; jmp 0x5d +; block4: ; offset 0x44 ; movl $2, %esi -; jmp 0x5e -; block5: ; offset 0x4f +; jmp 0x5d +; block5: ; offset 0x4e ; movl $1, %esi -; jmp 0x5e -; block6: ; offset 0x59 +; jmp 0x5d +; block6: ; offset 0x58 ; movl $4, %esi -; block7: ; offset 0x5e +; block7: ; offset 0x5d ; leal (%rdi, %rsi), %eax ; movq %rbp, %rsp ; popq %rbp @@ -1026,7 +1026,7 @@ block1(v5: i32): ; movl %edi, %r9d ; cmpl %r8d, %r9d ; cmovbl %r9d, %r8d -; leaq 0xa(%rip), %rdi +; leaq 9(%rip), %rdi ; movslq (%rdi, %r8, 4), %rcx ; addq %rcx, %rdi ; jmpq *%rdi @@ -1040,20 +1040,20 @@ block1(v5: i32): ; addb %al, (%rax) ; xorb $0, %al ; addb %al, (%rax) -; block2: ; offset 0x4f -; jmp 0x6f -; block3: ; offset 0x54 +; block2: ; offset 0x4e +; jmp 0x6e +; block3: ; offset 0x53 ; movq %r10, %rax -; jmp 0x6f -; block4: ; offset 0x5c +; jmp 0x6e +; block4: ; offset 0x5b ; movq %r11, %rax -; jmp 0x6f -; block5: ; offset 0x64 +; jmp 0x6e +; block5: ; offset 0x63 ; movq %r11, %rax -; jmp 0x6f -; block6: ; offset 0x6c +; jmp 0x6e +; block6: ; offset 0x6b ; movq %rsi, %rax -; block7: ; offset 0x6f +; block7: ; offset 0x6e ; movq %rbp, %rsp ; popq %rbp ; retq diff --git a/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif b/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif index 547cbaf39f..40be5e1738 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif @@ -1415,6 +1415,7 @@ block0(v0: i8x16, v1: i32): ; movq %rbp, %rsp ; popq %rbp ; retq +; addb %bh, %bh function %i8x16_shl_imm(i8x16) -> i8x16 { block0(v0: i8x16): @@ -1658,7 +1659,7 @@ block0(v0: i8x16, v1: i32): ; retq ; addb %al, (%rax) ; addb %al, (%rax) -; addb %bh, %bh +; addb %al, (%rax) function %i8x16_ushr_imm(i8x16) -> i8x16 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index a0e4a9c279..393a87c4a2 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -365,6 +365,7 @@ block0(v0: i32): ; addb %al, (%rax) ; addb %al, (%rax) ; addb %al, (%rax) +; addb %bh, %bh function %ishl_i8x16_imm(i8x16) -> i8x16 { block0(v0: i8x16):