use log::debug; use regalloc::Reg; use crate::binemit::Reloc; use crate::isa::x64::inst::*; fn low8_will_sign_extend_to_64(x: u32) -> bool { let xs = (x as i32) as i64; xs == ((xs << 56) >> 56) } fn low8_will_sign_extend_to_32(x: u32) -> bool { let xs = x as i32; xs == ((xs << 24) >> 24) } //============================================================================= // Instructions and subcomponents: emission // For all of the routines that take both a memory-or-reg operand (sometimes // called "E" in the Intel documentation) and a reg-only operand ("G" in // Intelese), the order is always G first, then E. // // "enc" in the following means "hardware register encoding number". #[inline(always)] fn encode_modrm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 { debug_assert!(m0d < 4); debug_assert!(enc_reg_g < 8); debug_assert!(rm_e < 8); ((m0d & 3) << 6) | ((enc_reg_g & 7) << 3) | (rm_e & 7) } #[inline(always)] fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 { debug_assert!(shift < 4); debug_assert!(enc_index < 8); debug_assert!(enc_base < 8); ((shift & 3) << 6) | ((enc_index & 7) << 3) | (enc_base & 7) } /// Get the encoding number of a GPR. #[inline(always)] fn int_reg_enc(reg: Reg) -> u8 { debug_assert!(reg.is_real()); debug_assert_eq!(reg.get_class(), RegClass::I64); reg.get_hw_encoding() } /// Get the encoding number of any register. #[inline(always)] fn reg_enc(reg: Reg) -> u8 { debug_assert!(reg.is_real()); reg.get_hw_encoding() } /// A small bit field to record a REX prefix specification: /// - bit 0 set to 1 indicates REX.W must be 0 (cleared). /// - bit 1 set to 1 indicates the REX prefix must always be emitted. #[repr(transparent)] #[derive(Clone, Copy)] struct RexFlags(u8); impl RexFlags { /// By default, set the W field, and don't always emit. #[inline(always)] fn set_w() -> Self { Self(0) } /// Creates a new RexPrefix for which the REX.W bit will be cleared. #[inline(always)] fn clear_w() -> Self { Self(1) } #[inline(always)] fn always_emit(&mut self) -> &mut Self { self.0 = self.0 | 2; self } #[inline(always)] fn must_clear_w(&self) -> bool { (self.0 & 1) != 0 } #[inline(always)] fn must_always_emit(&self) -> bool { (self.0 & 2) != 0 } #[inline(always)] fn emit_two_op(&self, sink: &mut MachBuffer, enc_g: u8, enc_e: u8) { let w = if self.must_clear_w() { 0 } else { 1 }; let r = (enc_g >> 3) & 1; let x = 0; let b = (enc_e >> 3) & 1; let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b; if rex != 0x40 || self.must_always_emit() { sink.put1(rex); } } #[inline(always)] fn emit_three_op(&self, sink: &mut MachBuffer, enc_g: u8, enc_index: u8, enc_base: u8) { let w = if self.must_clear_w() { 0 } else { 1 }; let r = (enc_g >> 3) & 1; let x = (enc_index >> 3) & 1; let b = (enc_base >> 3) & 1; let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b; if rex != 0x40 || self.must_always_emit() { sink.put1(rex); } } } /// For specifying the legacy prefixes (or `None` if no prefix required) to /// be used at the start an instruction. A given prefix may be required for /// various operations, including instructions that operate on GPR, SSE, and Vex /// registers. enum LegacyPrefix { None, _66, _F2, _F3, } impl LegacyPrefix { #[inline(always)] fn emit(&self, sink: &mut MachBuffer) { match self { LegacyPrefix::_66 => sink.put1(0x66), LegacyPrefix::_F2 => sink.put1(0xF2), LegacyPrefix::_F3 => sink.put1(0xF3), LegacyPrefix::None => (), } } } /// This is the core 'emit' function for instructions that reference memory. /// /// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`, /// create and emit: /// - first the REX prefix, /// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`), /// - then the MOD/RM byte, /// - then optionally, a SIB byte, /// - and finally optionally an immediate that will be derived from the `mem_e` operand. /// /// For most instructions up to and including SSE4.2, that will be the whole instruction: this is /// what we call "standard" instructions, and abbreviate "std" in the name here. VEX instructions /// will require their own emitter functions. /// /// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided. /// /// The opcodes are written bigendianly for the convenience of callers. For example, if the opcode /// bytes to be emitted are, in this order, F3 0F 27, then the caller should pass `opcodes` == /// 0xF3_0F_27 and `num_opcodes` == 3. /// /// The register operand is represented here not as a `Reg` but as its hardware encoding, `enc_g`. /// `rex` can specify special handling for the REX prefix. By default, the REX prefix will /// indicate a 64-bit operation and will be deleted if it is redundant (0x40). Note that for a /// 64-bit operation, the REX prefix will normally never be redundant, since REX.W must be 1 to /// indicate a 64-bit operation. fn emit_std_enc_mem( sink: &mut MachBuffer, prefix: LegacyPrefix, opcodes: u32, mut num_opcodes: usize, enc_g: u8, mem_e: &Amode, rex: RexFlags, ) { // General comment for this function: the registers in `mem_e` must be // 64-bit integer registers, because they are part of an address // expression. But `enc_g` can be derived from a register of any class. prefix.emit(sink); match mem_e { Amode::ImmReg { simm32, base } => { // First, the REX byte. let enc_e = int_reg_enc(*base); rex.emit_two_op(sink, enc_g, enc_e); // Now the opcode(s). These include any other prefixes the caller // hands to us. while num_opcodes > 0 { num_opcodes -= 1; sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8); } // Now the mod/rm and associated immediates. This is // significantly complicated due to the multiple special cases. if *simm32 == 0 && enc_e != regs::ENC_RSP && enc_e != regs::ENC_RBP && enc_e != regs::ENC_R12 && enc_e != regs::ENC_R13 { // FIXME JRS 2020Feb11: those four tests can surely be // replaced by a single mask-and-compare check. We should do // that because this routine is likely to be hot. sink.put1(encode_modrm(0, enc_g & 7, enc_e & 7)); } else if *simm32 == 0 && (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) { sink.put1(encode_modrm(0, enc_g & 7, 4)); sink.put1(0x24); } else if low8_will_sign_extend_to_32(*simm32) && enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 { sink.put1(encode_modrm(1, enc_g & 7, enc_e & 7)); sink.put1((simm32 & 0xFF) as u8); } else if enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 { sink.put1(encode_modrm(2, enc_g & 7, enc_e & 7)); sink.put4(*simm32); } else if (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) && low8_will_sign_extend_to_32(*simm32) { // REX.B distinguishes RSP from R12 sink.put1(encode_modrm(1, enc_g & 7, 4)); sink.put1(0x24); sink.put1((simm32 & 0xFF) as u8); } else if enc_e == regs::ENC_R12 || enc_e == regs::ENC_RSP { //.. wait for test case for RSP case // REX.B distinguishes RSP from R12 sink.put1(encode_modrm(2, enc_g & 7, 4)); sink.put1(0x24); sink.put4(*simm32); } else { unreachable!("ImmReg"); } } Amode::ImmRegRegShift { simm32, base: reg_base, index: reg_index, shift, } => { let enc_base = int_reg_enc(*reg_base); let enc_index = int_reg_enc(*reg_index); // The rex byte. rex.emit_three_op(sink, enc_g, enc_index, enc_base); // All other prefixes and opcodes. while num_opcodes > 0 { num_opcodes -= 1; sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8); } // modrm, SIB, immediates. if low8_will_sign_extend_to_32(*simm32) && enc_index != regs::ENC_RSP { sink.put1(encode_modrm(1, enc_g & 7, 4)); sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7)); sink.put1(*simm32 as u8); } else if enc_index != regs::ENC_RSP { sink.put1(encode_modrm(2, enc_g & 7, 4)); sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7)); sink.put4(*simm32); } else { panic!("ImmRegRegShift"); } } } } /// This is the core 'emit' function for instructions that do not reference memory. /// /// This is conceptually the same as emit_modrm_sib_enc_ge, except it is for the case where the E /// operand is a register rather than memory. Hence it is much simpler. fn emit_std_enc_enc( sink: &mut MachBuffer, prefix: LegacyPrefix, opcodes: u32, mut num_opcodes: usize, enc_g: u8, enc_e: u8, rex: RexFlags, ) { // EncG and EncE can be derived from registers of any class, and they // don't even have to be from the same class. For example, for an // integer-to-FP conversion insn, one might be RegClass::I64 and the other // RegClass::V128. // The operand-size override. prefix.emit(sink); // The rex byte. rex.emit_two_op(sink, enc_g, enc_e); // All other prefixes and opcodes. while num_opcodes > 0 { num_opcodes -= 1; sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8); } // Now the mod/rm byte. The instruction we're generating doesn't access // memory, so there is no SIB byte or immediate -- we're done. sink.put1(encode_modrm(3, enc_g & 7, enc_e & 7)); } // These are merely wrappers for the above two functions that facilitate passing // actual `Reg`s rather than their encodings. fn emit_std_reg_mem( sink: &mut MachBuffer, prefix: LegacyPrefix, opcodes: u32, num_opcodes: usize, reg_g: Reg, mem_e: &Amode, rex: RexFlags, ) { let enc_g = reg_enc(reg_g); emit_std_enc_mem(sink, prefix, opcodes, num_opcodes, enc_g, mem_e, rex); } fn emit_std_reg_reg( sink: &mut MachBuffer, prefix: LegacyPrefix, opcodes: u32, num_opcodes: usize, reg_g: Reg, reg_e: Reg, rex: RexFlags, ) { let enc_g = reg_enc(reg_g); let enc_e = reg_enc(reg_e); emit_std_enc_enc(sink, prefix, opcodes, num_opcodes, enc_g, enc_e, rex); } /// Write a suitable number of bits from an imm64 to the sink. fn emit_simm(sink: &mut MachBuffer, size: u8, simm32: u32) { match size { 8 | 4 => sink.put4(simm32), 2 => sink.put2(simm32 as u16), 1 => sink.put1(simm32 as u8), _ => unreachable!(), } } /// The top-level emit function. /// /// Important! Do not add improved (shortened) encoding cases to existing /// instructions without also adding tests for those improved encodings. That /// is a dangerous game that leads to hard-to-track-down errors in the emitted /// code. /// /// For all instructions, make sure to have test coverage for all of the /// following situations. Do this by creating the cross product resulting from /// applying the following rules to each operand: /// /// (1) for any insn that mentions a register: one test using a register from /// the group [rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi] and a second one /// using a register from the group [r8, r9, r10, r11, r12, r13, r14, r15]. /// This helps detect incorrect REX prefix construction. /// /// (2) for any insn that mentions a byte register: one test for each of the /// four encoding groups [al, cl, dl, bl], [spl, bpl, sil, dil], /// [r8b .. r11b] and [r12b .. r15b]. This checks that /// apparently-redundant REX prefixes are retained when required. /// /// (3) for any insn that contains an immediate field, check the following /// cases: field is zero, field is in simm8 range (-128 .. 127), field is /// in simm32 range (-0x8000_0000 .. 0x7FFF_FFFF). This is because some /// instructions that require a 32-bit immediate have a short-form encoding /// when the imm is in simm8 range. /// /// Rules (1), (2) and (3) don't apply for registers within address expressions /// (`Addr`s). Those are already pretty well tested, and the registers in them /// don't have any effect on the containing instruction (apart from possibly /// require REX prefix bits). /// /// When choosing registers for a test, avoid using registers with the same /// offset within a given group. For example, don't use rax and r8, since they /// both have the lowest 3 bits as 000, and so the test won't detect errors /// where those 3-bit register sub-fields are confused by the emitter. Instead /// use (eg) rax (lo3 = 000) and r9 (lo3 = 001). Similarly, don't use (eg) cl /// and bpl since they have the same offset in their group; use instead (eg) cl /// and sil. /// /// For all instructions, also add a test that uses only low-half registers /// (rax .. rdi, xmm0 .. xmm7) etc, so as to check that any redundant REX /// prefixes are correctly omitted. This low-half restriction must apply to /// _all_ registers in the insn, even those in address expressions. /// /// Following these rules creates large numbers of test cases, but it's the /// only way to make the emitter reliable. /// /// Known possible improvements: /// /// * there's a shorter encoding for shl/shr/sar by a 1-bit immediate. (Do we /// care?) pub(crate) fn emit( inst: &Inst, sink: &mut MachBuffer, _flags: &settings::Flags, state: &mut EmitState, ) { match inst { Inst::Alu_RMI_R { is_64, op, src, dst: reg_g, } => { let rex = if *is_64 { RexFlags::set_w() } else { RexFlags::clear_w() }; if *op == AluRmiROpcode::Mul { // We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so // we have to special-case it. match src { RegMemImm::Reg { reg: reg_e } => { emit_std_reg_reg( sink, LegacyPrefix::None, 0x0FAF, 2, reg_g.to_reg(), *reg_e, rex, ); } RegMemImm::Mem { addr } => { emit_std_reg_mem( sink, LegacyPrefix::None, 0x0FAF, 2, reg_g.to_reg(), &addr.finalize(state), rex, ); } RegMemImm::Imm { simm32 } => { let useImm8 = low8_will_sign_extend_to_32(*simm32); let opcode = if useImm8 { 0x6B } else { 0x69 }; // Yes, really, reg_g twice. emit_std_reg_reg( sink, LegacyPrefix::None, opcode, 1, reg_g.to_reg(), reg_g.to_reg(), rex, ); emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32); } } } else { let (opcode_r, opcode_m, subopcode_i) = match op { AluRmiROpcode::Add => (0x01, 0x03, 0), AluRmiROpcode::Sub => (0x29, 0x2B, 5), AluRmiROpcode::And => (0x21, 0x23, 4), AluRmiROpcode::Or => (0x09, 0x0B, 1), AluRmiROpcode::Xor => (0x31, 0x33, 6), AluRmiROpcode::Mul => panic!("unreachable"), }; match src { RegMemImm::Reg { reg: reg_e } => { // GCC/llvm use the swapped operand encoding (viz., the R/RM vs RM/R // duality). Do this too, so as to be able to compare generated machine // code easily. emit_std_reg_reg( sink, LegacyPrefix::None, opcode_r, 1, *reg_e, reg_g.to_reg(), rex, ); // NB: if this is ever extended to handle byte size ops, be sure to retain // redundant REX prefixes. } RegMemImm::Mem { addr } => { // Here we revert to the "normal" G-E ordering. emit_std_reg_mem( sink, LegacyPrefix::None, opcode_m, 1, reg_g.to_reg(), &addr.finalize(state), rex, ); } RegMemImm::Imm { simm32 } => { let use_imm8 = low8_will_sign_extend_to_32(*simm32); let opcode = if use_imm8 { 0x83 } else { 0x81 }; // And also here we use the "normal" G-E ordering. let enc_g = int_reg_enc(reg_g.to_reg()); emit_std_enc_enc( sink, LegacyPrefix::None, opcode, 1, subopcode_i, enc_g, rex, ); emit_simm(sink, if use_imm8 { 1 } else { 4 }, *simm32); } } } } Inst::Imm_R { dst_is_64, simm64, dst, } => { let enc_dst = int_reg_enc(dst.to_reg()); if *dst_is_64 { // FIXME JRS 2020Feb10: also use the 32-bit case here when // possible sink.put1(0x48 | ((enc_dst >> 3) & 1)); sink.put1(0xB8 | (enc_dst & 7)); sink.put8(*simm64); } else { if ((enc_dst >> 3) & 1) == 1 { sink.put1(0x41); } sink.put1(0xB8 | (enc_dst & 7)); sink.put4(*simm64 as u32); } } Inst::Mov_R_R { is_64, src, dst } => { let rex = if *is_64 { RexFlags::set_w() } else { RexFlags::clear_w() }; emit_std_reg_reg(sink, LegacyPrefix::None, 0x89, 1, *src, dst.to_reg(), rex); } Inst::MovZX_RM_R { ext_mode, src, dst } => { let (opcodes, num_opcodes, rex_flags) = match ext_mode { ExtMode::BL => { // MOVZBL is (REX.W==0) 0F B6 /r (0x0FB6, 2, RexFlags::clear_w()) } ExtMode::BQ => { // MOVZBQ is (REX.W==1) 0F B6 /r // I'm not sure why the Intel manual offers different // encodings for MOVZBQ than for MOVZBL. AIUI they should // achieve the same, since MOVZBL is just going to zero out // the upper half of the destination anyway. (0x0FB6, 2, RexFlags::set_w()) } ExtMode::WL => { // MOVZWL is (REX.W==0) 0F B7 /r (0x0FB7, 2, RexFlags::clear_w()) } ExtMode::WQ => { // MOVZWQ is (REX.W==1) 0F B7 /r (0x0FB7, 2, RexFlags::set_w()) } ExtMode::LQ => { // This is just a standard 32 bit load, and we rely on the // default zero-extension rule to perform the extension. // Note that in reg/reg mode, gcc seems to use the swapped form R/RM, which we // don't do here, since it's the same encoding size. // MOV r/m32, r32 is (REX.W==0) 8B /r (0x8B, 1, RexFlags::clear_w()) } }; match src { RegMem::Reg { reg: src } => emit_std_reg_reg( sink, LegacyPrefix::None, opcodes, num_opcodes, dst.to_reg(), *src, rex_flags, ), RegMem::Mem { addr: src } => emit_std_reg_mem( sink, LegacyPrefix::None, opcodes, num_opcodes, dst.to_reg(), &src.finalize(state), rex_flags, ), } } Inst::Mov64_M_R { src, dst } => emit_std_reg_mem( sink, LegacyPrefix::None, 0x8B, 1, dst.to_reg(), &src.finalize(state), RexFlags::set_w(), ), Inst::LoadEffectiveAddress { addr, dst } => emit_std_reg_mem( sink, LegacyPrefix::None, 0x8D, 1, dst.to_reg(), &addr.finalize(state), RexFlags::set_w(), ), Inst::MovSX_RM_R { ext_mode, src, dst } => { let (opcodes, num_opcodes, rex_flags) = match ext_mode { ExtMode::BL => { // MOVSBL is (REX.W==0) 0F BE /r (0x0FBE, 2, RexFlags::clear_w()) } ExtMode::BQ => { // MOVSBQ is (REX.W==1) 0F BE /r (0x0FBE, 2, RexFlags::set_w()) } ExtMode::WL => { // MOVSWL is (REX.W==0) 0F BF /r (0x0FBF, 2, RexFlags::clear_w()) } ExtMode::WQ => { // MOVSWQ is (REX.W==1) 0F BF /r (0x0FBF, 2, RexFlags::set_w()) } ExtMode::LQ => { // MOVSLQ is (REX.W==1) 63 /r (0x63, 1, RexFlags::set_w()) } }; match src { RegMem::Reg { reg: src } => emit_std_reg_reg( sink, LegacyPrefix::None, opcodes, num_opcodes, dst.to_reg(), *src, rex_flags, ), RegMem::Mem { addr: src } => emit_std_reg_mem( sink, LegacyPrefix::None, opcodes, num_opcodes, dst.to_reg(), &src.finalize(state), rex_flags, ), } } Inst::Mov_R_M { size, src, dst } => { let dst = &dst.finalize(state); match size { 1 => { // This is one of the few places where the presence of a // redundant REX prefix changes the meaning of the // instruction. let mut rex = RexFlags::clear_w(); let enc_src = int_reg_enc(*src); if enc_src >= 4 && enc_src <= 7 { rex.always_emit(); }; // MOV r8, r/m8 is (REX.W==0) 88 /r emit_std_reg_mem(sink, LegacyPrefix::None, 0x88, 1, *src, dst, rex) } 2 => { // MOV r16, r/m16 is 66 (REX.W==0) 89 /r emit_std_reg_mem( sink, LegacyPrefix::_66, 0x89, 1, *src, dst, RexFlags::clear_w(), ) } 4 => { // MOV r32, r/m32 is (REX.W==0) 89 /r emit_std_reg_mem( sink, LegacyPrefix::None, 0x89, 1, *src, dst, RexFlags::clear_w(), ) } 8 => { // MOV r64, r/m64 is (REX.W==1) 89 /r emit_std_reg_mem( sink, LegacyPrefix::None, 0x89, 1, *src, dst, RexFlags::set_w(), ) } _ => panic!("x64::Inst::Mov_R_M::emit: unreachable"), } } Inst::Shift_R { is_64, kind, num_bits, dst, } => { let enc_dst = int_reg_enc(dst.to_reg()); let subopcode = match kind { ShiftKind::Left => 4, ShiftKind::RightZ => 5, ShiftKind::RightS => 7, }; let rex = if *is_64 { RexFlags::set_w() } else { RexFlags::clear_w() }; match num_bits { None => { // SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode // SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode emit_std_enc_enc(sink, LegacyPrefix::None, 0xD3, 1, subopcode, enc_dst, rex); } Some(num_bits) => { // SHL/SHR/SAR $ib, reg32 is (REX.W==0) C1 /subopcode ib // SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib // When the shift amount is 1, there's an even shorter encoding, but we don't // bother with that nicety here. emit_std_enc_enc(sink, LegacyPrefix::None, 0xC1, 1, subopcode, enc_dst, rex); sink.put1(*num_bits); } } } Inst::Cmp_RMI_R { size, src: src_e, dst: reg_g, } => { let mut prefix = LegacyPrefix::None; if *size == 2 { prefix = LegacyPrefix::_66; } let mut rex = match size { 8 => RexFlags::set_w(), 4 | 2 => RexFlags::clear_w(), 1 => { let mut rex = RexFlags::clear_w(); // Here, a redundant REX prefix changes the meaning of the instruction. let enc_g = int_reg_enc(*reg_g); if enc_g >= 4 && enc_g <= 7 { rex.always_emit(); } rex } _ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"), }; match src_e { RegMemImm::Reg { reg: reg_e } => { if *size == 1 { // Check whether the E register forces the use of a redundant REX. let enc_e = int_reg_enc(*reg_e); if enc_e >= 4 && enc_e <= 7 { rex.always_emit(); } } // Use the swapped operands encoding, to stay consistent with the output of // gcc/llvm. let opcode = if *size == 1 { 0x38 } else { 0x39 }; emit_std_reg_reg(sink, prefix, opcode, 1, *reg_e, *reg_g, rex); } RegMemImm::Mem { addr } => { let addr = &addr.finalize(state); // Whereas here we revert to the "normal" G-E ordering. let opcode = if *size == 1 { 0x3A } else { 0x3B }; emit_std_reg_mem(sink, prefix, opcode, 1, *reg_g, addr, rex); } RegMemImm::Imm { simm32 } => { // FIXME JRS 2020Feb11: there are shorter encodings for // cmp $imm, rax/eax/ax/al. let use_imm8 = low8_will_sign_extend_to_32(*simm32); // And also here we use the "normal" G-E ordering. let opcode = if *size == 1 { 0x80 } else if use_imm8 { 0x83 } else { 0x81 }; let enc_g = int_reg_enc(*reg_g); emit_std_enc_enc(sink, prefix, opcode, 1, 7 /*subopcode*/, enc_g, rex); emit_simm(sink, if use_imm8 { 1 } else { *size }, *simm32); } } } Inst::Setcc { cc, dst } => { let opcode = 0x0f90 + cc.get_enc() as u32; let mut rex_flags = RexFlags::clear_w(); rex_flags.always_emit(); emit_std_enc_enc( sink, LegacyPrefix::None, opcode, 2, 0, reg_enc(dst.to_reg()), rex_flags, ); } Inst::Push64 { src } => { match src { RegMemImm::Reg { reg } => { let enc_reg = int_reg_enc(*reg); let rex = 0x40 | ((enc_reg >> 3) & 1); if rex != 0x40 { sink.put1(rex); } sink.put1(0x50 | (enc_reg & 7)); } RegMemImm::Mem { addr } => { let addr = &addr.finalize(state); emit_std_enc_mem( sink, LegacyPrefix::None, 0xFF, 1, 6, /*subopcode*/ addr, RexFlags::clear_w(), ); } RegMemImm::Imm { simm32 } => { if low8_will_sign_extend_to_64(*simm32) { sink.put1(0x6A); sink.put1(*simm32 as u8); } else { sink.put1(0x68); sink.put4(*simm32); } } } } Inst::Pop64 { dst } => { let encDst = int_reg_enc(dst.to_reg()); if encDst >= 8 { // 0x41 == REX.{W=0, B=1}. It seems that REX.W is irrelevant // here. sink.put1(0x41); } sink.put1(0x58 + (encDst & 7)); } Inst::CallKnown { dest, loc, opcode, .. } => { sink.put1(0xE8); // The addend adjusts for the difference between the end of the instruction and the // beginning of the immediate field. sink.add_reloc(*loc, Reloc::X86CallPCRel4, &dest, -4); sink.put4(0); if opcode.is_call() { sink.add_call_site(*loc, *opcode); } } Inst::CallUnknown { dest, opcode, loc, .. } => { match dest { RegMem::Reg { reg } => { let reg_enc = int_reg_enc(*reg); emit_std_enc_enc( sink, LegacyPrefix::None, 0xFF, 1, 2, /*subopcode*/ reg_enc, RexFlags::clear_w(), ); } RegMem::Mem { addr } => { let addr = &addr.finalize(state); emit_std_enc_mem( sink, LegacyPrefix::None, 0xFF, 1, 2, /*subopcode*/ addr, RexFlags::clear_w(), ); } } if opcode.is_call() { sink.add_call_site(*loc, *opcode); } } Inst::Ret {} => sink.put1(0xC3), Inst::JmpKnown { dst } => { let br_start = sink.cur_offset(); let br_disp_off = br_start + 1; let br_end = br_start + 5; if let Some(l) = dst.as_label() { sink.use_label_at_offset(br_disp_off, l, LabelUse::JmpRel32); sink.add_uncond_branch(br_start, br_end, l); } let disp = dst.as_offset32_or_zero(); let disp = disp as u32; sink.put1(0xE9); sink.put4(disp); } Inst::JmpCond { cc, taken, not_taken, } => { // If taken. let cond_start = sink.cur_offset(); let cond_disp_off = cond_start + 2; let cond_end = cond_start + 6; if let Some(l) = taken.as_label() { sink.use_label_at_offset(cond_disp_off, l, LabelUse::JmpRel32); let inverted: [u8; 6] = [0x0F, 0x80 + (cc.invert().get_enc()), 0x00, 0x00, 0x00, 0x00]; sink.add_cond_branch(cond_start, cond_end, l, &inverted[..]); } let taken_disp = taken.as_offset32_or_zero(); let taken_disp = taken_disp as u32; sink.put1(0x0F); sink.put1(0x80 + cc.get_enc()); sink.put4(taken_disp); // If not taken. let uncond_start = sink.cur_offset(); let uncond_disp_off = uncond_start + 1; let uncond_end = uncond_start + 5; if let Some(l) = not_taken.as_label() { sink.use_label_at_offset(uncond_disp_off, l, LabelUse::JmpRel32); sink.add_uncond_branch(uncond_start, uncond_end, l); } let nt_disp = not_taken.as_offset32_or_zero(); let nt_disp = nt_disp as u32; sink.put1(0xE9); sink.put4(nt_disp); } Inst::JmpUnknown { target } => { match target { RegMem::Reg { reg } => { let reg_enc = int_reg_enc(*reg); emit_std_enc_enc( sink, LegacyPrefix::None, 0xFF, 1, 4, /*subopcode*/ reg_enc, RexFlags::clear_w(), ); } RegMem::Mem { addr } => { let addr = &addr.finalize(state); emit_std_enc_mem( sink, LegacyPrefix::None, 0xFF, 1, 4, /*subopcode*/ addr, RexFlags::clear_w(), ); } } } Inst::XMM_Mov_RM_R { op, src: src_e, dst: reg_g, } => { let rex = RexFlags::clear_w(); let (prefix, opcode) = match op { SseOpcode::Movaps => (LegacyPrefix::None, 0x0F28), SseOpcode::Movd => (LegacyPrefix::_66, 0x0F6E), SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10), SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10), _ => unimplemented!("Opcode {:?} not implemented", op), }; match src_e { RegMem::Reg { reg: reg_e } => { emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex); } RegMem::Mem { addr } => { let addr = &addr.finalize(state); emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex); } } } Inst::XMM_RM_R { op, src: src_e, dst: reg_g, } => { let rex = RexFlags::clear_w(); let (prefix, opcode) = match op { SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58), SseOpcode::Andps => (LegacyPrefix::None, 0x0F54), SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55), SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E), SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59), SseOpcode::Orps => (LegacyPrefix::None, 0x0F56), SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C), SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51), _ => unimplemented!("Opcode {:?} not implemented", op), }; match src_e { RegMem::Reg { reg: reg_e } => { emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex); } RegMem::Mem { addr } => { let addr = &addr.finalize(state); emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex); } } } Inst::XMM_Mov_R_M { op, src, dst } => { let rex = RexFlags::clear_w(); let (prefix, opcode) = match op { SseOpcode::Movd => (LegacyPrefix::_66, 0x0F7E), SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F11), _ => unimplemented!("Emit xmm mov r m"), }; let dst = &dst.finalize(state); emit_std_reg_mem(sink, prefix, opcode, 2, *src, dst, rex); } Inst::Hlt => { sink.put1(0xcc); } Inst::Ud2 { trap_info } => { sink.add_trap(trap_info.0, trap_info.1); sink.put1(0x0f); sink.put1(0x0b); } Inst::VirtualSPOffsetAdj { offset } => { debug!( "virtual sp offset adjusted by {} -> {}", offset, state.virtual_sp_offset + offset ); state.virtual_sp_offset += offset; } Inst::Nop { .. } | Inst::EpiloguePlaceholder => { // Generate no code. } } }