diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index e6fdf02589..7a84613c4d 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -58,10 +58,12 @@ x86 = [] arm32 = [] arm64 = [] riscv = [] +x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel. # Option to enable all architectures. all-arch = [ "x86", + "x64", "arm32", "arm64", "riscv" diff --git a/cranelift/codegen/meta/src/isa/x86/settings.rs b/cranelift/codegen/meta/src/isa/x86/settings.rs index d94e023279..0ef36b6686 100644 --- a/cranelift/codegen/meta/src/isa/x86/settings.rs +++ b/cranelift/codegen/meta/src/isa/x86/settings.rs @@ -3,6 +3,12 @@ use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder}; pub(crate) fn define(shared: &SettingGroup) -> SettingGroup { let mut settings = SettingGroupBuilder::new("x86"); + settings.add_bool( + "use_new_backend", + "Whether to use the new codegen backend using the new isel", + false, + ); + // CPUID.01H:ECX let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false); let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false); diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index d72b46981f..6c9a904f03 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -77,6 +77,9 @@ mod riscv; #[cfg(feature = "x86")] mod x86; +#[cfg(feature = "x64")] +mod x64; + #[cfg(feature = "arm32")] mod arm32; diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs new file mode 100644 index 0000000000..2deb47fb89 --- /dev/null +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -0,0 +1,457 @@ +//! Implementation of the standard x64 ABI. + +use alloc::vec::Vec; +use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; + +use crate::ir::{self, types, types::*, ArgumentExtension, StackSlot, Type}; +use crate::isa::{self, x64::inst::*}; +use crate::machinst::*; +use crate::settings; + +use args::*; + +#[derive(Clone, Debug)] +enum ABIArg { + Reg(RealReg), + _Stack, +} + +#[derive(Clone, Debug)] +enum ABIRet { + Reg(RealReg), + _Stack, +} + +pub(crate) struct X64ABIBody { + args: Vec, + rets: Vec, + + /// Offsets to each stack slot. + _stack_slots: Vec, + + /// Total stack size of all the stack slots. + stack_slots_size: usize, + + /// Clobbered registers, as indicated by regalloc. + clobbered: Set>, + + /// Total number of spill slots, as indicated by regalloc. + num_spill_slots: Option, + + /// Calculated while creating the prologue, and used when creating the epilogue. Amount by + /// which RSP is adjusted downwards to allocate the spill area. + frame_size_bytes: Option, + + call_conv: isa::CallConv, + + /// The settings controlling this function's compilation. 
+ flags: settings::Flags, +} + +fn in_int_reg(ty: types::Type) -> bool { + match ty { + types::I8 + | types::I16 + | types::I32 + | types::I64 + | types::B1 + | types::B8 + | types::B16 + | types::B32 + | types::B64 => true, + _ => false, + } +} + +fn get_intreg_for_arg_systemv(idx: usize) -> Option { + match idx { + 0 => Some(regs::rdi()), + 1 => Some(regs::rsi()), + 2 => Some(regs::rdx()), + 3 => Some(regs::rcx()), + 4 => Some(regs::r8()), + 5 => Some(regs::r9()), + _ => None, + } +} + +fn get_intreg_for_retval_systemv(idx: usize) -> Option { + match idx { + 0 => Some(regs::rax()), + 1 => Some(regs::rdx()), + _ => None, + } +} + +fn is_callee_save_systemv(r: RealReg) -> bool { + use regs::*; + match r.get_class() { + RegClass::I64 => match r.get_hw_encoding() as u8 { + ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true, + _ => false, + }, + _ => unimplemented!(), + } +} + +fn get_callee_saves(regs: Vec>) -> Vec> { + regs.into_iter() + .filter(|r| is_callee_save_systemv(r.to_reg())) + .collect() +} + +impl X64ABIBody { + /// Create a new body ABI instance. + pub(crate) fn new(f: &ir::Function, flags: settings::Flags) -> Self { + // Compute args and retvals from signature. + let mut args = vec![]; + let mut next_int_arg = 0; + for param in &f.signature.params { + match param.purpose { + ir::ArgumentPurpose::VMContext if f.signature.call_conv.extends_baldrdash() => { + // `VMContext` is `r14` in Baldrdash. + args.push(ABIArg::Reg(regs::r14().to_real_reg())); + } + + ir::ArgumentPurpose::Normal | ir::ArgumentPurpose::VMContext => { + if in_int_reg(param.value_type) { + if let Some(reg) = get_intreg_for_arg_systemv(next_int_arg) { + args.push(ABIArg::Reg(reg.to_real_reg())); + } else { + unimplemented!("passing arg on the stack"); + } + next_int_arg += 1; + } else { + unimplemented!("non int normal register") + } + } + + _ => unimplemented!("other parameter purposes"), + } + } + + let mut rets = vec![]; + let mut next_int_retval = 0; + for ret in &f.signature.returns { + match ret.purpose { + ir::ArgumentPurpose::Normal => { + if in_int_reg(ret.value_type) { + if let Some(reg) = get_intreg_for_retval_systemv(next_int_retval) { + rets.push(ABIRet::Reg(reg.to_real_reg())); + } else { + unimplemented!("passing return on the stack"); + } + next_int_retval += 1; + } else { + unimplemented!("returning non integer normal value"); + } + } + + _ => { + unimplemented!("non normal argument purpose"); + } + } + } + + // Compute stackslot locations and total stackslot size. + let mut stack_offset: usize = 0; + let mut _stack_slots = vec![]; + for (stackslot, data) in f.stack_slots.iter() { + let off = stack_offset; + stack_offset += data.size as usize; + + // 8-bit align. 
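+            // (That is, round the running offset up to the next 8-byte boundary: add 7, then clear the low three bits.)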
+ stack_offset = (stack_offset + 7) & !7usize; + + debug_assert_eq!(stackslot.as_u32() as usize, _stack_slots.len()); + _stack_slots.push(off); + } + + Self { + args, + rets, + _stack_slots, + stack_slots_size: stack_offset, + clobbered: Set::empty(), + num_spill_slots: None, + frame_size_bytes: None, + call_conv: f.signature.call_conv.clone(), + flags, + } + } +} + +impl ABIBody for X64ABIBody { + type I = Inst; + + fn flags(&self) -> &settings::Flags { + &self.flags + } + + fn num_args(&self) -> usize { + unimplemented!() + } + + fn num_retvals(&self) -> usize { + unimplemented!() + } + + fn num_stackslots(&self) -> usize { + unimplemented!() + } + + fn liveins(&self) -> Set { + let mut set: Set = Set::empty(); + for arg in &self.args { + if let &ABIArg::Reg(r) = arg { + set.insert(r); + } + } + set + } + + fn liveouts(&self) -> Set { + let mut set: Set = Set::empty(); + for ret in &self.rets { + if let &ABIRet::Reg(r) = ret { + set.insert(r); + } + } + set + } + + fn gen_copy_arg_to_reg(&self, idx: usize, to_reg: Writable) -> Inst { + match &self.args[idx] { + ABIArg::Reg(from_reg) => { + if from_reg.get_class() == RegClass::I32 || from_reg.get_class() == RegClass::I64 { + // TODO do we need a sign extension if it's I32? + return Inst::mov_r_r(/*is64=*/ true, from_reg.to_reg(), to_reg); + } + unimplemented!("moving from non-int arg to vreg"); + } + ABIArg::_Stack => unimplemented!("moving from stack arg to vreg"), + } + } + + fn gen_copy_reg_to_retval( + &self, + idx: usize, + from_reg: Writable, + ext: ArgumentExtension, + ) -> Vec { + match ext { + ArgumentExtension::None => {} + _ => unimplemented!( + "unimplemented argument extension {:?} is required for baldrdash", + ext + ), + }; + + let mut ret = Vec::new(); + match &self.rets[idx] { + ABIRet::Reg(to_reg) => { + if to_reg.get_class() == RegClass::I32 || to_reg.get_class() == RegClass::I64 { + ret.push(Inst::mov_r_r( + /*is64=*/ true, + from_reg.to_reg(), + Writable::::from_reg(to_reg.to_reg()), + )) + } else { + unimplemented!("moving from vreg to non-int return value"); + } + } + + ABIRet::_Stack => { + unimplemented!("moving from vreg to stack return value"); + } + } + + ret + } + + fn gen_ret(&self) -> Inst { + Inst::ret() + } + + fn gen_epilogue_placeholder(&self) -> Inst { + Inst::epilogue_placeholder() + } + + fn set_num_spillslots(&mut self, slots: usize) { + self.num_spill_slots = Some(slots); + } + + fn set_clobbered(&mut self, clobbered: Set>) { + self.clobbered = clobbered; + } + + fn stackslot_addr(&self, _slot: StackSlot, _offset: u32, _into_reg: Writable) -> Inst { + unimplemented!() + } + + fn load_stackslot( + &self, + _slot: StackSlot, + _offset: u32, + _ty: Type, + _into_reg: Writable, + ) -> Inst { + unimplemented!("load_stackslot") + } + + fn store_stackslot(&self, _slot: StackSlot, _offset: u32, _ty: Type, _from_reg: Reg) -> Inst { + unimplemented!("store_stackslot") + } + + fn load_spillslot(&self, _slot: SpillSlot, _ty: Type, _into_reg: Writable) -> Inst { + unimplemented!("load_spillslot") + } + + fn store_spillslot(&self, _slot: SpillSlot, _ty: Type, _from_reg: Reg) -> Inst { + unimplemented!("store_spillslot") + } + + fn gen_prologue(&mut self) -> Vec { + let r_rsp = regs::rsp(); + + let mut insts = vec![]; + + // Baldrdash generates its own prologue sequence, so we don't have to. + if !self.call_conv.extends_baldrdash() { + let r_rbp = regs::rbp(); + let w_rbp = Writable::::from_reg(r_rbp); + + // The "traditional" pre-preamble + // RSP before the call will be 0 % 16. So here, it is 8 % 16. 
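+            // (The `call` that entered this function pushed an 8-byte return address, which is why RSP is 8 % 16 at this point.)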
+ insts.push(Inst::push64(RMI::reg(r_rbp))); + // RSP is now 0 % 16 + insts.push(Inst::mov_r_r(true, r_rsp, w_rbp)); + } + + // Save callee saved registers that we trash. Keep track of how much space we've used, so + // as to know what we have to do to get the base of the spill area 0 % 16. + let mut callee_saved_used = 0; + let clobbered = get_callee_saves(self.clobbered.to_vec()); + for reg in clobbered { + let r_reg = reg.to_reg(); + match r_reg.get_class() { + RegClass::I64 => { + insts.push(Inst::push64(RMI::reg(r_reg.to_reg()))); + callee_saved_used += 8; + } + _ => unimplemented!(), + } + } + + let mut total_stacksize = self.stack_slots_size + 8 * self.num_spill_slots.unwrap(); + if self.call_conv.extends_baldrdash() { + // Baldrdash expects the stack to take at least the number of words set in + // baldrdash_prologue_words; count them here. + debug_assert!( + !self.flags.enable_probestack(), + "baldrdash does not expect cranelift to emit stack probes" + ); + total_stacksize += self.flags.baldrdash_prologue_words() as usize * 8; + } + + debug_assert!(callee_saved_used % 16 == 0 || callee_saved_used % 16 == 8); + let frame_size = total_stacksize + callee_saved_used % 16; + + // Now make sure the frame stack is aligned, so RSP == 0 % 16 in the function's body. + let frame_size = (frame_size + 15) & !15; + if frame_size > 0x7FFF_FFFF { + unimplemented!("gen_prologue(x86): total_stacksize >= 2G"); + } + + if !self.call_conv.extends_baldrdash() { + // Explicitly allocate the frame. + let w_rsp = Writable::::from_reg(r_rsp); + if frame_size > 0 { + insts.push(Inst::alu_rmi_r( + true, + RMI_R_Op::Sub, + RMI::imm(frame_size as u32), + w_rsp, + )); + } + } + + // Stash this value. We'll need it for the epilogue. + debug_assert!(self.frame_size_bytes.is_none()); + self.frame_size_bytes = Some(frame_size); + + insts + } + + fn gen_epilogue(&self) -> Vec { + let mut insts = vec![]; + + // Undo what we did in the prologue. + + // Clear the spill area and the 16-alignment padding below it. + if !self.call_conv.extends_baldrdash() { + let frame_size = self.frame_size_bytes.unwrap(); + if frame_size > 0 { + let r_rsp = regs::rsp(); + let w_rsp = Writable::::from_reg(r_rsp); + + insts.push(Inst::alu_rmi_r( + true, + RMI_R_Op::Add, + RMI::imm(frame_size as u32), + w_rsp, + )); + } + } + + // Restore regs. + let clobbered = get_callee_saves(self.clobbered.to_vec()); + for w_real_reg in clobbered.into_iter().rev() { + match w_real_reg.to_reg().get_class() { + RegClass::I64 => { + // TODO: make these conversion sequences less cumbersome. + insts.push(Inst::pop64(Writable::::from_reg( + w_real_reg.to_reg().to_reg(), + ))) + } + _ => unimplemented!(), + } + } + + // Baldrdash generates its own preamble. + if !self.call_conv.extends_baldrdash() { + let r_rbp = regs::rbp(); + let w_rbp = Writable::::from_reg(r_rbp); + + // Undo the "traditional" pre-preamble + // RSP before the call will be 0 % 16. So here, it is 8 % 16. + insts.push(Inst::pop64(w_rbp)); + insts.push(Inst::ret()); + } + + insts + } + + fn frame_size(&self) -> u32 { + self.frame_size_bytes + .expect("frame size not computed before prologue generation") as u32 + } + + fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 { + // We allocate in terms of 8-byte slots. 
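+        // So a 64-bit integer, or a scalar F32/F64 held in a vector register, takes one slot, while a full 128-bit vector takes two.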
+ match (rc, ty) { + (RegClass::I64, _) => 1, + (RegClass::V128, F32) | (RegClass::V128, F64) => 1, + (RegClass::V128, _) => 2, + _ => panic!("Unexpected register class!"), + } + } + + fn gen_spill(&self, _to_slot: SpillSlot, _from_reg: RealReg, _ty: Type) -> Inst { + unimplemented!() + } + + fn gen_reload(&self, _to_reg: Writable, _from_slot: SpillSlot, _ty: Type) -> Inst { + unimplemented!() + } +} diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs new file mode 100644 index 0000000000..dbdd484fca --- /dev/null +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -0,0 +1,451 @@ +//! Instruction operand sub-components (aka "parts"): definitions and printing. + +use std::fmt; +use std::string::{String, ToString}; + +use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector}; + +use crate::binemit::CodeOffset; +use crate::machinst::*; + +use super::regs::show_ireg_sized; + +/// A Memory Address. These denote a 64-bit value only. +#[derive(Clone)] +pub(crate) enum Addr { + /// Immediate sign-extended and a Register. + IR { simm32: u32, base: Reg }, + + /// sign-extend-32-to-64(Immediate) + Register1 + (Register2 << Shift) + IRRS { + simm32: u32, + base: Reg, + index: Reg, + shift: u8, /* 0 .. 3 only */ + }, +} + +impl Addr { + // Constructors. + + pub(crate) fn imm_reg(simm32: u32, base: Reg) -> Self { + debug_assert!(base.get_class() == RegClass::I64); + Self::IR { simm32, base } + } + + pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Reg, index: Reg, shift: u8) -> Self { + debug_assert!(base.get_class() == RegClass::I64); + debug_assert!(index.get_class() == RegClass::I64); + debug_assert!(shift <= 3); + Addr::IRRS { + simm32, + base, + index, + shift, + } + } + + /// Add the regs mentioned by `self` to `collector`. + pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { + match self { + Addr::IR { simm32: _, base } => { + collector.add_use(*base); + } + Addr::IRRS { + simm32: _, + base, + index, + shift: _, + } => { + collector.add_use(*base); + collector.add_use(*index); + } + } + } +} + +impl ShowWithRRU for Addr { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + match self { + Addr::IR { simm32, base } => format!("{}({})", *simm32 as i32, base.show_rru(mb_rru)), + Addr::IRRS { + simm32, + base, + index, + shift, + } => format!( + "{}({},{},{})", + *simm32 as i32, + base.show_rru(mb_rru), + index.show_rru(mb_rru), + 1 << shift + ), + } + } +} + +/// An operand which is either an integer Register, a value in Memory or an Immediate. This can +/// denote an 8, 16, 32 or 64 bit value. For the Immediate form, in the 8- and 16-bit case, only +/// the lower 8 or 16 bits of `simm32` is relevant. In the 64-bit case, the value denoted by +/// `simm32` is its sign-extension out to 64 bits. +#[derive(Clone)] +pub(crate) enum RMI { + R { reg: Reg }, + M { addr: Addr }, + I { simm32: u32 }, +} + +impl RMI { + // Constructors + + pub(crate) fn reg(reg: Reg) -> RMI { + debug_assert!(reg.get_class() == RegClass::I64); + RMI::R { reg } + } + pub(crate) fn mem(addr: Addr) -> RMI { + RMI::M { addr } + } + pub(crate) fn imm(simm32: u32) -> RMI { + RMI::I { simm32 } + } + + /// Add the regs mentioned by `self` to `collector`. 
+ pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { + match self { + RMI::R { reg } => collector.add_use(*reg), + RMI::M { addr } => addr.get_regs_as_uses(collector), + RMI::I { simm32: _ } => {} + } + } +} + +impl ShowWithRRU for RMI { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + self.show_rru_sized(mb_rru, 8) + } + + fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { + match self { + RMI::R { reg } => show_ireg_sized(*reg, mb_rru, size), + RMI::M { addr } => addr.show_rru(mb_rru), + RMI::I { simm32 } => format!("${}", *simm32 as i32), + } + } +} + +/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16, +/// 32 or 64 bit value. +#[derive(Clone)] +pub(crate) enum RM { + R { reg: Reg }, + M { addr: Addr }, +} + +impl RM { + // Constructors. + + pub(crate) fn reg(reg: Reg) -> Self { + debug_assert!(reg.get_class() == RegClass::I64); + RM::R { reg } + } + + pub(crate) fn mem(addr: Addr) -> Self { + RM::M { addr } + } + + /// Add the regs mentioned by `self` to `collector`. + pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { + match self { + RM::R { reg } => collector.add_use(*reg), + RM::M { addr } => addr.get_regs_as_uses(collector), + } + } +} + +impl ShowWithRRU for RM { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + self.show_rru_sized(mb_rru, 8) + } + + fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { + match self { + RM::R { reg } => show_ireg_sized(*reg, mb_rru, size), + RM::M { addr } => addr.show_rru(mb_rru), + } + } +} + +/// Some basic ALU operations. TODO: maybe add Adc, Sbb. +#[derive(Clone, PartialEq)] +pub enum RMI_R_Op { + Add, + Sub, + And, + Or, + Xor, + /// The signless, non-extending (N x N -> N, for N in {32,64}) variant. + Mul, +} + +impl RMI_R_Op { + pub(crate) fn to_string(&self) -> String { + match self { + RMI_R_Op::Add => "add".to_string(), + RMI_R_Op::Sub => "sub".to_string(), + RMI_R_Op::And => "and".to_string(), + RMI_R_Op::Or => "or".to_string(), + RMI_R_Op::Xor => "xor".to_string(), + RMI_R_Op::Mul => "imul".to_string(), + } + } +} + +impl fmt::Debug for RMI_R_Op { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}", self.to_string()) + } +} + +/// These indicate ways of extending (widening) a value, using the Intel naming: +/// B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64 +#[derive(Clone, PartialEq)] +pub enum ExtMode { + /// Byte -> Longword. + BL, + /// Byte -> Quadword. + BQ, + /// Word -> Longword. + WL, + /// Word -> Quadword. + WQ, + /// Longword -> Quadword. + LQ, +} + +impl ExtMode { + pub(crate) fn to_string(&self) -> String { + match self { + ExtMode::BL => "bl".to_string(), + ExtMode::BQ => "bq".to_string(), + ExtMode::WL => "wl".to_string(), + ExtMode::WQ => "wq".to_string(), + ExtMode::LQ => "lq".to_string(), + } + } + + pub(crate) fn dst_size(&self) -> u8 { + match self { + ExtMode::BL => 4, + ExtMode::BQ => 8, + ExtMode::WL => 4, + ExtMode::WQ => 8, + ExtMode::LQ => 8, + } + } +} + +impl fmt::Debug for ExtMode { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}", self.to_string()) + } +} + +/// These indicate the form of a scalar shift: left, signed right, unsigned right. 
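+/// `Left` maps to x86 `shl`, `RightZ` to the logical (zero-filling) `shr`, and `RightS` to the arithmetic `sar`.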
+#[derive(Clone)] +pub enum ShiftKind { + Left, + RightZ, + RightS, +} + +impl ShiftKind { + pub(crate) fn to_string(&self) -> String { + match self { + ShiftKind::Left => "shl".to_string(), + ShiftKind::RightZ => "shr".to_string(), + ShiftKind::RightS => "sar".to_string(), + } + } +} + +impl fmt::Debug for ShiftKind { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}", self.to_string()) + } +} + +/// These indicate condition code tests. Not all are represented since not all are useful in +/// compiler-generated code. +#[derive(Copy, Clone)] +#[repr(u8)] +pub enum CC { + /// overflow + O = 0, + /// no overflow + NO = 1, + + /// < unsigned + B = 2, + /// >= unsigned + NB = 3, + + /// zero + Z = 4, + /// not-zero + NZ = 5, + + /// <= unsigned + BE = 6, + /// > unsigend + NBE = 7, + + /// negative + S = 8, + /// not-negative + NS = 9, + + /// < signed + L = 12, + /// >= signed + NL = 13, + + /// <= signed + LE = 14, + /// > signed + NLE = 15, +} + +impl CC { + pub(crate) fn to_string(&self) -> String { + match self { + CC::O => "o".to_string(), + CC::NO => "no".to_string(), + CC::B => "b".to_string(), + CC::NB => "nb".to_string(), + CC::Z => "z".to_string(), + CC::NZ => "nz".to_string(), + CC::BE => "be".to_string(), + CC::NBE => "nbe".to_string(), + CC::S => "s".to_string(), + CC::NS => "ns".to_string(), + CC::L => "l".to_string(), + CC::NL => "nl".to_string(), + CC::LE => "le".to_string(), + CC::NLE => "nle".to_string(), + } + } + + pub(crate) fn invert(&self) -> CC { + match self { + CC::O => CC::NO, + CC::NO => CC::O, + + CC::B => CC::NB, + CC::NB => CC::B, + + CC::Z => CC::NZ, + CC::NZ => CC::Z, + + CC::BE => CC::NBE, + CC::NBE => CC::BE, + + CC::S => CC::NS, + CC::NS => CC::S, + + CC::L => CC::NL, + CC::NL => CC::L, + + CC::LE => CC::NLE, + CC::NLE => CC::LE, + } + } + + pub(crate) fn get_enc(self) -> u8 { + self as u8 + } +} + +impl fmt::Debug for CC { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}", self.to_string()) + } +} + +/// A branch target. Either unresolved (basic-block index) or resolved (offset +/// from end of current instruction). +#[derive(Clone, Copy, Debug)] +pub enum BranchTarget { + /// An unresolved reference to a BlockIndex, as passed into + /// `lower_branch_group()`. + Block(BlockIndex), + + /// A resolved reference to another instruction, after + /// `Inst::with_block_offsets()`. This offset is in bytes. + ResolvedOffset(BlockIndex, isize), +} + +impl ShowWithRRU for BranchTarget { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + match self { + BranchTarget::Block(bix) => format!("(Block {})", bix), + BranchTarget::ResolvedOffset(bix, offs) => format!("(Block {}, offset {})", bix, offs), + } + } +} + +impl BranchTarget { + /// Lower the branch target given offsets of each block. + pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) { + match self { + &mut BranchTarget::Block(bix) => { + let bix = bix as usize; + assert!(bix < targets.len()); + let block_offset_in_func = targets[bix]; + let branch_offset = (block_offset_in_func as isize) - (my_offset as isize); + *self = BranchTarget::ResolvedOffset(bix as BlockIndex, branch_offset); + } + &mut BranchTarget::ResolvedOffset(..) => {} + } + } + + /// Get the block index. + pub fn as_block_index(&self) -> Option { + match self { + &BranchTarget::Block(bix) => Some(bix), + _ => None, + } + } + + /// Get the offset as a signed 32 bit byte offset. 
This returns the + /// offset in bytes between the first byte of the source and the first + /// byte of the target. It does not take into account the Intel-specific + /// rule that a branch offset is encoded as relative to the start of the + /// following instruction. That is a problem for the emitter to deal + /// with. + pub fn as_offset_i32(&self) -> Option { + match self { + &BranchTarget::ResolvedOffset(_, off) => { + // Leave a bit of slack so that the emitter is guaranteed to + // be able to add the length of the jump instruction encoding + // to this value and still have a value in signed-32 range. + if off >= -0x7FFF_FF00isize && off <= 0x7FFF_FF00isize { + Some(off as i32) + } else { + None + } + } + _ => None, + } + } + + /// Map the block index given a transform map. + pub fn map(&mut self, block_index_map: &[BlockIndex]) { + match self { + &mut BranchTarget::Block(ref mut bix) => { + let n = block_index_map[*bix as usize]; + *bix = n; + } + _ => panic!("BranchTarget::map() called on already-lowered BranchTarget!"), + } + } +} diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs new file mode 100644 index 0000000000..2b4d3e54d6 --- /dev/null +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -0,0 +1,888 @@ +use regalloc::{Reg, RegClass}; + +use crate::isa::x64::inst::*; + +fn low8willSXto64(x: u32) -> bool { + let xs = (x as i32) as i64; + xs == ((xs << 56) >> 56) +} + +fn low8willSXto32(x: u32) -> bool { + let xs = x as i32; + xs == ((xs << 24) >> 24) +} + +//============================================================================= +// Instructions and subcomponents: emission + +// For all of the routines that take both a memory-or-reg operand (sometimes +// called "E" in the Intel documentation) and a reg-only operand ("G" in +// Intelese), the order is always G first, then E. +// +// "enc" in the following means "hardware register encoding number". + +#[inline(always)] +fn mkModRegRM(m0d: u8, encRegG: u8, rmE: u8) -> u8 { + debug_assert!(m0d < 4); + debug_assert!(encRegG < 8); + debug_assert!(rmE < 8); + ((m0d & 3) << 6) | ((encRegG & 7) << 3) | (rmE & 7) +} + +#[inline(always)] +fn mkSIB(shift: u8, encIndex: u8, encBase: u8) -> u8 { + debug_assert!(shift < 4); + debug_assert!(encIndex < 8); + debug_assert!(encBase < 8); + ((shift & 3) << 6) | ((encIndex & 7) << 3) | (encBase & 7) +} + +/// Get the encoding number from something which we sincerely hope is a real +/// register of class I64. +#[inline(always)] +fn iregEnc(reg: Reg) -> u8 { + debug_assert!(reg.is_real()); + debug_assert!(reg.get_class() == RegClass::I64); + reg.get_hw_encoding() +} + +// F_*: these flags describe special handling of the insn to be generated. Be +// careful with these. It is easy to create nonsensical combinations. +const F_NONE: u32 = 0; + +/// Emit the REX prefix byte even if it appears to be redundant (== 0x40). +const F_RETAIN_REDUNDANT_REX: u32 = 1; + +/// Set the W bit in the REX prefix to zero. By default it will be set to 1, +/// indicating a 64-bit operation. +const F_CLEAR_REX_W: u32 = 2; + +/// Add an 0x66 (operand-size override) prefix. This is necessary to indicate +/// a 16-bit operation. Normally this will be used together with F_CLEAR_REX_W. +const F_PREFIX_66: u32 = 4; + +/// This is the core 'emit' function for instructions that reference memory. 
+/// +/// For an instruction that has as operands a register `encG` and a memory +/// address `memE`, create and emit, first the REX prefix, then caller-supplied +/// opcode byte(s) (`opcodes` and `numOpcodes`), then the MOD/RM byte, then +/// optionally, a SIB byte, and finally optionally an immediate that will be +/// derived from the `memE` operand. For most instructions up to and including +/// SSE4.2, that will be the whole instruction. +/// +/// The opcodes are written bigendianly for the convenience of callers. For +/// example, if the opcode bytes to be emitted are, in this order, F3 0F 27, +/// then the caller should pass `opcodes` == 0xF3_0F_27 and `numOpcodes` == 3. +/// +/// The register operand is represented here not as a `Reg` but as its hardware +/// encoding, `encG`. `flags` can specify special handling for the REX prefix. +/// By default, the REX prefix will indicate a 64-bit operation and will be +/// deleted if it is redundant (0x40). Note that for a 64-bit operation, the +/// REX prefix will normally never be redundant, since REX.W must be 1 to +/// indicate a 64-bit operation. +fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( + sink: &mut O, + opcodes: u32, + mut numOpcodes: usize, + encG: u8, + memE: &Addr, + flags: u32, +) { + // General comment for this function: the registers in `memE` must be + // 64-bit integer registers, because they are part of an address + // expression. But `encG` can be derived from a register of any class. + let prefix66 = (flags & F_PREFIX_66) != 0; + let clearRexW = (flags & F_CLEAR_REX_W) != 0; + let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0; + // The operand-size override, if requested. This indicates a 16-bit + // operation. + if prefix66 { + sink.put1(0x66); + } + match memE { + Addr::IR { simm32, base: regE } => { + // First, cook up the REX byte. This is easy. + let encE = iregEnc(*regE); + let w = if clearRexW { 0 } else { 1 }; + let r = (encG >> 3) & 1; + let x = 0; + let b = (encE >> 3) & 1; + let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b; + if rex != 0x40 || retainRedundant { + sink.put1(rex); + } + // Now the opcode(s). These include any other prefixes the caller + // hands to us. + while numOpcodes > 0 { + numOpcodes -= 1; + sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8); + } + // Now the mod/rm and associated immediates. This is + // significantly complicated due to the multiple special cases. + if *simm32 == 0 + && encE != regs::ENC_RSP + && encE != regs::ENC_RBP + && encE != regs::ENC_R12 + && encE != regs::ENC_R13 + { + // FIXME JRS 2020Feb11: those four tests can surely be + // replaced by a single mask-and-compare check. We should do + // that because this routine is likely to be hot. + sink.put1(mkModRegRM(0, encG & 7, encE & 7)); + } else if *simm32 == 0 && (encE == regs::ENC_RSP || encE == regs::ENC_R12) { + sink.put1(mkModRegRM(0, encG & 7, 4)); + sink.put1(0x24); + } else if low8willSXto32(*simm32) && encE != regs::ENC_RSP && encE != regs::ENC_R12 { + sink.put1(mkModRegRM(1, encG & 7, encE & 7)); + sink.put1((simm32 & 0xFF) as u8); + } else if encE != regs::ENC_RSP && encE != regs::ENC_R12 { + sink.put1(mkModRegRM(2, encG & 7, encE & 7)); + sink.put4(*simm32); + } else if (encE == regs::ENC_RSP || encE == regs::ENC_R12) && low8willSXto32(*simm32) { + // REX.B distinguishes RSP from R12 + sink.put1(mkModRegRM(1, encG & 7, 4)); + sink.put1(0x24); + sink.put1((simm32 & 0xFF) as u8); + } else if encE == regs::ENC_R12 || encE == regs::ENC_RSP { + //.. 
wait for test case for RSP case + // REX.B distinguishes RSP from R12 + sink.put1(mkModRegRM(2, encG & 7, 4)); + sink.put1(0x24); + sink.put4(*simm32); + } else { + unreachable!("emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE: IR"); + } + } + // Bizarrely, the IRRS case is much simpler. + Addr::IRRS { + simm32, + base: regBase, + index: regIndex, + shift, + } => { + let encBase = iregEnc(*regBase); + let encIndex = iregEnc(*regIndex); + // The rex byte + let w = if clearRexW { 0 } else { 1 }; + let r = (encG >> 3) & 1; + let x = (encIndex >> 3) & 1; + let b = (encBase >> 3) & 1; + let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b; + if rex != 0x40 || retainRedundant { + sink.put1(rex); + } + // All other prefixes and opcodes + while numOpcodes > 0 { + numOpcodes -= 1; + sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8); + } + // modrm, SIB, immediates + if low8willSXto32(*simm32) && encIndex != regs::ENC_RSP { + sink.put1(mkModRegRM(1, encG & 7, 4)); + sink.put1(mkSIB(*shift, encIndex & 7, encBase & 7)); + sink.put1(*simm32 as u8); + } else if encIndex != regs::ENC_RSP { + sink.put1(mkModRegRM(2, encG & 7, 4)); + sink.put1(mkSIB(*shift, encIndex & 7, encBase & 7)); + sink.put4(*simm32); + } else { + panic!("emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE: IRRS"); + } + } + } +} + +/// This is the core 'emit' function for instructions that do not reference +/// memory. +/// +/// This is conceptually the same as +/// emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE, except it is for the case +/// where the E operand is a register rather than memory. Hence it is much +/// simpler. +fn emit_REX_OPCODES_MODRM_encG_encE( + sink: &mut O, + opcodes: u32, + mut numOpcodes: usize, + encG: u8, + encE: u8, + flags: u32, +) { + // EncG and EncE can be derived from registers of any class, and they + // don't even have to be from the same class. For example, for an + // integer-to-FP conversion insn, one might be RegClass::I64 and the other + // RegClass::V128. + let prefix66 = (flags & F_PREFIX_66) != 0; + let clearRexW = (flags & F_CLEAR_REX_W) != 0; + let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0; + // The operand-size override + if prefix66 { + sink.put1(0x66); + } + // The rex byte + let w = if clearRexW { 0 } else { 1 }; + let r = (encG >> 3) & 1; + let x = 0; + let b = (encE >> 3) & 1; + let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b; + if rex != 0x40 || retainRedundant { + sink.put1(rex); + } + // All other prefixes and opcodes + while numOpcodes > 0 { + numOpcodes -= 1; + sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8); + } + // Now the mod/rm byte. The instruction we're generating doesn't access + // memory, so there is no SIB byte or immediate -- we're done. + sink.put1(mkModRegRM(3, encG & 7, encE & 7)); +} + +// These are merely wrappers for the above two functions that facilitate passing +// actual `Reg`s rather than their encodings. 
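+// They simply convert each `Reg` to its hardware encoding with `iregEnc` and forward to the `encG`/`encE` versions above.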
+ +fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink: &mut O, + opcodes: u32, + numOpcodes: usize, + regG: Reg, + memE: &Addr, + flags: u32, +) { + // JRS FIXME 2020Feb07: this should really just be `regEnc` not `iregEnc` + let encG = iregEnc(regG); + emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, opcodes, numOpcodes, encG, memE, flags); +} + +fn emit_REX_OPCODES_MODRM_regG_regE( + sink: &mut O, + opcodes: u32, + numOpcodes: usize, + regG: Reg, + regE: Reg, + flags: u32, +) { + // JRS FIXME 2020Feb07: these should really just be `regEnc` not `iregEnc` + let encG = iregEnc(regG); + let encE = iregEnc(regE); + emit_REX_OPCODES_MODRM_encG_encE(sink, opcodes, numOpcodes, encG, encE, flags); +} + +/// Write a suitable number of bits from an imm64 to the sink. +fn emit_simm(sink: &mut O, size: u8, simm32: u32) { + match size { + 8 | 4 => sink.put4(simm32), + 2 => sink.put2(simm32 as u16), + 1 => sink.put1(simm32 as u8), + _ => panic!("x64::Inst::emit_simm: unreachable"), + } +} + +/// The top-level emit function. +/// +/// Important! Do not add improved (shortened) encoding cases to existing +/// instructions without also adding tests for those improved encodings. That +/// is a dangerous game that leads to hard-to-track-down errors in the emitted +/// code. +/// +/// For all instructions, make sure to have test coverage for all of the +/// following situations. Do this by creating the cross product resulting from +/// applying the following rules to each operand: +/// +/// (1) for any insn that mentions a register: one test using a register from +/// the group [rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi] and a second one +/// using a register from the group [r8, r9, r10, r11, r12, r13, r14, r15]. +/// This helps detect incorrect REX prefix construction. +/// +/// (2) for any insn that mentions a byte register: one test for each of the +/// four encoding groups [al, cl, dl, bl], [spl, bpl, sil, dil], +/// [r8b .. r11b] and [r12b .. r15b]. This checks that +/// apparently-redundant REX prefixes are retained when required. +/// +/// (3) for any insn that contains an immediate field, check the following +/// cases: field is zero, field is in simm8 range (-128 .. 127), field is +/// in simm32 range (-0x8000_0000 .. 0x7FFF_FFFF). This is because some +/// instructions that require a 32-bit immediate have a short-form encoding +/// when the imm is in simm8 range. +/// +/// Rules (1), (2) and (3) don't apply for registers within address expressions +/// (`Addr`s). Those are already pretty well tested, and the registers in them +/// don't have any effect on the containing instruction (apart from possibly +/// require REX prefix bits). +/// +/// When choosing registers for a test, avoid using registers with the same +/// offset within a given group. For example, don't use rax and r8, since they +/// both have the lowest 3 bits as 000, and so the test won't detect errors +/// where those 3-bit register sub-fields are confused by the emitter. Instead +/// use (eg) rax (lo3 = 000) and r9 (lo3 = 001). Similarly, don't use (eg) cl +/// and bpl since they have the same offset in their group; use instead (eg) cl +/// and sil. +/// +/// For all instructions, also add a test that uses only low-half registers +/// (rax .. rdi, xmm0 .. xmm7) etc, so as to check that any redundant REX +/// prefixes are correctly omitted. This low-half restriction must apply to +/// _all_ registers in the insn, even those in address expressions. 
+/// +/// Following these rules creates large numbers of test cases, but it's the +/// only way to make the emitter reliable. +/// +/// Known possible improvements: +/// +/// * there's a shorter encoding for shl/shr/sar by a 1-bit immediate. (Do we +/// care?) +pub(crate) fn emit(inst: &Inst, sink: &mut O) { + match inst { + Inst::Nop { len: 0 } => {} + Inst::Alu_RMI_R { + is_64, + op, + src: srcE, + dst: regG, + } => { + let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W }; + if *op == RMI_R_Op::Mul { + // We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so + // we have to special-case it. + match srcE { + RMI::R { reg: regE } => { + emit_REX_OPCODES_MODRM_regG_regE( + sink, + 0x0FAF, + 2, + regG.to_reg(), + *regE, + flags, + ); + } + RMI::M { addr } => { + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + 0x0FAF, + 2, + regG.to_reg(), + addr, + flags, + ); + } + RMI::I { simm32 } => { + let useImm8 = low8willSXto32(*simm32); + let opcode = if useImm8 { 0x6B } else { 0x69 }; + // Yes, really, regG twice. + emit_REX_OPCODES_MODRM_regG_regE( + sink, + opcode, + 1, + regG.to_reg(), + regG.to_reg(), + flags, + ); + emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32); + } + } + } else { + let (opcode_R, opcode_M, subopcode_I) = match op { + RMI_R_Op::Add => (0x01, 0x03, 0), + RMI_R_Op::Sub => (0x29, 0x2B, 5), + RMI_R_Op::And => (0x21, 0x23, 4), + RMI_R_Op::Or => (0x09, 0x0B, 1), + RMI_R_Op::Xor => (0x31, 0x33, 6), + RMI_R_Op::Mul => panic!("unreachable"), + }; + match srcE { + RMI::R { reg: regE } => { + // Note. The arguments .. regE .. regG .. sequence + // here is the opposite of what is expected. I'm not + // sure why this is. But I am fairly sure that the + // arg order could be switched back to the expected + // .. regG .. regE .. if opcode_rr is also switched + // over to the "other" basic integer opcode (viz, the + // R/RM vs RM/R duality). However, that would mean + // that the test results won't be in accordance with + // the GNU as reference output. In other words, the + // inversion exists as a result of using GNU as as a + // gold standard. + emit_REX_OPCODES_MODRM_regG_regE( + sink, + opcode_R, + 1, + *regE, + regG.to_reg(), + flags, + ); + // NB: if this is ever extended to handle byte size + // ops, be sure to retain redundant REX prefixes. + } + RMI::M { addr } => { + // Whereas here we revert to the "normal" G-E ordering. + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + opcode_M, + 1, + regG.to_reg(), + addr, + flags, + ); + } + RMI::I { simm32 } => { + let useImm8 = low8willSXto32(*simm32); + let opcode = if useImm8 { 0x83 } else { 0x81 }; + // And also here we use the "normal" G-E ordering. 
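+                        // `subopcode_I` lands in the ModRM reg field: the 0x81/0x83 group-1 forms select the ALU
+                        // operation by /digit (add = /0, or = /1, and = /4, sub = /5, xor = /6).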
+ let encG = iregEnc(regG.to_reg()); + emit_REX_OPCODES_MODRM_encG_encE(sink, opcode, 1, subopcode_I, encG, flags); + emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32); + } + } + } + } + Inst::Imm_R { + dst_is_64, + simm64, + dst, + } => { + let encDst = iregEnc(dst.to_reg()); + if *dst_is_64 { + // FIXME JRS 2020Feb10: also use the 32-bit case here when + // possible + sink.put1(0x48 | ((encDst >> 3) & 1)); + sink.put1(0xB8 | (encDst & 7)); + sink.put8(*simm64); + } else { + if ((encDst >> 3) & 1) == 1 { + sink.put1(0x41); + } + sink.put1(0xB8 | (encDst & 7)); + sink.put4(*simm64 as u32); + } + } + Inst::Mov_R_R { is_64, src, dst } => { + let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W }; + emit_REX_OPCODES_MODRM_regG_regE(sink, 0x89, 1, *src, dst.to_reg(), flags); + } + Inst::MovZX_M_R { extMode, addr, dst } => { + match extMode { + ExtMode::BL => { + // MOVZBL is (REX.W==0) 0F B6 /r + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + 0x0FB6, + 2, + dst.to_reg(), + addr, + F_CLEAR_REX_W, + ) + } + ExtMode::BQ => { + // MOVZBQ is (REX.W==1) 0F B6 /r + // I'm not sure why the Intel manual offers different + // encodings for MOVZBQ than for MOVZBL. AIUI they should + // achieve the same, since MOVZBL is just going to zero out + // the upper half of the destination anyway. + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + 0x0FB6, + 2, + dst.to_reg(), + addr, + F_NONE, + ) + } + ExtMode::WL => { + // MOVZWL is (REX.W==0) 0F B7 /r + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + 0x0FB7, + 2, + dst.to_reg(), + addr, + F_CLEAR_REX_W, + ) + } + ExtMode::WQ => { + // MOVZWQ is (REX.W==1) 0F B7 /r + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + 0x0FB7, + 2, + dst.to_reg(), + addr, + F_NONE, + ) + } + ExtMode::LQ => { + // This is just a standard 32 bit load, and we rely on the + // default zero-extension rule to perform the extension. + // MOV r/m32, r32 is (REX.W==0) 8B /r + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + 0x8B, + 1, + dst.to_reg(), + addr, + F_CLEAR_REX_W, + ) + } + } + } + Inst::Mov64_M_R { addr, dst } => { + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x8B, 1, dst.to_reg(), addr, F_NONE) + } + Inst::MovSX_M_R { extMode, addr, dst } => { + match extMode { + ExtMode::BL => { + // MOVSBL is (REX.W==0) 0F BE /r + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + 0x0FBE, + 2, + dst.to_reg(), + addr, + F_CLEAR_REX_W, + ) + } + ExtMode::BQ => { + // MOVSBQ is (REX.W==1) 0F BE /r + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + 0x0FBE, + 2, + dst.to_reg(), + addr, + F_NONE, + ) + } + ExtMode::WL => { + // MOVSWL is (REX.W==0) 0F BF /r + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + 0x0FBF, + 2, + dst.to_reg(), + addr, + F_CLEAR_REX_W, + ) + } + ExtMode::WQ => { + // MOVSWQ is (REX.W==1) 0F BF /r + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + 0x0FBF, + 2, + dst.to_reg(), + addr, + F_NONE, + ) + } + ExtMode::LQ => { + // MOVSLQ is (REX.W==1) 63 /r + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + 0x63, + 1, + dst.to_reg(), + addr, + F_NONE, + ) + } + } + } + Inst::Mov_R_M { size, src, addr } => { + match size { + 1 => { + // This is one of the few places where the presence of a + // redundant REX prefix changes the meaning of the + // instruction. 
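+                    // Without a REX prefix, encodings 4..7 name %ah/%ch/%dh/%bh; with a REX prefix (even the
+                    // "empty" 0x40) they name %spl/%bpl/%sil/%dil instead, so the prefix must be retained here.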
+ let encSrc = iregEnc(*src); + let retainRedundantRex = if encSrc >= 4 && encSrc <= 7 { + F_RETAIN_REDUNDANT_REX + } else { + 0 + }; + // MOV r8, r/m8 is (REX.W==0) 88 /r + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + 0x88, + 1, + *src, + addr, + F_CLEAR_REX_W | retainRedundantRex, + ) + } + 2 => { + // MOV r16, r/m16 is 66 (REX.W==0) 89 /r + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + 0x89, + 1, + *src, + addr, + F_CLEAR_REX_W | F_PREFIX_66, + ) + } + 4 => { + // MOV r32, r/m32 is (REX.W==0) 89 /r + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + 0x89, + 1, + *src, + addr, + F_CLEAR_REX_W, + ) + } + 8 => { + // MOV r64, r/m64 is (REX.W==1) 89 /r + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x89, 1, *src, addr, F_NONE) + } + _ => panic!("x64::Inst::Mov_R_M::emit: unreachable"), + } + } + Inst::Shift_R { + is_64, + kind, + num_bits, + dst, + } => { + let encDst = iregEnc(dst.to_reg()); + let subopcode = match kind { + ShiftKind::Left => 4, + ShiftKind::RightZ => 5, + ShiftKind::RightS => 7, + }; + match num_bits { + None => { + // SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode + // SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode + emit_REX_OPCODES_MODRM_encG_encE( + sink, + 0xD3, + 1, + subopcode, + encDst, + if *is_64 { F_NONE } else { F_CLEAR_REX_W }, + ); + } + Some(num_bits) => { + // SHL/SHR/SAR $ib, reg32 is (REX.W==0) C1 /subopcode ib + // SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib + // When the shift amount is 1, there's an even shorter encoding, but we don't + // bother with that nicety here. + emit_REX_OPCODES_MODRM_encG_encE( + sink, + 0xC1, + 1, + subopcode, + encDst, + if *is_64 { F_NONE } else { F_CLEAR_REX_W }, + ); + sink.put1(*num_bits); + } + } + } + Inst::Cmp_RMI_R { + size, + src: srcE, + dst: regG, + } => { + let mut retainRedundantRex = 0; + if *size == 1 { + // Here, a redundant REX prefix changes the meaning of the + // instruction. + let encG = iregEnc(*regG); + if encG >= 4 && encG <= 7 { + retainRedundantRex = F_RETAIN_REDUNDANT_REX; + } + } + let mut flags = match size { + 8 => F_NONE, + 4 => F_CLEAR_REX_W, + 2 => F_CLEAR_REX_W | F_PREFIX_66, + 1 => F_CLEAR_REX_W | retainRedundantRex, + _ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"), + }; + match srcE { + RMI::R { reg: regE } => { + let opcode = if *size == 1 { 0x38 } else { 0x39 }; + if *size == 1 { + // We also need to check whether the E register forces + // the use of a redundant REX. + let encE = iregEnc(*regE); + if encE >= 4 && encE <= 7 { + flags |= F_RETAIN_REDUNDANT_REX; + } + } + // Same comment re swapped args as for Alu_RMI_R. + emit_REX_OPCODES_MODRM_regG_regE(sink, opcode, 1, *regE, *regG, flags); + } + RMI::M { addr } => { + let opcode = if *size == 1 { 0x3A } else { 0x3B }; + // Whereas here we revert to the "normal" G-E ordering. + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, opcode, 1, *regG, addr, flags); + } + RMI::I { simm32 } => { + // FIXME JRS 2020Feb11: there are shorter encodings for + // cmp $imm, rax/eax/ax/al. + let useImm8 = low8willSXto32(*simm32); + let opcode = if *size == 1 { + 0x80 + } else if useImm8 { + 0x83 + } else { + 0x81 + }; + // And also here we use the "normal" G-E ordering. 
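+                    // CMP-with-immediate is /7 within the group-1 opcode family (0x80/0x81/0x83).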
+ let encG = iregEnc(*regG); + emit_REX_OPCODES_MODRM_encG_encE( + sink, opcode, 1, 7, /*subopcode*/ + encG, flags, + ); + emit_simm(sink, if useImm8 { 1 } else { *size }, *simm32); + } + } + } + Inst::Push64 { src } => { + match src { + RMI::R { reg } => { + let encReg = iregEnc(*reg); + let rex = 0x40 | ((encReg >> 3) & 1); + if rex != 0x40 { + sink.put1(rex); + } + sink.put1(0x50 | (encReg & 7)); + } + RMI::M { addr } => { + emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( + sink, + 0xFF, + 1, + 6, /*subopcode*/ + addr, + F_CLEAR_REX_W, + ); + } + RMI::I { simm32 } => { + if low8willSXto64(*simm32) { + sink.put1(0x6A); + sink.put1(*simm32 as u8); + } else { + sink.put1(0x68); + sink.put4(*simm32); + } + } + } + } + Inst::Pop64 { dst } => { + let encDst = iregEnc(dst.to_reg()); + if encDst >= 8 { + // 0x41 == REX.{W=0, B=1}. It seems that REX.W is irrelevant + // here. + sink.put1(0x41); + } + sink.put1(0x58 + (encDst & 7)); + } + // + // ** Inst::CallKnown + // + Inst::CallUnknown { dest } => { + match dest { + RM::R { reg } => { + let regEnc = iregEnc(*reg); + emit_REX_OPCODES_MODRM_encG_encE( + sink, + 0xFF, + 1, + 2, /*subopcode*/ + regEnc, + F_CLEAR_REX_W, + ); + } + RM::M { addr } => { + emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( + sink, + 0xFF, + 1, + 2, /*subopcode*/ + addr, + F_CLEAR_REX_W, + ); + } + } + } + Inst::Ret {} => sink.put1(0xC3), + + Inst::JmpKnown { + dest: BranchTarget::Block(..), + } => { + // Computation of block offsets/sizes. + sink.put1(0); + sink.put4(0); + } + Inst::JmpKnown { + dest: BranchTarget::ResolvedOffset(_bix, offset), + } if *offset >= -0x7FFF_FF00 && *offset <= 0x7FFF_FF00 => { + // And now for real + let mut offs_i32 = *offset as i32; + offs_i32 -= 5; + let offs_u32 = offs_i32 as u32; + sink.put1(0xE9); + sink.put4(offs_u32); + } + // + // ** Inst::JmpCondSymm XXXX should never happen + // + Inst::JmpCond { + cc: _, + target: BranchTarget::Block(..), + } => { + // This case occurs when we are computing block offsets / sizes, + // prior to lowering block-index targets to concrete-offset targets. + // Only the size matters, so let's emit 6 bytes, as below. + sink.put1(0); + sink.put1(0); + sink.put4(0); + } + Inst::JmpCond { + cc, + target: BranchTarget::ResolvedOffset(_bix, offset), + } if *offset >= -0x7FFF_FF00 && *offset <= 0x7FFF_FF00 => { + // This insn is 6 bytes long. Currently `offset` is relative to + // the start of this insn, but the Intel encoding requires it to + // be relative to the start of the next instruction. Hence the + // adjustment. + let mut offs_i32 = *offset as i32; + offs_i32 -= 6; + let offs_u32 = offs_i32 as u32; + sink.put1(0x0F); + sink.put1(0x80 + cc.get_enc()); + sink.put4(offs_u32); + } + // + // ** Inst::JmpCondCompound XXXX should never happen + // + Inst::JmpUnknown { target } => { + match target { + RM::R { reg } => { + let regEnc = iregEnc(*reg); + emit_REX_OPCODES_MODRM_encG_encE( + sink, + 0xFF, + 1, + 4, /*subopcode*/ + regEnc, + F_CLEAR_REX_W, + ); + } + RM::M { addr } => { + emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( + sink, + 0xFF, + 1, + 4, /*subopcode*/ + addr, + F_CLEAR_REX_W, + ); + } + } + } + + _ => panic!("x64_emit: unhandled: {} ", inst.show_rru(None)), + } +} diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs new file mode 100644 index 0000000000..1a6ab16f69 --- /dev/null +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -0,0 +1,2199 @@ +//! Tests for the emitter +//! +//! 
See comments at the top of `fn x64_emit` for advice on how to create reliable test cases. +//! +//! to see stdout: cargo test -- --nocapture +//! +//! for this specific case: +//! +//! (cd cranelift/codegen && \ +//! RUST_BACKTRACE=1 cargo test isa::x64::inst::test_x64_insn_encoding_and_printing -- --nocapture) + +use alloc::vec::Vec; + +use super::*; +use crate::isa::test_utils; + +#[test] +fn test_x64_emit() { + let rax = regs::rax(); + let rbx = regs::rbx(); + let rcx = regs::rcx(); + let rdx = regs::rdx(); + let rsi = regs::rsi(); + let rdi = regs::rdi(); + let rsp = regs::rsp(); + let rbp = regs::rbp(); + let r8 = regs::r8(); + let r9 = regs::r9(); + let r10 = regs::r10(); + let r11 = regs::r11(); + let r12 = regs::r12(); + let r13 = regs::r13(); + let r14 = regs::r14(); + let r15 = regs::r15(); + + // And Writable<> versions of the same: + let w_rax = Writable::::from_reg(rax); + let w_rbx = Writable::::from_reg(rbx); + let w_rcx = Writable::::from_reg(rcx); + let w_rdx = Writable::::from_reg(rdx); + let w_rsi = Writable::::from_reg(rsi); + let w_rdi = Writable::::from_reg(rdi); + let _w_rsp = Writable::::from_reg(rsp); + let _w_rbp = Writable::::from_reg(rbp); + let w_r8 = Writable::::from_reg(r8); + let w_r9 = Writable::::from_reg(r9); + let _w_r10 = Writable::::from_reg(r10); + let w_r11 = Writable::::from_reg(r11); + let w_r12 = Writable::::from_reg(r12); + let w_r13 = Writable::::from_reg(r13); + let w_r14 = Writable::::from_reg(r14); + let w_r15 = Writable::::from_reg(r15); + + let mut insns = Vec::<(Inst, &str, &str)>::new(); + + // ======================================================== + // Cases aimed at checking Addr-esses: IR (Imm + Reg) + // + // These are just a bunch of loads with all supported (by the emitter) + // permutations of address formats. 
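+    // Each tuple below pairs an `Inst` with its expected byte encoding (as a hex string) and its expected
+    // pretty-printed form.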
+ // + // Addr_IR, offset zero + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, rax), w_rdi), + "488B38", + "movq 0(%rax), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, rbx), w_rdi), + "488B3B", + "movq 0(%rbx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, rcx), w_rdi), + "488B39", + "movq 0(%rcx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, rdx), w_rdi), + "488B3A", + "movq 0(%rdx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, rbp), w_rdi), + "488B7D00", + "movq 0(%rbp), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, rsp), w_rdi), + "488B3C24", + "movq 0(%rsp), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, rsi), w_rdi), + "488B3E", + "movq 0(%rsi), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, rdi), w_rdi), + "488B3F", + "movq 0(%rdi), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, r8), w_rdi), + "498B38", + "movq 0(%r8), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, r9), w_rdi), + "498B39", + "movq 0(%r9), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, r10), w_rdi), + "498B3A", + "movq 0(%r10), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, r11), w_rdi), + "498B3B", + "movq 0(%r11), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, r12), w_rdi), + "498B3C24", + "movq 0(%r12), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, r13), w_rdi), + "498B7D00", + "movq 0(%r13), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, r14), w_rdi), + "498B3E", + "movq 0(%r14), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0, r15), w_rdi), + "498B3F", + "movq 0(%r15), %rdi", + )); + + // ======================================================== + // Addr_IR, offset max simm8 + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(127, rax), w_rdi), + "488B787F", + "movq 127(%rax), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(127, rbx), w_rdi), + "488B7B7F", + "movq 127(%rbx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(127, rcx), w_rdi), + "488B797F", + "movq 127(%rcx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(127, rdx), w_rdi), + "488B7A7F", + "movq 127(%rdx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(127, rbp), w_rdi), + "488B7D7F", + "movq 127(%rbp), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(127, rsp), w_rdi), + "488B7C247F", + "movq 127(%rsp), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(127, rsi), w_rdi), + "488B7E7F", + "movq 127(%rsi), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(127, rdi), w_rdi), + "488B7F7F", + "movq 127(%rdi), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(127, r8), w_rdi), + "498B787F", + "movq 127(%r8), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(127, r9), w_rdi), + "498B797F", + "movq 127(%r9), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(127, r10), w_rdi), + "498B7A7F", + "movq 127(%r10), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(127, r11), w_rdi), + "498B7B7F", + "movq 127(%r11), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(127, r12), w_rdi), + "498B7C247F", + "movq 127(%r12), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(127, r13), w_rdi), + "498B7D7F", + "movq 127(%r13), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(127, r14), w_rdi), + "498B7E7F", + "movq 127(%r14), %rdi", + )); + insns.push(( + 
Inst::mov64_m_r(Addr::imm_reg(127, r15), w_rdi), + "498B7F7F", + "movq 127(%r15), %rdi", + )); + + // ======================================================== + // Addr_IR, offset min simm8 + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rax), w_rdi), + "488B7880", + "movq -128(%rax), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rbx), w_rdi), + "488B7B80", + "movq -128(%rbx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rcx), w_rdi), + "488B7980", + "movq -128(%rcx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rdx), w_rdi), + "488B7A80", + "movq -128(%rdx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rbp), w_rdi), + "488B7D80", + "movq -128(%rbp), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rsp), w_rdi), + "488B7C2480", + "movq -128(%rsp), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rsi), w_rdi), + "488B7E80", + "movq -128(%rsi), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rdi), w_rdi), + "488B7F80", + "movq -128(%rdi), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r8), w_rdi), + "498B7880", + "movq -128(%r8), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r9), w_rdi), + "498B7980", + "movq -128(%r9), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r10), w_rdi), + "498B7A80", + "movq -128(%r10), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r11), w_rdi), + "498B7B80", + "movq -128(%r11), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r12), w_rdi), + "498B7C2480", + "movq -128(%r12), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r13), w_rdi), + "498B7D80", + "movq -128(%r13), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r14), w_rdi), + "498B7E80", + "movq -128(%r14), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r15), w_rdi), + "498B7F80", + "movq -128(%r15), %rdi", + )); + + // ======================================================== + // Addr_IR, offset smallest positive simm32 + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, rax), w_rdi), + "488BB880000000", + "movq 128(%rax), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, rbx), w_rdi), + "488BBB80000000", + "movq 128(%rbx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, rcx), w_rdi), + "488BB980000000", + "movq 128(%rcx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, rdx), w_rdi), + "488BBA80000000", + "movq 128(%rdx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, rbp), w_rdi), + "488BBD80000000", + "movq 128(%rbp), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, rsp), w_rdi), + "488BBC2480000000", + "movq 128(%rsp), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, rsi), w_rdi), + "488BBE80000000", + "movq 128(%rsi), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, rdi), w_rdi), + "488BBF80000000", + "movq 128(%rdi), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, r8), w_rdi), + "498BB880000000", + "movq 128(%r8), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, r9), w_rdi), + "498BB980000000", + "movq 128(%r9), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, r10), w_rdi), + "498BBA80000000", + 
"movq 128(%r10), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, r11), w_rdi), + "498BBB80000000", + "movq 128(%r11), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, r12), w_rdi), + "498BBC2480000000", + "movq 128(%r12), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, r13), w_rdi), + "498BBD80000000", + "movq 128(%r13), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, r14), w_rdi), + "498BBE80000000", + "movq 128(%r14), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(128, r15), w_rdi), + "498BBF80000000", + "movq 128(%r15), %rdi", + )); + + // ======================================================== + // Addr_IR, offset smallest negative simm32 + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rax), w_rdi), + "488BB87FFFFFFF", + "movq -129(%rax), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rbx), w_rdi), + "488BBB7FFFFFFF", + "movq -129(%rbx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rcx), w_rdi), + "488BB97FFFFFFF", + "movq -129(%rcx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rdx), w_rdi), + "488BBA7FFFFFFF", + "movq -129(%rdx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rbp), w_rdi), + "488BBD7FFFFFFF", + "movq -129(%rbp), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rsp), w_rdi), + "488BBC247FFFFFFF", + "movq -129(%rsp), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rsi), w_rdi), + "488BBE7FFFFFFF", + "movq -129(%rsi), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rdi), w_rdi), + "488BBF7FFFFFFF", + "movq -129(%rdi), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r8), w_rdi), + "498BB87FFFFFFF", + "movq -129(%r8), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r9), w_rdi), + "498BB97FFFFFFF", + "movq -129(%r9), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r10), w_rdi), + "498BBA7FFFFFFF", + "movq -129(%r10), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r11), w_rdi), + "498BBB7FFFFFFF", + "movq -129(%r11), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r12), w_rdi), + "498BBC247FFFFFFF", + "movq -129(%r12), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r13), w_rdi), + "498BBD7FFFFFFF", + "movq -129(%r13), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r14), w_rdi), + "498BBE7FFFFFFF", + "movq -129(%r14), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r15), w_rdi), + "498BBF7FFFFFFF", + "movq -129(%r15), %rdi", + )); + + // ======================================================== + // Addr_IR, offset large positive simm32 + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, rax), w_rdi), + "488BB877207317", + "movq 393420919(%rax), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, rbx), w_rdi), + "488BBB77207317", + "movq 393420919(%rbx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, rcx), w_rdi), + "488BB977207317", + "movq 393420919(%rcx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, rdx), w_rdi), + "488BBA77207317", + "movq 393420919(%rdx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, rbp), w_rdi), + "488BBD77207317", + "movq 
393420919(%rbp), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, rsp), w_rdi), + "488BBC2477207317", + "movq 393420919(%rsp), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, rsi), w_rdi), + "488BBE77207317", + "movq 393420919(%rsi), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, rdi), w_rdi), + "488BBF77207317", + "movq 393420919(%rdi), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, r8), w_rdi), + "498BB877207317", + "movq 393420919(%r8), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, r9), w_rdi), + "498BB977207317", + "movq 393420919(%r9), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, r10), w_rdi), + "498BBA77207317", + "movq 393420919(%r10), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, r11), w_rdi), + "498BBB77207317", + "movq 393420919(%r11), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, r12), w_rdi), + "498BBC2477207317", + "movq 393420919(%r12), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, r13), w_rdi), + "498BBD77207317", + "movq 393420919(%r13), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, r14), w_rdi), + "498BBE77207317", + "movq 393420919(%r14), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(0x17732077, r15), w_rdi), + "498BBF77207317", + "movq 393420919(%r15), %rdi", + )); + + // ======================================================== + // Addr_IR, offset large negative simm32 + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rax), w_rdi), + "488BB8D9A6BECE", + "movq -826366247(%rax), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rbx), w_rdi), + "488BBBD9A6BECE", + "movq -826366247(%rbx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rcx), w_rdi), + "488BB9D9A6BECE", + "movq -826366247(%rcx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rdx), w_rdi), + "488BBAD9A6BECE", + "movq -826366247(%rdx), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rbp), w_rdi), + "488BBDD9A6BECE", + "movq -826366247(%rbp), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rsp), w_rdi), + "488BBC24D9A6BECE", + "movq -826366247(%rsp), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rsi), w_rdi), + "488BBED9A6BECE", + "movq -826366247(%rsi), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rdi), w_rdi), + "488BBFD9A6BECE", + "movq -826366247(%rdi), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r8), w_rdi), + "498BB8D9A6BECE", + "movq -826366247(%r8), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r9), w_rdi), + "498BB9D9A6BECE", + "movq -826366247(%r9), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r10), w_rdi), + "498BBAD9A6BECE", + "movq -826366247(%r10), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r11), w_rdi), + "498BBBD9A6BECE", + "movq -826366247(%r11), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r12), w_rdi), + "498BBC24D9A6BECE", + "movq -826366247(%r12), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r13), w_rdi), + "498BBDD9A6BECE", + "movq 
-826366247(%r13), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r14), w_rdi), + "498BBED9A6BECE", + "movq -826366247(%r14), %rdi", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r15), w_rdi), + "498BBFD9A6BECE", + "movq -826366247(%r15), %rdi", + )); + + // ======================================================== + // Cases aimed at checking Addr-esses: IRRS (Imm + Reg + (Reg << Shift)) + // Note these don't check the case where the index reg is RSP, since we + // don't encode any of those. + // + // Addr_IRRS, offset max simm8 + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, rax, rax, 0), w_r11), + "4C8B5C007F", + "movq 127(%rax,%rax,1), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, rdi, rax, 1), w_r11), + "4C8B5C477F", + "movq 127(%rdi,%rax,2), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, r8, rax, 2), w_r11), + "4D8B5C807F", + "movq 127(%r8,%rax,4), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, r15, rax, 3), w_r11), + "4D8B5CC77F", + "movq 127(%r15,%rax,8), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, rax, rdi, 3), w_r11), + "4C8B5CF87F", + "movq 127(%rax,%rdi,8), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, rdi, rdi, 2), w_r11), + "4C8B5CBF7F", + "movq 127(%rdi,%rdi,4), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, r8, rdi, 1), w_r11), + "4D8B5C787F", + "movq 127(%r8,%rdi,2), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, r15, rdi, 0), w_r11), + "4D8B5C3F7F", + "movq 127(%r15,%rdi,1), %r11", + )); + + // ======================================================== + // Addr_IRRS, offset min simm8 + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, rax, r8, 2), w_r11), + "4E8B5C8080", + "movq -128(%rax,%r8,4), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, rdi, r8, 3), w_r11), + "4E8B5CC780", + "movq -128(%rdi,%r8,8), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, r8, r8, 0), w_r11), + "4F8B5C0080", + "movq -128(%r8,%r8,1), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, r15, r8, 1), w_r11), + "4F8B5C4780", + "movq -128(%r15,%r8,2), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, rax, r15, 1), w_r11), + "4E8B5C7880", + "movq -128(%rax,%r15,2), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, rdi, r15, 0), w_r11), + "4E8B5C3F80", + "movq -128(%rdi,%r15,1), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, r8, r15, 3), w_r11), + "4F8B5CF880", + "movq -128(%r8,%r15,8), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, r15, r15, 2), w_r11), + "4F8B5CBF80", + "movq -128(%r15,%r15,4), %r11", + )); + + // ======================================================== + // Addr_IRRS, offset large positive simm32 + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, rax, rax, 0), w_r11), + "4C8B9C00BE25664F", + "movq 1332094398(%rax,%rax,1), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, rdi, rax, 1), w_r11), + "4C8B9C47BE25664F", + "movq 1332094398(%rdi,%rax,2), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, r8, rax, 2), w_r11), + "4D8B9C80BE25664F", + "movq 
1332094398(%r8,%rax,4), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, r15, rax, 3), w_r11), + "4D8B9CC7BE25664F", + "movq 1332094398(%r15,%rax,8), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, rax, rdi, 3), w_r11), + "4C8B9CF8BE25664F", + "movq 1332094398(%rax,%rdi,8), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, rdi, rdi, 2), w_r11), + "4C8B9CBFBE25664F", + "movq 1332094398(%rdi,%rdi,4), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, r8, rdi, 1), w_r11), + "4D8B9C78BE25664F", + "movq 1332094398(%r8,%rdi,2), %r11", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, r15, rdi, 0), w_r11), + "4D8B9C3FBE25664F", + "movq 1332094398(%r15,%rdi,1), %r11", + )); + + // ======================================================== + // Addr_IRRS, offset large negative simm32 + insns.push(( + Inst::mov64_m_r( + Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r8, 2), + w_r11, + ), + "4E8B9C8070E9B2D9", + "movq -642586256(%rax,%r8,4), %r11", + )); + insns.push(( + Inst::mov64_m_r( + Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r8, 3), + w_r11, + ), + "4E8B9CC770E9B2D9", + "movq -642586256(%rdi,%r8,8), %r11", + )); + insns.push(( + Inst::mov64_m_r( + Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r8, 0), + w_r11, + ), + "4F8B9C0070E9B2D9", + "movq -642586256(%r8,%r8,1), %r11", + )); + insns.push(( + Inst::mov64_m_r( + Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r8, 1), + w_r11, + ), + "4F8B9C4770E9B2D9", + "movq -642586256(%r15,%r8,2), %r11", + )); + insns.push(( + Inst::mov64_m_r( + Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r15, 1), + w_r11, + ), + "4E8B9C7870E9B2D9", + "movq -642586256(%rax,%r15,2), %r11", + )); + insns.push(( + Inst::mov64_m_r( + Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r15, 0), + w_r11, + ), + "4E8B9C3F70E9B2D9", + "movq -642586256(%rdi,%r15,1), %r11", + )); + insns.push(( + Inst::mov64_m_r( + Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r15, 3), + w_r11, + ), + "4F8B9CF870E9B2D9", + "movq -642586256(%r8,%r15,8), %r11", + )); + insns.push(( + Inst::mov64_m_r( + Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r15, 2), + w_r11, + ), + "4F8B9CBF70E9B2D9", + "movq -642586256(%r15,%r15,4), %r11", + )); + + // End of test cases for Addr + // ======================================================== + + // ======================================================== + // General tests for each insn. Don't forget to follow the + // guidelines commented just prior to `fn x64_emit`. 
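+ // Each case below is an (instruction, expected hex encoding, expected
+ // AT&T-style printing) triple; the loop at the end of this function
+ // checks the printing via `show_rru` and the encoding via `emit`.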
+ // + // Alu_RMI_R + insns.push(( + Inst::alu_rmi_r(true, RMI_R_Op::Add, RMI::reg(r15), w_rdx), + "4C01FA", + "addq %r15, %rdx", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Add, RMI::reg(rcx), w_r8), + "4101C8", + "addl %ecx, %r8d", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Add, RMI::reg(rcx), w_rsi), + "01CE", + "addl %ecx, %esi", + )); + insns.push(( + Inst::alu_rmi_r(true, RMI_R_Op::Add, RMI::mem(Addr::imm_reg(99, rdi)), w_rdx), + "48035763", + "addq 99(%rdi), %rdx", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Add, RMI::mem(Addr::imm_reg(99, rdi)), w_r8), + "44034763", + "addl 99(%rdi), %r8d", + )); + insns.push(( + Inst::alu_rmi_r( + false, + RMI_R_Op::Add, + RMI::mem(Addr::imm_reg(99, rdi)), + w_rsi, + ), + "037763", + "addl 99(%rdi), %esi", + )); + insns.push(( + Inst::alu_rmi_r(true, RMI_R_Op::Add, RMI::imm(-127i32 as u32), w_rdx), + "4883C281", + "addq $-127, %rdx", + )); + insns.push(( + Inst::alu_rmi_r(true, RMI_R_Op::Add, RMI::imm(-129i32 as u32), w_rdx), + "4881C27FFFFFFF", + "addq $-129, %rdx", + )); + insns.push(( + Inst::alu_rmi_r(true, RMI_R_Op::Add, RMI::imm(76543210), w_rdx), + "4881C2EAF48F04", + "addq $76543210, %rdx", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Add, RMI::imm(-127i32 as u32), w_r8), + "4183C081", + "addl $-127, %r8d", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Add, RMI::imm(-129i32 as u32), w_r8), + "4181C07FFFFFFF", + "addl $-129, %r8d", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Add, RMI::imm(-76543210i32 as u32), w_r8), + "4181C0160B70FB", + "addl $-76543210, %r8d", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Add, RMI::imm(-127i32 as u32), w_rsi), + "83C681", + "addl $-127, %esi", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Add, RMI::imm(-129i32 as u32), w_rsi), + "81C67FFFFFFF", + "addl $-129, %esi", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Add, RMI::imm(76543210), w_rsi), + "81C6EAF48F04", + "addl $76543210, %esi", + )); + // This is pretty feeble + insns.push(( + Inst::alu_rmi_r(true, RMI_R_Op::Sub, RMI::reg(r15), w_rdx), + "4C29FA", + "subq %r15, %rdx", + )); + insns.push(( + Inst::alu_rmi_r(true, RMI_R_Op::And, RMI::reg(r15), w_rdx), + "4C21FA", + "andq %r15, %rdx", + )); + insns.push(( + Inst::alu_rmi_r(true, RMI_R_Op::Or, RMI::reg(r15), w_rdx), + "4C09FA", + "orq %r15, %rdx", + )); + insns.push(( + Inst::alu_rmi_r(true, RMI_R_Op::Xor, RMI::reg(r15), w_rdx), + "4C31FA", + "xorq %r15, %rdx", + )); + // Test all mul cases, though + insns.push(( + Inst::alu_rmi_r(true, RMI_R_Op::Mul, RMI::reg(r15), w_rdx), + "490FAFD7", + "imulq %r15, %rdx", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Mul, RMI::reg(rcx), w_r8), + "440FAFC1", + "imull %ecx, %r8d", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Mul, RMI::reg(rcx), w_rsi), + "0FAFF1", + "imull %ecx, %esi", + )); + insns.push(( + Inst::alu_rmi_r(true, RMI_R_Op::Mul, RMI::mem(Addr::imm_reg(99, rdi)), w_rdx), + "480FAF5763", + "imulq 99(%rdi), %rdx", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Mul, RMI::mem(Addr::imm_reg(99, rdi)), w_r8), + "440FAF4763", + "imull 99(%rdi), %r8d", + )); + insns.push(( + Inst::alu_rmi_r( + false, + RMI_R_Op::Mul, + RMI::mem(Addr::imm_reg(99, rdi)), + w_rsi, + ), + "0FAF7763", + "imull 99(%rdi), %esi", + )); + insns.push(( + Inst::alu_rmi_r(true, RMI_R_Op::Mul, RMI::imm(-127i32 as u32), w_rdx), + "486BD281", + "imulq $-127, %rdx", + )); + insns.push(( + Inst::alu_rmi_r(true, RMI_R_Op::Mul, RMI::imm(-129i32 as u32), w_rdx), + 
"4869D27FFFFFFF", + "imulq $-129, %rdx", + )); + insns.push(( + Inst::alu_rmi_r(true, RMI_R_Op::Mul, RMI::imm(76543210), w_rdx), + "4869D2EAF48F04", + "imulq $76543210, %rdx", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Mul, RMI::imm(-127i32 as u32), w_r8), + "456BC081", + "imull $-127, %r8d", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Mul, RMI::imm(-129i32 as u32), w_r8), + "4569C07FFFFFFF", + "imull $-129, %r8d", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Mul, RMI::imm(-76543210i32 as u32), w_r8), + "4569C0160B70FB", + "imull $-76543210, %r8d", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Mul, RMI::imm(-127i32 as u32), w_rsi), + "6BF681", + "imull $-127, %esi", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Mul, RMI::imm(-129i32 as u32), w_rsi), + "69F67FFFFFFF", + "imull $-129, %esi", + )); + insns.push(( + Inst::alu_rmi_r(false, RMI_R_Op::Mul, RMI::imm(76543210), w_rsi), + "69F6EAF48F04", + "imull $76543210, %esi", + )); + + // ======================================================== + // Imm_R + // + insns.push(( + Inst::imm_r(false, 1234567, w_r14), + "41BE87D61200", + "movl $1234567, %r14d", + )); + insns.push(( + Inst::imm_r(false, -126i64 as u64, w_r14), + "41BE82FFFFFF", + "movl $-126, %r14d", + )); + insns.push(( + Inst::imm_r(true, 1234567898765, w_r14), + "49BE8D26FB711F010000", + "movabsq $1234567898765, %r14", + )); + insns.push(( + Inst::imm_r(true, -126i64 as u64, w_r14), + "49BE82FFFFFFFFFFFFFF", + "movabsq $-126, %r14", + )); + insns.push(( + Inst::imm_r(false, 1234567, w_rcx), + "B987D61200", + "movl $1234567, %ecx", + )); + insns.push(( + Inst::imm_r(false, -126i64 as u64, w_rcx), + "B982FFFFFF", + "movl $-126, %ecx", + )); + insns.push(( + Inst::imm_r(true, 1234567898765, w_rsi), + "48BE8D26FB711F010000", + "movabsq $1234567898765, %rsi", + )); + insns.push(( + Inst::imm_r(true, -126i64 as u64, w_rbx), + "48BB82FFFFFFFFFFFFFF", + "movabsq $-126, %rbx", + )); + + // ======================================================== + // Mov_R_R + insns.push(( + Inst::mov_r_r(false, rbx, w_rsi), + "89DE", + "movl %ebx, %esi", + )); + insns.push(( + Inst::mov_r_r(false, rbx, w_r9), + "4189D9", + "movl %ebx, %r9d", + )); + insns.push(( + Inst::mov_r_r(false, r11, w_rsi), + "4489DE", + "movl %r11d, %esi", + )); + insns.push(( + Inst::mov_r_r(false, r12, w_r9), + "4589E1", + "movl %r12d, %r9d", + )); + insns.push(( + Inst::mov_r_r(true, rbx, w_rsi), + "4889DE", + "movq %rbx, %rsi", + )); + insns.push(( + Inst::mov_r_r(true, rbx, w_r9), + "4989D9", + "movq %rbx, %r9", + )); + insns.push(( + Inst::mov_r_r(true, r11, w_rsi), + "4C89DE", + "movq %r11, %rsi", + )); + insns.push(( + Inst::mov_r_r(true, r12, w_r9), + "4D89E1", + "movq %r12, %r9", + )); + + // ======================================================== + // MovZX_M_R + insns.push(( + Inst::movzx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + "0FB671F9", + "movzbl -7(%rcx), %esi", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + "410FB658F9", + "movzbl -7(%r8), %ebx", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, r10), w_r9), + "450FB64AF9", + "movzbl -7(%r10), %r9d", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + "410FB653F9", + "movzbl -7(%r11), %edx", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + "480FB671F9", + "movzbq -7(%rcx), %rsi", + )); + insns.push(( + 
Inst::movzx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + "490FB658F9", + "movzbq -7(%r8), %rbx", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, r10), w_r9), + "4D0FB64AF9", + "movzbq -7(%r10), %r9", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + "490FB653F9", + "movzbq -7(%r11), %rdx", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + "0FB771F9", + "movzwl -7(%rcx), %esi", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + "410FB758F9", + "movzwl -7(%r8), %ebx", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, r10), w_r9), + "450FB74AF9", + "movzwl -7(%r10), %r9d", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + "410FB753F9", + "movzwl -7(%r11), %edx", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + "480FB771F9", + "movzwq -7(%rcx), %rsi", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + "490FB758F9", + "movzwq -7(%r8), %rbx", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, r10), w_r9), + "4D0FB74AF9", + "movzwq -7(%r10), %r9", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + "490FB753F9", + "movzwq -7(%r11), %rdx", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + "8B71F9", + "movl -7(%rcx), %esi", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + "418B58F9", + "movl -7(%r8), %ebx", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, r10), w_r9), + "458B4AF9", + "movl -7(%r10), %r9d", + )); + insns.push(( + Inst::movzx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + "418B53F9", + "movl -7(%r11), %edx", + )); + + // ======================================================== + // Mov64_M_R + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, rax, rbx, 0), w_rcx), + "488B8C18B3000000", + "movq 179(%rax,%rbx,1), %rcx", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, rax, rbx, 0), w_r8), + "4C8B8418B3000000", + "movq 179(%rax,%rbx,1), %r8", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, rax, r9, 0), w_rcx), + "4A8B8C08B3000000", + "movq 179(%rax,%r9,1), %rcx", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, rax, r9, 0), w_r8), + "4E8B8408B3000000", + "movq 179(%rax,%r9,1), %r8", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, r10, rbx, 0), w_rcx), + "498B8C1AB3000000", + "movq 179(%r10,%rbx,1), %rcx", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, r10, rbx, 0), w_r8), + "4D8B841AB3000000", + "movq 179(%r10,%rbx,1), %r8", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, r10, r9, 0), w_rcx), + "4B8B8C0AB3000000", + "movq 179(%r10,%r9,1), %rcx", + )); + insns.push(( + Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, r10, r9, 0), w_r8), + "4F8B840AB3000000", + "movq 179(%r10,%r9,1), %r8", + )); + + // ======================================================== + // MovSX_M_R + insns.push(( + Inst::movsx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + "0FBE71F9", + "movsbl -7(%rcx), %esi", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + "410FBE58F9", + "movsbl 
-7(%r8), %ebx", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, r10), w_r9), + "450FBE4AF9", + "movsbl -7(%r10), %r9d", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + "410FBE53F9", + "movsbl -7(%r11), %edx", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + "480FBE71F9", + "movsbq -7(%rcx), %rsi", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + "490FBE58F9", + "movsbq -7(%r8), %rbx", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, r10), w_r9), + "4D0FBE4AF9", + "movsbq -7(%r10), %r9", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + "490FBE53F9", + "movsbq -7(%r11), %rdx", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + "0FBF71F9", + "movswl -7(%rcx), %esi", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + "410FBF58F9", + "movswl -7(%r8), %ebx", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, r10), w_r9), + "450FBF4AF9", + "movswl -7(%r10), %r9d", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + "410FBF53F9", + "movswl -7(%r11), %edx", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + "480FBF71F9", + "movswq -7(%rcx), %rsi", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + "490FBF58F9", + "movswq -7(%r8), %rbx", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, r10), w_r9), + "4D0FBF4AF9", + "movswq -7(%r10), %r9", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + "490FBF53F9", + "movswq -7(%r11), %rdx", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + "486371F9", + "movslq -7(%rcx), %rsi", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + "496358F9", + "movslq -7(%r8), %rbx", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, r10), w_r9), + "4D634AF9", + "movslq -7(%r10), %r9", + )); + insns.push(( + Inst::movsx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + "496353F9", + "movslq -7(%r11), %rdx", + )); + + // ======================================================== + // Mov_R_M. Byte stores are tricky. Check everything carefully. 
+ insns.push(( + Inst::mov_r_m(8, rax, Addr::imm_reg(99, rdi)), + "48894763", + "movq %rax, 99(%rdi)", + )); + insns.push(( + Inst::mov_r_m(8, rbx, Addr::imm_reg(99, r8)), + "49895863", + "movq %rbx, 99(%r8)", + )); + insns.push(( + Inst::mov_r_m(8, rcx, Addr::imm_reg(99, rsi)), + "48894E63", + "movq %rcx, 99(%rsi)", + )); + insns.push(( + Inst::mov_r_m(8, rdx, Addr::imm_reg(99, r9)), + "49895163", + "movq %rdx, 99(%r9)", + )); + insns.push(( + Inst::mov_r_m(8, rsi, Addr::imm_reg(99, rax)), + "48897063", + "movq %rsi, 99(%rax)", + )); + insns.push(( + Inst::mov_r_m(8, rdi, Addr::imm_reg(99, r15)), + "49897F63", + "movq %rdi, 99(%r15)", + )); + insns.push(( + Inst::mov_r_m(8, rsp, Addr::imm_reg(99, rcx)), + "48896163", + "movq %rsp, 99(%rcx)", + )); + insns.push(( + Inst::mov_r_m(8, rbp, Addr::imm_reg(99, r14)), + "49896E63", + "movq %rbp, 99(%r14)", + )); + insns.push(( + Inst::mov_r_m(8, r8, Addr::imm_reg(99, rdi)), + "4C894763", + "movq %r8, 99(%rdi)", + )); + insns.push(( + Inst::mov_r_m(8, r9, Addr::imm_reg(99, r8)), + "4D894863", + "movq %r9, 99(%r8)", + )); + insns.push(( + Inst::mov_r_m(8, r10, Addr::imm_reg(99, rsi)), + "4C895663", + "movq %r10, 99(%rsi)", + )); + insns.push(( + Inst::mov_r_m(8, r11, Addr::imm_reg(99, r9)), + "4D895963", + "movq %r11, 99(%r9)", + )); + insns.push(( + Inst::mov_r_m(8, r12, Addr::imm_reg(99, rax)), + "4C896063", + "movq %r12, 99(%rax)", + )); + insns.push(( + Inst::mov_r_m(8, r13, Addr::imm_reg(99, r15)), + "4D896F63", + "movq %r13, 99(%r15)", + )); + insns.push(( + Inst::mov_r_m(8, r14, Addr::imm_reg(99, rcx)), + "4C897163", + "movq %r14, 99(%rcx)", + )); + insns.push(( + Inst::mov_r_m(8, r15, Addr::imm_reg(99, r14)), + "4D897E63", + "movq %r15, 99(%r14)", + )); + // + insns.push(( + Inst::mov_r_m(4, rax, Addr::imm_reg(99, rdi)), + "894763", + "movl %eax, 99(%rdi)", + )); + insns.push(( + Inst::mov_r_m(4, rbx, Addr::imm_reg(99, r8)), + "41895863", + "movl %ebx, 99(%r8)", + )); + insns.push(( + Inst::mov_r_m(4, rcx, Addr::imm_reg(99, rsi)), + "894E63", + "movl %ecx, 99(%rsi)", + )); + insns.push(( + Inst::mov_r_m(4, rdx, Addr::imm_reg(99, r9)), + "41895163", + "movl %edx, 99(%r9)", + )); + insns.push(( + Inst::mov_r_m(4, rsi, Addr::imm_reg(99, rax)), + "897063", + "movl %esi, 99(%rax)", + )); + insns.push(( + Inst::mov_r_m(4, rdi, Addr::imm_reg(99, r15)), + "41897F63", + "movl %edi, 99(%r15)", + )); + insns.push(( + Inst::mov_r_m(4, rsp, Addr::imm_reg(99, rcx)), + "896163", + "movl %esp, 99(%rcx)", + )); + insns.push(( + Inst::mov_r_m(4, rbp, Addr::imm_reg(99, r14)), + "41896E63", + "movl %ebp, 99(%r14)", + )); + insns.push(( + Inst::mov_r_m(4, r8, Addr::imm_reg(99, rdi)), + "44894763", + "movl %r8d, 99(%rdi)", + )); + insns.push(( + Inst::mov_r_m(4, r9, Addr::imm_reg(99, r8)), + "45894863", + "movl %r9d, 99(%r8)", + )); + insns.push(( + Inst::mov_r_m(4, r10, Addr::imm_reg(99, rsi)), + "44895663", + "movl %r10d, 99(%rsi)", + )); + insns.push(( + Inst::mov_r_m(4, r11, Addr::imm_reg(99, r9)), + "45895963", + "movl %r11d, 99(%r9)", + )); + insns.push(( + Inst::mov_r_m(4, r12, Addr::imm_reg(99, rax)), + "44896063", + "movl %r12d, 99(%rax)", + )); + insns.push(( + Inst::mov_r_m(4, r13, Addr::imm_reg(99, r15)), + "45896F63", + "movl %r13d, 99(%r15)", + )); + insns.push(( + Inst::mov_r_m(4, r14, Addr::imm_reg(99, rcx)), + "44897163", + "movl %r14d, 99(%rcx)", + )); + insns.push(( + Inst::mov_r_m(4, r15, Addr::imm_reg(99, r14)), + "45897E63", + "movl %r15d, 99(%r14)", + )); + // + insns.push(( + Inst::mov_r_m(2, rax, Addr::imm_reg(99, rdi)), + "66894763", + 
"movw %ax, 99(%rdi)", + )); + insns.push(( + Inst::mov_r_m(2, rbx, Addr::imm_reg(99, r8)), + "6641895863", + "movw %bx, 99(%r8)", + )); + insns.push(( + Inst::mov_r_m(2, rcx, Addr::imm_reg(99, rsi)), + "66894E63", + "movw %cx, 99(%rsi)", + )); + insns.push(( + Inst::mov_r_m(2, rdx, Addr::imm_reg(99, r9)), + "6641895163", + "movw %dx, 99(%r9)", + )); + insns.push(( + Inst::mov_r_m(2, rsi, Addr::imm_reg(99, rax)), + "66897063", + "movw %si, 99(%rax)", + )); + insns.push(( + Inst::mov_r_m(2, rdi, Addr::imm_reg(99, r15)), + "6641897F63", + "movw %di, 99(%r15)", + )); + insns.push(( + Inst::mov_r_m(2, rsp, Addr::imm_reg(99, rcx)), + "66896163", + "movw %sp, 99(%rcx)", + )); + insns.push(( + Inst::mov_r_m(2, rbp, Addr::imm_reg(99, r14)), + "6641896E63", + "movw %bp, 99(%r14)", + )); + insns.push(( + Inst::mov_r_m(2, r8, Addr::imm_reg(99, rdi)), + "6644894763", + "movw %r8w, 99(%rdi)", + )); + insns.push(( + Inst::mov_r_m(2, r9, Addr::imm_reg(99, r8)), + "6645894863", + "movw %r9w, 99(%r8)", + )); + insns.push(( + Inst::mov_r_m(2, r10, Addr::imm_reg(99, rsi)), + "6644895663", + "movw %r10w, 99(%rsi)", + )); + insns.push(( + Inst::mov_r_m(2, r11, Addr::imm_reg(99, r9)), + "6645895963", + "movw %r11w, 99(%r9)", + )); + insns.push(( + Inst::mov_r_m(2, r12, Addr::imm_reg(99, rax)), + "6644896063", + "movw %r12w, 99(%rax)", + )); + insns.push(( + Inst::mov_r_m(2, r13, Addr::imm_reg(99, r15)), + "6645896F63", + "movw %r13w, 99(%r15)", + )); + insns.push(( + Inst::mov_r_m(2, r14, Addr::imm_reg(99, rcx)), + "6644897163", + "movw %r14w, 99(%rcx)", + )); + insns.push(( + Inst::mov_r_m(2, r15, Addr::imm_reg(99, r14)), + "6645897E63", + "movw %r15w, 99(%r14)", + )); + // + insns.push(( + Inst::mov_r_m(1, rax, Addr::imm_reg(99, rdi)), + "884763", + "movb %al, 99(%rdi)", + )); + insns.push(( + Inst::mov_r_m(1, rbx, Addr::imm_reg(99, r8)), + "41885863", + "movb %bl, 99(%r8)", + )); + insns.push(( + Inst::mov_r_m(1, rcx, Addr::imm_reg(99, rsi)), + "884E63", + "movb %cl, 99(%rsi)", + )); + insns.push(( + Inst::mov_r_m(1, rdx, Addr::imm_reg(99, r9)), + "41885163", + "movb %dl, 99(%r9)", + )); + insns.push(( + Inst::mov_r_m(1, rsi, Addr::imm_reg(99, rax)), + "40887063", + "movb %sil, 99(%rax)", + )); + insns.push(( + Inst::mov_r_m(1, rdi, Addr::imm_reg(99, r15)), + "41887F63", + "movb %dil, 99(%r15)", + )); + insns.push(( + Inst::mov_r_m(1, rsp, Addr::imm_reg(99, rcx)), + "40886163", + "movb %spl, 99(%rcx)", + )); + insns.push(( + Inst::mov_r_m(1, rbp, Addr::imm_reg(99, r14)), + "41886E63", + "movb %bpl, 99(%r14)", + )); + insns.push(( + Inst::mov_r_m(1, r8, Addr::imm_reg(99, rdi)), + "44884763", + "movb %r8b, 99(%rdi)", + )); + insns.push(( + Inst::mov_r_m(1, r9, Addr::imm_reg(99, r8)), + "45884863", + "movb %r9b, 99(%r8)", + )); + insns.push(( + Inst::mov_r_m(1, r10, Addr::imm_reg(99, rsi)), + "44885663", + "movb %r10b, 99(%rsi)", + )); + insns.push(( + Inst::mov_r_m(1, r11, Addr::imm_reg(99, r9)), + "45885963", + "movb %r11b, 99(%r9)", + )); + insns.push(( + Inst::mov_r_m(1, r12, Addr::imm_reg(99, rax)), + "44886063", + "movb %r12b, 99(%rax)", + )); + insns.push(( + Inst::mov_r_m(1, r13, Addr::imm_reg(99, r15)), + "45886F63", + "movb %r13b, 99(%r15)", + )); + insns.push(( + Inst::mov_r_m(1, r14, Addr::imm_reg(99, rcx)), + "44887163", + "movb %r14b, 99(%rcx)", + )); + insns.push(( + Inst::mov_r_m(1, r15, Addr::imm_reg(99, r14)), + "45887E63", + "movb %r15b, 99(%r14)", + )); + + // ======================================================== + // Shift_R + insns.push(( + Inst::shift_r(false, ShiftKind::Left, None, 
w_rdi), + "D3E7", + "shll %cl, %edi", + )); + insns.push(( + Inst::shift_r(false, ShiftKind::Left, None, w_r12), + "41D3E4", + "shll %cl, %r12d", + )); + insns.push(( + Inst::shift_r(false, ShiftKind::Left, Some(2), w_r8), + "41C1E002", + "shll $2, %r8d", + )); + insns.push(( + Inst::shift_r(false, ShiftKind::Left, Some(31), w_r13), + "41C1E51F", + "shll $31, %r13d", + )); + insns.push(( + Inst::shift_r(true, ShiftKind::Left, None, w_r13), + "49D3E5", + "shlq %cl, %r13", + )); + insns.push(( + Inst::shift_r(true, ShiftKind::Left, None, w_rdi), + "48D3E7", + "shlq %cl, %rdi", + )); + insns.push(( + Inst::shift_r(true, ShiftKind::Left, Some(2), w_r8), + "49C1E002", + "shlq $2, %r8", + )); + insns.push(( + Inst::shift_r(true, ShiftKind::Left, Some(3), w_rbx), + "48C1E303", + "shlq $3, %rbx", + )); + insns.push(( + Inst::shift_r(true, ShiftKind::Left, Some(63), w_r13), + "49C1E53F", + "shlq $63, %r13", + )); + insns.push(( + Inst::shift_r(false, ShiftKind::RightZ, None, w_rdi), + "D3EF", + "shrl %cl, %edi", + )); + insns.push(( + Inst::shift_r(false, ShiftKind::RightZ, Some(2), w_r8), + "41C1E802", + "shrl $2, %r8d", + )); + insns.push(( + Inst::shift_r(false, ShiftKind::RightZ, Some(31), w_r13), + "41C1ED1F", + "shrl $31, %r13d", + )); + insns.push(( + Inst::shift_r(true, ShiftKind::RightZ, None, w_rdi), + "48D3EF", + "shrq %cl, %rdi", + )); + insns.push(( + Inst::shift_r(true, ShiftKind::RightZ, Some(2), w_r8), + "49C1E802", + "shrq $2, %r8", + )); + insns.push(( + Inst::shift_r(true, ShiftKind::RightZ, Some(63), w_r13), + "49C1ED3F", + "shrq $63, %r13", + )); + insns.push(( + Inst::shift_r(false, ShiftKind::RightS, None, w_rdi), + "D3FF", + "sarl %cl, %edi", + )); + insns.push(( + Inst::shift_r(false, ShiftKind::RightS, Some(2), w_r8), + "41C1F802", + "sarl $2, %r8d", + )); + insns.push(( + Inst::shift_r(false, ShiftKind::RightS, Some(31), w_r13), + "41C1FD1F", + "sarl $31, %r13d", + )); + insns.push(( + Inst::shift_r(true, ShiftKind::RightS, None, w_rdi), + "48D3FF", + "sarq %cl, %rdi", + )); + insns.push(( + Inst::shift_r(true, ShiftKind::RightS, Some(2), w_r8), + "49C1F802", + "sarq $2, %r8", + )); + insns.push(( + Inst::shift_r(true, ShiftKind::RightS, Some(63), w_r13), + "49C1FD3F", + "sarq $63, %r13", + )); + + // ======================================================== + // CmpRMIR + insns.push(( + Inst::cmp_rmi_r(8, RMI::reg(r15), rdx), + "4C39FA", + "cmpq %r15, %rdx", + )); + insns.push(( + Inst::cmp_rmi_r(8, RMI::reg(rcx), r8), + "4939C8", + "cmpq %rcx, %r8", + )); + insns.push(( + Inst::cmp_rmi_r(8, RMI::reg(rcx), rsi), + "4839CE", + "cmpq %rcx, %rsi", + )); + insns.push(( + Inst::cmp_rmi_r(8, RMI::mem(Addr::imm_reg(99, rdi)), rdx), + "483B5763", + "cmpq 99(%rdi), %rdx", + )); + insns.push(( + Inst::cmp_rmi_r(8, RMI::mem(Addr::imm_reg(99, rdi)), r8), + "4C3B4763", + "cmpq 99(%rdi), %r8", + )); + insns.push(( + Inst::cmp_rmi_r(8, RMI::mem(Addr::imm_reg(99, rdi)), rsi), + "483B7763", + "cmpq 99(%rdi), %rsi", + )); + insns.push(( + Inst::cmp_rmi_r(8, RMI::imm(76543210), rdx), + "4881FAEAF48F04", + "cmpq $76543210, %rdx", + )); + insns.push(( + Inst::cmp_rmi_r(8, RMI::imm(-76543210i32 as u32), r8), + "4981F8160B70FB", + "cmpq $-76543210, %r8", + )); + insns.push(( + Inst::cmp_rmi_r(8, RMI::imm(76543210), rsi), + "4881FEEAF48F04", + "cmpq $76543210, %rsi", + )); + // + insns.push(( + Inst::cmp_rmi_r(4, RMI::reg(r15), rdx), + "4439FA", + "cmpl %r15d, %edx", + )); + insns.push(( + Inst::cmp_rmi_r(4, RMI::reg(rcx), r8), + "4139C8", + "cmpl %ecx, %r8d", + )); + insns.push(( + 
Inst::cmp_rmi_r(4, RMI::reg(rcx), rsi), + "39CE", + "cmpl %ecx, %esi", + )); + insns.push(( + Inst::cmp_rmi_r(4, RMI::mem(Addr::imm_reg(99, rdi)), rdx), + "3B5763", + "cmpl 99(%rdi), %edx", + )); + insns.push(( + Inst::cmp_rmi_r(4, RMI::mem(Addr::imm_reg(99, rdi)), r8), + "443B4763", + "cmpl 99(%rdi), %r8d", + )); + insns.push(( + Inst::cmp_rmi_r(4, RMI::mem(Addr::imm_reg(99, rdi)), rsi), + "3B7763", + "cmpl 99(%rdi), %esi", + )); + insns.push(( + Inst::cmp_rmi_r(4, RMI::imm(76543210), rdx), + "81FAEAF48F04", + "cmpl $76543210, %edx", + )); + insns.push(( + Inst::cmp_rmi_r(4, RMI::imm(-76543210i32 as u32), r8), + "4181F8160B70FB", + "cmpl $-76543210, %r8d", + )); + insns.push(( + Inst::cmp_rmi_r(4, RMI::imm(76543210), rsi), + "81FEEAF48F04", + "cmpl $76543210, %esi", + )); + // + insns.push(( + Inst::cmp_rmi_r(2, RMI::reg(r15), rdx), + "664439FA", + "cmpw %r15w, %dx", + )); + insns.push(( + Inst::cmp_rmi_r(2, RMI::reg(rcx), r8), + "664139C8", + "cmpw %cx, %r8w", + )); + insns.push(( + Inst::cmp_rmi_r(2, RMI::reg(rcx), rsi), + "6639CE", + "cmpw %cx, %si", + )); + insns.push(( + Inst::cmp_rmi_r(2, RMI::mem(Addr::imm_reg(99, rdi)), rdx), + "663B5763", + "cmpw 99(%rdi), %dx", + )); + insns.push(( + Inst::cmp_rmi_r(2, RMI::mem(Addr::imm_reg(99, rdi)), r8), + "66443B4763", + "cmpw 99(%rdi), %r8w", + )); + insns.push(( + Inst::cmp_rmi_r(2, RMI::mem(Addr::imm_reg(99, rdi)), rsi), + "663B7763", + "cmpw 99(%rdi), %si", + )); + insns.push(( + Inst::cmp_rmi_r(2, RMI::imm(23210), rdx), + "6681FAAA5A", + "cmpw $23210, %dx", + )); + insns.push(( + Inst::cmp_rmi_r(2, RMI::imm(-7654i32 as u32), r8), + "664181F81AE2", + "cmpw $-7654, %r8w", + )); + insns.push(( + Inst::cmp_rmi_r(2, RMI::imm(7654), rsi), + "6681FEE61D", + "cmpw $7654, %si", + )); + // + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(r15), rdx), + "4438FA", + "cmpb %r15b, %dl", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(rcx), r8), + "4138C8", + "cmpb %cl, %r8b", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(rcx), rsi), + "4038CE", + "cmpb %cl, %sil", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::mem(Addr::imm_reg(99, rdi)), rdx), + "3A5763", + "cmpb 99(%rdi), %dl", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::mem(Addr::imm_reg(99, rdi)), r8), + "443A4763", + "cmpb 99(%rdi), %r8b", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::mem(Addr::imm_reg(99, rdi)), rsi), + "403A7763", + "cmpb 99(%rdi), %sil", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::imm(70), rdx), + "80FA46", + "cmpb $70, %dl", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::imm(-76i32 as u32), r8), + "4180F8B4", + "cmpb $-76, %r8b", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::imm(76), rsi), + "4080FE4C", + "cmpb $76, %sil", + )); + // Extra byte-cases (paranoia!) 
for cmp_rmi_r for first operand = R + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(rax), rbx), + "38C3", + "cmpb %al, %bl", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(rbx), rax), + "38D8", + "cmpb %bl, %al", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(rcx), rdx), + "38CA", + "cmpb %cl, %dl", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(rcx), rsi), + "4038CE", + "cmpb %cl, %sil", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(rcx), r10), + "4138CA", + "cmpb %cl, %r10b", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(rcx), r14), + "4138CE", + "cmpb %cl, %r14b", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(rbp), rdx), + "4038EA", + "cmpb %bpl, %dl", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(rbp), rsi), + "4038EE", + "cmpb %bpl, %sil", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(rbp), r10), + "4138EA", + "cmpb %bpl, %r10b", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(rbp), r14), + "4138EE", + "cmpb %bpl, %r14b", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(r9), rdx), + "4438CA", + "cmpb %r9b, %dl", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(r9), rsi), + "4438CE", + "cmpb %r9b, %sil", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(r9), r10), + "4538CA", + "cmpb %r9b, %r10b", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(r9), r14), + "4538CE", + "cmpb %r9b, %r14b", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(r13), rdx), + "4438EA", + "cmpb %r13b, %dl", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(r13), rsi), + "4438EE", + "cmpb %r13b, %sil", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(r13), r10), + "4538EA", + "cmpb %r13b, %r10b", + )); + insns.push(( + Inst::cmp_rmi_r(1, RMI::reg(r13), r14), + "4538EE", + "cmpb %r13b, %r14b", + )); + + // ======================================================== + // Push64 + insns.push((Inst::push64(RMI::reg(rdi)), "57", "pushq %rdi")); + insns.push((Inst::push64(RMI::reg(r8)), "4150", "pushq %r8")); + insns.push(( + Inst::push64(RMI::mem(Addr::imm_reg_reg_shift(321, rsi, rcx, 3))), + "FFB4CE41010000", + "pushq 321(%rsi,%rcx,8)", + )); + insns.push(( + Inst::push64(RMI::mem(Addr::imm_reg_reg_shift(321, r9, rbx, 2))), + "41FFB49941010000", + "pushq 321(%r9,%rbx,4)", + )); + insns.push((Inst::push64(RMI::imm(0)), "6A00", "pushq $0")); + insns.push((Inst::push64(RMI::imm(127)), "6A7F", "pushq $127")); + insns.push((Inst::push64(RMI::imm(128)), "6880000000", "pushq $128")); + insns.push(( + Inst::push64(RMI::imm(0x31415927)), + "6827594131", + "pushq $826366247", + )); + insns.push(( + Inst::push64(RMI::imm(-128i32 as u32)), + "6A80", + "pushq $-128", + )); + insns.push(( + Inst::push64(RMI::imm(-129i32 as u32)), + "687FFFFFFF", + "pushq $-129", + )); + insns.push(( + Inst::push64(RMI::imm(-0x75c4e8a1i32 as u32)), + "685F173B8A", + "pushq $-1975838881", + )); + + // ======================================================== + // Pop64 + insns.push((Inst::pop64(w_rax), "58", "popq %rax")); + insns.push((Inst::pop64(w_rdi), "5F", "popq %rdi")); + insns.push((Inst::pop64(w_r8), "4158", "popq %r8")); + insns.push((Inst::pop64(w_r15), "415F", "popq %r15")); + + // ======================================================== + // CallKnown skipped for now + + // ======================================================== + // CallUnknown + insns.push((Inst::call_unknown(RM::reg(rbp)), "FFD5", "call *%rbp")); + insns.push((Inst::call_unknown(RM::reg(r11)), "41FFD3", "call *%r11")); + insns.push(( + Inst::call_unknown(RM::mem(Addr::imm_reg_reg_shift(321, rsi, rcx, 3))), + 
"FF94CE41010000", + "call *321(%rsi,%rcx,8)", + )); + insns.push(( + Inst::call_unknown(RM::mem(Addr::imm_reg_reg_shift(321, r10, rdx, 2))), + "41FF949241010000", + "call *321(%r10,%rdx,4)", + )); + + // ======================================================== + // Ret + insns.push((Inst::ret(), "C3", "ret")); + + // ======================================================== + // JmpKnown skipped for now + + // ======================================================== + // JmpCondSymm isn't a real instruction + + // ======================================================== + // JmpCond skipped for now + + // ======================================================== + // JmpCondCompound isn't a real instruction + + // ======================================================== + // JmpUnknown + insns.push((Inst::jmp_unknown(RM::reg(rbp)), "FFE5", "jmp *%rbp")); + insns.push((Inst::jmp_unknown(RM::reg(r11)), "41FFE3", "jmp *%r11")); + insns.push(( + Inst::jmp_unknown(RM::mem(Addr::imm_reg_reg_shift(321, rsi, rcx, 3))), + "FFA4CE41010000", + "jmp *321(%rsi,%rcx,8)", + )); + insns.push(( + Inst::jmp_unknown(RM::mem(Addr::imm_reg_reg_shift(321, r10, rdx, 2))), + "41FFA49241010000", + "jmp *321(%r10,%rdx,4)", + )); + + // ======================================================== + // Actually run the tests! + let flags = settings::Flags::new(settings::builder()); + let rru = regs::create_reg_universe_systemv(&flags); + for (insn, expected_encoding, expected_printing) in insns { + // Check the printed text is as expected. + let actual_printing = insn.show_rru(Some(&rru)); + assert_eq!(expected_printing, actual_printing); + + // Check the encoding is as expected. + let text_size = { + let mut code_sec = MachSectionSize::new(0); + insn.emit(&mut code_sec, &flags); + code_sec.size() + }; + + let mut sink = test_utils::TestCodeSink::new(); + let mut sections = MachSections::new(); + let code_idx = sections.add_section(0, text_size); + let code_sec = sections.get_section(code_idx); + insn.emit(code_sec, &flags); + sections.emit(&mut sink); + let actual_encoding = &sink.stringify(); + assert_eq!(expected_encoding, actual_encoding); + } +} diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs new file mode 100644 index 0000000000..a18dcb31fd --- /dev/null +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -0,0 +1,956 @@ +//! This module defines x86_64-specific machine instruction types. + +#![allow(dead_code)] +#![allow(non_snake_case)] +#![allow(non_camel_case_types)] + +use std::fmt; +use std::string::{String, ToString}; + +use regalloc::RegUsageCollector; +use regalloc::Set; +use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable}; + +use crate::binemit::CodeOffset; +use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I64, I8}; +use crate::ir::ExternalName; +use crate::ir::Type; +use crate::machinst::*; +use crate::{settings, CodegenError, CodegenResult}; + +pub mod args; +mod emit; +#[cfg(test)] +mod emit_tests; +pub mod regs; + +use args::*; +use regs::{create_reg_universe_systemv, show_ireg_sized}; + +//============================================================================= +// Instructions (top level): definition + +// Don't build these directly. Instead use the Inst:: functions to create them. + +/// Instructions. Destinations are on the RIGHT (a la AT&T syntax). 
+#[derive(Clone)] +pub(crate) enum Inst { + /// nops of various sizes, including zero + Nop { len: u8 }, + + /// (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg + Alu_RMI_R { + is_64: bool, + op: RMI_R_Op, + src: RMI, + dst: Writable, + }, + + /// (imm32 imm64) reg. + /// Either: movl $imm32, %reg32 or movabsq $imm64, %reg32 + Imm_R { + dst_is_64: bool, + simm64: u64, + dst: Writable, + }, + + /// mov (64 32) reg reg + Mov_R_R { + is_64: bool, + src: Reg, + dst: Writable, + }, + + /// movz (bl bq wl wq lq) addr reg (good for all ZX loads except 64->64). + /// Note that the lq variant doesn't really exist since the default + /// zero-extend rule makes it unnecessary. For that case we emit the + /// equivalent "movl AM, reg32". + MovZX_M_R { + extMode: ExtMode, + addr: Addr, + dst: Writable, + }, + + /// A plain 64-bit integer load, since MovZX_M_R can't represent that + Mov64_M_R { addr: Addr, dst: Writable }, + + /// movs (bl bq wl wq lq) addr reg (good for all SX loads) + MovSX_M_R { + extMode: ExtMode, + addr: Addr, + dst: Writable, + }, + + /// mov (b w l q) reg addr (good for all integer stores) + Mov_R_M { + size: u8, // 1, 2, 4 or 8 + src: Reg, + addr: Addr, + }, + + /// (shl shr sar) (l q) imm reg + Shift_R { + is_64: bool, + kind: ShiftKind, + /// shift count: Some(0 .. #bits-in-type - 1), or None to mean "%cl". + num_bits: Option, + dst: Writable, + }, + + /// cmp (b w l q) (reg addr imm) reg + Cmp_RMI_R { + size: u8, // 1, 2, 4 or 8 + src: RMI, + dst: Reg, + }, + + /// pushq (reg addr imm) + Push64 { src: RMI }, + + /// popq reg + Pop64 { dst: Writable }, + + /// call simm32 + CallKnown { + dest: ExternalName, + uses: Set, + defs: Set>, + }, + + /// callq (reg mem) + CallUnknown { + dest: RM, + //uses: Set, + //defs: Set>, + }, + + // ---- branches (exactly one must appear at end of BB) ---- + /// ret + Ret, + + /// A placeholder instruction, generating no code, meaning that a function epilogue must be + /// inserted there. + EpiloguePlaceholder, + + /// jmp simm32 + JmpKnown { dest: BranchTarget }, + + /// jcond cond target target + // Symmetrical two-way conditional branch. + // Should never reach the emitter. + JmpCondSymm { + cc: CC, + taken: BranchTarget, + not_taken: BranchTarget, + }, + + /// Lowered conditional branch: contains the original instruction, and a + /// flag indicating whether to invert the taken-condition or not. Only one + /// BranchTarget is retained, and the other is implicitly the next + /// instruction, given the final basic-block layout. + JmpCond { + cc: CC, + //inverted: bool, is this needed? + target: BranchTarget, + }, + + /// As for `CondBrLowered`, but represents a condbr/uncond-br sequence (two + /// actual machine instructions). Needed when the final block layout implies + /// that neither arm of a conditional branch targets the fallthrough block. + // Should never reach the emitter + JmpCondCompound { + cc: CC, + taken: BranchTarget, + not_taken: BranchTarget, + }, + + /// jmpq (reg mem) + JmpUnknown { target: RM }, +} + +// Handy constructors for Insts. + +// For various sizes, will some number of lowest bits sign extend to be the +// same as the whole value? 
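+// For example, 0xFFFF_FFFF_FFFF_FFF6 (-10) does, whereas 0x8000_0000 does
+// not, since sign-extending its low 32 bits yields 0xFFFF_FFFF_8000_0000.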
+pub(crate) fn low32willSXto64(x: u64) -> bool { + let xs = x as i64; + xs == ((xs << 32) >> 32) +} + +impl Inst { + pub(crate) fn nop(len: u8) -> Self { + debug_assert!(len <= 16); + Self::Nop { len } + } + + pub(crate) fn alu_rmi_r(is_64: bool, op: RMI_R_Op, src: RMI, dst: Writable) -> Self { + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + Self::Alu_RMI_R { + is_64, + op, + src, + dst, + } + } + + pub(crate) fn imm_r(dst_is_64: bool, simm64: u64, dst: Writable) -> Inst { + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + if !dst_is_64 { + debug_assert!(low32willSXto64(simm64)); + } + Inst::Imm_R { + dst_is_64, + simm64, + dst, + } + } + + pub(crate) fn mov_r_r(is_64: bool, src: Reg, dst: Writable) -> Inst { + debug_assert!(src.get_class() == RegClass::I64); + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + Inst::Mov_R_R { is_64, src, dst } + } + + pub(crate) fn movzx_m_r(extMode: ExtMode, addr: Addr, dst: Writable) -> Inst { + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + Inst::MovZX_M_R { extMode, addr, dst } + } + + pub(crate) fn mov64_m_r(addr: Addr, dst: Writable) -> Inst { + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + Inst::Mov64_M_R { addr, dst } + } + + pub(crate) fn movsx_m_r(extMode: ExtMode, addr: Addr, dst: Writable) -> Inst { + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + Inst::MovSX_M_R { extMode, addr, dst } + } + + pub(crate) fn mov_r_m( + size: u8, // 1, 2, 4 or 8 + src: Reg, + addr: Addr, + ) -> Inst { + debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); + debug_assert!(src.get_class() == RegClass::I64); + Inst::Mov_R_M { size, src, addr } + } + + pub(crate) fn shift_r( + is_64: bool, + kind: ShiftKind, + num_bits: Option, + dst: Writable, + ) -> Inst { + debug_assert!(if let Some(num_bits) = num_bits { + num_bits < if is_64 { 64 } else { 32 } + } else { + true + }); + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + Inst::Shift_R { + is_64, + kind, + num_bits, + dst, + } + } + + pub(crate) fn cmp_rmi_r( + size: u8, // 1, 2, 4 or 8 + src: RMI, + dst: Reg, + ) -> Inst { + debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); + debug_assert!(dst.get_class() == RegClass::I64); + Inst::Cmp_RMI_R { size, src, dst } + } + + pub(crate) fn push64(src: RMI) -> Inst { + Inst::Push64 { src } + } + + pub(crate) fn pop64(dst: Writable) -> Inst { + Inst::Pop64 { dst } + } + + pub(crate) fn call_unknown(dest: RM) -> Inst { + Inst::CallUnknown { dest } + } + + pub(crate) fn ret() -> Inst { + Inst::Ret + } + + pub(crate) fn epilogue_placeholder() -> Inst { + Inst::EpiloguePlaceholder + } + + pub(crate) fn jmp_known(dest: BranchTarget) -> Inst { + Inst::JmpKnown { dest } + } + + pub(crate) fn jmp_cond_symm(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst { + Inst::JmpCondSymm { + cc, + taken, + not_taken, + } + } + + pub(crate) fn jmp_cond(cc: CC, target: BranchTarget) -> Inst { + Inst::JmpCond { cc, target } + } + + pub(crate) fn jmp_cond_compound(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst { + Inst::JmpCondCompound { + cc, + taken, + not_taken, + } + } + + pub(crate) fn jmp_unknown(target: RM) -> Inst { + Inst::JmpUnknown { target } + } +} + +//============================================================================= +// Instructions: printing + +impl ShowWithRRU for Inst { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + fn ljustify(s: String) -> String { + let w = 7; + if s.len() >= w { + s + } else { + let need = 
usize::min(w, w - s.len()); + s + &format!("{nil: String { + ljustify(s1 + &s2) + } + + fn suffixLQ(is_64: bool) -> String { + (if is_64 { "q" } else { "l" }).to_string() + } + + fn sizeLQ(is_64: bool) -> u8 { + if is_64 { + 8 + } else { + 4 + } + } + + fn suffixBWLQ(size: u8) -> String { + match size { + 1 => "b".to_string(), + 2 => "w".to_string(), + 4 => "l".to_string(), + 8 => "q".to_string(), + _ => panic!("Inst(x64).show.suffixBWLQ: size={}", size), + } + } + + match self { + Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len), + Inst::Alu_RMI_R { + is_64, + op, + src, + dst, + } => format!( + "{} {}, {}", + ljustify2(op.to_string(), suffixLQ(*is_64)), + src.show_rru_sized(mb_rru, sizeLQ(*is_64)), + show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)), + ), + Inst::Imm_R { + dst_is_64, + simm64, + dst, + } => { + if *dst_is_64 { + format!( + "{} ${}, {}", + ljustify("movabsq".to_string()), + *simm64 as i64, + show_ireg_sized(dst.to_reg(), mb_rru, 8) + ) + } else { + format!( + "{} ${}, {}", + ljustify("movl".to_string()), + (*simm64 as u32) as i32, + show_ireg_sized(dst.to_reg(), mb_rru, 4) + ) + } + } + Inst::Mov_R_R { is_64, src, dst } => format!( + "{} {}, {}", + ljustify2("mov".to_string(), suffixLQ(*is_64)), + show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)), + show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)) + ), + Inst::MovZX_M_R { extMode, addr, dst } => { + if *extMode == ExtMode::LQ { + format!( + "{} {}, {}", + ljustify("movl".to_string()), + addr.show_rru(mb_rru), + show_ireg_sized(dst.to_reg(), mb_rru, 4) + ) + } else { + format!( + "{} {}, {}", + ljustify2("movz".to_string(), extMode.to_string()), + addr.show_rru(mb_rru), + show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size()) + ) + } + } + Inst::Mov64_M_R { addr, dst } => format!( + "{} {}, {}", + ljustify("movq".to_string()), + addr.show_rru(mb_rru), + dst.show_rru(mb_rru) + ), + Inst::MovSX_M_R { extMode, addr, dst } => format!( + "{} {}, {}", + ljustify2("movs".to_string(), extMode.to_string()), + addr.show_rru(mb_rru), + show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size()) + ), + Inst::Mov_R_M { size, src, addr } => format!( + "{} {}, {}", + ljustify2("mov".to_string(), suffixBWLQ(*size)), + show_ireg_sized(*src, mb_rru, *size), + addr.show_rru(mb_rru) + ), + Inst::Shift_R { + is_64, + kind, + num_bits, + dst, + } => match num_bits { + None => format!( + "{} %cl, {}", + ljustify2(kind.to_string(), suffixLQ(*is_64)), + show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)) + ), + + Some(num_bits) => format!( + "{} ${}, {}", + ljustify2(kind.to_string(), suffixLQ(*is_64)), + num_bits, + show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)) + ), + }, + Inst::Cmp_RMI_R { size, src, dst } => format!( + "{} {}, {}", + ljustify2("cmp".to_string(), suffixBWLQ(*size)), + src.show_rru_sized(mb_rru, *size), + show_ireg_sized(*dst, mb_rru, *size) + ), + Inst::Push64 { src } => { + format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru)) + } + Inst::Pop64 { dst } => { + format!("{} {}", ljustify("popq".to_string()), dst.show_rru(mb_rru)) + } + //Inst::CallKnown { target } => format!("{} {:?}", ljustify("call".to_string()), target), + Inst::CallKnown { .. 
} => "**CallKnown**".to_string(), + Inst::CallUnknown { dest } => format!( + "{} *{}", + ljustify("call".to_string()), + dest.show_rru(mb_rru) + ), + Inst::Ret => "ret".to_string(), + Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(), + Inst::JmpKnown { dest } => { + format!("{} {}", ljustify("jmp".to_string()), dest.show_rru(mb_rru)) + } + Inst::JmpCondSymm { + cc, + taken, + not_taken, + } => format!( + "{} taken={} not_taken={}", + ljustify2("j".to_string(), cc.to_string()), + taken.show_rru(mb_rru), + not_taken.show_rru(mb_rru) + ), + // + Inst::JmpCond { cc, ref target } => format!( + "{} {}", + ljustify2("j".to_string(), cc.to_string()), + target.show_rru(None) + ), + // + Inst::JmpCondCompound { .. } => "**JmpCondCompound**".to_string(), + Inst::JmpUnknown { target } => format!( + "{} *{}", + ljustify("jmp".to_string()), + target.show_rru(mb_rru) + ), + } + } +} + +// Temp hook for legacy printing machinery +impl fmt::Debug for Inst { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + // Print the insn without a Universe :-( + write!(fmt, "{}", self.show_rru(None)) + } +} + +fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { + // This is a bit subtle. If some register is in the modified set, then it may not be in either + // the use or def sets. However, enforcing that directly is somewhat difficult. Instead, + // regalloc.rs will "fix" this for us by removing the the modified set from the use and def + // sets. + match inst { + // ** Nop + Inst::Alu_RMI_R { + is_64: _, + op: _, + src, + dst, + } => { + src.get_regs_as_uses(collector); + collector.add_mod(*dst); + } + Inst::Imm_R { + dst_is_64: _, + simm64: _, + dst, + } => { + collector.add_def(*dst); + } + Inst::Mov_R_R { is_64: _, src, dst } => { + collector.add_use(*src); + collector.add_def(*dst); + } + Inst::MovZX_M_R { + extMode: _, + addr, + dst, + } => { + addr.get_regs_as_uses(collector); + collector.add_def(*dst); + } + Inst::Mov64_M_R { addr, dst } => { + addr.get_regs_as_uses(collector); + collector.add_def(*dst); + } + Inst::MovSX_M_R { + extMode: _, + addr, + dst, + } => { + addr.get_regs_as_uses(collector); + collector.add_def(*dst); + } + Inst::Mov_R_M { size: _, src, addr } => { + collector.add_use(*src); + addr.get_regs_as_uses(collector); + } + Inst::Shift_R { + is_64: _, + kind: _, + num_bits, + dst, + } => { + if num_bits.is_none() { + collector.add_use(regs::rcx()); + } + collector.add_mod(*dst); + } + Inst::Cmp_RMI_R { size: _, src, dst } => { + src.get_regs_as_uses(collector); + collector.add_use(*dst); // yes, really `add_use` + } + Inst::Push64 { src } => { + src.get_regs_as_uses(collector); + collector.add_mod(Writable::from_reg(regs::rsp())); + } + Inst::Pop64 { dst } => { + collector.add_def(*dst); + } + Inst::CallKnown { + dest: _, + uses: _, + defs: _, + } => { + // FIXME add arg regs (iru.used) and caller-saved regs (iru.defined) + unimplemented!(); + } + Inst::CallUnknown { dest } => { + dest.get_regs_as_uses(collector); + } + Inst::Ret => {} + Inst::EpiloguePlaceholder => {} + Inst::JmpKnown { dest: _ } => {} + Inst::JmpCondSymm { + cc: _, + taken: _, + not_taken: _, + } => {} + // + // ** JmpCond + // + // ** JmpCondCompound + // + //Inst::JmpUnknown { target } => { + // target.get_regs_as_uses(collector); + //} + Inst::Nop { .. } + | Inst::JmpCond { .. } + | Inst::JmpCondCompound { .. } + | Inst::JmpUnknown { .. 
} => unimplemented!("x64_get_regs inst"), + } +} + +//============================================================================= +// Instructions and subcomponents: map_regs + +fn map_use(m: &RegUsageMapper, r: &mut Reg) { + if r.is_virtual() { + let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg(); + *r = new; + } +} + +fn map_def(m: &RegUsageMapper, r: &mut Writable) { + if r.to_reg().is_virtual() { + let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg(); + *r = Writable::from_reg(new); + } +} + +fn map_mod(m: &RegUsageMapper, r: &mut Writable) { + if r.to_reg().is_virtual() { + let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg(); + *r = Writable::from_reg(new); + } +} + +impl Addr { + fn map_uses(&mut self, map: &RegUsageMapper) { + match self { + Addr::IR { + simm32: _, + ref mut base, + } => map_use(map, base), + Addr::IRRS { + simm32: _, + ref mut base, + ref mut index, + shift: _, + } => { + map_use(map, base); + map_use(map, index); + } + } + } +} + +impl RMI { + fn map_uses(&mut self, map: &RegUsageMapper) { + match self { + RMI::R { ref mut reg } => map_use(map, reg), + RMI::M { ref mut addr } => addr.map_uses(map), + RMI::I { simm32: _ } => {} + } + } +} + +impl RM { + fn map_uses(&mut self, map: &RegUsageMapper) { + match self { + RM::R { ref mut reg } => map_use(map, reg), + RM::M { ref mut addr } => addr.map_uses(map), + } + } +} + +fn x64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) { + // Note this must be carefully synchronized with x64_get_regs. + match inst { + // ** Nop + Inst::Alu_RMI_R { + is_64: _, + op: _, + ref mut src, + ref mut dst, + } => { + src.map_uses(mapper); + map_mod(mapper, dst); + } + Inst::Imm_R { + dst_is_64: _, + simm64: _, + ref mut dst, + } => map_def(mapper, dst), + Inst::Mov_R_R { + is_64: _, + ref mut src, + ref mut dst, + } => { + map_use(mapper, src); + map_def(mapper, dst); + } + Inst::MovZX_M_R { + extMode: _, + ref mut addr, + ref mut dst, + } => { + addr.map_uses(mapper); + map_def(mapper, dst); + } + Inst::Mov64_M_R { addr, dst } => { + addr.map_uses(mapper); + map_def(mapper, dst); + } + Inst::MovSX_M_R { + extMode: _, + ref mut addr, + ref mut dst, + } => { + addr.map_uses(mapper); + map_def(mapper, dst); + } + Inst::Mov_R_M { + size: _, + ref mut src, + ref mut addr, + } => { + map_use(mapper, src); + addr.map_uses(mapper); + } + Inst::Shift_R { + is_64: _, + kind: _, + num_bits: _, + ref mut dst, + } => { + map_mod(mapper, dst); + } + Inst::Cmp_RMI_R { + size: _, + ref mut src, + ref mut dst, + } => { + src.map_uses(mapper); + map_use(mapper, dst); + } + Inst::Push64 { ref mut src } => src.map_uses(mapper), + Inst::Pop64 { ref mut dst } => { + map_def(mapper, dst); + } + Inst::CallKnown { + dest: _, + uses: _, + defs: _, + } => {} + Inst::CallUnknown { dest } => dest.map_uses(mapper), + Inst::Ret => {} + Inst::EpiloguePlaceholder => {} + Inst::JmpKnown { dest: _ } => {} + Inst::JmpCondSymm { + cc: _, + taken: _, + not_taken: _, + } => {} + // + // ** JmpCond + // + // ** JmpCondCompound + // + //Inst::JmpUnknown { target } => { + // target.apply_map(mapper); + //} + Inst::Nop { .. } + | Inst::JmpCond { .. } + | Inst::JmpCondCompound { .. } + | Inst::JmpUnknown { .. 
} => unimplemented!("x64_map_regs opcode"), + } +} + +//============================================================================= +// Instructions: misc functions and external interface + +impl MachInst for Inst { + fn get_regs(&self, collector: &mut RegUsageCollector) { + x64_get_regs(&self, collector) + } + + fn map_regs(&mut self, mapper: &RegUsageMapper) { + x64_map_regs(self, mapper); + } + + fn is_move(&self) -> Option<(Writable, Reg)> { + // Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes + // out the upper 32 bits of the destination. For example, we could + // conceivably use `movl %reg, %reg` to zero out the top 32 bits of + // %reg. + match self { + Self::Mov_R_R { is_64, src, dst } if *is_64 => Some((*dst, *src)), + _ => None, + } + } + + fn is_epilogue_placeholder(&self) -> bool { + if let Self::EpiloguePlaceholder = self { + true + } else { + false + } + } + + fn is_term<'a>(&'a self) -> MachTerminator<'a> { + match self { + // Interesting cases. + &Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret, + &Self::JmpKnown { dest } => MachTerminator::Uncond(dest.as_block_index().unwrap()), + &Self::JmpCondSymm { + cc: _, + taken, + not_taken, + } => MachTerminator::Cond( + taken.as_block_index().unwrap(), + not_taken.as_block_index().unwrap(), + ), + &Self::JmpCond { .. } | &Self::JmpCondCompound { .. } => { + panic!("is_term() called after lowering branches"); + } + // All other cases are boring. + _ => MachTerminator::None, + } + } + + fn gen_move(dst_reg: Writable, src_reg: Reg, _ty: Type) -> Inst { + let rc_dst = dst_reg.to_reg().get_class(); + let rc_src = src_reg.get_class(); + // If this isn't true, we have gone way off the rails. + debug_assert!(rc_dst == rc_src); + match rc_dst { + RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg), + _ => panic!("gen_move(x64): unhandled regclass"), + } + } + + fn gen_zero_len_nop() -> Inst { + unimplemented!() + } + + fn gen_nop(_preferred_size: usize) -> Inst { + unimplemented!() + } + + fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option { + None + } + + fn rc_for_type(ty: Type) -> CodegenResult { + match ty { + I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64), + F32 | F64 | I128 | B128 => Ok(RegClass::V128), + _ => Err(CodegenError::Unsupported(format!( + "Unexpected SSA-value type: {}", + ty + ))), + } + } + + fn gen_jump(blockindex: BlockIndex) -> Inst { + Inst::jmp_known(BranchTarget::Block(blockindex)) + } + + fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]) { + // This is identical (modulo renaming) to the arm64 version. + match self { + &mut Inst::JmpKnown { ref mut dest } => { + dest.map(block_target_map); + } + &mut Inst::JmpCondSymm { + cc: _, + ref mut taken, + ref mut not_taken, + } => { + taken.map(block_target_map); + not_taken.map(block_target_map); + } + &mut Inst::JmpCond { .. } | &mut Inst::JmpCondCompound { .. } => { + panic!("with_block_rewrites called after branch lowering!"); + } + _ => {} + } + } + + fn with_fallthrough_block(&mut self, fallthrough: Option) { + // This is identical (modulo renaming) to the arm64 version. + match self { + &mut Inst::JmpCondSymm { + cc, + taken, + not_taken, + } => { + if taken.as_block_index() == fallthrough { + *self = Inst::jmp_cond(cc.invert(), not_taken); + } else if not_taken.as_block_index() == fallthrough { + *self = Inst::jmp_cond(cc, taken); + } else { + // We need a compound sequence (condbr / uncond-br). 
+ *self = Inst::jmp_cond_compound(cc, taken, not_taken); + } + } + &mut Inst::JmpKnown { dest } => { + if dest.as_block_index() == fallthrough { + *self = Inst::nop(0); + } + } + _ => {} + } + } + + fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]) { + // This is identical (modulo renaming) to the arm64 version. + match self { + &mut Self::JmpCond { + cc: _, + ref mut target, + } => { + target.lower(targets, my_offset); + } + &mut Self::JmpCondCompound { + cc: _, + ref mut taken, + ref mut not_taken, + .. + } => { + taken.lower(targets, my_offset); + not_taken.lower(targets, my_offset); + } + &mut Self::JmpKnown { ref mut dest } => { + dest.lower(targets, my_offset); + } + _ => {} + } + } + + fn reg_universe(flags: &settings::Flags) -> RealRegUniverse { + create_reg_universe_systemv(flags) + } +} + +impl MachInstEmit for Inst { + fn emit(&self, sink: &mut O, _flags: &settings::Flags) { + emit::emit(self, sink); + } +} diff --git a/cranelift/codegen/src/isa/x64/inst/regs.rs b/cranelift/codegen/src/isa/x64/inst/regs.rs new file mode 100644 index 0000000000..bb8f05fb50 --- /dev/null +++ b/cranelift/codegen/src/isa/x64/inst/regs.rs @@ -0,0 +1,261 @@ +//! Registers, the Universe thereof, and printing. +//! +//! These are ordered by sequence number, as required in the Universe. The strange ordering is +//! intended to make callee-save registers available before caller-saved ones. This is a net win +//! provided that each function makes at least one onward call. It'll be a net loss for leaf +//! functions, and we should change the ordering in that case, so as to make caller-save regs +//! available first. +//! +//! TODO Maybe have two different universes, one for leaf functions and one for non-leaf functions? +//! Also, they will have to be ABI dependent. Need to find a way to avoid constructing a universe +//! for each function we compile. + +use alloc::vec::Vec; +use std::string::String; + +use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES}; + +use crate::machinst::pretty_print::ShowWithRRU; +use crate::settings; + +// Hardware encodings for a few registers. 
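// A small sketch of the distinction kept below between a register's hardware
// encoding (fixed by the ISA) and its index in the allocation universe (chosen
// so callee-saved registers come first); the pairs are illustrative and mirror
// the constructors that follow.
struct GprSlot { name: &'static str, hw_enc: u8, universe_index: u8 }

const GPR_ORDER_EXAMPLE: &[GprSlot] = &[
    GprSlot { name: "%r12", hw_enc: 12, universe_index: 0 }, // callee-saved first
    GprSlot { name: "%rbx", hw_enc: 3, universe_index: 4 },
    GprSlot { name: "%rax", hw_enc: 0, universe_index: 7 },  // caller-saved later
];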
+ +pub const ENC_RBX: u8 = 3; +pub const ENC_RSP: u8 = 4; +pub const ENC_RBP: u8 = 5; +pub const ENC_R12: u8 = 12; +pub const ENC_R13: u8 = 13; +pub const ENC_R14: u8 = 14; +pub const ENC_R15: u8 = 15; + +fn gpr(enc: u8, index: u8) -> Reg { + Reg::new_real(RegClass::I64, enc, index) +} + +pub(crate) fn r12() -> Reg { + gpr(ENC_R12, 0) +} +pub(crate) fn r13() -> Reg { + gpr(ENC_R13, 1) +} +pub(crate) fn r14() -> Reg { + gpr(ENC_R14, 2) +} +pub(crate) fn r15() -> Reg { + gpr(ENC_R15, 3) +} +pub(crate) fn rbx() -> Reg { + gpr(ENC_RBX, 4) +} +pub(crate) fn rsi() -> Reg { + gpr(6, 5) +} +pub(crate) fn rdi() -> Reg { + gpr(7, 6) +} +pub(crate) fn rax() -> Reg { + gpr(0, 7) +} +pub(crate) fn rcx() -> Reg { + gpr(1, 8) +} +pub(crate) fn rdx() -> Reg { + gpr(2, 9) +} +pub(crate) fn r8() -> Reg { + gpr(8, 10) +} +pub(crate) fn r9() -> Reg { + gpr(9, 11) +} +pub(crate) fn r10() -> Reg { + gpr(10, 12) +} +pub(crate) fn r11() -> Reg { + gpr(11, 13) +} + +fn fpr(enc: u8, index: u8) -> Reg { + Reg::new_real(RegClass::V128, enc, index) +} +fn xmm0() -> Reg { + fpr(0, 14) +} +fn xmm1() -> Reg { + fpr(1, 15) +} +fn xmm2() -> Reg { + fpr(2, 16) +} +fn xmm3() -> Reg { + fpr(3, 17) +} +fn xmm4() -> Reg { + fpr(4, 18) +} +fn xmm5() -> Reg { + fpr(5, 19) +} +fn xmm6() -> Reg { + fpr(6, 20) +} +fn xmm7() -> Reg { + fpr(7, 21) +} +fn xmm8() -> Reg { + fpr(8, 22) +} +fn xmm9() -> Reg { + fpr(9, 23) +} +fn xmm10() -> Reg { + fpr(10, 24) +} +fn xmm11() -> Reg { + fpr(11, 25) +} +fn xmm12() -> Reg { + fpr(12, 26) +} +fn xmm13() -> Reg { + fpr(13, 27) +} +fn xmm14() -> Reg { + fpr(14, 28) +} +fn xmm15() -> Reg { + fpr(15, 29) +} + +pub(crate) fn rsp() -> Reg { + gpr(ENC_RSP, 30) +} +pub(crate) fn rbp() -> Reg { + gpr(ENC_RBP, 31) +} + +/// Create the register universe for X64. +/// +/// The ordering of registers matters, as commented in the file doc comment: assumes the +/// calling-convention is SystemV, at the moment. +pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUniverse { + let mut regs = Vec::<(RealReg, String)>::new(); + let mut allocable_by_class = [None; NUM_REG_CLASSES]; + + // Integer regs. + let mut base = regs.len(); + + // Callee-saved, in the SystemV x86_64 ABI. + regs.push((r12().to_real_reg(), "%r12".into())); + regs.push((r13().to_real_reg(), "%r13".into())); + regs.push((r14().to_real_reg(), "%r14".into())); + regs.push((r15().to_real_reg(), "%r15".into())); + regs.push((rbx().to_real_reg(), "%rbx".into())); + + // Caller-saved, in the SystemV x86_64 ABI. 
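// For reference, a sketch of the System V x86-64 register split that the
// ordering above and below relies on; the sets are standard ABI facts, the
// helper itself is illustrative only.
fn sysv_callee_saved_gpr(name: &str) -> bool {
    matches!(name, "%rbx" | "%rbp" | "%r12" | "%r13" | "%r14" | "%r15")
}
// The remaining GPRs (%rax, %rcx, %rdx, %rsi, %rdi, %r8..%r11) are caller-saved,
// and %rsp is special-cased as the stack pointer.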
+ regs.push((rsi().to_real_reg(), "%rsi".into())); + regs.push((rdi().to_real_reg(), "%rdi".into())); + regs.push((rax().to_real_reg(), "%rax".into())); + regs.push((rcx().to_real_reg(), "%rcx".into())); + regs.push((rdx().to_real_reg(), "%rdx".into())); + regs.push((r8().to_real_reg(), "%r8".into())); + regs.push((r9().to_real_reg(), "%r9".into())); + regs.push((r10().to_real_reg(), "%r10".into())); + regs.push((r11().to_real_reg(), "%r11".into())); + + allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo { + first: base, + last: regs.len() - 1, + suggested_scratch: Some(r12().get_index()), + }); + + // XMM registers + base = regs.len(); + regs.push((xmm0().to_real_reg(), "%xmm0".into())); + regs.push((xmm1().to_real_reg(), "%xmm1".into())); + regs.push((xmm2().to_real_reg(), "%xmm2".into())); + regs.push((xmm3().to_real_reg(), "%xmm3".into())); + regs.push((xmm4().to_real_reg(), "%xmm4".into())); + regs.push((xmm5().to_real_reg(), "%xmm5".into())); + regs.push((xmm6().to_real_reg(), "%xmm6".into())); + regs.push((xmm7().to_real_reg(), "%xmm7".into())); + regs.push((xmm8().to_real_reg(), "%xmm8".into())); + regs.push((xmm9().to_real_reg(), "%xmm9".into())); + regs.push((xmm10().to_real_reg(), "%xmm10".into())); + regs.push((xmm11().to_real_reg(), "%xmm11".into())); + regs.push((xmm12().to_real_reg(), "%xmm12".into())); + regs.push((xmm13().to_real_reg(), "%xmm13".into())); + regs.push((xmm14().to_real_reg(), "%xmm14".into())); + regs.push((xmm15().to_real_reg(), "%xmm15".into())); + + allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo { + first: base, + last: regs.len() - 1, + suggested_scratch: Some(xmm15().get_index()), + }); + + // Other regs, not available to the allocator. + let allocable = regs.len(); + regs.push((rsp().to_real_reg(), "%rsp".into())); + regs.push((rbp().to_real_reg(), "%rbp".into())); + + RealRegUniverse { + regs, + allocable, + allocable_by_class, + } +} + +/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show its name at some +/// smaller size (4, 2 or 1 bytes). +pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { + let mut s = reg.show_rru(mb_rru); + + if reg.get_class() != RegClass::I64 || size == 8 { + // We can't do any better. + return s; + } + + if reg.is_real() { + // Change (eg) "rax" into "eax", "ax" or "al" as appropriate. This is something one could + // describe diplomatically as "a kludge", but it's only debug code. 
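// A compact sketch of the universe layout constructed above: allocatable
// registers form a prefix of the vector (grouped per class, with first/last
// index ranges recorded), and registers the allocator must never hand out
// (%rsp, %rbp) are appended after the `allocable` cut-off.
struct MiniUniverse { names: Vec<&'static str>, allocable: usize }

fn mini_universe() -> MiniUniverse {
    let mut names = vec!["%r12", "%r13", "%rbx", "%rax", "%xmm0"]; // allocatable prefix
    let allocable = names.len();
    names.push("%rsp"); // still listed for printing and ABI code, but never allocated
    names.push("%rbp");
    MiniUniverse { names, allocable }
}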
+ let remapper = match s.as_str() { + "%rax" => Some(["%eax", "%ax", "%al"]), + "%rbx" => Some(["%ebx", "%bx", "%bl"]), + "%rcx" => Some(["%ecx", "%cx", "%cl"]), + "%rdx" => Some(["%edx", "%dx", "%dl"]), + "%rsi" => Some(["%esi", "%si", "%sil"]), + "%rdi" => Some(["%edi", "%di", "%dil"]), + "%rbp" => Some(["%ebp", "%bp", "%bpl"]), + "%rsp" => Some(["%esp", "%sp", "%spl"]), + "%r8" => Some(["%r8d", "%r8w", "%r8b"]), + "%r9" => Some(["%r9d", "%r9w", "%r9b"]), + "%r10" => Some(["%r10d", "%r10w", "%r10b"]), + "%r11" => Some(["%r11d", "%r11w", "%r11b"]), + "%r12" => Some(["%r12d", "%r12w", "%r12b"]), + "%r13" => Some(["%r13d", "%r13w", "%r13b"]), + "%r14" => Some(["%r14d", "%r14w", "%r14b"]), + "%r15" => Some(["%r15d", "%r15w", "%r15b"]), + _ => None, + }; + if let Some(smaller_names) = remapper { + match size { + 4 => s = smaller_names[0].into(), + 2 => s = smaller_names[1].into(), + 1 => s = smaller_names[2].into(), + _ => panic!("show_ireg_sized: real"), + } + } + } else { + // Add a "l", "w" or "b" suffix to RegClass::I64 vregs used at narrower widths. + let suffix = match size { + 4 => "l", + 2 => "w", + 1 => "b", + _ => panic!("show_ireg_sized: virtual"), + }; + s = s + suffix; + } + + s +} diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs new file mode 100644 index 0000000000..23a190e578 --- /dev/null +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -0,0 +1,358 @@ +//! Lowering rules for X64. + +#![allow(dead_code)] +#![allow(non_snake_case)] + +use regalloc::{Reg, Writable}; + +use crate::ir::condcodes::IntCC; +use crate::ir::types; +use crate::ir::Inst as IRInst; +use crate::ir::{InstructionData, Opcode, Type}; + +use crate::machinst::lower::*; +use crate::machinst::*; + +use crate::isa::x64::inst::args::*; +use crate::isa::x64::inst::*; +use crate::isa::x64::X64Backend; + +/// Context passed to all lowering functions. +type Ctx<'a> = &'a mut dyn LowerCtx; + +//============================================================================= +// Helpers for instruction lowering. + +fn is_int_ty(ty: Type) -> bool { + match ty { + types::I8 | types::I16 | types::I32 | types::I64 => true, + _ => false, + } +} + +fn int_ty_to_is64(ty: Type) -> bool { + match ty { + types::I8 | types::I16 | types::I32 => false, + types::I64 => true, + _ => panic!("type {} is none of I8, I16, I32 or I64", ty), + } +} + +fn int_ty_to_sizeB(ty: Type) -> u8 { + match ty { + types::I8 => 1, + types::I16 => 2, + types::I32 => 4, + types::I64 => 8, + _ => panic!("ity_to_sizeB"), + } +} + +fn iri_to_u64_immediate<'a>(ctx: Ctx<'a>, iri: IRInst) -> Option { + let inst_data = ctx.data(iri); + if inst_data.opcode() == Opcode::Null { + Some(0) + } else { + match inst_data { + &InstructionData::UnaryImm { opcode: _, imm } => { + // Only has Into for i64; we use u64 elsewhere, so we cast. + let imm: i64 = imm.into(); + Some(imm as u64) + } + _ => None, + } + } +} + +fn inst_condcode(data: &InstructionData) -> IntCC { + match data { + &InstructionData::IntCond { cond, .. } + | &InstructionData::BranchIcmp { cond, .. } + | &InstructionData::IntCompare { cond, .. } + | &InstructionData::IntCondTrap { cond, .. } + | &InstructionData::BranchInt { cond, .. } + | &InstructionData::IntSelect { cond, .. } + | &InstructionData::IntCompareImm { cond, .. 
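// A tiny value-level check of the cast used in iri_to_u64_immediate above: an
// `as` conversion between integers of the same width only reinterprets the bit
// pattern, so a negative immediate keeps its two's-complement bits.
fn imm_bits(imm: i64) -> u64 {
    imm as u64
}
// imm_bits(-1) == 0xFFFF_FFFF_FFFF_FFFF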
} => cond, + _ => panic!("inst_condcode(x64): unhandled: {:?}", data), + } +} + +fn intCC_to_x64_CC(cc: IntCC) -> CC { + match cc { + IntCC::Equal => CC::Z, + IntCC::NotEqual => CC::NZ, + IntCC::SignedGreaterThanOrEqual => CC::NL, + IntCC::SignedGreaterThan => CC::NLE, + IntCC::SignedLessThanOrEqual => CC::LE, + IntCC::SignedLessThan => CC::L, + IntCC::UnsignedGreaterThanOrEqual => CC::NB, + IntCC::UnsignedGreaterThan => CC::NBE, + IntCC::UnsignedLessThanOrEqual => CC::BE, + IntCC::UnsignedLessThan => CC::B, + IntCC::Overflow => CC::O, + IntCC::NotOverflow => CC::NO, + } +} + +//============================================================================= +// Top-level instruction lowering entry point, for one instruction. + +/// Actually codegen an instruction's results into registers. +fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) { + let op = ctx.data(iri).opcode(); + let ty = if ctx.num_outputs(iri) == 1 { + Some(ctx.output_ty(iri, 0)) + } else { + None + }; + + // This is all outstandingly feeble. TODO: much better! + + match op { + Opcode::Iconst => { + if let Some(w64) = iri_to_u64_immediate(ctx, iri) { + // Get exactly the bit pattern in 'w64' into the dest. No + // monkeying with sign extension etc. + let dstIs64 = w64 > 0xFFFF_FFFF; + let regD = ctx.output(iri, 0); + ctx.emit(Inst::imm_r(dstIs64, w64, regD)); + } else { + unimplemented!(); + } + } + + Opcode::Iadd | Opcode::Isub => { + let regD = ctx.output(iri, 0); + let regL = ctx.input(iri, 0); + let regR = ctx.input(iri, 1); + let is64 = int_ty_to_is64(ty.unwrap()); + let how = if op == Opcode::Iadd { + RMI_R_Op::Add + } else { + RMI_R_Op::Sub + }; + ctx.emit(Inst::mov_r_r(true, regL, regD)); + ctx.emit(Inst::alu_rmi_r(is64, how, RMI::reg(regR), regD)); + } + + Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => { + // TODO: implement imm shift value into insn + let tySL = ctx.input_ty(iri, 0); + let tyD = ctx.output_ty(iri, 0); // should be the same as tySL + let regSL = ctx.input(iri, 0); + let regSR = ctx.input(iri, 1); + let regD = ctx.output(iri, 0); + if tyD == tySL && (tyD == types::I32 || tyD == types::I64) { + let how = match op { + Opcode::Ishl => ShiftKind::Left, + Opcode::Ushr => ShiftKind::RightZ, + Opcode::Sshr => ShiftKind::RightS, + _ => unreachable!(), + }; + let is64 = tyD == types::I64; + let r_rcx = regs::rcx(); + let w_rcx = Writable::::from_reg(r_rcx); + ctx.emit(Inst::mov_r_r(true, regSL, regD)); + ctx.emit(Inst::mov_r_r(true, regSR, w_rcx)); + ctx.emit(Inst::shift_r(is64, how, None /*%cl*/, regD)); + } else { + unimplemented!() + } + } + + Opcode::Uextend | Opcode::Sextend => { + // TODO: this is all extremely lame, all because Mov{ZX,SX}_M_R + // don't accept a register source operand. They should be changed + // so as to have _RM_R form. + // TODO2: if the source operand is a load, incorporate that. 
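// A schematic sketch (plain strings rather than real Inst values) of the
// two-address lowering used for Iadd/Isub above: x86 ALU instructions overwrite
// their destination, so the three-operand IR form d = a op b is emitted as a
// copy followed by an in-place op.
fn lower_binop(mnemonic: &str, a: &str, b: &str, d: &str) -> [String; 2] {
    [
        format!("movq {}, {}", a, d),          // d := a
        format!("{}q {}, {}", mnemonic, b, d), // d := d op b
    ]
}
// lower_binop("add", "%v0", "%v1", "%v2") gives ["movq %v0, %v2", "addq %v1, %v2"].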
+ let isZX = op == Opcode::Uextend; + let tyS = ctx.input_ty(iri, 0); + let tyD = ctx.output_ty(iri, 0); + let regS = ctx.input(iri, 0); + let regD = ctx.output(iri, 0); + ctx.emit(Inst::mov_r_r(true, regS, regD)); + match (tyS, tyD, isZX) { + (types::I8, types::I64, false) => { + ctx.emit(Inst::shift_r(true, ShiftKind::Left, Some(56), regD)); + ctx.emit(Inst::shift_r(true, ShiftKind::RightS, Some(56), regD)); + } + _ => unimplemented!(), + } + } + + Opcode::FallthroughReturn | Opcode::Return => { + for i in 0..ctx.num_inputs(iri) { + let src_reg = ctx.input(iri, i); + let retval_reg = ctx.retval(i); + ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg)); + } + // N.B.: the Ret itself is generated by the ABI. + } + + Opcode::IaddImm + | Opcode::ImulImm + | Opcode::UdivImm + | Opcode::SdivImm + | Opcode::UremImm + | Opcode::SremImm + | Opcode::IrsubImm + | Opcode::IaddCin + | Opcode::IaddIfcin + | Opcode::IaddCout + | Opcode::IaddIfcout + | Opcode::IaddCarry + | Opcode::IaddIfcarry + | Opcode::IsubBin + | Opcode::IsubIfbin + | Opcode::IsubBout + | Opcode::IsubIfbout + | Opcode::IsubBorrow + | Opcode::IsubIfborrow + | Opcode::BandImm + | Opcode::BorImm + | Opcode::BxorImm + | Opcode::RotlImm + | Opcode::RotrImm + | Opcode::IshlImm + | Opcode::UshrImm + | Opcode::SshrImm => { + panic!("ALU+imm and ALU+carry ops should not appear here!"); + } + + Opcode::X86Udivmodx + | Opcode::X86Sdivmodx + | Opcode::X86Umulx + | Opcode::X86Smulx + | Opcode::X86Cvtt2si + | Opcode::X86Fmin + | Opcode::X86Fmax + | Opcode::X86Push + | Opcode::X86Pop + | Opcode::X86Bsr + | Opcode::X86Bsf + | Opcode::X86Pshufd + | Opcode::X86Pshufb + | Opcode::X86Pextr + | Opcode::X86Pinsr + | Opcode::X86Insertps + | Opcode::X86Movsd + | Opcode::X86Movlhps + | Opcode::X86Psll + | Opcode::X86Psrl + | Opcode::X86Psra + | Opcode::X86Ptest + | Opcode::X86Pmaxs + | Opcode::X86Pmaxu + | Opcode::X86Pmins + | Opcode::X86Pminu => { + panic!("x86-specific opcode in supposedly arch-neutral IR!"); + } + + _ => unimplemented!("unimplemented lowering for opcode {:?}", op), + } +} + +//============================================================================= +// Lowering-backend trait implementation. + +impl LowerBackend for X64Backend { + type MInst = Inst; + + fn lower>(&self, ctx: &mut C, ir_inst: IRInst) { + lower_insn_to_regs(ctx, ir_inst); + } + + fn lower_branch_group>( + &self, + ctx: &mut C, + branches: &[IRInst], + targets: &[BlockIndex], + fallthrough: Option, + ) { + // A block should end with at most two branches. The first may be a + // conditional branch; a conditional branch can be followed only by an + // unconditional branch or fallthrough. Otherwise, if only one branch, + // it may be an unconditional branch, a fallthrough, a return, or a + // trap. These conditions are verified by `is_ebb_basic()` during the + // verifier pass. + assert!(branches.len() <= 2); + + let mut unimplemented = false; + + if branches.len() == 2 { + // Must be a conditional branch followed by an unconditional branch. + let op0 = ctx.data(branches[0]).opcode(); + let op1 = ctx.data(branches[1]).opcode(); + + println!( + "QQQQ lowering two-branch group: opcodes are {:?} and {:?}", + op0, op1 + ); + + assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough); + let taken = BranchTarget::Block(targets[0]); + let not_taken = match op1 { + Opcode::Jump => BranchTarget::Block(targets[1]), + Opcode::Fallthrough => BranchTarget::Block(fallthrough.unwrap()), + _ => unreachable!(), // assert above. 
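// A value-level check of the Sextend lowering above for i8 -> i64: shifting the
// low byte to the top of the word and arithmetically shifting it back is exactly
// sign extension, which is what the paired shl-by-56 / sar-by-56 emitted above
// computes.
fn sext8_via_shifts(x: u64) -> i64 {
    ((x << 56) as i64) >> 56
}
// sext8_via_shifts(0xFF) == -1, the same as (0xFFu8 as i8) as i64.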
+ }; + match op0 { + Opcode::Brz | Opcode::Brnz => { + let tyS = ctx.input_ty(branches[0], 0); + if is_int_ty(tyS) { + let rS = ctx.input(branches[0], 0); + let cc = match op0 { + Opcode::Brz => CC::Z, + Opcode::Brnz => CC::NZ, + _ => unreachable!(), + }; + let sizeB = int_ty_to_sizeB(tyS); + ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::imm(0), rS)); + ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken)); + } else { + unimplemented = true; + } + } + Opcode::BrIcmp => { + let tyS = ctx.input_ty(branches[0], 0); + if is_int_ty(tyS) { + let rSL = ctx.input(branches[0], 0); + let rSR = ctx.input(branches[0], 1); + let cc = intCC_to_x64_CC(inst_condcode(ctx.data(branches[0]))); + let sizeB = int_ty_to_sizeB(tyS); + // FIXME verify rSR vs rSL ordering + ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::reg(rSR), rSL)); + ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken)); + } else { + unimplemented = true; + } + } + // TODO: Brif/icmp, Brff/icmp, jump tables + _ => { + unimplemented = true; + } + } + } else { + assert!(branches.len() == 1); + + // Must be an unconditional branch or trap. + let op = ctx.data(branches[0]).opcode(); + match op { + Opcode::Jump => { + ctx.emit(Inst::jmp_known(BranchTarget::Block(targets[0]))); + } + Opcode::Fallthrough => { + ctx.emit(Inst::jmp_known(BranchTarget::Block(targets[0]))); + } + Opcode::Trap => { + unimplemented = true; + } + _ => panic!("Unknown branch type!"), + } + } + + if unimplemented { + unimplemented!("lower_branch_group(x64): can't handle: {:?}", branches); + } + } +} diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs new file mode 100644 index 0000000000..80a031cef1 --- /dev/null +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -0,0 +1,92 @@ +//! X86_64-bit Instruction Set Architecture. + +use alloc::boxed::Box; + +use regalloc::RealRegUniverse; +use target_lexicon::Triple; + +use crate::ir::Function; +use crate::isa::Builder as IsaBuilder; +use crate::machinst::pretty_print::ShowWithRRU; +use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode}; +use crate::result::CodegenResult; +use crate::settings::{self, Flags}; + +use crate::isa::x64::inst::regs::create_reg_universe_systemv; + +mod abi; +mod inst; +mod lower; + +/// An X64 backend. +pub(crate) struct X64Backend { + triple: Triple, + flags: Flags, +} + +impl X64Backend { + /// Create a new X64 backend with the given (shared) flags. + fn new_with_flags(triple: Triple, flags: Flags) -> Self { + Self { triple, flags } + } + + fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult> { + // This performs lowering to VCode, register-allocates the code, computes + // block layout and finalizes branches. The result is ready for binary emission. 
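// A schematic sketch (strings instead of Inst values, 64-bit operands assumed)
// of the Brz/Brnz lowering above: the test is materialized as a compare against
// zero, then a conditional jump to the taken edge and an unconditional jump to
// the other edge; later passes may fold one of the jumps into a fallthrough.
fn lower_brz(reg: &str, taken: &str, not_taken: &str) -> [String; 3] {
    [
        format!("cmpq $0, {}", reg),
        format!("jz {}", taken),      // Brz branches when the value is zero
        format!("jmp {}", not_taken), // otherwise continue to the other block
    ]
}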
+ let abi = Box::new(abi::X64ABIBody::new(&func, flags)); + compile::compile::(&func, self, abi) + } +} + +impl MachBackend for X64Backend { + fn compile_function( + &self, + func: &Function, + want_disasm: bool, + ) -> CodegenResult { + let flags = self.flags(); + let vcode = self.compile_vcode(func, flags.clone())?; + let sections = vcode.emit(); + let frame_size = vcode.frame_size(); + + let disasm = if want_disasm { + Some(vcode.show_rru(Some(&create_reg_universe_systemv(flags)))) + } else { + None + }; + + Ok(MachCompileResult { + sections, + frame_size, + disasm, + }) + } + + fn flags(&self) -> &Flags { + &self.flags + } + + fn name(&self) -> &'static str { + "x64" + } + + fn triple(&self) -> Triple { + self.triple.clone() + } + + fn reg_universe(&self) -> RealRegUniverse { + create_reg_universe_systemv(&self.flags) + } +} + +/// Create a new `isa::Builder`. +pub(crate) fn isa_builder(triple: Triple) -> IsaBuilder { + IsaBuilder { + triple, + setup: settings::builder(), + constructor: |triple: Triple, flags: Flags, _arch_flag_builder: settings::Builder| { + let backend = X64Backend::new_with_flags(triple, flags); + Box::new(TargetIsaAdapter::new(backend)) + }, + } +} diff --git a/cranelift/codegen/src/isa/x86/mod.rs b/cranelift/codegen/src/isa/x86/mod.rs index 03e167e07d..9386e60310 100644 --- a/cranelift/codegen/src/isa/x86/mod.rs +++ b/cranelift/codegen/src/isa/x86/mod.rs @@ -53,12 +53,23 @@ fn isa_constructor( PointerWidth::U32 => &enc_tables::LEVEL1_I32[..], PointerWidth::U64 => &enc_tables::LEVEL1_I64[..], }; - Box::new(Isa { - triple, - isa_flags: settings::Flags::new(&shared_flags, builder), - shared_flags, - cpumode: level1, - }) + + let isa_flags = settings::Flags::new(&shared_flags, builder); + + if isa_flags.use_new_backend() { + #[cfg(not(feature = "x64"))] + panic!("new backend x86 support not included by cargo features!"); + + #[cfg(feature = "x64")] + super::x64::isa_builder(triple).finish(shared_flags) + } else { + Box::new(Isa { + triple, + isa_flags, + shared_flags, + cpumode: level1, + }) + } } impl TargetIsa for Isa {
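// A hedged usage sketch: assuming the standard cranelift-codegen entry points
// (isa::lookup, settings::Configurable, and Builder::finish as used elsewhere in
// this patch), the new backend would be requested by setting the x86 flag
// defined above; the exact call sites below are illustrative, not part of the
// change itself.
use cranelift_codegen::isa;
use cranelift_codegen::settings::{self, Configurable};
use target_lexicon::Triple;

fn host_isa_with_new_backend() -> Box<dyn isa::TargetIsa> {
    let shared = settings::Flags::new(settings::builder());
    let mut builder = isa::lookup(Triple::host()).expect("host should be x86_64");
    builder
        .set("use_new_backend", "true")
        .expect("flag exists when the `x64` cargo feature is enabled");
    builder.finish(shared)
}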