diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 7a30a755ee..be8d65ecc2 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -549,9 +549,9 @@ fn define_simd_lane_access( r#" Vector swizzle. - Returns a new vector with byte-width lanes selected from the lanes of the first input - vector ``x`` specified in the second input vector ``s``. The indices ``i`` in range - ``[0, 15]`` select the ``i``-th element of ``x``. For indices outside of the range the + Returns a new vector with byte-width lanes selected from the lanes of the first input + vector ``x`` specified in the second input vector ``s``. The indices ``i`` in range + ``[0, 15]`` select the ``i``-th element of ``x``. For indices outside of the range the resulting lane is 0. Note that this operates on byte-width lanes. "#, &formats.binary, @@ -1176,7 +1176,7 @@ pub(crate) fn define( Inst::new( "uload8x8", r#" - Load an 8x8 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i16x8 + Load an 8x8 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i16x8 vector. "#, &formats.load, @@ -1190,7 +1190,7 @@ pub(crate) fn define( Inst::new( "uload8x8_complex", r#" - Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an + Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an i16x8 vector. "#, &formats.load_complex, @@ -1204,7 +1204,7 @@ pub(crate) fn define( Inst::new( "sload8x8", r#" - Load an 8x8 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i16x8 + Load an 8x8 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i16x8 vector. "#, &formats.load, @@ -1218,7 +1218,7 @@ pub(crate) fn define( Inst::new( "sload8x8_complex", r#" - Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an + Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an i16x8 vector. "#, &formats.load_complex, @@ -1243,7 +1243,7 @@ pub(crate) fn define( Inst::new( "uload16x4", r#" - Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4 + Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4 vector. "#, &formats.load, @@ -1257,7 +1257,7 @@ pub(crate) fn define( Inst::new( "uload16x4_complex", r#" - Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an + Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an i32x4 vector. "#, &formats.load_complex, @@ -1271,7 +1271,7 @@ pub(crate) fn define( Inst::new( "sload16x4", r#" - Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i32x4 + Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i32x4 vector. "#, &formats.load, @@ -1285,7 +1285,7 @@ pub(crate) fn define( Inst::new( "sload16x4_complex", r#" - Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an + Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an i32x4 vector. 
"#, &formats.load_complex, @@ -1310,7 +1310,7 @@ pub(crate) fn define( Inst::new( "uload32x2", r#" - Load an 32x2 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i64x2 + Load an 32x2 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i64x2 vector. "#, &formats.load, @@ -1324,7 +1324,7 @@ pub(crate) fn define( Inst::new( "uload32x2_complex", r#" - Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an + Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an i64x2 vector. "#, &formats.load_complex, @@ -1338,7 +1338,7 @@ pub(crate) fn define( Inst::new( "sload32x2", r#" - Load a 32x2 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i64x2 + Load a 32x2 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i64x2 vector. "#, &formats.load, @@ -1352,7 +1352,7 @@ pub(crate) fn define( Inst::new( "sload32x2_complex", r#" - Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an + Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an i64x2 vector. "#, &formats.load_complex, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index bd14cf0ba7..1a5563d62a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -899,7 +899,7 @@ pub enum Inst { }, /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This - /// controls MemArg::NominalSPOffset args are lowered. + /// controls how MemArg::NominalSPOffset args are lowered. VirtualSPOffsetAdj { offset: i64, }, diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 0188dc2d4a..e4dab4334f 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -1,37 +1,59 @@ //! Implementation of the standard x64 ABI. use alloc::vec::Vec; +use log::trace; use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; +use std::mem; use crate::ir::{self, types, types::*, ArgumentExtension, StackSlot, Type}; use crate::isa::{self, x64::inst::*}; use crate::machinst::*; use crate::settings; +use crate::{CodegenError, CodegenResult}; use args::*; -#[derive(Clone, Debug)] -enum ABIArg { - Reg(RealReg), - _Stack, -} +/// This is the limit for the size of argument and return-value areas on the +/// stack. We place a reasonable limit here to avoid integer overflow issues +/// with 32-bit arithmetic: for now, 128 MB. +static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024; #[derive(Clone, Debug)] -enum ABIRet { - Reg(RealReg), - _Stack, +enum ABIArg { + Reg(RealReg, ir::Type), + Stack(i64, ir::Type), +} + +/// X64 ABI information shared between body (callee) and caller. +struct ABISig { + /// Argument locations (regs or stack slots). Stack offsets are relative to + /// SP on entry to function. + args: Vec, + /// Return-value locations. Stack offsets are relative to the return-area + /// pointer. + rets: Vec, + /// Space on stack used to store arguments. + stack_arg_space: i64, + /// Space on stack used to store return values. + stack_ret_space: i64, + /// Index in `args` of the stack-return-value-area argument. + stack_ret_arg: Option, + /// Calling convention used. + call_conv: isa::CallConv, } pub(crate) struct X64ABIBody { - args: Vec, - rets: Vec, + sig: ABISig, /// Offsets to each stack slot. 
- _stack_slots: Vec, + stack_slots: Vec, /// Total stack size of all the stack slots. stack_slots_size: usize, + /// The register holding the return-area pointer, if needed. + ret_area_ptr: Option>, + /// Clobbered registers, as indicated by regalloc. clobbered: Set>, @@ -48,7 +70,7 @@ pub(crate) struct X64ABIBody { flags: settings::Flags, } -fn use_int_reg(ty: types::Type) -> bool { +fn in_int_reg(ty: types::Type) -> bool { match ty { types::I8 | types::I16 @@ -63,7 +85,7 @@ fn use_int_reg(ty: types::Type) -> bool { } } -fn use_flt_reg(ty: types::Type) -> bool { +fn in_vec_reg(ty: types::Type) -> bool { match ty { types::F32 | types::F64 => true, _ => false, @@ -132,97 +154,50 @@ fn get_callee_saves(regs: Vec>) -> Vec> { impl X64ABIBody { /// Create a new body ABI instance. - pub(crate) fn new(f: &ir::Function, flags: settings::Flags) -> Self { - // Compute args and retvals from signature. - let mut args = vec![]; - let mut next_int_arg = 0; - let mut next_flt_arg = 0; - for param in &f.signature.params { - match param.purpose { - ir::ArgumentPurpose::VMContext if f.signature.call_conv.extends_baldrdash() => { - // `VMContext` is `r14` in Baldrdash. - args.push(ABIArg::Reg(regs::r14().to_real_reg())); - } + pub(crate) fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult { + let sig = ABISig::from_func_sig(&f.signature)?; - ir::ArgumentPurpose::Normal | ir::ArgumentPurpose::VMContext => { - if use_int_reg(param.value_type) { - if let Some(reg) = get_intreg_for_arg_systemv(next_int_arg) { - args.push(ABIArg::Reg(reg.to_real_reg())); - } else { - unimplemented!("passing arg on the stack"); - } - next_int_arg += 1; - } else if use_flt_reg(param.value_type) { - if let Some(reg) = get_fltreg_for_arg_systemv(next_flt_arg) { - args.push(ABIArg::Reg(reg.to_real_reg())); - } else { - unimplemented!("passing arg on the stack"); - } - next_flt_arg += 1; - } else { - unimplemented!("non int normal register {:?}", param.value_type) - } - } - - _ => unimplemented!("other parameter purposes"), - } - } - - let mut rets = vec![]; - let mut next_int_retval = 0; - let mut next_flt_retval = 0; - for ret in &f.signature.returns { - match ret.purpose { - ir::ArgumentPurpose::Normal => { - if use_int_reg(ret.value_type) { - if let Some(reg) = get_intreg_for_retval_systemv(next_int_retval) { - rets.push(ABIRet::Reg(reg.to_real_reg())); - } else { - unimplemented!("passing return on the stack"); - } - next_int_retval += 1; - } else if use_flt_reg(ret.value_type) { - if let Some(reg) = get_fltreg_for_retval_systemv(next_flt_retval) { - rets.push(ABIRet::Reg(reg.to_real_reg())); - } else { - unimplemented!("passing return on the stack"); - } - next_flt_retval += 1; - } else { - unimplemented!("returning non integer normal value"); - } - } - - _ => { - unimplemented!("non normal argument purpose"); - } - } - } + let call_conv = f.signature.call_conv; + debug_assert!( + call_conv == isa::CallConv::SystemV || call_conv.extends_baldrdash(), + "unsupported or unimplemented calling convetion {}", + call_conv + ); // Compute stackslot locations and total stackslot size. let mut stack_offset: usize = 0; - let mut _stack_slots = vec![]; + let mut stack_slots = vec![]; for (stackslot, data) in f.stack_slots.iter() { let off = stack_offset; stack_offset += data.size as usize; - - // 8-bit align. 
- stack_offset = (stack_offset + 7) & !7usize; - - debug_assert_eq!(stackslot.as_u32() as usize, _stack_slots.len()); - _stack_slots.push(off); + stack_offset = (stack_offset + 7) & !7; + debug_assert_eq!(stackslot.as_u32() as usize, stack_slots.len()); + stack_slots.push(off); } - Self { - args, - rets, - _stack_slots, + Ok(Self { + sig, + stack_slots, stack_slots_size: stack_offset, + ret_area_ptr: None, clobbered: Set::empty(), num_spill_slots: None, frame_size_bytes: None, call_conv: f.signature.call_conv.clone(), flags, + }) + } + + /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return + /// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg). + fn fp_to_arg_offset(&self) -> i64 { + if self.call_conv.extends_baldrdash() { + let num_words = self.flags.baldrdash_prologue_words() as i64; + debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words"); + debug_assert_eq!(num_words % 2, 0, "stack must be 16-aligned"); + num_words * 8 + } else { + 16 // frame pointer + return address. } } } @@ -231,31 +206,34 @@ impl ABIBody for X64ABIBody { type I = Inst; fn temp_needed(&self) -> bool { - false + self.sig.stack_ret_arg.is_some() } - fn init(&mut self, _: Option>) {} + fn init(&mut self, maybe_tmp: Option>) { + if self.sig.stack_ret_arg.is_some() { + assert!(maybe_tmp.is_some()); + self.ret_area_ptr = maybe_tmp; + } + } fn flags(&self) -> &settings::Flags { &self.flags } fn num_args(&self) -> usize { - unimplemented!() + self.sig.args.len() } - fn num_retvals(&self) -> usize { - unimplemented!() + self.sig.rets.len() } - fn num_stackslots(&self) -> usize { - unimplemented!() + self.stack_slots.len() } fn liveins(&self) -> Set { let mut set: Set = Set::empty(); - for arg in &self.args { - if let &ABIArg::Reg(r) = arg { + for arg in &self.sig.args { + if let &ABIArg::Reg(r, _) = arg { set.insert(r); } } @@ -264,8 +242,8 @@ impl ABIBody for X64ABIBody { fn liveouts(&self) -> Set { let mut set: Set = Set::empty(); - for ret in &self.rets { - if let &ABIRet::Reg(r) = ret { + for ret in &self.sig.rets { + if let &ABIArg::Reg(r, _) = ret { set.insert(r); } } @@ -273,22 +251,19 @@ impl ABIBody for X64ABIBody { } fn gen_copy_arg_to_reg(&self, idx: usize, to_reg: Writable) -> Inst { - match &self.args[idx] { - ABIArg::Reg(from_reg) => { - if from_reg.get_class() == RegClass::I32 || from_reg.get_class() == RegClass::I64 { - // TODO do we need a sign extension if it's I32? - return Inst::mov_r_r(/*is64=*/ true, from_reg.to_reg(), to_reg); - } else if from_reg.get_class() == RegClass::V128 { - // TODO: How to support Movss. Should is64 always be true? 
- return Inst::xmm_mov_rm_r( - SseOpcode::Movsd, - RegMem::reg(from_reg.to_reg()), - to_reg, - ); - } - unimplemented!("moving from non-int arg to vreg {:?}", from_reg.get_class()); + match &self.sig.args[idx] { + ABIArg::Reg(from_reg, ty) => Inst::gen_move(to_reg, from_reg.to_reg(), *ty), + &ABIArg::Stack(off, ty) => { + assert!( + self.fp_to_arg_offset() + off <= u32::max_value() as i64, + "large offset nyi" + ); + load_stack( + Amode::imm_reg((self.fp_to_arg_offset() + off) as u32, regs::rbp()), + to_reg, + ty, + ) } - ABIArg::_Stack => unimplemented!("moving from stack arg to vreg"), } } @@ -302,36 +277,74 @@ impl ABIBody for X64ABIBody { from_reg: Writable, ext: ArgumentExtension, ) -> Vec { - match ext { - ArgumentExtension::None => {} - _ => unimplemented!( - "unimplemented argument extension {:?} is required for baldrdash", - ext - ), - }; - let mut ret = Vec::new(); - match &self.rets[idx] { - ABIRet::Reg(to_reg) => { - if to_reg.get_class() == RegClass::I32 || to_reg.get_class() == RegClass::I64 { - ret.push(Inst::mov_r_r( - /*is64=*/ true, - from_reg.to_reg(), - Writable::::from_reg(to_reg.to_reg()), - )) - } else if to_reg.get_class() == RegClass::V128 { - ret.push(Inst::xmm_mov_rm_r( - SseOpcode::Movsd, - RegMem::reg(from_reg.to_reg()), - Writable::::from_reg(to_reg.to_reg()), - )) - } else { - unimplemented!("moving from vreg to unsupported return value"); - } + match &self.sig.rets[idx] { + &ABIArg::Reg(r, ty) => { + let from_bits = ty.bits() as u8; + let ext_mode = match from_bits { + 1 | 8 => Some(ExtMode::BQ), + 16 => Some(ExtMode::WQ), + 32 => Some(ExtMode::LQ), + 64 => None, + _ => unreachable!(), + }; + + let dest_reg = Writable::from_reg(r.to_reg()); + match (ext, ext_mode) { + (ArgumentExtension::Uext, Some(ext_mode)) => { + ret.push(Inst::movzx_rm_r( + ext_mode, + RegMem::reg(r.to_reg()), + dest_reg, + )); + } + (ArgumentExtension::Sext, Some(ext_mode)) => { + ret.push(Inst::movsx_rm_r( + ext_mode, + RegMem::reg(r.to_reg()), + dest_reg, + )); + } + _ => ret.push(Inst::gen_move(dest_reg, from_reg.to_reg(), ty)), + }; } - ABIRet::_Stack => { - unimplemented!("moving from vreg to stack return value"); + &ABIArg::Stack(off, ty) => { + let from_bits = ty.bits() as u8; + let ext_mode = match from_bits { + 1 | 8 => Some(ExtMode::BQ), + 16 => Some(ExtMode::WQ), + 32 => Some(ExtMode::LQ), + 64 => None, + _ => unreachable!(), + }; + + // Trash the from_reg; it should be its last use. 
+ match (ext, ext_mode) { + (ArgumentExtension::Uext, Some(ext_mode)) => { + ret.push(Inst::movzx_rm_r( + ext_mode, + RegMem::reg(from_reg.to_reg()), + from_reg, + )); + } + (ArgumentExtension::Sext, Some(ext_mode)) => { + ret.push(Inst::movsx_rm_r( + ext_mode, + RegMem::reg(from_reg.to_reg()), + from_reg, + )); + } + _ => {} + }; + + assert!( + off < u32::max_value() as i64, + "large stack return offset nyi" + ); + + let mem = Amode::imm_reg(off as u32, self.ret_area_ptr.unwrap().to_reg()); + ret.push(store_stack(mem, from_reg.to_reg(), ty)) } } @@ -354,8 +367,10 @@ impl ABIBody for X64ABIBody { self.clobbered = clobbered; } - fn stackslot_addr(&self, _slot: StackSlot, _offset: u32, _into_reg: Writable) -> Inst { - unimplemented!() + fn stackslot_addr(&self, slot: StackSlot, offset: u32, dst: Writable) -> Inst { + let stack_off = self.stack_slots[slot.as_u32() as usize] as i64; + let sp_off: i64 = stack_off + (offset as i64); + Inst::lea(SyntheticAmode::nominal_sp_offset(sp_off as u32), dst) } fn load_stackslot( @@ -388,7 +403,7 @@ impl ABIBody for X64ABIBody { // Baldrdash generates its own prologue sequence, so we don't have to. if !self.call_conv.extends_baldrdash() { let r_rbp = regs::rbp(); - let w_rbp = Writable::::from_reg(r_rbp); + let w_rbp = Writable::from_reg(r_rbp); // The "traditional" pre-preamble // RSP before the call will be 0 % 16. So here, it is 8 % 16. @@ -397,20 +412,14 @@ impl ABIBody for X64ABIBody { insts.push(Inst::mov_r_r(true, r_rsp, w_rbp)); } - // Save callee saved registers that we trash. Keep track of how much space we've used, so - // as to know what we have to do to get the base of the spill area 0 % 16. - let mut callee_saved_used = 0; let clobbered = get_callee_saves(self.clobbered.to_vec()); - for reg in clobbered { - let r_reg = reg.to_reg(); - match r_reg.get_class() { - RegClass::I64 => { - insts.push(Inst::push64(RegMemImm::reg(r_reg.to_reg()))); - callee_saved_used += 8; - } - _ => unimplemented!(), - } - } + let callee_saved_used: usize = clobbered + .iter() + .map(|reg| match reg.to_reg().get_class() { + RegClass::I64 => 8, + _ => todo!(), + }) + .sum(); let mut total_stacksize = self.stack_slots_size + 8 * self.num_spill_slots.unwrap(); if self.call_conv.extends_baldrdash() { @@ -423,18 +432,18 @@ impl ABIBody for X64ABIBody { total_stacksize += self.flags.baldrdash_prologue_words() as usize * 8; } - debug_assert!(callee_saved_used % 16 == 0 || callee_saved_used % 16 == 8); - let frame_size = total_stacksize + callee_saved_used % 16; - // Now make sure the frame stack is aligned, so RSP == 0 % 16 in the function's body. - let frame_size = (frame_size + 15) & !15; - if frame_size > 0x7FFF_FFFF { - unimplemented!("gen_prologue(x86): total_stacksize >= 2G"); - } + let padding = (16 - ((total_stacksize + callee_saved_used) % 16)) & 15; + let frame_size = total_stacksize + padding; + debug_assert!( + frame_size <= u32::max_value() as usize, + "gen_prologue(x86): total_stacksize >= 2G" + ); + debug_assert_eq!((frame_size + callee_saved_used) % 16, 0, "misaligned stack"); if !self.call_conv.extends_baldrdash() { // Explicitly allocate the frame. - let w_rsp = Writable::::from_reg(r_rsp); + let w_rsp = Writable::from_reg(r_rsp); if frame_size > 0 { insts.push(Inst::alu_rmi_r( true, @@ -445,6 +454,25 @@ impl ABIBody for X64ABIBody { } } + // Save callee saved registers that we trash. Keep track of how much space we've used, so + // as to know what we have to do to get the base of the spill area 0 % 16. 
+ let clobbered = get_callee_saves(self.clobbered.to_vec()); + for reg in clobbered { + let r_reg = reg.to_reg(); + match r_reg.get_class() { + RegClass::I64 => { + insts.push(Inst::push64(RegMemImm::reg(r_reg.to_reg()))); + } + _ => unimplemented!(), + } + } + + if callee_saved_used > 0 { + insts.push(Inst::VirtualSPOffsetAdj { + offset: callee_saved_used as i64, + }); + } + // Stash this value. We'll need it for the epilogue. debug_assert!(self.frame_size_bytes.is_none()); self.frame_size_bytes = Some(frame_size); @@ -457,13 +485,29 @@ impl ABIBody for X64ABIBody { // Undo what we did in the prologue. + // Restore regs. + let clobbered = get_callee_saves(self.clobbered.to_vec()); + for wreg in clobbered.into_iter().rev() { + let rreg = wreg.to_reg(); + match rreg.get_class() { + RegClass::I64 => { + // TODO: make these conversion sequences less cumbersome. + insts.push(Inst::pop64(Writable::from_reg(rreg.to_reg()))); + } + _ => unimplemented!(), + } + } + + // No need to adjust the virtual sp offset here: + // - this would create issues when there's a return in the middle of a function, + // - and nothing in this sequence may try to access stack slots from the nominal SP. + // Clear the spill area and the 16-alignment padding below it. if !self.call_conv.extends_baldrdash() { let frame_size = self.frame_size_bytes.unwrap(); if frame_size > 0 { let r_rsp = regs::rsp(); - let w_rsp = Writable::::from_reg(r_rsp); - + let w_rsp = Writable::from_reg(r_rsp); insts.push(Inst::alu_rmi_r( true, AluRmiROpcode::Add, @@ -473,28 +517,11 @@ impl ABIBody for X64ABIBody { } } - // Restore regs. - let clobbered = get_callee_saves(self.clobbered.to_vec()); - for w_real_reg in clobbered.into_iter().rev() { - match w_real_reg.to_reg().get_class() { - RegClass::I64 => { - // TODO: make these conversion sequences less cumbersome. - insts.push(Inst::pop64(Writable::::from_reg( - w_real_reg.to_reg().to_reg(), - ))) - } - _ => unimplemented!(), - } - } - // Baldrdash generates its own preamble. if !self.call_conv.extends_baldrdash() { - let r_rbp = regs::rbp(); - let w_rbp = Writable::::from_reg(r_rbp); - // Undo the "traditional" pre-preamble // RSP before the call will be 0 % 16. So here, it is 8 % 16. - insts.push(Inst::pop64(w_rbp)); + insts.push(Inst::pop64(Writable::from_reg(regs::rbp()))); insts.push(Inst::ret()); } @@ -524,3 +551,465 @@ impl ABIBody for X64ABIBody { unimplemented!() } } + +fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { + let mut caller_saved = Vec::new(); + + // Systemv calling convention: + // - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved). + caller_saved.push(Writable::from_reg(regs::rsi())); + caller_saved.push(Writable::from_reg(regs::rdi())); + caller_saved.push(Writable::from_reg(regs::rax())); + caller_saved.push(Writable::from_reg(regs::rcx())); + caller_saved.push(Writable::from_reg(regs::rdx())); + caller_saved.push(Writable::from_reg(regs::r8())); + caller_saved.push(Writable::from_reg(regs::r9())); + caller_saved.push(Writable::from_reg(regs::r10())); + caller_saved.push(Writable::from_reg(regs::r11())); + + // - XMM: all the registers! 
+ caller_saved.push(Writable::from_reg(regs::xmm0())); + caller_saved.push(Writable::from_reg(regs::xmm1())); + caller_saved.push(Writable::from_reg(regs::xmm2())); + caller_saved.push(Writable::from_reg(regs::xmm3())); + caller_saved.push(Writable::from_reg(regs::xmm4())); + caller_saved.push(Writable::from_reg(regs::xmm5())); + caller_saved.push(Writable::from_reg(regs::xmm6())); + caller_saved.push(Writable::from_reg(regs::xmm7())); + caller_saved.push(Writable::from_reg(regs::xmm8())); + caller_saved.push(Writable::from_reg(regs::xmm9())); + caller_saved.push(Writable::from_reg(regs::xmm10())); + caller_saved.push(Writable::from_reg(regs::xmm11())); + caller_saved.push(Writable::from_reg(regs::xmm12())); + caller_saved.push(Writable::from_reg(regs::xmm13())); + caller_saved.push(Writable::from_reg(regs::xmm14())); + caller_saved.push(Writable::from_reg(regs::xmm15())); + + if call_conv.extends_baldrdash() { + todo!("add the baldrdash caller saved") + } + + caller_saved +} + +fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>) { + // Compute uses: all arg regs. + let mut uses = Vec::new(); + for arg in &sig.args { + match arg { + &ABIArg::Reg(reg, _) => uses.push(reg.to_reg()), + _ => {} + } + } + + // Compute defs: all retval regs, and all caller-save (clobbered) regs. + let mut defs = get_caller_saves(sig.call_conv); + for ret in &sig.rets { + match ret { + &ABIArg::Reg(reg, _) => defs.push(Writable::from_reg(reg.to_reg())), + _ => {} + } + } + + (uses, defs) +} + +/// Try to fill a Baldrdash register, returning it if it was found. +fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Option { + if call_conv.extends_baldrdash() { + match ¶m.purpose { + &ir::ArgumentPurpose::VMContext => { + // This is SpiderMonkey's `WasmTlsReg`. + Some(ABIArg::Reg(regs::r14().to_real_reg(), ir::types::I64)) + } + &ir::ArgumentPurpose::SignatureId => { + // This is SpiderMonkey's `WasmTableCallSigReg`. + Some(ABIArg::Reg(regs::r10().to_real_reg(), ir::types::I64)) + } + _ => None, + } + } else { + None + } +} + +/// Are we computing information about arguments or return values? Much of the +/// handling is factored out into common routines; this enum allows us to +/// distinguish which case we're handling. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum ArgsOrRets { + Args, + Rets, +} + +/// Process a list of parameters or return values and allocate them to X-regs, +/// V-regs, and stack slots. +/// +/// Returns the list of argument locations, the stack-space used (rounded up +/// to a 16-byte-aligned boundary), and if `add_ret_area_ptr` was passed, the +/// index of the extra synthetic arg that was added. +fn compute_arg_locs( + call_conv: isa::CallConv, + params: &[ir::AbiParam], + args_or_rets: ArgsOrRets, + add_ret_area_ptr: bool, +) -> CodegenResult<(Vec, i64, Option)> { + let is_baldrdash = call_conv.extends_baldrdash(); + + // XXX assume SystemV at the moment. + debug_assert!(!is_baldrdash, "baldrdash nyi"); + + let mut next_gpr = 0; + let mut next_vreg = 0; + let mut next_stack: u64 = 0; + let mut ret = vec![]; + + for i in 0..params.len() { + // Process returns backward, according to the SpiderMonkey ABI (which we + // adopt internally if `is_baldrdash` is set). + let param = match (args_or_rets, is_baldrdash) { + (ArgsOrRets::Args, _) => ¶ms[i], + (ArgsOrRets::Rets, false) => ¶ms[i], + (ArgsOrRets::Rets, true) => ¶ms[params.len() - 1 - i], + }; + + // Validate "purpose". 
+ match ¶m.purpose { + &ir::ArgumentPurpose::VMContext + | &ir::ArgumentPurpose::Normal + | &ir::ArgumentPurpose::StackLimit + | &ir::ArgumentPurpose::SignatureId => {} + _ => panic!( + "Unsupported argument purpose {:?} in signature: {:?}", + param.purpose, params + ), + } + + let intreg = in_int_reg(param.value_type); + let vecreg = in_vec_reg(param.value_type); + debug_assert!(intreg || vecreg); + debug_assert!(!(intreg && vecreg)); + + let (next_reg, candidate) = if intreg { + let candidate = match args_or_rets { + ArgsOrRets::Args => get_intreg_for_arg_systemv(next_gpr), + ArgsOrRets::Rets => get_intreg_for_retval_systemv(next_gpr), + }; + (&mut next_gpr, candidate) + } else { + let candidate = match args_or_rets { + ArgsOrRets::Args => get_fltreg_for_arg_systemv(next_gpr), + ArgsOrRets::Rets => get_fltreg_for_retval_systemv(next_gpr), + }; + (&mut next_vreg, candidate) + }; + + if let Some(param) = try_fill_baldrdash_reg(call_conv, param) { + assert!(intreg); + ret.push(param); + } else if let Some(reg) = candidate { + ret.push(ABIArg::Reg(reg.to_real_reg(), param.value_type)); + *next_reg += 1; + } else { + // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte + // stack alignment happens separately after all args.) + let size = (param.value_type.bits() / 8) as u64; + let size = std::cmp::max(size, 8); + // Align. + debug_assert!(size.is_power_of_two()); + next_stack = (next_stack + size - 1) & !(size - 1); + ret.push(ABIArg::Stack(next_stack as i64, param.value_type)); + next_stack += size; + } + } + + if args_or_rets == ArgsOrRets::Rets && is_baldrdash { + ret.reverse(); + } + + let extra_arg = if add_ret_area_ptr { + debug_assert!(args_or_rets == ArgsOrRets::Args); + if let Some(reg) = get_intreg_for_arg_systemv(next_gpr) { + ret.push(ABIArg::Reg(reg.to_real_reg(), ir::types::I64)); + } else { + ret.push(ABIArg::Stack(next_stack as i64, ir::types::I64)); + next_stack += 8; + } + Some(ret.len() - 1) + } else { + None + }; + + next_stack = (next_stack + 15) & !15; + + // To avoid overflow issues, limit the arg/return size to something reasonable. + if next_stack > STACK_ARG_RET_SIZE_LIMIT { + return Err(CodegenError::ImplLimitExceeded); + } + + Ok((ret, next_stack as i64, extra_arg)) +} + +impl ABISig { + fn from_func_sig(sig: &ir::Signature) -> CodegenResult { + // Compute args and retvals from signature. Handle retvals first, + // because we may need to add a return-area arg to the args. 
+ let (rets, stack_ret_space, _) = compute_arg_locs( + sig.call_conv, + &sig.returns, + ArgsOrRets::Rets, + /* extra ret-area ptr = */ false, + )?; + let need_stack_return_area = stack_ret_space > 0; + let (args, stack_arg_space, stack_ret_arg) = compute_arg_locs( + sig.call_conv, + &sig.params, + ArgsOrRets::Args, + need_stack_return_area, + )?; + + trace!( + "ABISig: sig {:?} => args = {:?} rets = {:?} arg stack = {} ret stack = {} stack_ret_arg = {:?}", + sig, + args, + rets, + stack_arg_space, + stack_ret_space, + stack_ret_arg + ); + + Ok(ABISig { + args, + rets, + stack_arg_space, + stack_ret_space, + stack_ret_arg, + call_conv: sig.call_conv, + }) + } +} + +enum CallDest { + ExtName(ir::ExternalName, RelocDistance), + Reg(Reg), +} + +fn adjust_stack>(ctx: &mut C, amount: u64, is_sub: bool) { + if amount == 0 { + return; + } + + let (alu_op, sp_adjustment) = if is_sub { + (AluRmiROpcode::Sub, amount as i64) + } else { + (AluRmiROpcode::Add, -(amount as i64)) + }; + + ctx.emit(Inst::VirtualSPOffsetAdj { + offset: sp_adjustment, + }); + + if amount <= u32::max_value() as u64 { + ctx.emit(Inst::alu_rmi_r( + true, + alu_op, + RegMemImm::imm(amount as u32), + Writable::from_reg(regs::rsp()), + )); + } else { + // TODO will require a scratch register. + unimplemented!("adjust stack with large offset"); + } +} + +fn load_stack(mem: Amode, into_reg: Writable, ty: Type) -> Inst { + let ext_mode = match ty { + types::B1 | types::B8 | types::I8 => Some(ExtMode::BQ), + types::B16 | types::I16 => Some(ExtMode::WQ), + types::B32 | types::I32 => Some(ExtMode::LQ), + types::B64 | types::I64 => None, + types::F32 => todo!("f32 load_stack"), + types::F64 => todo!("f64 load_stack"), + _ => unimplemented!("load_stack({})", ty), + }; + + match ext_mode { + Some(ext_mode) => Inst::movsx_rm_r(ext_mode, RegMem::mem(mem), into_reg), + None => Inst::mov64_m_r(mem, into_reg), + } +} + +fn store_stack(mem: Amode, from_reg: Reg, ty: Type) -> Inst { + let (is_int, size) = match ty { + types::B1 | types::B8 | types::I8 => (true, 1), + types::B16 | types::I16 => (true, 2), + types::B32 | types::I32 => (true, 4), + types::B64 | types::I64 => (true, 8), + types::F32 => (false, 4), + types::F64 => (false, 8), + _ => unimplemented!("store_stack({})", ty), + }; + if is_int { + Inst::mov_r_m(size, from_reg, mem) + } else { + unimplemented!("f32/f64 store_stack"); + } +} + +/// X64 ABI object for a function call. +pub struct X64ABICall { + sig: ABISig, + uses: Vec, + defs: Vec>, + dest: CallDest, + loc: ir::SourceLoc, + opcode: ir::Opcode, +} + +impl X64ABICall { + /// Create a callsite ABI object for a call directly to the specified function. + pub fn from_func( + sig: &ir::Signature, + extname: &ir::ExternalName, + dist: RelocDistance, + loc: ir::SourceLoc, + ) -> CodegenResult { + let sig = ABISig::from_func_sig(sig)?; + let (uses, defs) = abisig_to_uses_and_defs(&sig); + Ok(Self { + sig, + uses, + defs, + dest: CallDest::ExtName(extname.clone(), dist), + loc, + opcode: ir::Opcode::Call, + }) + } + + /// Create a callsite ABI object for a call to a function pointer with the + /// given signature. 
+ pub fn from_ptr( + sig: &ir::Signature, + ptr: Reg, + loc: ir::SourceLoc, + opcode: ir::Opcode, + ) -> CodegenResult { + let sig = ABISig::from_func_sig(sig)?; + let (uses, defs) = abisig_to_uses_and_defs(&sig); + Ok(Self { + sig, + uses, + defs, + dest: CallDest::Reg(ptr), + loc, + opcode, + }) + } +} + +impl ABICall for X64ABICall { + type I = Inst; + + fn num_args(&self) -> usize { + if self.sig.stack_ret_arg.is_some() { + self.sig.args.len() - 1 + } else { + self.sig.args.len() + } + } + + fn emit_stack_pre_adjust>(&self, ctx: &mut C) { + let off = self.sig.stack_arg_space + self.sig.stack_ret_space; + adjust_stack(ctx, off as u64, /* is_sub = */ true) + } + + fn emit_stack_post_adjust>(&self, ctx: &mut C) { + let off = self.sig.stack_arg_space + self.sig.stack_ret_space; + adjust_stack(ctx, off as u64, /* is_sub = */ false) + } + + fn emit_copy_reg_to_arg>( + &self, + ctx: &mut C, + idx: usize, + from_reg: Reg, + ) { + match &self.sig.args[idx] { + &ABIArg::Reg(reg, ty) => ctx.emit(Inst::gen_move( + Writable::from_reg(reg.to_reg()), + from_reg, + ty, + )), + &ABIArg::Stack(off, ty) => { + debug_assert!(off <= u32::max_value() as i64); + debug_assert!(off >= 0); + ctx.emit(store_stack( + Amode::imm_reg(off as u32, regs::rsp()), + from_reg, + ty, + )) + } + } + } + + fn emit_copy_retval_to_reg>( + &self, + ctx: &mut C, + idx: usize, + into_reg: Writable, + ) { + match &self.sig.rets[idx] { + &ABIArg::Reg(reg, ty) => ctx.emit(Inst::gen_move(into_reg, reg.to_reg(), ty)), + &ABIArg::Stack(off, ty) => { + let ret_area_base = self.sig.stack_arg_space; + let sp_offset = off + ret_area_base; + // TODO handle offsets bigger than u32::max + debug_assert!(sp_offset >= 0); + debug_assert!(sp_offset <= u32::max_value() as i64); + ctx.emit(load_stack( + Amode::imm_reg(sp_offset as u32, regs::rsp()), + into_reg, + ty, + )); + } + } + } + + fn emit_call>(&mut self, ctx: &mut C) { + let (uses, defs) = ( + mem::replace(&mut self.uses, Default::default()), + mem::replace(&mut self.defs, Default::default()), + ); + + if let Some(i) = self.sig.stack_ret_arg { + let dst = ctx.alloc_tmp(RegClass::I64, I64); + let ret_area_base = self.sig.stack_arg_space; + debug_assert!( + ret_area_base <= u32::max_value() as i64, + "large offset for ret area NYI" + ); + ctx.emit(Inst::lea( + Amode::imm_reg(ret_area_base as u32, regs::rsp()), + dst, + )); + self.emit_copy_reg_to_arg(ctx, i, dst.to_reg()); + } + + match &self.dest { + &CallDest::ExtName(ref name, ref _reloc_distance) => ctx.emit(Inst::call_known( + name.clone(), + uses, + defs, + self.loc, + self.opcode, + )), + &CallDest::Reg(reg) => ctx.emit(Inst::call_unknown( + RegMem::reg(reg), + uses, + defs, + self.loc, + self.opcode, + )), + } + } +} diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 96e34886da..a19874a923 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -3,16 +3,20 @@ use std::fmt; use std::string::{String, ToString}; -use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector}; +use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector, RegUsageMapper}; use crate::ir::condcodes::IntCC; use crate::machinst::*; -use super::regs::show_ireg_sized; +use super::{ + regs::{self, show_ireg_sized}, + EmitState, +}; -/// A Memory Address. These denote a 64-bit value only. +/// A possible addressing mode (amode) that can be used in instructions. +/// These denote a 64-bit value only. 
#[derive(Clone)] -pub(crate) enum Addr { +pub enum Amode { /// Immediate sign-extended and a Register. ImmReg { simm32: u32, base: Reg }, @@ -25,7 +29,7 @@ pub(crate) enum Addr { }, } -impl Addr { +impl Amode { pub(crate) fn imm_reg(simm32: u32, base: Reg) -> Self { debug_assert!(base.get_class() == RegClass::I64); Self::ImmReg { simm32, base } @@ -46,15 +50,10 @@ impl Addr { /// Add the regs mentioned by `self` to `collector`. pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { match self { - Addr::ImmReg { simm32: _, base } => { + Amode::ImmReg { base, .. } => { collector.add_use(*base); } - Addr::ImmRegRegShift { - simm32: _, - base, - index, - shift: _, - } => { + Amode::ImmRegRegShift { base, index, .. } => { collector.add_use(*base); collector.add_use(*index); } @@ -62,13 +61,13 @@ impl Addr { } } -impl ShowWithRRU for Addr { +impl ShowWithRRU for Amode { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { match self { - Addr::ImmReg { simm32, base } => { + Amode::ImmReg { simm32, base } => { format!("{}({})", *simm32 as i32, base.show_rru(mb_rru)) } - Addr::ImmRegRegShift { + Amode::ImmRegRegShift { simm32, base, index, @@ -84,14 +83,84 @@ impl ShowWithRRU for Addr { } } +/// A Memory Address. These denote a 64-bit value only. +/// Used for usual addressing modes as well as addressing modes used during compilation, when the +/// moving SP offset is not known. +#[derive(Clone)] +pub enum SyntheticAmode { + /// A real amode. + Real(Amode), + + /// A (virtual) offset to the "nominal SP" value, which will be recomputed as we push and pop + /// within the function. + NominalSPOffset { simm32: u32 }, +} + +impl SyntheticAmode { + pub(crate) fn nominal_sp_offset(simm32: u32) -> Self { + SyntheticAmode::NominalSPOffset { simm32 } + } + + /// Add the regs mentioned by `self` to `collector`. + pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { + match self { + SyntheticAmode::Real(addr) => addr.get_regs_as_uses(collector), + SyntheticAmode::NominalSPOffset { .. } => { + // Nothing to do; the base is SP and isn't involved in regalloc. + } + } + } + + pub(crate) fn map_uses(&mut self, map: &RUM) { + match self { + SyntheticAmode::Real(addr) => addr.map_uses(map), + SyntheticAmode::NominalSPOffset { .. } => { + // Nothing to do. + } + } + } + + pub(crate) fn finalize(&self, state: &mut EmitState) -> Amode { + match self { + SyntheticAmode::Real(addr) => addr.clone(), + SyntheticAmode::NominalSPOffset { simm32 } => { + let off = *simm32 as i64 + state.virtual_sp_offset; + // TODO will require a sequence of add etc. + assert!( + off <= u32::max_value() as i64, + "amode finalize: add sequence NYI" + ); + Amode::imm_reg(off as u32, regs::rsp()) + } + } + } +} + +impl Into for Amode { + fn into(self) -> SyntheticAmode { + SyntheticAmode::Real(self) + } +} + +impl ShowWithRRU for SyntheticAmode { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + match self { + SyntheticAmode::Real(addr) => addr.show_rru(mb_rru), + SyntheticAmode::NominalSPOffset { simm32 } => { + format!("rsp({} + virtual offset)", *simm32 as i32) + } + } + } +} + /// An operand which is either an integer Register, a value in Memory or an Immediate. This can /// denote an 8, 16, 32 or 64 bit value. For the Immediate form, in the 8- and 16-bit case, only /// the lower 8 or 16 bits of `simm32` is relevant. In the 64-bit case, the value denoted by /// `simm32` is its sign-extension out to 64 bits. 
#[derive(Clone)] -pub(crate) enum RegMemImm { +pub enum RegMemImm { Reg { reg: Reg }, - Mem { addr: Addr }, + Mem { addr: SyntheticAmode }, Imm { simm32: u32 }, } @@ -100,8 +169,8 @@ impl RegMemImm { debug_assert!(reg.get_class() == RegClass::I64); Self::Reg { reg } } - pub(crate) fn mem(addr: Addr) -> Self { - Self::Mem { addr } + pub(crate) fn mem(addr: impl Into) -> Self { + Self::Mem { addr: addr.into() } } pub(crate) fn imm(simm32: u32) -> Self { Self::Imm { simm32 } @@ -134,9 +203,9 @@ impl ShowWithRRU for RegMemImm { /// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16, /// 32 or 64 bit value. #[derive(Clone)] -pub(crate) enum RegMem { +pub enum RegMem { Reg { reg: Reg }, - Mem { addr: Addr }, + Mem { addr: SyntheticAmode }, } impl RegMem { @@ -144,8 +213,8 @@ impl RegMem { debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128); Self::Reg { reg } } - pub(crate) fn mem(addr: Addr) -> Self { - Self::Mem { addr } + pub(crate) fn mem(addr: impl Into) -> Self { + Self::Mem { addr: addr.into() } } /// Add the regs mentioned by `self` to `collector`. @@ -382,6 +451,13 @@ pub enum ExtMode { } impl ExtMode { + pub(crate) fn src_size(&self) -> u8 { + match self { + ExtMode::BL | ExtMode::BQ => 1, + ExtMode::WL | ExtMode::WQ => 2, + ExtMode::LQ => 4, + } + } pub(crate) fn dst_size(&self) -> u8 { match self { ExtMode::BL | ExtMode::WL => 4, diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 6f6cea52fc..2325aca3a4 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1,6 +1,9 @@ -use crate::isa::x64::inst::*; +use log::debug; use regalloc::Reg; +use crate::binemit::Reloc; +use crate::isa::x64::inst::*; + fn low8_will_sign_extend_to_64(x: u32) -> bool { let xs = (x as i32) as i64; xs == ((xs << 56) >> 56) @@ -164,7 +167,7 @@ fn emit_std_enc_mem( opcodes: u32, mut num_opcodes: usize, enc_g: u8, - mem_e: &Addr, + mem_e: &Amode, rex: RexFlags, ) { // General comment for this function: the registers in `mem_e` must be @@ -174,7 +177,7 @@ fn emit_std_enc_mem( prefix.emit(sink); match mem_e { - Addr::ImmReg { simm32, base } => { + Amode::ImmReg { simm32, base } => { // First, the REX byte. let enc_e = int_reg_enc(*base); rex.emit_two_op(sink, enc_g, enc_e); @@ -228,7 +231,7 @@ fn emit_std_enc_mem( } } - Addr::ImmRegRegShift { + Amode::ImmRegRegShift { simm32, base: reg_base, index: reg_index, @@ -306,7 +309,7 @@ fn emit_std_reg_mem( opcodes: u32, num_opcodes: usize, reg_g: Reg, - mem_e: &Addr, + mem_e: &Amode, rex: RexFlags, ) { let enc_g = reg_enc(reg_g); @@ -389,10 +392,13 @@ fn emit_simm(sink: &mut MachBuffer, size: u8, simm32: u32) { /// /// * there's a shorter encoding for shl/shr/sar by a 1-bit immediate. (Do we /// care?) -pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { +pub(crate) fn emit( + inst: &Inst, + sink: &mut MachBuffer, + _flags: &settings::Flags, + state: &mut EmitState, +) { match inst { - Inst::Nop { len: 0 } => {} - Inst::Alu_RMI_R { is_64, op, @@ -428,7 +434,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { 0x0FAF, 2, reg_g.to_reg(), - addr, + &addr.finalize(state), rex, ); } @@ -460,47 +466,39 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { }; match src { - RegMemImm::Reg { reg: regE } => { - // Note. The arguments .. regE .. reg_g .. sequence - // here is the opposite of what is expected. I'm not - // sure why this is. 
But I am fairly sure that the - // arg order could be switched back to the expected - // .. reg_g .. regE .. if opcode_rr is also switched - // over to the "other" basic integer opcode (viz, the - // R/RM vs RM/R duality). However, that would mean - // that the test results won't be in accordance with - // the GNU as reference output. In other words, the - // inversion exists as a result of using GNU as as a - // gold standard. + RegMemImm::Reg { reg: reg_e } => { + // GCC/llvm use the swapped operand encoding (viz., the R/RM vs RM/R + // duality). Do this too, so as to be able to compare generated machine + // code easily. emit_std_reg_reg( sink, LegacyPrefix::None, opcode_r, 1, - *regE, + *reg_e, reg_g.to_reg(), rex, ); - // NB: if this is ever extended to handle byte size - // ops, be sure to retain redundant REX prefixes. + // NB: if this is ever extended to handle byte size ops, be sure to retain + // redundant REX prefixes. } RegMemImm::Mem { addr } => { - // Whereas here we revert to the "normal" G-E ordering. + // Here we revert to the "normal" G-E ordering. emit_std_reg_mem( sink, LegacyPrefix::None, opcode_m, 1, reg_g.to_reg(), - addr, + &addr.finalize(state), rex, ); } RegMemImm::Imm { simm32 } => { - let useImm8 = low8_will_sign_extend_to_32(*simm32); - let opcode = if useImm8 { 0x83 } else { 0x81 }; + let use_imm8 = low8_will_sign_extend_to_32(*simm32); + let opcode = if use_imm8 { 0x83 } else { 0x81 }; // And also here we use the "normal" G-E ordering. let enc_g = int_reg_enc(reg_g.to_reg()); emit_std_enc_enc( @@ -512,7 +510,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { enc_g, rex, ); - emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32); + emit_simm(sink, if use_imm8 { 1 } else { 4 }, *simm32); } } } @@ -548,161 +546,129 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { emit_std_reg_reg(sink, LegacyPrefix::None, 0x89, 1, *src, dst.to_reg(), rex); } - Inst::MovZX_M_R { extMode, addr, dst } => { - match extMode { + Inst::MovZX_RM_R { ext_mode, src, dst } => { + let (opcodes, num_opcodes, rex_flags) = match ext_mode { ExtMode::BL => { // MOVZBL is (REX.W==0) 0F B6 /r - emit_std_reg_mem( - sink, - LegacyPrefix::None, - 0x0FB6, - 2, - dst.to_reg(), - addr, - RexFlags::clear_w(), - ) + (0x0FB6, 2, RexFlags::clear_w()) } - ExtMode::BQ => { // MOVZBQ is (REX.W==1) 0F B6 /r // I'm not sure why the Intel manual offers different // encodings for MOVZBQ than for MOVZBL. AIUI they should // achieve the same, since MOVZBL is just going to zero out // the upper half of the destination anyway. - emit_std_reg_mem( - sink, - LegacyPrefix::None, - 0x0FB6, - 2, - dst.to_reg(), - addr, - RexFlags::set_w(), - ) + (0x0FB6, 2, RexFlags::set_w()) } - ExtMode::WL => { // MOVZWL is (REX.W==0) 0F B7 /r - emit_std_reg_mem( - sink, - LegacyPrefix::None, - 0x0FB7, - 2, - dst.to_reg(), - addr, - RexFlags::clear_w(), - ) + (0x0FB7, 2, RexFlags::clear_w()) } - ExtMode::WQ => { // MOVZWQ is (REX.W==1) 0F B7 /r - emit_std_reg_mem( - sink, - LegacyPrefix::None, - 0x0FB7, - 2, - dst.to_reg(), - addr, - RexFlags::set_w(), - ) + (0x0FB7, 2, RexFlags::set_w()) } - ExtMode::LQ => { // This is just a standard 32 bit load, and we rely on the // default zero-extension rule to perform the extension. + // Note that in reg/reg mode, gcc seems to use the swapped form R/RM, which we + // don't do here, since it's the same encoding size. 
// MOV r/m32, r32 is (REX.W==0) 8B /r - emit_std_reg_mem( - sink, - LegacyPrefix::None, - 0x8B, - 1, - dst.to_reg(), - addr, - RexFlags::clear_w(), - ) + (0x8B, 1, RexFlags::clear_w()) } + }; + + match src { + RegMem::Reg { reg: src } => emit_std_reg_reg( + sink, + LegacyPrefix::None, + opcodes, + num_opcodes, + dst.to_reg(), + *src, + rex_flags, + ), + RegMem::Mem { addr: src } => emit_std_reg_mem( + sink, + LegacyPrefix::None, + opcodes, + num_opcodes, + dst.to_reg(), + &src.finalize(state), + rex_flags, + ), } } - Inst::Mov64_M_R { addr, dst } => emit_std_reg_mem( + Inst::Mov64_M_R { src, dst } => emit_std_reg_mem( sink, LegacyPrefix::None, 0x8B, 1, dst.to_reg(), - addr, + &src.finalize(state), RexFlags::set_w(), ), - Inst::MovSX_M_R { extMode, addr, dst } => { - match extMode { + Inst::LoadEffectiveAddress { addr, dst } => emit_std_reg_mem( + sink, + LegacyPrefix::None, + 0x8D, + 1, + dst.to_reg(), + &addr.finalize(state), + RexFlags::set_w(), + ), + + Inst::MovSX_RM_R { ext_mode, src, dst } => { + let (opcodes, num_opcodes, rex_flags) = match ext_mode { ExtMode::BL => { // MOVSBL is (REX.W==0) 0F BE /r - emit_std_reg_mem( - sink, - LegacyPrefix::None, - 0x0FBE, - 2, - dst.to_reg(), - addr, - RexFlags::clear_w(), - ) + (0x0FBE, 2, RexFlags::clear_w()) } - ExtMode::BQ => { // MOVSBQ is (REX.W==1) 0F BE /r - emit_std_reg_mem( - sink, - LegacyPrefix::None, - 0x0FBE, - 2, - dst.to_reg(), - addr, - RexFlags::set_w(), - ) + (0x0FBE, 2, RexFlags::set_w()) } - ExtMode::WL => { // MOVSWL is (REX.W==0) 0F BF /r - emit_std_reg_mem( - sink, - LegacyPrefix::None, - 0x0FBF, - 2, - dst.to_reg(), - addr, - RexFlags::clear_w(), - ) + (0x0FBF, 2, RexFlags::clear_w()) } - ExtMode::WQ => { // MOVSWQ is (REX.W==1) 0F BF /r - emit_std_reg_mem( - sink, - LegacyPrefix::None, - 0x0FBF, - 2, - dst.to_reg(), - addr, - RexFlags::set_w(), - ) + (0x0FBF, 2, RexFlags::set_w()) } - ExtMode::LQ => { // MOVSLQ is (REX.W==1) 63 /r - emit_std_reg_mem( - sink, - LegacyPrefix::None, - 0x63, - 1, - dst.to_reg(), - addr, - RexFlags::set_w(), - ) + (0x63, 1, RexFlags::set_w()) } + }; + + match src { + RegMem::Reg { reg: src } => emit_std_reg_reg( + sink, + LegacyPrefix::None, + opcodes, + num_opcodes, + dst.to_reg(), + *src, + rex_flags, + ), + RegMem::Mem { addr: src } => emit_std_reg_mem( + sink, + LegacyPrefix::None, + opcodes, + num_opcodes, + dst.to_reg(), + &src.finalize(state), + rex_flags, + ), } } - Inst::Mov_R_M { size, src, addr } => { + Inst::Mov_R_M { size, src, dst } => { + let dst = &dst.finalize(state); + match size { 1 => { // This is one of the few places where the presence of a @@ -716,7 +682,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { }; // MOV r8, r/m8 is (REX.W==0) 88 /r - emit_std_reg_mem(sink, LegacyPrefix::None, 0x88, 1, *src, addr, rex) + emit_std_reg_mem(sink, LegacyPrefix::None, 0x88, 1, *src, dst, rex) } 2 => { @@ -727,7 +693,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { 0x89, 1, *src, - addr, + dst, RexFlags::clear_w(), ) } @@ -740,7 +706,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { 0x89, 1, *src, - addr, + dst, RexFlags::clear_w(), ) } @@ -753,7 +719,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { 0x89, 1, *src, - addr, + dst, RexFlags::set_w(), ) } @@ -825,23 +791,25 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { }; match src_e { - RegMemImm::Reg { reg: regE } => { - let opcode = if *size == 1 { 0x38 } else { 0x39 }; + RegMemImm::Reg { reg: reg_e } => { if *size == 1 { - // We also need to check whether the E register 
forces - // the use of a redundant REX. - let encE = int_reg_enc(*regE); - if encE >= 4 && encE <= 7 { + // Check whether the E register forces the use of a redundant REX. + let enc_e = int_reg_enc(*reg_e); + if enc_e >= 4 && enc_e <= 7 { rex.always_emit(); } } - // Same comment re swapped args as for Alu_RMI_R. - emit_std_reg_reg(sink, prefix, opcode, 1, *regE, *reg_g, rex); + + // Use the swapped operands encoding, to stay consistent with the output of + // gcc/llvm. + let opcode = if *size == 1 { 0x38 } else { 0x39 }; + emit_std_reg_reg(sink, prefix, opcode, 1, *reg_e, *reg_g, rex); } RegMemImm::Mem { addr } => { - let opcode = if *size == 1 { 0x3A } else { 0x3B }; + let addr = &addr.finalize(state); // Whereas here we revert to the "normal" G-E ordering. + let opcode = if *size == 1 { 0x3A } else { 0x3B }; emit_std_reg_mem(sink, prefix, opcode, 1, *reg_g, addr, rex); } @@ -849,6 +817,8 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // FIXME JRS 2020Feb11: there are shorter encodings for // cmp $imm, rax/eax/ax/al. let use_imm8 = low8_will_sign_extend_to_32(*simm32); + + // And also here we use the "normal" G-E ordering. let opcode = if *size == 1 { 0x80 } else if use_imm8 { @@ -857,7 +827,6 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { 0x81 }; - // And also here we use the "normal" G-E ordering. let enc_g = int_reg_enc(*reg_g); emit_std_enc_enc(sink, prefix, opcode, 1, 7 /*subopcode*/, enc_g, rex); emit_simm(sink, if use_imm8 { 1 } else { *size }, *simm32); @@ -865,6 +834,21 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } } + Inst::Setcc { cc, dst } => { + let opcode = 0x0f90 + cc.get_enc() as u32; + let mut rex_flags = RexFlags::clear_w(); + rex_flags.always_emit(); + emit_std_enc_enc( + sink, + LegacyPrefix::None, + opcode, + 2, + 0, + reg_enc(dst.to_reg()), + rex_flags, + ); + } + Inst::Push64 { src } => { match src { RegMemImm::Reg { reg } => { @@ -877,6 +861,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } RegMemImm::Mem { addr } => { + let addr = &addr.finalize(state); emit_std_enc_mem( sink, LegacyPrefix::None, @@ -910,7 +895,22 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { sink.put1(0x58 + (encDst & 7)); } - Inst::CallUnknown { dest } => { + Inst::CallKnown { + dest, loc, opcode, .. + } => { + sink.put1(0xE8); + // The addend adjusts for the difference between the end of the instruction and the + // beginning of the immediate field. + sink.add_reloc(*loc, Reloc::X86CallPCRel4, &dest, -4); + sink.put4(0); + if opcode.is_call() { + sink.add_call_site(*loc, *opcode); + } + } + + Inst::CallUnknown { + dest, opcode, loc, .. 
+ } => { match dest { RegMem::Reg { reg } => { let reg_enc = int_reg_enc(*reg); @@ -926,6 +926,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } RegMem::Mem { addr } => { + let addr = &addr.finalize(state); emit_std_enc_mem( sink, LegacyPrefix::None, @@ -937,61 +938,61 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { ); } } + if opcode.is_call() { + sink.add_call_site(*loc, *opcode); + } } Inst::Ret {} => sink.put1(0xC3), - Inst::JmpKnown { dest } => { - let disp = dest.as_offset32_or_zero() - 5; - let disp = disp as u32; + Inst::JmpKnown { dst } => { let br_start = sink.cur_offset(); let br_disp_off = br_start + 1; let br_end = br_start + 5; - if let Some(l) = dest.as_label() { - sink.use_label_at_offset(br_disp_off, l, LabelUse::Rel32); + if let Some(l) = dst.as_label() { + sink.use_label_at_offset(br_disp_off, l, LabelUse::JmpRel32); sink.add_uncond_branch(br_start, br_end, l); } + + let disp = dst.as_offset32_or_zero(); + let disp = disp as u32; sink.put1(0xE9); sink.put4(disp); } - Inst::JmpCondSymm { + Inst::JmpCond { cc, taken, not_taken, } => { - // Conditional part. - - // This insn is 6 bytes long. Currently `offset` is relative to - // the start of this insn, but the Intel encoding requires it to - // be relative to the start of the next instruction. Hence the - // adjustment. - let taken_disp = taken.as_offset32_or_zero() - 6; - let taken_disp = taken_disp as u32; + // If taken. let cond_start = sink.cur_offset(); let cond_disp_off = cond_start + 2; let cond_end = cond_start + 6; if let Some(l) = taken.as_label() { - sink.use_label_at_offset(cond_disp_off, l, LabelUse::Rel32); + sink.use_label_at_offset(cond_disp_off, l, LabelUse::JmpRel32); let inverted: [u8; 6] = - [0x0F, 0x80 + (cc.invert().get_enc()), 0xFA, 0xFF, 0xFF, 0xFF]; + [0x0F, 0x80 + (cc.invert().get_enc()), 0x00, 0x00, 0x00, 0x00]; sink.add_cond_branch(cond_start, cond_end, l, &inverted[..]); } + + let taken_disp = taken.as_offset32_or_zero(); + let taken_disp = taken_disp as u32; sink.put1(0x0F); sink.put1(0x80 + cc.get_enc()); sink.put4(taken_disp); - // Unconditional part. - - let nt_disp = not_taken.as_offset32_or_zero() - 5; - let nt_disp = nt_disp as u32; + // If not taken. 
let uncond_start = sink.cur_offset(); let uncond_disp_off = uncond_start + 1; let uncond_end = uncond_start + 5; if let Some(l) = not_taken.as_label() { - sink.use_label_at_offset(uncond_disp_off, l, LabelUse::Rel32); + sink.use_label_at_offset(uncond_disp_off, l, LabelUse::JmpRel32); sink.add_uncond_branch(uncond_start, uncond_end, l); } + + let nt_disp = not_taken.as_offset32_or_zero(); + let nt_disp = nt_disp as u32; sink.put1(0xE9); sink.put4(nt_disp); } @@ -1012,6 +1013,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } RegMem::Mem { addr } => { + let addr = &addr.finalize(state); emit_std_enc_mem( sink, LegacyPrefix::None, @@ -1045,6 +1047,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } RegMem::Mem { addr } => { + let addr = &addr.finalize(state); emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex); } } @@ -1074,11 +1077,33 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } RegMem::Mem { addr } => { + let addr = &addr.finalize(state); emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex); } } } - _ => panic!("x64_emit: unhandled: {} ", inst.show_rru(None)), + Inst::Hlt => { + sink.put1(0xcc); + } + + Inst::Ud2 { trap_info } => { + sink.add_trap(trap_info.0, trap_info.1); + sink.put1(0x0f); + sink.put1(0x0b); + } + + Inst::VirtualSPOffsetAdj { offset } => { + debug!( + "virtual sp offset adjusted by {} -> {}", + offset, + state.virtual_sp_offset + offset + ); + state.virtual_sp_offset += offset; + } + + Inst::Nop { .. } | Inst::EpiloguePlaceholder => { + // Generate no code. + } } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 94c0b7a8b2..77b0b79351 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -95,82 +95,82 @@ fn test_x64_emit() { // // Addr_IR, offset zero insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0, rax), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rax), w_rdi), "488B38", "movq 0(%rax), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0, rbx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rbx), w_rdi), "488B3B", "movq 0(%rbx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0, rcx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rcx), w_rdi), "488B39", "movq 0(%rcx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0, rdx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rdx), w_rdi), "488B3A", "movq 0(%rdx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0, rbp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rbp), w_rdi), "488B7D00", "movq 0(%rbp), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0, rsp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rsp), w_rdi), "488B3C24", "movq 0(%rsp), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0, rsi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rsi), w_rdi), "488B3E", "movq 0(%rsi), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0, rdi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rdi), w_rdi), "488B3F", "movq 0(%rdi), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0, r8), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r8), w_rdi), "498B38", "movq 0(%r8), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0, r9), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r9), w_rdi), "498B39", "movq 0(%r9), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0, r10), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r10), w_rdi), "498B3A", "movq 0(%r10), %rdi", )); insns.push(( - 
Inst::mov64_m_r(Addr::imm_reg(0, r11), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r11), w_rdi), "498B3B", "movq 0(%r11), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0, r12), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r12), w_rdi), "498B3C24", "movq 0(%r12), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0, r13), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r13), w_rdi), "498B7D00", "movq 0(%r13), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0, r14), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r14), w_rdi), "498B3E", "movq 0(%r14), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0, r15), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r15), w_rdi), "498B3F", "movq 0(%r15), %rdi", )); @@ -178,82 +178,82 @@ fn test_x64_emit() { // ======================================================== // Addr_IR, offset max simm8 insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, rax), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, rax), w_rdi), "488B787F", "movq 127(%rax), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, rbx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, rbx), w_rdi), "488B7B7F", "movq 127(%rbx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, rcx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, rcx), w_rdi), "488B797F", "movq 127(%rcx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, rdx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, rdx), w_rdi), "488B7A7F", "movq 127(%rdx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, rbp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, rbp), w_rdi), "488B7D7F", "movq 127(%rbp), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, rsp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, rsp), w_rdi), "488B7C247F", "movq 127(%rsp), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, rsi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, rsi), w_rdi), "488B7E7F", "movq 127(%rsi), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, rdi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, rdi), w_rdi), "488B7F7F", "movq 127(%rdi), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, r8), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r8), w_rdi), "498B787F", "movq 127(%r8), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, r9), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r9), w_rdi), "498B797F", "movq 127(%r9), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, r10), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r10), w_rdi), "498B7A7F", "movq 127(%r10), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, r11), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r11), w_rdi), "498B7B7F", "movq 127(%r11), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, r12), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r12), w_rdi), "498B7C247F", "movq 127(%r12), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, r13), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r13), w_rdi), "498B7D7F", "movq 127(%r13), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, r14), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r14), w_rdi), "498B7E7F", "movq 127(%r14), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(127, r15), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r15), w_rdi), "498B7F7F", "movq 127(%r15), %rdi", )); @@ -261,82 +261,82 @@ fn test_x64_emit() { // ======================================================== // Addr_IR, offset min simm8 insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rax), 
w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rax), w_rdi), "488B7880", "movq -128(%rax), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rbx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rbx), w_rdi), "488B7B80", "movq -128(%rbx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rcx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rcx), w_rdi), "488B7980", "movq -128(%rcx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rdx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rdx), w_rdi), "488B7A80", "movq -128(%rdx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rbp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rbp), w_rdi), "488B7D80", "movq -128(%rbp), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rsp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rsp), w_rdi), "488B7C2480", "movq -128(%rsp), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rsi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rsi), w_rdi), "488B7E80", "movq -128(%rsi), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, rdi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rdi), w_rdi), "488B7F80", "movq -128(%rdi), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r8), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r8), w_rdi), "498B7880", "movq -128(%r8), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r9), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r9), w_rdi), "498B7980", "movq -128(%r9), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r10), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r10), w_rdi), "498B7A80", "movq -128(%r10), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r11), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r11), w_rdi), "498B7B80", "movq -128(%r11), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r12), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r12), w_rdi), "498B7C2480", "movq -128(%r12), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r13), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r13), w_rdi), "498B7D80", "movq -128(%r13), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r14), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r14), w_rdi), "498B7E80", "movq -128(%r14), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-128i32 as u32, r15), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r15), w_rdi), "498B7F80", "movq -128(%r15), %rdi", )); @@ -344,82 +344,82 @@ fn test_x64_emit() { // ======================================================== // Addr_IR, offset smallest positive simm32 insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, rax), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rax), w_rdi), "488BB880000000", "movq 128(%rax), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, rbx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rbx), w_rdi), "488BBB80000000", "movq 128(%rbx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, rcx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rcx), w_rdi), "488BB980000000", "movq 128(%rcx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, rdx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rdx), w_rdi), 
"488BBA80000000", "movq 128(%rdx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, rbp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rbp), w_rdi), "488BBD80000000", "movq 128(%rbp), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, rsp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rsp), w_rdi), "488BBC2480000000", "movq 128(%rsp), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, rsi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rsi), w_rdi), "488BBE80000000", "movq 128(%rsi), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, rdi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rdi), w_rdi), "488BBF80000000", "movq 128(%rdi), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, r8), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r8), w_rdi), "498BB880000000", "movq 128(%r8), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, r9), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r9), w_rdi), "498BB980000000", "movq 128(%r9), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, r10), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r10), w_rdi), "498BBA80000000", "movq 128(%r10), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, r11), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r11), w_rdi), "498BBB80000000", "movq 128(%r11), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, r12), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r12), w_rdi), "498BBC2480000000", "movq 128(%r12), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, r13), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r13), w_rdi), "498BBD80000000", "movq 128(%r13), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, r14), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r14), w_rdi), "498BBE80000000", "movq 128(%r14), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(128, r15), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r15), w_rdi), "498BBF80000000", "movq 128(%r15), %rdi", )); @@ -427,82 +427,82 @@ fn test_x64_emit() { // ======================================================== // Addr_IR, offset smallest negative simm32 insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rax), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rax), w_rdi), "488BB87FFFFFFF", "movq -129(%rax), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rbx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rbx), w_rdi), "488BBB7FFFFFFF", "movq -129(%rbx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rcx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rcx), w_rdi), "488BB97FFFFFFF", "movq -129(%rcx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rdx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rdx), w_rdi), "488BBA7FFFFFFF", "movq -129(%rdx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rbp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rbp), w_rdi), "488BBD7FFFFFFF", "movq -129(%rbp), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rsp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rsp), w_rdi), "488BBC247FFFFFFF", "movq -129(%rsp), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rsi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rsi), w_rdi), "488BBE7FFFFFFF", "movq -129(%rsi), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, rdi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as 
u32, rdi), w_rdi), "488BBF7FFFFFFF", "movq -129(%rdi), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r8), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r8), w_rdi), "498BB87FFFFFFF", "movq -129(%r8), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r9), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r9), w_rdi), "498BB97FFFFFFF", "movq -129(%r9), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r10), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r10), w_rdi), "498BBA7FFFFFFF", "movq -129(%r10), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r11), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r11), w_rdi), "498BBB7FFFFFFF", "movq -129(%r11), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r12), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r12), w_rdi), "498BBC247FFFFFFF", "movq -129(%r12), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r13), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r13), w_rdi), "498BBD7FFFFFFF", "movq -129(%r13), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r14), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r14), w_rdi), "498BBE7FFFFFFF", "movq -129(%r14), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-129i32 as u32, r15), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r15), w_rdi), "498BBF7FFFFFFF", "movq -129(%r15), %rdi", )); @@ -510,82 +510,82 @@ fn test_x64_emit() { // ======================================================== // Addr_IR, offset large positive simm32 insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0x17732077, rax), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rax), w_rdi), "488BB877207317", "movq 393420919(%rax), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0x17732077, rbx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rbx), w_rdi), "488BBB77207317", "movq 393420919(%rbx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0x17732077, rcx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rcx), w_rdi), "488BB977207317", "movq 393420919(%rcx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0x17732077, rdx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rdx), w_rdi), "488BBA77207317", "movq 393420919(%rdx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0x17732077, rbp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rbp), w_rdi), "488BBD77207317", "movq 393420919(%rbp), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0x17732077, rsp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rsp), w_rdi), "488BBC2477207317", "movq 393420919(%rsp), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0x17732077, rsi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rsi), w_rdi), "488BBE77207317", "movq 393420919(%rsi), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0x17732077, rdi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rdi), w_rdi), "488BBF77207317", "movq 393420919(%rdi), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0x17732077, r8), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r8), w_rdi), "498BB877207317", "movq 393420919(%r8), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0x17732077, r9), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r9), w_rdi), "498BB977207317", "movq 393420919(%r9), %rdi", )); insns.push(( - 
Inst::mov64_m_r(Addr::imm_reg(0x17732077, r10), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r10), w_rdi), "498BBA77207317", "movq 393420919(%r10), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0x17732077, r11), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r11), w_rdi), "498BBB77207317", "movq 393420919(%r11), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0x17732077, r12), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r12), w_rdi), "498BBC2477207317", "movq 393420919(%r12), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0x17732077, r13), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r13), w_rdi), "498BBD77207317", "movq 393420919(%r13), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0x17732077, r14), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r14), w_rdi), "498BBE77207317", "movq 393420919(%r14), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(0x17732077, r15), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r15), w_rdi), "498BBF77207317", "movq 393420919(%r15), %rdi", )); @@ -593,82 +593,82 @@ fn test_x64_emit() { // ======================================================== // Addr_IR, offset large negative simm32 insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rax), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rax), w_rdi), "488BB8D9A6BECE", "movq -826366247(%rax), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rbx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rbx), w_rdi), "488BBBD9A6BECE", "movq -826366247(%rbx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rcx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rcx), w_rdi), "488BB9D9A6BECE", "movq -826366247(%rcx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rdx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rdx), w_rdi), "488BBAD9A6BECE", "movq -826366247(%rdx), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rbp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rbp), w_rdi), "488BBDD9A6BECE", "movq -826366247(%rbp), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rsp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rsp), w_rdi), "488BBC24D9A6BECE", "movq -826366247(%rsp), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rsi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rsi), w_rdi), "488BBED9A6BECE", "movq -826366247(%rsi), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, rdi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rdi), w_rdi), "488BBFD9A6BECE", "movq -826366247(%rdi), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r8), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r8), w_rdi), "498BB8D9A6BECE", "movq -826366247(%r8), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r9), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r9), w_rdi), "498BB9D9A6BECE", "movq -826366247(%r9), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r10), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r10), w_rdi), "498BBAD9A6BECE", "movq -826366247(%r10), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r11), 
w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r11), w_rdi), "498BBBD9A6BECE", "movq -826366247(%r11), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r12), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r12), w_rdi), "498BBC24D9A6BECE", "movq -826366247(%r12), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r13), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r13), w_rdi), "498BBDD9A6BECE", "movq -826366247(%r13), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r14), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r14), w_rdi), "498BBED9A6BECE", "movq -826366247(%r14), %rdi", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg(-0x31415927i32 as u32, r15), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r15), w_rdi), "498BBFD9A6BECE", "movq -826366247(%r15), %rdi", )); @@ -680,42 +680,42 @@ fn test_x64_emit() { // // Addr_IRRS, offset max simm8 insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, rax, rax, 0), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rax, rax, 0), w_r11), "4C8B5C007F", "movq 127(%rax,%rax,1), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, rdi, rax, 1), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rdi, rax, 1), w_r11), "4C8B5C477F", "movq 127(%rdi,%rax,2), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, r8, rax, 2), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r8, rax, 2), w_r11), "4D8B5C807F", "movq 127(%r8,%rax,4), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, r15, rax, 3), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r15, rax, 3), w_r11), "4D8B5CC77F", "movq 127(%r15,%rax,8), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, rax, rdi, 3), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rax, rdi, 3), w_r11), "4C8B5CF87F", "movq 127(%rax,%rdi,8), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, rdi, rdi, 2), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rdi, rdi, 2), w_r11), "4C8B5CBF7F", "movq 127(%rdi,%rdi,4), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, r8, rdi, 1), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r8, rdi, 1), w_r11), "4D8B5C787F", "movq 127(%r8,%rdi,2), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(127, r15, rdi, 0), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r15, rdi, 0), w_r11), "4D8B5C3F7F", "movq 127(%r15,%rdi,1), %r11", )); @@ -723,42 +723,42 @@ fn test_x64_emit() { // ======================================================== // Addr_IRRS, offset min simm8 insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, rax, r8, 2), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rax, r8, 2), w_r11), "4E8B5C8080", "movq -128(%rax,%r8,4), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, rdi, r8, 3), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rdi, r8, 3), w_r11), "4E8B5CC780", "movq -128(%rdi,%r8,8), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, r8, r8, 0), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r8, r8, 0), w_r11), "4F8B5C0080", "movq -128(%r8,%r8,1), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, r15, r8, 1), w_r11), + 
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r15, r8, 1), w_r11), "4F8B5C4780", "movq -128(%r15,%r8,2), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, rax, r15, 1), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rax, r15, 1), w_r11), "4E8B5C7880", "movq -128(%rax,%r15,2), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, rdi, r15, 0), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rdi, r15, 0), w_r11), "4E8B5C3F80", "movq -128(%rdi,%r15,1), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, r8, r15, 3), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r8, r15, 3), w_r11), "4F8B5CF880", "movq -128(%r8,%r15,8), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(-128i32 as u32, r15, r15, 2), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r15, r15, 2), w_r11), "4F8B5CBF80", "movq -128(%r15,%r15,4), %r11", )); @@ -766,42 +766,42 @@ fn test_x64_emit() { // ======================================================== // Addr_IRRS, offset large positive simm32 insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, rax, rax, 0), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rax, rax, 0), w_r11), "4C8B9C00BE25664F", "movq 1332094398(%rax,%rax,1), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, rdi, rax, 1), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rdi, rax, 1), w_r11), "4C8B9C47BE25664F", "movq 1332094398(%rdi,%rax,2), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, r8, rax, 2), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r8, rax, 2), w_r11), "4D8B9C80BE25664F", "movq 1332094398(%r8,%rax,4), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, r15, rax, 3), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r15, rax, 3), w_r11), "4D8B9CC7BE25664F", "movq 1332094398(%r15,%rax,8), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, rax, rdi, 3), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rax, rdi, 3), w_r11), "4C8B9CF8BE25664F", "movq 1332094398(%rax,%rdi,8), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, rdi, rdi, 2), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rdi, rdi, 2), w_r11), "4C8B9CBFBE25664F", "movq 1332094398(%rdi,%rdi,4), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, r8, rdi, 1), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r8, rdi, 1), w_r11), "4D8B9C78BE25664F", "movq 1332094398(%r8,%rdi,2), %r11", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(0x4f6625be, r15, rdi, 0), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r15, rdi, 0), w_r11), "4D8B9C3FBE25664F", "movq 1332094398(%r15,%rdi,1), %r11", )); @@ -810,7 +810,7 @@ fn test_x64_emit() { // Addr_IRRS, offset large negative simm32 insns.push(( Inst::mov64_m_r( - Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r8, 2), + Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r8, 2), w_r11, ), "4E8B9C8070E9B2D9", @@ -818,7 +818,7 @@ fn test_x64_emit() { )); insns.push(( Inst::mov64_m_r( - Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r8, 3), + Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r8, 3), w_r11, ), "4E8B9CC770E9B2D9", @@ -826,7 +826,7 @@ fn test_x64_emit() { )); insns.push(( 
Inst::mov64_m_r( - Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r8, 0), + Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r8, 0), w_r11, ), "4F8B9C0070E9B2D9", @@ -834,7 +834,7 @@ fn test_x64_emit() { )); insns.push(( Inst::mov64_m_r( - Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r8, 1), + Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r8, 1), w_r11, ), "4F8B9C4770E9B2D9", @@ -842,7 +842,7 @@ fn test_x64_emit() { )); insns.push(( Inst::mov64_m_r( - Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r15, 1), + Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r15, 1), w_r11, ), "4E8B9C7870E9B2D9", @@ -850,7 +850,7 @@ fn test_x64_emit() { )); insns.push(( Inst::mov64_m_r( - Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r15, 0), + Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r15, 0), w_r11, ), "4E8B9C3F70E9B2D9", @@ -858,7 +858,7 @@ fn test_x64_emit() { )); insns.push(( Inst::mov64_m_r( - Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r15, 3), + Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r15, 3), w_r11, ), "4F8B9CF870E9B2D9", @@ -866,7 +866,7 @@ fn test_x64_emit() { )); insns.push(( Inst::mov64_m_r( - Addr::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r15, 2), + Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r15, 2), w_r11, ), "4F8B9CBF70E9B2D9", @@ -900,7 +900,7 @@ fn test_x64_emit() { Inst::alu_rmi_r( true, AluRmiROpcode::Add, - RegMemImm::mem(Addr::imm_reg(99, rdi)), + RegMemImm::mem(Amode::imm_reg(99, rdi)), w_rdx, ), "48035763", @@ -910,7 +910,7 @@ fn test_x64_emit() { Inst::alu_rmi_r( false, AluRmiROpcode::Add, - RegMemImm::mem(Addr::imm_reg(99, rdi)), + RegMemImm::mem(Amode::imm_reg(99, rdi)), w_r8, ), "44034763", @@ -920,7 +920,7 @@ fn test_x64_emit() { Inst::alu_rmi_r( false, AluRmiROpcode::Add, - RegMemImm::mem(Addr::imm_reg(99, rdi)), + RegMemImm::mem(Amode::imm_reg(99, rdi)), w_rsi, ), "037763", @@ -1047,7 +1047,7 @@ fn test_x64_emit() { Inst::alu_rmi_r( true, AluRmiROpcode::Mul, - RegMemImm::mem(Addr::imm_reg(99, rdi)), + RegMemImm::mem(Amode::imm_reg(99, rdi)), w_rdx, ), "480FAF5763", @@ -1057,7 +1057,7 @@ fn test_x64_emit() { Inst::alu_rmi_r( false, AluRmiROpcode::Mul, - RegMemImm::mem(Addr::imm_reg(99, rdi)), + RegMemImm::mem(Amode::imm_reg(99, rdi)), w_r8, ), "440FAF4763", @@ -1067,7 +1067,7 @@ fn test_x64_emit() { Inst::alu_rmi_r( false, AluRmiROpcode::Mul, - RegMemImm::mem(Addr::imm_reg(99, rdi)), + RegMemImm::mem(Amode::imm_reg(99, rdi)), w_rsi, ), "0FAF7763", @@ -1242,104 +1242,229 @@ fn test_x64_emit() { )); // ======================================================== - // MovZX_M_R + // MovZX_RM_R insns.push(( - Inst::movzx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + Inst::movzx_rm_r(ExtMode::BL, RegMem::reg(rax), w_rsi), + "0FB6F0", + "movzbl %al, %esi", + )); + insns.push(( + Inst::movzx_rm_r(ExtMode::BL, RegMem::reg(r15), w_rsi), + "410FB6F7", + "movzbl %r15b, %esi", + )); + insns.push(( + Inst::movzx_rm_r( + ExtMode::BL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), + w_rsi, + ), "0FB671F9", "movzbl -7(%rcx), %esi", )); insns.push(( - Inst::movzx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + Inst::movzx_rm_r( + ExtMode::BL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), + w_rbx, + ), "410FB658F9", "movzbl -7(%r8), %ebx", )); insns.push(( - Inst::movzx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, r10), w_r9), + Inst::movzx_rm_r( + ExtMode::BL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), + w_r9, + ), "450FB64AF9", "movzbl -7(%r10), %r9d", )); insns.push(( - 
Inst::movzx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + Inst::movzx_rm_r( + ExtMode::BL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), + w_rdx, + ), "410FB653F9", "movzbl -7(%r11), %edx", )); insns.push(( - Inst::movzx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + Inst::movzx_rm_r(ExtMode::BQ, RegMem::reg(rax), w_rsi), + "480FB6F0", + "movzbq %al, %rsi", + )); + insns.push(( + Inst::movzx_rm_r(ExtMode::BQ, RegMem::reg(r10), w_rsi), + "490FB6F2", + "movzbq %r10b, %rsi", + )); + insns.push(( + Inst::movzx_rm_r( + ExtMode::BQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), + w_rsi, + ), "480FB671F9", "movzbq -7(%rcx), %rsi", )); insns.push(( - Inst::movzx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + Inst::movzx_rm_r( + ExtMode::BQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), + w_rbx, + ), "490FB658F9", "movzbq -7(%r8), %rbx", )); insns.push(( - Inst::movzx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, r10), w_r9), + Inst::movzx_rm_r( + ExtMode::BQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), + w_r9, + ), "4D0FB64AF9", "movzbq -7(%r10), %r9", )); insns.push(( - Inst::movzx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + Inst::movzx_rm_r( + ExtMode::BQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), + w_rdx, + ), "490FB653F9", "movzbq -7(%r11), %rdx", )); insns.push(( - Inst::movzx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + Inst::movzx_rm_r(ExtMode::WL, RegMem::reg(rcx), w_rsi), + "0FB7F1", + "movzwl %cx, %esi", + )); + insns.push(( + Inst::movzx_rm_r(ExtMode::WL, RegMem::reg(r10), w_rsi), + "410FB7F2", + "movzwl %r10w, %esi", + )); + insns.push(( + Inst::movzx_rm_r( + ExtMode::WL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), + w_rsi, + ), "0FB771F9", "movzwl -7(%rcx), %esi", )); insns.push(( - Inst::movzx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + Inst::movzx_rm_r( + ExtMode::WL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), + w_rbx, + ), "410FB758F9", "movzwl -7(%r8), %ebx", )); insns.push(( - Inst::movzx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, r10), w_r9), + Inst::movzx_rm_r( + ExtMode::WL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), + w_r9, + ), "450FB74AF9", "movzwl -7(%r10), %r9d", )); insns.push(( - Inst::movzx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + Inst::movzx_rm_r( + ExtMode::WL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), + w_rdx, + ), "410FB753F9", "movzwl -7(%r11), %edx", )); insns.push(( - Inst::movzx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + Inst::movzx_rm_r(ExtMode::WQ, RegMem::reg(rcx), w_rsi), + "480FB7F1", + "movzwq %cx, %rsi", + )); + insns.push(( + Inst::movzx_rm_r(ExtMode::WQ, RegMem::reg(r11), w_rsi), + "490FB7F3", + "movzwq %r11w, %rsi", + )); + insns.push(( + Inst::movzx_rm_r( + ExtMode::WQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), + w_rsi, + ), "480FB771F9", "movzwq -7(%rcx), %rsi", )); insns.push(( - Inst::movzx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + Inst::movzx_rm_r( + ExtMode::WQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), + w_rbx, + ), "490FB758F9", "movzwq -7(%r8), %rbx", )); insns.push(( - Inst::movzx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, r10), w_r9), + Inst::movzx_rm_r( + ExtMode::WQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), + w_r9, + ), "4D0FB74AF9", "movzwq -7(%r10), %r9", )); insns.push(( - Inst::movzx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + Inst::movzx_rm_r( + ExtMode::WQ, + RegMem::mem(Amode::imm_reg(-7i32 as 
u32, r11)), + w_rdx, + ), "490FB753F9", "movzwq -7(%r11), %rdx", )); insns.push(( - Inst::movzx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(rcx), w_rsi), + "8BF1", + "movl %ecx, %esi", + )); + insns.push(( + Inst::movzx_rm_r( + ExtMode::LQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), + w_rsi, + ), "8B71F9", "movl -7(%rcx), %esi", )); insns.push(( - Inst::movzx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + Inst::movzx_rm_r( + ExtMode::LQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), + w_rbx, + ), "418B58F9", "movl -7(%r8), %ebx", )); insns.push(( - Inst::movzx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, r10), w_r9), + Inst::movzx_rm_r( + ExtMode::LQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), + w_r9, + ), "458B4AF9", "movl -7(%r10), %r9d", )); insns.push(( - Inst::movzx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + Inst::movzx_rm_r( + ExtMode::LQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), + w_rdx, + ), "418B53F9", "movl -7(%r11), %edx", )); @@ -1347,145 +1472,293 @@ fn test_x64_emit() { // ======================================================== // Mov64_M_R insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, rax, rbx, 0), w_rcx), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, rbx, 0), w_rcx), "488B8C18B3000000", "movq 179(%rax,%rbx,1), %rcx", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, rax, rbx, 0), w_r8), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, rbx, 0), w_r8), "4C8B8418B3000000", "movq 179(%rax,%rbx,1), %r8", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, rax, r9, 0), w_rcx), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, r9, 0), w_rcx), "4A8B8C08B3000000", "movq 179(%rax,%r9,1), %rcx", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, rax, r9, 0), w_r8), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, r9, 0), w_r8), "4E8B8408B3000000", "movq 179(%rax,%r9,1), %r8", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, r10, rbx, 0), w_rcx), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, rbx, 0), w_rcx), "498B8C1AB3000000", "movq 179(%r10,%rbx,1), %rcx", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, r10, rbx, 0), w_r8), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, rbx, 0), w_r8), "4D8B841AB3000000", "movq 179(%r10,%rbx,1), %r8", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, r10, r9, 0), w_rcx), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_rcx), "4B8B8C0AB3000000", "movq 179(%r10,%r9,1), %rcx", )); insns.push(( - Inst::mov64_m_r(Addr::imm_reg_reg_shift(179, r10, r9, 0), w_r8), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_r8), "4F8B840AB3000000", "movq 179(%r10,%r9,1), %r8", )); // ======================================================== - // MovSX_M_R + // LoadEffectiveAddress insns.push(( - Inst::movsx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + Inst::lea(Amode::imm_reg(42, r10), w_r8), + "4D8D422A", + "lea 42(%r10), %r8", + )); + insns.push(( + Inst::lea(Amode::imm_reg(42, r10), w_r15), + "4D8D7A2A", + "lea 42(%r10), %r15", + )); + insns.push(( + Inst::lea(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_r8), + "4F8D840AB3000000", + "lea 179(%r10,%r9,1), %r8", + )); + + // ======================================================== + // MovSX_RM_R + insns.push(( + Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rcx), w_rsi), + "0FBEF1", + "movsbl %cl, %esi", + )); + insns.push(( + 
Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(r14), w_rsi), + "410FBEF6", + "movsbl %r14b, %esi", + )); + insns.push(( + Inst::movsx_rm_r( + ExtMode::BL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), + w_rsi, + ), "0FBE71F9", "movsbl -7(%rcx), %esi", )); insns.push(( - Inst::movsx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + Inst::movsx_rm_r( + ExtMode::BL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), + w_rbx, + ), "410FBE58F9", "movsbl -7(%r8), %ebx", )); insns.push(( - Inst::movsx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, r10), w_r9), + Inst::movsx_rm_r( + ExtMode::BL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), + w_r9, + ), "450FBE4AF9", "movsbl -7(%r10), %r9d", )); insns.push(( - Inst::movsx_m_r(ExtMode::BL, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + Inst::movsx_rm_r( + ExtMode::BL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), + w_rdx, + ), "410FBE53F9", "movsbl -7(%r11), %edx", )); insns.push(( - Inst::movsx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + Inst::movsx_rm_r(ExtMode::BQ, RegMem::reg(rcx), w_rsi), + "480FBEF1", + "movsbq %cl, %rsi", + )); + insns.push(( + Inst::movsx_rm_r(ExtMode::BQ, RegMem::reg(r15), w_rsi), + "490FBEF7", + "movsbq %r15b, %rsi", + )); + insns.push(( + Inst::movsx_rm_r( + ExtMode::BQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), + w_rsi, + ), "480FBE71F9", "movsbq -7(%rcx), %rsi", )); insns.push(( - Inst::movsx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + Inst::movsx_rm_r( + ExtMode::BQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), + w_rbx, + ), "490FBE58F9", "movsbq -7(%r8), %rbx", )); insns.push(( - Inst::movsx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, r10), w_r9), + Inst::movsx_rm_r( + ExtMode::BQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), + w_r9, + ), "4D0FBE4AF9", "movsbq -7(%r10), %r9", )); insns.push(( - Inst::movsx_m_r(ExtMode::BQ, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + Inst::movsx_rm_r( + ExtMode::BQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), + w_rdx, + ), "490FBE53F9", "movsbq -7(%r11), %rdx", )); insns.push(( - Inst::movsx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + Inst::movsx_rm_r(ExtMode::WL, RegMem::reg(rcx), w_rsi), + "0FBFF1", + "movswl %cx, %esi", + )); + insns.push(( + Inst::movsx_rm_r(ExtMode::WL, RegMem::reg(r14), w_rsi), + "410FBFF6", + "movswl %r14w, %esi", + )); + insns.push(( + Inst::movsx_rm_r( + ExtMode::WL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), + w_rsi, + ), "0FBF71F9", "movswl -7(%rcx), %esi", )); insns.push(( - Inst::movsx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + Inst::movsx_rm_r( + ExtMode::WL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), + w_rbx, + ), "410FBF58F9", "movswl -7(%r8), %ebx", )); insns.push(( - Inst::movsx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, r10), w_r9), + Inst::movsx_rm_r( + ExtMode::WL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), + w_r9, + ), "450FBF4AF9", "movswl -7(%r10), %r9d", )); insns.push(( - Inst::movsx_m_r(ExtMode::WL, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + Inst::movsx_rm_r( + ExtMode::WL, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), + w_rdx, + ), "410FBF53F9", "movswl -7(%r11), %edx", )); insns.push(( - Inst::movsx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + Inst::movsx_rm_r(ExtMode::WQ, RegMem::reg(rcx), w_rsi), + "480FBFF1", + "movswq %cx, %rsi", + )); + insns.push(( + Inst::movsx_rm_r(ExtMode::WQ, RegMem::reg(r13), w_rsi), + "490FBFF5", + "movswq %r13w, %rsi", + )); + insns.push(( + Inst::movsx_rm_r( + 
ExtMode::WQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), + w_rsi, + ), "480FBF71F9", "movswq -7(%rcx), %rsi", )); insns.push(( - Inst::movsx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + Inst::movsx_rm_r( + ExtMode::WQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), + w_rbx, + ), "490FBF58F9", "movswq -7(%r8), %rbx", )); insns.push(( - Inst::movsx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, r10), w_r9), + Inst::movsx_rm_r( + ExtMode::WQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), + w_r9, + ), "4D0FBF4AF9", "movswq -7(%r10), %r9", )); insns.push(( - Inst::movsx_m_r(ExtMode::WQ, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + Inst::movsx_rm_r( + ExtMode::WQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), + w_rdx, + ), "490FBF53F9", "movswq -7(%r11), %rdx", )); insns.push(( - Inst::movsx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, rcx), w_rsi), + Inst::movsx_rm_r(ExtMode::LQ, RegMem::reg(rcx), w_rsi), + "4863F1", + "movslq %ecx, %rsi", + )); + insns.push(( + Inst::movsx_rm_r(ExtMode::LQ, RegMem::reg(r15), w_rsi), + "4963F7", + "movslq %r15d, %rsi", + )); + insns.push(( + Inst::movsx_rm_r( + ExtMode::LQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), + w_rsi, + ), "486371F9", "movslq -7(%rcx), %rsi", )); insns.push(( - Inst::movsx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, r8), w_rbx), + Inst::movsx_rm_r( + ExtMode::LQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), + w_rbx, + ), "496358F9", "movslq -7(%r8), %rbx", )); insns.push(( - Inst::movsx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, r10), w_r9), + Inst::movsx_rm_r( + ExtMode::LQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), + w_r9, + ), "4D634AF9", "movslq -7(%r10), %r9", )); insns.push(( - Inst::movsx_m_r(ExtMode::LQ, Addr::imm_reg(-7i32 as u32, r11), w_rdx), + Inst::movsx_rm_r( + ExtMode::LQ, + RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), + w_rdx, + ), "496353F9", "movslq -7(%r11), %rdx", )); @@ -1493,325 +1766,325 @@ fn test_x64_emit() { // ======================================================== // Mov_R_M. Byte stores are tricky. Check everything carefully. 
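Why the byte forms deserve this much scrutiny: a byte store from %spl, %bpl, %sil, or %dil needs an empty REX prefix (0x40), because without any REX those ModRM register encodings select the legacy %ah/%ch/%dh/%bh instead; compare the expected encodings below, e.g. "40887063" for movb %sil, 99(%rax) against the prefix-free "884763" for movb %al, 99(%rdi). A minimal sketch of the decision, with an assumed helper name that is not taken from this patch:

fn byte_store_needs_empty_rex(enc_reg: u8) -> bool {
    // Without any REX prefix, register encodings 4..=7 denote %ah/%ch/%dh/%bh;
    // with an empty REX (0x40) they denote %spl/%bpl/%sil/%dil instead.
    // Encodings 8..=15 (%r8b..%r15b) set REX.B/REX.R anyway, so they need no
    // special casing.
    (4..=7).contains(&enc_reg)
}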
insns.push(( - Inst::mov_r_m(8, rax, Addr::imm_reg(99, rdi)), + Inst::mov_r_m(8, rax, Amode::imm_reg(99, rdi)), "48894763", "movq %rax, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(8, rbx, Addr::imm_reg(99, r8)), + Inst::mov_r_m(8, rbx, Amode::imm_reg(99, r8)), "49895863", "movq %rbx, 99(%r8)", )); insns.push(( - Inst::mov_r_m(8, rcx, Addr::imm_reg(99, rsi)), + Inst::mov_r_m(8, rcx, Amode::imm_reg(99, rsi)), "48894E63", "movq %rcx, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(8, rdx, Addr::imm_reg(99, r9)), + Inst::mov_r_m(8, rdx, Amode::imm_reg(99, r9)), "49895163", "movq %rdx, 99(%r9)", )); insns.push(( - Inst::mov_r_m(8, rsi, Addr::imm_reg(99, rax)), + Inst::mov_r_m(8, rsi, Amode::imm_reg(99, rax)), "48897063", "movq %rsi, 99(%rax)", )); insns.push(( - Inst::mov_r_m(8, rdi, Addr::imm_reg(99, r15)), + Inst::mov_r_m(8, rdi, Amode::imm_reg(99, r15)), "49897F63", "movq %rdi, 99(%r15)", )); insns.push(( - Inst::mov_r_m(8, rsp, Addr::imm_reg(99, rcx)), + Inst::mov_r_m(8, rsp, Amode::imm_reg(99, rcx)), "48896163", "movq %rsp, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(8, rbp, Addr::imm_reg(99, r14)), + Inst::mov_r_m(8, rbp, Amode::imm_reg(99, r14)), "49896E63", "movq %rbp, 99(%r14)", )); insns.push(( - Inst::mov_r_m(8, r8, Addr::imm_reg(99, rdi)), + Inst::mov_r_m(8, r8, Amode::imm_reg(99, rdi)), "4C894763", "movq %r8, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(8, r9, Addr::imm_reg(99, r8)), + Inst::mov_r_m(8, r9, Amode::imm_reg(99, r8)), "4D894863", "movq %r9, 99(%r8)", )); insns.push(( - Inst::mov_r_m(8, r10, Addr::imm_reg(99, rsi)), + Inst::mov_r_m(8, r10, Amode::imm_reg(99, rsi)), "4C895663", "movq %r10, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(8, r11, Addr::imm_reg(99, r9)), + Inst::mov_r_m(8, r11, Amode::imm_reg(99, r9)), "4D895963", "movq %r11, 99(%r9)", )); insns.push(( - Inst::mov_r_m(8, r12, Addr::imm_reg(99, rax)), + Inst::mov_r_m(8, r12, Amode::imm_reg(99, rax)), "4C896063", "movq %r12, 99(%rax)", )); insns.push(( - Inst::mov_r_m(8, r13, Addr::imm_reg(99, r15)), + Inst::mov_r_m(8, r13, Amode::imm_reg(99, r15)), "4D896F63", "movq %r13, 99(%r15)", )); insns.push(( - Inst::mov_r_m(8, r14, Addr::imm_reg(99, rcx)), + Inst::mov_r_m(8, r14, Amode::imm_reg(99, rcx)), "4C897163", "movq %r14, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(8, r15, Addr::imm_reg(99, r14)), + Inst::mov_r_m(8, r15, Amode::imm_reg(99, r14)), "4D897E63", "movq %r15, 99(%r14)", )); // insns.push(( - Inst::mov_r_m(4, rax, Addr::imm_reg(99, rdi)), + Inst::mov_r_m(4, rax, Amode::imm_reg(99, rdi)), "894763", "movl %eax, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(4, rbx, Addr::imm_reg(99, r8)), + Inst::mov_r_m(4, rbx, Amode::imm_reg(99, r8)), "41895863", "movl %ebx, 99(%r8)", )); insns.push(( - Inst::mov_r_m(4, rcx, Addr::imm_reg(99, rsi)), + Inst::mov_r_m(4, rcx, Amode::imm_reg(99, rsi)), "894E63", "movl %ecx, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(4, rdx, Addr::imm_reg(99, r9)), + Inst::mov_r_m(4, rdx, Amode::imm_reg(99, r9)), "41895163", "movl %edx, 99(%r9)", )); insns.push(( - Inst::mov_r_m(4, rsi, Addr::imm_reg(99, rax)), + Inst::mov_r_m(4, rsi, Amode::imm_reg(99, rax)), "897063", "movl %esi, 99(%rax)", )); insns.push(( - Inst::mov_r_m(4, rdi, Addr::imm_reg(99, r15)), + Inst::mov_r_m(4, rdi, Amode::imm_reg(99, r15)), "41897F63", "movl %edi, 99(%r15)", )); insns.push(( - Inst::mov_r_m(4, rsp, Addr::imm_reg(99, rcx)), + Inst::mov_r_m(4, rsp, Amode::imm_reg(99, rcx)), "896163", "movl %esp, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(4, rbp, Addr::imm_reg(99, r14)), + Inst::mov_r_m(4, rbp, Amode::imm_reg(99, 
r14)), "41896E63", "movl %ebp, 99(%r14)", )); insns.push(( - Inst::mov_r_m(4, r8, Addr::imm_reg(99, rdi)), + Inst::mov_r_m(4, r8, Amode::imm_reg(99, rdi)), "44894763", "movl %r8d, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(4, r9, Addr::imm_reg(99, r8)), + Inst::mov_r_m(4, r9, Amode::imm_reg(99, r8)), "45894863", "movl %r9d, 99(%r8)", )); insns.push(( - Inst::mov_r_m(4, r10, Addr::imm_reg(99, rsi)), + Inst::mov_r_m(4, r10, Amode::imm_reg(99, rsi)), "44895663", "movl %r10d, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(4, r11, Addr::imm_reg(99, r9)), + Inst::mov_r_m(4, r11, Amode::imm_reg(99, r9)), "45895963", "movl %r11d, 99(%r9)", )); insns.push(( - Inst::mov_r_m(4, r12, Addr::imm_reg(99, rax)), + Inst::mov_r_m(4, r12, Amode::imm_reg(99, rax)), "44896063", "movl %r12d, 99(%rax)", )); insns.push(( - Inst::mov_r_m(4, r13, Addr::imm_reg(99, r15)), + Inst::mov_r_m(4, r13, Amode::imm_reg(99, r15)), "45896F63", "movl %r13d, 99(%r15)", )); insns.push(( - Inst::mov_r_m(4, r14, Addr::imm_reg(99, rcx)), + Inst::mov_r_m(4, r14, Amode::imm_reg(99, rcx)), "44897163", "movl %r14d, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(4, r15, Addr::imm_reg(99, r14)), + Inst::mov_r_m(4, r15, Amode::imm_reg(99, r14)), "45897E63", "movl %r15d, 99(%r14)", )); // insns.push(( - Inst::mov_r_m(2, rax, Addr::imm_reg(99, rdi)), + Inst::mov_r_m(2, rax, Amode::imm_reg(99, rdi)), "66894763", "movw %ax, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(2, rbx, Addr::imm_reg(99, r8)), + Inst::mov_r_m(2, rbx, Amode::imm_reg(99, r8)), "6641895863", "movw %bx, 99(%r8)", )); insns.push(( - Inst::mov_r_m(2, rcx, Addr::imm_reg(99, rsi)), + Inst::mov_r_m(2, rcx, Amode::imm_reg(99, rsi)), "66894E63", "movw %cx, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(2, rdx, Addr::imm_reg(99, r9)), + Inst::mov_r_m(2, rdx, Amode::imm_reg(99, r9)), "6641895163", "movw %dx, 99(%r9)", )); insns.push(( - Inst::mov_r_m(2, rsi, Addr::imm_reg(99, rax)), + Inst::mov_r_m(2, rsi, Amode::imm_reg(99, rax)), "66897063", "movw %si, 99(%rax)", )); insns.push(( - Inst::mov_r_m(2, rdi, Addr::imm_reg(99, r15)), + Inst::mov_r_m(2, rdi, Amode::imm_reg(99, r15)), "6641897F63", "movw %di, 99(%r15)", )); insns.push(( - Inst::mov_r_m(2, rsp, Addr::imm_reg(99, rcx)), + Inst::mov_r_m(2, rsp, Amode::imm_reg(99, rcx)), "66896163", "movw %sp, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(2, rbp, Addr::imm_reg(99, r14)), + Inst::mov_r_m(2, rbp, Amode::imm_reg(99, r14)), "6641896E63", "movw %bp, 99(%r14)", )); insns.push(( - Inst::mov_r_m(2, r8, Addr::imm_reg(99, rdi)), + Inst::mov_r_m(2, r8, Amode::imm_reg(99, rdi)), "6644894763", "movw %r8w, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(2, r9, Addr::imm_reg(99, r8)), + Inst::mov_r_m(2, r9, Amode::imm_reg(99, r8)), "6645894863", "movw %r9w, 99(%r8)", )); insns.push(( - Inst::mov_r_m(2, r10, Addr::imm_reg(99, rsi)), + Inst::mov_r_m(2, r10, Amode::imm_reg(99, rsi)), "6644895663", "movw %r10w, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(2, r11, Addr::imm_reg(99, r9)), + Inst::mov_r_m(2, r11, Amode::imm_reg(99, r9)), "6645895963", "movw %r11w, 99(%r9)", )); insns.push(( - Inst::mov_r_m(2, r12, Addr::imm_reg(99, rax)), + Inst::mov_r_m(2, r12, Amode::imm_reg(99, rax)), "6644896063", "movw %r12w, 99(%rax)", )); insns.push(( - Inst::mov_r_m(2, r13, Addr::imm_reg(99, r15)), + Inst::mov_r_m(2, r13, Amode::imm_reg(99, r15)), "6645896F63", "movw %r13w, 99(%r15)", )); insns.push(( - Inst::mov_r_m(2, r14, Addr::imm_reg(99, rcx)), + Inst::mov_r_m(2, r14, Amode::imm_reg(99, rcx)), "6644897163", "movw %r14w, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(2, 
r15, Addr::imm_reg(99, r14)), + Inst::mov_r_m(2, r15, Amode::imm_reg(99, r14)), "6645897E63", "movw %r15w, 99(%r14)", )); // insns.push(( - Inst::mov_r_m(1, rax, Addr::imm_reg(99, rdi)), + Inst::mov_r_m(1, rax, Amode::imm_reg(99, rdi)), "884763", "movb %al, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(1, rbx, Addr::imm_reg(99, r8)), + Inst::mov_r_m(1, rbx, Amode::imm_reg(99, r8)), "41885863", "movb %bl, 99(%r8)", )); insns.push(( - Inst::mov_r_m(1, rcx, Addr::imm_reg(99, rsi)), + Inst::mov_r_m(1, rcx, Amode::imm_reg(99, rsi)), "884E63", "movb %cl, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(1, rdx, Addr::imm_reg(99, r9)), + Inst::mov_r_m(1, rdx, Amode::imm_reg(99, r9)), "41885163", "movb %dl, 99(%r9)", )); insns.push(( - Inst::mov_r_m(1, rsi, Addr::imm_reg(99, rax)), + Inst::mov_r_m(1, rsi, Amode::imm_reg(99, rax)), "40887063", "movb %sil, 99(%rax)", )); insns.push(( - Inst::mov_r_m(1, rdi, Addr::imm_reg(99, r15)), + Inst::mov_r_m(1, rdi, Amode::imm_reg(99, r15)), "41887F63", "movb %dil, 99(%r15)", )); insns.push(( - Inst::mov_r_m(1, rsp, Addr::imm_reg(99, rcx)), + Inst::mov_r_m(1, rsp, Amode::imm_reg(99, rcx)), "40886163", "movb %spl, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(1, rbp, Addr::imm_reg(99, r14)), + Inst::mov_r_m(1, rbp, Amode::imm_reg(99, r14)), "41886E63", "movb %bpl, 99(%r14)", )); insns.push(( - Inst::mov_r_m(1, r8, Addr::imm_reg(99, rdi)), + Inst::mov_r_m(1, r8, Amode::imm_reg(99, rdi)), "44884763", "movb %r8b, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(1, r9, Addr::imm_reg(99, r8)), + Inst::mov_r_m(1, r9, Amode::imm_reg(99, r8)), "45884863", "movb %r9b, 99(%r8)", )); insns.push(( - Inst::mov_r_m(1, r10, Addr::imm_reg(99, rsi)), + Inst::mov_r_m(1, r10, Amode::imm_reg(99, rsi)), "44885663", "movb %r10b, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(1, r11, Addr::imm_reg(99, r9)), + Inst::mov_r_m(1, r11, Amode::imm_reg(99, r9)), "45885963", "movb %r11b, 99(%r9)", )); insns.push(( - Inst::mov_r_m(1, r12, Addr::imm_reg(99, rax)), + Inst::mov_r_m(1, r12, Amode::imm_reg(99, rax)), "44886063", "movb %r12b, 99(%rax)", )); insns.push(( - Inst::mov_r_m(1, r13, Addr::imm_reg(99, r15)), + Inst::mov_r_m(1, r13, Amode::imm_reg(99, r15)), "45886F63", "movb %r13b, 99(%r15)", )); insns.push(( - Inst::mov_r_m(1, r14, Addr::imm_reg(99, rcx)), + Inst::mov_r_m(1, r14, Amode::imm_reg(99, rcx)), "44887163", "movb %r14b, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(1, r15, Addr::imm_reg(99, r14)), + Inst::mov_r_m(1, r15, Amode::imm_reg(99, r14)), "45887E63", "movb %r15b, 99(%r14)", )); @@ -1942,17 +2215,17 @@ fn test_x64_emit() { "cmpq %rcx, %rsi", )); insns.push(( - Inst::cmp_rmi_r(8, RegMemImm::mem(Addr::imm_reg(99, rdi)), rdx), + Inst::cmp_rmi_r(8, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx), "483B5763", "cmpq 99(%rdi), %rdx", )); insns.push(( - Inst::cmp_rmi_r(8, RegMemImm::mem(Addr::imm_reg(99, rdi)), r8), + Inst::cmp_rmi_r(8, RegMemImm::mem(Amode::imm_reg(99, rdi)), r8), "4C3B4763", "cmpq 99(%rdi), %r8", )); insns.push(( - Inst::cmp_rmi_r(8, RegMemImm::mem(Addr::imm_reg(99, rdi)), rsi), + Inst::cmp_rmi_r(8, RegMemImm::mem(Amode::imm_reg(99, rdi)), rsi), "483B7763", "cmpq 99(%rdi), %rsi", )); @@ -1988,17 +2261,17 @@ fn test_x64_emit() { "cmpl %ecx, %esi", )); insns.push(( - Inst::cmp_rmi_r(4, RegMemImm::mem(Addr::imm_reg(99, rdi)), rdx), + Inst::cmp_rmi_r(4, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx), "3B5763", "cmpl 99(%rdi), %edx", )); insns.push(( - Inst::cmp_rmi_r(4, RegMemImm::mem(Addr::imm_reg(99, rdi)), r8), + Inst::cmp_rmi_r(4, RegMemImm::mem(Amode::imm_reg(99, rdi)), r8), 
"443B4763", "cmpl 99(%rdi), %r8d", )); insns.push(( - Inst::cmp_rmi_r(4, RegMemImm::mem(Addr::imm_reg(99, rdi)), rsi), + Inst::cmp_rmi_r(4, RegMemImm::mem(Amode::imm_reg(99, rdi)), rsi), "3B7763", "cmpl 99(%rdi), %esi", )); @@ -2034,17 +2307,17 @@ fn test_x64_emit() { "cmpw %cx, %si", )); insns.push(( - Inst::cmp_rmi_r(2, RegMemImm::mem(Addr::imm_reg(99, rdi)), rdx), + Inst::cmp_rmi_r(2, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx), "663B5763", "cmpw 99(%rdi), %dx", )); insns.push(( - Inst::cmp_rmi_r(2, RegMemImm::mem(Addr::imm_reg(99, rdi)), r8), + Inst::cmp_rmi_r(2, RegMemImm::mem(Amode::imm_reg(99, rdi)), r8), "66443B4763", "cmpw 99(%rdi), %r8w", )); insns.push(( - Inst::cmp_rmi_r(2, RegMemImm::mem(Addr::imm_reg(99, rdi)), rsi), + Inst::cmp_rmi_r(2, RegMemImm::mem(Amode::imm_reg(99, rdi)), rsi), "663B7763", "cmpw 99(%rdi), %si", )); @@ -2080,17 +2353,17 @@ fn test_x64_emit() { "cmpb %cl, %sil", )); insns.push(( - Inst::cmp_rmi_r(1, RegMemImm::mem(Addr::imm_reg(99, rdi)), rdx), + Inst::cmp_rmi_r(1, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx), "3A5763", "cmpb 99(%rdi), %dl", )); insns.push(( - Inst::cmp_rmi_r(1, RegMemImm::mem(Addr::imm_reg(99, rdi)), r8), + Inst::cmp_rmi_r(1, RegMemImm::mem(Amode::imm_reg(99, rdi)), r8), "443A4763", "cmpb 99(%rdi), %r8b", )); insns.push(( - Inst::cmp_rmi_r(1, RegMemImm::mem(Addr::imm_reg(99, rdi)), rsi), + Inst::cmp_rmi_r(1, RegMemImm::mem(Amode::imm_reg(99, rdi)), rsi), "403A7763", "cmpb 99(%rdi), %sil", )); @@ -2201,17 +2474,24 @@ fn test_x64_emit() { "cmpb %r13b, %r14b", )); + // ======================================================== + // SetCC + insns.push((Inst::setcc(CC::O, w_rsi), "400F90C6", "seto %sil")); + insns.push((Inst::setcc(CC::NLE, w_rsi), "400F9FC6", "setnle %sil")); + insns.push((Inst::setcc(CC::Z, w_r14), "410F94C6", "setz %r14b")); + insns.push((Inst::setcc(CC::LE, w_r14), "410F9EC6", "setle %r14b")); + // ======================================================== // Push64 insns.push((Inst::push64(RegMemImm::reg(rdi)), "57", "pushq %rdi")); insns.push((Inst::push64(RegMemImm::reg(r8)), "4150", "pushq %r8")); insns.push(( - Inst::push64(RegMemImm::mem(Addr::imm_reg_reg_shift(321, rsi, rcx, 3))), + Inst::push64(RegMemImm::mem(Amode::imm_reg_reg_shift(321, rsi, rcx, 3))), "FFB4CE41010000", "pushq 321(%rsi,%rcx,8)", )); insns.push(( - Inst::push64(RegMemImm::mem(Addr::imm_reg_reg_shift(321, r9, rbx, 2))), + Inst::push64(RegMemImm::mem(Amode::imm_reg_reg_shift(321, r9, rbx, 2))), "41FFB49941010000", "pushq 321(%r9,%rbx,4)", )); @@ -2251,27 +2531,43 @@ fn test_x64_emit() { insns.push((Inst::pop64(w_r15), "415F", "popq %r15")); // ======================================================== - // CallKnown skipped for now + // CallKnown + insns.push(( + Inst::call_known( + ExternalName::User { + namespace: 0, + index: 0, + }, + Vec::new(), + Vec::new(), + SourceLoc::default(), + Opcode::Call, + ), + "E800000000", + "call User { namespace: 0, index: 0 }", + )); // ======================================================== // CallUnknown + fn call_unknown(rm: RegMem) -> Inst { + Inst::call_unknown( + rm, + Vec::new(), + Vec::new(), + SourceLoc::default(), + Opcode::CallIndirect, + ) + } + + insns.push((call_unknown(RegMem::reg(rbp)), "FFD5", "call *%rbp")); + insns.push((call_unknown(RegMem::reg(r11)), "41FFD3", "call *%r11")); insns.push(( - Inst::call_unknown(RegMem::reg(rbp)), - "FFD5", - "call *%rbp", - )); - insns.push(( - Inst::call_unknown(RegMem::reg(r11)), - "41FFD3", - "call *%r11", - )); - insns.push(( - 
Inst::call_unknown(RegMem::mem(Addr::imm_reg_reg_shift(321, rsi, rcx, 3))), + call_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, rsi, rcx, 3))), "FF94CE41010000", "call *321(%rsi,%rcx,8)", )); insns.push(( - Inst::call_unknown(RegMem::mem(Addr::imm_reg_reg_shift(321, r10, rdx, 2))), + call_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rdx, 2))), "41FF949241010000", "call *321(%r10,%rdx,4)", )); @@ -2301,12 +2597,12 @@ fn test_x64_emit() { "jmp *%r11", )); insns.push(( - Inst::jmp_unknown(RegMem::mem(Addr::imm_reg_reg_shift(321, rsi, rcx, 3))), + Inst::jmp_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, rsi, rcx, 3))), "FFA4CE41010000", "jmp *321(%rsi,%rcx,8)", )); insns.push(( - Inst::jmp_unknown(RegMem::mem(Addr::imm_reg_reg_shift(321, r10, rdx, 2))), + Inst::jmp_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rdx, 2))), "41FFA49241010000", "jmp *321(%r10,%rdx,4)", )); @@ -2337,7 +2633,7 @@ fn test_x64_emit() { insns.push(( Inst::xmm_rm_r( SseOpcode::Addss, - RegMem::mem(Addr::imm_reg_reg_shift(123, r10, rdx, 2)), + RegMem::mem(Amode::imm_reg_reg_shift(123, r10, rdx, 2)), w_xmm0, ), "F3410F5844927B", @@ -2346,7 +2642,7 @@ fn test_x64_emit() { insns.push(( Inst::xmm_rm_r( SseOpcode::Subss, - RegMem::mem(Addr::imm_reg_reg_shift(321, r10, rax, 3)), + RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rax, 3)), w_xmm10, ), "F3450F5C94C241010000", @@ -2408,6 +2704,14 @@ fn test_x64_emit() { "movsd %xmm14, %xmm3", )); + // ======================================================== + // Misc instructions. + + insns.push((Inst::Hlt, "CC", "hlt")); + + let trap_info = (SourceLoc::default(), TrapCode::UnreachableCodeReached); + insns.push((Inst::Ud2 { trap_info }, "0F0B", "ud2 unreachable")); + // ======================================================== // Actually run the tests! let flags = settings::Flags::new(settings::builder()); @@ -2422,6 +2726,6 @@ fn test_x64_emit() { let buffer = buffer.finish(); buffer.emit(&mut sink); let actual_encoding = &sink.stringify(); - assert_eq!(expected_encoding, actual_encoding); + assert_eq!(expected_encoding, actual_encoding, "{}", expected_printing); } } diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index e13d0df73d..726575fd28 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -4,19 +4,17 @@ #![allow(non_snake_case)] #![allow(non_camel_case_types)] -use core::convert::TryFrom; +use alloc::vec::Vec; use smallvec::SmallVec; use std::fmt; use std::string::{String, ToString}; use regalloc::RegUsageCollector; -use regalloc::Set; use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable}; use crate::binemit::CodeOffset; use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I64, I8}; -use crate::ir::ExternalName; -use crate::ir::Type; +use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type}; use crate::machinst::*; use crate::settings::Flags; use crate::{settings, CodegenError, CodegenResult}; @@ -37,11 +35,13 @@ use regs::{create_reg_universe_systemv, show_ireg_sized}; /// Instructions. Destinations are on the RIGHT (a la AT&T syntax). #[derive(Clone)] -pub(crate) enum Inst { +pub enum Inst { /// nops of various sizes, including zero Nop { len: u8 }, - /// (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg + // ===================================== + // Integer instructions. + /// Integer arithmetic/bit-twiddling: (add sub and or xor mul adc? sbb?) 
(32 64) (reg addr imm) reg Alu_RMI_R { is_64: bool, op: AluRmiROpcode, @@ -49,49 +49,57 @@ pub(crate) enum Inst { dst: Writable<Reg>, }, - /// (imm32 imm64) reg. - /// Either: movl $imm32, %reg32 or movabsq $imm64, %reg32 + /// Constant materialization: (imm32 imm64) reg. + /// Either: movl $imm32, %reg32 or movabsq $imm64, %reg64. Imm_R { dst_is_64: bool, simm64: u64, dst: Writable<Reg>, }, - /// mov (64 32) reg reg + /// GPR to GPR move: mov (64 32) reg reg. Mov_R_R { is_64: bool, src: Reg, dst: Writable<Reg>, }, - /// movz (bl bq wl wq lq) addr reg (good for all ZX loads except 64->64). - /// Note that the lq variant doesn't really exist since the default - /// zero-extend rule makes it unnecessary. For that case we emit the - /// equivalent "movl AM, reg32". - MovZX_M_R { - extMode: ExtMode, - addr: Addr, + /// Zero-extended loads, except for 64 bits: movz (bl bq wl wq lq) addr reg. + /// Note that the lq variant doesn't really exist since the default zero-extend rule makes it + /// unnecessary. For that case we emit the equivalent "movl AM, reg32". + MovZX_RM_R { + ext_mode: ExtMode, + src: RegMem, dst: Writable<Reg>, }, - /// A plain 64-bit integer load, since MovZX_M_R can't represent that - Mov64_M_R { addr: Addr, dst: Writable<Reg> }, - - /// movs (bl bq wl wq lq) addr reg (good for all SX loads) - MovSX_M_R { - extMode: ExtMode, - addr: Addr, + /// A plain 64-bit integer load, since MovZX_RM_R can't represent that. + Mov64_M_R { + src: SyntheticAmode, dst: Writable<Reg>, }, - /// mov (b w l q) reg addr (good for all integer stores) + /// Loads the memory address of `addr` into `dst`. + LoadEffectiveAddress { + addr: SyntheticAmode, + dst: Writable<Reg>, + }, + + /// Sign-extended loads and moves: movs (bl bq wl wq lq) addr reg. + MovSX_RM_R { + ext_mode: ExtMode, + src: RegMem, + dst: Writable<Reg>, + }, + + /// Integer stores: mov (b w l q) reg addr. Mov_R_M { - size: u8, // 1, 2, 4 or 8 + size: u8, // 1, 2, 4 or 8. src: Reg, - addr: Addr, + dst: SyntheticAmode, }, - /// (shl shr sar) (l q) imm reg + /// Integer shifts: (shl shr sar) (l q) imm reg. Shift_R { is_64: bool, kind: ShiftKind, @@ -100,75 +108,95 @@ pub(crate) enum Inst { dst: Writable<Reg>, }, - /// cmp (b w l q) (reg addr imm) reg + /// Integer comparisons/tests: cmp (b w l q) (reg addr imm) reg. Cmp_RMI_R { size: u8, // 1, 2, 4 or 8 src: RegMemImm, dst: Reg, }, + /// Materializes the requested condition code in the destination reg. + Setcc { cc: CC, dst: Writable<Reg> }, + + // ===================================== + // Stack manipulation. /// pushq (reg addr imm) Push64 { src: RegMemImm }, /// popq reg Pop64 { dst: Writable<Reg> }, - /// call simm32 - CallKnown { - dest: ExternalName, - uses: Set<Reg>, - defs: Set<Writable<Reg>>, + // ===================================== + // Floating-point operations. + /// Float arithmetic/bit-twiddling: (add sub and or xor mul) (32 64) (reg addr) reg + XMM_RM_R { + op: SseOpcode, + src: RegMem, + dst: Writable<Reg>, }, - /// callq (reg mem) - CallUnknown { - dest: RegMem, - //uses: Set<Reg>, - //defs: Set<Writable<Reg>>, - }, - - // ---- branches (exactly one must appear at end of BB) ---- - /// ret - Ret, - - /// A placeholder instruction, generating no code, meaning that a function epilogue must be - /// inserted there. - EpiloguePlaceholder, - - /// jmp simm32 - JmpKnown { dest: BranchTarget }, - - /// jcond cond target target - /// Symmetrical two-way conditional branch. - /// Emitted as a compound sequence; the MachBuffer will shrink it - /// as appropriate.
- JmpCondSymm { - cc: CC, - taken: BranchTarget, - not_taken: BranchTarget, - }, - - /// jmpq (reg mem) - JmpUnknown { target: RegMem }, - /// mov between XMM registers (32 64) (reg addr) reg - /// XMM_MOV_RM_R differs from XMM_RM_R in that the dst - /// register of XMM_MOV_RM_R is not used in the computation - /// of the instruction dst value and so does not have to - /// be a previously valid value. This is characteristic of - /// mov instructions. + /// XMM_MOV_RM_R differs from XMM_RM_R in that the dst register of XMM_MOV_RM_R is not used in + /// the computation of the instruction dst value and so does not have to be a previously valid + /// value. This is characteristic of mov instructions. XMM_MOV_RM_R { op: SseOpcode, src: RegMem, dst: Writable, }, - /// (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg - XMM_RM_R { - op: SseOpcode, - src: RegMem, - dst: Writable, + // ===================================== + // Control flow instructions. + /// Direct call: call simm32. + CallKnown { + dest: ExternalName, + uses: Vec, + defs: Vec>, + loc: SourceLoc, + opcode: Opcode, }, + + /// Indirect call: callq (reg mem). + CallUnknown { + dest: RegMem, + uses: Vec, + defs: Vec>, + loc: SourceLoc, + opcode: Opcode, + }, + + /// Return. + Ret, + + /// A placeholder instruction, generating no code, meaning that a function epilogue must be + /// inserted there. + EpiloguePlaceholder, + + /// Jump to a known target: jmp simm32. + JmpKnown { dst: BranchTarget }, + + /// Two-way conditional branch: jcond cond target target. + /// Emitted as a compound sequence; the MachBuffer will shrink it as appropriate. + JmpCond { + cc: CC, + taken: BranchTarget, + not_taken: BranchTarget, + }, + + /// Indirect jump: jmpq (reg mem). + JmpUnknown { target: RegMem }, + + /// A debug trap. + Hlt, + + /// An instruction that will always trigger the illegal instruction exception. + Ud2 { trap_info: (SourceLoc, TrapCode) }, + + // ===================================== + // Meta-instructions generating no code. + /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This + /// controls how MemArg::NominalSPOffset args are lowered. + VirtualSPOffsetAdj { offset: i64 }, } // Handy constructors for Insts. 
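The reworked enum is easiest to read next to a concrete lowering sequence. The sketch below is not part of the patch: it assumes the crate-internal `LowerCtx<I = Inst>` bound that this patch's lowering functions use, plus the `Inst` constructors defined in the next hunk, and the helper name is purely illustrative. It shows why the two-address `Alu_RMI_R` form forces a register copy first: the machine instruction computes `dst := dst op src`, so `dst` must already hold the left operand.

```rust
use regalloc::{Reg, Writable};

// Illustrative only: emit `dst = lhs + rhs` for 64-bit operands using the
// Alu_RMI_R form above. The add overwrites its destination, so `lhs` is
// copied into `dst` first -- the same pattern the Iadd/Isub lowering in
// this patch uses.
fn emit_iadd64_sketch<C: LowerCtx<I = Inst>>(ctx: &mut C, lhs: Reg, rhs: Reg, dst: Writable<Reg>) {
    ctx.emit(Inst::mov_r_r(true, lhs, dst));
    ctx.emit(Inst::alu_rmi_r(
        true,                // is_64: operate on the full 64-bit registers
        AluRmiROpcode::Add,  // dst := dst + src
        RegMemImm::reg(rhs), // the source could also be an imm32 or a memory operand
        dst,
    ));
}
```

The same right-hand-destination convention holds in the printed AT&T form: `addq %rsi, %rdi` writes `%rdi`, matching a source of `rsi` and a destination of `rdi` in `Alu_RMI_R`.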
@@ -229,29 +257,44 @@ impl Inst { Inst::XMM_RM_R { op, src, dst } } - pub(crate) fn movzx_m_r(extMode: ExtMode, addr: Addr, dst: Writable) -> Inst { + pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable) -> Inst { debug_assert!(dst.to_reg().get_class() == RegClass::I64); - Inst::MovZX_M_R { extMode, addr, dst } + Inst::MovZX_RM_R { ext_mode, src, dst } } - pub(crate) fn mov64_m_r(addr: Addr, dst: Writable) -> Inst { + pub(crate) fn mov64_m_r(src: impl Into, dst: Writable) -> Inst { debug_assert!(dst.to_reg().get_class() == RegClass::I64); - Inst::Mov64_M_R { addr, dst } + Inst::Mov64_M_R { + src: src.into(), + dst, + } } - pub(crate) fn movsx_m_r(extMode: ExtMode, addr: Addr, dst: Writable) -> Inst { + pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable) -> Inst { debug_assert!(dst.to_reg().get_class() == RegClass::I64); - Inst::MovSX_M_R { extMode, addr, dst } + Inst::MovSX_RM_R { ext_mode, src, dst } } pub(crate) fn mov_r_m( size: u8, // 1, 2, 4 or 8 src: Reg, - addr: Addr, + dst: impl Into, ) -> Inst { debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); debug_assert!(src.get_class() == RegClass::I64); - Inst::Mov_R_M { size, src, addr } + Inst::Mov_R_M { + size, + src, + dst: dst.into(), + } + } + + pub(crate) fn lea(addr: impl Into, dst: Writable) -> Inst { + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + Inst::LoadEffectiveAddress { + addr: addr.into(), + dst, + } } pub(crate) fn shift_r( @@ -274,6 +317,8 @@ impl Inst { } } + /// Does a comparison of dst - src for operands of size `size`, as stated by the machine + /// instruction semantics. Be careful with the order of parameters! pub(crate) fn cmp_rmi_r( size: u8, // 1, 2, 4 or 8 src: RegMemImm, @@ -284,6 +329,11 @@ impl Inst { Inst::Cmp_RMI_R { size, src, dst } } + pub(crate) fn setcc(cc: CC, dst: Writable) -> Inst { + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + Inst::Setcc { cc, dst } + } + pub(crate) fn push64(src: RegMemImm) -> Inst { Inst::Push64 { src } } @@ -292,8 +342,36 @@ impl Inst { Inst::Pop64 { dst } } - pub(crate) fn call_unknown(dest: RegMem) -> Inst { - Inst::CallUnknown { dest } + pub(crate) fn call_known( + dest: ExternalName, + uses: Vec, + defs: Vec>, + loc: SourceLoc, + opcode: Opcode, + ) -> Inst { + Inst::CallKnown { + dest, + uses, + defs, + loc, + opcode, + } + } + + pub(crate) fn call_unknown( + dest: RegMem, + uses: Vec, + defs: Vec>, + loc: SourceLoc, + opcode: Opcode, + ) -> Inst { + Inst::CallUnknown { + dest, + uses, + defs, + loc, + opcode, + } } pub(crate) fn ret() -> Inst { @@ -304,12 +382,12 @@ impl Inst { Inst::EpiloguePlaceholder } - pub(crate) fn jmp_known(dest: BranchTarget) -> Inst { - Inst::JmpKnown { dest } + pub(crate) fn jmp_known(dst: BranchTarget) -> Inst { + Inst::JmpKnown { dst } } - pub(crate) fn jmp_cond_symm(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst { - Inst::JmpCondSymm { + pub(crate) fn jmp_cond(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst { + Inst::JmpCond { cc, taken, not_taken, @@ -414,40 +492,46 @@ impl ShowWithRRU for Inst { show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)), show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)) ), - Inst::MovZX_M_R { extMode, addr, dst } => { - if *extMode == ExtMode::LQ { + Inst::MovZX_RM_R { ext_mode, src, dst } => { + if *ext_mode == ExtMode::LQ { format!( "{} {}, {}", ljustify("movl".to_string()), - addr.show_rru(mb_rru), + src.show_rru_sized(mb_rru, ext_mode.src_size()), show_ireg_sized(dst.to_reg(), mb_rru, 4) ) } else { 
format!( "{} {}, {}", - ljustify2("movz".to_string(), extMode.to_string()), - addr.show_rru(mb_rru), - show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size()) + ljustify2("movz".to_string(), ext_mode.to_string()), + src.show_rru_sized(mb_rru, ext_mode.src_size()), + show_ireg_sized(dst.to_reg(), mb_rru, ext_mode.dst_size()) ) } } - Inst::Mov64_M_R { addr, dst } => format!( + Inst::Mov64_M_R { src, dst } => format!( "{} {}, {}", ljustify("movq".to_string()), + src.show_rru(mb_rru), + dst.show_rru(mb_rru) + ), + Inst::LoadEffectiveAddress { addr, dst } => format!( + "{} {}, {}", + ljustify("lea".to_string()), addr.show_rru(mb_rru), dst.show_rru(mb_rru) ), - Inst::MovSX_M_R { extMode, addr, dst } => format!( + Inst::MovSX_RM_R { ext_mode, src, dst } => format!( "{} {}, {}", - ljustify2("movs".to_string(), extMode.to_string()), - addr.show_rru(mb_rru), - show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size()) + ljustify2("movs".to_string(), ext_mode.to_string()), + src.show_rru_sized(mb_rru, ext_mode.src_size()), + show_ireg_sized(dst.to_reg(), mb_rru, ext_mode.dst_size()) ), - Inst::Mov_R_M { size, src, addr } => format!( + Inst::Mov_R_M { size, src, dst } => format!( "{} {}, {}", ljustify2("mov".to_string(), suffixBWLQ(*size)), show_ireg_sized(*src, mb_rru, *size), - addr.show_rru(mb_rru) + dst.show_rru(mb_rru) ), Inst::Shift_R { is_64, @@ -474,25 +558,29 @@ impl ShowWithRRU for Inst { src.show_rru_sized(mb_rru, *size), show_ireg_sized(*dst, mb_rru, *size) ), + Inst::Setcc { cc, dst } => format!( + "{} {}", + ljustify2("set".to_string(), cc.to_string()), + show_ireg_sized(dst.to_reg(), mb_rru, 1) + ), Inst::Push64 { src } => { format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru)) } Inst::Pop64 { dst } => { format!("{} {}", ljustify("popq".to_string()), dst.show_rru(mb_rru)) } - //Inst::CallKnown { target } => format!("{} {:?}", ljustify("call".to_string()), target), - Inst::CallKnown { .. } => "**CallKnown**".to_string(), - Inst::CallUnknown { dest } => format!( + Inst::CallKnown { dest, .. } => format!("{} {:?}", ljustify("call".to_string()), dest), + Inst::CallUnknown { dest, .. } => format!( "{} *{}", ljustify("call".to_string()), dest.show_rru(mb_rru) ), Inst::Ret => "ret".to_string(), Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(), - Inst::JmpKnown { dest } => { - format!("{} {}", ljustify("jmp".to_string()), dest.show_rru(mb_rru)) + Inst::JmpKnown { dst } => { + format!("{} {}", ljustify("jmp".to_string()), dst.show_rru(mb_rru)) } - Inst::JmpCondSymm { + Inst::JmpCond { cc, taken, not_taken, @@ -508,6 +596,9 @@ impl ShowWithRRU for Inst { ljustify("jmp".to_string()), target.show_rru(mb_rru) ), + Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset), + Inst::Hlt => "hlt".into(), + Inst::Ud2 { trap_info } => format!("ud2 {}", trap_info.1), } } } @@ -526,7 +617,6 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { // regalloc.rs will "fix" this for us by removing the the modified set from the use and def // sets. match inst { - // ** Nop Inst::Alu_RMI_R { is_64: _, op: _, @@ -544,40 +634,28 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { src.get_regs_as_uses(collector); collector.add_mod(*dst); } - Inst::Imm_R { - dst_is_64: _, - simm64: _, - dst, - } => { + Inst::Imm_R { dst, .. } => { collector.add_def(*dst); } - Inst::Mov_R_R { is_64: _, src, dst } => { + Inst::Mov_R_R { src, dst, .. 
} => { collector.add_use(*src); collector.add_def(*dst); } - Inst::MovZX_M_R { - extMode: _, - addr, - dst, - } => { - addr.get_regs_as_uses(collector); + Inst::MovZX_RM_R { src, dst, .. } => { + src.get_regs_as_uses(collector); collector.add_def(*dst); } - Inst::Mov64_M_R { addr, dst } => { - addr.get_regs_as_uses(collector); + Inst::Mov64_M_R { src, dst } | Inst::LoadEffectiveAddress { addr: src, dst } => { + src.get_regs_as_uses(collector); + collector.add_def(*dst) + } + Inst::MovSX_RM_R { src, dst, .. } => { + src.get_regs_as_uses(collector); collector.add_def(*dst); } - Inst::MovSX_M_R { - extMode: _, - addr, - dst, - } => { - addr.get_regs_as_uses(collector); - collector.add_def(*dst); - } - Inst::Mov_R_M { size: _, src, addr } => { + Inst::Mov_R_M { src, dst, .. } => { collector.add_use(*src); - addr.get_regs_as_uses(collector); + dst.get_regs_as_uses(collector); } Inst::Shift_R { is_64: _, @@ -594,6 +672,9 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { src.get_regs_as_uses(collector); collector.add_use(*dst); // yes, really `add_use` } + Inst::Setcc { dst, .. } => { + collector.add_def(*dst); + } Inst::Push64 { src } => { src.get_regs_as_uses(collector); collector.add_mod(Writable::from_reg(regs::rsp())); @@ -601,29 +682,36 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { Inst::Pop64 { dst } => { collector.add_def(*dst); } + Inst::CallKnown { - dest: _, - uses: _, - defs: _, + ref uses, ref defs, .. } => { - // FIXME add arg regs (iru.used) and caller-saved regs (iru.defined) - unimplemented!(); + collector.add_uses(uses); + collector.add_defs(defs); } - Inst::CallUnknown { dest } => { + + Inst::CallUnknown { + ref uses, + ref defs, + dest, + .. + } => { + collector.add_uses(uses); + collector.add_defs(defs); dest.get_regs_as_uses(collector); } - Inst::Ret => {} - Inst::EpiloguePlaceholder => {} - Inst::JmpKnown { dest: _ } => {} - Inst::JmpCondSymm { - cc: _, - taken: _, - not_taken: _, - } => {} - //Inst::JmpUnknown { target } => { - // target.get_regs_as_uses(collector); - //} - Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_get_regs inst"), + + Inst::Ret + | Inst::EpiloguePlaceholder + | Inst::JmpKnown { .. } + | Inst::JmpCond { .. } + | Inst::Nop { .. } + | Inst::JmpUnknown { .. } + | Inst::VirtualSPOffsetAdj { .. } + | Inst::Hlt + | Inst::Ud2 { .. } => { + // No registers are used. 
+ } } } @@ -631,34 +719,34 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { // Instructions and subcomponents: map_regs fn map_use(m: &RUM, r: &mut Reg) { - if r.is_virtual() { - let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg(); + if let Some(reg) = r.as_virtual_reg() { + let new = m.get_use(reg).unwrap().to_reg(); *r = new; } } fn map_def(m: &RUM, r: &mut Writable) { - if r.to_reg().is_virtual() { - let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg(); + if let Some(reg) = r.to_reg().as_virtual_reg() { + let new = m.get_def(reg).unwrap().to_reg(); *r = Writable::from_reg(new); } } fn map_mod(m: &RUM, r: &mut Writable) { - if r.to_reg().is_virtual() { - let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg(); + if let Some(reg) = r.to_reg().as_virtual_reg() { + let new = m.get_mod(reg).unwrap().to_reg(); *r = Writable::from_reg(new); } } -impl Addr { +impl Amode { fn map_uses(&mut self, map: &RUM) { match self { - Addr::ImmReg { + Amode::ImmReg { simm32: _, ref mut base, } => map_use(map, base), - Addr::ImmRegRegShift { + Amode::ImmRegRegShift { simm32: _, ref mut base, ref mut index, @@ -732,33 +820,33 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { map_use(mapper, src); map_def(mapper, dst); } - Inst::MovZX_M_R { - extMode: _, - ref mut addr, + Inst::MovZX_RM_R { + ref mut src, ref mut dst, + .. } => { - addr.map_uses(mapper); + src.map_uses(mapper); map_def(mapper, dst); } - Inst::Mov64_M_R { addr, dst } => { - addr.map_uses(mapper); + Inst::Mov64_M_R { src, dst } | Inst::LoadEffectiveAddress { addr: src, dst } => { + src.map_uses(mapper); map_def(mapper, dst); } - Inst::MovSX_M_R { - extMode: _, - ref mut addr, + Inst::MovSX_RM_R { + ref mut src, ref mut dst, + .. } => { - addr.map_uses(mapper); + src.map_uses(mapper); map_def(mapper, dst); } Inst::Mov_R_M { - size: _, ref mut src, - ref mut addr, + ref mut dst, + .. } => { map_use(mapper, src); - addr.map_uses(mapper); + dst.map_uses(mapper); } Inst::Shift_R { is_64: _, @@ -776,28 +864,51 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { src.map_uses(mapper); map_use(mapper, dst); } + Inst::Setcc { ref mut dst, .. } => map_def(mapper, dst), Inst::Push64 { ref mut src } => src.map_uses(mapper), Inst::Pop64 { ref mut dst } => { map_def(mapper, dst); } + Inst::CallKnown { - dest: _, - uses: _, - defs: _, - } => {} - Inst::CallUnknown { dest } => dest.map_uses(mapper), - Inst::Ret => {} - Inst::EpiloguePlaceholder => {} - Inst::JmpKnown { dest: _ } => {} - Inst::JmpCondSymm { - cc: _, - taken: _, - not_taken: _, - } => {} - //Inst::JmpUnknown { target } => { - // target.apply_map(mapper); - //} - Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_map_regs opcode"), + ref mut uses, + ref mut defs, + .. + } => { + for r in uses.iter_mut() { + map_use(mapper, r); + } + for r in defs.iter_mut() { + map_def(mapper, r); + } + } + + Inst::CallUnknown { + ref mut uses, + ref mut defs, + ref mut dest, + .. + } => { + for r in uses.iter_mut() { + map_use(mapper, r); + } + for r in defs.iter_mut() { + map_def(mapper, r); + } + dest.map_uses(mapper); + } + + Inst::Ret + | Inst::EpiloguePlaceholder + | Inst::JmpKnown { .. } + | Inst::JmpCond { .. } + | Inst::Nop { .. } + | Inst::JmpUnknown { .. } + | Inst::VirtualSPOffsetAdj { .. } + | Inst::Ud2 { .. } + | Inst::Hlt => { + // No registers are used. + } } } @@ -847,8 +958,8 @@ impl MachInst for Inst { match self { // Interesting cases. 
&Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret, - &Self::JmpKnown { dest } => MachTerminator::Uncond(dest.as_label().unwrap()), - &Self::JmpCondSymm { + &Self::JmpKnown { dst } => MachTerminator::Uncond(dst.as_label().unwrap()), + &Self::JmpCond { cc: _, taken, not_taken, @@ -875,7 +986,7 @@ impl MachInst for Inst { } fn gen_zero_len_nop() -> Inst { - unimplemented!() + Inst::Nop { len: 0 } } fn gen_nop(_preferred_size: usize) -> Inst { @@ -919,20 +1030,27 @@ impl MachInst for Inst { type LabelUse = LabelUse; } -impl MachInstEmit for Inst { - type State = (); +/// State carried between emissions of a sequence of instructions. +#[derive(Default, Clone, Debug)] +pub struct EmitState { + virtual_sp_offset: i64, +} - fn emit(&self, sink: &mut MachBuffer, _flags: &settings::Flags, _: &mut Self::State) { - emit::emit(self, sink); +impl MachInstEmit for Inst { + type State = EmitState; + + fn emit(&self, sink: &mut MachBuffer, flags: &settings::Flags, state: &mut Self::State) { + emit::emit(self, sink, flags, state); } } /// A label-use (internal relocation) in generated code. #[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub(crate) enum LabelUse { - /// A 32-bit offset from location of relocation itself, added to the - /// existing value at that location. - Rel32, +pub enum LabelUse { + /// A 32-bit offset from location of relocation itself, added to the existing value at that + /// location. Used for control flow instructions which consider an offset from the start of the + /// next instruction (so the size of the payload -- 4 bytes -- is subtracted from the payload). + JmpRel32, } impl MachInstLabelUse for LabelUse { @@ -940,30 +1058,31 @@ impl MachInstLabelUse for LabelUse { fn max_pos_range(self) -> CodeOffset { match self { - LabelUse::Rel32 => 0x7fff_ffff, + LabelUse::JmpRel32 => 0x7fff_ffff, } } fn max_neg_range(self) -> CodeOffset { match self { - LabelUse::Rel32 => 0x8000_0000, + LabelUse::JmpRel32 => 0x8000_0000, } } fn patch_size(self) -> CodeOffset { match self { - LabelUse::Rel32 => 4, + LabelUse::JmpRel32 => 4, } } fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) { + let pc_rel = (label_offset as i64) - (use_offset as i64); + debug_assert!(pc_rel <= self.max_pos_range() as i64); + debug_assert!(pc_rel >= -(self.max_neg_range() as i64)); + let pc_rel = pc_rel as u32; match self { - LabelUse::Rel32 => { - let addend = i32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]); - let value = i32::try_from(label_offset) - .unwrap() - .wrapping_sub(i32::try_from(use_offset).unwrap()) - .wrapping_add(addend); + LabelUse::JmpRel32 => { + let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]); + let value = pc_rel.wrapping_add(addend).wrapping_sub(4); buffer.copy_from_slice(&value.to_le_bytes()[..]); } } @@ -971,20 +1090,20 @@ impl MachInstLabelUse for LabelUse { fn supports_veneer(self) -> bool { match self { - LabelUse::Rel32 => false, + LabelUse::JmpRel32 => false, } } fn veneer_size(self) -> CodeOffset { match self { - LabelUse::Rel32 => 0, + LabelUse::JmpRel32 => 0, } } fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) { match self { - LabelUse::Rel32 => { - panic!("Veneer not supported for Rel32 label-use."); + LabelUse::JmpRel32 => { + panic!("Veneer not supported for JumpRel32 label-use."); } } } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 13c9c37a6a..1f63f53a6e 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ 
b/cranelift/codegen/src/isa/x64/lower.rs @@ -1,20 +1,22 @@ //! Lowering rules for X64. -#![allow(dead_code)] #![allow(non_snake_case)] use log::trace; use regalloc::{Reg, RegClass, Writable}; +use smallvec::SmallVec; +use std::convert::TryFrom; use crate::ir::types; use crate::ir::types::*; use crate::ir::Inst as IRInst; -use crate::ir::{condcodes::IntCC, InstructionData, Opcode, Type}; +use crate::ir::{condcodes::IntCC, InstructionData, Opcode, TrapCode, Type}; use crate::machinst::lower::*; use crate::machinst::*; use crate::result::CodegenResult; +use crate::isa::x64::abi::*; use crate::isa::x64::inst::args::*; use crate::isa::x64::inst::*; use crate::isa::x64::X64Backend; @@ -32,6 +34,20 @@ fn is_int_ty(ty: Type) -> bool { } } +fn is_bool_ty(ty: Type) -> bool { + match ty { + types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true, + _ => false, + } +} + +fn is_float_ty(ty: Type) -> bool { + match ty { + types::F32 | types::F64 => true, + _ => false, + } +} + fn int_ty_is_64(ty: Type) -> bool { match ty { types::I8 | types::I16 | types::I32 => false, @@ -48,29 +64,17 @@ fn flt_ty_is_64(ty: Type) -> bool { } } -fn int_ty_to_sizeB(ty: Type) -> u8 { - match ty { - types::I8 => 1, - types::I16 => 2, - types::I32 => 4, - types::I64 => 8, - _ => panic!("ity_to_sizeB"), - } +fn iri_to_u64_imm(ctx: Ctx, inst: IRInst) -> Option { + ctx.get_constant(inst) } -fn iri_to_u64_immediate<'a>(ctx: Ctx<'a>, iri: IRInst) -> Option { - let inst_data = ctx.data(iri); - if inst_data.opcode() == Opcode::Null { - Some(0) - } else { - match inst_data { - &InstructionData::UnaryImm { opcode: _, imm } => { - // Only has Into for i64; we use u64 elsewhere, so we cast. - let imm: i64 = imm.into(); - Some(imm as u64) - } - _ => None, - } +fn inst_trapcode(data: &InstructionData) -> Option { + match data { + &InstructionData::Trap { code, .. } + | &InstructionData::CondTrap { code, .. } + | &InstructionData::IntCondTrap { code, .. } + | &InstructionData::FloatCondTrap { code, .. } => Some(code), + _ => None, } } @@ -87,36 +91,88 @@ fn inst_condcode(data: &InstructionData) -> IntCC { } } -fn input_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, input: usize) -> Reg { - let inputs = ctx.get_input(iri, input); +fn ldst_offset(data: &InstructionData) -> Option { + match data { + &InstructionData::Load { offset, .. } + | &InstructionData::StackLoad { offset, .. } + | &InstructionData::LoadComplex { offset, .. } + | &InstructionData::Store { offset, .. } + | &InstructionData::StackStore { offset, .. } + | &InstructionData::StoreComplex { offset, .. } => Some(offset.into()), + _ => None, + } +} + +/// Identifier for a particular input of an instruction. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +struct InsnInput { + insn: IRInst, + input: usize, +} + +/// Identifier for a particular output of an instruction. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +struct InsnOutput { + insn: IRInst, + output: usize, +} + +fn input_to_reg<'a>(ctx: Ctx<'a>, spec: InsnInput) -> Reg { + let inputs = ctx.get_input(spec.insn, spec.input); ctx.use_input_reg(inputs); inputs.reg } -fn output_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, output: usize) -> Writable { - ctx.get_output(iri, output) +/// Try to use an immediate for constant inputs, and a register otherwise. +/// TODO: handle memory as well! 
+fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm { + let imm = ctx.get_input(spec.insn, spec.input).constant.and_then(|x| { + let as_u32 = x as u32; + let extended = as_u32 as u64; + // If the truncation and zero-extension don't change the value, use it. + if extended == x { + Some(as_u32) + } else { + None + } + }); + match imm { + Some(x) => RegMemImm::imm(x), + None => RegMemImm::reg(input_to_reg(ctx, spec)), + } +} + +fn output_to_reg<'a>(ctx: Ctx<'a>, spec: InsnOutput) -> Writable { + ctx.get_output(spec.insn, spec.output) } //============================================================================= // Top-level instruction lowering entry point, for one instruction. /// Actually codegen an instruction's results into registers. -fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) { - let op = ctx.data(inst).opcode(); - let ty = if ctx.num_outputs(inst) == 1 { - Some(ctx.output_ty(inst, 0)) +fn lower_insn_to_regs>(ctx: &mut C, insn: IRInst) -> CodegenResult<()> { + let op = ctx.data(insn).opcode(); + + let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn)) + .map(|i| InsnInput { insn, input: i }) + .collect(); + let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(insn)) + .map(|i| InsnOutput { insn, output: i }) + .collect(); + + let ty = if outputs.len() > 0 { + Some(ctx.output_ty(insn, 0)) } else { None }; - // This is all outstandingly feeble. TODO: much better! match op { Opcode::Iconst => { - if let Some(w64) = iri_to_u64_immediate(ctx, inst) { + if let Some(w64) = iri_to_u64_imm(ctx, insn) { // Get exactly the bit pattern in 'w64' into the dest. No // monkeying with sign extension etc. let dst_is_64 = w64 > 0xFFFF_FFFF; - let dst = output_to_reg(ctx, inst, 0); + let dst = output_to_reg(ctx, outputs[0]); ctx.emit(Inst::imm_r(dst_is_64, w64, dst)); } else { unimplemented!(); @@ -124,28 +180,32 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) { } Opcode::Iadd | Opcode::Isub => { - let dst = output_to_reg(ctx, inst, 0); - let lhs = input_to_reg(ctx, inst, 0); - let rhs = input_to_reg(ctx, inst, 1); + let lhs = input_to_reg(ctx, inputs[0]); + let rhs = input_to_reg_mem_imm(ctx, inputs[1]); + let dst = output_to_reg(ctx, outputs[0]); + + // TODO For add, try to commute the operands if one is an immediate. + let is_64 = int_ty_is_64(ty.unwrap()); let alu_op = if op == Opcode::Iadd { AluRmiROpcode::Add } else { AluRmiROpcode::Sub }; + ctx.emit(Inst::mov_r_r(true, lhs, dst)); - ctx.emit(Inst::alu_rmi_r(is_64, alu_op, RegMemImm::reg(rhs), dst)); + ctx.emit(Inst::alu_rmi_r(is_64, alu_op, rhs, dst)); } Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => { // TODO: implement imm shift value into insn - let dst_ty = ctx.output_ty(inst, 0); - assert_eq!(ctx.input_ty(inst, 0), dst_ty); + let dst_ty = ctx.output_ty(insn, 0); + assert_eq!(ctx.input_ty(insn, 0), dst_ty); assert!(dst_ty == types::I32 || dst_ty == types::I64); - let lhs = input_to_reg(ctx, inst, 0); - let rhs = input_to_reg(ctx, inst, 1); - let dst = output_to_reg(ctx, inst, 0); + let lhs = input_to_reg(ctx, inputs[0]); + let rhs = input_to_reg(ctx, inputs[1]); + let dst = output_to_reg(ctx, outputs[0]); let shift_kind = match op { Opcode::Ishl => ShiftKind::Left, @@ -161,30 +221,68 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) { ctx.emit(Inst::shift_r(is_64, shift_kind, None /*%cl*/, dst)); } - Opcode::Uextend | Opcode::Sextend => { - // TODO: this is all extremely lame, all because Mov{ZX,SX}_M_R - // don't accept a register source operand.
They should be changed - // so as to have _RM_R form. - // TODO2: if the source operand is a load, incorporate that. - let zero_extend = op == Opcode::Uextend; - let src_ty = ctx.input_ty(inst, 0); - let dst_ty = ctx.output_ty(inst, 0); - let src = input_to_reg(ctx, inst, 0); - let dst = output_to_reg(ctx, inst, 0); + Opcode::Uextend + | Opcode::Sextend + | Opcode::Bint + | Opcode::Breduce + | Opcode::Bextend + | Opcode::Ireduce => { + let src_ty = ctx.input_ty(insn, 0); + let dst_ty = ctx.output_ty(insn, 0); - ctx.emit(Inst::mov_r_r(true, src, dst)); - match (src_ty, dst_ty, zero_extend) { - (types::I8, types::I64, false) => { - ctx.emit(Inst::shift_r(true, ShiftKind::Left, Some(56), dst)); - ctx.emit(Inst::shift_r(true, ShiftKind::RightS, Some(56), dst)); - } - _ => unimplemented!(), + // TODO: if the source operand is a load, incorporate that. + let src = input_to_reg(ctx, inputs[0]); + let dst = output_to_reg(ctx, outputs[0]); + + let ext_mode = match (src_ty.bits(), dst_ty.bits()) { + (1, 32) | (8, 32) => ExtMode::BL, + (1, 64) | (8, 64) => ExtMode::BQ, + (16, 32) => ExtMode::WL, + (16, 64) => ExtMode::WQ, + (32, 64) => ExtMode::LQ, + _ => unreachable!( + "unexpected extension kind from {:?} to {:?}", + src_ty, dst_ty + ), + }; + + if op == Opcode::Sextend { + ctx.emit(Inst::movsx_rm_r(ext_mode, RegMem::reg(src), dst)); + } else { + // All of these other opcodes are simply a move from a zero-extended source. Here + // is why this works, in each case: + // + // - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we + // merely need to zero-extend here. + // + // - Breduce, Bextend: changing width of a boolean. We represent a + // bool as a 0 or 1, so again, this is a zero-extend / no-op. + // + // - Ireduce: changing width of an integer. Smaller ints are stored + // with undefined high-order bits, so we can simply do a copy. + ctx.emit(Inst::movzx_rm_r(ext_mode, RegMem::reg(src), dst)); } } + Opcode::Icmp => { + let condcode = inst_condcode(ctx.data(insn)); + let cc = CC::from_intcc(condcode); + let ty = ctx.input_ty(insn, 0); + + // TODO Try to commute the operands (and invert the condition) if one is an immediate. + let lhs = input_to_reg(ctx, inputs[0]); + let rhs = input_to_reg_mem_imm(ctx, inputs[1]); + let dst = output_to_reg(ctx, outputs[0]); + + // Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives + // us dst - src at the machine instruction level, so invert operands. + ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs)); + ctx.emit(Inst::setcc(cc, dst)); + } + Opcode::FallthroughReturn | Opcode::Return => { - for i in 0..ctx.num_inputs(inst) { - let src_reg = input_to_reg(ctx, inst, i); + for i in 0..ctx.num_inputs(insn) { + let src_reg = input_to_reg(ctx, inputs[i]); let retval_reg = ctx.retval(i); if src_reg.get_class() == RegClass::I64 { ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg)); @@ -199,10 +297,58 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) { // N.B.: the Ret itself is generated by the ABI. 
} + Opcode::Call | Opcode::CallIndirect => { + let loc = ctx.srcloc(insn); + let (mut abi, inputs) = match op { + Opcode::Call => { + let (extname, dist) = ctx.call_target(insn).unwrap(); + let sig = ctx.call_sig(insn).unwrap(); + assert!(inputs.len() == sig.params.len()); + assert!(outputs.len() == sig.returns.len()); + ( + X64ABICall::from_func(sig, &extname, dist, loc)?, + &inputs[..], + ) + } + + Opcode::CallIndirect => { + let ptr = input_to_reg(ctx, inputs[0]); + let sig = ctx.call_sig(insn).unwrap(); + assert!(inputs.len() - 1 == sig.params.len()); + assert!(outputs.len() == sig.returns.len()); + (X64ABICall::from_ptr(sig, ptr, loc, op)?, &inputs[1..]) + } + + _ => unreachable!(), + }; + + abi.emit_stack_pre_adjust(ctx); + assert!(inputs.len() == abi.num_args()); + for (i, input) in inputs.iter().enumerate() { + let arg_reg = input_to_reg(ctx, *input); + abi.emit_copy_reg_to_arg(ctx, i, arg_reg); + } + abi.emit_call(ctx); + for (i, output) in outputs.iter().enumerate() { + let retval_reg = output_to_reg(ctx, *output); + abi.emit_copy_retval_to_reg(ctx, i, retval_reg); + } + abi.emit_stack_post_adjust(ctx); + } + + Opcode::Debugtrap => { + ctx.emit(Inst::Hlt); + } + + Opcode::Trap => { + let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap()); + ctx.emit(Inst::Ud2 { trap_info }) + } + Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv => { - let dst = output_to_reg(ctx, inst, 0); - let lhs = input_to_reg(ctx, inst, 0); - let rhs = input_to_reg(ctx, inst, 1); + let lhs = input_to_reg(ctx, inputs[0]); + let rhs = input_to_reg(ctx, inputs[1]); + let dst = output_to_reg(ctx, outputs[0]); let is_64 = flt_ty_is_64(ty.unwrap()); if !is_64 { let sse_op = match op { @@ -219,10 +365,11 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) { unimplemented!("unimplemented lowering for opcode {:?}", op); } } + Opcode::Fcopysign => { - let dst = output_to_reg(ctx, inst, 0); - let lhs = input_to_reg(ctx, inst, 0); - let rhs = input_to_reg(ctx, inst, 1); + let dst = output_to_reg(ctx, outputs[0]); + let lhs = input_to_reg(ctx, inputs[0]); + let rhs = input_to_reg(ctx, inputs[1]); if !flt_ty_is_64(ty.unwrap()) { // movabs 0x8000_0000, tmp_gpr1 // movd tmp_gpr1, tmp_xmm1 @@ -265,6 +412,185 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) { unimplemented!("{:?} for non 32-bit destination is not supported", op); } } + + Opcode::Load + | Opcode::Uload8 + | Opcode::Sload8 + | Opcode::Uload16 + | Opcode::Sload16 + | Opcode::Uload32 + | Opcode::Sload32 + | Opcode::LoadComplex + | Opcode::Uload8Complex + | Opcode::Sload8Complex + | Opcode::Uload16Complex + | Opcode::Sload16Complex + | Opcode::Uload32Complex + | Opcode::Sload32Complex => { + let offset = ldst_offset(ctx.data(insn)).unwrap(); + + let elem_ty = match op { + Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => { + types::I8 + } + Opcode::Sload16 + | Opcode::Uload16 + | Opcode::Sload16Complex + | Opcode::Uload16Complex => types::I16, + Opcode::Sload32 + | Opcode::Uload32 + | Opcode::Sload32Complex + | Opcode::Uload32Complex => types::I32, + Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0), + _ => unimplemented!(), + }; + + let ext_mode = match elem_ty.bytes() { + 1 => Some(ExtMode::BQ), + 2 => Some(ExtMode::WQ), + 4 => Some(ExtMode::LQ), + _ => None, + }; + + let sign_extend = match op { + Opcode::Sload8 + | Opcode::Sload8Complex + | Opcode::Sload16 + | Opcode::Sload16Complex + | Opcode::Sload32 + | Opcode::Sload32Complex => true, + _ => false, + }; + + let is_float = 
is_float_ty(elem_ty); + + let addr = match op { + Opcode::Load + | Opcode::Uload8 + | Opcode::Sload8 + | Opcode::Uload16 + | Opcode::Sload16 + | Opcode::Uload32 + | Opcode::Sload32 => { + assert!(inputs.len() == 1, "only one input for load operands"); + let base = input_to_reg(ctx, inputs[0]); + Amode::imm_reg(offset as u32, base) + } + + Opcode::LoadComplex + | Opcode::Uload8Complex + | Opcode::Sload8Complex + | Opcode::Uload16Complex + | Opcode::Sload16Complex + | Opcode::Uload32Complex + | Opcode::Sload32Complex => { + assert!( + inputs.len() == 2, + "expected exactly two inputs (base and index) in complex load" + ); + let base = input_to_reg(ctx, inputs[0]); + let index = input_to_reg(ctx, inputs[1]); + let shift = 0; + Amode::imm_reg_reg_shift(offset as u32, base, index, shift) + } + + _ => unreachable!(), + }; + + let dst = output_to_reg(ctx, outputs[0]); + match (sign_extend, is_float) { + (true, false) => { + // The load is sign-extended only when the output size is smaller than 64 bits, + // so ext-mode is defined in this case. + ctx.emit(Inst::movsx_rm_r(ext_mode.unwrap(), RegMem::mem(addr), dst)); + } + (false, false) => { + if elem_ty.bytes() == 8 { + // Use a plain load. + ctx.emit(Inst::mov64_m_r(addr, dst)) + } else { + // Use a zero-extended load. + ctx.emit(Inst::movzx_rm_r(ext_mode.unwrap(), RegMem::mem(addr), dst)) + } + } + (_, true) => unimplemented!("FPU loads"), + } + } + + Opcode::Store + | Opcode::Istore8 + | Opcode::Istore16 + | Opcode::Istore32 + | Opcode::StoreComplex + | Opcode::Istore8Complex + | Opcode::Istore16Complex + | Opcode::Istore32Complex => { + let offset = ldst_offset(ctx.data(insn)).unwrap(); + + let elem_ty = match op { + Opcode::Istore8 | Opcode::Istore8Complex => types::I8, + Opcode::Istore16 | Opcode::Istore16Complex => types::I16, + Opcode::Istore32 | Opcode::Istore32Complex => types::I32, + Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0), + _ => unreachable!(), + }; + let is_float = is_float_ty(elem_ty); + + let addr = match op { + Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => { + assert!( + inputs.len() == 2, + "expected two inputs (value and base) for store operands" + ); + let base = input_to_reg(ctx, inputs[1]); + // TODO sign?
+ Amode::imm_reg(offset as u32, base) + } + + Opcode::StoreComplex + | Opcode::Istore8Complex + | Opcode::Istore16Complex + | Opcode::Istore32Complex => { + assert!( + inputs.len() == 3, + "expected exactly three inputs (value, base and index) in complex store" + ); + let base = input_to_reg(ctx, inputs[1]); + let index = input_to_reg(ctx, inputs[2]); + let shift = 0; + Amode::imm_reg_reg_shift(offset as u32, base, index, shift) + } + + _ => unreachable!(), + }; + + let src = input_to_reg(ctx, inputs[0]); + + if is_float { + unimplemented!("FPU stores"); + } else { + ctx.emit(Inst::mov_r_m(elem_ty.bytes() as u8, src, addr)); + } + } + + Opcode::StackAddr => { + let (stack_slot, offset) = match *ctx.data(insn) { + InstructionData::StackLoad { + opcode: Opcode::StackAddr, + stack_slot, + offset, + } => (stack_slot, offset), + _ => unreachable!(), + }; + let dst = output_to_reg(ctx, outputs[0]); + let offset: i32 = offset.into(); + trace!("stackslot_addr: {:?} @ off{}", stack_slot, offset); + let inst = ctx + .abi() + .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst); + ctx.emit(inst); + } + Opcode::IaddImm | Opcode::ImulImm | Opcode::UdivImm @@ -296,6 +622,8 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) { } _ => unimplemented!("unimplemented lowering for opcode {:?}", op), } + + Ok(()) } //============================================================================= @@ -305,8 +633,7 @@ impl LowerBackend for X64Backend { type MInst = Inst; fn lower>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> { - lower_insn_to_regs(ctx, ir_inst); - Ok(()) + lower_insn_to_regs(ctx, ir_inst) } fn lower_branch_group>( @@ -346,33 +673,52 @@ impl LowerBackend for X64Backend { match op0 { Opcode::Brz | Opcode::Brnz => { let src_ty = ctx.input_ty(branches[0], 0); - if is_int_ty(src_ty) { - let src = input_to_reg(ctx, branches[0], 0); + if is_int_ty(src_ty) || is_bool_ty(src_ty) { + let src = input_to_reg( + ctx, + InsnInput { + insn: branches[0], + input: 0, + }, + ); let cc = match op0 { Opcode::Brz => CC::Z, Opcode::Brnz => CC::NZ, _ => unreachable!(), }; - let sizeB = int_ty_to_sizeB(src_ty); - ctx.emit(Inst::cmp_rmi_r(sizeB, RegMemImm::imm(0), src)); - ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken)); + let size_bytes = src_ty.bytes() as u8; + ctx.emit(Inst::cmp_rmi_r(size_bytes, RegMemImm::imm(0), src)); + ctx.emit(Inst::jmp_cond(cc, taken, not_taken)); } else { - unimplemented!("brz/brnz with non-int type"); + unimplemented!("brz/brnz with non-int type {:?}", src_ty); } } Opcode::BrIcmp => { let src_ty = ctx.input_ty(branches[0], 0); - if is_int_ty(src_ty) { - let lhs = input_to_reg(ctx, branches[0], 0); - let rhs = input_to_reg(ctx, branches[0], 1); + if is_int_ty(src_ty) || is_bool_ty(src_ty) { + let lhs = input_to_reg( + ctx, + InsnInput { + insn: branches[0], + input: 0, + }, + ); + let rhs = input_to_reg_mem_imm( + ctx, + InsnInput { + insn: branches[0], + input: 1, + }, + ); let cc = CC::from_intcc(inst_condcode(ctx.data(branches[0]))); - let byte_size = int_ty_to_sizeB(src_ty); - // FIXME verify rSR vs rSL ordering - ctx.emit(Inst::cmp_rmi_r(byte_size, RegMemImm::reg(rhs), lhs)); - ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken)); + let byte_size = src_ty.bytes() as u8; + // Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives + // us dst - src at the machine instruction level, so invert operands.
+ ctx.emit(Inst::cmp_rmi_r(byte_size, rhs, lhs)); + ctx.emit(Inst::jmp_cond(cc, taken, not_taken)); } else { - unimplemented!("bricmp with non-int type"); + unimplemented!("bricmp with non-int type {:?}", src_ty); } } @@ -385,15 +731,9 @@ impl LowerBackend for X64Backend { // Must be an unconditional branch or trap. let op = ctx.data(branches[0]).opcode(); match op { - Opcode::Jump => { + Opcode::Jump | Opcode::Fallthrough => { ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0]))); } - Opcode::Fallthrough => { - ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0]))); - } - Opcode::Trap => { - unimplemented!("trap"); - } _ => panic!("Unknown branch type!"), } } diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index 3b1652cb10..7666875a0e 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -40,7 +40,7 @@ impl X64Backend { fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult> { // This performs lowering to VCode, register-allocates the code, computes // block layout and finalizes branches. The result is ready for binary emission. - let abi = Box::new(abi::X64ABIBody::new(&func, flags)); + let abi = Box::new(abi::X64ABIBody::new(&func, flags)?); compile::compile::(&func, self, abi) } } diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index 03b4ab0750..4d9bc828f8 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -1024,7 +1024,7 @@ impl MachBuffer { let veneer_offset = self.cur_offset(); trace!("making a veneer at {}", veneer_offset); let slice = &mut self.data[start..end]; - // Patch the original label use to refer to teh veneer. + // Patch the original label use to refer to the veneer. trace!( "patching original at offset {} to veneer offset {}", offset, diff --git a/crates/jit/src/link.rs b/crates/jit/src/link.rs index f344d4ab69..10556b60e7 100644 --- a/crates/jit/src/link.rs +++ b/crates/jit/src/link.rs @@ -106,6 +106,19 @@ fn apply_reloc( .wrapping_add(reloc_addend as u32); write_unaligned(reloc_address as *mut u32, reloc_delta_u32); }, + #[cfg(target_pointer_width = "64")] + Reloc::X86CallPCRel4 => unsafe { + let reloc_address = body.add(r.offset as usize) as usize; + let reloc_addend = r.addend as isize; + let reloc_delta_u64 = (target_func_address as u64) + .wrapping_sub(reloc_address as u64) + .wrapping_add(reloc_addend as u64); + assert!( + reloc_delta_u64 as isize <= i32::max_value() as isize, + "relocation too large to fit in i32" + ); + write_unaligned(reloc_address as *mut u32, reloc_delta_u64 as u32); + }, Reloc::X86PCRelRodata4 => { // ignore }
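A final note on the `X86CallPCRel4` case added above: the delta is computed with wrapping u64 arithmetic and only becomes meaningful once truncated to a signed 32-bit displacement, which is why the guard asserts the value fits in an `i32` before the 4-byte store. Here is a self-contained sketch of the same computation with made-up addresses; the helper name and all the values are illustrative, not taken from the patch.

```rust
// Mirrors the wrapping arithmetic used above for Reloc::X86CallPCRel4.
fn pcrel4_value(target_func_address: u64, reloc_address: u64, reloc_addend: i64) -> u32 {
    let reloc_delta_u64 = target_func_address
        .wrapping_sub(reloc_address)
        .wrapping_add(reloc_addend as u64);
    // Like the hunk above, this guards only the upper bound; a moderate
    // negative delta wraps to a large u64 whose low 32 bits still encode
    // the correct signed displacement.
    assert!(
        reloc_delta_u64 as i64 <= i32::max_value() as i64,
        "relocation too large to fit in i32"
    );
    reloc_delta_u64 as u32
}

fn main() {
    // A backward call: the target sits 0x1000 bytes before the relocation
    // site, and the addend of -4 accounts for the 4-byte displacement field.
    let v = pcrel4_value(0x7f00_0000_1000, 0x7f00_0000_2000, -4);
    assert_eq!(v as i32, -0x1004);
    println!("encoded displacement: {:#010x} ({})", v, v as i32);
}
```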