Merge branch 'main' into peepmatic-bnot
This commit is contained in:
@@ -7,7 +7,7 @@ use std::mem;
|
||||
|
||||
use crate::binemit::Stackmap;
|
||||
use crate::ir::{self, types, types::*, ArgumentExtension, StackSlot, Type};
|
||||
use crate::isa::{self, x64::inst::*};
|
||||
use crate::isa::{x64::inst::*, CallConv};
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
use crate::{CodegenError, CodegenResult};
|
||||
@@ -40,7 +40,7 @@ struct ABISig {
|
||||
/// Index in `args` of the stack-return-value-area argument.
|
||||
stack_ret_arg: Option<usize>,
|
||||
/// Calling convention used.
|
||||
call_conv: isa::CallConv,
|
||||
call_conv: CallConv,
|
||||
}
|
||||
|
||||
pub(crate) struct X64ABIBody {
|
||||
@@ -65,7 +65,7 @@ pub(crate) struct X64ABIBody {
|
||||
/// which RSP is adjusted downwards to allocate the spill area.
|
||||
frame_size_bytes: Option<usize>,
|
||||
|
||||
call_conv: isa::CallConv,
|
||||
call_conv: CallConv,
|
||||
|
||||
/// The settings controlling this function's compilation.
|
||||
flags: settings::Flags,
|
||||
@@ -93,7 +93,11 @@ fn in_vec_reg(ty: types::Type) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
|
||||
fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::BaldrdashSystemV => {}
|
||||
_ => panic!("int args only supported for SysV calling convention"),
|
||||
};
|
||||
match idx {
|
||||
0 => Some(regs::rdi()),
|
||||
1 => Some(regs::rsi()),
|
||||
@@ -105,7 +109,11 @@ fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
|
||||
}
|
||||
}
|
||||
|
||||
fn get_fltreg_for_arg_systemv(idx: usize) -> Option<Reg> {
|
||||
fn get_fltreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::BaldrdashSystemV => {}
|
||||
_ => panic!("float args only supported for SysV calling convention"),
|
||||
};
|
||||
match idx {
|
||||
0 => Some(regs::xmm0()),
|
||||
1 => Some(regs::xmm1()),
|
||||
@@ -119,19 +127,39 @@ fn get_fltreg_for_arg_systemv(idx: usize) -> Option<Reg> {
|
||||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_retval_systemv(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::rax()),
|
||||
1 => Some(regs::rdx()),
|
||||
_ => None,
|
||||
fn get_intreg_for_retval_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV => match idx {
|
||||
0 => Some(regs::rax()),
|
||||
1 => Some(regs::rdx()),
|
||||
_ => None,
|
||||
},
|
||||
CallConv::BaldrdashSystemV => {
|
||||
if idx == 0 {
|
||||
Some(regs::rax())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_fltreg_for_retval_systemv(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::xmm0()),
|
||||
1 => Some(regs::xmm1()),
|
||||
_ => None,
|
||||
fn get_fltreg_for_retval_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV => match idx {
|
||||
0 => Some(regs::xmm0()),
|
||||
1 => Some(regs::xmm1()),
|
||||
_ => None,
|
||||
},
|
||||
CallConv::BaldrdashSystemV => {
|
||||
if idx == 0 {
|
||||
Some(regs::xmm0())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -147,10 +175,39 @@ fn is_callee_save_systemv(r: RealReg) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
fn get_callee_saves(regs: Vec<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
|
||||
regs.into_iter()
|
||||
.filter(|r| is_callee_save_systemv(r.to_reg()))
|
||||
.collect()
|
||||
fn is_callee_save_baldrdash(r: RealReg) -> bool {
|
||||
use regs::*;
|
||||
match r.get_class() {
|
||||
RegClass::I64 => {
|
||||
if r.get_hw_encoding() as u8 == ENC_R14 {
|
||||
// r14 is the WasmTlsReg and is preserved implicitly.
|
||||
false
|
||||
} else {
|
||||
// Defer to native for the other ones.
|
||||
is_callee_save_systemv(r)
|
||||
}
|
||||
}
|
||||
RegClass::V128 => false,
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_callee_saves(call_conv: &CallConv, regs: Vec<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
|
||||
match call_conv {
|
||||
CallConv::BaldrdashSystemV => regs
|
||||
.into_iter()
|
||||
.filter(|r| is_callee_save_baldrdash(r.to_reg()))
|
||||
.collect(),
|
||||
CallConv::BaldrdashWindows => {
|
||||
todo!("baldrdash windows");
|
||||
}
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs
|
||||
.into_iter()
|
||||
.filter(|r| is_callee_save_systemv(r.to_reg()))
|
||||
.collect(),
|
||||
CallConv::WindowsFastcall => todo!("windows fastcall"),
|
||||
CallConv::Probestack => todo!("probestack?"),
|
||||
}
|
||||
}
|
||||
|
||||
impl X64ABIBody {
|
||||
@@ -160,7 +217,7 @@ impl X64ABIBody {
|
||||
|
||||
let call_conv = f.signature.call_conv;
|
||||
debug_assert!(
|
||||
call_conv == isa::CallConv::SystemV || call_conv.extends_baldrdash(),
|
||||
call_conv == CallConv::SystemV || call_conv.extends_baldrdash(),
|
||||
"unsupported or unimplemented calling convention {}",
|
||||
call_conv
|
||||
);
|
||||
@@ -195,7 +252,6 @@ impl X64ABIBody {
|
||||
if self.call_conv.extends_baldrdash() {
|
||||
let num_words = self.flags.baldrdash_prologue_words() as i64;
|
||||
debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words");
|
||||
debug_assert_eq!(num_words % 2, 0, "stack must be 16-aligned");
|
||||
num_words * 8
|
||||
} else {
|
||||
16 // frame pointer + return address.
|
||||
@@ -269,7 +325,18 @@ impl ABIBody for X64ABIBody {
|
||||
}
|
||||
|
||||
fn gen_retval_area_setup(&self) -> Option<Inst> {
|
||||
None
|
||||
if let Some(i) = self.sig.stack_ret_arg {
|
||||
let inst = self.gen_copy_arg_to_reg(i, self.ret_area_ptr.unwrap());
|
||||
trace!(
|
||||
"gen_retval_area_setup: inst {:?}; ptr reg is {:?}",
|
||||
inst,
|
||||
self.ret_area_ptr.unwrap().to_reg()
|
||||
);
|
||||
Some(inst)
|
||||
} else {
|
||||
trace!("gen_retval_area_setup: not needed");
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_copy_reg_to_retval(
|
||||
@@ -295,15 +362,17 @@ impl ABIBody for X64ABIBody {
|
||||
(ArgumentExtension::Uext, Some(ext_mode)) => {
|
||||
ret.push(Inst::movzx_rm_r(
|
||||
ext_mode,
|
||||
RegMem::reg(r.to_reg()),
|
||||
RegMem::reg(from_reg.to_reg()),
|
||||
dest_reg,
|
||||
/* infallible load */ None,
|
||||
));
|
||||
}
|
||||
(ArgumentExtension::Sext, Some(ext_mode)) => {
|
||||
ret.push(Inst::movsx_rm_r(
|
||||
ext_mode,
|
||||
RegMem::reg(r.to_reg()),
|
||||
RegMem::reg(from_reg.to_reg()),
|
||||
dest_reg,
|
||||
/* infallible load */ None,
|
||||
));
|
||||
}
|
||||
_ => ret.push(Inst::gen_move(dest_reg, from_reg.to_reg(), ty)),
|
||||
@@ -327,6 +396,7 @@ impl ABIBody for X64ABIBody {
|
||||
ext_mode,
|
||||
RegMem::reg(from_reg.to_reg()),
|
||||
from_reg,
|
||||
/* infallible load */ None,
|
||||
));
|
||||
}
|
||||
(ArgumentExtension::Sext, Some(ext_mode)) => {
|
||||
@@ -334,6 +404,7 @@ impl ABIBody for X64ABIBody {
|
||||
ext_mode,
|
||||
RegMem::reg(from_reg.to_reg()),
|
||||
from_reg,
|
||||
/* infallible load */ None,
|
||||
));
|
||||
}
|
||||
_ => {}
|
||||
@@ -437,7 +508,7 @@ impl ABIBody for X64ABIBody {
|
||||
insts.push(Inst::mov_r_r(true, r_rsp, w_rbp));
|
||||
}
|
||||
|
||||
let clobbered = get_callee_saves(self.clobbered.to_vec());
|
||||
let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec());
|
||||
let callee_saved_used: usize = clobbered
|
||||
.iter()
|
||||
.map(|reg| match reg.to_reg().get_class() {
|
||||
@@ -481,7 +552,7 @@ impl ABIBody for X64ABIBody {
|
||||
|
||||
// Save callee saved registers that we trash. Keep track of how much space we've used, so
|
||||
// as to know what we have to do to get the base of the spill area 0 % 16.
|
||||
let clobbered = get_callee_saves(self.clobbered.to_vec());
|
||||
let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec());
|
||||
for reg in clobbered {
|
||||
let r_reg = reg.to_reg();
|
||||
match r_reg.get_class() {
|
||||
@@ -511,7 +582,7 @@ impl ABIBody for X64ABIBody {
|
||||
// Undo what we did in the prologue.
|
||||
|
||||
// Restore regs.
|
||||
let clobbered = get_callee_saves(self.clobbered.to_vec());
|
||||
let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec());
|
||||
for wreg in clobbered.into_iter().rev() {
|
||||
let rreg = wreg.to_reg();
|
||||
match rreg.get_class() {
|
||||
@@ -608,7 +679,7 @@ fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option<Type>) -> Type {
|
||||
}
|
||||
}
|
||||
|
||||
fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
|
||||
fn get_caller_saves(call_conv: CallConv) -> Vec<Writable<Reg>> {
|
||||
let mut caller_saved = Vec::new();
|
||||
|
||||
// Systemv calling convention:
|
||||
@@ -623,6 +694,14 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
|
||||
caller_saved.push(Writable::from_reg(regs::r10()));
|
||||
caller_saved.push(Writable::from_reg(regs::r11()));
|
||||
|
||||
if call_conv.extends_baldrdash() {
|
||||
caller_saved.push(Writable::from_reg(regs::r12()));
|
||||
caller_saved.push(Writable::from_reg(regs::r13()));
|
||||
// Not r14; implicitly preserved in the entry.
|
||||
caller_saved.push(Writable::from_reg(regs::r15()));
|
||||
caller_saved.push(Writable::from_reg(regs::rbx()));
|
||||
}
|
||||
|
||||
// - XMM: all the registers!
|
||||
caller_saved.push(Writable::from_reg(regs::xmm0()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm1()));
|
||||
@@ -641,10 +720,6 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
|
||||
caller_saved.push(Writable::from_reg(regs::xmm14()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm15()));
|
||||
|
||||
if call_conv.extends_baldrdash() {
|
||||
todo!("add the baldrdash caller saved")
|
||||
}
|
||||
|
||||
caller_saved
|
||||
}
|
||||
|
||||
@@ -671,7 +746,7 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) {
|
||||
}
|
||||
|
||||
/// Try to fill a Baldrdash register, returning it if it was found.
|
||||
fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
|
||||
fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
|
||||
if call_conv.extends_baldrdash() {
|
||||
match ¶m.purpose {
|
||||
&ir::ArgumentPurpose::VMContext => {
|
||||
@@ -705,16 +780,13 @@ enum ArgsOrRets {
|
||||
/// to a 16-byte-aligned boundary), and if `add_ret_area_ptr` was passed, the
|
||||
/// index of the extra synthetic arg that was added.
|
||||
fn compute_arg_locs(
|
||||
call_conv: isa::CallConv,
|
||||
call_conv: CallConv,
|
||||
params: &[ir::AbiParam],
|
||||
args_or_rets: ArgsOrRets,
|
||||
add_ret_area_ptr: bool,
|
||||
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
|
||||
let is_baldrdash = call_conv.extends_baldrdash();
|
||||
|
||||
// XXX assume SystemV at the moment.
|
||||
debug_assert!(!is_baldrdash, "baldrdash nyi");
|
||||
|
||||
let mut next_gpr = 0;
|
||||
let mut next_vreg = 0;
|
||||
let mut next_stack: u64 = 0;
|
||||
@@ -748,8 +820,8 @@ fn compute_arg_locs(
|
||||
|
||||
let (next_reg, candidate) = if intreg {
|
||||
let candidate = match args_or_rets {
|
||||
ArgsOrRets::Args => get_intreg_for_arg_systemv(next_gpr),
|
||||
ArgsOrRets::Rets => get_intreg_for_retval_systemv(next_gpr),
|
||||
ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr),
|
||||
ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr),
|
||||
};
|
||||
debug_assert!(candidate
|
||||
.map(|r| r.get_class() == RegClass::I64)
|
||||
@@ -757,8 +829,8 @@ fn compute_arg_locs(
|
||||
(&mut next_gpr, candidate)
|
||||
} else {
|
||||
let candidate = match args_or_rets {
|
||||
ArgsOrRets::Args => get_fltreg_for_arg_systemv(next_vreg),
|
||||
ArgsOrRets::Rets => get_fltreg_for_retval_systemv(next_vreg),
|
||||
ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg),
|
||||
ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg),
|
||||
};
|
||||
debug_assert!(candidate
|
||||
.map(|r| r.get_class() == RegClass::V128)
|
||||
@@ -791,7 +863,7 @@ fn compute_arg_locs(
|
||||
|
||||
let extra_arg = if add_ret_area_ptr {
|
||||
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
||||
if let Some(reg) = get_intreg_for_arg_systemv(next_gpr) {
|
||||
if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) {
|
||||
ret.push(ABIArg::Reg(reg.to_real_reg(), ir::types::I64));
|
||||
} else {
|
||||
ret.push(ABIArg::Stack(next_stack as i64, ir::types::I64));
|
||||
@@ -897,8 +969,13 @@ fn load_stack(mem: impl Into<SyntheticAmode>, into_reg: Writable<Reg>, ty: Type)
|
||||
|
||||
let mem = mem.into();
|
||||
match ext_mode {
|
||||
Some(ext_mode) => Inst::movsx_rm_r(ext_mode, RegMem::mem(mem), into_reg),
|
||||
None => Inst::mov64_m_r(mem, into_reg),
|
||||
Some(ext_mode) => Inst::movsx_rm_r(
|
||||
ext_mode,
|
||||
RegMem::mem(mem),
|
||||
into_reg,
|
||||
/* infallible load */ None,
|
||||
),
|
||||
None => Inst::mov64_m_r(mem, into_reg, None /* infallible */),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -914,7 +991,7 @@ fn store_stack(mem: impl Into<SyntheticAmode>, from_reg: Reg, ty: Type) -> Inst
|
||||
};
|
||||
let mem = mem.into();
|
||||
if is_int {
|
||||
Inst::mov_r_m(size, from_reg, mem)
|
||||
Inst::mov_r_m(size, from_reg, mem, /* infallible store */ None)
|
||||
} else {
|
||||
unimplemented!("f32/f64 store_stack");
|
||||
}
|
||||
|
||||
@@ -199,7 +199,7 @@ impl RegMemImm {
|
||||
match self {
|
||||
Self::Reg { reg } => collector.add_use(*reg),
|
||||
Self::Mem { addr } => addr.get_regs_as_uses(collector),
|
||||
Self::Imm { simm32: _ } => {}
|
||||
Self::Imm { .. } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -234,12 +234,11 @@ impl RegMem {
|
||||
pub(crate) fn mem(addr: impl Into<SyntheticAmode>) -> Self {
|
||||
Self::Mem { addr: addr.into() }
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
RegMem::Reg { reg } => collector.add_use(*reg),
|
||||
RegMem::Mem { addr } => addr.get_regs_as_uses(collector),
|
||||
RegMem::Mem { addr, .. } => addr.get_regs_as_uses(collector),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -252,7 +251,7 @@ impl ShowWithRRU for RegMem {
|
||||
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
|
||||
match self {
|
||||
RegMem::Reg { reg } => show_ireg_sized(*reg, mb_rru, size),
|
||||
RegMem::Mem { addr } => addr.show_rru(mb_rru),
|
||||
RegMem::Mem { addr, .. } => addr.show_rru(mb_rru),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -283,9 +282,32 @@ impl fmt::Debug for AluRmiROpcode {
|
||||
}
|
||||
}
|
||||
|
||||
impl ToString for AluRmiROpcode {
|
||||
fn to_string(&self) -> String {
|
||||
format!("{:?}", self)
|
||||
impl fmt::Display for AluRmiROpcode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
/// Opcodes of the unary operations that read one operand and write another.
#[derive(Clone, PartialEq)]
pub enum UnaryRmROpcode {
    /// Bit-scan reverse, printed as `bsr`.
    Bsr,
    /// Bit-scan forward, printed as `bsf`.
    Bsf,
}

/// `Debug` prints the lowercase mnemonic of the opcode.
impl fmt::Debug for UnaryRmROpcode {
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        let mnemonic = match self {
            Self::Bsr => "bsr",
            Self::Bsf => "bsf",
        };
        write!(formatter, "{}", mnemonic)
    }
}

/// `Display` prints exactly what `Debug` prints for this opcode.
impl fmt::Display for UnaryRmROpcode {
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        <Self as fmt::Debug>::fmt(self, formatter)
    }
}
|
||||
|
||||
@@ -446,9 +468,9 @@ impl fmt::Debug for SseOpcode {
|
||||
}
|
||||
}
|
||||
|
||||
impl ToString for SseOpcode {
|
||||
fn to_string(&self) -> String {
|
||||
format!("{:?}", self)
|
||||
impl fmt::Display for SseOpcode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -497,34 +519,65 @@ impl fmt::Debug for ExtMode {
|
||||
}
|
||||
}
|
||||
|
||||
impl ToString for ExtMode {
|
||||
fn to_string(&self) -> String {
|
||||
format!("{:?}", self)
|
||||
impl fmt::Display for ExtMode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
/// These indicate the form of a scalar shift: left, signed right, unsigned right.
|
||||
/// These indicate the form of a scalar shift/rotate: left, signed right, unsigned right.
|
||||
#[derive(Clone)]
|
||||
pub enum ShiftKind {
|
||||
Left,
|
||||
RightZ,
|
||||
RightS,
|
||||
ShiftLeft,
|
||||
/// Inserts zeros in the most significant bits.
|
||||
ShiftRightLogical,
|
||||
/// Replicates the sign bit in the most significant bits.
|
||||
ShiftRightArithmetic,
|
||||
RotateLeft,
|
||||
RotateRight,
|
||||
}
|
||||
|
||||
impl fmt::Debug for ShiftKind {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
let name = match self {
|
||||
ShiftKind::Left => "shl",
|
||||
ShiftKind::RightZ => "shr",
|
||||
ShiftKind::RightS => "sar",
|
||||
ShiftKind::ShiftLeft => "shl",
|
||||
ShiftKind::ShiftRightLogical => "shr",
|
||||
ShiftKind::ShiftRightArithmetic => "sar",
|
||||
ShiftKind::RotateLeft => "rol",
|
||||
ShiftKind::RotateRight => "ror",
|
||||
};
|
||||
write!(fmt, "{}", name)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToString for ShiftKind {
|
||||
fn to_string(&self) -> String {
|
||||
format!("{:?}", self)
|
||||
impl fmt::Display for ShiftKind {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
/// The kind of division or remainder operation an instruction performs.
#[derive(Clone)]
pub enum DivOrRemKind {
    /// Signed division.
    SignedDiv,
    /// Unsigned division.
    UnsignedDiv,
    /// Signed remainder.
    SignedRem,
    /// Unsigned remainder.
    UnsignedRem,
}

impl DivOrRemKind {
    /// Returns `true` for the signed variants (`SignedDiv`, `SignedRem`).
    pub(crate) fn is_signed(&self) -> bool {
        // Exhaustive on purpose: adding a variant must force a decision here.
        match self {
            DivOrRemKind::SignedDiv | DivOrRemKind::SignedRem => true,
            DivOrRemKind::UnsignedDiv | DivOrRemKind::UnsignedRem => false,
        }
    }

    /// Returns `true` for the division variants (`SignedDiv`, `UnsignedDiv`),
    /// and `false` for the remainder variants.
    pub(crate) fn is_div(&self) -> bool {
        // Exhaustive on purpose: adding a variant must force a decision here.
        match self {
            DivOrRemKind::SignedDiv | DivOrRemKind::UnsignedDiv => true,
            DivOrRemKind::SignedRem | DivOrRemKind::UnsignedRem => false,
        }
    }
}
|
||||
|
||||
@@ -639,9 +692,9 @@ impl fmt::Debug for CC {
|
||||
}
|
||||
}
|
||||
|
||||
impl ToString for CC {
|
||||
fn to_string(&self) -> String {
|
||||
format!("{:?}", self)
|
||||
impl fmt::Display for CC {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -556,6 +556,41 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
|
||||
Inst::UnaryRmR { size, op, src, dst } => {
|
||||
let (prefix, rex_flags) = match size {
|
||||
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefix::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefix::None, RexFlags::set_w()),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let (opcode, num_opcodes) = match op {
|
||||
UnaryRmROpcode::Bsr => (0x0fbd, 2),
|
||||
UnaryRmROpcode::Bsf => (0x0fbc, 2),
|
||||
};
|
||||
|
||||
match src {
|
||||
RegMem::Reg { reg: src } => emit_std_reg_reg(
|
||||
sink,
|
||||
prefix,
|
||||
opcode,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
*src,
|
||||
rex_flags,
|
||||
),
|
||||
RegMem::Mem { addr: src } => emit_std_reg_mem(
|
||||
sink,
|
||||
prefix,
|
||||
opcode,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Div {
|
||||
size,
|
||||
signed,
|
||||
@@ -589,6 +624,32 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
|
||||
Inst::MulHi { size, signed, rhs } => {
|
||||
let (prefix, rex_flags) = match size {
|
||||
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefix::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefix::None, RexFlags::set_w()),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let subopcode = if *signed { 5 } else { 4 };
|
||||
match rhs {
|
||||
RegMem::Reg { reg } => {
|
||||
let src = int_reg_enc(*reg);
|
||||
emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags)
|
||||
}
|
||||
RegMem::Mem { addr: src } => emit_std_enc_mem(
|
||||
sink,
|
||||
prefix,
|
||||
0xF7,
|
||||
1,
|
||||
subopcode,
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
Inst::SignExtendRaxRdx { size } => {
|
||||
match size {
|
||||
2 => sink.put1(0x66),
|
||||
@@ -600,11 +661,11 @@ pub(crate) fn emit(
|
||||
}
|
||||
|
||||
Inst::CheckedDivOrRemSeq {
|
||||
is_div,
|
||||
is_signed,
|
||||
kind,
|
||||
size,
|
||||
divisor,
|
||||
loc,
|
||||
tmp,
|
||||
} => {
|
||||
// Generates the following code sequence:
|
||||
//
|
||||
@@ -642,7 +703,7 @@ pub(crate) fn emit(
|
||||
let inst = Inst::trap_if(CC::Z, TrapCode::IntegerDivisionByZero, *loc);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
let (do_op, done_label) = if *is_signed {
|
||||
let (do_op, done_label) = if kind.is_signed() {
|
||||
// Now check if the divisor is -1.
|
||||
let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), *divisor);
|
||||
inst.emit(sink, flags, state);
|
||||
@@ -653,7 +714,7 @@ pub(crate) fn emit(
|
||||
one_way_jmp(sink, CC::NZ, do_op);
|
||||
|
||||
// Here, divisor == -1.
|
||||
if !*is_div {
|
||||
if !kind.is_div() {
|
||||
// x % -1 = 0; put the result into the destination, $rdx.
|
||||
let done_label = sink.get_label();
|
||||
|
||||
@@ -666,8 +727,18 @@ pub(crate) fn emit(
|
||||
(Some(do_op), Some(done_label))
|
||||
} else {
|
||||
// Check for integer overflow.
|
||||
let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0x80000000), regs::rax());
|
||||
inst.emit(sink, flags, state);
|
||||
if *size == 8 {
|
||||
let tmp = tmp.expect("temporary for i64 sdiv");
|
||||
|
||||
let inst = Inst::imm_r(true, 0x8000000000000000, tmp);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
let inst = Inst::cmp_rmi_r(8, RegMemImm::reg(tmp.to_reg()), regs::rax());
|
||||
inst.emit(sink, flags, state);
|
||||
} else {
|
||||
let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0x80000000), regs::rax());
|
||||
inst.emit(sink, flags, state);
|
||||
}
|
||||
|
||||
// If not equal, jump over the trap.
|
||||
let inst = Inst::trap_if(CC::Z, TrapCode::IntegerOverflow, *loc);
|
||||
@@ -684,7 +755,7 @@ pub(crate) fn emit(
|
||||
}
|
||||
|
||||
// Fill in the high parts:
|
||||
if *is_signed {
|
||||
if kind.is_signed() {
|
||||
// sign-extend the sign-bit of rax into rdx, for signed opcodes.
|
||||
let inst = Inst::sign_extend_rax_to_rdx(*size);
|
||||
inst.emit(sink, flags, state);
|
||||
@@ -694,7 +765,7 @@ pub(crate) fn emit(
|
||||
inst.emit(sink, flags, state);
|
||||
}
|
||||
|
||||
let inst = Inst::div(*size, *is_signed, RegMem::reg(*divisor), *loc);
|
||||
let inst = Inst::div(*size, kind.is_signed(), RegMem::reg(*divisor), *loc);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
// Lowering takes care of moving the result back into the right register, see comment
|
||||
@@ -735,7 +806,12 @@ pub(crate) fn emit(
|
||||
emit_std_reg_reg(sink, LegacyPrefix::None, 0x89, 1, *src, dst.to_reg(), rex);
|
||||
}
|
||||
|
||||
Inst::MovZX_RM_R { ext_mode, src, dst } => {
|
||||
Inst::MovZX_RM_R {
|
||||
ext_mode,
|
||||
src,
|
||||
dst,
|
||||
srcloc,
|
||||
} => {
|
||||
let (opcodes, num_opcodes, rex_flags) = match ext_mode {
|
||||
ExtMode::BL => {
|
||||
// MOVZBL is (REX.W==0) 0F B6 /r
|
||||
@@ -777,27 +853,45 @@ pub(crate) fn emit(
|
||||
*src,
|
||||
rex_flags,
|
||||
),
|
||||
RegMem::Mem { addr: src } => emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
RegMem::Mem { addr: src } => {
|
||||
let src = &src.finalize(state);
|
||||
|
||||
if let Some(srcloc) = *srcloc {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
src,
|
||||
rex_flags,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Mov64_M_R { src, dst } => emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x8B,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
&src.finalize(state),
|
||||
RexFlags::set_w(),
|
||||
),
|
||||
Inst::Mov64_M_R { src, dst, srcloc } => {
|
||||
let src = &src.finalize(state);
|
||||
|
||||
if let Some(srcloc) = *srcloc {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x8B,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
src,
|
||||
RexFlags::set_w(),
|
||||
)
|
||||
}
|
||||
|
||||
Inst::LoadEffectiveAddress { addr, dst } => emit_std_reg_mem(
|
||||
sink,
|
||||
@@ -809,7 +903,12 @@ pub(crate) fn emit(
|
||||
RexFlags::set_w(),
|
||||
),
|
||||
|
||||
Inst::MovSX_RM_R { ext_mode, src, dst } => {
|
||||
Inst::MovSX_RM_R {
|
||||
ext_mode,
|
||||
src,
|
||||
dst,
|
||||
srcloc,
|
||||
} => {
|
||||
let (opcodes, num_opcodes, rex_flags) = match ext_mode {
|
||||
ExtMode::BL => {
|
||||
// MOVSBL is (REX.W==0) 0F BE /r
|
||||
@@ -843,21 +942,41 @@ pub(crate) fn emit(
|
||||
*src,
|
||||
rex_flags,
|
||||
),
|
||||
RegMem::Mem { addr: src } => emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
|
||||
RegMem::Mem { addr: src } => {
|
||||
let src = &src.finalize(state);
|
||||
|
||||
if let Some(srcloc) = *srcloc {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
src,
|
||||
rex_flags,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Mov_R_M { size, src, dst } => {
|
||||
Inst::Mov_R_M {
|
||||
size,
|
||||
src,
|
||||
dst,
|
||||
srcloc,
|
||||
} => {
|
||||
let dst = &dst.finalize(state);
|
||||
|
||||
if let Some(srcloc) = *srcloc {
|
||||
// Register the offset at which the actual store instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
match size {
|
||||
1 => {
|
||||
// This is one of the few places where the presence of a
|
||||
@@ -925,9 +1044,11 @@ pub(crate) fn emit(
|
||||
} => {
|
||||
let enc_dst = int_reg_enc(dst.to_reg());
|
||||
let subopcode = match kind {
|
||||
ShiftKind::Left => 4,
|
||||
ShiftKind::RightZ => 5,
|
||||
ShiftKind::RightS => 7,
|
||||
ShiftKind::RotateLeft => 0,
|
||||
ShiftKind::RotateRight => 1,
|
||||
ShiftKind::ShiftLeft => 4,
|
||||
ShiftKind::ShiftRightLogical => 5,
|
||||
ShiftKind::ShiftRightArithmetic => 7,
|
||||
};
|
||||
|
||||
let rex = if *is_64 {
|
||||
@@ -1262,7 +1383,7 @@ pub(crate) fn emit(
|
||||
// We generate the following sequence:
|
||||
// ;; generated by lowering: cmp #jmp_table_size, %idx
|
||||
// jnb $default_target
|
||||
// mov %idx, %tmp2
|
||||
// movl %idx, %tmp2
|
||||
// lea start_of_jump_table_offset(%rip), %tmp1
|
||||
// movzlq [%tmp1, %tmp2], %tmp2
|
||||
// addq %tmp2, %tmp1
|
||||
@@ -1275,7 +1396,8 @@ pub(crate) fn emit(
|
||||
};
|
||||
one_way_jmp(sink, CC::NB, *default_label); // idx unsigned >= jmp table size
|
||||
|
||||
let inst = Inst::gen_move(*tmp2, *idx, I64);
|
||||
// Copy the index (and make sure to clear the high 32-bits lane of tmp2).
|
||||
let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(*idx), *tmp2, None);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
// Load base address of jump table.
|
||||
@@ -1291,6 +1413,7 @@ pub(crate) fn emit(
|
||||
ExtMode::LQ,
|
||||
RegMem::mem(Amode::imm_reg_reg_shift(0, tmp1.to_reg(), tmp2.to_reg(), 2)),
|
||||
*tmp2,
|
||||
None,
|
||||
);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
@@ -1343,6 +1466,7 @@ pub(crate) fn emit(
|
||||
op,
|
||||
src: src_e,
|
||||
dst: reg_g,
|
||||
srcloc,
|
||||
} => {
|
||||
let rex = RexFlags::clear_w();
|
||||
let (prefix, opcode) = match op {
|
||||
@@ -1357,9 +1481,12 @@ pub(crate) fn emit(
|
||||
RegMem::Reg { reg: reg_e } => {
|
||||
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
|
||||
}
|
||||
|
||||
RegMem::Mem { addr } => {
|
||||
let addr = &addr.finalize(state);
|
||||
if let Some(srcloc) = *srcloc {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
|
||||
}
|
||||
}
|
||||
@@ -1387,14 +1514,19 @@ pub(crate) fn emit(
|
||||
RegMem::Reg { reg: reg_e } => {
|
||||
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
|
||||
}
|
||||
|
||||
RegMem::Mem { addr } => {
|
||||
let addr = &addr.finalize(state);
|
||||
emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::XMM_Mov_R_M { op, src, dst } => {
|
||||
|
||||
Inst::XMM_Mov_R_M {
|
||||
op,
|
||||
src,
|
||||
dst,
|
||||
srcloc,
|
||||
} => {
|
||||
let rex = RexFlags::clear_w();
|
||||
let (prefix, opcode) = match op {
|
||||
SseOpcode::Movd => (LegacyPrefix::_66, 0x0F7E),
|
||||
@@ -1403,8 +1535,32 @@ pub(crate) fn emit(
|
||||
};
|
||||
|
||||
let dst = &dst.finalize(state);
|
||||
if let Some(srcloc) = *srcloc {
|
||||
// Register the offset at which the actual store instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
emit_std_reg_mem(sink, prefix, opcode, 2, *src, dst, rex);
|
||||
}
|
||||
|
||||
Inst::LoadExtName {
|
||||
dst,
|
||||
name,
|
||||
offset,
|
||||
srcloc,
|
||||
} => {
|
||||
// The full address can be encoded in the register, with a relocation.
|
||||
// Generates: movabsq $name, %dst
|
||||
let enc_dst = int_reg_enc(dst.to_reg());
|
||||
sink.put1(0x48 | ((enc_dst >> 3) & 1));
|
||||
sink.put1(0xB8 | (enc_dst & 7));
|
||||
sink.add_reloc(*srcloc, Reloc::Abs8, name, *offset);
|
||||
if flags.emit_all_ones_funcaddrs() {
|
||||
sink.put8(u64::max_value());
|
||||
} else {
|
||||
sink.put8(0);
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Hlt => {
|
||||
sink.put1(0xcc);
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -4,16 +4,17 @@
|
||||
#![allow(non_snake_case)]
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use smallvec::SmallVec;
|
||||
use std::fmt;
|
||||
use std::string::{String, ToString};
|
||||
|
||||
use regalloc::RegUsageCollector;
|
||||
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable};
|
||||
use smallvec::SmallVec;
|
||||
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I64, I8};
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
|
||||
use crate::machinst::*;
|
||||
use crate::settings::Flags;
|
||||
@@ -49,6 +50,14 @@ pub enum Inst {
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// Instructions on GPR that only read src and defines dst (dst is not modified): bsr, etc.
|
||||
UnaryRmR {
|
||||
size: u8, // 2, 4 or 8
|
||||
op: UnaryRmROpcode,
|
||||
src: RegMem,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// Integer quotient and remainder: (div idiv) $rax $rdx (reg addr)
|
||||
Div {
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
@@ -57,17 +66,25 @@ pub enum Inst {
|
||||
loc: SourceLoc,
|
||||
},
|
||||
|
||||
/// The high bits (RDX) of a (un)signed multiply: RDX:RAX := RAX * rhs.
|
||||
MulHi { size: u8, signed: bool, rhs: RegMem },
|
||||
|
||||
/// A synthetic sequence to implement the right inline checks for remainder and division,
|
||||
/// assuming the dividend is in $rax.
|
||||
/// Puts the result back into $rax if is_div, $rdx if !is_div, to mimic what the div
|
||||
/// assuming the dividend is in %rax.
|
||||
/// Puts the result back into %rax if is_div, %rdx if !is_div, to mimic what the div
|
||||
/// instruction does.
|
||||
/// The generated code sequence is described in the emit's function match arm for this
|
||||
/// instruction.
|
||||
///
|
||||
/// Note: %rdx is marked as modified by this instruction, to avoid an early clobber problem
|
||||
/// with the temporary and divisor registers. Make sure to zero %rdx right before this
|
||||
/// instruction, or you might run into regalloc failures where %rdx is live before its first
|
||||
/// def!
|
||||
CheckedDivOrRemSeq {
|
||||
is_div: bool,
|
||||
is_signed: bool,
|
||||
kind: DivOrRemKind,
|
||||
size: u8,
|
||||
divisor: Reg,
|
||||
tmp: Option<Writable<Reg>>,
|
||||
loc: SourceLoc,
|
||||
},
|
||||
|
||||
@@ -98,12 +115,16 @@ pub enum Inst {
|
||||
ext_mode: ExtMode,
|
||||
src: RegMem,
|
||||
dst: Writable<Reg>,
|
||||
/// Source location, if the memory access can be out-of-bounds.
|
||||
srcloc: Option<SourceLoc>,
|
||||
},
|
||||
|
||||
/// A plain 64-bit integer load, since MovZX_RM_R can't represent that.
|
||||
Mov64_M_R {
|
||||
src: SyntheticAmode,
|
||||
dst: Writable<Reg>,
|
||||
/// Source location, if the memory access can be out-of-bounds.
|
||||
srcloc: Option<SourceLoc>,
|
||||
},
|
||||
|
||||
/// Loads the memory address of addr into dst.
|
||||
@@ -117,6 +138,8 @@ pub enum Inst {
|
||||
ext_mode: ExtMode,
|
||||
src: RegMem,
|
||||
dst: Writable<Reg>,
|
||||
/// Source location, if the memory access can be out-of-bounds.
|
||||
srcloc: Option<SourceLoc>,
|
||||
},
|
||||
|
||||
/// Integer stores: mov (b w l q) reg addr.
|
||||
@@ -124,6 +147,8 @@ pub enum Inst {
|
||||
size: u8, // 1, 2, 4 or 8.
|
||||
src: Reg,
|
||||
dst: SyntheticAmode,
|
||||
/// Source location, if the memory access can be out-of-bounds.
|
||||
srcloc: Option<SourceLoc>,
|
||||
},
|
||||
|
||||
/// Arithmetic shifts: (shl shr sar) (l q) imm reg.
|
||||
@@ -180,6 +205,8 @@ pub enum Inst {
|
||||
op: SseOpcode,
|
||||
src: RegMem,
|
||||
dst: Writable<Reg>,
|
||||
/// Source location, if the memory access can be out-of-bounds.
|
||||
srcloc: Option<SourceLoc>,
|
||||
},
|
||||
|
||||
/// mov reg addr (good for all memory stores from xmm registers)
|
||||
@@ -187,6 +214,8 @@ pub enum Inst {
|
||||
op: SseOpcode,
|
||||
src: Reg,
|
||||
dst: SyntheticAmode,
|
||||
/// Source location, if the memory access can be out-of-bounds.
|
||||
srcloc: Option<SourceLoc>,
|
||||
},
|
||||
|
||||
// =====================================
|
||||
@@ -255,6 +284,14 @@ pub enum Inst {
|
||||
/// An instruction that will always trigger the illegal instruction exception.
|
||||
Ud2 { trap_info: (SourceLoc, TrapCode) },
|
||||
|
||||
/// Loads an external symbol in a register, with a relocation: movabsq $name, dst
|
||||
LoadExtName {
|
||||
dst: Writable<Reg>,
|
||||
name: Box<ExternalName>,
|
||||
srcloc: SourceLoc,
|
||||
offset: i64,
|
||||
},
|
||||
|
||||
// =====================================
|
||||
// Meta-instructions generating no code.
|
||||
/// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
|
||||
@@ -262,15 +299,13 @@ pub enum Inst {
|
||||
VirtualSPOffsetAdj { offset: i64 },
|
||||
}
|
||||
|
||||
// Handy constructors for Insts.
|
||||
|
||||
// For various sizes, will some number of lowest bits sign extend to be the
|
||||
// same as the whole value?
|
||||
/// Returns `true` when sign-extending the low 32 bits of `x` to 64 bits yields `x` again,
/// i.e. when bits 31 through 63 of `x` are all equal.
pub(crate) fn low32_will_sign_extend_to_64(x: u64) -> bool {
    let xs = x as i64;
    // Shifting left by 32 drops the upper half; the arithmetic shift right then replicates
    // bit 31 into it, which is exactly a 32-to-64-bit sign extension of the low half.
    xs == ((xs << 32) >> 32)
}
|
||||
|
||||
// Handy constructors for Insts.
|
||||
|
||||
impl Inst {
|
||||
pub(crate) fn nop(len: u8) -> Self {
|
||||
debug_assert!(len <= 16);
|
||||
@@ -292,6 +327,17 @@ impl Inst {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn unary_rm_r(
|
||||
size: u8,
|
||||
op: UnaryRmROpcode,
|
||||
src: RegMem,
|
||||
dst: Writable<Reg>,
|
||||
) -> Self {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
debug_assert!(size == 8 || size == 4 || size == 2);
|
||||
Self::UnaryRmR { size, op, src, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn div(size: u8, signed: bool, divisor: RegMem, loc: SourceLoc) -> Inst {
|
||||
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
|
||||
Inst::Div {
|
||||
@@ -301,6 +347,12 @@ impl Inst {
|
||||
loc,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn mul_hi(size: u8, signed: bool, rhs: RegMem) -> Inst {
|
||||
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
|
||||
Inst::MulHi { size, signed, rhs }
|
||||
}
|
||||
|
||||
pub(crate) fn sign_extend_rax_to_rdx(size: u8) -> Inst {
|
||||
debug_assert!(size == 8 || size == 4 || size == 2);
|
||||
Inst::SignExtendRaxRdx { size }
|
||||
@@ -309,7 +361,11 @@ impl Inst {
|
||||
pub(crate) fn imm_r(dst_is_64: bool, simm64: u64, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
if !dst_is_64 {
|
||||
debug_assert!(low32willSXto64(simm64));
|
||||
debug_assert!(
|
||||
low32_will_sign_extend_to_64(simm64),
|
||||
"{} won't sign-extend to 64 bits!",
|
||||
simm64
|
||||
);
|
||||
}
|
||||
Inst::Imm_R {
|
||||
dst_is_64,
|
||||
@@ -324,9 +380,19 @@ impl Inst {
|
||||
Inst::Mov_R_R { is_64, src, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn xmm_mov_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst {
|
||||
pub(crate) fn xmm_mov_rm_r(
|
||||
op: SseOpcode,
|
||||
src: RegMem,
|
||||
dst: Writable<Reg>,
|
||||
srcloc: Option<SourceLoc>,
|
||||
) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
|
||||
Inst::XMM_Mov_RM_R { op, src, dst }
|
||||
Inst::XMM_Mov_RM_R {
|
||||
op,
|
||||
src,
|
||||
dst,
|
||||
srcloc,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Self {
|
||||
@@ -334,37 +400,77 @@ impl Inst {
|
||||
Inst::XMM_RM_R { op, src, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn xmm_mov_r_m(op: SseOpcode, src: Reg, dst: impl Into<SyntheticAmode>) -> Inst {
|
||||
pub(crate) fn xmm_mov_r_m(
|
||||
op: SseOpcode,
|
||||
src: Reg,
|
||||
dst: impl Into<SyntheticAmode>,
|
||||
srcloc: Option<SourceLoc>,
|
||||
) -> Inst {
|
||||
debug_assert!(src.get_class() == RegClass::V128);
|
||||
Inst::XMM_Mov_R_M {
|
||||
op,
|
||||
src,
|
||||
dst: dst.into(),
|
||||
srcloc,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
|
||||
pub(crate) fn movzx_rm_r(
|
||||
ext_mode: ExtMode,
|
||||
src: RegMem,
|
||||
dst: Writable<Reg>,
|
||||
srcloc: Option<SourceLoc>,
|
||||
) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::MovZX_RM_R { ext_mode, src, dst }
|
||||
Inst::MovZX_RM_R {
|
||||
ext_mode,
|
||||
src,
|
||||
dst,
|
||||
srcloc,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn mov64_m_r(src: impl Into<SyntheticAmode>, dst: Writable<Reg>) -> Inst {
|
||||
pub(crate) fn movsx_rm_r(
|
||||
ext_mode: ExtMode,
|
||||
src: RegMem,
|
||||
dst: Writable<Reg>,
|
||||
srcloc: Option<SourceLoc>,
|
||||
) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::MovSX_RM_R {
|
||||
ext_mode,
|
||||
src,
|
||||
dst,
|
||||
srcloc,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn mov64_m_r(
|
||||
src: impl Into<SyntheticAmode>,
|
||||
dst: Writable<Reg>,
|
||||
srcloc: Option<SourceLoc>,
|
||||
) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::Mov64_M_R {
|
||||
src: src.into(),
|
||||
dst,
|
||||
srcloc,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::MovSX_RM_R { ext_mode, src, dst }
|
||||
/// A convenience function to be able to use a RegMem as the source of a move.
|
||||
pub(crate) fn mov64_rm_r(src: RegMem, dst: Writable<Reg>, srcloc: Option<SourceLoc>) -> Inst {
|
||||
match src {
|
||||
RegMem::Reg { reg } => Self::mov_r_r(true, reg, dst),
|
||||
RegMem::Mem { addr } => Self::mov64_m_r(addr, dst, srcloc),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn mov_r_m(
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: Reg,
|
||||
dst: impl Into<SyntheticAmode>,
|
||||
srcloc: Option<SourceLoc>,
|
||||
) -> Inst {
|
||||
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
|
||||
debug_assert!(src.get_class() == RegClass::I64);
|
||||
@@ -372,6 +478,7 @@ impl Inst {
|
||||
size,
|
||||
src,
|
||||
dst: dst.into(),
|
||||
srcloc,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -548,6 +655,7 @@ impl ShowWithRRU for Inst {
|
||||
|
||||
match self {
|
||||
Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),
|
||||
|
||||
Inst::Alu_RMI_R {
|
||||
is_64,
|
||||
op,
|
||||
@@ -559,6 +667,14 @@ impl ShowWithRRU for Inst {
|
||||
src.show_rru_sized(mb_rru, sizeLQ(*is_64)),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)),
|
||||
),
|
||||
|
||||
Inst::UnaryRmR { src, dst, op, size } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2(op.to_string(), suffixBWLQ(*size)),
|
||||
src.show_rru_sized(mb_rru, *size),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, *size),
|
||||
),
|
||||
|
||||
Inst::Div {
|
||||
size,
|
||||
signed,
|
||||
@@ -573,16 +689,30 @@ impl ShowWithRRU for Inst {
|
||||
}),
|
||||
divisor.show_rru_sized(mb_rru, *size)
|
||||
),
|
||||
Inst::MulHi {
|
||||
size, signed, rhs, ..
|
||||
} => format!(
|
||||
"{} {}",
|
||||
ljustify(if *signed {
|
||||
"imul".to_string()
|
||||
} else {
|
||||
"mul".to_string()
|
||||
}),
|
||||
rhs.show_rru_sized(mb_rru, *size)
|
||||
),
|
||||
Inst::CheckedDivOrRemSeq {
|
||||
is_div,
|
||||
is_signed,
|
||||
kind,
|
||||
size,
|
||||
divisor,
|
||||
..
|
||||
} => format!(
|
||||
"{}{} $rax:$rdx, {}",
|
||||
if *is_signed { "s" } else { "u" },
|
||||
if *is_div { "div " } else { "rem " },
|
||||
"{} $rax:$rdx, {}",
|
||||
match kind {
|
||||
DivOrRemKind::SignedDiv => "sdiv",
|
||||
DivOrRemKind::UnsignedDiv => "udiv",
|
||||
DivOrRemKind::SignedRem => "srem",
|
||||
DivOrRemKind::UnsignedRem => "urem",
|
||||
},
|
||||
show_ireg_sized(*divisor, mb_rru, *size),
|
||||
),
|
||||
Inst::SignExtendRaxRdx { size } => match size {
|
||||
@@ -592,13 +722,13 @@ impl ShowWithRRU for Inst {
|
||||
_ => unreachable!(),
|
||||
}
|
||||
.into(),
|
||||
Inst::XMM_Mov_RM_R { op, src, dst } => format!(
|
||||
Inst::XMM_Mov_RM_R { op, src, dst, .. } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify(op.to_string()),
|
||||
src.show_rru_sized(mb_rru, op.src_size()),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, 8),
|
||||
),
|
||||
Inst::XMM_Mov_R_M { op, src, dst } => format!(
|
||||
Inst::XMM_Mov_R_M { op, src, dst, .. } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify(op.to_string()),
|
||||
show_ireg_sized(*src, mb_rru, 8),
|
||||
@@ -637,7 +767,9 @@ impl ShowWithRRU for Inst {
|
||||
show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
|
||||
),
|
||||
Inst::MovZX_RM_R { ext_mode, src, dst } => {
|
||||
Inst::MovZX_RM_R {
|
||||
ext_mode, src, dst, ..
|
||||
} => {
|
||||
if *ext_mode == ExtMode::LQ {
|
||||
format!(
|
||||
"{} {}, {}",
|
||||
@@ -654,7 +786,7 @@ impl ShowWithRRU for Inst {
|
||||
)
|
||||
}
|
||||
}
|
||||
Inst::Mov64_M_R { src, dst } => format!(
|
||||
Inst::Mov64_M_R { src, dst, .. } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify("movq".to_string()),
|
||||
src.show_rru(mb_rru),
|
||||
@@ -666,13 +798,15 @@ impl ShowWithRRU for Inst {
|
||||
addr.show_rru(mb_rru),
|
||||
dst.show_rru(mb_rru)
|
||||
),
|
||||
Inst::MovSX_RM_R { ext_mode, src, dst } => format!(
|
||||
Inst::MovSX_RM_R {
|
||||
ext_mode, src, dst, ..
|
||||
} => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("movs".to_string(), ext_mode.to_string()),
|
||||
src.show_rru_sized(mb_rru, ext_mode.src_size()),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, ext_mode.dst_size())
|
||||
),
|
||||
Inst::Mov_R_M { size, src, dst } => format!(
|
||||
Inst::Mov_R_M { size, src, dst, .. } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("mov".to_string(), suffixBWLQ(*size)),
|
||||
show_ireg_sized(*src, mb_rru, *size),
|
||||
@@ -753,6 +887,15 @@ impl ShowWithRRU for Inst {
|
||||
Inst::TrapIf { cc, trap_code, .. } => {
|
||||
format!("j{} ; ud2 {} ;", cc.invert().to_string(), trap_code)
|
||||
}
|
||||
// `LoadExtName` is emitted as `movabsq $name, %dst`, so print that mnemonic rather than
// the unrelated SSE move `movaps`.
Inst::LoadExtName {
    dst, name, offset, ..
} => format!(
    "{} {}+{}, {}",
    ljustify("movabsq".into()),
    name,
    offset,
    show_ireg_sized(dst.to_reg(), mb_rru, 8),
),
|
||||
Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset),
|
||||
Inst::Hlt => "hlt".into(),
|
||||
Inst::Ud2 { trap_info } => format!("ud2 {}", trap_info.1),
|
||||
@@ -774,12 +917,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
// regalloc.rs will "fix" this for us by removing the modified set from the use and def
|
||||
// sets.
|
||||
match inst {
|
||||
Inst::Alu_RMI_R {
|
||||
is_64: _,
|
||||
op: _,
|
||||
src,
|
||||
dst,
|
||||
} => {
|
||||
Inst::Alu_RMI_R { src, dst, .. } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_mod(*dst);
|
||||
}
|
||||
@@ -788,16 +926,27 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_mod(Writable::from_reg(regs::rdx()));
|
||||
divisor.get_regs_as_uses(collector);
|
||||
}
|
||||
Inst::CheckedDivOrRemSeq { divisor, .. } => {
|
||||
Inst::MulHi { rhs, .. } => {
|
||||
collector.add_mod(Writable::from_reg(regs::rax()));
|
||||
collector.add_def(Writable::from_reg(regs::rdx()));
|
||||
rhs.get_regs_as_uses(collector);
|
||||
}
|
||||
Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => {
|
||||
// Mark both fixed registers as mods, to avoid an early clobber problem in codegen
|
||||
// (i.e. the temporary is allocated one of the fixed registers). This requires writing
|
||||
// the rdx register *before* the instruction, which is not too bad.
|
||||
collector.add_mod(Writable::from_reg(regs::rax()));
|
||||
collector.add_mod(Writable::from_reg(regs::rdx()));
|
||||
collector.add_use(*divisor);
|
||||
if let Some(tmp) = tmp {
|
||||
collector.add_def(*tmp);
|
||||
}
|
||||
}
|
||||
Inst::SignExtendRaxRdx { .. } => {
|
||||
collector.add_use(regs::rax());
|
||||
collector.add_mod(Writable::from_reg(regs::rdx()));
|
||||
}
|
||||
Inst::XMM_Mov_RM_R { src, dst, .. } => {
|
||||
Inst::UnaryRmR { src, dst, .. } | Inst::XMM_Mov_RM_R { src, dst, .. } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
@@ -820,7 +969,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::Mov64_M_R { src, dst } | Inst::LoadEffectiveAddress { addr: src, dst } => {
|
||||
Inst::Mov64_M_R { src, dst, .. } | Inst::LoadEffectiveAddress { addr: src, dst } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst)
|
||||
}
|
||||
@@ -832,18 +981,13 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_use(*src);
|
||||
dst.get_regs_as_uses(collector);
|
||||
}
|
||||
Inst::Shift_R {
|
||||
is_64: _,
|
||||
kind: _,
|
||||
num_bits,
|
||||
dst,
|
||||
} => {
|
||||
Inst::Shift_R { num_bits, dst, .. } => {
|
||||
if num_bits.is_none() {
|
||||
collector.add_use(regs::rcx());
|
||||
}
|
||||
collector.add_mod(*dst);
|
||||
}
|
||||
Inst::Cmp_RMI_R { size: _, src, dst } => {
|
||||
Inst::Cmp_RMI_R { src, dst, .. } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_use(*dst); // yes, really `add_use`
|
||||
}
|
||||
@@ -852,7 +996,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
}
|
||||
Inst::Cmove { src, dst, .. } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
collector.add_mod(*dst);
|
||||
}
|
||||
Inst::Push64 { src } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
@@ -891,12 +1035,19 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_def(*tmp2);
|
||||
}
|
||||
|
||||
Inst::JmpUnknown { target } => {
|
||||
target.get_regs_as_uses(collector);
|
||||
}
|
||||
|
||||
Inst::LoadExtName { dst, .. } => {
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
|
||||
Inst::Ret
|
||||
| Inst::EpiloguePlaceholder
|
||||
| Inst::JmpKnown { .. }
|
||||
| Inst::JmpCond { .. }
|
||||
| Inst::Nop { .. }
|
||||
| Inst::JmpUnknown { .. }
|
||||
| Inst::TrapIf { .. }
|
||||
| Inst::VirtualSPOffsetAdj { .. }
|
||||
| Inst::Hlt
|
||||
@@ -933,15 +1084,11 @@ fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
|
||||
impl Amode {
|
||||
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
|
||||
match self {
|
||||
Amode::ImmReg {
|
||||
simm32: _,
|
||||
ref mut base,
|
||||
} => map_use(map, base),
|
||||
Amode::ImmReg { ref mut base, .. } => map_use(map, base),
|
||||
Amode::ImmRegRegShift {
|
||||
simm32: _,
|
||||
ref mut base,
|
||||
ref mut index,
|
||||
shift: _,
|
||||
..
|
||||
} => {
|
||||
map_use(map, base);
|
||||
map_use(map, index);
|
||||
@@ -958,7 +1105,7 @@ impl RegMemImm {
|
||||
match self {
|
||||
RegMemImm::Reg { ref mut reg } => map_use(map, reg),
|
||||
RegMemImm::Mem { ref mut addr } => addr.map_uses(map),
|
||||
RegMemImm::Imm { simm32: _ } => {}
|
||||
RegMemImm::Imm { .. } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -967,7 +1114,7 @@ impl RegMem {
|
||||
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
|
||||
match self {
|
||||
RegMem::Reg { ref mut reg } => map_use(map, reg),
|
||||
RegMem::Mem { ref mut addr } => addr.map_uses(map),
|
||||
RegMem::Mem { ref mut addr, .. } => addr.map_uses(map),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -977,23 +1124,31 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
match inst {
|
||||
// ** Nop
|
||||
Inst::Alu_RMI_R {
|
||||
is_64: _,
|
||||
op: _,
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
..
|
||||
} => {
|
||||
src.map_uses(mapper);
|
||||
map_mod(mapper, dst);
|
||||
}
|
||||
Inst::Div { divisor, .. } => divisor.map_uses(mapper),
|
||||
Inst::CheckedDivOrRemSeq { divisor, .. } => {
|
||||
Inst::MulHi { rhs, .. } => rhs.map_uses(mapper),
|
||||
Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => {
|
||||
map_use(mapper, divisor);
|
||||
if let Some(tmp) = tmp {
|
||||
map_def(mapper, tmp)
|
||||
}
|
||||
}
|
||||
Inst::SignExtendRaxRdx { .. } => {}
|
||||
Inst::XMM_Mov_RM_R {
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
..
|
||||
}
|
||||
| Inst::UnaryRmR {
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
..
|
||||
} => {
|
||||
src.map_uses(mapper);
|
||||
map_def(mapper, dst);
|
||||
@@ -1014,15 +1169,11 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
map_use(mapper, src);
|
||||
dst.map_uses(mapper);
|
||||
}
|
||||
Inst::Imm_R {
|
||||
dst_is_64: _,
|
||||
simm64: _,
|
||||
ref mut dst,
|
||||
} => map_def(mapper, dst),
|
||||
Inst::Imm_R { ref mut dst, .. } => map_def(mapper, dst),
|
||||
Inst::Mov_R_R {
|
||||
is_64: _,
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
..
|
||||
} => {
|
||||
map_use(mapper, src);
|
||||
map_def(mapper, dst);
|
||||
@@ -1035,7 +1186,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
src.map_uses(mapper);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::Mov64_M_R { src, dst } | Inst::LoadEffectiveAddress { addr: src, dst } => {
|
||||
Inst::Mov64_M_R { src, dst, .. } | Inst::LoadEffectiveAddress { addr: src, dst } => {
|
||||
src.map_uses(mapper);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
@@ -1055,18 +1206,13 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
map_use(mapper, src);
|
||||
dst.map_uses(mapper);
|
||||
}
|
||||
Inst::Shift_R {
|
||||
is_64: _,
|
||||
kind: _,
|
||||
num_bits: _,
|
||||
ref mut dst,
|
||||
} => {
|
||||
Inst::Shift_R { ref mut dst, .. } => {
|
||||
map_mod(mapper, dst);
|
||||
}
|
||||
Inst::Cmp_RMI_R {
|
||||
size: _,
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
..
|
||||
} => {
|
||||
src.map_uses(mapper);
|
||||
map_use(mapper, dst);
|
||||
@@ -1078,7 +1224,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
..
|
||||
} => {
|
||||
src.map_uses(mapper);
|
||||
map_def(mapper, dst)
|
||||
map_mod(mapper, dst)
|
||||
}
|
||||
Inst::Push64 { ref mut src } => src.map_uses(mapper),
|
||||
Inst::Pop64 { ref mut dst } => {
|
||||
@@ -1124,12 +1270,15 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
map_def(mapper, tmp2);
|
||||
}
|
||||
|
||||
Inst::JmpUnknown { ref mut target } => target.map_uses(mapper),
|
||||
|
||||
Inst::LoadExtName { ref mut dst, .. } => map_def(mapper, dst),
|
||||
|
||||
Inst::Ret
|
||||
| Inst::EpiloguePlaceholder
|
||||
| Inst::JmpKnown { .. }
|
||||
| Inst::JmpCond { .. }
|
||||
| Inst::Nop { .. }
|
||||
| Inst::JmpUnknown { .. }
|
||||
| Inst::TrapIf { .. }
|
||||
| Inst::VirtualSPOffsetAdj { .. }
|
||||
| Inst::Ud2 { .. }
|
||||
@@ -1157,8 +1306,10 @@ impl MachInst for Inst {
|
||||
// conceivably use `movl %reg, %reg` to zero out the top 32 bits of
|
||||
// %reg.
|
||||
match self {
|
||||
Self::Mov_R_R { is_64, src, dst } if *is_64 => Some((*dst, *src)),
|
||||
Self::XMM_Mov_RM_R { op, src, dst }
|
||||
Self::Mov_R_R {
|
||||
is_64, src, dst, ..
|
||||
} if *is_64 => Some((*dst, *src)),
|
||||
Self::XMM_Mov_RM_R { op, src, dst, .. }
|
||||
if *op == SseOpcode::Movss
|
||||
|| *op == SseOpcode::Movsd
|
||||
|| *op == SseOpcode::Movaps =>
|
||||
@@ -1187,9 +1338,7 @@ impl MachInst for Inst {
|
||||
&Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
|
||||
&Self::JmpKnown { dst } => MachTerminator::Uncond(dst.as_label().unwrap()),
|
||||
&Self::JmpCond {
|
||||
cc: _,
|
||||
taken,
|
||||
not_taken,
|
||||
taken, not_taken, ..
|
||||
} => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
|
||||
&Self::JmpTableSeq {
|
||||
ref targets_for_term,
|
||||
@@ -1208,8 +1357,8 @@ impl MachInst for Inst {
|
||||
match rc_dst {
|
||||
RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
|
||||
RegClass::V128 => match ty {
|
||||
F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg),
|
||||
F64 => Inst::xmm_mov_rm_r(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg),
|
||||
F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None),
|
||||
F64 => Inst::xmm_mov_rm_r(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None),
|
||||
_ => panic!("unexpected V128 type in gen_move"),
|
||||
},
|
||||
_ => panic!("gen_move(x64): unhandled regclass"),
|
||||
@@ -1232,6 +1381,7 @@ impl MachInst for Inst {
|
||||
match ty {
|
||||
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
|
||||
F32 | F64 | I128 | B128 => Ok(RegClass::V128),
|
||||
IFLAGS | FFLAGS => Ok(RegClass::I64),
|
||||
_ => Err(CodegenError::Unsupported(format!(
|
||||
"Unexpected SSA-value type: {}",
|
||||
ty
|
||||
@@ -1243,10 +1393,11 @@ impl MachInst for Inst {
|
||||
Inst::jmp_known(BranchTarget::Label(label))
|
||||
}
|
||||
|
||||
fn gen_constant(to_reg: Writable<Reg>, value: u64, _: Type) -> SmallVec<[Self; 4]> {
|
||||
fn gen_constant(to_reg: Writable<Reg>, value: u64, ty: Type) -> SmallVec<[Self; 4]> {
|
||||
let mut ret = SmallVec::new();
|
||||
let is64 = value > 0xffff_ffff;
|
||||
ret.push(Inst::imm_r(is64, value, to_reg));
|
||||
debug_assert!(ty.is_int(), "float constants NYI");
|
||||
let is_64 = ty == I64 && value > 0x7fffffff;
|
||||
ret.push(Inst::imm_r(is_64, value, to_reg));
|
||||
ret
|
||||
}
|
||||
|
||||
|
||||
@@ -33,46 +33,55 @@ fn gpr(enc: u8, index: u8) -> Reg {
|
||||
}
|
||||
|
||||
pub(crate) fn r12() -> Reg {
|
||||
gpr(ENC_R12, 0)
|
||||
gpr(ENC_R12, 16)
|
||||
}
|
||||
pub(crate) fn r13() -> Reg {
|
||||
gpr(ENC_R13, 1)
|
||||
gpr(ENC_R13, 17)
|
||||
}
|
||||
pub(crate) fn r14() -> Reg {
|
||||
gpr(ENC_R14, 2)
|
||||
}
|
||||
pub(crate) fn r15() -> Reg {
|
||||
gpr(ENC_R15, 3)
|
||||
gpr(ENC_R14, 18)
|
||||
}
|
||||
pub(crate) fn rbx() -> Reg {
|
||||
gpr(ENC_RBX, 4)
|
||||
gpr(ENC_RBX, 19)
|
||||
}
|
||||
pub(crate) fn rsi() -> Reg {
|
||||
gpr(6, 5)
|
||||
gpr(6, 20)
|
||||
}
|
||||
pub(crate) fn rdi() -> Reg {
|
||||
gpr(7, 6)
|
||||
gpr(7, 21)
|
||||
}
|
||||
pub(crate) fn rax() -> Reg {
|
||||
gpr(0, 7)
|
||||
gpr(0, 22)
|
||||
}
|
||||
pub(crate) fn rcx() -> Reg {
|
||||
gpr(1, 8)
|
||||
gpr(1, 23)
|
||||
}
|
||||
pub(crate) fn rdx() -> Reg {
|
||||
gpr(2, 9)
|
||||
gpr(2, 24)
|
||||
}
|
||||
pub(crate) fn r8() -> Reg {
|
||||
gpr(8, 10)
|
||||
gpr(8, 25)
|
||||
}
|
||||
pub(crate) fn r9() -> Reg {
|
||||
gpr(9, 11)
|
||||
gpr(9, 26)
|
||||
}
|
||||
pub(crate) fn r10() -> Reg {
|
||||
gpr(10, 12)
|
||||
gpr(10, 27)
|
||||
}
|
||||
pub(crate) fn r11() -> Reg {
|
||||
gpr(11, 13)
|
||||
gpr(11, 28)
|
||||
}
|
||||
|
||||
pub(crate) fn r15() -> Reg {
|
||||
// r15 is put aside since this is the pinned register.
|
||||
gpr(ENC_R15, 29)
|
||||
}
|
||||
|
||||
/// The pinned register on this architecture.
|
||||
/// It must be the same as Spidermonkey's HeapReg, as found in this file.
|
||||
/// https://searchfox.org/mozilla-central/source/js/src/jit/x64/Assembler-x64.h#99
|
||||
pub(crate) fn pinned_reg() -> Reg {
|
||||
r15()
|
||||
}
|
||||
|
||||
fn fpr(enc: u8, index: u8) -> Reg {
|
||||
@@ -80,52 +89,52 @@ fn fpr(enc: u8, index: u8) -> Reg {
|
||||
}
|
||||
|
||||
pub(crate) fn xmm0() -> Reg {
|
||||
fpr(0, 14)
|
||||
fpr(0, 0)
|
||||
}
|
||||
pub(crate) fn xmm1() -> Reg {
|
||||
fpr(1, 15)
|
||||
fpr(1, 1)
|
||||
}
|
||||
pub(crate) fn xmm2() -> Reg {
|
||||
fpr(2, 16)
|
||||
fpr(2, 2)
|
||||
}
|
||||
pub(crate) fn xmm3() -> Reg {
|
||||
fpr(3, 17)
|
||||
fpr(3, 3)
|
||||
}
|
||||
pub(crate) fn xmm4() -> Reg {
|
||||
fpr(4, 18)
|
||||
fpr(4, 4)
|
||||
}
|
||||
pub(crate) fn xmm5() -> Reg {
|
||||
fpr(5, 19)
|
||||
fpr(5, 5)
|
||||
}
|
||||
pub(crate) fn xmm6() -> Reg {
|
||||
fpr(6, 20)
|
||||
fpr(6, 6)
|
||||
}
|
||||
pub(crate) fn xmm7() -> Reg {
|
||||
fpr(7, 21)
|
||||
fpr(7, 7)
|
||||
}
|
||||
pub(crate) fn xmm8() -> Reg {
|
||||
fpr(8, 22)
|
||||
fpr(8, 8)
|
||||
}
|
||||
pub(crate) fn xmm9() -> Reg {
|
||||
fpr(9, 23)
|
||||
fpr(9, 9)
|
||||
}
|
||||
pub(crate) fn xmm10() -> Reg {
|
||||
fpr(10, 24)
|
||||
fpr(10, 10)
|
||||
}
|
||||
pub(crate) fn xmm11() -> Reg {
|
||||
fpr(11, 25)
|
||||
fpr(11, 11)
|
||||
}
|
||||
pub(crate) fn xmm12() -> Reg {
|
||||
fpr(12, 26)
|
||||
fpr(12, 12)
|
||||
}
|
||||
pub(crate) fn xmm13() -> Reg {
|
||||
fpr(13, 27)
|
||||
fpr(13, 13)
|
||||
}
|
||||
pub(crate) fn xmm14() -> Reg {
|
||||
fpr(14, 28)
|
||||
fpr(14, 14)
|
||||
}
|
||||
pub(crate) fn xmm15() -> Reg {
|
||||
fpr(15, 29)
|
||||
fpr(15, 15)
|
||||
}
|
||||
|
||||
pub(crate) fn rsp() -> Reg {
|
||||
@@ -139,39 +148,14 @@ pub(crate) fn rbp() -> Reg {
|
||||
///
|
||||
/// The ordering of registers matters, as commented in the file doc comment: assumes the
|
||||
/// calling-convention is SystemV, at the moment.
|
||||
pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUniverse {
|
||||
pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUniverse {
|
||||
let mut regs = Vec::<(RealReg, String)>::new();
|
||||
let mut allocable_by_class = [None; NUM_REG_CLASSES];
|
||||
|
||||
// Integer regs.
|
||||
let mut base = regs.len();
|
||||
|
||||
// Callee-saved, in the SystemV x86_64 ABI.
|
||||
regs.push((r12().to_real_reg(), "%r12".into()));
|
||||
regs.push((r13().to_real_reg(), "%r13".into()));
|
||||
regs.push((r14().to_real_reg(), "%r14".into()));
|
||||
regs.push((r15().to_real_reg(), "%r15".into()));
|
||||
regs.push((rbx().to_real_reg(), "%rbx".into()));
|
||||
|
||||
// Caller-saved, in the SystemV x86_64 ABI.
|
||||
regs.push((rsi().to_real_reg(), "%rsi".into()));
|
||||
regs.push((rdi().to_real_reg(), "%rdi".into()));
|
||||
regs.push((rax().to_real_reg(), "%rax".into()));
|
||||
regs.push((rcx().to_real_reg(), "%rcx".into()));
|
||||
regs.push((rdx().to_real_reg(), "%rdx".into()));
|
||||
regs.push((r8().to_real_reg(), "%r8".into()));
|
||||
regs.push((r9().to_real_reg(), "%r9".into()));
|
||||
regs.push((r10().to_real_reg(), "%r10".into()));
|
||||
regs.push((r11().to_real_reg(), "%r11".into()));
|
||||
|
||||
allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
|
||||
first: base,
|
||||
last: regs.len() - 1,
|
||||
suggested_scratch: Some(r12().get_index()),
|
||||
});
|
||||
let use_pinned_reg = flags.enable_pinned_reg();
|
||||
|
||||
// XMM registers
|
||||
base = regs.len();
|
||||
let first_fpr = regs.len();
|
||||
regs.push((xmm0().to_real_reg(), "%xmm0".into()));
|
||||
regs.push((xmm1().to_real_reg(), "%xmm1".into()));
|
||||
regs.push((xmm2().to_real_reg(), "%xmm2".into()));
|
||||
@@ -188,17 +172,61 @@ pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUn
|
||||
regs.push((xmm13().to_real_reg(), "%xmm13".into()));
|
||||
regs.push((xmm14().to_real_reg(), "%xmm14".into()));
|
||||
regs.push((xmm15().to_real_reg(), "%xmm15".into()));
|
||||
let last_fpr = regs.len() - 1;
|
||||
|
||||
// Integer regs.
|
||||
let first_gpr = regs.len();
|
||||
|
||||
// Callee-saved, in the SystemV x86_64 ABI.
|
||||
regs.push((r12().to_real_reg(), "%r12".into()));
|
||||
regs.push((r13().to_real_reg(), "%r13".into()));
|
||||
regs.push((r14().to_real_reg(), "%r14".into()));
|
||||
|
||||
regs.push((rbx().to_real_reg(), "%rbx".into()));
|
||||
|
||||
// Caller-saved, in the SystemV x86_64 ABI.
|
||||
regs.push((rsi().to_real_reg(), "%rsi".into()));
|
||||
regs.push((rdi().to_real_reg(), "%rdi".into()));
|
||||
regs.push((rax().to_real_reg(), "%rax".into()));
|
||||
regs.push((rcx().to_real_reg(), "%rcx".into()));
|
||||
regs.push((rdx().to_real_reg(), "%rdx".into()));
|
||||
regs.push((r8().to_real_reg(), "%r8".into()));
|
||||
regs.push((r9().to_real_reg(), "%r9".into()));
|
||||
regs.push((r10().to_real_reg(), "%r10".into()));
|
||||
regs.push((r11().to_real_reg(), "%r11".into()));
|
||||
|
||||
// Other regs, not available to the allocator.
|
||||
debug_assert_eq!(r15(), pinned_reg());
|
||||
let allocable = if use_pinned_reg {
|
||||
// The pinned register is not allocatable in this case, so record the length before adding
|
||||
// it.
|
||||
let len = regs.len();
|
||||
regs.push((r15().to_real_reg(), "%r15/pinned".into()));
|
||||
len
|
||||
} else {
|
||||
regs.push((r15().to_real_reg(), "%r15".into()));
|
||||
regs.len()
|
||||
};
|
||||
let last_gpr = allocable - 1;
|
||||
|
||||
regs.push((rsp().to_real_reg(), "%rsp".into()));
|
||||
regs.push((rbp().to_real_reg(), "%rbp".into()));
|
||||
|
||||
allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
|
||||
first: first_gpr,
|
||||
last: last_gpr,
|
||||
suggested_scratch: Some(r12().get_index()),
|
||||
});
|
||||
allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
|
||||
first: base,
|
||||
last: regs.len() - 1,
|
||||
first: first_fpr,
|
||||
last: last_fpr,
|
||||
suggested_scratch: Some(xmm15().get_index()),
|
||||
});
|
||||
|
||||
// Other regs, not available to the allocator.
|
||||
let allocable = regs.len();
|
||||
regs.push((rsp().to_real_reg(), "%rsp".into()));
|
||||
regs.push((rbp().to_real_reg(), "%rbp".into()));
|
||||
// Sanity-check: the index passed to the Reg ctor must match the order in the register list.
|
||||
for (i, reg) in regs.iter().enumerate() {
|
||||
assert_eq!(i, reg.0.get_index());
|
||||
}
|
||||
|
||||
RealRegUniverse {
|
||||
regs,
|
||||
|
||||
@@ -6,6 +6,7 @@ use log::trace;
|
||||
use regalloc::{Reg, RegClass, Writable};
|
||||
use smallvec::SmallVec;
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use std::convert::TryFrom;
|
||||
|
||||
@@ -120,12 +121,55 @@ struct InsnOutput {
|
||||
output: usize,
|
||||
}
|
||||
|
||||
fn input_to_reg<'a>(ctx: Ctx<'a>, spec: InsnInput) -> Reg {
|
||||
fn input_to_reg(ctx: Ctx, spec: InsnInput) -> Reg {
|
||||
let inputs = ctx.get_input(spec.insn, spec.input);
|
||||
ctx.use_input_reg(inputs);
|
||||
inputs.reg
|
||||
}
|
||||
|
||||
enum ExtSpec {
|
||||
ZeroExtendTo32,
|
||||
ZeroExtendTo64,
|
||||
SignExtendTo32,
|
||||
SignExtendTo64,
|
||||
}
|
||||
|
||||
fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg {
|
||||
let requested_size = match ext_spec {
|
||||
ExtSpec::ZeroExtendTo32 | ExtSpec::SignExtendTo32 => 32,
|
||||
ExtSpec::ZeroExtendTo64 | ExtSpec::SignExtendTo64 => 64,
|
||||
};
|
||||
let input_size = ctx.input_ty(spec.insn, spec.input).bits();
|
||||
|
||||
let ext_mode = match (input_size, requested_size) {
|
||||
(a, b) if a == b => return input_to_reg(ctx, spec),
|
||||
(a, 32) if a == 1 || a == 8 => ExtMode::BL,
|
||||
(a, 64) if a == 1 || a == 8 => ExtMode::BQ,
|
||||
(16, 32) => ExtMode::WL,
|
||||
(16, 64) => ExtMode::WQ,
|
||||
(32, 64) => ExtMode::LQ,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let requested_ty = if requested_size == 32 { I32 } else { I64 };
|
||||
|
||||
let src = input_to_reg_mem(ctx, spec);
|
||||
let dst = ctx.alloc_tmp(RegClass::I64, requested_ty);
|
||||
match ext_spec {
|
||||
ExtSpec::ZeroExtendTo32 | ExtSpec::ZeroExtendTo64 => {
|
||||
ctx.emit(Inst::movzx_rm_r(
|
||||
ext_mode, src, dst, /* infallible */ None,
|
||||
))
|
||||
}
|
||||
ExtSpec::SignExtendTo32 | ExtSpec::SignExtendTo64 => {
|
||||
ctx.emit(Inst::movsx_rm_r(
|
||||
ext_mode, src, dst, /* infallible */ None,
|
||||
))
|
||||
}
|
||||
}
|
||||
dst.to_reg()
|
||||
}
|
||||
|
||||
fn input_to_reg_mem(ctx: Ctx, spec: InsnInput) -> RegMem {
|
||||
// TODO handle memory.
|
||||
RegMem::reg(input_to_reg(ctx, spec))
|
||||
@@ -135,11 +179,11 @@ fn input_to_reg_mem(ctx: Ctx, spec: InsnInput) -> RegMem {
|
||||
/// TODO: handle memory as well!
|
||||
fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
|
||||
let imm = ctx.get_input(spec.insn, spec.input).constant.and_then(|x| {
|
||||
let as_u32 = x as u32;
|
||||
let extended = as_u32 as u64;
|
||||
// If the truncation and sign-extension don't change the value, use it.
|
||||
if extended == x {
|
||||
Some(as_u32)
|
||||
// For i64 instructions (prefixed with REX.W), require that the immediate will sign-extend
|
||||
// to 64 bits. For other sizes, it doesn't matter and we can just use the plain
|
||||
// constant.
|
||||
if ctx.input_ty(spec.insn, spec.input).bytes() != 8 || low32_will_sign_extend_to_64(x) {
|
||||
Some(x as u32)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
@@ -150,7 +194,7 @@ fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
|
||||
}
|
||||
}
|
||||
|
||||
fn output_to_reg<'a>(ctx: Ctx<'a>, spec: InsnOutput) -> Writable<Reg> {
|
||||
fn output_to_reg(ctx: Ctx, spec: InsnOutput) -> Writable<Reg> {
|
||||
ctx.get_output(spec.insn, spec.output)
|
||||
}
|
||||
|
||||
@@ -195,9 +239,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
match op {
|
||||
Opcode::Iconst => {
|
||||
if let Some(w64) = iri_to_u64_imm(ctx, insn) {
|
||||
// Get exactly the bit pattern in 'w64' into the dest. No
|
||||
// monkeying with sign extension etc.
|
||||
let dst_is_64 = w64 > 0xFFFF_FFFF;
|
||||
let dst_is_64 = w64 > 0x7fffffff;
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
ctx.emit(Inst::imm_r(dst_is_64, w64, dst));
|
||||
} else {
|
||||
@@ -228,28 +270,407 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx.emit(Inst::alu_rmi_r(is_64, alu_op, rhs, dst));
|
||||
}
|
||||
|
||||
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
|
||||
// TODO: implement imm shift value into insn
|
||||
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr | Opcode::Rotl | Opcode::Rotr => {
|
||||
let dst_ty = ctx.output_ty(insn, 0);
|
||||
assert_eq!(ctx.input_ty(insn, 0), dst_ty);
|
||||
assert!(dst_ty == types::I32 || dst_ty == types::I64);
|
||||
debug_assert_eq!(ctx.input_ty(insn, 0), dst_ty);
|
||||
debug_assert!(dst_ty == types::I32 || dst_ty == types::I64);
|
||||
|
||||
let lhs = input_to_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg(ctx, inputs[1]);
|
||||
|
||||
let (count, rhs) = if let Some(cst) = ctx.get_constant(inputs[1].insn) {
|
||||
let cst = if op == Opcode::Rotl || op == Opcode::Rotr {
|
||||
// Mask rotation count, according to Cranelift's semantics.
|
||||
(cst as u8) & (dst_ty.bits() as u8 - 1)
|
||||
} else {
|
||||
cst as u8
|
||||
};
|
||||
(Some(cst), None)
|
||||
} else {
|
||||
(None, Some(input_to_reg(ctx, inputs[1])))
|
||||
};
|
||||
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
|
||||
let shift_kind = match op {
|
||||
Opcode::Ishl => ShiftKind::Left,
|
||||
Opcode::Ushr => ShiftKind::RightZ,
|
||||
Opcode::Sshr => ShiftKind::RightS,
|
||||
Opcode::Ishl => ShiftKind::ShiftLeft,
|
||||
Opcode::Ushr => ShiftKind::ShiftRightLogical,
|
||||
Opcode::Sshr => ShiftKind::ShiftRightArithmetic,
|
||||
Opcode::Rotl => ShiftKind::RotateLeft,
|
||||
Opcode::Rotr => ShiftKind::RotateRight,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let is_64 = dst_ty == types::I64;
|
||||
let w_rcx = Writable::from_reg(regs::rcx());
|
||||
ctx.emit(Inst::mov_r_r(true, lhs, dst));
|
||||
ctx.emit(Inst::mov_r_r(true, rhs, w_rcx));
|
||||
ctx.emit(Inst::shift_r(is_64, shift_kind, None /*%cl*/, dst));
|
||||
if count.is_none() {
|
||||
ctx.emit(Inst::mov_r_r(true, rhs.unwrap(), w_rcx));
|
||||
}
|
||||
ctx.emit(Inst::shift_r(is_64, shift_kind, count, dst));
|
||||
}
|
||||
|
||||
Opcode::Clz => {
|
||||
// TODO when the x86 flags have use_lzcnt, we can use LZCNT.
|
||||
|
||||
// General formula using bit-scan reverse (BSR):
|
||||
// mov -1, %dst
|
||||
// bsr %src, %tmp
|
||||
// cmovz %dst, %tmp
|
||||
// mov $(size_bits - 1), %dst
|
||||
// sub %tmp, %dst
|
||||
|
||||
let (ext_spec, ty) = match ctx.input_ty(insn, 0) {
|
||||
I8 | I16 => (Some(ExtSpec::ZeroExtendTo32), I32),
|
||||
a if a == I32 || a == I64 => (None, a),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let src = if let Some(ext_spec) = ext_spec {
|
||||
RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec))
|
||||
} else {
|
||||
input_to_reg_mem(ctx, inputs[0])
|
||||
};
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, ty);
|
||||
ctx.emit(Inst::imm_r(ty == I64, u64::max_value(), dst));
|
||||
|
||||
ctx.emit(Inst::unary_rm_r(
|
||||
ty.bytes() as u8,
|
||||
UnaryRmROpcode::Bsr,
|
||||
src,
|
||||
tmp,
|
||||
));
|
||||
|
||||
ctx.emit(Inst::cmove(
|
||||
ty.bytes() as u8,
|
||||
CC::Z,
|
||||
RegMem::reg(dst.to_reg()),
|
||||
tmp,
|
||||
));
|
||||
|
||||
ctx.emit(Inst::imm_r(ty == I64, ty.bits() as u64 - 1, dst));
|
||||
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
ty == I64,
|
||||
AluRmiROpcode::Sub,
|
||||
RegMemImm::reg(tmp.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
|
||||
Opcode::Ctz => {
|
||||
// TODO when the x86 flags have use_bmi1, we can use TZCNT.
|
||||
|
||||
// General formula using bit-scan forward (BSF):
|
||||
// bsf %src, %dst
|
||||
// mov $(size_bits), %tmp
|
||||
// cmovz %tmp, %dst
|
||||
let ty = ctx.input_ty(insn, 0);
|
||||
let ty = if ty.bits() < 32 { I32 } else { ty };
|
||||
debug_assert!(ty == I32 || ty == I64);
|
||||
|
||||
let src = input_to_reg_mem(ctx, inputs[0]);
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, ty);
|
||||
ctx.emit(Inst::imm_r(false /* 64 bits */, ty.bits() as u64, tmp));
|
||||
|
||||
ctx.emit(Inst::unary_rm_r(
|
||||
ty.bytes() as u8,
|
||||
UnaryRmROpcode::Bsf,
|
||||
src,
|
||||
dst,
|
||||
));
|
||||
|
||||
ctx.emit(Inst::cmove(
|
||||
ty.bytes() as u8,
|
||||
CC::Z,
|
||||
RegMem::reg(tmp.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
|
||||
Opcode::Popcnt => {
|
||||
// TODO when the x86 flags have use_popcnt, we can use the popcnt instruction.
|
||||
|
||||
let (ext_spec, ty) = match ctx.input_ty(insn, 0) {
|
||||
I8 | I16 => (Some(ExtSpec::ZeroExtendTo32), I32),
|
||||
a if a == I32 || a == I64 => (None, a),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let src = if let Some(ext_spec) = ext_spec {
|
||||
RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec))
|
||||
} else {
|
||||
input_to_reg_mem(ctx, inputs[0])
|
||||
};
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
|
||||
if ty == I64 {
|
||||
let is_64 = true;
|
||||
|
||||
let tmp1 = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
let tmp2 = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
let cst = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
|
||||
// mov src, tmp1
|
||||
ctx.emit(Inst::mov64_rm_r(src.clone(), tmp1, None));
|
||||
|
||||
// shr $1, tmp1
|
||||
ctx.emit(Inst::shift_r(
|
||||
is_64,
|
||||
ShiftKind::ShiftRightLogical,
|
||||
Some(1),
|
||||
tmp1,
|
||||
));
|
||||
|
||||
// mov 0x7777_7777_7777_7777, cst
|
||||
ctx.emit(Inst::imm_r(is_64, 0x7777777777777777, cst));
|
||||
|
||||
// andq cst, tmp1
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::And,
|
||||
RegMemImm::reg(cst.to_reg()),
|
||||
tmp1,
|
||||
));
|
||||
|
||||
// mov src, tmp2
|
||||
ctx.emit(Inst::mov64_rm_r(src, tmp2, None));
|
||||
|
||||
// sub tmp1, tmp2
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::Sub,
|
||||
RegMemImm::reg(tmp1.to_reg()),
|
||||
tmp2,
|
||||
));
|
||||
|
||||
// shr $1, tmp1
|
||||
ctx.emit(Inst::shift_r(
|
||||
is_64,
|
||||
ShiftKind::ShiftRightLogical,
|
||||
Some(1),
|
||||
tmp1,
|
||||
));
|
||||
|
||||
// and cst, tmp1
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::And,
|
||||
RegMemImm::reg(cst.to_reg()),
|
||||
tmp1,
|
||||
));
|
||||
|
||||
// sub tmp1, tmp2
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::Sub,
|
||||
RegMemImm::reg(tmp1.to_reg()),
|
||||
tmp2,
|
||||
));
|
||||
|
||||
// shr $1, tmp1
|
||||
ctx.emit(Inst::shift_r(
|
||||
is_64,
|
||||
ShiftKind::ShiftRightLogical,
|
||||
Some(1),
|
||||
tmp1,
|
||||
));
|
||||
|
||||
// and cst, tmp1
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::And,
|
||||
RegMemImm::reg(cst.to_reg()),
|
||||
tmp1,
|
||||
));
|
||||
|
||||
// sub tmp1, tmp2
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::Sub,
|
||||
RegMemImm::reg(tmp1.to_reg()),
|
||||
tmp2,
|
||||
));
|
||||
|
||||
// mov tmp2, dst
|
||||
ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst, None));
|
||||
|
||||
// shr $4, dst
|
||||
ctx.emit(Inst::shift_r(
|
||||
is_64,
|
||||
ShiftKind::ShiftRightLogical,
|
||||
Some(4),
|
||||
dst,
|
||||
));
|
||||
|
||||
// add tmp2, dst
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::Add,
|
||||
RegMemImm::reg(tmp2.to_reg()),
|
||||
dst,
|
||||
));
|
||||
|
||||
// mov $0x0F0F_0F0F_0F0F_0F0F, cst
|
||||
ctx.emit(Inst::imm_r(is_64, 0x0F0F0F0F0F0F0F0F, cst));
|
||||
|
||||
// and cst, dst
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::And,
|
||||
RegMemImm::reg(cst.to_reg()),
|
||||
dst,
|
||||
));
|
||||
|
||||
// mov $0x0101_0101_0101_0101, cst
|
||||
ctx.emit(Inst::imm_r(is_64, 0x0101010101010101, cst));
|
||||
|
||||
// mul cst, dst
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::Mul,
|
||||
RegMemImm::reg(cst.to_reg()),
|
||||
dst,
|
||||
));
|
||||
|
||||
// shr $56, dst
|
||||
ctx.emit(Inst::shift_r(
|
||||
is_64,
|
||||
ShiftKind::ShiftRightLogical,
|
||||
Some(56),
|
||||
dst,
|
||||
));
|
||||
} else {
|
||||
assert_eq!(ty, I32);
|
||||
let is_64 = false;
|
||||
|
||||
let tmp1 = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
let tmp2 = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
|
||||
// mov src, tmp1
|
||||
ctx.emit(Inst::mov64_rm_r(src.clone(), tmp1, None));
|
||||
|
||||
// shr $1, tmp1
|
||||
ctx.emit(Inst::shift_r(
|
||||
is_64,
|
||||
ShiftKind::ShiftRightLogical,
|
||||
Some(1),
|
||||
tmp1,
|
||||
));
|
||||
|
||||
// andq $0x7777_7777, tmp1
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::And,
|
||||
RegMemImm::imm(0x77777777),
|
||||
tmp1,
|
||||
));
|
||||
|
||||
// mov src, tmp2
|
||||
ctx.emit(Inst::mov64_rm_r(src, tmp2, None));
|
||||
|
||||
// sub tmp1, tmp2
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::Sub,
|
||||
RegMemImm::reg(tmp1.to_reg()),
|
||||
tmp2,
|
||||
));
|
||||
|
||||
// shr $1, tmp1
|
||||
ctx.emit(Inst::shift_r(
|
||||
is_64,
|
||||
ShiftKind::ShiftRightLogical,
|
||||
Some(1),
|
||||
tmp1,
|
||||
));
|
||||
|
||||
// and 0x7777_7777, tmp1
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::And,
|
||||
RegMemImm::imm(0x77777777),
|
||||
tmp1,
|
||||
));
|
||||
|
||||
// sub tmp1, tmp2
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::Sub,
|
||||
RegMemImm::reg(tmp1.to_reg()),
|
||||
tmp2,
|
||||
));
|
||||
|
||||
// shr $1, tmp1
|
||||
ctx.emit(Inst::shift_r(
|
||||
is_64,
|
||||
ShiftKind::ShiftRightLogical,
|
||||
Some(1),
|
||||
tmp1,
|
||||
));
|
||||
|
||||
// and $0x7777_7777, tmp1
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::And,
|
||||
RegMemImm::imm(0x77777777),
|
||||
tmp1,
|
||||
));
|
||||
|
||||
// sub tmp1, tmp2
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::Sub,
|
||||
RegMemImm::reg(tmp1.to_reg()),
|
||||
tmp2,
|
||||
));
|
||||
|
||||
// mov tmp2, dst
|
||||
ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst, None));
|
||||
|
||||
// shr $4, dst
|
||||
ctx.emit(Inst::shift_r(
|
||||
is_64,
|
||||
ShiftKind::ShiftRightLogical,
|
||||
Some(4),
|
||||
dst,
|
||||
));
|
||||
|
||||
// add tmp2, dst
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::Add,
|
||||
RegMemImm::reg(tmp2.to_reg()),
|
||||
dst,
|
||||
));
|
||||
|
||||
// and $0x0F0F_0F0F, dst
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::And,
|
||||
RegMemImm::imm(0x0F0F0F0F),
|
||||
dst,
|
||||
));
|
||||
|
||||
// mul $0x0101_0101, dst
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
is_64,
|
||||
AluRmiROpcode::Mul,
|
||||
RegMemImm::imm(0x01010101),
|
||||
dst,
|
||||
));
|
||||
|
||||
// shr $24, dst
|
||||
ctx.emit(Inst::shift_r(
|
||||
is_64,
|
||||
ShiftKind::ShiftRightLogical,
|
||||
Some(24),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Uextend
|
||||
@@ -261,37 +682,46 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let src_ty = ctx.input_ty(insn, 0);
|
||||
let dst_ty = ctx.output_ty(insn, 0);
|
||||
|
||||
// TODO: if the source operand is a load, incorporate that.
|
||||
let src = input_to_reg(ctx, inputs[0]);
|
||||
let src = input_to_reg_mem(ctx, inputs[0]);
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
|
||||
let ext_mode = match (src_ty.bits(), dst_ty.bits()) {
|
||||
(1, 32) | (8, 32) => ExtMode::BL,
|
||||
(1, 64) | (8, 64) => ExtMode::BQ,
|
||||
(16, 32) => ExtMode::WL,
|
||||
(16, 64) => ExtMode::WQ,
|
||||
(32, 64) => ExtMode::LQ,
|
||||
(1, 32) | (8, 32) => Some(ExtMode::BL),
|
||||
(1, 64) | (8, 64) => Some(ExtMode::BQ),
|
||||
(16, 32) => Some(ExtMode::WL),
|
||||
(16, 64) => Some(ExtMode::WQ),
|
||||
(32, 64) => Some(ExtMode::LQ),
|
||||
(x, y) if x >= y => None,
|
||||
_ => unreachable!(
|
||||
"unexpected extension kind from {:?} to {:?}",
|
||||
src_ty, dst_ty
|
||||
),
|
||||
};
|
||||
|
||||
if op == Opcode::Sextend {
|
||||
ctx.emit(Inst::movsx_rm_r(ext_mode, RegMem::reg(src), dst));
|
||||
// All of these other opcodes are simply a move from a zero-extended source. Here
|
||||
// is why this works, in each case:
|
||||
//
|
||||
// - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we
|
||||
// merely need to zero-extend here.
|
||||
//
|
||||
// - Breduce, Bextend: changing width of a boolean. We represent a
|
||||
// bool as a 0 or 1, so again, this is a zero-extend / no-op.
|
||||
//
|
||||
// - Ireduce: changing width of an integer. Smaller ints are stored
|
||||
// with undefined high-order bits, so we can simply do a copy.
|
||||
|
||||
if let Some(ext_mode) = ext_mode {
|
||||
if op == Opcode::Sextend {
|
||||
ctx.emit(Inst::movsx_rm_r(
|
||||
ext_mode, src, dst, /* infallible */ None,
|
||||
));
|
||||
} else {
|
||||
ctx.emit(Inst::movzx_rm_r(
|
||||
ext_mode, src, dst, /* infallible */ None,
|
||||
));
|
||||
}
|
||||
} else {
|
||||
// All of these other opcodes are simply a move from a zero-extended source. Here
|
||||
// is why this works, in each case:
|
||||
//
|
||||
// - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we
|
||||
// merely need to zero-extend here.
|
||||
//
|
||||
// - Breduce, Bextend: changing width of a boolean. We represent a
|
||||
// bool as a 0 or 1, so again, this is a zero-extend / no-op.
|
||||
//
|
||||
// - Ireduce: changing width of an integer. Smaller ints are stored
|
||||
// with undefined high-order bits, so we can simply do a copy.
|
||||
ctx.emit(Inst::movzx_rm_r(ext_mode, RegMem::reg(src), dst));
|
||||
ctx.emit(Inst::mov64_rm_r(src, dst, /* infallible */ None));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -308,15 +738,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
for i in 0..ctx.num_inputs(insn) {
|
||||
let src_reg = input_to_reg(ctx, inputs[i]);
|
||||
let retval_reg = ctx.retval(i);
|
||||
if src_reg.get_class() == RegClass::I64 {
|
||||
ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
|
||||
} else if src_reg.get_class() == RegClass::V128 {
|
||||
ctx.emit(Inst::xmm_mov_rm_r(
|
||||
SseOpcode::Movsd,
|
||||
RegMem::reg(src_reg),
|
||||
retval_reg,
|
||||
));
|
||||
}
|
||||
let ty = ctx.input_ty(insn, i);
|
||||
ctx.emit(Inst::gen_move(retval_reg, src_reg, ty));
|
||||
}
|
||||
// N.B.: the Ret itself is generated by the ABI.
|
||||
}
|
||||
@@ -364,7 +787,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx.emit(Inst::Hlt);
|
||||
}
|
||||
|
||||
Opcode::Trap => {
|
||||
Opcode::Trap | Opcode::ResumableTrap => {
|
||||
let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap());
|
||||
ctx.emit(Inst::Ud2 { trap_info })
|
||||
}
|
||||
@@ -383,7 +806,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
// TODO Fmax, Fmin.
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
ctx.emit(Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(lhs), dst));
|
||||
ctx.emit(Inst::xmm_mov_rm_r(
|
||||
SseOpcode::Movss,
|
||||
RegMem::reg(lhs),
|
||||
dst,
|
||||
None,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst));
|
||||
} else {
|
||||
unimplemented!("unimplemented lowering for opcode {:?}", op);
|
||||
@@ -410,17 +838,20 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
SseOpcode::Movd,
|
||||
RegMem::reg(tmp_gpr1.to_reg()),
|
||||
tmp_xmm1,
|
||||
None,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov_rm_r(
|
||||
SseOpcode::Movaps,
|
||||
RegMem::reg(tmp_xmm1.to_reg()),
|
||||
dst,
|
||||
None,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(lhs), dst));
|
||||
ctx.emit(Inst::xmm_mov_rm_r(
|
||||
SseOpcode::Movss,
|
||||
RegMem::reg(rhs),
|
||||
tmp_xmm2,
|
||||
None,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Andps,
|
||||
@@ -521,25 +952,37 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let srcloc = Some(ctx.srcloc(insn));
|
||||
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
match (sign_extend, is_float) {
|
||||
(true, false) => {
|
||||
// The load is sign-extended only when the output size is lower than 64 bits,
|
||||
// so ext-mode is defined in this case.
|
||||
ctx.emit(Inst::movsx_rm_r(ext_mode.unwrap(), RegMem::mem(addr), dst));
|
||||
ctx.emit(Inst::movsx_rm_r(
|
||||
ext_mode.unwrap(),
|
||||
RegMem::mem(addr),
|
||||
dst,
|
||||
srcloc,
|
||||
));
|
||||
}
|
||||
(false, false) => {
|
||||
if elem_ty.bytes() == 8 {
|
||||
// Use a plain load.
|
||||
ctx.emit(Inst::mov64_m_r(addr, dst))
|
||||
ctx.emit(Inst::mov64_m_r(addr, dst, srcloc))
|
||||
} else {
|
||||
// Use a zero-extended load.
|
||||
ctx.emit(Inst::movzx_rm_r(ext_mode.unwrap(), RegMem::mem(addr), dst))
|
||||
ctx.emit(Inst::movzx_rm_r(
|
||||
ext_mode.unwrap(),
|
||||
RegMem::mem(addr),
|
||||
dst,
|
||||
srcloc,
|
||||
))
|
||||
}
|
||||
}
|
||||
(_, true) => {
|
||||
ctx.emit(match elem_ty {
|
||||
F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::mem(addr), dst),
|
||||
F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::mem(addr), dst, srcloc),
|
||||
_ => unimplemented!("FP load not 32-bit"),
|
||||
});
|
||||
}
|
||||
@@ -595,16 +1038,44 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
|
||||
let src = input_to_reg(ctx, inputs[0]);
|
||||
|
||||
let srcloc = Some(ctx.srcloc(insn));
|
||||
|
||||
if is_float {
|
||||
ctx.emit(match elem_ty {
|
||||
F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr),
|
||||
F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr, srcloc),
|
||||
_ => unimplemented!("FP store not 32-bit"),
|
||||
});
|
||||
} else {
|
||||
ctx.emit(Inst::mov_r_m(elem_ty.bytes() as u8, src, addr));
|
||||
ctx.emit(Inst::mov_r_m(elem_ty.bytes() as u8, src, addr, srcloc));
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::FuncAddr => {
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
let (extname, _) = ctx.call_target(insn).unwrap();
|
||||
let extname = extname.clone();
|
||||
let loc = ctx.srcloc(insn);
|
||||
ctx.emit(Inst::LoadExtName {
|
||||
dst,
|
||||
name: Box::new(extname),
|
||||
srcloc: loc,
|
||||
offset: 0,
|
||||
});
|
||||
}
|
||||
|
||||
Opcode::SymbolValue => {
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
let (extname, _, offset) = ctx.symbol_value(insn).unwrap();
|
||||
let extname = extname.clone();
|
||||
let loc = ctx.srcloc(insn);
|
||||
ctx.emit(Inst::LoadExtName {
|
||||
dst,
|
||||
name: Box::new(extname),
|
||||
srcloc: loc,
|
||||
offset,
|
||||
});
|
||||
}
|
||||
|
||||
Opcode::StackAddr => {
|
||||
let (stack_slot, offset) = match *ctx.data(insn) {
|
||||
InstructionData::StackLoad {
|
||||
@@ -616,7 +1087,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
};
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
let offset: i32 = offset.into();
|
||||
println!("stackslot_addr: {:?} @ off{}", stack_slot, offset);
|
||||
let inst = ctx
|
||||
.abi()
|
||||
.stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst);
|
||||
@@ -655,8 +1125,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
if size == 1 {
|
||||
// Sign-extend operands to 32, then do a cmove of size 4.
|
||||
let lhs_se = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se));
|
||||
ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se, None));
|
||||
ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst, None));
|
||||
ctx.emit(Inst::cmove(4, cc, RegMem::reg(lhs_se.to_reg()), dst));
|
||||
} else {
|
||||
ctx.emit(Inst::gen_move(dst, rhs, ty));
|
||||
@@ -665,8 +1135,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
|
||||
Opcode::Udiv | Opcode::Urem | Opcode::Sdiv | Opcode::Srem => {
|
||||
let is_div = op == Opcode::Udiv || op == Opcode::Sdiv;
|
||||
let is_signed = op == Opcode::Sdiv || op == Opcode::Srem;
|
||||
let kind = match op {
|
||||
Opcode::Udiv => DivOrRemKind::UnsignedDiv,
|
||||
Opcode::Sdiv => DivOrRemKind::SignedDiv,
|
||||
Opcode::Urem => DivOrRemKind::UnsignedRem,
|
||||
Opcode::Srem => DivOrRemKind::SignedRem,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let is_div = kind.is_div();
|
||||
|
||||
let input_ty = ctx.input_ty(insn, 0);
|
||||
let size = input_ty.bytes() as u8;
|
||||
@@ -686,22 +1162,28 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
// pc-relative offsets that must not change, thus requiring regalloc to not
|
||||
// interfere by introducing spills and reloads.
|
||||
//
|
||||
// Note it keeps the result in $rax (if is_div) or $rdx (if !is_div), so that
|
||||
// Note it keeps the result in $rax (for divide) or $rdx (for rem), so that
|
||||
// regalloc is aware of the coalescing opportunity between rax/rdx and the
|
||||
// destination register.
|
||||
let divisor = input_to_reg(ctx, inputs[1]);
|
||||
let tmp = if op == Opcode::Sdiv && size == 8 {
|
||||
Some(ctx.alloc_tmp(RegClass::I64, I64))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
ctx.emit(Inst::imm_r(true, 0, Writable::from_reg(regs::rdx())));
|
||||
ctx.emit(Inst::CheckedDivOrRemSeq {
|
||||
is_div,
|
||||
is_signed,
|
||||
kind,
|
||||
size,
|
||||
divisor,
|
||||
tmp,
|
||||
loc: srcloc,
|
||||
});
|
||||
} else {
|
||||
let divisor = input_to_reg_mem(ctx, inputs[1]);
|
||||
|
||||
// Fill in the high parts:
|
||||
if is_signed {
|
||||
if kind.is_signed() {
|
||||
// sign-extend the sign-bit of rax into rdx, for signed opcodes.
|
||||
ctx.emit(Inst::sign_extend_rax_to_rdx(size));
|
||||
} else {
|
||||
@@ -714,7 +1196,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
|
||||
// Emit the actual idiv.
|
||||
ctx.emit(Inst::div(size, is_signed, divisor, ctx.srcloc(insn)));
|
||||
ctx.emit(Inst::div(size, kind.is_signed(), divisor, ctx.srcloc(insn)));
|
||||
}
|
||||
|
||||
// Move the result back into the destination reg.
|
||||
@@ -727,6 +1209,43 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Umulhi | Opcode::Smulhi => {
|
||||
let input_ty = ctx.input_ty(insn, 0);
|
||||
let size = input_ty.bytes() as u8;
|
||||
|
||||
let lhs = input_to_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg_mem(ctx, inputs[1]);
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
|
||||
// Move lhs in %rax.
|
||||
ctx.emit(Inst::gen_move(
|
||||
Writable::from_reg(regs::rax()),
|
||||
lhs,
|
||||
input_ty,
|
||||
));
|
||||
|
||||
// Emit the actual mul or imul.
|
||||
let signed = op == Opcode::Smulhi;
|
||||
ctx.emit(Inst::mul_hi(size, signed, rhs));
|
||||
|
||||
// Read the result from the high part (stored in %rdx).
|
||||
ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty));
|
||||
}
|
||||
|
||||
Opcode::GetPinnedReg => {
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
ctx.emit(Inst::gen_move(dst, regs::pinned_reg(), I64));
|
||||
}
|
||||
|
||||
Opcode::SetPinnedReg => {
|
||||
let src = input_to_reg(ctx, inputs[0]);
|
||||
ctx.emit(Inst::gen_move(
|
||||
Writable::from_reg(regs::pinned_reg()),
|
||||
src,
|
||||
I64,
|
||||
));
|
||||
}
|
||||
|
||||
Opcode::IaddImm
|
||||
| Opcode::ImulImm
|
||||
| Opcode::UdivImm
|
||||
@@ -876,35 +1395,14 @@ impl LowerBackend for X64Backend {
|
||||
assert!(jt_size <= u32::max_value() as usize);
|
||||
let jt_size = jt_size as u32;
|
||||
|
||||
let idx_size_bits = ctx.input_ty(branches[0], 0).bits();
|
||||
|
||||
// Zero-extend to 32-bits if needed.
|
||||
// TODO consider factoring this out?
|
||||
let idx = if idx_size_bits < 32 {
|
||||
let ext_mode = match idx_size_bits {
|
||||
1 | 8 => ExtMode::BL,
|
||||
16 => ExtMode::WL,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let idx = input_to_reg_mem(
|
||||
ctx,
|
||||
InsnInput {
|
||||
insn: branches[0],
|
||||
input: 0,
|
||||
},
|
||||
);
|
||||
let tmp_idx = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::movzx_rm_r(ext_mode, idx, tmp_idx));
|
||||
tmp_idx.to_reg()
|
||||
} else {
|
||||
input_to_reg(
|
||||
ctx,
|
||||
InsnInput {
|
||||
insn: branches[0],
|
||||
input: 0,
|
||||
},
|
||||
)
|
||||
};
|
||||
let idx = extend_input_to_reg(
|
||||
ctx,
|
||||
InsnInput {
|
||||
insn: branches[0],
|
||||
input: 0,
|
||||
},
|
||||
ExtSpec::ZeroExtendTo32,
|
||||
);
|
||||
|
||||
// Bounds-check (compute flags from idx - jt_size) and branch to default.
|
||||
ctx.emit(Inst::cmp_rmi_r(4, RegMemImm::imm(jt_size), idx));
|
||||
@@ -951,4 +1449,8 @@ impl LowerBackend for X64Backend {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn maybe_pinned_reg(&self) -> Option<Reg> {
|
||||
Some(regs::pinned_reg())
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user