machinst x64: implement calls and int cmp/store/loads;
This makes it possible to run a simple recursive fibonacci function in wasmtime.
This commit is contained in:
@@ -899,7 +899,7 @@ pub enum Inst {
|
||||
},
|
||||
|
||||
/// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
|
||||
/// controls MemArg::NominalSPOffset args are lowered.
|
||||
/// controls how MemArg::NominalSPOffset args are lowered.
|
||||
VirtualSPOffsetAdj {
|
||||
offset: i64,
|
||||
},
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -3,16 +3,20 @@
|
||||
use std::fmt;
|
||||
use std::string::{String, ToString};
|
||||
|
||||
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector};
|
||||
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector, RegUsageMapper};
|
||||
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::machinst::*;
|
||||
|
||||
use super::regs::show_ireg_sized;
|
||||
use super::{
|
||||
regs::{self, show_ireg_sized},
|
||||
EmitState,
|
||||
};
|
||||
|
||||
/// A Memory Address. These denote a 64-bit value only.
|
||||
/// A possible addressing mode (amode) that can be used in instructions.
|
||||
/// These denote a 64-bit value only.
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum Addr {
|
||||
pub enum Amode {
|
||||
/// Immediate sign-extended and a Register.
|
||||
ImmReg { simm32: u32, base: Reg },
|
||||
|
||||
@@ -25,7 +29,7 @@ pub(crate) enum Addr {
|
||||
},
|
||||
}
|
||||
|
||||
impl Addr {
|
||||
impl Amode {
|
||||
pub(crate) fn imm_reg(simm32: u32, base: Reg) -> Self {
|
||||
debug_assert!(base.get_class() == RegClass::I64);
|
||||
Self::ImmReg { simm32, base }
|
||||
@@ -46,15 +50,10 @@ impl Addr {
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
Addr::ImmReg { simm32: _, base } => {
|
||||
Amode::ImmReg { base, .. } => {
|
||||
collector.add_use(*base);
|
||||
}
|
||||
Addr::ImmRegRegShift {
|
||||
simm32: _,
|
||||
base,
|
||||
index,
|
||||
shift: _,
|
||||
} => {
|
||||
Amode::ImmRegRegShift { base, index, .. } => {
|
||||
collector.add_use(*base);
|
||||
collector.add_use(*index);
|
||||
}
|
||||
@@ -62,13 +61,13 @@ impl Addr {
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for Addr {
|
||||
impl ShowWithRRU for Amode {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
Addr::ImmReg { simm32, base } => {
|
||||
Amode::ImmReg { simm32, base } => {
|
||||
format!("{}({})", *simm32 as i32, base.show_rru(mb_rru))
|
||||
}
|
||||
Addr::ImmRegRegShift {
|
||||
Amode::ImmRegRegShift {
|
||||
simm32,
|
||||
base,
|
||||
index,
|
||||
@@ -84,14 +83,84 @@ impl ShowWithRRU for Addr {
|
||||
}
|
||||
}
|
||||
|
||||
/// A Memory Address. These denote a 64-bit value only.
|
||||
/// Used for usual addressing modes as well as addressing modes used during compilation, when the
|
||||
/// moving SP offset is not known.
|
||||
#[derive(Clone)]
|
||||
pub enum SyntheticAmode {
|
||||
/// A real amode.
|
||||
Real(Amode),
|
||||
|
||||
/// A (virtual) offset to the "nominal SP" value, which will be recomputed as we push and pop
|
||||
/// within the function.
|
||||
NominalSPOffset { simm32: u32 },
|
||||
}
|
||||
|
||||
impl SyntheticAmode {
|
||||
pub(crate) fn nominal_sp_offset(simm32: u32) -> Self {
|
||||
SyntheticAmode::NominalSPOffset { simm32 }
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
SyntheticAmode::Real(addr) => addr.get_regs_as_uses(collector),
|
||||
SyntheticAmode::NominalSPOffset { .. } => {
|
||||
// Nothing to do; the base is SP and isn't involved in regalloc.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
|
||||
match self {
|
||||
SyntheticAmode::Real(addr) => addr.map_uses(map),
|
||||
SyntheticAmode::NominalSPOffset { .. } => {
|
||||
// Nothing to do.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn finalize(&self, state: &mut EmitState) -> Amode {
|
||||
match self {
|
||||
SyntheticAmode::Real(addr) => addr.clone(),
|
||||
SyntheticAmode::NominalSPOffset { simm32 } => {
|
||||
let off = *simm32 as i64 + state.virtual_sp_offset;
|
||||
// TODO will require a sequence of add etc.
|
||||
assert!(
|
||||
off <= u32::max_value() as i64,
|
||||
"amode finalize: add sequence NYI"
|
||||
);
|
||||
Amode::imm_reg(off as u32, regs::rsp())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<SyntheticAmode> for Amode {
|
||||
fn into(self) -> SyntheticAmode {
|
||||
SyntheticAmode::Real(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for SyntheticAmode {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
SyntheticAmode::Real(addr) => addr.show_rru(mb_rru),
|
||||
SyntheticAmode::NominalSPOffset { simm32 } => {
|
||||
format!("rsp({} + virtual offset)", *simm32 as i32)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An operand which is either an integer Register, a value in Memory or an Immediate. This can
|
||||
/// denote an 8, 16, 32 or 64 bit value. For the Immediate form, in the 8- and 16-bit case, only
|
||||
/// the lower 8 or 16 bits of `simm32` is relevant. In the 64-bit case, the value denoted by
|
||||
/// `simm32` is its sign-extension out to 64 bits.
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum RegMemImm {
|
||||
pub enum RegMemImm {
|
||||
Reg { reg: Reg },
|
||||
Mem { addr: Addr },
|
||||
Mem { addr: SyntheticAmode },
|
||||
Imm { simm32: u32 },
|
||||
}
|
||||
|
||||
@@ -100,8 +169,8 @@ impl RegMemImm {
|
||||
debug_assert!(reg.get_class() == RegClass::I64);
|
||||
Self::Reg { reg }
|
||||
}
|
||||
pub(crate) fn mem(addr: Addr) -> Self {
|
||||
Self::Mem { addr }
|
||||
pub(crate) fn mem(addr: impl Into<SyntheticAmode>) -> Self {
|
||||
Self::Mem { addr: addr.into() }
|
||||
}
|
||||
pub(crate) fn imm(simm32: u32) -> Self {
|
||||
Self::Imm { simm32 }
|
||||
@@ -134,9 +203,9 @@ impl ShowWithRRU for RegMemImm {
|
||||
/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
|
||||
/// 32 or 64 bit value.
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum RegMem {
|
||||
pub enum RegMem {
|
||||
Reg { reg: Reg },
|
||||
Mem { addr: Addr },
|
||||
Mem { addr: SyntheticAmode },
|
||||
}
|
||||
|
||||
impl RegMem {
|
||||
@@ -144,8 +213,8 @@ impl RegMem {
|
||||
debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128);
|
||||
Self::Reg { reg }
|
||||
}
|
||||
pub(crate) fn mem(addr: Addr) -> Self {
|
||||
Self::Mem { addr }
|
||||
pub(crate) fn mem(addr: impl Into<SyntheticAmode>) -> Self {
|
||||
Self::Mem { addr: addr.into() }
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
@@ -382,6 +451,13 @@ pub enum ExtMode {
|
||||
}
|
||||
|
||||
impl ExtMode {
|
||||
pub(crate) fn src_size(&self) -> u8 {
|
||||
match self {
|
||||
ExtMode::BL | ExtMode::BQ => 1,
|
||||
ExtMode::WL | ExtMode::WQ => 2,
|
||||
ExtMode::LQ => 4,
|
||||
}
|
||||
}
|
||||
pub(crate) fn dst_size(&self) -> u8 {
|
||||
match self {
|
||||
ExtMode::BL | ExtMode::WL => 4,
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
use crate::isa::x64::inst::*;
|
||||
use log::debug;
|
||||
use regalloc::Reg;
|
||||
|
||||
use crate::binemit::Reloc;
|
||||
use crate::isa::x64::inst::*;
|
||||
|
||||
fn low8_will_sign_extend_to_64(x: u32) -> bool {
|
||||
let xs = (x as i32) as i64;
|
||||
xs == ((xs << 56) >> 56)
|
||||
@@ -164,7 +167,7 @@ fn emit_std_enc_mem(
|
||||
opcodes: u32,
|
||||
mut num_opcodes: usize,
|
||||
enc_g: u8,
|
||||
mem_e: &Addr,
|
||||
mem_e: &Amode,
|
||||
rex: RexFlags,
|
||||
) {
|
||||
// General comment for this function: the registers in `mem_e` must be
|
||||
@@ -174,7 +177,7 @@ fn emit_std_enc_mem(
|
||||
prefix.emit(sink);
|
||||
|
||||
match mem_e {
|
||||
Addr::ImmReg { simm32, base } => {
|
||||
Amode::ImmReg { simm32, base } => {
|
||||
// First, the REX byte.
|
||||
let enc_e = int_reg_enc(*base);
|
||||
rex.emit_two_op(sink, enc_g, enc_e);
|
||||
@@ -228,7 +231,7 @@ fn emit_std_enc_mem(
|
||||
}
|
||||
}
|
||||
|
||||
Addr::ImmRegRegShift {
|
||||
Amode::ImmRegRegShift {
|
||||
simm32,
|
||||
base: reg_base,
|
||||
index: reg_index,
|
||||
@@ -306,7 +309,7 @@ fn emit_std_reg_mem(
|
||||
opcodes: u32,
|
||||
num_opcodes: usize,
|
||||
reg_g: Reg,
|
||||
mem_e: &Addr,
|
||||
mem_e: &Amode,
|
||||
rex: RexFlags,
|
||||
) {
|
||||
let enc_g = reg_enc(reg_g);
|
||||
@@ -389,10 +392,13 @@ fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
|
||||
///
|
||||
/// * there's a shorter encoding for shl/shr/sar by a 1-bit immediate. (Do we
|
||||
/// care?)
|
||||
pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
pub(crate) fn emit(
|
||||
inst: &Inst,
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
_flags: &settings::Flags,
|
||||
state: &mut EmitState,
|
||||
) {
|
||||
match inst {
|
||||
Inst::Nop { len: 0 } => {}
|
||||
|
||||
Inst::Alu_RMI_R {
|
||||
is_64,
|
||||
op,
|
||||
@@ -428,7 +434,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
0x0FAF,
|
||||
2,
|
||||
reg_g.to_reg(),
|
||||
addr,
|
||||
&addr.finalize(state),
|
||||
rex,
|
||||
);
|
||||
}
|
||||
@@ -460,47 +466,39 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
};
|
||||
|
||||
match src {
|
||||
RegMemImm::Reg { reg: regE } => {
|
||||
// Note. The arguments .. regE .. reg_g .. sequence
|
||||
// here is the opposite of what is expected. I'm not
|
||||
// sure why this is. But I am fairly sure that the
|
||||
// arg order could be switched back to the expected
|
||||
// .. reg_g .. regE .. if opcode_rr is also switched
|
||||
// over to the "other" basic integer opcode (viz, the
|
||||
// R/RM vs RM/R duality). However, that would mean
|
||||
// that the test results won't be in accordance with
|
||||
// the GNU as reference output. In other words, the
|
||||
// inversion exists as a result of using GNU as as a
|
||||
// gold standard.
|
||||
RegMemImm::Reg { reg: reg_e } => {
|
||||
// GCC/llvm use the swapped operand encoding (viz., the R/RM vs RM/R
|
||||
// duality). Do this too, so as to be able to compare generated machine
|
||||
// code easily.
|
||||
emit_std_reg_reg(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcode_r,
|
||||
1,
|
||||
*regE,
|
||||
*reg_e,
|
||||
reg_g.to_reg(),
|
||||
rex,
|
||||
);
|
||||
// NB: if this is ever extended to handle byte size
|
||||
// ops, be sure to retain redundant REX prefixes.
|
||||
// NB: if this is ever extended to handle byte size ops, be sure to retain
|
||||
// redundant REX prefixes.
|
||||
}
|
||||
|
||||
RegMemImm::Mem { addr } => {
|
||||
// Whereas here we revert to the "normal" G-E ordering.
|
||||
// Here we revert to the "normal" G-E ordering.
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcode_m,
|
||||
1,
|
||||
reg_g.to_reg(),
|
||||
addr,
|
||||
&addr.finalize(state),
|
||||
rex,
|
||||
);
|
||||
}
|
||||
|
||||
RegMemImm::Imm { simm32 } => {
|
||||
let useImm8 = low8_will_sign_extend_to_32(*simm32);
|
||||
let opcode = if useImm8 { 0x83 } else { 0x81 };
|
||||
let use_imm8 = low8_will_sign_extend_to_32(*simm32);
|
||||
let opcode = if use_imm8 { 0x83 } else { 0x81 };
|
||||
// And also here we use the "normal" G-E ordering.
|
||||
let enc_g = int_reg_enc(reg_g.to_reg());
|
||||
emit_std_enc_enc(
|
||||
@@ -512,7 +510,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
enc_g,
|
||||
rex,
|
||||
);
|
||||
emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32);
|
||||
emit_simm(sink, if use_imm8 { 1 } else { 4 }, *simm32);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -548,161 +546,129 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
emit_std_reg_reg(sink, LegacyPrefix::None, 0x89, 1, *src, dst.to_reg(), rex);
|
||||
}
|
||||
|
||||
Inst::MovZX_M_R { extMode, addr, dst } => {
|
||||
match extMode {
|
||||
Inst::MovZX_RM_R { ext_mode, src, dst } => {
|
||||
let (opcodes, num_opcodes, rex_flags) = match ext_mode {
|
||||
ExtMode::BL => {
|
||||
// MOVZBL is (REX.W==0) 0F B6 /r
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x0FB6,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
RexFlags::clear_w(),
|
||||
)
|
||||
(0x0FB6, 2, RexFlags::clear_w())
|
||||
}
|
||||
|
||||
ExtMode::BQ => {
|
||||
// MOVZBQ is (REX.W==1) 0F B6 /r
|
||||
// I'm not sure why the Intel manual offers different
|
||||
// encodings for MOVZBQ than for MOVZBL. AIUI they should
|
||||
// achieve the same, since MOVZBL is just going to zero out
|
||||
// the upper half of the destination anyway.
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x0FB6,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
RexFlags::set_w(),
|
||||
)
|
||||
(0x0FB6, 2, RexFlags::set_w())
|
||||
}
|
||||
|
||||
ExtMode::WL => {
|
||||
// MOVZWL is (REX.W==0) 0F B7 /r
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x0FB7,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
RexFlags::clear_w(),
|
||||
)
|
||||
(0x0FB7, 2, RexFlags::clear_w())
|
||||
}
|
||||
|
||||
ExtMode::WQ => {
|
||||
// MOVZWQ is (REX.W==1) 0F B7 /r
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x0FB7,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
RexFlags::set_w(),
|
||||
)
|
||||
(0x0FB7, 2, RexFlags::set_w())
|
||||
}
|
||||
|
||||
ExtMode::LQ => {
|
||||
// This is just a standard 32 bit load, and we rely on the
|
||||
// default zero-extension rule to perform the extension.
|
||||
// Note that in reg/reg mode, gcc seems to use the swapped form R/RM, which we
|
||||
// don't do here, since it's the same encoding size.
|
||||
// MOV r/m32, r32 is (REX.W==0) 8B /r
|
||||
emit_std_reg_mem(
|
||||
(0x8B, 1, RexFlags::clear_w())
|
||||
}
|
||||
};
|
||||
|
||||
match src {
|
||||
RegMem::Reg { reg: src } => emit_std_reg_reg(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x8B,
|
||||
1,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
RexFlags::clear_w(),
|
||||
)
|
||||
}
|
||||
*src,
|
||||
rex_flags,
|
||||
),
|
||||
RegMem::Mem { addr: src } => emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Mov64_M_R { addr, dst } => emit_std_reg_mem(
|
||||
Inst::Mov64_M_R { src, dst } => emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x8B,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
&src.finalize(state),
|
||||
RexFlags::set_w(),
|
||||
),
|
||||
|
||||
Inst::MovSX_M_R { extMode, addr, dst } => {
|
||||
match extMode {
|
||||
ExtMode::BL => {
|
||||
// MOVSBL is (REX.W==0) 0F BE /r
|
||||
emit_std_reg_mem(
|
||||
Inst::LoadEffectiveAddress { addr, dst } => emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x0FBE,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
RexFlags::clear_w(),
|
||||
)
|
||||
}
|
||||
|
||||
ExtMode::BQ => {
|
||||
// MOVSBQ is (REX.W==1) 0F BE /r
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x0FBE,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
RexFlags::set_w(),
|
||||
)
|
||||
}
|
||||
|
||||
ExtMode::WL => {
|
||||
// MOVSWL is (REX.W==0) 0F BF /r
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x0FBF,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
RexFlags::clear_w(),
|
||||
)
|
||||
}
|
||||
|
||||
ExtMode::WQ => {
|
||||
// MOVSWQ is (REX.W==1) 0F BF /r
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x0FBF,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
RexFlags::set_w(),
|
||||
)
|
||||
}
|
||||
|
||||
ExtMode::LQ => {
|
||||
// MOVSLQ is (REX.W==1) 63 /r
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x63,
|
||||
0x8D,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
&addr.finalize(state),
|
||||
RexFlags::set_w(),
|
||||
)
|
||||
),
|
||||
|
||||
Inst::MovSX_RM_R { ext_mode, src, dst } => {
|
||||
let (opcodes, num_opcodes, rex_flags) = match ext_mode {
|
||||
ExtMode::BL => {
|
||||
// MOVSBL is (REX.W==0) 0F BE /r
|
||||
(0x0FBE, 2, RexFlags::clear_w())
|
||||
}
|
||||
ExtMode::BQ => {
|
||||
// MOVSBQ is (REX.W==1) 0F BE /r
|
||||
(0x0FBE, 2, RexFlags::set_w())
|
||||
}
|
||||
ExtMode::WL => {
|
||||
// MOVSWL is (REX.W==0) 0F BF /r
|
||||
(0x0FBF, 2, RexFlags::clear_w())
|
||||
}
|
||||
ExtMode::WQ => {
|
||||
// MOVSWQ is (REX.W==1) 0F BF /r
|
||||
(0x0FBF, 2, RexFlags::set_w())
|
||||
}
|
||||
ExtMode::LQ => {
|
||||
// MOVSLQ is (REX.W==1) 63 /r
|
||||
(0x63, 1, RexFlags::set_w())
|
||||
}
|
||||
};
|
||||
|
||||
match src {
|
||||
RegMem::Reg { reg: src } => emit_std_reg_reg(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
*src,
|
||||
rex_flags,
|
||||
),
|
||||
RegMem::Mem { addr: src } => emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Mov_R_M { size, src, addr } => {
|
||||
Inst::Mov_R_M { size, src, dst } => {
|
||||
let dst = &dst.finalize(state);
|
||||
|
||||
match size {
|
||||
1 => {
|
||||
// This is one of the few places where the presence of a
|
||||
@@ -716,7 +682,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
};
|
||||
|
||||
// MOV r8, r/m8 is (REX.W==0) 88 /r
|
||||
emit_std_reg_mem(sink, LegacyPrefix::None, 0x88, 1, *src, addr, rex)
|
||||
emit_std_reg_mem(sink, LegacyPrefix::None, 0x88, 1, *src, dst, rex)
|
||||
}
|
||||
|
||||
2 => {
|
||||
@@ -727,7 +693,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
0x89,
|
||||
1,
|
||||
*src,
|
||||
addr,
|
||||
dst,
|
||||
RexFlags::clear_w(),
|
||||
)
|
||||
}
|
||||
@@ -740,7 +706,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
0x89,
|
||||
1,
|
||||
*src,
|
||||
addr,
|
||||
dst,
|
||||
RexFlags::clear_w(),
|
||||
)
|
||||
}
|
||||
@@ -753,7 +719,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
0x89,
|
||||
1,
|
||||
*src,
|
||||
addr,
|
||||
dst,
|
||||
RexFlags::set_w(),
|
||||
)
|
||||
}
|
||||
@@ -825,23 +791,25 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
};
|
||||
|
||||
match src_e {
|
||||
RegMemImm::Reg { reg: regE } => {
|
||||
let opcode = if *size == 1 { 0x38 } else { 0x39 };
|
||||
RegMemImm::Reg { reg: reg_e } => {
|
||||
if *size == 1 {
|
||||
// We also need to check whether the E register forces
|
||||
// the use of a redundant REX.
|
||||
let encE = int_reg_enc(*regE);
|
||||
if encE >= 4 && encE <= 7 {
|
||||
// Check whether the E register forces the use of a redundant REX.
|
||||
let enc_e = int_reg_enc(*reg_e);
|
||||
if enc_e >= 4 && enc_e <= 7 {
|
||||
rex.always_emit();
|
||||
}
|
||||
}
|
||||
// Same comment re swapped args as for Alu_RMI_R.
|
||||
emit_std_reg_reg(sink, prefix, opcode, 1, *regE, *reg_g, rex);
|
||||
|
||||
// Use the swapped operands encoding, to stay consistent with the output of
|
||||
// gcc/llvm.
|
||||
let opcode = if *size == 1 { 0x38 } else { 0x39 };
|
||||
emit_std_reg_reg(sink, prefix, opcode, 1, *reg_e, *reg_g, rex);
|
||||
}
|
||||
|
||||
RegMemImm::Mem { addr } => {
|
||||
let opcode = if *size == 1 { 0x3A } else { 0x3B };
|
||||
let addr = &addr.finalize(state);
|
||||
// Whereas here we revert to the "normal" G-E ordering.
|
||||
let opcode = if *size == 1 { 0x3A } else { 0x3B };
|
||||
emit_std_reg_mem(sink, prefix, opcode, 1, *reg_g, addr, rex);
|
||||
}
|
||||
|
||||
@@ -849,6 +817,8 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
// FIXME JRS 2020Feb11: there are shorter encodings for
|
||||
// cmp $imm, rax/eax/ax/al.
|
||||
let use_imm8 = low8_will_sign_extend_to_32(*simm32);
|
||||
|
||||
// And also here we use the "normal" G-E ordering.
|
||||
let opcode = if *size == 1 {
|
||||
0x80
|
||||
} else if use_imm8 {
|
||||
@@ -857,7 +827,6 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
0x81
|
||||
};
|
||||
|
||||
// And also here we use the "normal" G-E ordering.
|
||||
let enc_g = int_reg_enc(*reg_g);
|
||||
emit_std_enc_enc(sink, prefix, opcode, 1, 7 /*subopcode*/, enc_g, rex);
|
||||
emit_simm(sink, if use_imm8 { 1 } else { *size }, *simm32);
|
||||
@@ -865,6 +834,21 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Setcc { cc, dst } => {
|
||||
let opcode = 0x0f90 + cc.get_enc() as u32;
|
||||
let mut rex_flags = RexFlags::clear_w();
|
||||
rex_flags.always_emit();
|
||||
emit_std_enc_enc(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcode,
|
||||
2,
|
||||
0,
|
||||
reg_enc(dst.to_reg()),
|
||||
rex_flags,
|
||||
);
|
||||
}
|
||||
|
||||
Inst::Push64 { src } => {
|
||||
match src {
|
||||
RegMemImm::Reg { reg } => {
|
||||
@@ -877,6 +861,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
}
|
||||
|
||||
RegMemImm::Mem { addr } => {
|
||||
let addr = &addr.finalize(state);
|
||||
emit_std_enc_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
@@ -910,7 +895,22 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
sink.put1(0x58 + (encDst & 7));
|
||||
}
|
||||
|
||||
Inst::CallUnknown { dest } => {
|
||||
Inst::CallKnown {
|
||||
dest, loc, opcode, ..
|
||||
} => {
|
||||
sink.put1(0xE8);
|
||||
// The addend adjusts for the difference between the end of the instruction and the
|
||||
// beginning of the immediate field.
|
||||
sink.add_reloc(*loc, Reloc::X86CallPCRel4, &dest, -4);
|
||||
sink.put4(0);
|
||||
if opcode.is_call() {
|
||||
sink.add_call_site(*loc, *opcode);
|
||||
}
|
||||
}
|
||||
|
||||
Inst::CallUnknown {
|
||||
dest, opcode, loc, ..
|
||||
} => {
|
||||
match dest {
|
||||
RegMem::Reg { reg } => {
|
||||
let reg_enc = int_reg_enc(*reg);
|
||||
@@ -926,6 +926,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
}
|
||||
|
||||
RegMem::Mem { addr } => {
|
||||
let addr = &addr.finalize(state);
|
||||
emit_std_enc_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
@@ -937,61 +938,61 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
);
|
||||
}
|
||||
}
|
||||
if opcode.is_call() {
|
||||
sink.add_call_site(*loc, *opcode);
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Ret {} => sink.put1(0xC3),
|
||||
|
||||
Inst::JmpKnown { dest } => {
|
||||
let disp = dest.as_offset32_or_zero() - 5;
|
||||
let disp = disp as u32;
|
||||
Inst::JmpKnown { dst } => {
|
||||
let br_start = sink.cur_offset();
|
||||
let br_disp_off = br_start + 1;
|
||||
let br_end = br_start + 5;
|
||||
if let Some(l) = dest.as_label() {
|
||||
sink.use_label_at_offset(br_disp_off, l, LabelUse::Rel32);
|
||||
if let Some(l) = dst.as_label() {
|
||||
sink.use_label_at_offset(br_disp_off, l, LabelUse::JmpRel32);
|
||||
sink.add_uncond_branch(br_start, br_end, l);
|
||||
}
|
||||
|
||||
let disp = dst.as_offset32_or_zero();
|
||||
let disp = disp as u32;
|
||||
sink.put1(0xE9);
|
||||
sink.put4(disp);
|
||||
}
|
||||
|
||||
Inst::JmpCondSymm {
|
||||
Inst::JmpCond {
|
||||
cc,
|
||||
taken,
|
||||
not_taken,
|
||||
} => {
|
||||
// Conditional part.
|
||||
|
||||
// This insn is 6 bytes long. Currently `offset` is relative to
|
||||
// the start of this insn, but the Intel encoding requires it to
|
||||
// be relative to the start of the next instruction. Hence the
|
||||
// adjustment.
|
||||
let taken_disp = taken.as_offset32_or_zero() - 6;
|
||||
let taken_disp = taken_disp as u32;
|
||||
// If taken.
|
||||
let cond_start = sink.cur_offset();
|
||||
let cond_disp_off = cond_start + 2;
|
||||
let cond_end = cond_start + 6;
|
||||
if let Some(l) = taken.as_label() {
|
||||
sink.use_label_at_offset(cond_disp_off, l, LabelUse::Rel32);
|
||||
sink.use_label_at_offset(cond_disp_off, l, LabelUse::JmpRel32);
|
||||
let inverted: [u8; 6] =
|
||||
[0x0F, 0x80 + (cc.invert().get_enc()), 0xFA, 0xFF, 0xFF, 0xFF];
|
||||
[0x0F, 0x80 + (cc.invert().get_enc()), 0x00, 0x00, 0x00, 0x00];
|
||||
sink.add_cond_branch(cond_start, cond_end, l, &inverted[..]);
|
||||
}
|
||||
|
||||
let taken_disp = taken.as_offset32_or_zero();
|
||||
let taken_disp = taken_disp as u32;
|
||||
sink.put1(0x0F);
|
||||
sink.put1(0x80 + cc.get_enc());
|
||||
sink.put4(taken_disp);
|
||||
|
||||
// Unconditional part.
|
||||
|
||||
let nt_disp = not_taken.as_offset32_or_zero() - 5;
|
||||
let nt_disp = nt_disp as u32;
|
||||
// If not taken.
|
||||
let uncond_start = sink.cur_offset();
|
||||
let uncond_disp_off = uncond_start + 1;
|
||||
let uncond_end = uncond_start + 5;
|
||||
if let Some(l) = not_taken.as_label() {
|
||||
sink.use_label_at_offset(uncond_disp_off, l, LabelUse::Rel32);
|
||||
sink.use_label_at_offset(uncond_disp_off, l, LabelUse::JmpRel32);
|
||||
sink.add_uncond_branch(uncond_start, uncond_end, l);
|
||||
}
|
||||
|
||||
let nt_disp = not_taken.as_offset32_or_zero();
|
||||
let nt_disp = nt_disp as u32;
|
||||
sink.put1(0xE9);
|
||||
sink.put4(nt_disp);
|
||||
}
|
||||
@@ -1012,6 +1013,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
}
|
||||
|
||||
RegMem::Mem { addr } => {
|
||||
let addr = &addr.finalize(state);
|
||||
emit_std_enc_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
@@ -1045,6 +1047,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
}
|
||||
|
||||
RegMem::Mem { addr } => {
|
||||
let addr = &addr.finalize(state);
|
||||
emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
|
||||
}
|
||||
}
|
||||
@@ -1074,11 +1077,33 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
}
|
||||
|
||||
RegMem::Mem { addr } => {
|
||||
let addr = &addr.finalize(state);
|
||||
emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_ => panic!("x64_emit: unhandled: {} ", inst.show_rru(None)),
|
||||
Inst::Hlt => {
|
||||
sink.put1(0xcc);
|
||||
}
|
||||
|
||||
Inst::Ud2 { trap_info } => {
|
||||
sink.add_trap(trap_info.0, trap_info.1);
|
||||
sink.put1(0x0f);
|
||||
sink.put1(0x0b);
|
||||
}
|
||||
|
||||
Inst::VirtualSPOffsetAdj { offset } => {
|
||||
debug!(
|
||||
"virtual sp offset adjusted by {} -> {}",
|
||||
offset,
|
||||
state.virtual_sp_offset + offset
|
||||
);
|
||||
state.virtual_sp_offset += offset;
|
||||
}
|
||||
|
||||
Inst::Nop { .. } | Inst::EpiloguePlaceholder => {
|
||||
// Generate no code.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -4,19 +4,17 @@
|
||||
#![allow(non_snake_case)]
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
use core::convert::TryFrom;
|
||||
use alloc::vec::Vec;
|
||||
use smallvec::SmallVec;
|
||||
use std::fmt;
|
||||
use std::string::{String, ToString};
|
||||
|
||||
use regalloc::RegUsageCollector;
|
||||
use regalloc::Set;
|
||||
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable};
|
||||
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I64, I8};
|
||||
use crate::ir::ExternalName;
|
||||
use crate::ir::Type;
|
||||
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
|
||||
use crate::machinst::*;
|
||||
use crate::settings::Flags;
|
||||
use crate::{settings, CodegenError, CodegenResult};
|
||||
@@ -37,11 +35,13 @@ use regs::{create_reg_universe_systemv, show_ireg_sized};
|
||||
|
||||
/// Instructions. Destinations are on the RIGHT (a la AT&T syntax).
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum Inst {
|
||||
pub enum Inst {
|
||||
/// nops of various sizes, including zero
|
||||
Nop { len: u8 },
|
||||
|
||||
/// (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg
|
||||
// =====================================
|
||||
// Integer instructions.
|
||||
/// Integer arithmetic/bit-twiddling: (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg
|
||||
Alu_RMI_R {
|
||||
is_64: bool,
|
||||
op: AluRmiROpcode,
|
||||
@@ -49,49 +49,57 @@ pub(crate) enum Inst {
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// (imm32 imm64) reg.
|
||||
/// Either: movl $imm32, %reg32 or movabsq $imm64, %reg32
|
||||
/// Constant materialization: (imm32 imm64) reg.
|
||||
/// Either: movl $imm32, %reg32 or movabsq $imm64, %reg32.
|
||||
Imm_R {
|
||||
dst_is_64: bool,
|
||||
simm64: u64,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// mov (64 32) reg reg
|
||||
/// GPR to GPR move: mov (64 32) reg reg.
|
||||
Mov_R_R {
|
||||
is_64: bool,
|
||||
src: Reg,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// movz (bl bq wl wq lq) addr reg (good for all ZX loads except 64->64).
|
||||
/// Note that the lq variant doesn't really exist since the default
|
||||
/// zero-extend rule makes it unnecessary. For that case we emit the
|
||||
/// equivalent "movl AM, reg32".
|
||||
MovZX_M_R {
|
||||
extMode: ExtMode,
|
||||
addr: Addr,
|
||||
/// Zero-extended loads, except for 64 bits: movz (bl bq wl wq lq) addr reg.
|
||||
/// Note that the lq variant doesn't really exist since the default zero-extend rule makes it
|
||||
/// unnecessary. For that case we emit the equivalent "movl AM, reg32".
|
||||
MovZX_RM_R {
|
||||
ext_mode: ExtMode,
|
||||
src: RegMem,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// A plain 64-bit integer load, since MovZX_M_R can't represent that
|
||||
Mov64_M_R { addr: Addr, dst: Writable<Reg> },
|
||||
|
||||
/// movs (bl bq wl wq lq) addr reg (good for all SX loads)
|
||||
MovSX_M_R {
|
||||
extMode: ExtMode,
|
||||
addr: Addr,
|
||||
/// A plain 64-bit integer load, since MovZX_RM_R can't represent that.
|
||||
Mov64_M_R {
|
||||
src: SyntheticAmode,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// mov (b w l q) reg addr (good for all integer stores)
|
||||
/// Loads the memory address of addr into dst.
|
||||
LoadEffectiveAddress {
|
||||
addr: SyntheticAmode,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// Sign-extended loads and moves: movs (bl bq wl wq lq) addr reg.
|
||||
MovSX_RM_R {
|
||||
ext_mode: ExtMode,
|
||||
src: RegMem,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// Integer stores: mov (b w l q) reg addr.
|
||||
Mov_R_M {
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
size: u8, // 1, 2, 4 or 8.
|
||||
src: Reg,
|
||||
addr: Addr,
|
||||
dst: SyntheticAmode,
|
||||
},
|
||||
|
||||
/// (shl shr sar) (l q) imm reg
|
||||
/// Arithmetic shifts: (shl shr sar) (l q) imm reg.
|
||||
Shift_R {
|
||||
is_64: bool,
|
||||
kind: ShiftKind,
|
||||
@@ -100,75 +108,95 @@ pub(crate) enum Inst {
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// cmp (b w l q) (reg addr imm) reg
|
||||
/// Integer comparisons/tests: cmp (b w l q) (reg addr imm) reg.
|
||||
Cmp_RMI_R {
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: RegMemImm,
|
||||
dst: Reg,
|
||||
},
|
||||
|
||||
/// Materializes the requested condition code in the destination reg.
|
||||
Setcc { cc: CC, dst: Writable<Reg> },
|
||||
|
||||
// =====================================
|
||||
// Stack manipulation.
|
||||
/// pushq (reg addr imm)
|
||||
Push64 { src: RegMemImm },
|
||||
|
||||
/// popq reg
|
||||
Pop64 { dst: Writable<Reg> },
|
||||
|
||||
/// call simm32
|
||||
CallKnown {
|
||||
dest: ExternalName,
|
||||
uses: Set<Reg>,
|
||||
defs: Set<Writable<Reg>>,
|
||||
// =====================================
|
||||
// Floating-point operations.
|
||||
/// Float arithmetic/bit-twiddling: (add sub and or xor mul adc? sbb?) (32 64) (reg addr) reg
|
||||
XMM_RM_R {
|
||||
op: SseOpcode,
|
||||
src: RegMem,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// callq (reg mem)
|
||||
CallUnknown {
|
||||
dest: RegMem,
|
||||
//uses: Set<Reg>,
|
||||
//defs: Set<Writable<Reg>>,
|
||||
},
|
||||
|
||||
// ---- branches (exactly one must appear at end of BB) ----
|
||||
/// ret
|
||||
Ret,
|
||||
|
||||
/// A placeholder instruction, generating no code, meaning that a function epilogue must be
|
||||
/// inserted there.
|
||||
EpiloguePlaceholder,
|
||||
|
||||
/// jmp simm32
|
||||
JmpKnown { dest: BranchTarget },
|
||||
|
||||
/// jcond cond target target
|
||||
/// Symmetrical two-way conditional branch.
|
||||
/// Emitted as a compound sequence; the MachBuffer will shrink it
|
||||
/// as appropriate.
|
||||
JmpCondSymm {
|
||||
cc: CC,
|
||||
taken: BranchTarget,
|
||||
not_taken: BranchTarget,
|
||||
},
|
||||
|
||||
/// jmpq (reg mem)
|
||||
JmpUnknown { target: RegMem },
|
||||
|
||||
/// mov between XMM registers (32 64) (reg addr) reg
|
||||
/// XMM_MOV_RM_R differs from XMM_RM_R in that the dst
|
||||
/// register of XMM_MOV_RM_R is not used in the computation
|
||||
/// of the instruction dst value and so does not have to
|
||||
/// be a previously valid value. This is characteristic of
|
||||
/// mov instructions.
|
||||
/// XMM_MOV_RM_R differs from XMM_RM_R in that the dst register of XMM_MOV_RM_R is not used in
|
||||
/// the computation of the instruction dst value and so does not have to be a previously valid
|
||||
/// value. This is characteristic of mov instructions.
|
||||
XMM_MOV_RM_R {
|
||||
op: SseOpcode,
|
||||
src: RegMem,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg
|
||||
XMM_RM_R {
|
||||
op: SseOpcode,
|
||||
src: RegMem,
|
||||
dst: Writable<Reg>,
|
||||
// =====================================
|
||||
// Control flow instructions.
|
||||
/// Direct call: call simm32.
|
||||
CallKnown {
|
||||
dest: ExternalName,
|
||||
uses: Vec<Reg>,
|
||||
defs: Vec<Writable<Reg>>,
|
||||
loc: SourceLoc,
|
||||
opcode: Opcode,
|
||||
},
|
||||
|
||||
/// Indirect call: callq (reg mem).
|
||||
CallUnknown {
|
||||
dest: RegMem,
|
||||
uses: Vec<Reg>,
|
||||
defs: Vec<Writable<Reg>>,
|
||||
loc: SourceLoc,
|
||||
opcode: Opcode,
|
||||
},
|
||||
|
||||
/// Return.
|
||||
Ret,
|
||||
|
||||
/// A placeholder instruction, generating no code, meaning that a function epilogue must be
|
||||
/// inserted there.
|
||||
EpiloguePlaceholder,
|
||||
|
||||
/// Jump to a known target: jmp simm32.
|
||||
JmpKnown { dst: BranchTarget },
|
||||
|
||||
/// Two-way conditional branch: jcond cond target target.
|
||||
/// Emitted as a compound sequence; the MachBuffer will shrink it as appropriate.
|
||||
JmpCond {
|
||||
cc: CC,
|
||||
taken: BranchTarget,
|
||||
not_taken: BranchTarget,
|
||||
},
|
||||
|
||||
/// Indirect jump: jmpq (reg mem).
|
||||
JmpUnknown { target: RegMem },
|
||||
|
||||
/// A debug trap.
|
||||
Hlt,
|
||||
|
||||
/// An instruction that will always trigger the illegal instruction exception.
|
||||
Ud2 { trap_info: (SourceLoc, TrapCode) },
|
||||
|
||||
// =====================================
|
||||
// Meta-instructions generating no code.
|
||||
/// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
|
||||
/// controls how MemArg::NominalSPOffset args are lowered.
|
||||
VirtualSPOffsetAdj { offset: i64 },
|
||||
}
|
||||
|
||||
// Handy constructors for Insts.
|
||||
@@ -229,29 +257,44 @@ impl Inst {
|
||||
Inst::XMM_RM_R { op, src, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn movzx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst {
|
||||
pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::MovZX_M_R { extMode, addr, dst }
|
||||
Inst::MovZX_RM_R { ext_mode, src, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn mov64_m_r(addr: Addr, dst: Writable<Reg>) -> Inst {
|
||||
pub(crate) fn mov64_m_r(src: impl Into<SyntheticAmode>, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::Mov64_M_R { addr, dst }
|
||||
Inst::Mov64_M_R {
|
||||
src: src.into(),
|
||||
dst,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn movsx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst {
|
||||
pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::MovSX_M_R { extMode, addr, dst }
|
||||
Inst::MovSX_RM_R { ext_mode, src, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn mov_r_m(
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: Reg,
|
||||
addr: Addr,
|
||||
dst: impl Into<SyntheticAmode>,
|
||||
) -> Inst {
|
||||
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
|
||||
debug_assert!(src.get_class() == RegClass::I64);
|
||||
Inst::Mov_R_M { size, src, addr }
|
||||
Inst::Mov_R_M {
|
||||
size,
|
||||
src,
|
||||
dst: dst.into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn lea(addr: impl Into<SyntheticAmode>, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::LoadEffectiveAddress {
|
||||
addr: addr.into(),
|
||||
dst,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn shift_r(
|
||||
@@ -274,6 +317,8 @@ impl Inst {
|
||||
}
|
||||
}
|
||||
|
||||
/// Does a comparison of dst - src for operands of size `size`, as stated by the machine
|
||||
/// instruction semantics. Be careful with the order of parameters!
|
||||
pub(crate) fn cmp_rmi_r(
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: RegMemImm,
|
||||
@@ -284,6 +329,11 @@ impl Inst {
|
||||
Inst::Cmp_RMI_R { size, src, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn setcc(cc: CC, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::Setcc { cc, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn push64(src: RegMemImm) -> Inst {
|
||||
Inst::Push64 { src }
|
||||
}
|
||||
@@ -292,8 +342,36 @@ impl Inst {
|
||||
Inst::Pop64 { dst }
|
||||
}
|
||||
|
||||
pub(crate) fn call_unknown(dest: RegMem) -> Inst {
|
||||
Inst::CallUnknown { dest }
|
||||
pub(crate) fn call_known(
|
||||
dest: ExternalName,
|
||||
uses: Vec<Reg>,
|
||||
defs: Vec<Writable<Reg>>,
|
||||
loc: SourceLoc,
|
||||
opcode: Opcode,
|
||||
) -> Inst {
|
||||
Inst::CallKnown {
|
||||
dest,
|
||||
uses,
|
||||
defs,
|
||||
loc,
|
||||
opcode,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn call_unknown(
|
||||
dest: RegMem,
|
||||
uses: Vec<Reg>,
|
||||
defs: Vec<Writable<Reg>>,
|
||||
loc: SourceLoc,
|
||||
opcode: Opcode,
|
||||
) -> Inst {
|
||||
Inst::CallUnknown {
|
||||
dest,
|
||||
uses,
|
||||
defs,
|
||||
loc,
|
||||
opcode,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn ret() -> Inst {
|
||||
@@ -304,12 +382,12 @@ impl Inst {
|
||||
Inst::EpiloguePlaceholder
|
||||
}
|
||||
|
||||
pub(crate) fn jmp_known(dest: BranchTarget) -> Inst {
|
||||
Inst::JmpKnown { dest }
|
||||
pub(crate) fn jmp_known(dst: BranchTarget) -> Inst {
|
||||
Inst::JmpKnown { dst }
|
||||
}
|
||||
|
||||
pub(crate) fn jmp_cond_symm(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst {
|
||||
Inst::JmpCondSymm {
|
||||
pub(crate) fn jmp_cond(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst {
|
||||
Inst::JmpCond {
|
||||
cc,
|
||||
taken,
|
||||
not_taken,
|
||||
@@ -414,40 +492,46 @@ impl ShowWithRRU for Inst {
|
||||
show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
|
||||
),
|
||||
Inst::MovZX_M_R { extMode, addr, dst } => {
|
||||
if *extMode == ExtMode::LQ {
|
||||
Inst::MovZX_RM_R { ext_mode, src, dst } => {
|
||||
if *ext_mode == ExtMode::LQ {
|
||||
format!(
|
||||
"{} {}, {}",
|
||||
ljustify("movl".to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
src.show_rru_sized(mb_rru, ext_mode.src_size()),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, 4)
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("movz".to_string(), extMode.to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size())
|
||||
ljustify2("movz".to_string(), ext_mode.to_string()),
|
||||
src.show_rru_sized(mb_rru, ext_mode.src_size()),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, ext_mode.dst_size())
|
||||
)
|
||||
}
|
||||
}
|
||||
Inst::Mov64_M_R { addr, dst } => format!(
|
||||
Inst::Mov64_M_R { src, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify("movq".to_string()),
|
||||
src.show_rru(mb_rru),
|
||||
dst.show_rru(mb_rru)
|
||||
),
|
||||
Inst::LoadEffectiveAddress { addr, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify("lea".to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
dst.show_rru(mb_rru)
|
||||
),
|
||||
Inst::MovSX_M_R { extMode, addr, dst } => format!(
|
||||
Inst::MovSX_RM_R { ext_mode, src, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("movs".to_string(), extMode.to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size())
|
||||
ljustify2("movs".to_string(), ext_mode.to_string()),
|
||||
src.show_rru_sized(mb_rru, ext_mode.src_size()),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, ext_mode.dst_size())
|
||||
),
|
||||
Inst::Mov_R_M { size, src, addr } => format!(
|
||||
Inst::Mov_R_M { size, src, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("mov".to_string(), suffixBWLQ(*size)),
|
||||
show_ireg_sized(*src, mb_rru, *size),
|
||||
addr.show_rru(mb_rru)
|
||||
dst.show_rru(mb_rru)
|
||||
),
|
||||
Inst::Shift_R {
|
||||
is_64,
|
||||
@@ -474,25 +558,29 @@ impl ShowWithRRU for Inst {
|
||||
src.show_rru_sized(mb_rru, *size),
|
||||
show_ireg_sized(*dst, mb_rru, *size)
|
||||
),
|
||||
Inst::Setcc { cc, dst } => format!(
|
||||
"{} {}",
|
||||
ljustify2("set".to_string(), cc.to_string()),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, 1)
|
||||
),
|
||||
Inst::Push64 { src } => {
|
||||
format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
|
||||
}
|
||||
Inst::Pop64 { dst } => {
|
||||
format!("{} {}", ljustify("popq".to_string()), dst.show_rru(mb_rru))
|
||||
}
|
||||
//Inst::CallKnown { target } => format!("{} {:?}", ljustify("call".to_string()), target),
|
||||
Inst::CallKnown { .. } => "**CallKnown**".to_string(),
|
||||
Inst::CallUnknown { dest } => format!(
|
||||
Inst::CallKnown { dest, .. } => format!("{} {:?}", ljustify("call".to_string()), dest),
|
||||
Inst::CallUnknown { dest, .. } => format!(
|
||||
"{} *{}",
|
||||
ljustify("call".to_string()),
|
||||
dest.show_rru(mb_rru)
|
||||
),
|
||||
Inst::Ret => "ret".to_string(),
|
||||
Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(),
|
||||
Inst::JmpKnown { dest } => {
|
||||
format!("{} {}", ljustify("jmp".to_string()), dest.show_rru(mb_rru))
|
||||
Inst::JmpKnown { dst } => {
|
||||
format!("{} {}", ljustify("jmp".to_string()), dst.show_rru(mb_rru))
|
||||
}
|
||||
Inst::JmpCondSymm {
|
||||
Inst::JmpCond {
|
||||
cc,
|
||||
taken,
|
||||
not_taken,
|
||||
@@ -508,6 +596,9 @@ impl ShowWithRRU for Inst {
|
||||
ljustify("jmp".to_string()),
|
||||
target.show_rru(mb_rru)
|
||||
),
|
||||
Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset),
|
||||
Inst::Hlt => "hlt".into(),
|
||||
Inst::Ud2 { trap_info } => format!("ud2 {}", trap_info.1),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -526,7 +617,6 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
// regalloc.rs will "fix" this for us by removing the modified set from the use and def
|
||||
// sets.
|
||||
match inst {
|
||||
// ** Nop
|
||||
Inst::Alu_RMI_R {
|
||||
is_64: _,
|
||||
op: _,
|
||||
@@ -544,40 +634,28 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_mod(*dst);
|
||||
}
|
||||
Inst::Imm_R {
|
||||
dst_is_64: _,
|
||||
simm64: _,
|
||||
dst,
|
||||
} => {
|
||||
Inst::Imm_R { dst, .. } => {
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::Mov_R_R { is_64: _, src, dst } => {
|
||||
Inst::Mov_R_R { src, dst, .. } => {
|
||||
collector.add_use(*src);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::MovZX_M_R {
|
||||
extMode: _,
|
||||
addr,
|
||||
dst,
|
||||
} => {
|
||||
addr.get_regs_as_uses(collector);
|
||||
Inst::MovZX_RM_R { src, dst, .. } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::Mov64_M_R { addr, dst } => {
|
||||
addr.get_regs_as_uses(collector);
|
||||
Inst::Mov64_M_R { src, dst } | Inst::LoadEffectiveAddress { addr: src, dst } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst)
|
||||
}
|
||||
Inst::MovSX_RM_R { src, dst, .. } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::MovSX_M_R {
|
||||
extMode: _,
|
||||
addr,
|
||||
dst,
|
||||
} => {
|
||||
addr.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::Mov_R_M { size: _, src, addr } => {
|
||||
Inst::Mov_R_M { src, dst, .. } => {
|
||||
collector.add_use(*src);
|
||||
addr.get_regs_as_uses(collector);
|
||||
dst.get_regs_as_uses(collector);
|
||||
}
|
||||
Inst::Shift_R {
|
||||
is_64: _,
|
||||
@@ -594,6 +672,9 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_use(*dst); // yes, really `add_use`
|
||||
}
|
||||
Inst::Setcc { dst, .. } => {
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::Push64 { src } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_mod(Writable::from_reg(regs::rsp()));
|
||||
@@ -601,29 +682,36 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
Inst::Pop64 { dst } => {
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
|
||||
Inst::CallKnown {
|
||||
dest: _,
|
||||
uses: _,
|
||||
defs: _,
|
||||
ref uses, ref defs, ..
|
||||
} => {
|
||||
// FIXME add arg regs (iru.used) and caller-saved regs (iru.defined)
|
||||
unimplemented!();
|
||||
collector.add_uses(uses);
|
||||
collector.add_defs(defs);
|
||||
}
|
||||
Inst::CallUnknown { dest } => {
|
||||
|
||||
Inst::CallUnknown {
|
||||
ref uses,
|
||||
ref defs,
|
||||
dest,
|
||||
..
|
||||
} => {
|
||||
collector.add_uses(uses);
|
||||
collector.add_defs(defs);
|
||||
dest.get_regs_as_uses(collector);
|
||||
}
|
||||
Inst::Ret => {}
|
||||
Inst::EpiloguePlaceholder => {}
|
||||
Inst::JmpKnown { dest: _ } => {}
|
||||
Inst::JmpCondSymm {
|
||||
cc: _,
|
||||
taken: _,
|
||||
not_taken: _,
|
||||
} => {}
|
||||
//Inst::JmpUnknown { target } => {
|
||||
// target.get_regs_as_uses(collector);
|
||||
//}
|
||||
Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_get_regs inst"),
|
||||
|
||||
Inst::Ret
|
||||
| Inst::EpiloguePlaceholder
|
||||
| Inst::JmpKnown { .. }
|
||||
| Inst::JmpCond { .. }
|
||||
| Inst::Nop { .. }
|
||||
| Inst::JmpUnknown { .. }
|
||||
| Inst::VirtualSPOffsetAdj { .. }
|
||||
| Inst::Hlt
|
||||
| Inst::Ud2 { .. } => {
|
||||
// No registers are used.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -631,34 +719,34 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
// Instructions and subcomponents: map_regs
|
||||
|
||||
fn map_use<RUM: RegUsageMapper>(m: &RUM, r: &mut Reg) {
|
||||
if r.is_virtual() {
|
||||
let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg();
|
||||
if let Some(reg) = r.as_virtual_reg() {
|
||||
let new = m.get_use(reg).unwrap().to_reg();
|
||||
*r = new;
|
||||
}
|
||||
}
|
||||
|
||||
fn map_def<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
|
||||
if r.to_reg().is_virtual() {
|
||||
let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg();
|
||||
if let Some(reg) = r.to_reg().as_virtual_reg() {
|
||||
let new = m.get_def(reg).unwrap().to_reg();
|
||||
*r = Writable::from_reg(new);
|
||||
}
|
||||
}
|
||||
|
||||
fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
|
||||
if r.to_reg().is_virtual() {
|
||||
let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg();
|
||||
if let Some(reg) = r.to_reg().as_virtual_reg() {
|
||||
let new = m.get_mod(reg).unwrap().to_reg();
|
||||
*r = Writable::from_reg(new);
|
||||
}
|
||||
}
|
||||
|
||||
impl Addr {
|
||||
impl Amode {
|
||||
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
|
||||
match self {
|
||||
Addr::ImmReg {
|
||||
Amode::ImmReg {
|
||||
simm32: _,
|
||||
ref mut base,
|
||||
} => map_use(map, base),
|
||||
Addr::ImmRegRegShift {
|
||||
Amode::ImmRegRegShift {
|
||||
simm32: _,
|
||||
ref mut base,
|
||||
ref mut index,
|
||||
@@ -732,33 +820,33 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
map_use(mapper, src);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::MovZX_M_R {
|
||||
extMode: _,
|
||||
ref mut addr,
|
||||
Inst::MovZX_RM_R {
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
..
|
||||
} => {
|
||||
addr.map_uses(mapper);
|
||||
src.map_uses(mapper);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::Mov64_M_R { addr, dst } => {
|
||||
addr.map_uses(mapper);
|
||||
Inst::Mov64_M_R { src, dst } | Inst::LoadEffectiveAddress { addr: src, dst } => {
|
||||
src.map_uses(mapper);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::MovSX_M_R {
|
||||
extMode: _,
|
||||
ref mut addr,
|
||||
Inst::MovSX_RM_R {
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
..
|
||||
} => {
|
||||
addr.map_uses(mapper);
|
||||
src.map_uses(mapper);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::Mov_R_M {
|
||||
size: _,
|
||||
ref mut src,
|
||||
ref mut addr,
|
||||
ref mut dst,
|
||||
..
|
||||
} => {
|
||||
map_use(mapper, src);
|
||||
addr.map_uses(mapper);
|
||||
dst.map_uses(mapper);
|
||||
}
|
||||
Inst::Shift_R {
|
||||
is_64: _,
|
||||
@@ -776,28 +864,51 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
src.map_uses(mapper);
|
||||
map_use(mapper, dst);
|
||||
}
|
||||
Inst::Setcc { ref mut dst, .. } => map_def(mapper, dst),
|
||||
Inst::Push64 { ref mut src } => src.map_uses(mapper),
|
||||
Inst::Pop64 { ref mut dst } => {
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
|
||||
Inst::CallKnown {
|
||||
dest: _,
|
||||
uses: _,
|
||||
defs: _,
|
||||
} => {}
|
||||
Inst::CallUnknown { dest } => dest.map_uses(mapper),
|
||||
Inst::Ret => {}
|
||||
Inst::EpiloguePlaceholder => {}
|
||||
Inst::JmpKnown { dest: _ } => {}
|
||||
Inst::JmpCondSymm {
|
||||
cc: _,
|
||||
taken: _,
|
||||
not_taken: _,
|
||||
} => {}
|
||||
//Inst::JmpUnknown { target } => {
|
||||
// target.apply_map(mapper);
|
||||
//}
|
||||
Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_map_regs opcode"),
|
||||
ref mut uses,
|
||||
ref mut defs,
|
||||
..
|
||||
} => {
|
||||
for r in uses.iter_mut() {
|
||||
map_use(mapper, r);
|
||||
}
|
||||
for r in defs.iter_mut() {
|
||||
map_def(mapper, r);
|
||||
}
|
||||
}
|
||||
|
||||
Inst::CallUnknown {
|
||||
ref mut uses,
|
||||
ref mut defs,
|
||||
ref mut dest,
|
||||
..
|
||||
} => {
|
||||
for r in uses.iter_mut() {
|
||||
map_use(mapper, r);
|
||||
}
|
||||
for r in defs.iter_mut() {
|
||||
map_def(mapper, r);
|
||||
}
|
||||
dest.map_uses(mapper);
|
||||
}
|
||||
|
||||
Inst::Ret
|
||||
| Inst::EpiloguePlaceholder
|
||||
| Inst::JmpKnown { .. }
|
||||
| Inst::JmpCond { .. }
|
||||
| Inst::Nop { .. }
|
||||
| Inst::JmpUnknown { .. }
|
||||
| Inst::VirtualSPOffsetAdj { .. }
|
||||
| Inst::Ud2 { .. }
|
||||
| Inst::Hlt => {
|
||||
// No registers are used.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -847,8 +958,8 @@ impl MachInst for Inst {
|
||||
match self {
|
||||
// Interesting cases.
|
||||
&Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
|
||||
&Self::JmpKnown { dest } => MachTerminator::Uncond(dest.as_label().unwrap()),
|
||||
&Self::JmpCondSymm {
|
||||
&Self::JmpKnown { dst } => MachTerminator::Uncond(dst.as_label().unwrap()),
|
||||
&Self::JmpCond {
|
||||
cc: _,
|
||||
taken,
|
||||
not_taken,
|
||||
@@ -875,7 +986,7 @@ impl MachInst for Inst {
|
||||
}
|
||||
|
||||
fn gen_zero_len_nop() -> Inst {
|
||||
unimplemented!()
|
||||
Inst::Nop { len: 0 }
|
||||
}
|
||||
|
||||
fn gen_nop(_preferred_size: usize) -> Inst {
|
||||
@@ -919,20 +1030,27 @@ impl MachInst for Inst {
|
||||
type LabelUse = LabelUse;
|
||||
}
|
||||
|
||||
impl MachInstEmit for Inst {
|
||||
type State = ();
|
||||
/// State carried between emissions of a sequence of instructions.
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub struct EmitState {
|
||||
virtual_sp_offset: i64,
|
||||
}
|
||||
|
||||
fn emit(&self, sink: &mut MachBuffer<Inst>, _flags: &settings::Flags, _: &mut Self::State) {
|
||||
emit::emit(self, sink);
|
||||
impl MachInstEmit for Inst {
|
||||
type State = EmitState;
|
||||
|
||||
fn emit(&self, sink: &mut MachBuffer<Inst>, flags: &settings::Flags, state: &mut Self::State) {
|
||||
emit::emit(self, sink, flags, state);
|
||||
}
|
||||
}
|
||||
|
||||
/// A label-use (internal relocation) in generated code.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum LabelUse {
|
||||
/// A 32-bit offset from location of relocation itself, added to the
|
||||
/// existing value at that location.
|
||||
Rel32,
|
||||
pub enum LabelUse {
|
||||
/// A 32-bit offset from location of relocation itself, added to the existing value at that
|
||||
/// location. Used for control flow instructions which consider an offset from the start of the
|
||||
/// next instruction (so the size of the payload -- 4 bytes -- is subtracted from the payload).
|
||||
JmpRel32,
|
||||
}
|
||||
|
||||
impl MachInstLabelUse for LabelUse {
|
||||
@@ -940,30 +1058,31 @@ impl MachInstLabelUse for LabelUse {
|
||||
|
||||
fn max_pos_range(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 0x7fff_ffff,
|
||||
LabelUse::JmpRel32 => 0x7fff_ffff,
|
||||
}
|
||||
}
|
||||
|
||||
fn max_neg_range(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 0x8000_0000,
|
||||
LabelUse::JmpRel32 => 0x8000_0000,
|
||||
}
|
||||
}
|
||||
|
||||
fn patch_size(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 4,
|
||||
LabelUse::JmpRel32 => 4,
|
||||
}
|
||||
}
|
||||
|
||||
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
|
||||
let pc_rel = (label_offset as i64) - (use_offset as i64);
|
||||
debug_assert!(pc_rel <= self.max_pos_range() as i64);
|
||||
debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
|
||||
let pc_rel = pc_rel as u32;
|
||||
match self {
|
||||
LabelUse::Rel32 => {
|
||||
let addend = i32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
|
||||
let value = i32::try_from(label_offset)
|
||||
.unwrap()
|
||||
.wrapping_sub(i32::try_from(use_offset).unwrap())
|
||||
.wrapping_add(addend);
|
||||
LabelUse::JmpRel32 => {
|
||||
let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
|
||||
let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
|
||||
buffer.copy_from_slice(&value.to_le_bytes()[..]);
|
||||
}
|
||||
}
|
||||
@@ -971,20 +1090,20 @@ impl MachInstLabelUse for LabelUse {
|
||||
|
||||
fn supports_veneer(self) -> bool {
|
||||
match self {
|
||||
LabelUse::Rel32 => false,
|
||||
LabelUse::JmpRel32 => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn veneer_size(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 0,
|
||||
LabelUse::JmpRel32 => 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
|
||||
match self {
|
||||
LabelUse::Rel32 => {
|
||||
panic!("Veneer not supported for Rel32 label-use.");
|
||||
LabelUse::JmpRel32 => {
|
||||
panic!("Veneer not supported for JumpRel32 label-use.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,20 +1,22 @@
|
||||
//! Lowering rules for X64.
|
||||
|
||||
#![allow(dead_code)]
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use log::trace;
|
||||
use regalloc::{Reg, RegClass, Writable};
|
||||
use smallvec::SmallVec;
|
||||
use std::convert::TryFrom;
|
||||
|
||||
use crate::ir::types;
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::Inst as IRInst;
|
||||
use crate::ir::{condcodes::IntCC, InstructionData, Opcode, Type};
|
||||
use crate::ir::{condcodes::IntCC, InstructionData, Opcode, TrapCode, Type};
|
||||
|
||||
use crate::machinst::lower::*;
|
||||
use crate::machinst::*;
|
||||
use crate::result::CodegenResult;
|
||||
|
||||
use crate::isa::x64::abi::*;
|
||||
use crate::isa::x64::inst::args::*;
|
||||
use crate::isa::x64::inst::*;
|
||||
use crate::isa::x64::X64Backend;
|
||||
@@ -32,6 +34,20 @@ fn is_int_ty(ty: Type) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
fn is_bool_ty(ty: Type) -> bool {
|
||||
match ty {
|
||||
types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_float_ty(ty: Type) -> bool {
|
||||
match ty {
|
||||
types::F32 | types::F64 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn int_ty_is_64(ty: Type) -> bool {
|
||||
match ty {
|
||||
types::I8 | types::I16 | types::I32 => false,
|
||||
@@ -48,30 +64,18 @@ fn flt_ty_is_64(ty: Type) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
fn int_ty_to_sizeB(ty: Type) -> u8 {
|
||||
match ty {
|
||||
types::I8 => 1,
|
||||
types::I16 => 2,
|
||||
types::I32 => 4,
|
||||
types::I64 => 8,
|
||||
_ => panic!("ity_to_sizeB"),
|
||||
}
|
||||
fn iri_to_u64_imm(ctx: Ctx, inst: IRInst) -> Option<u64> {
|
||||
ctx.get_constant(inst)
|
||||
}
|
||||
|
||||
fn iri_to_u64_immediate<'a>(ctx: Ctx<'a>, iri: IRInst) -> Option<u64> {
|
||||
let inst_data = ctx.data(iri);
|
||||
if inst_data.opcode() == Opcode::Null {
|
||||
Some(0)
|
||||
} else {
|
||||
match inst_data {
|
||||
&InstructionData::UnaryImm { opcode: _, imm } => {
|
||||
// Only has Into for i64; we use u64 elsewhere, so we cast.
|
||||
let imm: i64 = imm.into();
|
||||
Some(imm as u64)
|
||||
}
|
||||
fn inst_trapcode(data: &InstructionData) -> Option<TrapCode> {
|
||||
match data {
|
||||
&InstructionData::Trap { code, .. }
|
||||
| &InstructionData::CondTrap { code, .. }
|
||||
| &InstructionData::IntCondTrap { code, .. }
|
||||
| &InstructionData::FloatCondTrap { code, .. } => Some(code),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn inst_condcode(data: &InstructionData) -> IntCC {
|
||||
@@ -87,36 +91,88 @@ fn inst_condcode(data: &InstructionData) -> IntCC {
|
||||
}
|
||||
}
|
||||
|
||||
fn input_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, input: usize) -> Reg {
|
||||
let inputs = ctx.get_input(iri, input);
|
||||
fn ldst_offset(data: &InstructionData) -> Option<i32> {
|
||||
match data {
|
||||
&InstructionData::Load { offset, .. }
|
||||
| &InstructionData::StackLoad { offset, .. }
|
||||
| &InstructionData::LoadComplex { offset, .. }
|
||||
| &InstructionData::Store { offset, .. }
|
||||
| &InstructionData::StackStore { offset, .. }
|
||||
| &InstructionData::StoreComplex { offset, .. } => Some(offset.into()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Identifier for a particular input of an instruction.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
struct InsnInput {
|
||||
insn: IRInst,
|
||||
input: usize,
|
||||
}
|
||||
|
||||
/// Identifier for a particular output of an instruction.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
struct InsnOutput {
|
||||
insn: IRInst,
|
||||
output: usize,
|
||||
}
|
||||
|
||||
fn input_to_reg<'a>(ctx: Ctx<'a>, spec: InsnInput) -> Reg {
|
||||
let inputs = ctx.get_input(spec.insn, spec.input);
|
||||
ctx.use_input_reg(inputs);
|
||||
inputs.reg
|
||||
}
|
||||
|
||||
fn output_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, output: usize) -> Writable<Reg> {
|
||||
ctx.get_output(iri, output)
|
||||
/// Try to use an immediate for constant inputs, and a register otherwise.
|
||||
/// TODO: handle memory as well!
|
||||
fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
|
||||
let imm = ctx.get_input(spec.insn, spec.input).constant.and_then(|x| {
|
||||
let as_u32 = x as u32;
|
||||
let extended = as_u32 as u64;
|
||||
// If the truncation and sign-extension don't change the value, use it.
|
||||
if extended == x {
|
||||
Some(as_u32)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
match imm {
|
||||
Some(x) => RegMemImm::imm(x),
|
||||
None => RegMemImm::reg(input_to_reg(ctx, spec)),
|
||||
}
|
||||
}
|
||||
|
||||
fn output_to_reg<'a>(ctx: Ctx<'a>, spec: InsnOutput) -> Writable<Reg> {
|
||||
ctx.get_output(spec.insn, spec.output)
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Top-level instruction lowering entry point, for one instruction.
|
||||
|
||||
/// Actually codegen an instruction's results into registers.
|
||||
fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) {
|
||||
let op = ctx.data(inst).opcode();
|
||||
let ty = if ctx.num_outputs(inst) == 1 {
|
||||
Some(ctx.output_ty(inst, 0))
|
||||
fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) -> CodegenResult<()> {
|
||||
let op = ctx.data(insn).opcode();
|
||||
|
||||
let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
|
||||
.map(|i| InsnInput { insn, input: i })
|
||||
.collect();
|
||||
let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(insn))
|
||||
.map(|i| InsnOutput { insn, output: i })
|
||||
.collect();
|
||||
|
||||
let ty = if outputs.len() > 0 {
|
||||
Some(ctx.output_ty(insn, 0))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// This is all outstandingly feeble. TODO: much better!
|
||||
match op {
|
||||
Opcode::Iconst => {
|
||||
if let Some(w64) = iri_to_u64_immediate(ctx, inst) {
|
||||
if let Some(w64) = iri_to_u64_imm(ctx, insn) {
|
||||
// Get exactly the bit pattern in 'w64' into the dest. No
|
||||
// monkeying with sign extension etc.
|
||||
let dst_is_64 = w64 > 0xFFFF_FFFF;
|
||||
let dst = output_to_reg(ctx, inst, 0);
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
ctx.emit(Inst::imm_r(dst_is_64, w64, dst));
|
||||
} else {
|
||||
unimplemented!();
|
||||
@@ -124,28 +180,32 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) {
|
||||
}
|
||||
|
||||
Opcode::Iadd | Opcode::Isub => {
|
||||
let dst = output_to_reg(ctx, inst, 0);
|
||||
let lhs = input_to_reg(ctx, inst, 0);
|
||||
let rhs = input_to_reg(ctx, inst, 1);
|
||||
let lhs = input_to_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
|
||||
// TODO For add, try to commute the operands if one is an immediate.
|
||||
|
||||
let is_64 = int_ty_is_64(ty.unwrap());
|
||||
let alu_op = if op == Opcode::Iadd {
|
||||
AluRmiROpcode::Add
|
||||
} else {
|
||||
AluRmiROpcode::Sub
|
||||
};
|
||||
|
||||
ctx.emit(Inst::mov_r_r(true, lhs, dst));
|
||||
ctx.emit(Inst::alu_rmi_r(is_64, alu_op, RegMemImm::reg(rhs), dst));
|
||||
ctx.emit(Inst::alu_rmi_r(is_64, alu_op, rhs, dst));
|
||||
}
|
||||
|
||||
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
|
||||
// TODO: implement imm shift value into insn
|
||||
let dst_ty = ctx.output_ty(inst, 0);
|
||||
assert_eq!(ctx.input_ty(inst, 0), dst_ty);
|
||||
let dst_ty = ctx.output_ty(insn, 0);
|
||||
assert_eq!(ctx.input_ty(insn, 0), dst_ty);
|
||||
assert!(dst_ty == types::I32 || dst_ty == types::I64);
|
||||
|
||||
let lhs = input_to_reg(ctx, inst, 0);
|
||||
let rhs = input_to_reg(ctx, inst, 1);
|
||||
let dst = output_to_reg(ctx, inst, 0);
|
||||
let lhs = input_to_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg(ctx, inputs[1]);
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
|
||||
let shift_kind = match op {
|
||||
Opcode::Ishl => ShiftKind::Left,
|
||||
@@ -161,30 +221,68 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) {
|
||||
ctx.emit(Inst::shift_r(is_64, shift_kind, None /*%cl*/, dst));
|
||||
}
|
||||
|
||||
Opcode::Uextend | Opcode::Sextend => {
|
||||
// TODO: this is all extremely lame, all because Mov{ZX,SX}_M_R
|
||||
// don't accept a register source operand. They should be changed
|
||||
// so as to have _RM_R form.
|
||||
// TODO2: if the source operand is a load, incorporate that.
|
||||
let zero_extend = op == Opcode::Uextend;
|
||||
let src_ty = ctx.input_ty(inst, 0);
|
||||
let dst_ty = ctx.output_ty(inst, 0);
|
||||
let src = input_to_reg(ctx, inst, 0);
|
||||
let dst = output_to_reg(ctx, inst, 0);
|
||||
Opcode::Uextend
|
||||
| Opcode::Sextend
|
||||
| Opcode::Bint
|
||||
| Opcode::Breduce
|
||||
| Opcode::Bextend
|
||||
| Opcode::Ireduce => {
|
||||
let src_ty = ctx.input_ty(insn, 0);
|
||||
let dst_ty = ctx.output_ty(insn, 0);
|
||||
|
||||
ctx.emit(Inst::mov_r_r(true, src, dst));
|
||||
match (src_ty, dst_ty, zero_extend) {
|
||||
(types::I8, types::I64, false) => {
|
||||
ctx.emit(Inst::shift_r(true, ShiftKind::Left, Some(56), dst));
|
||||
ctx.emit(Inst::shift_r(true, ShiftKind::RightS, Some(56), dst));
|
||||
// TODO: if the source operand is a load, incorporate that.
|
||||
let src = input_to_reg(ctx, inputs[0]);
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
|
||||
let ext_mode = match (src_ty.bits(), dst_ty.bits()) {
|
||||
(1, 32) | (8, 32) => ExtMode::BL,
|
||||
(1, 64) | (8, 64) => ExtMode::BQ,
|
||||
(16, 32) => ExtMode::WL,
|
||||
(16, 64) => ExtMode::WQ,
|
||||
(32, 64) => ExtMode::LQ,
|
||||
_ => unreachable!(
|
||||
"unexpected extension kind from {:?} to {:?}",
|
||||
src_ty, dst_ty
|
||||
),
|
||||
};
|
||||
|
||||
if op == Opcode::Sextend {
|
||||
ctx.emit(Inst::movsx_rm_r(ext_mode, RegMem::reg(src), dst));
|
||||
} else {
|
||||
// All of these other opcodes are simply a move from a zero-extended source. Here
|
||||
// is why this works, in each case:
|
||||
//
|
||||
// - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we
|
||||
// merely need to zero-extend here.
|
||||
//
|
||||
// - Breduce, Bextend: changing width of a boolean. We represent a
|
||||
// bool as a 0 or 1, so again, this is a zero-extend / no-op.
|
||||
//
|
||||
// - Ireduce: changing width of an integer. Smaller ints are stored
|
||||
// with undefined high-order bits, so we can simply do a copy.
|
||||
ctx.emit(Inst::movzx_rm_r(ext_mode, RegMem::reg(src), dst));
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
|
||||
Opcode::Icmp => {
|
||||
let condcode = inst_condcode(ctx.data(insn));
|
||||
let cc = CC::from_intcc(condcode);
|
||||
let ty = ctx.input_ty(insn, 0);
|
||||
|
||||
// TODO Try to commute the operands (and reverse the condition, i.e. its swapped-operand form, not its logical inverse) if one is an immediate.
|
||||
let lhs = input_to_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
|
||||
// Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives
|
||||
// us dst - src at the machine instruction level, so invert operands.
|
||||
ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs));
|
||||
ctx.emit(Inst::setcc(cc, dst));
|
||||
}
|
||||
|
||||
Opcode::FallthroughReturn | Opcode::Return => {
|
||||
for i in 0..ctx.num_inputs(inst) {
|
||||
let src_reg = input_to_reg(ctx, inst, i);
|
||||
for i in 0..ctx.num_inputs(insn) {
|
||||
let src_reg = input_to_reg(ctx, inputs[i]);
|
||||
let retval_reg = ctx.retval(i);
|
||||
if src_reg.get_class() == RegClass::I64 {
|
||||
ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
|
||||
@@ -199,10 +297,58 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) {
|
||||
// N.B.: the Ret itself is generated by the ABI.
|
||||
}
|
||||
|
||||
Opcode::Call | Opcode::CallIndirect => {
|
||||
let loc = ctx.srcloc(insn);
|
||||
let (mut abi, inputs) = match op {
|
||||
Opcode::Call => {
|
||||
let (extname, dist) = ctx.call_target(insn).unwrap();
|
||||
let sig = ctx.call_sig(insn).unwrap();
|
||||
assert!(inputs.len() == sig.params.len());
|
||||
assert!(outputs.len() == sig.returns.len());
|
||||
(
|
||||
X64ABICall::from_func(sig, &extname, dist, loc)?,
|
||||
&inputs[..],
|
||||
)
|
||||
}
|
||||
|
||||
Opcode::CallIndirect => {
|
||||
let ptr = input_to_reg(ctx, inputs[0]);
|
||||
let sig = ctx.call_sig(insn).unwrap();
|
||||
assert!(inputs.len() - 1 == sig.params.len());
|
||||
assert!(outputs.len() == sig.returns.len());
|
||||
(X64ABICall::from_ptr(sig, ptr, loc, op)?, &inputs[1..])
|
||||
}
|
||||
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
abi.emit_stack_pre_adjust(ctx);
|
||||
assert!(inputs.len() == abi.num_args());
|
||||
for (i, input) in inputs.iter().enumerate() {
|
||||
let arg_reg = input_to_reg(ctx, *input);
|
||||
abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
|
||||
}
|
||||
abi.emit_call(ctx);
|
||||
for (i, output) in outputs.iter().enumerate() {
|
||||
let retval_reg = output_to_reg(ctx, *output);
|
||||
abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
|
||||
}
|
||||
abi.emit_stack_post_adjust(ctx);
|
||||
}
|
||||
|
||||
Opcode::Debugtrap => {
|
||||
ctx.emit(Inst::Hlt);
|
||||
}
|
||||
|
||||
Opcode::Trap => {
|
||||
let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap());
|
||||
ctx.emit(Inst::Ud2 { trap_info })
|
||||
}
|
||||
|
||||
Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv => {
|
||||
let dst = output_to_reg(ctx, inst, 0);
|
||||
let lhs = input_to_reg(ctx, inst, 0);
|
||||
let rhs = input_to_reg(ctx, inst, 1);
|
||||
let lhs = input_to_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg(ctx, inputs[1]);
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
let is_64 = flt_ty_is_64(ty.unwrap());
|
||||
if !is_64 {
|
||||
let sse_op = match op {
|
||||
@@ -219,10 +365,11 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) {
|
||||
unimplemented!("unimplemented lowering for opcode {:?}", op);
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Fcopysign => {
|
||||
let dst = output_to_reg(ctx, inst, 0);
|
||||
let lhs = input_to_reg(ctx, inst, 0);
|
||||
let rhs = input_to_reg(ctx, inst, 1);
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
let lhs = input_to_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg(ctx, inputs[1]);
|
||||
if !flt_ty_is_64(ty.unwrap()) {
|
||||
// movabs 0x8000_0000, tmp_gpr1
|
||||
// movd tmp_gpr1, tmp_xmm1
|
||||
@@ -265,6 +412,185 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) {
|
||||
unimplemented!("{:?} for non 32-bit destination is not supported", op);
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Load
|
||||
| Opcode::Uload8
|
||||
| Opcode::Sload8
|
||||
| Opcode::Uload16
|
||||
| Opcode::Sload16
|
||||
| Opcode::Uload32
|
||||
| Opcode::Sload32
|
||||
| Opcode::LoadComplex
|
||||
| Opcode::Uload8Complex
|
||||
| Opcode::Sload8Complex
|
||||
| Opcode::Uload16Complex
|
||||
| Opcode::Sload16Complex
|
||||
| Opcode::Uload32Complex
|
||||
| Opcode::Sload32Complex => {
|
||||
let offset = ldst_offset(ctx.data(insn)).unwrap();
|
||||
|
||||
let elem_ty = match op {
|
||||
Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
|
||||
types::I8
|
||||
}
|
||||
Opcode::Sload16
|
||||
| Opcode::Uload16
|
||||
| Opcode::Sload16Complex
|
||||
| Opcode::Uload16Complex => types::I16,
|
||||
Opcode::Sload32
|
||||
| Opcode::Uload32
|
||||
| Opcode::Sload32Complex
|
||||
| Opcode::Uload32Complex => types::I32,
|
||||
Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
|
||||
let ext_mode = match elem_ty.bytes() {
|
||||
1 => Some(ExtMode::BQ),
|
||||
2 => Some(ExtMode::WQ),
|
||||
4 => Some(ExtMode::LQ),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let sign_extend = match op {
|
||||
Opcode::Sload8
|
||||
| Opcode::Sload8Complex
|
||||
| Opcode::Sload16
|
||||
| Opcode::Sload16Complex
|
||||
| Opcode::Sload32
|
||||
| Opcode::Sload32Complex => true,
|
||||
_ => false,
|
||||
};
|
||||
|
||||
let is_float = is_float_ty(elem_ty);
|
||||
|
||||
let addr = match op {
|
||||
Opcode::Load
|
||||
| Opcode::Uload8
|
||||
| Opcode::Sload8
|
||||
| Opcode::Uload16
|
||||
| Opcode::Sload16
|
||||
| Opcode::Uload32
|
||||
| Opcode::Sload32 => {
|
||||
assert!(inputs.len() == 1, "only one input for load operands");
|
||||
let base = input_to_reg(ctx, inputs[0]);
|
||||
Amode::imm_reg(offset as u32, base)
|
||||
}
|
||||
|
||||
Opcode::LoadComplex
|
||||
| Opcode::Uload8Complex
|
||||
| Opcode::Sload8Complex
|
||||
| Opcode::Uload16Complex
|
||||
| Opcode::Sload16Complex
|
||||
| Opcode::Uload32Complex
|
||||
| Opcode::Sload32Complex => {
|
||||
assert!(
|
||||
inputs.len() == 2,
|
||||
"can't handle more than two inputs in complex load"
|
||||
);
|
||||
let base = input_to_reg(ctx, inputs[0]);
|
||||
let index = input_to_reg(ctx, inputs[1]);
|
||||
let shift = 0;
|
||||
Amode::imm_reg_reg_shift(offset as u32, base, index, shift)
|
||||
}
|
||||
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
match (sign_extend, is_float) {
|
||||
(true, false) => {
|
||||
// Sign-extending loads (sload8/16/32) only read elements narrower than 64 bits,
|
||||
// so `ext_mode` is always `Some` in this case.
|
||||
ctx.emit(Inst::movsx_rm_r(ext_mode.unwrap(), RegMem::mem(addr), dst));
|
||||
}
|
||||
(false, false) => {
|
||||
if elem_ty.bytes() == 8 {
|
||||
// Use a plain load.
|
||||
ctx.emit(Inst::mov64_m_r(addr, dst))
|
||||
} else {
|
||||
// Use a zero-extended load.
|
||||
ctx.emit(Inst::movzx_rm_r(ext_mode.unwrap(), RegMem::mem(addr), dst))
|
||||
}
|
||||
}
|
||||
(_, true) => unimplemented!("FPU loads"),
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Store
|
||||
| Opcode::Istore8
|
||||
| Opcode::Istore16
|
||||
| Opcode::Istore32
|
||||
| Opcode::StoreComplex
|
||||
| Opcode::Istore8Complex
|
||||
| Opcode::Istore16Complex
|
||||
| Opcode::Istore32Complex => {
|
||||
let offset = ldst_offset(ctx.data(insn)).unwrap();
|
||||
|
||||
let elem_ty = match op {
|
||||
Opcode::Istore8 | Opcode::Istore8Complex => types::I8,
|
||||
Opcode::Istore16 | Opcode::Istore16Complex => types::I16,
|
||||
Opcode::Istore32 | Opcode::Istore32Complex => types::I32,
|
||||
Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let is_float = is_float_ty(elem_ty);
|
||||
|
||||
let addr = match op {
|
||||
Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
|
||||
assert!(
|
||||
inputs.len() == 2,
|
||||
"only one input for store memory operands"
|
||||
);
|
||||
let base = input_to_reg(ctx, inputs[1]);
|
||||
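// TODO: check sign handling; `offset as u32` reinterprets a possibly negative offset, so confirm the amode's simm32 is sign-extended back when emitted.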
|
||||
Amode::imm_reg(offset as u32, base)
|
||||
}
|
||||
|
||||
Opcode::StoreComplex
|
||||
| Opcode::Istore8Complex
|
||||
| Opcode::Istore16Complex
|
||||
| Opcode::Istore32Complex => {
|
||||
assert!(
|
||||
inputs.len() == 3,
|
||||
"can't handle more than two inputs in complex load"
|
||||
);
|
||||
let base = input_to_reg(ctx, inputs[1]);
|
||||
let index = input_to_reg(ctx, inputs[2]);
|
||||
let shift = 0;
|
||||
Amode::imm_reg_reg_shift(offset as u32, base, index, shift)
|
||||
}
|
||||
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let src = input_to_reg(ctx, inputs[0]);
|
||||
|
||||
if is_float {
|
||||
unimplemented!("FPU stores");
|
||||
} else {
|
||||
ctx.emit(Inst::mov_r_m(elem_ty.bytes() as u8, src, addr));
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::StackAddr => {
|
||||
let (stack_slot, offset) = match *ctx.data(insn) {
|
||||
InstructionData::StackLoad {
|
||||
opcode: Opcode::StackAddr,
|
||||
stack_slot,
|
||||
offset,
|
||||
} => (stack_slot, offset),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
let offset: i32 = offset.into();
|
||||
println!("stackslot_addr: {:?} @ off{}", stack_slot, offset);
|
||||
let inst = ctx
|
||||
.abi()
|
||||
.stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst);
|
||||
ctx.emit(inst);
|
||||
}
|
||||
|
||||
Opcode::IaddImm
|
||||
| Opcode::ImulImm
|
||||
| Opcode::UdivImm
|
||||
@@ -296,6 +622,8 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) {
|
||||
}
|
||||
_ => unimplemented!("unimplemented lowering for opcode {:?}", op),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
@@ -305,8 +633,7 @@ impl LowerBackend for X64Backend {
|
||||
type MInst = Inst;
|
||||
|
||||
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
|
||||
lower_insn_to_regs(ctx, ir_inst);
|
||||
Ok(())
|
||||
lower_insn_to_regs(ctx, ir_inst)
|
||||
}
|
||||
|
||||
fn lower_branch_group<C: LowerCtx<I = Inst>>(
|
||||
@@ -346,33 +673,52 @@ impl LowerBackend for X64Backend {
|
||||
match op0 {
|
||||
Opcode::Brz | Opcode::Brnz => {
|
||||
let src_ty = ctx.input_ty(branches[0], 0);
|
||||
if is_int_ty(src_ty) {
|
||||
let src = input_to_reg(ctx, branches[0], 0);
|
||||
if is_int_ty(src_ty) || is_bool_ty(src_ty) {
|
||||
let src = input_to_reg(
|
||||
ctx,
|
||||
InsnInput {
|
||||
insn: branches[0],
|
||||
input: 0,
|
||||
},
|
||||
);
|
||||
let cc = match op0 {
|
||||
Opcode::Brz => CC::Z,
|
||||
Opcode::Brnz => CC::NZ,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let sizeB = int_ty_to_sizeB(src_ty);
|
||||
ctx.emit(Inst::cmp_rmi_r(sizeB, RegMemImm::imm(0), src));
|
||||
ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
|
||||
let size_bytes = src_ty.bytes() as u8;
|
||||
ctx.emit(Inst::cmp_rmi_r(size_bytes, RegMemImm::imm(0), src));
|
||||
ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
|
||||
} else {
|
||||
unimplemented!("brz/brnz with non-int type");
|
||||
unimplemented!("brz/brnz with non-int type {:?}", src_ty);
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::BrIcmp => {
|
||||
let src_ty = ctx.input_ty(branches[0], 0);
|
||||
if is_int_ty(src_ty) {
|
||||
let lhs = input_to_reg(ctx, branches[0], 0);
|
||||
let rhs = input_to_reg(ctx, branches[0], 1);
|
||||
if is_int_ty(src_ty) || is_bool_ty(src_ty) {
|
||||
let lhs = input_to_reg(
|
||||
ctx,
|
||||
InsnInput {
|
||||
insn: branches[0],
|
||||
input: 0,
|
||||
},
|
||||
);
|
||||
let rhs = input_to_reg_mem_imm(
|
||||
ctx,
|
||||
InsnInput {
|
||||
insn: branches[0],
|
||||
input: 1,
|
||||
},
|
||||
);
|
||||
let cc = CC::from_intcc(inst_condcode(ctx.data(branches[0])));
|
||||
let byte_size = int_ty_to_sizeB(src_ty);
|
||||
// FIXME verify rSR vs rSL ordering
|
||||
ctx.emit(Inst::cmp_rmi_r(byte_size, RegMemImm::reg(rhs), lhs));
|
||||
ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
|
||||
let byte_size = src_ty.bytes() as u8;
|
||||
// Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives
|
||||
// us dst - src at the machine instruction level, so invert operands.
|
||||
ctx.emit(Inst::cmp_rmi_r(byte_size, rhs, lhs));
|
||||
ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
|
||||
} else {
|
||||
unimplemented!("bricmp with non-int type");
|
||||
unimplemented!("bricmp with non-int type {:?}", src_ty);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -385,15 +731,9 @@ impl LowerBackend for X64Backend {
|
||||
// Must be an unconditional branch or trap.
|
||||
let op = ctx.data(branches[0]).opcode();
|
||||
match op {
|
||||
Opcode::Jump => {
|
||||
Opcode::Jump | Opcode::Fallthrough => {
|
||||
ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
|
||||
}
|
||||
Opcode::Fallthrough => {
|
||||
ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
|
||||
}
|
||||
Opcode::Trap => {
|
||||
unimplemented!("trap");
|
||||
}
|
||||
_ => panic!("Unknown branch type!"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@ impl X64Backend {
|
||||
fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult<VCode<inst::Inst>> {
|
||||
// This performs lowering to VCode, register-allocates the code, computes
|
||||
// block layout and finalizes branches. The result is ready for binary emission.
|
||||
let abi = Box::new(abi::X64ABIBody::new(&func, flags));
|
||||
let abi = Box::new(abi::X64ABIBody::new(&func, flags)?);
|
||||
compile::compile::<Self>(&func, self, abi)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1024,7 +1024,7 @@ impl<I: VCodeInst> MachBuffer<I> {
|
||||
let veneer_offset = self.cur_offset();
|
||||
trace!("making a veneer at {}", veneer_offset);
|
||||
let slice = &mut self.data[start..end];
|
||||
// Patch the original label use to refer to teh veneer.
|
||||
// Patch the original label use to refer to the veneer.
|
||||
trace!(
|
||||
"patching original at offset {} to veneer offset {}",
|
||||
offset,
|
||||
|
||||
@@ -106,6 +106,19 @@ fn apply_reloc(
|
||||
.wrapping_add(reloc_addend as u32);
|
||||
write_unaligned(reloc_address as *mut u32, reloc_delta_u32);
|
||||
},
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
Reloc::X86CallPCRel4 => unsafe {
|
||||
let reloc_address = body.add(r.offset as usize) as usize;
|
||||
let reloc_addend = r.addend as isize;
|
||||
let reloc_delta_u64 = (target_func_address as u64)
|
||||
.wrapping_sub(reloc_address as u64)
|
||||
.wrapping_add(reloc_addend as u64);
|
||||
assert!(
|
||||
reloc_delta_u64 as isize <= i32::max_value() as isize,
|
||||
"relocation too large to fit in i32"
|
||||
);
|
||||
write_unaligned(reloc_address as *mut u32, reloc_delta_u64 as u32);
|
||||
},
|
||||
Reloc::X86PCRelRodata4 => {
|
||||
// ignore
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user