Add a work-in-progress backend for x86_64 using the new instruction selection.

Most of the work is credited to Julian Seward.

Co-authored-by: Julian Seward <jseward@acm.org>
Co-authored-by: Chris Fallin <cfallin@mozilla.com>
Benjamin Bouvier
2020-04-27 16:19:08 +02:00
parent 6bee767129
commit fa54422854
12 changed files with 5690 additions and 6 deletions


@@ -58,10 +58,12 @@ x86 = []
arm32 = []
arm64 = []
riscv = []
x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel.
# Option to enable all architectures.
all-arch = [
"x86",
"x64",
"arm32",
"arm64",
"riscv"


@@ -3,6 +3,12 @@ use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder};
pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
let mut settings = SettingGroupBuilder::new("x86");
settings.add_bool(
"use_new_backend",
"Whether to use the new codegen backend using the new isel",
false,
);
// CPUID.01H:ECX
let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);


@@ -77,6 +77,9 @@ mod riscv;
#[cfg(feature = "x86")] #[cfg(feature = "x86")]
mod x86; mod x86;
#[cfg(feature = "x64")]
mod x64;
#[cfg(feature = "arm32")] #[cfg(feature = "arm32")]
mod arm32; mod arm32;


@@ -0,0 +1,457 @@
//! Implementation of the standard x64 ABI.
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
use crate::ir::{self, types, types::*, ArgumentExtension, StackSlot, Type};
use crate::isa::{self, x64::inst::*};
use crate::machinst::*;
use crate::settings;
use args::*;
#[derive(Clone, Debug)]
enum ABIArg {
Reg(RealReg),
_Stack,
}
#[derive(Clone, Debug)]
enum ABIRet {
Reg(RealReg),
_Stack,
}
pub(crate) struct X64ABIBody {
args: Vec<ABIArg>,
rets: Vec<ABIRet>,
/// Offsets to each stack slot.
_stack_slots: Vec<usize>,
/// Total stack size of all the stack slots.
stack_slots_size: usize,
/// Clobbered registers, as indicated by regalloc.
clobbered: Set<Writable<RealReg>>,
/// Total number of spill slots, as indicated by regalloc.
num_spill_slots: Option<usize>,
/// Calculated while creating the prologue, and used when creating the epilogue. Amount by
/// which RSP is adjusted downwards to allocate the spill area.
frame_size_bytes: Option<usize>,
call_conv: isa::CallConv,
/// The settings controlling this function's compilation.
flags: settings::Flags,
}
fn in_int_reg(ty: types::Type) -> bool {
match ty {
types::I8
| types::I16
| types::I32
| types::I64
| types::B1
| types::B8
| types::B16
| types::B32
| types::B64 => true,
_ => false,
}
}
fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
match idx {
0 => Some(regs::rdi()),
1 => Some(regs::rsi()),
2 => Some(regs::rdx()),
3 => Some(regs::rcx()),
4 => Some(regs::r8()),
5 => Some(regs::r9()),
_ => None,
}
}
fn get_intreg_for_retval_systemv(idx: usize) -> Option<Reg> {
match idx {
0 => Some(regs::rax()),
1 => Some(regs::rdx()),
_ => None,
}
}
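// For illustration: under this System V mapping, the third integer argument (idx 2)
// lands in %rdx, while a seventh integer argument (idx 6) yields None, i.e. it would
// have to be passed on the stack (currently unimplemented below). Likewise the first
// two integer return values go in %rax and %rdx.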
fn is_callee_save_systemv(r: RealReg) -> bool {
use regs::*;
match r.get_class() {
RegClass::I64 => match r.get_hw_encoding() as u8 {
ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
_ => false,
},
_ => unimplemented!(),
}
}
fn get_callee_saves(regs: Vec<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
regs.into_iter()
.filter(|r| is_callee_save_systemv(r.to_reg()))
.collect()
}
impl X64ABIBody {
/// Create a new body ABI instance.
pub(crate) fn new(f: &ir::Function, flags: settings::Flags) -> Self {
// Compute args and retvals from signature.
let mut args = vec![];
let mut next_int_arg = 0;
for param in &f.signature.params {
match param.purpose {
ir::ArgumentPurpose::VMContext if f.signature.call_conv.extends_baldrdash() => {
// `VMContext` is `r14` in Baldrdash.
args.push(ABIArg::Reg(regs::r14().to_real_reg()));
}
ir::ArgumentPurpose::Normal | ir::ArgumentPurpose::VMContext => {
if in_int_reg(param.value_type) {
if let Some(reg) = get_intreg_for_arg_systemv(next_int_arg) {
args.push(ABIArg::Reg(reg.to_real_reg()));
} else {
unimplemented!("passing arg on the stack");
}
next_int_arg += 1;
} else {
unimplemented!("non int normal register")
}
}
_ => unimplemented!("other parameter purposes"),
}
}
let mut rets = vec![];
let mut next_int_retval = 0;
for ret in &f.signature.returns {
match ret.purpose {
ir::ArgumentPurpose::Normal => {
if in_int_reg(ret.value_type) {
if let Some(reg) = get_intreg_for_retval_systemv(next_int_retval) {
rets.push(ABIRet::Reg(reg.to_real_reg()));
} else {
unimplemented!("passing return on the stack");
}
next_int_retval += 1;
} else {
unimplemented!("returning non integer normal value");
}
}
_ => {
unimplemented!("non normal argument purpose");
}
}
}
// Compute stackslot locations and total stackslot size.
let mut stack_offset: usize = 0;
let mut _stack_slots = vec![];
for (stackslot, data) in f.stack_slots.iter() {
let off = stack_offset;
stack_offset += data.size as usize;
// Align to 8 bytes.
stack_offset = (stack_offset + 7) & !7usize;
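// For example, stack slots of 12 and 5 bytes get offsets 0 and 16, and the
// rounded-up total (stack_slots_size) ends at 24.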
debug_assert_eq!(stackslot.as_u32() as usize, _stack_slots.len());
_stack_slots.push(off);
}
Self {
args,
rets,
_stack_slots,
stack_slots_size: stack_offset,
clobbered: Set::empty(),
num_spill_slots: None,
frame_size_bytes: None,
call_conv: f.signature.call_conv.clone(),
flags,
}
}
}
impl ABIBody for X64ABIBody {
type I = Inst;
fn flags(&self) -> &settings::Flags {
&self.flags
}
fn num_args(&self) -> usize {
unimplemented!()
}
fn num_retvals(&self) -> usize {
unimplemented!()
}
fn num_stackslots(&self) -> usize {
unimplemented!()
}
fn liveins(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for arg in &self.args {
if let &ABIArg::Reg(r) = arg {
set.insert(r);
}
}
set
}
fn liveouts(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for ret in &self.rets {
if let &ABIRet::Reg(r) = ret {
set.insert(r);
}
}
set
}
fn gen_copy_arg_to_reg(&self, idx: usize, to_reg: Writable<Reg>) -> Inst {
match &self.args[idx] {
ABIArg::Reg(from_reg) => {
if from_reg.get_class() == RegClass::I32 || from_reg.get_class() == RegClass::I64 {
// TODO do we need a sign extension if it's I32?
return Inst::mov_r_r(/*is64=*/ true, from_reg.to_reg(), to_reg);
}
unimplemented!("moving from non-int arg to vreg");
}
ABIArg::_Stack => unimplemented!("moving from stack arg to vreg"),
}
}
fn gen_copy_reg_to_retval(
&self,
idx: usize,
from_reg: Writable<Reg>,
ext: ArgumentExtension,
) -> Vec<Inst> {
match ext {
ArgumentExtension::None => {}
_ => unimplemented!(
"unimplemented argument extension {:?} is required for baldrdash",
ext
),
};
let mut ret = Vec::new();
match &self.rets[idx] {
ABIRet::Reg(to_reg) => {
if to_reg.get_class() == RegClass::I32 || to_reg.get_class() == RegClass::I64 {
ret.push(Inst::mov_r_r(
/*is64=*/ true,
from_reg.to_reg(),
Writable::<Reg>::from_reg(to_reg.to_reg()),
))
} else {
unimplemented!("moving from vreg to non-int return value");
}
}
ABIRet::_Stack => {
unimplemented!("moving from vreg to stack return value");
}
}
ret
}
fn gen_ret(&self) -> Inst {
Inst::ret()
}
fn gen_epilogue_placeholder(&self) -> Inst {
Inst::epilogue_placeholder()
}
fn set_num_spillslots(&mut self, slots: usize) {
self.num_spill_slots = Some(slots);
}
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>) {
self.clobbered = clobbered;
}
fn stackslot_addr(&self, _slot: StackSlot, _offset: u32, _into_reg: Writable<Reg>) -> Inst {
unimplemented!()
}
fn load_stackslot(
&self,
_slot: StackSlot,
_offset: u32,
_ty: Type,
_into_reg: Writable<Reg>,
) -> Inst {
unimplemented!("load_stackslot")
}
fn store_stackslot(&self, _slot: StackSlot, _offset: u32, _ty: Type, _from_reg: Reg) -> Inst {
unimplemented!("store_stackslot")
}
fn load_spillslot(&self, _slot: SpillSlot, _ty: Type, _into_reg: Writable<Reg>) -> Inst {
unimplemented!("load_spillslot")
}
fn store_spillslot(&self, _slot: SpillSlot, _ty: Type, _from_reg: Reg) -> Inst {
unimplemented!("store_spillslot")
}
fn gen_prologue(&mut self) -> Vec<Inst> {
let r_rsp = regs::rsp();
let mut insts = vec![];
// Baldrdash generates its own prologue sequence, so we don't have to.
if !self.call_conv.extends_baldrdash() {
let r_rbp = regs::rbp();
let w_rbp = Writable::<Reg>::from_reg(r_rbp);
// The "traditional" pre-preamble
// RSP before the call will be 0 % 16. So here, it is 8 % 16.
insts.push(Inst::push64(RMI::reg(r_rbp)));
// RSP is now 0 % 16
insts.push(Inst::mov_r_r(true, r_rsp, w_rbp));
}
// Save callee saved registers that we trash. Keep track of how much space we've used, so
// as to know what we have to do to get the base of the spill area 0 % 16.
let mut callee_saved_used = 0;
let clobbered = get_callee_saves(self.clobbered.to_vec());
for reg in clobbered {
let r_reg = reg.to_reg();
match r_reg.get_class() {
RegClass::I64 => {
insts.push(Inst::push64(RMI::reg(r_reg.to_reg())));
callee_saved_used += 8;
}
_ => unimplemented!(),
}
}
let mut total_stacksize = self.stack_slots_size + 8 * self.num_spill_slots.unwrap();
if self.call_conv.extends_baldrdash() {
// Baldrdash expects the stack to take at least the number of words set in
// baldrdash_prologue_words; count them here.
debug_assert!(
!self.flags.enable_probestack(),
"baldrdash does not expect cranelift to emit stack probes"
);
total_stacksize += self.flags.baldrdash_prologue_words() as usize * 8;
}
debug_assert!(callee_saved_used % 16 == 0 || callee_saved_used % 16 == 8);
let frame_size = total_stacksize + callee_saved_used % 16;
// Now make sure the frame stack is aligned, so RSP == 0 % 16 in the function's body.
let frame_size = (frame_size + 15) & !15;
if frame_size > 0x7FFF_FFFF {
unimplemented!("gen_prologue(x86): total_stacksize >= 2G");
}
if !self.call_conv.extends_baldrdash() {
// Explicitly allocate the frame.
let w_rsp = Writable::<Reg>::from_reg(r_rsp);
if frame_size > 0 {
insts.push(Inst::alu_rmi_r(
true,
RMI_R_Op::Sub,
RMI::imm(frame_size as u32),
w_rsp,
));
}
}
// Stash this value. We'll need it for the epilogue.
debug_assert!(self.frame_size_bytes.is_none());
self.frame_size_bytes = Some(frame_size);
insts
}
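// Illustrative example (a sketch, not exercised here): for a non-baldrdash function
// that clobbers %rbx and needs 40 bytes of stack slots plus spill slots, the prologue
// built above comes out as roughly:
// pushq %rbp
// movq %rsp, %rbp
// pushq %rbx
// subq $48, %rsp // 40 + (8 % 16), rounded up to a 16-byte multiple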
fn gen_epilogue(&self) -> Vec<Inst> {
let mut insts = vec![];
// Undo what we did in the prologue.
// Clear the spill area and the 16-alignment padding below it.
if !self.call_conv.extends_baldrdash() {
let frame_size = self.frame_size_bytes.unwrap();
if frame_size > 0 {
let r_rsp = regs::rsp();
let w_rsp = Writable::<Reg>::from_reg(r_rsp);
insts.push(Inst::alu_rmi_r(
true,
RMI_R_Op::Add,
RMI::imm(frame_size as u32),
w_rsp,
));
}
}
// Restore regs.
let clobbered = get_callee_saves(self.clobbered.to_vec());
for w_real_reg in clobbered.into_iter().rev() {
match w_real_reg.to_reg().get_class() {
RegClass::I64 => {
// TODO: make these conversion sequences less cumbersome.
insts.push(Inst::pop64(Writable::<Reg>::from_reg(
w_real_reg.to_reg().to_reg(),
)))
}
_ => unimplemented!(),
}
}
// Baldrdash generates its own preamble.
if !self.call_conv.extends_baldrdash() {
let r_rbp = regs::rbp();
let w_rbp = Writable::<Reg>::from_reg(r_rbp);
// Undo the "traditional" pre-preamble
// RSP before the call will be 0 % 16. So here, it is 8 % 16.
insts.push(Inst::pop64(w_rbp));
insts.push(Inst::ret());
}
insts
}
fn frame_size(&self) -> u32 {
self.frame_size_bytes
.expect("frame size not computed before prologue generation") as u32
}
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {
(RegClass::I64, _) => 1,
(RegClass::V128, F32) | (RegClass::V128, F64) => 1,
(RegClass::V128, _) => 2,
_ => panic!("Unexpected register class!"),
}
}
fn gen_spill(&self, _to_slot: SpillSlot, _from_reg: RealReg, _ty: Type) -> Inst {
unimplemented!()
}
fn gen_reload(&self, _to_reg: Writable<RealReg>, _from_slot: SpillSlot, _ty: Type) -> Inst {
unimplemented!()
}
}


@@ -0,0 +1,451 @@
//! Instruction operand sub-components (aka "parts"): definitions and printing.
use std::fmt;
use std::string::{String, ToString};
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector};
use crate::binemit::CodeOffset;
use crate::machinst::*;
use super::regs::show_ireg_sized;
/// A Memory Address. These denote a 64-bit value only.
#[derive(Clone)]
pub(crate) enum Addr {
/// Immediate sign-extended and a Register.
IR { simm32: u32, base: Reg },
/// sign-extend-32-to-64(Immediate) + Register1 + (Register2 << Shift)
IRRS {
simm32: u32,
base: Reg,
index: Reg,
shift: u8, /* 0 .. 3 only */
},
}
impl Addr {
// Constructors.
pub(crate) fn imm_reg(simm32: u32, base: Reg) -> Self {
debug_assert!(base.get_class() == RegClass::I64);
Self::IR { simm32, base }
}
pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Reg, index: Reg, shift: u8) -> Self {
debug_assert!(base.get_class() == RegClass::I64);
debug_assert!(index.get_class() == RegClass::I64);
debug_assert!(shift <= 3);
Addr::IRRS {
simm32,
base,
index,
shift,
}
}
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
match self {
Addr::IR { simm32: _, base } => {
collector.add_use(*base);
}
Addr::IRRS {
simm32: _,
base,
index,
shift: _,
} => {
collector.add_use(*base);
collector.add_use(*index);
}
}
}
}
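// For illustration (sketch): an address such as 16(%rdi,%rsi,4), i.e. rdi + rsi*4 + 16,
// would be built as Addr::imm_reg_reg_shift(16, rdi, rsi, 2), and 8(%rax) as
// Addr::imm_reg(8, rax), where rdi/rsi/rax stand for the corresponding real Regs.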
impl ShowWithRRU for Addr {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
match self {
Addr::IR { simm32, base } => format!("{}({})", *simm32 as i32, base.show_rru(mb_rru)),
Addr::IRRS {
simm32,
base,
index,
shift,
} => format!(
"{}({},{},{})",
*simm32 as i32,
base.show_rru(mb_rru),
index.show_rru(mb_rru),
1 << shift
),
}
}
}
/// An operand which is either an integer Register, a value in Memory or an Immediate. This can
/// denote an 8, 16, 32 or 64 bit value. For the Immediate form, in the 8- and 16-bit case, only
/// the lower 8 or 16 bits of `simm32` is relevant. In the 64-bit case, the value denoted by
/// `simm32` is its sign-extension out to 64 bits.
#[derive(Clone)]
pub(crate) enum RMI {
R { reg: Reg },
M { addr: Addr },
I { simm32: u32 },
}
impl RMI {
// Constructors
pub(crate) fn reg(reg: Reg) -> RMI {
debug_assert!(reg.get_class() == RegClass::I64);
RMI::R { reg }
}
pub(crate) fn mem(addr: Addr) -> RMI {
RMI::M { addr }
}
pub(crate) fn imm(simm32: u32) -> RMI {
RMI::I { simm32 }
}
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
match self {
RMI::R { reg } => collector.add_use(*reg),
RMI::M { addr } => addr.get_regs_as_uses(collector),
RMI::I { simm32: _ } => {}
}
}
}
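// For example, RMI::imm(0xFFFF_FFF0) used as a 64-bit operand denotes -16 (sign-extended),
// while in an 8-bit context only its low byte (0xF0) would be significant.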
impl ShowWithRRU for RMI {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.show_rru_sized(mb_rru, 8)
}
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
match self {
RMI::R { reg } => show_ireg_sized(*reg, mb_rru, size),
RMI::M { addr } => addr.show_rru(mb_rru),
RMI::I { simm32 } => format!("${}", *simm32 as i32),
}
}
}
/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
/// 32 or 64 bit value.
#[derive(Clone)]
pub(crate) enum RM {
R { reg: Reg },
M { addr: Addr },
}
impl RM {
// Constructors.
pub(crate) fn reg(reg: Reg) -> Self {
debug_assert!(reg.get_class() == RegClass::I64);
RM::R { reg }
}
pub(crate) fn mem(addr: Addr) -> Self {
RM::M { addr }
}
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
match self {
RM::R { reg } => collector.add_use(*reg),
RM::M { addr } => addr.get_regs_as_uses(collector),
}
}
}
impl ShowWithRRU for RM {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.show_rru_sized(mb_rru, 8)
}
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
match self {
RM::R { reg } => show_ireg_sized(*reg, mb_rru, size),
RM::M { addr } => addr.show_rru(mb_rru),
}
}
}
/// Some basic ALU operations. TODO: maybe add Adc, Sbb.
#[derive(Clone, PartialEq)]
pub enum RMI_R_Op {
Add,
Sub,
And,
Or,
Xor,
/// The signless, non-extending (N x N -> N, for N in {32,64}) variant.
Mul,
}
impl RMI_R_Op {
pub(crate) fn to_string(&self) -> String {
match self {
RMI_R_Op::Add => "add".to_string(),
RMI_R_Op::Sub => "sub".to_string(),
RMI_R_Op::And => "and".to_string(),
RMI_R_Op::Or => "or".to_string(),
RMI_R_Op::Xor => "xor".to_string(),
RMI_R_Op::Mul => "imul".to_string(),
}
}
}
impl fmt::Debug for RMI_R_Op {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", self.to_string())
}
}
/// These indicate ways of extending (widening) a value, using the Intel naming:
/// B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64
#[derive(Clone, PartialEq)]
pub enum ExtMode {
/// Byte -> Longword.
BL,
/// Byte -> Quadword.
BQ,
/// Word -> Longword.
WL,
/// Word -> Quadword.
WQ,
/// Longword -> Quadword.
LQ,
}
impl ExtMode {
pub(crate) fn to_string(&self) -> String {
match self {
ExtMode::BL => "bl".to_string(),
ExtMode::BQ => "bq".to_string(),
ExtMode::WL => "wl".to_string(),
ExtMode::WQ => "wq".to_string(),
ExtMode::LQ => "lq".to_string(),
}
}
pub(crate) fn dst_size(&self) -> u8 {
match self {
ExtMode::BL => 4,
ExtMode::BQ => 8,
ExtMode::WL => 4,
ExtMode::WQ => 8,
ExtMode::LQ => 8,
}
}
}
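// For example, ExtMode::WQ describes widening a 16-bit value to 64 bits (movzwq/movswq);
// its dst_size() is 8 bytes, whereas BL and WL report a 4-byte destination.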
impl fmt::Debug for ExtMode {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", self.to_string())
}
}
/// These indicate the form of a scalar shift: left, signed right, unsigned right.
#[derive(Clone)]
pub enum ShiftKind {
Left,
RightZ,
RightS,
}
impl ShiftKind {
pub(crate) fn to_string(&self) -> String {
match self {
ShiftKind::Left => "shl".to_string(),
ShiftKind::RightZ => "shr".to_string(),
ShiftKind::RightS => "sar".to_string(),
}
}
}
impl fmt::Debug for ShiftKind {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", self.to_string())
}
}
/// These indicate condition code tests. Not all are represented since not all are useful in
/// compiler-generated code.
#[derive(Copy, Clone)]
#[repr(u8)]
pub enum CC {
/// overflow
O = 0,
/// no overflow
NO = 1,
/// < unsigned
B = 2,
/// >= unsigned
NB = 3,
/// zero
Z = 4,
/// not-zero
NZ = 5,
/// <= unsigned
BE = 6,
/// > unsigned
NBE = 7,
/// negative
S = 8,
/// not-negative
NS = 9,
/// < signed
L = 12,
/// >= signed
NL = 13,
/// <= signed
LE = 14,
/// > signed
NLE = 15,
}
impl CC {
pub(crate) fn to_string(&self) -> String {
match self {
CC::O => "o".to_string(),
CC::NO => "no".to_string(),
CC::B => "b".to_string(),
CC::NB => "nb".to_string(),
CC::Z => "z".to_string(),
CC::NZ => "nz".to_string(),
CC::BE => "be".to_string(),
CC::NBE => "nbe".to_string(),
CC::S => "s".to_string(),
CC::NS => "ns".to_string(),
CC::L => "l".to_string(),
CC::NL => "nl".to_string(),
CC::LE => "le".to_string(),
CC::NLE => "nle".to_string(),
}
}
pub(crate) fn invert(&self) -> CC {
match self {
CC::O => CC::NO,
CC::NO => CC::O,
CC::B => CC::NB,
CC::NB => CC::B,
CC::Z => CC::NZ,
CC::NZ => CC::Z,
CC::BE => CC::NBE,
CC::NBE => CC::BE,
CC::S => CC::NS,
CC::NS => CC::S,
CC::L => CC::NL,
CC::NL => CC::L,
CC::LE => CC::NLE,
CC::NLE => CC::LE,
}
}
pub(crate) fn get_enc(self) -> u8 {
self as u8
}
}
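// Note that these values match the hardware cc field (a Jcc is emitted as 0x0F, 0x80 + cc;
// see the emitter), and each invert() pair differs only in the lowest encoding bit,
// e.g. CC::L (12) vs CC::NL (13).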
impl fmt::Debug for CC {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", self.to_string())
}
}
/// A branch target. Either unresolved (basic-block index) or resolved (offset
/// from end of current instruction).
#[derive(Clone, Copy, Debug)]
pub enum BranchTarget {
/// An unresolved reference to a BlockIndex, as passed into
/// `lower_branch_group()`.
Block(BlockIndex),
/// A resolved reference to another instruction, after
/// `Inst::with_block_offsets()`. This offset is in bytes.
ResolvedOffset(BlockIndex, isize),
}
impl ShowWithRRU for BranchTarget {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
match self {
BranchTarget::Block(bix) => format!("(Block {})", bix),
BranchTarget::ResolvedOffset(bix, offs) => format!("(Block {}, offset {})", bix, offs),
}
}
}
impl BranchTarget {
/// Lower the branch target given offsets of each block.
pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) {
match self {
&mut BranchTarget::Block(bix) => {
let bix = bix as usize;
assert!(bix < targets.len());
let block_offset_in_func = targets[bix];
let branch_offset = (block_offset_in_func as isize) - (my_offset as isize);
*self = BranchTarget::ResolvedOffset(bix as BlockIndex, branch_offset);
}
&mut BranchTarget::ResolvedOffset(..) => {}
}
}
/// Get the block index.
pub fn as_block_index(&self) -> Option<BlockIndex> {
match self {
&BranchTarget::Block(bix) => Some(bix),
_ => None,
}
}
/// Get the offset as a signed 32 bit byte offset. This returns the
/// offset in bytes between the first byte of the source and the first
/// byte of the target. It does not take into account the Intel-specific
/// rule that a branch offset is encoded as relative to the start of the
/// following instruction. That is a problem for the emitter to deal
/// with.
pub fn as_offset_i32(&self) -> Option<i32> {
match self {
&BranchTarget::ResolvedOffset(_, off) => {
// Leave a bit of slack so that the emitter is guaranteed to
// be able to add the length of the jump instruction encoding
// to this value and still have a value in signed-32 range.
if off >= -0x7FFF_FF00isize && off <= 0x7FFF_FF00isize {
Some(off as i32)
} else {
None
}
}
_ => None,
}
}
/// Map the block index given a transform map.
pub fn map(&mut self, block_index_map: &[BlockIndex]) {
match self {
&mut BranchTarget::Block(ref mut bix) => {
let n = block_index_map[*bix as usize];
*bix = n;
}
_ => panic!("BranchTarget::map() called on already-lowered BranchTarget!"),
}
}
}
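// Worked example: a jump placed 100 bytes into the function whose target block starts
// at offset 40 lowers to ResolvedOffset(bix, -60); the emitter then subtracts the length
// of the jump encoding itself (5 bytes for `jmp rel32`), producing a stored rel32 of -65.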


@@ -0,0 +1,888 @@
use regalloc::{Reg, RegClass};
use crate::isa::x64::inst::*;
fn low8willSXto64(x: u32) -> bool {
let xs = (x as i32) as i64;
xs == ((xs << 56) >> 56)
}
fn low8willSXto32(x: u32) -> bool {
let xs = x as i32;
xs == ((xs << 24) >> 24)
}
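// For example, low8willSXto32(0x7F) and low8willSXto32(0xFFFF_FF80) (i.e. -128) are true,
// but low8willSXto32(0x80) (i.e. +128) is false, since sign-extending its low byte gives -128.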
//=============================================================================
// Instructions and subcomponents: emission
// For all of the routines that take both a memory-or-reg operand (sometimes
// called "E" in the Intel documentation) and a reg-only operand ("G" in
// Intelese), the order is always G first, then E.
//
// "enc" in the following means "hardware register encoding number".
#[inline(always)]
fn mkModRegRM(m0d: u8, encRegG: u8, rmE: u8) -> u8 {
debug_assert!(m0d < 4);
debug_assert!(encRegG < 8);
debug_assert!(rmE < 8);
((m0d & 3) << 6) | ((encRegG & 7) << 3) | (rmE & 7)
}
#[inline(always)]
fn mkSIB(shift: u8, encIndex: u8, encBase: u8) -> u8 {
debug_assert!(shift < 4);
debug_assert!(encIndex < 8);
debug_assert!(encBase < 8);
((shift & 3) << 6) | ((encIndex & 7) << 3) | (encBase & 7)
}
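// For example, mkModRegRM(0b11, 0, 1) == 0xC1: the register-direct form with rax's
// encoding (0) in the reg field and rcx's encoding (1) in the r/m field. Likewise
// mkSIB(2, 6, 7) == 0xB7: scale 4, index rsi, base rdi.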
/// Get the encoding number from something which we sincerely hope is a real
/// register of class I64.
#[inline(always)]
fn iregEnc(reg: Reg) -> u8 {
debug_assert!(reg.is_real());
debug_assert!(reg.get_class() == RegClass::I64);
reg.get_hw_encoding()
}
// F_*: these flags describe special handling of the insn to be generated. Be
// careful with these. It is easy to create nonsensical combinations.
const F_NONE: u32 = 0;
/// Emit the REX prefix byte even if it appears to be redundant (== 0x40).
const F_RETAIN_REDUNDANT_REX: u32 = 1;
/// Set the W bit in the REX prefix to zero. By default it will be set to 1,
/// indicating a 64-bit operation.
const F_CLEAR_REX_W: u32 = 2;
/// Add an 0x66 (operand-size override) prefix. This is necessary to indicate
/// a 16-bit operation. Normally this will be used together with F_CLEAR_REX_W.
const F_PREFIX_66: u32 = 4;
/// This is the core 'emit' function for instructions that reference memory.
///
/// For an instruction that has as operands a register `encG` and a memory
/// address `memE`, create and emit, first the REX prefix, then caller-supplied
/// opcode byte(s) (`opcodes` and `numOpcodes`), then the MOD/RM byte, then
/// optionally, a SIB byte, and finally optionally an immediate that will be
/// derived from the `memE` operand. For most instructions up to and including
/// SSE4.2, that will be the whole instruction.
///
/// The opcodes are written bigendianly for the convenience of callers. For
/// example, if the opcode bytes to be emitted are, in this order, F3 0F 27,
/// then the caller should pass `opcodes` == 0xF3_0F_27 and `numOpcodes` == 3.
///
/// The register operand is represented here not as a `Reg` but as its hardware
/// encoding, `encG`. `flags` can specify special handling for the REX prefix.
/// By default, the REX prefix will indicate a 64-bit operation and will be
/// deleted if it is redundant (0x40). Note that for a 64-bit operation, the
/// REX prefix will normally never be redundant, since REX.W must be 1 to
/// indicate a 64-bit operation.
fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE<O: MachSectionOutput>(
sink: &mut O,
opcodes: u32,
mut numOpcodes: usize,
encG: u8,
memE: &Addr,
flags: u32,
) {
// General comment for this function: the registers in `memE` must be
// 64-bit integer registers, because they are part of an address
// expression. But `encG` can be derived from a register of any class.
let prefix66 = (flags & F_PREFIX_66) != 0;
let clearRexW = (flags & F_CLEAR_REX_W) != 0;
let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0;
// The operand-size override, if requested. This indicates a 16-bit
// operation.
if prefix66 {
sink.put1(0x66);
}
match memE {
Addr::IR { simm32, base: regE } => {
// First, cook up the REX byte. This is easy.
let encE = iregEnc(*regE);
let w = if clearRexW { 0 } else { 1 };
let r = (encG >> 3) & 1;
let x = 0;
let b = (encE >> 3) & 1;
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
if rex != 0x40 || retainRedundant {
sink.put1(rex);
}
// Now the opcode(s). These include any other prefixes the caller
// hands to us.
while numOpcodes > 0 {
numOpcodes -= 1;
sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
}
// Now the mod/rm and associated immediates. This is
// significantly complicated due to the multiple special cases.
if *simm32 == 0
&& encE != regs::ENC_RSP
&& encE != regs::ENC_RBP
&& encE != regs::ENC_R12
&& encE != regs::ENC_R13
{
// FIXME JRS 2020Feb11: those four tests can surely be
// replaced by a single mask-and-compare check. We should do
// that because this routine is likely to be hot.
sink.put1(mkModRegRM(0, encG & 7, encE & 7));
} else if *simm32 == 0 && (encE == regs::ENC_RSP || encE == regs::ENC_R12) {
sink.put1(mkModRegRM(0, encG & 7, 4));
sink.put1(0x24);
} else if low8willSXto32(*simm32) && encE != regs::ENC_RSP && encE != regs::ENC_R12 {
sink.put1(mkModRegRM(1, encG & 7, encE & 7));
sink.put1((simm32 & 0xFF) as u8);
} else if encE != regs::ENC_RSP && encE != regs::ENC_R12 {
sink.put1(mkModRegRM(2, encG & 7, encE & 7));
sink.put4(*simm32);
} else if (encE == regs::ENC_RSP || encE == regs::ENC_R12) && low8willSXto32(*simm32) {
// REX.B distinguishes RSP from R12
sink.put1(mkModRegRM(1, encG & 7, 4));
sink.put1(0x24);
sink.put1((simm32 & 0xFF) as u8);
} else if encE == regs::ENC_R12 || encE == regs::ENC_RSP {
//.. wait for test case for RSP case
// REX.B distinguishes RSP from R12
sink.put1(mkModRegRM(2, encG & 7, 4));
sink.put1(0x24);
sink.put4(*simm32);
} else {
unreachable!("emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE: IR");
}
}
// Bizarrely, the IRRS case is much simpler.
Addr::IRRS {
simm32,
base: regBase,
index: regIndex,
shift,
} => {
let encBase = iregEnc(*regBase);
let encIndex = iregEnc(*regIndex);
// The rex byte
let w = if clearRexW { 0 } else { 1 };
let r = (encG >> 3) & 1;
let x = (encIndex >> 3) & 1;
let b = (encBase >> 3) & 1;
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
if rex != 0x40 || retainRedundant {
sink.put1(rex);
}
// All other prefixes and opcodes
while numOpcodes > 0 {
numOpcodes -= 1;
sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
}
// modrm, SIB, immediates
if low8willSXto32(*simm32) && encIndex != regs::ENC_RSP {
sink.put1(mkModRegRM(1, encG & 7, 4));
sink.put1(mkSIB(*shift, encIndex & 7, encBase & 7));
sink.put1(*simm32 as u8);
} else if encIndex != regs::ENC_RSP {
sink.put1(mkModRegRM(2, encG & 7, 4));
sink.put1(mkSIB(*shift, encIndex & 7, encBase & 7));
sink.put4(*simm32);
} else {
panic!("emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE: IRRS");
}
}
}
}
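// Worked example: emitting `movq 8(%rdi), %rax` through this routine (opcodes == 0x8B,
// numOpcodes == 1, encG == 0 (rax), memE == IR { simm32: 8, base: rdi }, flags == F_NONE)
// produces the REX byte 0x48 (W=1), the opcode byte 0x8B, the ModRM byte 0x47 (mod=01,
// reg=rax, r/m=rdi), and the 8-bit displacement 0x08 -- i.e. the bytes 48 8B 47 08.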
/// This is the core 'emit' function for instructions that do not reference
/// memory.
///
/// This is conceptually the same as
/// emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE, except it is for the case
/// where the E operand is a register rather than memory. Hence it is much
/// simpler.
fn emit_REX_OPCODES_MODRM_encG_encE<O: MachSectionOutput>(
sink: &mut O,
opcodes: u32,
mut numOpcodes: usize,
encG: u8,
encE: u8,
flags: u32,
) {
// EncG and EncE can be derived from registers of any class, and they
// don't even have to be from the same class. For example, for an
// integer-to-FP conversion insn, one might be RegClass::I64 and the other
// RegClass::V128.
let prefix66 = (flags & F_PREFIX_66) != 0;
let clearRexW = (flags & F_CLEAR_REX_W) != 0;
let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0;
// The operand-size override
if prefix66 {
sink.put1(0x66);
}
// The rex byte
let w = if clearRexW { 0 } else { 1 };
let r = (encG >> 3) & 1;
let x = 0;
let b = (encE >> 3) & 1;
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
if rex != 0x40 || retainRedundant {
sink.put1(rex);
}
// All other prefixes and opcodes
while numOpcodes > 0 {
numOpcodes -= 1;
sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
}
// Now the mod/rm byte. The instruction we're generating doesn't access
// memory, so there is no SIB byte or immediate -- we're done.
sink.put1(mkModRegRM(3, encG & 7, encE & 7));
}
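// Worked example: `movq %rsi, %rdi` (see Inst::Mov_R_R below, with G = src = rsi and
// E = dst = rdi) emits the REX byte 0x48 (W=1), the opcode 0x89, and ModRM 0xF7
// (mod=11, reg=rsi, r/m=rdi), i.e. the bytes 48 89 F7.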
// These are merely wrappers for the above two functions that facilitate passing
// actual `Reg`s rather than their encodings.
fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE<O: MachSectionOutput>(
sink: &mut O,
opcodes: u32,
numOpcodes: usize,
regG: Reg,
memE: &Addr,
flags: u32,
) {
// JRS FIXME 2020Feb07: this should really just be `regEnc` not `iregEnc`
let encG = iregEnc(regG);
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, opcodes, numOpcodes, encG, memE, flags);
}
fn emit_REX_OPCODES_MODRM_regG_regE<O: MachSectionOutput>(
sink: &mut O,
opcodes: u32,
numOpcodes: usize,
regG: Reg,
regE: Reg,
flags: u32,
) {
// JRS FIXME 2020Feb07: these should really just be `regEnc` not `iregEnc`
let encG = iregEnc(regG);
let encE = iregEnc(regE);
emit_REX_OPCODES_MODRM_encG_encE(sink, opcodes, numOpcodes, encG, encE, flags);
}
/// Write a suitable number of bits from an imm64 to the sink.
fn emit_simm<O: MachSectionOutput>(sink: &mut O, size: u8, simm32: u32) {
match size {
8 | 4 => sink.put4(simm32),
2 => sink.put2(simm32 as u16),
1 => sink.put1(simm32 as u8),
_ => panic!("x64::Inst::emit_simm: unreachable"),
}
}
/// The top-level emit function.
///
/// Important! Do not add improved (shortened) encoding cases to existing
/// instructions without also adding tests for those improved encodings. That
/// is a dangerous game that leads to hard-to-track-down errors in the emitted
/// code.
///
/// For all instructions, make sure to have test coverage for all of the
/// following situations. Do this by creating the cross product resulting from
/// applying the following rules to each operand:
///
/// (1) for any insn that mentions a register: one test using a register from
/// the group [rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi] and a second one
/// using a register from the group [r8, r9, r10, r11, r12, r13, r14, r15].
/// This helps detect incorrect REX prefix construction.
///
/// (2) for any insn that mentions a byte register: one test for each of the
/// four encoding groups [al, cl, dl, bl], [spl, bpl, sil, dil],
/// [r8b .. r11b] and [r12b .. r15b]. This checks that
/// apparently-redundant REX prefixes are retained when required.
///
/// (3) for any insn that contains an immediate field, check the following
/// cases: field is zero, field is in simm8 range (-128 .. 127), field is
/// in simm32 range (-0x8000_0000 .. 0x7FFF_FFFF). This is because some
/// instructions that require a 32-bit immediate have a short-form encoding
/// when the imm is in simm8 range.
///
/// Rules (1), (2) and (3) don't apply for registers within address expressions
/// (`Addr`s). Those are already pretty well tested, and the registers in them
/// don't have any effect on the containing instruction (apart from possibly
/// requiring REX prefix bits).
///
/// When choosing registers for a test, avoid using registers with the same
/// offset within a given group. For example, don't use rax and r8, since they
/// both have the lowest 3 bits as 000, and so the test won't detect errors
/// where those 3-bit register sub-fields are confused by the emitter. Instead
/// use (eg) rax (lo3 = 000) and r9 (lo3 = 001). Similarly, don't use (eg) cl
/// and bpl since they have the same offset in their group; use instead (eg) cl
/// and sil.
///
/// For all instructions, also add a test that uses only low-half registers
/// (rax .. rdi, xmm0 .. xmm7) etc, so as to check that any redundant REX
/// prefixes are correctly omitted. This low-half restriction must apply to
/// _all_ registers in the insn, even those in address expressions.
///
/// Following these rules creates large numbers of test cases, but it's the
/// only way to make the emitter reliable.
///
/// Known possible improvements:
///
/// * there's a shorter encoding for shl/shr/sar by a 1-bit immediate. (Do we
/// care?)
pub(crate) fn emit<O: MachSectionOutput>(inst: &Inst, sink: &mut O) {
match inst {
Inst::Nop { len: 0 } => {}
Inst::Alu_RMI_R {
is_64,
op,
src: srcE,
dst: regG,
} => {
let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W };
if *op == RMI_R_Op::Mul {
// We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so
// we have to special-case it.
match srcE {
RMI::R { reg: regE } => {
emit_REX_OPCODES_MODRM_regG_regE(
sink,
0x0FAF,
2,
regG.to_reg(),
*regE,
flags,
);
}
RMI::M { addr } => {
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FAF,
2,
regG.to_reg(),
addr,
flags,
);
}
RMI::I { simm32 } => {
let useImm8 = low8willSXto32(*simm32);
let opcode = if useImm8 { 0x6B } else { 0x69 };
// Yes, really, regG twice.
emit_REX_OPCODES_MODRM_regG_regE(
sink,
opcode,
1,
regG.to_reg(),
regG.to_reg(),
flags,
);
emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32);
}
}
} else {
let (opcode_R, opcode_M, subopcode_I) = match op {
RMI_R_Op::Add => (0x01, 0x03, 0),
RMI_R_Op::Sub => (0x29, 0x2B, 5),
RMI_R_Op::And => (0x21, 0x23, 4),
RMI_R_Op::Or => (0x09, 0x0B, 1),
RMI_R_Op::Xor => (0x31, 0x33, 6),
RMI_R_Op::Mul => panic!("unreachable"),
};
match srcE {
RMI::R { reg: regE } => {
// Note. The arguments .. regE .. regG .. sequence
// here is the opposite of what is expected. I'm not
// sure why this is. But I am fairly sure that the
// arg order could be switched back to the expected
// .. regG .. regE .. if opcode_rr is also switched
// over to the "other" basic integer opcode (viz, the
// R/RM vs RM/R duality). However, that would mean
// that the test results won't be in accordance with
// the GNU as reference output. In other words, the
// inversion exists as a result of using GNU as as a
// gold standard.
emit_REX_OPCODES_MODRM_regG_regE(
sink,
opcode_R,
1,
*regE,
regG.to_reg(),
flags,
);
// NB: if this is ever extended to handle byte size
// ops, be sure to retain redundant REX prefixes.
}
RMI::M { addr } => {
// Whereas here we revert to the "normal" G-E ordering.
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
opcode_M,
1,
regG.to_reg(),
addr,
flags,
);
}
RMI::I { simm32 } => {
let useImm8 = low8willSXto32(*simm32);
let opcode = if useImm8 { 0x83 } else { 0x81 };
// And also here we use the "normal" G-E ordering.
let encG = iregEnc(regG.to_reg());
emit_REX_OPCODES_MODRM_encG_encE(sink, opcode, 1, subopcode_I, encG, flags);
emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32);
}
}
}
}
Inst::Imm_R {
dst_is_64,
simm64,
dst,
} => {
let encDst = iregEnc(dst.to_reg());
if *dst_is_64 {
// FIXME JRS 2020Feb10: also use the 32-bit case here when
// possible
sink.put1(0x48 | ((encDst >> 3) & 1));
sink.put1(0xB8 | (encDst & 7));
sink.put8(*simm64);
} else {
if ((encDst >> 3) & 1) == 1 {
sink.put1(0x41);
}
sink.put1(0xB8 | (encDst & 7));
sink.put4(*simm64 as u32);
}
}
Inst::Mov_R_R { is_64, src, dst } => {
let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W };
emit_REX_OPCODES_MODRM_regG_regE(sink, 0x89, 1, *src, dst.to_reg(), flags);
}
Inst::MovZX_M_R { extMode, addr, dst } => {
match extMode {
ExtMode::BL => {
// MOVZBL is (REX.W==0) 0F B6 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FB6,
2,
dst.to_reg(),
addr,
F_CLEAR_REX_W,
)
}
ExtMode::BQ => {
// MOVZBQ is (REX.W==1) 0F B6 /r
// I'm not sure why the Intel manual offers different
// encodings for MOVZBQ than for MOVZBL. AIUI they should
// achieve the same, since MOVZBL is just going to zero out
// the upper half of the destination anyway.
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FB6,
2,
dst.to_reg(),
addr,
F_NONE,
)
}
ExtMode::WL => {
// MOVZWL is (REX.W==0) 0F B7 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FB7,
2,
dst.to_reg(),
addr,
F_CLEAR_REX_W,
)
}
ExtMode::WQ => {
// MOVZWQ is (REX.W==1) 0F B7 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FB7,
2,
dst.to_reg(),
addr,
F_NONE,
)
}
ExtMode::LQ => {
// This is just a standard 32 bit load, and we rely on the
// default zero-extension rule to perform the extension.
// MOV r/m32, r32 is (REX.W==0) 8B /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x8B,
1,
dst.to_reg(),
addr,
F_CLEAR_REX_W,
)
}
}
}
Inst::Mov64_M_R { addr, dst } => {
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x8B, 1, dst.to_reg(), addr, F_NONE)
}
Inst::MovSX_M_R { extMode, addr, dst } => {
match extMode {
ExtMode::BL => {
// MOVSBL is (REX.W==0) 0F BE /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FBE,
2,
dst.to_reg(),
addr,
F_CLEAR_REX_W,
)
}
ExtMode::BQ => {
// MOVSBQ is (REX.W==1) 0F BE /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FBE,
2,
dst.to_reg(),
addr,
F_NONE,
)
}
ExtMode::WL => {
// MOVSWL is (REX.W==0) 0F BF /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FBF,
2,
dst.to_reg(),
addr,
F_CLEAR_REX_W,
)
}
ExtMode::WQ => {
// MOVSWQ is (REX.W==1) 0F BF /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FBF,
2,
dst.to_reg(),
addr,
F_NONE,
)
}
ExtMode::LQ => {
// MOVSLQ is (REX.W==1) 63 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x63,
1,
dst.to_reg(),
addr,
F_NONE,
)
}
}
}
Inst::Mov_R_M { size, src, addr } => {
match size {
1 => {
// This is one of the few places where the presence of a
// redundant REX prefix changes the meaning of the
// instruction.
let encSrc = iregEnc(*src);
let retainRedundantRex = if encSrc >= 4 && encSrc <= 7 {
F_RETAIN_REDUNDANT_REX
} else {
0
};
// MOV r8, r/m8 is (REX.W==0) 88 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x88,
1,
*src,
addr,
F_CLEAR_REX_W | retainRedundantRex,
)
}
2 => {
// MOV r16, r/m16 is 66 (REX.W==0) 89 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x89,
1,
*src,
addr,
F_CLEAR_REX_W | F_PREFIX_66,
)
}
4 => {
// MOV r32, r/m32 is (REX.W==0) 89 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x89,
1,
*src,
addr,
F_CLEAR_REX_W,
)
}
8 => {
// MOV r64, r/m64 is (REX.W==1) 89 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x89, 1, *src, addr, F_NONE)
}
_ => panic!("x64::Inst::Mov_R_M::emit: unreachable"),
}
}
Inst::Shift_R {
is_64,
kind,
num_bits,
dst,
} => {
let encDst = iregEnc(dst.to_reg());
let subopcode = match kind {
ShiftKind::Left => 4,
ShiftKind::RightZ => 5,
ShiftKind::RightS => 7,
};
match num_bits {
None => {
// SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode
// SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode
emit_REX_OPCODES_MODRM_encG_encE(
sink,
0xD3,
1,
subopcode,
encDst,
if *is_64 { F_NONE } else { F_CLEAR_REX_W },
);
}
Some(num_bits) => {
// SHL/SHR/SAR $ib, reg32 is (REX.W==0) C1 /subopcode ib
// SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib
// When the shift amount is 1, there's an even shorter encoding, but we don't
// bother with that nicety here.
emit_REX_OPCODES_MODRM_encG_encE(
sink,
0xC1,
1,
subopcode,
encDst,
if *is_64 { F_NONE } else { F_CLEAR_REX_W },
);
sink.put1(*num_bits);
}
}
}
Inst::Cmp_RMI_R {
size,
src: srcE,
dst: regG,
} => {
let mut retainRedundantRex = 0;
if *size == 1 {
// Here, a redundant REX prefix changes the meaning of the
// instruction.
let encG = iregEnc(*regG);
if encG >= 4 && encG <= 7 {
retainRedundantRex = F_RETAIN_REDUNDANT_REX;
}
}
let mut flags = match size {
8 => F_NONE,
4 => F_CLEAR_REX_W,
2 => F_CLEAR_REX_W | F_PREFIX_66,
1 => F_CLEAR_REX_W | retainRedundantRex,
_ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"),
};
match srcE {
RMI::R { reg: regE } => {
let opcode = if *size == 1 { 0x38 } else { 0x39 };
if *size == 1 {
// We also need to check whether the E register forces
// the use of a redundant REX.
let encE = iregEnc(*regE);
if encE >= 4 && encE <= 7 {
flags |= F_RETAIN_REDUNDANT_REX;
}
}
// Same comment re swapped args as for Alu_RMI_R.
emit_REX_OPCODES_MODRM_regG_regE(sink, opcode, 1, *regE, *regG, flags);
}
RMI::M { addr } => {
let opcode = if *size == 1 { 0x3A } else { 0x3B };
// Whereas here we revert to the "normal" G-E ordering.
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, opcode, 1, *regG, addr, flags);
}
RMI::I { simm32 } => {
// FIXME JRS 2020Feb11: there are shorter encodings for
// cmp $imm, rax/eax/ax/al.
let useImm8 = low8willSXto32(*simm32);
let opcode = if *size == 1 {
0x80
} else if useImm8 {
0x83
} else {
0x81
};
// And also here we use the "normal" G-E ordering.
let encG = iregEnc(*regG);
emit_REX_OPCODES_MODRM_encG_encE(
sink, opcode, 1, 7, /*subopcode*/
encG, flags,
);
emit_simm(sink, if useImm8 { 1 } else { *size }, *simm32);
}
}
}
Inst::Push64 { src } => {
match src {
RMI::R { reg } => {
let encReg = iregEnc(*reg);
let rex = 0x40 | ((encReg >> 3) & 1);
if rex != 0x40 {
sink.put1(rex);
}
sink.put1(0x50 | (encReg & 7));
}
RMI::M { addr } => {
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
sink,
0xFF,
1,
6, /*subopcode*/
addr,
F_CLEAR_REX_W,
);
}
RMI::I { simm32 } => {
if low8willSXto64(*simm32) {
sink.put1(0x6A);
sink.put1(*simm32 as u8);
} else {
sink.put1(0x68);
sink.put4(*simm32);
}
}
}
}
Inst::Pop64 { dst } => {
let encDst = iregEnc(dst.to_reg());
if encDst >= 8 {
// 0x41 == REX.{W=0, B=1}. It seems that REX.W is irrelevant
// here.
sink.put1(0x41);
}
sink.put1(0x58 + (encDst & 7));
}
//
// ** Inst::CallKnown
//
Inst::CallUnknown { dest } => {
match dest {
RM::R { reg } => {
let regEnc = iregEnc(*reg);
emit_REX_OPCODES_MODRM_encG_encE(
sink,
0xFF,
1,
2, /*subopcode*/
regEnc,
F_CLEAR_REX_W,
);
}
RM::M { addr } => {
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
sink,
0xFF,
1,
2, /*subopcode*/
addr,
F_CLEAR_REX_W,
);
}
}
}
Inst::Ret {} => sink.put1(0xC3),
Inst::JmpKnown {
dest: BranchTarget::Block(..),
} => {
// This case occurs when we are computing block offsets/sizes, prior to lowering
// block-index targets to concrete offsets. Only the size matters, so emit 5 placeholder bytes.
sink.put1(0);
sink.put4(0);
}
Inst::JmpKnown {
dest: BranchTarget::ResolvedOffset(_bix, offset),
} if *offset >= -0x7FFF_FF00 && *offset <= 0x7FFF_FF00 => {
// And now for real
let mut offs_i32 = *offset as i32;
offs_i32 -= 5;
let offs_u32 = offs_i32 as u32;
sink.put1(0xE9);
sink.put4(offs_u32);
}
//
// ** Inst::JmpCondSymm XXXX should never happen
//
Inst::JmpCond {
cc: _,
target: BranchTarget::Block(..),
} => {
// This case occurs when we are computing block offsets / sizes,
// prior to lowering block-index targets to concrete-offset targets.
// Only the size matters, so let's emit 6 bytes, as below.
sink.put1(0);
sink.put1(0);
sink.put4(0);
}
Inst::JmpCond {
cc,
target: BranchTarget::ResolvedOffset(_bix, offset),
} if *offset >= -0x7FFF_FF00 && *offset <= 0x7FFF_FF00 => {
// This insn is 6 bytes long. Currently `offset` is relative to
// the start of this insn, but the Intel encoding requires it to
// be relative to the start of the next instruction. Hence the
// adjustment.
let mut offs_i32 = *offset as i32;
offs_i32 -= 6;
let offs_u32 = offs_i32 as u32;
sink.put1(0x0F);
sink.put1(0x80 + cc.get_enc());
sink.put4(offs_u32);
}
//
// ** Inst::JmpCondCompound XXXX should never happen
//
Inst::JmpUnknown { target } => {
match target {
RM::R { reg } => {
let regEnc = iregEnc(*reg);
emit_REX_OPCODES_MODRM_encG_encE(
sink,
0xFF,
1,
4, /*subopcode*/
regEnc,
F_CLEAR_REX_W,
);
}
RM::M { addr } => {
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
sink,
0xFF,
1,
4, /*subopcode*/
addr,
F_CLEAR_REX_W,
);
}
}
}
_ => panic!("x64_emit: unhandled: {} ", inst.show_rru(None)),
}
}

File diff suppressed because it is too large.


@@ -0,0 +1,956 @@
//! This module defines x86_64-specific machine instruction types.
#![allow(dead_code)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
use std::fmt;
use std::string::{String, ToString};
use regalloc::RegUsageCollector;
use regalloc::Set;
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable};
use crate::binemit::CodeOffset;
use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I64, I8};
use crate::ir::ExternalName;
use crate::ir::Type;
use crate::machinst::*;
use crate::{settings, CodegenError, CodegenResult};
pub mod args;
mod emit;
#[cfg(test)]
mod emit_tests;
pub mod regs;
use args::*;
use regs::{create_reg_universe_systemv, show_ireg_sized};
//=============================================================================
// Instructions (top level): definition
// Don't build these directly. Instead use the Inst:: functions to create them.
/// Instructions. Destinations are on the RIGHT (a la AT&T syntax).
#[derive(Clone)]
pub(crate) enum Inst {
/// nops of various sizes, including zero
Nop { len: u8 },
/// (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg
Alu_RMI_R {
is_64: bool,
op: RMI_R_Op,
src: RMI,
dst: Writable<Reg>,
},
/// (imm32 imm64) reg.
/// Either: movl $imm32, %reg32 or movabsq $imm64, %reg64
Imm_R {
dst_is_64: bool,
simm64: u64,
dst: Writable<Reg>,
},
/// mov (64 32) reg reg
Mov_R_R {
is_64: bool,
src: Reg,
dst: Writable<Reg>,
},
/// movz (bl bq wl wq lq) addr reg (good for all ZX loads except 64->64).
/// Note that the lq variant doesn't really exist since the default
/// zero-extend rule makes it unnecessary. For that case we emit the
/// equivalent "movl AM, reg32".
MovZX_M_R {
extMode: ExtMode,
addr: Addr,
dst: Writable<Reg>,
},
/// A plain 64-bit integer load, since MovZX_M_R can't represent that
Mov64_M_R { addr: Addr, dst: Writable<Reg> },
/// movs (bl bq wl wq lq) addr reg (good for all SX loads)
MovSX_M_R {
extMode: ExtMode,
addr: Addr,
dst: Writable<Reg>,
},
/// mov (b w l q) reg addr (good for all integer stores)
Mov_R_M {
size: u8, // 1, 2, 4 or 8
src: Reg,
addr: Addr,
},
/// (shl shr sar) (l q) imm reg
Shift_R {
is_64: bool,
kind: ShiftKind,
/// shift count: Some(0 .. #bits-in-type - 1), or None to mean "%cl".
num_bits: Option<u8>,
dst: Writable<Reg>,
},
/// cmp (b w l q) (reg addr imm) reg
Cmp_RMI_R {
size: u8, // 1, 2, 4 or 8
src: RMI,
dst: Reg,
},
/// pushq (reg addr imm)
Push64 { src: RMI },
/// popq reg
Pop64 { dst: Writable<Reg> },
/// call simm32
CallKnown {
dest: ExternalName,
uses: Set<Reg>,
defs: Set<Writable<Reg>>,
},
/// callq (reg mem)
CallUnknown {
dest: RM,
//uses: Set<Reg>,
//defs: Set<Writable<Reg>>,
},
// ---- branches (exactly one must appear at end of BB) ----
/// ret
Ret,
/// A placeholder instruction, generating no code, meaning that a function epilogue must be
/// inserted there.
EpiloguePlaceholder,
/// jmp simm32
JmpKnown { dest: BranchTarget },
/// jcond cond target target
// Symmetrical two-way conditional branch.
// Should never reach the emitter.
JmpCondSymm {
cc: CC,
taken: BranchTarget,
not_taken: BranchTarget,
},
/// Lowered conditional branch: contains the original instruction, and a
/// flag indicating whether to invert the taken-condition or not. Only one
/// BranchTarget is retained, and the other is implicitly the next
/// instruction, given the final basic-block layout.
JmpCond {
cc: CC,
//inverted: bool, is this needed?
target: BranchTarget,
},
/// As for `JmpCond`, but represents a condbr/uncond-br sequence (two
/// actual machine instructions). Needed when the final block layout implies
/// that neither arm of a conditional branch targets the fallthrough block.
// Should never reach the emitter
JmpCondCompound {
cc: CC,
taken: BranchTarget,
not_taken: BranchTarget,
},
/// jmpq (reg mem)
JmpUnknown { target: RM },
}
// Handy constructors for Insts.
// For various sizes, will some number of lowest bits sign extend to be the
// same as the whole value?
pub(crate) fn low32willSXto64(x: u64) -> bool {
let xs = x as i64;
xs == ((xs << 32) >> 32)
}
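// For example, low32willSXto64(0xFFFF_FFFF_FFFF_FFFF) is true (both sides are -1), while
// low32willSXto64(0x8000_0000) is false: its low 32 bits sign-extend to -2^31, not +2^31.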
impl Inst {
pub(crate) fn nop(len: u8) -> Self {
debug_assert!(len <= 16);
Self::Nop { len }
}
pub(crate) fn alu_rmi_r(is_64: bool, op: RMI_R_Op, src: RMI, dst: Writable<Reg>) -> Self {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Self::Alu_RMI_R {
is_64,
op,
src,
dst,
}
}
pub(crate) fn imm_r(dst_is_64: bool, simm64: u64, dst: Writable<Reg>) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
if !dst_is_64 {
debug_assert!(low32willSXto64(simm64));
}
Inst::Imm_R {
dst_is_64,
simm64,
dst,
}
}
pub(crate) fn mov_r_r(is_64: bool, src: Reg, dst: Writable<Reg>) -> Inst {
debug_assert!(src.get_class() == RegClass::I64);
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::Mov_R_R { is_64, src, dst }
}
pub(crate) fn movzx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::MovZX_M_R { extMode, addr, dst }
}
pub(crate) fn mov64_m_r(addr: Addr, dst: Writable<Reg>) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::Mov64_M_R { addr, dst }
}
pub(crate) fn movsx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::MovSX_M_R { extMode, addr, dst }
}
pub(crate) fn mov_r_m(
size: u8, // 1, 2, 4 or 8
src: Reg,
addr: Addr,
) -> Inst {
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
debug_assert!(src.get_class() == RegClass::I64);
Inst::Mov_R_M { size, src, addr }
}
pub(crate) fn shift_r(
is_64: bool,
kind: ShiftKind,
num_bits: Option<u8>,
dst: Writable<Reg>,
) -> Inst {
debug_assert!(if let Some(num_bits) = num_bits {
num_bits < if is_64 { 64 } else { 32 }
} else {
true
});
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::Shift_R {
is_64,
kind,
num_bits,
dst,
}
}
pub(crate) fn cmp_rmi_r(
size: u8, // 1, 2, 4 or 8
src: RMI,
dst: Reg,
) -> Inst {
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
debug_assert!(dst.get_class() == RegClass::I64);
Inst::Cmp_RMI_R { size, src, dst }
}
pub(crate) fn push64(src: RMI) -> Inst {
Inst::Push64 { src }
}
pub(crate) fn pop64(dst: Writable<Reg>) -> Inst {
Inst::Pop64 { dst }
}
pub(crate) fn call_unknown(dest: RM) -> Inst {
Inst::CallUnknown { dest }
}
pub(crate) fn ret() -> Inst {
Inst::Ret
}
pub(crate) fn epilogue_placeholder() -> Inst {
Inst::EpiloguePlaceholder
}
pub(crate) fn jmp_known(dest: BranchTarget) -> Inst {
Inst::JmpKnown { dest }
}
pub(crate) fn jmp_cond_symm(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst {
Inst::JmpCondSymm {
cc,
taken,
not_taken,
}
}
pub(crate) fn jmp_cond(cc: CC, target: BranchTarget) -> Inst {
Inst::JmpCond { cc, target }
}
pub(crate) fn jmp_cond_compound(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst {
Inst::JmpCondCompound {
cc,
taken,
not_taken,
}
}
pub(crate) fn jmp_unknown(target: RM) -> Inst {
Inst::JmpUnknown { target }
}
}
//=============================================================================
// Instructions: printing
impl ShowWithRRU for Inst {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
fn ljustify(s: String) -> String {
let w = 7;
if s.len() >= w {
s
} else {
let need = usize::min(w, w - s.len());
s + &format!("{nil: <width$}", nil = "", width = need)
}
}
fn ljustify2(s1: String, s2: String) -> String {
ljustify(s1 + &s2)
}
fn suffixLQ(is_64: bool) -> String {
(if is_64 { "q" } else { "l" }).to_string()
}
fn sizeLQ(is_64: bool) -> u8 {
if is_64 {
8
} else {
4
}
}
fn suffixBWLQ(size: u8) -> String {
match size {
1 => "b".to_string(),
2 => "w".to_string(),
4 => "l".to_string(),
8 => "q".to_string(),
_ => panic!("Inst(x64).show.suffixBWLQ: size={}", size),
}
}
match self {
Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),
Inst::Alu_RMI_R {
is_64,
op,
src,
dst,
} => format!(
"{} {}, {}",
ljustify2(op.to_string(), suffixLQ(*is_64)),
src.show_rru_sized(mb_rru, sizeLQ(*is_64)),
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)),
),
Inst::Imm_R {
dst_is_64,
simm64,
dst,
} => {
if *dst_is_64 {
format!(
"{} ${}, {}",
ljustify("movabsq".to_string()),
*simm64 as i64,
show_ireg_sized(dst.to_reg(), mb_rru, 8)
)
} else {
format!(
"{} ${}, {}",
ljustify("movl".to_string()),
(*simm64 as u32) as i32,
show_ireg_sized(dst.to_reg(), mb_rru, 4)
)
}
}
Inst::Mov_R_R { is_64, src, dst } => format!(
"{} {}, {}",
ljustify2("mov".to_string(), suffixLQ(*is_64)),
show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)),
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
),
Inst::MovZX_M_R { extMode, addr, dst } => {
if *extMode == ExtMode::LQ {
format!(
"{} {}, {}",
ljustify("movl".to_string()),
addr.show_rru(mb_rru),
show_ireg_sized(dst.to_reg(), mb_rru, 4)
)
} else {
format!(
"{} {}, {}",
ljustify2("movz".to_string(), extMode.to_string()),
addr.show_rru(mb_rru),
show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size())
)
}
}
Inst::Mov64_M_R { addr, dst } => format!(
"{} {}, {}",
ljustify("movq".to_string()),
addr.show_rru(mb_rru),
dst.show_rru(mb_rru)
),
Inst::MovSX_M_R { extMode, addr, dst } => format!(
"{} {}, {}",
ljustify2("movs".to_string(), extMode.to_string()),
addr.show_rru(mb_rru),
show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size())
),
Inst::Mov_R_M { size, src, addr } => format!(
"{} {}, {}",
ljustify2("mov".to_string(), suffixBWLQ(*size)),
show_ireg_sized(*src, mb_rru, *size),
addr.show_rru(mb_rru)
),
Inst::Shift_R {
is_64,
kind,
num_bits,
dst,
} => match num_bits {
None => format!(
"{} %cl, {}",
ljustify2(kind.to_string(), suffixLQ(*is_64)),
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
),
Some(num_bits) => format!(
"{} ${}, {}",
ljustify2(kind.to_string(), suffixLQ(*is_64)),
num_bits,
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
),
},
Inst::Cmp_RMI_R { size, src, dst } => format!(
"{} {}, {}",
ljustify2("cmp".to_string(), suffixBWLQ(*size)),
src.show_rru_sized(mb_rru, *size),
show_ireg_sized(*dst, mb_rru, *size)
),
Inst::Push64 { src } => {
format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
}
Inst::Pop64 { dst } => {
format!("{} {}", ljustify("popq".to_string()), dst.show_rru(mb_rru))
}
//Inst::CallKnown { target } => format!("{} {:?}", ljustify("call".to_string()), target),
Inst::CallKnown { .. } => "**CallKnown**".to_string(),
Inst::CallUnknown { dest } => format!(
"{} *{}",
ljustify("call".to_string()),
dest.show_rru(mb_rru)
),
Inst::Ret => "ret".to_string(),
Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(),
Inst::JmpKnown { dest } => {
format!("{} {}", ljustify("jmp".to_string()), dest.show_rru(mb_rru))
}
Inst::JmpCondSymm {
cc,
taken,
not_taken,
} => format!(
"{} taken={} not_taken={}",
ljustify2("j".to_string(), cc.to_string()),
taken.show_rru(mb_rru),
not_taken.show_rru(mb_rru)
),
//
Inst::JmpCond { cc, ref target } => format!(
"{} {}",
ljustify2("j".to_string(), cc.to_string()),
target.show_rru(None)
),
//
Inst::JmpCondCompound { .. } => "**JmpCondCompound**".to_string(),
Inst::JmpUnknown { target } => format!(
"{} *{}",
ljustify("jmp".to_string()),
target.show_rru(mb_rru)
),
}
}
}
// Temp hook for legacy printing machinery
impl fmt::Debug for Inst {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
// Print the insn without a Universe :-(
write!(fmt, "{}", self.show_rru(None))
}
}
fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
// This is a bit subtle. If some register is in the modified set, then it may not be in either
// the use or def sets. However, enforcing that directly is somewhat difficult. Instead,
// regalloc.rs will "fix" this for us by removing the the modified set from the use and def
// sets.
match inst {
// ** Nop
Inst::Alu_RMI_R {
is_64: _,
op: _,
src,
dst,
} => {
src.get_regs_as_uses(collector);
collector.add_mod(*dst);
}
Inst::Imm_R {
dst_is_64: _,
simm64: _,
dst,
} => {
collector.add_def(*dst);
}
Inst::Mov_R_R { is_64: _, src, dst } => {
collector.add_use(*src);
collector.add_def(*dst);
}
Inst::MovZX_M_R {
extMode: _,
addr,
dst,
} => {
addr.get_regs_as_uses(collector);
collector.add_def(*dst);
}
Inst::Mov64_M_R { addr, dst } => {
addr.get_regs_as_uses(collector);
collector.add_def(*dst);
}
Inst::MovSX_M_R {
extMode: _,
addr,
dst,
} => {
addr.get_regs_as_uses(collector);
collector.add_def(*dst);
}
Inst::Mov_R_M { size: _, src, addr } => {
collector.add_use(*src);
addr.get_regs_as_uses(collector);
}
Inst::Shift_R {
is_64: _,
kind: _,
num_bits,
dst,
} => {
if num_bits.is_none() {
collector.add_use(regs::rcx());
}
collector.add_mod(*dst);
}
Inst::Cmp_RMI_R { size: _, src, dst } => {
src.get_regs_as_uses(collector);
collector.add_use(*dst); // yes, really `add_use`
}
Inst::Push64 { src } => {
src.get_regs_as_uses(collector);
collector.add_mod(Writable::from_reg(regs::rsp()));
}
Inst::Pop64 { dst } => {
collector.add_def(*dst);
}
Inst::CallKnown {
dest: _,
uses: _,
defs: _,
} => {
// FIXME add arg regs (iru.used) and caller-saved regs (iru.defined)
unimplemented!();
}
Inst::CallUnknown { dest } => {
dest.get_regs_as_uses(collector);
}
Inst::Ret => {}
Inst::EpiloguePlaceholder => {}
Inst::JmpKnown { dest: _ } => {}
Inst::JmpCondSymm {
cc: _,
taken: _,
not_taken: _,
} => {}
//
// ** JmpCond
//
// ** JmpCondCompound
//
//Inst::JmpUnknown { target } => {
// target.get_regs_as_uses(collector);
//}
Inst::Nop { .. }
| Inst::JmpCond { .. }
| Inst::JmpCondCompound { .. }
| Inst::JmpUnknown { .. } => unimplemented!("x64_get_regs inst"),
}
}
//=============================================================================
// Instructions and subcomponents: map_regs
fn map_use(m: &RegUsageMapper, r: &mut Reg) {
if r.is_virtual() {
let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg();
*r = new;
}
}
fn map_def(m: &RegUsageMapper, r: &mut Writable<Reg>) {
if r.to_reg().is_virtual() {
let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg();
*r = Writable::from_reg(new);
}
}
fn map_mod(m: &RegUsageMapper, r: &mut Writable<Reg>) {
if r.to_reg().is_virtual() {
let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg();
*r = Writable::from_reg(new);
}
}
impl Addr {
fn map_uses(&mut self, map: &RegUsageMapper) {
match self {
Addr::IR {
simm32: _,
ref mut base,
} => map_use(map, base),
Addr::IRRS {
simm32: _,
ref mut base,
ref mut index,
shift: _,
} => {
map_use(map, base);
map_use(map, index);
}
}
}
}
impl RMI {
fn map_uses(&mut self, map: &RegUsageMapper) {
match self {
RMI::R { ref mut reg } => map_use(map, reg),
RMI::M { ref mut addr } => addr.map_uses(map),
RMI::I { simm32: _ } => {}
}
}
}
impl RM {
fn map_uses(&mut self, map: &RegUsageMapper) {
match self {
RM::R { ref mut reg } => map_use(map, reg),
RM::M { ref mut addr } => addr.map_uses(map),
}
}
}
fn x64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
// Note this must be carefully synchronized with x64_get_regs.
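// In particular, every register recorded there as a use, def or mod must be remapped here
// with the corresponding map_use / map_def / map_mod helper.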
match inst {
// ** Nop
Inst::Alu_RMI_R {
is_64: _,
op: _,
ref mut src,
ref mut dst,
} => {
src.map_uses(mapper);
map_mod(mapper, dst);
}
Inst::Imm_R {
dst_is_64: _,
simm64: _,
ref mut dst,
} => map_def(mapper, dst),
Inst::Mov_R_R {
is_64: _,
ref mut src,
ref mut dst,
} => {
map_use(mapper, src);
map_def(mapper, dst);
}
Inst::MovZX_M_R {
extMode: _,
ref mut addr,
ref mut dst,
} => {
addr.map_uses(mapper);
map_def(mapper, dst);
}
Inst::Mov64_M_R { addr, dst } => {
addr.map_uses(mapper);
map_def(mapper, dst);
}
Inst::MovSX_M_R {
extMode: _,
ref mut addr,
ref mut dst,
} => {
addr.map_uses(mapper);
map_def(mapper, dst);
}
Inst::Mov_R_M {
size: _,
ref mut src,
ref mut addr,
} => {
map_use(mapper, src);
addr.map_uses(mapper);
}
Inst::Shift_R {
is_64: _,
kind: _,
num_bits: _,
ref mut dst,
} => {
map_mod(mapper, dst);
}
Inst::Cmp_RMI_R {
size: _,
ref mut src,
ref mut dst,
} => {
src.map_uses(mapper);
map_use(mapper, dst);
}
Inst::Push64 { ref mut src } => src.map_uses(mapper),
Inst::Pop64 { ref mut dst } => {
map_def(mapper, dst);
}
Inst::CallKnown {
dest: _,
uses: _,
defs: _,
} => {}
Inst::CallUnknown { dest } => dest.map_uses(mapper),
Inst::Ret => {}
Inst::EpiloguePlaceholder => {}
Inst::JmpKnown { dest: _ } => {}
Inst::JmpCondSymm {
cc: _,
taken: _,
not_taken: _,
} => {}
//
// ** JmpCond
//
// ** JmpCondCompound
//
//Inst::JmpUnknown { target } => {
// target.apply_map(mapper);
//}
Inst::Nop { .. }
| Inst::JmpCond { .. }
| Inst::JmpCondCompound { .. }
| Inst::JmpUnknown { .. } => unimplemented!("x64_map_regs opcode"),
}
}
//=============================================================================
// Instructions: misc functions and external interface
impl MachInst for Inst {
fn get_regs(&self, collector: &mut RegUsageCollector) {
x64_get_regs(&self, collector)
}
fn map_regs(&mut self, mapper: &RegUsageMapper) {
x64_map_regs(self, mapper);
}
fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
// Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
// out the upper 32 bits of the destination. For example, we could
// conceivably use `movl %reg, %reg` to zero out the top 32 bits of
// %reg.
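// Hence only the 64-bit form of `Mov_R_R` is treated as a plain register-to-register move.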
match self {
Self::Mov_R_R { is_64, src, dst } if *is_64 => Some((*dst, *src)),
_ => None,
}
}
fn is_epilogue_placeholder(&self) -> bool {
if let Self::EpiloguePlaceholder = self {
true
} else {
false
}
}
fn is_term<'a>(&'a self) -> MachTerminator<'a> {
match self {
// Interesting cases.
&Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
&Self::JmpKnown { dest } => MachTerminator::Uncond(dest.as_block_index().unwrap()),
&Self::JmpCondSymm {
cc: _,
taken,
not_taken,
} => MachTerminator::Cond(
taken.as_block_index().unwrap(),
not_taken.as_block_index().unwrap(),
),
&Self::JmpCond { .. } | &Self::JmpCondCompound { .. } => {
panic!("is_term() called after lowering branches");
}
// All other cases are boring.
_ => MachTerminator::None,
}
}
fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, _ty: Type) -> Inst {
let rc_dst = dst_reg.to_reg().get_class();
let rc_src = src_reg.get_class();
// If this isn't true, we have gone way off the rails.
debug_assert!(rc_dst == rc_src);
match rc_dst {
RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
_ => panic!("gen_move(x64): unhandled regclass"),
}
}
fn gen_zero_len_nop() -> Inst {
unimplemented!()
}
fn gen_nop(_preferred_size: usize) -> Inst {
unimplemented!()
}
fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
None
}
fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
match ty {
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
F32 | F64 | I128 | B128 => Ok(RegClass::V128),
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
ty
))),
}
}
fn gen_jump(blockindex: BlockIndex) -> Inst {
Inst::jmp_known(BranchTarget::Block(blockindex))
}
fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]) {
// This is identical (modulo renaming) to the arm64 version.
match self {
&mut Inst::JmpKnown { ref mut dest } => {
dest.map(block_target_map);
}
&mut Inst::JmpCondSymm {
cc: _,
ref mut taken,
ref mut not_taken,
} => {
taken.map(block_target_map);
not_taken.map(block_target_map);
}
&mut Inst::JmpCond { .. } | &mut Inst::JmpCondCompound { .. } => {
panic!("with_block_rewrites called after branch lowering!");
}
_ => {}
}
}
fn with_fallthrough_block(&mut self, fallthrough: Option<BlockIndex>) {
// This is identical (modulo renaming) to the arm64 version.
match self {
&mut Inst::JmpCondSymm {
cc,
taken,
not_taken,
} => {
if taken.as_block_index() == fallthrough {
*self = Inst::jmp_cond(cc.invert(), not_taken);
} else if not_taken.as_block_index() == fallthrough {
*self = Inst::jmp_cond(cc, taken);
} else {
// We need a compound sequence (condbr / uncond-br).
*self = Inst::jmp_cond_compound(cc, taken, not_taken);
}
}
&mut Inst::JmpKnown { dest } => {
if dest.as_block_index() == fallthrough {
*self = Inst::nop(0);
}
}
_ => {}
}
}
fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]) {
// This is identical (modulo renaming) to the arm64 version.
match self {
&mut Self::JmpCond {
cc: _,
ref mut target,
} => {
target.lower(targets, my_offset);
}
&mut Self::JmpCondCompound {
cc: _,
ref mut taken,
ref mut not_taken,
..
} => {
taken.lower(targets, my_offset);
not_taken.lower(targets, my_offset);
}
&mut Self::JmpKnown { ref mut dest } => {
dest.lower(targets, my_offset);
}
_ => {}
}
}
fn reg_universe(flags: &settings::Flags) -> RealRegUniverse {
create_reg_universe_systemv(flags)
}
}
impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
fn emit(&self, sink: &mut O, _flags: &settings::Flags) {
emit::emit(self, sink);
}
}
View File
@@ -0,0 +1,261 @@
//! Registers, the Universe thereof, and printing.
//!
//! These are ordered by sequence number, as required in the Universe. The strange ordering is
//! intended to make callee-save registers available before caller-saved ones. This is a net win
//! provided that each function makes at least one onward call. It'll be a net loss for leaf
//! functions, and we should change the ordering in that case, so as to make caller-save regs
//! available first.
//!
//! TODO Maybe have two different universes, one for leaf functions and one for non-leaf functions?
//! Also, they will have to be ABI dependent. Need to find a way to avoid constructing a universe
//! for each function we compile.
use alloc::vec::Vec;
use std::string::String;
use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES};
use crate::machinst::pretty_print::ShowWithRRU;
use crate::settings;
// Hardware encodings for a few registers.
pub const ENC_RBX: u8 = 3;
pub const ENC_RSP: u8 = 4;
pub const ENC_RBP: u8 = 5;
pub const ENC_R12: u8 = 12;
pub const ENC_R13: u8 = 13;
pub const ENC_R14: u8 = 14;
pub const ENC_R15: u8 = 15;
fn gpr(enc: u8, index: u8) -> Reg {
Reg::new_real(RegClass::I64, enc, index)
}
pub(crate) fn r12() -> Reg {
gpr(ENC_R12, 0)
}
pub(crate) fn r13() -> Reg {
gpr(ENC_R13, 1)
}
pub(crate) fn r14() -> Reg {
gpr(ENC_R14, 2)
}
pub(crate) fn r15() -> Reg {
gpr(ENC_R15, 3)
}
pub(crate) fn rbx() -> Reg {
gpr(ENC_RBX, 4)
}
pub(crate) fn rsi() -> Reg {
gpr(6, 5)
}
pub(crate) fn rdi() -> Reg {
gpr(7, 6)
}
pub(crate) fn rax() -> Reg {
gpr(0, 7)
}
pub(crate) fn rcx() -> Reg {
gpr(1, 8)
}
pub(crate) fn rdx() -> Reg {
gpr(2, 9)
}
pub(crate) fn r8() -> Reg {
gpr(8, 10)
}
pub(crate) fn r9() -> Reg {
gpr(9, 11)
}
pub(crate) fn r10() -> Reg {
gpr(10, 12)
}
pub(crate) fn r11() -> Reg {
gpr(11, 13)
}
fn fpr(enc: u8, index: u8) -> Reg {
Reg::new_real(RegClass::V128, enc, index)
}
fn xmm0() -> Reg {
fpr(0, 14)
}
fn xmm1() -> Reg {
fpr(1, 15)
}
fn xmm2() -> Reg {
fpr(2, 16)
}
fn xmm3() -> Reg {
fpr(3, 17)
}
fn xmm4() -> Reg {
fpr(4, 18)
}
fn xmm5() -> Reg {
fpr(5, 19)
}
fn xmm6() -> Reg {
fpr(6, 20)
}
fn xmm7() -> Reg {
fpr(7, 21)
}
fn xmm8() -> Reg {
fpr(8, 22)
}
fn xmm9() -> Reg {
fpr(9, 23)
}
fn xmm10() -> Reg {
fpr(10, 24)
}
fn xmm11() -> Reg {
fpr(11, 25)
}
fn xmm12() -> Reg {
fpr(12, 26)
}
fn xmm13() -> Reg {
fpr(13, 27)
}
fn xmm14() -> Reg {
fpr(14, 28)
}
fn xmm15() -> Reg {
fpr(15, 29)
}
pub(crate) fn rsp() -> Reg {
gpr(ENC_RSP, 30)
}
pub(crate) fn rbp() -> Reg {
gpr(ENC_RBP, 31)
}
/// Create the register universe for X64.
///
/// The ordering of registers matters, as noted in the file-level doc comment. For now this
/// assumes the SystemV calling convention.
pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUniverse {
let mut regs = Vec::<(RealReg, String)>::new();
let mut allocable_by_class = [None; NUM_REG_CLASSES];
// Integer regs.
let mut base = regs.len();
// Callee-saved, in the SystemV x86_64 ABI.
regs.push((r12().to_real_reg(), "%r12".into()));
regs.push((r13().to_real_reg(), "%r13".into()));
regs.push((r14().to_real_reg(), "%r14".into()));
regs.push((r15().to_real_reg(), "%r15".into()));
regs.push((rbx().to_real_reg(), "%rbx".into()));
// Caller-saved, in the SystemV x86_64 ABI.
regs.push((rsi().to_real_reg(), "%rsi".into()));
regs.push((rdi().to_real_reg(), "%rdi".into()));
regs.push((rax().to_real_reg(), "%rax".into()));
regs.push((rcx().to_real_reg(), "%rcx".into()));
regs.push((rdx().to_real_reg(), "%rdx".into()));
regs.push((r8().to_real_reg(), "%r8".into()));
regs.push((r9().to_real_reg(), "%r9".into()));
regs.push((r10().to_real_reg(), "%r10".into()));
regs.push((r11().to_real_reg(), "%r11".into()));
allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
first: base,
last: regs.len() - 1,
suggested_scratch: Some(r12().get_index()),
});
// XMM registers
base = regs.len();
regs.push((xmm0().to_real_reg(), "%xmm0".into()));
regs.push((xmm1().to_real_reg(), "%xmm1".into()));
regs.push((xmm2().to_real_reg(), "%xmm2".into()));
regs.push((xmm3().to_real_reg(), "%xmm3".into()));
regs.push((xmm4().to_real_reg(), "%xmm4".into()));
regs.push((xmm5().to_real_reg(), "%xmm5".into()));
regs.push((xmm6().to_real_reg(), "%xmm6".into()));
regs.push((xmm7().to_real_reg(), "%xmm7".into()));
regs.push((xmm8().to_real_reg(), "%xmm8".into()));
regs.push((xmm9().to_real_reg(), "%xmm9".into()));
regs.push((xmm10().to_real_reg(), "%xmm10".into()));
regs.push((xmm11().to_real_reg(), "%xmm11".into()));
regs.push((xmm12().to_real_reg(), "%xmm12".into()));
regs.push((xmm13().to_real_reg(), "%xmm13".into()));
regs.push((xmm14().to_real_reg(), "%xmm14".into()));
regs.push((xmm15().to_real_reg(), "%xmm15".into()));
allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
first: base,
last: regs.len() - 1,
suggested_scratch: Some(xmm15().get_index()),
});
// Other regs, not available to the allocator.
let allocable = regs.len();
regs.push((rsp().to_real_reg(), "%rsp".into()));
regs.push((rbp().to_real_reg(), "%rbp".into()));
RealRegUniverse {
regs,
allocable,
allocable_by_class,
}
}
/// If `reg` denotes an I64-classed reg, make a best-effort attempt to show its name at some
/// smaller size (4, 2 or 1 bytes).
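/// For example, given a register universe, `%rax` is rendered as `%eax`, `%ax` or `%al` for
/// sizes 4, 2 and 1; virtual I64 regs instead get an "l", "w" or "b" suffix.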
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
let mut s = reg.show_rru(mb_rru);
if reg.get_class() != RegClass::I64 || size == 8 {
// We can't do any better.
return s;
}
if reg.is_real() {
// Change (eg) "rax" into "eax", "ax" or "al" as appropriate. This is something one could
// describe diplomatically as "a kludge", but it's only debug code.
let remapper = match s.as_str() {
"%rax" => Some(["%eax", "%ax", "%al"]),
"%rbx" => Some(["%ebx", "%bx", "%bl"]),
"%rcx" => Some(["%ecx", "%cx", "%cl"]),
"%rdx" => Some(["%edx", "%dx", "%dl"]),
"%rsi" => Some(["%esi", "%si", "%sil"]),
"%rdi" => Some(["%edi", "%di", "%dil"]),
"%rbp" => Some(["%ebp", "%bp", "%bpl"]),
"%rsp" => Some(["%esp", "%sp", "%spl"]),
"%r8" => Some(["%r8d", "%r8w", "%r8b"]),
"%r9" => Some(["%r9d", "%r9w", "%r9b"]),
"%r10" => Some(["%r10d", "%r10w", "%r10b"]),
"%r11" => Some(["%r11d", "%r11w", "%r11b"]),
"%r12" => Some(["%r12d", "%r12w", "%r12b"]),
"%r13" => Some(["%r13d", "%r13w", "%r13b"]),
"%r14" => Some(["%r14d", "%r14w", "%r14b"]),
"%r15" => Some(["%r15d", "%r15w", "%r15b"]),
_ => None,
};
if let Some(smaller_names) = remapper {
match size {
4 => s = smaller_names[0].into(),
2 => s = smaller_names[1].into(),
1 => s = smaller_names[2].into(),
_ => panic!("show_ireg_sized: real"),
}
}
} else {
// Add an "l", "w" or "b" suffix to RegClass::I64 vregs used at narrower widths.
let suffix = match size {
4 => "l",
2 => "w",
1 => "b",
_ => panic!("show_ireg_sized: virtual"),
};
s = s + suffix;
}
s
}
View File
@@ -0,0 +1,358 @@
//! Lowering rules for X64.
#![allow(dead_code)]
#![allow(non_snake_case)]
use regalloc::{Reg, Writable};
use crate::ir::condcodes::IntCC;
use crate::ir::types;
use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, Type};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
use crate::isa::x64::X64Backend;
/// Context passed to all lowering functions.
type Ctx<'a> = &'a mut dyn LowerCtx<I = Inst>;
//=============================================================================
// Helpers for instruction lowering.
fn is_int_ty(ty: Type) -> bool {
match ty {
types::I8 | types::I16 | types::I32 | types::I64 => true,
_ => false,
}
}
fn int_ty_to_is64(ty: Type) -> bool {
match ty {
types::I8 | types::I16 | types::I32 => false,
types::I64 => true,
_ => panic!("type {} is none of I8, I16, I32 or I64", ty),
}
}
fn int_ty_to_sizeB(ty: Type) -> u8 {
match ty {
types::I8 => 1,
types::I16 => 2,
types::I32 => 4,
types::I64 => 8,
_ => panic!("int_ty_to_sizeB"),
}
}
fn iri_to_u64_immediate<'a>(ctx: Ctx<'a>, iri: IRInst) -> Option<u64> {
let inst_data = ctx.data(iri);
if inst_data.opcode() == Opcode::Null {
Some(0)
} else {
match inst_data {
&InstructionData::UnaryImm { opcode: _, imm } => {
// Only has Into for i64; we use u64 elsewhere, so we cast.
let imm: i64 = imm.into();
Some(imm as u64)
}
_ => None,
}
}
}
fn inst_condcode(data: &InstructionData) -> IntCC {
match data {
&InstructionData::IntCond { cond, .. }
| &InstructionData::BranchIcmp { cond, .. }
| &InstructionData::IntCompare { cond, .. }
| &InstructionData::IntCondTrap { cond, .. }
| &InstructionData::BranchInt { cond, .. }
| &InstructionData::IntSelect { cond, .. }
| &InstructionData::IntCompareImm { cond, .. } => cond,
_ => panic!("inst_condcode(x64): unhandled: {:?}", data),
}
}
fn intCC_to_x64_CC(cc: IntCC) -> CC {
match cc {
IntCC::Equal => CC::Z,
IntCC::NotEqual => CC::NZ,
IntCC::SignedGreaterThanOrEqual => CC::NL,
IntCC::SignedGreaterThan => CC::NLE,
IntCC::SignedLessThanOrEqual => CC::LE,
IntCC::SignedLessThan => CC::L,
IntCC::UnsignedGreaterThanOrEqual => CC::NB,
IntCC::UnsignedGreaterThan => CC::NBE,
IntCC::UnsignedLessThanOrEqual => CC::BE,
IntCC::UnsignedLessThan => CC::B,
IntCC::Overflow => CC::O,
IntCC::NotOverflow => CC::NO,
}
}
//=============================================================================
// Top-level instruction lowering entry point, for one instruction.
/// Actually codegen an instruction's results into registers.
fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
let op = ctx.data(iri).opcode();
let ty = if ctx.num_outputs(iri) == 1 {
Some(ctx.output_ty(iri, 0))
} else {
None
};
// This is all outstandingly feeble. TODO: much better!
match op {
Opcode::Iconst => {
if let Some(w64) = iri_to_u64_immediate(ctx, iri) {
// Get exactly the bit pattern in 'w64' into the dest. No
// monkeying with sign extension etc.
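// A value that fits in 32 bits can use the 32-bit form, since writing a 32-bit register
// implicitly zeroes the upper 32 bits of the full 64-bit register.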
let dstIs64 = w64 > 0xFFFF_FFFF;
let regD = ctx.output(iri, 0);
ctx.emit(Inst::imm_r(dstIs64, w64, regD));
} else {
unimplemented!();
}
}
Opcode::Iadd | Opcode::Isub => {
let regD = ctx.output(iri, 0);
let regL = ctx.input(iri, 0);
let regR = ctx.input(iri, 1);
let is64 = int_ty_to_is64(ty.unwrap());
let how = if op == Opcode::Iadd {
RMI_R_Op::Add
} else {
RMI_R_Op::Sub
};
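// x86 ALU instructions are two-operand (dst := dst op src), so copy the left operand into
// the destination first and then combine it with the right operand.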
ctx.emit(Inst::mov_r_r(true, regL, regD));
ctx.emit(Inst::alu_rmi_r(is64, how, RMI::reg(regR), regD));
}
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
// TODO: implement imm shift value into insn
let tySL = ctx.input_ty(iri, 0);
let tyD = ctx.output_ty(iri, 0); // should be the same as tySL
let regSL = ctx.input(iri, 0);
let regSR = ctx.input(iri, 1);
let regD = ctx.output(iri, 0);
if tyD == tySL && (tyD == types::I32 || tyD == types::I64) {
let how = match op {
Opcode::Ishl => ShiftKind::Left,
Opcode::Ushr => ShiftKind::RightZ,
Opcode::Sshr => ShiftKind::RightS,
_ => unreachable!(),
};
let is64 = tyD == types::I64;
let r_rcx = regs::rcx();
let w_rcx = Writable::<Reg>::from_reg(r_rcx);
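// A variable shift count must live in %cl, so move it into %rcx and select the %cl-based
// form of the shift by passing `None` for the immediate count.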
ctx.emit(Inst::mov_r_r(true, regSL, regD));
ctx.emit(Inst::mov_r_r(true, regSR, w_rcx));
ctx.emit(Inst::shift_r(is64, how, None /*%cl*/, regD));
} else {
unimplemented!()
}
}
Opcode::Uextend | Opcode::Sextend => {
// TODO: this is all extremely lame, all because Mov{ZX,SX}_M_R
// don't accept a register source operand. They should be changed
// so as to have _RM_R form.
// TODO2: if the source operand is a load, incorporate that.
let isZX = op == Opcode::Uextend;
let tyS = ctx.input_ty(iri, 0);
let tyD = ctx.output_ty(iri, 0);
let regS = ctx.input(iri, 0);
let regD = ctx.output(iri, 0);
ctx.emit(Inst::mov_r_r(true, regS, regD));
match (tyS, tyD, isZX) {
(types::I8, types::I64, false) => {
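// Sign-extend the low byte by shifting it up to the top byte and then arithmetic-shifting
// it back down, replicating the sign bit through the upper 56 bits.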
ctx.emit(Inst::shift_r(true, ShiftKind::Left, Some(56), regD));
ctx.emit(Inst::shift_r(true, ShiftKind::RightS, Some(56), regD));
}
_ => unimplemented!(),
}
}
Opcode::FallthroughReturn | Opcode::Return => {
for i in 0..ctx.num_inputs(iri) {
let src_reg = ctx.input(iri, i);
let retval_reg = ctx.retval(i);
ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
}
// N.B.: the Ret itself is generated by the ABI.
}
Opcode::IaddImm
| Opcode::ImulImm
| Opcode::UdivImm
| Opcode::SdivImm
| Opcode::UremImm
| Opcode::SremImm
| Opcode::IrsubImm
| Opcode::IaddCin
| Opcode::IaddIfcin
| Opcode::IaddCout
| Opcode::IaddIfcout
| Opcode::IaddCarry
| Opcode::IaddIfcarry
| Opcode::IsubBin
| Opcode::IsubIfbin
| Opcode::IsubBout
| Opcode::IsubIfbout
| Opcode::IsubBorrow
| Opcode::IsubIfborrow
| Opcode::BandImm
| Opcode::BorImm
| Opcode::BxorImm
| Opcode::RotlImm
| Opcode::RotrImm
| Opcode::IshlImm
| Opcode::UshrImm
| Opcode::SshrImm => {
panic!("ALU+imm and ALU+carry ops should not appear here!");
}
Opcode::X86Udivmodx
| Opcode::X86Sdivmodx
| Opcode::X86Umulx
| Opcode::X86Smulx
| Opcode::X86Cvtt2si
| Opcode::X86Fmin
| Opcode::X86Fmax
| Opcode::X86Push
| Opcode::X86Pop
| Opcode::X86Bsr
| Opcode::X86Bsf
| Opcode::X86Pshufd
| Opcode::X86Pshufb
| Opcode::X86Pextr
| Opcode::X86Pinsr
| Opcode::X86Insertps
| Opcode::X86Movsd
| Opcode::X86Movlhps
| Opcode::X86Psll
| Opcode::X86Psrl
| Opcode::X86Psra
| Opcode::X86Ptest
| Opcode::X86Pmaxs
| Opcode::X86Pmaxu
| Opcode::X86Pmins
| Opcode::X86Pminu => {
panic!("x86-specific opcode in supposedly arch-neutral IR!");
}
_ => unimplemented!("unimplemented lowering for opcode {:?}", op),
}
}
//=============================================================================
// Lowering-backend trait implementation.
impl LowerBackend for X64Backend {
type MInst = Inst;
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) {
lower_insn_to_regs(ctx, ir_inst);
}
fn lower_branch_group<C: LowerCtx<I = Inst>>(
&self,
ctx: &mut C,
branches: &[IRInst],
targets: &[BlockIndex],
fallthrough: Option<BlockIndex>,
) {
// A block should end with at most two branches. The first may be a
// conditional branch; a conditional branch can be followed only by an
// unconditional branch or fallthrough. Otherwise, if only one branch,
// it may be an unconditional branch, a fallthrough, a return, or a
// trap. These conditions are verified by `is_ebb_basic()` during the
// verifier pass.
assert!(branches.len() <= 2);
let mut unimplemented = false;
if branches.len() == 2 {
// Must be a conditional branch followed by an unconditional branch.
let op0 = ctx.data(branches[0]).opcode();
let op1 = ctx.data(branches[1]).opcode();
println!(
"QQQQ lowering two-branch group: opcodes are {:?} and {:?}",
op0, op1
);
assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
let taken = BranchTarget::Block(targets[0]);
let not_taken = match op1 {
Opcode::Jump => BranchTarget::Block(targets[1]),
Opcode::Fallthrough => BranchTarget::Block(fallthrough.unwrap()),
_ => unreachable!(), // assert above.
};
match op0 {
Opcode::Brz | Opcode::Brnz => {
let tyS = ctx.input_ty(branches[0], 0);
if is_int_ty(tyS) {
let rS = ctx.input(branches[0], 0);
let cc = match op0 {
Opcode::Brz => CC::Z,
Opcode::Brnz => CC::NZ,
_ => unreachable!(),
};
let sizeB = int_ty_to_sizeB(tyS);
ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::imm(0), rS));
ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
} else {
unimplemented = true;
}
}
Opcode::BrIcmp => {
let tyS = ctx.input_ty(branches[0], 0);
if is_int_ty(tyS) {
let rSL = ctx.input(branches[0], 0);
let rSR = ctx.input(branches[0], 1);
let cc = intCC_to_x64_CC(inst_condcode(ctx.data(branches[0])));
let sizeB = int_ty_to_sizeB(tyS);
// FIXME verify rSR vs rSL ordering
ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::reg(rSR), rSL));
ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
} else {
unimplemented = true;
}
}
// TODO: Brif/icmp, Brff/icmp, jump tables
_ => {
unimplemented = true;
}
}
} else {
assert!(branches.len() == 1);
// Must be an unconditional branch or trap.
let op = ctx.data(branches[0]).opcode();
match op {
Opcode::Jump => {
ctx.emit(Inst::jmp_known(BranchTarget::Block(targets[0])));
}
Opcode::Fallthrough => {
ctx.emit(Inst::jmp_known(BranchTarget::Block(targets[0])));
}
Opcode::Trap => {
unimplemented = true;
}
_ => panic!("Unknown branch type!"),
}
}
if unimplemented {
unimplemented!("lower_branch_group(x64): can't handle: {:?}", branches);
}
}
}
View File
@@ -0,0 +1,92 @@
//! X86_64 Instruction Set Architecture.
use alloc::boxed::Box;
use regalloc::RealRegUniverse;
use target_lexicon::Triple;
use crate::ir::Function;
use crate::isa::Builder as IsaBuilder;
use crate::machinst::pretty_print::ShowWithRRU;
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings::{self, Flags};
use crate::isa::x64::inst::regs::create_reg_universe_systemv;
mod abi;
mod inst;
mod lower;
/// An X64 backend.
pub(crate) struct X64Backend {
triple: Triple,
flags: Flags,
}
impl X64Backend {
/// Create a new X64 backend with the given (shared) flags.
fn new_with_flags(triple: Triple, flags: Flags) -> Self {
Self { triple, flags }
}
fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult<VCode<inst::Inst>> {
// This performs lowering to VCode, register-allocates the code, computes
// block layout and finalizes branches. The result is ready for binary emission.
let abi = Box::new(abi::X64ABIBody::new(&func, flags));
compile::compile::<Self>(&func, self, abi)
}
}
impl MachBackend for X64Backend {
fn compile_function(
&self,
func: &Function,
want_disasm: bool,
) -> CodegenResult<MachCompileResult> {
let flags = self.flags();
let vcode = self.compile_vcode(func, flags.clone())?;
let sections = vcode.emit();
let frame_size = vcode.frame_size();
let disasm = if want_disasm {
Some(vcode.show_rru(Some(&create_reg_universe_systemv(flags))))
} else {
None
};
Ok(MachCompileResult {
sections,
frame_size,
disasm,
})
}
fn flags(&self) -> &Flags {
&self.flags
}
fn name(&self) -> &'static str {
"x64"
}
fn triple(&self) -> Triple {
self.triple.clone()
}
fn reg_universe(&self) -> RealRegUniverse {
create_reg_universe_systemv(&self.flags)
}
}
/// Create a new `isa::Builder`.
pub(crate) fn isa_builder(triple: Triple) -> IsaBuilder {
IsaBuilder {
triple,
setup: settings::builder(),
constructor: |triple: Triple, flags: Flags, _arch_flag_builder: settings::Builder| {
let backend = X64Backend::new_with_flags(triple, flags);
Box::new(TargetIsaAdapter::new(backend))
},
}
}
View File
@@ -53,12 +53,23 @@ fn isa_constructor(
        PointerWidth::U32 => &enc_tables::LEVEL1_I32[..],
        PointerWidth::U64 => &enc_tables::LEVEL1_I64[..],
    };
-    Box::new(Isa {
-        triple,
-        isa_flags: settings::Flags::new(&shared_flags, builder),
-        shared_flags,
-        cpumode: level1,
-    })
+    let isa_flags = settings::Flags::new(&shared_flags, builder);
+
+    if isa_flags.use_new_backend() {
+        #[cfg(not(feature = "x64"))]
+        panic!("new backend x86 support not included by cargo features!");
+
+        #[cfg(feature = "x64")]
+        super::x64::isa_builder(triple).finish(shared_flags)
+    } else {
+        Box::new(Isa {
+            triple,
+            isa_flags,
+            shared_flags,
+            cpumode: level1,
+        })
+    }
}

impl TargetIsa for Isa {