moved crates in lib/ to src/, renamed crates, modified some files' text (#660)
moved crates in lib/ to src/, renamed crates, modified some files' text (#660)
This commit is contained in:
579
cranelift/codegen/src/isa/x86/abi.rs
Normal file
579
cranelift/codegen/src/isa/x86/abi.rs
Normal file
@@ -0,0 +1,579 @@
|
||||
//! x86 ABI implementation.
|
||||
|
||||
use super::registers::{FPR, GPR, RU};
|
||||
use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
|
||||
use crate::cursor::{Cursor, CursorPosition, EncCursor};
|
||||
use crate::ir;
|
||||
use crate::ir::immediates::Imm64;
|
||||
use crate::ir::stackslot::{StackOffset, StackSize};
|
||||
use crate::ir::{
|
||||
get_probestack_funcref, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder,
|
||||
ValueLoc,
|
||||
};
|
||||
use crate::isa::{CallConv, RegClass, RegUnit, TargetIsa};
|
||||
use crate::regalloc::RegisterSet;
|
||||
use crate::result::CodegenResult;
|
||||
use crate::stack_layout::layout_stack;
|
||||
use core::i32;
|
||||
use target_lexicon::{PointerWidth, Triple};
|
||||
|
||||
/// Argument registers for x86-64.
/// Order follows the System V AMD64 integer-argument assignment sequence.
static ARG_GPRS: [RU; 6] = [RU::rdi, RU::rsi, RU::rdx, RU::rcx, RU::r8, RU::r9];

/// Return value registers.
static RET_GPRS: [RU; 3] = [RU::rax, RU::rdx, RU::rcx];

/// Argument registers for x86-64, when using windows fastcall.
/// Fastcall passes only the first four integer arguments in registers.
static ARG_GPRS_WIN_FASTCALL_X64: [RU; 4] = [RU::rcx, RU::rdx, RU::r8, RU::r9];

/// Return value registers for x86-64, when using windows fastcall.
static RET_GPRS_WIN_FASTCALL_X64: [RU; 1] = [RU::rax];
|
||||
|
||||
/// Stateful argument assigner: walks a signature's parameters (or returns)
/// and hands out register or stack locations according to one of the x86
/// calling conventions. One instance is used per parameter/return list.
struct Args {
    // Size of a pointer in bytes (4 or 8).
    pointer_bytes: u8,
    // Size of a pointer in bits (32 or 64).
    pointer_bits: u8,
    // IR integer type with the same width as a pointer.
    pointer_type: ir::Type,
    // GPRs available for assignment, in assignment order.
    gpr: &'static [RU],
    // Number of entries of `gpr` already consumed.
    gpr_used: usize,
    // Maximum number of FPR units that may be assigned.
    fpr_limit: usize,
    // Number of FPR units already consumed.
    fpr_used: usize,
    // Byte offset of the next stack-passed argument.
    offset: u32,
    // Calling convention being legalized for.
    call_conv: CallConv,
}
|
||||
|
||||
impl Args {
|
||||
fn new(bits: u8, gpr: &'static [RU], fpr_limit: usize, call_conv: CallConv) -> Self {
|
||||
let offset = if let CallConv::WindowsFastcall = call_conv {
|
||||
// [1] "The caller is responsible for allocating space for parameters to the callee,
|
||||
// and must always allocate sufficient space to store four register parameters"
|
||||
32
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
Self {
|
||||
pointer_bytes: bits / 8,
|
||||
pointer_bits: bits,
|
||||
pointer_type: ir::Type::int(u16::from(bits)).unwrap(),
|
||||
gpr,
|
||||
gpr_used: 0,
|
||||
fpr_limit,
|
||||
fpr_used: 0,
|
||||
offset,
|
||||
call_conv,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ArgAssigner for Args {
    /// Assign a location to `arg`, or request a value conversion that must
    /// be applied before assignment can succeed.
    ///
    /// Conversions are returned first (vector split, integer split,
    /// sign/zero extension); once the type is register-sized, the argument
    /// is placed in the next free GPR or FPR, falling back to a
    /// pointer-sized stack slot.
    fn assign(&mut self, arg: &AbiParam) -> ArgAction {
        let ty = arg.value_type;

        // Check for a legal type.
        // We don't support SIMD yet, so break all vectors down.
        if ty.is_vector() {
            return ValueConversion::VectorSplit.into();
        }

        // Large integers and booleans are broken down to fit in a register.
        if !ty.is_float() && ty.bits() > u16::from(self.pointer_bits) {
            return ValueConversion::IntSplit.into();
        }

        // Small integers are extended to the size of a pointer register.
        if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) {
            match arg.extension {
                ArgumentExtension::None => {}
                ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
                ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(),
            }
        }

        // Handle special-purpose arguments.
        // Baldrdash (SpiderMonkey) pins certain arguments to fixed registers.
        if ty.is_int() && self.call_conv == CallConv::Baldrdash {
            match arg.purpose {
                // This is SpiderMonkey's `WasmTlsReg`.
                ArgumentPurpose::VMContext => {
                    return ArgumentLoc::Reg(if self.pointer_bits == 64 {
                        RU::r14
                    } else {
                        RU::rsi
                    } as RegUnit)
                    .into();
                }
                // This is SpiderMonkey's `WasmTableCallSigReg`.
                ArgumentPurpose::SignatureId => return ArgumentLoc::Reg(RU::r10 as RegUnit).into(),
                _ => {}
            }
        }

        // Try to use a GPR.
        if !ty.is_float() && self.gpr_used < self.gpr.len() {
            let reg = self.gpr[self.gpr_used] as RegUnit;
            self.gpr_used += 1;
            return ArgumentLoc::Reg(reg).into();
        }

        // Try to use an FPR.
        if ty.is_float() && self.fpr_used < self.fpr_limit {
            let reg = FPR.unit(self.fpr_used);
            self.fpr_used += 1;
            return ArgumentLoc::Reg(reg).into();
        }

        // Assign a stack location. Every stack argument consumes one
        // pointer-sized slot (types were split/extended above).
        let loc = ArgumentLoc::Stack(self.offset as i32);
        self.offset += u32::from(self.pointer_bytes);
        debug_assert!(self.offset <= i32::MAX as u32);
        loc.into()
    }
}
|
||||
|
||||
/// Legalize `sig`.
///
/// Rewrites the parameter and return lists of `sig` in place so that every
/// value has a concrete register or stack location, according to the
/// signature's calling convention and the pointer width of `triple`.
pub fn legalize_signature(sig: &mut ir::Signature, triple: &Triple, _current: bool) {
    let bits;
    let mut args;

    match triple.pointer_width().unwrap() {
        // 16-bit pointers are not supported.
        PointerWidth::U16 => panic!(),
        PointerWidth::U32 => {
            bits = 32;
            // 32-bit x86 passes all arguments on the stack: no GPRs, no FPRs.
            args = Args::new(bits, &[], 0, sig.call_conv);
        }
        PointerWidth::U64 => {
            bits = 64;
            args = if sig.call_conv == CallConv::WindowsFastcall {
                // Fastcall: 4 integer registers, up to 4 FPR units.
                Args::new(bits, &ARG_GPRS_WIN_FASTCALL_X64[..], 4, sig.call_conv)
            } else {
                // System V-style: 6 integer registers, up to 8 FPR units.
                Args::new(bits, &ARG_GPRS[..], 8, sig.call_conv)
            };
        }
    }

    legalize_args(&mut sig.params, &mut args);

    // Returns use a separate, smaller register set.
    let regs = if sig.call_conv == CallConv::WindowsFastcall {
        &RET_GPRS_WIN_FASTCALL_X64[..]
    } else {
        &RET_GPRS[..]
    };

    let mut rets = Args::new(bits, regs, 2, sig.call_conv);
    legalize_args(&mut sig.returns, &mut rets);
}
|
||||
|
||||
/// Get register class for a type appearing in a legalized signature.
|
||||
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
|
||||
if ty.is_int() || ty.is_bool() {
|
||||
GPR
|
||||
} else {
|
||||
FPR
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the set of allocatable registers for `func`.
|
||||
pub fn allocatable_registers(_func: &ir::Function, triple: &Triple) -> RegisterSet {
|
||||
let mut regs = RegisterSet::new();
|
||||
regs.take(GPR, RU::rsp as RegUnit);
|
||||
regs.take(GPR, RU::rbp as RegUnit);
|
||||
|
||||
// 32-bit arch only has 8 registers.
|
||||
if triple.pointer_width().unwrap() != PointerWidth::U64 {
|
||||
for i in 8..16 {
|
||||
regs.take(GPR, GPR.unit(i));
|
||||
regs.take(FPR, FPR.unit(i));
|
||||
}
|
||||
}
|
||||
|
||||
regs
|
||||
}
|
||||
|
||||
/// Get the set of callee-saved registers.
///
/// The returned slice lists the GPRs that a function compiled for the
/// given target/calling convention must preserve across its body.
fn callee_saved_gprs(isa: &TargetIsa, call_conv: CallConv) -> &'static [RU] {
    match isa.triple().pointer_width().unwrap() {
        // 16-bit pointers are not supported.
        PointerWidth::U16 => panic!(),
        PointerWidth::U32 => &[RU::rbx, RU::rsi, RU::rdi],
        PointerWidth::U64 => {
            if call_conv == CallConv::WindowsFastcall {
                // "registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15 are considered nonvolatile
                // and must be saved and restored by a function that uses them."
                // as per https://msdn.microsoft.com/en-us/library/6t169e9c.aspx
                // RSP & RBP are not listed below, since they are restored automatically during
                // a function call. If that wasn't the case, function calls (RET) would not work.
                &[
                    RU::rbx,
                    RU::rdi,
                    RU::rsi,
                    RU::r12,
                    RU::r13,
                    RU::r14,
                    RU::r15,
                ]
            } else {
                // System V AMD64: rbx and r12-r15 (rbp handled separately, as above).
                &[RU::rbx, RU::r12, RU::r13, RU::r14, RU::r15]
            }
        }
    }
}
|
||||
|
||||
/// Get the set of callee-saved registers that are used.
///
/// Computes the intersection of the convention's callee-saved GPRs with the
/// registers actually touched by `func` (value locations plus any registers
/// temporarily used by regmove/regfill diversions). These are the registers
/// the prologue must push and the epilogue must pop.
///
/// NOTE(review): this builds sets from `RegisterSet::empty()` and inserts
/// members via `free()` / tests membership via `is_avail()` — the inverse of
/// the `new()`/`take()` usage above. Confirm against `RegisterSet` docs.
fn callee_saved_gprs_used(isa: &TargetIsa, func: &ir::Function) -> RegisterSet {
    let mut all_callee_saved = RegisterSet::empty();
    for reg in callee_saved_gprs(isa, func.signature.call_conv) {
        all_callee_saved.free(GPR, *reg as RegUnit);
    }

    let mut used = RegisterSet::empty();
    for value_loc in func.locations.values() {
        // Note that `value_loc` here contains only a single unit of a potentially multi-unit
        // register. We don't use registers that overlap each other in the x86 ISA, but in others
        // we do. So this should not be blindly reused.
        if let ValueLoc::Reg(ru) = *value_loc {
            if !used.is_avail(GPR, ru) {
                used.free(GPR, ru);
            }
        }
    }

    // regmove and regfill instructions may temporarily divert values into other registers,
    // and these are not reflected in `func.locations`. Scan the function for such instructions
    // and note which callee-saved registers they use.
    //
    // TODO: Consider re-evaluating how regmove/regfill/regspill work and whether it's possible
    // to avoid this step.
    for ebb in &func.layout {
        for inst in func.layout.ebb_insts(ebb) {
            match func.dfg[inst] {
                ir::instructions::InstructionData::RegMove { dst, .. }
                | ir::instructions::InstructionData::RegFill { dst, .. } => {
                    if !used.is_avail(GPR, dst) {
                        used.free(GPR, dst);
                    }
                }
                _ => (),
            }
        }
    }

    // Keep only the registers that are both used and callee-saved.
    used.intersect(&all_callee_saved);
    used
}
|
||||
|
||||
pub fn prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> CodegenResult<()> {
|
||||
match func.signature.call_conv {
|
||||
// For now, just translate fast and cold as system_v.
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV => {
|
||||
system_v_prologue_epilogue(func, isa)
|
||||
}
|
||||
CallConv::WindowsFastcall => fastcall_prologue_epilogue(func, isa),
|
||||
CallConv::Baldrdash => baldrdash_prologue_epilogue(func, isa),
|
||||
CallConv::Probestack => unimplemented!("probestack calling convention"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Baldrdash (SpiderMonkey) prologue/epilogue handling.
///
/// SpiderMonkey emits the actual prologue/epilogue itself; cranelift only
/// reserves an incoming-arg stack slot covering the words the embedder's
/// prologue pushed, then lays out the stack frame.
fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> CodegenResult<()> {
    debug_assert!(
        !isa.flags().probestack_enabled(),
        "baldrdash does not expect cranelift to emit stack probes"
    );

    // Baldrdash on 32-bit x86 always aligns its stack pointer to 16 bytes.
    let stack_align = 16;
    let word_size = StackSize::from(isa.pointer_bytes());
    // Number of bytes the embedder's prologue has already pushed.
    let bytes = StackSize::from(isa.flags().baldrdash_prologue_words()) * word_size;

    // Reserve those bytes as an incoming-arg slot at a negative offset
    // (i.e. above the frame's stack pointer at entry).
    let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
    ss.offset = Some(-(bytes as StackOffset));
    func.stack_slots.push(ss);

    layout_stack(&mut func.stack_slots, stack_align)?;
    Ok(())
}
|
||||
|
||||
/// Implementation of the fastcall-based Win64 calling convention described at [1]
/// [1] https://msdn.microsoft.com/en-us/library/ms235286.aspx
fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> CodegenResult<()> {
    if isa.triple().pointer_width().unwrap() != PointerWidth::U64 {
        panic!("TODO: windows-fastcall: x86-32 not implemented yet");
    }

    // [1] "The primary exceptions are the stack pointer and malloc or alloca memory,
    // which are aligned to 16 bytes in order to aid performance"
    let stack_align = 16;

    let word_size = isa.pointer_bytes() as usize;
    let reg_type = isa.pointer_type();

    // The callee-saved registers this function actually uses; only these
    // need to be pushed/popped.
    let csrs = callee_saved_gprs_used(isa, func);

    // [1] "Space is allocated on the call stack as a shadow store for callees to save"
    // This shadow store contains the parameters which are passed through registers (ARG_GPRS)
    // and is eventually used by the callee to save & restore the values of the arguments.
    //
    // [2] https://blogs.msdn.microsoft.com/oldnewthing/20110302-00/?p=11333
    // "Although the x64 calling convention reserves spill space for parameters,
    // you don’t have to use them as such"
    //
    // The reserved stack area is composed of:
    //   return address + frame pointer + all callee-saved registers + shadow space
    //
    // Pushing the return address is an implicit function of the `call`
    // instruction. Each of the others we will then push explicitly. Then we
    // will adjust the stack pointer to make room for the rest of the required
    // space for this frame.
    const SHADOW_STORE_SIZE: i32 = 32;
    // "+ 2" accounts for the return address and the saved frame pointer.
    let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;

    // TODO: eventually use the 32 bytes (shadow store) as spill slot. This currently doesn't work
    // since cranelift does not support spill slots before incoming args

    func.create_stack_slot(ir::StackSlotData {
        kind: ir::StackSlotKind::IncomingArg,
        size: csr_stack_size as u32,
        offset: Some(-(SHADOW_STORE_SIZE + csr_stack_size)),
    });

    let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32;
    // The portion of the frame the prologue must allocate itself (pushes
    // already cover the CSR area).
    let local_stack_size = i64::from(total_stack_size - csr_stack_size);

    // Add CSRs to function signature
    let fp_arg = ir::AbiParam::special_reg(
        reg_type,
        ir::ArgumentPurpose::FramePointer,
        RU::rbp as RegUnit,
    );
    func.signature.params.push(fp_arg);
    func.signature.returns.push(fp_arg);

    for csr in csrs.iter(GPR) {
        let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr);
        func.signature.params.push(csr_arg);
        func.signature.returns.push(csr_arg);
    }

    // Set up the cursor and insert the prologue
    let entry_ebb = func.layout.entry_block().expect("missing entry block");
    let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
    insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa);

    // Reset the cursor and insert the epilogue
    let mut pos = pos.at_position(CursorPosition::Nowhere);
    insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs);

    Ok(())
}
|
||||
|
||||
/// Insert a System V-compatible prologue and epilogue.
fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> CodegenResult<()> {
    // The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but
    // newer versions use a 16-byte aligned stack pointer.
    let stack_align = 16;
    let pointer_width = isa.triple().pointer_width().unwrap();
    let word_size = pointer_width.bytes() as usize;
    let reg_type = ir::Type::int(u16::from(pointer_width.bits())).unwrap();

    // The callee-saved registers this function actually uses; only these
    // need to be pushed/popped.
    let csrs = callee_saved_gprs_used(isa, func);

    // The reserved stack area is composed of:
    //   return address + frame pointer + all callee-saved registers
    //
    // Pushing the return address is an implicit function of the `call`
    // instruction. Each of the others we will then push explicitly. Then we
    // will adjust the stack pointer to make room for the rest of the required
    // space for this frame.
    //
    // "+ 2" accounts for the return address and the saved frame pointer.
    let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;
    func.create_stack_slot(ir::StackSlotData {
        kind: ir::StackSlotKind::IncomingArg,
        size: csr_stack_size as u32,
        offset: Some(-csr_stack_size),
    });

    let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32;
    // The portion of the frame the prologue must allocate itself (pushes
    // already cover the CSR area).
    let local_stack_size = i64::from(total_stack_size - csr_stack_size);

    // Add CSRs to function signature
    let fp_arg = ir::AbiParam::special_reg(
        reg_type,
        ir::ArgumentPurpose::FramePointer,
        RU::rbp as RegUnit,
    );
    func.signature.params.push(fp_arg);
    func.signature.returns.push(fp_arg);

    for csr in csrs.iter(GPR) {
        let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr);
        func.signature.params.push(csr_arg);
        func.signature.returns.push(csr_arg);
    }

    // Set up the cursor and insert the prologue
    let entry_ebb = func.layout.entry_block().expect("missing entry block");
    let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
    insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa);

    // Reset the cursor and insert the epilogue
    let mut pos = pos.at_position(CursorPosition::Nowhere);
    insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs);

    Ok(())
}
|
||||
|
||||
/// Insert the prologue for a given function.
/// This is used by common calling conventions such as System V.
///
/// Emits, in order: an optional stack-limit check, a push of the incoming
/// frame pointer plus `rbp <- rsp`, pushes of every used callee-saved
/// register, and finally the stack-pointer adjustment (or probestack call)
/// that allocates `stack_size` bytes of frame storage.
fn insert_common_prologue(
    pos: &mut EncCursor,
    stack_size: i64,
    reg_type: ir::types::Type,
    csrs: &RegisterSet,
    isa: &TargetIsa,
) {
    if stack_size > 0 {
        // Check if there is a special stack limit parameter. If so insert stack check.
        if let Some(stack_limit_arg) = pos.func.special_param(ArgumentPurpose::StackLimit) {
            // Total stack size is the size of all stack area used by the function, including
            // pushed CSRs, frame pointer.
            // Also, the size of a return address, implicitly pushed by a x86 `call` instruction,
            // also should be accounted for.
            // TODO: Check if the function body actually contains a `call` instruction.
            let word_size = isa.pointer_bytes();
            // +1 for the frame pointer, +1 for the return address.
            let total_stack_size = (csrs.iter(GPR).len() + 1 + 1) as i64 * word_size as i64;

            insert_stack_check(pos, total_stack_size, stack_limit_arg);
        }
    }

    // Append param to entry EBB
    let ebb = pos.current_ebb().expect("missing ebb under cursor");
    let fp = pos.func.dfg.append_ebb_param(ebb, reg_type);
    pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit);

    // `push %rbp; mov %rsp, %rbp` — the classic frame setup.
    pos.ins().x86_push(fp);
    pos.ins()
        .copy_special(RU::rsp as RegUnit, RU::rbp as RegUnit);

    for reg in csrs.iter(GPR) {
        // Append param to entry EBB
        let csr_arg = pos.func.dfg.append_ebb_param(ebb, reg_type);

        // Assign it a location
        pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);

        // Remember it so we can push it momentarily
        pos.ins().x86_push(csr_arg);
    }

    // Allocate stack frame storage.
    if stack_size > 0 {
        if isa.flags().probestack_enabled()
            && stack_size > (1 << isa.flags().probestack_size_log2())
        {
            // Emit a stack probe.
            let rax = RU::rax as RegUnit;
            let rax_val = ir::ValueLoc::Reg(rax);

            // The probestack function expects its input in %rax.
            let arg = pos.ins().iconst(reg_type, stack_size);
            pos.func.locations[arg] = rax_val;

            // Call the probestack function.
            let callee = get_probestack_funcref(pos.func, reg_type, rax, isa);

            // Make the call.
            let call = if !isa.flags().is_pic()
                && isa.triple().pointer_width().unwrap() == PointerWidth::U64
                && !pos.func.dfg.ext_funcs[callee].colocated
            {
                // 64-bit non-PIC non-colocated calls need to be legalized to call_indirect.
                // Use r11 as it may be clobbered under all supported calling conventions.
                let r11 = RU::r11 as RegUnit;
                let sig = pos.func.dfg.ext_funcs[callee].signature;
                let addr = pos.ins().func_addr(reg_type, callee);
                pos.func.locations[addr] = ir::ValueLoc::Reg(r11);
                pos.ins().call_indirect(sig, addr, &[arg])
            } else {
                // Otherwise just do a normal call.
                pos.ins().call(callee, &[arg])
            };

            // If the probestack function doesn't adjust sp, do it ourselves.
            if !isa.flags().probestack_func_adjusts_sp() {
                let result = pos.func.dfg.inst_results(call)[0];
                pos.func.locations[result] = rax_val;
                pos.ins().adjust_sp_down(result);
            }
        } else {
            // Simply decrement the stack pointer.
            pos.ins().adjust_sp_down_imm(Imm64::new(stack_size));
        }
    }
}
|
||||
|
||||
/// Insert a check that generates a trap if the stack pointer goes
/// below a value in `stack_limit_arg`.
///
/// `stack_size` is added to the limit to form the threshold, so the trap
/// fires if *opening this frame* would cross the limit — not only after
/// the fact. Clobbers %rax and %rflags.
fn insert_stack_check(pos: &mut EncCursor, stack_size: i64, stack_limit_arg: ir::Value) {
    use crate::ir::condcodes::IntCC;

    // Copy `stack_limit_arg` into a %rax and use it for calculating
    // a SP threshold.
    let stack_limit_copy = pos.ins().copy(stack_limit_arg);
    pos.func.locations[stack_limit_copy] = ir::ValueLoc::Reg(RU::rax as RegUnit);
    let sp_threshold = pos.ins().iadd_imm(stack_limit_copy, stack_size);
    pos.func.locations[sp_threshold] = ir::ValueLoc::Reg(RU::rax as RegUnit);

    // If the stack pointer currently reaches the SP threshold or below it then after opening
    // the current stack frame, the current stack pointer will reach the limit.
    let cflags = pos.ins().ifcmp_sp(sp_threshold);
    pos.func.locations[cflags] = ir::ValueLoc::Reg(RU::rflags as RegUnit);
    // Note: the comparison is threshold vs. sp, hence "threshold >= sp" traps.
    pos.ins().trapif(
        IntCC::UnsignedGreaterThanOrEqual,
        cflags,
        ir::TrapCode::StackOverflow,
    );
}
|
||||
|
||||
/// Find all `return` instructions and insert epilogues before them.
|
||||
fn insert_common_epilogues(
|
||||
pos: &mut EncCursor,
|
||||
stack_size: i64,
|
||||
reg_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
) {
|
||||
while let Some(ebb) = pos.next_ebb() {
|
||||
pos.goto_last_inst(ebb);
|
||||
if let Some(inst) = pos.current_inst() {
|
||||
if pos.func.dfg[inst].opcode().is_return() {
|
||||
insert_common_epilogue(inst, stack_size, pos, reg_type, csrs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert an epilogue given a specific `return` instruction.
/// This is used by common calling conventions such as System V.
///
/// Deallocates the local frame, pops the frame pointer and every used
/// callee-saved register (mirroring the prologue), and appends each popped
/// value as an argument of the return instruction so the register
/// allocator knows they are live-out.
fn insert_common_epilogue(
    inst: ir::Inst,
    stack_size: i64,
    pos: &mut EncCursor,
    reg_type: ir::types::Type,
    csrs: &RegisterSet,
) {
    if stack_size > 0 {
        // Undo the prologue's stack-pointer adjustment.
        pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
    }

    // Pop all the callee-saved registers, stepping backward each time to
    // preserve the correct order.
    let fp_ret = pos.ins().x86_pop(reg_type);
    pos.prev_inst();

    pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
    pos.func.dfg.append_inst_arg(inst, fp_ret);

    for reg in csrs.iter(GPR) {
        let csr_ret = pos.ins().x86_pop(reg_type);
        pos.prev_inst();

        pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg);
        pos.func.dfg.append_inst_arg(inst, csr_ret);
    }
}
|
||||
342
cranelift/codegen/src/isa/x86/binemit.rs
Normal file
342
cranelift/codegen/src/isa/x86/binemit.rs
Normal file
@@ -0,0 +1,342 @@
|
||||
//! Emitting binary x86 machine code.
|
||||
|
||||
use super::enc_tables::{needs_offset, needs_sib_byte};
|
||||
use super::registers::RU;
|
||||
use crate::binemit::{bad_encoding, CodeSink, Reloc};
|
||||
use crate::ir::condcodes::{CondCode, FloatCC, IntCC};
|
||||
use crate::ir::{Ebb, Function, Inst, InstructionData, JumpTable, Opcode, TrapCode};
|
||||
use crate::isa::{RegUnit, StackBase, StackBaseMask, StackRef};
|
||||
use crate::regalloc::RegDiversions;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/binemit-x86.rs"));
|
||||
|
||||
// Convert a stack base to the corresponding register.
|
||||
fn stk_base(base: StackBase) -> RegUnit {
|
||||
let ru = match base {
|
||||
StackBase::SP => RU::rsp,
|
||||
StackBase::FP => RU::rbp,
|
||||
StackBase::Zone => unimplemented!(),
|
||||
};
|
||||
ru as RegUnit
|
||||
}
|
||||
|
||||
// Mandatory prefix bytes for Mp* opcodes, indexed by `pp - 1` where
// `pp` is the 2-bit prefix selector in the encoding bits.
const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2];

// Second byte for three-byte opcodes for mm=0b10 and mm=0b11.
const OP3_BYTE2: [u8; 2] = [0x38, 0x3a];

// A REX prefix with no bits set: 0b0100WRXB.
const BASE_REX: u8 = 0b0100_0000;
|
||||
|
||||
// Create a single-register REX prefix, setting the B bit to bit 3 of the register.
|
||||
// This is used for instructions that encode a register in the low 3 bits of the opcode and for
|
||||
// instructions that use the ModR/M `reg` field for something else.
|
||||
fn rex1(reg_b: RegUnit) -> u8 {
|
||||
let b = ((reg_b >> 3) & 1) as u8;
|
||||
BASE_REX | b
|
||||
}
|
||||
|
||||
// Create a dual-register REX prefix, setting:
|
||||
//
|
||||
// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
|
||||
// REX.R = bit 3 of reg register.
|
||||
fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
|
||||
let b = ((rm >> 3) & 1) as u8;
|
||||
let r = ((reg >> 3) & 1) as u8;
|
||||
BASE_REX | b | (r << 2)
|
||||
}
|
||||
|
||||
// Create a three-register REX prefix, setting:
|
||||
//
|
||||
// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
|
||||
// REX.R = bit 3 of reg register.
|
||||
// REX.X = bit 3 of SIB index register.
|
||||
fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 {
|
||||
let b = ((rm >> 3) & 1) as u8;
|
||||
let r = ((reg >> 3) & 1) as u8;
|
||||
let x = ((index >> 3) & 1) as u8;
|
||||
BASE_REX | b | (x << 1) | (r << 2)
|
||||
}
|
||||
|
||||
// Emit a REX prefix.
|
||||
//
|
||||
// The R, X, and B bits are computed from registers using the functions above. The W bit is
|
||||
// extracted from `bits`.
|
||||
fn rex_prefix<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(rex & 0xf8, BASE_REX);
|
||||
let w = ((bits >> 15) & 1) as u8;
|
||||
sink.put1(rex | (w << 3));
|
||||
}
|
||||
|
||||
// Emit a single-byte opcode with no REX prefix.
|
||||
fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*");
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op1 encoding");
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit a single-byte opcode with REX prefix.
|
||||
fn put_rexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for Op1*");
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode: 0F XX
|
||||
fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op2 encoding");
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode: 0F XX with REX prefix.
|
||||
fn put_rexop2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0f00, 0x0400, "Invalid encoding bits for RexOp2*");
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit single-byte opcode with mandatory prefix.
|
||||
fn put_mp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp1 encoding");
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit single-byte opcode with mandatory prefix and REX.
|
||||
fn put_rexmp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for Mp1*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode (0F XX) with mandatory prefix.
|
||||
fn put_mp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8c00, 0x0400, "Invalid encoding bits for Mp2*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp2 encoding");
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode (0F XX) with mandatory prefix and REX.
|
||||
fn put_rexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for Mp2*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix.
|
||||
fn put_mp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp3 encoding");
|
||||
let mm = (bits >> 10) & 3;
|
||||
sink.put1(0x0f);
|
||||
sink.put1(OP3_BYTE2[(mm - 2) as usize]);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX
|
||||
fn put_rexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for Mp3*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
rex_prefix(bits, rex, sink);
|
||||
let mm = (bits >> 10) & 3;
|
||||
sink.put1(0x0f);
|
||||
sink.put1(OP3_BYTE2[(mm - 2) as usize]);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
/// Emit a ModR/M byte for reg-reg operands.
|
||||
fn modrm_rr<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b11000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a ModR/M byte where the reg bits are part of the opcode.
|
||||
fn modrm_r_bits<CS: CodeSink + ?Sized>(rm: RegUnit, bits: u16, sink: &mut CS) {
|
||||
let reg = (bits >> 12) as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b11000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 00 ModR/M byte. This is a register-indirect addressing mode with no offset.
|
||||
/// Registers %rsp and %rbp are invalid for `rm`, %rsp indicates a SIB byte, and %rbp indicates an
|
||||
/// absolute immediate 32-bit address.
|
||||
fn modrm_rm<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b00000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 00 Mod/RM byte, with a rip-relative displacement in 64-bit mode. Effective address
|
||||
/// is calculated by adding displacement to 64-bit rip of next instruction. See intel Sw dev manual
|
||||
/// section 2.2.1.6.
|
||||
fn modrm_riprel<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
|
||||
modrm_rm(0b101, reg, sink)
|
||||
}
|
||||
|
||||
/// Emit a mode 01 ModR/M byte. This is a register-indirect addressing mode with 8-bit
|
||||
/// displacement.
|
||||
/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
|
||||
fn modrm_disp8<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b01000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 10 ModR/M byte. This is a register-indirect addressing mode with 32-bit
|
||||
/// displacement.
|
||||
/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
|
||||
fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b10000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 00 ModR/M with a 100 RM indicating a SIB byte is present.
|
||||
fn modrm_sib<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
|
||||
modrm_rm(0b100, reg, sink);
|
||||
}
|
||||
|
||||
/// Emit a mode 01 ModR/M with a 100 RM indicating a SIB byte and 8-bit
|
||||
/// displacement are present.
|
||||
fn modrm_sib_disp8<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
|
||||
modrm_disp8(0b100, reg, sink);
|
||||
}
|
||||
|
||||
/// Emit a mode 10 ModR/M with a 100 RM indicating a SIB byte and 32-bit
|
||||
/// displacement are present.
|
||||
fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
|
||||
modrm_disp32(0b100, reg, sink);
|
||||
}
|
||||
|
||||
/// Emit a SIB byte with a base register and no scale+index.
|
||||
fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
|
||||
let base = base as u8 & 7;
|
||||
// SIB SS_III_BBB.
|
||||
let mut b = 0b00_100_000;
|
||||
b |= base;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a SIB byte with a scale, base, and index.
|
||||
fn sib<CS: CodeSink + ?Sized>(scale: u8, index: RegUnit, base: RegUnit, sink: &mut CS) {
|
||||
// SIB SS_III_BBB.
|
||||
debug_assert_eq!(scale & !0x03, 0, "Scale out of range");
|
||||
let scale = scale & 3;
|
||||
let index = index as u8 & 7;
|
||||
let base = base as u8 & 7;
|
||||
let b: u8 = (scale << 6) | (index << 3) | base;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Get the low 4 bits of an opcode for an integer condition code.
///
/// The returned nibble is the x86 `cc` field shared by the Jcc/SETcc opcode families.
/// Add this offset to a base opcode for:
///
/// ---- 0x70: Short conditional branch.
/// 0x0f 0x80: Long conditional branch.
/// 0x0f 0x90: SetCC.
///
fn icc2opc(cond: IntCC) -> u16 {
    use crate::ir::condcodes::IntCC::*;
    // Every `IntCC` variant maps to a single x86 condition; the codes skipped below
    // (overflow, sign, parity) have no corresponding Cranelift condition code.
    match cond {
        // 0x0 = Overflow.
        // 0x1 = !Overflow.
        UnsignedLessThan => 0x2,
        UnsignedGreaterThanOrEqual => 0x3,
        Equal => 0x4,
        NotEqual => 0x5,
        UnsignedLessThanOrEqual => 0x6,
        UnsignedGreaterThan => 0x7,
        // 0x8 = Sign.
        // 0x9 = !Sign.
        // 0xa = Parity even.
        // 0xb = Parity odd.
        SignedLessThan => 0xc,
        SignedGreaterThanOrEqual => 0xd,
        SignedLessThanOrEqual => 0xe,
        SignedGreaterThan => 0xf,
    }
}
|
||||
|
||||
/// Get the low 4 bits of an opcode for a floating point condition code.
///
/// The ucomiss/ucomisd instructions set the FLAGS bits ZF/PF/CF like this:
///
///    ZPC OSA
/// UN 111 000
/// GT 000 000
/// LT 001 000
/// EQ 100 000
///
/// Not all floating point condition codes are supported.
///
/// # Panics
///
/// Panics for the condition codes listed in the final arm below.
fn fcc2opc(cond: FloatCC) -> u16 {
    use crate::ir::condcodes::FloatCC::*;
    match cond {
        Ordered => 0xb, // EQ|LT|GT => *np (P=0)
        Unordered => 0xa, // UN => *p (P=1)
        OrderedNotEqual => 0x5, // LT|GT => *ne (Z=0),
        UnorderedOrEqual => 0x4, // UN|EQ => *e (Z=1)
        GreaterThan => 0x7, // GT => *a (C=0&Z=0)
        GreaterThanOrEqual => 0x3, // GT|EQ => *ae (C=0)
        UnorderedOrLessThan => 0x2, // UN|LT => *b (C=1)
        UnorderedOrLessThanOrEqual => 0x6, // UN|LT|EQ => *be (Z=1|C=1)
        // NOTE(review): given the flag table above, these conditions don't correspond to a
        // single flag test after ucomiss/ucomisd — presumably callers must legalize them
        // into the supported codes before encoding; confirm against the legalizer.
        Equal | // EQ
        NotEqual | // UN|LT|GT
        LessThan | // LT
        LessThanOrEqual | // LT|EQ
        UnorderedOrGreaterThan | // UN|GT
        UnorderedOrGreaterThanOrEqual // UN|GT|EQ
        => panic!("{} not supported", cond),
    }
}
|
||||
|
||||
/// Emit a single-byte branch displacement to `destination`.
|
||||
fn disp1<CS: CodeSink + ?Sized>(destination: Ebb, func: &Function, sink: &mut CS) {
|
||||
let delta = func.offsets[destination].wrapping_sub(sink.offset() + 1);
|
||||
sink.put1(delta as u8);
|
||||
}
|
||||
|
||||
/// Emit a four-byte branch displacement to `destination`.
|
||||
fn disp4<CS: CodeSink + ?Sized>(destination: Ebb, func: &Function, sink: &mut CS) {
|
||||
let delta = func.offsets[destination].wrapping_sub(sink.offset() + 4);
|
||||
sink.put4(delta);
|
||||
}
|
||||
|
||||
/// Emit a four-byte displacement to jump table `jt`.
|
||||
fn jt_disp4<CS: CodeSink + ?Sized>(jt: JumpTable, func: &Function, sink: &mut CS) {
|
||||
let delta = func.jt_offsets[jt].wrapping_sub(sink.offset() + 4);
|
||||
sink.put4(delta);
|
||||
}
|
||||
778
cranelift/codegen/src/isa/x86/enc_tables.rs
Normal file
778
cranelift/codegen/src/isa/x86/enc_tables.rs
Normal file
@@ -0,0 +1,778 @@
|
||||
//! Encoding tables for x86 ISAs.
|
||||
|
||||
use super::registers::*;
|
||||
use crate::bitset::BitSet;
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::{self, Function, Inst, InstBuilder};
|
||||
use crate::isa;
|
||||
use crate::isa::constraints::*;
|
||||
use crate::isa::enc_tables::*;
|
||||
use crate::isa::encoding::base_size;
|
||||
use crate::isa::encoding::RecipeSizing;
|
||||
use crate::isa::RegUnit;
|
||||
use crate::regalloc::RegDiversions;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs"));
|
||||
include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs"));
|
||||
|
||||
pub fn needs_sib_byte(reg: RegUnit) -> bool {
|
||||
reg == RU::r12 as RegUnit || reg == RU::rsp as RegUnit
|
||||
}
|
||||
pub fn needs_offset(reg: RegUnit) -> bool {
|
||||
reg == RU::r13 as RegUnit || reg == RU::rbp as RegUnit
|
||||
}
|
||||
pub fn needs_sib_byte_or_offset(reg: RegUnit) -> bool {
|
||||
needs_sib_byte(reg) || needs_offset(reg)
|
||||
}
|
||||
|
||||
fn additional_size_if(
|
||||
op_index: usize,
|
||||
inst: Inst,
|
||||
divert: &RegDiversions,
|
||||
func: &Function,
|
||||
condition_func: fn(RegUnit) -> bool,
|
||||
) -> u8 {
|
||||
let addr_reg = divert.reg(func.dfg.inst_args(inst)[op_index], &func.locations);
|
||||
if condition_func(addr_reg) {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
fn size_plus_maybe_offset_for_in_reg_0(
|
||||
sizing: &RecipeSizing,
|
||||
inst: Inst,
|
||||
divert: &RegDiversions,
|
||||
func: &Function,
|
||||
) -> u8 {
|
||||
sizing.base_size + additional_size_if(0, inst, divert, func, needs_offset)
|
||||
}
|
||||
fn size_plus_maybe_offset_for_in_reg_1(
|
||||
sizing: &RecipeSizing,
|
||||
inst: Inst,
|
||||
divert: &RegDiversions,
|
||||
func: &Function,
|
||||
) -> u8 {
|
||||
sizing.base_size + additional_size_if(1, inst, divert, func, needs_offset)
|
||||
}
|
||||
fn size_plus_maybe_sib_for_in_reg_0(
|
||||
sizing: &RecipeSizing,
|
||||
inst: Inst,
|
||||
divert: &RegDiversions,
|
||||
func: &Function,
|
||||
) -> u8 {
|
||||
sizing.base_size + additional_size_if(0, inst, divert, func, needs_sib_byte)
|
||||
}
|
||||
fn size_plus_maybe_sib_for_in_reg_1(
|
||||
sizing: &RecipeSizing,
|
||||
inst: Inst,
|
||||
divert: &RegDiversions,
|
||||
func: &Function,
|
||||
) -> u8 {
|
||||
sizing.base_size + additional_size_if(1, inst, divert, func, needs_sib_byte)
|
||||
}
|
||||
fn size_plus_maybe_sib_or_offset_for_in_reg_0(
|
||||
sizing: &RecipeSizing,
|
||||
inst: Inst,
|
||||
divert: &RegDiversions,
|
||||
func: &Function,
|
||||
) -> u8 {
|
||||
sizing.base_size + additional_size_if(0, inst, divert, func, needs_sib_byte_or_offset)
|
||||
}
|
||||
fn size_plus_maybe_sib_or_offset_for_in_reg_1(
|
||||
sizing: &RecipeSizing,
|
||||
inst: Inst,
|
||||
divert: &RegDiversions,
|
||||
func: &Function,
|
||||
) -> u8 {
|
||||
sizing.base_size + additional_size_if(1, inst, divert, func, needs_sib_byte_or_offset)
|
||||
}
|
||||
|
||||
/// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`.
///
/// When the ISA tolerates native division traps and the operation is `sdiv`, a single
/// `x86_sdivmodx` suffices. Otherwise a control-flow diamond handles the `-1` divisor
/// specially (`INT_MIN / -1` overflows; `x % -1` is 0), and with `avoid_div_traps` an
/// explicit division-by-zero trap is inserted before the divide.
fn expand_sdivrem(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    isa: &isa::TargetIsa,
) {
    // Extract the operands and remember whether we want the remainder or the quotient.
    let (x, y, is_srem) = match func.dfg[inst] {
        ir::InstructionData::Binary {
            opcode: ir::Opcode::Sdiv,
            args,
        } => (args[0], args[1], false),
        ir::InstructionData::Binary {
            opcode: ir::Opcode::Srem,
            args,
        } => (args[0], args[1], true),
        _ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)),
    };
    let avoid_div_traps = isa.flags().avoid_div_traps();
    let old_ebb = func.layout.pp_ebb(inst);
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);

    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);
    pos.func.dfg.clear_results(inst);

    // If we can tolerate native division traps, sdiv doesn't need branching.
    if !avoid_div_traps && !is_srem {
        // Sign-extend x into the high half expected by the double-width divide.
        let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
        pos.ins().with_result(result).x86_sdivmodx(x, xhi, y);
        pos.remove_inst();
        return;
    }

    // EBB handling the -1 divisor case.
    let minus_one = pos.func.dfg.make_ebb();

    // Final EBB with one argument representing the final result value.
    let done = pos.func.dfg.make_ebb();

    // Move the `inst` result value onto the `done` EBB.
    pos.func.dfg.attach_ebb_param(done, result);

    // Start by checking for a -1 divisor which needs to be handled specially.
    let is_m1 = pos.ins().ifcmp_imm(y, -1);
    pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]);

    // Put in an explicit division-by-zero trap if the environment requires it.
    if avoid_div_traps {
        pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
    }

    // Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division
    // by zero.
    let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
    let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y);
    let divres = if is_srem { rem } else { quot };
    pos.ins().jump(done, &[divres]);

    // Now deal with the -1 divisor case.
    pos.insert_ebb(minus_one);
    let m1_result = if is_srem {
        // x % -1 = 0.
        pos.ins().iconst(ty, 0)
    } else {
        // Explicitly check for overflow: Trap when x == INT_MIN.
        debug_assert!(avoid_div_traps, "Native trapping divide handled above");
        let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1));
        pos.ins()
            .trapif(IntCC::Equal, f, ir::TrapCode::IntegerOverflow);
        // x / -1 = -x.
        pos.ins().irsub_imm(x, 0)
    };

    // Recycle the original instruction as a jump.
    pos.func.dfg.replace(inst).jump(done, &[m1_result]);

    // Finally insert a label for the completion.
    pos.next_inst();
    pos.insert_ebb(done);

    // All three EBBs gained or lost predecessors; refresh the CFG for each.
    cfg.recompute_ebb(pos.func, old_ebb);
    cfg.recompute_ebb(pos.func, minus_one);
    cfg.recompute_ebb(pos.func, done);
}
|
||||
|
||||
/// Expand the `udiv` and `urem` instructions using `x86_udivmodx`.
///
/// Unlike the signed case, unsigned division needs no `-1` divisor diamond: only division
/// by zero can trap, so an explicit `trapz` is inserted when `avoid_div_traps` is set and
/// the rest is a straight-line `x86_udivmodx` with a zero high half.
fn expand_udivrem(
    inst: ir::Inst,
    func: &mut ir::Function,
    _cfg: &mut ControlFlowGraph,
    isa: &isa::TargetIsa,
) {
    // Extract the operands and remember whether we want the remainder or the quotient.
    let (x, y, is_urem) = match func.dfg[inst] {
        ir::InstructionData::Binary {
            opcode: ir::Opcode::Udiv,
            args,
        } => (args[0], args[1], false),
        ir::InstructionData::Binary {
            opcode: ir::Opcode::Urem,
            args,
        } => (args[0], args[1], true),
        _ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)),
    };
    let avoid_div_traps = isa.flags().avoid_div_traps();
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);

    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);
    pos.func.dfg.clear_results(inst);

    // Put in an explicit division-by-zero trap if the environment requires it.
    if avoid_div_traps {
        pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
    }

    // Now it is safe to execute the `x86_udivmodx` instruction.
    // The high half of the dividend is zero for an unsigned divide.
    let xhi = pos.ins().iconst(ty, 0);
    // Reuse the original result value for whichever output (quotient or remainder) we need.
    let reuse = if is_urem {
        [None, Some(result)]
    } else {
        [Some(result), None]
    };
    pos.ins().with_results(reuse).x86_udivmodx(x, xhi, y);
    pos.remove_inst();
}
|
||||
|
||||
/// Expand the `fmin` and `fmax` instructions using the x86 `x86_fmin` and `x86_fmax`
/// instructions.
///
/// The native instructions alone are not IEEE-correct for equal operands (signed zeros)
/// or NaNs, so this builds a small CFG that dispatches on the comparison class first.
fn expand_minmax(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &isa::TargetIsa,
) {
    use crate::ir::condcodes::FloatCC;

    // For fmin, equal-magnitude ties are resolved with `bor` (propagates a -0.0 sign bit);
    // for fmax with `band` (clears it).
    let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] {
        ir::InstructionData::Binary {
            opcode: ir::Opcode::Fmin,
            args,
        } => (args[0], args[1], ir::Opcode::X86Fmin, ir::Opcode::Bor),
        ir::InstructionData::Binary {
            opcode: ir::Opcode::Fmax,
            args,
        } => (args[0], args[1], ir::Opcode::X86Fmax, ir::Opcode::Band),
        _ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)),
    };
    let old_ebb = func.layout.pp_ebb(inst);

    // We need to handle the following conditions, depending on how x and y compare:
    //
    // 1. LT or GT: The native `x86_opc` min/max instruction does what we need.
    // 2. EQ: We need to use `bitwise_opc` to make sure that
    //    fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0.
    // 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical.

    // EBB handling case 3) where one operand is NaN.
    let uno_ebb = func.dfg.make_ebb();

    // EBB that handles the unordered or equal cases 2) and 3).
    let ueq_ebb = func.dfg.make_ebb();

    // Final EBB with one argument representing the final result value.
    let done = func.dfg.make_ebb();

    // The basic blocks are laid out to minimize branching for the common cases:
    //
    // 1) One branch not taken, one jump.
    // 2) One branch taken.
    // 3) Two branches taken, one jump.

    // Move the `inst` result value onto the `done` EBB.
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);
    func.dfg.clear_results(inst);
    func.dfg.attach_ebb_param(done, result);

    // Test for case 1) ordered and not equal.
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);
    let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y);
    pos.ins().brnz(cmp_ueq, ueq_ebb, &[]);

    // Handle the common ordered, not equal (LT|GT) case.
    let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0;
    let one_result = pos.func.dfg.first_result(one_inst);
    pos.ins().jump(done, &[one_result]);

    // Case 3) Unordered.
    // We know that at least one operand is a NaN that needs to be propagated. We simply use an
    // `fadd` instruction which has the same NaN propagation semantics.
    pos.insert_ebb(uno_ebb);
    let uno_result = pos.ins().fadd(x, y);
    pos.ins().jump(done, &[uno_result]);

    // Case 2) or 3).
    pos.insert_ebb(ueq_ebb);
    // Test for case 3) (UN) one value is NaN.
    // TODO: When we get support for flag values, we can reuse the above comparison.
    let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y);
    pos.ins().brnz(cmp_uno, uno_ebb, &[]);

    // We are now in case 2) where x and y compare EQ.
    // We need a bitwise operation to get the sign right.
    let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0;
    let bw_result = pos.func.dfg.first_result(bw_inst);
    // This should become a fall-through for this second most common case.
    // Recycle the original instruction as a jump.
    pos.func.dfg.replace(inst).jump(done, &[bw_result]);

    // Finally insert a label for the completion.
    pos.next_inst();
    pos.insert_ebb(done);

    // Refresh predecessor information for every EBB touched by the expansion.
    cfg.recompute_ebb(pos.func, old_ebb);
    cfg.recompute_ebb(pos.func, ueq_ebb);
    cfg.recompute_ebb(pos.func, uno_ebb);
    cfg.recompute_ebb(pos.func, done);
}
|
||||
|
||||
/// x86 has no unsigned-to-float conversions. We handle the easy case of zero-extending i32 to
/// i64 with a pattern, the rest needs more code.
///
/// For 64-bit inputs that look negative when reinterpreted as signed, the value is halved
/// (keeping the shifted-out LSB OR'ed back in — presumably acting as a sticky bit so the
/// rounding of the signed conversion stays correct; confirm against IEEE rounding rules),
/// converted signed, then doubled on the FP side.
fn expand_fcvt_from_uint(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &isa::TargetIsa,
) {
    use crate::ir::condcodes::IntCC;

    let x;
    match func.dfg[inst] {
        ir::InstructionData::Unary {
            opcode: ir::Opcode::FcvtFromUint,
            arg,
        } => x = arg,
        _ => panic!("Need fcvt_from_uint: {}", func.dfg.display_inst(inst, None)),
    }
    let xty = func.dfg.value_type(x);
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    // Conversion from unsigned 32-bit is easy on x86-64.
    // TODO: This should be guarded by an ISA check.
    if xty == ir::types::I32 {
        let wide = pos.ins().uextend(ir::types::I64, x);
        pos.func.dfg.replace(inst).fcvt_from_sint(ty, wide);
        return;
    }

    let old_ebb = pos.func.layout.pp_ebb(inst);

    // EBB handling the case where x < 0.
    let neg_ebb = pos.func.dfg.make_ebb();

    // Final EBB with one argument representing the final result value.
    let done = pos.func.dfg.make_ebb();

    // Move the `inst` result value onto the `done` EBB.
    pos.func.dfg.clear_results(inst);
    pos.func.dfg.attach_ebb_param(done, result);

    // If x as a signed int is not negative, we can use the existing `fcvt_from_sint` instruction.
    let is_neg = pos.ins().icmp_imm(IntCC::SignedLessThan, x, 0);
    pos.ins().brnz(is_neg, neg_ebb, &[]);

    // Easy case: just use a signed conversion.
    let posres = pos.ins().fcvt_from_sint(ty, x);
    pos.ins().jump(done, &[posres]);

    // Now handle the negative case.
    pos.insert_ebb(neg_ebb);

    // Divide x by two to get it in range for the signed conversion, keep the LSB, and scale it
    // back up on the FP side.
    let ihalf = pos.ins().ushr_imm(x, 1);
    let lsb = pos.ins().band_imm(x, 1);
    let ifinal = pos.ins().bor(ihalf, lsb);
    let fhalf = pos.ins().fcvt_from_sint(ty, ifinal);
    let negres = pos.ins().fadd(fhalf, fhalf);

    // Recycle the original instruction as a jump.
    pos.func.dfg.replace(inst).jump(done, &[negres]);

    // Finally insert a label for the completion.
    pos.next_inst();
    pos.insert_ebb(done);

    cfg.recompute_ebb(pos.func, old_ebb);
    cfg.recompute_ebb(pos.func, neg_ebb);
    cfg.recompute_ebb(pos.func, done);
}
|
||||
|
||||
/// Expand `fcvt_to_sint` into `x86_cvtt2si` plus explicit NaN/overflow traps.
///
/// `x86_cvtt2si` returns INT_MIN instead of trapping on NaN or out-of-range input, so the
/// expansion keeps the fast path branch-free (one not-taken branch) and only runs the slow
/// checks when the conversion produced INT_MIN.
fn expand_fcvt_to_sint(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &isa::TargetIsa,
) {
    use crate::ir::condcodes::{FloatCC, IntCC};
    use crate::ir::immediates::{Ieee32, Ieee64};

    let x = match func.dfg[inst] {
        ir::InstructionData::Unary {
            opcode: ir::Opcode::FcvtToSint,
            arg,
        } => arg,
        _ => panic!("Need fcvt_to_sint: {}", func.dfg.display_inst(inst, None)),
    };
    let old_ebb = func.layout.pp_ebb(inst);
    let xty = func.dfg.value_type(x);
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);

    // Final EBB after the bad value checks.
    let done = func.dfg.make_ebb();

    // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or overflow.
    // It produces an INT_MIN result instead.
    func.dfg.replace(inst).x86_cvtt2si(ty, x);

    let mut pos = FuncCursor::new(func).after_inst(inst);
    pos.use_srcloc(inst);

    // Anything other than INT_MIN is known-good; skip all checks.
    let is_done = pos
        .ins()
        .icmp_imm(IntCC::NotEqual, result, 1 << (ty.lane_bits() - 1));
    pos.ins().brnz(is_done, done, &[]);

    // We now have the following possibilities:
    //
    // 1. INT_MIN was actually the correct conversion result.
    // 2. The input was NaN -> trap bad_toint
    // 3. The input was out of range -> trap int_ovf
    //

    // Check for NaN.
    let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
    pos.ins()
        .trapnz(is_nan, ir::TrapCode::BadConversionToInteger);

    // Check for case 1: INT_MIN is the correct result.
    // Determine the smallest floating point number that would convert to INT_MIN.
    let mut overflow_cc = FloatCC::LessThan;
    let output_bits = ty.lane_bits();
    let flimit = match xty {
        ir::types::F32 =>
        // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
        // there are values less than -2^(N-1) that convert correctly to INT_MIN.
        {
            pos.ins().f32const(if output_bits < 32 {
                overflow_cc = FloatCC::LessThanOrEqual;
                Ieee32::fcvt_to_sint_negative_overflow(output_bits)
            } else {
                Ieee32::pow2(output_bits - 1).neg()
            })
        }
        ir::types::F64 =>
        // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
        // there are values less than -2^(N-1) that convert correctly to INT_MIN.
        {
            pos.ins().f64const(if output_bits < 64 {
                overflow_cc = FloatCC::LessThanOrEqual;
                Ieee64::fcvt_to_sint_negative_overflow(output_bits)
            } else {
                Ieee64::pow2(output_bits - 1).neg()
            })
        }
        _ => panic!("Can't convert {}", xty),
    };
    let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
    pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);

    // Finally, we could have a positive value that is too large.
    let fzero = match xty {
        ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
        ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
        _ => panic!("Can't convert {}", xty),
    };
    let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
    pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);

    pos.ins().jump(done, &[]);
    pos.insert_ebb(done);

    cfg.recompute_ebb(pos.func, old_ebb);
    cfg.recompute_ebb(pos.func, done);
}
|
||||
|
||||
/// Expand `fcvt_to_sint_sat` into `x86_cvtt2si` plus saturating fix-ups.
///
/// Like `expand_fcvt_to_sint`, but instead of trapping: NaN yields 0, and out-of-range
/// inputs are clamped to the destination type's min/max value.
fn expand_fcvt_to_sint_sat(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &isa::TargetIsa,
) {
    use crate::ir::condcodes::{FloatCC, IntCC};
    use crate::ir::immediates::{Ieee32, Ieee64};

    let x = match func.dfg[inst] {
        ir::InstructionData::Unary {
            opcode: ir::Opcode::FcvtToSintSat,
            arg,
        } => arg,
        _ => panic!(
            "Need fcvt_to_sint_sat: {}",
            func.dfg.display_inst(inst, None)
        ),
    };

    let old_ebb = func.layout.pp_ebb(inst);
    let xty = func.dfg.value_type(x);
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);

    // Final EBB after the bad value checks.
    let done_ebb = func.dfg.make_ebb();
    func.dfg.clear_results(inst);
    func.dfg.attach_ebb_param(done_ebb, result);

    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or
    // overflow. It produces an INT_MIN result instead.
    let cvtt2si = pos.ins().x86_cvtt2si(ty, x);

    // Anything other than INT_MIN is known-good; skip all checks.
    let is_done = pos
        .ins()
        .icmp_imm(IntCC::NotEqual, cvtt2si, 1 << (ty.lane_bits() - 1));
    pos.ins().brnz(is_done, done_ebb, &[cvtt2si]);

    // We now have the following possibilities:
    //
    // 1. INT_MIN was actually the correct conversion result.
    // 2. The input was NaN -> replace the result value with 0.
    // 3. The input was out of range -> saturate the result to the min/max value.

    // Check for NaN, which is truncated to 0.
    let zero = pos.ins().iconst(ty, 0);
    let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
    pos.ins().brnz(is_nan, done_ebb, &[zero]);

    // Check for case 1: INT_MIN is the correct result.
    // Determine the smallest floating point number that would convert to INT_MIN.
    let mut overflow_cc = FloatCC::LessThan;
    let output_bits = ty.lane_bits();
    let flimit = match xty {
        ir::types::F32 =>
        // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
        // there are values less than -2^(N-1) that convert correctly to INT_MIN.
        {
            pos.ins().f32const(if output_bits < 32 {
                overflow_cc = FloatCC::LessThanOrEqual;
                Ieee32::fcvt_to_sint_negative_overflow(output_bits)
            } else {
                Ieee32::pow2(output_bits - 1).neg()
            })
        }
        ir::types::F64 =>
        // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
        // there are values less than -2^(N-1) that convert correctly to INT_MIN.
        {
            pos.ins().f64const(if output_bits < 64 {
                overflow_cc = FloatCC::LessThanOrEqual;
                Ieee64::fcvt_to_sint_negative_overflow(output_bits)
            } else {
                Ieee64::pow2(output_bits - 1).neg()
            })
        }
        _ => panic!("Can't convert {}", xty),
    };

    // Negative overflow saturates to the type's minimum value.
    let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
    let min_imm = match ty {
        ir::types::I32 => i32::min_value() as i64,
        ir::types::I64 => i64::min_value(),
        _ => panic!("Don't know the min value for {}", ty),
    };
    let min_value = pos.ins().iconst(ty, min_imm);
    pos.ins().brnz(overflow, done_ebb, &[min_value]);

    // Finally, we could have a positive value that is too large.
    let fzero = match xty {
        ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
        ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
        _ => panic!("Can't convert {}", xty),
    };

    let max_imm = match ty {
        ir::types::I32 => i32::max_value() as i64,
        ir::types::I64 => i64::max_value(),
        _ => panic!("Don't know the max value for {}", ty),
    };
    let max_value = pos.ins().iconst(ty, max_imm);

    // Positive overflow saturates to the type's maximum value.
    let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
    pos.ins().brnz(overflow, done_ebb, &[max_value]);

    // Recycle the original instruction.
    pos.func.dfg.replace(inst).jump(done_ebb, &[cvtt2si]);

    // Finally insert a label for the completion.
    pos.next_inst();
    pos.insert_ebb(done_ebb);

    cfg.recompute_ebb(pos.func, old_ebb);
    cfg.recompute_ebb(pos.func, done_ebb);
}
|
||||
|
||||
/// Expand `fcvt_to_uint` into signed `x86_cvtt2si` conversions plus explicit traps.
///
/// x86 has no unsigned truncating conversion, so inputs >= 2^(N-1) are rebased by
/// subtracting 2^(N-1) before the signed conversion and adding it back afterwards.
/// NaN and out-of-range inputs trap.
fn expand_fcvt_to_uint(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &isa::TargetIsa,
) {
    use crate::ir::condcodes::{FloatCC, IntCC};
    use crate::ir::immediates::{Ieee32, Ieee64};

    let x = match func.dfg[inst] {
        ir::InstructionData::Unary {
            opcode: ir::Opcode::FcvtToUint,
            arg,
        } => arg,
        _ => panic!("Need fcvt_to_uint: {}", func.dfg.display_inst(inst, None)),
    };

    let old_ebb = func.layout.pp_ebb(inst);
    let xty = func.dfg.value_type(x);
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);

    // EBB handling numbers >= 2^(N-1).
    let large = func.dfg.make_ebb();

    // Final EBB after the bad value checks.
    let done = func.dfg.make_ebb();

    // Move the `inst` result value onto the `done` EBB.
    func.dfg.clear_results(inst);
    func.dfg.attach_ebb_param(done, result);

    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in
    // the destination integer type.
    let pow2nm1 = match xty {
        ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
        ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
        _ => panic!("Can't convert {}", xty),
    };
    let is_large = pos.ins().ffcmp(x, pow2nm1);
    pos.ins()
        .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]);

    // We need to generate a specific trap code when `x` is NaN, so reuse the flags from the
    // previous comparison.
    pos.ins().trapff(
        FloatCC::Unordered,
        is_large,
        ir::TrapCode::BadConversionToInteger,
    );

    // Now we know that x < 2^(N-1) and not NaN.
    let sres = pos.ins().x86_cvtt2si(ty, x);
    let is_neg = pos.ins().ifcmp_imm(sres, 0);
    pos.ins()
        .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]);
    // A negative signed result here means x < 0, which is out of unsigned range.
    pos.ins().trap(ir::TrapCode::IntegerOverflow);

    // Handle the case where x >= 2^(N-1) and not NaN.
    pos.insert_ebb(large);
    let adjx = pos.ins().fsub(x, pow2nm1);
    let lres = pos.ins().x86_cvtt2si(ty, adjx);
    let is_neg = pos.ins().ifcmp_imm(lres, 0);
    pos.ins()
        .trapif(IntCC::SignedLessThan, is_neg, ir::TrapCode::IntegerOverflow);
    // Add the 2^(N-1) bias back in on the integer side.
    let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));

    // Recycle the original instruction as a jump.
    pos.func.dfg.replace(inst).jump(done, &[lfinal]);

    // Finally insert a label for the completion.
    pos.next_inst();
    pos.insert_ebb(done);

    cfg.recompute_ebb(pos.func, old_ebb);
    cfg.recompute_ebb(pos.func, large);
    cfg.recompute_ebb(pos.func, done);
}
|
||||
|
||||
/// Expand `fcvt_to_uint_sat` into signed `x86_cvtt2si` conversions plus saturating fix-ups.
///
/// Like `expand_fcvt_to_uint`, but instead of trapping: NaN and negative inputs yield 0,
/// and positive overflow saturates to the destination type's maximum value.
fn expand_fcvt_to_uint_sat(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &isa::TargetIsa,
) {
    use crate::ir::condcodes::{FloatCC, IntCC};
    use crate::ir::immediates::{Ieee32, Ieee64};

    let x = match func.dfg[inst] {
        ir::InstructionData::Unary {
            opcode: ir::Opcode::FcvtToUintSat,
            arg,
        } => arg,
        _ => panic!(
            "Need fcvt_to_uint_sat: {}",
            func.dfg.display_inst(inst, None)
        ),
    };

    let old_ebb = func.layout.pp_ebb(inst);
    let xty = func.dfg.value_type(x);
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);

    // EBB handling numbers >= 2^(N-1).
    let large = func.dfg.make_ebb();

    // Final EBB after the bad value checks.
    let done = func.dfg.make_ebb();

    // Move the `inst` result value onto the `done` EBB.
    func.dfg.clear_results(inst);
    func.dfg.attach_ebb_param(done, result);

    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in
    // the destination integer type.
    let pow2nm1 = match xty {
        ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
        ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
        _ => panic!("Can't convert {}", xty),
    };
    let zero = pos.ins().iconst(ty, 0);
    let is_large = pos.ins().ffcmp(x, pow2nm1);
    pos.ins()
        .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]);

    // We need to generate zero when `x` is NaN, so reuse the flags from the previous comparison.
    pos.ins().brff(FloatCC::Unordered, is_large, done, &[zero]);

    // Now we know that x < 2^(N-1) and not NaN. If the result of the cvtt2si is positive, we're
    // done; otherwise saturate to the minimum unsigned value, that is 0.
    let sres = pos.ins().x86_cvtt2si(ty, x);
    let is_neg = pos.ins().ifcmp_imm(sres, 0);
    pos.ins()
        .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]);
    pos.ins().jump(done, &[zero]);

    // Handle the case where x >= 2^(N-1) and not NaN.
    pos.insert_ebb(large);
    let adjx = pos.ins().fsub(x, pow2nm1);
    let lres = pos.ins().x86_cvtt2si(ty, adjx);
    // Positive overflow saturates to the type's maximum unsigned value.
    let max_value = pos.ins().iconst(
        ty,
        match ty {
            ir::types::I32 => u32::max_value() as i64,
            ir::types::I64 => u64::max_value() as i64,
            _ => panic!("Can't convert {}", ty),
        },
    );
    let is_neg = pos.ins().ifcmp_imm(lres, 0);
    pos.ins()
        .brif(IntCC::SignedLessThan, is_neg, done, &[max_value]);
    // Add the 2^(N-1) bias back in on the integer side.
    let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));

    // Recycle the original instruction as a jump.
    pos.func.dfg.replace(inst).jump(done, &[lfinal]);

    // Finally insert a label for the completion.
    pos.next_inst();
    pos.insert_ebb(done);

    cfg.recompute_ebb(pos.func, old_ebb);
    cfg.recompute_ebb(pos.func, large);
    cfg.recompute_ebb(pos.func, done);
}
|
||||
145
cranelift/codegen/src/isa/x86/mod.rs
Normal file
145
cranelift/codegen/src/isa/x86/mod.rs
Normal file
@@ -0,0 +1,145 @@
|
||||
//! x86 Instruction Set Architectures.
|
||||
|
||||
mod abi;
|
||||
mod binemit;
|
||||
mod enc_tables;
|
||||
mod registers;
|
||||
pub mod settings;
|
||||
|
||||
use super::super::settings as shared_settings;
|
||||
#[cfg(feature = "testing_hooks")]
|
||||
use crate::binemit::CodeSink;
|
||||
use crate::binemit::{emit_function, MemoryCodeSink};
|
||||
use crate::ir;
|
||||
use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
|
||||
use crate::isa::Builder as IsaBuilder;
|
||||
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
||||
use crate::regalloc;
|
||||
use crate::result::CodegenResult;
|
||||
use crate::timing;
|
||||
use core::fmt;
|
||||
use std::boxed::Box;
|
||||
use target_lexicon::{PointerWidth, Triple};
|
||||
|
||||
#[allow(dead_code)]
|
||||
struct Isa {
|
||||
triple: Triple,
|
||||
shared_flags: shared_settings::Flags,
|
||||
isa_flags: settings::Flags,
|
||||
cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
|
||||
}
|
||||
|
||||
/// Get an ISA builder for creating x86 targets.
|
||||
pub fn isa_builder(triple: Triple) -> IsaBuilder {
|
||||
IsaBuilder {
|
||||
triple,
|
||||
setup: settings::builder(),
|
||||
constructor: isa_constructor,
|
||||
}
|
||||
}
|
||||
|
||||
fn isa_constructor(
|
||||
triple: Triple,
|
||||
shared_flags: shared_settings::Flags,
|
||||
builder: shared_settings::Builder,
|
||||
) -> Box<TargetIsa> {
|
||||
let level1 = match triple.pointer_width().unwrap() {
|
||||
PointerWidth::U16 => unimplemented!("x86-16"),
|
||||
PointerWidth::U32 => &enc_tables::LEVEL1_I32[..],
|
||||
PointerWidth::U64 => &enc_tables::LEVEL1_I64[..],
|
||||
};
|
||||
Box::new(Isa {
|
||||
triple,
|
||||
isa_flags: settings::Flags::new(&shared_flags, builder),
|
||||
shared_flags,
|
||||
cpumode: level1,
|
||||
})
|
||||
}
|
||||
|
||||
impl TargetIsa for Isa {
|
||||
fn name(&self) -> &'static str {
|
||||
"x86"
|
||||
}
|
||||
|
||||
fn triple(&self) -> &Triple {
|
||||
&self.triple
|
||||
}
|
||||
|
||||
fn flags(&self) -> &shared_settings::Flags {
|
||||
&self.shared_flags
|
||||
}
|
||||
|
||||
fn uses_cpu_flags(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn uses_complex_addresses(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn register_info(&self) -> RegInfo {
|
||||
registers::INFO.clone()
|
||||
}
|
||||
|
||||
fn encoding_info(&self) -> EncInfo {
|
||||
enc_tables::INFO.clone()
|
||||
}
|
||||
|
||||
fn legal_encodings<'a>(
|
||||
&'a self,
|
||||
func: &'a ir::Function,
|
||||
inst: &'a ir::InstructionData,
|
||||
ctrl_typevar: ir::Type,
|
||||
) -> Encodings<'a> {
|
||||
lookup_enclist(
|
||||
ctrl_typevar,
|
||||
inst,
|
||||
func,
|
||||
self.cpumode,
|
||||
&enc_tables::LEVEL2[..],
|
||||
&enc_tables::ENCLISTS[..],
|
||||
&enc_tables::LEGALIZE_ACTIONS[..],
|
||||
&enc_tables::RECIPE_PREDICATES[..],
|
||||
&enc_tables::INST_PREDICATES[..],
|
||||
self.isa_flags.predicate_view(),
|
||||
)
|
||||
}
|
||||
|
||||
fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
|
||||
abi::legalize_signature(sig, &self.triple, current)
|
||||
}
|
||||
|
||||
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
|
||||
abi::regclass_for_abi_type(ty)
|
||||
}
|
||||
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
|
||||
abi::allocatable_registers(func, &self.triple)
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing_hooks")]
|
||||
fn emit_inst(
|
||||
&self,
|
||||
func: &ir::Function,
|
||||
inst: ir::Inst,
|
||||
divert: &mut regalloc::RegDiversions,
|
||||
sink: &mut CodeSink,
|
||||
) {
|
||||
binemit::emit_inst(func, inst, divert, sink)
|
||||
}
|
||||
|
||||
fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
|
||||
emit_function(func, binemit::emit_inst, sink)
|
||||
}
|
||||
|
||||
fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> {
|
||||
let _tt = timing::prologue_epilogue();
|
||||
abi::prologue_epilogue(func, self)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
|
||||
}
|
||||
}
|
||||
63
cranelift/codegen/src/isa/x86/registers.rs
Normal file
63
cranelift/codegen/src/isa/x86/registers.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
//! x86 register descriptions.
|
||||
|
||||
use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/registers-x86.rs"));
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::isa::RegUnit;
|
||||
use std::string::{String, ToString};
|
||||
|
||||
#[test]
|
||||
fn unit_encodings() {
|
||||
// The encoding of integer registers is not alphabetical.
|
||||
assert_eq!(INFO.parse_regunit("rax"), Some(0));
|
||||
assert_eq!(INFO.parse_regunit("rbx"), Some(3));
|
||||
assert_eq!(INFO.parse_regunit("rcx"), Some(1));
|
||||
assert_eq!(INFO.parse_regunit("rdx"), Some(2));
|
||||
assert_eq!(INFO.parse_regunit("rsi"), Some(6));
|
||||
assert_eq!(INFO.parse_regunit("rdi"), Some(7));
|
||||
assert_eq!(INFO.parse_regunit("rbp"), Some(5));
|
||||
assert_eq!(INFO.parse_regunit("rsp"), Some(4));
|
||||
assert_eq!(INFO.parse_regunit("r8"), Some(8));
|
||||
assert_eq!(INFO.parse_regunit("r15"), Some(15));
|
||||
|
||||
assert_eq!(INFO.parse_regunit("xmm0"), Some(16));
|
||||
assert_eq!(INFO.parse_regunit("xmm15"), Some(31));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unit_names() {
|
||||
fn uname(ru: RegUnit) -> String {
|
||||
INFO.display_regunit(ru).to_string()
|
||||
}
|
||||
|
||||
assert_eq!(uname(0), "%rax");
|
||||
assert_eq!(uname(3), "%rbx");
|
||||
assert_eq!(uname(1), "%rcx");
|
||||
assert_eq!(uname(2), "%rdx");
|
||||
assert_eq!(uname(6), "%rsi");
|
||||
assert_eq!(uname(7), "%rdi");
|
||||
assert_eq!(uname(5), "%rbp");
|
||||
assert_eq!(uname(4), "%rsp");
|
||||
assert_eq!(uname(8), "%r8");
|
||||
assert_eq!(uname(15), "%r15");
|
||||
assert_eq!(uname(16), "%xmm0");
|
||||
assert_eq!(uname(31), "%xmm15");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn regclasses() {
|
||||
assert_eq!(GPR.intersect_index(GPR), Some(GPR.into()));
|
||||
assert_eq!(GPR.intersect_index(ABCD), Some(ABCD.into()));
|
||||
assert_eq!(GPR.intersect_index(FPR), None);
|
||||
assert_eq!(ABCD.intersect_index(GPR), Some(ABCD.into()));
|
||||
assert_eq!(ABCD.intersect_index(ABCD), Some(ABCD.into()));
|
||||
assert_eq!(ABCD.intersect_index(FPR), None);
|
||||
assert_eq!(FPR.intersect_index(FPR), Some(FPR.into()));
|
||||
assert_eq!(FPR.intersect_index(GPR), None);
|
||||
assert_eq!(FPR.intersect_index(ABCD), None);
|
||||
}
|
||||
}
|
||||
52
cranelift/codegen/src/isa/x86/settings.rs
Normal file
52
cranelift/codegen/src/isa/x86/settings.rs
Normal file
@@ -0,0 +1,52 @@
|
||||
//! x86 Settings.
|
||||
|
||||
use crate::settings::{self, detail, Builder};
|
||||
use core::fmt;
|
||||
|
||||
// Include code generated by `cranelift-codegen/meta-python/gen_settings.py`. This file contains a public
|
||||
// `Flags` struct with an impl for all of the settings defined in
|
||||
// `cranelift-codegen/meta-python/isa/x86/settings.py`.
|
||||
include!(concat!(env!("OUT_DIR"), "/settings-x86.rs"));
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{builder, Flags};
|
||||
use crate::settings::{self, Configurable};
|
||||
|
||||
#[test]
|
||||
fn presets() {
|
||||
let shared = settings::Flags::new(settings::builder());
|
||||
|
||||
// Nehalem has SSE4.1 but not BMI1.
|
||||
let mut b0 = builder();
|
||||
b0.enable("nehalem").unwrap();
|
||||
let f0 = Flags::new(&shared, b0);
|
||||
assert_eq!(f0.has_sse41(), true);
|
||||
assert_eq!(f0.has_bmi1(), false);
|
||||
|
||||
let mut b1 = builder();
|
||||
b1.enable("haswell").unwrap();
|
||||
let f1 = Flags::new(&shared, b1);
|
||||
assert_eq!(f1.has_sse41(), true);
|
||||
assert_eq!(f1.has_bmi1(), true);
|
||||
}
|
||||
#[test]
|
||||
fn display_presets() {
|
||||
// Spot check that the flags Display impl does not cause a panic
|
||||
let shared = settings::Flags::new(settings::builder());
|
||||
|
||||
let b0 = builder();
|
||||
let f0 = Flags::new(&shared, b0);
|
||||
let _ = format!("{}", f0);
|
||||
|
||||
let mut b1 = builder();
|
||||
b1.enable("nehalem").unwrap();
|
||||
let f1 = Flags::new(&shared, b1);
|
||||
let _ = format!("{}", f1);
|
||||
|
||||
let mut b2 = builder();
|
||||
b2.enable("haswell").unwrap();
|
||||
let f2 = Flags::new(&shared, b2);
|
||||
let _ = format!("{}", f2);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user