machinst x64: implement float Floor/Ceil/Trunc/Nearest as VM calls
This commit is contained in:
@@ -1,5 +1,4 @@
|
|||||||
use crate::isa::TargetIsa;
|
use crate::settings::{self, LibcallCallConv};
|
||||||
use crate::settings::LibcallCallConv;
|
|
||||||
use core::fmt;
|
use core::fmt;
|
||||||
use core::str;
|
use core::str;
|
||||||
use target_lexicon::{CallingConvention, Triple};
|
use target_lexicon::{CallingConvention, Triple};
|
||||||
@@ -39,10 +38,10 @@ impl CallConv {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the calling convention used for libcalls for the given ISA.
|
/// Returns the calling convention used for libcalls according to the current flags.
|
||||||
pub fn for_libcall(isa: &dyn TargetIsa) -> Self {
|
pub fn for_libcall(flags: &settings::Flags, default_call_conv: CallConv) -> Self {
|
||||||
match isa.flags().libcall_call_conv() {
|
match flags.libcall_call_conv() {
|
||||||
LibcallCallConv::IsaDefault => isa.default_call_conv(),
|
LibcallCallConv::IsaDefault => default_call_conv,
|
||||||
LibcallCallConv::Fast => Self::Fast,
|
LibcallCallConv::Fast => Self::Fast,
|
||||||
LibcallCallConv::Cold => Self::Cold,
|
LibcallCallConv::Cold => Self::Cold,
|
||||||
LibcallCallConv::SystemV => Self::SystemV,
|
LibcallCallConv::SystemV => Self::SystemV,
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
//! Implementation of the standard x64 ABI.
|
//! Implementation of the standard x64 ABI.
|
||||||
|
|
||||||
use alloc::vec::Vec;
|
|
||||||
use log::trace;
|
use log::trace;
|
||||||
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
|
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
|
||||||
|
|
||||||
|
use alloc::boxed::Box;
|
||||||
|
use alloc::vec::Vec;
|
||||||
use std::mem;
|
use std::mem;
|
||||||
|
|
||||||
use crate::binemit::Stackmap;
|
use crate::binemit::Stackmap;
|
||||||
@@ -1156,13 +1158,29 @@ impl ABICall for X64ABICall {
|
|||||||
}
|
}
|
||||||
|
|
||||||
match &self.dest {
|
match &self.dest {
|
||||||
&CallDest::ExtName(ref name, ref _reloc_distance) => ctx.emit(Inst::call_known(
|
&CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit(Inst::call_known(
|
||||||
name.clone(),
|
name.clone(),
|
||||||
uses,
|
uses,
|
||||||
defs,
|
defs,
|
||||||
self.loc,
|
self.loc,
|
||||||
self.opcode,
|
self.opcode,
|
||||||
)),
|
)),
|
||||||
|
&CallDest::ExtName(ref name, RelocDistance::Far) => {
|
||||||
|
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
|
||||||
|
ctx.emit(Inst::LoadExtName {
|
||||||
|
dst: tmp,
|
||||||
|
name: Box::new(name.clone()),
|
||||||
|
offset: 0,
|
||||||
|
srcloc: self.loc,
|
||||||
|
});
|
||||||
|
ctx.emit(Inst::call_unknown(
|
||||||
|
RegMem::reg(tmp.to_reg()),
|
||||||
|
uses,
|
||||||
|
defs,
|
||||||
|
self.loc,
|
||||||
|
self.opcode,
|
||||||
|
));
|
||||||
|
}
|
||||||
&CallDest::Reg(reg) => ctx.emit(Inst::call_unknown(
|
&CallDest::Reg(reg) => ctx.emit(Inst::call_unknown(
|
||||||
RegMem::reg(reg),
|
RegMem::reg(reg),
|
||||||
uses,
|
uses,
|
||||||
|
|||||||
@@ -9,7 +9,10 @@ use smallvec::SmallVec;
|
|||||||
use crate::ir::types;
|
use crate::ir::types;
|
||||||
use crate::ir::types::*;
|
use crate::ir::types::*;
|
||||||
use crate::ir::Inst as IRInst;
|
use crate::ir::Inst as IRInst;
|
||||||
use crate::ir::{condcodes::FloatCC, condcodes::IntCC, InstructionData, Opcode, TrapCode, Type};
|
use crate::ir::{
|
||||||
|
condcodes::FloatCC, condcodes::IntCC, AbiParam, ArgumentPurpose, ExternalName, InstructionData,
|
||||||
|
LibCall, Opcode, Signature, TrapCode, Type,
|
||||||
|
};
|
||||||
use alloc::boxed::Box;
|
use alloc::boxed::Box;
|
||||||
use alloc::vec::Vec;
|
use alloc::vec::Vec;
|
||||||
use cranelift_codegen_shared::condcodes::CondCode;
|
use cranelift_codegen_shared::condcodes::CondCode;
|
||||||
@@ -23,7 +26,8 @@ use crate::settings::Flags;
|
|||||||
use crate::isa::x64::abi::*;
|
use crate::isa::x64::abi::*;
|
||||||
use crate::isa::x64::inst::args::*;
|
use crate::isa::x64::inst::args::*;
|
||||||
use crate::isa::x64::inst::*;
|
use crate::isa::x64::inst::*;
|
||||||
use crate::isa::x64::X64Backend;
|
use crate::isa::{x64::X64Backend, CallConv};
|
||||||
|
use target_lexicon::Triple;
|
||||||
|
|
||||||
/// Context passed to all lowering functions.
|
/// Context passed to all lowering functions.
|
||||||
type Ctx<'a> = &'a mut dyn LowerCtx<I = Inst>;
|
type Ctx<'a> = &'a mut dyn LowerCtx<I = Inst>;
|
||||||
@@ -222,6 +226,72 @@ fn emit_cmp(ctx: Ctx, insn: IRInst) {
|
|||||||
ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs));
|
ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn make_libcall_sig(ctx: Ctx, insn: IRInst, call_conv: CallConv, ptr_ty: Type) -> Signature {
|
||||||
|
let mut sig = Signature::new(call_conv);
|
||||||
|
for i in 0..ctx.num_inputs(insn) {
|
||||||
|
sig.params.push(AbiParam::new(ctx.input_ty(insn, i)));
|
||||||
|
}
|
||||||
|
for i in 0..ctx.num_outputs(insn) {
|
||||||
|
sig.returns.push(AbiParam::new(ctx.output_ty(insn, i)));
|
||||||
|
}
|
||||||
|
if call_conv.extends_baldrdash() {
|
||||||
|
// Adds the special VMContext parameter to the signature.
|
||||||
|
sig.params
|
||||||
|
.push(AbiParam::special(ptr_ty, ArgumentPurpose::VMContext));
|
||||||
|
}
|
||||||
|
sig
|
||||||
|
}
|
||||||
|
|
||||||
|
fn emit_vm_call<C: LowerCtx<I = Inst>>(
|
||||||
|
ctx: &mut C,
|
||||||
|
flags: &Flags,
|
||||||
|
triple: &Triple,
|
||||||
|
libcall: LibCall,
|
||||||
|
insn: IRInst,
|
||||||
|
inputs: SmallVec<[InsnInput; 4]>,
|
||||||
|
outputs: SmallVec<[InsnOutput; 2]>,
|
||||||
|
) -> CodegenResult<()> {
|
||||||
|
let extname = ExternalName::LibCall(libcall);
|
||||||
|
|
||||||
|
let dist = if flags.use_colocated_libcalls() {
|
||||||
|
RelocDistance::Near
|
||||||
|
} else {
|
||||||
|
RelocDistance::Far
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO avoid recreating signatures for every single Libcall function.
|
||||||
|
let call_conv = CallConv::for_libcall(flags, CallConv::triple_default(triple));
|
||||||
|
let sig = make_libcall_sig(ctx, insn, call_conv, I64);
|
||||||
|
|
||||||
|
let loc = ctx.srcloc(insn);
|
||||||
|
let mut abi = X64ABICall::from_func(&sig, &extname, dist, loc)?;
|
||||||
|
|
||||||
|
abi.emit_stack_pre_adjust(ctx);
|
||||||
|
|
||||||
|
let vm_context = if call_conv.extends_baldrdash() { 1 } else { 0 };
|
||||||
|
assert!(inputs.len() + vm_context == abi.num_args());
|
||||||
|
|
||||||
|
for (i, input) in inputs.iter().enumerate() {
|
||||||
|
let arg_reg = input_to_reg(ctx, *input);
|
||||||
|
abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
|
||||||
|
}
|
||||||
|
if call_conv.extends_baldrdash() {
|
||||||
|
let vm_context_vreg = ctx
|
||||||
|
.get_vm_context()
|
||||||
|
.expect("should have a VMContext to pass to libcall funcs");
|
||||||
|
abi.emit_copy_reg_to_arg(ctx, inputs.len(), vm_context_vreg);
|
||||||
|
}
|
||||||
|
|
||||||
|
abi.emit_call(ctx);
|
||||||
|
for (i, output) in outputs.iter().enumerate() {
|
||||||
|
let retval_reg = output_to_reg(ctx, *output);
|
||||||
|
abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
|
||||||
|
}
|
||||||
|
abi.emit_stack_post_adjust(ctx);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
//=============================================================================
|
//=============================================================================
|
||||||
// Top-level instruction lowering entry point, for one instruction.
|
// Top-level instruction lowering entry point, for one instruction.
|
||||||
|
|
||||||
@@ -230,6 +300,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
ctx: &mut C,
|
ctx: &mut C,
|
||||||
insn: IRInst,
|
insn: IRInst,
|
||||||
flags: &Flags,
|
flags: &Flags,
|
||||||
|
triple: &Triple,
|
||||||
) -> CodegenResult<()> {
|
) -> CodegenResult<()> {
|
||||||
let op = ctx.data(insn).opcode();
|
let op = ctx.data(insn).opcode();
|
||||||
|
|
||||||
@@ -1203,6 +1274,29 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::Ceil | Opcode::Floor | Opcode::Nearest | Opcode::Trunc => {
|
||||||
|
// TODO use ROUNDSS/ROUNDSD after sse4.1.
|
||||||
|
|
||||||
|
// Lower to VM calls when there's no access to SSE4.1.
|
||||||
|
let ty = ty.unwrap();
|
||||||
|
let libcall = match (ty, op) {
|
||||||
|
(F32, Opcode::Ceil) => LibCall::CeilF32,
|
||||||
|
(F64, Opcode::Ceil) => LibCall::CeilF64,
|
||||||
|
(F32, Opcode::Floor) => LibCall::FloorF32,
|
||||||
|
(F64, Opcode::Floor) => LibCall::FloorF64,
|
||||||
|
(F32, Opcode::Nearest) => LibCall::NearestF32,
|
||||||
|
(F64, Opcode::Nearest) => LibCall::NearestF64,
|
||||||
|
(F32, Opcode::Trunc) => LibCall::TruncF32,
|
||||||
|
(F64, Opcode::Trunc) => LibCall::TruncF64,
|
||||||
|
_ => panic!(
|
||||||
|
"unexpected type/opcode {:?}/{:?} in Ceil/Floor/Nearest/Trunc",
|
||||||
|
ty, op
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
emit_vm_call(ctx, flags, triple, libcall, insn, inputs, outputs)?;
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::Load
|
Opcode::Load
|
||||||
| Opcode::Uload8
|
| Opcode::Uload8
|
||||||
| Opcode::Sload8
|
| Opcode::Sload8
|
||||||
@@ -1630,7 +1724,7 @@ impl LowerBackend for X64Backend {
|
|||||||
type MInst = Inst;
|
type MInst = Inst;
|
||||||
|
|
||||||
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
|
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
|
||||||
lower_insn_to_regs(ctx, ir_inst, &self.flags)
|
lower_insn_to_regs(ctx, ir_inst, &self.flags, &self.triple)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn lower_branch_group<C: LowerCtx<I = Inst>>(
|
fn lower_branch_group<C: LowerCtx<I = Inst>>(
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function, isa: &dyn Targ
|
|||||||
let mut args = Vec::new();
|
let mut args = Vec::new();
|
||||||
args.extend_from_slice(func.dfg.inst_args(inst));
|
args.extend_from_slice(func.dfg.inst_args(inst));
|
||||||
|
|
||||||
let call_conv = CallConv::for_libcall(isa);
|
let call_conv = CallConv::for_libcall(isa.flags(), isa.default_call_conv());
|
||||||
if call_conv.extends_baldrdash() {
|
if call_conv.extends_baldrdash() {
|
||||||
let vmctx = func
|
let vmctx = func
|
||||||
.special_param(ir::ArgumentPurpose::VMContext)
|
.special_param(ir::ArgumentPurpose::VMContext)
|
||||||
|
|||||||
@@ -8,8 +8,9 @@ use crate::inst_predicates::{has_side_effect_or_load, is_constant_64bit};
|
|||||||
use crate::ir::instructions::BranchInfo;
|
use crate::ir::instructions::BranchInfo;
|
||||||
use crate::ir::types::I64;
|
use crate::ir::types::I64;
|
||||||
use crate::ir::{
|
use crate::ir::{
|
||||||
ArgumentExtension, Block, Constant, ConstantData, ExternalName, Function, GlobalValueData,
|
ArgumentExtension, ArgumentPurpose, Block, Constant, ConstantData, ExternalName, Function,
|
||||||
Inst, InstructionData, MemFlags, Opcode, Signature, SourceLoc, Type, Value, ValueDef,
|
GlobalValueData, Inst, InstructionData, MemFlags, Opcode, Signature, SourceLoc, Type, Value,
|
||||||
|
ValueDef,
|
||||||
};
|
};
|
||||||
use crate::machinst::{
|
use crate::machinst::{
|
||||||
ABIBody, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode, VCodeBuilder,
|
ABIBody, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode, VCodeBuilder,
|
||||||
@@ -67,6 +68,8 @@ pub trait LowerCtx {
|
|||||||
/// not allow the backend to specify its own result register for the return?
|
/// not allow the backend to specify its own result register for the return?
|
||||||
/// Because there may be multiple return points.)
|
/// Because there may be multiple return points.)
|
||||||
fn retval(&self, idx: usize) -> Writable<Reg>;
|
fn retval(&self, idx: usize) -> Writable<Reg>;
|
||||||
|
/// Returns the vreg containing the VmContext parameter, if there's one.
|
||||||
|
fn get_vm_context(&self) -> Option<Reg>;
|
||||||
|
|
||||||
// General instruction queries:
|
// General instruction queries:
|
||||||
|
|
||||||
@@ -261,6 +264,10 @@ pub struct Lower<'func, I: VCodeInst> {
|
|||||||
|
|
||||||
/// The register to use for GetPinnedReg, if any, on this architecture.
|
/// The register to use for GetPinnedReg, if any, on this architecture.
|
||||||
pinned_reg: Option<Reg>,
|
pinned_reg: Option<Reg>,
|
||||||
|
|
||||||
|
/// The vreg containing the special VmContext parameter, if it is present in the current
|
||||||
|
/// function's signature.
|
||||||
|
vm_context: Option<Reg>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Notion of "relocation distance". This gives an estimate of how far away a symbol will be from a
|
/// Notion of "relocation distance". This gives an estimate of how far away a symbol will be from a
|
||||||
@@ -331,6 +338,15 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let vm_context = f
|
||||||
|
.signature
|
||||||
|
.special_param_index(ArgumentPurpose::VMContext)
|
||||||
|
.map(|vm_context_index| {
|
||||||
|
let entry_block = f.layout.entry_block().unwrap();
|
||||||
|
let param = f.dfg.block_params(entry_block)[vm_context_index];
|
||||||
|
value_regs[param]
|
||||||
|
});
|
||||||
|
|
||||||
// Assign a vreg to each return value.
|
// Assign a vreg to each return value.
|
||||||
let mut retval_regs = vec![];
|
let mut retval_regs = vec![];
|
||||||
for ret in &f.signature.returns {
|
for ret in &f.signature.returns {
|
||||||
@@ -387,6 +403,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
|||||||
bb_insts: vec![],
|
bb_insts: vec![],
|
||||||
ir_insts: vec![],
|
ir_insts: vec![],
|
||||||
pinned_reg: None,
|
pinned_reg: None,
|
||||||
|
vm_context,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -830,6 +847,10 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
|
|||||||
Writable::from_reg(self.retval_regs[idx].0)
|
Writable::from_reg(self.retval_regs[idx].0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the vreg stored in `self.vm_context`: the one holding the special VMContext
/// parameter, or `None` when the function's signature has no such parameter.
fn get_vm_context(&self) -> Option<Reg> {
|
||||||
|
self.vm_context
|
||||||
|
}
|
||||||
|
|
||||||
fn data(&self, ir_inst: Inst) -> &InstructionData {
|
fn data(&self, ir_inst: Inst) -> &InstructionData {
|
||||||
&self.f.dfg[ir_inst]
|
&self.f.dfg[ir_inst]
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user