Windows FPRs preservation (#1216)
Preserve FPRs as required by the Windows fastcall calling convention. This exposes an implementation limit in Cranelift's approach to stack layout, which conflicts with the frame layout Windows expects for SEH: a function for which the Cranelift user wants fastcall unwind information, that must preserve an ABI-reserved FPR, and whose stack frame is 240 bytes or larger, now produces an error when compiled. Several wasm spectests are disabled because they trip this limit. This is a temporary constraint that should be fixed promptly.

Co-authored-by: bjorn3 <bjorn3@users.noreply.github.com>
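For embedders, the visible API change is that unwind-info emission is now fallible. Below is a minimal sketch (not from this commit; the already-compiled `Context` and the `FrameUnwindSink` implementation are assumed) of how a caller of the new `CodegenResult`-returning entry point might surface the limit:

    use cranelift_codegen::binemit::{FrameUnwindKind, FrameUnwindSink};
    use cranelift_codegen::isa::TargetIsa;
    use cranelift_codegen::{CodegenError, Context};

    // Hypothetical helper: `ctx` was already compiled for a windows_fastcall
    // target, and `sink` collects the emitted unwind bytes.
    fn emit_or_report(ctx: &Context, isa: &dyn TargetIsa, sink: &mut dyn FrameUnwindSink) {
        match ctx.emit_unwind_info(isa, FrameUnwindKind::Fastcall, sink) {
            Ok(()) => {}
            // The new error path: an FPR must be preserved but the frame is
            // 240 bytes or larger, so SEH unwind info cannot be encoded.
            Err(CodegenError::ImplLimitExceeded) => eprintln!("frame too large for SEH"),
            Err(e) => panic!("unexpected codegen error: {}", e),
        }
    }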
build.rs
@@ -194,6 +194,17 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
             ("reference_types", "table_copy_on_imported_tables") => return false,
             ("reference_types", _) => return true,
+
+            ("misc_testsuite", "export_large_signature")
+            | ("spec_testsuite", "call")
+            | ("multi_value", "call")
+            | ("multi_value", "func") => {
+                // FIXME These involve functions with very large stack frames that Cranelift
+                // currently cannot compile using the fastcall (Windows) calling convention.
+                // See https://github.com/bytecodealliance/wasmtime/pull/1216.
+                #[cfg(windows)]
+                return true;
+            }

             _ => {}
         },
         _ => panic!("unrecognized strategy"),
@@ -206,8 +206,8 @@ impl Context {
         isa: &dyn TargetIsa,
         kind: FrameUnwindKind,
         sink: &mut dyn FrameUnwindSink,
-    ) {
-        isa.emit_unwind_info(&self.func, kind, sink);
+    ) -> CodegenResult<()> {
+        isa.emit_unwind_info(&self.func, kind, sink)
     }

     /// Run the verifier on the function.
@@ -398,8 +398,9 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
         _func: &ir::Function,
         _kind: binemit::FrameUnwindKind,
         _sink: &mut dyn binemit::FrameUnwindSink,
-    ) {
+    ) -> CodegenResult<()> {
         // No-op by default
+        Ok(())
     }
 }
@@ -12,8 +12,10 @@ use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
 use crate::binemit::{FrameUnwindKind, FrameUnwindSink};
 use crate::cursor::{Cursor, CursorPosition, EncCursor};
 use crate::ir;
+use crate::ir::entities::StackSlot;
 use crate::ir::immediates::Imm64;
 use crate::ir::stackslot::{StackOffset, StackSize};
+use crate::ir::types;
 use crate::ir::{
     get_probestack_funcref, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose,
     FrameLayoutChange, InstBuilder, ValueLoc,
@@ -23,6 +25,7 @@ use crate::regalloc::RegisterSet;
 use crate::result::CodegenResult;
 use crate::stack_layout::layout_stack;
 use alloc::borrow::Cow;
+use alloc::vec::Vec;
 use core::i32;
 use std::boxed::Box;
 use target_lexicon::{PointerWidth, Triple};
@@ -366,17 +369,18 @@ pub fn allocatable_registers(triple: &Triple, flags: &shared_settings::Flags) ->
     regs
 }

-/// Get the set of callee-saved registers.
+/// Get the set of callee-saved general-purpose registers.
 fn callee_saved_gprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] {
     match isa.triple().pointer_width().unwrap() {
         PointerWidth::U16 => panic!(),
         PointerWidth::U32 => &[RU::rbx, RU::rsi, RU::rdi],
         PointerWidth::U64 => {
             if call_conv.extends_windows_fastcall() {
-                // "registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15 are considered nonvolatile
-                // and must be saved and restored by a function that uses them."
+                // "registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15, and XMM6-15 are
+                // considered nonvolatile and must be saved and restored by a function that uses
+                // them."
                 // as per https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention
-                // RSP & RSB are not listed below, since they are restored automatically during
+                // RSP & RBP are not listed below, since they are restored automatically during
                 // a function call. If that wasn't the case, function calls (RET) would not work.
                 &[
                     RU::rbx,
@@ -394,12 +398,45 @@ fn callee_saved_gprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU]
         }
     }
 }

+/// Get the set of callee-saved floating-point (SIMD) registers.
+fn callee_saved_fprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] {
+    match isa.triple().pointer_width().unwrap() {
+        PointerWidth::U16 => panic!(),
+        PointerWidth::U32 => &[],
+        PointerWidth::U64 => {
+            if call_conv.extends_windows_fastcall() {
+                // "registers RBX, ... , and XMM6-15 are considered nonvolatile and must be saved
+                // and restored by a function that uses them."
+                // as per https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention as of
+                // February 5th, 2020.
+                &[
+                    RU::xmm6,
+                    RU::xmm7,
+                    RU::xmm8,
+                    RU::xmm9,
+                    RU::xmm10,
+                    RU::xmm11,
+                    RU::xmm12,
+                    RU::xmm13,
+                    RU::xmm14,
+                    RU::xmm15,
+                ]
+            } else {
+                &[]
+            }
+        }
+    }
+}
+
 /// Get the set of callee-saved registers that are used.
-fn callee_saved_gprs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterSet {
+fn callee_saved_regs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterSet {
     let mut all_callee_saved = RegisterSet::empty();
     for reg in callee_saved_gprs(isa, func.signature.call_conv) {
         all_callee_saved.free(GPR, *reg as RegUnit);
     }
+    for reg in callee_saved_fprs(isa, func.signature.call_conv) {
+        all_callee_saved.free(FPR, *reg as RegUnit);
+    }

     let mut used = RegisterSet::empty();
     for value_loc in func.locations.values() {
@@ -407,8 +444,14 @@ fn callee_saved_gprs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterSet
         // register. We don't use registers that overlap each other in the x86 ISA, but in others
         // we do. So this should not be blindly reused.
         if let ValueLoc::Reg(ru) = *value_loc {
-            if !used.is_avail(GPR, ru) {
-                used.free(GPR, ru);
+            if GPR.contains(ru) {
+                if !used.is_avail(GPR, ru) {
+                    used.free(GPR, ru);
+                }
+            } else if FPR.contains(ru) {
+                if !used.is_avail(FPR, ru) {
+                    used.free(FPR, ru);
+                }
             }
         }
     }
@@ -424,8 +467,14 @@ fn callee_saved_gprs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterSet
             match func.dfg[inst] {
                 ir::instructions::InstructionData::RegMove { dst, .. }
                 | ir::instructions::InstructionData::RegFill { dst, .. } => {
-                    if !used.is_avail(GPR, dst) {
-                        used.free(GPR, dst);
+                    if GPR.contains(dst) {
+                        if !used.is_avail(GPR, dst) {
+                            used.free(GPR, dst);
+                        }
+                    } else if FPR.contains(dst) {
+                        if !used.is_avail(FPR, dst) {
+                            used.free(FPR, dst);
+                        }
                     }
                 }
                 _ => (),
@@ -509,7 +558,7 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
         panic!("TODO: windows-fastcall: x86-32 not implemented yet");
     }

-    let csrs = callee_saved_gprs_used(isa, func);
+    let csrs = callee_saved_regs_used(isa, func);

     // The reserved stack area is composed of:
     // return address + frame pointer + all callee-saved registers + shadow space
@@ -519,11 +568,28 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
     // will adjust the stack pointer to make room for the rest of the required
     // space for this frame.
     let word_size = isa.pointer_bytes() as usize;
+    let num_fprs = csrs.iter(FPR).len();
     let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;

+    // Only create an FPR stack slot if we're going to save FPRs.
+    let fpr_slot = if num_fprs > 0 {
+        // Create a stack slot for FPRs to be preserved in. This is an `ExplicitSlot` because it
+        // seems to most closely map to it as a `StackSlotKind`: FPR preserve/restore should be
+        // through `stack_load` and `stack_store` (see later comment about issue #1198). Even
+        // though in a certain light FPR preserve/restore is "spilling" an argument, regalloc
+        // implies that `SpillSlot` may be eligible for certain optimizations, and we know with
+        // certainty that this space may not be reused in the function, nor moved around.
+        Some(func.create_stack_slot(ir::StackSlotData {
+            kind: ir::StackSlotKind::ExplicitSlot,
+            size: (num_fprs * types::F64X2.bytes() as usize) as u32,
+            offset: None,
+        }))
+    } else {
+        None
+    };
+
     // TODO: eventually use the 32 bytes (shadow store) as spill slot. This currently doesn't work
     // since cranelift does not support spill slots before incoming args

     func.create_stack_slot(ir::StackSlotData {
         kind: ir::StackSlotKind::IncomingArg,
         size: csr_stack_size as u32,
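As a sizing aside (illustrative numbers, not part of the patch): `types::F64X2.bytes()` is 16, so a function preserving two FPRs gets a 32-byte explicit slot, matching the `ss0 = explicit_slot 32` expected by the windows_fastcall filetest later in this commit.

    // Hypothetical check mirroring the slot-size computation above.
    let num_fprs = 2;
    let slot_size = num_fprs * 16; // types::F64X2.bytes() == 16
    assert_eq!(slot_size, 32);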
@@ -544,8 +610,22 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
     func.signature.params.push(fp_arg);
     func.signature.returns.push(fp_arg);

-    for csr in csrs.iter(GPR) {
-        let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr);
+    for gp_csr in csrs.iter(GPR) {
+        let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, gp_csr);
+        func.signature.params.push(csr_arg);
+        func.signature.returns.push(csr_arg);
+    }
+
+    for fp_csr in csrs.iter(FPR) {
+        // The calling convention described in
+        // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention only requires
+        // preserving the low 128 bits of XMM6-XMM15.
+        //
+        // TODO: For now, add just an `F64` rather than `F64X2` because `F64X2` would require
+        // encoding a fstDisp8 with REX bits set, and we currently can't encode that. F64 causes a
+        // whole XMM register to be preserved anyway.
+        let csr_arg =
+            ir::AbiParam::special_reg(types::F64, ir::ArgumentPurpose::CalleeSaved, fp_csr);
         func.signature.params.push(csr_arg);
         func.signature.returns.push(csr_arg);
     }
@@ -553,8 +633,14 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
     // Set up the cursor and insert the prologue
     let entry_block = func.layout.entry_block().expect("missing entry block");
     let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block);
-    let prologue_cfa_state =
-        insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa);
+    let prologue_cfa_state = insert_common_prologue(
+        &mut pos,
+        local_stack_size,
+        reg_type,
+        &csrs,
+        fpr_slot.as_ref(),
+        isa,
+    );

     // Reset the cursor and insert the epilogue
     let mut pos = pos.at_position(CursorPosition::Nowhere);
@@ -563,6 +649,7 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
         local_stack_size,
         reg_type,
         &csrs,
+        fpr_slot.as_ref(),
         isa,
         prologue_cfa_state,
     );
@@ -575,7 +662,11 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
     let pointer_width = isa.triple().pointer_width().unwrap();
     let word_size = pointer_width.bytes() as usize;

-    let csrs = callee_saved_gprs_used(isa, func);
+    let csrs = callee_saved_regs_used(isa, func);
+    assert!(
+        csrs.iter(FPR).len() == 0,
+        "SysV ABI does not have callee-save SIMD registers"
+    );

     // The reserved stack area is composed of:
     // return address + frame pointer + all callee-saved registers
@@ -615,7 +706,7 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
     let entry_block = func.layout.entry_block().expect("missing entry block");
     let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block);
     let prologue_cfa_state =
-        insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa);
+        insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, None, isa);

     // Reset the cursor and insert the epilogue
     let mut pos = pos.at_position(CursorPosition::Nowhere);
@@ -624,6 +715,7 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
         local_stack_size,
         reg_type,
         &csrs,
+        None,
         isa,
         prologue_cfa_state,
     );
@@ -638,6 +730,7 @@ fn insert_common_prologue(
     stack_size: i64,
     reg_type: ir::types::Type,
     csrs: &RegisterSet,
+    fpr_slot: Option<&StackSlot>,
     isa: &dyn TargetIsa,
 ) -> Option<CFAState> {
     let word_size = isa.pointer_bytes() as isize;
@@ -648,8 +741,11 @@ fn insert_common_prologue(
         // pushed CSRs, frame pointer.
         // Also, the size of a return address, implicitly pushed by a x86 `call` instruction,
         // also should be accounted for.
+        // If any FPR are present, count them as well as necessary alignment space.
         // TODO: Check if the function body actually contains a `call` instruction.
-        let total_stack_size = (csrs.iter(GPR).len() + 1 + 1) as i64 * word_size as i64;
+        let mut total_stack_size = (csrs.iter(GPR).len() + 1 + 1) as i64 * word_size as i64;
+
+        total_stack_size += csrs.iter(FPR).len() as i64 * types::F64X2.bytes() as i64;

         insert_stack_check(pos, total_stack_size, stack_limit_arg);
     }
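To make the stack-check bound concrete (illustrative numbers, not from the patch): with three callee-saved GPRs and two preserved FPRs on a 64-bit target, the estimate is (3 + 1 + 1) * 8 + 2 * 16 = 72 bytes, the `+ 1 + 1` covering the return address and the frame pointer.

    // Hypothetical instance of the computation above.
    let (gpr_csrs, fpr_csrs, word_size) = (3i64, 2i64, 8i64);
    let total = (gpr_csrs + 1 + 1) * word_size + fpr_csrs * 16; // F64X2 is 16 bytes
    assert_eq!(total, 72);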
@@ -796,6 +892,55 @@ fn insert_common_prologue(
         }
     }

+    // Now that RSP is prepared for the function, we can use stack slots:
+    if let Some(fpr_slot) = fpr_slot {
+        debug_assert!(csrs.iter(FPR).len() != 0);
+
+        // `stack_store` is not directly encodable in x86_64 at the moment, so we'll need a base
+        // address. We are well after postopt could run, so load the CSR region base once here,
+        // instead of hoping that the addr/store will be combined later.
+        // See also: https://github.com/bytecodealliance/wasmtime/pull/1198
+        let stack_addr = pos.ins().stack_addr(types::I64, *fpr_slot, 0);
+
+        // Use r11 as fastcall allows it to be clobbered, and it won't have a meaningful value at
+        // function entry.
+        pos.func.locations[stack_addr] = ir::ValueLoc::Reg(RU::r11 as u16);
+
+        let mut fpr_offset = 0;
+
+        for reg in csrs.iter(FPR) {
+            // Append param to entry block
+            let csr_arg = pos.func.dfg.append_block_param(block, types::F64);
+
+            // Since regalloc has already run, we must assign a location.
+            pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
+
+            let reg_store_inst =
+                pos.ins()
+                    .store(ir::MemFlags::trusted(), csr_arg, stack_addr, fpr_offset);
+
+            // If we preserve FPRs, they occur after SP is adjusted, so also fix up the end point
+            // to this new instruction.
+            pos.func.prologue_end = Some(reg_store_inst);
+            fpr_offset += types::F64X2.bytes() as i32;
+
+            if let Some(ref mut frame_layout) = pos.func.frame_layout {
+                let mut cfa_state = cfa_state
+                    .as_mut()
+                    .expect("cfa state exists when recording frame layout");
+                cfa_state.current_depth -= types::F64X2.bytes() as isize;
+                frame_layout.instructions.insert(
+                    reg_store_inst,
+                    vec![FrameLayoutChange::RegAt {
+                        reg,
+                        cfa_offset: cfa_state.current_depth,
+                    }]
+                    .into_boxed_slice(),
+                );
+            }
+        }
+    }
+
     cfa_state
 }
@@ -828,6 +973,7 @@ fn insert_common_epilogues(
     stack_size: i64,
     reg_type: ir::types::Type,
     csrs: &RegisterSet,
+    fpr_slot: Option<&StackSlot>,
     isa: &dyn TargetIsa,
     cfa_state: Option<CFAState>,
 ) {
@@ -842,6 +988,7 @@ fn insert_common_epilogues(
                 pos,
                 reg_type,
                 csrs,
+                fpr_slot,
                 isa,
                 is_last,
                 cfa_state.clone(),
@@ -859,11 +1006,57 @@ fn insert_common_epilogue(
     pos: &mut EncCursor,
     reg_type: ir::types::Type,
     csrs: &RegisterSet,
+    fpr_slot: Option<&StackSlot>,
     isa: &dyn TargetIsa,
     is_last: bool,
     mut cfa_state: Option<CFAState>,
 ) {
     let word_size = isa.pointer_bytes() as isize;

+    // Even though instructions to restore FPRs are inserted first, we have to append them after
+    // restored GPRs to satisfy parameter order in the return.
+    let mut restored_fpr_values = Vec::new();
+
+    // Restore FPRs before we move RSP and invalidate stack slots.
+    if let Some(fpr_slot) = fpr_slot {
+        debug_assert!(csrs.iter(FPR).len() != 0);
+
+        // `stack_load` is not directly encodable in x86_64 at the moment, so we'll need a base
+        // address. We are well after postopt could run, so load the CSR region base once here,
+        // instead of hoping that the addr/store will be combined later.
+        //
+        // See also: https://github.com/bytecodealliance/wasmtime/pull/1198
+        let stack_addr = pos.ins().stack_addr(types::I64, *fpr_slot, 0);
+
+        // Use r11 as fastcall allows it to be clobbered, and it won't have a meaningful value at
+        // function exit.
+        pos.func.locations[stack_addr] = ir::ValueLoc::Reg(RU::r11 as u16);
+
+        let mut fpr_offset = 0;
+
+        for reg in csrs.iter(FPR) {
+            let value = pos
+                .ins()
+                .load(types::F64, ir::MemFlags::trusted(), stack_addr, fpr_offset);
+            fpr_offset += types::F64X2.bytes() as i32;
+
+            if let Some(ref mut cfa_state) = cfa_state.as_mut() {
+                // Note: don't bother recording a frame layout change because the popped value is
+                // still correct in memory, and won't be overwritten until we've returned where the
+                // current frame's layout would no longer matter. Only adjust `current_depth` for a
+                // consistency check later.
+                cfa_state.current_depth += types::F64X2.bytes() as isize;
+            }
+            // Unlike GPRs before, we don't need to step back after each restoration because FPR
+            // restoration is order-insensitive. Furthermore: we want GPR restoration to begin
+            // after FPR restoration, so that stack adjustments occur after we're done relying on
+            // StackSlot validity.
+
+            pos.func.locations[value] = ir::ValueLoc::Reg(reg);
+            restored_fpr_values.push(value);
+        }
+    }
+
     if stack_size > 0 {
         pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
     }
@@ -903,6 +1096,10 @@ fn insert_common_epilogue(
         pos.func.dfg.append_inst_arg(inst, csr_ret);
     }

+    for value in restored_fpr_values.into_iter() {
+        pos.func.dfg.append_inst_arg(inst, value);
+    }
+
     if let Some(ref mut frame_layout) = pos.func.frame_layout {
         let cfa_state = cfa_state
             .as_mut()
@@ -953,19 +1150,21 @@ pub fn emit_unwind_info(
     isa: &dyn TargetIsa,
     kind: FrameUnwindKind,
     sink: &mut dyn FrameUnwindSink,
-) {
+) -> CodegenResult<()> {
     match kind {
         FrameUnwindKind::Fastcall => {
             // Assumption: RBP is being used as the frame pointer
             // In the future, Windows fastcall codegen should usually omit the frame pointer
-            if let Some(info) = UnwindInfo::try_from_func(func, isa, Some(RU::rbp.into())) {
+            if let Some(info) = UnwindInfo::try_from_func(func, isa, Some(RU::rbp.into()))? {
                 info.emit(sink);
             }
         }
         FrameUnwindKind::Libunwind => {
             if func.frame_layout.is_some() {
-                emit_fde(func, isa, sink);
+                emit_fde(func, isa, sink)?;
             }
         }
     }
+
+    Ok(())
 }
@@ -4,6 +4,7 @@ use crate::binemit::{FrameUnwindOffset, FrameUnwindSink, Reloc};
 use crate::ir::{FrameLayoutChange, Function};
 use crate::isa::fde::RegisterMappingError;
 use crate::isa::{CallConv, RegUnit, TargetIsa};
+use crate::result::CodegenResult;
 use alloc::vec::Vec;
 use core::convert::TryInto;
 use gimli::write::{
@@ -178,7 +179,11 @@ fn to_cfi(
 }

 /// Creates FDE structure from FrameLayout.
-pub fn emit_fde(func: &Function, isa: &dyn TargetIsa, sink: &mut dyn FrameUnwindSink) {
+pub fn emit_fde(
+    func: &Function,
+    isa: &dyn TargetIsa,
+    sink: &mut dyn FrameUnwindSink,
+) -> CodegenResult<()> {
     assert!(isa.name() == "x86");

     // Expecting function with System V prologue
@@ -266,6 +271,8 @@ pub fn emit_fde(func: &Function, isa: &dyn TargetIsa, sink: &mut dyn FrameUnwindSink) {

     // Need 0 marker for GCC unwind to end FDE "list".
     sink.bytes(&[0, 0, 0, 0]);
+
+    Ok(())
 }

 #[cfg(test)]
@@ -314,7 +321,7 @@ mod tests {
         context.compile(&*isa).expect("expected compilation");

         let mut sink = SimpleUnwindSink(Vec::new(), 0, Vec::new());
-        emit_fde(&context.func, &*isa, &mut sink);
+        emit_fde(&context.func, &*isa, &mut sink).expect("can emit fde");

         assert_eq!(
             sink.0,
@@ -376,7 +383,7 @@ mod tests {
         context.compile(&*isa).expect("expected compilation");

         let mut sink = SimpleUnwindSink(Vec::new(), 0, Vec::new());
-        emit_fde(&context.func, &*isa, &mut sink);
+        emit_fde(&context.func, &*isa, &mut sink).expect("can emit fde");

         assert_eq!(
             sink.0,
@@ -177,8 +177,8 @@ impl TargetIsa for Isa {
         func: &ir::Function,
         kind: FrameUnwindKind,
         sink: &mut dyn FrameUnwindSink,
-    ) {
-        abi::emit_unwind_info(func, self, kind, sink);
+    ) -> CodegenResult<()> {
+        abi::emit_unwind_info(func, self, kind, sink)
     }
 }
@@ -1,11 +1,13 @@
 //! Unwind information for x64 Windows.

-use super::registers::{GPR, RU};
+use super::registers::{FPR, GPR, RU};
 use crate::binemit::FrameUnwindSink;
-use crate::ir::{Function, InstructionData, Opcode};
+use crate::ir::{Function, InstructionData, Opcode, ValueLoc};
 use crate::isa::{CallConv, RegUnit, TargetIsa};
+use crate::result::{CodegenError, CodegenResult};
 use alloc::vec::Vec;
 use byteorder::{ByteOrder, LittleEndian};
+use log::warn;

 /// Maximum (inclusive) size of a "small" stack allocation
 const SMALL_ALLOC_MAX_SIZE: u32 = 128;
@@ -35,18 +37,34 @@ fn write_u32<T: ByteOrder>(sink: &mut dyn FrameUnwindSink, v: u32) {
 /// Note: the Cranelift x86 ISA RU enum matches the Windows unwind GPR encoding values.
 #[derive(Debug, PartialEq, Eq)]
 enum UnwindCode {
-    PushRegister { offset: u8, reg: RegUnit },
-    StackAlloc { offset: u8, size: u32 },
-    SetFramePointer { offset: u8, sp_offset: u8 },
+    PushRegister {
+        offset: u8,
+        reg: RegUnit,
+    },
+    SaveXmm {
+        offset: u8,
+        reg: RegUnit,
+        stack_offset: u32,
+    },
+    StackAlloc {
+        offset: u8,
+        size: u32,
+    },
+    SetFramePointer {
+        offset: u8,
+        sp_offset: u8,
+    },
 }

 impl UnwindCode {
     fn emit(&self, sink: &mut dyn FrameUnwindSink) {
         enum UnwindOperation {
-            PushNonvolatileRegister,
-            LargeStackAlloc,
-            SmallStackAlloc,
-            SetFramePointer,
+            PushNonvolatileRegister = 0,
+            LargeStackAlloc = 1,
+            SmallStackAlloc = 2,
+            SetFramePointer = 3,
+            SaveXmm128 = 8,
+            SaveXmm128Far = 9,
         }

         match self {
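For orientation (a worked example, not part of the patch): a Windows x64 unwind code begins with the instruction's prologue offset, followed by a byte whose low nibble is the operation and whose high nibble is the operation info, optionally followed by 16-bit payload nodes. A `SaveXmm` record for %xmm6 stored 32 bytes into the save area would come out as:

    // Low nibble: SaveXmm128 (8); high nibble: XMM register index 6.
    let op_info: u8 = (6 << 4) | 8;
    assert_eq!(op_info, 0x68);
    // SaveXmm128 stores the slot offset scaled down by 16 in a trailing u16 node.
    let scaled_offset: u16 = 32 / 16;
    assert_eq!(scaled_offset, 2);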
@@ -58,6 +76,28 @@ impl UnwindCode {
                         | (UnwindOperation::PushNonvolatileRegister as u8),
                 );
             }
+            Self::SaveXmm {
+                offset,
+                reg,
+                stack_offset,
+            } => {
+                write_u8(sink, *offset);
+                let stack_offset = stack_offset / 16;
+                if stack_offset <= core::u16::MAX as u32 {
+                    write_u8(
+                        sink,
+                        (FPR.index_of(*reg) << 4) as u8 | (UnwindOperation::SaveXmm128 as u8),
+                    );
+                    write_u16::<LittleEndian>(sink, stack_offset as u16);
+                } else {
+                    write_u8(
+                        sink,
+                        (FPR.index_of(*reg) << 4) as u8 | (UnwindOperation::SaveXmm128Far as u8),
+                    );
+                    write_u16::<LittleEndian>(sink, stack_offset as u16);
+                    write_u16::<LittleEndian>(sink, (stack_offset >> 16) as u16);
+                }
+            }
             Self::StackAlloc { offset, size } => {
                 // Stack allocations on Windows must be a multiple of 8 and be at least 1 slot
                 assert!(*size >= 8);
@@ -98,6 +138,13 @@ impl UnwindCode {
                     3
                 }
             }
+            Self::SaveXmm { stack_offset, .. } => {
+                if *stack_offset <= core::u16::MAX as u32 {
+                    2
+                } else {
+                    3
+                }
+            }
             _ => 1,
         }
     }
@@ -121,10 +168,10 @@ impl UnwindInfo {
         func: &Function,
         isa: &dyn TargetIsa,
         frame_register: Option<RegUnit>,
-    ) -> Option<Self> {
+    ) -> CodegenResult<Option<Self>> {
         // Only Windows fastcall is supported for unwind information
         if func.signature.call_conv != CallConv::WindowsFastcall || func.prologue_end.is_none() {
-            return None;
+            return Ok(None);
         }

         let prologue_end = func.prologue_end.unwrap();
@@ -136,10 +183,27 @@ impl UnwindInfo {
         let mut unwind_codes = Vec::new();
         let mut found_end = false;

+        // Have we saved at least one FPR? If so, we might have to check additional constraints.
+        let mut saved_fpr = false;
+
+        // In addition to the min offset for a callee-save, we need to know the offset from the
+        // frame base to the stack pointer, so that we can record an unwind offset that spans only
+        // to the end of callee-save space.
+        let mut static_frame_allocation_size = 0u32;
+
+        // For the time being, FPR preservation is split into a stack_addr and later store/load.
+        // Store the register used for stack store and ensure it is the same register with no
+        // intervening changes to the frame size.
+        let mut callee_save_region_reg = None;
+        // Also record the callee-save region's offset from RSP, because it must be added to FPR
+        // save offsets to compute an offset from the frame base.
+        let mut callee_save_offset = None;
+
         for (offset, inst, size) in func.inst_offsets(entry_block, &isa.encoding_info()) {
             // x64 ABI prologues cannot exceed 255 bytes in length
             if (offset + size) > 255 {
-                panic!("function prologues cannot exceed 255 bytes in size for Windows x64");
+                warn!("function prologues cannot exceed 255 bytes in size for Windows x64");
+                return Err(CodegenError::CodeTooLarge);
             }

             prologue_size += size;
@@ -150,18 +214,23 @@ impl UnwindInfo {
                 InstructionData::Unary { opcode, arg } => {
                     match opcode {
                         Opcode::X86Push => {
+                            static_frame_allocation_size += 8;
+
                             unwind_codes.push(UnwindCode::PushRegister {
                                 offset: unwind_offset,
                                 reg: func.locations[arg].unwrap_reg(),
                             });
                         }
                         Opcode::AdjustSpDown => {
+                            let stack_size =
+                                stack_size.expect("expected a previous stack size instruction");
+                            static_frame_allocation_size += stack_size;
+
                             // This is used when calling a stack check function
                             // We need to track the assignment to RAX which has the size of the stack
                             unwind_codes.push(UnwindCode::StackAlloc {
                                 offset: unwind_offset,
-                                size: stack_size
-                                    .expect("expected a previous stack size instruction"),
+                                size: stack_size,
                             });
                         }
                         _ => {}
@@ -170,6 +239,10 @@ impl UnwindInfo {
                 InstructionData::CopySpecial { src, dst, .. } => {
                     if let Some(frame_register) = frame_register {
                         if src == (RU::rsp as RegUnit) && dst == frame_register {
+                            // Constructing an rbp-based stack frame, so the static frame
+                            // allocation restarts at 0 from here.
+                            static_frame_allocation_size = 0;
+
                             unwind_codes.push(UnwindCode::SetFramePointer {
                                 offset: unwind_offset,
                                 sp_offset: 0,
@@ -194,6 +267,8 @@ impl UnwindInfo {
                         let imm: i64 = imm.into();
                         assert!(imm <= core::u32::MAX as i64);

+                        static_frame_allocation_size += imm as u32;
+
                         unwind_codes.push(UnwindCode::StackAlloc {
                             offset: unwind_offset,
                             size: imm as u32,
@@ -202,6 +277,55 @@ impl UnwindInfo {
                         _ => {}
                     }
                 }
+                InstructionData::StackLoad {
+                    opcode: Opcode::StackAddr,
+                    stack_slot,
+                    offset: _,
+                } => {
+                    let result = func.dfg.inst_results(inst).get(0).unwrap();
+                    if let ValueLoc::Reg(frame_reg) = func.locations[*result] {
+                        callee_save_region_reg = Some(frame_reg);
+
+                        // Figure out the offset in the call frame that `frame_reg` will have.
+                        let frame_size = func
+                            .stack_slots
+                            .layout_info
+                            .expect("func's stack slots have layout info if stack operations exist")
+                            .frame_size;
+                        // Because we're well after the prologue has been constructed, stack slots
+                        // must have been laid out...
+                        let slot_offset = func.stack_slots[stack_slot]
+                            .offset
+                            .expect("callee-save slot has an offset computed");
+                        let frame_offset = frame_size as i32 + slot_offset;
+
+                        callee_save_offset = Some(frame_offset as u32);
+                    }
+                }
+                InstructionData::Store {
+                    opcode: Opcode::Store,
+                    args: [arg1, arg2],
+                    flags: _flags,
+                    offset,
+                } => {
+                    if let (ValueLoc::Reg(ru), ValueLoc::Reg(base_ru)) =
+                        (func.locations[arg1], func.locations[arg2])
+                    {
+                        if Some(base_ru) == callee_save_region_reg {
+                            let offset_int: i32 = offset.into();
+                            assert!(offset_int >= 0, "negative fpr offset would store outside the stack frame, and is almost certainly an error");
+                            let offset_int: u32 = offset_int as u32 + callee_save_offset.expect("FPR preservation requires an FPR save region, which has some stack offset");
+                            if FPR.contains(ru) {
+                                saved_fpr = true;
+                                unwind_codes.push(UnwindCode::SaveXmm {
+                                    offset: unwind_offset,
+                                    reg: ru,
+                                    stack_offset: offset_int,
+                                });
+                            }
+                        }
+                    }
+                }
                 _ => {}
             };
@@ -212,16 +336,46 @@ impl UnwindInfo {
         }

         if !found_end {
-            return None;
+            return Ok(None);
         }

-        Some(Self {
+        if saved_fpr {
+            if static_frame_allocation_size > 240 && saved_fpr {
+                warn!("stack frame is too large ({} bytes) to use with Windows x64 SEH when preserving FPRs. \
+                    This is a Cranelift implementation limit, see \
+                    https://github.com/bytecodealliance/wasmtime/issues/1475",
+                    static_frame_allocation_size);
+                return Err(CodegenError::ImplLimitExceeded);
+            }
+            // Only test that the static frame size is 16-byte aligned when an FPR is saved, to
+            // avoid panicking when alignment is elided because no FPRs are saved and no child
+            // calls are made.
+            assert!(
+                static_frame_allocation_size % 16 == 0,
+                "static frame allocation must be a multiple of 16"
+            );
+        }
+
+        // Hack to avoid panicking unnecessarily. Because Cranelift generates prologues with RBP
+        // at one end of the call frame and RSP at the other, required offsets are arbitrarily
+        // large. Windows x64 SEH only allows this offset to be up to 240 bytes, however, meaning
+        // large frames are inexpressible, and we cannot actually compile the function. In case
+        // there are no preserved FPRs, we can lie without error and claim the offset to RBP is
+        // 0 - nothing will actually check it. This, then, avoids panics when compiling functions
+        // with large call frames.
+        let reported_frame_offset = if saved_fpr {
+            (static_frame_allocation_size / 16) as u8
+        } else {
+            0
+        };
+
+        Ok(Some(Self {
             flags: 0, // this assumes cranelift functions have no SEH handlers
             prologue_size: prologue_size as u8,
             frame_register,
-            frame_register_offset: 0,
+            frame_register_offset: reported_frame_offset,
             unwind_codes,
-        })
+        }))
     }

     pub fn size(&self) -> usize {
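The 240-byte figure follows from the UNWIND_INFO encoding itself: the frame-register offset field is 4 bits wide and scaled by 16, so the largest RBP-to-RSP distance it can express is 15 * 16 = 240 bytes; anything larger takes the `ImplLimitExceeded` path above. A sketch of the arithmetic (not from the patch):

    // UNWIND_INFO frame offset: a 4-bit field, in units of 16 bytes.
    let max_encodable_offset = 15 * 16;
    assert_eq!(max_encodable_offset, 240);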
@@ -320,7 +474,10 @@ mod tests {
         context.compile(&*isa).expect("expected compilation");

-        assert_eq!(UnwindInfo::try_from_func(&context.func, &*isa, None), None);
+        assert_eq!(
+            UnwindInfo::try_from_func(&context.func, &*isa, None).expect("can emit unwind info"),
+            None
+        );
     }

     #[test]
@@ -337,6 +494,7 @@ mod tests {
         context.compile(&*isa).expect("expected compilation");

         let unwind = UnwindInfo::try_from_func(&context.func, &*isa, Some(RU::rbp.into()))
+            .expect("can emit unwind info")
             .expect("expected unwind info");

         assert_eq!(
@@ -401,6 +559,7 @@ mod tests {
         context.compile(&*isa).expect("expected compilation");

         let unwind = UnwindInfo::try_from_func(&context.func, &*isa, Some(RU::rbp.into()))
+            .expect("can emit unwind info")
             .expect("expected unwind info");

         assert_eq!(
@@ -465,6 +624,7 @@ mod tests {
         context.compile(&*isa).expect("expected compilation");

         let unwind = UnwindInfo::try_from_func(&context.func, &*isa, Some(RU::rbp.into()))
+            .expect("can emit unwind info")
             .expect("expected unwind info");

         assert_eq!(
@@ -32,6 +32,31 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64):
 }
 ; check: function %five_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 [32], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {

+; check that we preserve xmm6 and above if we're using them locally
+function %float_callee_saves(f64, f64, f64, f64) windows_fastcall {
+block0(v0: f64, v1: f64, v2: f64, v3: f64):
+    ; explicitly use a callee-save register
+    [-, %xmm6] v4 = fadd v0, v1
+    [-, %xmm7] v5 = fadd v0, v1
+    return
+}
+; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp], f64 csr [%xmm6], f64 csr [%xmm7]) -> i64 fp [%rbp], f64 csr [%xmm6], f64 csr [%xmm7] windows_fastcall {
+; nextln: ss0 = explicit_slot 32, offset -80
+; nextln: ss1 = incoming_arg 16, offset -48
+; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rbp], v8: f64 [%xmm6], v9: f64 [%xmm7]):
+; nextln: x86_push v6
+; nextln: copy_special %rsp -> %rbp
+; nextln: adjust_sp_down_imm 64
+; nextln: v7 = stack_addr.i64 ss0
+; nextln: store notrap aligned v8, v7
+; nextln: store notrap aligned v9, v7+16
+; check: v10 = stack_addr.i64 ss0
+; nextln: v11 = load.f64 notrap aligned v10
+; nextln: v12 = load.f64 notrap aligned v10+16
+; nextln: adjust_sp_up_imm 64
+; nextln: v13 = x86_pop.i64
+; nextln: v13, v11, v12
+
 function %mixed_int_float(i64, f64, i64, f32) windows_fastcall {
 block0(v0: i64, v1: f64, v2: i64, v3: f32):
     return
@@ -43,3 +68,29 @@ block0(v0: f32, v1: f64, v2: i64, v3: i64):
     return v1
 }
 ; check: function %ret_val_float(f32 [%xmm0], f64 [%xmm1], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> f64 [%xmm0], i64 fp [%rbp] windows_fastcall {
+
+function %internal_stack_arg_function_call(i64) -> i64 windows_fastcall {
+    fn0 = %foo(i64, i64, i64, i64) -> i64
+    fn1 = %foo2(i64, i64, i64, i64) -> i64
+block0(v0: i64):
+    v1 = load.i64 v0+0
+    v2 = load.i64 v0+8
+    v3 = load.i64 v0+16
+    v4 = load.i64 v0+24
+    v5 = load.i64 v0+32
+    v6 = load.i64 v0+40
+    v7 = load.i64 v0+48
+    v8 = load.i64 v0+56
+    v9 = load.i64 v0+64
+    v10 = call fn0(v1, v2, v3, v4)
+    store.i64 v1, v0+8
+    store.i64 v2, v0+16
+    store.i64 v3, v0+24
+    store.i64 v4, v0+32
+    store.i64 v5, v0+40
+    store.i64 v6, v0+48
+    store.i64 v7, v0+56
+    store.i64 v8, v0+64
+    store.i64 v9, v0+72
+    return v10
+}
@@ -118,6 +118,53 @@ block0:
 ; nextln:     ],
 ; nextln: }

+function %fpr_with_function_call(i64, i64) windows_fastcall {
+    fn0 = %foo(f64, f64, i64, i64, i64) windows_fastcall;
+block0(v0: i64, v1: i64):
+    v2 = load.f64 v0+0
+    v3 = load.f64 v0+8
+    v4 = load.i64 v0+16
+    v15 = load.f64 v0+104
+    v16 = load.f64 v0+112
+    v17 = load.f64 v0+120
+    v18 = load.f64 v0+128
+    v19 = load.f64 v0+136
+    v20 = load.f64 v0+144
+    v21 = load.f64 v0+152
+    v22 = load.f64 v0+160
+    v23 = load.f64 v0+168
+    call fn0(v2, v3, v4, v1, v1)
+    store.f64 v15, v1+104
+    store.f64 v16, v1+112
+    store.f64 v17, v1+120
+    store.f64 v18, v1+128
+    store.f64 v19, v1+136
+    store.f64 v20, v1+144
+    store.f64 v21, v1+152
+    store.f64 v22, v1+160
+    store.f64 v23, v1+168
+    return
+}
+; Only check the first unwind code here because this test specifically looks to
+; see that in a function that is not a leaf, a callee-save FPR is stored in an
+; area that does not overlap either the callee's shadow space or stack argument
+; space.
+;
+; sameln: UnwindInfo {
+; nextln:     version: 1,
+; nextln:     flags: 0,
+; nextln:     prologue_size: 26,
+; nextln:     unwind_code_count_raw: 7,
+; nextln:     frame_register: 5,
+; nextln:     frame_register_offset: 12,
+; nextln:     unwind_codes: [
+; nextln:         UnwindCode {
+; nextln:             offset: 26,
+; nextln:             op: SaveXmm128,
+; nextln:             info: 15,
+; nextln:             value: U16(
+; nextln:                 3,
+
 ; check a function that has CSRs
 function %lots_of_registers(i64, i64) windows_fastcall {
 block0(v0: i64, v1: i64):
@@ -134,6 +181,15 @@ block0(v0: i64, v1: i64):
     v12 = load.i32 v0+80
     v13 = load.i32 v0+88
     v14 = load.i32 v0+96
+    v15 = load.f64 v0+104
+    v16 = load.f64 v0+112
+    v17 = load.f64 v0+120
+    v18 = load.f64 v0+128
+    v19 = load.f64 v0+136
+    v20 = load.f64 v0+144
+    v21 = load.f64 v0+152
+    v22 = load.f64 v0+160
+    v23 = load.f64 v0+168
     store.i32 v2, v1+0
     store.i32 v3, v1+8
     store.i32 v4, v1+16
@@ -147,20 +203,53 @@ block0(v0: i64, v1: i64):
     store.i32 v12, v1+80
     store.i32 v13, v1+88
     store.i32 v14, v1+96
+    store.f64 v15, v1+104
+    store.f64 v16, v1+112
+    store.f64 v17, v1+120
+    store.f64 v18, v1+128
+    store.f64 v19, v1+136
+    store.f64 v20, v1+144
+    store.f64 v21, v1+152
+    store.f64 v22, v1+160
+    store.f64 v23, v1+168
     return
 }
 ; sameln: UnwindInfo {
 ; nextln:     version: 1,
 ; nextln:     flags: 0,
-; nextln:     prologue_size: 19,
-; nextln:     unwind_code_count_raw: 10,
+; nextln:     prologue_size: 44,
+; nextln:     unwind_code_count_raw: 16,
 ; nextln:     frame_register: 5,
-; nextln:     frame_register_offset: 0,
+; nextln:     frame_register_offset: 10,
 ; nextln:     unwind_codes: [
 ; nextln:         UnwindCode {
+; nextln:             offset: 44,
+; nextln:             op: SaveXmm128,
+; nextln:             info: 8,
+; nextln:             value: U16(
+; nextln:                 2,
+; nextln:             ),
+; nextln:         },
+; nextln:         UnwindCode {
+; nextln:             offset: 38,
+; nextln:             op: SaveXmm128,
+; nextln:             info: 7,
+; nextln:             value: U16(
+; nextln:                 1,
+; nextln:             ),
+; nextln:         },
+; nextln:         UnwindCode {
+; nextln:             offset: 32,
+; nextln:             op: SaveXmm128,
+; nextln:             info: 6,
+; nextln:             value: U16(
+; nextln:                 0,
+; nextln:             ),
+; nextln:         },
+; nextln:         UnwindCode {
 ; nextln:             offset: 19,
 ; nextln:             op: SmallStackAlloc,
-; nextln:             info: 3,
+; nextln:             info: 12,
 ; nextln:             value: None,
 ; nextln:         },
 ; nextln:         UnwindCode {
@@ -64,7 +64,9 @@ impl SubTest for TestUnwind {
         }

         let mut sink = SimpleUnwindSink(Vec::new(), 0, Vec::new());
-        comp_ctx.emit_unwind_info(isa, FrameUnwindKind::Libunwind, &mut sink);
+        comp_ctx
+            .emit_unwind_info(isa, FrameUnwindKind::Libunwind, &mut sink)
+            .expect("can emit unwind info");

         let mut text = String::new();
         if sink.0.is_empty() {
@@ -59,7 +59,9 @@ impl SubTest for TestUnwind {
         }

         let mut sink = Sink(Vec::new());
-        comp_ctx.emit_unwind_info(isa, FrameUnwindKind::Fastcall, &mut sink);
+        comp_ctx
+            .emit_unwind_info(isa, FrameUnwindKind::Fastcall, &mut sink)
+            .expect("can emit unwind info");

         let mut text = String::new();
         if sink.0.is_empty() {
@@ -177,15 +179,15 @@ impl UnwindCode {

 #[derive(Debug)]
 enum UnwindOperation {
-    PushNonvolatileRegister,
-    LargeStackAlloc,
-    SmallStackAlloc,
-    SetFramePointer,
-    SaveNonvolatileRegister,
-    SaveNonvolatileRegisterFar,
-    SaveXmm128,
-    SaveXmm128Far,
-    PushMachineFrame,
+    PushNonvolatileRegister = 0,
+    LargeStackAlloc = 1,
+    SmallStackAlloc = 2,
+    SetFramePointer = 3,
+    SaveNonvolatileRegister = 4,
+    SaveNonvolatileRegisterFar = 5,
+    SaveXmm128 = 8,
+    SaveXmm128Far = 9,
+    PushMachineFrame = 10,
 }

 impl From<u8> for UnwindOperation {
@@ -198,9 +200,9 @@ impl From<u8> for UnwindOperation {
             3 => Self::SetFramePointer,
             4 => Self::SaveNonvolatileRegister,
             5 => Self::SaveNonvolatileRegisterFar,
-            6 => Self::SaveXmm128,
-            7 => Self::SaveXmm128Far,
-            8 => Self::PushMachineFrame,
+            8 => Self::SaveXmm128,
+            9 => Self::SaveXmm128Far,
+            10 => Self::PushMachineFrame,
             _ => panic!("unsupported unwind operation"),
         }
     }
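These explicit discriminants line up with the operation numbers of the Windows x64 UNWIND_CODE format (the gap at 6 and 7 is reserved by the format for other operations), so the byte decoded here matches what `UnwindCode::emit` wrote. A small decoding sketch (not from the patch), reusing the %xmm6 example above:

    // Decode the op/info byte written for SaveXmm128 of %xmm6.
    let byte: u8 = 0x68;
    assert_eq!(byte & 0xf, 8); // low nibble: SaveXmm128
    assert_eq!(byte >> 4, 6);  // high nibble: register index (%xmm6)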
@@ -188,7 +188,9 @@ fn make_trampoline(
         .map_err(|error| pretty_error(&context.func, Some(isa), error))
         .expect("compile_and_emit");

-    let unwind_info = CompiledFunctionUnwindInfo::new(isa, &context);
+    let unwind_info = CompiledFunctionUnwindInfo::new(isa, &context)
+        .map_err(|error| pretty_error(&context.func, Some(isa), error))
+        .expect("emit unwind info");

     code_memory
         .allocate_for_function(&CompiledFunction {
@@ -4,7 +4,7 @@
 use crate::cache::ModuleCacheDataTupleType;
 use crate::CacheConfig;
 use crate::ModuleTranslation;
-use cranelift_codegen::{binemit, ir, isa, Context};
+use cranelift_codegen::{binemit, ir, isa, CodegenResult, Context};
 use cranelift_entity::PrimaryMap;
 use cranelift_wasm::{DefinedFuncIndex, FuncIndex, WasmError};
 use serde::{Deserialize, Serialize};
@@ -36,7 +36,7 @@ pub enum CompiledFunctionUnwindInfo {

 impl CompiledFunctionUnwindInfo {
     /// Constructs unwind info object.
-    pub fn new(isa: &dyn isa::TargetIsa, context: &Context) -> Self {
+    pub fn new(isa: &dyn isa::TargetIsa, context: &Context) -> CodegenResult<Self> {
         use cranelift_codegen::binemit::{
             FrameUnwindKind, FrameUnwindOffset, FrameUnwindSink, Reloc,
         };
@@ -75,24 +75,26 @@ impl CompiledFunctionUnwindInfo {
             CallConv::SystemV | CallConv::Fast | CallConv::Cold => FrameUnwindKind::Libunwind,
             CallConv::WindowsFastcall => FrameUnwindKind::Fastcall,
             _ => {
-                return CompiledFunctionUnwindInfo::None;
+                return Ok(CompiledFunctionUnwindInfo::None);
             }
         };

         let mut sink = Sink(Vec::new(), 0, Vec::new());
-        context.emit_unwind_info(isa, kind, &mut sink);
+        context.emit_unwind_info(isa, kind, &mut sink)?;

         let Sink(data, offset, relocs) = sink;
         if data.is_empty() {
-            return CompiledFunctionUnwindInfo::None;
+            return Ok(CompiledFunctionUnwindInfo::None);
         }

-        match kind {
+        let info = match kind {
             FrameUnwindKind::Fastcall => CompiledFunctionUnwindInfo::Windows(data),
             FrameUnwindKind::Libunwind => {
                 CompiledFunctionUnwindInfo::FrameLayout(data, offset, relocs)
             }
-        }
+        };
+
+        Ok(info)
     }

     /// Returns true if there is no unwind info data.
@@ -264,7 +264,9 @@ fn compile(env: CompileEnv<'_>) -> Result<ModuleCacheDataTupleType, CompileError
         CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
     })?;

-    let unwind_info = CompiledFunctionUnwindInfo::new(isa, &context);
+    let unwind_info = CompiledFunctionUnwindInfo::new(isa, &context).map_err(|error| {
+        CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
+    })?;

     let address_transform = if env.tunables.debug_info {
         let body_len = code_buf.len();
@@ -355,7 +355,13 @@ pub fn make_trampoline(
         )))
     })?;

-    let unwind_info = CompiledFunctionUnwindInfo::new(isa, &context);
+    let unwind_info = CompiledFunctionUnwindInfo::new(isa, &context).map_err(|error| {
+        SetupError::Compile(CompileError::Codegen(pretty_error(
+            &context.func,
+            Some(isa),
+            error,
+        )))
+    })?;

     let ptr = code_memory
         .allocate_for_function(&CompiledFunction {