Refactor unwind generation in Cranelift.

This commit makes the following changes to unwind information generation in
Cranelift:

* Remove frame layout change implementation in favor of processing the prologue
  and epilogue instructions when unwind information is requested.  This also
  means this work is no longer performed for Windows, which didn't utilize it.
  It also helps simplify the prologue and epilogue generation code.

* Remove the unwind sink implementation that required each function's unwind
  information to be represented in its final form. For FDEs, this meant writing
  a complete frame table per function, which wasted 20 bytes or so per function
  on duplicate CIEs.  This also enables Cranelift users to collect the
  unwind information and write it as a single frame table.

* For the System V calling convention, the unwind information is no longer
  stored in code memory (only the Windows ABI requires that).  This allows
  for more compact code memory for modules with a lot of functions.

* Delete some duplicate code relating to frame table generation.  Users can
  now simply use gimli to create a frame table from each function's unwind
  information.

Fixes #1181.
This commit is contained in:
Peter Huene
2020-03-30 19:48:02 -07:00
parent 7da6101732
commit f7e9f86ba9
42 changed files with 2678 additions and 3161 deletions

View File

@@ -173,36 +173,6 @@ pub trait CodeSink {
}
}
/// Type of the frame unwind information.
///
/// Passed to `emit_unwind_info` to select which unwind format is written to the sink.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum FrameUnwindKind {
/// Windows fastcall unwinding (as in .pdata).
Fastcall,
/// FDE entry for libunwind (similar to .eh_frame format).
Libunwind,
}
/// Offset in frame unwind information buffer.
pub type FrameUnwindOffset = usize;
/// Sink for frame unwind information.
///
/// Implementors must provide `len`, `bytes`, `reloc` and `set_entry_offset`;
/// `reserve` has a default implementation that does nothing.
pub trait FrameUnwindSink {
/// Get the current position.
fn len(&self) -> FrameUnwindOffset;
/// Add bytes to the code section.
// NOTE(review): "code section" looks copied from `CodeSink`; presumably the bytes are
// appended to the unwind information buffer -- confirm.
fn bytes(&mut self, _: &[u8]);
/// Reserves bytes in the buffer.
///
/// The default implementation is a no-op.
fn reserve(&mut self, _len: usize) {}
/// Add a relocation entry.
fn reloc(&mut self, _: Reloc, _: FrameUnwindOffset);
/// Specified offset to main structure.
fn set_entry_offset(&mut self, _: FrameUnwindOffset);
}
/// Report a bad encoding error.
#[cold]
pub fn bad_encoding(func: &Function, inst: Inst) -> ! {

View File

@@ -10,8 +10,8 @@
//! single ISA instance.
use crate::binemit::{
relax_branches, shrink_instructions, CodeInfo, FrameUnwindKind, FrameUnwindSink,
MemoryCodeSink, RelocSink, StackmapSink, TrapSink,
relax_branches, shrink_instructions, CodeInfo, MemoryCodeSink, RelocSink, StackmapSink,
TrapSink,
};
use crate::dce::do_dce;
use crate::dominator_tree::DominatorTree;
@@ -231,19 +231,15 @@ impl Context {
sink.info
}
/// Emit unwind information.
/// Creates unwind information for the function.
///
/// Requires that the function layout be calculated (see `relax_branches`).
///
/// Only some calling conventions (e.g. Windows fastcall) will have unwind information.
/// This is a no-op if the function has no unwind information.
pub fn emit_unwind_info(
/// Returns `None` if the function has no unwind information.
#[cfg(feature = "unwind")]
pub fn create_unwind_info(
&self,
isa: &dyn TargetIsa,
kind: FrameUnwindKind,
sink: &mut dyn FrameUnwindSink,
) -> CodegenResult<()> {
isa.emit_unwind_info(&self.func, kind, sink)
) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
isa.create_unwind_info(&self.func)
}
/// Run the verifier on the function.

View File

@@ -1,74 +0,0 @@
//! Frame layout item changes.
use crate::ir::entities::Inst;
use crate::isa::RegUnit;
use std::boxed::Box;
use crate::HashMap;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// Change in the frame layout information.
///
/// Each variant describes one update to what an unwinder needs to know about the frame:
/// where the CFA is, where a register was saved, or where the return address lives.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum FrameLayoutChange {
/// Base CallFrameAddress (CFA) pointer moved to different register/offset.
///
/// (DWARF itself expands CFA as "Canonical Frame Address".)
CallFrameAddressAt {
/// CFA register.
reg: RegUnit,
/// CFA offset.
offset: isize,
},
/// Register saved at.
RegAt {
/// Saved register.
reg: RegUnit,
/// Offset in the frame (offset from CFA).
cfa_offset: isize,
},
/// Return address saved at.
ReturnAddressAt {
/// Offset in the frame (offset from CFA).
cfa_offset: isize,
},
/// The entire frame layout must be preserved somewhere to be restored at a corresponding
/// `Restore` change.
///
/// This likely maps to the DWARF call frame instruction `DW_CFA_remember_state`
/// (the `.cfi_remember_state` assembler directive).
Preserve,
/// Restore the entire frame layout from a corresponding prior `Preserve` frame change.
///
/// This likely maps to the DWARF call frame instruction `DW_CFA_restore_state`
/// (the `.cfi_restore_state` assembler directive).
Restore,
}
/// Set of frame layout changes.
///
/// A boxed slice has a fixed length, so extending a set means building a new one.
pub type FrameLayoutChanges = Box<[FrameLayoutChange]>;
/// Frame items layout for (prologue/epilogue) instructions.
///
/// Holds the layout that applies before any instruction runs (`initial`) plus the changes
/// attached to individual instructions (`instructions`).
#[derive(Debug, Clone)]
pub struct FrameLayout {
/// Initial frame layout.
pub initial: FrameLayoutChanges,
/// Instruction frame layout (changes). Because the map will not be dense,
/// a HashMap is used instead of a SecondaryMap.
pub instructions: HashMap<Inst, FrameLayoutChanges>,
}
impl FrameLayout {
/// Create instance of FrameLayout.
pub fn new() -> Self {
Self {
initial: vec![].into_boxed_slice(),
instructions: HashMap::new(),
}
}
/// Clear the structure.
pub fn clear(&mut self) {
self.initial = vec![].into_boxed_slice();
self.instructions.clear();
}
}

View File

@@ -10,13 +10,14 @@ use crate::ir::{
Block, ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, HeapData, Inst, JumpTable,
JumpTableData, Opcode, SigRef, StackSlot, StackSlotData, Table, TableData,
};
use crate::ir::{BlockOffsets, FrameLayout, InstEncodings, SourceLocs, StackSlots, ValueLocations};
use crate::ir::{BlockOffsets, InstEncodings, SourceLocs, StackSlots, ValueLocations};
use crate::ir::{DataFlowGraph, ExternalName, Layout, Signature};
use crate::ir::{JumpTableOffsets, JumpTables};
use crate::isa::{CallConv, EncInfo, Encoding, Legalize, TargetIsa};
use crate::regalloc::{EntryRegDiversions, RegDiversions};
use crate::value_label::ValueLabelsRanges;
use crate::write::write_function;
use alloc::vec::Vec;
use core::fmt;
/// A function.
@@ -87,15 +88,13 @@ pub struct Function {
/// Instruction that marks the end (inclusive) of the function's prologue.
///
/// This is used for some calling conventions to track the end of unwind information.
/// This is used for some ABIs to generate unwind information.
pub prologue_end: Option<Inst>,
/// Frame layout for the instructions.
/// The instructions that mark the start (inclusive) of an epilogue in the function.
///
/// The stack unwinding requires to have information about which registers and where they
/// are saved in the frame. This information is created during the prologue and epilogue
/// passes.
pub frame_layout: Option<FrameLayout>,
/// This is used for some ABIs to generate unwind information.
pub epilogues_start: Vec<Inst>,
}
impl Function {
@@ -119,7 +118,7 @@ impl Function {
jt_offsets: SecondaryMap::new(),
srclocs: SecondaryMap::new(),
prologue_end: None,
frame_layout: None,
epilogues_start: Vec::new(),
}
}
@@ -140,7 +139,7 @@ impl Function {
self.jt_offsets.clear();
self.srclocs.clear();
self.prologue_end = None;
self.frame_layout = None;
self.epilogues_start.clear();
}
/// Create a new empty, anonymous function with a Fast calling convention.
@@ -258,12 +257,6 @@ impl Function {
/// Starts collection of debug information.
pub fn collect_debug_info(&mut self) {
self.dfg.collect_debug_info();
self.collect_frame_layout_info();
}
/// Starts collection of frame layout information.
pub fn collect_frame_layout_info(&mut self) {
self.frame_layout = Some(FrameLayout::new());
}
/// Changes the destination of a jump or branch instruction.

View File

@@ -6,7 +6,6 @@ pub mod dfg;
pub mod entities;
mod extfunc;
mod extname;
mod framelayout;
pub mod function;
mod globalvalue;
mod heap;
@@ -40,7 +39,6 @@ pub use crate::ir::extfunc::{
AbiParam, ArgumentExtension, ArgumentPurpose, ExtFuncData, Signature,
};
pub use crate::ir::extname::ExternalName;
pub use crate::ir::framelayout::{FrameLayout, FrameLayoutChange, FrameLayoutChanges};
pub use crate::ir::function::{DisplayFunctionAnnotations, Function};
pub use crate::ir::globalvalue::GlobalValueData;
pub use crate::ir::heap::{HeapData, HeapStyle};

View File

@@ -1,14 +0,0 @@
//! Support for FDE data generation.
use thiserror::Error;
/// Enumerate the errors possible in mapping Cranelift registers to their DWARF equivalent.
#[allow(missing_docs)]
#[derive(Error, Debug)]
pub enum RegisterMappingError {
#[error("unable to find bank for register info")]
MissingBank,
#[error("register mapping is currently only implemented for x86_64")]
UnsupportedArchitecture,
#[error("unsupported register bank: {0}")]
UnsupportedRegisterBank(&'static str),
}

View File

@@ -56,8 +56,8 @@ pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
use crate::binemit;
use crate::flowgraph;
use crate::ir;
use crate::isa::fde::RegisterMappingError;
#[cfg(feature = "unwind")]
use crate::isa::unwind::systemv::RegisterMappingError;
use crate::machinst::MachBackend;
use crate::regalloc;
use crate::result::CodegenResult;
@@ -77,15 +77,15 @@ mod riscv;
#[cfg(feature = "x86")]
mod x86;
#[cfg(feature = "unwind")]
pub mod fde;
#[cfg(feature = "arm32")]
mod arm32;
#[cfg(feature = "arm64")]
mod aarch64;
#[cfg(feature = "unwind")]
pub mod unwind;
mod call_conv;
mod constraints;
mod enc_tables;
@@ -394,17 +394,25 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
/// IntCC condition for Unsigned Subtraction Overflow (Borrow/Carry).
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC;
/// Emit unwind information for the given function.
/// Creates unwind information for the function.
///
/// Only some calling conventions (e.g. Windows fastcall) will have unwind information.
fn emit_unwind_info(
/// Returns `None` if there is no unwind information for the function.
#[cfg(feature = "unwind")]
fn create_unwind_info(
&self,
_func: &ir::Function,
_kind: binemit::FrameUnwindKind,
_sink: &mut dyn binemit::FrameUnwindSink,
) -> CodegenResult<()> {
// No-op by default
Ok(())
) -> CodegenResult<Option<unwind::UnwindInfo>> {
// By default, an ISA has no unwind information
Ok(None)
}
/// Creates a new System V Common Information Entry for the ISA.
///
/// Returns `None` if the ISA does not support System V unwind information.
#[cfg(feature = "unwind")]
fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
// By default, an ISA cannot create a System V CIE
None
}
/// Get the new-style MachBackend, if this is an adapter around one.

View File

@@ -0,0 +1,16 @@
//! Represents information relating to function unwinding.
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
pub mod systemv;
/// Represents unwind information for a single function.
///
/// The variant identifies the unwind format. `WindowsX64` only exists when the `x86`
/// feature is enabled, because its payload type lives in the x86 ISA module.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum UnwindInfo {
/// Windows x64 ABI unwind information.
#[cfg(feature = "x86")]
WindowsX64(super::x86::unwind::windows::UnwindInfo),
/// System V ABI unwind information.
SystemV(systemv::UnwindInfo),
}

View File

@@ -0,0 +1,129 @@
//! System V ABI unwind information.
use alloc::vec::Vec;
use gimli::write::{Address, FrameDescriptionEntry};
use thiserror::Error;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
// Raw register number, stored in place of `gimli::Register` (its `.0` field).
type Register = u16;
// Raw expression bytes, stored in place of `gimli::write::Expression` (its `.0` field).
type Expression = Vec<u8>;
/// Enumerate the errors possible in mapping Cranelift registers to their DWARF equivalent.
///
/// The `Display` text of each variant comes from its `#[error(...)]` attribute.
#[allow(missing_docs)]
#[derive(Error, Debug)]
pub enum RegisterMappingError {
// No register bank could be found for the register being mapped.
#[error("unable to find bank for register info")]
MissingBank,
// The target is not x86_64, the only architecture with a register mapping.
#[error("register mapping is currently only implemented for x86_64")]
UnsupportedArchitecture,
// The register belongs to a bank with no mapping; the payload is shown in the message.
#[error("unsupported register bank: {0}")]
UnsupportedRegisterBank(&'static str),
}
// This mirrors gimli's CallFrameInstruction, but is serializable
// TODO: if gimli ever adds serialization support, remove this type
//
// Registers are stored as raw `u16` numbers and expressions as raw byte vectors; each
// variant maps one-to-one onto the gimli variant of the same name (see the conversions
// to and from `gimli::write::CallFrameInstruction`).
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub(crate) enum CallFrameInstruction {
Cfa(Register, i32),
CfaRegister(Register),
CfaOffset(i32),
CfaExpression(Expression),
Restore(Register),
Undefined(Register),
SameValue(Register),
Offset(Register, i32),
ValOffset(Register, i32),
Register(Register, Register),
Expression(Register, Expression),
ValExpression(Register, Expression),
RememberState,
RestoreState,
ArgsSize(u32),
}
impl From<gimli::write::CallFrameInstruction> for CallFrameInstruction {
fn from(cfi: gimli::write::CallFrameInstruction) -> Self {
use gimli::write::CallFrameInstruction;
match cfi {
CallFrameInstruction::Cfa(reg, offset) => Self::Cfa(reg.0, offset),
CallFrameInstruction::CfaRegister(reg) => Self::CfaRegister(reg.0),
CallFrameInstruction::CfaOffset(offset) => Self::CfaOffset(offset),
CallFrameInstruction::CfaExpression(expr) => Self::CfaExpression(expr.0),
CallFrameInstruction::Restore(reg) => Self::Restore(reg.0),
CallFrameInstruction::Undefined(reg) => Self::Undefined(reg.0),
CallFrameInstruction::SameValue(reg) => Self::SameValue(reg.0),
CallFrameInstruction::Offset(reg, offset) => Self::Offset(reg.0, offset),
CallFrameInstruction::ValOffset(reg, offset) => Self::ValOffset(reg.0, offset),
CallFrameInstruction::Register(reg1, reg2) => Self::Register(reg1.0, reg2.0),
CallFrameInstruction::Expression(reg, expr) => Self::Expression(reg.0, expr.0),
CallFrameInstruction::ValExpression(reg, expr) => Self::ValExpression(reg.0, expr.0),
CallFrameInstruction::RememberState => Self::RememberState,
CallFrameInstruction::RestoreState => Self::RestoreState,
CallFrameInstruction::ArgsSize(size) => Self::ArgsSize(size),
}
}
}
impl Into<gimli::write::CallFrameInstruction> for CallFrameInstruction {
fn into(self) -> gimli::write::CallFrameInstruction {
use gimli::{
write::{CallFrameInstruction, Expression},
Register,
};
match self {
Self::Cfa(reg, offset) => CallFrameInstruction::Cfa(Register(reg), offset),
Self::CfaRegister(reg) => CallFrameInstruction::CfaRegister(Register(reg)),
Self::CfaOffset(offset) => CallFrameInstruction::CfaOffset(offset),
Self::CfaExpression(expr) => CallFrameInstruction::CfaExpression(Expression(expr)),
Self::Restore(reg) => CallFrameInstruction::Restore(Register(reg)),
Self::Undefined(reg) => CallFrameInstruction::Undefined(Register(reg)),
Self::SameValue(reg) => CallFrameInstruction::SameValue(Register(reg)),
Self::Offset(reg, offset) => CallFrameInstruction::Offset(Register(reg), offset),
Self::ValOffset(reg, offset) => CallFrameInstruction::ValOffset(Register(reg), offset),
Self::Register(reg1, reg2) => {
CallFrameInstruction::Register(Register(reg1), Register(reg2))
}
Self::Expression(reg, expr) => {
CallFrameInstruction::Expression(Register(reg), Expression(expr))
}
Self::ValExpression(reg, expr) => {
CallFrameInstruction::ValExpression(Register(reg), Expression(expr))
}
Self::RememberState => CallFrameInstruction::RememberState,
Self::RestoreState => CallFrameInstruction::RestoreState,
Self::ArgsSize(size) => CallFrameInstruction::ArgsSize(size),
}
}
}
/// Represents unwind information for a single System V ABI function.
///
/// This representation is not ISA specific.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct UnwindInfo {
// Call frame instructions, each paired with the offset that `to_fde` hands to
// `FrameDescriptionEntry::add_instruction`.
instructions: Vec<(u32, CallFrameInstruction)>,
// Length that `to_fde` hands to `FrameDescriptionEntry::new`.
len: u32,
}
impl UnwindInfo {
    /// Bundles `(offset, instruction)` pairs with the length used when building the FDE.
    pub(crate) fn new(instructions: Vec<(u32, CallFrameInstruction)>, len: u32) -> Self {
        Self { len, instructions }
    }

    /// Converts the unwind information into a `FrameDescriptionEntry`.
    ///
    /// The entry is created for `address` with the stored length, and every stored
    /// instruction is added to it at its recorded offset, in order.
    pub fn to_fde(&self, address: Address) -> gimli::write::FrameDescriptionEntry {
        let mut entry = FrameDescriptionEntry::new(address, self.len);
        self.instructions
            .iter()
            .cloned()
            .for_each(|(offset, cfi)| entry.add_instruction(offset, cfi.into()));
        entry
    }
}

View File

@@ -1,15 +1,9 @@
//! x86 ABI implementation.
use super::super::settings as shared_settings;
#[cfg(feature = "unwind")]
use super::fde::emit_fde;
use super::registers::{FPR, GPR, RU};
use super::settings as isa_settings;
#[cfg(feature = "unwind")]
use super::unwind::UnwindInfo;
use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
#[cfg(feature = "unwind")]
use crate::binemit::{FrameUnwindKind, FrameUnwindSink};
use crate::cursor::{Cursor, CursorPosition, EncCursor};
use crate::ir;
use crate::ir::entities::StackSlot;
@@ -17,8 +11,8 @@ use crate::ir::immediates::Imm64;
use crate::ir::stackslot::{StackOffset, StackSize};
use crate::ir::types;
use crate::ir::{
get_probestack_funcref, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose,
FrameLayoutChange, InstBuilder, ValueLoc,
get_probestack_funcref, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder,
ValueLoc,
};
use crate::isa::{CallConv, RegClass, RegUnit, TargetIsa};
use crate::regalloc::RegisterSet;
@@ -27,7 +21,6 @@ use crate::stack_layout::layout_stack;
use alloc::borrow::Cow;
use alloc::vec::Vec;
use core::i32;
use std::boxed::Box;
use target_lexicon::{PointerWidth, Triple};
/// Argument registers for x86-64
@@ -525,32 +518,6 @@ fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) ->
Ok(())
}
/// CFAState is cranelift's model of the call frame layout at any particular point in a function.
/// It describes the call frame's layout in terms of a call frame address, where it is with respect
/// to the start of the call frame, and where the top of the stack is with respect to it.
///
/// Changes in this layout are used to derive appropriate `ir::FrameLayoutChange` to record for
/// relevant instructions.
#[derive(Clone)]
struct CFAState {
/// The register from which we can derive the call frame address. On x86_64, this is typically
/// `rbp`, but at function entry and exit may be `rsp` while the call frame is being
/// established.
cf_ptr_reg: RegUnit,
/// Given that `cf_ptr_reg` is a register containing a pointer to some memory, `cf_ptr_offset`
/// is the offset from that pointer to the address of the start of this function's call frame.
///
/// For a concrete x86_64 example, we will start this at 8 - the call frame begins immediately
/// before the return address. This will typically then be set to 16, after pushing `rbp` to
/// preserve the parent call frame. It is very unlikely the offset should be anything other
/// than one or two pointer widths.
cf_ptr_offset: isize,
/// The offset between the start of the call frame and the current stack pointer. This is
/// primarily useful to point to where on the stack preserved registers are, but is maintained
/// through the whole function for consistency.
current_depth: isize,
}
/// Implementation of the fastcall-based Win64 calling convention described at [1]
/// [1] https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention
fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
@@ -629,7 +596,7 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
// Set up the cursor and insert the prologue
let entry_block = func.layout.entry_block().expect("missing entry block");
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block);
let prologue_cfa_state = insert_common_prologue(
insert_common_prologue(
&mut pos,
local_stack_size,
reg_type,
@@ -646,8 +613,6 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
reg_type,
&csrs,
fpr_slot.as_ref(),
isa,
prologue_cfa_state,
);
Ok(())
@@ -701,20 +666,11 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
// Set up the cursor and insert the prologue
let entry_block = func.layout.entry_block().expect("missing entry block");
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block);
let prologue_cfa_state =
insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, None, isa);
insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, None, isa);
// Reset the cursor and insert the epilogue
let mut pos = pos.at_position(CursorPosition::Nowhere);
insert_common_epilogues(
&mut pos,
local_stack_size,
reg_type,
&csrs,
None,
isa,
prologue_cfa_state,
);
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, None);
Ok(())
}
@@ -728,8 +684,7 @@ fn insert_common_prologue(
csrs: &RegisterSet,
fpr_slot: Option<&StackSlot>,
isa: &dyn TargetIsa,
) -> Option<CFAState> {
let word_size = isa.pointer_bytes() as isize;
) {
if stack_size > 0 {
// Check if there is a special stack limit parameter. If so insert stack check.
if let Some(stack_limit_arg) = pos.func.special_param(ArgumentPurpose::StackLimit) {
@@ -739,7 +694,8 @@ fn insert_common_prologue(
// also should be accounted for.
// If any FPR are present, count them as well as necessary alignment space.
// TODO: Check if the function body actually contains a `call` instruction.
let mut total_stack_size = (csrs.iter(GPR).len() + 1 + 1) as i64 * word_size as i64;
let mut total_stack_size =
(csrs.iter(GPR).len() + 1 + 1) as i64 * (isa.pointer_bytes() as isize) as i64;
total_stack_size += csrs.iter(FPR).len() as i64 * types::F64X2.bytes() as i64;
@@ -747,104 +703,29 @@ fn insert_common_prologue(
}
}
let mut cfa_state = if let Some(ref mut frame_layout) = pos.func.frame_layout {
let cfa_state = CFAState {
cf_ptr_reg: RU::rsp as RegUnit,
cf_ptr_offset: word_size,
current_depth: -word_size,
};
frame_layout.initial = vec![
FrameLayoutChange::CallFrameAddressAt {
reg: cfa_state.cf_ptr_reg,
offset: cfa_state.cf_ptr_offset,
},
FrameLayoutChange::ReturnAddressAt {
cfa_offset: cfa_state.current_depth,
},
]
.into_boxed_slice();
Some(cfa_state)
} else {
None
};
// Append param to entry block
let block = pos.current_block().expect("missing block under cursor");
let fp = pos.func.dfg.append_block_param(block, reg_type);
pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
let push_fp_inst = pos.ins().x86_push(fp);
if let Some(ref mut frame_layout) = pos.func.frame_layout {
let cfa_state = cfa_state
.as_mut()
.expect("cfa state exists when recording frame layout");
cfa_state.current_depth -= word_size;
cfa_state.cf_ptr_offset += word_size;
frame_layout.instructions.insert(
push_fp_inst,
vec![
FrameLayoutChange::CallFrameAddressAt {
reg: cfa_state.cf_ptr_reg,
offset: cfa_state.cf_ptr_offset,
},
FrameLayoutChange::RegAt {
reg: RU::rbp as RegUnit,
cfa_offset: cfa_state.current_depth,
},
]
.into_boxed_slice(),
);
}
pos.ins().x86_push(fp);
let mov_sp_inst = pos
.ins()
.copy_special(RU::rsp as RegUnit, RU::rbp as RegUnit);
if let Some(ref mut frame_layout) = pos.func.frame_layout {
let mut cfa_state = cfa_state
.as_mut()
.expect("cfa state exists when recording frame layout");
cfa_state.cf_ptr_reg = RU::rbp as RegUnit;
frame_layout.instructions.insert(
mov_sp_inst,
vec![FrameLayoutChange::CallFrameAddressAt {
reg: cfa_state.cf_ptr_reg,
offset: cfa_state.cf_ptr_offset,
}]
.into_boxed_slice(),
);
}
let mut last_csr_push = None;
for reg in csrs.iter(GPR) {
// Append param to entry block
let csr_arg = pos.func.dfg.append_block_param(block, reg_type);
// Assign it a location
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
// Remember it so we can push it momentarily
let reg_push_inst = pos.ins().x86_push(csr_arg);
if let Some(ref mut frame_layout) = pos.func.frame_layout {
let mut cfa_state = cfa_state
.as_mut()
.expect("cfa state exists when recording frame layout");
cfa_state.current_depth -= word_size;
frame_layout.instructions.insert(
reg_push_inst,
vec![FrameLayoutChange::RegAt {
reg,
cfa_offset: cfa_state.current_depth,
}]
.into_boxed_slice(),
);
}
last_csr_push = Some(pos.ins().x86_push(csr_arg));
}
// Allocate stack frame storage.
let mut adjust_sp_inst = None;
if stack_size > 0 {
if isa.flags().enable_probestack() && stack_size > (1 << isa.flags().probestack_size_log2())
{
@@ -880,15 +761,16 @@ fn insert_common_prologue(
if !isa.flags().probestack_func_adjusts_sp() {
let result = pos.func.dfg.inst_results(call)[0];
pos.func.locations[result] = rax_val;
pos.func.prologue_end = Some(pos.ins().adjust_sp_down(result));
adjust_sp_inst = Some(pos.ins().adjust_sp_down(result));
}
} else {
// Simply decrement the stack pointer.
pos.func.prologue_end = Some(pos.ins().adjust_sp_down_imm(Imm64::new(stack_size)));
adjust_sp_inst = Some(pos.ins().adjust_sp_down_imm(Imm64::new(stack_size)));
}
}
// Now that RSP is prepared for the function, we can use stack slots:
let mut last_fpr_save = None;
if let Some(fpr_slot) = fpr_slot {
debug_assert!(csrs.iter(FPR).len() != 0);
@@ -911,33 +793,22 @@ fn insert_common_prologue(
// Since regalloc has already run, we must assign a location.
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
let reg_store_inst =
pos.ins()
.store(ir::MemFlags::trusted(), csr_arg, stack_addr, fpr_offset);
// If we preserve FPRs, they occur after SP is adjusted, so also fix up the end point
// to this new instruction.
pos.func.prologue_end = Some(reg_store_inst);
fpr_offset += types::F64X2.bytes() as i32;
if let Some(ref mut frame_layout) = pos.func.frame_layout {
let mut cfa_state = cfa_state
.as_mut()
.expect("cfa state exists when recording frame layout");
cfa_state.current_depth -= types::F64X2.bytes() as isize;
frame_layout.instructions.insert(
reg_store_inst,
vec![FrameLayoutChange::RegAt {
reg,
cfa_offset: cfa_state.current_depth,
}]
.into_boxed_slice(),
last_fpr_save =
Some(
pos.ins()
.store(ir::MemFlags::trusted(), csr_arg, stack_addr, fpr_offset),
);
}
fpr_offset += types::F64X2.bytes() as i32;
}
}
cfa_state
pos.func.prologue_end = Some(
last_fpr_save
.or(adjust_sp_inst)
.or(last_csr_push)
.unwrap_or(mov_sp_inst),
);
}
/// Insert a check that generates a trap if the stack pointer goes
@@ -970,25 +841,12 @@ fn insert_common_epilogues(
reg_type: ir::types::Type,
csrs: &RegisterSet,
fpr_slot: Option<&StackSlot>,
isa: &dyn TargetIsa,
cfa_state: Option<CFAState>,
) {
while let Some(block) = pos.next_block() {
pos.goto_last_inst(block);
if let Some(inst) = pos.current_inst() {
if pos.func.dfg[inst].opcode().is_return() {
let is_last = pos.func.layout.last_block() == Some(block);
insert_common_epilogue(
inst,
stack_size,
pos,
reg_type,
csrs,
fpr_slot,
isa,
is_last,
cfa_state.clone(),
);
insert_common_epilogue(inst, stack_size, pos, reg_type, csrs, fpr_slot);
}
}
}
@@ -1003,17 +861,13 @@ fn insert_common_epilogue(
reg_type: ir::types::Type,
csrs: &RegisterSet,
fpr_slot: Option<&StackSlot>,
isa: &dyn TargetIsa,
is_last: bool,
mut cfa_state: Option<CFAState>,
) {
let word_size = isa.pointer_bytes() as isize;
// Even though instructions to restore FPRs are inserted first, we have to append them after
// restored GPRs to satisfy parameter order in the return.
let mut restored_fpr_values = Vec::new();
// Restore FPRs before we move RSP and invalidate stack slots.
let mut first_fpr_load = None;
if let Some(fpr_slot) = fpr_slot {
debug_assert!(csrs.iter(FPR).len() != 0);
@@ -1024,6 +878,8 @@ fn insert_common_epilogue(
// See also: https://github.com/bytecodealliance/wasmtime/pull/1198
let stack_addr = pos.ins().stack_addr(types::I64, *fpr_slot, 0);
first_fpr_load.get_or_insert(pos.current_inst().expect("current inst"));
// Use r11 as fastcall allows it to be clobbered, and it won't have a meaningful value at
// function exit.
pos.func.locations[stack_addr] = ir::ValueLoc::Reg(RU::r11 as u16);
@@ -1039,13 +895,6 @@ fn insert_common_epilogue(
);
fpr_offset += types::F64X2.bytes() as i32;
if let Some(ref mut cfa_state) = cfa_state.as_mut() {
// Note: don't bother recording a frame layout change because the popped value is
// still correct in memory, and won't be overwritten until we've returned where the
// current frame's layout would no longer matter. Only adjust `current_depth` for a
// consistency check later.
cfa_state.current_depth += types::F64X2.bytes() as isize;
}
// Unlike GPRs before, we don't need to step back after each restoration because FPR
// restoration is order-insensitive. Furthermore: we want GPR restoration to begin
// after FPR restoration, so that stack adjustments occur after we're done relying on
@@ -1056,114 +905,56 @@ fn insert_common_epilogue(
}
}
let mut sp_adjust_inst = None;
if stack_size > 0 {
pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
sp_adjust_inst = Some(pos.ins().adjust_sp_up_imm(Imm64::new(stack_size)));
}
// Pop all the callee-saved registers, stepping backward each time to
// preserve the correct order.
let fp_ret = pos.ins().x86_pop(reg_type);
let fp_pop_inst = pos.built_inst();
if let Some(ref mut cfa_state) = cfa_state.as_mut() {
// Account for CFA state in the reverse of `insert_common_prologue`.
cfa_state.current_depth += word_size;
cfa_state.cf_ptr_offset -= word_size;
// And now that we're going to overwrite `rbp`, `rsp` is the only way to get to the call frame.
// We don't apply a frame layout change *yet* because we check that at return the depth is
// exactly one `word_size`.
cfa_state.cf_ptr_reg = RU::rsp as RegUnit;
}
pos.prev_inst();
pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
pos.func.dfg.append_inst_arg(inst, fp_ret);
// Insert the pop of the frame pointer
let fp_pop = pos.ins().x86_pop(reg_type);
let fp_pop_inst = pos.prev_inst().unwrap();
pos.func.locations[fp_pop] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
pos.func.dfg.append_inst_arg(inst, fp_pop);
// Insert the CSR pops
let mut first_csr_pop_inst = None;
for reg in csrs.iter(GPR) {
let csr_ret = pos.ins().x86_pop(reg_type);
if let Some(ref mut cfa_state) = cfa_state.as_mut() {
// Note: don't bother recording a frame layout change because the popped value is
// still correct in memory, and won't be overwritten until we've returned where the
// current frame's layout would no longer matter. Only adjust `current_depth` for a
// consistency check later.
cfa_state.current_depth += word_size;
}
pos.prev_inst();
pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg);
pos.func.dfg.append_inst_arg(inst, csr_ret);
let csr_pop = pos.ins().x86_pop(reg_type);
first_csr_pop_inst = Some(pos.prev_inst().unwrap());
pos.func.locations[csr_pop] = ir::ValueLoc::Reg(reg);
pos.func.dfg.append_inst_arg(inst, csr_pop);
}
for value in restored_fpr_values.into_iter() {
pos.func.dfg.append_inst_arg(inst, value);
}
if let Some(ref mut frame_layout) = pos.func.frame_layout {
let cfa_state = cfa_state
.as_mut()
.expect("cfa state exists when recording frame layout");
// Validity checks - if we accounted correctly, CFA state at a return will match CFA state
// at the entry of a function.
//
// Current_depth starts assuming a return address is pushed, and cf_ptr_offset is one
// pointer below current_depth.
assert_eq!(cfa_state.current_depth, -word_size);
assert_eq!(cfa_state.cf_ptr_offset, word_size);
// Inserting preserve CFA state operation after FP pop instructions.
let new_cfa = FrameLayoutChange::CallFrameAddressAt {
reg: cfa_state.cf_ptr_reg,
offset: cfa_state.cf_ptr_offset,
};
let new_cfa = if is_last {
vec![new_cfa]
} else {
vec![FrameLayoutChange::Preserve, new_cfa]
};
frame_layout
.instructions
.entry(fp_pop_inst)
.and_modify(|insts| {
*insts = insts
.iter()
.cloned()
.chain(new_cfa.clone().into_iter())
.collect::<Box<[_]>>();
})
.or_insert_with(|| new_cfa.into_boxed_slice());
if !is_last {
// Inserting restore CFA state operation after each return.
frame_layout
.instructions
.insert(inst, vec![FrameLayoutChange::Restore].into_boxed_slice());
}
}
pos.func.epilogues_start.push(
first_fpr_load
.or(sp_adjust_inst)
.or(first_csr_pop_inst)
.unwrap_or(fp_pop_inst),
);
}
#[cfg(feature = "unwind")]
pub fn emit_unwind_info(
pub fn create_unwind_info(
func: &ir::Function,
isa: &dyn TargetIsa,
kind: FrameUnwindKind,
sink: &mut dyn FrameUnwindSink,
) -> CodegenResult<()> {
match kind {
FrameUnwindKind::Fastcall => {
// Assumption: RBP is being used as the frame pointer
// In the future, Windows fastcall codegen should usually omit the frame pointer
if let Some(info) = UnwindInfo::try_from_func(func, isa, Some(RU::rbp.into()))? {
info.emit(sink);
}
}
FrameUnwindKind::Libunwind => {
if func.frame_layout.is_some() {
emit_fde(func, isa, sink)?;
}
}
}
) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
use crate::isa::unwind::UnwindInfo;
Ok(())
// Assumption: RBP is being used as the frame pointer for both calling conventions
// In the future, we should be omitting frame pointer as an optimization, so this will change
Ok(match func.signature.call_conv {
CallConv::Fast | CallConv::Cold | CallConv::SystemV => {
super::unwind::systemv::create_unwind_info(func, isa, Some(RU::rbp.into()))?
.map(|u| UnwindInfo::SystemV(u))
}
CallConv::WindowsFastcall => {
super::unwind::windows::create_unwind_info(func, isa, Some(RU::rbp.into()))?
.map(|u| UnwindInfo::WindowsX64(u))
}
_ => None,
})
}

View File

@@ -1,448 +0,0 @@
//! Support for FDE data generation.
use crate::binemit::{FrameUnwindOffset, FrameUnwindSink, Reloc};
use crate::ir::{FrameLayoutChange, Function};
use crate::isa::fde::RegisterMappingError;
use crate::isa::{CallConv, RegUnit, TargetIsa};
use crate::result::CodegenResult;
use alloc::vec::Vec;
use core::convert::TryInto;
use gimli::write::{
Address, CallFrameInstruction, CommonInformationEntry, EhFrame, EndianVec,
FrameDescriptionEntry, FrameTable, Result, Writer,
};
use gimli::{Encoding, Format, LittleEndian, Register, X86_64};
/// A relocation recorded while writing FDE data: the offset of the address field within the
/// written bytes, paired with the relocation kind to apply there.
pub type FDERelocEntry = (FrameUnwindOffset, Reloc);
/// Symbolic placeholder used as the FDE's start address. `FDEWriter::write_address` accepts
/// only this symbol and records a relocation for it instead of writing a real address.
const FUNCTION_ENTRY_ADDRESS: Address = Address::Symbol {
symbol: 0,
addend: 0,
};
/// Little-endian gimli `Writer` that buffers the written bytes and records a relocation
/// for every symbolic address it is asked to write.
#[derive(Clone)]
struct FDEWriter {
// Bytes written so far.
vec: EndianVec<LittleEndian>,
// Relocations recorded by `write_address`, as (offset into `vec`, relocation kind).
relocs: Vec<FDERelocEntry>,
}
impl FDEWriter {
fn new() -> Self {
Self {
vec: EndianVec::new(LittleEndian),
relocs: Vec::new(),
}
}
fn into_vec_and_relocs(self) -> (Vec<u8>, Vec<FDERelocEntry>) {
(self.vec.into_vec(), self.relocs)
}
}
impl Writer for FDEWriter {
    type Endian = LittleEndian;

    fn endian(&self) -> Self::Endian {
        LittleEndian
    }

    fn len(&self) -> usize {
        self.vec.len()
    }

    fn write(&mut self, bytes: &[u8]) -> Result<()> {
        self.vec.write(bytes)
    }

    fn write_at(&mut self, offset: usize, bytes: &[u8]) -> Result<()> {
        self.vec.write_at(offset, bytes)
    }

    /// Writes `address` using `size` bytes.
    ///
    /// Constant addresses are forwarded to the underlying buffer. The only symbolic address
    /// accepted is `FUNCTION_ENTRY_ADDRESS`; for it a relocation is recorded at the current
    /// position and a zero placeholder of `size` bytes is written.
    fn write_address(&mut self, address: Address, size: u8) -> Result<()> {
        if let Address::Constant(_) = address {
            return self.vec.write_address(address, size);
        }

        assert_eq!(address, FUNCTION_ENTRY_ADDRESS);
        let kind = match size {
            4 => Reloc::Abs4,
            8 => Reloc::Abs8,
            _ => panic!("Unexpected address size at FDEWriter::write_address"),
        };
        self.relocs
            .push((self.vec.len().try_into().unwrap(), kind));
        self.vec.write_udata(0, size)
    }
}
/// Returns the gimli register used for the return address.
///
/// Panics unless `isa` reports the name "x86" with 64-bit pointers.
fn return_address_reg(isa: &dyn TargetIsa) -> Register {
assert!(isa.name() == "x86" && isa.pointer_bits() == 64);
X86_64::RA
}
/// Map Cranelift registers to their corresponding Gimli registers.
///
/// Returns `UnsupportedArchitecture` unless the ISA is 64-bit x86, `MissingBank` when no
/// register bank contains `reg`, and `UnsupportedRegisterBank` for any bank other than
/// `IntRegs` and `FloatRegs`.
pub fn map_reg(
    isa: &dyn TargetIsa,
    reg: RegUnit,
) -> core::result::Result<Register, RegisterMappingError> {
    // Mapping from https://github.com/bytecodealliance/cranelift/pull/902 by @iximeow
    const X86_GP_REG_MAP: [gimli::Register; 16] = [
        X86_64::RAX,
        X86_64::RCX,
        X86_64::RDX,
        X86_64::RBX,
        X86_64::RSP,
        X86_64::RBP,
        X86_64::RSI,
        X86_64::RDI,
        X86_64::R8,
        X86_64::R9,
        X86_64::R10,
        X86_64::R11,
        X86_64::R12,
        X86_64::R13,
        X86_64::R14,
        X86_64::R15,
    ];
    const X86_XMM_REG_MAP: [gimli::Register; 16] = [
        X86_64::XMM0,
        X86_64::XMM1,
        X86_64::XMM2,
        X86_64::XMM3,
        X86_64::XMM4,
        X86_64::XMM5,
        X86_64::XMM6,
        X86_64::XMM7,
        X86_64::XMM8,
        X86_64::XMM9,
        X86_64::XMM10,
        X86_64::XMM11,
        X86_64::XMM12,
        X86_64::XMM13,
        X86_64::XMM14,
        X86_64::XMM15,
    ];

    let is_x86_64 = isa.name() == "x86" && isa.pointer_bits() == 64;
    if !is_x86_64 {
        return Err(RegisterMappingError::UnsupportedArchitecture);
    }

    let reg_info = isa.register_info();
    let bank = match reg_info.bank_containing_regunit(reg) {
        Some(bank) => bank,
        None => return Err(RegisterMappingError::MissingBank),
    };

    // x86 GP registers have a weird mapping to DWARF registers, so both banks go through a
    // lookup table indexed by the register's position within its bank.
    let table = match bank.name {
        "IntRegs" => &X86_GP_REG_MAP,
        "FloatRegs" => &X86_XMM_REG_MAP,
        _ => return Err(RegisterMappingError::UnsupportedRegisterBank(bank.name)),
    };
    Ok(table[(reg - bank.first_unit) as usize])
}
/// Translates one frame layout change into a gimli call frame instruction.
///
/// `cfa_def_reg` and `cfa_def_offset` hold the CFA definition currently in effect and are
/// updated by `CallFrameAddressAt` changes. Returns `None` when such a change leaves both
/// the register and the offset as they already were.
fn to_cfi(
    isa: &dyn TargetIsa,
    change: &FrameLayoutChange,
    cfa_def_reg: &mut Register,
    cfa_def_offset: &mut i32,
) -> Option<CallFrameInstruction> {
    match change {
        FrameLayoutChange::CallFrameAddressAt { reg, offset } => {
            let mapped = map_reg(isa, *reg).expect("a register mapping from cranelift to gimli");
            let offset = (*offset) as i32;
            let reg_changed = mapped != *cfa_def_reg;
            let offset_changed = offset != *cfa_def_offset;
            // Storing an unchanged value is a no-op, so the state can be updated up front.
            *cfa_def_reg = mapped;
            *cfa_def_offset = offset;
            match (reg_changed, offset_changed) {
                (true, true) => Some(CallFrameInstruction::Cfa(mapped, offset)),
                (false, true) => Some(CallFrameInstruction::CfaOffset(offset)),
                (true, false) => Some(CallFrameInstruction::CfaRegister(mapped)),
                (false, false) => None,
            }
        }
        FrameLayoutChange::RegAt { reg, cfa_offset } => {
            assert!(cfa_offset % -8 == 0);
            let cfa_offset = *cfa_offset as i32;
            let mapped = map_reg(isa, *reg).expect("a register mapping from cranelift to gimli");
            Some(CallFrameInstruction::Offset(mapped, cfa_offset))
        }
        FrameLayoutChange::ReturnAddressAt { cfa_offset } => {
            assert!(cfa_offset % -8 == 0);
            let cfa_offset = *cfa_offset as i32;
            Some(CallFrameInstruction::Offset(X86_64::RA, cfa_offset))
        }
        FrameLayoutChange::Preserve => Some(CallFrameInstruction::RememberState),
        FrameLayoutChange::Restore => Some(CallFrameInstruction::RestoreState),
    }
}
/// Creates FDE structure from FrameLayout.
///
/// Writes a complete frame table for `func` to `sink`: one CIE built from the initial frame
/// layout, one FDE built from the per-instruction layout changes, and a 4-byte zero
/// terminator. The relocations recorded for the function address are reported to the sink,
/// and the sink's entry offset is set to the start of the FDE.
///
/// Panics if the ISA is not x86, if the calling convention is not `Fast`, `Cold` or
/// `SystemV`, or if `func.frame_layout` is `None`.
pub fn emit_fde(
func: &Function,
isa: &dyn TargetIsa,
sink: &mut dyn FrameUnwindSink,
) -> CodegenResult<()> {
assert!(isa.name() == "x86");
// Expecting function with System V prologue
assert!(
func.signature.call_conv == CallConv::Fast
|| func.signature.call_conv == CallConv::Cold
|| func.signature.call_conv == CallConv::SystemV
);
assert!(func.frame_layout.is_some(), "expected func.frame_layout");
let frame_layout = func.frame_layout.as_ref().unwrap();
let mut blocks = func.layout.blocks().collect::<Vec<_>>();
blocks.sort_by_key(|block| func.offsets[*block]); // Ensure inst offsets always increase
let encinfo = isa.encoding_info();
let mut last_offset = 0;
// Layout changes paired with the code offset just past the instruction they belong to.
let mut changes = Vec::new();
for block in blocks {
for (offset, inst, size) in func.inst_offsets(block, &encinfo) {
let address_offset = (offset + size) as usize;
assert!(last_offset <= address_offset);
if let Some(cmds) = frame_layout.instructions.get(&inst) {
for cmd in cmds.iter() {
changes.push((address_offset, *cmd));
}
}
last_offset = address_offset;
}
}
// The end offset of the last instruction visited is used as the function length.
let len = last_offset as u32;
let word_size = isa.pointer_bytes() as i32;
let encoding = Encoding {
format: Format::Dwarf32,
version: 1,
address_size: word_size as u8,
};
let mut frames = FrameTable::default();
// CFA definition tracked across the CIE and FDE instructions so that `to_cfi` can skip
// changes that do not alter it.
let mut cfa_def_reg = return_address_reg(isa);
let mut cfa_def_offset = 0i32;
let mut cie = CommonInformationEntry::new(
encoding,
/* code_alignment_factor = */ 1,
/* data_alignment_factor = */ -word_size as i8,
return_address_reg(isa),
);
for ch in frame_layout.initial.iter() {
if let Some(cfi) = to_cfi(isa, ch, &mut cfa_def_reg, &mut cfa_def_offset) {
cie.add_instruction(cfi);
}
}
let cie_id = frames.add_cie(cie);
let mut fde = FrameDescriptionEntry::new(FUNCTION_ENTRY_ADDRESS, len);
for (addr, ch) in changes.iter() {
if let Some(cfi) = to_cfi(isa, ch, &mut cfa_def_reg, &mut cfa_def_offset) {
fde.add_instruction((*addr) as u32, cfi);
}
}
frames.add_fde(cie_id, fde);
let mut eh_frame = EhFrame::from(FDEWriter::new());
frames.write_eh_frame(&mut eh_frame).unwrap();
let (bytes, relocs) = eh_frame.clone().into_vec_and_relocs();
// Relocation offsets are relative to the written table, so rebase them onto the sink.
let unwind_start = sink.len();
sink.bytes(&bytes);
for (off, r) in relocs {
sink.reloc(r, off + unwind_start);
}
// The first four bytes hold the CIE length, which excludes the length field itself; the
// FDE therefore starts `cie_len + 4` bytes into the table.
let cie_len = u32::from_le_bytes(bytes.as_slice()[..4].try_into().unwrap());
let fde_offset = cie_len as usize + 4;
sink.set_entry_offset(unwind_start + fde_offset);
// Need 0 marker for GCC unwind to end FDE "list".
sink.bytes(&[0, 0, 0, 0]);
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::binemit::{FrameUnwindOffset, Reloc};
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{
types, AbiParam, ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind,
TrapCode,
};
use crate::isa::{lookup, CallConv};
use crate::settings::{builder, Flags};
use crate::Context;
use std::str::FromStr;
use target_lexicon::triple;
// Test sink that captures everything `emit_fde` reports:
// field 0 holds the emitted bytes, field 1 the entry offset, field 2 the relocations.
struct SimpleUnwindSink(pub Vec<u8>, pub usize, pub Vec<(Reloc, usize)>);
impl FrameUnwindSink for SimpleUnwindSink {
fn len(&self) -> FrameUnwindOffset {
self.0.len()
}
fn bytes(&mut self, b: &[u8]) {
self.0.extend_from_slice(b);
}
fn reloc(&mut self, r: Reloc, off: FrameUnwindOffset) {
self.2.push((r, off));
}
fn set_entry_offset(&mut self, off: FrameUnwindOffset) {
self.1 = off;
}
}
// Compiles a System V function with a single return and a 64-byte stack slot, then checks
// the exact frame table bytes, the entry offset and the number of relocations.
#[test]
fn test_simple_func() {
let isa = lookup(triple!("x86_64"))
.expect("expect x86 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
));
context.func.collect_frame_layout_info();
context.compile(&*isa).expect("expected compilation");
let mut sink = SimpleUnwindSink(Vec::new(), 0, Vec::new());
emit_fde(&context.func, &*isa, &mut sink).expect("can emit fde");
assert_eq!(
sink.0,
vec![
20, 0, 0, 0, // CIE len
0, 0, 0, 0, // CIE marker
1, // version
0, // augmentation string
1, // code alignment = 1
120, // data alignment = -8
16, // RA = r16
0x0c, 0x07, 0x08, // DW_CFA_def_cfa r7, 8
0x90, 0x01, // DW_CFA_offset r16, -8 * 1
0, 0, 0, 0, 0, 0, // padding
36, 0, 0, 0, // FDE len
28, 0, 0, 0, // CIE offset
0, 0, 0, 0, 0, 0, 0, 0, // addr reloc
16, 0, 0, 0, 0, 0, 0, 0, // function length
0x42, // DW_CFA_advance_loc 2
0x0e, 0x10, // DW_CFA_def_cfa_offset 16
0x86, 0x02, // DW_CFA_offset r6, -8 * 2
0x43, // DW_CFA_advance_loc 3
0x0d, 0x06, // DW_CFA_def_cfa_register
0x4a, // DW_CFA_advance_loc 10
0x0c, 0x07, 0x08, // DW_CFA_def_cfa r7, 8
0, 0, 0, 0, // padding
0, 0, 0, 0, // End of FDEs
]
);
// The FDE starts right after the 4-byte length field plus the 20-byte CIE.
assert_eq!(sink.1, 24);
assert_eq!(sink.2.len(), 1);
}
/// Builds a function with the given calling convention whose single block only returns,
/// optionally registering one stack slot.
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
    let mut function =
        Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
    let entry = function.dfg.make_block();
    {
        // Scope the cursor so its mutable borrow ends before the stack slot is added.
        let mut cursor = FuncCursor::new(&mut function);
        cursor.insert_block(entry);
        cursor.ins().return_(&[]);
    }
    if let Some(slot) = stack_slot {
        function.stack_slots.push(slot);
    }
    function
}
// Compiles a System V function with a return block followed by a trap block, then checks
// that the CFA state is remembered before the return and restored after it.
#[test]
fn test_multi_return_func() {
let isa = lookup(triple!("x86_64"))
.expect("expect x86 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
context.func.collect_frame_layout_info();
context.compile(&*isa).expect("expected compilation");
let mut sink = SimpleUnwindSink(Vec::new(), 0, Vec::new());
emit_fde(&context.func, &*isa, &mut sink).expect("can emit fde");
assert_eq!(
sink.0,
vec![
20, 0, 0, 0, // CIE len
0, 0, 0, 0, // CIE marker
1, // version
0, // augmentation string
1, // code alignment = 1
120, // data alignment = -8
16, // RA = r16
0x0c, 0x07, 0x08, // DW_CFA_def_cfa r7, 8
0x90, 0x01, // DW_CFA_offset r16, -8 * 1
0, 0, 0, 0, 0, 0, // padding
36, 0, 0, 0, // FDE len
28, 0, 0, 0, // CIE offset
0, 0, 0, 0, 0, 0, 0, 0, // addr reloc
15, 0, 0, 0, 0, 0, 0, 0, // function length
0x42, // DW_CFA_advance_loc 2
0x0e, 0x10, // DW_CFA_def_cfa_offset 16
0x86, 0x02, // DW_CFA_offset r6, -8 * 2
0x43, // DW_CFA_advance_loc 3
0x0d, 0x06, // DW_CFA_def_cfa_register
0x47, // DW_CFA_advance_loc 7
0x0a, // DW_CFA_remember_state
0x0c, 0x07, 0x08, // DW_CFA_def_cfa r7, 8
0x41, // DW_CFA_advance_loc 1
0x0b, // DW_CFA_restore_state
// NOTE: no additional CFA directives -- DW_CFA_restore_state
// is done before trap and it is last instruction in the function.
0, // padding
0, 0, 0, 0, // End of FDEs
]
);
// The FDE starts right after the 4-byte length field plus the 20-byte CIE.
assert_eq!(sink.1, 24);
assert_eq!(sink.2.len(), 1);
}
/// Builds a function taking one `i32` that branches to a trapping block when the argument
/// is non-zero and otherwise jumps to a block that returns.
fn create_multi_return_function(call_conv: CallConv) -> Function {
    let mut signature = Signature::new(call_conv);
    signature.params.push(AbiParam::new(types::I32));
    let mut function = Function::with_name_signature(ExternalName::user(0, 0), signature);

    let entry = function.dfg.make_block();
    let arg = function.dfg.append_block_param(entry, types::I32);
    let return_block = function.dfg.make_block();
    let trap_block = function.dfg.make_block();

    {
        let mut cursor = FuncCursor::new(&mut function);

        cursor.insert_block(entry);
        cursor.ins().brnz(arg, trap_block, &[]);
        cursor.ins().jump(return_block, &[]);

        cursor.insert_block(return_block);
        cursor.ins().return_(&[]);

        cursor.insert_block(trap_block);
        cursor.ins().trap(TrapCode::User(0));
    }

    function
}
}

View File

@@ -3,27 +3,20 @@
mod abi;
mod binemit;
mod enc_tables;
#[cfg(feature = "unwind")]
mod fde;
mod registers;
pub mod settings;
#[cfg(feature = "unwind")]
mod unwind;
#[cfg(feature = "unwind")]
pub use fde::map_reg;
pub mod unwind;
use super::super::settings as shared_settings;
#[cfg(feature = "testing_hooks")]
use crate::binemit::CodeSink;
use crate::binemit::{emit_function, MemoryCodeSink};
#[cfg(feature = "unwind")]
use crate::binemit::{FrameUnwindKind, FrameUnwindSink};
use crate::ir;
use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
use crate::isa::Builder as IsaBuilder;
#[cfg(feature = "unwind")]
use crate::isa::{fde::RegisterMappingError, RegUnit};
use crate::isa::{unwind::systemv::RegisterMappingError, RegUnit};
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use crate::result::CodegenResult;
@@ -95,7 +88,7 @@ impl TargetIsa for Isa {
#[cfg(feature = "unwind")]
fn map_dwarf_register(&self, reg: RegUnit) -> Result<u16, RegisterMappingError> {
map_reg(self, reg).map(|r| r.0)
unwind::systemv::map_reg(self, reg).map(|r| r.0)
}
fn encoding_info(&self) -> EncInfo {
@@ -168,17 +161,17 @@ impl TargetIsa for Isa {
ir::condcodes::IntCC::UnsignedLessThan
}
/// Emit unwind information for the given function.
///
/// Only some calling conventions (e.g. Windows fastcall) will have unwind information.
#[cfg(feature = "unwind")]
fn emit_unwind_info(
fn create_unwind_info(
&self,
func: &ir::Function,
kind: FrameUnwindKind,
sink: &mut dyn FrameUnwindSink,
) -> CodegenResult<()> {
abi::emit_unwind_info(func, self, kind, sink)
) -> CodegenResult<Option<super::unwind::UnwindInfo>> {
abi::create_unwind_info(func, self)
}
#[cfg(feature = "unwind")]
fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
Some(unwind::systemv::create_cie())
}
}

View File

@@ -1,697 +1,4 @@
//! Unwind information for x64 Windows.
//! Module for x86 unwind generation for supported ABIs.
use super::registers::{FPR, GPR, RU};
use crate::binemit::FrameUnwindSink;
use crate::ir::{Function, InstructionData, Opcode, ValueLoc};
use crate::isa::{CallConv, RegUnit, TargetIsa};
use crate::result::{CodegenError, CodegenResult};
use alloc::vec::Vec;
use byteorder::{ByteOrder, LittleEndian};
use log::warn;
/// Maximum (inclusive) size of a "small" stack allocation
const SMALL_ALLOC_MAX_SIZE: u32 = 128;
/// Maximum (inclusive) size of a "large" stack allocation whose size, scaled by 8, can be represented in 16 bits
const LARGE_ALLOC_16BIT_MAX_SIZE: u32 = 524280;
/// Appends the single byte `v` to `sink`.
fn write_u8(sink: &mut dyn FrameUnwindSink, v: u8) {
    let byte = [v];
    sink.bytes(&byte);
}
/// Appends `v` to `sink` as two bytes encoded in the byte order `T`.
fn write_u16<T: ByteOrder>(sink: &mut dyn FrameUnwindSink, v: u16) {
    let mut encoded = [0; 2];
    T::write_u16(&mut encoded, v);
    sink.bytes(&encoded);
}
/// Appends `v` to `sink` as four bytes encoded in the byte order `T`.
fn write_u32<T: ByteOrder>(sink: &mut dyn FrameUnwindSink, v: u32) {
    let mut encoded = [0; 4];
    T::write_u32(&mut encoded, v);
    sink.bytes(&encoded);
}
/// The supported unwind codes for the x64 Windows ABI.
///
/// See: https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
/// Only what is needed to describe the prologues generated by the Cranelift x86 ISA are represented here.
/// Note: the Cranelift x86 ISA RU enum matches the Windows unwind GPR encoding values.
///
/// In every variant `offset` is the prologue offset written first for the code;
/// `UnwindInfo::try_from_func` sets it to the offset just past the described instruction.
#[derive(Debug, PartialEq, Eq)]
enum UnwindCode {
// A general-purpose register was pushed onto the stack.
PushRegister {
offset: u8,
reg: RegUnit,
},
// A floating-point register was stored at `stack_offset` (in bytes).
SaveXmm {
offset: u8,
reg: RegUnit,
stack_offset: u32,
},
// `size` bytes were allocated on the stack; `emit` requires a multiple of 8, at least 8.
StackAlloc {
offset: u8,
size: u32,
},
// The frame pointer register was established; `sp_offset` is written into the high
// four bits of the operation byte.
SetFramePointer {
offset: u8,
sp_offset: u8,
},
}
impl UnwindCode {
    /// Writes this unwind code to `sink` in the Windows x64 `UNWIND_CODE` format.
    ///
    /// Every code starts with the prologue offset byte followed by a byte holding the
    /// operation in the low four bits and the operation info in the high four bits. Some
    /// operations are followed by additional 16-bit nodes; `node_count` reports how many
    /// 16-bit nodes are written in total.
    ///
    /// Panics if a stack allocation is smaller than 8 bytes or not a multiple of 8.
    fn emit(&self, sink: &mut dyn FrameUnwindSink) {
        enum UnwindOperation {
            PushNonvolatileRegister = 0,
            LargeStackAlloc = 1,
            SmallStackAlloc = 2,
            SetFramePointer = 3,
            SaveXmm128 = 8,
            SaveXmm128Far = 9,
        }

        match self {
            Self::PushRegister { offset, reg } => {
                write_u8(sink, *offset);
                write_u8(
                    sink,
                    ((GPR.index_of(*reg) as u8) << 4)
                        | (UnwindOperation::PushNonvolatileRegister as u8),
                );
            }
            Self::SaveXmm {
                offset,
                reg,
                stack_offset,
            } => {
                write_u8(sink, *offset);
                // The short form stores the offset scaled by 16 in a single node.
                let scaled_stack_offset = *stack_offset / 16;
                if scaled_stack_offset <= core::u16::MAX as u32 {
                    write_u8(
                        sink,
                        (FPR.index_of(*reg) << 4) as u8 | (UnwindOperation::SaveXmm128 as u8),
                    );
                    write_u16::<LittleEndian>(sink, scaled_stack_offset as u16);
                } else {
                    write_u8(
                        sink,
                        (FPR.index_of(*reg) << 4) as u8 | (UnwindOperation::SaveXmm128Far as u8),
                    );
                    // The far form stores the *unscaled* offset in two nodes, low half first.
                    write_u16::<LittleEndian>(sink, *stack_offset as u16);
                    write_u16::<LittleEndian>(sink, (*stack_offset >> 16) as u16);
                }
            }
            Self::StackAlloc { offset, size } => {
                // Stack allocations on Windows must be a multiple of 8 and be at least 1 slot
                assert!(*size >= 8);
                assert!((*size % 8) == 0);

                write_u8(sink, *offset);
                if *size <= SMALL_ALLOC_MAX_SIZE {
                    // Small allocations encode (size - 8) / 8 in the operation info.
                    write_u8(
                        sink,
                        ((((*size - 8) / 8) as u8) << 4) | UnwindOperation::SmallStackAlloc as u8,
                    );
                } else if *size <= LARGE_ALLOC_16BIT_MAX_SIZE {
                    // Operation info 0: the size scaled by 8 follows in one node.
                    write_u8(sink, UnwindOperation::LargeStackAlloc as u8);
                    write_u16::<LittleEndian>(sink, (*size / 8) as u16);
                } else {
                    // Operation info 1: the unscaled size follows in two nodes.
                    write_u8(sink, (1 << 4) | (UnwindOperation::LargeStackAlloc as u8));
                    write_u32::<LittleEndian>(sink, *size);
                }
            }
            Self::SetFramePointer { offset, sp_offset } => {
                write_u8(sink, *offset);
                write_u8(
                    sink,
                    (*sp_offset << 4) | (UnwindOperation::SetFramePointer as u8),
                );
            }
        };
    }

    /// Returns the number of 16-bit nodes `emit` writes for this unwind code.
    ///
    /// The thresholds here must match the ones used by `emit`, because
    /// `UnwindInfo::emit` asserts that the emitted size equals the computed size.
    fn node_count(&self) -> usize {
        match self {
            Self::StackAlloc { size, .. } => {
                if *size <= SMALL_ALLOC_MAX_SIZE {
                    1
                } else if *size <= LARGE_ALLOC_16BIT_MAX_SIZE {
                    2
                } else {
                    3
                }
            }
            Self::SaveXmm { stack_offset, .. } => {
                // Same scaled comparison as `emit`: short form if offset / 16 fits in 16 bits.
                if *stack_offset / 16 <= core::u16::MAX as u32 {
                    2
                } else {
                    3
                }
            }
            _ => 1,
        }
    }
}
/// Represents Windows x64 unwind information.
///
/// For information about Windows x64 unwind info, see:
/// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
#[derive(Debug, PartialEq, Eq)]
pub struct UnwindInfo {
// Written shifted left by 3 next to the version in the first byte; always 0 when built
// by `try_from_func`.
flags: u8,
// Size of the prologue in bytes.
prologue_size: u8,
// Register used as the frame pointer, if any.
frame_register: Option<RegUnit>,
// Written into the high four bits of the frame register byte.
frame_register_offset: u8,
// Unwind codes in prologue order; `emit` writes them in reverse.
unwind_codes: Vec<UnwindCode>,
}
impl UnwindInfo {
/// Builds unwind information by walking the prologue instructions of `func`'s entry block.
///
/// Returns `Ok(None)` when the calling convention is not Windows fastcall, when the
/// function has no recorded prologue end, or when the prologue end instruction is not
/// found in the entry block.
///
/// Returns `CodegenError::CodeTooLarge` when a prologue instruction ends beyond byte 255,
/// and `CodegenError::ImplLimitExceeded` when an FPR is saved and the static frame
/// allocation exceeds 240 bytes.
///
/// Panics if one of the prologue shape assumptions asserted below does not hold.
pub fn try_from_func(
func: &Function,
isa: &dyn TargetIsa,
frame_register: Option<RegUnit>,
) -> CodegenResult<Option<Self>> {
// Only Windows fastcall is supported for unwind information
if func.signature.call_conv != CallConv::WindowsFastcall || func.prologue_end.is_none() {
return Ok(None);
}
let prologue_end = func.prologue_end.unwrap();
let entry_block = func.layout.blocks().nth(0).expect("missing entry block");
// Stores the stack size when SP is not adjusted via an immediate value
let mut stack_size = None;
let mut prologue_size = 0;
let mut unwind_codes = Vec::new();
let mut found_end = false;
// Have we saved at least one FPR? if so, we might have to check additional constraints.
let mut saved_fpr = false;
// In addition to the min offset for a callee-save, we need to know the offset from the
// frame base to the stack pointer, so that we can record an unwind offset that spans only
// to the end of callee-save space.
let mut static_frame_allocation_size = 0u32;
// For the time being, FPR preservation is split into a stack_addr and later store/load.
// Store the register used for stack store and ensure it is the same register with no
// intervening changes to the frame size.
let mut callee_save_region_reg = None;
// Also record the callee-save region's offset from RSP, because it must be added to FPR
// save offsets to compute an offset from the frame base.
let mut callee_save_offset = None;
for (offset, inst, size) in func.inst_offsets(entry_block, &isa.encoding_info()) {
// x64 ABI prologues cannot exceed 255 bytes in length
if (offset + size) > 255 {
warn!("function prologues cannot exceed 255 bytes in size for Windows x64");
return Err(CodegenError::CodeTooLarge);
}
prologue_size += size;
// Unwind codes are keyed by the offset just past the instruction they describe.
let unwind_offset = (offset + size) as u8;
match func.dfg[inst] {
InstructionData::Unary { opcode, arg } => {
match opcode {
Opcode::X86Push => {
static_frame_allocation_size += 8;
unwind_codes.push(UnwindCode::PushRegister {
offset: unwind_offset,
reg: func.locations[arg].unwrap_reg(),
});
}
Opcode::AdjustSpDown => {
let stack_size =
stack_size.expect("expected a previous stack size instruction");
static_frame_allocation_size += stack_size;
// This is used when calling a stack check function
// We need to track the assignment to RAX which has the size of the stack
unwind_codes.push(UnwindCode::StackAlloc {
offset: unwind_offset,
size: stack_size,
});
}
_ => {}
}
}
InstructionData::CopySpecial { src, dst, .. } => {
if let Some(frame_register) = frame_register {
if src == (RU::rsp as RegUnit) && dst == frame_register {
// Constructing an rbp-based stack frame, so the static frame
// allocation restarts at 0 from here.
static_frame_allocation_size = 0;
unwind_codes.push(UnwindCode::SetFramePointer {
offset: unwind_offset,
sp_offset: 0,
});
}
}
}
InstructionData::UnaryImm { opcode, imm } => {
match opcode {
Opcode::Iconst => {
let imm: i64 = imm.into();
assert!(imm <= core::u32::MAX as i64);
assert!(stack_size.is_none());
// This instruction should only appear in a prologue to pass an
// argument of the stack size to a stack check function.
// Record the stack size so we know what it is when we encounter the adjustment
// instruction (which will adjust via the register assigned to this instruction).
stack_size = Some(imm as u32);
}
Opcode::AdjustSpDownImm => {
let imm: i64 = imm.into();
assert!(imm <= core::u32::MAX as i64);
static_frame_allocation_size += imm as u32;
unwind_codes.push(UnwindCode::StackAlloc {
offset: unwind_offset,
size: imm as u32,
});
}
_ => {}
}
}
InstructionData::StackLoad {
opcode: Opcode::StackAddr,
stack_slot,
offset: _,
} => {
let result = func.dfg.inst_results(inst).get(0).unwrap();
if let ValueLoc::Reg(frame_reg) = func.locations[*result] {
callee_save_region_reg = Some(frame_reg);
// Figure out the offset in the call frame that `frame_reg` will have.
let frame_size = func
.stack_slots
.layout_info
.expect("func's stack slots have layout info if stack operations exist")
.frame_size;
// Because we're well after the prologue has been constructed, stack slots
// must have been laid out...
let slot_offset = func.stack_slots[stack_slot]
.offset
.expect("callee-save slot has an offset computed");
let frame_offset = frame_size as i32 + slot_offset;
callee_save_offset = Some(frame_offset as u32);
}
}
InstructionData::Store {
opcode: Opcode::Store,
args: [arg1, arg2],
flags: _flags,
offset,
} => {
if let (ValueLoc::Reg(ru), ValueLoc::Reg(base_ru)) =
(func.locations[arg1], func.locations[arg2])
{
// Only stores based on the register produced by the preceding `stack_addr`
// are considered callee saves.
if Some(base_ru) == callee_save_region_reg {
let offset_int: i32 = offset.into();
assert!(offset_int >= 0, "negative fpr offset would store outside the stack frame, and is almost certainly an error");
let offset_int: u32 = offset_int as u32 + callee_save_offset.expect("FPR presevation requires an FPR save region, which has some stack offset");
if FPR.contains(ru) {
saved_fpr = true;
unwind_codes.push(UnwindCode::SaveXmm {
offset: unwind_offset,
reg: ru,
stack_offset: offset_int,
});
}
}
}
}
_ => {}
};
if inst == prologue_end {
found_end = true;
break;
}
}
// The recorded prologue end was not in the entry block, so nothing can be described.
if !found_end {
return Ok(None);
}
if saved_fpr {
if static_frame_allocation_size > 240 && saved_fpr {
warn!("stack frame is too large ({} bytes) to use with Windows x64 SEH when preserving FPRs. \
This is a Cranelift implementation limit, see \
https://github.com/bytecodealliance/wasmtime/issues/1475",
static_frame_allocation_size);
return Err(CodegenError::ImplLimitExceeded);
}
// Only test static frame size is 16-byte aligned when an FPR is saved to avoid
// panicking when alignment is elided because no FPRs are saved and no child calls are
// made.
assert!(
static_frame_allocation_size % 16 == 0,
"static frame allocation must be a multiple of 16"
);
}
// Hack to avoid panicking unnecessarily. Because Cranelift generates prologues with RBP at
// one end of the call frame, and RSP at the other, required offsets are arbitrarily large.
// Windows x64 SEH only allows this offset be up to 240 bytes, however, meaning large
// frames are inexpressible, and we cannot actually compile the function. In case there are
// no preserved FPRs, we can lie without error and claim the offset to RBP is 0 - nothing
// will actually check it. This, then, avoids panics when compiling functions with large
// call frames.
let reported_frame_offset = if saved_fpr {
(static_frame_allocation_size / 16) as u8
} else {
0
};
Ok(Some(Self {
flags: 0, // this assumes cranelift functions have no SEH handlers
prologue_size: prologue_size as u8,
frame_register,
frame_register_offset: reported_frame_offset,
unwind_codes,
}))
}
/// Returns the number of bytes `emit` writes for this unwind information.
///
/// Panics if any flag is set, since SEH handler and chained info data are not sized here.
pub fn size(&self) -> usize {
    let nodes = self.node_count();

    // Calculation of the size requires no SEH handler or chained info
    assert!(self.flags == 0);

    // Fixed part of UNWIND_INFO.
    let header_bytes = 4;
    // Each UNWIND_CODE node is 2 bytes.
    let code_bytes = nodes * 2;
    // An odd number of nodes is followed by 2 bytes of padding. The SEH data would come
    // next, but the assertion above guarantees there is none.
    let padding_bytes = if (nodes & 1) == 1 { 2 } else { 0 };

    header_bytes + code_bytes + padding_bytes
}
pub fn node_count(&self) -> usize {
self.unwind_codes
.iter()
.fold(0, |nodes, c| nodes + c.node_count())
}
pub fn emit(&self, sink: &mut dyn FrameUnwindSink) {
const UNWIND_INFO_VERSION: u8 = 1;
let size = self.size();
let offset = sink.len();
// Ensure the memory is 32-bit aligned
assert_eq!(offset % 4, 0);
sink.reserve(offset + size);
let node_count = self.node_count();
assert!(node_count <= 256);
write_u8(sink, (self.flags << 3) | UNWIND_INFO_VERSION);
write_u8(sink, self.prologue_size);
write_u8(sink, node_count as u8);
if let Some(reg) = self.frame_register {
write_u8(
sink,
(self.frame_register_offset << 4) | GPR.index_of(reg) as u8,
);
} else {
write_u8(sink, 0);
}
// Unwind codes are written in reverse order (prologue offset descending)
for code in self.unwind_codes.iter().rev() {
code.emit(sink);
}
// To keep a 32-bit alignment, emit 2 bytes of padding if there's an odd number of 16-bit nodes
if (node_count & 1) == 1 {
write_u16::<LittleEndian>(sink, 0);
}
// Ensure the correct number of bytes was emitted
assert_eq!(sink.len() - offset, size);
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::binemit::{FrameUnwindOffset, Reloc};
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind};
use crate::isa::{lookup, CallConv};
use crate::settings::{builder, Flags};
use crate::Context;
use std::str::FromStr;
use target_lexicon::triple;
// Test sink that collects the emitted bytes and ignores relocations and entry offsets.
struct SimpleUnwindSink(pub Vec<u8>);
impl FrameUnwindSink for SimpleUnwindSink {
fn len(&self) -> FrameUnwindOffset {
self.0.len()
}
fn bytes(&mut self, b: &[u8]) {
self.0.extend_from_slice(b);
}
fn reloc(&mut self, _: Reloc, _: FrameUnwindOffset) {}
fn set_entry_offset(&mut self, _: FrameUnwindOffset) {}
}
// A function using the System V calling convention must not produce Windows unwind info.
#[test]
fn test_wrong_calling_convention() {
let isa = lookup(triple!("x86_64"))
.expect("expect x86 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_function(CallConv::SystemV, None));
context.compile(&*isa).expect("expected compilation");
assert_eq!(
UnwindInfo::try_from_func(&context.func, &*isa, None).expect("can emit unwind info"),
None
);
}
// A 64-byte stack slot yields a 96-byte allocation, which uses the one-node small stack
// allocation encoding.
#[test]
fn test_small_alloc() {
let isa = lookup(triple!("x86_64"))
.expect("expect x86 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_function(
CallConv::WindowsFastcall,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
));
context.compile(&*isa).expect("expected compilation");
let unwind = UnwindInfo::try_from_func(&context.func, &*isa, Some(RU::rbp.into()))
.expect("can emit unwind info")
.expect("expected unwind info");
assert_eq!(
unwind,
UnwindInfo {
flags: 0,
prologue_size: 9,
frame_register: Some(RU::rbp.into()),
frame_register_offset: 0,
unwind_codes: vec![
UnwindCode::PushRegister {
offset: 2,
reg: RU::rbp.into()
},
UnwindCode::SetFramePointer {
offset: 5,
sp_offset: 0
},
UnwindCode::StackAlloc {
offset: 9,
size: 64 + 32
}
]
}
);
// 4 header bytes + 3 nodes * 2 bytes + 2 padding bytes.
assert_eq!(unwind.size(), 12);
let mut sink = SimpleUnwindSink(Vec::new());
unwind.emit(&mut sink);
assert_eq!(
sink.0,
[
0x01, // Version and flags (version 1, no flags)
0x09, // Prologue size
0x03, // Unwind code count (1 for stack alloc, 1 for save frame reg, 1 for push reg)
0x05, // Frame register + offset (RBP with 0 offset)
0x09, // Prolog offset
0xB2, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0xB * 8) + 8 = 96 (64 + 32) bytes)
0x05, // Prolog offset
0x03, // Operation 3 (save frame register), stack pointer offset = 0
0x02, // Prolog offset
0x50, // Operation 0 (save nonvolatile register), reg = 5 (RBP)
0x00, // Padding byte
0x00, // Padding byte
]
);
}
// A 10000-byte stack slot yields a 10032-byte allocation, which uses the two-node large
// stack allocation encoding with the size scaled by 8.
#[test]
fn test_medium_alloc() {
let isa = lookup(triple!("x86_64"))
.expect("expect x86 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_function(
CallConv::WindowsFastcall,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 10000)),
));
context.compile(&*isa).expect("expected compilation");
let unwind = UnwindInfo::try_from_func(&context.func, &*isa, Some(RU::rbp.into()))
.expect("can emit unwind info")
.expect("expected unwind info");
assert_eq!(
unwind,
UnwindInfo {
flags: 0,
prologue_size: 27,
frame_register: Some(RU::rbp.into()),
frame_register_offset: 0,
unwind_codes: vec![
UnwindCode::PushRegister {
offset: 2,
reg: RU::rbp.into()
},
UnwindCode::SetFramePointer {
offset: 5,
sp_offset: 0
},
UnwindCode::StackAlloc {
offset: 27,
size: 10000 + 32
}
]
}
);
// 4 header bytes + 4 nodes * 2 bytes, no padding.
assert_eq!(unwind.size(), 12);
let mut sink = SimpleUnwindSink(Vec::new());
unwind.emit(&mut sink);
assert_eq!(
sink.0,
[
0x01, // Version and flags (version 1, no flags)
0x1B, // Prologue size
0x04, // Unwind code count (2 for stack alloc, 1 for save frame reg, 1 for push reg)
0x05, // Frame register + offset (RBP with 0 offset)
0x1B, // Prolog offset
0x01, // Operation 1 (large stack alloc), size is scaled 16-bits (info = 0)
0xE6, // Low size byte
0x04, // High size byte (e.g. 0x04E6 * 8 = 10032 (10000 + 32) bytes)
0x05, // Prolog offset
0x03, // Operation 3 (save frame register), stack pointer offset = 0
0x02, // Prolog offset
0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
]
);
}
// A 1000000-byte stack slot yields a 1000032-byte allocation, which uses the three-node
// large stack allocation encoding with the unscaled 32-bit size.
#[test]
fn test_large_alloc() {
let isa = lookup(triple!("x86_64"))
.expect("expect x86 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_function(
CallConv::WindowsFastcall,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 1000000)),
));
context.compile(&*isa).expect("expected compilation");
let unwind = UnwindInfo::try_from_func(&context.func, &*isa, Some(RU::rbp.into()))
.expect("can emit unwind info")
.expect("expected unwind info");
assert_eq!(
unwind,
UnwindInfo {
flags: 0,
prologue_size: 27,
frame_register: Some(RU::rbp.into()),
frame_register_offset: 0,
unwind_codes: vec![
UnwindCode::PushRegister {
offset: 2,
reg: RU::rbp.into()
},
UnwindCode::SetFramePointer {
offset: 5,
sp_offset: 0
},
UnwindCode::StackAlloc {
offset: 27,
size: 1000000 + 32
}
]
}
);
// 4 header bytes + 5 nodes * 2 bytes + 2 padding bytes.
assert_eq!(unwind.size(), 16);
let mut sink = SimpleUnwindSink(Vec::new());
unwind.emit(&mut sink);
assert_eq!(
sink.0,
[
0x01, // Version and flags (version 1, no flags)
0x1B, // Prologue size
0x05, // Unwind code count (3 for stack alloc, 1 for save frame reg, 1 for push reg)
0x05, // Frame register + offset (RBP with 0 offset)
0x1B, // Prolog offset
0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1)
0x60, // Byte 1 of size
0x42, // Byte 2 of size
0x0F, // Byte 3 of size
0x00, // Byte 4 of size (size is 0xF4260 = 1000032 (1000000 + 32) bytes)
0x05, // Prolog offset
0x03, // Operation 3 (save frame register), stack pointer offset = 0
0x02, // Prolog offset
0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
0x00, // Padding byte
0x00, // Padding byte
]
);
}
/// Builds a minimal function for the tests: one block containing only a `return`, using the
/// given calling convention and, when provided, a single stack slot.
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
    let signature = Signature::new(call_conv);
    let mut function = Function::with_name_signature(ExternalName::user(0, 0), signature);

    let entry = function.dfg.make_block();
    {
        // The cursor borrows the function mutably, so keep it in its own scope.
        let mut cursor = FuncCursor::new(&mut function);
        cursor.insert_block(entry);
        cursor.ins().return_(&[]);
    }

    if let Some(slot) = stack_slot {
        function.stack_slots.push(slot);
    }

    function
}
}
// Unwind information for the System V ABI (x86-64).
pub mod systemv;
// Unwind information for the Windows x64 ABI.
pub mod windows;

View File

@@ -0,0 +1,485 @@
//! Unwind information for System V ABI (x86-64).
use crate::ir::{Function, Inst, InstructionData, Opcode, Value};
use crate::isa::{
unwind::systemv::{CallFrameInstruction, RegisterMappingError, UnwindInfo},
x86::registers::RU,
CallConv, RegUnit, TargetIsa,
};
use crate::result::CodegenResult;
use alloc::vec::Vec;
use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64};
/// Creates a new x86-64 common information entry (CIE).
///
/// The entry uses 32-bit DWARF with 8-byte addresses, a code alignment factor of 1, a data
/// alignment factor of -8 and `RA` as the return address register. Its initial instructions
/// define the CFA as `RSP + 8` and locate the return address at `CFA - 8`.
pub fn create_cie() -> CommonInformationEntry {
    use gimli::write::CallFrameInstruction;

    let encoding = Encoding {
        address_size: 8,
        format: Format::Dwarf32,
        version: 1,
    };
    let code_alignment_factor = 1;
    let data_alignment_factor = -8;
    let mut cie = CommonInformationEntry::new(
        encoding,
        code_alignment_factor,
        data_alignment_factor,
        X86_64::RA,
    );

    // On entry the call instruction has just pushed the return address, so the call frame
    // address (CFA) is RSP+8.
    let initial_cfa = CallFrameInstruction::Cfa(X86_64::RSP, 8);
    cie.add_instruction(initial_cfa);

    // The return address therefore lives at RSP (CFA-8 = RSP+8-8 = RSP).
    let return_address = CallFrameInstruction::Offset(X86_64::RA, -8);
    cie.add_instruction(return_address);

    cie
}
/// Map Cranelift registers to their corresponding Gimli registers.
///
/// Returns `UnsupportedArchitecture` unless `isa` is 64-bit x86, `MissingBank` when no register
/// bank contains `reg`, and `UnsupportedRegisterBank` for banks other than `IntRegs` and
/// `FloatRegs`.
pub fn map_reg(isa: &dyn TargetIsa, reg: RegUnit) -> Result<Register, RegisterMappingError> {
    // Mapping from https://github.com/bytecodealliance/cranelift/pull/902 by @iximeow
    const X86_GP_REG_MAP: [gimli::Register; 16] = [
        X86_64::RAX,
        X86_64::RCX,
        X86_64::RDX,
        X86_64::RBX,
        X86_64::RSP,
        X86_64::RBP,
        X86_64::RSI,
        X86_64::RDI,
        X86_64::R8,
        X86_64::R9,
        X86_64::R10,
        X86_64::R11,
        X86_64::R12,
        X86_64::R13,
        X86_64::R14,
        X86_64::R15,
    ];
    const X86_XMM_REG_MAP: [gimli::Register; 16] = [
        X86_64::XMM0,
        X86_64::XMM1,
        X86_64::XMM2,
        X86_64::XMM3,
        X86_64::XMM4,
        X86_64::XMM5,
        X86_64::XMM6,
        X86_64::XMM7,
        X86_64::XMM8,
        X86_64::XMM9,
        X86_64::XMM10,
        X86_64::XMM11,
        X86_64::XMM12,
        X86_64::XMM13,
        X86_64::XMM14,
        X86_64::XMM15,
    ];

    let is_x86_64 = isa.name() == "x86" && isa.pointer_bits() == 64;
    if !is_x86_64 {
        return Err(RegisterMappingError::UnsupportedArchitecture);
    }

    let reg_info = isa.register_info();
    let bank = match reg_info.bank_containing_regunit(reg) {
        Some(bank) => bank,
        None => return Err(RegisterMappingError::MissingBank),
    };

    // Position of the register unit within its bank; used to index the lookup tables.
    let index_in_bank = || (reg - bank.first_unit) as usize;

    if bank.name == "IntRegs" {
        // x86 GP registers have a weird mapping to DWARF registers, so a lookup table is used.
        Ok(X86_GP_REG_MAP[index_in_bank()])
    } else if bank.name == "FloatRegs" {
        Ok(X86_XMM_REG_MAP[index_in_bank()])
    } else {
        Err(RegisterMappingError::UnsupportedRegisterBank(bank.name))
    }
}
// Accumulates call frame instructions while a function's prologue and epilogue
// instructions are walked.
struct InstructionBuilder<'a> {
// Function whose value locations and instruction arguments are consulted.
func: &'a Function,
// ISA used to map Cranelift register units to gimli registers.
isa: &'a dyn TargetIsa,
// Current offset of the CFA; starts at 8 and is updated as pushes, pops and stack pointer
// adjustments are processed.
cfa_offset: i32,
// Frame pointer register, if one is used. When set, stack pointer adjustments do not
// change the recorded CFA offset.
frame_register: Option<RegUnit>,
// Recorded (code offset, call frame instruction) pairs, in the order they were produced.
instructions: Vec<(u32, CallFrameInstruction)>,
// Stack size captured from an `iconst` in the prologue, consumed by a later register-based
// stack pointer adjustment.
stack_size: Option<i32>,
// Code offsets of the pop instructions seen since the last return was processed.
epilogue_pop_offsets: Vec<u32>,
}
impl<'a> InstructionBuilder<'a> {
    /// Creates a builder for `func` with no recorded instructions.
    ///
    /// `frame_register` is the frame pointer register, if the function uses one.
    fn new(func: &'a Function, isa: &'a dyn TargetIsa, frame_register: Option<RegUnit>) -> Self {
        Self {
            func,
            isa,
            cfa_offset: 8, // CFA offset starts at 8 to account for the return address on the stack
            frame_register,
            instructions: Vec::new(),
            stack_size: None,
            epilogue_pop_offsets: Vec::new(),
        }
    }

    /// Converts an instruction immediate into the `i32` used for CFA offsets.
    ///
    /// # Panics
    ///
    /// Panics if `imm` does not fit in an `i32`. CFA offsets are stored as `i32`, so a larger
    /// value cannot be represented and would otherwise wrap silently when cast.
    fn imm_to_i32(imm: i64) -> i32 {
        assert!(
            imm >= core::i32::MIN as i64 && imm <= core::i32::MAX as i64,
            "immediate does not fit in a 32-bit CFA offset"
        );
        imm as i32
    }

    /// Records the push of the register holding `arg` at code offset `offset`.
    fn push_reg(&mut self, offset: u32, arg: Value) {
        self.cfa_offset += 8;

        let reg = self.func.locations[arg].unwrap_reg();

        // Update the CFA if this is the save of the frame pointer register or if a frame pointer isn't being used
        // When using a frame pointer, we only need to update the CFA to account for the push of the frame pointer itself
        let updates_cfa = match self.frame_register {
            Some(fp) => reg == fp,
            None => true,
        };
        if updates_cfa {
            self.instructions
                .push((offset, CallFrameInstruction::CfaOffset(self.cfa_offset)));
        }

        // Pushes in the prologue are register saves, so record an offset of the save
        self.instructions.push((
            offset,
            CallFrameInstruction::Offset(
                map_reg(self.isa, reg)
                    .expect("a register mapping from cranelift to gimli")
                    .0,
                -self.cfa_offset,
            ),
        ));
    }

    /// Records a stack pointer decrement by the size previously captured with
    /// `prologue_imm_const`.
    ///
    /// # Panics
    ///
    /// Panics if no frame pointer is used and no stack size has been recorded.
    fn adjust_sp_down(&mut self, offset: u32) {
        // Don't adjust the CFA if we're using a frame pointer
        if self.frame_register.is_some() {
            return;
        }

        self.cfa_offset += self
            .stack_size
            .expect("expected a previous stack size instruction");
        self.instructions
            .push((offset, CallFrameInstruction::CfaOffset(self.cfa_offset)));
    }

    /// Records a stack pointer decrement by the immediate `imm`.
    ///
    /// # Panics
    ///
    /// Panics if `imm` does not fit in an `i32`.
    fn adjust_sp_down_imm(&mut self, offset: u32, imm: i64) {
        let delta = Self::imm_to_i32(imm);

        // Don't adjust the CFA if we're using a frame pointer
        if self.frame_register.is_some() {
            return;
        }

        self.cfa_offset += delta;
        self.instructions
            .push((offset, CallFrameInstruction::CfaOffset(self.cfa_offset)));
    }

    /// Records a stack pointer increment by the immediate `imm`.
    ///
    /// # Panics
    ///
    /// Panics if `imm` does not fit in an `i32`.
    fn adjust_sp_up_imm(&mut self, offset: u32, imm: i64) {
        let delta = Self::imm_to_i32(imm);

        // Don't adjust the CFA if we're using a frame pointer
        if self.frame_register.is_some() {
            return;
        }

        self.cfa_offset -= delta;
        self.instructions
            .push((offset, CallFrameInstruction::CfaOffset(self.cfa_offset)));
    }

    /// Records a register move; only a move of RSP into the frame pointer has an effect, as it
    /// changes the CFA register.
    fn move_reg(&mut self, offset: u32, src: RegUnit, dst: RegUnit) {
        if let Some(fp) = self.frame_register {
            // Check for change in CFA register (RSP is always the starting CFA)
            if src == (RU::rsp as RegUnit) && dst == fp {
                self.instructions.push((
                    offset,
                    CallFrameInstruction::CfaRegister(
                        map_reg(self.isa, dst)
                            .expect("a register mapping from cranelift to gimli")
                            .0,
                    ),
                ));
            }
        }
    }

    /// Remembers the stack size loaded by an `iconst` in the prologue.
    ///
    /// # Panics
    ///
    /// Panics if `imm` does not fit in an `i32` or if a stack size was already recorded.
    fn prologue_imm_const(&mut self, imm: i64) {
        let size = Self::imm_to_i32(imm);
        assert!(self.stack_size.is_none());

        // This instruction should only appear in a prologue to pass an
        // argument of the stack size to a stack check function.
        // Record the stack size so we know what it is when we encounter the adjustment
        // instruction (which will adjust via the register assigned to this instruction).
        self.stack_size = Some(size);
    }

    /// Processes a return instruction, emitting the instructions for the pops recorded since
    /// the previous return and then clearing the recorded pop offsets.
    fn ret(&mut self, inst: Inst) {
        let args = self.func.dfg.inst_args(inst);

        // The return's arguments are walked in reverse and paired with the recorded pops.
        for (i, arg) in args.iter().rev().enumerate() {
            // Only walk back the args for the pop instructions encountered
            if i >= self.epilogue_pop_offsets.len() {
                break;
            }

            self.cfa_offset -= 8;
            let reg = self.func.locations[*arg].unwrap_reg();

            // Update the CFA if this is the restore of the frame pointer register or if a frame pointer isn't being used
            match self.frame_register {
                Some(fp) => {
                    if reg == fp {
                        self.instructions.push((
                            self.epilogue_pop_offsets[i],
                            CallFrameInstruction::Cfa(
                                map_reg(self.isa, RU::rsp as RegUnit)
                                    .expect("a register mapping from cranelift to gimli")
                                    .0,
                                self.cfa_offset,
                            ),
                        ));
                    }
                }
                None => {
                    self.instructions.push((
                        self.epilogue_pop_offsets[i],
                        CallFrameInstruction::CfaOffset(self.cfa_offset),
                    ));

                    // Pops in the epilogue are register restores, so record a "same value" for the register
                    // This isn't necessary when using a frame pointer as the CFA doesn't change for CSR restores
                    self.instructions.push((
                        self.epilogue_pop_offsets[i],
                        CallFrameInstruction::SameValue(
                            map_reg(self.isa, reg)
                                .expect("a register mapping from cranelift to gimli")
                                .0,
                        ),
                    ));
                }
            };
        }

        self.epilogue_pop_offsets.clear();
    }

    /// Records the code offset of a pop instruction for the next call to `ret`.
    fn insert_pop_offset(&mut self, offset: u32) {
        self.epilogue_pop_offsets.push(offset);
    }

    /// Emits a "remember state" instruction at `offset`.
    fn remember_state(&mut self, offset: u32) {
        self.instructions
            .push((offset, CallFrameInstruction::RememberState));
    }

    /// Emits a "restore state" instruction at `offset`.
    fn restore_state(&mut self, offset: u32) {
        self.instructions
            .push((offset, CallFrameInstruction::RestoreState));
    }

    /// Returns whether `inst` is the function's recorded prologue end.
    fn is_prologue_end(&self, inst: Inst) -> bool {
        self.func.prologue_end == Some(inst)
    }

    /// Returns whether `inst` is one of the function's recorded epilogue starts.
    fn is_epilogue_start(&self, inst: Inst) -> bool {
        self.func.epilogues_start.contains(&inst)
    }
}
/// Creates System V unwind information for `func` by walking its prologue and epilogue
/// instructions.
///
/// Returns `Ok(None)` when the calling convention is not `Fast`, `Cold` or `SystemV`, when the
/// function has no recorded prologue end, or when `isa` is not 64-bit x86.
///
/// `frame_register` is the frame pointer register, if the function uses one.
pub(crate) fn create_unwind_info(
func: &Function,
isa: &dyn TargetIsa,
frame_register: Option<RegUnit>,
) -> CodegenResult<Option<UnwindInfo>> {
// Only System V-like calling conventions are supported
match func.signature.call_conv {
CallConv::Fast | CallConv::Cold | CallConv::SystemV => {}
_ => return Ok(None),
}
if func.prologue_end.is_none() || isa.name() != "x86" || isa.pointer_bits() != 64 {
return Ok(None);
}
let mut builder = InstructionBuilder::new(func, isa, frame_register);
// The walk starts inside the prologue and tracks whether it is currently inside an epilogue.
let mut in_prologue = true;
let mut in_epilogue = false;
// End offset of the last instruction visited; passed to `UnwindInfo::new` as the length.
let mut len = 0;
// Visit the blocks in order of their code offsets rather than layout order.
let mut blocks = func.layout.blocks().collect::<Vec<_>>();
blocks.sort_by_key(|b| func.offsets[*b]);
for (i, block) in blocks.iter().enumerate() {
for (offset, inst, size) in func.inst_offsets(*block, &isa.encoding_info()) {
// Instructions are recorded at the offset immediately following the instruction.
let offset = offset + size;
assert!(len <= offset);
len = offset;
let is_last_block = i == blocks.len() - 1;
if in_prologue {
// Check for prologue end (inclusive)
in_prologue = !builder.is_prologue_end(inst);
} else if !in_epilogue && builder.is_epilogue_start(inst) {
// Now in an epilogue, emit a remember state instruction if not last block
in_epilogue = true;
if !is_last_block {
builder.remember_state(offset);
}
} else if !in_epilogue {
// Ignore normal instructions
continue;
}
// Only prologue and epilogue instructions reach this point.
match builder.func.dfg[inst] {
InstructionData::Unary { opcode, arg } => match opcode {
Opcode::X86Push => {
builder.push_reg(offset, arg);
}
Opcode::AdjustSpDown => {
builder.adjust_sp_down(offset);
}
_ => {}
},
InstructionData::CopySpecial { src, dst, .. } => {
builder.move_reg(offset, src, dst);
}
InstructionData::NullAry { opcode } => match opcode {
Opcode::X86Pop => {
// Pops are only recorded here; they are resolved when the return is reached.
builder.insert_pop_offset(offset);
}
_ => {}
},
InstructionData::UnaryImm { opcode, imm } => match opcode {
Opcode::Iconst => {
builder.prologue_imm_const(imm.into());
}
Opcode::AdjustSpDownImm => {
builder.adjust_sp_down_imm(offset, imm.into());
}
Opcode::AdjustSpUpImm => {
builder.adjust_sp_up_imm(offset, imm.into());
}
_ => {}
},
InstructionData::MultiAry { opcode, .. } => match opcode {
Opcode::Return => {
builder.ret(inst);
// Pairs with the remember state emitted at the start of this epilogue.
if !is_last_block {
builder.restore_state(offset);
}
in_epilogue = false;
}
_ => {}
},
_ => {}
};
}
}
Ok(Some(UnwindInfo::new(builder.instructions, len)))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::ir::{
        types, AbiParam, ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind,
    };
    use crate::isa::{lookup, CallConv};
    use crate::settings::{builder, Flags};
    use crate::Context;
    use gimli::write::Address;
    use std::str::FromStr;
    use target_lexicon::triple;

    /// Compiles `func` for x86-64, converts its System V unwind information into an FDE at
    /// the constant `address`, and returns the FDE's `Debug` rendering.
    fn compile_and_describe_fde(func: Function, address: u64) -> String {
        let isa = lookup(triple!("x86_64"))
            .expect("expect x86 ISA")
            .finish(Flags::new(builder()));

        let mut context = Context::for_function(func);
        context.compile(&*isa).expect("expected compilation");

        let info = isa
            .create_unwind_info(&context.func)
            .expect("can create unwind info");
        match info {
            Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
                format!("{:?}", info.to_fde(Address::Constant(address)))
            }
            _ => panic!("expected unwind information"),
        }
    }

    /// A single-block function with a 64-byte stack slot.
    #[test]
    fn test_simple_func() {
        let slot = StackSlotData::new(StackSlotKind::ExplicitSlot, 64);
        let func = create_function(CallConv::SystemV, Some(slot));

        assert_eq!(compile_and_describe_fde(func, 1234), "FrameDescriptionEntry { address: Constant(1234), length: 16, lsda: None, instructions: [(2, CfaOffset(16)), (2, Offset(Register(6), -16)), (5, CfaRegister(Register(6))), (15, Cfa(Register(7), 8))] }");
    }

    /// Builds a function made of one block that immediately returns, optionally with a
    /// single stack slot.
    fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
        let signature = Signature::new(call_conv);
        let mut function = Function::with_name_signature(ExternalName::user(0, 0), signature);

        let entry = function.dfg.make_block();
        {
            let mut cursor = FuncCursor::new(&mut function);
            cursor.insert_block(entry);
            cursor.ins().return_(&[]);
        }

        if let Some(slot) = stack_slot {
            function.stack_slots.push(slot);
        }

        function
    }

    /// A function with two returning blocks, which exercises the remember/restore state
    /// instructions emitted around an epilogue that is not in the last block.
    #[test]
    fn test_multi_return_func() {
        let func = create_multi_return_function(CallConv::SystemV);

        assert_eq!(compile_and_describe_fde(func, 4321), "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [(2, CfaOffset(16)), (2, Offset(Register(6), -16)), (5, CfaRegister(Register(6))), (12, RememberState), (12, Cfa(Register(7), 8)), (13, RestoreState), (15, Cfa(Register(7), 0))] }");
    }

    /// Builds a function taking one `i32` that branches on it to one of two returning blocks.
    fn create_multi_return_function(call_conv: CallConv) -> Function {
        let mut signature = Signature::new(call_conv);
        signature.params.push(AbiParam::new(types::I32));
        let mut function = Function::with_name_signature(ExternalName::user(0, 0), signature);

        let entry = function.dfg.make_block();
        let condition = function.dfg.append_block_param(entry, types::I32);
        let fallthrough = function.dfg.make_block();
        let taken = function.dfg.make_block();

        let mut cursor = FuncCursor::new(&mut function);

        // Entry: branch to `taken` when the argument is non-zero, otherwise jump on.
        cursor.insert_block(entry);
        cursor.ins().brnz(condition, taken, &[]);
        cursor.ins().jump(fallthrough, &[]);

        cursor.insert_block(fallthrough);
        cursor.ins().return_(&[]);

        cursor.insert_block(taken);
        cursor.ins().return_(&[]);

        function
    }
}

View File

@@ -0,0 +1,677 @@
//! Unwind information for Windows x64 ABI.
use crate::ir::{Function, InstructionData, Opcode, ValueLoc};
use crate::isa::x86::registers::{FPR, GPR, RU};
use crate::isa::{CallConv, RegUnit, TargetIsa};
use crate::result::{CodegenError, CodegenResult};
use alloc::vec::Vec;
use byteorder::{ByteOrder, LittleEndian};
use log::warn;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// Maximum (inclusive) size, in bytes, of a "small" stack allocation
const SMALL_ALLOC_MAX_SIZE: u32 = 128;
/// Maximum (inclusive) size, in bytes, of a "large" stack allocation that can be represented in 16 bits
/// (the size is stored divided by 8, so this is `u16::MAX * 8`)
const LARGE_ALLOC_16BIT_MAX_SIZE: u32 = 524280;
// Sequential writer over a caller-provided byte buffer.
struct Writer<'a> {
// Destination buffer; writes index into it directly and panic if it is too short.
buf: &'a mut [u8],
// Index of the next byte to be written; also the number of bytes written so far.
offset: usize,
}
impl<'a> Writer<'a> {
pub fn new(buf: &'a mut [u8]) -> Self {
Self { buf, offset: 0 }
}
fn write_u8(&mut self, v: u8) {
self.buf[self.offset] = v;
self.offset += 1;
}
fn write_u16<T: ByteOrder>(&mut self, v: u16) {
T::write_u16(&mut self.buf[self.offset..(self.offset + 2)], v);
self.offset += 2;
}
fn write_u32<T: ByteOrder>(&mut self, v: u32) {
T::write_u32(&mut self.buf[self.offset..(self.offset + 4)], v);
self.offset += 4;
}
}
/// The supported unwind codes for the x64 Windows ABI.
///
/// See: https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
/// Only what is needed to describe the prologues generated by the Cranelift x86 ISA are represented here.
/// Note: the Cranelift x86 ISA RU enum matches the Windows unwind GPR encoding values.
///
/// In every variant, `offset` is the prologue offset recorded for the code, i.e. the offset of
/// the end of the instruction it describes.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
enum UnwindCode {
/// A push of a register; `reg` is the register's index in the GPR class.
PushRegister {
offset: u8,
reg: u8,
},
/// A save of an XMM register to the stack; `stack_offset` is the offset of the save slot in
/// bytes and is scaled as required by the encoding when emitted.
SaveXmm {
offset: u8,
reg: u8,
stack_offset: u32,
},
/// A stack allocation of `size` bytes; the encoding used depends on the size.
StackAlloc {
offset: u8,
size: u32,
},
/// Establishment of the frame pointer; `sp_offset` is written to the operation info nibble.
SetFramePointer {
offset: u8,
sp_offset: u8,
},
}
impl UnwindCode {
    /// Writes the encoded form of this unwind code to `writer`.
    ///
    /// Each code starts with the prologue offset byte followed by a byte holding the operation
    /// code in the low nibble and the operation info in the high nibble; some operations are
    /// followed by one or two additional 16-bit slots.
    ///
    /// # Panics
    ///
    /// Panics if a stack allocation is smaller than 8 bytes or not a multiple of 8, or if the
    /// writer's buffer is too small.
    fn emit(&self, writer: &mut Writer) {
        enum UnwindOperation {
            PushNonvolatileRegister = 0,
            LargeStackAlloc = 1,
            SmallStackAlloc = 2,
            SetFramePointer = 3,
            SaveXmm128 = 8,
            SaveXmm128Far = 9,
        }

        match self {
            Self::PushRegister { offset, reg } => {
                writer.write_u8(*offset);
                writer.write_u8((*reg << 4) | (UnwindOperation::PushNonvolatileRegister as u8));
            }
            Self::SaveXmm {
                offset,
                reg,
                stack_offset,
            } => {
                writer.write_u8(*offset);
                let scaled_stack_offset = *stack_offset / 16;
                if scaled_stack_offset <= core::u16::MAX as u32 {
                    // Near form: one slot holding the offset scaled by 16
                    writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128 as u8));
                    writer.write_u16::<LittleEndian>(scaled_stack_offset as u16);
                } else {
                    // Far form: two slots holding the *unscaled* offset, low word first
                    writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128Far as u8));
                    writer.write_u16::<LittleEndian>(*stack_offset as u16);
                    writer.write_u16::<LittleEndian>((*stack_offset >> 16) as u16);
                }
            }
            Self::StackAlloc { offset, size } => {
                // Stack allocations on Windows must be a multiple of 8 and be at least 1 slot
                assert!(*size >= 8);
                assert!((*size % 8) == 0);

                writer.write_u8(*offset);
                if *size <= SMALL_ALLOC_MAX_SIZE {
                    // Small form: (size - 8) / 8 stored in the operation info nibble
                    writer.write_u8(
                        ((((*size - 8) / 8) as u8) << 4) | UnwindOperation::SmallStackAlloc as u8,
                    );
                } else if *size <= LARGE_ALLOC_16BIT_MAX_SIZE {
                    // Large form (info = 0): one slot holding the size scaled by 8
                    writer.write_u8(UnwindOperation::LargeStackAlloc as u8);
                    writer.write_u16::<LittleEndian>((*size / 8) as u16);
                } else {
                    // Large form (info = 1): two slots holding the unscaled size
                    writer.write_u8((1 << 4) | (UnwindOperation::LargeStackAlloc as u8));
                    writer.write_u32::<LittleEndian>(*size);
                }
            }
            Self::SetFramePointer { offset, sp_offset } => {
                writer.write_u8(*offset);
                writer.write_u8((*sp_offset << 4) | (UnwindOperation::SetFramePointer as u8));
            }
        };
    }

    /// Returns the number of 16-bit nodes `emit` writes for this unwind code.
    ///
    /// The thresholds here must mirror the ones used in `emit`, as the result determines the
    /// emitted size of the unwind information.
    fn node_count(&self) -> usize {
        match self {
            Self::StackAlloc { size, .. } => {
                if *size <= SMALL_ALLOC_MAX_SIZE {
                    1
                } else if *size <= LARGE_ALLOC_16BIT_MAX_SIZE {
                    2
                } else {
                    3
                }
            }
            Self::SaveXmm { stack_offset, .. } => {
                // The near form is chosen based on the offset scaled by 16, exactly as in `emit`
                if *stack_offset / 16 <= core::u16::MAX as u32 {
                    2
                } else {
                    3
                }
            }
            _ => 1,
        }
    }
}
/// Creates Windows x64 unwind information for `func` by scanning the instructions of its entry
/// block up to and including the recorded prologue end.
///
/// Returns `Ok(None)` when the calling convention is not `WindowsFastcall` or the function has
/// no recorded prologue end. Returns `CodegenError::CodeTooLarge` when a prologue instruction
/// ends beyond offset 255, and `CodegenError::ImplLimitExceeded` when an FPR is saved and the
/// static frame allocation exceeds 240 bytes.
///
/// `frame_register` is the frame pointer register, if the function uses one.
///
/// Panics if the prologue end is not found in the entry block.
pub(crate) fn create_unwind_info(
func: &Function,
isa: &dyn TargetIsa,
frame_register: Option<RegUnit>,
) -> CodegenResult<Option<UnwindInfo>> {
// Only Windows fastcall is supported for unwind information
if func.signature.call_conv != CallConv::WindowsFastcall || func.prologue_end.is_none() {
return Ok(None);
}
let prologue_end = func.prologue_end.unwrap();
let entry_block = func.layout.entry_block().expect("missing entry block");
// Stores the stack size when SP is not adjusted via an immediate value
let mut stack_size = None;
// Sum of the sizes of the prologue instructions visited so far.
let mut prologue_size = 0;
let mut unwind_codes = Vec::new();
let mut found_end = false;
// Have we saved at least one FPR? if so, we might have to check additional constraints.
let mut saved_fpr = false;
// In addition to the min offset for a callee-save, we need to know the offset from the
// frame base to the stack pointer, so that we can record an unwind offset that spans only
// to the end of callee-save space.
let mut static_frame_allocation_size = 0u32;
// For the time being, FPR preservation is split into a stack_addr and later store/load.
// Store the register used for stack store and ensure it is the same register with no
// intervening changes to the frame size.
let mut callee_save_region_reg = None;
// Also record the callee-save region's offset from RSP, because it must be added to FPR
// save offsets to compute an offset from the frame base.
let mut callee_save_offset = None;
for (offset, inst, size) in func.inst_offsets(entry_block, &isa.encoding_info()) {
// x64 ABI prologues cannot exceed 255 bytes in length
if (offset + size) > 255 {
warn!("function prologues cannot exceed 255 bytes in size for Windows x64");
return Err(CodegenError::CodeTooLarge);
}
prologue_size += size;
// Unwind codes are recorded at the offset immediately following the instruction.
let unwind_offset = (offset + size) as u8;
match func.dfg[inst] {
InstructionData::Unary { opcode, arg } => {
match opcode {
Opcode::X86Push => {
static_frame_allocation_size += 8;
unwind_codes.push(UnwindCode::PushRegister {
offset: unwind_offset,
reg: GPR.index_of(func.locations[arg].unwrap_reg()) as u8,
});
}
Opcode::AdjustSpDown => {
let stack_size =
stack_size.expect("expected a previous stack size instruction");
static_frame_allocation_size += stack_size;
// This is used when calling a stack check function
// We need to track the assignment to RAX which has the size of the stack
unwind_codes.push(UnwindCode::StackAlloc {
offset: unwind_offset,
size: stack_size,
});
}
_ => {}
}
}
InstructionData::CopySpecial { src, dst, .. } => {
if let Some(frame_register) = frame_register {
if src == (RU::rsp as RegUnit) && dst == frame_register {
// Constructing an rbp-based stack frame, so the static frame
// allocation restarts at 0 from here.
static_frame_allocation_size = 0;
unwind_codes.push(UnwindCode::SetFramePointer {
offset: unwind_offset,
sp_offset: 0,
});
}
}
}
InstructionData::UnaryImm { opcode, imm } => {
match opcode {
Opcode::Iconst => {
let imm: i64 = imm.into();
assert!(imm <= core::u32::MAX as i64);
assert!(stack_size.is_none());
// This instruction should only appear in a prologue to pass an
// argument of the stack size to a stack check function.
// Record the stack size so we know what it is when we encounter the adjustment
// instruction (which will adjust via the register assigned to this instruction).
stack_size = Some(imm as u32);
}
Opcode::AdjustSpDownImm => {
let imm: i64 = imm.into();
assert!(imm <= core::u32::MAX as i64);
static_frame_allocation_size += imm as u32;
unwind_codes.push(UnwindCode::StackAlloc {
offset: unwind_offset,
size: imm as u32,
});
}
_ => {}
}
}
InstructionData::StackLoad {
opcode: Opcode::StackAddr,
stack_slot,
offset: _,
} => {
let result = func.dfg.inst_results(inst).get(0).unwrap();
if let ValueLoc::Reg(frame_reg) = func.locations[*result] {
// Remember which register holds the address, so later stores through it can be
// recognized.
callee_save_region_reg = Some(frame_reg);
// Figure out the offset in the call frame that `frame_reg` will have.
let frame_size = func
.stack_slots
.layout_info
.expect("func's stack slots have layout info if stack operations exist")
.frame_size;
// Because we're well after the prologue has been constructed, stack slots
// must have been laid out...
let slot_offset = func.stack_slots[stack_slot]
.offset
.expect("callee-save slot has an offset computed");
let frame_offset = frame_size as i32 + slot_offset;
callee_save_offset = Some(frame_offset as u32);
}
}
InstructionData::Store {
opcode: Opcode::Store,
args: [arg1, arg2],
flags: _flags,
offset,
} => {
if let (ValueLoc::Reg(ru), ValueLoc::Reg(base_ru)) =
(func.locations[arg1], func.locations[arg2])
{
// Only stores whose base is the recorded callee-save region register are
// considered.
if Some(base_ru) == callee_save_region_reg {
let offset_int: i32 = offset.into();
assert!(offset_int >= 0, "negative fpr offset would store outside the stack frame, and is almost certainly an error");
let offset_int: u32 = offset_int as u32 + callee_save_offset.expect("FPR presevation requires an FPR save region, which has some stack offset");
if FPR.contains(ru) {
saved_fpr = true;
unwind_codes.push(UnwindCode::SaveXmm {
offset: unwind_offset,
reg: ru as u8,
stack_offset: offset_int,
});
}
}
}
}
_ => {}
};
// The prologue end instruction itself is processed above before the scan stops.
if inst == prologue_end {
found_end = true;
break;
}
}
assert!(found_end);
if saved_fpr {
if static_frame_allocation_size > 240 && saved_fpr {
warn!("stack frame is too large ({} bytes) to use with Windows x64 SEH when preserving FPRs. \
This is a Cranelift implementation limit, see \
https://github.com/bytecodealliance/wasmtime/issues/1475",
static_frame_allocation_size);
return Err(CodegenError::ImplLimitExceeded);
}
// Only test static frame size is 16-byte aligned when an FPR is saved to avoid
// panicking when alignment is elided because no FPRs are saved and no child calls are
// made.
assert!(
static_frame_allocation_size % 16 == 0,
"static frame allocation must be a multiple of 16"
);
}
// Hack to avoid panicking unnecessarily. Because Cranelift generates prologues with RBP at
// one end of the call frame, and RSP at the other, required offsets are arbitrarily large.
// Windows x64 SEH only allows this offset be up to 240 bytes, however, meaning large
// frames are inexpressible, and we cannot actually compile the function. In case there are
// no preserved FPRs, we can lie without error and claim the offset to RBP is 0 - nothing
// will actually check it. This, then, avoids panics when compiling functions with large
// call frames.
let reported_frame_offset = if saved_fpr {
(static_frame_allocation_size / 16) as u8
} else {
0
};
Ok(Some(UnwindInfo {
flags: 0, // this assumes cranelift functions have no SEH handlers
prologue_size: prologue_size as u8,
frame_register: frame_register.map(|r| GPR.index_of(r) as u8),
frame_register_offset: reported_frame_offset,
unwind_codes,
}))
}
/// Represents Windows x64 unwind information.
///
/// For information about Windows x64 unwind info, see:
/// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct UnwindInfo {
// Flags emitted alongside the version; size calculation requires this to be 0.
flags: u8,
// Size of the function prologue in bytes.
prologue_size: u8,
// Index of the frame register in the GPR class, if a frame register is used.
frame_register: Option<u8>,
// Value emitted in the high nibble of the frame register byte.
frame_register_offset: u8,
// Unwind codes in prologue order; they are emitted in reverse.
unwind_codes: Vec<UnwindCode>,
}
impl UnwindInfo {
    /// Gets the emit size of the unwind information, in bytes.
    ///
    /// # Panics
    ///
    /// Panics if any flags are set, as the calculation assumes there is no SEH handler or
    /// chained info.
    pub fn emit_size(&self) -> usize {
        let node_count = self.node_count();

        // Calculation of the size requires no SEH handler or chained info
        assert!(self.flags == 0);

        // Size of fixed part of UNWIND_INFO is 4 bytes
        // Then comes the UNWIND_CODE nodes (2 bytes each)
        // Then comes 2 bytes of padding for the unwind codes if necessary
        // Next would come the SEH data, but we assert above that the function doesn't have SEH data
        4 + (node_count * 2) + if (node_count & 1) == 1 { 2 } else { 0 }
    }

    /// Emits the unwind information into the given mutable byte slice.
    ///
    /// # Panics
    ///
    /// This function will panic if the slice is not at least `emit_size` in length, or if
    /// the unwind codes need more than 255 nodes.
    pub fn emit(&self, buf: &mut [u8]) {
        const UNWIND_INFO_VERSION: u8 = 1;

        let node_count = self.node_count();
        // The count of codes is emitted as a single byte, so 255 is the largest count that can
        // be represented; 256 would be written as 0.
        assert!(node_count <= core::u8::MAX as usize);

        let mut writer = Writer::new(buf);

        writer.write_u8((self.flags << 3) | UNWIND_INFO_VERSION);
        writer.write_u8(self.prologue_size);
        writer.write_u8(node_count as u8);

        // Frame register in the low nibble, frame register offset in the high nibble
        if let Some(reg) = self.frame_register {
            writer.write_u8((self.frame_register_offset << 4) | reg);
        } else {
            writer.write_u8(0);
        }

        // Unwind codes are written in reverse order (prologue offset descending)
        for code in self.unwind_codes.iter().rev() {
            code.emit(&mut writer);
        }

        // To keep a 32-bit alignment, emit 2 bytes of padding if there's an odd number of 16-bit nodes
        if (node_count & 1) == 1 {
            writer.write_u16::<LittleEndian>(0);
        }

        // Ensure the correct number of bytes was emitted
        assert_eq!(writer.offset, self.emit_size());
    }

    /// Returns the total number of 16-bit nodes needed by all unwind codes.
    fn node_count(&self) -> usize {
        self.unwind_codes
            .iter()
            .fold(0, |nodes, c| nodes + c.node_count())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::ir::{ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind};
    use crate::isa::{lookup, CallConv};
    use crate::settings::{builder, Flags};
    use crate::Context;
    use std::str::FromStr;
    use target_lexicon::triple;

    /// Compiles a Windows fastcall function with one explicit stack slot of `slot_size` bytes
    /// and returns its unwind information, using RBP as the frame register.
    fn fastcall_unwind_info(slot_size: u32) -> UnwindInfo {
        let isa = lookup(triple!("x86_64"))
            .expect("expect x86 ISA")
            .finish(Flags::new(builder()));

        let slot = StackSlotData::new(StackSlotKind::ExplicitSlot, slot_size);
        let mut context =
            Context::for_function(create_function(CallConv::WindowsFastcall, Some(slot)));
        context.compile(&*isa).expect("expected compilation");

        create_unwind_info(&context.func, &*isa, Some(RU::rbp.into()))
            .expect("can create unwind info")
            .expect("expected unwind info")
    }

    /// Builds the unwind information expected for an RBP-based frame: push RBP, set the frame
    /// pointer, then a stack allocation of `alloc_size` bytes ending at `prologue_size`.
    fn expected_rbp_frame(prologue_size: u8, alloc_size: u32) -> UnwindInfo {
        let rbp = GPR.index_of(RU::rbp.into()) as u8;
        UnwindInfo {
            flags: 0,
            prologue_size,
            frame_register: Some(rbp),
            frame_register_offset: 0,
            unwind_codes: vec![
                UnwindCode::PushRegister {
                    offset: 2,
                    reg: rbp,
                },
                UnwindCode::SetFramePointer {
                    offset: 5,
                    sp_offset: 0,
                },
                UnwindCode::StackAlloc {
                    offset: prologue_size,
                    size: alloc_size,
                },
            ],
        }
    }

    /// Functions that do not use the Windows fastcall convention produce no unwind info.
    #[test]
    fn test_wrong_calling_convention() {
        let isa = lookup(triple!("x86_64"))
            .expect("expect x86 ISA")
            .finish(Flags::new(builder()));
        let mut context = Context::for_function(create_function(CallConv::SystemV, None));
        context.compile(&*isa).expect("expected compilation");

        let result =
            create_unwind_info(&context.func, &*isa, None).expect("can create unwind info");
        assert_eq!(result, None);
    }

    /// A 64-byte slot fits the small stack allocation encoding (one node).
    #[test]
    fn test_small_alloc() {
        let unwind = fastcall_unwind_info(64);
        assert_eq!(unwind, expected_rbp_frame(9, 64 + 32));
        assert_eq!(unwind.emit_size(), 12);

        let mut buf = [0u8; 12];
        unwind.emit(&mut buf);
        assert_eq!(
            buf,
            [
                0x01, // Version and flags (version 1, no flags)
                0x09, // Prologue size
                0x03, // Unwind code count (1 for stack alloc, 1 for save frame reg, 1 for push reg)
                0x05, // Frame register + offset (RBP with 0 offset)
                0x09, // Prolog offset
                0xB2, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0xB * 8) + 8 = 96 (64 + 32) bytes)
                0x05, // Prolog offset
                0x03, // Operation 3 (save frame register), stack pointer offset = 0
                0x02, // Prolog offset
                0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
                0x00, // Padding byte
                0x00, // Padding byte
            ]
        );
    }

    /// A 10000-byte slot needs the large stack allocation encoding with a scaled 16-bit size
    /// (two nodes).
    #[test]
    fn test_medium_alloc() {
        let unwind = fastcall_unwind_info(10000);
        assert_eq!(unwind, expected_rbp_frame(27, 10000 + 32));
        assert_eq!(unwind.emit_size(), 12);

        let mut buf = [0u8; 12];
        unwind.emit(&mut buf);
        assert_eq!(
            buf,
            [
                0x01, // Version and flags (version 1, no flags)
                0x1B, // Prologue size
                0x04, // Unwind code count (2 for stack alloc, 1 for save frame reg, 1 for push reg)
                0x05, // Frame register + offset (RBP with 0 offset)
                0x1B, // Prolog offset
                0x01, // Operation 1 (large stack alloc), size is scaled 16-bits (info = 0)
                0xE6, // Low size byte
                0x04, // High size byte (e.g. 0x04E6 * 8 = 10032 (10000 + 32) bytes)
                0x05, // Prolog offset
                0x03, // Operation 3 (save frame register), stack pointer offset = 0
                0x02, // Prolog offset
                0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
            ]
        );
    }

    /// A 1000000-byte slot needs the large stack allocation encoding with an unscaled 32-bit
    /// size (three nodes).
    #[test]
    fn test_large_alloc() {
        let unwind = fastcall_unwind_info(1000000);
        assert_eq!(unwind, expected_rbp_frame(27, 1000000 + 32));
        assert_eq!(unwind.emit_size(), 16);

        let mut buf = [0u8; 16];
        unwind.emit(&mut buf);
        assert_eq!(
            buf,
            [
                0x01, // Version and flags (version 1, no flags)
                0x1B, // Prologue size
                0x05, // Unwind code count (3 for stack alloc, 1 for save frame reg, 1 for push reg)
                0x05, // Frame register + offset (RBP with 0 offset)
                0x1B, // Prolog offset
                0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1)
                0x60, // Byte 1 of size
                0x42, // Byte 2 of size
                0x0F, // Byte 3 of size
                0x00, // Byte 4 of size (size is 0xF4260 = 1000032 (1000000 + 32) bytes)
                0x05, // Prolog offset
                0x03, // Operation 3 (save frame register), stack pointer offset = 0
                0x02, // Prolog offset
                0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
                0x00, // Padding byte
                0x00, // Padding byte
            ]
        );
    }

    /// Builds a function made of one block that immediately returns, optionally with a
    /// single stack slot.
    fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
        let signature = Signature::new(call_conv);
        let mut function = Function::with_name_signature(ExternalName::user(0, 0), signature);

        let entry = function.dfg.make_block();
        {
            let mut cursor = FuncCursor::new(&mut function);
            cursor.insert_block(entry);
            cursor.ins().return_(&[]);
        }

        if let Some(slot) = stack_slot {
            function.stack_slots.push(slot);
        }

        function
    }
}