Implement emitting Windows unwind information for fastcall functions. (#1155)
* Implement emitting Windows unwind information for fastcall functions. This commit implements emitting Windows unwind information for x64 fastcall calling convention functions. The unwind information can be used to construct a Windows function table at runtime for JIT'd code, enabling stack walking and unwinding by the operating system. * Address code review feedback. This commit addresses code review feedback: * Remove unnecessary unsafe code. * Emit the unwind information always as little endian. * Fix comments. A dependency from cranelift-codegen to the byteorder crate was added. The byteorder crate is a no-dependencies crate with a reasonable abstraction for writing binary data for a specific endianness. * Address code review feedback. * Disable default features for the `byteorder` crate. * Add a comment regarding the Windows ABI unwind code numerical values. * Panic if we encounter a Windows function with a prologue greater than 256 bytes in size.
This commit is contained in:
508
cranelift/codegen/src/isa/x86/unwind.rs
Normal file
508
cranelift/codegen/src/isa/x86/unwind.rs
Normal file
@@ -0,0 +1,508 @@
|
||||
//! Unwind information for x64 Windows.
|
||||
|
||||
use super::registers::RU;
|
||||
use crate::ir::{Function, InstructionData, Opcode};
|
||||
use crate::isa::{CallConv, RegUnit, TargetIsa};
|
||||
use alloc::vec::Vec;
|
||||
use byteorder::{LittleEndian, WriteBytesExt};
|
||||
|
||||
/// Maximum (inclusive) size of a "small" stack allocation
|
||||
const SMALL_ALLOC_MAX_SIZE: u32 = 128;
|
||||
/// Maximum (inclusive) size of a "large" stack allocation that can represented in 16-bits
|
||||
const LARGE_ALLOC_16BIT_MAX_SIZE: u32 = 524280;
|
||||
|
||||
/// The supported unwind codes for the x64 Windows ABI.
|
||||
///
|
||||
/// See: https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
|
||||
/// Only what is needed to describe the prologues generated by the Cranelift x86 ISA are represented here.
|
||||
/// Note: the Cranelift x86 ISA RU enum matches the Windows unwind GPR encoding values.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
enum UnwindCode {
|
||||
PushRegister { offset: u8, reg: RegUnit },
|
||||
StackAlloc { offset: u8, size: u32 },
|
||||
SetFramePointer { offset: u8, sp_offset: u8 },
|
||||
}
|
||||
|
||||
impl UnwindCode {
|
||||
fn emit(&self, mem: &mut Vec<u8>) -> std::io::Result<()> {
|
||||
enum UnwindOperation {
|
||||
PushNonvolatileRegister,
|
||||
LargeStackAlloc,
|
||||
SmallStackAlloc,
|
||||
SetFramePointer,
|
||||
}
|
||||
|
||||
match self {
|
||||
Self::PushRegister { offset, reg } => {
|
||||
mem.write_u8(*offset)?;
|
||||
mem.write_u8(
|
||||
((*reg as u8) << 4) | (UnwindOperation::PushNonvolatileRegister as u8),
|
||||
)?;
|
||||
}
|
||||
Self::StackAlloc { offset, size } => {
|
||||
// Stack allocations on Windows must be a multiple of 8 and be at least 1 slot
|
||||
assert!(*size >= 8);
|
||||
assert!((*size % 8) == 0);
|
||||
|
||||
mem.write_u8(*offset)?;
|
||||
if *size <= SMALL_ALLOC_MAX_SIZE {
|
||||
mem.write_u8(
|
||||
((((*size - 8) / 8) as u8) << 4) | UnwindOperation::SmallStackAlloc as u8,
|
||||
)?;
|
||||
} else if *size <= LARGE_ALLOC_16BIT_MAX_SIZE {
|
||||
mem.write_u8(UnwindOperation::LargeStackAlloc as u8)?;
|
||||
mem.write_u16::<LittleEndian>((*size / 8) as u16)?;
|
||||
} else {
|
||||
mem.write_u8((1 << 4) | (UnwindOperation::LargeStackAlloc as u8))?;
|
||||
mem.write_u32::<LittleEndian>(*size)?;
|
||||
}
|
||||
}
|
||||
Self::SetFramePointer { offset, sp_offset } => {
|
||||
mem.write_u8(*offset)?;
|
||||
mem.write_u8((*sp_offset << 4) | (UnwindOperation::SetFramePointer as u8))?;
|
||||
}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn node_count(&self) -> usize {
|
||||
match self {
|
||||
Self::StackAlloc { size, .. } => {
|
||||
if *size <= SMALL_ALLOC_MAX_SIZE {
|
||||
1
|
||||
} else if *size <= LARGE_ALLOC_16BIT_MAX_SIZE {
|
||||
2
|
||||
} else {
|
||||
3
|
||||
}
|
||||
}
|
||||
_ => 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents Windows x64 unwind information.
|
||||
///
|
||||
/// For information about Windows x64 unwind info, see:
|
||||
/// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct UnwindInfo {
|
||||
flags: u8,
|
||||
prologue_size: u8,
|
||||
frame_register: Option<RegUnit>,
|
||||
frame_register_offset: u8,
|
||||
unwind_codes: Vec<UnwindCode>,
|
||||
}
|
||||
|
||||
impl UnwindInfo {
|
||||
pub fn try_from_func(
|
||||
func: &Function,
|
||||
isa: &dyn TargetIsa,
|
||||
frame_register: Option<RegUnit>,
|
||||
) -> Option<Self> {
|
||||
// Only Windows fastcall is supported for unwind information
|
||||
if func.signature.call_conv != CallConv::WindowsFastcall || func.prologue_end.is_none() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let prologue_end = func.prologue_end.unwrap();
|
||||
let entry_block = func.layout.ebbs().nth(0).expect("missing entry block");
|
||||
|
||||
// Stores the stack size when SP is not adjusted via an immediate value
|
||||
let mut stack_size = None;
|
||||
let mut prologue_size = 0;
|
||||
let mut unwind_codes = Vec::new();
|
||||
let mut found_end = false;
|
||||
|
||||
for (offset, inst, size) in func.inst_offsets(entry_block, &isa.encoding_info()) {
|
||||
// x64 ABI prologues cannot exceed 255 bytes in length
|
||||
if (offset + size) > 255 {
|
||||
panic!("function prologues cannot exceed 255 bytes in size for Windows x64");
|
||||
}
|
||||
|
||||
prologue_size += size;
|
||||
|
||||
let unwind_offset = (offset + size) as u8;
|
||||
|
||||
match func.dfg[inst] {
|
||||
InstructionData::Unary { opcode, arg } => {
|
||||
match opcode {
|
||||
Opcode::X86Push => {
|
||||
unwind_codes.push(UnwindCode::PushRegister {
|
||||
offset: unwind_offset,
|
||||
reg: func.locations[arg].unwrap_reg(),
|
||||
});
|
||||
}
|
||||
Opcode::AdjustSpDown => {
|
||||
// This is used when calling a stack check function
|
||||
// We need to track the assignment to RAX which has the size of the stack
|
||||
unwind_codes.push(UnwindCode::StackAlloc {
|
||||
offset: unwind_offset,
|
||||
size: stack_size
|
||||
.expect("expected a previous stack size instruction"),
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
InstructionData::CopySpecial { src, dst, .. } => {
|
||||
if let Some(frame_register) = frame_register {
|
||||
if src == (RU::rsp as RegUnit) && dst == frame_register {
|
||||
unwind_codes.push(UnwindCode::SetFramePointer {
|
||||
offset: unwind_offset,
|
||||
sp_offset: 0,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
InstructionData::UnaryImm { opcode, imm } => {
|
||||
match opcode {
|
||||
Opcode::Iconst => {
|
||||
let imm: i64 = imm.into();
|
||||
assert!(imm <= std::u32::MAX as i64);
|
||||
assert!(stack_size.is_none());
|
||||
|
||||
// This instruction should only appear in a prologue to pass an
|
||||
// argument of the stack size to a stack check function.
|
||||
// Record the stack size so we know what it is when we encounter the adjustment
|
||||
// instruction (which will adjust via the register assigned to this instruction).
|
||||
stack_size = Some(imm as u32);
|
||||
}
|
||||
Opcode::AdjustSpDownImm => {
|
||||
let imm: i64 = imm.into();
|
||||
assert!(imm <= std::u32::MAX as i64);
|
||||
|
||||
unwind_codes.push(UnwindCode::StackAlloc {
|
||||
offset: unwind_offset,
|
||||
size: imm as u32,
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
|
||||
if inst == prologue_end {
|
||||
found_end = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !found_end {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Self {
|
||||
flags: 0, // this assumes cranelift functions have no SEH handlers
|
||||
prologue_size: prologue_size as u8,
|
||||
frame_register,
|
||||
frame_register_offset: 0,
|
||||
unwind_codes,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn size(&self) -> usize {
|
||||
let node_count = self.node_count();
|
||||
|
||||
// Calculation of the size requires no SEH handler or chained info
|
||||
assert!(self.flags == 0);
|
||||
|
||||
// Size of fixed part of UNWIND_INFO is 4 bytes
|
||||
// Then comes the UNWIND_CODE nodes (2 bytes each)
|
||||
// Then comes 2 bytes of padding for the unwind codes if necessary
|
||||
// Next would come the SEH data, but we assert above that the function doesn't have SEH data
|
||||
|
||||
4 + (node_count * 2) + if (node_count & 1) == 1 { 2 } else { 0 }
|
||||
}
|
||||
|
||||
pub fn node_count(&self) -> usize {
|
||||
self.unwind_codes
|
||||
.iter()
|
||||
.fold(0, |nodes, c| nodes + c.node_count())
|
||||
}
|
||||
|
||||
pub fn emit(&self, mem: &mut Vec<u8>) -> std::io::Result<()> {
|
||||
const UNWIND_INFO_VERSION: u8 = 1;
|
||||
|
||||
let size = self.size();
|
||||
let offset = mem.len();
|
||||
|
||||
// Ensure the memory is 32-bit aligned
|
||||
assert_eq!(offset % 4, 0);
|
||||
|
||||
mem.reserve(offset + size);
|
||||
|
||||
let node_count = self.node_count();
|
||||
assert!(node_count <= 256);
|
||||
|
||||
mem.write_u8((self.flags << 3) | UNWIND_INFO_VERSION)?;
|
||||
mem.write_u8(self.prologue_size)?;
|
||||
mem.write_u8(node_count as u8)?;
|
||||
|
||||
if let Some(reg) = self.frame_register {
|
||||
mem.write_u8((self.frame_register_offset << 4) | reg as u8)?;
|
||||
} else {
|
||||
mem.write_u8(0)?;
|
||||
}
|
||||
|
||||
// Unwind codes are written in reverse order (prologue offset descending)
|
||||
for code in self.unwind_codes.iter().rev() {
|
||||
code.emit(mem)?;
|
||||
}
|
||||
|
||||
// To keep a 32-bit alignment, emit 2 bytes of padding if there's an odd number of 16-bit nodes
|
||||
if (node_count & 1) == 1 {
|
||||
mem.write_u16::<LittleEndian>(0)?;
|
||||
}
|
||||
|
||||
// Ensure the correct number of bytes was emitted
|
||||
assert_eq!(mem.len() - offset, size);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::ir::{ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind};
|
||||
use crate::isa::{lookup, CallConv};
|
||||
use crate::settings::{builder, Flags};
|
||||
use crate::Context;
|
||||
use std::str::FromStr;
|
||||
use target_lexicon::triple;
|
||||
|
||||
#[test]
|
||||
fn test_wrong_calling_convention() {
|
||||
let isa = lookup(triple!("x86_64"))
|
||||
.expect("expect x86 ISA")
|
||||
.finish(Flags::new(builder()));
|
||||
|
||||
let mut context = Context::for_function(create_function(CallConv::SystemV, None));
|
||||
|
||||
context.compile(&*isa).expect("expected compilation");
|
||||
|
||||
assert_eq!(UnwindInfo::try_from_func(&context.func, &*isa, None), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_small_alloc() {
|
||||
let isa = lookup(triple!("x86_64"))
|
||||
.expect("expect x86 ISA")
|
||||
.finish(Flags::new(builder()));
|
||||
|
||||
let mut context = Context::for_function(create_function(
|
||||
CallConv::WindowsFastcall,
|
||||
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
|
||||
));
|
||||
|
||||
context.compile(&*isa).expect("expected compilation");
|
||||
|
||||
let unwind = UnwindInfo::try_from_func(&context.func, &*isa, Some(RU::rbp.into()))
|
||||
.expect("expected unwind info");
|
||||
|
||||
assert_eq!(
|
||||
unwind,
|
||||
UnwindInfo {
|
||||
flags: 0,
|
||||
prologue_size: 9,
|
||||
frame_register: Some(RU::rbp.into()),
|
||||
frame_register_offset: 0,
|
||||
unwind_codes: vec![
|
||||
UnwindCode::PushRegister {
|
||||
offset: 2,
|
||||
reg: RU::rbp.into()
|
||||
},
|
||||
UnwindCode::SetFramePointer {
|
||||
offset: 5,
|
||||
sp_offset: 0
|
||||
},
|
||||
UnwindCode::StackAlloc {
|
||||
offset: 9,
|
||||
size: 64 + 32
|
||||
}
|
||||
]
|
||||
}
|
||||
);
|
||||
|
||||
assert_eq!(unwind.size(), 12);
|
||||
|
||||
let mut mem = Vec::new();
|
||||
unwind
|
||||
.emit(&mut mem)
|
||||
.expect("failed to emit unwind information");
|
||||
|
||||
assert_eq!(
|
||||
mem,
|
||||
[
|
||||
0x01, // Version and flags (version 1, no flags)
|
||||
0x09, // Prologue size
|
||||
0x03, // Unwind code count (1 for stack alloc, 1 for save frame reg, 1 for push reg)
|
||||
0x05, // Frame register + offset (RBP with 0 offset)
|
||||
0x09, // Prolog offset
|
||||
0xB2, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0xB * 8) + 8 = 96 (64 + 32) bytes)
|
||||
0x05, // Prolog offset
|
||||
0x03, // Operation 3 (save frame register), stack pointer offset = 0
|
||||
0x02, // Prolog offset
|
||||
0x50, // Operation 0 (save nonvolatile register), reg = 5 (RBP)
|
||||
0x00, // Padding byte
|
||||
0x00, // Padding byte
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_medium_alloc() {
|
||||
let isa = lookup(triple!("x86_64"))
|
||||
.expect("expect x86 ISA")
|
||||
.finish(Flags::new(builder()));
|
||||
|
||||
let mut context = Context::for_function(create_function(
|
||||
CallConv::WindowsFastcall,
|
||||
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 10000)),
|
||||
));
|
||||
|
||||
context.compile(&*isa).expect("expected compilation");
|
||||
|
||||
let unwind = UnwindInfo::try_from_func(&context.func, &*isa, Some(RU::rbp.into()))
|
||||
.expect("expected unwind info");
|
||||
|
||||
assert_eq!(
|
||||
unwind,
|
||||
UnwindInfo {
|
||||
flags: 0,
|
||||
prologue_size: 27,
|
||||
frame_register: Some(RU::rbp.into()),
|
||||
frame_register_offset: 0,
|
||||
unwind_codes: vec![
|
||||
UnwindCode::PushRegister {
|
||||
offset: 2,
|
||||
reg: RU::rbp.into()
|
||||
},
|
||||
UnwindCode::SetFramePointer {
|
||||
offset: 5,
|
||||
sp_offset: 0
|
||||
},
|
||||
UnwindCode::StackAlloc {
|
||||
offset: 27,
|
||||
size: 10000 + 32
|
||||
}
|
||||
]
|
||||
}
|
||||
);
|
||||
|
||||
assert_eq!(unwind.size(), 12);
|
||||
|
||||
let mut mem = Vec::new();
|
||||
unwind
|
||||
.emit(&mut mem)
|
||||
.expect("failed to emit unwind information");
|
||||
|
||||
assert_eq!(
|
||||
mem,
|
||||
[
|
||||
0x01, // Version and flags (version 1, no flags)
|
||||
0x1B, // Prologue size
|
||||
0x04, // Unwind code count (2 for stack alloc, 1 for save frame reg, 1 for push reg)
|
||||
0x05, // Frame register + offset (RBP with 0 offset)
|
||||
0x1B, // Prolog offset
|
||||
0x01, // Operation 1 (large stack alloc), size is scaled 16-bits (info = 0)
|
||||
0xE6, // Low size byte
|
||||
0x04, // High size byte (e.g. 0x04E6 * 8 = 100032 (10000 + 32) bytes)
|
||||
0x05, // Prolog offset
|
||||
0x03, // Operation 3 (save frame register), stack pointer offset = 0
|
||||
0x02, // Prolog offset
|
||||
0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_large_alloc() {
|
||||
let isa = lookup(triple!("x86_64"))
|
||||
.expect("expect x86 ISA")
|
||||
.finish(Flags::new(builder()));
|
||||
|
||||
let mut context = Context::for_function(create_function(
|
||||
CallConv::WindowsFastcall,
|
||||
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 1000000)),
|
||||
));
|
||||
|
||||
context.compile(&*isa).expect("expected compilation");
|
||||
|
||||
let unwind = UnwindInfo::try_from_func(&context.func, &*isa, Some(RU::rbp.into()))
|
||||
.expect("expected unwind info");
|
||||
|
||||
assert_eq!(
|
||||
unwind,
|
||||
UnwindInfo {
|
||||
flags: 0,
|
||||
prologue_size: 27,
|
||||
frame_register: Some(RU::rbp.into()),
|
||||
frame_register_offset: 0,
|
||||
unwind_codes: vec![
|
||||
UnwindCode::PushRegister {
|
||||
offset: 2,
|
||||
reg: RU::rbp.into()
|
||||
},
|
||||
UnwindCode::SetFramePointer {
|
||||
offset: 5,
|
||||
sp_offset: 0
|
||||
},
|
||||
UnwindCode::StackAlloc {
|
||||
offset: 27,
|
||||
size: 1000000 + 32
|
||||
}
|
||||
]
|
||||
}
|
||||
);
|
||||
|
||||
assert_eq!(unwind.size(), 16);
|
||||
|
||||
let mut mem = Vec::new();
|
||||
unwind
|
||||
.emit(&mut mem)
|
||||
.expect("failed to emit unwind information");
|
||||
|
||||
assert_eq!(
|
||||
mem,
|
||||
[
|
||||
0x01, // Version and flags (version 1, no flags)
|
||||
0x1B, // Prologue size
|
||||
0x05, // Unwind code count (3 for stack alloc, 1 for save frame reg, 1 for push reg)
|
||||
0x05, // Frame register + offset (RBP with 0 offset)
|
||||
0x1B, // Prolog offset
|
||||
0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1)
|
||||
0x60, // Byte 1 of size
|
||||
0x42, // Byte 2 of size
|
||||
0x0F, // Byte 3 of size
|
||||
0x00, // Byte 4 of size (size is 0xF4260 = 1000032 (1000000 + 32) bytes)
|
||||
0x05, // Prolog offset
|
||||
0x03, // Operation 3 (save frame register), stack pointer offset = 0
|
||||
0x02, // Prolog offset
|
||||
0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
|
||||
0x00, // Padding byte
|
||||
0x00, // Padding byte
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
|
||||
let mut func =
|
||||
Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
|
||||
|
||||
let ebb0 = func.dfg.make_ebb();
|
||||
let mut pos = FuncCursor::new(&mut func);
|
||||
pos.insert_ebb(ebb0);
|
||||
pos.ins().return_(&[]);
|
||||
|
||||
if let Some(stack_slot) = stack_slot {
|
||||
func.stack_slots.push(stack_slot);
|
||||
}
|
||||
|
||||
func
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user