Hoist the stack alignment and Windows64 fastcall shadow stack space constants.
This commit is contained in:
@@ -34,6 +34,29 @@ static ARG_GPRS_WIN_FASTCALL_X64: [RU; 4] = [RU::rcx, RU::rdx, RU::r8, RU::r9];
|
|||||||
/// Return value registers for x86-64, when using windows fastcall
|
/// Return value registers for x86-64, when using windows fastcall
|
||||||
static RET_GPRS_WIN_FASTCALL_X64: [RU; 1] = [RU::rax];
|
static RET_GPRS_WIN_FASTCALL_X64: [RU; 1] = [RU::rax];
|
||||||
|
|
||||||
|
/// The win64 fastcall ABI uses some shadow stack space, allocated by the caller, that can be used
|
||||||
|
/// by the callee for temporary values.
|
||||||
|
///
|
||||||
|
/// [1] "Space is allocated on the call stack as a shadow store for callees to save". This shadow
|
||||||
|
/// store contains the parameters which are passed through registers (ARG_GPRS) and is eventually
|
||||||
|
/// used by the callee to save & restore the values of the arguments.
|
||||||
|
///
|
||||||
|
/// [2] https://blogs.msdn.microsoft.com/oldnewthing/20110302-00/?p=11333 "Although the x64 calling
|
||||||
|
/// convention reserves spill space for parameters, you don’t have to use them as such"
|
||||||
|
const WIN_SHADOW_STACK_SPACE: i32 = 32;
|
||||||
|
|
||||||
|
/// Stack alignment requirement for functions.
|
||||||
|
///
|
||||||
|
/// 16 bytes is the perfect stack alignment, because:
|
||||||
|
///
|
||||||
|
/// - On Win64, "The primary exceptions are the stack pointer and malloc or alloca memory, which
|
||||||
|
/// are aligned to 16 bytes in order to aid performance".
|
||||||
|
/// - The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but newer versions use a
|
||||||
|
/// 16-byte aligned stack pointer.
|
||||||
|
/// - This allows using aligned loads and stores on SIMD vectors of 16 bytes that are located
|
||||||
|
/// higher up in the stack.
|
||||||
|
const STACK_ALIGNMENT: u32 = 16;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
struct Args {
|
struct Args {
|
||||||
pointer_bytes: u8,
|
pointer_bytes: u8,
|
||||||
@@ -60,12 +83,10 @@ impl Args {
|
|||||||
isa_flags: &isa_settings::Flags,
|
isa_flags: &isa_settings::Flags,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let offset = if call_conv.extends_windows_fastcall() {
|
let offset = if call_conv.extends_windows_fastcall() {
|
||||||
// [1] "The caller is responsible for allocating space for parameters to the callee,
|
WIN_SHADOW_STACK_SPACE
|
||||||
// and must always allocate sufficient space to store four register parameters"
|
|
||||||
32
|
|
||||||
} else {
|
} else {
|
||||||
0
|
0
|
||||||
};
|
} as u32;
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
pointer_bytes: bits / 8,
|
pointer_bytes: bits / 8,
|
||||||
@@ -431,11 +452,9 @@ fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) ->
|
|||||||
"baldrdash does not expect cranelift to emit stack probes"
|
"baldrdash does not expect cranelift to emit stack probes"
|
||||||
);
|
);
|
||||||
|
|
||||||
// Baldrdash on 32-bit x86 always aligns its stack pointer to 16 bytes.
|
|
||||||
let stack_align = 16;
|
|
||||||
let word_size = StackSize::from(isa.pointer_bytes());
|
let word_size = StackSize::from(isa.pointer_bytes());
|
||||||
let shadow_store_size = if func.signature.call_conv.extends_windows_fastcall() {
|
let shadow_store_size = if func.signature.call_conv.extends_windows_fastcall() {
|
||||||
32
|
WIN_SHADOW_STACK_SPACE as u32
|
||||||
} else {
|
} else {
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
@@ -448,7 +467,7 @@ fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) ->
|
|||||||
func.stack_slots.push(ss);
|
func.stack_slots.push(ss);
|
||||||
|
|
||||||
let is_leaf = func.is_leaf();
|
let is_leaf = func.is_leaf();
|
||||||
layout_stack(&mut func.stack_slots, is_leaf, stack_align)?;
|
layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -459,23 +478,8 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||||||
panic!("TODO: windows-fastcall: x86-32 not implemented yet");
|
panic!("TODO: windows-fastcall: x86-32 not implemented yet");
|
||||||
}
|
}
|
||||||
|
|
||||||
// [1] "The primary exceptions are the stack pointer and malloc or alloca memory,
|
|
||||||
// which are aligned to 16 bytes in order to aid performance"
|
|
||||||
let stack_align = 16;
|
|
||||||
|
|
||||||
let word_size = isa.pointer_bytes() as usize;
|
|
||||||
let reg_type = isa.pointer_type();
|
|
||||||
|
|
||||||
let csrs = callee_saved_gprs_used(isa, func);
|
let csrs = callee_saved_gprs_used(isa, func);
|
||||||
|
|
||||||
// [1] "Space is allocated on the call stack as a shadow store for callees to save"
|
|
||||||
// This shadow store contains the parameters which are passed through registers (ARG_GPRS)
|
|
||||||
// and is eventually used by the callee to save & restore the values of the arguments.
|
|
||||||
//
|
|
||||||
// [2] https://blogs.msdn.microsoft.com/oldnewthing/20110302-00/?p=11333
|
|
||||||
// "Although the x64 calling convention reserves spill space for parameters,
|
|
||||||
// you don’t have to use them as such"
|
|
||||||
//
|
|
||||||
// The reserved stack area is composed of:
|
// The reserved stack area is composed of:
|
||||||
// return address + frame pointer + all callee-saved registers + shadow space
|
// return address + frame pointer + all callee-saved registers + shadow space
|
||||||
//
|
//
|
||||||
@@ -483,7 +487,7 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||||||
// instruction. Each of the others we will then push explicitly. Then we
|
// instruction. Each of the others we will then push explicitly. Then we
|
||||||
// will adjust the stack pointer to make room for the rest of the required
|
// will adjust the stack pointer to make room for the rest of the required
|
||||||
// space for this frame.
|
// space for this frame.
|
||||||
const SHADOW_STORE_SIZE: i32 = 32;
|
let word_size = isa.pointer_bytes() as usize;
|
||||||
let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;
|
let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;
|
||||||
|
|
||||||
// TODO: eventually use the 32 bytes (shadow store) as spill slot. This currently doesn't work
|
// TODO: eventually use the 32 bytes (shadow store) as spill slot. This currently doesn't work
|
||||||
@@ -492,14 +496,15 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||||||
func.create_stack_slot(ir::StackSlotData {
|
func.create_stack_slot(ir::StackSlotData {
|
||||||
kind: ir::StackSlotKind::IncomingArg,
|
kind: ir::StackSlotKind::IncomingArg,
|
||||||
size: csr_stack_size as u32,
|
size: csr_stack_size as u32,
|
||||||
offset: Some(-(SHADOW_STORE_SIZE + csr_stack_size)),
|
offset: Some(-(WIN_SHADOW_STACK_SPACE + csr_stack_size)),
|
||||||
});
|
});
|
||||||
|
|
||||||
let is_leaf = func.is_leaf();
|
let is_leaf = func.is_leaf();
|
||||||
let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, stack_align)? as i32;
|
let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32;
|
||||||
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
|
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
|
||||||
|
|
||||||
// Add CSRs to function signature
|
// Add CSRs to function signature
|
||||||
|
let reg_type = isa.pointer_type();
|
||||||
let fp_arg = ir::AbiParam::special_reg(
|
let fp_arg = ir::AbiParam::special_reg(
|
||||||
reg_type,
|
reg_type,
|
||||||
ir::ArgumentPurpose::FramePointer,
|
ir::ArgumentPurpose::FramePointer,
|
||||||
@@ -528,12 +533,8 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||||||
|
|
||||||
/// Insert a System V-compatible prologue and epilogue.
|
/// Insert a System V-compatible prologue and epilogue.
|
||||||
fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
|
fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
|
||||||
// The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but
|
|
||||||
// newer versions use a 16-byte aligned stack pointer.
|
|
||||||
let stack_align = 16;
|
|
||||||
let pointer_width = isa.triple().pointer_width().unwrap();
|
let pointer_width = isa.triple().pointer_width().unwrap();
|
||||||
let word_size = pointer_width.bytes() as usize;
|
let word_size = pointer_width.bytes() as usize;
|
||||||
let reg_type = ir::Type::int(u16::from(pointer_width.bits())).unwrap();
|
|
||||||
|
|
||||||
let csrs = callee_saved_gprs_used(isa, func);
|
let csrs = callee_saved_gprs_used(isa, func);
|
||||||
|
|
||||||
@@ -552,10 +553,11 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||||||
});
|
});
|
||||||
|
|
||||||
let is_leaf = func.is_leaf();
|
let is_leaf = func.is_leaf();
|
||||||
let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, stack_align)? as i32;
|
let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32;
|
||||||
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
|
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
|
||||||
|
|
||||||
// Add CSRs to function signature
|
// Add CSRs to function signature
|
||||||
|
let reg_type = ir::Type::int(u16::from(pointer_width.bits())).unwrap();
|
||||||
let fp_arg = ir::AbiParam::special_reg(
|
let fp_arg = ir::AbiParam::special_reg(
|
||||||
reg_type,
|
reg_type,
|
||||||
ir::ArgumentPurpose::FramePointer,
|
ir::ArgumentPurpose::FramePointer,
|
||||||
|
|||||||
Reference in New Issue
Block a user