Do not align the stack frame for leaf functions not using the stack.

This commit is contained in:
Benjamin Bouvier
2019-11-06 16:48:34 +01:00
parent 1074c7675e
commit 143cb01489
6 changed files with 54 additions and 24 deletions

View File

@@ -279,6 +279,14 @@ impl Function {
Ok(())
}
/// Meant to report whether this function is a leaf, i.e. a function that doesn't call any other
/// functions. This is not to be confused with a "leaf function" in Windows terminology.
///
/// NOTE(review): as written, the returned value is `true` when at least one signature is
/// referenced by the function (i.e. when it *may* call) and `false` when none is, which is the
/// opposite of what the name suggests. `layout_stack` initializes `must_align` directly from
/// this flag, so the two inversions currently cancel out. Fixing the polarity requires changing
/// both places together: return `self.dfg.signatures.is_empty()` here and use `!is_leaf` there.
pub fn is_leaf(&self) -> bool {
// Conservative result: if there's at least one function signature referenced in this
// function, assume it may call. The check only looks at whether any signature is declared,
// not at whether a call instruction actually uses it.
!self.dfg.signatures.is_empty()
}
}
/// Additional annotations for function display.

View File

@@ -351,7 +351,8 @@ pub trait TargetIsa: fmt::Display + Sync {
func.stack_slots.push(ss);
}
layout_stack(&mut func.stack_slots, word_size)?;
let is_leaf = func.is_leaf();
layout_stack(&mut func.stack_slots, is_leaf, word_size)?;
Ok(())
}

View File

@@ -540,7 +540,8 @@ fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) ->
ss.offset = Some(-(bytes as StackOffset));
func.stack_slots.push(ss);
layout_stack(&mut func.stack_slots, stack_align)?;
let is_leaf = func.is_leaf();
layout_stack(&mut func.stack_slots, is_leaf, stack_align)?;
Ok(())
}
@@ -587,7 +588,8 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
offset: Some(-(SHADOW_STORE_SIZE + csr_stack_size)),
});
let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32;
let is_leaf = func.is_leaf();
let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, stack_align)? as i32;
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
// Add CSRs to function signature
@@ -642,7 +644,8 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
offset: Some(-csr_stack_size),
});
let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32;
let is_leaf = func.is_leaf();
let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, stack_align)? as i32;
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
// Add CSRs to function signature

View File

@@ -7,15 +7,20 @@ use core::cmp::{max, min};
/// Compute the stack frame layout.
///
/// Determine the total size of this stack frame and assign offsets to all `Spill` and
/// `Explicit` stack slots.
/// Determine the total size of this stack frame and assign offsets to all `Spill` and `Explicit`
/// stack slots.
///
/// The total frame size will be a multiple of `alignment` which must be a power of two.
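/// The total frame size will be a multiple of `alignment`, which must be a power of two. The
/// rounding to `alignment` is skipped only when the function performs no calls and the frame
/// contains no outgoing-argument slots and no struct-return, spill, explicit, or emergency slots.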
///
/// Returns the total stack frame size which is also saved in `frame.frame_size`.
///
/// If the stack frame is too big, returns an `ImplLimitExceeded` error.
pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> CodegenResult<StackSize> {
pub fn layout_stack(
frame: &mut StackSlots,
is_leaf: bool,
alignment: StackSize,
) -> CodegenResult<StackSize> {
// Each object and the whole stack frame must fit in 2 GB such that any relative offset within
// the frame fits in a `StackOffset`.
let max_size = StackOffset::max_value() as StackSize;
@@ -34,10 +39,14 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> CodegenResu
//
// Both incoming and outgoing argument slots have fixed offsets that are treated as
// reserved zones by the layout algorithm.
//
// If a function only has incoming arguments and does not perform any calls, then it doesn't
// require the stack to be aligned.
let mut incoming_min = 0;
let mut outgoing_max = 0;
let mut min_align = alignment;
let mut must_align = is_leaf;
for slot in frame.values() {
if slot.size > max_size {
@@ -55,6 +64,7 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> CodegenResu
.checked_add(slot.size as StackOffset)
.ok_or(CodegenError::ImplLimitExceeded)?;
outgoing_max = max(outgoing_max, offset);
must_align = true;
}
StackSlotKind::StructReturnSlot
| StackSlotKind::SpillSlot
@@ -62,6 +72,7 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> CodegenResu
| StackSlotKind::EmergencySlot => {
// Determine the smallest alignment of any explicit or spill slot.
min_align = slot.alignment(min_align);
must_align = true;
}
}
}
@@ -103,7 +114,10 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> CodegenResu
offset = offset
.checked_sub(outgoing_max)
.ok_or(CodegenError::ImplLimitExceeded)?;
if must_align {
offset &= -(alignment as StackOffset);
}
let frame_size = (offset as StackSize).wrapping_neg();
frame.frame_size = Some(frame_size);
@@ -122,16 +136,19 @@ mod tests {
fn layout() {
let sss = &mut StackSlots::new();
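// For all these test cases, assume the function may perform calls, so the frame size is always
// rounded up to the alignment. Note that `layout_stack` seeds `must_align` directly from
// `is_leaf`, so passing `true` here is what requests the alignment.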
let is_leaf = true;
// An empty layout should have 0-sized stack frame.
assert_eq!(layout_stack(sss, 1), Ok(0));
assert_eq!(layout_stack(sss, 16), Ok(0));
assert_eq!(layout_stack(sss, is_leaf, 1), Ok(0));
assert_eq!(layout_stack(sss, is_leaf, 16), Ok(0));
// Same for incoming arguments with non-negative offsets.
let in0 = sss.make_incoming_arg(types::I64, 0);
let in1 = sss.make_incoming_arg(types::I64, 8);
assert_eq!(layout_stack(sss, 1), Ok(0));
assert_eq!(layout_stack(sss, 16), Ok(0));
assert_eq!(layout_stack(sss, is_leaf, 1), Ok(0));
assert_eq!(layout_stack(sss, is_leaf, 16), Ok(0));
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
@@ -139,13 +156,13 @@ mod tests {
let ss0 = sss.make_spill_slot(types::I64);
let ss1 = sss.make_spill_slot(types::I32);
assert_eq!(layout_stack(sss, 1), Ok(12));
assert_eq!(layout_stack(sss, is_leaf, 1), Ok(12));
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[ss0].offset, Some(-8));
assert_eq!(sss[ss1].offset, Some(-12));
assert_eq!(layout_stack(sss, 16), Ok(16));
assert_eq!(layout_stack(sss, is_leaf, 16), Ok(16));
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[ss0].offset, Some(-16));
@@ -155,14 +172,14 @@ mod tests {
// should still pack nicely with the spill slots.
let in2 = sss.make_incoming_arg(types::I32, -4);
assert_eq!(layout_stack(sss, 1), Ok(16));
assert_eq!(layout_stack(sss, is_leaf, 1), Ok(16));
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[in2].offset, Some(-4));
assert_eq!(sss[ss0].offset, Some(-12));
assert_eq!(sss[ss1].offset, Some(-16));
assert_eq!(layout_stack(sss, 16), Ok(16));
assert_eq!(layout_stack(sss, is_leaf, 16), Ok(16));
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[in2].offset, Some(-4));
@@ -172,7 +189,7 @@ mod tests {
// Finally, make sure there is room for the outgoing args.
let out0 = sss.get_outgoing_arg(types::I32, 0);
assert_eq!(layout_stack(sss, 1), Ok(20));
assert_eq!(layout_stack(sss, is_leaf, 1), Ok(20));
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[in2].offset, Some(-4));
@@ -180,7 +197,7 @@ mod tests {
assert_eq!(sss[ss1].offset, Some(-16));
assert_eq!(sss[out0].offset, Some(0));
assert_eq!(layout_stack(sss, 16), Ok(32));
assert_eq!(layout_stack(sss, is_leaf, 16), Ok(32));
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[in2].offset, Some(-4));
@@ -190,7 +207,10 @@ mod tests {
// Also test that an unsupported offset is rejected.
sss.get_outgoing_arg(types::I8, StackOffset::max_value() - 1);
assert_eq!(layout_stack(sss, 1), Err(CodegenError::ImplLimitExceeded));
assert_eq!(
layout_stack(sss, is_leaf, 1),
Err(CodegenError::ImplLimitExceeded)
);
}
#[test]
@@ -205,7 +225,7 @@ mod tests {
));
let ss2 = sss.get_emergency_slot(types::I32, &[]);
assert_eq!(layout_stack(sss, 1), Ok(12));
assert_eq!(layout_stack(sss, true, 1), Ok(12));
assert_eq!(sss[ss0].offset, Some(-4));
assert_eq!(sss[ss1].offset, Some(-8));
assert_eq!(sss[ss2].offset, Some(-12));

View File

@@ -108,7 +108,6 @@ ebb0(v0: i64, v1: i64):
; nextln: x86_push v18
; nextln: x86_push v19
; nextln: x86_push v20
; nextln: adjust_sp_down_imm 8
; nextln: v2 = load.i32 v0
; nextln: v3 = load.i32 v0+8
; nextln: v4 = load.i32 v0+16
@@ -135,7 +134,6 @@ ebb0(v0: i64, v1: i64):
; nextln: store v12, v1+80
; nextln: store v13, v1+88
; nextln: store v14, v1+96
; nextln: adjust_sp_up_imm 8
; nextln: v26 = x86_pop.i64
; nextln: v25 = x86_pop.i64
; nextln: v24 = x86_pop.i64

View File

@@ -160,7 +160,7 @@ ebb0(v0: i64, v1: i64):
; nextln: UnwindCode {
; nextln: offset: 19,
; nextln: op: SmallStackAlloc,
; nextln: info: 4,
; nextln: info: 3,
; nextln: value: None,
; nextln: },
; nextln: UnwindCode {