diff --git a/cranelift/codegen/src/ir/function.rs b/cranelift/codegen/src/ir/function.rs
index 5318871689..f226eea98f 100644
--- a/cranelift/codegen/src/ir/function.rs
+++ b/cranelift/codegen/src/ir/function.rs
@@ -279,6 +279,14 @@ impl Function {
 
         Ok(())
     }
+
+    /// Returns true if this function doesn't call any other functions. This is not to be
+    /// confused with a "leaf function" in Windows terminology.
+    pub fn is_leaf(&self) -> bool {
+        // Conservative result: if there's at least one function signature referenced in this
+        // function, assume it may call, i.e. that it is not a leaf.
+        self.dfg.signatures.is_empty()
+    }
 }
 
 /// Additional annotations for function display.
diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs
index aaa802b935..35f3581d7e 100644
--- a/cranelift/codegen/src/isa/mod.rs
+++ b/cranelift/codegen/src/isa/mod.rs
@@ -351,7 +351,8 @@ pub trait TargetIsa: fmt::Display + Sync {
             func.stack_slots.push(ss);
         }
 
-        layout_stack(&mut func.stack_slots, word_size)?;
+        let is_leaf = func.is_leaf();
+        layout_stack(&mut func.stack_slots, is_leaf, word_size)?;
         Ok(())
     }
 
diff --git a/cranelift/codegen/src/isa/x86/abi.rs b/cranelift/codegen/src/isa/x86/abi.rs
index ef4b9f693e..fb4f9e53d5 100644
--- a/cranelift/codegen/src/isa/x86/abi.rs
+++ b/cranelift/codegen/src/isa/x86/abi.rs
@@ -540,7 +540,8 @@ fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) ->
     ss.offset = Some(-(bytes as StackOffset));
     func.stack_slots.push(ss);
 
-    layout_stack(&mut func.stack_slots, stack_align)?;
+    let is_leaf = func.is_leaf();
+    layout_stack(&mut func.stack_slots, is_leaf, stack_align)?;
     Ok(())
 }
 
@@ -587,7 +588,8 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
         offset: Some(-(SHADOW_STORE_SIZE + csr_stack_size)),
     });
 
-    let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32;
+    let is_leaf = func.is_leaf();
+    let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, stack_align)? as i32;
     let local_stack_size = i64::from(total_stack_size - csr_stack_size);
 
     // Add CSRs to function signature
@@ -642,7 +644,8 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
         offset: Some(-csr_stack_size),
     });
 
-    let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32;
+    let is_leaf = func.is_leaf();
+    let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, stack_align)? as i32;
     let local_stack_size = i64::from(total_stack_size - csr_stack_size);
 
     // Add CSRs to function signature
diff --git a/cranelift/codegen/src/stack_layout.rs b/cranelift/codegen/src/stack_layout.rs
index 732f9365c0..c335b844af 100644
--- a/cranelift/codegen/src/stack_layout.rs
+++ b/cranelift/codegen/src/stack_layout.rs
@@ -7,15 +7,20 @@ use core::cmp::{max, min};
 
 /// Compute the stack frame layout.
 ///
-/// Determine the total size of this stack frame and assign offsets to all `Spill` and
-/// `Explicit` stack slots.
+/// Determine the total size of this stack frame and assign offsets to all `Spill` and `Explicit`
+/// stack slots.
 ///
-/// The total frame size will be a multiple of `alignment` which must be a power of two.
+/// The total frame size will be a multiple of `alignment` which must be a power of two, unless the
+/// function doesn't perform any call.
 ///
 /// Returns the total stack frame size which is also saved in `frame.frame_size`.
 ///
 /// If the stack frame is too big, returns an `ImplLimitExceeded` error.
-pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> CodegenResult {
+pub fn layout_stack(
+    frame: &mut StackSlots,
+    is_leaf: bool,
+    alignment: StackSize,
+) -> CodegenResult {
     // Each object and the whole stack frame must fit in 2 GB such that any relative offset within
     // the frame fits in a `StackOffset`.
     let max_size = StackOffset::max_value() as StackSize;
@@ -34,10 +39,14 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> CodegenResu
     //
     // Both incoming and outgoing argument slots have fixed offsets that are treated as
     // reserved zones by the layout algorithm.
+    //
+    // If a function only has incoming arguments and does not perform any calls, then it doesn't
+    // require the stack to be aligned.
 
     let mut incoming_min = 0;
     let mut outgoing_max = 0;
     let mut min_align = alignment;
+    let mut must_align = !is_leaf;
 
     for slot in frame.values() {
         if slot.size > max_size {
@@ -55,6 +64,7 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> CodegenResu
                     .checked_add(slot.size as StackOffset)
                     .ok_or(CodegenError::ImplLimitExceeded)?;
                 outgoing_max = max(outgoing_max, offset);
+                must_align = true;
             }
             StackSlotKind::StructReturnSlot
             | StackSlotKind::SpillSlot
@@ -62,6 +72,7 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> CodegenResu
             | StackSlotKind::EmergencySlot => {
                 // Determine the smallest alignment of any explicit or spill slot.
                 min_align = slot.alignment(min_align);
+                must_align = true;
             }
         }
     }
@@ -103,7 +114,10 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> CodegenResu
     offset = offset
         .checked_sub(outgoing_max)
         .ok_or(CodegenError::ImplLimitExceeded)?;
-    offset &= -(alignment as StackOffset);
+
+    if must_align {
+        offset &= -(alignment as StackOffset);
+    }
 
     let frame_size = (offset as StackSize).wrapping_neg();
     frame.frame_size = Some(frame_size);
@@ -122,16 +136,19 @@ mod tests {
     fn layout() {
         let sss = &mut StackSlots::new();
 
+        // For all these test cases, assume it will call.
+        let is_leaf = false;
+
         // An empty layout should have 0-sized stack frame.
-        assert_eq!(layout_stack(sss, 1), Ok(0));
-        assert_eq!(layout_stack(sss, 16), Ok(0));
+        assert_eq!(layout_stack(sss, is_leaf, 1), Ok(0));
+        assert_eq!(layout_stack(sss, is_leaf, 16), Ok(0));
 
         // Same for incoming arguments with non-negative offsets.
         let in0 = sss.make_incoming_arg(types::I64, 0);
         let in1 = sss.make_incoming_arg(types::I64, 8);
 
-        assert_eq!(layout_stack(sss, 1), Ok(0));
-        assert_eq!(layout_stack(sss, 16), Ok(0));
+        assert_eq!(layout_stack(sss, is_leaf, 1), Ok(0));
+        assert_eq!(layout_stack(sss, is_leaf, 16), Ok(0));
         assert_eq!(sss[in0].offset, Some(0));
         assert_eq!(sss[in1].offset, Some(8));
 
@@ -139,13 +156,13 @@ mod tests {
         let ss0 = sss.make_spill_slot(types::I64);
         let ss1 = sss.make_spill_slot(types::I32);
 
-        assert_eq!(layout_stack(sss, 1), Ok(12));
+        assert_eq!(layout_stack(sss, is_leaf, 1), Ok(12));
         assert_eq!(sss[in0].offset, Some(0));
         assert_eq!(sss[in1].offset, Some(8));
         assert_eq!(sss[ss0].offset, Some(-8));
         assert_eq!(sss[ss1].offset, Some(-12));
 
-        assert_eq!(layout_stack(sss, 16), Ok(16));
+        assert_eq!(layout_stack(sss, is_leaf, 16), Ok(16));
         assert_eq!(sss[in0].offset, Some(0));
         assert_eq!(sss[in1].offset, Some(8));
         assert_eq!(sss[ss0].offset, Some(-16));
@@ -155,14 +172,14 @@ mod tests {
         // should still pack nicely with the spill slots.
         let in2 = sss.make_incoming_arg(types::I32, -4);
 
-        assert_eq!(layout_stack(sss, 1), Ok(16));
+        assert_eq!(layout_stack(sss, is_leaf, 1), Ok(16));
         assert_eq!(sss[in0].offset, Some(0));
         assert_eq!(sss[in1].offset, Some(8));
         assert_eq!(sss[in2].offset, Some(-4));
         assert_eq!(sss[ss0].offset, Some(-12));
         assert_eq!(sss[ss1].offset, Some(-16));
 
-        assert_eq!(layout_stack(sss, 16), Ok(16));
+        assert_eq!(layout_stack(sss, is_leaf, 16), Ok(16));
         assert_eq!(sss[in0].offset, Some(0));
         assert_eq!(sss[in1].offset, Some(8));
         assert_eq!(sss[in2].offset, Some(-4));
@@ -172,7 +189,7 @@ mod tests {
         // Finally, make sure there is room for the outgoing args.
         let out0 = sss.get_outgoing_arg(types::I32, 0);
 
-        assert_eq!(layout_stack(sss, 1), Ok(20));
+        assert_eq!(layout_stack(sss, is_leaf, 1), Ok(20));
         assert_eq!(sss[in0].offset, Some(0));
         assert_eq!(sss[in1].offset, Some(8));
         assert_eq!(sss[in2].offset, Some(-4));
@@ -180,7 +197,7 @@ mod tests {
         assert_eq!(sss[ss1].offset, Some(-16));
         assert_eq!(sss[out0].offset, Some(0));
 
-        assert_eq!(layout_stack(sss, 16), Ok(32));
+        assert_eq!(layout_stack(sss, is_leaf, 16), Ok(32));
         assert_eq!(sss[in0].offset, Some(0));
         assert_eq!(sss[in1].offset, Some(8));
         assert_eq!(sss[in2].offset, Some(-4));
@@ -190,7 +207,10 @@ mod tests {
 
         // Also test that an unsupported offset is rejected.
         sss.get_outgoing_arg(types::I8, StackOffset::max_value() - 1);
-        assert_eq!(layout_stack(sss, 1), Err(CodegenError::ImplLimitExceeded));
+        assert_eq!(
+            layout_stack(sss, is_leaf, 1),
+            Err(CodegenError::ImplLimitExceeded)
+        );
     }
 
     #[test]
@@ -205,7 +225,7 @@ mod tests {
         ));
         let ss2 = sss.get_emergency_slot(types::I32, &[]);
 
-        assert_eq!(layout_stack(sss, 1), Ok(12));
+        assert_eq!(layout_stack(sss, false, 1), Ok(12));
         assert_eq!(sss[ss0].offset, Some(-4));
         assert_eq!(sss[ss1].offset, Some(-8));
         assert_eq!(sss[ss2].offset, Some(-12));
diff --git a/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif b/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif
index f2fd3c68ee..f8a0c0146c 100644
--- a/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif
+++ b/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif
@@ -108,7 +108,6 @@ ebb0(v0: i64, v1: i64):
 ; nextln: x86_push v18
 ; nextln: x86_push v19
 ; nextln: x86_push v20
-; nextln: adjust_sp_down_imm 8
 ; nextln: v2 = load.i32 v0
 ; nextln: v3 = load.i32 v0+8
 ; nextln: v4 = load.i32 v0+16
@@ -135,7 +134,6 @@ ebb0(v0: i64, v1: i64):
 ; nextln: store v12, v1+80
 ; nextln: store v13, v1+88
 ; nextln: store v14, v1+96
-; nextln: adjust_sp_up_imm 8
 ; nextln: v26 = x86_pop.i64
 ; nextln: v25 = x86_pop.i64
 ; nextln: v24 = x86_pop.i64
diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif
index 6782d8cdce..7dc024f33c 100644
--- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif
+++ b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif
@@ -160,7 +160,7 @@ ebb0(v0: i64, v1: i64):
 ; nextln: UnwindCode {
 ; nextln: offset: 19,
 ; nextln: op: SmallStackAlloc,
-; nextln: info: 4,
+; nextln: info: 3,
 ; nextln: value: None,
 ; nextln: },
 ; nextln: UnwindCode {