From 80c2d70d2d700c39c95398a390abbc9517a5a8f0 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 3 Nov 2020 18:37:51 +0100 Subject: [PATCH] machinst ABI: Support for accumulating outgoing args When performing a function call, the platform ABI may require space on the stack to hold outgoing arguments and/or return values. Currently, this is supported via decrementing the stack pointer before the call and incrementing it afterwards, using the emit_stack_pre_adjust and emit_stack_post_adjust methods of ABICaller. However, on some platforms it would be preferable to just allocate enough space for any call done in the function in the caller's prologue instead. This patch adds support to allow back-ends to choose that method. Instead of calling emit_stack_pre/post_adjust around a call, they simply call a new accumulate_outgoing_args_size method of ABICaller. This will pass on the required size to the ABICallee structure of the calling function, which will accumulate the maximum size required for all function calls. That accumulated size is then passed to the gen_clobber_save and gen_clobber_restore functions so they can include the size in the stack allocation / deallocation that already happens in the prologue / epilogue code. 
--- cranelift/codegen/src/isa/aarch64/abi.rs | 2 ++ cranelift/codegen/src/isa/arm32/abi.rs | 2 ++ cranelift/codegen/src/isa/x64/abi.rs | 2 ++ cranelift/codegen/src/machinst/abi.rs | 13 ++++++++++++ cranelift/codegen/src/machinst/abi_impl.rs | 23 ++++++++++++++++++++-- cranelift/codegen/src/machinst/lower.rs | 4 ++-- 6 files changed, 42 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 287b570efd..ba9abc5cb7 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -513,6 +513,7 @@ impl ABIMachineSpec for AArch64MachineDeps { _: &settings::Flags, clobbers: &Set>, fixed_frame_storage_size: u32, + _outgoing_args_size: u32, ) -> (u64, SmallVec<[Inst; 16]>) { let mut insts = SmallVec::new(); let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers); @@ -565,6 +566,7 @@ impl ABIMachineSpec for AArch64MachineDeps { flags: &settings::Flags, clobbers: &Set>, _fixed_frame_storage_size: u32, + _outgoing_args_size: u32, ) -> SmallVec<[Inst; 16]> { let mut insts = SmallVec::new(); let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers); diff --git a/cranelift/codegen/src/isa/arm32/abi.rs b/cranelift/codegen/src/isa/arm32/abi.rs index 8de86ca861..d13e44c6a7 100644 --- a/cranelift/codegen/src/isa/arm32/abi.rs +++ b/cranelift/codegen/src/isa/arm32/abi.rs @@ -313,6 +313,7 @@ impl ABIMachineSpec for Arm32MachineDeps { _flags: &settings::Flags, clobbers: &Set>, fixed_frame_storage_size: u32, + _outgoing_args_size: u32, ) -> (u64, SmallVec<[Inst; 16]>) { let mut insts = SmallVec::new(); if fixed_frame_storage_size > 0 { @@ -342,6 +343,7 @@ impl ABIMachineSpec for Arm32MachineDeps { _flags: &settings::Flags, clobbers: &Set>, _fixed_frame_storage_size: u32, + _outgoing_args_size: u32, ) -> SmallVec<[Inst; 16]> { let mut insts = SmallVec::new(); let clobbered_vec = get_callee_saves(clobbers); diff --git 
a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 48f5d92a77..9471a943c6 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -395,6 +395,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { _: &settings::Flags, clobbers: &Set>, fixed_frame_storage_size: u32, + _outgoing_args_size: u32, ) -> (u64, SmallVec<[Self::I; 16]>) { let mut insts = SmallVec::new(); // Find all clobbered registers that are callee-save. These are only I64 @@ -443,6 +444,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { flags: &settings::Flags, clobbers: &Set>, _fixed_frame_storage_size: u32, + _outgoing_args_size: u32, ) -> SmallVec<[Self::I; 16]> { let mut insts = SmallVec::new(); diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 8382ee8848..c72a81dcc2 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -23,6 +23,12 @@ pub trait ABICallee { /// lowering context exists. fn init(&mut self, maybe_tmp: Option>); + /// Accumulate outgoing arguments. This ensures that at least SIZE bytes + /// are allocated in the prologue to be available for use in function calls + /// to hold arguments and/or return values. If this function is called + /// multiple times, the maximum of all SIZE values will be available. + fn accumulate_outgoing_args_size(&mut self, size: u32); + /// Get the settings controlling this function's compilation. fn flags(&self) -> &settings::Flags; @@ -203,6 +209,13 @@ pub trait ABICaller { /// Emit code to post-adjust the satck, after call return and return-value copies. fn emit_stack_post_adjust>(&self, ctx: &mut C); + /// Accumulate outgoing arguments. This ensures that the caller (as + /// identified via the CTX argument) allocates enough space in the + /// prologue to hold all arguments and return values for this call. 
+ /// There is no code emitted at the call site, everything is done + /// in the caller's function prologue. + fn accumulate_outgoing_args_size>(&self, ctx: &mut C); + /// Emit the call itself. /// /// The returned instruction should have proper use- and def-sets according diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index 70ffaa753f..e967e939d1 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -329,6 +329,7 @@ pub trait ABIMachineSpec { flags: &settings::Flags, clobbers: &Set>, fixed_frame_storage_size: u32, + outgoing_args_size: u32, ) -> (u64, SmallVec<[Self::I; 16]>); /// Generate a clobber-restore sequence. This sequence should perform the @@ -340,6 +341,7 @@ pub trait ABIMachineSpec { flags: &settings::Flags, clobbers: &Set>, fixed_frame_storage_size: u32, + outgoing_args_size: u32, ) -> SmallVec<[Self::I; 16]>; /// Generate a call instruction/sequence. This method is provided one @@ -435,6 +437,8 @@ pub struct ABICalleeImpl { stackslots: Vec, /// Total stack size of all stackslots. stackslots_size: u32, + /// Stack size to be reserved for outgoing arguments. + outgoing_args_size: u32, /// Clobbered registers, from regalloc. clobbered: Set>, /// Total number of spillslots, from regalloc. 
@@ -527,6 +531,7 @@ impl ABICalleeImpl { sig, stackslots, stackslots_size: stack_offset, + outgoing_args_size: 0, clobbered: Set::empty(), spillslots: None, fixed_frame_storage_size: 0, @@ -690,6 +695,12 @@ impl ABICallee for ABICalleeImpl { } } + fn accumulate_outgoing_args_size(&mut self, size: u32) { + if size > self.outgoing_args_size { + self.outgoing_args_size = size; + } + } + fn flags(&self) -> &settings::Flags { &self.flags } @@ -978,11 +989,13 @@ impl ABICallee for ABICalleeImpl { &self.flags, &self.clobbered, self.fixed_frame_storage_size, + self.outgoing_args_size, ); insts.extend(clobber_insts); - if clobber_size > 0 { - insts.push(M::gen_nominal_sp_adj(clobber_size as i32)); + let sp_adj = self.outgoing_args_size as i32 + clobber_size as i32; + if sp_adj > 0 { + insts.push(M::gen_nominal_sp_adj(sp_adj)); } self.total_frame_size = Some(total_stacksize); @@ -998,6 +1011,7 @@ impl ABICallee for ABICalleeImpl { &self.flags, &self.clobbered, self.fixed_frame_storage_size, + self.outgoing_args_size, )); // N.B.: we do *not* emit a nominal SP adjustment here, because (i) there will be no @@ -1180,6 +1194,11 @@ impl ABICaller for ABICallerImpl { } } + fn accumulate_outgoing_args_size>(&self, ctx: &mut C) { + let off = self.sig.stack_arg_space + self.sig.stack_ret_space; + ctx.abi().accumulate_outgoing_args_size(off as u32); + } + fn emit_stack_pre_adjust>(&self, ctx: &mut C) { let off = self.sig.stack_arg_space + self.sig.stack_ret_space; adjust_stack_and_nominal_sp::(ctx, off as i32, /* is_sub = */ true) diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index abac6fa37d..01e54e2b84 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -63,7 +63,7 @@ pub trait LowerCtx { // Function-level queries: /// Get the `ABICallee`. - fn abi(&mut self) -> &dyn ABICallee; + fn abi(&mut self) -> &mut dyn ABICallee; /// Get the (virtual) register that receives the return value. 
A return /// instruction should lower into a sequence that fills this register. (Why /// not allow the backend to specify its own result register for the return? @@ -850,7 +850,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { type I = I; - fn abi(&mut self) -> &dyn ABICallee { + fn abi(&mut self) -> &mut dyn ABICallee { self.vcode.abi() }