From f18a1f148838c69e1aa39bc76005dae4354433f7 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Wed, 31 Aug 2022 13:39:32 -0700 Subject: [PATCH] Cranelift: Deduplicate ABI signatures during lowering (#4829) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Cranelift: Deduplicate ABI signatures during lowering This commit creates the `SigSet` type which interns and deduplicates the ABI signatures that we create from `ir::Signature`s. The ABI signatures are now referred to indirectly via a `Sig` (which is a `cranelift_entity` ID), and we pass around a `SigSet` to anything that needs to access the actual underlying `SigData` (which is what `ABISig` used to be). I had to change a couple of methods to return a `SmallInstVec` instead of emitting directly to work around what would otherwise be shared and exclusive borrows of the lowering context overlapping. I don't expect any of these to heap allocate in practice. This does not remove the often-unnecessary allocations caused by `ensure_struct_return_ptr_is_returned`. That is left for follow-up work. This also opens the door for further shuffling of signature data into more efficient representations in the future, now that we have `SigSet` to store it all in one place and it is threaded through all the code. We could potentially move each signature's parameter and return vectors into one big vector shared between all signatures, for example, which could cut down on allocations and shrink the size of `SigData` since those `SmallVec`s have pretty large inline capacity. Overall, this refactoring gives a 1-7% speedup for compilation on `pulldown-cmark`: ``` compilation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm Δ = 8754213.66 ± 7526266.23 (confidence = 99%) dedupe.so is 1.01x to 1.07x faster than main.so! 
[191003295 234620642.20 280597986] dedupe.so [197626699 243374855.86 321816763] main.so compilation :: cycles :: benchmarks/bz2/benchmark.wasm No difference in performance. [170406200 194299792.68 253001201] dedupe.so [172071888 193230743.11 223608329] main.so compilation :: cycles :: benchmarks/spidermonkey/benchmark.wasm No difference in performance. [3870997347 4437735062.59 5216007266] dedupe.so [4019924063 4424595349.24 4965088931] main.so ``` * Use full path instead of import to avoid warnings in some build configurations Warnings will then cause CI to fail. * Move `SigSet` into `VCode` --- .../codegen/src/isa/aarch64/lower/isle.rs | 4 +- cranelift/codegen/src/isa/aarch64/mod.rs | 9 +- cranelift/codegen/src/isa/s390x/inst.isle | 14 +- cranelift/codegen/src/isa/s390x/lower.isle | 16 +- cranelift/codegen/src/isa/s390x/lower/isle.rs | 42 ++- cranelift/codegen/src/isa/s390x/mod.rs | 9 +- cranelift/codegen/src/isa/x64/lower.rs | 20 +- cranelift/codegen/src/isa/x64/lower/isle.rs | 6 +- cranelift/codegen/src/isa/x64/mod.rs | 9 +- cranelift/codegen/src/machinst/abi.rs | 343 +++++++++++++----- cranelift/codegen/src/machinst/compile.rs | 5 +- cranelift/codegen/src/machinst/isle.rs | 83 +++-- cranelift/codegen/src/machinst/lower.rs | 43 ++- cranelift/codegen/src/machinst/vcode.rs | 30 +- cranelift/codegen/src/prelude.isle | 18 +- 15 files changed, 455 insertions(+), 196 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index 231011bd93..e4fb6ea6f5 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -26,7 +26,7 @@ use crate::{ immediates::*, types::*, AtomicRmwOp, ExternalName, Inst, InstructionData, MemFlags, TrapCode, Value, ValueList, }, - isa::aarch64::abi::{AArch64Caller, AArch64MachineDeps}, + isa::aarch64::abi::AArch64Caller, isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm}, isa::aarch64::lower::{writable_vreg, 
writable_xreg, xreg}, isa::unwind::UnwindInst, @@ -80,7 +80,7 @@ impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { isle_prelude_methods!(); - isle_prelude_caller_methods!(AArch64MachineDeps, AArch64Caller); + isle_prelude_caller_methods!(crate::isa::aarch64::abi::AArch64MachineDeps, AArch64Caller); fn sign_return_address_disabled(&mut self) -> Option<()> { if self.isa_flags.sign_return_address() { diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index 694cc48ef8..9e6918d011 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -7,8 +7,8 @@ use crate::isa::aarch64::settings as aarch64_settings; use crate::isa::unwind::systemv; use crate::isa::{Builder as IsaBuilder, TargetIsa}; use crate::machinst::{ - compile, CompiledCode, CompiledCodeStencil, MachTextSectionBuilder, Reg, TextSectionBuilder, - VCode, + compile, CompiledCode, CompiledCodeStencil, MachTextSectionBuilder, Reg, SigSet, + TextSectionBuilder, VCode, }; use crate::result::CodegenResult; use crate::settings as shared_settings; @@ -60,8 +60,9 @@ impl AArch64Backend { flags: shared_settings::Flags, ) -> CodegenResult<(VCode, regalloc2::Output)> { let emit_info = EmitInfo::new(flags.clone()); - let abi = abi::AArch64Callee::new(func, self, &self.isa_flags)?; - compile::compile::(func, self, abi, &self.machine_env, emit_info) + let sigs = SigSet::new::(func, &self.flags)?; + let abi = abi::AArch64Callee::new(func, self, &self.isa_flags, &sigs)?; + compile::compile::(func, self, abi, &self.machine_env, emit_info, sigs) } } diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle index 172e672147..32b1d4924d 100644 --- a/cranelift/codegen/src/isa/s390x/inst.isle +++ b/cranelift/codegen/src/isa/s390x/inst.isle @@ -3610,30 +3610,30 @@ ;; Helpers for generating `call` instructions 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(decl abi_sig (SigRef) ABISig) +(decl abi_sig (SigRef) Sig) (extern constructor abi_sig abi_sig) -(decl abi_call_info (ABISig ExternalName Opcode) BoxCallInfo) +(decl abi_call_info (Sig ExternalName Opcode) BoxCallInfo) (extern constructor abi_call_info abi_call_info) -(decl abi_call_ind_info (ABISig Reg Opcode) BoxCallIndInfo) +(decl abi_call_ind_info (Sig Reg Opcode) BoxCallIndInfo) (extern constructor abi_call_ind_info abi_call_ind_info) (decl writable_link_reg () WritableReg) (rule (writable_link_reg) (writable_gpr 14)) -(decl abi_call (ABISig ExternalName Opcode) SideEffectNoResult) +(decl abi_call (Sig ExternalName Opcode) SideEffectNoResult) (rule (abi_call abi name opcode) (call_impl (writable_link_reg) (abi_call_info abi name opcode))) -(decl abi_call_ind (ABISig Reg Opcode) SideEffectNoResult) +(decl abi_call_ind (Sig Reg Opcode) SideEffectNoResult) (rule (abi_call_ind abi target opcode) (call_ind_impl (writable_link_reg) (abi_call_ind_info abi target opcode))) -(decl abi_accumulate_outgoing_args_size (ABISig) Unit) +(decl abi_accumulate_outgoing_args_size (Sig) Unit) (extern constructor abi_accumulate_outgoing_args_size abi_accumulate_outgoing_args_size) -(decl abi_lane_order (ABISig) LaneOrder) +(decl abi_lane_order (Sig) LaneOrder) (extern constructor abi_lane_order abi_lane_order) diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index 7d15263a4f..c6ed272739 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -4060,7 +4060,7 @@ ;; Direct call to an in-range function. 
(rule (lower (call (func_ref_data sig_ref name (reloc_distance_near)) args)) - (let ((abi ABISig (abi_sig sig_ref)) + (let ((abi Sig (abi_sig sig_ref)) (_ Unit (abi_accumulate_outgoing_args_size abi)) (_ InstOutput (lower_call_args abi (range 0 (abi_num_args abi)) args)) (_ InstOutput (side_effect (abi_call abi name (Opcode.Call))))) @@ -4068,7 +4068,7 @@ ;; Direct call to an out-of-range function (implicitly via pointer). (rule (lower (call (func_ref_data sig_ref name _) args)) - (let ((abi ABISig (abi_sig sig_ref)) + (let ((abi Sig (abi_sig sig_ref)) (_ Unit (abi_accumulate_outgoing_args_size abi)) (_ InstOutput (lower_call_args abi (range 0 (abi_num_args abi)) args)) (target Reg (load_symbol_reloc (SymbolReloc.Absolute name 0))) @@ -4077,7 +4077,7 @@ ;; Indirect call. (rule (lower (call_indirect sig_ref ptr args)) - (let ((abi ABISig (abi_sig sig_ref)) + (let ((abi Sig (abi_sig sig_ref)) (target Reg (put_in_reg ptr)) (_ Unit (abi_accumulate_outgoing_args_size abi)) (_ InstOutput (lower_call_args abi (range 0 (abi_num_args abi)) args)) @@ -4085,14 +4085,14 @@ (lower_call_rets abi (range 0 (abi_num_rets abi)) (output_builder_new)))) ;; Lower function arguments. -(decl lower_call_args (ABISig Range ValueSlice) InstOutput) +(decl lower_call_args (Sig Range ValueSlice) InstOutput) (rule (lower_call_args abi range args) (let ((_ InstOutput (lower_call_args_buffer abi range args)) (_ InstOutput (lower_call_args_slots abi range args))) (lower_call_ret_arg abi))) ;; Lower function arguments (part 1): prepare buffer copies. 
-(decl lower_call_args_buffer (ABISig Range ValueSlice) InstOutput) +(decl lower_call_args_buffer (Sig Range ValueSlice) InstOutput) (rule (lower_call_args_buffer abi (range_empty) _) (output_none)) (rule (lower_call_args_buffer abi (range_unwrap head tail) args) (let ((_ InstOutput (copy_to_buffer 0 (abi_get_arg abi head) @@ -4100,7 +4100,7 @@ (lower_call_args_buffer abi tail args))) ;; Lower function arguments (part 2): set up registers / stack slots. -(decl lower_call_args_slots (ABISig Range ValueSlice) InstOutput) +(decl lower_call_args_slots (Sig Range ValueSlice) InstOutput) (rule (lower_call_args_slots abi (range_empty) _) (output_none)) (rule (lower_call_args_slots abi (range_unwrap head tail) args) (let ((_ Unit (copy_to_arg (abi_lane_order abi) @@ -4109,7 +4109,7 @@ (lower_call_args_slots abi tail args))) ;; Lower function arguments (part 3): implicit return-area pointer. -(decl lower_call_ret_arg (ABISig) InstOutput) +(decl lower_call_ret_arg (Sig) InstOutput) (rule (lower_call_ret_arg (abi_no_ret_arg)) (output_none)) (rule (lower_call_ret_arg abi @ (abi_ret_arg (abi_arg_only_slot slot))) (let ((ret_arg Reg (load_addr (memarg_stack_off (abi_sized_stack_arg_space abi) 0))) @@ -4117,7 +4117,7 @@ (output_none))) ;; Lower function return values by collecting them from registers / stack slots. 
-(decl lower_call_rets (ABISig Range InstOutputBuilder) InstOutput) +(decl lower_call_rets (Sig Range InstOutputBuilder) InstOutput) (rule (lower_call_rets abi (range_empty) builder) (output_builder_finish builder)) (rule (lower_call_rets abi (range_unwrap head tail) builder) (let ((ret ValueRegs (copy_from_arg (abi_lane_order abi) diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index c2b6903dac..117125e7a3 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -89,46 +89,48 @@ pub(crate) fn lower_branch( impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { isle_prelude_methods!(); - fn abi_sig(&mut self, sig_ref: SigRef) -> ABISig { - let sig = &self.lower_ctx.dfg().signatures[sig_ref]; - ABISig::from_func_sig::(sig, self.flags).unwrap() + fn abi_sig(&mut self, sig_ref: SigRef) -> Sig { + self.lower_ctx.sigs().abi_sig_for_sig_ref(sig_ref) } - fn abi_lane_order(&mut self, abi: &ABISig) -> LaneOrder { - lane_order_for_call_conv(abi.call_conv()) + fn abi_lane_order(&mut self, abi: &Sig) -> LaneOrder { + lane_order_for_call_conv(self.lower_ctx.sigs()[*abi].call_conv()) } - fn abi_accumulate_outgoing_args_size(&mut self, abi: &ABISig) -> Unit { - let off = abi.sized_stack_arg_space() + abi.sized_stack_ret_space(); + fn abi_accumulate_outgoing_args_size(&mut self, abi: &Sig) -> Unit { + let off = self.lower_ctx.sigs()[*abi].sized_stack_arg_space() + + self.lower_ctx.sigs()[*abi].sized_stack_ret_space(); self.lower_ctx - .abi() + .abi_mut() .accumulate_outgoing_args_size(off as u32); } - fn abi_call_info(&mut self, abi: &ABISig, name: ExternalName, opcode: &Opcode) -> BoxCallInfo { - let (uses, defs, clobbers) = abi.call_uses_defs_clobbers::(); + fn abi_call_info(&mut self, abi: &Sig, name: ExternalName, opcode: &Opcode) -> BoxCallInfo { + let (uses, defs, clobbers) = + self.lower_ctx.sigs()[*abi].call_uses_defs_clobbers::(); 
Box::new(CallInfo { dest: name.clone(), uses, defs, clobbers, opcode: *opcode, - caller_callconv: self.lower_ctx.abi().call_conv(), - callee_callconv: abi.call_conv(), + caller_callconv: self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), + callee_callconv: self.lower_ctx.sigs()[*abi].call_conv(), tls_symbol: None, }) } - fn abi_call_ind_info(&mut self, abi: &ABISig, target: Reg, opcode: &Opcode) -> BoxCallIndInfo { - let (uses, defs, clobbers) = abi.call_uses_defs_clobbers::(); + fn abi_call_ind_info(&mut self, abi: &Sig, target: Reg, opcode: &Opcode) -> BoxCallIndInfo { + let (uses, defs, clobbers) = + self.lower_ctx.sigs()[*abi].call_uses_defs_clobbers::(); Box::new(CallIndInfo { rn: target, uses, defs, clobbers, opcode: *opcode, - caller_callconv: self.lower_ctx.abi().call_conv(), - callee_callconv: abi.call_conv(), + caller_callconv: self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), + callee_callconv: self.lower_ctx.sigs()[*abi].call_conv(), }) } @@ -149,12 +151,12 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> fn lib_accumulate_outgoing_args_size(&mut self, _: &LibCallInfo) -> Unit { // Libcalls only require the register save area. self.lower_ctx - .abi() + .abi_mut() .accumulate_outgoing_args_size(REG_SAVE_AREA_SIZE); } fn lib_call_info(&mut self, info: &LibCallInfo) -> BoxCallInfo { - let caller_callconv = self.lower_ctx.abi().call_conv(); + let caller_callconv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()); let callee_callconv = CallConv::for_libcall(&self.flags, caller_callconv); // Uses and defs are defined by the particular libcall. 
@@ -403,7 +405,9 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> #[inline] fn lane_order(&mut self) -> Option { - Some(lane_order_for_call_conv(self.lower_ctx.abi().call_conv())) + Some(lane_order_for_call_conv( + self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), + )) } #[inline] diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs index 36f241c67e..25886a8a0a 100644 --- a/cranelift/codegen/src/isa/s390x/mod.rs +++ b/cranelift/codegen/src/isa/s390x/mod.rs @@ -7,8 +7,8 @@ use crate::isa::s390x::settings as s390x_settings; use crate::isa::unwind::systemv::RegisterMappingError; use crate::isa::{Builder as IsaBuilder, TargetIsa}; use crate::machinst::{ - compile, CompiledCode, CompiledCodeStencil, MachTextSectionBuilder, Reg, TextSectionBuilder, - VCode, + compile, CompiledCode, CompiledCodeStencil, MachTextSectionBuilder, Reg, SigSet, + TextSectionBuilder, VCode, }; use crate::result::CodegenResult; use crate::settings as shared_settings; @@ -58,8 +58,9 @@ impl S390xBackend { func: &Function, ) -> CodegenResult<(VCode, regalloc2::Output)> { let emit_info = EmitInfo::new(self.isa_flags.clone()); - let abi = abi::S390xCallee::new(func, self, &self.isa_flags)?; - compile::compile::(func, self, abi, &self.machine_env, emit_info) + let sigs = SigSet::new::(func, &self.flags)?; + let abi = abi::S390xCallee::new(func, self, &self.isa_flags, &sigs)?; + compile::compile::(func, self, abi, &self.machine_env, emit_info, sigs) } } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 88ef7e500c..c9ab04e781 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -153,21 +153,31 @@ fn emit_vm_call( // TODO avoid recreating signatures for every single Libcall function. 
let call_conv = CallConv::for_libcall(flags, CallConv::triple_default(triple)); let sig = libcall.signature(call_conv); - let caller_conv = ctx.abi().call_conv(); + let caller_conv = ctx.abi().call_conv(ctx.sigs()); - let mut abi = X64Caller::from_func(&sig, &extname, dist, caller_conv, flags)?; + if !ctx.sigs().have_abi_sig_for_signature(&sig) { + ctx.sigs_mut() + .make_abi_sig_from_ir_signature::(sig.clone(), flags)?; + } + + let mut abi = + X64Caller::from_libcall(ctx.sigs(), &sig, &extname, dist, caller_conv, flags.clone())?; abi.emit_stack_pre_adjust(ctx); - assert_eq!(inputs.len(), abi.num_args()); + assert_eq!(inputs.len(), abi.num_args(ctx.sigs())); for (i, input) in inputs.iter().enumerate() { - abi.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(*input)); + for inst in abi.gen_copy_regs_to_arg(ctx, i, ValueRegs::one(*input)) { + ctx.emit(inst); + } } abi.emit_call(ctx); for (i, output) in outputs.iter().enumerate() { - abi.emit_copy_retval_to_regs(ctx, i, ValueRegs::one(*output)); + for inst in abi.gen_copy_retval_to_regs(ctx, i, ValueRegs::one(*output)) { + ctx.emit(inst); + } } abi.emit_stack_post_adjust(ctx); diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 2148d4f400..eaaf875040 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -24,7 +24,7 @@ use crate::{ settings::Flags, unwind::UnwindInst, x64::{ - abi::{X64ABIMachineSpec, X64Caller}, + abi::X64Caller, inst::{args::*, regs, CallInfo}, settings::Flags as IsaFlags, }, @@ -720,7 +720,7 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { } fn libcall_1(&mut self, libcall: &LibCall, a: Reg) -> Reg { - let call_conv = self.lower_ctx.abi().call_conv(); + let call_conv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()); let ret_ty = libcall.signature(call_conv).returns[0].value_type; let output_reg = self.lower_ctx.alloc_tmp(ret_ty).only_reg().unwrap(); @@ -738,7 +738,7 
@@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { } fn libcall_3(&mut self, libcall: &LibCall, a: Reg, b: Reg, c: Reg) -> Reg { - let call_conv = self.lower_ctx.abi().call_conv(); + let call_conv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()); let ret_ty = libcall.signature(call_conv).returns[0].value_type; let output_reg = self.lower_ctx.alloc_tmp(ret_ty).only_reg().unwrap(); diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index 303b90d3ab..b57b6f7dac 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -9,8 +9,8 @@ use crate::isa::unwind::systemv; use crate::isa::x64::{inst::regs::create_reg_env_systemv, settings as x64_settings}; use crate::isa::Builder as IsaBuilder; use crate::machinst::{ - compile, CompiledCode, CompiledCodeStencil, MachTextSectionBuilder, Reg, TextSectionBuilder, - VCode, + compile, CompiledCode, CompiledCodeStencil, MachTextSectionBuilder, Reg, SigSet, + TextSectionBuilder, VCode, }; use crate::result::{CodegenError, CodegenResult}; use crate::settings::{self as shared_settings, Flags}; @@ -53,8 +53,9 @@ impl X64Backend { // This performs lowering to VCode, register-allocates the code, computes // block layout and finalizes branches. The result is ready for binary emission. let emit_info = EmitInfo::new(flags.clone(), self.x64_flags.clone()); - let abi = abi::X64Callee::new(&func, self, &self.x64_flags)?; - compile::compile::(&func, self, abi, &self.reg_env, emit_info) + let sigs = SigSet::new::(func, &self.flags)?; + let abi = abi::X64Callee::new(&func, self, &self.x64_flags, &sigs)?; + compile::compile::(&func, self, abi, &self.reg_env, emit_info, sigs) } } diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index f20ebe156c..92bc906a85 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -102,6 +102,8 @@ //! ABI. 
See each platform's `abi.rs` implementation for details. use crate::binemit::StackMap; +use crate::entity::{PrimaryMap, SecondaryMap}; +use crate::fx::FxHashMap; use crate::ir::types::*; use crate::ir::{ArgumentExtension, ArgumentPurpose, DynamicStackSlot, Signature, StackSlot}; use crate::isa::TargetIsa; @@ -526,9 +528,14 @@ pub trait ABIMachineSpec { // A vector of `ABIArg`s with inline capacity, since they are typically small. pub type ABIArgVec = SmallVec<[ABIArg; 6]>; +/// The id of an ABI signature within the `SigSet`. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct Sig(u32); +cranelift_entity::entity_impl!(Sig); + /// ABI information shared between body (callee) and caller. #[derive(Clone)] -pub struct ABISig { +pub struct SigData { /// Argument locations (regs or stack slots). Stack offsets are relative to /// SP on entry to function. args: ABIArgVec, @@ -545,11 +552,11 @@ pub struct ABISig { call_conv: isa::CallConv, } -impl ABISig { +impl SigData { pub fn from_func_sig( sig: &ir::Signature, flags: &settings::Flags, - ) -> CodegenResult { + ) -> CodegenResult { let sig = ensure_struct_return_ptr_is_returned(sig); // Compute args and retvals from signature. Handle retvals first, @@ -580,7 +587,7 @@ impl ABISig { stack_ret_arg, ); - Ok(ABISig { + Ok(SigData { args, rets, sized_stack_arg_space, @@ -698,12 +705,138 @@ impl ABISig { } } +/// A (mostly) deduplicated set of ABI signatures. +/// +/// We say "mostly" because we do not dedupe between signatures interned via +/// `ir::SigRef` (direct and indirect calls; the vast majority of signatures in +/// this set) vs via `ir::Signature` (the callee itself and libcalls). Doing +/// this final bit of deduplication would require filling out the +/// `ir_signature_to_abi_sig`, which is a bunch of allocations (not just the +/// hash map itself but params and returns vecs in each signature) that we want +/// to avoid. 
+/// +/// In general, prefer using the `ir::SigRef`-taking methods to the +/// `ir::Signature`-taking methods when you can get away with it, as they don't +/// require cloning non-copy types that will trigger heap allocations. +/// +/// This type can be indexed by `Sig` to access its associated `SigData`. +pub struct SigSet { + /// Interned `ir::Signature`s that we already have an ABI signature for. + ir_signature_to_abi_sig: FxHashMap, + + /// Interned `ir::SigRef`s that we already have an ABI signature for. + ir_sig_ref_to_abi_sig: SecondaryMap>, + + /// The actual ABI signatures, keyed by `Sig`. + sigs: PrimaryMap, +} + +impl SigSet { + /// Construct a new `SigSet`, interning all of the signatures used by the + /// given function. + pub fn new(func: &ir::Function, flags: &settings::Flags) -> CodegenResult + where + M: ABIMachineSpec, + { + let mut sigs = SigSet { + ir_signature_to_abi_sig: FxHashMap::default(), + ir_sig_ref_to_abi_sig: SecondaryMap::with_capacity(func.dfg.signatures.len()), + sigs: PrimaryMap::with_capacity(1 + func.dfg.signatures.len()), + }; + + sigs.make_abi_sig_from_ir_signature::(func.signature.clone(), flags)?; + for sig_ref in func.dfg.signatures.keys() { + sigs.make_abi_sig_from_ir_sig_ref::(sig_ref, &func.dfg, flags)?; + } + + Ok(sigs) + } + + /// Have we already interned an ABI signature for the given `ir::Signature`? + pub fn have_abi_sig_for_signature(&self, signature: &ir::Signature) -> bool { + self.ir_signature_to_abi_sig.contains_key(signature) + } + + /// Construct and intern an ABI signature for the given `ir::Signature`. 
+ pub fn make_abi_sig_from_ir_signature( + &mut self, + signature: ir::Signature, + flags: &settings::Flags, + ) -> CodegenResult + where + M: ABIMachineSpec, + { + // Because the `HashMap` entry API requires taking ownership of the + // lookup key -- and we want to avoid unnecessary clones of + // `ir::Signature`s, even at the cost of duplicate lookups -- we can't + // have a single, get-or-create-style method for interning + // `ir::Signature`s into ABI signatures. So at least (debug) assert that + // we aren't creating duplicate ABI signatures for the same + // `ir::Signature`. + debug_assert!(!self.have_abi_sig_for_signature(&signature)); + + let legalized_signature = crate::machinst::ensure_struct_return_ptr_is_returned(&signature); + let sig_data = SigData::from_func_sig::(&legalized_signature, flags)?; + let sig = self.sigs.push(sig_data); + self.ir_signature_to_abi_sig.insert(signature, sig); + Ok(sig) + } + + fn make_abi_sig_from_ir_sig_ref( + &mut self, + sig_ref: ir::SigRef, + dfg: &ir::DataFlowGraph, + flags: &settings::Flags, + ) -> CodegenResult + where + M: ABIMachineSpec, + { + if let Some(sig) = self.ir_sig_ref_to_abi_sig[sig_ref] { + return Ok(sig); + } + let signature = &dfg.signatures[sig_ref]; + let legalized_signature = crate::machinst::ensure_struct_return_ptr_is_returned(&signature); + let sig_data = SigData::from_func_sig::(&legalized_signature, flags)?; + let sig = self.sigs.push(sig_data); + self.ir_sig_ref_to_abi_sig[sig_ref] = Some(sig); + Ok(sig) + } + + /// Get the already-interned ABI signature id for the given `ir::SigRef`. + pub fn abi_sig_for_sig_ref(&self, sig_ref: ir::SigRef) -> Sig { + self.ir_sig_ref_to_abi_sig + .get(sig_ref) + // Should have a secondary map entry... + .expect("must call `make_abi_sig_from_ir_sig_ref` before `get_abi_sig_for_sig_ref`") + // ...and that entry should be initialized. 
+ .expect("must call `make_abi_sig_from_ir_sig_ref` before `get_abi_sig_for_sig_ref`") + } + + /// Get the already-interned ABI signature id for the given `ir::Signature`. + pub fn abi_sig_for_signature(&self, signature: &ir::Signature) -> Sig { + self.ir_signature_to_abi_sig + .get(signature) + .copied() + .expect("must call `make_abi_sig_from_ir_signature` before `get_abi_sig_for_signature`") + } +} + +// NB: we do _not_ implement `IndexMut` because these signatures are +// deduplicated and shared! +impl std::ops::Index for SigSet { + type Output = SigData; + + fn index(&self, sig: Sig) -> &Self::Output { + &self.sigs[sig] + } +} + /// ABI object for a function body. pub struct Callee { /// CLIF-level signature, possibly normalized. ir_sig: ir::Signature, /// Signature: arg and retval regs. - sig: ABISig, + sig: Sig, /// Defined dynamic types. dynamic_type_sizes: HashMap, /// Offsets to each dynamic stackslot. @@ -761,7 +894,7 @@ pub struct Callee { fn get_special_purpose_param_register( f: &ir::Function, - abi: &ABISig, + abi: &SigData, purpose: ir::ArgumentPurpose, ) -> Option { let idx = f.signature.special_param_index(purpose)?; @@ -776,12 +909,16 @@ fn get_special_purpose_param_register( impl Callee { /// Create a new body ABI instance. - pub fn new(f: &ir::Function, isa: &dyn TargetIsa, isa_flags: &M::F) -> CodegenResult { + pub fn new<'a>( + f: &ir::Function, + isa: &dyn TargetIsa, + isa_flags: &M::F, + sigs: &SigSet, + ) -> CodegenResult { trace!("ABI: func signature {:?}", f.signature); let flags = isa.flags().clone(); - let ir_sig = ensure_struct_return_ptr_is_returned(&f.signature); - let sig = ABISig::from_func_sig::(&ir_sig, &flags)?; + let sig = sigs.abi_sig_for_signature(&f.signature); let call_conv = f.signature.call_conv; // Only these calling conventions are supported. @@ -839,9 +976,12 @@ impl Callee { // argument or as a global value which often calculates the stack limit // from the arguments. 
let stack_limit = - get_special_purpose_param_register(f, &sig, ir::ArgumentPurpose::StackLimit) + get_special_purpose_param_register(f, &sigs[sig], ir::ArgumentPurpose::StackLimit) .map(|reg| (reg, smallvec![])) - .or_else(|| f.stack_limit.map(|gv| gen_stack_limit::(f, &sig, gv))); + .or_else(|| { + f.stack_limit + .map(|gv| gen_stack_limit::(f, &sigs[sig], gv)) + }); // Determine whether a probestack call is required for large enough // frames (and the minimum frame size if so). @@ -856,7 +996,7 @@ impl Callee { }; Ok(Self { - ir_sig, + ir_sig: ensure_struct_return_ptr_is_returned(&f.signature), sig, dynamic_stackslots, dynamic_type_sizes, @@ -959,7 +1099,7 @@ impl Callee { /// it's used, because we're not participating in register allocation anyway! fn gen_stack_limit( f: &ir::Function, - abi: &ABISig, + abi: &SigData, gv: ir::GlobalValue, ) -> (Reg, SmallInstVec) { let mut insts = smallvec![]; @@ -969,7 +1109,7 @@ fn gen_stack_limit( fn generate_gv( f: &ir::Function, - abi: &ABISig, + abi: &SigData, gv: ir::GlobalValue, insts: &mut SmallInstVec, ) -> Reg { @@ -1033,7 +1173,7 @@ fn gen_store_stack_multi( ret } -fn ensure_struct_return_ptr_is_returned(sig: &ir::Signature) -> ir::Signature { +pub(crate) fn ensure_struct_return_ptr_is_returned(sig: &ir::Signature) -> ir::Signature { let params_structret = sig .params .iter() @@ -1061,9 +1201,9 @@ impl Callee { /// Does the ABI-body code need temp registers (and if so, of what type)? /// They will be provided to `init()` as the `temps` arg if so. - pub fn temps_needed(&self) -> Vec { + pub fn temps_needed(&self, sigs: &SigSet) -> Vec { let mut temp_tys = vec![]; - for arg in &self.sig.args { + for arg in &sigs[self.sig].args { match arg { &ABIArg::ImplicitPtrArg { pointer, .. } => match &pointer { &ABIArgSlot::Reg { .. 
} => {} @@ -1074,7 +1214,7 @@ impl Callee { _ => {} } } - if self.sig.stack_ret_arg.is_some() { + if sigs[self.sig].stack_ret_arg.is_some() { temp_tys.push(M::word_type()); } temp_tys @@ -1083,9 +1223,9 @@ impl Callee { /// Initialize. This is called after the Callee is constructed because it /// may be provided with a vector of temp vregs, which can only be allocated /// once the lowering context exists. - pub fn init(&mut self, temps: Vec>) { + pub fn init(&mut self, sigs: &SigSet, temps: Vec>) { let mut temps_iter = temps.into_iter(); - for arg in &self.sig.args { + for arg in &sigs[self.sig].args { let temp = match arg { &ABIArg::ImplicitPtrArg { pointer, .. } => match &pointer { &ABIArgSlot::Reg { .. } => None, @@ -1095,7 +1235,7 @@ impl Callee { }; self.arg_temp_reg.push(temp); } - if self.sig.stack_ret_arg.is_some() { + if sigs[self.sig].stack_ret_arg.is_some() { self.ret_area_ptr = Some(temps_iter.next().unwrap()); } } @@ -1113,8 +1253,8 @@ impl Callee { } /// Get the calling convention implemented by this ABI object. - pub fn call_conv(&self) -> isa::CallConv { - self.sig.call_conv + pub fn call_conv(&self, sigs: &SigSet) -> isa::CallConv { + sigs[self.sig].call_conv } /// The offsets of all sized stack slots (not spill slots) for debuginfo purposes. @@ -1131,6 +1271,7 @@ impl Callee { /// register. pub fn gen_copy_arg_to_regs( &self, + sigs: &SigSet, idx: usize, into_regs: ValueRegs>, ) -> SmallInstVec { @@ -1150,7 +1291,7 @@ impl Callee { } => { // However, we have to respect the extention mode for stack // slots, or else we grab the wrong bytes on big-endian. - let ext = M::get_ext_mode(self.sig.call_conv, extension); + let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension); let ty = match (ext, ty_bits(ty) as u32) { (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) if n < M::word_bits() => @@ -1171,7 +1312,7 @@ impl Callee { } }; - match &self.sig.args[idx] { + match &sigs[self.sig].args[idx] { &ABIArg::Slots { ref slots, .. 
} => { assert_eq!(into_regs.len(), slots.len()); for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) { @@ -1233,12 +1374,13 @@ impl Callee { /// Generate an instruction which copies a source register to a return value slot. pub fn gen_copy_regs_to_retval( &self, + sigs: &SigSet, idx: usize, from_regs: ValueRegs>, ) -> SmallInstVec { let mut ret = smallvec![]; let word_bits = M::word_bits() as u8; - match &self.sig.rets[idx] { + match &sigs[self.sig].rets[idx] { &ABIArg::Slots { ref slots, .. } => { assert_eq!(from_regs.len(), slots.len()); for (slot, &from_reg) in slots.iter().zip(from_regs.regs().iter()) { @@ -1247,7 +1389,7 @@ impl Callee { reg, ty, extension, .. } => { let from_bits = ty_bits(ty) as u8; - let ext = M::get_ext_mode(self.sig.call_conv, extension); + let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension); let reg: Writable = Writable::from_reg(Reg::from(reg)); match (ext, from_bits) { (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) @@ -1281,7 +1423,7 @@ impl Callee { let off = i32::try_from(offset).expect( "Argument stack offset greater than 2GB; should hit impl limit first", ); - let ext = M::get_ext_mode(self.sig.call_conv, extension); + let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension); // Trash the from_reg; it should be its last use. match (ext, from_bits) { (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) @@ -1326,9 +1468,10 @@ impl Callee { /// values or an otherwise large return value that must be passed on the /// stack; typically the ABI specifies an extra hidden argument that is a /// pointer to that memory. 
- pub fn gen_retval_area_setup(&self) -> Option { - if let Some(i) = self.sig.stack_ret_arg { - let insts = self.gen_copy_arg_to_regs(i, ValueRegs::one(self.ret_area_ptr.unwrap())); + pub fn gen_retval_area_setup(&self, sigs: &SigSet) -> Option { + if let Some(i) = sigs[self.sig].stack_ret_arg { + let insts = + self.gen_copy_arg_to_regs(sigs, i, ValueRegs::one(self.ret_area_ptr.unwrap())); let inst = insts.into_iter().next().unwrap(); trace!( "gen_retval_area_setup: inst {:?}; ptr reg is {:?}", @@ -1343,9 +1486,9 @@ impl Callee { } /// Generate a return instruction. - pub fn gen_ret(&self) -> M::I { + pub fn gen_ret(&self, sigs: &SigSet) -> M::I { let mut rets = vec![]; - for ret in &self.sig.rets { + for ret in &sigs[self.sig].rets { match ret { ABIArg::Slots { slots, .. } => { for slot in slots { @@ -1473,7 +1616,7 @@ impl Callee { /// other methods (`load_arg`, `store_retval`, and spillslot accesses.) /// `self` is mutable so that we can store information in it which will be /// useful when creating the epilogue. - pub fn gen_prologue(&mut self) -> SmallInstVec { + pub fn gen_prologue(&mut self, sigs: &SigSet) -> SmallInstVec { let bytes = M::word_bytes(); let total_stacksize = self.stackslots_size + bytes * self.spillslots.unwrap() as u32; let mask = M::stack_align(self.call_conv) - 1; @@ -1490,7 +1633,7 @@ impl Callee { self.setup_frame = self.flags.preserve_frame_pointers() || M::is_frame_setup_needed( self.is_leaf, - self.stack_args_size(), + self.stack_args_size(sigs), clobbered_callee_saves.len(), self.fixed_frame_storage_size, ); @@ -1597,8 +1740,8 @@ impl Callee { } /// Returns the size of arguments expected on the stack. - pub fn stack_args_size(&self) -> u32 { - self.sig.sized_stack_arg_space as u32 + pub fn stack_args_size(&self, sigs: &SigSet) -> u32 { + sigs[self.sig].sized_stack_arg_space as u32 } /// Get the spill-slot size. @@ -1642,7 +1785,7 @@ impl Callee { /// ABI object for a callsite. pub struct Caller { /// The called function's signature. 
- sig: ABISig, + sig: Sig, /// All uses for the callsite, i.e., function args. uses: SmallVec<[Reg; 8]>, /// All defs for the callsite, i.e., return values. @@ -1673,15 +1816,15 @@ pub enum CallDest { impl Caller { /// Create a callsite ABI object for a call directly to the specified function. pub fn from_func( - sig: &ir::Signature, + sigs: &SigSet, + sig_ref: ir::SigRef, extname: &ir::ExternalName, dist: RelocDistance, caller_conv: isa::CallConv, - flags: &settings::Flags, + flags: settings::Flags, ) -> CodegenResult> { - let ir_sig = ensure_struct_return_ptr_is_returned(sig); - let sig = ABISig::from_func_sig::(&ir_sig, flags)?; - let (uses, defs, clobbers) = sig.call_uses_defs_clobbers::(); + let sig = sigs.abi_sig_for_sig_ref(sig_ref); + let (uses, defs, clobbers) = sigs[sig].call_uses_defs_clobbers::(); Ok(Caller { sig, uses, @@ -1690,7 +1833,32 @@ impl Caller { dest: CallDest::ExtName(extname.clone(), dist), opcode: ir::Opcode::Call, caller_conv, - flags: flags.clone(), + flags, + _mach: PhantomData, + }) + } + + /// Create a callsite ABI object for a call directly to the specified + /// libcall. + pub fn from_libcall( + sigs: &SigSet, + sig: &ir::Signature, + extname: &ir::ExternalName, + dist: RelocDistance, + caller_conv: isa::CallConv, + flags: settings::Flags, + ) -> CodegenResult> { + let sig = sigs.abi_sig_for_signature(sig); + let (uses, defs, clobbers) = sigs[sig].call_uses_defs_clobbers::(); + Ok(Caller { + sig, + uses, + defs, + clobbers, + dest: CallDest::ExtName(extname.clone(), dist), + opcode: ir::Opcode::Call, + caller_conv, + flags, _mach: PhantomData, }) } @@ -1698,15 +1866,15 @@ impl Caller { /// Create a callsite ABI object for a call to a function pointer with the /// given signature. 
pub fn from_ptr( - sig: &ir::Signature, + sigs: &SigSet, + sig_ref: ir::SigRef, ptr: Reg, opcode: ir::Opcode, caller_conv: isa::CallConv, - flags: &settings::Flags, + flags: settings::Flags, ) -> CodegenResult> { - let ir_sig = ensure_struct_return_ptr_is_returned(sig); - let sig = ABISig::from_func_sig::(&ir_sig, flags)?; - let (uses, defs, clobbers) = sig.call_uses_defs_clobbers::(); + let sig = sigs.abi_sig_for_sig_ref(sig_ref); + let (uses, defs, clobbers) = sigs[sig].call_uses_defs_clobbers::(); Ok(Caller { sig, uses, @@ -1715,7 +1883,7 @@ impl Caller { dest: CallDest::Reg(ptr), opcode, caller_conv, - flags: flags.clone(), + flags, _mach: PhantomData, }) } @@ -1734,23 +1902,26 @@ fn adjust_stack_and_nominal_sp(ctx: &mut Lower, off: i3 impl Caller { /// Get the number of arguments expected. - pub fn num_args(&self) -> usize { - if self.sig.stack_ret_arg.is_some() { - self.sig.args.len() - 1 + pub fn num_args(&self, sigs: &SigSet) -> usize { + let data = &sigs[self.sig]; + if data.stack_ret_arg.is_some() { + data.args.len() - 1 } else { - self.sig.args.len() + data.args.len() } } /// Emit code to pre-adjust the stack, prior to argument copies and call. pub fn emit_stack_pre_adjust(&self, ctx: &mut Lower) { - let off = self.sig.sized_stack_arg_space + self.sig.sized_stack_ret_space; + let off = + ctx.sigs()[self.sig].sized_stack_arg_space + ctx.sigs()[self.sig].sized_stack_ret_space; adjust_stack_and_nominal_sp::(ctx, off as i32, /* is_sub = */ true) } /// Emit code to post-adjust the satck, after call return and return-value copies. 
pub fn emit_stack_post_adjust(&self, ctx: &mut Lower) { - let off = self.sig.sized_stack_arg_space + self.sig.sized_stack_ret_space; + let off = + ctx.sigs()[self.sig].sized_stack_arg_space + ctx.sigs()[self.sig].sized_stack_ret_space; adjust_stack_and_nominal_sp::(ctx, off as i32, /* is_sub = */ false) } @@ -1765,7 +1936,7 @@ impl Caller { idx: usize, from_regs: ValueRegs, ) { - match &self.sig.args[idx] { + match &ctx.sigs()[self.sig].args[idx] { &ABIArg::Slots { .. } => {} &ABIArg::StructArg { offset, size, .. } => { let src_ptr = from_regs.only_reg().unwrap(); @@ -1779,7 +1950,8 @@ impl Caller { // N.B.: because we process StructArg params *first*, this is // safe w.r.t. clobbers: we have not yet filled in any other // arg regs. - let memcpy_call_conv = isa::CallConv::for_libcall(&self.flags, self.sig.call_conv); + let memcpy_call_conv = + isa::CallConv::for_libcall(&self.flags, ctx.sigs()[self.sig].call_conv); for insn in M::gen_memcpy(memcpy_call_conv, dst_ptr.to_reg(), src_ptr, size as usize) .into_iter() @@ -1791,18 +1963,20 @@ impl Caller { } } - /// Emit a copy of an argument value from a source register, prior to the call. - /// For large arguments with associated stack buffer, this may load the address - /// of the buffer into the argument register, if required by the ABI. - pub fn emit_copy_regs_to_arg( + /// Generate a copy of an argument value from a source register, prior to + /// the call. For large arguments with associated stack buffer, this may + /// load the address of the buffer into the argument register, if required + /// by the ABI. + pub fn gen_copy_regs_to_arg( &self, - ctx: &mut Lower, + ctx: &Lower, idx: usize, from_regs: ValueRegs, - ) { + ) -> SmallInstVec { + let mut insts = smallvec![]; let word_rc = M::word_reg_class(); let word_bits = M::word_bits() as usize; - match &self.sig.args[idx] { + match &ctx.sigs()[self.sig].args[idx] { &ABIArg::Slots { ref slots, .. 
} => { assert_eq!(from_regs.len(), slots.len()); for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) { @@ -1810,7 +1984,7 @@ impl Caller { &ABIArgSlot::Reg { reg, ty, extension, .. } => { - let ext = M::get_ext_mode(self.sig.call_conv, extension); + let ext = M::get_ext_mode(ctx.sigs()[self.sig].call_conv, extension); if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { assert_eq!(word_rc, reg.class()); let signed = match ext { @@ -1818,7 +1992,7 @@ impl Caller { ir::ArgumentExtension::Sext => true, _ => unreachable!(), }; - ctx.emit(M::gen_extend( + insts.push(M::gen_extend( Writable::from_reg(Reg::from(reg)), *from_reg, signed, @@ -1826,7 +2000,7 @@ impl Caller { word_bits as u8, )); } else { - ctx.emit(M::gen_move( + insts.push(M::gen_move( Writable::from_reg(Reg::from(reg)), *from_reg, ty, @@ -1840,7 +2014,7 @@ impl Caller { .. } => { let mut ty = ty; - let ext = M::get_ext_mode(self.sig.call_conv, extension); + let ext = M::get_ext_mode(ctx.sigs()[self.sig].call_conv, extension); if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { assert_eq!(word_rc, from_reg.class()); let signed = match ext { @@ -1851,7 +2025,7 @@ impl Caller { // Extend in place in the source register. Our convention is to // treat high bits as undefined for values in registers, so this // is safe, even for an argument that is nominally read-only. - ctx.emit(M::gen_extend( + insts.push(M::gen_extend( Writable::from_reg(*from_reg), *from_reg, signed, @@ -1861,7 +2035,7 @@ impl Caller { // Store the extended version. ty = M::word_type(); } - ctx.emit(M::gen_store_stack( + insts.push(M::gen_store_stack( StackAMode::SPOffset(offset, ty), *from_reg, ty, @@ -1875,16 +2049,18 @@ impl Caller { } &ABIArg::ImplicitPtrArg { .. } => unimplemented!(), // Only supported via ISLE. } + insts } /// Emit a copy a return value into a destination register, after the call returns. 
- pub fn emit_copy_retval_to_regs( + pub fn gen_copy_retval_to_regs( &self, - ctx: &mut Lower, + ctx: &Lower, idx: usize, into_regs: ValueRegs>, - ) { - match &self.sig.rets[idx] { + ) -> SmallInstVec { + let mut insts = smallvec![]; + match &ctx.sigs()[self.sig].rets[idx] { &ABIArg::Slots { ref slots, .. } => { assert_eq!(into_regs.len(), slots.len()); for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) { @@ -1892,11 +2068,11 @@ impl Caller { // Extension mode doesn't matter because we're copying out, not in, // and we ignore high bits in our own registers by convention. &ABIArgSlot::Reg { reg, ty, .. } => { - ctx.emit(M::gen_move(*into_reg, Reg::from(reg), ty)); + insts.push(M::gen_move(*into_reg, Reg::from(reg), ty)); } &ABIArgSlot::Stack { offset, ty, .. } => { - let ret_area_base = self.sig.sized_stack_arg_space; - ctx.emit(M::gen_load_stack( + let ret_area_base = ctx.sigs()[self.sig].sized_stack_arg_space; + insts.push(M::gen_load_stack( StackAMode::SPOffset(offset + ret_area_base, ty), *into_reg, ty, @@ -1912,6 +2088,7 @@ impl Caller { panic!("ImplicitPtrArg not supported in return position"); } } + insts } /// Emit the call itself. 
@@ -1933,15 +2110,17 @@ impl Caller { mem::replace(&mut self.defs, Default::default()), ); let word_type = M::word_type(); - if let Some(i) = self.sig.stack_ret_arg { + if let Some(i) = ctx.sigs()[self.sig].stack_ret_arg { let rd = ctx.alloc_tmp(word_type).only_reg().unwrap(); - let ret_area_base = self.sig.sized_stack_arg_space; + let ret_area_base = ctx.sigs()[self.sig].sized_stack_arg_space; ctx.emit(M::gen_get_stack_addr( StackAMode::SPOffset(ret_area_base, I8), rd, I8, )); - self.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(rd.to_reg())); + for inst in self.gen_copy_regs_to_arg(ctx, i, ValueRegs::one(rd.to_reg())) { + ctx.emit(inst); + } } let tmp = ctx.alloc_tmp(word_type).only_reg().unwrap(); for inst in M::gen_call( @@ -1951,7 +2130,7 @@ impl Caller { self.clobbers, self.opcode, tmp, - self.sig.call_conv, + ctx.sigs()[self.sig].call_conv, self.caller_conv, ) .into_iter() diff --git a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs index 003061cdf1..d64d054d38 100644 --- a/cranelift/codegen/src/machinst/compile.rs +++ b/cranelift/codegen/src/machinst/compile.rs @@ -17,11 +17,14 @@ pub fn compile( abi: Callee<<::MInst as MachInst>::ABIMachineSpec>, machine_env: &MachineEnv, emit_info: ::Info, + sigs: SigSet, ) -> CodegenResult<(VCode, regalloc2::Output)> { // Compute lowered block order. let block_order = BlockLoweringOrder::new(f); + // Build the lowering context. - let lower = Lower::new(f, abi, emit_info, block_order)?; + let lower = crate::machinst::Lower::new(f, abi, emit_info, block_order, sigs)?; + // Lower the IR. 
let vcode = { let _tt = timing::vcode_lower(); diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index c918a909c2..fdc0bbf1d6 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -14,7 +14,7 @@ pub use crate::ir::{ }; pub use crate::isa::unwind::UnwindInst; pub use crate::machinst::{ - ABIArg, ABIArgSlot, ABISig, InputSourceInst, Lower, RealReg, Reg, RelocDistance, VCodeInst, + ABIArg, ABIArgSlot, InputSourceInst, Lower, RealReg, Reg, RelocDistance, Sig, VCodeInst, Writable, }; pub use crate::settings::TlsModel; @@ -938,40 +938,40 @@ macro_rules! isle_prelude_methods { regs.regs()[idx] } - fn abi_num_args(&mut self, abi: &ABISig) -> usize { - abi.num_args() + fn abi_num_args(&mut self, abi: &Sig) -> usize { + self.lower_ctx.sigs()[*abi].num_args() } - fn abi_get_arg(&mut self, abi: &ABISig, idx: usize) -> ABIArg { - abi.get_arg(idx) + fn abi_get_arg(&mut self, abi: &Sig, idx: usize) -> ABIArg { + self.lower_ctx.sigs()[*abi].get_arg(idx) } - fn abi_num_rets(&mut self, abi: &ABISig) -> usize { - abi.num_rets() + fn abi_num_rets(&mut self, abi: &Sig) -> usize { + self.lower_ctx.sigs()[*abi].num_rets() } - fn abi_get_ret(&mut self, abi: &ABISig, idx: usize) -> ABIArg { - abi.get_ret(idx) + fn abi_get_ret(&mut self, abi: &Sig, idx: usize) -> ABIArg { + self.lower_ctx.sigs()[*abi].get_ret(idx) } - fn abi_ret_arg(&mut self, abi: &ABISig) -> Option { - abi.get_ret_arg() + fn abi_ret_arg(&mut self, abi: &Sig) -> Option { + self.lower_ctx.sigs()[*abi].get_ret_arg() } - fn abi_no_ret_arg(&mut self, abi: &ABISig) -> Option<()> { - if let Some(_) = abi.get_ret_arg() { + fn abi_no_ret_arg(&mut self, abi: &Sig) -> Option<()> { + if let Some(_) = self.lower_ctx.sigs()[*abi].get_ret_arg() { None } else { Some(()) } } - fn abi_sized_stack_arg_space(&mut self, abi: &ABISig) -> i64 { - abi.sized_stack_arg_space() + fn abi_sized_stack_arg_space(&mut self, abi: &Sig) -> i64 { + 
self.lower_ctx.sigs()[*abi].sized_stack_arg_space() } - fn abi_sized_stack_ret_space(&mut self, abi: &ABISig) -> i64 { - abi.sized_stack_ret_space() + fn abi_sized_stack_ret_space(&mut self, abi: &Sig) -> i64 { + self.lower_ctx.sigs()[*abi].sized_stack_ret_space() } fn abi_arg_only_slot(&mut self, arg: &ABIArg) -> Option { @@ -1094,12 +1094,19 @@ macro_rules! isle_prelude_caller_methods { dist: RelocDistance, args @ (inputs, off): ValueSlice, ) -> InstOutput { - let caller_conv = self.lower_ctx.abi().call_conv(); + let caller_conv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()); let sig = &self.lower_ctx.dfg().signatures[sig_ref]; let num_rets = sig.returns.len(); - let abi = ABISig::from_func_sig::<$abispec>(sig, self.flags).unwrap(); - let caller = - <$abicaller>::from_func(sig, &extname, dist, caller_conv, self.flags).unwrap(); + let abi = self.lower_ctx.sigs().abi_sig_for_sig_ref(sig_ref); + let caller = <$abicaller>::from_func( + self.lower_ctx.sigs(), + sig_ref, + &extname, + dist, + caller_conv, + self.flags.clone(), + ) + .unwrap(); assert_eq!( inputs.len(&self.lower_ctx.dfg().value_lists) - off, @@ -1115,14 +1122,20 @@ macro_rules! 
isle_prelude_caller_methods { val: Value, args @ (inputs, off): ValueSlice, ) -> InstOutput { - let caller_conv = self.lower_ctx.abi().call_conv(); + let caller_conv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()); let ptr = self.put_in_reg(val); let sig = &self.lower_ctx.dfg().signatures[sig_ref]; let num_rets = sig.returns.len(); - let abi = ABISig::from_func_sig::<$abispec>(sig, self.flags).unwrap(); - let caller = - <$abicaller>::from_ptr(sig, ptr, Opcode::CallIndirect, caller_conv, self.flags) - .unwrap(); + let abi = self.lower_ctx.sigs().abi_sig_for_sig_ref(sig_ref); + let caller = <$abicaller>::from_ptr( + self.lower_ctx.sigs(), + sig_ref, + ptr, + Opcode::CallIndirect, + caller_conv, + self.flags.clone(), + ) + .unwrap(); assert_eq!( inputs.len(&self.lower_ctx.dfg().value_lists) - off, @@ -1142,19 +1155,21 @@ macro_rules! isle_prelude_method_helpers { ($abicaller:ty) => { fn gen_call_common( &mut self, - abi: ABISig, + abi: Sig, num_rets: usize, mut caller: $abicaller, (inputs, off): ValueSlice, ) -> InstOutput { caller.emit_stack_pre_adjust(self.lower_ctx); + let num_args = self.lower_ctx.sigs()[abi].num_args(); + assert_eq!( inputs.len(&self.lower_ctx.dfg().value_lists) - off, - abi.num_args() + num_args ); let mut arg_regs = vec![]; - for i in 0..abi.num_args() { + for i in 0..num_args { let input = inputs .get(off + i, &self.lower_ctx.dfg().value_lists) .unwrap(); @@ -1164,15 +1179,19 @@ macro_rules! 
isle_prelude_method_helpers { caller.emit_copy_regs_to_buffer(self.lower_ctx, i, *arg_regs); } for (i, arg_regs) in arg_regs.iter().enumerate() { - caller.emit_copy_regs_to_arg(self.lower_ctx, i, *arg_regs); + for inst in caller.gen_copy_regs_to_arg(self.lower_ctx, i, *arg_regs) { + self.lower_ctx.emit(inst); + } } caller.emit_call(self.lower_ctx); let mut outputs = InstOutput::new(); for i in 0..num_rets { - let ret = abi.get_ret(i); + let ret = self.lower_ctx.sigs()[abi].get_ret(i); let retval_regs = self.abi_arg_slot_regs(&ret).unwrap(); - caller.emit_copy_retval_to_regs(self.lower_ctx, i, retval_regs.clone()); + for inst in caller.gen_copy_retval_to_regs(self.lower_ctx, i, retval_regs.clone()) { + self.lower_ctx.emit(inst); + } outputs.push(valueregs::non_writable_value_regs(retval_regs)); } caller.emit_stack_post_adjust(self.lower_ctx); diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index caa5afe2df..5e9276cda2 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -10,14 +10,13 @@ use crate::fx::{FxHashMap, FxHashSet}; use crate::inst_predicates::{has_lowering_side_effect, is_constant_64bit}; use crate::ir::{ types::{FFLAGS, IFLAGS}, - ArgumentPurpose, Block, Constant, ConstantData, DataFlowGraph, Function, GlobalValue, - GlobalValueData, Immediate, Inst, InstructionData, MemFlags, Opcode, Type, Value, ValueDef, - ValueLabelAssignments, ValueLabelStart, + ArgumentPurpose, Block, Constant, ConstantData, DataFlowGraph, ExternalName, Function, + GlobalValue, GlobalValueData, Immediate, Inst, InstructionData, MemFlags, Opcode, RelSourceLoc, + Type, Value, ValueDef, ValueLabelAssignments, ValueLabelStart, }; -use crate::ir::{ExternalName, RelSourceLoc}; use crate::machinst::{ non_writable_value_regs, writable_value_regs, BlockIndex, BlockLoweringOrder, Callee, - LoweredBlock, MachLabel, Reg, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, + LoweredBlock, 
MachLabel, Reg, SigSet, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants, VCodeInst, ValueRegs, Writable, }; use crate::{trace, CodegenResult}; @@ -345,9 +344,11 @@ impl<'func, I: VCodeInst> Lower<'func, I> { abi: Callee, emit_info: I::Info, block_order: BlockLoweringOrder, + sigs: SigSet, ) -> CodegenResult> { let constants = VCodeConstants::with_capacity(f.dfg.constants.len()); let mut vcode = VCodeBuilder::new( + sigs, abi, emit_info, block_order, @@ -445,6 +446,14 @@ impl<'func, I: VCodeInst> Lower<'func, I> { }) } + pub fn sigs(&self) -> &SigSet { + self.vcode.sigs() + } + + pub fn sigs_mut(&mut self) -> &mut SigSet { + self.vcode.sigs_mut() + } + /// Pre-analysis: compute `value_ir_uses`. See comment on /// `ValueUseState` for a description of what this analysis /// computes. @@ -571,7 +580,12 @@ impl<'func, I: VCodeInst> Lower<'func, I> { continue; } let regs = writable_value_regs(self.value_regs[*param]); - for insn in self.vcode.abi().gen_copy_arg_to_regs(i, regs).into_iter() { + for insn in self + .vcode + .abi() + .gen_copy_arg_to_regs(self.sigs(), i, regs) + .into_iter() + { self.emit(insn); } if self.abi().signature().params[i].purpose == ArgumentPurpose::StructReturn { @@ -593,7 +607,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { )); } } - if let Some(insn) = self.vcode.abi().gen_retval_area_setup() { + if let Some(insn) = self.vcode.abi().gen_retval_area_setup(self.sigs()) { self.emit(insn); } } @@ -606,13 +620,13 @@ impl<'func, I: VCodeInst> Lower<'func, I> { for insn in self .vcode .abi() - .gen_copy_regs_to_retval(i, regs) + .gen_copy_regs_to_retval(self.sigs(), i, regs) .into_iter() { self.emit(insn); } } - let inst = self.vcode.abi().gen_ret(); + let inst = self.vcode.abi().gen_ret(self.sigs()); self.emit(inst); // Hack: generate a virtual instruction that uses vmctx in @@ -906,11 +920,11 @@ impl<'func, I: VCodeInst> Lower<'func, I> { let temps = self .vcode .abi() - .temps_needed() + .temps_needed(self.sigs()) 
.into_iter() .map(|temp_ty| self.alloc_tmp(temp_ty).only_reg().unwrap()) .collect::>(); - self.vcode.abi().init(temps); + self.vcode.init_abi(temps); // Get the pinned reg here (we only parameterize this function on `B`, // not the whole `Lower` impl). @@ -1006,10 +1020,15 @@ impl<'func, I: VCodeInst> Lower<'func, I> { } /// Get the `Callee`. - pub fn abi(&mut self) -> &mut Callee { + pub fn abi(&self) -> &Callee { self.vcode.abi() } + /// Get the `Callee`. + pub fn abi_mut(&mut self) -> &mut Callee { + self.vcode.abi_mut() + } + /// Get the (virtual) register that receives the return value. A return /// instruction should lower into a sequence that fills this register. (Why /// not allow the backend to specify its own result register for the return? diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index bcb0e02721..336284503b 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -178,6 +178,8 @@ pub struct VCode { /// Value labels for debuginfo attached to vregs. debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>, + + sigs: SigSet, } /// The result of `VCode::emit`. Contains all information computed @@ -279,13 +281,14 @@ pub enum VCodeBuildDirection { impl VCodeBuilder { /// Create a new VCodeBuilder. pub fn new( + sigs: SigSet, abi: Callee, emit_info: I::Info, block_order: BlockLoweringOrder, constants: VCodeConstants, direction: VCodeBuildDirection, ) -> VCodeBuilder { - let vcode = VCode::new(abi, emit_info, block_order, constants); + let vcode = VCode::new(sigs, abi, emit_info, block_order, constants); VCodeBuilder { vcode, @@ -299,11 +302,28 @@ impl VCodeBuilder { } } + pub fn init_abi(&mut self, temps: Vec>) { + self.vcode.abi.init(&self.vcode.sigs, temps); + } + /// Access the ABI object. - pub fn abi(&mut self) -> &mut Callee { + pub fn abi(&self) -> &Callee { + &self.vcode.abi + } + + /// Access the ABI object. 
+ pub fn abi_mut(&mut self) -> &mut Callee { &mut self.vcode.abi } + pub fn sigs(&self) -> &SigSet { + &self.vcode.sigs + } + + pub fn sigs_mut(&mut self) -> &mut SigSet { + &mut self.vcode.sigs + } + /// Access to the BlockLoweringOrder object. pub fn block_order(&self) -> &BlockLoweringOrder { &self.vcode.block_order @@ -625,6 +645,7 @@ fn is_reftype(ty: Type) -> bool { impl VCode { /// New empty VCode. fn new( + sigs: SigSet, abi: Callee, emit_info: I::Info, block_order: BlockLoweringOrder, @@ -632,6 +653,7 @@ impl VCode { ) -> VCode { let n_blocks = block_order.lowered_order().len(); VCode { + sigs, vreg_types: vec![], have_ref_values: false, insts: Vec::with_capacity(10 * n_blocks), @@ -748,7 +770,7 @@ impl VCode { want_metadata: bool, ) -> EmitResult where - I: MachInstEmit, + I: VCodeInst, { // To write into disasm string. use core::fmt::Write; @@ -790,7 +812,7 @@ impl VCode { // We need to generate the prologue in order to get the ABI // object into the right state first. We'll emit it when we // hit the right block below. - let prologue_insts = self.abi.gen_prologue(); + let prologue_insts = self.abi.gen_prologue(&self.sigs); // Emit blocks. let mut cur_srcloc = None; diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 69d7bd2f66..d80929e93e 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -904,7 +904,7 @@ ;;;; Helpers for generating calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Type to hold information about a function call signature. -(type ABISig extern (enum)) +(type Sig extern (enum)) ;; Information how to pass one argument or return value. (type ABIArg extern (enum)) @@ -934,36 +934,36 @@ )) ;; Get the number of arguments expected. -(decl abi_num_args (ABISig) usize) +(decl abi_num_args (Sig) usize) (extern constructor abi_num_args abi_num_args) ;; Get information specifying how to pass one argument. 
-(decl abi_get_arg (ABISig usize) ABIArg) +(decl abi_get_arg (Sig usize) ABIArg) (extern constructor abi_get_arg abi_get_arg) ;; Get the number of return values expected. -(decl abi_num_rets (ABISig) usize) +(decl abi_num_rets (Sig) usize) (extern constructor abi_num_rets abi_num_rets) ;; Get information specifying how to pass one return value. -(decl abi_get_ret (ABISig usize) ABIArg) +(decl abi_get_ret (Sig usize) ABIArg) (extern constructor abi_get_ret abi_get_ret) ;; Get information specifying how to pass the implicit pointer ;; to the return-value area on the stack, if required. -(decl abi_ret_arg (ABIArg) ABISig) +(decl abi_ret_arg (ABIArg) Sig) (extern extractor abi_ret_arg abi_ret_arg) ;; Succeeds if no implicit return-value area pointer is required. -(decl abi_no_ret_arg () ABISig) +(decl abi_no_ret_arg () Sig) (extern extractor abi_no_ret_arg abi_no_ret_arg) ;; Size of the argument area. -(decl abi_sized_stack_arg_space (ABISig) i64) +(decl abi_sized_stack_arg_space (Sig) i64) (extern constructor abi_sized_stack_arg_space abi_sized_stack_arg_space) ;; Size of the return-value area. -(decl abi_sized_stack_ret_space (ABISig) i64) +(decl abi_sized_stack_ret_space (Sig) i64) (extern constructor abi_sized_stack_ret_space abi_sized_stack_ret_space) ;; StackSlot addr