Cranelift: Deduplicate ABI signatures during lowering (#4829)
* Cranelift: Deduplicate ABI signatures during lowering

  This commit creates the `SigSet` type which interns and deduplicates the ABI signatures that we create from `ir::Signature`s. The ABI signatures are now referred to indirectly via a `Sig` (which is a `cranelift_entity` ID), and we pass around a `SigSet` to anything that needs to access the actual underlying `SigData` (which is what `ABISig` used to be).

  I had to change a couple of methods to return a `SmallInstVec` instead of emitting directly to work around what would otherwise be shared and exclusive borrows of the lowering context overlapping. I don't expect any of these to heap allocate in practice.

  This does not remove the often-unnecessary allocations caused by `ensure_struct_return_ptr_is_returned`. That is left for follow-up work.

  This also opens the door for further shuffling of signature data into more efficient representations in the future, now that we have `SigSet` to store it all in one place and it is threaded through all the code. We could potentially move each signature's parameter and return vectors into one big vector shared between all signatures, for example, which could cut down on allocations and shrink the size of `SigData` since those `SmallVec`s have pretty large inline capacity.

  Overall, this refactoring gives a 1-7% speedup for compilation on `pulldown-cmark`:

  ```
  compilation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm

    Δ = 8754213.66 ± 7526266.23 (confidence = 99%)

    dedupe.so is 1.01x to 1.07x faster than main.so!

    [191003295 234620642.20 280597986] dedupe.so
    [197626699 243374855.86 321816763] main.so

  compilation :: cycles :: benchmarks/bz2/benchmark.wasm

    No difference in performance.

    [170406200 194299792.68 253001201] dedupe.so
    [172071888 193230743.11 223608329] main.so

  compilation :: cycles :: benchmarks/spidermonkey/benchmark.wasm

    No difference in performance.

    [3870997347 4437735062.59 5216007266] dedupe.so
    [4019924063 4424595349.24 4965088931] main.so
  ```

* Use full path instead of import to avoid warnings in some build configurations

  Warnings will then cause CI to fail.

* Move `SigSet` into `VCode`
This commit is contained in:
@@ -26,7 +26,7 @@ use crate::{
|
||||
immediates::*, types::*, AtomicRmwOp, ExternalName, Inst, InstructionData, MemFlags,
|
||||
TrapCode, Value, ValueList,
|
||||
},
|
||||
isa::aarch64::abi::{AArch64Caller, AArch64MachineDeps},
|
||||
isa::aarch64::abi::AArch64Caller,
|
||||
isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm},
|
||||
isa::aarch64::lower::{writable_vreg, writable_xreg, xreg},
|
||||
isa::unwind::UnwindInst,
|
||||
@@ -80,7 +80,7 @@ impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
||||
|
||||
impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
||||
isle_prelude_methods!();
|
||||
isle_prelude_caller_methods!(AArch64MachineDeps, AArch64Caller);
|
||||
isle_prelude_caller_methods!(crate::isa::aarch64::abi::AArch64MachineDeps, AArch64Caller);
|
||||
|
||||
fn sign_return_address_disabled(&mut self) -> Option<()> {
|
||||
if self.isa_flags.sign_return_address() {
|
||||
|
||||
@@ -7,8 +7,8 @@ use crate::isa::aarch64::settings as aarch64_settings;
|
||||
use crate::isa::unwind::systemv;
|
||||
use crate::isa::{Builder as IsaBuilder, TargetIsa};
|
||||
use crate::machinst::{
|
||||
compile, CompiledCode, CompiledCodeStencil, MachTextSectionBuilder, Reg, TextSectionBuilder,
|
||||
VCode,
|
||||
compile, CompiledCode, CompiledCodeStencil, MachTextSectionBuilder, Reg, SigSet,
|
||||
TextSectionBuilder, VCode,
|
||||
};
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings as shared_settings;
|
||||
@@ -60,8 +60,9 @@ impl AArch64Backend {
|
||||
flags: shared_settings::Flags,
|
||||
) -> CodegenResult<(VCode<inst::Inst>, regalloc2::Output)> {
|
||||
let emit_info = EmitInfo::new(flags.clone());
|
||||
let abi = abi::AArch64Callee::new(func, self, &self.isa_flags)?;
|
||||
compile::compile::<AArch64Backend>(func, self, abi, &self.machine_env, emit_info)
|
||||
let sigs = SigSet::new::<abi::AArch64MachineDeps>(func, &self.flags)?;
|
||||
let abi = abi::AArch64Callee::new(func, self, &self.isa_flags, &sigs)?;
|
||||
compile::compile::<AArch64Backend>(func, self, abi, &self.machine_env, emit_info, sigs)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3610,30 +3610,30 @@
|
||||
|
||||
;; Helpers for generating `call` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl abi_sig (SigRef) ABISig)
|
||||
(decl abi_sig (SigRef) Sig)
|
||||
(extern constructor abi_sig abi_sig)
|
||||
|
||||
(decl abi_call_info (ABISig ExternalName Opcode) BoxCallInfo)
|
||||
(decl abi_call_info (Sig ExternalName Opcode) BoxCallInfo)
|
||||
(extern constructor abi_call_info abi_call_info)
|
||||
|
||||
(decl abi_call_ind_info (ABISig Reg Opcode) BoxCallIndInfo)
|
||||
(decl abi_call_ind_info (Sig Reg Opcode) BoxCallIndInfo)
|
||||
(extern constructor abi_call_ind_info abi_call_ind_info)
|
||||
|
||||
(decl writable_link_reg () WritableReg)
|
||||
(rule (writable_link_reg) (writable_gpr 14))
|
||||
|
||||
(decl abi_call (ABISig ExternalName Opcode) SideEffectNoResult)
|
||||
(decl abi_call (Sig ExternalName Opcode) SideEffectNoResult)
|
||||
(rule (abi_call abi name opcode)
|
||||
(call_impl (writable_link_reg) (abi_call_info abi name opcode)))
|
||||
|
||||
(decl abi_call_ind (ABISig Reg Opcode) SideEffectNoResult)
|
||||
(decl abi_call_ind (Sig Reg Opcode) SideEffectNoResult)
|
||||
(rule (abi_call_ind abi target opcode)
|
||||
(call_ind_impl (writable_link_reg) (abi_call_ind_info abi target opcode)))
|
||||
|
||||
(decl abi_accumulate_outgoing_args_size (ABISig) Unit)
|
||||
(decl abi_accumulate_outgoing_args_size (Sig) Unit)
|
||||
(extern constructor abi_accumulate_outgoing_args_size abi_accumulate_outgoing_args_size)
|
||||
|
||||
(decl abi_lane_order (ABISig) LaneOrder)
|
||||
(decl abi_lane_order (Sig) LaneOrder)
|
||||
(extern constructor abi_lane_order abi_lane_order)
|
||||
|
||||
|
||||
|
||||
@@ -4060,7 +4060,7 @@
|
||||
|
||||
;; Direct call to an in-range function.
|
||||
(rule (lower (call (func_ref_data sig_ref name (reloc_distance_near)) args))
|
||||
(let ((abi ABISig (abi_sig sig_ref))
|
||||
(let ((abi Sig (abi_sig sig_ref))
|
||||
(_ Unit (abi_accumulate_outgoing_args_size abi))
|
||||
(_ InstOutput (lower_call_args abi (range 0 (abi_num_args abi)) args))
|
||||
(_ InstOutput (side_effect (abi_call abi name (Opcode.Call)))))
|
||||
@@ -4068,7 +4068,7 @@
|
||||
|
||||
;; Direct call to an out-of-range function (implicitly via pointer).
|
||||
(rule (lower (call (func_ref_data sig_ref name _) args))
|
||||
(let ((abi ABISig (abi_sig sig_ref))
|
||||
(let ((abi Sig (abi_sig sig_ref))
|
||||
(_ Unit (abi_accumulate_outgoing_args_size abi))
|
||||
(_ InstOutput (lower_call_args abi (range 0 (abi_num_args abi)) args))
|
||||
(target Reg (load_symbol_reloc (SymbolReloc.Absolute name 0)))
|
||||
@@ -4077,7 +4077,7 @@
|
||||
|
||||
;; Indirect call.
|
||||
(rule (lower (call_indirect sig_ref ptr args))
|
||||
(let ((abi ABISig (abi_sig sig_ref))
|
||||
(let ((abi Sig (abi_sig sig_ref))
|
||||
(target Reg (put_in_reg ptr))
|
||||
(_ Unit (abi_accumulate_outgoing_args_size abi))
|
||||
(_ InstOutput (lower_call_args abi (range 0 (abi_num_args abi)) args))
|
||||
@@ -4085,14 +4085,14 @@
|
||||
(lower_call_rets abi (range 0 (abi_num_rets abi)) (output_builder_new))))
|
||||
|
||||
;; Lower function arguments.
|
||||
(decl lower_call_args (ABISig Range ValueSlice) InstOutput)
|
||||
(decl lower_call_args (Sig Range ValueSlice) InstOutput)
|
||||
(rule (lower_call_args abi range args)
|
||||
(let ((_ InstOutput (lower_call_args_buffer abi range args))
|
||||
(_ InstOutput (lower_call_args_slots abi range args)))
|
||||
(lower_call_ret_arg abi)))
|
||||
|
||||
;; Lower function arguments (part 1): prepare buffer copies.
|
||||
(decl lower_call_args_buffer (ABISig Range ValueSlice) InstOutput)
|
||||
(decl lower_call_args_buffer (Sig Range ValueSlice) InstOutput)
|
||||
(rule (lower_call_args_buffer abi (range_empty) _) (output_none))
|
||||
(rule (lower_call_args_buffer abi (range_unwrap head tail) args)
|
||||
(let ((_ InstOutput (copy_to_buffer 0 (abi_get_arg abi head)
|
||||
@@ -4100,7 +4100,7 @@
|
||||
(lower_call_args_buffer abi tail args)))
|
||||
|
||||
;; Lower function arguments (part 2): set up registers / stack slots.
|
||||
(decl lower_call_args_slots (ABISig Range ValueSlice) InstOutput)
|
||||
(decl lower_call_args_slots (Sig Range ValueSlice) InstOutput)
|
||||
(rule (lower_call_args_slots abi (range_empty) _) (output_none))
|
||||
(rule (lower_call_args_slots abi (range_unwrap head tail) args)
|
||||
(let ((_ Unit (copy_to_arg (abi_lane_order abi)
|
||||
@@ -4109,7 +4109,7 @@
|
||||
(lower_call_args_slots abi tail args)))
|
||||
|
||||
;; Lower function arguments (part 3): implicit return-area pointer.
|
||||
(decl lower_call_ret_arg (ABISig) InstOutput)
|
||||
(decl lower_call_ret_arg (Sig) InstOutput)
|
||||
(rule (lower_call_ret_arg (abi_no_ret_arg)) (output_none))
|
||||
(rule (lower_call_ret_arg abi @ (abi_ret_arg (abi_arg_only_slot slot)))
|
||||
(let ((ret_arg Reg (load_addr (memarg_stack_off (abi_sized_stack_arg_space abi) 0)))
|
||||
@@ -4117,7 +4117,7 @@
|
||||
(output_none)))
|
||||
|
||||
;; Lower function return values by collecting them from registers / stack slots.
|
||||
(decl lower_call_rets (ABISig Range InstOutputBuilder) InstOutput)
|
||||
(decl lower_call_rets (Sig Range InstOutputBuilder) InstOutput)
|
||||
(rule (lower_call_rets abi (range_empty) builder) (output_builder_finish builder))
|
||||
(rule (lower_call_rets abi (range_unwrap head tail) builder)
|
||||
(let ((ret ValueRegs (copy_from_arg (abi_lane_order abi)
|
||||
|
||||
@@ -89,46 +89,48 @@ pub(crate) fn lower_branch(
|
||||
impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
||||
isle_prelude_methods!();
|
||||
|
||||
fn abi_sig(&mut self, sig_ref: SigRef) -> ABISig {
|
||||
let sig = &self.lower_ctx.dfg().signatures[sig_ref];
|
||||
ABISig::from_func_sig::<S390xMachineDeps>(sig, self.flags).unwrap()
|
||||
fn abi_sig(&mut self, sig_ref: SigRef) -> Sig {
|
||||
self.lower_ctx.sigs().abi_sig_for_sig_ref(sig_ref)
|
||||
}
|
||||
|
||||
fn abi_lane_order(&mut self, abi: &ABISig) -> LaneOrder {
|
||||
lane_order_for_call_conv(abi.call_conv())
|
||||
fn abi_lane_order(&mut self, abi: &Sig) -> LaneOrder {
|
||||
lane_order_for_call_conv(self.lower_ctx.sigs()[*abi].call_conv())
|
||||
}
|
||||
|
||||
fn abi_accumulate_outgoing_args_size(&mut self, abi: &ABISig) -> Unit {
|
||||
let off = abi.sized_stack_arg_space() + abi.sized_stack_ret_space();
|
||||
fn abi_accumulate_outgoing_args_size(&mut self, abi: &Sig) -> Unit {
|
||||
let off = self.lower_ctx.sigs()[*abi].sized_stack_arg_space()
|
||||
+ self.lower_ctx.sigs()[*abi].sized_stack_ret_space();
|
||||
self.lower_ctx
|
||||
.abi()
|
||||
.abi_mut()
|
||||
.accumulate_outgoing_args_size(off as u32);
|
||||
}
|
||||
|
||||
fn abi_call_info(&mut self, abi: &ABISig, name: ExternalName, opcode: &Opcode) -> BoxCallInfo {
|
||||
let (uses, defs, clobbers) = abi.call_uses_defs_clobbers::<S390xMachineDeps>();
|
||||
fn abi_call_info(&mut self, abi: &Sig, name: ExternalName, opcode: &Opcode) -> BoxCallInfo {
|
||||
let (uses, defs, clobbers) =
|
||||
self.lower_ctx.sigs()[*abi].call_uses_defs_clobbers::<S390xMachineDeps>();
|
||||
Box::new(CallInfo {
|
||||
dest: name.clone(),
|
||||
uses,
|
||||
defs,
|
||||
clobbers,
|
||||
opcode: *opcode,
|
||||
caller_callconv: self.lower_ctx.abi().call_conv(),
|
||||
callee_callconv: abi.call_conv(),
|
||||
caller_callconv: self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()),
|
||||
callee_callconv: self.lower_ctx.sigs()[*abi].call_conv(),
|
||||
tls_symbol: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn abi_call_ind_info(&mut self, abi: &ABISig, target: Reg, opcode: &Opcode) -> BoxCallIndInfo {
|
||||
let (uses, defs, clobbers) = abi.call_uses_defs_clobbers::<S390xMachineDeps>();
|
||||
fn abi_call_ind_info(&mut self, abi: &Sig, target: Reg, opcode: &Opcode) -> BoxCallIndInfo {
|
||||
let (uses, defs, clobbers) =
|
||||
self.lower_ctx.sigs()[*abi].call_uses_defs_clobbers::<S390xMachineDeps>();
|
||||
Box::new(CallIndInfo {
|
||||
rn: target,
|
||||
uses,
|
||||
defs,
|
||||
clobbers,
|
||||
opcode: *opcode,
|
||||
caller_callconv: self.lower_ctx.abi().call_conv(),
|
||||
callee_callconv: abi.call_conv(),
|
||||
caller_callconv: self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()),
|
||||
callee_callconv: self.lower_ctx.sigs()[*abi].call_conv(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -149,12 +151,12 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
|
||||
fn lib_accumulate_outgoing_args_size(&mut self, _: &LibCallInfo) -> Unit {
|
||||
// Libcalls only require the register save area.
|
||||
self.lower_ctx
|
||||
.abi()
|
||||
.abi_mut()
|
||||
.accumulate_outgoing_args_size(REG_SAVE_AREA_SIZE);
|
||||
}
|
||||
|
||||
fn lib_call_info(&mut self, info: &LibCallInfo) -> BoxCallInfo {
|
||||
let caller_callconv = self.lower_ctx.abi().call_conv();
|
||||
let caller_callconv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs());
|
||||
let callee_callconv = CallConv::for_libcall(&self.flags, caller_callconv);
|
||||
|
||||
// Uses and defs are defined by the particular libcall.
|
||||
@@ -403,7 +405,9 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
|
||||
|
||||
#[inline]
|
||||
fn lane_order(&mut self) -> Option<LaneOrder> {
|
||||
Some(lane_order_for_call_conv(self.lower_ctx.abi().call_conv()))
|
||||
Some(lane_order_for_call_conv(
|
||||
self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()),
|
||||
))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
||||
@@ -7,8 +7,8 @@ use crate::isa::s390x::settings as s390x_settings;
|
||||
use crate::isa::unwind::systemv::RegisterMappingError;
|
||||
use crate::isa::{Builder as IsaBuilder, TargetIsa};
|
||||
use crate::machinst::{
|
||||
compile, CompiledCode, CompiledCodeStencil, MachTextSectionBuilder, Reg, TextSectionBuilder,
|
||||
VCode,
|
||||
compile, CompiledCode, CompiledCodeStencil, MachTextSectionBuilder, Reg, SigSet,
|
||||
TextSectionBuilder, VCode,
|
||||
};
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings as shared_settings;
|
||||
@@ -58,8 +58,9 @@ impl S390xBackend {
|
||||
func: &Function,
|
||||
) -> CodegenResult<(VCode<inst::Inst>, regalloc2::Output)> {
|
||||
let emit_info = EmitInfo::new(self.isa_flags.clone());
|
||||
let abi = abi::S390xCallee::new(func, self, &self.isa_flags)?;
|
||||
compile::compile::<S390xBackend>(func, self, abi, &self.machine_env, emit_info)
|
||||
let sigs = SigSet::new::<abi::S390xMachineDeps>(func, &self.flags)?;
|
||||
let abi = abi::S390xCallee::new(func, self, &self.isa_flags, &sigs)?;
|
||||
compile::compile::<S390xBackend>(func, self, abi, &self.machine_env, emit_info, sigs)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -153,21 +153,31 @@ fn emit_vm_call(
|
||||
// TODO avoid recreating signatures for every single Libcall function.
|
||||
let call_conv = CallConv::for_libcall(flags, CallConv::triple_default(triple));
|
||||
let sig = libcall.signature(call_conv);
|
||||
let caller_conv = ctx.abi().call_conv();
|
||||
let caller_conv = ctx.abi().call_conv(ctx.sigs());
|
||||
|
||||
let mut abi = X64Caller::from_func(&sig, &extname, dist, caller_conv, flags)?;
|
||||
if !ctx.sigs().have_abi_sig_for_signature(&sig) {
|
||||
ctx.sigs_mut()
|
||||
.make_abi_sig_from_ir_signature::<X64ABIMachineSpec>(sig.clone(), flags)?;
|
||||
}
|
||||
|
||||
let mut abi =
|
||||
X64Caller::from_libcall(ctx.sigs(), &sig, &extname, dist, caller_conv, flags.clone())?;
|
||||
|
||||
abi.emit_stack_pre_adjust(ctx);
|
||||
|
||||
assert_eq!(inputs.len(), abi.num_args());
|
||||
assert_eq!(inputs.len(), abi.num_args(ctx.sigs()));
|
||||
|
||||
for (i, input) in inputs.iter().enumerate() {
|
||||
abi.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(*input));
|
||||
for inst in abi.gen_copy_regs_to_arg(ctx, i, ValueRegs::one(*input)) {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
|
||||
abi.emit_call(ctx);
|
||||
for (i, output) in outputs.iter().enumerate() {
|
||||
abi.emit_copy_retval_to_regs(ctx, i, ValueRegs::one(*output));
|
||||
for inst in abi.gen_copy_retval_to_regs(ctx, i, ValueRegs::one(*output)) {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
abi.emit_stack_post_adjust(ctx);
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ use crate::{
|
||||
settings::Flags,
|
||||
unwind::UnwindInst,
|
||||
x64::{
|
||||
abi::{X64ABIMachineSpec, X64Caller},
|
||||
abi::X64Caller,
|
||||
inst::{args::*, regs, CallInfo},
|
||||
settings::Flags as IsaFlags,
|
||||
},
|
||||
@@ -720,7 +720,7 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
||||
}
|
||||
|
||||
fn libcall_1(&mut self, libcall: &LibCall, a: Reg) -> Reg {
|
||||
let call_conv = self.lower_ctx.abi().call_conv();
|
||||
let call_conv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs());
|
||||
let ret_ty = libcall.signature(call_conv).returns[0].value_type;
|
||||
let output_reg = self.lower_ctx.alloc_tmp(ret_ty).only_reg().unwrap();
|
||||
|
||||
@@ -738,7 +738,7 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
||||
}
|
||||
|
||||
fn libcall_3(&mut self, libcall: &LibCall, a: Reg, b: Reg, c: Reg) -> Reg {
|
||||
let call_conv = self.lower_ctx.abi().call_conv();
|
||||
let call_conv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs());
|
||||
let ret_ty = libcall.signature(call_conv).returns[0].value_type;
|
||||
let output_reg = self.lower_ctx.alloc_tmp(ret_ty).only_reg().unwrap();
|
||||
|
||||
|
||||
@@ -9,8 +9,8 @@ use crate::isa::unwind::systemv;
|
||||
use crate::isa::x64::{inst::regs::create_reg_env_systemv, settings as x64_settings};
|
||||
use crate::isa::Builder as IsaBuilder;
|
||||
use crate::machinst::{
|
||||
compile, CompiledCode, CompiledCodeStencil, MachTextSectionBuilder, Reg, TextSectionBuilder,
|
||||
VCode,
|
||||
compile, CompiledCode, CompiledCodeStencil, MachTextSectionBuilder, Reg, SigSet,
|
||||
TextSectionBuilder, VCode,
|
||||
};
|
||||
use crate::result::{CodegenError, CodegenResult};
|
||||
use crate::settings::{self as shared_settings, Flags};
|
||||
@@ -53,8 +53,9 @@ impl X64Backend {
|
||||
// This performs lowering to VCode, register-allocates the code, computes
|
||||
// block layout and finalizes branches. The result is ready for binary emission.
|
||||
let emit_info = EmitInfo::new(flags.clone(), self.x64_flags.clone());
|
||||
let abi = abi::X64Callee::new(&func, self, &self.x64_flags)?;
|
||||
compile::compile::<Self>(&func, self, abi, &self.reg_env, emit_info)
|
||||
let sigs = SigSet::new::<abi::X64ABIMachineSpec>(func, &self.flags)?;
|
||||
let abi = abi::X64Callee::new(&func, self, &self.x64_flags, &sigs)?;
|
||||
compile::compile::<Self>(&func, self, abi, &self.reg_env, emit_info, sigs)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user