Use a SmallVec for ABIArgs (#4584)
Instead of a regular `Vec`. These vectors are usually very small, for example here is the histogram of sizes when running Sightglass's `pulldown-cmark` benchmark: ``` ;; Number of samples = 10332 ;; Min = 0 ;; Max = 11 ;; ;; Mean = 2.496128532713901 ;; Standard deviation = 2.2859559855427243 ;; Variance = 5.225594767838607 ;; ;; Each ∎ is a count of 62 ;; 0 .. 1 [ 3134 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ 1 .. 2 [ 2032 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ 2 .. 3 [ 159 ]: ∎∎ 3 .. 4 [ 838 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎ 4 .. 5 [ 970 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ 5 .. 6 [ 2566 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ 6 .. 7 [ 303 ]: ∎∎∎∎ 7 .. 8 [ 272 ]: ∎∎∎∎ 8 .. 9 [ 40 ]: 9 .. 10 [ 18 ]: ``` By using a `SmallVec` with capacity of 6 we avoid the vast majority of heap allocations and get some nice benchmark wins of up to ~1.11x faster compilation. <h3>Sightglass Benchmark Results</h3> ``` compilation :: nanoseconds :: benchmarks/spidermonkey/benchmark.wasm Δ = 340361395.90 ± 63384608.15 (confidence = 99%) main.so is 0.88x to 0.92x faster than smallvec.so! smallvec.so is 1.09x to 1.13x faster than main.so! [3101467423 3425524333.41 4060621653] main.so [2820915877 3085162937.51 3375167352] smallvec.so compilation :: cycles :: benchmarks/spidermonkey/benchmark.wasm Δ = 988446098.59 ± 184075718.89 (confidence = 99%) main.so is 0.88x to 0.92x faster than smallvec.so! smallvec.so is 1.09x to 1.13x faster than main.so! [9006994951 9948091070.66 11792481990] main.so [8192243090 8959644972.07 9801848982] smallvec.so compilation :: nanoseconds :: benchmarks/bz2/benchmark.wasm Δ = 7854567.87 ± 2215491.16 (confidence = 99%) main.so is 0.89x to 0.94x faster than smallvec.so! smallvec.so is 1.07x to 1.12x faster than main.so! [80354527 93864666.76 119789198] main.so [77554917 86010098.89 94726994] smallvec.so compilation :: cycles :: benchmarks/bz2/benchmark.wasm Δ = 22810509.85 ± 6434024.63 (confidence = 99%) main.so is 0.89x to 0.94x faster than smallvec.so! 
smallvec.so is 1.07x to 1.12x faster than main.so! [233358190 272593088.57 347880715] main.so [225227821 249782578.72 275097380] smallvec.so compilation :: nanoseconds :: benchmarks/pulldown-cmark/benchmark.wasm Δ = 10849521.41 ± 4324757.85 (confidence = 99%) main.so is 0.90x to 0.96x faster than smallvec.so! smallvec.so is 1.04x to 1.10x faster than main.so! [133875427 156859544.47 222455440] main.so [126073854 146010023.06 181611647] smallvec.so compilation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm Δ = 31508176.97 ± 12559561.91 (confidence = 99%) main.so is 0.90x to 0.96x faster than smallvec.so! smallvec.so is 1.04x to 1.10x faster than main.so! [388788638 455536988.31 646034523] main.so [366132033 424028811.34 527419755] smallvec.so ```
This commit is contained in:
@@ -85,7 +85,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
params: &[ir::AbiParam],
|
params: &[ir::AbiParam],
|
||||||
args_or_rets: ArgsOrRets,
|
args_or_rets: ArgsOrRets,
|
||||||
add_ret_area_ptr: bool,
|
add_ret_area_ptr: bool,
|
||||||
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
|
) -> CodegenResult<(ABIArgVec, i64, Option<usize>)> {
|
||||||
let is_apple_cc = call_conv.extends_apple_aarch64();
|
let is_apple_cc = call_conv.extends_apple_aarch64();
|
||||||
|
|
||||||
// See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), sections 6.4.
|
// See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), sections 6.4.
|
||||||
@@ -105,7 +105,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
let mut next_xreg = 0;
|
let mut next_xreg = 0;
|
||||||
let mut next_vreg = 0;
|
let mut next_vreg = 0;
|
||||||
let mut next_stack: u64 = 0;
|
let mut next_stack: u64 = 0;
|
||||||
let mut ret = vec![];
|
let mut ret = ABIArgVec::new();
|
||||||
|
|
||||||
let (max_per_class_reg_vals, mut remaining_reg_vals) = match args_or_rets {
|
let (max_per_class_reg_vals, mut remaining_reg_vals) = match args_or_rets {
|
||||||
ArgsOrRets::Args => (8, 16), // x0-x7 and v0-v7
|
ArgsOrRets::Args => (8, 16), // x0-x7 and v0-v7
|
||||||
|
|||||||
@@ -224,12 +224,12 @@ impl ABIMachineSpec for S390xMachineDeps {
|
|||||||
params: &[ir::AbiParam],
|
params: &[ir::AbiParam],
|
||||||
args_or_rets: ArgsOrRets,
|
args_or_rets: ArgsOrRets,
|
||||||
add_ret_area_ptr: bool,
|
add_ret_area_ptr: bool,
|
||||||
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
|
) -> CodegenResult<(ABIArgVec, i64, Option<usize>)> {
|
||||||
let mut next_gpr = 0;
|
let mut next_gpr = 0;
|
||||||
let mut next_fpr = 0;
|
let mut next_fpr = 0;
|
||||||
let mut next_vr = 0;
|
let mut next_vr = 0;
|
||||||
let mut next_stack: u64 = 0;
|
let mut next_stack: u64 = 0;
|
||||||
let mut ret = vec![];
|
let mut ret = ABIArgVec::new();
|
||||||
|
|
||||||
if args_or_rets == ArgsOrRets::Args {
|
if args_or_rets == ArgsOrRets::Args {
|
||||||
next_stack = REG_SAVE_AREA_SIZE as u64;
|
next_stack = REG_SAVE_AREA_SIZE as u64;
|
||||||
|
|||||||
@@ -47,14 +47,14 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
|||||||
params: &[ir::AbiParam],
|
params: &[ir::AbiParam],
|
||||||
args_or_rets: ArgsOrRets,
|
args_or_rets: ArgsOrRets,
|
||||||
add_ret_area_ptr: bool,
|
add_ret_area_ptr: bool,
|
||||||
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
|
) -> CodegenResult<(ABIArgVec, i64, Option<usize>)> {
|
||||||
let is_fastcall = call_conv.extends_windows_fastcall();
|
let is_fastcall = call_conv.extends_windows_fastcall();
|
||||||
|
|
||||||
let mut next_gpr = 0;
|
let mut next_gpr = 0;
|
||||||
let mut next_vreg = 0;
|
let mut next_vreg = 0;
|
||||||
let mut next_stack: u64 = 0;
|
let mut next_stack: u64 = 0;
|
||||||
let mut next_param_idx = 0; // Fastcall cares about overall param index
|
let mut next_param_idx = 0; // Fastcall cares about overall param index
|
||||||
let mut ret = vec![];
|
let mut ret = ABIArgVec::new();
|
||||||
|
|
||||||
if args_or_rets == ArgsOrRets::Args && is_fastcall {
|
if args_or_rets == ArgsOrRets::Args && is_fastcall {
|
||||||
// Fastcall always reserves 32 bytes of shadow space corresponding to
|
// Fastcall always reserves 32 bytes of shadow space corresponding to
|
||||||
|
|||||||
@@ -310,7 +310,7 @@ pub trait ABIMachineSpec {
|
|||||||
params: &[ir::AbiParam],
|
params: &[ir::AbiParam],
|
||||||
args_or_rets: ArgsOrRets,
|
args_or_rets: ArgsOrRets,
|
||||||
add_ret_area_ptr: bool,
|
add_ret_area_ptr: bool,
|
||||||
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)>;
|
) -> CodegenResult<(ABIArgVec, i64, Option<usize>)>;
|
||||||
|
|
||||||
/// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return
|
/// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return
|
||||||
/// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg).
|
/// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg).
|
||||||
@@ -499,15 +499,18 @@ pub trait ABIMachineSpec {
|
|||||||
) -> ir::ArgumentExtension;
|
) -> ir::ArgumentExtension;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A vector of `ABIArg`s with an inline capacity of 6: argument lists are typically small, so most of them never need a heap allocation.
|
||||||
|
pub type ABIArgVec = SmallVec<[ABIArg; 6]>;
|
||||||
|
|
||||||
/// ABI information shared between body (callee) and caller.
|
/// ABI information shared between body (callee) and caller.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct ABISig {
|
pub struct ABISig {
|
||||||
/// Argument locations (regs or stack slots). Stack offsets are relative to
|
/// Argument locations (regs or stack slots). Stack offsets are relative to
|
||||||
/// SP on entry to function.
|
/// SP on entry to function.
|
||||||
args: Vec<ABIArg>,
|
args: ABIArgVec,
|
||||||
/// Return-value locations. Stack offsets are relative to the return-area
|
/// Return-value locations. Stack offsets are relative to the return-area
|
||||||
/// pointer.
|
/// pointer.
|
||||||
rets: Vec<ABIArg>,
|
rets: ABIArgVec,
|
||||||
/// Space on stack used to store arguments.
|
/// Space on stack used to store arguments.
|
||||||
sized_stack_arg_space: i64,
|
sized_stack_arg_space: i64,
|
||||||
/// Space on stack used to store return values.
|
/// Space on stack used to store return values.
|
||||||
|
|||||||
Reference in New Issue
Block a user