Use a SmallVec for ABIArgs (#4584)

This replaces the regular `Vec` that previously held the `ABIArg`s.

These vectors are usually very small. For example, here is the histogram of
sizes when running Sightglass's `pulldown-cmark` benchmark:

```
;; Number of samples = 10332
;; Min = 0
;; Max = 11
;;
;; Mean = 2.496128532713901
;; Standard deviation = 2.2859559855427243
;; Variance = 5.225594767838607
;;
;; Each ∎ is a count of 62
;;
 0 ..  1 [ 3134 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
 1 ..  2 [ 2032 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
 2 ..  3 [  159 ]: ∎∎
 3 ..  4 [  838 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎
 4 ..  5 [  970 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
 5 ..  6 [ 2566 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
 6 ..  7 [  303 ]: ∎∎∎∎
 7 ..  8 [  272 ]: ∎∎∎∎
 8 ..  9 [   40 ]:
 9 .. 10 [   18 ]:
```

By using a `SmallVec` with an inline capacity of 6, we avoid the vast majority
of heap allocations and get some nice benchmark wins of up to ~1.11x faster
compilation.

<h3>Sightglass Benchmark Results</h3>

```
compilation :: nanoseconds :: benchmarks/spidermonkey/benchmark.wasm

  Δ = 340361395.90 ± 63384608.15 (confidence = 99%)

  main.so is 0.88x to 0.92x faster than smallvec.so!
  smallvec.so is 1.09x to 1.13x faster than main.so!

  [3101467423 3425524333.41 4060621653] main.so
  [2820915877 3085162937.51 3375167352] smallvec.so

compilation :: cycles :: benchmarks/spidermonkey/benchmark.wasm

  Δ = 988446098.59 ± 184075718.89 (confidence = 99%)

  main.so is 0.88x to 0.92x faster than smallvec.so!
  smallvec.so is 1.09x to 1.13x faster than main.so!

  [9006994951 9948091070.66 11792481990] main.so
  [8192243090 8959644972.07 9801848982] smallvec.so

compilation :: nanoseconds :: benchmarks/bz2/benchmark.wasm

  Δ = 7854567.87 ± 2215491.16 (confidence = 99%)

  main.so is 0.89x to 0.94x faster than smallvec.so!
  smallvec.so is 1.07x to 1.12x faster than main.so!

  [80354527 93864666.76 119789198] main.so
  [77554917 86010098.89 94726994] smallvec.so

compilation :: cycles :: benchmarks/bz2/benchmark.wasm

  Δ = 22810509.85 ± 6434024.63 (confidence = 99%)

  main.so is 0.89x to 0.94x faster than smallvec.so!
  smallvec.so is 1.07x to 1.12x faster than main.so!

  [233358190 272593088.57 347880715] main.so
  [225227821 249782578.72 275097380] smallvec.so

compilation :: nanoseconds :: benchmarks/pulldown-cmark/benchmark.wasm

  Δ = 10849521.41 ± 4324757.85 (confidence = 99%)

  main.so is 0.90x to 0.96x faster than smallvec.so!
  smallvec.so is 1.04x to 1.10x faster than main.so!

  [133875427 156859544.47 222455440] main.so
  [126073854 146010023.06 181611647] smallvec.so

compilation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm

  Δ = 31508176.97 ± 12559561.91 (confidence = 99%)

  main.so is 0.90x to 0.96x faster than smallvec.so!
  smallvec.so is 1.04x to 1.10x faster than main.so!

  [388788638 455536988.31 646034523] main.so
  [366132033 424028811.34 527419755] smallvec.so
```
This commit is contained in:
Nick Fitzgerald
2022-08-02 15:53:44 -07:00
committed by GitHub
parent edf7f9f2bb
commit ab1cf3df2d
4 changed files with 12 additions and 9 deletions

View File

@@ -85,7 +85,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
params: &[ir::AbiParam], params: &[ir::AbiParam],
args_or_rets: ArgsOrRets, args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool, add_ret_area_ptr: bool,
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> { ) -> CodegenResult<(ABIArgVec, i64, Option<usize>)> {
let is_apple_cc = call_conv.extends_apple_aarch64(); let is_apple_cc = call_conv.extends_apple_aarch64();
// See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), sections 6.4. // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), sections 6.4.
@@ -105,7 +105,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
let mut next_xreg = 0; let mut next_xreg = 0;
let mut next_vreg = 0; let mut next_vreg = 0;
let mut next_stack: u64 = 0; let mut next_stack: u64 = 0;
let mut ret = vec![]; let mut ret = ABIArgVec::new();
let (max_per_class_reg_vals, mut remaining_reg_vals) = match args_or_rets { let (max_per_class_reg_vals, mut remaining_reg_vals) = match args_or_rets {
ArgsOrRets::Args => (8, 16), // x0-x7 and v0-v7 ArgsOrRets::Args => (8, 16), // x0-x7 and v0-v7

View File

@@ -224,12 +224,12 @@ impl ABIMachineSpec for S390xMachineDeps {
params: &[ir::AbiParam], params: &[ir::AbiParam],
args_or_rets: ArgsOrRets, args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool, add_ret_area_ptr: bool,
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> { ) -> CodegenResult<(ABIArgVec, i64, Option<usize>)> {
let mut next_gpr = 0; let mut next_gpr = 0;
let mut next_fpr = 0; let mut next_fpr = 0;
let mut next_vr = 0; let mut next_vr = 0;
let mut next_stack: u64 = 0; let mut next_stack: u64 = 0;
let mut ret = vec![]; let mut ret = ABIArgVec::new();
if args_or_rets == ArgsOrRets::Args { if args_or_rets == ArgsOrRets::Args {
next_stack = REG_SAVE_AREA_SIZE as u64; next_stack = REG_SAVE_AREA_SIZE as u64;

View File

@@ -47,14 +47,14 @@ impl ABIMachineSpec for X64ABIMachineSpec {
params: &[ir::AbiParam], params: &[ir::AbiParam],
args_or_rets: ArgsOrRets, args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool, add_ret_area_ptr: bool,
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> { ) -> CodegenResult<(ABIArgVec, i64, Option<usize>)> {
let is_fastcall = call_conv.extends_windows_fastcall(); let is_fastcall = call_conv.extends_windows_fastcall();
let mut next_gpr = 0; let mut next_gpr = 0;
let mut next_vreg = 0; let mut next_vreg = 0;
let mut next_stack: u64 = 0; let mut next_stack: u64 = 0;
let mut next_param_idx = 0; // Fastcall cares about overall param index let mut next_param_idx = 0; // Fastcall cares about overall param index
let mut ret = vec![]; let mut ret = ABIArgVec::new();
if args_or_rets == ArgsOrRets::Args && is_fastcall { if args_or_rets == ArgsOrRets::Args && is_fastcall {
// Fastcall always reserves 32 bytes of shadow space corresponding to // Fastcall always reserves 32 bytes of shadow space corresponding to

View File

@@ -310,7 +310,7 @@ pub trait ABIMachineSpec {
params: &[ir::AbiParam], params: &[ir::AbiParam],
args_or_rets: ArgsOrRets, args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool, add_ret_area_ptr: bool,
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)>; ) -> CodegenResult<(ABIArgVec, i64, Option<usize>)>;
/// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return
/// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg). /// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg).
@@ -499,15 +499,18 @@ pub trait ABIMachineSpec {
) -> ir::ArgumentExtension; ) -> ir::ArgumentExtension;
} }
/// A vector of `ABIArg`s with inline storage for up to six elements, since
/// these lists are typically small; longer lists fall back to a heap
/// allocation.
pub type ABIArgVec = SmallVec<[ABIArg; 6]>;
/// ABI information shared between body (callee) and caller. /// ABI information shared between body (callee) and caller.
#[derive(Clone)] #[derive(Clone)]
pub struct ABISig { pub struct ABISig {
/// Argument locations (regs or stack slots). Stack offsets are relative to /// Argument locations (regs or stack slots). Stack offsets are relative to
/// SP on entry to function. /// SP on entry to function.
args: Vec<ABIArg>, args: ABIArgVec,
/// Return-value locations. Stack offsets are relative to the return-area /// Return-value locations. Stack offsets are relative to the return-area
/// pointer. /// pointer.
rets: Vec<ABIArg>, rets: ABIArgVec,
/// Space on stack used to store arguments. /// Space on stack used to store arguments.
sized_stack_arg_space: i64, sized_stack_arg_space: i64,
/// Space on stack used to store return values. /// Space on stack used to store return values.