Use a SmallVec for ABIArgs (#4584)

Use a `SmallVec` for `ABIArg` lists instead of a regular `Vec`. These vectors are usually very small; for example, here is the histogram of sizes when running Sightglass's `pulldown-cmark` benchmark:

```
;; Number of samples = 10332
;; Min = 0
;; Max = 11
;;
;; Mean = 2.496128532713901
;; Standard deviation = 2.2859559855427243
;; Variance = 5.225594767838607
;;
;; Each ∎ is a count of 62
;;
0 .. 1 [ 3134 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
1 .. 2 [ 2032 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
2 .. 3 [ 159 ]: ∎∎
3 .. 4 [ 838 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎
4 .. 5 [ 970 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
5 .. 6 [ 2566 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
6 .. 7 [ 303 ]: ∎∎∎∎
7 .. 8 [ 272 ]: ∎∎∎∎
8 .. 9 [ 40 ]:
9 .. 10 [ 18 ]:
```

By using a `SmallVec` with capacity of 6 we avoid the vast majority of heap allocations and get some nice benchmark wins of up to ~1.11x faster compilation.

<h3>Sightglass Benchmark Results</h3>

```
compilation :: nanoseconds :: benchmarks/spidermonkey/benchmark.wasm
Δ = 340361395.90 ± 63384608.15 (confidence = 99%)
main.so is 0.88x to 0.92x faster than smallvec.so!
smallvec.so is 1.09x to 1.13x faster than main.so!
[3101467423 3425524333.41 4060621653] main.so
[2820915877 3085162937.51 3375167352] smallvec.so

compilation :: cycles :: benchmarks/spidermonkey/benchmark.wasm
Δ = 988446098.59 ± 184075718.89 (confidence = 99%)
main.so is 0.88x to 0.92x faster than smallvec.so!
smallvec.so is 1.09x to 1.13x faster than main.so!
[9006994951 9948091070.66 11792481990] main.so
[8192243090 8959644972.07 9801848982] smallvec.so

compilation :: nanoseconds :: benchmarks/bz2/benchmark.wasm
Δ = 7854567.87 ± 2215491.16 (confidence = 99%)
main.so is 0.89x to 0.94x faster than smallvec.so!
smallvec.so is 1.07x to 1.12x faster than main.so!
[80354527 93864666.76 119789198] main.so
[77554917 86010098.89 94726994] smallvec.so

compilation :: cycles :: benchmarks/bz2/benchmark.wasm
Δ = 22810509.85 ± 6434024.63 (confidence = 99%)
main.so is 0.89x to 0.94x faster than smallvec.so!
smallvec.so is 1.07x to 1.12x faster than main.so!
[233358190 272593088.57 347880715] main.so
[225227821 249782578.72 275097380] smallvec.so

compilation :: nanoseconds :: benchmarks/pulldown-cmark/benchmark.wasm
Δ = 10849521.41 ± 4324757.85 (confidence = 99%)
main.so is 0.90x to 0.96x faster than smallvec.so!
smallvec.so is 1.04x to 1.10x faster than main.so!
[133875427 156859544.47 222455440] main.so
[126073854 146010023.06 181611647] smallvec.so

compilation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm
Δ = 31508176.97 ± 12559561.91 (confidence = 99%)
main.so is 0.90x to 0.96x faster than smallvec.so!
smallvec.so is 1.04x to 1.10x faster than main.so!
[388788638 455536988.31 646034523] main.so
[366132033 424028811.34 527419755] smallvec.so
```
2022-08-02 15:53:44 -07:00
parent edf7f9f2bb
commit ab1cf3df2d
4 changed files with 12 additions and 9 deletions
--- a/cranelift/codegen/src/isa/aarch64/abi.rs
+++ b/cranelift/codegen/src/isa/aarch64/abi.rs
@@ -85,7 +85,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
-    ) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
+    ) -> CodegenResult<(ABIArgVec, i64, Option<usize>)> {
        let is_apple_cc = call_conv.extends_apple_aarch64();
        // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), sections 6.4.
@@ -105,7 +105,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
        let mut next_xreg = 0;
        let mut next_vreg = 0;
        let mut next_stack: u64 = 0;
-        let mut ret = vec![];
+        let mut ret = ABIArgVec::new();
        let (max_per_class_reg_vals, mut remaining_reg_vals) = match args_or_rets {
            ArgsOrRets::Args => (8, 16), // x0-x7 and v0-v7
--- a/cranelift/codegen/src/isa/s390x/abi.rs
+++ b/cranelift/codegen/src/isa/s390x/abi.rs
@@ -224,12 +224,12 @@ impl ABIMachineSpec for S390xMachineDeps {
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
-    ) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
+    ) -> CodegenResult<(ABIArgVec, i64, Option<usize>)> {
        let mut next_gpr = 0;
        let mut next_fpr = 0;
        let mut next_vr = 0;
        let mut next_stack: u64 = 0;
-        let mut ret = vec![];
+        let mut ret = ABIArgVec::new();
        if args_or_rets == ArgsOrRets::Args {
            next_stack = REG_SAVE_AREA_SIZE as u64;
--- a/cranelift/codegen/src/isa/x64/abi.rs
+++ b/cranelift/codegen/src/isa/x64/abi.rs
@@ -47,14 +47,14 @@ impl ABIMachineSpec for X64ABIMachineSpec {
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
-    ) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
+    ) -> CodegenResult<(ABIArgVec, i64, Option<usize>)> {
        let is_fastcall = call_conv.extends_windows_fastcall();
        let mut next_gpr = 0;
        let mut next_vreg = 0;
        let mut next_stack: u64 = 0;
        let mut next_param_idx = 0; // Fastcall cares about overall param index
-        let mut ret = vec![];
+        let mut ret = ABIArgVec::new();
        if args_or_rets == ArgsOrRets::Args && is_fastcall {
            // Fastcall always reserves 32 bytes of shadow space corresponding to
--- a/cranelift/codegen/src/machinst/abi_impl.rs
+++ b/cranelift/codegen/src/machinst/abi_impl.rs
@@ -310,7 +310,7 @@ pub trait ABIMachineSpec {
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
-    ) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)>;
+    ) -> CodegenResult<(ABIArgVec, i64, Option<usize>)>;
    /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return
    /// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg).
@@ -499,15 +499,18 @@ pub trait ABIMachineSpec {
    ) -> ir::ArgumentExtension;
 }
 /// A vector of `ABIArg`s with inline capacity, since they are typically small.
 pub type ABIArgVec = SmallVec<[ABIArg; 6]>;
 /// ABI information shared between body (callee) and caller.
 #[derive(Clone)]
 pub struct ABISig {
    /// Argument locations (regs or stack slots). Stack offsets are relative to
    /// SP on entry to function.
-    args: Vec<ABIArg>,
+    args: ABIArgVec,
    /// Return-value locations. Stack offsets are relative to the return-area
    /// pointer.
-    rets: Vec<ABIArg>,
+    rets: ABIArgVec,
    /// Space on stack used to store arguments.
    sized_stack_arg_space: i64,
    /// Space on stack used to store return values.