From c73673559b0058e5cd9637a3f3cf4d92ff852f21 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 3 Sep 2021 15:14:21 -0500 Subject: [PATCH] Avoid vector allocations in wasm->host calls (#3294) This commit improves the runtime support for wasm-to-host invocations for functions created with `Func::new` or `wasmtime_func_new` in the C API. Previously a `Vec` (sometimes a `SmallVec`) would be dynamically allocated on each host call to store the arguments that are coming from wasm and going to the host. In the case of the `wasmtime` crate we need to decode the `u128`-stored values, and in the case of the C API we need to decode the `Val` into the C API's `wasmtime_val_t`. The technique used in this commit is to store a singular `Vec` inside the "store", be it the literal `Store` or within the `T` in the case of the C API, which can be reused across wasm->host calls. This means that we're unlikely to actually perform dynamic memory allocation and instead we should hit a faster path where the `Vec` always has enough capacity. Note that this is just a mild improvement for `Func::new`-based functions. It's still the case that `Func::wrap` is much faster, but unfortunately the C API doesn't have access to `Func::wrap`, so the main motivation here is accelerating the C API. --- Cargo.lock | 1 - crates/c-api/src/func.rs | 36 +++++++++++++++++++++------------ crates/c-api/src/store.rs | 7 ++++++- crates/wasmtime/Cargo.toml | 1 - crates/wasmtime/src/func.rs | 38 ++++++++++++++++++++++------------- crates/wasmtime/src/store.rs | 25 +++++++++++++++++++++-- crates/wasmtime/src/values.rs | 14 ++++++------- 7 files changed, 83 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f6b9d66392..6aab4ad0ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3567,7 +3567,6 @@ dependencies = [ "region", "rustc-demangle", "serde", - "smallvec", "target-lexicon", "tempfile", "wasi-cap-std-sync", diff --git a/crates/c-api/src/func.rs b/crates/c-api/src/func.rs index 1bfdc07b8d..7e6a2e8c2d 100644 --- a/crates/c-api/src/func.rs +++ b/crates/c-api/src/func.rs @@ -5,7 +5,7 @@ use crate::{ }; use anyhow::anyhow; use std::ffi::c_void; -use std::mem::MaybeUninit; +use std::mem::{self, MaybeUninit}; use std::panic::{self, AssertUnwindSafe}; use std::ptr; use std::str; @@ -217,18 +217,21 @@ pub(crate) unsafe fn c_callback_to_rust_fn( finalizer: Option, ) -> impl Fn(Caller<'_, crate::StoreData>, &[Val], &mut [Val]) -> Result<(), Trap> { let foreign = crate::ForeignData { data, finalizer }; - move |caller, params, results| { - let params = params - .iter() - .cloned() - .map(|p| wasmtime_val_t::from_val(p)) - .collect::>(); - let mut out_results = (0..results.len()) - .map(|_| wasmtime_val_t { - kind: crate::WASMTIME_I32, - of: wasmtime_val_union { i32: 0 }, - }) - .collect::>(); + move |mut caller, params, results| { + // Convert `params/results` to `wasmtime_val_t`. Use the previous + // storage in `hostcall_val_storage` to help avoid allocations all the + // time. + let mut vals = mem::take(&mut caller.data_mut().hostcall_val_storage); + debug_assert!(vals.is_empty()); + vals.reserve(params.len() + results.len()); + vals.extend(params.iter().cloned().map(|p| wasmtime_val_t::from_val(p))); + vals.extend((0..results.len()).map(|_| wasmtime_val_t { + kind: crate::WASMTIME_I32, + of: wasmtime_val_union { i32: 0 }, + })); + let (params, out_results) = vals.split_at_mut(params.len()); + + // Invoke the C function pointer, getting the results. let mut caller = wasmtime_caller_t { caller }; let out = callback( foreign.data, @@ -242,9 +245,16 @@ pub(crate) unsafe fn c_callback_to_rust_fn( return Err(trap.trap); } + // Translate the `wasmtime_val_t` results into the `results` space for (i, result) in out_results.iter().enumerate() { results[i] = unsafe { result.to_val() }; } + + // Move our `vals` storage back into the store now that we no longer + // need it. This'll get picked up by the next hostcall and reuse our + // same storage. + vals.truncate(0); + caller.caller.data_mut().hostcall_val_storage = vals; Ok(()) } } diff --git a/crates/c-api/src/store.rs b/crates/c-api/src/store.rs index 5d588a0e0a..86af26b318 100644 --- a/crates/c-api/src/store.rs +++ b/crates/c-api/src/store.rs @@ -1,4 +1,4 @@ -use crate::{wasm_engine_t, wasmtime_error_t, ForeignData}; +use crate::{wasm_engine_t, wasmtime_error_t, wasmtime_val_t, ForeignData}; use std::cell::UnsafeCell; use std::ffi::c_void; use std::sync::Arc; @@ -67,6 +67,10 @@ pub struct StoreData { foreign: crate::ForeignData, #[cfg(feature = "wasi")] pub(crate) wasi: Option, + + /// Temporary storage for usage during a wasm->host call to store values + /// in a slice we pass to the C API. + pub hostcall_val_storage: Vec, } #[no_mangle] @@ -85,6 +89,7 @@ pub extern "C" fn wasmtime_store_new( foreign: ForeignData { data, finalizer }, #[cfg(feature = "wasi")] wasi: None, + hostcall_val_storage: Vec::new(), }, ), }) diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml index 006b65bcc5..7276af6328 100644 --- a/crates/wasmtime/Cargo.toml +++ b/crates/wasmtime/Cargo.toml @@ -30,7 +30,6 @@ rustc-demangle = "0.1.16" cpp_demangle = "0.3.2" log = "0.4.8" wat = { version = "1.0.36", optional = true } -smallvec = "1.6.1" serde = { version = "1.0.94", features = ["derive"] } bincode = "1.2.1" indexmap = "1.6" diff --git a/crates/wasmtime/src/func.rs b/crates/wasmtime/src/func.rs index 2e7c823a8f..e12ba4eb05 100644 --- a/crates/wasmtime/src/func.rs +++ b/crates/wasmtime/src/func.rs @@ -4,7 +4,6 @@ use crate::{ StoreContextMut, Trap, Val, ValType, }; use anyhow::{bail, Context as _, Result}; -use smallvec::{smallvec, SmallVec}; use std::cmp::max; use std::error::Error; use std::fmt; @@ -847,35 +846,42 @@ impl Func { func: &dyn Fn(Caller<'_, T>, &[Val], &mut [Val]) -> Result<(), Trap>, ) -> Result<(), Trap> { caller.store.0.entering_native_hook()?; - // We have a dynamic guarantee that `values_vec` has the right - // number of arguments and the right types of arguments. As a result - // we should be able to safely run through them all and read them. - const STACK_ARGS: usize = 4; - const STACK_RETURNS: usize = 2; - let mut args: SmallVec<[Val; STACK_ARGS]> = SmallVec::with_capacity(ty.params().len()); + + // Translate the raw JIT arguments in `values_vec` into a `Val` which + // we'll be passing as a slice. The storage for our slice-of-`Val` we'll + // be taking from the `Store`. We preserve our slice back into the + // `Store` after the hostcall, ideally amortizing the cost of allocating + // the storage across wasm->host calls. + // + // Note that we have a dynamic guarantee that `values_vec` is the + // appropriate length to both read all arguments from as well as store + // all results into. + let mut val_vec = caller.store.0.take_hostcall_val_storage(); + debug_assert!(val_vec.is_empty()); + let nparams = ty.params().len(); + val_vec.reserve(nparams + ty.results().len()); for (i, ty) in ty.params().enumerate() { unsafe { let val = Val::read_value_from(caller.store.0, values_vec.add(i), ty); - args.push(val); + val_vec.push(val); } } - let mut returns: SmallVec<[Val; STACK_RETURNS]> = - smallvec![Val::null(); ty.results().len()]; - - func(caller.sub_caller(), &args, &mut returns)?; + val_vec.extend((0..ty.results().len()).map(|_| Val::null())); + let (params, results) = val_vec.split_at_mut(nparams); + func(caller.sub_caller(), params, results)?; // Unlike our arguments we need to dynamically check that the return // values produced are correct. There could be a bug in `func` that // produces the wrong number, wrong types, or wrong stores of // values, and we need to catch that here. - for (i, (ret, ty)) in returns.into_iter().zip(ty.results()).enumerate() { + for (i, (ret, ty)) in results.iter().zip(ty.results()).enumerate() { if ret.ty() != ty { return Err(Trap::new( "function attempted to return an incompatible value", )); } - if !ret.comes_from_same_store(&caller.store.0) { + if !ret.comes_from_same_store(caller.store.0) { return Err(Trap::new( "cross-`Store` values are not currently supported", )); @@ -885,6 +891,10 @@ impl Func { } } + // Restore our `val_vec` back into the store so it's usable for the next + // hostcall to reuse our own storage. + val_vec.truncate(0); + caller.store.0.save_hostcall_val_storage(val_vec); caller.store.0.exiting_native_hook()?; Ok(()) } diff --git a/crates/wasmtime/src/store.rs b/crates/wasmtime/src/store.rs index a00b5c760d..de3ad8f047 100644 --- a/crates/wasmtime/src/store.rs +++ b/crates/wasmtime/src/store.rs @@ -76,7 +76,7 @@ //! contents of `StoreOpaque`. This is an invariant that we, as the authors of //! `wasmtime`, must uphold for the public interface to be safe. -use crate::{module::ModuleRegistry, Engine, Module, Trap}; +use crate::{module::ModuleRegistry, Engine, Module, Trap, Val}; use anyhow::{bail, Result}; use std::cell::UnsafeCell; use std::collections::HashMap; @@ -85,7 +85,7 @@ use std::error::Error; use std::fmt; use std::future::Future; use std::marker; -use std::mem::ManuallyDrop; +use std::mem::{self, ManuallyDrop}; use std::ops::{Deref, DerefMut}; use std::pin::Pin; use std::ptr; @@ -239,6 +239,11 @@ pub struct StoreOpaque { out_of_gas_behavior: OutOfGas, store_data: StoreData, default_callee: InstanceHandle, + + /// Used to optimzed wasm->host calls when the host function is defined with + /// `Func::new` to avoid allocating a new vector each time a function is + /// called. + hostcall_val_storage: Vec, } #[cfg(feature = "async")] @@ -332,6 +337,7 @@ impl Store { out_of_gas_behavior: OutOfGas::Trap, store_data: StoreData::new(), default_callee, + hostcall_val_storage: Vec::new(), }, limiter: None, entering_native_hook: None, @@ -1056,6 +1062,21 @@ impl StoreOpaque { pub fn traitobj(&self) -> *mut dyn wasmtime_runtime::Store { self.default_callee.store() } + + /// Takes the cached `Vec` stored internally across hostcalls to get + /// used as part of calling the host in a `Func::new` method invocation. + pub fn take_hostcall_val_storage(&mut self) -> Vec { + mem::take(&mut self.hostcall_val_storage) + } + + /// Restores the vector previously taken by `take_hostcall_val_storage` + /// above back into the store, allowing it to be used in the future for the + /// next wasm->host call. + pub fn save_hostcall_val_storage(&mut self, storage: Vec) { + if storage.capacity() > self.hostcall_val_storage.capacity() { + self.hostcall_val_storage = storage; + } + } } impl StoreContextMut<'_, T> { diff --git a/crates/wasmtime/src/values.rs b/crates/wasmtime/src/values.rs index 2bfa42200f..623151d978 100644 --- a/crates/wasmtime/src/values.rs +++ b/crates/wasmtime/src/values.rs @@ -93,17 +93,17 @@ impl Val { } } - pub(crate) unsafe fn write_value_to(self, store: &mut StoreOpaque, p: *mut u128) { + pub(crate) unsafe fn write_value_to(&self, store: &mut StoreOpaque, p: *mut u128) { match self { - Val::I32(i) => ptr::write(p as *mut i32, i), - Val::I64(i) => ptr::write(p as *mut i64, i), - Val::F32(u) => ptr::write(p as *mut u32, u), - Val::F64(u) => ptr::write(p as *mut u64, u), - Val::V128(b) => ptr::write(p as *mut u128, b), + Val::I32(i) => ptr::write(p as *mut i32, *i), + Val::I64(i) => ptr::write(p as *mut i64, *i), + Val::F32(u) => ptr::write(p as *mut u32, *u), + Val::F64(u) => ptr::write(p as *mut u64, *u), + Val::V128(b) => ptr::write(p as *mut u128, *b), Val::ExternRef(None) => ptr::write(p, 0), Val::ExternRef(Some(x)) => { let externref_ptr = x.inner.as_raw(); - store.insert_vmexternref(x.inner); + store.insert_vmexternref(x.inner.clone()); ptr::write(p as *mut *mut u8, externref_ptr) } Val::FuncRef(f) => ptr::write(