Avoid vector allocations in wasm->host calls (#3294)

This commit improves the runtime support for wasm-to-host invocations
for functions created with `Func::new` or `wasmtime_func_new` in the C
API. Previously a `Vec` (sometimes a `SmallVec`) would be dynamically
allocated on each host call to store the arguments that are coming from
wasm and going to the host. In the case of the `wasmtime` crate we need
to decode the `u128`-stored values, and in the case of the C API we need
to decode the `Val` into the C API's `wasmtime_val_t`.

The technique used in this commit is to store a single `Vec<T>` inside
the "store", be it the literal `Store<T>` or within the `T` in the case
of the C API, which can be reused across wasm->host calls. This means
that we're unlikely to actually perform dynamic memory allocation and
instead we should hit a faster path where the `Vec` always has enough
capacity.

Note that this is just a mild improvement for `Func::new`-based
functions. It's still the case that `Func::wrap` is much faster, but
unfortunately the C API doesn't have access to `Func::wrap`, so the main
motivation here is accelerating the C API.
This commit is contained in:
Alex Crichton
2021-09-03 15:14:21 -05:00
committed by GitHub
parent 0473e1990a
commit c73673559b
7 changed files with 83 additions and 39 deletions

1
Cargo.lock generated
View File

@@ -3567,7 +3567,6 @@ dependencies = [
"region", "region",
"rustc-demangle", "rustc-demangle",
"serde", "serde",
"smallvec",
"target-lexicon", "target-lexicon",
"tempfile", "tempfile",
"wasi-cap-std-sync", "wasi-cap-std-sync",

View File

@@ -5,7 +5,7 @@ use crate::{
}; };
use anyhow::anyhow; use anyhow::anyhow;
use std::ffi::c_void; use std::ffi::c_void;
use std::mem::MaybeUninit; use std::mem::{self, MaybeUninit};
use std::panic::{self, AssertUnwindSafe}; use std::panic::{self, AssertUnwindSafe};
use std::ptr; use std::ptr;
use std::str; use std::str;
@@ -217,18 +217,21 @@ pub(crate) unsafe fn c_callback_to_rust_fn(
finalizer: Option<extern "C" fn(*mut std::ffi::c_void)>, finalizer: Option<extern "C" fn(*mut std::ffi::c_void)>,
) -> impl Fn(Caller<'_, crate::StoreData>, &[Val], &mut [Val]) -> Result<(), Trap> { ) -> impl Fn(Caller<'_, crate::StoreData>, &[Val], &mut [Val]) -> Result<(), Trap> {
let foreign = crate::ForeignData { data, finalizer }; let foreign = crate::ForeignData { data, finalizer };
move |caller, params, results| { move |mut caller, params, results| {
let params = params // Convert `params/results` to `wasmtime_val_t`. Use the previous
.iter() // storage in `hostcall_val_storage` to help avoid allocations all the
.cloned() // time.
.map(|p| wasmtime_val_t::from_val(p)) let mut vals = mem::take(&mut caller.data_mut().hostcall_val_storage);
.collect::<Vec<_>>(); debug_assert!(vals.is_empty());
let mut out_results = (0..results.len()) vals.reserve(params.len() + results.len());
.map(|_| wasmtime_val_t { vals.extend(params.iter().cloned().map(|p| wasmtime_val_t::from_val(p)));
kind: crate::WASMTIME_I32, vals.extend((0..results.len()).map(|_| wasmtime_val_t {
of: wasmtime_val_union { i32: 0 }, kind: crate::WASMTIME_I32,
}) of: wasmtime_val_union { i32: 0 },
.collect::<Vec<_>>(); }));
let (params, out_results) = vals.split_at_mut(params.len());
// Invoke the C function pointer, getting the results.
let mut caller = wasmtime_caller_t { caller }; let mut caller = wasmtime_caller_t { caller };
let out = callback( let out = callback(
foreign.data, foreign.data,
@@ -242,9 +245,16 @@ pub(crate) unsafe fn c_callback_to_rust_fn(
return Err(trap.trap); return Err(trap.trap);
} }
// Translate the `wasmtime_val_t` results into the `results` space
for (i, result) in out_results.iter().enumerate() { for (i, result) in out_results.iter().enumerate() {
results[i] = unsafe { result.to_val() }; results[i] = unsafe { result.to_val() };
} }
// Move our `vals` storage back into the store now that we no longer
// need it. This'll get picked up by the next hostcall and reuse our
// same storage.
vals.truncate(0);
caller.caller.data_mut().hostcall_val_storage = vals;
Ok(()) Ok(())
} }
} }

View File

@@ -1,4 +1,4 @@
use crate::{wasm_engine_t, wasmtime_error_t, ForeignData}; use crate::{wasm_engine_t, wasmtime_error_t, wasmtime_val_t, ForeignData};
use std::cell::UnsafeCell; use std::cell::UnsafeCell;
use std::ffi::c_void; use std::ffi::c_void;
use std::sync::Arc; use std::sync::Arc;
@@ -67,6 +67,10 @@ pub struct StoreData {
foreign: crate::ForeignData, foreign: crate::ForeignData,
#[cfg(feature = "wasi")] #[cfg(feature = "wasi")]
pub(crate) wasi: Option<wasmtime_wasi::WasiCtx>, pub(crate) wasi: Option<wasmtime_wasi::WasiCtx>,
/// Temporary storage for usage during a wasm->host call to store values
/// in a slice we pass to the C API.
pub hostcall_val_storage: Vec<wasmtime_val_t>,
} }
#[no_mangle] #[no_mangle]
@@ -85,6 +89,7 @@ pub extern "C" fn wasmtime_store_new(
foreign: ForeignData { data, finalizer }, foreign: ForeignData { data, finalizer },
#[cfg(feature = "wasi")] #[cfg(feature = "wasi")]
wasi: None, wasi: None,
hostcall_val_storage: Vec::new(),
}, },
), ),
}) })

View File

@@ -30,7 +30,6 @@ rustc-demangle = "0.1.16"
cpp_demangle = "0.3.2" cpp_demangle = "0.3.2"
log = "0.4.8" log = "0.4.8"
wat = { version = "1.0.36", optional = true } wat = { version = "1.0.36", optional = true }
smallvec = "1.6.1"
serde = { version = "1.0.94", features = ["derive"] } serde = { version = "1.0.94", features = ["derive"] }
bincode = "1.2.1" bincode = "1.2.1"
indexmap = "1.6" indexmap = "1.6"

View File

@@ -4,7 +4,6 @@ use crate::{
StoreContextMut, Trap, Val, ValType, StoreContextMut, Trap, Val, ValType,
}; };
use anyhow::{bail, Context as _, Result}; use anyhow::{bail, Context as _, Result};
use smallvec::{smallvec, SmallVec};
use std::cmp::max; use std::cmp::max;
use std::error::Error; use std::error::Error;
use std::fmt; use std::fmt;
@@ -847,35 +846,42 @@ impl Func {
func: &dyn Fn(Caller<'_, T>, &[Val], &mut [Val]) -> Result<(), Trap>, func: &dyn Fn(Caller<'_, T>, &[Val], &mut [Val]) -> Result<(), Trap>,
) -> Result<(), Trap> { ) -> Result<(), Trap> {
caller.store.0.entering_native_hook()?; caller.store.0.entering_native_hook()?;
// We have a dynamic guarantee that `values_vec` has the right
// number of arguments and the right types of arguments. As a result // Translate the raw JIT arguments in `values_vec` into a `Val` which
// we should be able to safely run through them all and read them. // we'll be passing as a slice. The storage for our slice-of-`Val` we'll
const STACK_ARGS: usize = 4; // be taking from the `Store`. We preserve our slice back into the
const STACK_RETURNS: usize = 2; // `Store` after the hostcall, ideally amortizing the cost of allocating
let mut args: SmallVec<[Val; STACK_ARGS]> = SmallVec::with_capacity(ty.params().len()); // the storage across wasm->host calls.
//
// Note that we have a dynamic guarantee that `values_vec` is the
// appropriate length to both read all arguments from as well as store
// all results into.
let mut val_vec = caller.store.0.take_hostcall_val_storage();
debug_assert!(val_vec.is_empty());
let nparams = ty.params().len();
val_vec.reserve(nparams + ty.results().len());
for (i, ty) in ty.params().enumerate() { for (i, ty) in ty.params().enumerate() {
unsafe { unsafe {
let val = Val::read_value_from(caller.store.0, values_vec.add(i), ty); let val = Val::read_value_from(caller.store.0, values_vec.add(i), ty);
args.push(val); val_vec.push(val);
} }
} }
let mut returns: SmallVec<[Val; STACK_RETURNS]> = val_vec.extend((0..ty.results().len()).map(|_| Val::null()));
smallvec![Val::null(); ty.results().len()]; let (params, results) = val_vec.split_at_mut(nparams);
func(caller.sub_caller(), params, results)?;
func(caller.sub_caller(), &args, &mut returns)?;
// Unlike our arguments we need to dynamically check that the return // Unlike our arguments we need to dynamically check that the return
// values produced are correct. There could be a bug in `func` that // values produced are correct. There could be a bug in `func` that
// produces the wrong number, wrong types, or wrong stores of // produces the wrong number, wrong types, or wrong stores of
// values, and we need to catch that here. // values, and we need to catch that here.
for (i, (ret, ty)) in returns.into_iter().zip(ty.results()).enumerate() { for (i, (ret, ty)) in results.iter().zip(ty.results()).enumerate() {
if ret.ty() != ty { if ret.ty() != ty {
return Err(Trap::new( return Err(Trap::new(
"function attempted to return an incompatible value", "function attempted to return an incompatible value",
)); ));
} }
if !ret.comes_from_same_store(&caller.store.0) { if !ret.comes_from_same_store(caller.store.0) {
return Err(Trap::new( return Err(Trap::new(
"cross-`Store` values are not currently supported", "cross-`Store` values are not currently supported",
)); ));
@@ -885,6 +891,10 @@ impl Func {
} }
} }
// Restore our `val_vec` back into the store so it's usable for the next
// hostcall to reuse our own storage.
val_vec.truncate(0);
caller.store.0.save_hostcall_val_storage(val_vec);
caller.store.0.exiting_native_hook()?; caller.store.0.exiting_native_hook()?;
Ok(()) Ok(())
} }

View File

@@ -76,7 +76,7 @@
//! contents of `StoreOpaque`. This is an invariant that we, as the authors of //! contents of `StoreOpaque`. This is an invariant that we, as the authors of
//! `wasmtime`, must uphold for the public interface to be safe. //! `wasmtime`, must uphold for the public interface to be safe.
use crate::{module::ModuleRegistry, Engine, Module, Trap}; use crate::{module::ModuleRegistry, Engine, Module, Trap, Val};
use anyhow::{bail, Result}; use anyhow::{bail, Result};
use std::cell::UnsafeCell; use std::cell::UnsafeCell;
use std::collections::HashMap; use std::collections::HashMap;
@@ -85,7 +85,7 @@ use std::error::Error;
use std::fmt; use std::fmt;
use std::future::Future; use std::future::Future;
use std::marker; use std::marker;
use std::mem::ManuallyDrop; use std::mem::{self, ManuallyDrop};
use std::ops::{Deref, DerefMut}; use std::ops::{Deref, DerefMut};
use std::pin::Pin; use std::pin::Pin;
use std::ptr; use std::ptr;
@@ -239,6 +239,11 @@ pub struct StoreOpaque {
out_of_gas_behavior: OutOfGas, out_of_gas_behavior: OutOfGas,
store_data: StoreData, store_data: StoreData,
default_callee: InstanceHandle, default_callee: InstanceHandle,
/// Used to optimize wasm->host calls when the host function is defined with
/// `Func::new` to avoid allocating a new vector each time a function is
/// called.
hostcall_val_storage: Vec<Val>,
} }
#[cfg(feature = "async")] #[cfg(feature = "async")]
@@ -332,6 +337,7 @@ impl<T> Store<T> {
out_of_gas_behavior: OutOfGas::Trap, out_of_gas_behavior: OutOfGas::Trap,
store_data: StoreData::new(), store_data: StoreData::new(),
default_callee, default_callee,
hostcall_val_storage: Vec::new(),
}, },
limiter: None, limiter: None,
entering_native_hook: None, entering_native_hook: None,
@@ -1056,6 +1062,21 @@ impl StoreOpaque {
pub fn traitobj(&self) -> *mut dyn wasmtime_runtime::Store { pub fn traitobj(&self) -> *mut dyn wasmtime_runtime::Store {
self.default_callee.store() self.default_callee.store()
} }
/// Moves the store's cached `Vec<Val>` out to the caller, leaving a fresh
/// empty vector in its place.
///
/// The returned vector is the scratch storage reused across hostcalls when
/// invoking a host function created through `Func::new`.
pub fn take_hostcall_val_storage(&mut self) -> Vec<Val> {
    let cached = &mut self.hostcall_val_storage;
    mem::replace(cached, Vec::new())
}
/// Hands a vector obtained from `take_hostcall_val_storage` back to the
/// store so a later wasm->host call can reuse it.
///
/// The incoming `storage` only replaces the vector currently held by the
/// store when its capacity is strictly larger; otherwise it is dropped and
/// the store keeps what it already has.
pub fn save_hostcall_val_storage(&mut self, storage: Vec<Val>) {
    let held_capacity = self.hostcall_val_storage.capacity();
    if storage.capacity() <= held_capacity {
        // Nothing to gain from swapping; `storage` is dropped on return.
        return;
    }
    self.hostcall_val_storage = storage;
}
} }
impl<T> StoreContextMut<'_, T> { impl<T> StoreContextMut<'_, T> {

View File

@@ -93,17 +93,17 @@ impl Val {
} }
} }
pub(crate) unsafe fn write_value_to(self, store: &mut StoreOpaque, p: *mut u128) { pub(crate) unsafe fn write_value_to(&self, store: &mut StoreOpaque, p: *mut u128) {
match self { match self {
Val::I32(i) => ptr::write(p as *mut i32, i), Val::I32(i) => ptr::write(p as *mut i32, *i),
Val::I64(i) => ptr::write(p as *mut i64, i), Val::I64(i) => ptr::write(p as *mut i64, *i),
Val::F32(u) => ptr::write(p as *mut u32, u), Val::F32(u) => ptr::write(p as *mut u32, *u),
Val::F64(u) => ptr::write(p as *mut u64, u), Val::F64(u) => ptr::write(p as *mut u64, *u),
Val::V128(b) => ptr::write(p as *mut u128, b), Val::V128(b) => ptr::write(p as *mut u128, *b),
Val::ExternRef(None) => ptr::write(p, 0), Val::ExternRef(None) => ptr::write(p, 0),
Val::ExternRef(Some(x)) => { Val::ExternRef(Some(x)) => {
let externref_ptr = x.inner.as_raw(); let externref_ptr = x.inner.as_raw();
store.insert_vmexternref(x.inner); store.insert_vmexternref(x.inner.clone());
ptr::write(p as *mut *mut u8, externref_ptr) ptr::write(p as *mut *mut u8, externref_ptr)
} }
Val::FuncRef(f) => ptr::write( Val::FuncRef(f) => ptr::write(