Avoid vector allocations in wasm->host calls (#3294)
This commit improves the runtime support for wasm-to-host invocations for functions created with `Func::new` or `wasmtime_func_new` in the C API. Previously a `Vec` (sometimes a `SmallVec`) would be dynamically allocated on each host call to store the arguments that are coming from wasm and going to the host. In the case of the `wasmtime` crate we need to decode the `u128`-stored values, and in the case of the C API we need to decode the `Val` into the C API's `wasmtime_val_t`. The technique used in this commit is to store a singular `Vec<T>` inside the "store", be it the literal `Store<T>` or within the `T` in the case of the C API, which can be reused across wasm->host calls. This means that we're unlikely to actually perform dynamic memory allocation and instead we should hit a faster path where the `Vec` always has enough capacity. Note that this is just a mild improvement for `Func::new`-based functions. It's still the case that `Func::wrap` is much faster, but unfortunately the C API doesn't have access to `Func::wrap`, so the main motivation here is accelerating the C API.
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -3567,7 +3567,6 @@ dependencies = [
|
||||
"region",
|
||||
"rustc-demangle",
|
||||
"serde",
|
||||
"smallvec",
|
||||
"target-lexicon",
|
||||
"tempfile",
|
||||
"wasi-cap-std-sync",
|
||||
|
||||
@@ -5,7 +5,7 @@ use crate::{
|
||||
};
|
||||
use anyhow::anyhow;
|
||||
use std::ffi::c_void;
|
||||
use std::mem::MaybeUninit;
|
||||
use std::mem::{self, MaybeUninit};
|
||||
use std::panic::{self, AssertUnwindSafe};
|
||||
use std::ptr;
|
||||
use std::str;
|
||||
@@ -217,18 +217,21 @@ pub(crate) unsafe fn c_callback_to_rust_fn(
|
||||
finalizer: Option<extern "C" fn(*mut std::ffi::c_void)>,
|
||||
) -> impl Fn(Caller<'_, crate::StoreData>, &[Val], &mut [Val]) -> Result<(), Trap> {
|
||||
let foreign = crate::ForeignData { data, finalizer };
|
||||
move |caller, params, results| {
|
||||
let params = params
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(|p| wasmtime_val_t::from_val(p))
|
||||
.collect::<Vec<_>>();
|
||||
let mut out_results = (0..results.len())
|
||||
.map(|_| wasmtime_val_t {
|
||||
kind: crate::WASMTIME_I32,
|
||||
of: wasmtime_val_union { i32: 0 },
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
move |mut caller, params, results| {
|
||||
// Convert `params/results` to `wasmtime_val_t`. Use the previous
|
||||
// storage in `hostcall_val_storage` to help avoid allocations all the
|
||||
// time.
|
||||
let mut vals = mem::take(&mut caller.data_mut().hostcall_val_storage);
|
||||
debug_assert!(vals.is_empty());
|
||||
vals.reserve(params.len() + results.len());
|
||||
vals.extend(params.iter().cloned().map(|p| wasmtime_val_t::from_val(p)));
|
||||
vals.extend((0..results.len()).map(|_| wasmtime_val_t {
|
||||
kind: crate::WASMTIME_I32,
|
||||
of: wasmtime_val_union { i32: 0 },
|
||||
}));
|
||||
let (params, out_results) = vals.split_at_mut(params.len());
|
||||
|
||||
// Invoke the C function pointer, getting the results.
|
||||
let mut caller = wasmtime_caller_t { caller };
|
||||
let out = callback(
|
||||
foreign.data,
|
||||
@@ -242,9 +245,16 @@ pub(crate) unsafe fn c_callback_to_rust_fn(
|
||||
return Err(trap.trap);
|
||||
}
|
||||
|
||||
// Translate the `wasmtime_val_t` results into the `results` space
|
||||
for (i, result) in out_results.iter().enumerate() {
|
||||
results[i] = unsafe { result.to_val() };
|
||||
}
|
||||
|
||||
// Move our `vals` storage back into the store now that we no longer
|
||||
// need it. This'll get picked up by the next hostcall and reuse our
|
||||
// same storage.
|
||||
vals.truncate(0);
|
||||
caller.caller.data_mut().hostcall_val_storage = vals;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::{wasm_engine_t, wasmtime_error_t, ForeignData};
|
||||
use crate::{wasm_engine_t, wasmtime_error_t, wasmtime_val_t, ForeignData};
|
||||
use std::cell::UnsafeCell;
|
||||
use std::ffi::c_void;
|
||||
use std::sync::Arc;
|
||||
@@ -67,6 +67,10 @@ pub struct StoreData {
|
||||
foreign: crate::ForeignData,
|
||||
#[cfg(feature = "wasi")]
|
||||
pub(crate) wasi: Option<wasmtime_wasi::WasiCtx>,
|
||||
|
||||
/// Temporary storage for usage during a wasm->host call to store values
|
||||
/// in a slice we pass to the C API.
|
||||
pub hostcall_val_storage: Vec<wasmtime_val_t>,
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
@@ -85,6 +89,7 @@ pub extern "C" fn wasmtime_store_new(
|
||||
foreign: ForeignData { data, finalizer },
|
||||
#[cfg(feature = "wasi")]
|
||||
wasi: None,
|
||||
hostcall_val_storage: Vec::new(),
|
||||
},
|
||||
),
|
||||
})
|
||||
|
||||
@@ -30,7 +30,6 @@ rustc-demangle = "0.1.16"
|
||||
cpp_demangle = "0.3.2"
|
||||
log = "0.4.8"
|
||||
wat = { version = "1.0.36", optional = true }
|
||||
smallvec = "1.6.1"
|
||||
serde = { version = "1.0.94", features = ["derive"] }
|
||||
bincode = "1.2.1"
|
||||
indexmap = "1.6"
|
||||
|
||||
@@ -4,7 +4,6 @@ use crate::{
|
||||
StoreContextMut, Trap, Val, ValType,
|
||||
};
|
||||
use anyhow::{bail, Context as _, Result};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::cmp::max;
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
@@ -847,35 +846,42 @@ impl Func {
|
||||
func: &dyn Fn(Caller<'_, T>, &[Val], &mut [Val]) -> Result<(), Trap>,
|
||||
) -> Result<(), Trap> {
|
||||
caller.store.0.entering_native_hook()?;
|
||||
// We have a dynamic guarantee that `values_vec` has the right
|
||||
// number of arguments and the right types of arguments. As a result
|
||||
// we should be able to safely run through them all and read them.
|
||||
const STACK_ARGS: usize = 4;
|
||||
const STACK_RETURNS: usize = 2;
|
||||
let mut args: SmallVec<[Val; STACK_ARGS]> = SmallVec::with_capacity(ty.params().len());
|
||||
|
||||
// Translate the raw JIT arguments in `values_vec` into a `Val` which
|
||||
// we'll be passing as a slice. The storage for our slice-of-`Val` we'll
|
||||
// be taking from the `Store`. We preserve our slice back into the
|
||||
// `Store` after the hostcall, ideally amortizing the cost of allocating
|
||||
// the storage across wasm->host calls.
|
||||
//
|
||||
// Note that we have a dynamic guarantee that `values_vec` is the
|
||||
// appropriate length to both read all arguments from as well as store
|
||||
// all results into.
|
||||
let mut val_vec = caller.store.0.take_hostcall_val_storage();
|
||||
debug_assert!(val_vec.is_empty());
|
||||
let nparams = ty.params().len();
|
||||
val_vec.reserve(nparams + ty.results().len());
|
||||
for (i, ty) in ty.params().enumerate() {
|
||||
unsafe {
|
||||
let val = Val::read_value_from(caller.store.0, values_vec.add(i), ty);
|
||||
args.push(val);
|
||||
val_vec.push(val);
|
||||
}
|
||||
}
|
||||
|
||||
let mut returns: SmallVec<[Val; STACK_RETURNS]> =
|
||||
smallvec![Val::null(); ty.results().len()];
|
||||
|
||||
func(caller.sub_caller(), &args, &mut returns)?;
|
||||
val_vec.extend((0..ty.results().len()).map(|_| Val::null()));
|
||||
let (params, results) = val_vec.split_at_mut(nparams);
|
||||
func(caller.sub_caller(), params, results)?;
|
||||
|
||||
// Unlike our arguments we need to dynamically check that the return
|
||||
// values produced are correct. There could be a bug in `func` that
|
||||
// produces the wrong number, wrong types, or wrong stores of
|
||||
// values, and we need to catch that here.
|
||||
for (i, (ret, ty)) in returns.into_iter().zip(ty.results()).enumerate() {
|
||||
for (i, (ret, ty)) in results.iter().zip(ty.results()).enumerate() {
|
||||
if ret.ty() != ty {
|
||||
return Err(Trap::new(
|
||||
"function attempted to return an incompatible value",
|
||||
));
|
||||
}
|
||||
if !ret.comes_from_same_store(&caller.store.0) {
|
||||
if !ret.comes_from_same_store(caller.store.0) {
|
||||
return Err(Trap::new(
|
||||
"cross-`Store` values are not currently supported",
|
||||
));
|
||||
@@ -885,6 +891,10 @@ impl Func {
|
||||
}
|
||||
}
|
||||
|
||||
// Restore our `val_vec` back into the store so it's usable for the next
|
||||
// hostcall to reuse our own storage.
|
||||
val_vec.truncate(0);
|
||||
caller.store.0.save_hostcall_val_storage(val_vec);
|
||||
caller.store.0.exiting_native_hook()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -76,7 +76,7 @@
|
||||
//! contents of `StoreOpaque`. This is an invariant that we, as the authors of
|
||||
//! `wasmtime`, must uphold for the public interface to be safe.
|
||||
|
||||
use crate::{module::ModuleRegistry, Engine, Module, Trap};
|
||||
use crate::{module::ModuleRegistry, Engine, Module, Trap, Val};
|
||||
use anyhow::{bail, Result};
|
||||
use std::cell::UnsafeCell;
|
||||
use std::collections::HashMap;
|
||||
@@ -85,7 +85,7 @@ use std::error::Error;
|
||||
use std::fmt;
|
||||
use std::future::Future;
|
||||
use std::marker;
|
||||
use std::mem::ManuallyDrop;
|
||||
use std::mem::{self, ManuallyDrop};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::pin::Pin;
|
||||
use std::ptr;
|
||||
@@ -239,6 +239,11 @@ pub struct StoreOpaque {
|
||||
out_of_gas_behavior: OutOfGas,
|
||||
store_data: StoreData,
|
||||
default_callee: InstanceHandle,
|
||||
|
||||
/// Used to optimzed wasm->host calls when the host function is defined with
|
||||
/// `Func::new` to avoid allocating a new vector each time a function is
|
||||
/// called.
|
||||
hostcall_val_storage: Vec<Val>,
|
||||
}
|
||||
|
||||
#[cfg(feature = "async")]
|
||||
@@ -332,6 +337,7 @@ impl<T> Store<T> {
|
||||
out_of_gas_behavior: OutOfGas::Trap,
|
||||
store_data: StoreData::new(),
|
||||
default_callee,
|
||||
hostcall_val_storage: Vec::new(),
|
||||
},
|
||||
limiter: None,
|
||||
entering_native_hook: None,
|
||||
@@ -1056,6 +1062,21 @@ impl StoreOpaque {
|
||||
pub fn traitobj(&self) -> *mut dyn wasmtime_runtime::Store {
|
||||
self.default_callee.store()
|
||||
}
|
||||
|
||||
/// Takes the cached `Vec<Val>` stored internally across hostcalls to get
|
||||
/// used as part of calling the host in a `Func::new` method invocation.
|
||||
pub fn take_hostcall_val_storage(&mut self) -> Vec<Val> {
|
||||
mem::take(&mut self.hostcall_val_storage)
|
||||
}
|
||||
|
||||
/// Restores the vector previously taken by `take_hostcall_val_storage`
|
||||
/// above back into the store, allowing it to be used in the future for the
|
||||
/// next wasm->host call.
|
||||
pub fn save_hostcall_val_storage(&mut self, storage: Vec<Val>) {
|
||||
if storage.capacity() > self.hostcall_val_storage.capacity() {
|
||||
self.hostcall_val_storage = storage;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> StoreContextMut<'_, T> {
|
||||
|
||||
@@ -93,17 +93,17 @@ impl Val {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn write_value_to(self, store: &mut StoreOpaque, p: *mut u128) {
|
||||
pub(crate) unsafe fn write_value_to(&self, store: &mut StoreOpaque, p: *mut u128) {
|
||||
match self {
|
||||
Val::I32(i) => ptr::write(p as *mut i32, i),
|
||||
Val::I64(i) => ptr::write(p as *mut i64, i),
|
||||
Val::F32(u) => ptr::write(p as *mut u32, u),
|
||||
Val::F64(u) => ptr::write(p as *mut u64, u),
|
||||
Val::V128(b) => ptr::write(p as *mut u128, b),
|
||||
Val::I32(i) => ptr::write(p as *mut i32, *i),
|
||||
Val::I64(i) => ptr::write(p as *mut i64, *i),
|
||||
Val::F32(u) => ptr::write(p as *mut u32, *u),
|
||||
Val::F64(u) => ptr::write(p as *mut u64, *u),
|
||||
Val::V128(b) => ptr::write(p as *mut u128, *b),
|
||||
Val::ExternRef(None) => ptr::write(p, 0),
|
||||
Val::ExternRef(Some(x)) => {
|
||||
let externref_ptr = x.inner.as_raw();
|
||||
store.insert_vmexternref(x.inner);
|
||||
store.insert_vmexternref(x.inner.clone());
|
||||
ptr::write(p as *mut *mut u8, externref_ptr)
|
||||
}
|
||||
Val::FuncRef(f) => ptr::write(
|
||||
|
||||
Reference in New Issue
Block a user