Reduce clones of Arc<HostFunc> during instantiation (#4051)
This commit implements an optimization to help improve concurrently creating instances of a module on many threads simultaneously. One bottleneck to this measured has been the reference count modification on `Arc<HostFunc>`. Each host function stored within a `Linker<T>` is wrapped in an `Arc<HostFunc>` structure, and when any of those host functions are inserted into a store the reference count is incremented. When the store is dropped the reference count is then decremented. This ends up meaning that when a module imports N functions it ends up doing 2N atomic modifications over the lifetime of the instance. For embeddings where the `Linker<T>` is rarely modified but instances are frequently created this can be a surprising bottleneck to creating many instances. A change implemented here is to optimize the instantiation process when using an `InstancePre<T>`. An `InstancePre` serves as an opportunity to take the list of items used to instantiate a module and wrap them all up in an `Arc<[T]>`. Everything is going to get cloned into a `Store<T>` anyway so to optimize this the `Arc<[T]>` is cloned at the top-level and then nothing else is cloned internally. This continues to, however, preserve a strong reference count for all contained items to prevent them from being deallocated. A new variant of `FuncKind` was added for host functions which is effectively stored via `*mut HostFunc`. This variant is unsafe to create and manage and has been documented internally. Performance-wise the overall impact of this change is somewhat minor. It's already a bit esoteric if this atomic increment and decrement are a bottleneck due to the number of concurrent instances being created. In my measurements I've seen that this can reduce instantiation time by up to 10% for a module that imports two dozen functions. For larger modules with more imports this is expected to have a larger win.
This commit is contained in:
@@ -224,6 +224,21 @@ enum FuncKind {
|
|||||||
/// situations is `SharedHost` and `StoreOwned`, so this ideally isn't
|
/// situations is `SharedHost` and `StoreOwned`, so this ideally isn't
|
||||||
/// larger than those two.
|
/// larger than those two.
|
||||||
Host(Box<HostFunc>),
|
Host(Box<HostFunc>),
|
||||||
|
|
||||||
|
/// A reference to a `HostFunc`, but one that's "rooted" in the `Store`
|
||||||
|
/// itself.
|
||||||
|
///
|
||||||
|
/// This variant is created when an `InstancePre<T>` is instantiated in to a
|
||||||
|
/// `Store<T>`. In that situation the `InstancePre<T>` already has a list of
|
||||||
|
/// host functions that are packaged up in an `Arc`, so the `Arc<[T]>` is
|
||||||
|
/// cloned once into the `Store` to avoid each individual function requiring
|
||||||
|
/// an `Arc::clone`.
|
||||||
|
///
|
||||||
|
/// The lifetime management of this type is `unsafe` because
|
||||||
|
/// `RootedHostFunc` is a small wrapper around `NonNull<HostFunc>`. To be
|
||||||
|
/// safe this is required that the memory of the host function is pinned
|
||||||
|
/// elsewhere (e.g. the `Arc` in the `Store`).
|
||||||
|
RootedHost(RootedHostFunc),
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! for_each_function_signature {
|
macro_rules! for_each_function_signature {
|
||||||
@@ -2070,6 +2085,29 @@ impl HostFunc {
|
|||||||
Func::from_func_kind(FuncKind::SharedHost(me), store)
|
Func::from_func_kind(FuncKind::SharedHost(me), store)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Inserts this `HostFunc` into a `Store`, returning the `Func` pointing to
|
||||||
|
/// it.
|
||||||
|
///
|
||||||
|
/// This function is similar to, but not equivalent, to `HostFunc::to_func`.
|
||||||
|
/// Notably this function requires that the `Arc<Self>` pointer is otherwise
|
||||||
|
/// rooted within the `StoreOpaque` via another means. When in doubt use
|
||||||
|
/// `to_func` above as it's safer.
|
||||||
|
///
|
||||||
|
/// # Unsafety
|
||||||
|
///
|
||||||
|
/// Can only be inserted into stores with a matching `T` relative to when
|
||||||
|
/// this `HostFunc` was first created.
|
||||||
|
///
|
||||||
|
/// Additionally the `&Arc<Self>` is not cloned in this function. Instead a
|
||||||
|
/// raw pointer to `Self` is stored within the `Store` for this function.
|
||||||
|
/// The caller must arrange for the `Arc<Self>` to be "rooted" in the store
|
||||||
|
/// provided via another means, probably by pushing to
|
||||||
|
/// `StoreOpaque::rooted_host_funcs`.
|
||||||
|
pub unsafe fn to_func_store_rooted(self: &Arc<Self>, store: &mut StoreOpaque) -> Func {
|
||||||
|
self.validate_store(store);
|
||||||
|
Func::from_func_kind(FuncKind::RootedHost(RootedHostFunc::new(self)), store)
|
||||||
|
}
|
||||||
|
|
||||||
/// Same as [`HostFunc::to_func`], different ownership.
|
/// Same as [`HostFunc::to_func`], different ownership.
|
||||||
unsafe fn into_func(self, store: &mut StoreOpaque) -> Func {
|
unsafe fn into_func(self, store: &mut StoreOpaque) -> Func {
|
||||||
self.validate_store(store);
|
self.validate_store(store);
|
||||||
@@ -2112,6 +2150,7 @@ impl FuncData {
|
|||||||
match &self.kind {
|
match &self.kind {
|
||||||
FuncKind::StoreOwned { trampoline, .. } => *trampoline,
|
FuncKind::StoreOwned { trampoline, .. } => *trampoline,
|
||||||
FuncKind::SharedHost(host) => host.trampoline,
|
FuncKind::SharedHost(host) => host.trampoline,
|
||||||
|
FuncKind::RootedHost(host) => host.trampoline,
|
||||||
FuncKind::Host(host) => host.trampoline,
|
FuncKind::Host(host) => host.trampoline,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2132,7 +2171,50 @@ impl FuncKind {
|
|||||||
match self {
|
match self {
|
||||||
FuncKind::StoreOwned { export, .. } => export,
|
FuncKind::StoreOwned { export, .. } => export,
|
||||||
FuncKind::SharedHost(host) => &host.export,
|
FuncKind::SharedHost(host) => &host.export,
|
||||||
|
FuncKind::RootedHost(host) => &host.export,
|
||||||
FuncKind::Host(host) => &host.export,
|
FuncKind::Host(host) => &host.export,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
use self::rooted::*;
|
||||||
|
|
||||||
|
/// An inner module is used here to force unsafe construction of
|
||||||
|
/// `RootedHostFunc` instead of accidentally safely allowing access to its
|
||||||
|
/// constructor.
|
||||||
|
mod rooted {
|
||||||
|
use super::HostFunc;
|
||||||
|
use std::ops::Deref;
|
||||||
|
use std::ptr::NonNull;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
/// A variant of a pointer-to-a-host-function used in `FuncKind::RootedHost`
|
||||||
|
/// above.
|
||||||
|
///
|
||||||
|
/// For more documentation see `FuncKind::RootedHost`, `InstancePre`, and
|
||||||
|
/// `HostFunc::to_func_store_rooted`.
|
||||||
|
pub(crate) struct RootedHostFunc(NonNull<HostFunc>);
|
||||||
|
|
||||||
|
// These are required due to the usage of `NonNull` but should be safe
|
||||||
|
// because `HostFunc` is itself send/sync.
|
||||||
|
unsafe impl Send for RootedHostFunc where HostFunc: Send {}
|
||||||
|
unsafe impl Sync for RootedHostFunc where HostFunc: Sync {}
|
||||||
|
|
||||||
|
impl RootedHostFunc {
|
||||||
|
/// Note that this is `unsafe` because this wrapper type allows safe
|
||||||
|
/// access to the pointer given at any time, including outside the
|
||||||
|
/// window of validity of `func`, so callers must not use the return
|
||||||
|
/// value past the lifetime of the provided `func`.
|
||||||
|
pub(crate) unsafe fn new(func: &Arc<HostFunc>) -> RootedHostFunc {
|
||||||
|
RootedHostFunc(NonNull::from(&**func))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Deref for RootedHostFunc {
|
||||||
|
type Target = HostFunc;
|
||||||
|
|
||||||
|
fn deref(&self) -> &HostFunc {
|
||||||
|
unsafe { self.0.as_ref() }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -569,10 +569,20 @@ impl OwnedImports {
|
|||||||
/// [`Linker::instantiate_pre`]: crate::Linker::instantiate_pre
|
/// [`Linker::instantiate_pre`]: crate::Linker::instantiate_pre
|
||||||
pub struct InstancePre<T> {
|
pub struct InstancePre<T> {
|
||||||
module: Module,
|
module: Module,
|
||||||
items: Vec<Definition>,
|
|
||||||
|
/// The items which this `InstancePre` use to instantiate the `module`
|
||||||
|
/// provided, passed to `Instance::new_started` after inserting them into a
|
||||||
|
/// `Store`.
|
||||||
|
///
|
||||||
|
/// Note that this is stored as an `Arc<[T]>` to quickly move a strong
|
||||||
|
/// reference to everything internally into a `Store<T>` without having to
|
||||||
|
/// clone each individual item.
|
||||||
|
items: Arc<[Definition]>,
|
||||||
|
|
||||||
/// A count of `Definition::HostFunc` entries in `items` above to
|
/// A count of `Definition::HostFunc` entries in `items` above to
|
||||||
/// preallocate space in a `Store` up front for all entries to be inserted.
|
/// preallocate space in a `Store` up front for all entries to be inserted.
|
||||||
host_funcs: usize,
|
host_funcs: usize,
|
||||||
|
|
||||||
_marker: std::marker::PhantomData<fn() -> T>,
|
_marker: std::marker::PhantomData<fn() -> T>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -589,6 +599,14 @@ impl<T> Clone for InstancePre<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<T> InstancePre<T> {
|
impl<T> InstancePre<T> {
|
||||||
|
/// Creates a new `InstancePre` which type-checks the `items` provided and
|
||||||
|
/// on success is ready to instantiate a new instance.
|
||||||
|
///
|
||||||
|
/// # Unsafety
|
||||||
|
///
|
||||||
|
/// This method is unsafe as the `T` of the `InstancePre<T>` is not
|
||||||
|
/// guaranteed to be the same as the `T` within the `Store`, the caller must
|
||||||
|
/// verify that.
|
||||||
pub(crate) unsafe fn new(
|
pub(crate) unsafe fn new(
|
||||||
store: &mut StoreOpaque,
|
store: &mut StoreOpaque,
|
||||||
module: &Module,
|
module: &Module,
|
||||||
@@ -612,7 +630,7 @@ impl<T> InstancePre<T> {
|
|||||||
.count();
|
.count();
|
||||||
Ok(InstancePre {
|
Ok(InstancePre {
|
||||||
module: module.clone(),
|
module: module.clone(),
|
||||||
items,
|
items: items.into(),
|
||||||
host_funcs,
|
host_funcs,
|
||||||
_marker: std::marker::PhantomData,
|
_marker: std::marker::PhantomData,
|
||||||
})
|
})
|
||||||
@@ -689,24 +707,33 @@ impl<T> InstancePre<T> {
|
|||||||
fn pre_instantiate_raw(
|
fn pre_instantiate_raw(
|
||||||
store: &mut StoreOpaque,
|
store: &mut StoreOpaque,
|
||||||
module: &Module,
|
module: &Module,
|
||||||
items: &[Definition],
|
items: &Arc<[Definition]>,
|
||||||
host_funcs: usize,
|
host_funcs: usize,
|
||||||
) -> Result<OwnedImports> {
|
) -> Result<OwnedImports> {
|
||||||
// Any linker-defined function of the `Definition::HostFunc` variant
|
if host_funcs > 0 {
|
||||||
// will insert a function into the store automatically as part of
|
// Any linker-defined function of the `Definition::HostFunc` variant
|
||||||
// instantiation, so reserve space here to make insertion more efficient
|
// will insert a function into the store automatically as part of
|
||||||
// as it won't have to realloc during the instantiation.
|
// instantiation, so reserve space here to make insertion more efficient
|
||||||
store.store_data_mut().reserve_funcs(host_funcs);
|
// as it won't have to realloc during the instantiation.
|
||||||
|
store.store_data_mut().reserve_funcs(host_funcs);
|
||||||
|
|
||||||
|
// The usage of `to_extern_store_rooted` requires that the items are
|
||||||
|
// rooted via another means, which happens here by cloning the list of
|
||||||
|
// items into the store once. This avoids cloning each individual item
|
||||||
|
// below.
|
||||||
|
store.push_rooted_funcs(items.clone());
|
||||||
|
}
|
||||||
|
|
||||||
let mut imports = OwnedImports::new(module);
|
let mut imports = OwnedImports::new(module);
|
||||||
for import in items {
|
for import in items.iter() {
|
||||||
if !import.comes_from_same_store(store) {
|
if !import.comes_from_same_store(store) {
|
||||||
bail!("cross-`Store` instantiation is not currently supported");
|
bail!("cross-`Store` instantiation is not currently supported");
|
||||||
}
|
}
|
||||||
// This unsafety should be encapsulated in the constructor of
|
// This unsafety should be encapsulated in the constructor of
|
||||||
// `InstancePre` where the `T` of the original item should match the
|
// `InstancePre` where the `T` of the original item should match the
|
||||||
// `T` of the store.
|
// `T` of the store. Additionally the rooting necessary has happened
|
||||||
let item = unsafe { import.to_extern(store) };
|
// above.
|
||||||
|
let item = unsafe { import.to_extern_store_rooted(store) };
|
||||||
imports.push(&item, store);
|
imports.push(&item, store);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1194,6 +1194,15 @@ impl Definition {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Note the unsafety here is due to calling
|
||||||
|
/// `HostFunc::to_func_store_rooted`.
|
||||||
|
pub(crate) unsafe fn to_extern_store_rooted(&self, store: &mut StoreOpaque) -> Extern {
|
||||||
|
match self {
|
||||||
|
Definition::Extern(e) => e.clone(),
|
||||||
|
Definition::HostFunc(func) => func.to_func_store_rooted(store).into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn comes_from_same_store(&self, store: &StoreOpaque) -> bool {
|
pub(crate) fn comes_from_same_store(&self, store: &StoreOpaque) -> bool {
|
||||||
match self {
|
match self {
|
||||||
Definition::Extern(e) => e.comes_from_same_store(store),
|
Definition::Extern(e) => e.comes_from_same_store(store),
|
||||||
|
|||||||
@@ -76,6 +76,7 @@
|
|||||||
//! contents of `StoreOpaque`. This is an invariant that we, as the authors of
|
//! contents of `StoreOpaque`. This is an invariant that we, as the authors of
|
||||||
//! `wasmtime`, must uphold for the public interface to be safe.
|
//! `wasmtime`, must uphold for the public interface to be safe.
|
||||||
|
|
||||||
|
use crate::linker::Definition;
|
||||||
use crate::module::BareModuleInfo;
|
use crate::module::BareModuleInfo;
|
||||||
use crate::{module::ModuleRegistry, Engine, Module, Trap, Val, ValRaw};
|
use crate::{module::ModuleRegistry, Engine, Module, Trap, Val, ValRaw};
|
||||||
use anyhow::{bail, Result};
|
use anyhow::{bail, Result};
|
||||||
@@ -296,7 +297,13 @@ pub struct StoreOpaque {
|
|||||||
async_state: AsyncState,
|
async_state: AsyncState,
|
||||||
out_of_gas_behavior: OutOfGas,
|
out_of_gas_behavior: OutOfGas,
|
||||||
epoch_deadline_behavior: EpochDeadline,
|
epoch_deadline_behavior: EpochDeadline,
|
||||||
store_data: StoreData,
|
/// Indexed data within this `Store`, used to store information about
|
||||||
|
/// globals, functions, memories, etc.
|
||||||
|
///
|
||||||
|
/// Note that this is `ManuallyDrop` because it needs to be dropped before
|
||||||
|
/// `rooted_host_funcs` below. This structure contains pointers which are
|
||||||
|
/// otherwise kept alive by the `Arc` references in `rooted_host_funcs`.
|
||||||
|
store_data: ManuallyDrop<StoreData>,
|
||||||
default_callee: InstanceHandle,
|
default_callee: InstanceHandle,
|
||||||
|
|
||||||
/// Used to optimzed wasm->host calls when the host function is defined with
|
/// Used to optimzed wasm->host calls when the host function is defined with
|
||||||
@@ -306,6 +313,20 @@ pub struct StoreOpaque {
|
|||||||
/// Same as `hostcall_val_storage`, but for the direction of the host
|
/// Same as `hostcall_val_storage`, but for the direction of the host
|
||||||
/// calling wasm.
|
/// calling wasm.
|
||||||
wasm_val_raw_storage: Vec<ValRaw>,
|
wasm_val_raw_storage: Vec<ValRaw>,
|
||||||
|
|
||||||
|
/// A list of lists of definitions which have been used to instantiate
|
||||||
|
/// within this `Store`.
|
||||||
|
///
|
||||||
|
/// Note that not all instantiations end up pushing to this list. At the
|
||||||
|
/// time of this writing only the `InstancePre<T>` type will push to this
|
||||||
|
/// list. Pushes to this list are typically accompanied with
|
||||||
|
/// `HostFunc::to_func_store_rooted` to clone an `Arc` here once which
|
||||||
|
/// preserves a strong reference to the `Arc` for each `HostFunc` stored
|
||||||
|
/// within the list of `Definition`s.
|
||||||
|
///
|
||||||
|
/// Note that this is `ManuallyDrop` as it must be dropped after
|
||||||
|
/// `store_data` above, where the function pointers are stored.
|
||||||
|
rooted_host_funcs: ManuallyDrop<Vec<Arc<[Definition]>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "async")]
|
#[cfg(feature = "async")]
|
||||||
@@ -471,10 +492,11 @@ impl<T> Store<T> {
|
|||||||
},
|
},
|
||||||
out_of_gas_behavior: OutOfGas::Trap,
|
out_of_gas_behavior: OutOfGas::Trap,
|
||||||
epoch_deadline_behavior: EpochDeadline::Trap,
|
epoch_deadline_behavior: EpochDeadline::Trap,
|
||||||
store_data: StoreData::new(),
|
store_data: ManuallyDrop::new(StoreData::new()),
|
||||||
default_callee,
|
default_callee,
|
||||||
hostcall_val_storage: Vec::new(),
|
hostcall_val_storage: Vec::new(),
|
||||||
wasm_val_raw_storage: Vec::new(),
|
wasm_val_raw_storage: Vec::new(),
|
||||||
|
rooted_host_funcs: ManuallyDrop::new(Vec::new()),
|
||||||
},
|
},
|
||||||
limiter: None,
|
limiter: None,
|
||||||
call_hook: None,
|
call_hook: None,
|
||||||
@@ -1401,6 +1423,10 @@ impl StoreOpaque {
|
|||||||
self.wasm_val_raw_storage = storage;
|
self.wasm_val_raw_storage = storage;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn push_rooted_funcs(&mut self, funcs: Arc<[Definition]>) {
|
||||||
|
self.rooted_host_funcs.push(funcs);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> StoreContextMut<'_, T> {
|
impl<T> StoreContextMut<'_, T> {
|
||||||
@@ -1913,8 +1939,8 @@ impl Drop for StoreOpaque {
|
|||||||
// NB it's important that this destructor does not access `self.data`.
|
// NB it's important that this destructor does not access `self.data`.
|
||||||
// That is deallocated by `Drop for Store<T>` above.
|
// That is deallocated by `Drop for Store<T>` above.
|
||||||
|
|
||||||
let allocator = self.engine.allocator();
|
|
||||||
unsafe {
|
unsafe {
|
||||||
|
let allocator = self.engine.allocator();
|
||||||
let ondemand = OnDemandInstanceAllocator::default();
|
let ondemand = OnDemandInstanceAllocator::default();
|
||||||
for instance in self.instances.iter() {
|
for instance in self.instances.iter() {
|
||||||
if instance.ondemand {
|
if instance.ondemand {
|
||||||
@@ -1924,6 +1950,11 @@ impl Drop for StoreOpaque {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
ondemand.deallocate(&self.default_callee);
|
ondemand.deallocate(&self.default_callee);
|
||||||
|
|
||||||
|
// See documentation for these fields on `StoreOpaque` for why they
|
||||||
|
// must be dropped in this order.
|
||||||
|
ManuallyDrop::drop(&mut self.store_data);
|
||||||
|
ManuallyDrop::drop(&mut self.rooted_host_funcs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user