Reduce clones of Arc<HostFunc> during instantiation (#4051)

This commit implements an optimization to help improve concurrently
creating instances of a module on many threads simultaneously. One
bottleneck to this measured has been the reference count modification on
`Arc<HostFunc>`. Each host function stored within a `Linker<T>` is
wrapped in an `Arc<HostFunc>` structure, and when any of those host
functions are inserted into a store the reference count is incremented.
When the store is dropped the reference count is then decremented.

This ends up meaning that when a module imports N functions it ends up
doing 2N atomic modifications over the lifetime of the instance. For
embeddings where the `Linker<T>` is rarely modified but instances are
frequently created this can be a surprising bottleneck to creating many
instances.

A change implemented here is to optimize the instantiation process when
using an `InstancePre<T>`. An `InstancePre` serves as an opportunity to
take the list of items used to instantiate a module and wrap them all up
in an `Arc<[T]>`. Everything is going to get cloned into a `Store<T>`
anyway so to optimize this the `Arc<[T]>` is cloned at the top-level and
then nothing else is cloned internally. This continues to, however,
preserve a strong reference count for all contained items to prevent
them from being deallocated.

A new variant of `FuncKind` was added for host functions which is
effectively stored via `*mut HostFunc`. This variant is unsafe to create
and manage and has been documented internally.

Performance-wise the overall impact of this change is somewhat minor.
It's already a bit esoteric if this atomic increment and decrement are a
bottleneck due to the number of concurrent instances being created. In
my measurements I've seen that this can reduce instantiation time by up
to 10% for a module that imports two dozen functions. For larger modules
with more imports this is expected to have a larger win.
This commit is contained in:
Alex Crichton
2022-04-19 14:23:36 -05:00
committed by GitHub
parent 19fe0878cb
commit 3394c2bb91
4 changed files with 163 additions and 14 deletions

View File

@@ -224,6 +224,21 @@ enum FuncKind {
/// situations is `SharedHost` and `StoreOwned`, so this ideally isn't
/// larger than those two.
Host(Box<HostFunc>),
/// A reference to a `HostFunc`, but one that's "rooted" in the `Store`
/// itself.
///
/// This variant is created when an `InstancePre<T>` is instantiated in to a
/// `Store<T>`. In that situation the `InstancePre<T>` already has a list of
/// host functions that are packaged up in an `Arc`, so the `Arc<[T]>` is
/// cloned once into the `Store` to avoid each individual function requiring
/// an `Arc::clone`.
///
/// The lifetime management of this type is `unsafe` because
/// `RootedHostFunc` is a small wrapper around `NonNull<HostFunc>`. To be
/// safe this is required that the memory of the host function is pinned
/// elsewhere (e.g. the `Arc` in the `Store`).
RootedHost(RootedHostFunc),
}
macro_rules! for_each_function_signature {
@@ -2070,6 +2085,29 @@ impl HostFunc {
Func::from_func_kind(FuncKind::SharedHost(me), store)
}
/// Inserts this `HostFunc` into a `Store`, returning the `Func` pointing to
/// it.
///
/// This function is similar to, but not equivalent, to `HostFunc::to_func`.
/// Notably this function requires that the `Arc<Self>` pointer is otherwise
/// rooted within the `StoreOpaque` via another means. When in doubt use
/// `to_func` above as it's safer.
///
/// # Unsafety
///
/// Can only be inserted into stores with a matching `T` relative to when
/// this `HostFunc` was first created.
///
/// Additionally the `&Arc<Self>` is not cloned in this function. Instead a
/// raw pointer to `Self` is stored within the `Store` for this function.
/// The caller must arrange for the `Arc<Self>` to be "rooted" in the store
/// provided via another means, probably by pushing to
/// `StoreOpaque::rooted_host_funcs`.
pub unsafe fn to_func_store_rooted(self: &Arc<Self>, store: &mut StoreOpaque) -> Func {
self.validate_store(store);
Func::from_func_kind(FuncKind::RootedHost(RootedHostFunc::new(self)), store)
}
/// Same as [`HostFunc::to_func`], different ownership.
unsafe fn into_func(self, store: &mut StoreOpaque) -> Func {
self.validate_store(store);
@@ -2112,6 +2150,7 @@ impl FuncData {
match &self.kind {
FuncKind::StoreOwned { trampoline, .. } => *trampoline,
FuncKind::SharedHost(host) => host.trampoline,
FuncKind::RootedHost(host) => host.trampoline,
FuncKind::Host(host) => host.trampoline,
}
}
@@ -2132,7 +2171,50 @@ impl FuncKind {
match self {
FuncKind::StoreOwned { export, .. } => export,
FuncKind::SharedHost(host) => &host.export,
FuncKind::RootedHost(host) => &host.export,
FuncKind::Host(host) => &host.export,
}
}
}
use self::rooted::*;
/// An inner module is used here to force unsafe construction of
/// `RootedHostFunc` instead of accidentally safely allowing access to its
/// constructor.
mod rooted {
use super::HostFunc;
use std::ops::Deref;
use std::ptr::NonNull;
use std::sync::Arc;
/// A variant of a pointer-to-a-host-function used in `FuncKind::RootedHost`
/// above.
///
/// For more documentation see `FuncKind::RootedHost`, `InstancePre`, and
/// `HostFunc::to_func_store_rooted`.
pub(crate) struct RootedHostFunc(NonNull<HostFunc>);
// These are required due to the usage of `NonNull` but should be safe
// because `HostFunc` is itself send/sync.
unsafe impl Send for RootedHostFunc where HostFunc: Send {}
unsafe impl Sync for RootedHostFunc where HostFunc: Sync {}
impl RootedHostFunc {
/// Note that this is `unsafe` because this wrapper type allows safe
/// access to the pointer given at any time, including outside the
/// window of validity of `func`, so callers must not use the return
/// value past the lifetime of the provided `func`.
pub(crate) unsafe fn new(func: &Arc<HostFunc>) -> RootedHostFunc {
RootedHostFunc(NonNull::from(&**func))
}
}
impl Deref for RootedHostFunc {
type Target = HostFunc;
fn deref(&self) -> &HostFunc {
unsafe { self.0.as_ref() }
}
}
}

View File

@@ -569,10 +569,20 @@ impl OwnedImports {
/// [`Linker::instantiate_pre`]: crate::Linker::instantiate_pre
pub struct InstancePre<T> {
module: Module,
items: Vec<Definition>,
/// The items which this `InstancePre` use to instantiate the `module`
/// provided, passed to `Instance::new_started` after inserting them into a
/// `Store`.
///
/// Note that this is stored as an `Arc<[T]>` to quickly move a strong
/// reference to everything internally into a `Store<T>` without having to
/// clone each individual item.
items: Arc<[Definition]>,
/// A count of `Definition::HostFunc` entries in `items` above to
/// preallocate space in a `Store` up front for all entries to be inserted.
host_funcs: usize,
_marker: std::marker::PhantomData<fn() -> T>,
}
@@ -589,6 +599,14 @@ impl<T> Clone for InstancePre<T> {
}
impl<T> InstancePre<T> {
/// Creates a new `InstancePre` which type-checks the `items` provided and
/// on success is ready to instantiate a new instance.
///
/// # Unsafety
///
/// This method is unsafe as the `T` of the `InstancePre<T>` is not
/// guaranteed to be the same as the `T` within the `Store`, the caller must
/// verify that.
pub(crate) unsafe fn new(
store: &mut StoreOpaque,
module: &Module,
@@ -612,7 +630,7 @@ impl<T> InstancePre<T> {
.count();
Ok(InstancePre {
module: module.clone(),
items,
items: items.into(),
host_funcs,
_marker: std::marker::PhantomData,
})
@@ -689,24 +707,33 @@ impl<T> InstancePre<T> {
fn pre_instantiate_raw(
store: &mut StoreOpaque,
module: &Module,
items: &[Definition],
items: &Arc<[Definition]>,
host_funcs: usize,
) -> Result<OwnedImports> {
if host_funcs > 0 {
// Any linker-defined function of the `Definition::HostFunc` variant
// will insert a function into the store automatically as part of
// instantiation, so reserve space here to make insertion more efficient
// as it won't have to realloc during the instantiation.
store.store_data_mut().reserve_funcs(host_funcs);
// The usage of `to_extern_store_rooted` requires that the items are
// rooted via another means, which happens here by cloning the list of
// items into the store once. This avoids cloning each individual item
// below.
store.push_rooted_funcs(items.clone());
}
let mut imports = OwnedImports::new(module);
for import in items {
for import in items.iter() {
if !import.comes_from_same_store(store) {
bail!("cross-`Store` instantiation is not currently supported");
}
// This unsafety should be encapsulated in the constructor of
// `InstancePre` where the `T` of the original item should match the
// `T` of the store.
let item = unsafe { import.to_extern(store) };
// `T` of the store. Additionally the rooting necessary has happened
// above.
let item = unsafe { import.to_extern_store_rooted(store) };
imports.push(&item, store);
}

View File

@@ -1194,6 +1194,15 @@ impl Definition {
}
}
/// Note the unsafety here is due to calling
/// `HostFunc::to_func_store_rooted`.
pub(crate) unsafe fn to_extern_store_rooted(&self, store: &mut StoreOpaque) -> Extern {
match self {
Definition::Extern(e) => e.clone(),
Definition::HostFunc(func) => func.to_func_store_rooted(store).into(),
}
}
pub(crate) fn comes_from_same_store(&self, store: &StoreOpaque) -> bool {
match self {
Definition::Extern(e) => e.comes_from_same_store(store),

View File

@@ -76,6 +76,7 @@
//! contents of `StoreOpaque`. This is an invariant that we, as the authors of
//! `wasmtime`, must uphold for the public interface to be safe.
use crate::linker::Definition;
use crate::module::BareModuleInfo;
use crate::{module::ModuleRegistry, Engine, Module, Trap, Val, ValRaw};
use anyhow::{bail, Result};
@@ -296,7 +297,13 @@ pub struct StoreOpaque {
async_state: AsyncState,
out_of_gas_behavior: OutOfGas,
epoch_deadline_behavior: EpochDeadline,
store_data: StoreData,
/// Indexed data within this `Store`, used to store information about
/// globals, functions, memories, etc.
///
/// Note that this is `ManuallyDrop` because it needs to be dropped before
/// `rooted_host_funcs` below. This structure contains pointers which are
/// otherwise kept alive by the `Arc` references in `rooted_host_funcs`.
store_data: ManuallyDrop<StoreData>,
default_callee: InstanceHandle,
/// Used to optimzed wasm->host calls when the host function is defined with
@@ -306,6 +313,20 @@ pub struct StoreOpaque {
/// Same as `hostcall_val_storage`, but for the direction of the host
/// calling wasm.
wasm_val_raw_storage: Vec<ValRaw>,
/// A list of lists of definitions which have been used to instantiate
/// within this `Store`.
///
/// Note that not all instantiations end up pushing to this list. At the
/// time of this writing only the `InstancePre<T>` type will push to this
/// list. Pushes to this list are typically accompanied with
/// `HostFunc::to_func_store_rooted` to clone an `Arc` here once which
/// preserves a strong reference to the `Arc` for each `HostFunc` stored
/// within the list of `Definition`s.
///
/// Note that this is `ManuallyDrop` as it must be dropped after
/// `store_data` above, where the function pointers are stored.
rooted_host_funcs: ManuallyDrop<Vec<Arc<[Definition]>>>,
}
#[cfg(feature = "async")]
@@ -471,10 +492,11 @@ impl<T> Store<T> {
},
out_of_gas_behavior: OutOfGas::Trap,
epoch_deadline_behavior: EpochDeadline::Trap,
store_data: StoreData::new(),
store_data: ManuallyDrop::new(StoreData::new()),
default_callee,
hostcall_val_storage: Vec::new(),
wasm_val_raw_storage: Vec::new(),
rooted_host_funcs: ManuallyDrop::new(Vec::new()),
},
limiter: None,
call_hook: None,
@@ -1401,6 +1423,10 @@ impl StoreOpaque {
self.wasm_val_raw_storage = storage;
}
}
pub(crate) fn push_rooted_funcs(&mut self, funcs: Arc<[Definition]>) {
self.rooted_host_funcs.push(funcs);
}
}
impl<T> StoreContextMut<'_, T> {
@@ -1913,8 +1939,8 @@ impl Drop for StoreOpaque {
// NB it's important that this destructor does not access `self.data`.
// That is deallocated by `Drop for Store<T>` above.
let allocator = self.engine.allocator();
unsafe {
let allocator = self.engine.allocator();
let ondemand = OnDemandInstanceAllocator::default();
for instance in self.instances.iter() {
if instance.ondemand {
@@ -1924,6 +1950,11 @@ impl Drop for StoreOpaque {
}
}
ondemand.deallocate(&self.default_callee);
// See documentation for these fields on `StoreOpaque` for why they
// must be dropped in this order.
ManuallyDrop::drop(&mut self.store_data);
ManuallyDrop::drop(&mut self.rooted_host_funcs);
}
}
}