Don't copy VMBuiltinFunctionsArray into each VMContext (#3741)

* Don't copy `VMBuiltinFunctionsArray` into each `VMContext`

This is another PR along the lines of "let's squeeze all possible
performance we can out of instantiation". Before this PR we would copy,
by value, the contents of `VMBuiltinFunctionsArray` into each
`VMContext` allocated. This array of function pointers is modestly sized
but growing over time as we add various intrinsics. Additionally it's
the exact same for all `VMContext` allocations.

This PR attempts to speed up instantiation slightly by instead storing
an indirection to the function array. This means that calling a builtin
intrinsic is a tad bit slower since it requires two loads instead of one
(one to get the base pointer, another to get the actual address).
Otherwise though `VMContext` initialization is now simply setting one
pointer instead of doing a `memcpy` from one location to another.

With some macro-magic this commit also replaces the previous
implementation with one that's more `const`-friendly which also gets us
compile-time type-checks of libcalls as well as compile-time
verification that all libcalls are defined.

Overall, as with #3739, the win is very modest here. Locally I measured
a speedup from 1.9us to 1.7us taken to instantiate an empty module with
one function. While small at these scales it's still a 10% improvement!

* Review comments
This commit is contained in:
Alex Crichton
2022-01-28 16:24:34 -06:00
committed by GitHub
parent 2f494240f8
commit a25f7bdba5
5 changed files with 87 additions and 111 deletions

View File

@@ -9,7 +9,6 @@ use std::marker;
use std::ptr::NonNull;
use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};
use std::u32;
use wasmtime_environ::BuiltinFunctionIndex;
/// An imported function.
#[derive(Debug, Copy, Clone)]
@@ -594,66 +593,43 @@ mod test_vmcaller_checked_anyfunc {
}
}
/// An array that stores the addresses of builtin functions.
///
/// We translate code to call builtins indirectly through this array. This
/// way, we don't have to patch the code with the builtins' addresses.
#[repr(C)]
pub struct VMBuiltinFunctionsArray {
// One `usize` slot per builtin function; the array length is `Self::len()`.
ptrs: [usize; Self::len()],
}
impl VMBuiltinFunctionsArray {
/// Returns the number of entries in the array: the value of
/// `BuiltinFunctionIndex::builtin_functions_total_number()` converted to
/// `usize`. Being a `const fn`, it can be used as an array length.
pub const fn len() -> usize {
BuiltinFunctionIndex::builtin_functions_total_number() as usize
}
pub fn initialized() -> Self {
use crate::libcalls::*;
let mut ptrs = [0; Self::len()];
ptrs[BuiltinFunctionIndex::memory32_grow().index() as usize] =
wasmtime_memory32_grow as usize;
ptrs[BuiltinFunctionIndex::table_copy().index() as usize] = wasmtime_table_copy as usize;
ptrs[BuiltinFunctionIndex::table_grow_funcref().index() as usize] =
wasmtime_table_grow as usize;
ptrs[BuiltinFunctionIndex::table_grow_externref().index() as usize] =
wasmtime_table_grow as usize;
ptrs[BuiltinFunctionIndex::table_init().index() as usize] = wasmtime_table_init as usize;
ptrs[BuiltinFunctionIndex::elem_drop().index() as usize] = wasmtime_elem_drop as usize;
ptrs[BuiltinFunctionIndex::memory_copy().index() as usize] = wasmtime_memory_copy as usize;
ptrs[BuiltinFunctionIndex::memory_fill().index() as usize] = wasmtime_memory_fill as usize;
ptrs[BuiltinFunctionIndex::memory_init().index() as usize] = wasmtime_memory_init as usize;
ptrs[BuiltinFunctionIndex::data_drop().index() as usize] = wasmtime_data_drop as usize;
ptrs[BuiltinFunctionIndex::drop_externref().index() as usize] =
wasmtime_drop_externref as usize;
ptrs[BuiltinFunctionIndex::activations_table_insert_with_gc().index() as usize] =
wasmtime_activations_table_insert_with_gc as usize;
ptrs[BuiltinFunctionIndex::externref_global_get().index() as usize] =
wasmtime_externref_global_get as usize;
ptrs[BuiltinFunctionIndex::externref_global_set().index() as usize] =
wasmtime_externref_global_set as usize;
ptrs[BuiltinFunctionIndex::table_fill_externref().index() as usize] =
wasmtime_table_fill as usize;
ptrs[BuiltinFunctionIndex::table_fill_funcref().index() as usize] =
wasmtime_table_fill as usize;
ptrs[BuiltinFunctionIndex::memory_atomic_notify().index() as usize] =
wasmtime_memory_atomic_notify as usize;
ptrs[BuiltinFunctionIndex::memory_atomic_wait32().index() as usize] =
wasmtime_memory_atomic_wait32 as usize;
ptrs[BuiltinFunctionIndex::memory_atomic_wait64().index() as usize] =
wasmtime_memory_atomic_wait64 as usize;
ptrs[BuiltinFunctionIndex::out_of_gas().index() as usize] = wasmtime_out_of_gas as usize;
ptrs[BuiltinFunctionIndex::new_epoch().index() as usize] = wasmtime_new_epoch as usize;
if cfg!(debug_assertions) {
for i in 0..ptrs.len() {
debug_assert!(ptrs[i] != 0, "index {} is not initialized", i);
}
// Defines `VMBuiltinFunctionsArray` and its `INIT` constant from a list of
// builtin signatures written as `name(param, ...) -> (result, ...);`, each
// optionally preceded by attributes.
macro_rules! define_builtin_array {
(
$(
// Attributes are accepted by the matcher but are not re-emitted in the
// expansion below.
$( #[$attr:meta] )*
$name:ident( $( $param:ident ),* ) -> ( $( $result:ident ),* );
)*
) => {
/// An array that stores addresses of builtin functions. We translate code
/// to use indirect calls. This way, we don't have to patch the code.
#[repr(C)]
// A signature with exactly one result expands to a parenthesized return
// type such as `-> (u32)`, which would otherwise trigger `unused_parens`.
#[allow(unused_parens)]
pub struct VMBuiltinFunctionsArray {
$(
// One `unsafe extern "C"` function-pointer field per builtin, named after
// the builtin; parameter and result types come from the `@ty` rules below.
$name: unsafe extern "C" fn(
$(define_builtin_array!(@ty $param)),*
) -> (
$(define_builtin_array!(@ty $result)),*
),
)*
}
// NOTE(review): the next two lines look like leftovers of the removed
// `initialized()` body in this diff rendering rather than part of the
// macro -- confirm against the actual file.
Self { ptrs }
}
impl VMBuiltinFunctionsArray {
// Every field is initialized with the same-named item in
// `crate::libcalls`, so a missing libcall or one with a mismatched
// signature is rejected at compile time.
pub const INIT: VMBuiltinFunctionsArray = VMBuiltinFunctionsArray {
$($name: crate::libcalls::$name,)*
};
}
};
// Internal rules: map a signature type keyword to the Rust type used in the
// generated function-pointer signature.
(@ty i32) => (u32);
(@ty i64) => (u64);
(@ty reference) => (*mut u8);
(@ty pointer) => (*mut u8);
(@ty vmctx) => (*mut VMContext);
}
// Expand `define_builtin_array` via `foreach_builtin_function!` (presumably
// invoked with the signature list of every builtin -- confirm in
// `wasmtime_environ`).
wasmtime_environ::foreach_builtin_function!(define_builtin_array);
/// The storage for a WebAssembly invocation argument
///
/// TODO: These could be packed more densely, rather than using the same size for every type.