Implement lowered-then-lifted functions (#4327)

* Implement lowered-then-lifted functions

This commit is a few features bundled into one, culminating in the
implementation of lowered-then-lifted functions for the component model.
It's probably not going to be used all that often but this is possible
within a valid component so Wasmtime needs to do something relatively
reasonable. The main things implemented in this commit are:

* Component instances are now assigned a `RuntimeComponentInstanceIndex`
  to differentiate each one. This will be used in the future to detect
  fusion (one instance lowering a function from another instance). For
  now it's used to allocate separate `VMComponentFlags` for each
  internal component instance.

* The `CoreExport<FuncIndex>` of lowered functions was changed to a
  `CoreDef` since technically a lowered function can use another lowered
  function as the callee. This ended up being not too difficult to plumb
  through as everything else was already in place.

* A need arose to compile host-to-wasm trampolines which weren't already
  present. Currently wasm in a component is always entered through a
  host-to-wasm trampoline but core wasm modules are the source of all
  the trampolines. In the case of a lowered-then-lifted function there
  may not actually be any core wasm modules, so component objects now
  contain necessary trampolines not otherwise provided by the core wasm
  objects. This feature required splitting out a new function in the
  `Compiler` trait for creating a host-to-wasm trampoline. After doing
  this, core wasm compilation was also updated to leverage the new
  function, which further enabled compiling trampolines in parallel as
  opposed to the previous synchronous compilation.

* Review comments
This commit is contained in:
Alex Crichton
2022-06-28 13:50:08 -05:00
committed by GitHub
parent df1502531d
commit c1b3962f7b
17 changed files with 400 additions and 107 deletions

View File

@@ -2,13 +2,14 @@ use crate::signatures::SignatureCollection;
use crate::{Engine, Module};
use anyhow::{bail, Context, Result};
use std::collections::HashMap;
use std::collections::HashSet;
use std::fs;
use std::ops::Range;
use std::path::Path;
use std::ptr::NonNull;
use std::sync::Arc;
use wasmtime_environ::component::{
ComponentTypes, GlobalInitializer, LoweredIndex, StaticModuleIndex, TrampolineInfo, Translator,
ComponentTypes, GlobalInitializer, LoweredIndex, LoweringInfo, StaticModuleIndex, Translator,
};
use wasmtime_environ::PrimaryMap;
use wasmtime_jit::CodeMemory;
@@ -52,7 +53,7 @@ struct ComponentInner {
/// Where trampolines are located within the `text` section of
/// `trampoline_obj`.
trampolines: PrimaryMap<LoweredIndex, TrampolineInfo>,
trampolines: PrimaryMap<LoweredIndex, LoweringInfo>,
}
impl Component {
@@ -116,6 +117,40 @@ impl Component {
.context("failed to parse WebAssembly module")?;
let types = Arc::new(types.finish());
// All lowered functions will require a trampoline to be available in
// case they're used when entering wasm. For example a lowered function
// could be immediately lifted in which case we'll need a trampoline to
// call that lowered function.
//
// Most of the time trampolines can come from the core wasm modules
// since lifted functions come from core wasm. For these esoteric cases
// though we may have to compile trampolines specifically into the
// component object as well in case core wasm doesn't provide the
// necessary trampoline.
let lowerings = component
.initializers
.iter()
.filter_map(|init| match init {
GlobalInitializer::LowerImport(i) => Some(i),
_ => None,
})
.collect::<Vec<_>>();
let required_trampolines = lowerings
.iter()
.map(|l| l.canonical_abi)
.collect::<HashSet<_>>();
let provided_trampolines = modules
.iter()
.flat_map(|(_, m)| m.exported_signatures.iter().copied())
.collect::<HashSet<_>>();
let mut trampolines_to_compile = required_trampolines
.difference(&provided_trampolines)
.collect::<Vec<_>>();
// Ensure a deterministically compiled artifact by sorting this list
// which was otherwise created with nondeterministically ordered hash
// tables.
trampolines_to_compile.sort();
let (static_modules, trampolines) = engine.join_maybe_parallel(
// In one (possibly) parallel task all the modules found within this
// component are compiled. Note that this will further parallelize
@@ -139,28 +174,40 @@ impl Component {
// In another (possibly) parallel task we compile the lowering
// trampolines found in the component.
|| -> Result<_> {
let lowerings = component
.initializers
.iter()
.filter_map(|init| match init {
GlobalInitializer::LowerImport(i) => Some(i),
_ => None,
})
.collect::<Vec<_>>();
let compiler = engine.compiler().component_compiler();
let trampolines = engine
.run_maybe_parallel(lowerings, |lowering| {
compiler.compile_lowered_trampoline(&component, lowering, &types)
})?
.into_iter()
.collect();
let compiler = engine.compiler();
let (lowered_trampolines, core_trampolines) = engine.join_maybe_parallel(
// Compile all the lowered trampolines here which implement
// `canon lower` and are used to exit wasm into the host.
|| -> Result<_> {
Ok(engine
.run_maybe_parallel(lowerings, |lowering| {
compiler
.component_compiler()
.compile_lowered_trampoline(&component, lowering, &types)
})?
.into_iter()
.collect())
},
// Compile all entry host-to-wasm trampolines here that
// aren't otherwise provided by core wasm modules.
|| -> Result<_> {
engine.run_maybe_parallel(trampolines_to_compile.clone(), |i| {
let ty = &types[*i];
Ok((*i, compiler.compile_host_to_wasm_trampoline(ty)?))
})
},
);
let mut obj = engine.compiler().object()?;
let trampolines = compiler.emit_obj(trampolines, &mut obj)?;
let trampolines = compiler.component_compiler().emit_obj(
lowered_trampolines?,
core_trampolines?,
&mut obj,
)?;
Ok((trampolines, wasmtime_jit::mmap_vec_from_obj(obj)?))
},
);
let static_modules = static_modules?;
let (trampolines, trampoline_obj) = trampolines?;
let ((lowering_trampolines, core_trampolines), trampoline_obj) = trampolines?;
let mut trampoline_obj = CodeMemory::new(trampoline_obj);
let code = trampoline_obj.publish()?;
let text = wasmtime_jit::subslice_range(code.text, code.mmap);
@@ -184,6 +231,13 @@ impl Component {
vmtrampolines.insert(idx, trampoline);
}
}
for (signature, trampoline) in trampolines_to_compile.iter().zip(core_trampolines) {
vmtrampolines.insert(**signature, unsafe {
let ptr =
code.text[trampoline.start as usize..][..trampoline.length as usize].as_ptr();
std::mem::transmute::<*const u8, wasmtime_runtime::VMTrampoline>(ptr)
});
}
// FIXME: for the same reason as above where each module is
// re-registering everything this should only be registered once. This
@@ -202,7 +256,7 @@ impl Component {
signatures,
trampoline_obj,
text,
trampolines,
trampolines: lowering_trampolines,
}),
})
}

View File

@@ -5,8 +5,9 @@ use anyhow::{Context, Result};
use std::mem::MaybeUninit;
use std::ptr::NonNull;
use std::sync::Arc;
use wasmtime_environ::component::{CanonicalOptions, ComponentTypes, CoreExport, TypeFuncIndex};
use wasmtime_environ::FuncIndex;
use wasmtime_environ::component::{
CanonicalOptions, ComponentTypes, CoreDef, RuntimeComponentInstanceIndex, TypeFuncIndex,
};
use wasmtime_runtime::{Export, ExportFunction, VMTrampoline};
const MAX_STACK_PARAMS: usize = 16;
@@ -82,6 +83,7 @@ pub struct FuncData {
types: Arc<ComponentTypes>,
options: Options,
instance: Instance,
component_instance: RuntimeComponentInstanceIndex,
post_return: Option<(ExportFunction, VMTrampoline)>,
post_return_arg: Option<ValRaw>,
}
@@ -92,10 +94,10 @@ impl Func {
instance: &Instance,
data: &InstanceData,
ty: TypeFuncIndex,
func: &CoreExport<FuncIndex>,
func: &CoreDef,
options: &CanonicalOptions,
) -> Func {
let export = match data.lookup_export(store, func) {
let export = match data.lookup_def(store, func) {
Export::Function(f) => f,
_ => unreachable!(),
};
@@ -109,6 +111,7 @@ impl Func {
let trampoline = store.lookup_trampoline(unsafe { anyfunc.as_ref() });
(ExportFunction { anyfunc }, trampoline)
});
let component_instance = options.instance;
let options = unsafe { Options::new(store.id(), memory, realloc, options.string_encoding) };
Func(store.store_data_mut().insert(FuncData {
trampoline,
@@ -117,6 +120,7 @@ impl Func {
ty,
types: data.component_types().clone(),
instance: *instance,
component_instance,
post_return,
post_return_arg: None,
}))

View File

@@ -27,6 +27,7 @@ pub trait IntoComponentFunc<T, Params, Return> {
extern "C" fn entrypoint(
cx: *mut VMOpaqueContext,
data: *mut u8,
flags: *mut VMComponentFlags,
memory: *mut VMMemoryDefinition,
realloc: *mut VMCallerCheckedAnyfunc,
string_encoding: StringEncoding,
@@ -105,6 +106,7 @@ where
/// the select few places it's intended to be called from.
unsafe fn call_host<T, Params, Return, F>(
cx: *mut VMOpaqueContext,
flags: *mut VMComponentFlags,
memory: *mut VMMemoryDefinition,
realloc: *mut VMCallerCheckedAnyfunc,
string_encoding: StringEncoding,
@@ -136,7 +138,6 @@ where
let cx = VMComponentContext::from_opaque(cx);
let instance = (*cx).instance();
let flags = (*instance).flags();
let mut cx = StoreContextMut::from_raw((*instance).store());
let options = Options::new(
@@ -282,6 +283,7 @@ macro_rules! impl_into_component_func {
extern "C" fn entrypoint(
cx: *mut VMOpaqueContext,
data: *mut u8,
flags: *mut VMComponentFlags,
memory: *mut VMMemoryDefinition,
realloc: *mut VMCallerCheckedAnyfunc,
string_encoding: StringEncoding,
@@ -292,6 +294,7 @@ macro_rules! impl_into_component_func {
unsafe {
handle_result(|| call_host::<T, _, _, _>(
cx,
flags,
memory,
realloc,
string_encoding,
@@ -318,6 +321,7 @@ macro_rules! impl_into_component_func {
extern "C" fn entrypoint(
cx: *mut VMOpaqueContext,
data: *mut u8,
flags: *mut VMComponentFlags,
memory: *mut VMMemoryDefinition,
realloc: *mut VMCallerCheckedAnyfunc,
string_encoding: StringEncoding,
@@ -328,6 +332,7 @@ macro_rules! impl_into_component_func {
unsafe {
handle_result(|| call_host::<T, _, _, _>(
cx,
flags,
memory,
realloc,
string_encoding,

View File

@@ -308,6 +308,7 @@ where
export,
options,
instance,
component_instance,
..
} = store.0[self.func.0];
@@ -329,7 +330,7 @@ where
assert!(mem::align_of_val(map_maybe_uninit!(space.ret)) == val_align);
let instance = store.0[instance.0].as_ref().unwrap().instance();
let flags = instance.flags();
let flags = instance.flags(component_instance);
unsafe {
if !(*flags).may_enter() {
@@ -448,9 +449,10 @@ where
let data = &mut store.0[self.func.0];
let instance = data.instance;
let post_return = data.post_return;
let component_instance = data.component_instance;
let post_return_arg = data.post_return_arg.take();
let instance = store.0[instance.0].as_ref().unwrap().instance();
let flags = instance.flags();
let flags = instance.flags(component_instance);
unsafe {
// First assert that the instance is in a "needs post return" state.

View File

@@ -143,7 +143,7 @@ impl InstanceData {
}
}
fn lookup_def(&self, store: &mut StoreOpaque, def: &CoreDef) -> wasmtime_runtime::Export {
pub fn lookup_def(&self, store: &mut StoreOpaque, def: &CoreDef) -> wasmtime_runtime::Export {
match def {
CoreDef::Lowered(idx) => {
wasmtime_runtime::Export::Function(wasmtime_runtime::ExportFunction {

View File

@@ -367,18 +367,20 @@ impl Module {
mut translation: ModuleTranslation<'_>,
types: &ModuleTypes,
) -> Result<(MmapVec, Option<CompiledModuleInfo>)> {
// Compile all functions in parallel using rayon. This will also perform
// validation of function bodies.
let tunables = &engine.config().tunables;
let functions = mem::take(&mut translation.function_body_inputs);
let functions = functions.into_iter().collect::<Vec<_>>();
let funcs = engine
.run_maybe_parallel(functions, |(index, func)| {
let offset = func.body.range().start;
engine
.compiler()
.compile_function(&translation, index, func, tunables, types)
.with_context(|| {
let compiler = engine.compiler();
let (funcs, trampolines) = engine.join_maybe_parallel(
// In one (possibly) parallel task all wasm functions are compiled
// in parallel. Note that this is also where the actual validation
// of all function bodies happens as well.
|| -> Result<_> {
let funcs = engine.run_maybe_parallel(functions, |(index, func)| {
let offset = func.body.range().start;
let result =
compiler.compile_function(&translation, index, func, tunables, types);
result.with_context(|| {
let index = translation.module.func_index(index);
let name = match translation.debuginfo.name_section.func_names.get(&index) {
Some(name) => format!(" (`{}`)", name),
@@ -389,16 +391,28 @@ impl Module {
"failed to compile wasm function {index}{name} at offset {offset:#x}"
)
})
})?
.into_iter()
.collect();
})?;
Ok(funcs.into_iter().collect())
},
// In another (possibly) parallel task all trampolines necessary
// for untyped host-to-wasm entry are compiled. Note that this
// isn't really expected to take all that long; it's more so "well
// if we're using rayon why not use it here too".
|| -> Result<_> {
engine.run_maybe_parallel(translation.exported_signatures.clone(), |sig| {
let ty = &types[sig];
Ok(compiler.compile_host_to_wasm_trampoline(ty)?)
})
},
);
// Collect all the function results into a final ELF object.
let mut obj = engine.compiler().object()?;
let (funcs, trampolines) =
engine
.compiler()
.emit_obj(&translation, types, funcs, tunables, &mut obj)?;
.emit_obj(&translation, funcs?, trampolines?, tunables, &mut obj)?;
// If configured attempt to use static memory initialization which
// can either at runtime be implemented as a single memcpy to