wasmtime/crates/jit/src/compiler.rs
Nick Fitzgerald · f30ce1fe97 · externref: implement stack map-based garbage collection
For host VM code, we use plain reference counting, where cloning increments
the reference count, and dropping decrements it. We can avoid many of the
on-stack increment/decrement operations that typically plague the
performance of reference counting via Rust's ownership and borrowing system.
Moving a `VMExternRef` avoids mutating its reference count, and borrowing it
either avoids the reference count increment entirely or delays it until the
`VMExternRef` is actually cloned.
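
For illustration only, here is a minimal sketch of those rules using a
hypothetical `SketchExternRef` type (plain `Rc` standing in for
`VMExternRef`): only an explicit `clone` increments the count and only a
drop decrements it; moves and borrows never touch it.

```rust
// Hypothetical sketch, not the real `VMExternRef` type.
use std::rc::Rc;

type SketchExternRef = Rc<dyn std::any::Any>; // stand-in for `VMExternRef`

fn pass_by_move(r: SketchExternRef) { drop(r); } // the one decrement happens here
fn pass_by_ref(_r: &SketchExternRef) {}          // no reference count traffic at all

fn demo(r: SketchExternRef) {
    pass_by_ref(&r);       // borrow: count unchanged
    let extra = r.clone(); // explicit clone: count goes 1 -> 2
    drop(extra);           // count goes 2 -> 1
    pass_by_move(r);       // move: ownership transfers with no count change
}
```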

When passing a `VMExternRef` into compiled Wasm code, we don't want to do
reference count mutations for every compiled `local.{get,set}`, nor for
every function call. Therefore, we use a variation of **deferred reference
counting**, where we only mutate reference counts when storing
`VMExternRef`s somewhere that outlives the activation: into a global or
table. Simultaneously, we over-approximate the set of `VMExternRef`s that
are inside Wasm function activations. Periodically, we walk the stack at GC
safe points, and use stack map information to precisely identify the set of
`VMExternRef`s inside Wasm activations. Then we take the difference between
this precise set and our over-approximation, and decrement the reference
count for each of the `VMExternRef`s that are in our over-approximation but
not in the precise set. Finally, the over-approximation is replaced with the
precise set.
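
Sketched in Rust with illustrative names only (not the actual
`wasmtime_runtime` API), a GC at a safe point boils down to:

```rust
// Hypothetical sketch of the GC step described above.
use std::collections::HashSet;

/// Over-approximation of externrefs held by live Wasm activations,
/// keyed here by the raw address of the referenced object.
struct ActivationsTableSketch {
    over_approximation: HashSet<usize>,
}

fn gc(table: &mut ActivationsTableSketch, stack_roots: impl Iterator<Item = usize>) {
    // Walk the stack with stack map information to find the precise set of
    // externrefs inside Wasm activations.
    let precise: HashSet<usize> = stack_roots.collect();

    // Anything we were conservatively keeping alive, but that no activation
    // actually references anymore, gets its reference count decremented.
    for &stale in table.over_approximation.difference(&precise) {
        decrement_ref_count(stale); // hypothetical helper
    }

    // The precise set becomes the new over-approximation.
    table.over_approximation = precise;
}

fn decrement_ref_count(_raw: usize) { /* elided in this sketch */ }
```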

The `VMExternRefActivationsTable` implements the over-approximated set of
`VMExternRef`s referenced by Wasm activations. Calling a Wasm function and
passing it a `VMExternRef` moves the `VMExternRef` into the table, and the
compiled Wasm function logically "borrows" the `VMExternRef` from the
table. Similarly, `global.get` and `table.get` operations clone the retrieved
`VMExternRef` into the `VMExternRefActivationsTable` and then "borrow" the
reference out of the table.
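
A minimal sketch of that call-in path, with hypothetical stand-ins for the
real table and reference types:

```rust
// Hypothetical sketch; `ActivationsTableSketch::insert` stands in for the
// real insertion path in `wasmtime_runtime`.
struct VMExternRefSketch; // payload elided

struct ActivationsTableSketch {
    elements: Vec<VMExternRefSketch>,
}

impl ActivationsTableSketch {
    /// Take ownership of `r` so it outlives the upcoming activation; the
    /// compiled Wasm code only "borrows" it from this table afterwards.
    fn insert(&mut self, r: VMExternRefSketch) -> *const VMExternRefSketch {
        self.elements.push(r);
        self.elements.last().unwrap()
    }
}

fn call_wasm(table: &mut ActivationsTableSketch, arg: VMExternRefSketch) {
    // Moving the reference into the table needs no refcount mutation:
    // ownership (and its count) simply transfers to the table.
    let borrowed = table.insert(arg);
    // ... invoke the compiled Wasm function, passing `borrowed` ...
    let _ = borrowed;
}
```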

When a `VMExternRef` is returned to host code from a Wasm function, the host
increments the reference count (because the reference is logically
"borrowed" from the `VMExternRefActivationsTable` and the reference count
from the table will be dropped at the next GC).
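
And a sketch of the corresponding host-side handling of a returned reference
(same hypothetical `SketchExternRef` stand-in as above):

```rust
use std::rc::Rc;

type SketchExternRef = Rc<dyn std::any::Any>; // hypothetical stand-in

// The raw reference coming back from Wasm is logically borrowed from the
// activations table, so the host makes its own strong reference (an
// increment) before the table's reference can be dropped at the next GC.
fn receive_from_wasm(borrowed: &SketchExternRef) -> SketchExternRef {
    borrowed.clone()
}
```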

For more general information on deferred reference counting, see *An
Examination of Deferred Reference Counting and Cycle Detection* by Quinane:
https://openresearch-repository.anu.edu.au/bitstream/1885/42030/2/hon-thesis.pdf

cc #929

Fixes #1804
2020-06-15 09:39:37 -07:00


//! JIT compilation.
use crate::code_memory::CodeMemory;
use crate::instantiate::SetupError;
use cranelift_codegen::ir::ExternalName;
use cranelift_codegen::ir::InstBuilder;
use cranelift_codegen::print_errors::pretty_error;
use cranelift_codegen::Context;
use cranelift_codegen::{binemit, ir};
use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext};
use wasmtime_debug::{emit_dwarf, DebugInfoData, DwarfSection};
use wasmtime_environ::entity::{EntityRef, PrimaryMap};
use wasmtime_environ::isa::{TargetFrontendConfig, TargetIsa};
use wasmtime_environ::wasm::{DefinedFuncIndex, DefinedMemoryIndex, MemoryIndex, SignatureIndex};
use wasmtime_environ::{
CacheConfig, CompileError, CompiledFunction, Compiler as _C, Module, ModuleAddressMap,
ModuleMemoryOffset, ModuleTranslation, ModuleVmctxInfo, Relocation, RelocationTarget,
Relocations, StackMaps, Traps, Tunables, VMOffsets, ValueLabelsRanges,
};
use wasmtime_runtime::{InstantiationError, VMFunctionBody, VMTrampoline};
/// Select which kind of compilation to use.
#[derive(Copy, Clone, Debug)]
pub enum CompilationStrategy {
/// Let Wasmtime pick the strategy.
Auto,
/// Compile all functions with Cranelift.
Cranelift,
/// Compile all functions with Lightbeam.
#[cfg(feature = "lightbeam")]
Lightbeam,
}
/// A WebAssembly code JIT compiler.
///
/// A `Compiler` instance owns the executable memory that it allocates.
///
/// TODO: Evolve this to support streaming rather than requiring a `&[u8]`
/// containing a whole wasm module at once.
///
/// TODO: Consider using cranelift-module.
pub struct Compiler {
isa: Box<dyn TargetIsa>,
strategy: CompilationStrategy,
cache_config: CacheConfig,
tunables: Tunables,
}
impl Compiler {
/// Construct a new `Compiler`.
pub fn new(
isa: Box<dyn TargetIsa>,
strategy: CompilationStrategy,
cache_config: CacheConfig,
tunables: Tunables,
) -> Self {
Self {
isa,
strategy,
cache_config,
tunables,
}
}
}
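// Assert at compile time that `Compiler` is `Send` and `Sync`, so it can
// safely be shared and sent across threads.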
fn _assert_compiler_send_sync() {
fn _assert<T: Send + Sync>() {}
_assert::<Compiler>();
}
fn transform_dwarf_data(
isa: &dyn TargetIsa,
module: &Module,
debug_data: &DebugInfoData,
address_transform: &ModuleAddressMap,
value_ranges: &ValueLabelsRanges,
stack_slots: PrimaryMap<DefinedFuncIndex, ir::StackSlots>,
compilation: &wasmtime_environ::Compilation,
) -> Result<Vec<DwarfSection>, SetupError> {
let target_config = isa.frontend_config();
let ofs = VMOffsets::new(target_config.pointer_bytes(), &module.local);
let module_vmctx_info = {
ModuleVmctxInfo {
memory_offset: if ofs.num_imported_memories > 0 {
ModuleMemoryOffset::Imported(ofs.vmctx_vmmemory_import(MemoryIndex::new(0)))
} else if ofs.num_defined_memories > 0 {
ModuleMemoryOffset::Defined(
ofs.vmctx_vmmemory_definition_base(DefinedMemoryIndex::new(0)),
)
} else {
ModuleMemoryOffset::None
},
stack_slots,
}
};
emit_dwarf(
isa,
debug_data,
&address_transform,
&module_vmctx_info,
&value_ranges,
&compilation,
)
.map_err(SetupError::DebugInfo)
}
fn get_code_range(
compilation: &wasmtime_environ::Compilation,
finished_functions: &PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>,
) -> (*const u8, usize) {
if finished_functions.is_empty() {
return (::std::ptr::null(), 0);
}
// Assuming all functions are in the same code block, compute the min/max of its range.
let (start, end) = finished_functions.iter().fold::<(usize, usize), _>(
(!0, 0),
|(start, end), (i, body_ptr)| {
let body_ptr = (*body_ptr) as *const u8 as usize;
let body_len = compilation.get(i).body.len();
(
::std::cmp::min(start, body_ptr),
::std::cmp::max(end, body_ptr + body_len),
)
},
);
(start as *const u8, end - start)
}
#[allow(missing_docs)]
pub struct Compilation {
pub code_memory: CodeMemory,
pub finished_functions: PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>,
pub code_range: (*const u8, usize),
pub trampolines: PrimaryMap<SignatureIndex, VMTrampoline>,
pub jt_offsets: PrimaryMap<DefinedFuncIndex, ir::JumpTableOffsets>,
pub dwarf_sections: Vec<DwarfSection>,
pub traps: Traps,
pub stack_maps: StackMaps,
pub address_transform: ModuleAddressMap,
}
impl Compiler {
/// Return the isa.
pub fn isa(&self) -> &dyn TargetIsa {
self.isa.as_ref()
}
/// Return the target's frontend configuration settings.
pub fn frontend_config(&self) -> TargetFrontendConfig {
self.isa.frontend_config()
}
/// Return the tunables in use by this engine.
pub fn tunables(&self) -> &Tunables {
&self.tunables
}
/// Compile the given function bodies.
pub(crate) fn compile<'data>(
&self,
translation: &ModuleTranslation,
debug_data: Option<DebugInfoData>,
) -> Result<Compilation, SetupError> {
let mut code_memory = CodeMemory::new();
let (
compilation,
relocations,
address_transform,
value_ranges,
stack_slots,
traps,
stack_maps,
) = match self.strategy {
// For now, interpret `Auto` as `Cranelift` since that's the most stable
// implementation.
CompilationStrategy::Auto | CompilationStrategy::Cranelift => {
wasmtime_environ::cranelift::Cranelift::compile_module(
translation,
&*self.isa,
&self.cache_config,
)
}
#[cfg(feature = "lightbeam")]
CompilationStrategy::Lightbeam => {
wasmtime_environ::lightbeam::Lightbeam::compile_module(
translation,
&*self.isa,
&self.cache_config,
)
}
}
.map_err(SetupError::Compile)?;
let dwarf_sections = if debug_data.is_some() && !compilation.is_empty() {
transform_dwarf_data(
&*self.isa,
&translation.module,
debug_data.as_ref().unwrap(),
&address_transform,
&value_ranges,
stack_slots,
&compilation,
)?
} else {
vec![]
};
// Allocate all of the compiled functions into executable memory,
// copying over their contents.
let finished_functions = allocate_functions(&mut code_memory, &compilation, &relocations)
.map_err(|message| {
SetupError::Instantiate(InstantiationError::Resource(format!(
"failed to allocate memory for functions: {}",
message
)))
})?;
// Eagerly generate an entry trampoline for every type signature in the
// module. This should be "relatively lightweight" for most modules and
// guarantees that all functions (including indirect ones through
// tables) have a trampoline when invoked through the wasmtime API.
let mut cx = FunctionBuilderContext::new();
let mut trampolines = PrimaryMap::new();
for (_, (_, native_sig)) in translation.module.local.signatures.iter() {
let trampoline = make_trampoline(
&*self.isa,
&mut code_memory,
&mut cx,
native_sig,
std::mem::size_of::<u128>(),
)?;
trampolines.push(trampoline);
}
let jt_offsets = compilation.get_jt_offsets();
let code_range = get_code_range(&compilation, &finished_functions);
Ok(Compilation {
code_memory,
finished_functions,
code_range,
trampolines,
jt_offsets,
dwarf_sections,
traps,
stack_maps,
address_transform,
})
}
}
/// Create a trampoline for invoking a function.
pub fn make_trampoline(
isa: &dyn TargetIsa,
code_memory: &mut CodeMemory,
fn_builder_ctx: &mut FunctionBuilderContext,
signature: &ir::Signature,
value_size: usize,
) -> Result<VMTrampoline, SetupError> {
let pointer_type = isa.pointer_type();
let mut wrapper_sig = ir::Signature::new(isa.frontend_config().default_call_conv);
// Add the callee `vmctx` parameter.
wrapper_sig.params.push(ir::AbiParam::special(
pointer_type,
ir::ArgumentPurpose::VMContext,
));
// Add the caller `vmctx` parameter.
wrapper_sig.params.push(ir::AbiParam::new(pointer_type));
// Add the `callee_address` parameter.
wrapper_sig.params.push(ir::AbiParam::new(pointer_type));
// Add the `values_vec` parameter.
wrapper_sig.params.push(ir::AbiParam::new(pointer_type));
let mut context = Context::new();
context.func = ir::Function::with_name_signature(ir::ExternalName::user(0, 0), wrapper_sig);
{
let mut builder = FunctionBuilder::new(&mut context.func, fn_builder_ctx);
let block0 = builder.create_block();
builder.append_block_params_for_function_params(block0);
builder.switch_to_block(block0);
builder.seal_block(block0);
let (vmctx_ptr_val, caller_vmctx_ptr_val, callee_value, values_vec_ptr_val) = {
let params = builder.func.dfg.block_params(block0);
(params[0], params[1], params[2], params[3])
};
// Load the argument values out of `values_vec`.
let mflags = ir::MemFlags::trusted();
let callee_args = signature
.params
.iter()
.enumerate()
.map(|(i, r)| {
match i {
0 => vmctx_ptr_val,
1 => caller_vmctx_ptr_val,
_ =>
// i - 2 because vmctx and caller vmctx aren't passed through `values_vec`.
{
builder.ins().load(
r.value_type,
mflags,
values_vec_ptr_val,
((i - 2) * value_size) as i32,
)
}
}
})
.collect::<Vec<_>>();
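// Call the actual callee indirectly through the `callee_address` pointer,
// using the callee's own signature.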
let new_sig = builder.import_signature(signature.clone());
let call = builder
.ins()
.call_indirect(new_sig, callee_value, &callee_args);
let results = builder.func.dfg.inst_results(call).to_vec();
// Store the return values into `values_vec`.
let mflags = ir::MemFlags::trusted();
for (i, r) in results.iter().enumerate() {
builder
.ins()
.store(mflags, *r, values_vec_ptr_val, (i * value_size) as i32);
}
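// The trampoline itself returns nothing; the callee's results were stored
// into `values_vec` above.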
builder.ins().return_(&[]);
builder.finalize()
}
let mut code_buf = Vec::new();
let mut reloc_sink = RelocSink::default();
let mut trap_sink = binemit::NullTrapSink {};
let mut stackmap_sink = binemit::NullStackmapSink {};
context
.compile_and_emit(
isa,
&mut code_buf,
&mut reloc_sink,
&mut trap_sink,
&mut stackmap_sink,
)
.map_err(|error| {
SetupError::Compile(CompileError::Codegen(pretty_error(
&context.func,
Some(isa),
error,
)))
})?;
let unwind_info = context.create_unwind_info(isa).map_err(|error| {
SetupError::Compile(CompileError::Codegen(pretty_error(
&context.func,
Some(isa),
error,
)))
})?;
let ptr = code_memory
.allocate_for_function(
&CompiledFunction {
body: code_buf,
jt_offsets: context.func.jt_offsets,
unwind_info,
},
reloc_sink.relocs.iter(),
)
.map_err(|message| SetupError::Instantiate(InstantiationError::Resource(message)))?
.as_ptr();
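// Reinterpret the start of the generated code as a `VMTrampoline` function pointer.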
Ok(unsafe { std::mem::transmute::<*const VMFunctionBody, VMTrampoline>(ptr) })
}
fn allocate_functions(
code_memory: &mut CodeMemory,
compilation: &wasmtime_environ::Compilation,
relocations: &Relocations,
) -> Result<PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>, String> {
if compilation.is_empty() {
return Ok(PrimaryMap::new());
}
let fat_ptrs = code_memory.allocate_for_compilation(compilation, relocations)?;
// Second, create a PrimaryMap from the resulting vector of fat pointers.
let mut result = PrimaryMap::with_capacity(compilation.len());
for i in 0..fat_ptrs.len() {
let fat_ptr: *mut [VMFunctionBody] = fat_ptrs[i];
result.push(fat_ptr);
}
Ok(result)
}
/// We don't expect trampoline compilation to produce many relocations, so
/// this `RelocSink` just asserts that it doesn't receive most of them, but
/// handles libcall ones.
#[derive(Default)]
pub struct RelocSink {
relocs: Vec<Relocation>,
}
impl RelocSink {
/// Returns collected relocations.
pub fn relocs(&self) -> &[Relocation] {
&self.relocs
}
}
impl binemit::RelocSink for RelocSink {
fn reloc_block(
&mut self,
_offset: binemit::CodeOffset,
_reloc: binemit::Reloc,
_block_offset: binemit::CodeOffset,
) {
panic!("trampoline compilation should not produce block relocs");
}
fn reloc_external(
&mut self,
offset: binemit::CodeOffset,
_srcloc: ir::SourceLoc,
reloc: binemit::Reloc,
name: &ir::ExternalName,
addend: binemit::Addend,
) {
let reloc_target = if let ExternalName::LibCall(libcall) = *name {
RelocationTarget::LibCall(libcall)
} else {
panic!("unrecognized external name")
};
self.relocs.push(Relocation {
reloc,
reloc_target,
offset,
addend,
});
}
fn reloc_constant(
&mut self,
_code_offset: binemit::CodeOffset,
_reloc: binemit::Reloc,
_constant_offset: ir::ConstantOffset,
) {
panic!("trampoline compilation should not produce constant relocs");
}
fn reloc_jt(
&mut self,
_offset: binemit::CodeOffset,
_reloc: binemit::Reloc,
_jt: ir::JumpTable,
) {
panic!("trampoline compilation should not produce jump table relocs");
}
}