externref: implement stack map-based garbage collection

For host VM code, we use plain reference counting, where cloning increments
the reference count, and dropping decrements it. Thanks to Rust's ownership
and borrowing system, we can avoid many of the on-stack increment/decrement
operations that typically plague the performance of reference counting:
moving a `VMExternRef` does not touch its reference count, and borrowing it
either avoids the increment entirely or defers it until the `VMExternRef` is
actually cloned.

When passing a `VMExternRef` into compiled Wasm code, we don't want to do
reference count mutations for every compiled `local.{get,set}`, nor for
every function call. Therefore, we use a variation of **deferred reference
counting**, where we only mutate reference counts when storing
`VMExternRef`s somewhere that outlives the activation: into a global or
table. Simultaneously, we over-approximate the set of `VMExternRef`s that
are inside Wasm function activations. Periodically, we walk the stack at GC
safe points, and use stack map information to precisely identify the set of
`VMExternRef`s inside Wasm activations. Then we take the difference between
this precise set and our over-approximation, and decrement the reference
count for each of the `VMExternRef`s that are in our over-approximation but
not in the precise set. Finally, the over-approximation is replaced with the
precise set.

The `VMExternRefActivationsTable` implements the over-approximated set of
`VMExternRef`s referenced by Wasm activations. Calling a Wasm function and
passing it a `VMExternRef` moves the `VMExternRef` into the table, and the
compiled Wasm function logically "borrows" the `VMExternRef` from the
table. Similarly, `global.get` and `table.get` operations clone the gotten
`VMExternRef` into the `VMExternRefActivationsTable` and then "borrow" the
reference out of the table.

When a `VMExternRef` is returned to host code from a Wasm function, the host
increments the reference count (because the reference is logically
"borrowed" from the `VMExternRefActivationsTable` and the reference count
from the table will be dropped at the next GC).

For more general information on deferred reference counting, see *An
Examination of Deferred Reference Counting and Cycle Detection* by Quinane:
https://openresearch-repository.anu.edu.au/bitstream/1885/42030/2/hon-thesis.pdf

cc #929

Fixes #1804
This commit is contained in:
Nick Fitzgerald
2020-06-03 09:21:34 -07:00
parent 357fb11f46
commit f30ce1fe97
32 changed files with 1415 additions and 235 deletions

View File

@@ -1,5 +1,5 @@
use crate::address_map::{ModuleAddressMap, ValueLabelsRanges};
use crate::compilation::{Compilation, Relocations, Traps};
use crate::compilation::{Compilation, Relocations, StackMaps, Traps};
use cranelift_codegen::ir;
use cranelift_entity::PrimaryMap;
use cranelift_wasm::DefinedFuncIndex;
@@ -35,6 +35,7 @@ pub struct ModuleCacheData {
value_ranges: ValueLabelsRanges,
stack_slots: PrimaryMap<DefinedFuncIndex, ir::StackSlots>,
traps: Traps,
stack_maps: StackMaps,
}
/// A type alias over the module cache data as a tuple.
@@ -45,6 +46,7 @@ pub type ModuleCacheDataTupleType = (
ValueLabelsRanges,
PrimaryMap<DefinedFuncIndex, ir::StackSlots>,
Traps,
StackMaps,
);
struct Sha256Hasher(Sha256);
@@ -204,6 +206,7 @@ impl ModuleCacheData {
value_ranges: data.3,
stack_slots: data.4,
traps: data.5,
stack_maps: data.6,
}
}
@@ -215,6 +218,7 @@ impl ModuleCacheData {
self.value_ranges,
self.stack_slots,
self.traps,
self.stack_maps,
)
}
}

View File

@@ -144,6 +144,22 @@ pub struct TrapInformation {
/// Information about traps associated with the functions where the traps are placed.
pub type Traps = PrimaryMap<DefinedFuncIndex, Vec<TrapInformation>>;
/// The offset within a function of a GC safepoint, and its associated stack
/// map.
///
/// One value of this type describes a single safepoint: where it is in the
/// function's code and which stack map applies there. The type derives
/// `Serialize`/`Deserialize`, so it can be stored alongside other serialized
/// compilation results.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct StackMapInformation {
/// The offset of the GC safepoint within the function's native code. It is
/// relative to the beginning of the function.
pub code_offset: binemit::CodeOffset,
/// The stack map for identifying live GC refs at the GC safepoint.
pub stack_map: binemit::Stackmap,
}
/// Information about GC safepoints and their associated stack maps within each
/// function.
///
/// Keyed by `DefinedFuncIndex`; each entry is the list of
/// `StackMapInformation` records for that function.
pub type StackMaps = PrimaryMap<DefinedFuncIndex, Vec<StackMapInformation>>;
/// An error while compiling WebAssembly to machine code.
#[derive(Error, Debug)]
pub enum CompileError {

View File

@@ -88,7 +88,8 @@
use crate::address_map::{FunctionAddressMap, InstructionAddressMap};
use crate::cache::{ModuleCacheDataTupleType, ModuleCacheEntry};
use crate::compilation::{
Compilation, CompileError, CompiledFunction, Relocation, RelocationTarget, TrapInformation,
Compilation, CompileError, CompiledFunction, Relocation, RelocationTarget, StackMapInformation,
TrapInformation,
};
use crate::func_environ::{get_func_name, FuncEnvironment};
use crate::{CacheConfig, FunctionBodyData, ModuleLocal, ModuleTranslation, Tunables};
@@ -204,6 +205,27 @@ impl binemit::TrapSink for TrapSink {
}
}
/// Accumulates the stack maps reported through `binemit::StackmapSink` as
/// `StackMapInformation` records.
#[derive(Default)]
struct StackMapSink {
// Records in the order they were reported; `finish` sorts them by code
// offset before handing them out.
infos: Vec<StackMapInformation>,
}
impl binemit::StackmapSink for StackMapSink {
    /// Appends one safepoint record, pairing `code_offset` with its
    /// `stack_map`, to the end of the collected list.
    fn add_stackmap(&mut self, code_offset: binemit::CodeOffset, stack_map: binemit::Stackmap) {
        let record = StackMapInformation {
            code_offset,
            stack_map,
        };
        self.infos.push(record);
    }
}
impl StackMapSink {
    /// Consumes the sink and returns the collected records ordered by
    /// ascending `code_offset`.
    ///
    /// The sort is stable, so records sharing an offset keep the order in
    /// which they were added.
    fn finish(self) -> Vec<StackMapInformation> {
        let mut sorted = self.infos;
        sorted.sort_by_key(|entry| entry.code_offset);
        sorted
    }
}
fn get_function_address_map<'data>(
context: &Context,
data: &FunctionBodyData<'data>,
@@ -294,6 +316,7 @@ fn compile(env: CompileEnv<'_>) -> Result<ModuleCacheDataTupleType, CompileError
let mut value_ranges = PrimaryMap::with_capacity(env.function_body_inputs.len());
let mut stack_slots = PrimaryMap::with_capacity(env.function_body_inputs.len());
let mut traps = PrimaryMap::with_capacity(env.function_body_inputs.len());
let mut stack_maps = PrimaryMap::with_capacity(env.function_body_inputs.len());
env.function_body_inputs
.into_iter()
@@ -354,14 +377,14 @@ fn compile(env: CompileEnv<'_>) -> Result<ModuleCacheDataTupleType, CompileError
let mut code_buf: Vec<u8> = Vec::new();
let mut reloc_sink = RelocSink::new(func_index);
let mut trap_sink = TrapSink::new();
let mut stackmap_sink = binemit::NullStackmapSink {};
let mut stack_map_sink = StackMapSink::default();
context
.compile_and_emit(
isa,
&mut code_buf,
&mut reloc_sink,
&mut trap_sink,
&mut stackmap_sink,
&mut stack_map_sink,
)
.map_err(|error| {
CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
@@ -391,6 +414,7 @@ fn compile(env: CompileEnv<'_>) -> Result<ModuleCacheDataTupleType, CompileError
context.func.stack_slots,
trap_sink.traps,
unwind_info,
stack_map_sink.finish(),
))
})
.collect::<Result<Vec<_>, CompileError>>()?
@@ -405,6 +429,7 @@ fn compile(env: CompileEnv<'_>) -> Result<ModuleCacheDataTupleType, CompileError
sss,
function_traps,
unwind_info,
stack_map,
)| {
functions.push(CompiledFunction {
body: function,
@@ -416,6 +441,7 @@ fn compile(env: CompileEnv<'_>) -> Result<ModuleCacheDataTupleType, CompileError
value_ranges.push(ranges.unwrap_or_default());
stack_slots.push(sss);
traps.push(function_traps);
stack_maps.push(stack_map);
},
);
@@ -428,6 +454,7 @@ fn compile(env: CompileEnv<'_>) -> Result<ModuleCacheDataTupleType, CompileError
value_ranges,
stack_slots,
traps,
stack_maps,
))
}

View File

@@ -1,6 +1,7 @@
#![doc(hidden)]
pub mod ir {
pub use cranelift_codegen::binemit::Stackmap;
pub use cranelift_codegen::ir::{
types, AbiParam, ArgumentPurpose, Signature, SourceLoc, StackSlots, TrapCode, Type,
ValueLabel, ValueLoc,

View File

@@ -658,13 +658,6 @@ impl<'module_environment> TargetEnvironment for FuncEnvironment<'module_environm
/// Returns the `TargetFrontendConfig` stored in this environment's
/// `target_config` field.
fn target_config(&self) -> TargetFrontendConfig {
self.target_config
}
/// Returns the IR type used for reference values; here that is whatever
/// `pointer_type()` returns.
fn reference_type(&self) -> ir::Type {
// For now, the only reference types we support are `externref`, which
// don't require tracing GC and stack maps. So we just use the target's
// pointer type. This will have to change once we move to tracing GC.
self.pointer_type()
}
}
impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'module_environment> {

View File

@@ -47,7 +47,7 @@ pub use crate::cache::create_new_config as cache_create_new_config;
pub use crate::cache::CacheConfig;
pub use crate::compilation::{
Compilation, CompileError, CompiledFunction, Compiler, Relocation, RelocationTarget,
Relocations, TrapInformation, Traps,
Relocations, StackMapInformation, StackMaps, TrapInformation, Traps,
};
pub use crate::cranelift::Cranelift;
pub use crate::data_structures::*;

View File

@@ -34,6 +34,7 @@ impl crate::compilation::Compiler for Lightbeam {
);
let mut relocations = PrimaryMap::with_capacity(translation.function_body_inputs.len());
let mut traps = PrimaryMap::with_capacity(translation.function_body_inputs.len());
let stack_maps = PrimaryMap::with_capacity(translation.function_body_inputs.len());
let mut codegen_session: CodeGenSession<_> = CodeGenSession::new(
translation.function_body_inputs.len() as u32,
@@ -81,6 +82,7 @@ impl crate::compilation::Compiler for Lightbeam {
ValueLabelsRanges::new(),
PrimaryMap::new(),
traps,
stack_maps,
))
}
}

View File

@@ -92,13 +92,6 @@ impl<'data> TargetEnvironment for ModuleEnvironment<'data> {
/// Returns the `TargetFrontendConfig` stored in `self.result.target_config`.
fn target_config(&self) -> TargetFrontendConfig {
self.result.target_config
}
/// Returns the IR type used for reference values; here that is whatever
/// `pointer_type()` returns.
fn reference_type(&self) -> ir::Type {
// For now, the only reference types we support are `externref`, which
// don't require tracing GC and stack maps. So we just use the target's
// pointer type. This will have to change once we move to tracing GC.
self.pointer_type()
}
}
/// This trait is useful for `translate_module` because it tells how to translate

View File

@@ -5,6 +5,8 @@
//
// struct VMContext {
// interrupts: *const VMInterrupts,
// externref_activations_table: *mut VMExternRefActivationsTable,
// stack_map_registry: *mut StackMapRegistry,
// signature_ids: [VMSharedSignatureIndex; module.num_signature_ids],
// imported_functions: [VMFunctionImport; module.num_imported_functions],
// imported_tables: [VMTableImport; module.num_imported_tables],
@@ -286,9 +288,23 @@ impl VMOffsets {
0
}
/// The offset of the `VMExternRefActivationsTable` member.
///
/// This is the `interrupts` offset advanced by one pointer size. Panics if
/// that addition overflows `u32`.
pub fn vmctx_externref_activations_table(&self) -> u32 {
    let interrupts_offset = self.vmctx_interrupts();
    let pointer_bytes = u32::from(self.pointer_size);
    interrupts_offset.checked_add(pointer_bytes).unwrap()
}
/// The offset of the `*mut StackMapRegistry` member.
///
/// This is the externref activations table offset advanced by one pointer
/// size. Panics if that addition overflows `u32`.
pub fn vmctx_stack_map_registry(&self) -> u32 {
    let activations_table_offset = self.vmctx_externref_activations_table();
    let pointer_bytes = u32::from(self.pointer_size);
    activations_table_offset.checked_add(pointer_bytes).unwrap()
}
/// The offset of the `signature_ids` array.
pub fn vmctx_signature_ids_begin(&self) -> u32 {
self.vmctx_interrupts()
self.vmctx_stack_map_registry()
.checked_add(u32::from(self.pointer_size))
.unwrap()
}
@@ -591,6 +607,19 @@ impl VMOffsets {
}
}
/// Field offsets within `VMExternRefActivationsTable`.
impl VMOffsets {
    /// Offset of `VMExternRefActivationsTable::next`, which sits at the very
    /// start of the table.
    pub fn vm_extern_ref_activation_table_next(&self) -> u32 {
        0
    }

    /// Offset of `VMExternRefActivationsTable::end`, one pointer size into
    /// the table.
    pub fn vm_extern_ref_activation_table_end(&self) -> u32 {
        u32::from(self.pointer_size)
    }
}
/// Target-specific type for a shared signature index; a newtype around `u32`.
#[derive(Debug, Copy, Clone)]
pub struct TargetSharedSignatureIndex(u32);