wasmtime: Extract cranelift/lightbeam compilers to separate crates (#2117)

This commit extracts the two implementations of `Compiler` into two
separate crates, `wasmtime-cranelfit` and `wasmtime-lightbeam`. The
`wasmtime-jit` crate then depends on these two and instantiates them
appropriately. The goal here is to start reducing the weight of the
`wasmtime-environ` crate, which currently serves as a common set of
types between all `wasmtime-*` crates. Long-term I'd like to remove the
dependency on Cranelift from `wasmtime-environ`, but that's going to
take a lot more work.

In the meantime I figure it's a good way to get started by separating
out the lightbeam/cranelift function compilers from the
`wasmtime-environ` crate. We can continue to iterate on moving things
out in the future, too.
This commit is contained in:
Alex Crichton
2020-08-20 04:34:31 -05:00
committed by GitHub
parent b5e24c8c67
commit 693c6ea771
19 changed files with 564 additions and 369 deletions

View File

@@ -0,0 +1,123 @@
/// Helper macro to iterate over all builtin functions and their signatures.
#[macro_export]
macro_rules! foreach_builtin_function {
($mac:ident) => {
$mac! {
/// Returns an index for wasm's `memory.grow` builtin function.
memory32_grow(vmctx, i32, i32) -> (i32);
/// Returns an index for wasm's imported `memory.grow` builtin function.
imported_memory32_grow(vmctx, i32, i32) -> (i32);
/// Returns an index for wasm's `memory.size` builtin function.
memory32_size(vmctx, i32) -> (i32);
/// Returns an index for wasm's imported `memory.size` builtin function.
imported_memory32_size(vmctx, i32) -> (i32);
/// Returns an index for wasm's `table.copy` when both tables are locally
/// defined.
table_copy(vmctx, i32, i32, i32, i32, i32) -> ();
/// Returns an index for wasm's `table.init`.
table_init(vmctx, i32, i32, i32, i32, i32) -> ();
/// Returns an index for wasm's `elem.drop`.
elem_drop(vmctx, i32) -> ();
/// Returns an index for wasm's `memory.copy` for locally defined memories.
defined_memory_copy(vmctx, i32, i32, i32, i32) -> ();
/// Returns an index for wasm's `memory.copy` for imported memories.
imported_memory_copy(vmctx, i32, i32, i32, i32) -> ();
/// Returns an index for wasm's `memory.fill` for locally defined memories.
memory_fill(vmctx, i32, i32, i32, i32) -> ();
/// Returns an index for wasm's `memory.fill` for imported memories.
imported_memory_fill(vmctx, i32, i32, i32, i32) -> ();
/// Returns an index for wasm's `memory.init` instruction.
memory_init(vmctx, i32, i32, i32, i32, i32) -> ();
/// Returns an index for wasm's `data.drop` instruction.
data_drop(vmctx, i32) -> ();
/// Returns an index for Wasm's `table.grow` instruction for `funcref`s.
table_grow_funcref(vmctx, i32, i32, pointer) -> (i32);
/// Returns an index for Wasm's `table.grow` instruction for `externref`s.
table_grow_externref(vmctx, i32, i32, reference) -> (i32);
/// Returns an index for Wasm's `table.fill` instruction for `externref`s.
table_fill_externref(vmctx, i32, i32, reference, i32) -> ();
/// Returns an index for Wasm's `table.fill` instruction for `funcref`s.
table_fill_funcref(vmctx, i32, i32, pointer, i32) -> ();
/// Returns an index to drop a `VMExternRef`.
drop_externref(pointer) -> ();
/// Returns an index to do a GC and then insert a `VMExternRef` into the
/// `VMExternRefActivationsTable`.
activations_table_insert_with_gc(vmctx, reference) -> ();
/// Returns an index for Wasm's `global.get` instruction for `externref`s.
externref_global_get(vmctx, i32) -> (reference);
/// Returns an index for Wasm's `global.get` instruction for `externref`s.
externref_global_set(vmctx, i32, reference) -> ();
}
};
}
/// An index type for builtin functions.
#[derive(Copy, Clone, Debug)]
pub struct BuiltinFunctionIndex(u32);
impl BuiltinFunctionIndex {
/// Create a new `BuiltinFunctionIndex` from its index
pub const fn from_u32(i: u32) -> Self {
Self(i)
}
/// Return the index as an u32 number.
pub const fn index(&self) -> u32 {
self.0
}
}
macro_rules! declare_indexes {
(
$(
$( #[$attr:meta] )*
$name:ident( $( $param:ident ),* ) -> ( $( $result:ident ),* );
)*
) => {
impl BuiltinFunctionIndex {
declare_indexes!(
@indices;
0;
$( $( #[$attr] )* $name; )*
);
}
};
// Base case: no more indices to declare, so define the total number of
// function indices.
(
@indices;
$len:expr;
) => {
/// Returns the total number of builtin functions.
pub const fn builtin_functions_total_number() -> u32 {
$len
}
};
// Recursive case: declare the next index, and then keep declaring the rest of
// the indices.
(
@indices;
$index:expr;
$( #[$this_attr:meta] )*
$this_name:ident;
$(
$( #[$rest_attr:meta] )*
$rest_name:ident;
)*
) => {
$( #[$this_attr] )*
pub const fn $this_name() -> Self {
Self($index)
}
declare_indexes!(
@indices;
($index + 1);
$( $( #[$rest_attr] )* $rest_name; )*
);
}
}
foreach_builtin_function!(declare_indexes);

View File

@@ -1,404 +0,0 @@
//! Support for compiling with Cranelift.
// # How does Wasmtime prevent stack overflow?
//
// A few locations throughout the codebase link to this file to explain
// interrupts and stack overflow. To start off, let's take a look at stack
// overflow. Wasm code is well-defined to have stack overflow being recoverable
// and raising a trap, so we need to handle this somehow! There's also an added
// constraint where as an embedder you frequently are running host-provided
// code called from wasm. WebAssembly and native code currently share the same
// call stack, so you want to make sure that your host-provided code will have
// enough call-stack available to it.
//
// Given all that, the way that stack overflow is handled is by adding a
// prologue check to all JIT functions for how much native stack is remaining.
// The `VMContext` pointer is the first argument to all functions, and the first
// field of this structure is `*const VMInterrupts` and the first field of that
// is the stack limit. Note that the stack limit in this case means "if the
// stack pointer goes below this, trap". Each JIT function which consumes stack
// space or isn't a leaf function starts off by loading the stack limit,
// checking it against the stack pointer, and optionally traps.
//
// This manual check allows the embedder (us) to give wasm a relatively precise
// amount of stack allocation. Using this scheme we reserve a chunk of stack
// for wasm code relative from where wasm code was called. This ensures that
// native code called by wasm should have native stack space to run, and the
// numbers of stack spaces here should all be configurable for various
// embeddings.
//
// Note that we do not consider each thread's stack guard page here. It's
// considered that if you hit that you still abort the whole program. This
// shouldn't happen most of the time because wasm is always stack-bound and
// it's up to the embedder to bound its own native stack.
//
// So all-in-all, that's how we implement stack checks. Note that stack checks
// cannot be disabled because it's a feature of core wasm semantics. This means
// that all functions almost always have a stack check prologue, and it's up to
// us to optimize away that cost as much as we can.
//
// For more information about the tricky bits of managing the reserved stack
// size of wasm, see the implementation in `traphandlers.rs` in the
// `update_stack_limit` function.
//
// # How is Wasmtime interrupted?
//
// Ok so given all that background of stack checks, the next thing we want to
// build on top of this is the ability to *interrupt* executing wasm code. This
// is useful to ensure that wasm always executes within a particular time slice
// or otherwise doesn't consume all CPU resources on a system. There are two
// major ways that interrupts are required:
//
// * Loops - likely immediately apparent but it's easy to write an infinite
// loop in wasm, so we need the ability to interrupt loops.
// * Function entries - somewhat more subtle, but imagine a module where each
// function calls the next function twice. This creates 2^n calls pretty
// quickly, so a pretty small module can export a function with no loops
// that takes an extremely long time to call.
//
// In many cases if an interrupt comes in you want to interrupt host code as
// well, but we're explicitly not considering that here. We're hoping that
// interrupting host code is largely left to the embedder (e.g. figuring out
// how to interrupt blocking syscalls) and they can figure that out. The purpose
// of this feature is to basically only give the ability to interrupt
// currently-executing wasm code (or triggering an interrupt as soon as wasm
// reenters itself).
//
// To implement interruption of loops we insert code at the head of all loops
// which checks the stack limit counter. If the counter matches a magical
// sentinel value that's impossible to be the real stack limit, then we
// interrupt the loop and trap. To implement interrupts of functions, we
// actually do the same thing where the magical sentinel value we use here is
// automatically considered as considering all stack pointer values as "you ran
// over your stack". This means that with a write of a magical value to one
// location we can interrupt both loops and function bodies.
//
// The "magical value" here is `usize::max_value() - N`. We reserve
// `usize::max_value()` for "the stack limit isn't set yet" and so -N is
// then used for "you got interrupted". We do a bit of patching afterwards to
// translate a stack overflow into an interrupt trap if we see that an
// interrupt happened. Note that `N` here is a medium-size-ish nonzero value
// chosen in coordination with the cranelift backend. Currently it's 32k. The
// value of N is basically a threshold in the backend for "anything less than
// this requires only one branch in the prologue, any stack size bigger requires
// two branches". Naturally we want most functions to have one branch, but we
// also need to actually catch stack overflow, so for now 32k is chosen and it's
// assume no valid stack pointer will ever be `usize::max_value() - 32k`.
use crate::func_environ::{get_func_name, FuncEnvironment};
use crate::Compiler;
use crate::{
CompileError, CompiledFunction, Relocation, RelocationTarget, StackMapInformation,
TrapInformation,
};
use crate::{FunctionAddressMap, InstructionAddressMap};
use crate::{FunctionBodyData, ModuleTranslation};
use cranelift_codegen::ir::{self, ExternalName};
use cranelift_codegen::machinst::buffer::MachSrcLoc;
use cranelift_codegen::print_errors::pretty_error;
use cranelift_codegen::{binemit, isa, Context};
use cranelift_wasm::{DefinedFuncIndex, FuncIndex, FuncTranslator};
use std::convert::TryFrom;
use std::sync::Mutex;
/// Implementation of a relocation sink that just saves all the information for later
pub struct RelocSink {
/// Current function index.
func_index: FuncIndex,
/// Relocations recorded for the function.
pub func_relocs: Vec<Relocation>,
}
impl binemit::RelocSink for RelocSink {
fn reloc_block(
&mut self,
_offset: binemit::CodeOffset,
_reloc: binemit::Reloc,
_block_offset: binemit::CodeOffset,
) {
// This should use the `offsets` field of `ir::Function`.
panic!("block headers not yet implemented");
}
fn reloc_external(
&mut self,
offset: binemit::CodeOffset,
_srcloc: ir::SourceLoc,
reloc: binemit::Reloc,
name: &ExternalName,
addend: binemit::Addend,
) {
let reloc_target = if let ExternalName::User { namespace, index } = *name {
debug_assert_eq!(namespace, 0);
RelocationTarget::UserFunc(FuncIndex::from_u32(index))
} else if let ExternalName::LibCall(libcall) = *name {
RelocationTarget::LibCall(libcall)
} else {
panic!("unrecognized external name")
};
self.func_relocs.push(Relocation {
reloc,
reloc_target,
offset,
addend,
});
}
fn reloc_constant(
&mut self,
_code_offset: binemit::CodeOffset,
_reloc: binemit::Reloc,
_constant_offset: ir::ConstantOffset,
) {
// Do nothing for now: cranelift emits constant data after the function code and also emits
// function code with correct relative offsets to the constant data.
}
fn reloc_jt(&mut self, offset: binemit::CodeOffset, reloc: binemit::Reloc, jt: ir::JumpTable) {
self.func_relocs.push(Relocation {
reloc,
reloc_target: RelocationTarget::JumpTable(self.func_index, jt),
offset,
addend: 0,
});
}
}
impl RelocSink {
/// Return a new `RelocSink` instance.
pub fn new(func_index: FuncIndex) -> Self {
Self {
func_index,
func_relocs: Vec::new(),
}
}
}
/// Implementation of a trap sink that simply stores all trap info in-memory
#[derive(Default)]
pub struct TrapSink {
/// The in-memory vector of trap info
pub traps: Vec<TrapInformation>,
}
impl TrapSink {
/// Create a new `TrapSink`
pub fn new() -> Self {
Self::default()
}
}
impl binemit::TrapSink for TrapSink {
fn trap(
&mut self,
code_offset: binemit::CodeOffset,
source_loc: ir::SourceLoc,
trap_code: ir::TrapCode,
) {
self.traps.push(TrapInformation {
code_offset,
source_loc,
trap_code,
});
}
}
#[derive(Default)]
struct StackMapSink {
infos: Vec<StackMapInformation>,
}
impl binemit::StackMapSink for StackMapSink {
fn add_stack_map(&mut self, code_offset: binemit::CodeOffset, stack_map: binemit::StackMap) {
self.infos.push(StackMapInformation {
code_offset,
stack_map,
});
}
}
impl StackMapSink {
fn finish(mut self) -> Vec<StackMapInformation> {
self.infos.sort_unstable_by_key(|info| info.code_offset);
self.infos
}
}
fn get_function_address_map<'data>(
context: &Context,
data: &FunctionBodyData<'data>,
body_len: usize,
isa: &dyn isa::TargetIsa,
) -> FunctionAddressMap {
let mut instructions = Vec::new();
if let Some(ref mcr) = &context.mach_compile_result {
// New-style backend: we have a `MachCompileResult` that will give us `MachSrcLoc` mapping
// tuples.
for &MachSrcLoc { start, end, loc } in mcr.buffer.get_srclocs_sorted() {
instructions.push(InstructionAddressMap {
srcloc: loc,
code_offset: start as usize,
code_len: (end - start) as usize,
});
}
} else {
// Old-style backend: we need to traverse the instruction/encoding info in the function.
let func = &context.func;
let mut blocks = func.layout.blocks().collect::<Vec<_>>();
blocks.sort_by_key(|block| func.offsets[*block]); // Ensure inst offsets always increase
let encinfo = isa.encoding_info();
for block in blocks {
for (offset, inst, size) in func.inst_offsets(block, &encinfo) {
let srcloc = func.srclocs[inst];
instructions.push(InstructionAddressMap {
srcloc,
code_offset: offset as usize,
code_len: size as usize,
});
}
}
}
// Generate artificial srcloc for function start/end to identify boundary
// within module. Similar to FuncTranslator::cur_srcloc(): it will wrap around
// if byte code is larger than 4 GB.
let start_srcloc = ir::SourceLoc::new(data.module_offset as u32);
let end_srcloc = ir::SourceLoc::new((data.module_offset + data.data.len()) as u32);
FunctionAddressMap {
instructions,
start_srcloc,
end_srcloc,
body_offset: 0,
body_len,
}
}
/// A compiler that compiles a WebAssembly module with Cranelift, translating the Wasm to Cranelift IR,
/// optimizing it and then translating to assembly.
#[derive(Default)]
pub struct Cranelift {
translators: Mutex<Vec<FuncTranslator>>,
}
impl Cranelift {
fn take_translator(&self) -> FuncTranslator {
let candidate = self.translators.lock().unwrap().pop();
candidate.unwrap_or_else(FuncTranslator::new)
}
fn save_translator(&self, translator: FuncTranslator) {
self.translators.lock().unwrap().push(translator);
}
}
impl Compiler for Cranelift {
fn compile_function(
&self,
translation: &ModuleTranslation<'_>,
func_index: DefinedFuncIndex,
input: &FunctionBodyData<'_>,
isa: &dyn isa::TargetIsa,
) -> Result<CompiledFunction, CompileError> {
let module = &translation.module;
let tunables = &translation.tunables;
let func_index = module.func_index(func_index);
let mut context = Context::new();
context.func.name = get_func_name(func_index);
context.func.signature = module.native_func_signature(func_index).clone();
if tunables.debug_info {
context.func.collect_debug_info();
}
let mut func_env = FuncEnvironment::new(isa.frontend_config(), module, tunables);
// We use these as constant offsets below in
// `stack_limit_from_arguments`, so assert their values here. This
// allows the closure below to get coerced to a function pointer, as
// needed by `ir::Function`.
//
// Otherwise our stack limit is specially calculated from the vmctx
// argument, where we need to load the `*const VMInterrupts`
// pointer, and then from that pointer we need to load the stack
// limit itself. Note that manual register allocation is needed here
// too due to how late in the process this codegen happens.
//
// For more information about interrupts and stack checks, see the
// top of this file.
let vmctx = context
.func
.create_global_value(ir::GlobalValueData::VMContext);
let interrupts_ptr = context.func.create_global_value(ir::GlobalValueData::Load {
base: vmctx,
offset: i32::try_from(func_env.offsets.vmctx_interrupts())
.unwrap()
.into(),
global_type: isa.pointer_type(),
readonly: true,
});
let stack_limit = context.func.create_global_value(ir::GlobalValueData::Load {
base: interrupts_ptr,
offset: i32::try_from(func_env.offsets.vminterrupts_stack_limit())
.unwrap()
.into(),
global_type: isa.pointer_type(),
readonly: false,
});
context.func.stack_limit = Some(stack_limit);
let mut func_translator = self.take_translator();
let result = func_translator.translate(
translation.module_translation.as_ref().unwrap(),
input.data,
input.module_offset,
&mut context.func,
&mut func_env,
);
self.save_translator(func_translator);
result?;
let mut code_buf: Vec<u8> = Vec::new();
let mut reloc_sink = RelocSink::new(func_index);
let mut trap_sink = TrapSink::new();
let mut stack_map_sink = StackMapSink::default();
context
.compile_and_emit(
isa,
&mut code_buf,
&mut reloc_sink,
&mut trap_sink,
&mut stack_map_sink,
)
.map_err(|error| {
CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
})?;
let unwind_info = context.create_unwind_info(isa).map_err(|error| {
CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
})?;
let address_transform = get_function_address_map(&context, input, code_buf.len(), isa);
let ranges = if tunables.debug_info {
let ranges = context.build_value_labels_ranges(isa).map_err(|error| {
CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
})?;
Some(ranges)
} else {
None
};
Ok(CompiledFunction {
body: code_buf,
jt_offsets: context.func.jt_offsets,
relocations: reloc_sink.func_relocs,
address_map: address_transform,
value_labels_ranges: ranges.unwrap_or(Default::default()),
stack_slots: context.func.stack_slots,
traps: trap_sink.traps,
unwind_info,
stack_maps: stack_map_sink.finish(),
})
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -25,25 +25,19 @@
)]
mod address_map;
mod builtin;
mod compilation;
mod data_structures;
mod func_environ;
mod module;
mod module_environ;
mod tunables;
mod vmoffsets;
pub mod cranelift;
#[cfg(feature = "lightbeam")]
pub mod lightbeam;
pub use crate::address_map::*;
pub use crate::builtin::*;
pub use crate::compilation::*;
pub use crate::cranelift::Cranelift;
pub use crate::data_structures::*;
pub use crate::func_environ::BuiltinFunctionIndex;
#[cfg(feature = "lightbeam")]
pub use crate::lightbeam::Lightbeam;
// pub use crate::func_environ::BuiltinFunctionIndex;
pub use crate::module::{
EntityIndex, MemoryPlan, MemoryStyle, Module, TableElements, TablePlan, TableStyle,
};
@@ -60,10 +54,8 @@ pub const WASM_MAX_PAGES: u32 = 0x10000;
/// Version number of this crate.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
pub(crate) fn reference_type(
wasm_ty: cranelift_wasm::WasmType,
pointer_type: ir::Type,
) -> ir::Type {
/// Returns the reference type to use for the provided wasm type.
pub fn reference_type(wasm_ty: cranelift_wasm::WasmType, pointer_type: ir::Type) -> ir::Type {
match wasm_ty {
cranelift_wasm::WasmType::FuncRef => pointer_type,
cranelift_wasm::WasmType::ExternRef => match pointer_type {

View File

@@ -1,71 +0,0 @@
//! Support for compiling with Lightbeam.
use crate::compilation::{CompileError, CompiledFunction, Compiler};
use crate::cranelift::{RelocSink, TrapSink};
use crate::func_environ::FuncEnvironment;
use crate::{FunctionBodyData, ModuleTranslation};
use cranelift_codegen::isa;
use cranelift_wasm::DefinedFuncIndex;
use lightbeam::{CodeGenSession, NullOffsetSink, Sinks};
/// A compiler that compiles a WebAssembly module with Lightbeam, directly translating the Wasm file.
pub struct Lightbeam;
impl Compiler for Lightbeam {
fn compile_function(
&self,
translation: &ModuleTranslation,
i: DefinedFuncIndex,
function_body: &FunctionBodyData<'_>,
isa: &dyn isa::TargetIsa,
) -> Result<CompiledFunction, CompileError> {
if translation.tunables.debug_info {
return Err(CompileError::DebugInfoNotSupported);
}
let func_index = translation.module.func_index(i);
let env = FuncEnvironment::new(
isa.frontend_config(),
&translation.module,
&translation.tunables,
);
let mut codegen_session: CodeGenSession<_> = CodeGenSession::new(
translation.function_body_inputs.len() as u32,
&env,
lightbeam::microwasm::I32,
);
let mut reloc_sink = RelocSink::new(func_index);
let mut trap_sink = TrapSink::new();
lightbeam::translate_function(
&mut codegen_session,
Sinks {
relocs: &mut reloc_sink,
traps: &mut trap_sink,
offsets: &mut NullOffsetSink,
},
i.as_u32(),
wasmparser::FunctionBody::new(0, function_body.data),
)
.map_err(|e| CompileError::Codegen(format!("Failed to translate function: {}", e)))?;
let code_section = codegen_session
.into_translated_code_section()
.map_err(|e| CompileError::Codegen(format!("Failed to generate output code: {}", e)))?;
Ok(CompiledFunction {
// TODO: try to remove copy here (?)
body: code_section.buffer().to_vec(),
traps: trap_sink.traps,
relocations: reloc_sink.func_relocs,
// not implemented for lightbeam currently
unwind_info: None,
stack_maps: Default::default(),
stack_slots: Default::default(),
value_labels_ranges: Default::default(),
address_map: Default::default(),
jt_offsets: Default::default(),
})
}
}