wasmtime: add build-time option for parallel compilation (#1903)

When running in embedded environments, thread creation is sometimes
undesirable. This adds a feature to toggle wasmtime's internal thread
creation for parallel compilation.
This commit is contained in:
Daiki Ueno
2020-07-06 18:22:05 +02:00
committed by GitHub
parent 80ff22fd18
commit 2ce2dd0203
6 changed files with 145 additions and 119 deletions

1
Cargo.lock generated
View File

@@ -2417,6 +2417,7 @@ dependencies = [
"anyhow", "anyhow",
"base64 0.12.1", "base64 0.12.1",
"bincode", "bincode",
"cfg-if",
"cranelift-codegen", "cranelift-codegen",
"cranelift-entity", "cranelift-entity",
"cranelift-frontend", "cranelift-frontend",

View File

@@ -72,7 +72,7 @@ members = [
] ]
[features] [features]
default = ["jitdump", "wasmtime/wat"] default = ["jitdump", "wasmtime/wat", "wasmtime/parallel-compilation"]
lightbeam = [ lightbeam = [
"wasmtime-environ/lightbeam", "wasmtime-environ/lightbeam",
"wasmtime-jit/lightbeam", "wasmtime-jit/lightbeam",

View File

@@ -20,7 +20,7 @@ cranelift-wasm = { path = "../../cranelift/wasm", version = "0.65.0", features =
wasmparser = "0.58.0" wasmparser = "0.58.0"
lightbeam = { path = "../lightbeam", optional = true, version = "0.18.0" } lightbeam = { path = "../lightbeam", optional = true, version = "0.18.0" }
indexmap = "1.0.2" indexmap = "1.0.2"
rayon = "1.2.1" rayon = { version = "1.2.1", optional = true }
thiserror = "1.0.4" thiserror = "1.0.4"
directories = "2.0.1" directories = "2.0.1"
sha2 = "0.8.0" sha2 = "0.8.0"
@@ -32,6 +32,7 @@ zstd = "0.5"
toml = "0.5.5" toml = "0.5.5"
file-per-thread-logger = "0.1.1" file-per-thread-logger = "0.1.1"
more-asserts = "0.2.1" more-asserts = "0.2.1"
cfg-if = "0.1.9"
[target.'cfg(target_os = "windows")'.dependencies] [target.'cfg(target_os = "windows")'.dependencies]
winapi = "0.3.7" winapi = "0.3.7"
@@ -46,5 +47,8 @@ pretty_env_logger = "0.4.0"
filetime = "0.2.7" filetime = "0.2.7"
lazy_static = "1.3.0" lazy_static = "1.3.0"
[features]
parallel-compilation = ["rayon"]
[badges] [badges]
maintenance = { status = "actively-developed" } maintenance = { status = "actively-developed" }

View File

@@ -99,6 +99,7 @@ use cranelift_codegen::print_errors::pretty_error;
use cranelift_codegen::{binemit, isa, Context}; use cranelift_codegen::{binemit, isa, Context};
use cranelift_entity::PrimaryMap; use cranelift_entity::PrimaryMap;
use cranelift_wasm::{DefinedFuncIndex, FuncIndex, FuncTranslator, ModuleTranslationState}; use cranelift_wasm::{DefinedFuncIndex, FuncIndex, FuncTranslator, ModuleTranslationState};
#[cfg(feature = "parallel-compilation")]
use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
use std::convert::TryFrom; use std::convert::TryFrom;
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
@@ -318,132 +319,149 @@ fn compile(env: CompileEnv<'_>) -> Result<ModuleCacheDataTupleType, CompileError
let mut traps = PrimaryMap::with_capacity(env.function_body_inputs.len()); let mut traps = PrimaryMap::with_capacity(env.function_body_inputs.len());
let mut stack_maps = PrimaryMap::with_capacity(env.function_body_inputs.len()); let mut stack_maps = PrimaryMap::with_capacity(env.function_body_inputs.len());
env.function_body_inputs type FunctionBodyInput<'a> = (DefinedFuncIndex, &'a FunctionBodyData<'a>);
.into_iter()
.collect::<Vec<(DefinedFuncIndex, &FunctionBodyData<'_>)>>()
.par_iter()
.map_init(FuncTranslator::new, |func_translator, (i, input)| {
let func_index = env.local.func_index(*i);
let mut context = Context::new();
context.func.name = get_func_name(func_index);
context.func.signature = env.local.native_func_signature(func_index).clone();
if env.tunables.debug_info {
context.func.collect_debug_info();
}
let mut func_env = FuncEnvironment::new(isa.frontend_config(), env.local, env.tunables); let compile_function = |func_translator: &mut FuncTranslator,
(i, input): &FunctionBodyInput| {
let func_index = env.local.func_index(*i);
let mut context = Context::new();
context.func.name = get_func_name(func_index);
context.func.signature = env.local.native_func_signature(func_index).clone();
if env.tunables.debug_info {
context.func.collect_debug_info();
}
// We use these as constant offsets below in let mut func_env = FuncEnvironment::new(isa.frontend_config(), env.local, env.tunables);
// `stack_limit_from_arguments`, so assert their values here. This
// allows the closure below to get coerced to a function pointer, as
// needed by `ir::Function`.
//
// Otherwise our stack limit is specially calculated from the vmctx
// argument, where we need to load the `*const VMInterrupts`
// pointer, and then from that pointer we need to load the stack
// limit itself. Note that manual register allocation is needed here
// too due to how late in the process this codegen happens.
//
// For more information about interrupts and stack checks, see the
// top of this file.
let vmctx = context
.func
.create_global_value(ir::GlobalValueData::VMContext);
let interrupts_ptr = context.func.create_global_value(ir::GlobalValueData::Load {
base: vmctx,
offset: i32::try_from(func_env.offsets.vmctx_interrupts())
.unwrap()
.into(),
global_type: isa.pointer_type(),
readonly: true,
});
let stack_limit = context.func.create_global_value(ir::GlobalValueData::Load {
base: interrupts_ptr,
offset: i32::try_from(func_env.offsets.vminterrupts_stack_limit())
.unwrap()
.into(),
global_type: isa.pointer_type(),
readonly: false,
});
context.func.stack_limit = Some(stack_limit);
func_translator.translate(
env.module_translation.0,
input.data,
input.module_offset,
&mut context.func,
&mut func_env,
)?;
let mut code_buf: Vec<u8> = Vec::new(); // We use these as constant offsets below in
let mut reloc_sink = RelocSink::new(func_index); // `stack_limit_from_arguments`, so assert their values here. This
let mut trap_sink = TrapSink::new(); // allows the closure below to get coerced to a function pointer, as
let mut stack_map_sink = StackMapSink::default(); // needed by `ir::Function`.
context //
.compile_and_emit( // Otherwise our stack limit is specially calculated from the vmctx
isa, // argument, where we need to load the `*const VMInterrupts`
&mut code_buf, // pointer, and then from that pointer we need to load the stack
&mut reloc_sink, // limit itself. Note that manual register allocation is needed here
&mut trap_sink, // too due to how late in the process this codegen happens.
&mut stack_map_sink, //
) // For more information about interrupts and stack checks, see the
.map_err(|error| { // top of this file.
CompileError::Codegen(pretty_error(&context.func, Some(isa), error)) let vmctx = context
})?; .func
.create_global_value(ir::GlobalValueData::VMContext);
let interrupts_ptr = context.func.create_global_value(ir::GlobalValueData::Load {
base: vmctx,
offset: i32::try_from(func_env.offsets.vmctx_interrupts())
.unwrap()
.into(),
global_type: isa.pointer_type(),
readonly: true,
});
let stack_limit = context.func.create_global_value(ir::GlobalValueData::Load {
base: interrupts_ptr,
offset: i32::try_from(func_env.offsets.vminterrupts_stack_limit())
.unwrap()
.into(),
global_type: isa.pointer_type(),
readonly: false,
});
context.func.stack_limit = Some(stack_limit);
func_translator.translate(
env.module_translation.0,
input.data,
input.module_offset,
&mut context.func,
&mut func_env,
)?;
let unwind_info = context.create_unwind_info(isa).map_err(|error| { let mut code_buf: Vec<u8> = Vec::new();
let mut reloc_sink = RelocSink::new(func_index);
let mut trap_sink = TrapSink::new();
let mut stack_map_sink = StackMapSink::default();
context
.compile_and_emit(
isa,
&mut code_buf,
&mut reloc_sink,
&mut trap_sink,
&mut stack_map_sink,
)
.map_err(|error| {
CompileError::Codegen(pretty_error(&context.func, Some(isa), error)) CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
})?; })?;
let address_transform = get_function_address_map(&context, input, code_buf.len(), isa); let unwind_info = context.create_unwind_info(isa).map_err(|error| {
CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
})?;
let ranges = if env.tunables.debug_info { let address_transform = get_function_address_map(&context, input, code_buf.len(), isa);
let ranges = context.build_value_labels_ranges(isa).map_err(|error| {
CompileError::Codegen(pretty_error(&context.func, Some(isa), error)) let ranges = if env.tunables.debug_info {
})?; let ranges = context.build_value_labels_ranges(isa).map_err(|error| {
Some(ranges) CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
})?;
Some(ranges)
} else {
None
};
Ok((
code_buf,
context.func.jt_offsets,
reloc_sink.func_relocs,
address_transform,
ranges,
context.func.stack_slots,
trap_sink.traps,
unwind_info,
stack_map_sink.finish(),
))
};
let inputs: Vec<FunctionBodyInput> = env.function_body_inputs.into_iter().collect();
let results: Result<Vec<_>, CompileError> = {
cfg_if::cfg_if! {
if #[cfg(feature = "parallel-compilation")] {
inputs
.par_iter()
.map_init(FuncTranslator::new, compile_function)
.collect()
} else { } else {
None let mut func_translator = FuncTranslator::new();
}; inputs
.iter()
.map(|input| compile_function(&mut func_translator, input))
.collect()
}
}
};
Ok(( results?.into_iter().for_each(
code_buf, |(
context.func.jt_offsets, function,
reloc_sink.func_relocs, func_jt_offsets,
address_transform, relocs,
ranges, address_transform,
context.func.stack_slots, ranges,
trap_sink.traps, sss,
function_traps,
unwind_info,
stack_map,
)| {
functions.push(CompiledFunction {
body: function,
jt_offsets: func_jt_offsets,
unwind_info, unwind_info,
stack_map_sink.finish(), });
)) relocations.push(relocs);
}) address_transforms.push(address_transform);
.collect::<Result<Vec<_>, CompileError>>()? value_ranges.push(ranges.unwrap_or_default());
.into_iter() stack_slots.push(sss);
.for_each( traps.push(function_traps);
|( stack_maps.push(stack_map);
function, },
func_jt_offsets, );
relocs,
address_transform,
ranges,
sss,
function_traps,
unwind_info,
stack_map,
)| {
functions.push(CompiledFunction {
body: function,
jt_offsets: func_jt_offsets,
unwind_info,
});
relocations.push(relocs);
address_transforms.push(address_transform);
value_ranges.push(ranges.unwrap_or_default());
stack_slots.push(sss);
traps.push(function_traps);
stack_maps.push(stack_map);
},
);
// TODO: Reorganize where we create the Vec for the resolved imports. // TODO: Reorganize where we create the Vec for the resolved imports.

View File

@@ -38,7 +38,7 @@ wasmtime-wasi = { path = "../wasi" }
maintenance = { status = "actively-developed" } maintenance = { status = "actively-developed" }
[features] [features]
default = ['wat', 'jitdump'] default = ['wat', 'jitdump', 'parallel-compilation']
# Enables experimental support for the lightbeam codegen backend, an alternative # Enables experimental support for the lightbeam codegen backend, an alternative
# to cranelift. Requires Nightly Rust currently, and this is not enabled by # to cranelift. Requires Nightly Rust currently, and this is not enabled by
@@ -50,3 +50,6 @@ jitdump = ["wasmtime-jit/jitdump"]
# Enables support for the `VTune` profiler # Enables support for the `VTune` profiler
vtune = ["wasmtime-jit/vtune"] vtune = ["wasmtime-jit/vtune"]
# Enables parallel compilation of WebAssembly code
parallel-compilation = ["wasmtime-environ/parallel-compilation"]

View File

@@ -11,7 +11,7 @@ repository = "https://github.com/bytecodealliance/wasmtime"
include = ["src/**/*", "LICENSE"] include = ["src/**/*", "LICENSE"]
[dependencies] [dependencies]
wasmtime = { path = "../../wasmtime", version = "0.18.0" } wasmtime = { path = "../../wasmtime", version = "0.18.0", default-features = false }
wasmtime-wiggle-macro = { path = "./macro", version = "0.18.0" } wasmtime-wiggle-macro = { path = "./macro", version = "0.18.0" }
witx = { path = "../../wasi-common/WASI/tools/witx", version = "0.8.5", optional = true } witx = { path = "../../wasi-common/WASI/tools/witx", version = "0.8.5", optional = true }
wiggle = { path = "..", version = "0.18.0" } wiggle = { path = "..", version = "0.18.0" }