wasmtime: add build-time option for parallel compilation (#1903)
When running in embedded environments, threads creation is sometimes undesirable. This adds a feature to toggle wasmtime's internal thread creation for parallel compilation.
This commit is contained in:
@@ -20,7 +20,7 @@ cranelift-wasm = { path = "../../cranelift/wasm", version = "0.65.0", features =
|
||||
wasmparser = "0.58.0"
|
||||
lightbeam = { path = "../lightbeam", optional = true, version = "0.18.0" }
|
||||
indexmap = "1.0.2"
|
||||
rayon = "1.2.1"
|
||||
rayon = { version = "1.2.1", optional = true }
|
||||
thiserror = "1.0.4"
|
||||
directories = "2.0.1"
|
||||
sha2 = "0.8.0"
|
||||
@@ -32,6 +32,7 @@ zstd = "0.5"
|
||||
toml = "0.5.5"
|
||||
file-per-thread-logger = "0.1.1"
|
||||
more-asserts = "0.2.1"
|
||||
cfg-if = "0.1.9"
|
||||
|
||||
[target.'cfg(target_os = "windows")'.dependencies]
|
||||
winapi = "0.3.7"
|
||||
@@ -46,5 +47,8 @@ pretty_env_logger = "0.4.0"
|
||||
filetime = "0.2.7"
|
||||
lazy_static = "1.3.0"
|
||||
|
||||
[features]
|
||||
parallel-compilation = ["rayon"]
|
||||
|
||||
[badges]
|
||||
maintenance = { status = "actively-developed" }
|
||||
|
||||
@@ -99,6 +99,7 @@ use cranelift_codegen::print_errors::pretty_error;
|
||||
use cranelift_codegen::{binemit, isa, Context};
|
||||
use cranelift_entity::PrimaryMap;
|
||||
use cranelift_wasm::{DefinedFuncIndex, FuncIndex, FuncTranslator, ModuleTranslationState};
|
||||
#[cfg(feature = "parallel-compilation")]
|
||||
use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
|
||||
use std::convert::TryFrom;
|
||||
use std::hash::{Hash, Hasher};
|
||||
@@ -318,132 +319,149 @@ fn compile(env: CompileEnv<'_>) -> Result<ModuleCacheDataTupleType, CompileError
|
||||
let mut traps = PrimaryMap::with_capacity(env.function_body_inputs.len());
|
||||
let mut stack_maps = PrimaryMap::with_capacity(env.function_body_inputs.len());
|
||||
|
||||
env.function_body_inputs
|
||||
.into_iter()
|
||||
.collect::<Vec<(DefinedFuncIndex, &FunctionBodyData<'_>)>>()
|
||||
.par_iter()
|
||||
.map_init(FuncTranslator::new, |func_translator, (i, input)| {
|
||||
let func_index = env.local.func_index(*i);
|
||||
let mut context = Context::new();
|
||||
context.func.name = get_func_name(func_index);
|
||||
context.func.signature = env.local.native_func_signature(func_index).clone();
|
||||
if env.tunables.debug_info {
|
||||
context.func.collect_debug_info();
|
||||
}
|
||||
type FunctionBodyInput<'a> = (DefinedFuncIndex, &'a FunctionBodyData<'a>);
|
||||
|
||||
let mut func_env = FuncEnvironment::new(isa.frontend_config(), env.local, env.tunables);
|
||||
let compile_function = |func_translator: &mut FuncTranslator,
|
||||
(i, input): &FunctionBodyInput| {
|
||||
let func_index = env.local.func_index(*i);
|
||||
let mut context = Context::new();
|
||||
context.func.name = get_func_name(func_index);
|
||||
context.func.signature = env.local.native_func_signature(func_index).clone();
|
||||
if env.tunables.debug_info {
|
||||
context.func.collect_debug_info();
|
||||
}
|
||||
|
||||
// We use these as constant offsets below in
|
||||
// `stack_limit_from_arguments`, so assert their values here. This
|
||||
// allows the closure below to get coerced to a function pointer, as
|
||||
// needed by `ir::Function`.
|
||||
//
|
||||
// Otherwise our stack limit is specially calculated from the vmctx
|
||||
// argument, where we need to load the `*const VMInterrupts`
|
||||
// pointer, and then from that pointer we need to load the stack
|
||||
// limit itself. Note that manual register allocation is needed here
|
||||
// too due to how late in the process this codegen happens.
|
||||
//
|
||||
// For more information about interrupts and stack checks, see the
|
||||
// top of this file.
|
||||
let vmctx = context
|
||||
.func
|
||||
.create_global_value(ir::GlobalValueData::VMContext);
|
||||
let interrupts_ptr = context.func.create_global_value(ir::GlobalValueData::Load {
|
||||
base: vmctx,
|
||||
offset: i32::try_from(func_env.offsets.vmctx_interrupts())
|
||||
.unwrap()
|
||||
.into(),
|
||||
global_type: isa.pointer_type(),
|
||||
readonly: true,
|
||||
});
|
||||
let stack_limit = context.func.create_global_value(ir::GlobalValueData::Load {
|
||||
base: interrupts_ptr,
|
||||
offset: i32::try_from(func_env.offsets.vminterrupts_stack_limit())
|
||||
.unwrap()
|
||||
.into(),
|
||||
global_type: isa.pointer_type(),
|
||||
readonly: false,
|
||||
});
|
||||
context.func.stack_limit = Some(stack_limit);
|
||||
func_translator.translate(
|
||||
env.module_translation.0,
|
||||
input.data,
|
||||
input.module_offset,
|
||||
&mut context.func,
|
||||
&mut func_env,
|
||||
)?;
|
||||
let mut func_env = FuncEnvironment::new(isa.frontend_config(), env.local, env.tunables);
|
||||
|
||||
let mut code_buf: Vec<u8> = Vec::new();
|
||||
let mut reloc_sink = RelocSink::new(func_index);
|
||||
let mut trap_sink = TrapSink::new();
|
||||
let mut stack_map_sink = StackMapSink::default();
|
||||
context
|
||||
.compile_and_emit(
|
||||
isa,
|
||||
&mut code_buf,
|
||||
&mut reloc_sink,
|
||||
&mut trap_sink,
|
||||
&mut stack_map_sink,
|
||||
)
|
||||
.map_err(|error| {
|
||||
CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
|
||||
})?;
|
||||
// We use these as constant offsets below in
|
||||
// `stack_limit_from_arguments`, so assert their values here. This
|
||||
// allows the closure below to get coerced to a function pointer, as
|
||||
// needed by `ir::Function`.
|
||||
//
|
||||
// Otherwise our stack limit is specially calculated from the vmctx
|
||||
// argument, where we need to load the `*const VMInterrupts`
|
||||
// pointer, and then from that pointer we need to load the stack
|
||||
// limit itself. Note that manual register allocation is needed here
|
||||
// too due to how late in the process this codegen happens.
|
||||
//
|
||||
// For more information about interrupts and stack checks, see the
|
||||
// top of this file.
|
||||
let vmctx = context
|
||||
.func
|
||||
.create_global_value(ir::GlobalValueData::VMContext);
|
||||
let interrupts_ptr = context.func.create_global_value(ir::GlobalValueData::Load {
|
||||
base: vmctx,
|
||||
offset: i32::try_from(func_env.offsets.vmctx_interrupts())
|
||||
.unwrap()
|
||||
.into(),
|
||||
global_type: isa.pointer_type(),
|
||||
readonly: true,
|
||||
});
|
||||
let stack_limit = context.func.create_global_value(ir::GlobalValueData::Load {
|
||||
base: interrupts_ptr,
|
||||
offset: i32::try_from(func_env.offsets.vminterrupts_stack_limit())
|
||||
.unwrap()
|
||||
.into(),
|
||||
global_type: isa.pointer_type(),
|
||||
readonly: false,
|
||||
});
|
||||
context.func.stack_limit = Some(stack_limit);
|
||||
func_translator.translate(
|
||||
env.module_translation.0,
|
||||
input.data,
|
||||
input.module_offset,
|
||||
&mut context.func,
|
||||
&mut func_env,
|
||||
)?;
|
||||
|
||||
let unwind_info = context.create_unwind_info(isa).map_err(|error| {
|
||||
let mut code_buf: Vec<u8> = Vec::new();
|
||||
let mut reloc_sink = RelocSink::new(func_index);
|
||||
let mut trap_sink = TrapSink::new();
|
||||
let mut stack_map_sink = StackMapSink::default();
|
||||
context
|
||||
.compile_and_emit(
|
||||
isa,
|
||||
&mut code_buf,
|
||||
&mut reloc_sink,
|
||||
&mut trap_sink,
|
||||
&mut stack_map_sink,
|
||||
)
|
||||
.map_err(|error| {
|
||||
CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
|
||||
})?;
|
||||
|
||||
let address_transform = get_function_address_map(&context, input, code_buf.len(), isa);
|
||||
let unwind_info = context.create_unwind_info(isa).map_err(|error| {
|
||||
CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
|
||||
})?;
|
||||
|
||||
let ranges = if env.tunables.debug_info {
|
||||
let ranges = context.build_value_labels_ranges(isa).map_err(|error| {
|
||||
CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
|
||||
})?;
|
||||
Some(ranges)
|
||||
let address_transform = get_function_address_map(&context, input, code_buf.len(), isa);
|
||||
|
||||
let ranges = if env.tunables.debug_info {
|
||||
let ranges = context.build_value_labels_ranges(isa).map_err(|error| {
|
||||
CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
|
||||
})?;
|
||||
Some(ranges)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok((
|
||||
code_buf,
|
||||
context.func.jt_offsets,
|
||||
reloc_sink.func_relocs,
|
||||
address_transform,
|
||||
ranges,
|
||||
context.func.stack_slots,
|
||||
trap_sink.traps,
|
||||
unwind_info,
|
||||
stack_map_sink.finish(),
|
||||
))
|
||||
};
|
||||
|
||||
let inputs: Vec<FunctionBodyInput> = env.function_body_inputs.into_iter().collect();
|
||||
|
||||
let results: Result<Vec<_>, CompileError> = {
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(feature = "parallel-compilation")] {
|
||||
inputs
|
||||
.par_iter()
|
||||
.map_init(FuncTranslator::new, compile_function)
|
||||
.collect()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let mut func_translator = FuncTranslator::new();
|
||||
inputs
|
||||
.iter()
|
||||
.map(|input| compile_function(&mut func_translator, input))
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok((
|
||||
code_buf,
|
||||
context.func.jt_offsets,
|
||||
reloc_sink.func_relocs,
|
||||
address_transform,
|
||||
ranges,
|
||||
context.func.stack_slots,
|
||||
trap_sink.traps,
|
||||
results?.into_iter().for_each(
|
||||
|(
|
||||
function,
|
||||
func_jt_offsets,
|
||||
relocs,
|
||||
address_transform,
|
||||
ranges,
|
||||
sss,
|
||||
function_traps,
|
||||
unwind_info,
|
||||
stack_map,
|
||||
)| {
|
||||
functions.push(CompiledFunction {
|
||||
body: function,
|
||||
jt_offsets: func_jt_offsets,
|
||||
unwind_info,
|
||||
stack_map_sink.finish(),
|
||||
))
|
||||
})
|
||||
.collect::<Result<Vec<_>, CompileError>>()?
|
||||
.into_iter()
|
||||
.for_each(
|
||||
|(
|
||||
function,
|
||||
func_jt_offsets,
|
||||
relocs,
|
||||
address_transform,
|
||||
ranges,
|
||||
sss,
|
||||
function_traps,
|
||||
unwind_info,
|
||||
stack_map,
|
||||
)| {
|
||||
functions.push(CompiledFunction {
|
||||
body: function,
|
||||
jt_offsets: func_jt_offsets,
|
||||
unwind_info,
|
||||
});
|
||||
relocations.push(relocs);
|
||||
address_transforms.push(address_transform);
|
||||
value_ranges.push(ranges.unwrap_or_default());
|
||||
stack_slots.push(sss);
|
||||
traps.push(function_traps);
|
||||
stack_maps.push(stack_map);
|
||||
},
|
||||
);
|
||||
});
|
||||
relocations.push(relocs);
|
||||
address_transforms.push(address_transform);
|
||||
value_ranges.push(ranges.unwrap_or_default());
|
||||
stack_slots.push(sss);
|
||||
traps.push(function_traps);
|
||||
stack_maps.push(stack_map);
|
||||
},
|
||||
);
|
||||
|
||||
// TODO: Reorganize where we create the Vec for the resolved imports.
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ wasmtime-wasi = { path = "../wasi" }
|
||||
maintenance = { status = "actively-developed" }
|
||||
|
||||
[features]
|
||||
default = ['wat', 'jitdump']
|
||||
default = ['wat', 'jitdump', 'parallel-compilation']
|
||||
|
||||
# Enables experimental support for the lightbeam codegen backend, an alternative
|
||||
# to cranelift. Requires Nightly Rust currently, and this is not enabled by
|
||||
@@ -50,3 +50,6 @@ jitdump = ["wasmtime-jit/jitdump"]
|
||||
|
||||
# Enables support for the `VTune` profiler
|
||||
vtune = ["wasmtime-jit/vtune"]
|
||||
|
||||
# Enables parallel compilation of WebAssembly code
|
||||
parallel-compilation = ["wasmtime-environ/parallel-compilation"]
|
||||
|
||||
@@ -11,7 +11,7 @@ repository = "https://github.com/bytecodealliance/wasmtime"
|
||||
include = ["src/**/*", "LICENSE"]
|
||||
|
||||
[dependencies]
|
||||
wasmtime = { path = "../../wasmtime", version = "0.18.0" }
|
||||
wasmtime = { path = "../../wasmtime", version = "0.18.0", default-features = false }
|
||||
wasmtime-wiggle-macro = { path = "./macro", version = "0.18.0" }
|
||||
witx = { path = "../../wasi-common/WASI/tools/witx", version = "0.8.5", optional = true }
|
||||
wiggle = { path = "..", version = "0.18.0" }
|
||||
|
||||
Reference in New Issue
Block a user