From 2ce2dd020397b8d1f03ff3cbccf7b043d7c1cf1d Mon Sep 17 00:00:00 2001 From: Daiki Ueno Date: Mon, 6 Jul 2020 18:22:05 +0200 Subject: [PATCH] wasmtime: add build-time option for parallel compilation (#1903) When running in embedded environments, thread creation is sometimes undesirable. This adds a feature to toggle wasmtime's internal thread creation for parallel compilation. --- Cargo.lock | 1 + Cargo.toml | 2 +- crates/environ/Cargo.toml | 6 +- crates/environ/src/cranelift.rs | 248 ++++++++++++++++-------------- crates/wasmtime/Cargo.toml | 5 +- crates/wiggle/wasmtime/Cargo.toml | 2 +- 6 files changed, 145 insertions(+), 119 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9692147912..019b5a8da4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2417,6 +2417,7 @@ dependencies = [ "anyhow", "base64 0.12.1", "bincode", + "cfg-if", "cranelift-codegen", "cranelift-entity", "cranelift-frontend", diff --git a/Cargo.toml b/Cargo.toml index 864741006b..cb60a12fde 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -72,7 +72,7 @@ members = [ ] [features] -default = ["jitdump", "wasmtime/wat"] +default = ["jitdump", "wasmtime/wat", "wasmtime/parallel-compilation"] lightbeam = [ "wasmtime-environ/lightbeam", "wasmtime-jit/lightbeam", diff --git a/crates/environ/Cargo.toml b/crates/environ/Cargo.toml index cf096a4303..1ab3dd6d07 100644 --- a/crates/environ/Cargo.toml +++ b/crates/environ/Cargo.toml @@ -20,7 +20,7 @@ cranelift-wasm = { path = "../../cranelift/wasm", version = "0.65.0", features = wasmparser = "0.58.0" lightbeam = { path = "../lightbeam", optional = true, version = "0.18.0" } indexmap = "1.0.2" -rayon = "1.2.1" +rayon = { version = "1.2.1", optional = true } thiserror = "1.0.4" directories = "2.0.1" sha2 = "0.8.0" @@ -32,6 +32,7 @@ zstd = "0.5" toml = "0.5.5" file-per-thread-logger = "0.1.1" more-asserts = "0.2.1" +cfg-if = "0.1.9" [target.'cfg(target_os = "windows")'.dependencies] winapi = "0.3.7" @@ -46,5 +47,8 @@ pretty_env_logger = "0.4.0" filetime = "0.2.7" 
lazy_static = "1.3.0" +[features] +parallel-compilation = ["rayon"] + [badges] maintenance = { status = "actively-developed" } diff --git a/crates/environ/src/cranelift.rs b/crates/environ/src/cranelift.rs index 89a7dfd6f6..91a0db47b4 100644 --- a/crates/environ/src/cranelift.rs +++ b/crates/environ/src/cranelift.rs @@ -99,6 +99,7 @@ use cranelift_codegen::print_errors::pretty_error; use cranelift_codegen::{binemit, isa, Context}; use cranelift_entity::PrimaryMap; use cranelift_wasm::{DefinedFuncIndex, FuncIndex, FuncTranslator, ModuleTranslationState}; +#[cfg(feature = "parallel-compilation")] use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; use std::convert::TryFrom; use std::hash::{Hash, Hasher}; @@ -318,132 +319,149 @@ fn compile(env: CompileEnv<'_>) -> Result)>>() - .par_iter() - .map_init(FuncTranslator::new, |func_translator, (i, input)| { - let func_index = env.local.func_index(*i); - let mut context = Context::new(); - context.func.name = get_func_name(func_index); - context.func.signature = env.local.native_func_signature(func_index).clone(); - if env.tunables.debug_info { - context.func.collect_debug_info(); - } + type FunctionBodyInput<'a> = (DefinedFuncIndex, &'a FunctionBodyData<'a>); - let mut func_env = FuncEnvironment::new(isa.frontend_config(), env.local, env.tunables); + let compile_function = |func_translator: &mut FuncTranslator, + (i, input): &FunctionBodyInput| { + let func_index = env.local.func_index(*i); + let mut context = Context::new(); + context.func.name = get_func_name(func_index); + context.func.signature = env.local.native_func_signature(func_index).clone(); + if env.tunables.debug_info { + context.func.collect_debug_info(); + } - // We use these as constant offsets below in - // `stack_limit_from_arguments`, so assert their values here. This - // allows the closure below to get coerced to a function pointer, as - // needed by `ir::Function`. 
- // - // Otherwise our stack limit is specially calculated from the vmctx - // argument, where we need to load the `*const VMInterrupts` - // pointer, and then from that pointer we need to load the stack - // limit itself. Note that manual register allocation is needed here - // too due to how late in the process this codegen happens. - // - // For more information about interrupts and stack checks, see the - // top of this file. - let vmctx = context - .func - .create_global_value(ir::GlobalValueData::VMContext); - let interrupts_ptr = context.func.create_global_value(ir::GlobalValueData::Load { - base: vmctx, - offset: i32::try_from(func_env.offsets.vmctx_interrupts()) - .unwrap() - .into(), - global_type: isa.pointer_type(), - readonly: true, - }); - let stack_limit = context.func.create_global_value(ir::GlobalValueData::Load { - base: interrupts_ptr, - offset: i32::try_from(func_env.offsets.vminterrupts_stack_limit()) - .unwrap() - .into(), - global_type: isa.pointer_type(), - readonly: false, - }); - context.func.stack_limit = Some(stack_limit); - func_translator.translate( - env.module_translation.0, - input.data, - input.module_offset, - &mut context.func, - &mut func_env, - )?; + let mut func_env = FuncEnvironment::new(isa.frontend_config(), env.local, env.tunables); - let mut code_buf: Vec = Vec::new(); - let mut reloc_sink = RelocSink::new(func_index); - let mut trap_sink = TrapSink::new(); - let mut stack_map_sink = StackMapSink::default(); - context - .compile_and_emit( - isa, - &mut code_buf, - &mut reloc_sink, - &mut trap_sink, - &mut stack_map_sink, - ) - .map_err(|error| { - CompileError::Codegen(pretty_error(&context.func, Some(isa), error)) - })?; + // We use these as constant offsets below in + // `stack_limit_from_arguments`, so assert their values here. This + // allows the closure below to get coerced to a function pointer, as + // needed by `ir::Function`. 
+ // + // Otherwise our stack limit is specially calculated from the vmctx + // argument, where we need to load the `*const VMInterrupts` + // pointer, and then from that pointer we need to load the stack + // limit itself. Note that manual register allocation is needed here + // too due to how late in the process this codegen happens. + // + // For more information about interrupts and stack checks, see the + // top of this file. + let vmctx = context + .func + .create_global_value(ir::GlobalValueData::VMContext); + let interrupts_ptr = context.func.create_global_value(ir::GlobalValueData::Load { + base: vmctx, + offset: i32::try_from(func_env.offsets.vmctx_interrupts()) + .unwrap() + .into(), + global_type: isa.pointer_type(), + readonly: true, + }); + let stack_limit = context.func.create_global_value(ir::GlobalValueData::Load { + base: interrupts_ptr, + offset: i32::try_from(func_env.offsets.vminterrupts_stack_limit()) + .unwrap() + .into(), + global_type: isa.pointer_type(), + readonly: false, + }); + context.func.stack_limit = Some(stack_limit); + func_translator.translate( + env.module_translation.0, + input.data, + input.module_offset, + &mut context.func, + &mut func_env, + )?; - let unwind_info = context.create_unwind_info(isa).map_err(|error| { + let mut code_buf: Vec = Vec::new(); + let mut reloc_sink = RelocSink::new(func_index); + let mut trap_sink = TrapSink::new(); + let mut stack_map_sink = StackMapSink::default(); + context + .compile_and_emit( + isa, + &mut code_buf, + &mut reloc_sink, + &mut trap_sink, + &mut stack_map_sink, + ) + .map_err(|error| { CompileError::Codegen(pretty_error(&context.func, Some(isa), error)) })?; - let address_transform = get_function_address_map(&context, input, code_buf.len(), isa); + let unwind_info = context.create_unwind_info(isa).map_err(|error| { + CompileError::Codegen(pretty_error(&context.func, Some(isa), error)) + })?; - let ranges = if env.tunables.debug_info { - let ranges = 
context.build_value_labels_ranges(isa).map_err(|error| { - CompileError::Codegen(pretty_error(&context.func, Some(isa), error)) - })?; - Some(ranges) + let address_transform = get_function_address_map(&context, input, code_buf.len(), isa); + + let ranges = if env.tunables.debug_info { + let ranges = context.build_value_labels_ranges(isa).map_err(|error| { + CompileError::Codegen(pretty_error(&context.func, Some(isa), error)) + })?; + Some(ranges) + } else { + None + }; + + Ok(( + code_buf, + context.func.jt_offsets, + reloc_sink.func_relocs, + address_transform, + ranges, + context.func.stack_slots, + trap_sink.traps, + unwind_info, + stack_map_sink.finish(), + )) + }; + + let inputs: Vec = env.function_body_inputs.into_iter().collect(); + + let results: Result, CompileError> = { + cfg_if::cfg_if! { + if #[cfg(feature = "parallel-compilation")] { + inputs + .par_iter() + .map_init(FuncTranslator::new, compile_function) + .collect() } else { - None - }; + let mut func_translator = FuncTranslator::new(); + inputs + .iter() + .map(|input| compile_function(&mut func_translator, input)) + .collect() + } + } + }; - Ok(( - code_buf, - context.func.jt_offsets, - reloc_sink.func_relocs, - address_transform, - ranges, - context.func.stack_slots, - trap_sink.traps, + results?.into_iter().for_each( + |( + function, + func_jt_offsets, + relocs, + address_transform, + ranges, + sss, + function_traps, + unwind_info, + stack_map, + )| { + functions.push(CompiledFunction { + body: function, + jt_offsets: func_jt_offsets, unwind_info, - stack_map_sink.finish(), - )) - }) - .collect::, CompileError>>()? 
- .into_iter() - .for_each( - |( - function, - func_jt_offsets, - relocs, - address_transform, - ranges, - sss, - function_traps, - unwind_info, - stack_map, - )| { - functions.push(CompiledFunction { - body: function, - jt_offsets: func_jt_offsets, - unwind_info, - }); - relocations.push(relocs); - address_transforms.push(address_transform); - value_ranges.push(ranges.unwrap_or_default()); - stack_slots.push(sss); - traps.push(function_traps); - stack_maps.push(stack_map); - }, - ); + }); + relocations.push(relocs); + address_transforms.push(address_transform); + value_ranges.push(ranges.unwrap_or_default()); + stack_slots.push(sss); + traps.push(function_traps); + stack_maps.push(stack_map); + }, + ); // TODO: Reorganize where we create the Vec for the resolved imports. diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml index adbdee49d9..e13c872cac 100644 --- a/crates/wasmtime/Cargo.toml +++ b/crates/wasmtime/Cargo.toml @@ -38,7 +38,7 @@ wasmtime-wasi = { path = "../wasi" } maintenance = { status = "actively-developed" } [features] -default = ['wat', 'jitdump'] +default = ['wat', 'jitdump', 'parallel-compilation'] # Enables experimental support for the lightbeam codegen backend, an alternative # to cranelift. 
Requires Nightly Rust currently, and this is not enabled by @@ -50,3 +50,6 @@ jitdump = ["wasmtime-jit/jitdump"] # Enables support for the `VTune` profiler vtune = ["wasmtime-jit/vtune"] + +# Enables parallel compilation of WebAssembly code +parallel-compilation = ["wasmtime-environ/parallel-compilation"] diff --git a/crates/wiggle/wasmtime/Cargo.toml b/crates/wiggle/wasmtime/Cargo.toml index 5983576994..b5ad32ae81 100644 --- a/crates/wiggle/wasmtime/Cargo.toml +++ b/crates/wiggle/wasmtime/Cargo.toml @@ -11,7 +11,7 @@ repository = "https://github.com/bytecodealliance/wasmtime" include = ["src/**/*", "LICENSE"] [dependencies] -wasmtime = { path = "../../wasmtime", version = "0.18.0" } +wasmtime = { path = "../../wasmtime", version = "0.18.0", default-features = false } wasmtime-wiggle-macro = { path = "./macro", version = "0.18.0" } witx = { path = "../../wasi-common/WASI/tools/witx", version = "0.8.5", optional = true } wiggle = { path = "..", version = "0.18.0" }