Serialize and deserialize compilation artifacts. (#2020)

* Serialize and deserialize Module
* Use bincode to serialize
* Add wasm_module_serialize; docs
* Simple tests
This commit is contained in:
Yury Delendik
2020-07-21 15:05:50 -05:00
committed by GitHub
parent c420f65214
commit 399ee0a54c
17 changed files with 528 additions and 20 deletions

View File

@@ -930,6 +930,37 @@ WASM_API_EXTERN void wasmtime_externref_new_with_finalizer(
*/
WASM_API_EXTERN bool wasmtime_externref_data(wasm_val_t* val, void** datap);
/**
* \brief This function serializes compiled module artifacts
* as blob data.
*
* \param module the module
* \param ret if the conversion is successful, this byte vector is filled in with
* the serialized compiled module.
*
* \return a non-null error if serialization fails, or returns `NULL`. If
* serialization fails then `ret` isn't touched.
*
* This function does not take ownership of `module`, and the caller is
* expected to deallocate the returned #wasmtime_error_t and #wasm_byte_vec_t.
*/
WASM_API_EXTERN own wasmtime_error_t* wasmtime_module_serialize(
wasm_module_t* module,
own wasm_byte_vec_t *ret
);
/**
* \brief Build a module from serialized data.
*
* \param engine the engine used to build the module
* \param serialized the serialized compiled module bytes
* \param ret if deserialization is successful, this is filled in with the
* newly built module.
*
* \return a non-null error if deserialization fails, or returns `NULL`.
*
* This function does not take ownership of any of its arguments, but the
* returned error and module are owned by the caller.
*/
WASM_API_EXTERN own wasmtime_error_t *wasmtime_module_deserialize(
wasm_engine_t *engine,
const wasm_byte_vec_t *serialized,
own wasm_module_t **ret
);
#undef own
#ifdef __cplusplus

View File

@@ -130,3 +130,63 @@ pub extern "C" fn wasm_module_obtain(
exports,
}))
}
/// Serializes `module` into `ret` for the error-less C entry point.
///
/// Delegates to `wasmtime_module_serialize` and discards whatever error it
/// returns, so a failure is not reported to the caller by this function.
#[no_mangle]
pub extern "C" fn wasm_module_serialize(module: &wasm_module_t, ret: &mut wasm_byte_vec_t) {
    // This signature has no error channel, so the result is dropped on purpose.
    let _ = wasmtime_module_serialize(module, ret);
}
/// Rebuilds a module from `binary` for the error-less C entry point.
///
/// A clone of the store's engine is wrapped in a temporary `wasm_engine_t`
/// and handed to `wasmtime_module_deserialize`. Returns `None` when that call
/// reports an error (the error itself is discarded); otherwise returns the
/// module it produced.
///
/// # Panics
///
/// Panics if `wasmtime_module_deserialize` reports success but leaves the
/// output pointer null.
#[no_mangle]
pub extern "C" fn wasm_module_deserialize(
    store: &wasm_store_t,
    binary: &wasm_byte_vec_t,
) -> Option<Box<wasm_module_t>> {
    let engine = wasm_engine_t {
        engine: store.store.engine().clone(),
    };
    let mut module_ptr = ptr::null_mut();
    if let Some(_err) = wasmtime_module_deserialize(&engine, binary, &mut module_ptr) {
        return None;
    }
    assert!(!module_ptr.is_null());
    // SAFETY: on success `wasmtime_module_deserialize` stores a pointer created
    // by `Box::into_raw`, and it was just checked to be non-null; ownership is
    // reclaimed here exactly once.
    Some(unsafe { Box::from_raw(module_ptr) })
}
/// Serializes the compiled artifacts of `module` into `ret`.
///
/// The outcome of `Module::serialize` is routed through `handle_result`; the
/// success callback stores the produced bytes in `ret` via `set_buffer`.
/// NOTE(review): presumably `handle_result` returns the error and skips the
/// callback on failure, leaving `ret` untouched; confirm against its
/// definition.
#[no_mangle]
pub extern "C" fn wasmtime_module_serialize(
    module: &wasm_module_t,
    ret: &mut wasm_byte_vec_t,
) -> Option<Box<wasmtime_error_t>> {
    let serialized = module.module.serialize();
    handle_result(serialized, |bytes| {
        ret.set_buffer(bytes);
    })
}
/// Builds a `wasm_module_t` from bytes produced by module serialization.
///
/// The bytes in `binary` are passed to `Module::deserialize` together with
/// the engine wrapped by `engine`. In the success callback the module's
/// import and export types are collected, everything is boxed into a
/// `wasm_module_t`, and the raw pointer is written to `ret`; the caller then
/// owns that allocation.
/// NOTE(review): presumably `handle_result` returns the error and skips the
/// callback on failure, leaving `ret` untouched; confirm against its
/// definition.
#[no_mangle]
pub extern "C" fn wasmtime_module_deserialize(
    engine: &wasm_engine_t,
    binary: &wasm_byte_vec_t,
    ret: &mut *mut wasm_module_t,
) -> Option<Box<wasmtime_error_t>> {
    let deserialized = Module::deserialize(&engine.engine, binary.as_slice());
    handle_result(deserialized, |module| {
        let imports: Vec<_> = module
            .imports()
            .map(|import| {
                wasm_importtype_t::new(
                    import.module().to_owned(),
                    import.name().to_owned(),
                    import.ty(),
                )
            })
            .collect();
        let exports: Vec<_> = module
            .exports()
            .map(|export| wasm_exporttype_t::new(export.name().to_owned(), export.ty()))
            .collect();
        // Hand ownership of the boxed module to the caller through `ret`.
        *ret = Box::into_raw(Box::new(wasm_module_t {
            module,
            imports,
            exports,
        }));
    })
}

View File

@@ -137,10 +137,10 @@ impl TablePlan {
/// A translated WebAssembly module, excluding the function bodies and
/// memory initializers.
#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Module {
/// A unique identifier (within this process) for this module.
#[serde(skip_serializing, default = "Module::next_id")]
#[serde(skip_serializing, skip_deserializing, default = "Module::next_id")]
pub id: usize,
/// The name of this wasm module, often found in the wasm file.
@@ -181,7 +181,7 @@ pub struct Module {
/// This is stored within a `Module` and it implements `Hash`, unlike `Module`,
/// and is used as part of the cache key when we load compiled modules from the
/// global cache.
#[derive(Debug, Hash, Serialize, Deserialize)]
#[derive(Debug, Clone, Hash, Serialize, Deserialize)]
pub struct ModuleLocal {
/// Unprocessed signatures exactly as provided by `declare_signature()`.
pub signatures: PrimaryMap<SignatureIndex, (WasmFuncType, ir::Signature)>,

View File

@@ -9,6 +9,7 @@ use cranelift_wasm::{
Memory, MemoryIndex, ModuleTranslationState, SignatureIndex, Table, TableIndex,
TargetEnvironment, WasmError, WasmFuncType, WasmResult,
};
use serde::{Deserialize, Serialize};
use std::convert::TryFrom;
use std::sync::Arc;
@@ -450,7 +451,7 @@ pub fn translate_signature(mut sig: ir::Signature, pointer_type: ir::Type) -> ir
/// A memory index and offset within that memory where a data initialization
/// is to be performed.
#[derive(Clone)]
#[derive(Clone, Serialize, Deserialize)]
pub struct DataInitializerLocation {
/// The index of the memory to initialize.
pub memory_index: MemoryIndex,

View File

@@ -32,6 +32,7 @@ cfg-if = "0.1.9"
log = "0.4"
gimli = { version = "0.21.0", default-features = false, features = ["write"] }
object = { version = "0.20", default-features = false, features = ["write"] }
serde = { version = "1.0.94", features = ["derive"] }
[target.'cfg(target_os = "windows")'.dependencies]
winapi = { version = "0.3.8", features = ["winnt", "impl-default"] }

View File

@@ -10,6 +10,7 @@ use crate::link::link_module;
use crate::object::ObjectUnwindInfo;
use crate::resolver::Resolver;
use object::File as ObjectFile;
use serde::{Deserialize, Serialize};
use std::any::Any;
use std::collections::HashMap;
use std::sync::Arc;
@@ -51,27 +52,47 @@ pub enum SetupError {
DebugInfo(#[from] anyhow::Error),
}
// Contains all compilation artifacts.
struct CompilationArtifacts {
/// Contains all compilation artifacts.
#[derive(Serialize, Deserialize)]
pub struct CompilationArtifacts {
/// Module metadata.
module: Module,
/// ELF image with functions code.
obj: Box<[u8]>,
/// Unwind information for function code.
unwind_info: Box<[ObjectUnwindInfo]>,
/// Data initializers.
data_initializers: Box<[OwnedDataInitializer]>,
/// Traps descriptors.
traps: Traps,
/// Stack map descriptors.
stack_maps: StackMaps,
/// Wasm to function code address map.
address_transform: ModuleAddressMap,
/// Debug info presence flags.
debug_info: bool,
}
impl CompilationArtifacts {
fn new(compiler: &Compiler, data: &[u8]) -> Result<Self, SetupError> {
/// Builds compilation artifacts.
pub fn build(compiler: &Compiler, data: &[u8]) -> Result<Self, SetupError> {
let environ = ModuleEnvironment::new(compiler.frontend_config(), compiler.tunables());
let translation = environ
.translate(data)
.map_err(|error| SetupError::Compile(CompileError::Wasm(error)))?;
let debug_info = compiler.tunables().debug_info;
let mut debug_data = None;
if compiler.tunables().debug_info {
if debug_info {
// TODO Do we want to ignore invalid DWARF data?
debug_data = Some(read_debuginfo(&data)?);
}
@@ -110,6 +131,7 @@ impl CompilationArtifacts {
traps,
stack_maps,
address_transform,
debug_info,
})
}
}
@@ -136,6 +158,8 @@ pub struct CompiledModule {
traps: Traps,
stack_maps: StackMaps,
address_transform: ModuleAddressMap,
obj: Box<[u8]>,
unwind_info: Box<[ObjectUnwindInfo]>,
}
impl CompiledModule {
@@ -145,8 +169,16 @@ impl CompiledModule {
data: &'data [u8],
profiler: &dyn ProfilingAgent,
) -> Result<Self, SetupError> {
let artifacts = CompilationArtifacts::new(compiler, data)?;
let artifacts = CompilationArtifacts::build(compiler, data)?;
Self::from_artifacts(artifacts, compiler.isa(), profiler)
}
/// Creates `CompiledModule` directly from `CompilationArtifacts`.
pub fn from_artifacts(
artifacts: CompilationArtifacts,
isa: &dyn TargetIsa,
profiler: &dyn ProfilingAgent,
) -> Result<Self, SetupError> {
let CompilationArtifacts {
module,
obj,
@@ -155,12 +187,13 @@ impl CompiledModule {
traps,
stack_maps,
address_transform,
debug_info,
} = artifacts;
// Allocate all of the compiled functions into executable memory,
// copying over their contents.
let (code_memory, code_range, finished_functions, trampolines) =
build_code_memory(compiler.isa(), &obj, &module, unwind_info).map_err(|message| {
build_code_memory(isa, &obj, &module, &unwind_info).map_err(|message| {
SetupError::Instantiate(InstantiationError::Resource(format!(
"failed to build code memory for functions: {}",
message
@@ -168,7 +201,7 @@ impl CompiledModule {
})?;
// Register GDB JIT images; initialize profiler and load the wasm module.
let dbg_jit_registration = if compiler.tunables().debug_info {
let dbg_jit_registration = if debug_info {
let bytes = create_dbg_image(obj.to_vec(), code_range, &module, &finished_functions)?;
profiler.module_load(&module, &finished_functions, Some(&bytes));
@@ -194,9 +227,25 @@ impl CompiledModule {
traps,
stack_maps,
address_transform,
obj,
unwind_info,
})
}
/// Produces a `CompilationArtifacts` value from this compiled module.
///
/// Each stored piece of data is cloned, so `self` is left untouched. The
/// `debug_info` flag is set when `self.code` holds a debugger JIT
/// registration.
pub fn to_compilation_artifacts(&self) -> CompilationArtifacts {
    let module = (*self.module).clone();
    let obj = self.obj.clone();
    let unwind_info = self.unwind_info.clone();
    let data_initializers = self.data_initializers.clone();
    let traps = self.traps.clone();
    let stack_maps = self.stack_maps.clone();
    let address_transform = self.address_transform.clone();
    let debug_info = self.code.dbg_jit_registration.is_some();
    CompilationArtifacts {
        module,
        obj,
        unwind_info,
        data_initializers,
        traps,
        stack_maps,
        address_transform,
        debug_info,
    }
}
/// Create an `Instance` from this `CompiledModule`.
///
/// Note that if only one instance of this module is needed, it may be more
@@ -305,6 +354,7 @@ impl CompiledModule {
/// Similar to `DataInitializer`, but owns its own copy of the data rather
/// than holding a slice of the original module.
#[derive(Clone, Serialize, Deserialize)]
pub struct OwnedDataInitializer {
/// The location where the initialization is to be performed.
location: DataInitializerLocation,
@@ -340,7 +390,7 @@ fn build_code_memory(
isa: &dyn TargetIsa,
obj: &[u8],
module: &Module,
unwind_info: Box<[ObjectUnwindInfo]>,
unwind_info: &Box<[ObjectUnwindInfo]>,
) -> Result<
(
CodeMemory,
@@ -354,7 +404,7 @@ fn build_code_memory(
let mut code_memory = CodeMemory::new();
let allocation = code_memory.allocate_for_object(&obj, &unwind_info)?;
let allocation = code_memory.allocate_for_object(&obj, unwind_info)?;
// Second, create a PrimaryMap from result vector of pointers.
let mut finished_functions = PrimaryMap::new();

View File

@@ -35,7 +35,7 @@ pub mod trampoline;
pub use crate::code_memory::CodeMemory;
pub use crate::compiler::{Compilation, CompilationStrategy, Compiler};
pub use crate::instantiate::{CompiledModule, SetupError};
pub use crate::instantiate::{CompilationArtifacts, CompiledModule, SetupError};
pub use crate::link::link_module;
pub use crate::resolver::{NullResolver, Resolver};

View File

@@ -3,6 +3,7 @@
use super::trampoline::build_trampoline;
use cranelift_frontend::FunctionBuilderContext;
use object::write::Object;
use serde::{Deserialize, Serialize};
use wasmtime_debug::DwarfSection;
use wasmtime_environ::entity::{EntityRef, PrimaryMap};
use wasmtime_environ::isa::{unwind::UnwindInfo, TargetIsa};
@@ -13,7 +14,7 @@ use wasmtime_obj::{ObjectBuilder, ObjectBuilderTarget};
pub use wasmtime_obj::utils;
/// Unwind information for object files functions (including trampolines).
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ObjectUnwindInfo {
Func(FuncIndex, UnwindInfo),
Trampoline(SignatureIndex, UnwindInfo),

View File

@@ -26,6 +26,8 @@ lazy_static = "1.4"
log = "0.4.8"
wat = { version = "1.0.18", optional = true }
smallvec = "1.4.0"
serde = { version = "1.0.94", features = ["derive"] }
bincode = "1.2.1"
[target.'cfg(target_os = "windows")'.dependencies]
winapi = "0.3.7"

View File

@@ -1,10 +1,10 @@
use crate::frame_info::GlobalFrameInfoRegistration;
use crate::runtime::Engine;
use crate::runtime::{Config, Engine};
use crate::types::{EntityType, ExportType, ExternType, ImportType};
use anyhow::Result;
use anyhow::{bail, Context, Result};
use std::path::Path;
use std::sync::{Arc, Mutex};
use wasmtime_jit::CompiledModule;
use wasmtime_jit::{CompilationArtifacts, CompiledModule};
/// A compiled WebAssembly module, ready to be instantiated.
///
@@ -309,6 +309,51 @@ impl Module {
})
}
/// Serializes this module's compilation artifacts into a byte buffer.
///
/// The buffer holds the compiler fingerprint of this module's engine
/// configuration followed by the compilation artifacts, encoded with
/// `bincode`. See also `deserialize`.
///
/// # Errors
///
/// Returns an error if `bincode` fails to encode the artifacts.
pub fn serialize(&self) -> Result<Vec<u8>> {
    let fingerprint = compiler_fingerprint(self.engine.config());
    let payload = (fingerprint, self.compiled.to_compilation_artifacts());
    let mut bytes = Vec::new();
    bincode::serialize_into(&mut bytes, &payload)?;
    Ok(bytes)
}
/// Deserializes and creates a module from the compilation artifacts.
///
/// `serialize` saves the compilation artifacts along with the host
/// fingerprint, which consists of target, compiler flags, and wasmtime
/// package version.
///
/// This method does not verify the serialized artifacts for modification or
/// corruption. All responsibility for signing and verifying them falls on
/// the embedder.
///
/// # Errors
///
/// Fails if the bytes cannot be decoded, if the fingerprint of the current
/// host differs from the serialized one, or if the compiled module cannot be
/// built from the artifacts.
pub fn deserialize(engine: &Engine, serialized: &[u8]) -> Result<Module> {
    let (fingerprint, artifacts): (u64, CompilationArtifacts) =
        bincode::deserialize_from(serialized).context("Deserialize compilation artifacts")?;

    // Reject artifacts produced under a different compiler configuration.
    if fingerprint != compiler_fingerprint(engine.config()) {
        bail!("Incompatible compilation artifact");
    }

    let compiled = CompiledModule::from_artifacts(
        artifacts,
        engine.compiler().isa(),
        &*engine.config().profiler,
    )?;
    let module = Module {
        engine: engine.clone(),
        compiled: Arc::new(compiled),
        frame_info_registration: Arc::new(Mutex::new(None)),
    };
    Ok(module)
}
pub(crate) fn compiled_module(&self) -> &CompiledModule {
&self.compiled
}
@@ -535,6 +580,13 @@ impl Module {
}
}
/// Reduces the compiler-relevant parts of `config` to a single `u64`.
///
/// The value is whatever `Config::compiler_fingerprint` feeds into a fresh
/// `DefaultHasher`.
/// NOTE(review): the standard library does not specify `DefaultHasher`'s
/// algorithm across Rust releases, so fingerprints may differ between builds
/// made with different toolchains; confirm this is acceptable.
fn compiler_fingerprint(config: &Config) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::Hasher;

    let mut state = DefaultHasher::new();
    config.compiler_fingerprint(&mut state);
    state.finish()
}
fn _assert_send_sync() {
fn _assert<T: Send + Sync>() {}
_assert::<Module>();

View File

@@ -10,6 +10,7 @@ use std::hash::{Hash, Hasher};
use std::path::Path;
use std::rc::{Rc, Weak};
use std::sync::Arc;
use target_lexicon::Triple;
use wasmparser::Validator;
use wasmtime_environ::settings::{self, Configurable, SetError};
use wasmtime_environ::{ir, isa, isa::TargetIsa, wasm, CacheConfig, Tunables};
@@ -634,6 +635,22 @@ impl Config {
self.tunables.clone(),
)
}
/// Feeds the settings that identify compatible compilation artifacts into
/// `state`.
///
/// Hashed, in order: the compiler flags, the tunables, the host target
/// triple, and this crate's package version.
pub(crate) fn compiler_fingerprint<H: Hasher>(&self, state: &mut H) {
    self.flags.hash(state);
    self.tunables.hash(state);
    Triple::host().hash(state);
    // Catch accidental bugs of reusing across wasmtime versions.
    let version: &str = env!("CARGO_PKG_VERSION");
    version.hash(state);
}
}
fn round_up_to_pages(val: u64) -> u64 {