diff --git a/Cargo.lock b/Cargo.lock index b05d7a3249..44cbe0a771 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3556,6 +3556,7 @@ dependencies = [ "lazy_static", "libc", "log", + "object", "paste", "psm", "rayon", @@ -3654,6 +3655,7 @@ dependencies = [ "lazy_static", "libc", "log", + "memchr", "more-asserts", "num_cpus", "object", diff --git a/Cargo.toml b/Cargo.toml index 1d128937eb..2aaf5485f3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,6 +58,7 @@ wast = "37.0.0" criterion = "0.3.4" num_cpus = "1.13.0" winapi = { version = "0.3.9", features = ['memoryapi'] } +memchr = "2.4" [build-dependencies] anyhow = "1.0.19" diff --git a/crates/cache/src/lib.rs b/crates/cache/src/lib.rs index 874e481934..64f7d8e262 100644 --- a/crates/cache/src/lib.rs +++ b/crates/cache/src/lib.rs @@ -42,13 +42,40 @@ impl<'config> ModuleCacheEntry<'config> { Self(Some(inner)) } - /// Gets cached data if state matches, otherwise calls the `compute`. - // NOTE: This takes a function pointer instead of a closure so that it doesn't accidentally - // close over something not accounted in the cache. - pub fn get_data(&self, state: T, compute: fn(T) -> Result) -> Result + /// Gets cached data if state matches, otherwise calls `compute`. + /// + /// Data is automatically serialized/deserialized with `bincode`. + pub fn get_data(&self, state: T, compute: fn(&T) -> Result) -> Result where T: Hash, U: Serialize + for<'a> Deserialize<'a>, + { + self.get_data_raw( + &state, + compute, + |_state, data| bincode::serialize(data).ok(), + |_state, data| bincode::deserialize(&data).ok(), + ) + } + + /// Gets cached data if state matches, otherwise calls `compute`. + /// + /// If the cache is disabled or no cached data is found then `compute` is + /// called to calculate the data. If the data was found in cache it is + /// passed to `deserialize`, which if successful will be the returned value. + /// When computed the `serialize` function is used to generate the bytes + /// from the returned value. 
+ pub fn get_data_raw( + &self, + state: &T, + // NOTE: These are function pointers instead of closures so that they + // don't accidentally close over something not accounted in the cache. + compute: fn(&T) -> Result, + serialize: fn(&T, &U) -> Option>, + deserialize: fn(&T, Vec) -> Option, + ) -> Result + where + T: Hash, { let inner = match &self.0 { Some(inner) => inner, @@ -62,14 +89,18 @@ impl<'config> ModuleCacheEntry<'config> { let hash = base64::encode_config(&hash, base64::URL_SAFE_NO_PAD); if let Some(cached_val) = inner.get_data(&hash) { - let mod_cache_path = inner.root_path.join(&hash); - inner.cache_config.on_cache_get_async(&mod_cache_path); // call on success - return Ok(cached_val); + if let Some(val) = deserialize(state, cached_val) { + let mod_cache_path = inner.root_path.join(&hash); + inner.cache_config.on_cache_get_async(&mod_cache_path); // call on success + return Ok(val); + } } let val_to_cache = compute(state)?; - if inner.update_data(&hash, &val_to_cache).is_some() { - let mod_cache_path = inner.root_path.join(&hash); - inner.cache_config.on_cache_update_async(&mod_cache_path); // call on success + if let Some(bytes) = serialize(state, &val_to_cache) { + if inner.update_data(&hash, &bytes).is_some() { + let mod_cache_path = inner.root_path.join(&hash); + inner.cache_config.on_cache_update_async(&mod_cache_path); // call on success + } } Ok(val_to_cache) } @@ -118,27 +149,19 @@ impl<'config> ModuleCacheEntryInner<'config> { } } - fn get_data(&self, hash: &str) -> Option - where - T: for<'a> Deserialize<'a>, - { + fn get_data(&self, hash: &str) -> Option> { let mod_cache_path = self.root_path.join(hash); trace!("get_data() for path: {}", mod_cache_path.display()); let compressed_cache_bytes = fs::read(&mod_cache_path).ok()?; let cache_bytes = zstd::decode_all(&compressed_cache_bytes[..]) .map_err(|err| warn!("Failed to decompress cached code: {}", err)) .ok()?; - bincode::deserialize(&cache_bytes[..]) - .map_err(|err| warn!("Failed to 
deserialize cached code: {}", err)) - .ok() + Some(cache_bytes) } - fn update_data(&self, hash: &str, data: &T) -> Option<()> { + fn update_data(&self, hash: &str, serialized_data: &[u8]) -> Option<()> { let mod_cache_path = self.root_path.join(hash); trace!("update_data() for path: {}", mod_cache_path.display()); - let serialized_data = bincode::serialize(&data) - .map_err(|err| warn!("Failed to serialize cached code: {}", err)) - .ok()?; let compressed_data = zstd::encode_all( &serialized_data[..], self.cache_config.baseline_compression_level(), diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs index 1757935179..5968a32d95 100644 --- a/crates/jit/src/instantiate.rs +++ b/crates/jit/src/instantiate.rs @@ -6,7 +6,7 @@ use crate::code_memory::CodeMemory; use crate::debug::create_gdbjit_image; use crate::link::link_module; -use crate::ProfilingAgent; +use crate::{MmapVec, ProfilingAgent}; use anyhow::{anyhow, Context, Result}; use object::read::File; use object::write::{Object, StandardSegment}; @@ -68,15 +68,6 @@ pub enum SetupError { DebugInfo(#[from] anyhow::Error), } -/// Final result of compilation which supports serialization to disk. -#[derive(Serialize, Deserialize)] -pub struct CompilationArtifacts { - // NB: this structure is in a transitionary phase and will soon go away. At - // this time it only contains the ELF image created by compilation, and in - // the near future even this will be removed. - obj: Box<[u8]>, -} - /// Secondary in-memory results of compilation. /// /// This opaque structure can be optionally passed back to @@ -113,125 +104,120 @@ struct Metadata { has_wasm_debuginfo: bool, } -impl CompilationArtifacts { - /// Finishes compilation of the `translation` specified, producing the final - /// compilation artifacts and auxiliary information. 
- /// - /// This function will consume the final results of compiling a wasm module - /// and finish the ELF image in-progress as part of `obj` by appending any - /// compiler-agnostic sections. - /// - /// The auxiliary `CompiledModuleInfo` structure returned here has also been - /// serialized into `CompilationArtifacts`, but if the caller will quickly - /// turn-around and invoke `CompiledModule::from_artifacts` after this then - /// the information can be passed to that method to avoid extra - /// deserialization. This is done to avoid a serialize-then-deserialize for - /// API calls like `Module::new` where the compiled module is immediately - /// going to be used. - pub fn new( - translation: ModuleTranslation<'_>, - mut obj: Object, - funcs: PrimaryMap, - tunables: &Tunables, - ) -> Result<(CompilationArtifacts, CompiledModuleInfo)> { - let ModuleTranslation { - mut module, - debuginfo, +/// Finishes compilation of the `translation` specified, producing the final +/// compilation artifact and auxiliary information. +/// +/// This function will consume the final results of compiling a wasm module +/// and finish the ELF image in-progress as part of `obj` by appending any +/// compiler-agnostic sections. +/// +/// The auxiliary `CompiledModuleInfo` structure returned here has also been +/// serialized into the object returned, but if the caller will quickly +/// turn-around and invoke `CompiledModule::from_artifacts` after this then the +/// information can be passed to that method to avoid extra deserialization. +/// This is done to avoid a serialize-then-deserialize for API calls like +/// `Module::new` where the compiled module is immediately going to be used. +/// +/// The `MmapVec` returned here contains the compiled image and resides in +/// mmap'd memory for easily switching permissions to executable afterwards. 
+pub fn finish_compile( + translation: ModuleTranslation<'_>, + mut obj: Object, + funcs: PrimaryMap, + tunables: &Tunables, +) -> Result<(MmapVec, CompiledModuleInfo)> { + let ModuleTranslation { + mut module, + debuginfo, + has_unparsed_debuginfo, + data, + passive_data, + .. + } = translation; + + // Place all data from the wasm module into a section which will be the + // source of the data later at runtime. + let data_id = obj.add_section( + obj.segment_name(StandardSegment::Data).to_vec(), + ELF_WASM_DATA.as_bytes().to_vec(), + SectionKind::ReadOnlyData, + ); + let mut total_data_len = 0; + for data in data.iter() { + obj.append_section_data(data_id, data, 1); + total_data_len += data.len(); + } + for data in passive_data.iter() { + obj.append_section_data(data_id, data, 1); + } + + // Update passive data offsets since they're all located after the other + // data in the module. + for (_, range) in module.passive_data_map.iter_mut() { + range.start = range.start.checked_add(total_data_len as u32).unwrap(); + range.end = range.end.checked_add(total_data_len as u32).unwrap(); + } + + // Insert the raw wasm-based debuginfo into the output, if + // requested. Note that this is distinct from the native debuginfo + // possibly generated by the native compiler, hence these sections + // getting wasm-specific names. 
+ if tunables.parse_wasm_debuginfo { + push_debug(&mut obj, &debuginfo.dwarf.debug_abbrev); + push_debug(&mut obj, &debuginfo.dwarf.debug_addr); + push_debug(&mut obj, &debuginfo.dwarf.debug_aranges); + push_debug(&mut obj, &debuginfo.dwarf.debug_info); + push_debug(&mut obj, &debuginfo.dwarf.debug_line); + push_debug(&mut obj, &debuginfo.dwarf.debug_line_str); + push_debug(&mut obj, &debuginfo.dwarf.debug_str); + push_debug(&mut obj, &debuginfo.dwarf.debug_str_offsets); + push_debug(&mut obj, &debuginfo.debug_ranges); + push_debug(&mut obj, &debuginfo.debug_rnglists); + } + + // Encode a `CompiledModuleInfo` structure into the `ELF_WASMTIME_INFO` + // section of this image. This is not necessary when the returned module + // is never serialized to disk, which is also why we return a copy of + // the `CompiledModuleInfo` structure to the caller in case they don't + // want to deserialize this value immediately afterwards from the + // section. Otherwise, though, this is necessary to reify a `Module` on + // the other side from disk-serialized artifacts in + // `Module::deserialize` (a Wasmtime API). + let info_id = obj.add_section( + obj.segment_name(StandardSegment::Data).to_vec(), + ELF_WASMTIME_INFO.as_bytes().to_vec(), + SectionKind::ReadOnlyData, + ); + let mut bytes = Vec::new(); + let info = CompiledModuleInfo { + module, + funcs, + meta: Metadata { + native_debug_info_present: tunables.generate_native_debuginfo, has_unparsed_debuginfo, - data, - passive_data, - .. - } = translation; + code_section_offset: debuginfo.wasm_file.code_section_offset, + has_wasm_debuginfo: tunables.parse_wasm_debuginfo, + }, + }; + bincode::serialize_into(&mut bytes, &info)?; + obj.append_section_data(info_id, &bytes, 1); - // Place all data from the wasm module into a section which will the - // source of the data later at runtime. 
- let data_id = obj.add_section( - obj.segment_name(StandardSegment::Data).to_vec(), - ELF_WASM_DATA.as_bytes().to_vec(), - SectionKind::ReadOnlyData, + return Ok((MmapVec::from_obj(obj)?, info)); + + fn push_debug<'a, T>(obj: &mut Object, section: &T) + where + T: gimli::Section>, + { + let data = section.reader().slice(); + if data.is_empty() { + return; + } + let section_id = obj.add_section( + obj.segment_name(StandardSegment::Debug).to_vec(), + wasm_section_name(T::id()).as_bytes().to_vec(), + SectionKind::Debug, ); - let mut total_data_len = 0; - for data in data.iter() { - obj.append_section_data(data_id, data, 1); - total_data_len += data.len(); - } - for data in passive_data.iter() { - obj.append_section_data(data_id, data, 1); - } - - // Update passive data offsets since they're all located after the other - // data in the module. - for (_, range) in module.passive_data_map.iter_mut() { - range.start = range.start.checked_add(total_data_len as u32).unwrap(); - range.end = range.end.checked_add(total_data_len as u32).unwrap(); - } - - // Insert the wasm raw wasm-based debuginfo into the output, if - // requested. Note that this is distinct from the native debuginfo - // possibly generated by the native compiler, hence these sections - // getting wasm-specific names. - if tunables.parse_wasm_debuginfo { - push_debug(&mut obj, &debuginfo.dwarf.debug_abbrev); - push_debug(&mut obj, &debuginfo.dwarf.debug_addr); - push_debug(&mut obj, &debuginfo.dwarf.debug_aranges); - push_debug(&mut obj, &debuginfo.dwarf.debug_info); - push_debug(&mut obj, &debuginfo.dwarf.debug_line); - push_debug(&mut obj, &debuginfo.dwarf.debug_line_str); - push_debug(&mut obj, &debuginfo.dwarf.debug_str); - push_debug(&mut obj, &debuginfo.dwarf.debug_str_offsets); - push_debug(&mut obj, &debuginfo.debug_ranges); - push_debug(&mut obj, &debuginfo.debug_rnglists); - } - - // Encode a `CompiledModuleInfo` structure into the `ELF_WASMTIME_INFO` - // section of this image. 
This is not necessary when the returned module - // is never serialized to disk, which is also why we return a copy of - // the `CompiledModuleInfo` structure to the caller in case they don't - // want to deserialize this value immediately afterwards from the - // section. Otherwise, though, this is necessary to reify a `Module` on - // the other side from disk-serialized artifacts in - // `Module::deserialize` (a Wasmtime API). - let info_id = obj.add_section( - obj.segment_name(StandardSegment::Data).to_vec(), - ELF_WASMTIME_INFO.as_bytes().to_vec(), - SectionKind::ReadOnlyData, - ); - let mut bytes = Vec::new(); - let info = CompiledModuleInfo { - module, - funcs, - meta: Metadata { - native_debug_info_present: tunables.generate_native_debuginfo, - has_unparsed_debuginfo, - code_section_offset: debuginfo.wasm_file.code_section_offset, - has_wasm_debuginfo: tunables.parse_wasm_debuginfo, - }, - }; - bincode::serialize_into(&mut bytes, &info)?; - obj.append_section_data(info_id, &bytes, 1); - - return Ok(( - CompilationArtifacts { - obj: obj.write()?.into(), - }, - info, - )); - - fn push_debug<'a, T>(obj: &mut Object, section: &T) - where - T: gimli::Section>, - { - let data = section.reader().slice(); - if data.is_empty() { - return; - } - let section_id = obj.add_section( - obj.segment_name(StandardSegment::Debug).to_vec(), - wasm_section_name(T::id()).as_bytes().to_vec(), - SectionKind::Debug, - ); - obj.append_section_data(section_id, data, 1); - } + obj.append_section_data(section_id, data, 1); } } @@ -270,7 +256,7 @@ pub struct CompiledModule { wasm_data: Range, address_map_data: Range, trap_data: Range, - artifacts: CompilationArtifacts, + mmap: MmapVec, module: Arc, funcs: PrimaryMap, meta: Metadata, @@ -280,7 +266,12 @@ pub struct CompiledModule { } impl CompiledModule { - /// Creates `CompiledModule` directly from `CompilationArtifacts`. + /// Creates `CompiledModule` directly from a precompiled artifact. 
+ /// + /// The `mmap` argument is expected to be the result of a previous call to + /// `finish_compile` above. This is an ELF image, at this time, which + /// contains all necessary information to create a `CompiledModule` from a + /// compilation. /// /// This method also takes `info`, an optionally-provided deserialization of /// the artifacts' compilation metadata section. If this information is not @@ -292,11 +283,11 @@ impl CompiledModule { /// The `profiler` argument here is used to inform JIT profiling runtimes /// about new code that is loaded. pub fn from_artifacts( - artifacts: CompilationArtifacts, + mmap: MmapVec, info: Option, profiler: &dyn ProfilingAgent, ) -> Result> { - let obj = File::parse(&artifacts.obj[..]) + let obj = File::parse(&mmap[..]) .with_context(|| "failed to parse internal ELF compilation artifact")?; let section = |name: &str| { @@ -314,9 +305,9 @@ impl CompiledModule { }; let module = Arc::new(info.module); let funcs = info.funcs; - let wasm_data = subslice_range(section(ELF_WASM_DATA)?, &artifacts.obj); - let address_map_data = subslice_range(section(ELF_WASMTIME_ADDRMAP)?, &artifacts.obj); - let trap_data = subslice_range(section(ELF_WASMTIME_TRAPS)?, &artifacts.obj); + let wasm_data = subslice_range(section(ELF_WASM_DATA)?, &mmap); + let address_map_data = subslice_range(section(ELF_WASMTIME_ADDRMAP)?, &mmap); + let trap_data = subslice_range(section(ELF_WASMTIME_TRAPS)?, &mmap); // Allocate all of the compiled functions into executable memory, // copying over their contents. @@ -336,7 +327,7 @@ impl CompiledModule { meta: info.meta, funcs, module, - artifacts, + mmap, wasm_data, address_map_data, trap_data, @@ -357,7 +348,7 @@ impl CompiledModule { // Register GDB JIT images; initialize profiler and load the wasm module. 
let dbg_jit_registration = if self.meta.native_debug_info_present { let bytes = create_gdbjit_image( - self.artifacts.obj.to_vec(), + self.mmap.to_vec(), ( self.code.range.0 as *const u8, self.code.range.1 - self.code.range.0, @@ -376,9 +367,10 @@ impl CompiledModule { Ok(()) } - /// Extracts `CompilationArtifacts` from the compiled module. - pub fn compilation_artifacts(&self) -> &CompilationArtifacts { - &self.artifacts + /// Returns the underlying memory which contains the compiled module's + /// image. + pub fn mmap(&self) -> &MmapVec { + &self.mmap } /// Returns the concatenated list of all data associated with this wasm @@ -387,20 +379,20 @@ impl CompiledModule { /// This is used for initialization of memories and all data ranges stored /// in a `Module` are relative to the slice returned here. pub fn wasm_data(&self) -> &[u8] { - &self.artifacts.obj[self.wasm_data.clone()] + &self.mmap[self.wasm_data.clone()] } /// Returns the encoded address map section used to pass to /// `wasmtime_environ::lookup_file_pos`. pub fn address_map_data(&self) -> &[u8] { - &self.artifacts.obj[self.address_map_data.clone()] + &self.mmap[self.address_map_data.clone()] } /// Returns the encoded trap information for this compiled image. /// /// For more information see `wasmtime_environ::trap_encoding`. pub fn trap_data(&self) -> &[u8] { - &self.artifacts.obj[self.trap_data.clone()] + &self.mmap[self.trap_data.clone()] } /// Return a reference-counting pointer to a module. @@ -500,7 +492,7 @@ impl CompiledModule { if !self.meta.has_wasm_debuginfo { return Ok(None); } - let obj = File::parse(&self.artifacts.obj[..]) + let obj = File::parse(&self.mmap[..]) .context("failed to parse internal ELF file representation")?; let dwarf = gimli::Dwarf::load(|id| -> Result<_> { let data = obj @@ -603,7 +595,7 @@ fn build_code_memory( /// /// This method requires that `inner` is a sub-slice of `outer`, and if that /// isn't true then this method will panic. 
-fn subslice_range(inner: &[u8], outer: &[u8]) -> Range { +pub fn subslice_range(inner: &[u8], outer: &[u8]) -> Range { if inner.len() == 0 { return 0..0; } diff --git a/crates/jit/src/lib.rs b/crates/jit/src/lib.rs index 3de944d48a..1d7a284c52 100644 --- a/crates/jit/src/lib.rs +++ b/crates/jit/src/lib.rs @@ -24,15 +24,17 @@ mod code_memory; mod debug; mod instantiate; mod link; +mod mmap_vec; mod profiling; mod unwind; pub use crate::code_memory::CodeMemory; pub use crate::instantiate::{ - CompilationArtifacts, CompiledModule, CompiledModuleInfo, ModuleCode, SetupError, + finish_compile, subslice_range, CompiledModule, CompiledModuleInfo, ModuleCode, SetupError, SymbolizeContext, TypeTables, }; pub use crate::link::link_module; +pub use crate::mmap_vec::MmapVec; pub use profiling::*; /// Version number of this crate. diff --git a/crates/jit/src/mmap_vec.rs b/crates/jit/src/mmap_vec.rs new file mode 100644 index 0000000000..ee49b27119 --- /dev/null +++ b/crates/jit/src/mmap_vec.rs @@ -0,0 +1,229 @@ +use anyhow::{Error, Result}; +use object::write::{Object, WritableBuffer}; +use std::ops::{Deref, DerefMut, Range, RangeTo}; +use std::sync::Arc; +use wasmtime_runtime::Mmap; + +/// A type akin to `Vec`, but backed by `mmap` and able to be split. +/// +/// This type is a non-growable owned list of bytes. It can be segmented into +/// disjoint separately owned views akin to the `split_at` method on slices in +/// Rust. An `MmapVec` is backed by an OS-level memory allocation and is not +/// suitable for lots of small allocation (since it works at the page +/// granularity). +/// +/// An `MmapVec` is an owned value which means that owners have the ability to +/// get exclusive access to the underlying bytes, enabling mutation. +pub struct MmapVec { + mmap: Arc, + range: Range, +} + +impl MmapVec { + /// Consumes an existing `mmap` and wraps it up into an `MmapVec`. 
+ /// + /// The returned `MmapVec` will have the `size` specified, which can be + /// smaller than the region mapped by the `Mmap`. The returned `MmapVec` + /// will only have at most `size` bytes accessible. + pub fn new(mmap: Mmap, size: usize) -> MmapVec { + assert!(size <= mmap.len()); + MmapVec { + mmap: Arc::new(mmap), + range: 0..size, + } + } + + /// Creates a new zero-initialized `MmapVec` with the given `size`. + /// + /// This function will return a new `MmapVec` suitably sized to hold `size` + /// bytes. All bytes will be initialized to zero since this is a fresh OS + /// page allocation. + pub fn with_capacity(size: usize) -> Result { + Ok(MmapVec::new(Mmap::with_at_least(size)?, size)) + } + + /// Creates a new `MmapVec` from the contents of an existing `slice`. + /// + /// A new `MmapVec` is allocated to hold the contents of `slice` and then + /// `slice` is copied into the new mmap. It's recommended to avoid this + /// method if possible to avoid the need to copy data around. + pub fn from_slice(slice: &[u8]) -> Result { + let mut result = MmapVec::with_capacity(slice.len())?; + result.copy_from_slice(slice); + Ok(result) + } + + /// Creates a new `MmapVec` from serializing the specified `obj`. + /// + /// The returned `MmapVec` will contain the serialized version of `obj` and + /// is sized appropriately to the exact size of the object serialized. + pub fn from_obj(obj: Object) -> Result { + let mut result = ObjectMmap::default(); + match obj.emit(&mut result) { + Ok(()) => { + assert!(result.mmap.is_some(), "no reserve"); + let mmap = result.mmap.expect("reserve not called"); + assert_eq!(mmap.len(), result.len); + Ok(mmap) + } + Err(e) => match result.err.take() { + Some(original) => Err(original.context(e)), + None => Err(e.into()), + }, + } + } + + /// "Drains" leading bytes up to the end specified in `range` from this + /// `MmapVec`, returning a separately owned `MmapVec` which retains access + /// to the bytes. 
+ /// + /// This method is similar to the `Vec` type's `drain` method, except that + /// the return value is not an iterator but rather a new `MmapVec`. The + /// purpose of this method is the ability to split-off new `MmapVec` values + /// which are sub-slices of the original one. + /// + /// Once data has been drained from an `MmapVec` it is no longer accessible + /// from the original `MmapVec`, it's only accessible from the returned + /// `MmapVec`. In other words ownership of the drain'd bytes is returned + /// through the `MmapVec` return value. + /// + /// This `MmapVec` will shrink by `range.end` bytes, and it will only refer + /// to the bytes that come after the drain range. + /// + /// This is an `O(1)` operation which does not involve copies. + pub fn drain(&mut self, range: RangeTo) -> MmapVec { + let amt = range.end; + assert!(amt <= (self.range.end - self.range.start)); + + // Create a new `MmapVec` which refers to the same underlying mmap, but + // has a disjoint range from ours. Our own range is adjusted to be + // disjoint just after `ret` is created. + let ret = MmapVec { + mmap: self.mmap.clone(), + range: self.range.start..self.range.start + amt, + }; + self.range.start += amt; + return ret; + } +} + +impl Deref for MmapVec { + type Target = [u8]; + + fn deref(&self) -> &[u8] { + &self.mmap.as_slice()[self.range.clone()] + } +} + +impl DerefMut for MmapVec { + fn deref_mut(&mut self) -> &mut [u8] { + // SAFETY: The underlying mmap is protected behind an `Arc` which means + // that there can be many references to it. We are guaranteed, though, + // that each reference to the underlying `mmap` has a disjoint `range` + // listed that it can access. This means that despite having shared + // access to the mmap itself we have exclusive ownership of the bytes + // specified in `self.range`. This should allow us to safely hand out + // mutable access to these bytes if so desired. 
+ unsafe { + let slice = std::slice::from_raw_parts_mut(self.mmap.as_mut_ptr(), self.mmap.len()); + &mut slice[self.range.clone()] + } + } +} + +/// Helper struct to implement the `WritableBuffer` trait from the `object` +/// crate. +/// +/// This enables writing an object directly into an mmap'd memory so it's +/// immediately usable for execution after compilation. This implementation +/// relies on a call to `reserve` happening once up front with all the needed +/// data, and the mmap internally does not attempt to grow afterwards. +#[derive(Default)] +struct ObjectMmap { + mmap: Option, + len: usize, + err: Option, +} + +impl WritableBuffer for ObjectMmap { + fn len(&self) -> usize { + self.len + } + + fn reserve(&mut self, additional: usize) -> Result<(), ()> { + assert!(self.mmap.is_none(), "cannot reserve twice"); + self.mmap = match MmapVec::with_capacity(additional) { + Ok(mmap) => Some(mmap), + Err(e) => { + self.err = Some(e); + return Err(()); + } + }; + Ok(()) + } + + fn resize(&mut self, new_len: usize, value: u8) { + if new_len <= self.len { + return; + } + let mmap = self.mmap.as_mut().expect("write before reserve"); + + // new mmaps are automatically filled with zeros, so if we're asked to + // fill with zeros then we can skip the actual fill step. 
+ if value != 0 { + mmap[self.len..][..new_len - self.len].fill(value); + } + self.len = new_len; + } + + fn write_bytes(&mut self, val: &[u8]) { + let mmap = self.mmap.as_mut().expect("write before reserve"); + mmap[self.len..][..val.len()].copy_from_slice(val); + self.len += val.len(); + } +} + +#[cfg(test)] +mod tests { + use super::MmapVec; + + #[test] + fn smoke() { + let mut mmap = MmapVec::with_capacity(10).unwrap(); + assert_eq!(mmap.len(), 10); + assert_eq!(&mmap[..], &[0; 10]); + + mmap[0] = 1; + mmap[2] = 3; + assert!(mmap.get(10).is_none()); + assert_eq!(mmap[0], 1); + assert_eq!(mmap[2], 3); + } + + #[test] + fn drain() { + let mut mmap = MmapVec::from_slice(&[1, 2, 3, 4]).unwrap(); + assert_eq!(mmap.len(), 4); + assert!(mmap.drain(..0).is_empty()); + assert_eq!(mmap.len(), 4); + let one = mmap.drain(..1); + assert_eq!(one.len(), 1); + assert_eq!(one[0], 1); + assert_eq!(mmap.len(), 3); + assert_eq!(&mmap[..], &[2, 3, 4]); + drop(one); + assert_eq!(mmap.len(), 3); + + let two = mmap.drain(..2); + assert_eq!(two.len(), 2); + assert_eq!(two[0], 2); + assert_eq!(two[1], 3); + assert_eq!(mmap.len(), 1); + assert_eq!(mmap[0], 4); + drop(two); + assert!(mmap.drain(..0).is_empty()); + assert!(mmap.drain(..1).len() == 1); + assert!(mmap.is_empty()); + assert!(mmap.drain(..0).is_empty()); + } +} diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml index 76891f9361..006b65bcc5 100644 --- a/crates/wasmtime/Cargo.toml +++ b/crates/wasmtime/Cargo.toml @@ -38,6 +38,7 @@ paste = "1.0.3" psm = "0.1.11" lazy_static = "1.4" rayon = { version = "1.0", optional = true } +object = { version = "0.26", default-features = false, features = ['read_core', 'elf'] } [target.'cfg(target_os = "windows")'.dependencies] winapi = "0.3.7" diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index 9a64658237..7cd7348d16 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -10,7 +10,7 @@ use std::path::Path; use 
std::sync::Arc; use wasmparser::Validator; use wasmtime_environ::{ModuleEnvironment, ModuleIndex, PrimaryMap}; -use wasmtime_jit::{CompilationArtifacts, CompiledModule, CompiledModuleInfo, TypeTables}; +use wasmtime_jit::{CompiledModule, CompiledModuleInfo, MmapVec, TypeTables}; mod registry; mod serialization; @@ -299,21 +299,44 @@ impl Module { cfg_if::cfg_if! { if #[cfg(feature = "cache")] { + let state = (HashedEngineCompileEnv(engine), binary); let (main_module, artifacts, types) = wasmtime_cache::ModuleCacheEntry::new( "wasmtime", engine.cache_config(), ) - .get_data((HashedEngineCompileEnv(engine), binary), |(engine, binary)| { - Module::build_artifacts(engine.0, binary) - })?; + .get_data_raw( + &state, + + // Cache miss, compute the actual artifacts + |(engine, wasm)| Module::build_artifacts(engine.0, wasm), + + // Implementation of how to serialize artifacts + |(engine, _wasm), (_, artifacts, types)| { + SerializedModule::from_artifacts( + engine.0, + artifacts.iter().map(|p| &p.0), + types, + ).to_bytes().ok() + }, + + // Cache hit, deserialize the provided artifacts + |(engine, _wasm), serialized_bytes| { + let (i, m, t, upvars) = SerializedModule::from_bytes(&serialized_bytes, true) + .ok()? + .into_parts(engine.0) + .ok()?; + // This upvars list is always empty for top-level modules + assert!(upvars.is_empty()); + Some((i, m, t)) + }, + )?; } else { - let (main_module, artifacts, types) = - Module::build_artifacts(engine, binary)?; + let (main_module, artifacts, types) = Module::build_artifacts(engine, binary)?; } }; - let modules = engine.run_maybe_parallel(artifacts, |(a, i)| { - CompiledModule::from_artifacts(a, Some(i), &*engine.config().profiler) + let modules = engine.run_maybe_parallel(artifacts, |(a, b)| { + CompiledModule::from_artifacts(a, b, &*engine.config().profiler) })?; Self::from_parts(engine, modules, main_module, Arc::new(types), &[]) @@ -329,9 +352,10 @@ impl Module { /// * The index into the second field of the "main module". 
The "main /// module" in this case is the outermost module described by the `wasm` /// input, and is here for the module linking proposal. - /// * A list of `CompilationArtifacts` for each module found within `wasm`. + /// * A list of compilation artifacts for each module found within `wasm`. /// Note that if module linking is disabled then this list will always - /// have a size of exactly 1. + /// have a size of exactly 1. These pairs are returned by + /// `wasmtime_jit::finish_compile`. /// * Type information about all the modules returned. All returned modules /// have local type information with indices that refer to these returned /// tables. @@ -341,7 +365,7 @@ impl Module { wasm: &[u8], ) -> Result<( usize, - Vec<(CompilationArtifacts, CompiledModuleInfo)>, + Vec<(MmapVec, Option)>, TypeTables, )> { let tunables = &engine.config().tunables; @@ -388,12 +412,8 @@ impl Module { translation.try_paged_init(); } - Ok(CompilationArtifacts::new( - translation, - obj, - funcs, - tunables, - )?) + let (mmap, info) = wasmtime_jit::finish_compile(translation, obj, funcs, tunables)?; + Ok((mmap, Some(info))) })?; Ok(( diff --git a/crates/wasmtime/src/module/serialization.rs b/crates/wasmtime/src/module/serialization.rs index 7c9fa725a9..128ff13f13 100644 --- a/crates/wasmtime/src/module/serialization.rs +++ b/crates/wasmtime/src/module/serialization.rs @@ -1,28 +1,67 @@ //! Implements module serialization. +//! +//! This module implements the serialization format for `wasmtime::Module`. +//! This includes both the binary format of the final artifact as well as +//! validation on ingestion of artifacts. +//! +//! There are two main pieces of data associated with a binary artifact: +//! +//! 1. A list of compiled modules. The reason this is a list as opposed to one +//! singular module is that a module-linking module may encompass a number +//! of other modules. +//! 2. Compilation metadata shared by all modules, including the global +//! `TypeTables` information. 
This metadata is validated for compilation +//! settings and also has information shared by all modules (such as the +//! shared `TypeTables`). +//! +//! Compiled modules are, at this time, represented as an ELF file. This ELF +//! file contains all the necessary data needed to decode each individual +//! module, and conveniently also handles things like alignment so we can +//! actually directly `mmap` compilation artifacts from disk. +//! +//! With all this in mind, the current serialization format is as follows: +//! +//! * The first, primary, module starts the final artifact. This means that the +//! final artifact is actually, and conveniently, a valid ELF file. ELF files +//! don't place any restrictions on data coming after the ELF file itself, +//! so that's where everything else will go. Another reason for using this +//! format is that our compilation artifacts are then consumable by standard +//! debugging tools like `objdump` to poke around and see what's what. +//! +//! * Next, all other modules are encoded. Each module has its own alignment, +//! though, so modules aren't simply concatenated. Instead directly after an +//! ELF file there is a 64-bit little-endian integer which is the offset, +//! from the end of the previous ELF file, to the next ELF file. +//! +//! * Finally, once all modules have been encoded (there's always at least +//! one), the 8-byte value `u64::MAX` is encoded. Following this is a +//! number of fields: +//! +//! 1. The `HEADER` value +//! 2. A byte indicating how long the next field is +//! 3. A version string of the length of the previous byte value +//! 4. A `bincode`-encoded `Metadata` structure. +//! +//! This is hoped to help distinguish easily Wasmtime-based ELF files from +//! other random ELF files, as well as provide better error messages for +//! using wasmtime artifacts across versions. +//! +//! This format is implemented by the `to_bytes` and `from_mmap` function. 
use crate::{Engine, Module}; use anyhow::{anyhow, bail, Context, Result}; -use bincode::Options; +use object::read::elf::FileHeader; +use object::{Bytes, File, Object, ObjectSection}; use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; +use std::convert::TryFrom; use std::str::FromStr; use std::sync::Arc; use wasmtime_environ::{Compiler, FlagValue, Tunables}; -use wasmtime_jit::{CompilationArtifacts, CompiledModule, TypeTables}; +use wasmtime_jit::{subslice_range, CompiledModule, CompiledModuleInfo, MmapVec, TypeTables}; const HEADER: &[u8] = b"\0wasmtime-aot"; -fn bincode_options() -> impl Options { - // Use a variable-length integer encoding instead of fixed length. The - // module shown on #2318 gets compressed from ~160MB to ~110MB simply using - // this, presumably because there's a lot of 8-byte integers which generally - // have small values. Local testing shows that the deserialization - // performance, while higher, is in the few-percent range. For huge size - // savings this seems worthwhile to lose a small percentage of - // deserialization performance. 
- bincode::DefaultOptions::new().with_varint_encoding() -} - // This exists because `wasmparser::WasmFeatures` isn't serializable #[derive(Debug, Copy, Clone, Serialize, Deserialize)] struct WasmFeatures { @@ -78,6 +117,12 @@ enum MyCow<'a, T> { } impl<'a, T> MyCow<'a, T> { + fn as_ref(&self) -> &T { + match self { + MyCow::Owned(val) => val, + MyCow::Borrowed(val) => val, + } + } fn unwrap_owned(self) -> T { match self { MyCow::Owned(val) => val, @@ -149,14 +194,18 @@ impl SerializedModuleUpvar { } } -#[derive(Serialize, Deserialize)] pub struct SerializedModule<'a> { + artifacts: Vec>, + metadata: Metadata<'a>, +} + +#[derive(Serialize, Deserialize)] +struct Metadata<'a> { target: String, shared_flags: BTreeMap, isa_flags: BTreeMap, tunables: Tunables, features: WasmFeatures, - artifacts: Vec>, module_upvars: Vec, types: MyCow<'a, TypeTables>, } @@ -168,10 +217,8 @@ impl<'a> SerializedModule<'a> { .inner .artifact_upvars .iter() - .map(|m| MyCow::Borrowed(m.compilation_artifacts())) - .chain(Some(MyCow::Borrowed( - module.inner.module.compilation_artifacts(), - ))) + .map(|m| MyCow::Borrowed(m.mmap())) + .chain(Some(MyCow::Borrowed(module.inner.module.mmap()))) .collect::>(); let module_upvars = module .inner @@ -191,12 +238,12 @@ impl<'a> SerializedModule<'a> { #[cfg(compiler)] pub fn from_artifacts( engine: &Engine, - artifacts: &'a Vec, + artifacts: impl IntoIterator, types: &'a TypeTables, ) -> Self { Self::with_data( engine, - artifacts.iter().map(MyCow::Borrowed).collect(), + artifacts.into_iter().map(MyCow::Borrowed).collect(), Vec::new(), MyCow::Borrowed(types), ) @@ -205,23 +252,42 @@ impl<'a> SerializedModule<'a> { #[cfg(compiler)] fn with_data( engine: &Engine, - artifacts: Vec>, + artifacts: Vec>, module_upvars: Vec, types: MyCow<'a, TypeTables>, ) -> Self { Self { - target: engine.compiler().triple().to_string(), - shared_flags: engine.compiler().flags(), - isa_flags: engine.compiler().isa_flags(), - tunables: engine.config().tunables.clone(), - 
features: (&engine.config().features).into(), artifacts, - module_upvars, - types, + metadata: Metadata { + target: engine.compiler().triple().to_string(), + shared_flags: engine.compiler().flags(), + isa_flags: engine.compiler().isa_flags(), + tunables: engine.config().tunables.clone(), + features: (&engine.config().features).into(), + module_upvars, + types, + }, } } - pub fn into_module(mut self, engine: &Engine) -> Result { + pub fn into_module(self, engine: &Engine) -> Result { + let (main_module, modules, types, upvars) = self.into_parts(engine)?; + let modules = engine.run_maybe_parallel(modules, |(i, m)| { + CompiledModule::from_artifacts(i, m, &*engine.config().profiler) + })?; + + Module::from_parts(engine, modules, main_module, Arc::new(types), &upvars) + } + + pub fn into_parts( + mut self, + engine: &Engine, + ) -> Result<( + usize, + Vec<(MmapVec, Option)>, + TypeTables, + Vec, + )> { // Verify that the module we're loading matches the triple that `engine` // is configured for. If compilation is disabled within engine then the // assumed triple is the host itself. @@ -245,64 +311,106 @@ impl<'a> SerializedModule<'a> { self.check_tunables(&engine.config().tunables)?; self.check_features(&engine.config().features)?; - let modules = engine.run_maybe_parallel(self.artifacts, |i| { - CompiledModule::from_artifacts(i.unwrap_owned(), None, &*engine.config().profiler) - })?; - - assert!(!modules.is_empty()); + assert!(!self.artifacts.is_empty()); + let modules = self.artifacts.into_iter().map(|i| (i.unwrap_owned(), None)); let main_module = modules.len() - 1; - Module::from_parts( - engine, - modules, + Ok(( main_module, - Arc::new(self.types.unwrap_owned()), - &self.module_upvars, - ) + modules.collect(), + self.metadata.types.unwrap_owned(), + self.metadata.module_upvars, + )) } pub fn to_bytes(&self) -> Result> { - use std::io::Write; + // First up, create a linked-ish list of ELF files. 
For more + // information on this format, see the doc comment on this module. + // The only semi-tricky bit here is that we leave an + // offset-to-the-next-file between each set of ELF files. The list + // is then terminated with `u64::MAX`. + let mut ret = Vec::new(); + for (i, obj) in self.artifacts.iter().enumerate() { + // Anything after the first object needs to respect the alignment of + // the object's sections, so insert padding as necessary. Note that + // the +8 to the length here is to accommodate the size we'll write + // to get to the next object. + if i > 0 { + let obj = File::parse(&obj.as_ref()[..])?; + let align = obj.sections().map(|s| s.align()).max().unwrap_or(0).max(1); + let align = usize::try_from(align).unwrap(); + let new_size = align_to(ret.len() + 8, align); + ret.extend_from_slice(&(new_size as u64).to_le_bytes()); + ret.resize(new_size, 0); + } + ret.extend_from_slice(obj.as_ref()); + } + ret.extend_from_slice(&[0xff; 8]); - let mut bytes = Vec::new(); - - bytes.write_all(HEADER)?; - - // Preface the data with a version so we can do a version check independent - // of the serialized data. + // The last part of our artifact is the bincode-encoded `Metadata` + // section with a few other guards to help give better error messages.
+ ret.extend_from_slice(HEADER); let version = env!("CARGO_PKG_VERSION"); assert!( version.len() < 256, "package version must be less than 256 bytes" ); - bytes.write(&[version.len() as u8])?; + ret.push(version.len() as u8); + ret.extend_from_slice(version.as_bytes()); + bincode::serialize_into(&mut ret, &self.metadata)?; - bytes.write_all(version.as_bytes())?; - - bincode_options().serialize_into(&mut bytes, self)?; - - Ok(bytes) + Ok(ret) } pub fn from_bytes(bytes: &[u8], check_version: bool) -> Result { - if !bytes.starts_with(HEADER) { - bail!("bytes are not a compatible serialized wasmtime module"); - } + Self::from_mmap(MmapVec::from_slice(bytes)?, check_version) + } - let bytes = &bytes[HEADER.len()..]; + pub fn from_mmap(mut mmap: MmapVec, check_version: bool) -> Result { + // Artifacts always start with an ELF file, so read that first. + // Afterwards we continually read ELF files until we see the `u64::MAX` + // marker, meaning we've reached the end. + let first_module = read_file(&mut mmap)?; + let mut pos = first_module.len(); + let mut artifacts = vec![MyCow::Owned(first_module)]; - if bytes.is_empty() { + let metadata = loop { + if mmap.len() < 8 { + bail!("invalid serialized data"); + } + let next_file_start = u64::from_le_bytes([ + mmap[0], mmap[1], mmap[2], mmap[3], mmap[4], mmap[5], mmap[6], mmap[7], + ]); + if next_file_start == u64::MAX { + mmap.drain(..8); + break mmap; + } + + // Remove padding leading up to the next file + let next_file_start = usize::try_from(next_file_start).unwrap(); + let _padding = mmap.drain(..next_file_start - pos); + let data = read_file(&mut mmap)?; + pos = next_file_start + data.len(); + artifacts.push(MyCow::Owned(data)); + }; + + // Once we've reached the end we parse a `Metadata` object. This has a + // few guards up front which we process first, and eventually this + // bottoms out in a `bincode::deserialize` call. 
+ let metadata = metadata + .strip_prefix(HEADER) + .ok_or_else(|| anyhow!("bytes are not a compatible serialized wasmtime module"))?; + if metadata.is_empty() { bail!("serialized data data is empty"); } - - let version_len = bytes[0] as usize; - if bytes.len() < version_len + 1 { + let version_len = metadata[0] as usize; + if metadata.len() < version_len + 1 { bail!("serialized data is malformed"); } if check_version { - let version = std::str::from_utf8(&bytes[1..1 + version_len])?; + let version = std::str::from_utf8(&metadata[1..1 + version_len])?; if version != env!("CARGO_PKG_VERSION") { bail!( "Module was compiled with incompatible Wasmtime version '{}'", @@ -311,13 +419,47 @@ impl<'a> SerializedModule<'a> { } } - Ok(bincode_options() - .deserialize::>(&bytes[1 + version_len..]) - .context("deserialize compilation artifacts")?) + let metadata = bincode::deserialize::(&metadata[1 + version_len..]) + .context("deserialize compilation artifacts")?; + + return Ok(SerializedModule { + artifacts, + metadata, + }); + + /// This function will drain the beginning contents of `mmap` which + /// correspond to an ELF object file. The ELF file is only very lightly + /// validated. + /// + /// The `mmap` passed in will be reset to just after the ELF file, and + /// the `MmapVec` returned represents the extent of the ELF file + /// itself. + fn read_file(mmap: &mut MmapVec) -> Result { + use object::NativeEndian as NE; + // There's not actually a great utility for figuring out where + // the end of an ELF file is in the `object` crate. In lieu of that + // we build our own which leverages the format of ELF files, which + // is that the header comes first, that tells us where the section + // headers are, and for our ELF files the end of the file is the + // end of the section headers.
+ let mut bytes = Bytes(mmap); + let header = bytes + .read::>() + .map_err(|()| anyhow!("artifact truncated, can't read header"))?; + if !header.is_supported() { + bail!("invalid elf header"); + } + let sections = header + .section_headers(NE, &mmap[..]) + .context("failed to read section headers")?; + let range = subslice_range(object::bytes_of_slice(sections), mmap); + Ok(mmap.drain(..range.end)) + } } fn check_triple(&self, other: &target_lexicon::Triple) -> Result<()> { - let triple = target_lexicon::Triple::from_str(&self.target).map_err(|e| anyhow!(e))?; + let triple = + target_lexicon::Triple::from_str(&self.metadata.target).map_err(|e| anyhow!(e))?; if triple.architecture != other.architecture { bail!( @@ -337,7 +479,7 @@ impl<'a> SerializedModule<'a> { } fn check_shared_flags(&mut self, compiler: &dyn Compiler) -> Result<()> { - let mut shared_flags = std::mem::take(&mut self.shared_flags); + let mut shared_flags = std::mem::take(&mut self.metadata.shared_flags); for (name, host) in compiler.flags() { match shared_flags.remove(&name) { Some(v) => { @@ -360,7 +502,7 @@ impl<'a> SerializedModule<'a> { } fn check_isa_flags(&mut self, compiler: &dyn Compiler) -> Result<()> { - let mut isa_flags = std::mem::take(&mut self.isa_flags); + let mut isa_flags = std::mem::take(&mut self.metadata.isa_flags); for (name, host) in compiler.isa_flags() { match isa_flags.remove(&name) { Some(v) => match (&v, &host) { @@ -432,7 +574,7 @@ impl<'a> SerializedModule<'a> { // This doesn't affect compilation, it's just a runtime setting. 
dynamic_memory_growth_reserve: _, - } = self.tunables; + } = self.metadata.tunables; Self::check_int( static_memory_bound, @@ -488,7 +630,7 @@ impl<'a> SerializedModule<'a> { multi_memory, exceptions, memory64, - } = self.features; + } = self.metadata.features; Self::check_bool( reference_types, @@ -538,6 +680,12 @@ impl<'a> SerializedModule<'a> { } } +/// Aligns the `val` specified up to `align`, which must be a power of two +fn align_to(val: usize, align: usize) -> usize { + debug_assert!(align.is_power_of_two()); + (val + (align - 1)) & (!(align - 1)) +} + #[cfg(test)] mod test { use super::*; @@ -550,7 +698,7 @@ mod test { let module = Module::new(&engine, "(module)")?; let mut serialized = SerializedModule::new(&module); - serialized.target = "unknown-generic-linux".to_string(); + serialized.metadata.target = "unknown-generic-linux".to_string(); match serialized.into_module(&engine) { Ok(_) => unreachable!(), @@ -569,7 +717,7 @@ mod test { let module = Module::new(&engine, "(module)")?; let mut serialized = SerializedModule::new(&module); - serialized.target = format!( + serialized.metadata.target = format!( "{}-generic-unknown", target_lexicon::Triple::host().architecture ); @@ -591,7 +739,7 @@ mod test { let module = Module::new(&engine, "(module)")?; let mut serialized = SerializedModule::new(&module); - serialized.shared_flags.insert( + serialized.metadata.shared_flags.insert( "opt_level".to_string(), FlagValue::Enum(Cow::Borrowed("none")), ); @@ -615,6 +763,7 @@ mod test { let mut serialized = SerializedModule::new(&module); serialized + .metadata .isa_flags .insert("not_a_flag".to_string(), FlagValue::Bool(true)); @@ -636,7 +785,7 @@ mod test { let module = Module::new(&engine, "(module)")?; let mut serialized = SerializedModule::new(&module); - serialized.strategy = CompilationStrategy::Lightbeam; + serialized.metadata.strategy = CompilationStrategy::Lightbeam; match serialized.into_module(&engine) { Ok(_) => unreachable!(), @@ -655,7 +804,7 @@ mod test 
{ let module = Module::new(&engine, "(module)")?; let mut serialized = SerializedModule::new(&module); - serialized.tunables.static_memory_offset_guard_size = 0; + serialized.metadata.tunables.static_memory_offset_guard_size = 0; match serialized.into_module(&engine) { Ok(_) => unreachable!(), @@ -674,7 +823,7 @@ mod test { let module = Module::new(&engine, "(module)")?; let mut serialized = SerializedModule::new(&module); - serialized.tunables.interruptable = false; + serialized.metadata.tunables.interruptable = false; match serialized.into_module(&engine) { Ok(_) => unreachable!(), @@ -691,7 +840,7 @@ mod test { let module = Module::new(&engine, "(module)")?; let mut serialized = SerializedModule::new(&module); - serialized.tunables.interruptable = true; + serialized.metadata.tunables.interruptable = true; match serialized.into_module(&engine) { Ok(_) => unreachable!(), @@ -713,7 +862,7 @@ mod test { let module = Module::new(&engine, "(module)")?; let mut serialized = SerializedModule::new(&module); - serialized.features.simd = false; + serialized.metadata.features.simd = false; match serialized.into_module(&engine) { Ok(_) => unreachable!(), @@ -727,7 +876,7 @@ mod test { let module = Module::new(&engine, "(module)")?; let mut serialized = SerializedModule::new(&module); - serialized.features.simd = true; + serialized.metadata.features.simd = true; match serialized.into_module(&engine) { Ok(_) => unreachable!(), diff --git a/tests/all/module_serialize.rs b/tests/all/module_serialize.rs index e444487a9e..3c67ebf99f 100644 --- a/tests/all/module_serialize.rs +++ b/tests/all/module_serialize.rs @@ -15,7 +15,9 @@ unsafe fn deserialize_and_instantiate(store: &mut Store<()>, buffer: &[u8]) -> R fn test_version_mismatch() -> Result<()> { let engine = Engine::default(); let mut buffer = serialize(&engine, "(module)")?; - buffer[13 /* header length */ + 1 /* version length */] = 'x' as u8; + const HEADER: &[u8] = b"\0wasmtime-aot"; + let pos = 
memchr::memmem::rfind_iter(&buffer, HEADER).next().unwrap(); + buffer[pos + HEADER.len() + 1 /* version length */] = 'x' as u8; match unsafe { Module::deserialize(&engine, &buffer) } { Ok(_) => bail!("expected deserialization to fail"),