diff --git a/Cargo.lock b/Cargo.lock index 3b22f19a55..73dbdecd48 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3324,6 +3324,7 @@ dependencies = [ "indexmap", "log", "more-asserts", + "region", "serde", "thiserror", "wasmparser", diff --git a/crates/environ/Cargo.toml b/crates/environ/Cargo.toml index 230295aecb..12c39d6ac6 100644 --- a/crates/environ/Cargo.toml +++ b/crates/environ/Cargo.toml @@ -13,6 +13,7 @@ edition = "2018" [dependencies] anyhow = "1.0" +region = "2.2.0" cranelift-codegen = { path = "../../cranelift/codegen", version = "0.71.0", features = ["enable-serde"] } cranelift-entity = { path = "../../cranelift/entity", version = "0.71.0", features = ["enable-serde"] } cranelift-wasm = { path = "../../cranelift/wasm", version = "0.71.0", features = ["enable-serde"] } diff --git a/crates/environ/src/module.rs b/crates/environ/src/module.rs index 8daefaf079..79342f3546 100644 --- a/crates/environ/src/module.rs +++ b/crates/environ/src/module.rs @@ -1,7 +1,7 @@ //! Data structures for representing decoded wasm modules. use crate::tunables::Tunables; -use crate::WASM_MAX_PAGES; +use crate::{DataInitializer, WASM_MAX_PAGES, WASM_PAGE_SIZE}; use cranelift_codegen::ir; use cranelift_entity::{EntityRef, PrimaryMap}; use cranelift_wasm::*; @@ -10,19 +10,6 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::sync::Arc; -/// A WebAssembly table initializer. -#[derive(Clone, Debug, Hash, Serialize, Deserialize)] -pub struct TableElements { - /// The index of a table to initialize. - pub table_index: TableIndex, - /// Optionally, a global variable giving a base index. - pub base: Option, - /// The offset to add to the base. - pub offset: usize, - /// The values to write into the table elements. - pub elements: Box<[FuncIndex]>, -} - /// Implemenation styles for WebAssembly linear memory. #[derive(Debug, Clone, Hash, Serialize, Deserialize)] pub enum MemoryStyle { @@ -92,6 +79,164 @@ impl MemoryPlan { } } +/// A WebAssembly linear memory initializer. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct MemoryInitializer { + /// The index of a linear memory to initialize. + pub memory_index: MemoryIndex, + /// Optionally, a global variable giving a base index. + pub base: Option, + /// The offset to add to the base. + pub offset: usize, + /// The data to write into the linear memory. + pub data: Box<[u8]>, +} + +impl From> for MemoryInitializer { + fn from(initializer: DataInitializer) -> Self { + Self { + memory_index: initializer.memory_index, + base: initializer.base, + offset: initializer.offset, + data: initializer.data.into(), + } + } +} + +/// The type of WebAssembly linear memory initialization. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub enum MemoryInitialization { + /// Memory initialization is paged. + /// + /// To be paged, the following requirements must be met: + /// + /// * All data segments must reference defined memories. + /// * All data segments must not use a global base. + /// * All data segments must be in bounds. + /// + /// Paged initialization is performed by memcopying individual pages to the linear memory. + Paged { + /// The size of each page stored in the map. + /// This is expected to be the host page size. + page_size: usize, + /// The map of defined memory index to a list of page data. + /// The list of page data is sparse, with None representing a zero page. + /// The size of the list will be the maximum page written to by a data segment. + map: PrimaryMap>>>, + }, + /// Memory initialization is out of bounds. + /// + /// To be out of bounds, the following requirements must be met: + /// + /// * All data segments must reference defined memories. + /// * All data segments must not use a global base. + /// * At least one data segments was out of bounds. + /// + /// This can be used to quickly return an error when the module is instantiated. + OutOfBounds, + /// Memory initialization is segmented. + /// + /// To be segmented, at least one of the following requirements must be met: + /// + /// * A data segment referenced an imported memory. + /// * A data segment uses a global base. + /// + /// Segmented initialization is performed by processing the complete set of data segments + /// when the module is instantiated. + /// + /// This ensures that initialization side-effects are observed according to the bulk-memory proposal. + Segmented(Box<[MemoryInitializer]>), +} + +impl MemoryInitialization { + /// Creates a new memory initialization for a module and its data initializers. + pub fn new(module: &Module, initializers: Vec) -> Self { + let page_size = region::page::size(); + let num_defined_memories = module.memory_plans.len() - module.num_imported_memories; + let mut out_of_bounds = false; + let mut memories = PrimaryMap::with_capacity(num_defined_memories); + + for _ in 0..num_defined_memories { + memories.push(Vec::new()); + } + + for initializer in &initializers { + match ( + module.defined_memory_index(initializer.memory_index), + initializer.base.is_some(), + ) { + (None, _) | (_, true) => { + // If the initializer references an imported memory or uses a global base, + // the complete set of segments will need to be processed at module instantiation + return Self::Segmented( + initializers + .into_iter() + .map(Into::into) + .collect::>() + .into_boxed_slice(), + ); + } + (Some(index), false) => { + if out_of_bounds { + continue; + } + + // Perform a bounds check on the segment + if (initializer.offset + initializer.data.len()) + > ((module.memory_plans[initializer.memory_index].memory.minimum as usize) + * (WASM_PAGE_SIZE as usize)) + { + out_of_bounds = true; + continue; + } + + let pages = &mut memories[index]; + let mut page_index = initializer.offset / page_size; + let mut page_offset = initializer.offset % page_size; + let mut data_offset = 0; + let mut data_remaining = initializer.data.len(); + + if data_remaining == 0 { + continue; + } + + // Copy the initialization data by each page + loop { + if page_index >= pages.len() { + pages.resize(page_index + 1, None); + } + + let page = pages[page_index] + .get_or_insert_with(|| vec![0; page_size].into_boxed_slice()); + let len = std::cmp::min(data_remaining, page_size - page_offset); + + page[page_offset..page_offset + len] + .copy_from_slice(&initializer.data[data_offset..(data_offset + len)]); + + if len == data_remaining { + break; + } + + page_index += 1; + page_offset = 0; + data_offset += len; + data_remaining -= len; + } + } + }; + } + + if out_of_bounds { + Self::OutOfBounds + } else { + Self::Paged { + page_size, + map: memories, + } + } + } +} + /// Implemenation styles for WebAssembly tables. #[derive(Debug, Clone, Hash, Serialize, Deserialize)] pub enum TableStyle { @@ -124,6 +269,19 @@ impl TablePlan { } } +/// A WebAssembly table initializer. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct TableInitializer { + /// The index of a table to initialize. + pub table_index: TableIndex, + /// Optionally, a global variable giving a base index. + pub base: Option, + /// The offset to add to the base. + pub offset: usize, + /// The values to write into the table elements. + pub elements: Box<[FuncIndex]>, +} + /// Different types that can appear in a module. /// /// Note that each of these variants are intended to index further into a @@ -164,7 +322,10 @@ pub struct Module { pub start_func: Option, /// WebAssembly table initializers. - pub table_elements: Vec, + pub table_initializers: Vec, + + /// WebAssembly linear memory initializer. + pub memory_initialization: Option, /// WebAssembly passive elements. pub passive_elements: Vec>, diff --git a/crates/environ/src/module_environ.rs b/crates/environ/src/module_environ.rs index 8004553580..636fa2893e 100644 --- a/crates/environ/src/module_environ.rs +++ b/crates/environ/src/module_environ.rs @@ -1,6 +1,6 @@ use crate::module::{ Initializer, InstanceSignature, MemoryPlan, Module, ModuleSignature, ModuleType, ModuleUpvar, - TableElements, TablePlan, TypeTables, + TableInitializer, TablePlan, TypeTables, }; use crate::tunables::Tunables; use cranelift_codegen::ir; @@ -13,7 +13,6 @@ use cranelift_wasm::{ ModuleIndex, ModuleTypeIndex, SignatureIndex, Table, TableIndex, TargetEnvironment, TypeIndex, WasmError, WasmFuncType, WasmResult, }; -use serde::{Deserialize, Serialize}; use std::collections::{hash_map::Entry, HashMap}; use std::convert::TryFrom; use std::mem; @@ -684,7 +683,7 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data fn reserve_table_elements(&mut self, num: u32) -> WasmResult<()> { self.result .module - .table_elements + .table_initializers .reserve_exact(usize::try_from(num).unwrap()); Ok(()) } @@ -696,12 +695,15 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data offset: usize, elements: Box<[FuncIndex]>, ) -> WasmResult<()> { - self.result.module.table_elements.push(TableElements { - table_index, - base, - offset, - elements, - }); + self.result + .module + .table_initializers + .push(TableInitializer { + table_index, + base, + offset, + elements, + }); Ok(()) } @@ -774,11 +776,9 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data data: &'data [u8], ) -> WasmResult<()> { self.result.data_initializers.push(DataInitializer { - location: DataInitializerLocation { - memory_index, - base, - offset, - }, + memory_index, + base, + offset, data, }); Ok(()) @@ -1072,10 +1072,8 @@ pub fn translate_signature(mut sig: ir::Signature, pointer_type: ir::Type) -> ir sig } -/// A memory index and offset within that memory where a data initialization -/// should is to be performed. -#[derive(Clone, Serialize, Deserialize)] -pub struct DataInitializerLocation { +/// A data initializer for linear memory. +pub struct DataInitializer<'data> { /// The index of the memory to initialize. pub memory_index: MemoryIndex, @@ -1084,34 +1082,7 @@ pub struct DataInitializerLocation { /// A constant offset to initialize at. pub offset: usize, -} - -/// A data initializer for linear memory. -pub struct DataInitializer<'data> { - /// The location where the initialization is to be performed. - pub location: DataInitializerLocation, /// The initialization data. pub data: &'data [u8], } - -/// Similar to `DataInitializer`, but owns its own copy of the data rather -/// than holding a slice of the original module. -#[derive(Serialize, Deserialize)] -pub struct OwnedDataInitializer { - /// The location where the initialization is to be performed. - pub location: DataInitializerLocation, - - /// The initialization data. - pub data: Box<[u8]>, -} - -impl OwnedDataInitializer { - /// Creates a new owned data initializer from a borrowed data initializer. - pub fn new(borrowed: DataInitializer<'_>) -> Self { - Self { - location: borrowed.location.clone(), - data: borrowed.data.into(), - } - } -} diff --git a/crates/jit/Cargo.toml b/crates/jit/Cargo.toml index 803d88eb43..b4f6faef74 100644 --- a/crates/jit/Cargo.toml +++ b/crates/jit/Cargo.toml @@ -25,7 +25,7 @@ wasmtime-debug = { path = "../debug", version = "0.24.0" } wasmtime-profiling = { path = "../profiling", version = "0.24.0" } wasmtime-obj = { path = "../obj", version = "0.24.0" } rayon = { version = "1.0", optional = true } -region = "2.1.0" +region = "2.2.0" thiserror = "1.0.4" target-lexicon = { version = "0.11.0", default-features = false } wasmparser = "0.76" diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs index 4d43f43ba1..b4875305be 100644 --- a/crates/jit/src/instantiate.rs +++ b/crates/jit/src/instantiate.rs @@ -21,8 +21,9 @@ use wasmtime_environ::wasm::{ DefinedFuncIndex, InstanceTypeIndex, ModuleTypeIndex, SignatureIndex, WasmFuncType, }; use wasmtime_environ::{ - CompileError, DebugInfoData, FunctionAddressMap, InstanceSignature, Module, ModuleEnvironment, - ModuleSignature, ModuleTranslation, OwnedDataInitializer, StackMapInformation, TrapInformation, + CompileError, DebugInfoData, FunctionAddressMap, InstanceSignature, MemoryInitialization, + Module, ModuleEnvironment, ModuleSignature, ModuleTranslation, StackMapInformation, + TrapInformation, }; use wasmtime_profiling::ProfilingAgent; use wasmtime_runtime::{GdbJitImageRegistration, InstantiationError, VMFunctionBody, VMTrampoline}; @@ -62,10 +63,6 @@ pub struct CompilationArtifacts { /// Unwind information for function code. unwind_info: Box<[ObjectUnwindInfo]>, - /// Data initiailizers. - #[serde(with = "arc_slice_serde")] - data_initializers: Arc<[OwnedDataInitializer]>, - /// Descriptions of compiled functions funcs: PrimaryMap, @@ -122,18 +119,15 @@ impl CompilationArtifacts { } = compiler.compile(&mut translation, &types)?; let ModuleTranslation { - module, + mut module, data_initializers, debuginfo, has_unparsed_debuginfo, .. } = translation; - let data_initializers = data_initializers - .into_iter() - .map(OwnedDataInitializer::new) - .collect::>() - .into(); + module.memory_initialization = + Some(MemoryInitialization::new(&module, data_initializers)); let obj = obj.write().map_err(|_| { SetupError::Instantiate(InstantiationError::Resource( @@ -145,7 +139,6 @@ impl CompilationArtifacts { module: Arc::new(module), obj: obj.into_boxed_slice(), unwind_info: unwind_info.into_boxed_slice(), - data_initializers, funcs: funcs .into_iter() .map(|(_, func)| FunctionInfo { @@ -280,11 +273,6 @@ impl CompiledModule { &self.artifacts } - /// Returns the data initializers from the compiled module. - pub fn data_initializers(&self) -> &Arc<[OwnedDataInitializer]> { - &self.artifacts.data_initializers - } - /// Return a reference-counting pointer to a module. pub fn module(&self) -> &Arc { &self.artifacts.module @@ -546,24 +534,3 @@ mod arc_serde { Ok(Arc::new(T::deserialize(de)?)) } } - -mod arc_slice_serde { - use super::Arc; - use serde::{Deserialize, Deserializer, Serialize, Serializer}; - - pub(super) fn serialize(arc: &Arc<[T]>, ser: S) -> Result - where - S: Serializer, - T: Serialize, - { - (**arc).serialize(ser) - } - - pub(super) fn deserialize<'de, D, T>(de: D) -> Result, D::Error> - where - D: Deserializer<'de>, - T: Deserialize<'de>, - { - Ok(Vec::::deserialize(de)?.into()) - } -} diff --git a/crates/runtime/src/instance/allocator.rs b/crates/runtime/src/instance/allocator.rs index 927afd9e41..c45270e173 100644 --- a/crates/runtime/src/instance/allocator.rs +++ b/crates/runtime/src/instance/allocator.rs @@ -23,7 +23,8 @@ use wasmtime_environ::wasm::{ TableElementType, WasmType, }; use wasmtime_environ::{ - ir, Module, ModuleTranslation, ModuleType, OwnedDataInitializer, TableElements, VMOffsets, + ir, MemoryInitialization, MemoryInitializer, Module, ModuleTranslation, ModuleType, + TableInitializer, VMOffsets, }; mod pooling; @@ -139,7 +140,6 @@ pub unsafe trait InstanceAllocator: Send + Sync { &self, handle: &InstanceHandle, is_bulk_memory: bool, - data_initializers: &Arc<[OwnedDataInitializer]>, ) -> Result<(), InstantiationError>; /// Deallocates a previously allocated instance. @@ -169,6 +169,228 @@ pub unsafe trait InstanceAllocator: Send + Sync { unsafe fn deallocate_fiber_stack(&self, stack: *mut u8); } +fn get_table_init_start(init: &TableInitializer, instance: &Instance) -> usize { + let mut start = init.offset; + + if let Some(base) = init.base { + let val = unsafe { + if let Some(def_index) = instance.module.defined_global_index(base) { + *instance.global(def_index).as_u32() + } else { + *(*instance.imported_global(base).from).as_u32() + } + }; + start += usize::try_from(val).unwrap(); + } + + start +} + +fn check_table_init_bounds(instance: &Instance) -> Result<(), InstantiationError> { + for init in &instance.module.table_initializers { + let start = get_table_init_start(init, instance); + let table = instance.get_table(init.table_index); + + let size = usize::try_from(table.size()).unwrap(); + if size < start + init.elements.len() { + return Err(InstantiationError::Link(LinkError( + "table out of bounds: elements segment does not fit".to_owned(), + ))); + } + } + + Ok(()) +} + +fn initialize_tables(instance: &Instance) -> Result<(), InstantiationError> { + for init in &instance.module.table_initializers { + let start = get_table_init_start(init, instance); + let table = instance.get_table(init.table_index); + + if start + .checked_add(init.elements.len()) + .map_or(true, |end| end > table.size() as usize) + { + return Err(InstantiationError::Trap(Trap::wasm( + ir::TrapCode::TableOutOfBounds, + ))); + } + + for (i, func_idx) in init.elements.iter().enumerate() { + let item = match table.element_type() { + TableElementType::Func => instance + .get_caller_checked_anyfunc(*func_idx) + .map_or(ptr::null_mut(), |f: &VMCallerCheckedAnyfunc| { + f as *const VMCallerCheckedAnyfunc as *mut VMCallerCheckedAnyfunc + }) + .into(), + TableElementType::Val(_) => { + assert!(*func_idx == FuncIndex::reserved_value()); + TableElement::ExternRef(None) + } + }; + table.set(u32::try_from(start + i).unwrap(), item).unwrap(); + } + } + + Ok(()) +} + +fn get_memory_init_start(init: &MemoryInitializer, instance: &Instance) -> usize { + let mut start = init.offset; + + if let Some(base) = init.base { + let val = unsafe { + if let Some(def_index) = instance.module.defined_global_index(base) { + *instance.global(def_index).as_u32() + } else { + *(*instance.imported_global(base).from).as_u32() + } + }; + start += usize::try_from(val).unwrap(); + } + + start +} + +unsafe fn get_memory_slice<'instance>( + init: &MemoryInitializer, + instance: &'instance Instance, +) -> &'instance mut [u8] { + let memory = if let Some(defined_memory_index) = + instance.module.defined_memory_index(init.memory_index) + { + instance.memory(defined_memory_index) + } else { + let import = instance.imported_memory(init.memory_index); + let foreign_instance = (&mut *(import).vmctx).instance(); + let foreign_memory = &mut *(import).from; + let foreign_index = foreign_instance.memory_index(foreign_memory); + foreign_instance.memory(foreign_index) + }; + slice::from_raw_parts_mut(memory.base, memory.current_length) +} + +fn check_memory_init_bounds( + instance: &Instance, + initializers: &[MemoryInitializer], +) -> Result<(), InstantiationError> { + for init in initializers { + let start = get_memory_init_start(init, instance); + unsafe { + let mem_slice = get_memory_slice(init, instance); + if mem_slice.get_mut(start..start + init.data.len()).is_none() { + return Err(InstantiationError::Link(LinkError( + "memory out of bounds: data segment does not fit".into(), + ))); + } + } + } + + Ok(()) +} + +fn initialize_memories( + instance: &Instance, + initializers: &[MemoryInitializer], +) -> Result<(), InstantiationError> { + for init in initializers { + let memory = instance.get_memory(init.memory_index); + + let start = get_memory_init_start(init, instance); + if start + .checked_add(init.data.len()) + .map_or(true, |end| end > memory.current_length) + { + return Err(InstantiationError::Trap(Trap::wasm( + ir::TrapCode::HeapOutOfBounds, + ))); + } + + unsafe { + let mem_slice = get_memory_slice(init, instance); + let end = start + init.data.len(); + let to_init = &mut mem_slice[start..end]; + to_init.copy_from_slice(&init.data); + } + } + + Ok(()) +} + +fn check_init_bounds(instance: &Instance) -> Result<(), InstantiationError> { + check_table_init_bounds(instance)?; + + match &instance.module.memory_initialization { + Some(MemoryInitialization::Paged { .. }) | None => { + // Bounds were checked at compile-time + } + Some(MemoryInitialization::OutOfBounds) => { + return Err(InstantiationError::Link(LinkError( + "memory out of bounds: data segment does not fit".into(), + ))); + } + Some(MemoryInitialization::Segmented(initializers)) => { + check_memory_init_bounds(instance, initializers)?; + } + } + + Ok(()) +} + +fn initialize_instance( + instance: &Instance, + is_bulk_memory: bool, +) -> Result<(), InstantiationError> { + // If bulk memory is not enabled, bounds check the data and element segments before + // making any changes. With bulk memory enabled, initializers are processed + // in-order and side effects are observed up to the point of an out-of-bounds + // initializer, so the early checking is not desired. + if !is_bulk_memory { + check_init_bounds(instance)?; + } + + // Initialize the tables + initialize_tables(instance)?; + + // Initialize the memories + match &instance.module.memory_initialization { + Some(MemoryInitialization::Paged { page_size, map }) => { + for (index, pages) in map { + let memory = instance.memory(index); + + for (page_index, page) in pages.iter().enumerate() { + if let Some(data) = page { + // Bounds checking should have occurred when the module was compiled + // The data should always be page sized + assert!((page_index * page_size) < memory.current_length); + assert_eq!(data.len(), *page_size); + + unsafe { + ptr::copy_nonoverlapping( + data.as_ptr(), + memory.base.add(page_index * page_size), + data.len(), + ); + } + } + } + } + } + Some(MemoryInitialization::OutOfBounds) => { + return Err(InstantiationError::Trap(Trap::wasm( + ir::TrapCode::HeapOutOfBounds, + ))) + } + Some(MemoryInitialization::Segmented(initializers)) => { + initialize_memories(instance, initializers)?; + } + None => {} + } + + Ok(()) +} + unsafe fn initialize_vmcontext( instance: &Instance, functions: &[VMFunctionImport], @@ -350,157 +572,6 @@ impl OnDemandInstanceAllocator { } Ok(memories) } - - fn check_table_init_bounds(instance: &Instance) -> Result<(), InstantiationError> { - for init in &instance.module.table_elements { - let start = Self::get_table_init_start(init, instance); - let table = instance.get_table(init.table_index); - - let size = usize::try_from(table.size()).unwrap(); - if size < start + init.elements.len() { - return Err(InstantiationError::Link(LinkError( - "table out of bounds: elements segment does not fit".to_owned(), - ))); - } - } - - Ok(()) - } - - fn get_memory_init_start(init: &OwnedDataInitializer, instance: &Instance) -> usize { - let mut start = init.location.offset; - - if let Some(base) = init.location.base { - let val = unsafe { - if let Some(def_index) = instance.module.defined_global_index(base) { - *instance.global(def_index).as_u32() - } else { - *(*instance.imported_global(base).from).as_u32() - } - }; - start += usize::try_from(val).unwrap(); - } - - start - } - - unsafe fn get_memory_slice<'instance>( - init: &OwnedDataInitializer, - instance: &'instance Instance, - ) -> &'instance mut [u8] { - let memory = if let Some(defined_memory_index) = instance - .module - .defined_memory_index(init.location.memory_index) - { - instance.memory(defined_memory_index) - } else { - let import = instance.imported_memory(init.location.memory_index); - let foreign_instance = (&mut *(import).vmctx).instance(); - let foreign_memory = &mut *(import).from; - let foreign_index = foreign_instance.memory_index(foreign_memory); - foreign_instance.memory(foreign_index) - }; - slice::from_raw_parts_mut(memory.base, memory.current_length) - } - - fn check_memory_init_bounds( - instance: &Instance, - data_initializers: &[OwnedDataInitializer], - ) -> Result<(), InstantiationError> { - for init in data_initializers { - let start = Self::get_memory_init_start(init, instance); - unsafe { - let mem_slice = Self::get_memory_slice(init, instance); - if mem_slice.get_mut(start..start + init.data.len()).is_none() { - return Err(InstantiationError::Link(LinkError( - "memory out of bounds: data segment does not fit".into(), - ))); - } - } - } - - Ok(()) - } - - fn get_table_init_start(init: &TableElements, instance: &Instance) -> usize { - let mut start = init.offset; - - if let Some(base) = init.base { - let val = unsafe { - if let Some(def_index) = instance.module.defined_global_index(base) { - *instance.global(def_index).as_u32() - } else { - *(*instance.imported_global(base).from).as_u32() - } - }; - start += usize::try_from(val).unwrap(); - } - - start - } - - fn initialize_tables(instance: &Instance) -> Result<(), InstantiationError> { - for init in &instance.module.table_elements { - let start = Self::get_table_init_start(init, instance); - let table = instance.get_table(init.table_index); - - if start - .checked_add(init.elements.len()) - .map_or(true, |end| end > table.size() as usize) - { - return Err(InstantiationError::Trap(Trap::wasm( - ir::TrapCode::TableOutOfBounds, - ))); - } - - for (i, func_idx) in init.elements.iter().enumerate() { - let item = match table.element_type() { - TableElementType::Func => instance - .get_caller_checked_anyfunc(*func_idx) - .map_or(ptr::null_mut(), |f: &VMCallerCheckedAnyfunc| { - f as *const VMCallerCheckedAnyfunc as *mut VMCallerCheckedAnyfunc - }) - .into(), - TableElementType::Val(_) => { - assert!(*func_idx == FuncIndex::reserved_value()); - TableElement::ExternRef(None) - } - }; - table.set(u32::try_from(start + i).unwrap(), item).unwrap(); - } - } - - Ok(()) - } - - /// Initialize the table memory from the provided initializers. - fn initialize_memories( - instance: &Instance, - data_initializers: &[OwnedDataInitializer], - ) -> Result<(), InstantiationError> { - for init in data_initializers { - let memory = instance.get_memory(init.location.memory_index); - - let start = Self::get_memory_init_start(init, instance); - if start - .checked_add(init.data.len()) - .map_or(true, |end| end > memory.current_length) - { - return Err(InstantiationError::Trap(Trap::wasm( - ir::TrapCode::HeapOutOfBounds, - ))); - } - - unsafe { - let mem_slice = Self::get_memory_slice(init, instance); - let end = start + init.data.len(); - let to_init = &mut mem_slice[start..end]; - to_init.copy_from_slice(&init.data); - } - } - - Ok(()) - } } unsafe impl InstanceAllocator for OnDemandInstanceAllocator { @@ -561,23 +632,8 @@ unsafe impl InstanceAllocator for OnDemandInstanceAllocator { &self, handle: &InstanceHandle, is_bulk_memory: bool, - data_initializers: &Arc<[OwnedDataInitializer]>, ) -> Result<(), InstantiationError> { - // Check initializer bounds before initializing anything. Only do this - // when bulk memory is disabled, since the bulk memory proposal changes - // instantiation such that the intermediate results of failed - // initializations are visible. - if !is_bulk_memory { - Self::check_table_init_bounds(handle.instance())?; - Self::check_memory_init_bounds(handle.instance(), data_initializers.as_ref())?; - } - - // Apply fallible initializers. Note that this can "leak" state even if - // it fails. - Self::initialize_tables(handle.instance())?; - Self::initialize_memories(handle.instance(), data_initializers.as_ref())?; - - Ok(()) + initialize_instance(handle.instance(), is_bulk_memory) } unsafe fn deallocate(&self, handle: &InstanceHandle) { diff --git a/crates/runtime/src/instance/allocator/pooling.rs b/crates/runtime/src/instance/allocator/pooling.rs index 9351e94ca6..dfcbd7bb0a 100644 --- a/crates/runtime/src/instance/allocator/pooling.rs +++ b/crates/runtime/src/instance/allocator/pooling.rs @@ -8,13 +8,10 @@ //! when modules can be constrained based on configurable limits. use super::{ - initialize_vmcontext, FiberStackError, InstanceAllocationRequest, InstanceAllocator, - InstanceHandle, InstantiationError, -}; -use crate::{ - instance::Instance, table::max_table_element_size, Memory, Mmap, OnDemandInstanceAllocator, - Table, VMContext, + initialize_instance, initialize_vmcontext, FiberStackError, InstanceAllocationRequest, + InstanceAllocator, InstanceHandle, InstantiationError, }; +use crate::{instance::Instance, table::max_table_element_size, Memory, Mmap, Table, VMContext}; use rand::Rng; use std::cell::RefCell; use std::cmp::min; @@ -23,8 +20,7 @@ use std::mem; use std::sync::{Arc, Mutex}; use wasmtime_environ::{ entity::{EntitySet, PrimaryMap}, - MemoryStyle, Module, ModuleTranslation, OwnedDataInitializer, Tunables, VMOffsets, - WASM_PAGE_SIZE, + MemoryStyle, Module, ModuleTranslation, Tunables, VMOffsets, WASM_PAGE_SIZE, }; cfg_if::cfg_if! { @@ -35,6 +31,8 @@ cfg_if::cfg_if! { mod uffd; use uffd as imp; use imp::{PageFaultHandler, reset_guard_page}; + use super::{check_init_bounds, initialize_tables}; + use wasmtime_environ::MemoryInitialization; use std::sync::atomic::{AtomicBool, Ordering}; } else if #[cfg(target_os = "linux")] { mod linux; @@ -979,31 +977,29 @@ unsafe impl InstanceAllocator for PoolingInstanceAllocator { &self, handle: &InstanceHandle, is_bulk_memory: bool, - data_initializers: &Arc<[OwnedDataInitializer]>, ) -> Result<(), InstantiationError> { - // TODO: refactor this implementation + let instance = handle.instance(); - // Check initializer bounds before initializing anything. Only do this - // when bulk memory is disabled, since the bulk memory proposal changes - // instantiation such that the intermediate results of failed - // initializations are visible. - if !is_bulk_memory { - OnDemandInstanceAllocator::check_table_init_bounds(handle.instance())?; - OnDemandInstanceAllocator::check_memory_init_bounds( - handle.instance(), - data_initializers.as_ref(), - )?; + cfg_if::cfg_if! { + if #[cfg(all(feature = "uffd", target_os = "linux"))] { + match instance.module.memory_initialization { + Some(MemoryInitialization::Paged{ .. }) => { + if !is_bulk_memory { + check_init_bounds(instance)?; + } + + // Initialize the tables + initialize_tables(instance)?; + + // Don't initialize the memory; the fault handler will fill the pages when accessed + Ok(()) + }, + _ => initialize_instance(instance, is_bulk_memory) + } + } else { + initialize_instance(instance, is_bulk_memory) + } } - - // Apply fallible initializers. Note that this can "leak" state even if - // it fails. - OnDemandInstanceAllocator::initialize_tables(handle.instance())?; - OnDemandInstanceAllocator::initialize_memories( - handle.instance(), - data_initializers.as_ref(), - )?; - - Ok(()) } unsafe fn deallocate(&self, handle: &InstanceHandle) { diff --git a/crates/runtime/src/instance/allocator/pooling/uffd.rs b/crates/runtime/src/instance/allocator/pooling/uffd.rs index ed236a8d43..d18db383ef 100644 --- a/crates/runtime/src/instance/allocator/pooling/uffd.rs +++ b/crates/runtime/src/instance/allocator/pooling/uffd.rs @@ -20,7 +20,9 @@ use std::sync::{ }; use std::thread; use userfaultfd::{Event, FeatureFlags, IoctlFlags, Uffd, UffdBuilder}; -use wasmtime_environ::{wasm::DefinedMemoryIndex, WASM_PAGE_SIZE}; +use wasmtime_environ::{entity::EntityRef, wasm::DefinedMemoryIndex, MemoryInitialization}; + +const WASM_PAGE_SIZE: usize = wasmtime_environ::WASM_PAGE_SIZE as usize; pub unsafe fn make_accessible(_addr: *mut u8, _len: usize) -> bool { // A no-op when userfaultfd is used @@ -191,7 +193,7 @@ impl AddressLocator { let index = (addr - self.memories_start) / self.memory_size; let memory_index = index % self.max_memories; let memory_start = self.memories_start + (index * self.memory_size); - let page_index = (addr - memory_start) / (WASM_PAGE_SIZE as usize); + let page_index = (addr - memory_start) / WASM_PAGE_SIZE; let instance = self.get_instance(index / self.max_memories); let init_page_index = instance @@ -210,8 +212,8 @@ impl AddressLocator { }); return Some(AddressLocation::MemoryPage { - page_addr: (memory_start + page_index * (WASM_PAGE_SIZE as usize)) as _, - len: WASM_PAGE_SIZE as usize, + page_addr: (memory_start + page_index * WASM_PAGE_SIZE) as _, + len: WASM_PAGE_SIZE, instance, memory_index, page_index: init_page_index, @@ -250,18 +252,98 @@ impl AddressLocator { } } -fn wake_guard_page_access(uffd: &Uffd, page_addr: *const u8, len: usize) -> Result<(), String> { - unsafe { - // Set the page to NONE to induce a SIGSEV for the access on the next retry - region::protect(page_addr, len, region::Protection::NONE) - .map_err(|e| format!("failed to change guard page protection: {}", e))?; +unsafe fn wake_guard_page_access( + uffd: &Uffd, + page_addr: *const u8, + len: usize, +) -> Result<(), String> { + // Set the page to NONE to induce a SIGSEV for the access on the next retry + region::protect(page_addr, len, region::Protection::NONE) + .map_err(|e| format!("failed to change guard page protection: {}", e))?; - uffd.wake(page_addr as _, len).map_err(|e| { + uffd.wake(page_addr as _, len).map_err(|e| { + format!( + "failed to wake page at {:p} with length {}: {}", + page_addr, len, e + ) + })?; + + Ok(()) +} + +unsafe fn initialize_wasm_page( + uffd: &Uffd, + instance: &Instance, + page_addr: *const u8, + memory_index: usize, + page_index: usize, +) -> Result<(), String> { + if let Some(MemoryInitialization::Paged { page_size, map }) = + &instance.module.memory_initialization + { + let memory_index = DefinedMemoryIndex::new(memory_index); + let memory = instance.memory(memory_index); + let pages = &map[memory_index]; + debug_assert_eq!(WASM_PAGE_SIZE % page_size, 0); + + let count = WASM_PAGE_SIZE / page_size; + let start = page_index * count; + + for i in start..start + count { + let dst = memory.base.add(i * page_size); + + match pages.get(i) { + Some(Some(data)) => { + log::trace!( + "copying page initialization data from {:p} to {:p} with length {}", + data, + dst, + page_size + ); + + // Copy the page data without waking + uffd.copy(data.as_ptr() as _, dst as _, *page_size, false) + .map_err(|e| { + format!( + "failed to copy page from {:p} to {:p} with length {}: {}", + data, dst, page_size, e + ) + })?; + } + _ => { + log::trace!("zeroing page at {:p} with length {}", dst, page_size); + + // No data, zero the page without waking + uffd.zeropage(dst as _, *page_size, false).map_err(|e| { + format!( + "failed to zero page at {:p} with length {}: {}", + dst, page_size, e + ) + })?; + } + } + } + + // Finally wake the entire wasm page + uffd.wake(page_addr as _, WASM_PAGE_SIZE).map_err(|e| { format!( "failed to wake page at {:p} with length {}: {}", - page_addr, len, e + page_addr, WASM_PAGE_SIZE, e ) - })?; + }) + } else { + log::trace!( + "initialization data is not paged; zeroing Wasm page at {:p}", + page_addr + ); + + uffd.zeropage(page_addr as _, WASM_PAGE_SIZE, true) + .map_err(|e| { + format!( + "failed to zero page at {:p} with length {}: {}", + page_addr, WASM_PAGE_SIZE, e + ) + })?; Ok(()) } @@ -327,13 +409,13 @@ fn handler_thread( match page_index { Some(page_index) => { - // TODO: copy the memory initialization data rather than zero the page - uffd.zeropage(page_addr as _, len, true).map_err(|e| { - format!( - "failed to zero page at {:p} with length {}: {}", - page_addr, len, e - ) - })?; + initialize_wasm_page( + &uffd, + instance, + page_addr, + memory_index, + page_index, + )?; } None => { log::trace!("out of bounds memory access at {:p}", access_addr); @@ -529,7 +611,7 @@ mod test { locator.memories_end, locator.memories_start + instances.memories.mapping.len() ); - assert_eq!(locator.memory_size, (WASM_PAGE_SIZE * 10) as usize); + assert_eq!(locator.memory_size, WASM_PAGE_SIZE * 10); assert_eq!(locator.max_memories, 2); assert_eq!( locator.tables_start, @@ -634,7 +716,7 @@ mod test { page_index, }) => { assert_eq!(page_addr, memory_start as _); - assert_eq!(len, WASM_PAGE_SIZE as usize); + assert_eq!(len, WASM_PAGE_SIZE); assert_eq!(mem_index, memory_index); assert_eq!(page_index, Some(0)); } @@ -642,7 +724,7 @@ mod test { } // Test for access to second page - match locator.get_location(memory_start + 1024 + WASM_PAGE_SIZE as usize) { + match locator.get_location(memory_start + 1024 + WASM_PAGE_SIZE) { Some(AddressLocation::MemoryPage { page_addr, len, @@ -650,8 +732,8 @@ mod test { memory_index: mem_index, page_index, }) => { - assert_eq!(page_addr, (memory_start + WASM_PAGE_SIZE as usize) as _); - assert_eq!(len, WASM_PAGE_SIZE as usize); + assert_eq!(page_addr, (memory_start + WASM_PAGE_SIZE) as _); + assert_eq!(len, WASM_PAGE_SIZE); assert_eq!(mem_index, memory_index); assert_eq!(page_index, Some(1)); } @@ -659,7 +741,7 @@ mod test { } // Test for guard page - match locator.get_location(memory_start + 10 + 9 * WASM_PAGE_SIZE as usize) { + match locator.get_location(memory_start + 10 + 9 * WASM_PAGE_SIZE) { Some(AddressLocation::MemoryPage { page_addr, len, @@ -667,11 +749,8 @@ mod test { memory_index: mem_index, page_index, }) => { - assert_eq!( - page_addr, - (memory_start + (9 * WASM_PAGE_SIZE as usize)) as _ - ); - assert_eq!(len, WASM_PAGE_SIZE as usize); + assert_eq!(page_addr, (memory_start + (9 * WASM_PAGE_SIZE)) as _); + assert_eq!(len, WASM_PAGE_SIZE); assert_eq!(mem_index, memory_index); assert_eq!(page_index, None); } diff --git a/crates/wasmtime/src/instance.rs b/crates/wasmtime/src/instance.rs index 7048d93338..9e2992c9b8 100644 --- a/crates/wasmtime/src/instance.rs +++ b/crates/wasmtime/src/instance.rs @@ -522,11 +522,7 @@ impl<'a> Instantiator<'a> { // initialization is successful, we need to keep the instance alive. let instance = self.store.add_instance(instance, false); allocator - .initialize( - &instance.handle, - config.features.bulk_memory, - &compiled_module.data_initializers(), - ) + .initialize(&instance.handle, config.features.bulk_memory) .map_err(|e| -> Error { match e { InstantiationError::Trap(trap) => {