Implement on-demand memory initialization for the uffd feature.

This commit implements copying paged initialization data upon a fault of a
linear memory page.

If the initialization data is "paged", then the appropriate pages are copied
into the Wasm page (or zeroed if the page is not present in the
initialization data).

If the initialization data is not "paged", the Wasm page is zeroed so that
module instantiation can initialize the pages.
This commit is contained in:
Peter Huene
2021-02-16 23:27:14 -08:00
parent a82f1a323f
commit f5c4d87c45
10 changed files with 562 additions and 334 deletions

View File

@@ -13,6 +13,7 @@ edition = "2018"
[dependencies]
anyhow = "1.0"
region = "2.2.0"
cranelift-codegen = { path = "../../cranelift/codegen", version = "0.71.0", features = ["enable-serde"] }
cranelift-entity = { path = "../../cranelift/entity", version = "0.71.0", features = ["enable-serde"] }
cranelift-wasm = { path = "../../cranelift/wasm", version = "0.71.0", features = ["enable-serde"] }

View File

@@ -1,7 +1,7 @@
//! Data structures for representing decoded wasm modules.
use crate::tunables::Tunables;
use crate::WASM_MAX_PAGES;
use crate::{DataInitializer, WASM_MAX_PAGES, WASM_PAGE_SIZE};
use cranelift_codegen::ir;
use cranelift_entity::{EntityRef, PrimaryMap};
use cranelift_wasm::*;
@@ -10,19 +10,6 @@ use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
/// A WebAssembly table initializer.
///
/// Describes a run of function indices to be written into a single table,
/// starting at `offset` (plus the value of the `base` global, when present).
#[derive(Clone, Debug, Hash, Serialize, Deserialize)]
pub struct TableElements {
/// The index of a table to initialize.
pub table_index: TableIndex,
/// Optionally, a global variable giving a base index.
pub base: Option<GlobalIndex>,
/// The offset to add to the base.
pub offset: usize,
/// The values to write into the table elements.
pub elements: Box<[FuncIndex]>,
}
/// Implementation styles for WebAssembly linear memory.
#[derive(Debug, Clone, Hash, Serialize, Deserialize)]
pub enum MemoryStyle {
@@ -92,6 +79,164 @@ impl MemoryPlan {
}
}
/// A WebAssembly linear memory initializer.
///
/// Describes one data segment: the bytes in `data` are to be written into the
/// memory identified by `memory_index`, starting at `offset` (plus the value
/// of the `base` global, when present). The segment bytes are owned
/// (`Box<[u8]>`) rather than borrowed.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MemoryInitializer {
/// The index of a linear memory to initialize.
pub memory_index: MemoryIndex,
/// Optionally, a global variable giving a base index.
pub base: Option<GlobalIndex>,
/// The offset to add to the base.
pub offset: usize,
/// The data to write into the linear memory.
pub data: Box<[u8]>,
}
impl From<DataInitializer<'_>> for MemoryInitializer {
fn from(initializer: DataInitializer) -> Self {
Self {
memory_index: initializer.memory_index,
base: initializer.base,
offset: initializer.offset,
data: initializer.data.into(),
}
}
}
/// The type of WebAssembly linear memory initialization.
///
/// `MemoryInitialization::new` classifies a module's data segments into
/// exactly one of the variants below.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum MemoryInitialization {
/// Memory initialization is paged.
///
/// To be paged, the following requirements must be met:
///
/// * All data segments must reference defined memories.
/// * All data segments must not use a global base.
/// * All data segments must be in bounds.
///
/// Paged initialization is performed by memcopying individual pages to the linear memory.
Paged {
/// The size of each page stored in the map.
/// This is expected to be the host page size.
page_size: usize,
/// The map of defined memory index to a list of page data.
/// The list of page data is sparse, with None representing a zero page.
/// The length of the list is one more than the highest page index written to
/// by a data segment (the list is empty if no data was written to the memory).
map: PrimaryMap<DefinedMemoryIndex, Vec<Option<Box<[u8]>>>>,
},
/// Memory initialization is out of bounds.
///
/// To be out of bounds, the following requirements must be met:
///
/// * All data segments must reference defined memories.
/// * All data segments must not use a global base.
/// * At least one data segment was out of bounds.
///
/// This can be used to quickly return an error when the module is instantiated.
OutOfBounds,
/// Memory initialization is segmented.
///
/// To be segmented, at least one of the following requirements must be met:
///
/// * A data segment referenced an imported memory.
/// * A data segment uses a global base.
///
/// Segmented initialization is performed by processing the complete set of data segments
/// when the module is instantiated.
///
/// This ensures that initialization side-effects are observed according to the bulk-memory proposal.
Segmented(Box<[MemoryInitializer]>),
}
impl MemoryInitialization {
    /// Creates a new memory initialization for a module and its data initializers.
    ///
    /// The result is:
    ///
    /// * `Segmented` as soon as any initializer references an imported memory or
    ///   uses a global base; every initializer is then converted, unchanged, into
    ///   an owned `MemoryInitializer`.
    /// * `OutOfBounds` if no initializer forces segmented initialization but at
    ///   least one does not fit within the minimum size of its memory.
    /// * `Paged` otherwise, with the segment data copied into buffers of the
    ///   host page size, one sparse list of pages per defined memory.
    pub fn new(module: &Module, initializers: Vec<DataInitializer>) -> Self {
        let page_size = region::page::size();
        let num_defined_memories = module.memory_plans.len() - module.num_imported_memories;
        let mut out_of_bounds = false;

        // Start with one empty page list per defined memory.
        let mut memories = PrimaryMap::with_capacity(num_defined_memories);
        for _ in 0..num_defined_memories {
            memories.push(Vec::new());
        }

        for initializer in &initializers {
            match (
                module.defined_memory_index(initializer.memory_index),
                initializer.base.is_some(),
            ) {
                (None, _) | (_, true) => {
                    // If the initializer references an imported memory or uses a global base,
                    // the complete set of segments will need to be processed at module instantiation
                    return Self::Segmented(
                        initializers
                            .into_iter()
                            .map(Into::into)
                            .collect::<Vec<_>>()
                            .into_boxed_slice(),
                    );
                }
                (Some(index), false) => {
                    // Once a segment is known to be out of bounds no more page data is
                    // needed, but keep scanning: a later segment may still require
                    // segmented initialization.
                    if out_of_bounds {
                        continue;
                    }

                    // Perform a bounds check on the segment against the memory's minimum size.
                    // Checked/saturating arithmetic is used so that a huge offset or memory
                    // size cannot overflow `usize` (e.g. on 32-bit hosts) and turn an
                    // out-of-bounds segment into an apparently in-bounds one.
                    let memory_size = (module.memory_plans[initializer.memory_index]
                        .memory
                        .minimum as usize)
                        .saturating_mul(WASM_PAGE_SIZE as usize);
                    let in_bounds = initializer
                        .offset
                        .checked_add(initializer.data.len())
                        .map_or(false, |end| end <= memory_size);
                    if !in_bounds {
                        out_of_bounds = true;
                        continue;
                    }

                    let pages = &mut memories[index];
                    let mut page_index = initializer.offset / page_size;
                    let mut page_offset = initializer.offset % page_size;
                    let mut remaining = initializer.data;

                    // Copy the initialization data page by page. Only the first page can
                    // start at a non-zero offset; an empty segment touches no pages.
                    while !remaining.is_empty() {
                        if page_index >= pages.len() {
                            pages.resize(page_index + 1, None);
                        }

                        // Pages are allocated lazily and zero-filled so that bytes not
                        // covered by any segment read as zero.
                        let page = pages[page_index]
                            .get_or_insert_with(|| vec![0; page_size].into_boxed_slice());

                        let len = remaining.len().min(page_size - page_offset);
                        let (chunk, rest) = remaining.split_at(len);
                        page[page_offset..page_offset + len].copy_from_slice(chunk);

                        remaining = rest;
                        page_index += 1;
                        page_offset = 0;
                    }
                }
            };
        }

        if out_of_bounds {
            Self::OutOfBounds
        } else {
            Self::Paged {
                page_size,
                map: memories,
            }
        }
    }
}
/// Implementation styles for WebAssembly tables.
#[derive(Debug, Clone, Hash, Serialize, Deserialize)]
pub enum TableStyle {
@@ -124,6 +269,19 @@ impl TablePlan {
}
}
/// A WebAssembly table initializer.
///
/// Describes a run of function indices to be written into a single table,
/// starting at `offset` (plus the value of the `base` global, when present).
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TableInitializer {
/// The index of a table to initialize.
pub table_index: TableIndex,
/// Optionally, a global variable giving a base index.
pub base: Option<GlobalIndex>,
/// The offset to add to the base.
pub offset: usize,
/// The values to write into the table elements.
pub elements: Box<[FuncIndex]>,
}
/// Different types that can appear in a module.
///
/// Note that each of these variants are intended to index further into a
@@ -164,7 +322,10 @@ pub struct Module {
pub start_func: Option<FuncIndex>,
/// WebAssembly table initializers.
pub table_elements: Vec<TableElements>,
pub table_initializers: Vec<TableInitializer>,
/// WebAssembly linear memory initializer.
pub memory_initialization: Option<MemoryInitialization>,
/// WebAssembly passive elements.
pub passive_elements: Vec<Box<[FuncIndex]>>,

View File

@@ -1,6 +1,6 @@
use crate::module::{
Initializer, InstanceSignature, MemoryPlan, Module, ModuleSignature, ModuleType, ModuleUpvar,
TableElements, TablePlan, TypeTables,
TableInitializer, TablePlan, TypeTables,
};
use crate::tunables::Tunables;
use cranelift_codegen::ir;
@@ -13,7 +13,6 @@ use cranelift_wasm::{
ModuleIndex, ModuleTypeIndex, SignatureIndex, Table, TableIndex, TargetEnvironment, TypeIndex,
WasmError, WasmFuncType, WasmResult,
};
use serde::{Deserialize, Serialize};
use std::collections::{hash_map::Entry, HashMap};
use std::convert::TryFrom;
use std::mem;
@@ -684,7 +683,7 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data
fn reserve_table_elements(&mut self, num: u32) -> WasmResult<()> {
self.result
.module
.table_elements
.table_initializers
.reserve_exact(usize::try_from(num).unwrap());
Ok(())
}
@@ -696,12 +695,15 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data
offset: usize,
elements: Box<[FuncIndex]>,
) -> WasmResult<()> {
self.result.module.table_elements.push(TableElements {
table_index,
base,
offset,
elements,
});
self.result
.module
.table_initializers
.push(TableInitializer {
table_index,
base,
offset,
elements,
});
Ok(())
}
@@ -774,11 +776,9 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data
data: &'data [u8],
) -> WasmResult<()> {
self.result.data_initializers.push(DataInitializer {
location: DataInitializerLocation {
memory_index,
base,
offset,
},
memory_index,
base,
offset,
data,
});
Ok(())
@@ -1072,10 +1072,8 @@ pub fn translate_signature(mut sig: ir::Signature, pointer_type: ir::Type) -> ir
sig
}
/// A memory index and offset within that memory where a data initialization
/// is to be performed.
#[derive(Clone, Serialize, Deserialize)]
pub struct DataInitializerLocation {
/// A data initializer for linear memory.
pub struct DataInitializer<'data> {
/// The index of the memory to initialize.
pub memory_index: MemoryIndex,
@@ -1084,34 +1082,7 @@ pub struct DataInitializerLocation {
/// A constant offset to initialize at.
pub offset: usize,
}
/// A data initializer for linear memory.
///
/// The initialization bytes are borrowed for the `'data` lifetime rather than
/// owned by this structure.
pub struct DataInitializer<'data> {
/// The location where the initialization is to be performed.
pub location: DataInitializerLocation,
/// The initialization data.
pub data: &'data [u8],
}
/// Similar to `DataInitializer`, but owns its own copy of the data rather
/// than holding a slice of the original module.
///
/// Because the data is owned, this type carries no lifetime parameter and
/// derives `Serialize`/`Deserialize`.
#[derive(Serialize, Deserialize)]
pub struct OwnedDataInitializer {
/// The location where the initialization is to be performed.
pub location: DataInitializerLocation,
/// The initialization data.
pub data: Box<[u8]>,
}
impl OwnedDataInitializer {
/// Creates a new owned data initializer from a borrowed data initializer.
pub fn new(borrowed: DataInitializer<'_>) -> Self {
Self {
location: borrowed.location.clone(),
data: borrowed.data.into(),
}
}
}