Fix determinism of compiled modules (#3229)

* Fix determinism of compiled modules

Currently wasmtime's compilation artifacts are not deterministic due to
the usage of `HashMap` during serialization which has randomized order
of its elements. This commit fixes that by switching to a sorted
`BTreeMap` for various maps. A test is also added to ensure determinism.

If in the future the performance of `BTreeMap` is not as good as
`HashMap` for some of these cases we can implement a fancier
`serialize_with`-style solution where we sort keys during serialization,
but only during serialization and otherwise use a `HashMap`.

* fix lightbeam
This commit is contained in:
Alex Crichton
2021-08-23 17:08:19 -05:00
committed by GitHub
parent eb21ae149a
commit f3977f1d97
7 changed files with 63 additions and 20 deletions

View File

@@ -20,7 +20,7 @@ use cranelift_wasm::{
}; };
use std::any::Any; use std::any::Any;
use std::cmp; use std::cmp;
use std::collections::{BTreeSet, HashMap}; use std::collections::{BTreeMap, BTreeSet};
use std::convert::TryFrom; use std::convert::TryFrom;
use std::mem; use std::mem;
use std::sync::Mutex; use std::sync::Mutex;
@@ -306,7 +306,7 @@ impl wasmtime_environ::Compiler for Compiler {
self.isa.triple() self.isa.triple()
} }
fn flags(&self) -> HashMap<String, FlagValue> { fn flags(&self) -> BTreeMap<String, FlagValue> {
self.isa self.isa
.flags() .flags()
.iter() .iter()
@@ -314,7 +314,7 @@ impl wasmtime_environ::Compiler for Compiler {
.collect() .collect()
} }
fn isa_flags(&self) -> HashMap<String, FlagValue> { fn isa_flags(&self) -> BTreeMap<String, FlagValue> {
self.isa self.isa
.isa_flags() .isa_flags()
.iter() .iter()

View File

@@ -9,7 +9,7 @@ use anyhow::Result;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::any::Any; use std::any::Any;
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashMap; use std::collections::BTreeMap;
use std::fmt; use std::fmt;
use thiserror::Error; use thiserror::Error;
@@ -204,10 +204,10 @@ pub trait Compiler: Send + Sync {
fn triple(&self) -> &target_lexicon::Triple; fn triple(&self) -> &target_lexicon::Triple;
/// Returns a list of configured settings for this compiler. /// Returns a list of configured settings for this compiler.
fn flags(&self) -> HashMap<String, FlagValue>; fn flags(&self) -> BTreeMap<String, FlagValue>;
/// Same as [`Compiler::flags`], but ISA-specific (a cranelift-ism) /// Same as [`Compiler::flags`], but ISA-specific (a cranelift-ism)
fn isa_flags(&self) -> HashMap<String, FlagValue>; fn isa_flags(&self) -> BTreeMap<String, FlagValue>;
} }
/// Value of a configured setting for a [`Compiler`] /// Value of a configured setting for a [`Compiler`]

View File

@@ -3,7 +3,7 @@
use crate::{EntityRef, PrimaryMap, Tunables}; use crate::{EntityRef, PrimaryMap, Tunables};
use indexmap::IndexMap; use indexmap::IndexMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet}; use std::collections::{BTreeMap, BTreeSet};
use std::convert::TryFrom; use std::convert::TryFrom;
use std::sync::Arc; use std::sync::Arc;
use wasmtime_types::*; use wasmtime_types::*;
@@ -349,17 +349,17 @@ pub struct Module {
pub passive_elements: Vec<Box<[FuncIndex]>>, pub passive_elements: Vec<Box<[FuncIndex]>>,
/// The map from passive element index (element segment index space) to index in `passive_elements`. /// The map from passive element index (element segment index space) to index in `passive_elements`.
pub passive_elements_map: HashMap<ElemIndex, usize>, pub passive_elements_map: BTreeMap<ElemIndex, usize>,
/// WebAssembly passive data segments. /// WebAssembly passive data segments.
#[serde(with = "passive_data_serde")] #[serde(with = "passive_data_serde")]
pub passive_data: Vec<Arc<[u8]>>, pub passive_data: Vec<Arc<[u8]>>,
/// The map from passive data index (data segment index space) to index in `passive_data`. /// The map from passive data index (data segment index space) to index in `passive_data`.
pub passive_data_map: HashMap<DataIndex, usize>, pub passive_data_map: BTreeMap<DataIndex, usize>,
/// WebAssembly function names. /// WebAssembly function names.
pub func_names: HashMap<FuncIndex, String>, pub func_names: BTreeMap<FuncIndex, String>,
/// Types declared in the wasm module. /// Types declared in the wasm module.
pub types: PrimaryMap<TypeIndex, ModuleType>, pub types: PrimaryMap<TypeIndex, ModuleType>,
@@ -396,7 +396,7 @@ pub struct Module {
/// The set of defined functions within this module which are located in /// The set of defined functions within this module which are located in
/// element segments. /// element segments.
pub possibly_exported_funcs: HashSet<DefinedFuncIndex>, pub possibly_exported_funcs: BTreeSet<DefinedFuncIndex>,
} }
/// Initialization routines for creating an instance, encompassing imports, /// Initialization routines for creating an instance, encompassing imports,

View File

@@ -9,7 +9,7 @@ use anyhow::Result;
use cranelift_codegen::binemit; use cranelift_codegen::binemit;
use cranelift_codegen::ir::{self, ExternalName}; use cranelift_codegen::ir::{self, ExternalName};
use std::any::Any; use std::any::Any;
use std::collections::HashMap; use std::collections::BTreeMap;
use wasmtime_environ::{ use wasmtime_environ::{
BuiltinFunctionIndex, CompileError, Compiler, FlagValue, FunctionBodyData, FunctionInfo, BuiltinFunctionIndex, CompileError, Compiler, FlagValue, FunctionBodyData, FunctionInfo,
Module, ModuleTranslation, PrimaryMap, TrapInformation, Tunables, TypeTables, VMOffsets, Module, ModuleTranslation, PrimaryMap, TrapInformation, Tunables, TypeTables, VMOffsets,
@@ -96,11 +96,11 @@ impl Compiler for Lightbeam {
unimplemented!() unimplemented!()
} }
fn flags(&self) -> HashMap<String, FlagValue> { fn flags(&self) -> BTreeMap<String, FlagValue> {
unimplemented!() unimplemented!()
} }
fn isa_flags(&self) -> HashMap<String, FlagValue> { fn isa_flags(&self) -> BTreeMap<String, FlagValue> {
unimplemented!() unimplemented!()
} }
} }

View File

@@ -16,7 +16,7 @@ use memoffset::offset_of;
use more_asserts::assert_lt; use more_asserts::assert_lt;
use std::alloc::Layout; use std::alloc::Layout;
use std::any::Any; use std::any::Any;
use std::collections::HashMap; use std::collections::BTreeMap;
use std::convert::TryFrom; use std::convert::TryFrom;
use std::hash::Hash; use std::hash::Hash;
use std::ptr::NonNull; use std::ptr::NonNull;
@@ -511,13 +511,13 @@ impl Instance {
fn find_passive_segment<'a, I, D, T>( fn find_passive_segment<'a, I, D, T>(
index: I, index: I,
index_map: &HashMap<I, usize>, index_map: &BTreeMap<I, usize>,
data: &'a Vec<D>, data: &'a Vec<D>,
dropped: &EntitySet<I>, dropped: &EntitySet<I>,
) -> &'a [T] ) -> &'a [T]
where where
D: AsRef<[T]>, D: AsRef<[T]>,
I: EntityRef + Hash, I: EntityRef + Ord,
{ {
match index_map.get(&index) { match index_map.get(&index) {
Some(index) if !dropped.contains(I::new(*index)) => data[*index].as_ref(), Some(index) if !dropped.contains(I::new(*index)) => data[*index].as_ref(),

View File

@@ -4,7 +4,7 @@ use crate::{Engine, Module};
use anyhow::{anyhow, bail, Context, Result}; use anyhow::{anyhow, bail, Context, Result};
use bincode::Options; use bincode::Options;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::HashMap; use std::collections::BTreeMap;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
use wasmtime_environ::{Compiler, FlagValue, Tunables}; use wasmtime_environ::{Compiler, FlagValue, Tunables};
@@ -152,8 +152,8 @@ impl SerializedModuleUpvar {
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub struct SerializedModule<'a> { pub struct SerializedModule<'a> {
target: String, target: String,
shared_flags: HashMap<String, FlagValue>, shared_flags: BTreeMap<String, FlagValue>,
isa_flags: HashMap<String, FlagValue>, isa_flags: BTreeMap<String, FlagValue>,
tunables: Tunables, tunables: Tunables,
features: WasmFeatures, features: WasmFeatures,
artifacts: Vec<MyCow<'a, CompilationArtifacts>>, artifacts: Vec<MyCow<'a, CompilationArtifacts>>,

View File

@@ -78,3 +78,46 @@ fn aot_compiles() -> Result<()> {
Ok(()) Ok(())
} }
#[test]
fn serialize_deterministic() {
let engine = Engine::default();
let assert_deterministic = |wasm: &str| {
let p1 = engine.precompile_module(wasm.as_bytes()).unwrap();
let p2 = engine.precompile_module(wasm.as_bytes()).unwrap();
if p1 != p2 {
panic!("precompile_module not determinisitc for:\n{}", wasm);
}
let module1 = Module::new(&engine, wasm).unwrap();
let a1 = module1.serialize().unwrap();
let a2 = module1.serialize().unwrap();
if a1 != a2 {
panic!("Module::serialize not determinisitc for:\n{}", wasm);
}
let module2 = Module::new(&engine, wasm).unwrap();
let b1 = module2.serialize().unwrap();
let b2 = module2.serialize().unwrap();
if b1 != b2 {
panic!("Module::serialize not determinisitc for:\n{}", wasm);
}
if a1 != b2 {
panic!("not matching across modules:\n{}", wasm);
}
if b1 != p2 {
panic!("not matching across engine/module:\n{}", wasm);
}
};
assert_deterministic("(module)");
assert_deterministic("(module (func))");
assert_deterministic("(module (func nop))");
assert_deterministic("(module (func) (func (param i32)))");
assert_deterministic("(module (func (export \"f\")) (func (export \"y\")))");
assert_deterministic("(module (func $f) (func $g))");
assert_deterministic("(module (data \"\") (data \"\"))");
assert_deterministic("(module (elem) (elem))");
}