diff --git a/crates/fuzzing/src/generators.rs b/crates/fuzzing/src/generators.rs index b4242d4302..c0b93e1242 100644 --- a/crates/fuzzing/src/generators.rs +++ b/crates/fuzzing/src/generators.rs @@ -15,18 +15,20 @@ mod config; mod instance_allocation_strategy; mod instance_limits; mod memory; -mod module_config; +mod module; mod single_inst_module; mod spec_test; mod stacks; pub mod table_ops; +mod value; pub use codegen_settings::CodegenSettings; pub use config::{Config, WasmtimeConfig}; pub use instance_allocation_strategy::InstanceAllocationStrategy; pub use instance_limits::InstanceLimits; pub use memory::{MemoryConfig, NormalMemoryConfig, UnalignedMemory, UnalignedMemoryCreator}; -pub use module_config::ModuleConfig; +pub use module::ModuleConfig; pub use single_inst_module::SingleInstModule; pub use spec_test::SpecTest; pub use stacks::Stacks; +pub use value::{DiffValue, DiffValueType}; diff --git a/crates/fuzzing/src/generators/config.rs b/crates/fuzzing/src/generators/config.rs index a2bd2dc2ff..d8b072a072 100644 --- a/crates/fuzzing/src/generators/config.rs +++ b/crates/fuzzing/src/generators/config.rs @@ -92,6 +92,8 @@ impl Config { limits.tables = 1; limits.table_elements = 1_000; + limits.size = 1_000_000; + match &mut self.wasmtime.memory_config { MemoryConfig::Normal(config) => { config.static_memory_maximum_size = Some(limits.memory_pages * 0x10000); @@ -101,6 +103,34 @@ impl Config { } } + /// Force `self` to be a configuration compatible with `other`. This is + /// useful for differential execution to avoid unhelpful fuzz crashes when + /// one engine has a feature enabled and the other does not. + pub fn make_compatible_with(&mut self, other: &Self) { + // Use the same `wasm-smith` configuration as `other` because this is + // used for determining what Wasm features are enabled in the engine + // (see `to_wasmtime`). + self.module_config = other.module_config.clone(); + + // Use the same allocation strategy between the two configs. 
+ // + // Ideally this wouldn't be necessary, but, during differential + // evaluation, if the `lhs` is using ondemand and the `rhs` is using the + // pooling allocator (or vice versa), then the module may have been + // generated in such a way that is incompatible with the other + // allocation strategy. + // + // We can remove this in the future when it's possible to access the + // fields of `wasm_smith::Module` to constrain the pooling allocator + // based on what was actually generated. + self.wasmtime.strategy = other.wasmtime.strategy.clone(); + if let InstanceAllocationStrategy::Pooling { .. } = &other.wasmtime.strategy { + // Also use the same memory configuration when using the pooling + // allocator. + self.wasmtime.memory_config = other.wasmtime.memory_config.clone(); + } + } + /// Uses this configuration and the supplied source of data to generate /// a wasm module. /// @@ -112,13 +142,7 @@ impl Config { input: &mut Unstructured<'_>, default_fuel: Option, ) -> arbitrary::Result { - let mut module = wasm_smith::Module::new(self.module_config.config.clone(), input)?; - - if let Some(default_fuel) = default_fuel { - module.ensure_termination(default_fuel); - } - - Ok(module) + self.module_config.generate(input, default_fuel) } /// Indicates that this configuration should be spec-test-compliant, diff --git a/crates/fuzzing/src/generators/module_config.rs b/crates/fuzzing/src/generators/module.rs similarity index 60% rename from crates/fuzzing/src/generators/module_config.rs rename to crates/fuzzing/src/generators/module.rs index 190b41e3d4..2c608acb48 100644 --- a/crates/fuzzing/src/generators/module_config.rs +++ b/crates/fuzzing/src/generators/module.rs @@ -1,4 +1,4 @@ -//! Generate a configuration for generating a Wasm module. +//! Generate a Wasm module and the configuration for generating it. 
use arbitrary::{Arbitrary, Unstructured}; use wasm_smith::SwarmConfig; @@ -36,3 +36,26 @@ impl<'a> Arbitrary<'a> for ModuleConfig { Ok(ModuleConfig { config }) } } + +impl ModuleConfig { + /// Uses this configuration and the supplied source of data to generate a + /// Wasm module. + /// + /// If a `default_fuel` is provided, the resulting module will be configured + /// to ensure termination; as doing so will add an additional global to the + /// module, the pooling allocator, if configured, must also have its globals + /// limit updated. + pub fn generate( + &self, + input: &mut Unstructured<'_>, + default_fuel: Option, + ) -> arbitrary::Result { + let mut module = wasm_smith::Module::new(self.config.clone(), input)?; + + if let Some(default_fuel) = default_fuel { + module.ensure_termination(default_fuel); + } + + Ok(module) + } +} diff --git a/crates/fuzzing/src/generators/single_inst_module.rs b/crates/fuzzing/src/generators/single_inst_module.rs index fda2518359..6e1c193d34 100644 --- a/crates/fuzzing/src/generators/single_inst_module.rs +++ b/crates/fuzzing/src/generators/single_inst_module.rs @@ -1,6 +1,7 @@ //! Generate Wasm modules that contain a single instruction. -use arbitrary::{Arbitrary, Unstructured}; +use super::ModuleConfig; +use arbitrary::Unstructured; use wasm_encoder::{ CodeSection, ExportKind, ExportSection, Function, FunctionSection, Instruction, Module, TypeSection, ValType, @@ -13,17 +14,38 @@ const FUNCTION_NAME: &'static str = "test"; /// /// By explicitly defining the parameter and result types (versus generating the /// module directly), we can more easily generate values of the right type. 
-#[derive(Clone, Debug)] +#[derive(Clone)] pub struct SingleInstModule<'a> { instruction: Instruction<'a>, parameters: &'a [ValType], results: &'a [ValType], + feature: fn(&ModuleConfig) -> bool, } impl<'a> SingleInstModule<'a> { - /// Generate a binary Wasm module with a single exported function, `test`, + /// Choose a single-instruction module that matches `config`. + pub fn new(u: &mut Unstructured<'a>, config: &mut ModuleConfig) -> arbitrary::Result<&'a Self> { + // To avoid skipping modules unnecessarily during fuzzing, fix up the + // `ModuleConfig` to match the inherent limits of a single-instruction + // module. + config.config.min_funcs = 1; + config.config.max_funcs = 1; + config.config.min_tables = 0; + config.config.max_tables = 0; + config.config.min_memories = 0; + config.config.max_memories = 0; + + // Only select instructions that match the `ModuleConfig`. + let instructions = &INSTRUCTIONS + .iter() + .filter(|i| (i.feature)(config)) + .collect::>(); + u.choose(&instructions[..]).copied() + } + + /// Encode a binary Wasm module with a single exported function, `test`, /// that executes the single instruction. - pub fn encode(&self) -> Vec { + pub fn to_bytes(&self) -> Vec { let mut module = Module::new(); // Encode the type section. @@ -61,12 +83,6 @@ impl<'a> SingleInstModule<'a> { } } -impl<'a> Arbitrary<'a> for &SingleInstModule<'_> { - fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { - u.choose(&INSTRUCTIONS) - } -} - // MACROS // // These macros make it a bit easier to define the instructions available for @@ -91,39 +107,52 @@ macro_rules! valtype { macro_rules! binary { ($inst:ident, $rust_ty:tt) => { - binary! { $inst, valtype!($rust_ty), valtype!($rust_ty) } + binary! 
{ $inst, $rust_ty, $rust_ty } }; - ($inst:ident, $arguments_ty:expr, $result_ty:expr) => { + ($inst:ident, $arguments_ty:tt, $result_ty:tt) => { SingleInstModule { instruction: Instruction::$inst, - parameters: &[$arguments_ty, $arguments_ty], - results: &[$result_ty], + parameters: &[valtype!($arguments_ty), valtype!($arguments_ty)], + results: &[valtype!($result_ty)], + feature: |_| true, } }; } macro_rules! compare { ($inst:ident, $rust_ty:tt) => { - binary! { $inst, valtype!($rust_ty), ValType::I32 } + binary! { $inst, $rust_ty, i32 } }; } macro_rules! unary { ($inst:ident, $rust_ty:tt) => { - unary! { $inst, valtype!($rust_ty), valtype!($rust_ty) } + unary! { $inst, $rust_ty, $rust_ty } }; - ($inst:ident, $argument_ty:expr, $result_ty:expr) => { + ($inst:ident, $argument_ty:tt, $result_ty:tt) => { SingleInstModule { instruction: Instruction::$inst, - parameters: &[$argument_ty], - results: &[$result_ty], + parameters: &[valtype!($argument_ty)], + results: &[valtype!($result_ty)], + feature: |_| true, + } + }; + ($inst:ident, $argument_ty:tt, $result_ty:tt, $feature:expr) => { + SingleInstModule { + instruction: Instruction::$inst, + parameters: &[valtype!($argument_ty)], + results: &[valtype!($result_ty)], + feature: $feature, } }; } macro_rules! convert { ($inst:ident, $from_ty:tt -> $to_ty:tt) => { - unary! { $inst, valtype!($from_ty), valtype!($to_ty) } + unary! { $inst, $from_ty, $to_ty } + }; + ($inst:ident, $from_ty:tt -> $to_ty:tt, $feature:expr) => { + unary! { $inst, $from_ty, $to_ty, $feature } }; } @@ -172,7 +201,7 @@ static INSTRUCTIONS: &[SingleInstModule] = &[ binary!(I64Rotr, i64), // Integer comparison. unary!(I32Eqz, i32), - unary!(I64Eqz, ValType::I64, ValType::I32), + unary!(I64Eqz, i64, i32), compare!(I32Eq, i32), compare!(I64Eq, i64), compare!(I32Ne, i32), @@ -236,11 +265,11 @@ static INSTRUCTIONS: &[SingleInstModule] = &[ compare!(F32Ge, f32), compare!(F64Ge, f64), // Integer conversions ("to integer"). 
- unary!(I32Extend8S, i32), - unary!(I32Extend16S, i32), - unary!(I64Extend8S, i64), - unary!(I64Extend16S, i64), - convert!(I64Extend32S, i64 -> i64), + unary!(I32Extend8S, i32, i32, |c| c.config.sign_extension_enabled), + unary!(I32Extend16S, i32, i32, |c| c.config.sign_extension_enabled), + unary!(I64Extend8S, i64, i64, |c| c.config.sign_extension_enabled), + unary!(I64Extend16S, i64, i64, |c| c.config.sign_extension_enabled), + convert!(I64Extend32S, i64 -> i64, |c| c.config.sign_extension_enabled), convert!(I32WrapI64, i64 -> i32), convert!(I64ExtendI32S, i32 -> i64), convert!(I64ExtendI32U, i32 -> i64), @@ -252,14 +281,14 @@ static INSTRUCTIONS: &[SingleInstModule] = &[ convert!(I64TruncF32U, f32 -> i64), convert!(I64TruncF64S, f64 -> i64), convert!(I64TruncF64U, f64 -> i64), - convert!(I32TruncSatF32S, f32 -> i32), - convert!(I32TruncSatF32U, f32 -> i32), - convert!(I32TruncSatF64S, f64 -> i32), - convert!(I32TruncSatF64U, f64 -> i32), - convert!(I64TruncSatF32S, f32 -> i64), - convert!(I64TruncSatF32U, f32 -> i64), - convert!(I64TruncSatF64S, f64 -> i64), - convert!(I64TruncSatF64U, f64 -> i64), + convert!(I32TruncSatF32S, f32 -> i32, |c| c.config.saturating_float_to_int_enabled), + convert!(I32TruncSatF32U, f32 -> i32, |c| c.config.saturating_float_to_int_enabled), + convert!(I32TruncSatF64S, f64 -> i32, |c| c.config.saturating_float_to_int_enabled), + convert!(I32TruncSatF64U, f64 -> i32, |c| c.config.saturating_float_to_int_enabled), + convert!(I64TruncSatF32S, f32 -> i64, |c| c.config.saturating_float_to_int_enabled), + convert!(I64TruncSatF32U, f32 -> i64, |c| c.config.saturating_float_to_int_enabled), + convert!(I64TruncSatF64S, f64 -> i64, |c| c.config.saturating_float_to_int_enabled), + convert!(I64TruncSatF64U, f64 -> i64, |c| c.config.saturating_float_to_int_enabled), convert!(I32ReinterpretF32, f32 -> i32), convert!(I64ReinterpretF64, f64 -> i64), // Floating-point conversions ("to float"). 
@@ -287,8 +316,9 @@ mod test { instruction: Instruction::I32Add, parameters: &[ValType::I32, ValType::I32], results: &[ValType::I32], + feature: |_| true, }; - let wasm = sut.encode(); + let wasm = sut.to_bytes(); let wat = wasmprinter::print_bytes(wasm).unwrap(); assert_eq!( wat, @@ -307,7 +337,7 @@ mod test { #[test] fn instructions_encode_to_valid_modules() { for inst in INSTRUCTIONS { - assert!(wat::parse_bytes(&inst.encode()).is_ok()); + assert!(wat::parse_bytes(&inst.to_bytes()).is_ok()); } } } diff --git a/crates/fuzzing/src/generators/value.rs b/crates/fuzzing/src/generators/value.rs new file mode 100644 index 0000000000..9386c137bd --- /dev/null +++ b/crates/fuzzing/src/generators/value.rs @@ -0,0 +1,177 @@ +//! Generate Wasm values, primarily for differential execution. + +use arbitrary::{Arbitrary, Unstructured}; +use std::hash::Hash; + +/// A value passed to and from evaluation. Note that reference types are not +/// (yet) supported. +#[derive(Clone, Debug)] +#[allow(missing_docs)] +pub enum DiffValue { + I32(i32), + I64(i64), + F32(u32), + F64(u64), + V128(u128), +} + +impl DiffValue { + fn ty(&self) -> DiffValueType { + match self { + DiffValue::I32(_) => DiffValueType::I32, + DiffValue::I64(_) => DiffValueType::I64, + DiffValue::F32(_) => DiffValueType::F32, + DiffValue::F64(_) => DiffValueType::F64, + DiffValue::V128(_) => DiffValueType::V128, + } + } + + /// Generate a [`DiffValue`] of the given `ty` type. + /// + /// This function will bias the returned value 50% of the time towards one + /// of a set of known values (e.g., NaN, -1, 0, infinity, etc.). + pub fn arbitrary_of_type( + u: &mut Unstructured<'_>, + ty: DiffValueType, + ) -> arbitrary::Result { + use DiffValueType::*; + let val = match ty { + I32 => DiffValue::I32(biased_arbitrary_value(u, KNOWN_I32_VALUES)?), + I64 => DiffValue::I64(biased_arbitrary_value(u, KNOWN_I64_VALUES)?), + F32 => { + // TODO once `to_bits` is stable as a `const` function, move + // this to a `const` definition. 
+ let known_f32_values = &[ + f32::NAN.to_bits(), + f32::INFINITY.to_bits(), + f32::NEG_INFINITY.to_bits(), + f32::MIN.to_bits(), + (-1.0f32).to_bits(), + (0.0f32).to_bits(), + (1.0f32).to_bits(), + f32::MAX.to_bits(), + ]; + DiffValue::F32(biased_arbitrary_value(u, known_f32_values)?) + } + F64 => { + // TODO once `to_bits` is stable as a `const` function, move + // this to a `const` definition. + let known_f64_values = &[ + f64::NAN.to_bits(), + f64::INFINITY.to_bits(), + f64::NEG_INFINITY.to_bits(), + f64::MIN.to_bits(), + (-1.0f64).to_bits(), + (0.0f64).to_bits(), + (1.0f64).to_bits(), + f64::MAX.to_bits(), + ]; + DiffValue::F64(biased_arbitrary_value(u, known_f64_values)?) + } + V128 => DiffValue::V128(biased_arbitrary_value(u, KNOWN_U128_VALUES)?), + }; + arbitrary::Result::Ok(val) + } +} + +const KNOWN_I32_VALUES: &[i32] = &[i32::MIN, -1, 0, 1, i32::MAX]; +const KNOWN_I64_VALUES: &[i64] = &[i64::MIN, -1, 0, 1, i64::MAX]; +const KNOWN_U128_VALUES: &[u128] = &[u128::MIN, 1, u128::MAX]; + +/// Helper function to pick a known value from the list of `known_values` half +/// the time. +fn biased_arbitrary_value<'a, T>( + u: &mut Unstructured<'a>, + known_values: &[T], +) -> arbitrary::Result +where + T: Arbitrary<'a> + Copy, +{ + let pick_from_known_values: bool = u.arbitrary()?; + if pick_from_known_values { + Ok(*u.choose(known_values)?) + } else { + u.arbitrary() + } +} + +impl<'a> Arbitrary<'a> for DiffValue { + fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { + let ty: DiffValueType = u.arbitrary()?; + DiffValue::arbitrary_of_type(u, ty) + } +} + +impl Hash for DiffValue { + fn hash(&self, state: &mut H) { + self.ty().hash(state); + match self { + DiffValue::I32(n) => n.hash(state), + DiffValue::I64(n) => n.hash(state), + DiffValue::F32(n) => n.hash(state), + DiffValue::F64(n) => n.hash(state), + DiffValue::V128(n) => n.hash(state), + } + } +} + +/// Implement equality checks. 
Note that floating-point values are not compared +/// bit-for-bit in the case of NaNs: because Wasm floating-point numbers may be +/// [arithmetic NaNs with arbitrary payloads] and Wasm operations are [not +/// required to propagate NaN payloads], we simply check that both sides are +/// NaNs here. We could be more strict, though: we could check that the NaN +/// signs are equal and that [canonical NaN payloads remain canonical]. +/// +/// [arithmetic NaNs with arbitrary payloads]: +/// https://webassembly.github.io/spec/core/bikeshed/index.html#floating-point%E2%91%A0 +/// [not required to propagate NaN payloads]: +/// https://webassembly.github.io/spec/core/bikeshed/index.html#floating-point-operations%E2%91%A0 +/// [canonical NaN payloads remain canonical]: +/// https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0 +impl PartialEq for DiffValue { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::I32(l0), Self::I32(r0)) => l0 == r0, + (Self::I64(l0), Self::I64(r0)) => l0 == r0, + (Self::V128(l0), Self::V128(r0)) => l0 == r0, + (Self::F32(l0), Self::F32(r0)) => { + let l0 = f32::from_bits(*l0); + let r0 = f32::from_bits(*r0); + l0 == r0 || (l0.is_nan() && r0.is_nan()) + } + (Self::F64(l0), Self::F64(r0)) => { + let l0 = f64::from_bits(*l0); + let r0 = f64::from_bits(*r0); + l0 == r0 || (l0.is_nan() && r0.is_nan()) + } + _ => false, + } + } +} + +/// Enumerate the supported value types. 
+#[derive(Clone, Debug, Arbitrary, Hash)] +#[allow(missing_docs)] +pub enum DiffValueType { + I32, + I64, + F32, + F64, + V128, +} + +impl TryFrom for DiffValueType { + type Error = &'static str; + fn try_from(ty: wasmtime::ValType) -> Result { + use wasmtime::ValType::*; + match ty { + I32 => Ok(Self::I32), + I64 => Ok(Self::I64), + F32 => Ok(Self::F32), + F64 => Ok(Self::F64), + V128 => Ok(Self::V128), + FuncRef => Err("unable to convert reference types"), + ExternRef => Err("unable to convert reference types"), + } + } +} diff --git a/crates/fuzzing/src/oracles.rs b/crates/fuzzing/src/oracles.rs index 2ddc0c31e5..287f1b332d 100644 --- a/crates/fuzzing/src/oracles.rs +++ b/crates/fuzzing/src/oracles.rs @@ -10,14 +10,23 @@ //! When an oracle finds a bug, it should report it to the fuzzing engine by //! panicking. +#[cfg(feature = "fuzz-spec-interpreter")] +pub mod diff_spec; +pub mod diff_wasmi; +pub mod diff_wasmtime; pub mod dummy; +pub mod engine; mod stacks; -use crate::generators; +use self::diff_wasmtime::WasmtimeInstance; +use self::engine::DiffInstance; +use crate::generators::{self, DiffValue}; use arbitrary::Arbitrary; use log::debug; pub use stacks::check_stacks; use std::cell::Cell; +use std::collections::hash_map::DefaultHasher; +use std::hash::Hasher; use std::rc::Rc; use std::sync::atomic::{AtomicUsize, Ordering::SeqCst}; use std::sync::{Arc, Condvar, Mutex}; @@ -240,9 +249,10 @@ fn compile_module( if let generators::InstanceAllocationStrategy::Pooling { .. } = &config.wasmtime.strategy { - // When using the pooling allocator, accept failures to compile when arbitrary - // table element limits have been exceeded as there is currently no way - // to constrain the generated module table types. + // When using the pooling allocator, accept failures to compile + // when arbitrary table element limits have been exceeded as + // there is currently no way to constrain the generated module + // table types. 
let string = e.to_string(); if string.contains("minimum element size") { return None; @@ -250,7 +260,7 @@ fn compile_module( // Allow modules-failing-to-compile which exceed the requested // size for each instance. This is something that is difficult - // to control and ensure it always suceeds, so we simply have a + // to control and ensure it always succeeds, so we simply have a // "random" instance size limit and if a module doesn't fit we // move on to the next fuzz input. if string.contains("instance allocation for this module requires") { @@ -263,7 +273,17 @@ fn compile_module( } } -fn instantiate_with_dummy(store: &mut Store, module: &Module) -> Option { +/// Create a Wasmtime [`Instance`] from a [`Module`] and fill in all imports +/// with dummy values (e.g., zeroed values, immediately-trapping functions). +/// Also, this function catches certain fuzz-related instantiation failures and +/// returns `None` instead of panicking. +/// +/// TODO: we should implement tracing versions of these dummy imports that +/// record a trace of the order that imported functions were called in and with +/// what values. Like the results of exported functions, calls to imports should +/// also yield the same values for each configuration, and we should assert +/// that. +pub fn instantiate_with_dummy(store: &mut Store, module: &Module) -> Option { // Creation of imports can fail due to resource limit constraints, and then // instantiation can naturally fail for a number of reasons as well. Bundle // the two steps together to match on the error below. @@ -279,12 +299,14 @@ fn instantiate_with_dummy(store: &mut Store, module: &Module) -> Op // expected that fuzz-generated programs try to allocate lots of // stuff. 
if store.data().0.oom.get() { + log::debug!("failed to instantiate: OOM"); return None; } // Allow traps which can happen normally with `unreachable` or a // timeout or such - if e.downcast_ref::().is_some() { + if let Some(trap) = e.downcast_ref::() { + log::debug!("failed to instantiate: {}", trap); return None; } @@ -296,11 +318,13 @@ fn instantiate_with_dummy(store: &mut Store, module: &Module) -> Op // rather than positional-based resolution || string.contains("incompatible import type") { + log::debug!("failed to instantiate: {}", string); return None; } // Also allow failures to instantiate as a result of hitting instance limits if string.contains("concurrent instances has been reached") { + log::debug!("failed to instantiate: {}", string); return None; } @@ -308,6 +332,55 @@ fn instantiate_with_dummy(store: &mut Store, module: &Module) -> Op panic!("failed to instantiate: {:?}", e); } +/// Evaluate the function identified by `name` in two different engine +/// instances--`lhs` and `rhs`. +/// +/// # Panics +/// +/// This will panic if the evaluation is different between engines (e.g., +/// results are different, hashed instance is different, one side traps, etc.). +pub fn differential( + lhs: &mut dyn DiffInstance, + rhs: &mut WasmtimeInstance, + name: &str, + args: &[DiffValue], +) -> anyhow::Result<()> { + log::debug!("Evaluating: {}({:?})", name, args); + let lhs_results = lhs.evaluate(name, args); + log::debug!(" -> results on {}: {:?}", lhs.name(), &lhs_results); + let rhs_results = rhs.evaluate(name, args); + log::debug!(" -> results on {}: {:?}", rhs.name(), &rhs_results); + match (lhs_results, rhs_results) { + // If the evaluation succeeds, we compare the results. + (Ok(lhs_results), Ok(rhs_results)) => assert_eq!(lhs_results, rhs_results), + // Both sides failed--this is an acceptable result (e.g., both sides + // trap at a divide by zero). 
We could compare the error strings perhaps + // (since the `lhs` and `rhs` could be failing for different reasons) + // but this seems good enough for now. + (Err(_), Err(_)) => {} + // A real bug is found if only one side fails. + (Ok(_), Err(_)) => panic!("only the `rhs` ({}) failed for this input", rhs.name()), + (Err(_), Ok(_)) => panic!("only the `lhs` ({}) failed for this input", lhs.name()), + }; + + let hash = |i: &mut dyn DiffInstance| -> anyhow::Result { + let mut hasher = DefaultHasher::new(); + i.hash(&mut hasher)?; + Ok(hasher.finish()) + }; + + if lhs.is_hashable() && rhs.is_hashable() { + log::debug!("Hashing instances:"); + let lhs_hash = hash(lhs)?; + log::debug!(" -> hash of {}: {:?}", lhs.name(), lhs_hash); + let rhs_hash = hash(rhs)?; + log::debug!(" -> hash of {}: {:?}", rhs.name(), rhs_hash); + assert_eq!(lhs_hash, rhs_hash); + } + + Ok(()) +} + /// Instantiate the given Wasm module with each `Config` and call all of its /// exports. Modulo OOM, non-canonical NaNs, and usage of Wasm features that are /// or aren't enabled for different configs, we should get the same results when diff --git a/crates/fuzzing/src/oracles/diff_spec.rs b/crates/fuzzing/src/oracles/diff_spec.rs new file mode 100644 index 0000000000..fc5d22b880 --- /dev/null +++ b/crates/fuzzing/src/oracles/diff_spec.rs @@ -0,0 +1,123 @@ +//! Evaluate an exported Wasm function using the WebAssembly specification +//! reference interpreter. + +use crate::generators::{DiffValue, ModuleConfig}; +use crate::oracles::engine::{DiffEngine, DiffInstance}; +use anyhow::{anyhow, bail, Result}; +use wasm_spec_interpreter::Value; + +/// A wrapper for `wasm-spec-interpreter` as a [`DiffEngine`]. +pub struct SpecInterpreter; + +impl SpecInterpreter { + /// Build a new [`SpecInterpreter`] but only if the configuration does not + /// rely on features that the current bindings (i.e., + /// `wasm-spec-interpreter`) do not support. 
+ pub fn new(config: &ModuleConfig) -> Result> { + if config.config.reference_types_enabled { + bail!("the spec interpreter bindings do not support reference types") + } + if config.config.max_funcs > 1 { + // TODO + bail!("the spec interpreter bindings can only support one function for now") + } + if config.config.max_tables > 0 { + // TODO + bail!("the spec interpreter bindings do not fail as they should with out-of-bounds table accesses") + } + Ok(Box::new(Self)) + } +} + +impl DiffEngine for SpecInterpreter { + fn name(&self) -> &'static str { + "spec" + } + + fn instantiate(&self, wasm: &[u8]) -> Result> { + // TODO: ideally we would avoid copying the module bytes here. + Ok(Box::new(SpecInstance { + wasm: wasm.to_vec(), + })) + } +} + +struct SpecInstance { + wasm: Vec, +} + +impl DiffInstance for SpecInstance { + fn name(&self) -> &'static str { + "spec" + } + + fn evaluate( + &mut self, + _function_name: &str, + arguments: &[DiffValue], + ) -> Result> { + // The spec interpreter needs some work before it can fully support this + // interface: + // - TODO adapt `wasm-spec-interpreter` to use function name to select + // function to run + // - TODO adapt `wasm-spec-interpreter` to expose an "instance"
+ let arguments = arguments.iter().map(Value::from).collect(); + match wasm_spec_interpreter::interpret(&self.wasm, Some(arguments)) { + Ok(results) => Ok(results.into_iter().map(Value::into).collect()), + Err(err) => Err(anyhow!(err)), + } + } + + fn is_hashable(&self) -> bool { + false + } + + fn hash(&mut self, _state: &mut std::collections::hash_map::DefaultHasher) -> Result<()> { + unimplemented!() + } +} + +impl From<&DiffValue> for Value { + fn from(v: &DiffValue) -> Self { + match *v { + DiffValue::I32(n) => Value::I32(n), + DiffValue::I64(n) => Value::I64(n), + DiffValue::F32(n) => Value::F32(n as i32), + DiffValue::F64(n) => Value::F64(n as i64), + DiffValue::V128(n) => Value::V128(n.to_le_bytes().to_vec()), + } + } +} + +impl Into for Value { + fn into(self) -> DiffValue { + match self { + Value::I32(n) => DiffValue::I32(n), + Value::I64(n) => DiffValue::I64(n), + Value::F32(n) => DiffValue::F32(n as u32), + Value::F64(n) => DiffValue::F64(n as u64), + Value::V128(n) => { + assert_eq!(n.len(), 16); + DiffValue::V128(u128::from_le_bytes(n.as_slice().try_into().unwrap())) + } + } + } +} + +/// Set up the OCaml runtime for triggering its signal handler configuration. +/// +/// Because both the OCaml runtime and Wasmtime set up signal handlers, we must +/// carefully decide when to instantiate them; this function allows us to +/// control when. Wasmtime uses these signal handlers for catching various +/// WebAssembly failures. On certain OSes (e.g. Linux `x86_64`), the signal +/// handlers interfere, observable as an uncaught `SIGSEGV`--not even caught by +/// libFuzzer. +/// +/// This failure can be mitigated by always running Wasmtime second in +/// differential fuzzing. In some cases, however, this is not possible because +/// which engine will execute first is unknown. This function can be explicitly +/// executed first, e.g., during global initialization, to avoid this issue. 
+pub fn setup_ocaml_runtime() { + wasm_spec_interpreter::setup_ocaml_runtime(); +} diff --git a/crates/fuzzing/src/oracles/diff_wasmi.rs b/crates/fuzzing/src/oracles/diff_wasmi.rs new file mode 100644 index 0000000000..09cb197585 --- /dev/null +++ b/crates/fuzzing/src/oracles/diff_wasmi.rs @@ -0,0 +1,178 @@ +//! Evaluate an exported Wasm function using the wasmi interpreter. + +use crate::generators::{DiffValue, ModuleConfig}; +use crate::oracles::engine::{DiffEngine, DiffInstance}; +use anyhow::{bail, Context, Result}; +use std::hash::Hash; + +/// A wrapper for `wasmi` as a [`DiffEngine`]. +pub struct WasmiEngine; + +impl WasmiEngine { + /// Build a new [`WasmiEngine`] but only if the configuration does not rely + /// on features that `wasmi` does not support. + pub fn new(config: &ModuleConfig) -> Result> { + if config.config.reference_types_enabled { + bail!("wasmi does not support reference types") + } + if config.config.simd_enabled { + bail!("wasmi does not support SIMD") + } + if config.config.multi_value_enabled { + bail!("wasmi does not support multi-value") + } + if config.config.saturating_float_to_int_enabled { + bail!("wasmi does not support saturating float-to-int conversions") + } + if config.config.sign_extension_enabled { + bail!("wasmi does not support sign-extension") + } + Ok(Box::new(Self)) + } +} + +impl DiffEngine for WasmiEngine { + fn name(&self) -> &'static str { + "wasmi" + } + + fn instantiate(&self, wasm: &[u8]) -> Result> { + let module = wasmi::Module::from_buffer(wasm).context("unable to validate Wasm module")?; + let instance = wasmi::ModuleInstance::new(&module, &wasmi::ImportsBuilder::default()) + .context("unable to instantiate module in wasmi")?; + let instance = instance.assert_no_start(); + let exports = list_export_names(wasm); + Ok(Box::new(WasmiInstance { + module, + exports, + instance, + })) + } +} + +/// A wrapper for `wasmi` Wasm instances. 
+struct WasmiInstance { + #[allow(dead_code)] // reason = "the module must live as long as its reference" + module: wasmi::Module, + instance: wasmi::ModuleRef, + /// `wasmi`'s instances have no way of listing their exports so, in order to + /// properly hash the instance, we keep track of the export names. + exports: Vec, +} + +impl DiffInstance for WasmiInstance { + fn name(&self) -> &'static str { + "wasmi" + } + + fn evaluate(&mut self, function_name: &str, arguments: &[DiffValue]) -> Result> { + let arguments: Vec<_> = arguments.iter().map(wasmi::RuntimeValue::from).collect(); + let export = self + .instance + .export_by_name(function_name) + .context(format!( + "unable to find function '{}' in wasmi instance", + function_name + ))?; + let function = export.as_func().context("wasmi export is not a function")?; + let result = wasmi::FuncInstance::invoke(&function, &arguments, &mut wasmi::NopExternals) + .context("failed while invoking function in wasmi")?; + Ok(if let Some(result) = result { + vec![result.into()] + } else { + vec![] + }) + } + + fn is_hashable(&self) -> bool { + true + } + + fn hash(&mut self, state: &mut std::collections::hash_map::DefaultHasher) -> Result<()> { + for export_name in &self.exports { + if let Some(export) = self.instance.export_by_name(export_name) { + match export { + wasmi::ExternVal::Func(_) => {} + wasmi::ExternVal::Table(_) => {} // TODO eventually we can hash whether the values are null or non-null. + wasmi::ExternVal::Memory(m) => { + // `wasmi` memory may be stored non-contiguously; copy + // it out to a contiguous chunk. 
+ let mut buffer: Vec = vec![0; m.current_size().0 * 65536]; + m.get_into(0, &mut buffer[..]) + .expect("can access wasmi memory"); + buffer.hash(state) + } + wasmi::ExternVal::Global(g) => { + let val: DiffValue = g.get().into(); + val.hash(state); + } + } + } else { + panic!("unable to find export: {}", export_name) + } + } + Ok(()) + } +} + +/// List the names of all exported items in a binary Wasm module. +fn list_export_names(wasm: &[u8]) -> Vec { + let mut exports = vec![]; + for payload in wasmparser::Parser::new(0).parse_all(&wasm) { + match payload.unwrap() { + wasmparser::Payload::ExportSection(s) => { + for export in s { + exports.push(export.unwrap().name.to_string()); + } + } + _ => { + // Ignore any other sections. + } + } + } + exports +} + +impl From<&DiffValue> for wasmi::RuntimeValue { + fn from(v: &DiffValue) -> Self { + use wasmi::RuntimeValue::*; + match *v { + DiffValue::I32(n) => I32(n), + DiffValue::I64(n) => I64(n), + DiffValue::F32(n) => F32(wasmi::nan_preserving_float::F32::from_bits(n)), + DiffValue::F64(n) => F64(wasmi::nan_preserving_float::F64::from_bits(n)), + DiffValue::V128(_) => unimplemented!(), + } + } +} + +impl Into for wasmi::RuntimeValue { + fn into(self) -> DiffValue { + use wasmi::RuntimeValue::*; + match self { + I32(n) => DiffValue::I32(n), + I64(n) => DiffValue::I64(n), + F32(n) => DiffValue::F32(n.to_bits()), + F64(n) => DiffValue::F64(n.to_bits()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_list_export_names() { + let wat = r#"(module + (func (export "a") (result i32) (i32.const 42)) + (global (export "b") (mut i32) (i32.const 42)) + (memory (export "c") 1 2 shared) + )"#; + let wasm = wat::parse_str(wat).unwrap(); + assert_eq!( + list_export_names(&wasm), + vec!["a".to_string(), "b".to_string(), "c".to_string()], + ); + } +} diff --git a/crates/fuzzing/src/oracles/diff_wasmtime.rs b/crates/fuzzing/src/oracles/diff_wasmtime.rs new file mode 100644 index 0000000000..559e837f29 --- 
/dev/null
+++ b/crates/fuzzing/src/oracles/diff_wasmtime.rs
@@ -0,0 +1,164 @@
+//! Evaluate an exported Wasm function using Wasmtime.
+
+use crate::generators::{self, DiffValue};
+use crate::oracles::engine::DiffInstance;
+use crate::oracles::{compile_module, engine::DiffEngine, instantiate_with_dummy, StoreLimits};
+use anyhow::{Context, Result};
+use std::hash::Hash;
+use std::slice;
+use wasmtime::{AsContextMut, Extern, FuncType, Instance, Module, Store, Val};
+
+/// A wrapper for using Wasmtime as a [`DiffEngine`].
+pub struct WasmtimeEngine {
+    pub(crate) config: generators::Config,
+}
+
+impl WasmtimeEngine {
+    /// Merely store the configuration; the engine is actually constructed
+    /// later. Ideally the store and engine could be built here but
+    /// `compile_module` takes a [`generators::Config`]; TODO re-factor this if
+    /// that ever changes.
+    pub fn new(config: &generators::Config) -> Result<Box<dyn DiffEngine>> {
+        Ok(Box::new(Self {
+            config: config.clone(),
+        }))
+    }
+}
+
+impl DiffEngine for WasmtimeEngine {
+    fn name(&self) -> &'static str {
+        "wasmtime"
+    }
+
+    fn instantiate(&self, wasm: &[u8]) -> Result<Box<dyn DiffInstance>> {
+        let store = self.config.to_store();
+        let module = compile_module(store.engine(), wasm, true, &self.config).unwrap();
+        let instance = WasmtimeInstance::new(store, module)?;
+        Ok(Box::new(instance))
+    }
+}
+
+/// A wrapper around a Wasmtime instance.
+///
+/// The Wasmtime engine constructs a new store and compiles an instance of a
+/// Wasm module.
+pub struct WasmtimeInstance {
+    store: Store<StoreLimits>,
+    instance: Instance,
+}
+
+impl WasmtimeInstance {
+    /// Instantiate a new Wasmtime instance.
+    pub fn new(mut store: Store<StoreLimits>, module: Module) -> Result<Self> {
+        let instance = instantiate_with_dummy(&mut store, &module)
+            .context("unable to instantiate module in wasmtime")?;
+        Ok(Self { store, instance })
+    }
+
+    /// Retrieve the names and types of all exported functions in the instance.
+    ///
+    /// This is useful for evaluating each exported function with different
+    /// values. The [`DiffInstance`] trait asks for the function name and we
+    /// need to know the function signature in order to pass in the right
+    /// arguments.
+    pub fn exported_functions(&mut self) -> Vec<(String, FuncType)> {
+        let exported_functions = self
+            .instance
+            .exports(&mut self.store)
+            .map(|e| (e.name().to_owned(), e.into_func()))
+            .filter_map(|(n, f)| f.map(|f| (n, f)))
+            .collect::<Vec<_>>();
+        exported_functions
+            .into_iter()
+            .map(|(n, f)| (n, f.ty(&self.store)))
+            .collect()
+    }
+}
+
+impl DiffInstance for WasmtimeInstance {
+    fn name(&self) -> &'static str {
+        "wasmtime"
+    }
+
+    fn evaluate(&mut self, function_name: &str, arguments: &[DiffValue]) -> Result<Vec<DiffValue>> {
+        let arguments: Vec<_> = arguments.iter().map(Val::from).collect();
+
+        let function = self
+            .instance
+            .get_func(&mut self.store, function_name)
+            .expect("unable to access exported function");
+        let ty = function.ty(&self.store);
+        let mut results = vec![Val::I32(0); ty.results().len()];
+        function.call(&mut self.store, &arguments, &mut results)?;
+
+        let results = results.into_iter().map(DiffValue::from).collect();
+        Ok(results)
+    }
+
+    fn is_hashable(&self) -> bool {
+        true
+    }
+
+    fn hash(&mut self, state: &mut std::collections::hash_map::DefaultHasher) -> Result<()> {
+        let exports: Vec<_> = self
+            .instance
+            .exports(self.store.as_context_mut())
+            .map(|e| e.into_extern())
+            .collect();
+        for e in exports {
+            match e {
+                Extern::Global(g) => {
+                    let val: DiffValue = g.get(&mut self.store).into();
+                    val.hash(state)
+                }
+                Extern::Memory(m) => {
+                    let data = m.data(&mut self.store);
+                    data.hash(state)
+                }
+                Extern::SharedMemory(m) => {
+                    // SAFETY: NOTE(review) this assumes `m.data()` points at
+                    // `m.data_size()` readable bytes for the lifetime of `data`
+                    // and that nothing writes to the shared memory while it is
+                    // being hashed -- TODO confirm against `SharedMemory`.
+                    let data = unsafe { slice::from_raw_parts(m.data() as *mut u8, m.data_size()) };
+                    data.hash(state)
+                }
+                Extern::Table(_) => {
+                    // TODO: it's unclear whether it is worth it to iterate
+                    // through the table and hash the values.
+                }
+                Extern::Func(_) => {
+                    // Note: no need to hash exported functions.
+                }
+            }
+        }
+        Ok(())
+    }
+}
+
+impl From<&DiffValue> for Val {
+    fn from(v: &DiffValue) -> Self {
+        match *v {
+            DiffValue::I32(n) => Val::I32(n),
+            DiffValue::I64(n) => Val::I64(n),
+            DiffValue::F32(n) => Val::F32(n),
+            DiffValue::F64(n) => Val::F64(n),
+            DiffValue::V128(n) => Val::V128(n),
+        }
+    }
+}
+
+/// Convert a Wasmtime [`Val`] into a [`DiffValue`]; reference values are unimplemented.
+impl From<Val> for DiffValue {
+    fn from(v: Val) -> Self {
+        match v {
+            Val::I32(n) => DiffValue::I32(n),
+            Val::I64(n) => DiffValue::I64(n),
+            Val::F32(n) => DiffValue::F32(n),
+            Val::F64(n) => DiffValue::F64(n),
+            Val::V128(n) => DiffValue::V128(n),
+            Val::FuncRef(_) => unimplemented!(),
+            Val::ExternRef(_) => unimplemented!(),
+        }
+    }
+}
diff --git a/crates/fuzzing/src/oracles/engine.rs b/crates/fuzzing/src/oracles/engine.rs
new file mode 100644
index 0000000000..4c7c274ce4
--- /dev/null
+++ b/crates/fuzzing/src/oracles/engine.rs
@@ -0,0 +1,79 @@
+//! Define the interface for differential evaluation of Wasm functions.
+
+use crate::generators::{Config, DiffValue};
+use crate::oracles::{diff_wasmi::WasmiEngine, diff_wasmtime::WasmtimeEngine};
+use arbitrary::Unstructured;
+use std::collections::hash_map::DefaultHasher;
+
+/// Pick one of the engines implemented in this module that can be constructed
+/// for `existing_config`. The Wasmtime candidate uses a freshly generated
+/// configuration that is first made compatible with `existing_config`.
+///
+/// # Panics
+///
+/// Panics if no engine could be constructed.
+pub fn choose(
+    u: &mut Unstructured<'_>,
+    existing_config: &Config,
+) -> arbitrary::Result<Box<dyn DiffEngine>> {
+    // Filter out any engines that cannot match the given configuration.
+    let mut engines: Vec<Box<dyn DiffEngine>> = vec![];
+    let mut config: Config = u.arbitrary()?; // TODO change to WasmtimeConfig
+    config.make_compatible_with(existing_config);
+    if let Result::Ok(e) = WasmtimeEngine::new(&config) {
+        engines.push(e)
+    }
+    if let Result::Ok(e) = WasmiEngine::new(&existing_config.module_config) {
+        engines.push(e)
+    }
+    #[cfg(feature = "fuzz-spec-interpreter")]
+    if let Result::Ok(e) =
+        crate::oracles::diff_spec::SpecInterpreter::new(&existing_config.module_config)
+    {
+        engines.push(e)
+    }
+
+    // Choose one of the remaining engines.
+    if !engines.is_empty() {
+        let index: usize = u.int_in_range(0..=engines.len() - 1)?;
+        let engine = engines.swap_remove(index);
+        log::debug!("selected engine: {}", engine.name());
+        Ok(engine)
+    } else {
+        panic!("no engines to pick from");
+        // Err(arbitrary::Error::EmptyChoose)
+    }
+}
+
+/// Provide a way to instantiate Wasm modules.
+pub trait DiffEngine {
+    /// Return the name of the engine.
+    fn name(&self) -> &'static str;
+
+    /// Create a new instance with the given engine.
+    fn instantiate(&self, wasm: &[u8]) -> anyhow::Result<Box<dyn DiffInstance>>;
+}
+
+/// Provide a way to evaluate Wasm functions--a Wasm instance implemented by a
+/// specific engine (i.e., compiler or interpreter).
+pub trait DiffInstance {
+    /// Return the name of the engine behind this instance.
+    fn name(&self) -> &'static str;
+
+    /// Evaluate an exported function with the given values.
+    fn evaluate(
+        &mut self,
+        function_name: &str,
+        arguments: &[DiffValue],
+    ) -> anyhow::Result<Vec<DiffValue>>;
+
+    /// Check if instances of this kind are actually hashable--not all engines
+    /// support this.
+    fn is_hashable(&self) -> bool;
+
+    /// If the instance `is_hashable()`, this method will try to hash the
+    /// following exported items in the instance: globals, memory.
+    ///
+    /// TODO allow more types of hashers.
+    fn hash(&mut self, state: &mut DefaultHasher) -> anyhow::Result<()>;
+}
diff --git a/crates/fuzzing/wasm-spec-interpreter/src/with_library.rs b/crates/fuzzing/wasm-spec-interpreter/src/with_library.rs
index acae7bb414..e28bee3b60 100644
--- a/crates/fuzzing/wasm-spec-interpreter/src/with_library.rs
+++ b/crates/fuzzing/wasm-spec-interpreter/src/with_library.rs
@@ -71,6 +71,16 @@ mod ocaml_bindings {
     }
 }
 
+/// Initialize a persistent OCaml runtime.
+///
+/// When used for fuzzing differentially with engines that also use signal
+/// handlers, this function provides a way to explicitly set up the OCaml
+/// runtime and configure its signal handlers.
+pub fn setup_ocaml_runtime() {
+    let _lock = INTERPRET.lock().unwrap();
+    OCamlRuntime::init_persistent();
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/crates/fuzzing/wasm-spec-interpreter/src/without_library.rs b/crates/fuzzing/wasm-spec-interpreter/src/without_library.rs
index e932dc1c51..d33bccfb46 100644
--- a/crates/fuzzing/wasm-spec-interpreter/src/without_library.rs
+++ b/crates/fuzzing/wasm-spec-interpreter/src/without_library.rs
@@ -15,3 +15,6 @@ pub fn interpret(_module: &[u8], _parameters: Option>) -> Result Result<()> {
+    let successes = TOTAL_SUCCESSES.load(SeqCst);
+    let attempts = TOTAL_ATTEMPTED.fetch_add(1, SeqCst);
+    if attempts > 1 && attempts % 1_000 == 0 {
+        println!("=== Execution rate ({} successes / {} attempted modules): {}% (total invocations: {}) ===",
+            successes,
+            attempts,
+            successes as f64 / attempts as f64 * 100f64,
+            TOTAL_INVOCATIONS.load(SeqCst)
+        );
+    }
+
+    let mut u = Unstructured::new(data);
+    let mut config: Config = u.arbitrary()?;
+    config.set_differential_config();
+
+    // Generate the Wasm module.
+    let wasm = if u.arbitrary()? {
+        // TODO figure out if this always eats up the rest of the unstructured;
+        // can we limit the number of instructions/functions.
+        let module = config.generate(&mut u, Some(1000))?;
+        module.to_bytes()
+    } else {
+        let module = SingleInstModule::new(&mut u, &mut config.module_config)?;
+        module.to_bytes()
+    };
+    log_wasm(&wasm);
+
+    // Choose a left-hand side Wasm engine.
+    let lhs = engine::choose(&mut u, &config)?;
+    let lhs_instance = lhs.instantiate(&wasm);
+
+    // Choose a right-hand side Wasm engine--this will always be Wasmtime.
+    let rhs_store = config.to_store();
+    let rhs_module = wasmtime::Module::new(rhs_store.engine(), &wasm).unwrap();
+    let rhs_instance = WasmtimeInstance::new(rhs_store, rhs_module);
+
+    // If we fail to instantiate, check that both sides do.
+    let (mut lhs_instance, mut rhs_instance) = match (lhs_instance, rhs_instance) {
+        (Ok(l), Ok(r)) => (l, r),
+        (Err(_), Err(_)) => return Ok(()), // TODO match the error messages.
+        (l, r) => panic!(
+            "failed to instantiate only one side: {:?} != {:?}",
+            l.err(),
+            r.err()
+        ),
+    };
+
+    // Call each exported function with different sets of arguments.
+    for (name, signature) in rhs_instance.exported_functions() {
+        let mut invocations = 0;
+        loop {
+            let arguments = signature
+                .params()
+                .map(|t| DiffValue::arbitrary_of_type(&mut u, t.try_into().unwrap()))
+                .collect::<Result<Vec<_>>>()?;
+            differential(lhs_instance.as_mut(), &mut rhs_instance, &name, &arguments)
+                .expect("failed to run differential evaluation");
+
+            // We evaluate the same function with different arguments until we
+            // hit a predetermined limit or we run out of unstructured data--it
+            // does not make sense to re-evaluate the same arguments over and
+            // over.
+            invocations += 1;
+            TOTAL_INVOCATIONS.fetch_add(1, SeqCst);
+            if invocations > NUM_INVOCATIONS || u.is_empty() {
+                break;
+            }
+        }
+    }
+
+    TOTAL_SUCCESSES.fetch_add(1, SeqCst);
+    Ok(())
+}