peepmatic: Introduce the peepmatic-fuzzing crate

This crate contains oracles, generators, and fuzz targets for use with fuzzing engines (e.g. libFuzzer). This doesn't contain the actual `libfuzzer_sys::fuzz_target!` definitions (those are in the `peepmatic-fuzz` crate), but those definitions are one-liners that call out to functions defined in this crate.
2020-05-01 15:47:47 -07:00
parent 2828da1f56
commit 1a7670f964
6 changed files with 802 additions and 0 deletions
--- a/cranelift/peepmatic/crates/fuzzing/Cargo.toml
+++ b/cranelift/peepmatic/crates/fuzzing/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "peepmatic-fuzzing"
+version = "0.1.0"
+authors = ["Nick Fitzgerald <fitzgen@gmail.com>"]
+edition = "2018"
+publish = false
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+arbitrary = { version = "0.4.1", features = ["derive"] }
+bincode = "1.2.1"
+env_logger = "0.7.1"
+fst = "0.4.1"
+log = "0.4.8"
+peepmatic = { path = "../.." }
+peepmatic-automata = { path = "../automata", features = ["serde"] }
+peepmatic-runtime = { path = "../runtime", features = ["construct"] }
+peepmatic-test = { path = "../test" }
+rand = { version = "0.7.3", features = ["small_rng"] }
+serde = "1.0.106"
+wast = "13.0.0"
--- a/cranelift/peepmatic/crates/fuzzing/src/automata.rs
+++ b/cranelift/peepmatic/crates/fuzzing/src/automata.rs
@@ -0,0 +1,187 @@
+//! Helpers for fuzzing the `peepmatic-automata` crate.
+
+use peepmatic_automata::{Automaton, Builder, Output};
+use serde::{Deserialize, Serialize};
+use std::collections::{HashMap, HashSet};
+use std::hash::Hash;
+
+fn serde_roundtrip<TAlphabet, TState, TOutput>(
+    automata: Automaton<TAlphabet, TState, TOutput>,
+) -> Automaton<TAlphabet, TState, TOutput>
+where
+    TAlphabet: Serialize + for<'de> Deserialize<'de> + Clone + Eq + Hash + Ord,
+    TState: Serialize + for<'de> Deserialize<'de> + Clone + Eq + Hash,
+    TOutput: Serialize + for<'de> Deserialize<'de> + Output,
+{
+    let encoded: Vec<u8> = bincode::serialize(&automata).expect("should serialize OK");
+    bincode::deserialize(&encoded).expect("should deserialize OK")
+}
+
+/// Construct an automaton from the given input-output pairs, and assert
+/// that:
+///
+/// * Putting in each of the input strings should result in the expected output
+///   string.
+///
+/// * Putting in an input string that is not one of the given inputs from our
+///   input-output pairs should never yield an output value.
+pub fn simple_automata(input_output_pairs: Vec<Vec<(u8, Vec<u8>)>>) {
+    let _ = env_logger::try_init();
+
+    let full_input = |pair: &[(u8, Vec<u8>)]| {
+        let mut full_input = vec![];
+        for (input, _) in pair {
+            full_input.push(*input);
+        }
+        full_input
+    };
+
+    let mut inputs = HashSet::new();
+
+    let mut input_output_pairs: Vec<_> = input_output_pairs
+        .into_iter()
+        .filter(|pair| {
+            !pair.is_empty() && {
+                // Make sure we don't have duplicate inputs.
+                let is_new = inputs.insert(full_input(pair));
+                is_new
+            }
+        })
+        .collect();
+
+    input_output_pairs.sort_by(|a, b| full_input(a).cmp(&full_input(b)));
+
+    if input_output_pairs.is_empty() {
+        return;
+    }
+
+    // A map from one of our concatenated input strings to its concatenated
+    // output.
+    let mut expected = HashMap::with_capacity(input_output_pairs.len());
+
+    let mut builder = Builder::<u8, (), Vec<u8>>::new();
+    for pair in &input_output_pairs {
+        let mut full_input = vec![];
+        let mut full_output = vec![];
+
+        let mut ins = builder.insert();
+        for (input, output) in pair.iter().cloned() {
+            full_input.push(input);
+            full_output.extend(output.iter().copied());
+
+            ins.next(input, output);
+        }
+
+        let old = expected.insert(full_input, full_output);
+        assert!(old.is_none());
+
+        ins.finish();
+    }
+
+    let automata = builder.finish();
+    let automata = serde_roundtrip(automata);
+
+    // Assert that each of our input strings yields the expected output.
+    for (input, expected_output) in &expected {
+        log::debug!("Testing input: {:?}", input);
+        let actual_output = automata.get(input);
+        assert!(actual_output.is_some());
+        assert_eq!(actual_output.as_ref().unwrap(), expected_output);
+    }
+
+    // Test that mutations of our input strings (that aren't themselves other
+    // input strings!) do not yield any output.
+    for input in expected.keys() {
+        for i in 0..input.len() {
+            let mut mutated = input.clone();
+            mutated[i] = mutated[i].wrapping_add(1);
+            log::debug!("Testing mutated input: {:?}", mutated);
+            if !expected.contains_key(&mutated) {
+                assert!(automata.get(&mutated).is_none());
+            }
+        }
+    }
+}
+
+/// Do differential testing against the `fst` crate, which is another
+/// implementation of the algorithm we use for finite-state transducer
+/// construction in `peepmatic-automata`.
+pub fn fst_differential(map: HashMap<Vec<u8>, u64>) {
+    let _ = env_logger::try_init();
+
+    let mut inputs: Vec<_> = map.keys().filter(|i| !i.is_empty()).cloned().collect();
+    inputs.sort();
+    inputs.dedup();
+    if inputs.is_empty() {
+        return;
+    }
+
+    let mut fst = fst::MapBuilder::memory();
+    let mut builder = Builder::<u8, (), u64>::new();
+
+    for inp in &inputs {
+        fst.insert(inp, map[inp]).unwrap();
+
+        let mut ins = builder.insert();
+        for (i, ch) in inp.iter().enumerate() {
+            ins.next(*ch, if i == 0 { map[inp] } else { 0 });
+        }
+        ins.finish();
+    }
+
+    let fst = fst.into_map();
+    let automata = builder.finish();
+    let automata = serde_roundtrip(automata);
+
+    for inp in inputs {
+        // Check we have the same result as `fst` for inputs we know are in the
+        // automata.
+        log::debug!("Testing input {:?}", inp);
+        let expected = fst.get(&inp).expect("`fst` should have entry for `inp`");
+        let actual = automata
+            .get(&inp)
+            .expect("automata should have entry for `inp`");
+        assert_eq!(expected, actual);
+
+        // Check that we have the same result as `fst` for inputs that may or
+        // may not be in the automata.
+        for i in 0..inp.len() {
+            let mut mutated = inp.clone();
+            mutated[i] = mutated[i].wrapping_add(1);
+            log::debug!("Testing mutated input {:?}", mutated);
+            let expected = fst.get(&mutated);
+            let actual = automata.get(&mutated);
+            assert_eq!(expected, actual);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn check_simple_automata() {
+        crate::check(simple_automata);
+    }
+
+    #[test]
+    fn check_fst_differential() {
+        crate::check(fst_differential);
+    }
+
+    #[test]
+    fn regression_test_0() {
+        simple_automata(vec![vec![(0, vec![0]), (0, vec![1])], vec![(0, vec![2])]]);
+    }
+
+    #[test]
+    fn regression_test_1() {
+        fst_differential(vec![(vec![1, 3], 5), (vec![1, 2], 4)].into_iter().collect());
+    }
+
+    #[test]
+    fn regression_test_2() {
+        simple_automata(vec![vec![(0, vec![11]), (0, vec![])], vec![(0, vec![11])]]);
+    }
+}
--- a/cranelift/peepmatic/crates/fuzzing/src/compile.rs
+++ b/cranelift/peepmatic/crates/fuzzing/src/compile.rs
@@ -0,0 +1,71 @@
+//! Fuzz testing utilities related to compiling the source text of our DSL.
+
+use peepmatic_runtime::PeepholeOptimizations;
+use std::path::Path;
+use std::str;
+
+/// Attempt to interpret the given bytes as UTF-8 and then compile them as if
+/// they were source text of our DSL.
+pub fn compile(data: &[u8]) {
+    let source = match str::from_utf8(data) {
+        Err(_) => return,
+        Ok(s) => s,
+    };
+
+    let opt = match peepmatic::compile_str(source, Path::new("fuzz")) {
+        Err(_) => return,
+        Ok(o) => o,
+    };
+
+    // Should be able to serialize and deserialize the peephole optimizer.
+    let opt_bytes = bincode::serialize(&opt).expect("should serialize peephole optimizations OK");
+    let _: PeepholeOptimizations =
+        bincode::deserialize(&opt_bytes).expect("should deserialize peephole optimizations OK");
+
+    // Compiling the same source text again should be deterministic.
+    let opt2 = peepmatic::compile_str(source, Path::new("fuzz"))
+        .expect("should be able to compile source text again, if it compiled OK the first time");
+    let opt2_bytes =
+        bincode::serialize(&opt2).expect("should serialize second peephole optimizations OK");
+    assert_eq!(opt_bytes, opt2_bytes);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn check_compile() {
+        crate::check(|s: String| compile(s.as_bytes()));
+    }
+
+    #[test]
+    fn regression_0() {
+        compile(
+            b"
+            (=> (bor (bor $x $y) $y) $x)
+            (=> (bor (bor $x $z) $y) $x)
+        ",
+        );
+    }
+
+    #[test]
+    fn regression_1() {
+        compile(
+            b"
+            (=> (bor (bor $x $y) 0) $x)
+            (=> (bor $x 0) $x)
+            (=> (bor $y $x) $x)
+        ",
+        );
+    }
+
+    #[test]
+    fn regression_2() {
+        compile(
+            b"
+            (=> (sshr $x 11111111110) $x)
+        ",
+        );
+    }
+}
--- a/cranelift/peepmatic/crates/fuzzing/src/interp.rs
+++ b/cranelift/peepmatic/crates/fuzzing/src/interp.rs
@@ -0,0 +1,374 @@
+//! Interpreting compiled peephole optimizations against test instruction sequences.
+
+use peepmatic::{
+    Constraint, Dfs, DynAstRef, Optimizations, Pattern, Span, TraversalEvent, ValueLiteral,
+    Variable,
+};
+use peepmatic_runtime::{
+    cc::ConditionCode,
+    operator::TypingContext as TypingContextTrait,
+    part::Constant,
+    r#type::BitWidth,
+    r#type::{Kind, Type},
+};
+use peepmatic_test::{Program, TestIsa};
+use std::collections::{BTreeMap, HashMap};
+use std::path::Path;
+use std::str;
+
+/// Compile the given source text, and if it is a valid set of optimizations,
+/// then interpret the optimizations against test instruction sequences created
+/// to reflect the optimizations.
+pub fn interp(data: &[u8]) {
+    let _ = env_logger::try_init();
+
+    let source = match str::from_utf8(data) {
+        Err(_) => return,
+        Ok(s) => s,
+    };
+
+    let peep_opts = match peepmatic::compile_str(source, Path::new("fuzz")) {
+        Err(_) => return,
+        Ok(o) => o,
+    };
+    let mut optimizer = peep_opts.optimizer(TestIsa {
+        native_word_size_in_bits: 32,
+    });
+
+    // Okay, we know it compiles and verifies alright, so (re)parse the AST.
+    let buf = wast::parser::ParseBuffer::new(&source).unwrap();
+    let ast = wast::parser::parse::<Optimizations>(&buf).unwrap();
+
+    // And we need access to the assigned types, so re-verify it as well.
+    peepmatic::verify(&ast).unwrap();
+
+    // Walk over each optimization and create an instruction sequence that
+    // matches the optimization.
+    let mut program = Program::default();
+    for opt in &ast.optimizations {
+        // The instruction sequence we generate must match an optimization (not
+        // necessarily *this* optimization, if there is another that is more
+        // specific but also matches) unless there is a `bit-width`
+        // precondition or an implicit `bit-width` precondition via a type
+        // ascription. When those things exist, we might have constructed
+        // instructions with the wrong bit widths to match.
+        let mut allow_no_match = false;
+
+        // The last instruction we generated. After we've generated the full
+        // instruction sequence, this will be its root.
+        let mut last_inst = None;
+
+        // Remember the instructions associated with variables and constants, so
+        // that when they appear multiple times, we reuse the same instruction.
+        let mut id_to_inst = HashMap::new();
+
+        // Map from a pattern's span to the instruction we generated for
+        // it. This allows parent operations to get the instructions for their
+        // children.
+        let mut span_to_inst = BTreeMap::new();
+
+        for (te, lhs) in Dfs::new(&opt.lhs) {
+            // NB: We use a post-order traversal because we want arguments to be
+            // generated before they are used.
+            if te != TraversalEvent::Exit {
+                continue;
+            }
+
+            match lhs {
+                DynAstRef::Precondition(p) => {
+                    allow_no_match |= p.constraint == Constraint::BitWidth;
+                }
+
+                DynAstRef::Pattern(Pattern::Operation(op)) => {
+                    allow_no_match |= op.r#type.get().is_some();
+
+                    let num_imms = op.operator.immediates_arity() as usize;
+
+                    // Generate this operation's immediates.
+                    let mut imm_tys = vec![];
+                    op.operator
+                        .immediate_types(&mut TypingContext, op.span(), &mut imm_tys);
+                    let imms: Vec<_> = op
+                        .operands
+                        .iter()
+                        .take(num_imms)
+                        .zip(imm_tys)
+                        .map(|(pat, ty)| match pat {
+                            Pattern::ValueLiteral(ValueLiteral::Integer(i)) => {
+                                Constant::Int(i.value as _, BitWidth::ThirtyTwo).into()
+                            }
+                            Pattern::ValueLiteral(ValueLiteral::Boolean(b)) => {
+                                Constant::Bool(b.value, BitWidth::One).into()
+                            }
+                            Pattern::ValueLiteral(ValueLiteral::ConditionCode(cc)) => cc.cc.into(),
+                            Pattern::Constant(_) | Pattern::Variable(_) => match ty {
+                                TypeOrConditionCode::ConditionCode => ConditionCode::Eq.into(),
+                                TypeOrConditionCode::Type(ty) => match ty.kind {
+                                    Kind::Int => Constant::Int(1, ty.bit_width).into(),
+                                    Kind::Bool => Constant::Bool(false, ty.bit_width).into(),
+                                    Kind::Void | Kind::CpuFlags => {
+                                        unreachable!("void and cpu flags cannot be immediates")
+                                    }
+                                },
+                            },
+                            Pattern::Operation(_) => {
+                                unreachable!("operations not allowed as immediates")
+                            }
+                        })
+                        .collect();
+
+                    // Generate (or collect already-generated) instructions for
+                    // this operation's arguments.
+                    let mut arg_tys = vec![];
+                    op.operator
+                        .param_types(&mut TypingContext, op.span(), &mut arg_tys);
+                    let args: Vec<_> = op
+                        .operands
+                        .iter()
+                        .skip(num_imms)
+                        .zip(arg_tys)
+                        .map(|(pat, ty)| match pat {
+                            Pattern::Operation(op) => span_to_inst[&op.span()],
+                            Pattern::ValueLiteral(ValueLiteral::Integer(i)) => program.r#const(
+                                Constant::Int(i.value as _, BitWidth::ThirtyTwo),
+                                BitWidth::ThirtyTwo,
+                            ),
+                            Pattern::ValueLiteral(ValueLiteral::Boolean(b)) => program.r#const(
+                                Constant::Bool(b.value, BitWidth::One),
+                                BitWidth::ThirtyTwo,
+                            ),
+                            Pattern::ValueLiteral(ValueLiteral::ConditionCode(_)) => {
+                                unreachable!("condition codes cannot be arguments")
+                            }
+                            Pattern::Constant(peepmatic::Constant { id, .. })
+                            | Pattern::Variable(Variable { id, .. }) => match ty {
+                                TypeOrConditionCode::Type(ty) => {
+                                    *id_to_inst.entry(id).or_insert_with(|| match ty.kind {
+                                        Kind::Int => program.r#const(
+                                            Constant::Int(1, ty.bit_width),
+                                            BitWidth::ThirtyTwo,
+                                        ),
+                                        Kind::Bool => program.r#const(
+                                            Constant::Bool(false, ty.bit_width),
+                                            BitWidth::ThirtyTwo,
+                                        ),
+                                        Kind::CpuFlags => {
+                                            unreachable!("cpu flags cannot be an argument")
+                                        }
+                                        Kind::Void => unreachable!("void cannot be an argument"),
+                                    })
+                                }
+                                TypeOrConditionCode::ConditionCode => {
+                                    unreachable!("condition codes cannot be arguments")
+                                }
+                            },
+                        })
+                        .collect();
+
+                    let ty = match op.operator.result_type(&mut TypingContext, op.span()) {
+                        TypeOrConditionCode::Type(ty) => ty,
+                        TypeOrConditionCode::ConditionCode => {
+                            unreachable!("condition codes cannot be operation results")
+                        }
+                    };
+                    let inst = program.new_instruction(op.operator, ty, imms, args);
+                    last_inst = Some(inst);
+                    let old_inst = span_to_inst.insert(op.span(), inst);
+                    assert!(old_inst.is_none());
+                }
+                _ => continue,
+            }
+        }
+
+        // Run the optimizer on our newly generated instruction sequence.
+        if let Some(inst) = last_inst {
+            let replacement = optimizer.apply_one(&mut program, inst);
+            assert!(
+                replacement.is_some() || allow_no_match,
+                "an optimization should match the generated instruction sequence"
+            );
+        }
+    }
+
+    // Finally, just try and run the optimizer on every instruction we
+    // generated, just to potentially shake out some more bugs.
+    let instructions: Vec<_> = program.instructions().map(|(k, _)| k).collect();
+    for inst in instructions {
+        let _ = optimizer.apply_one(&mut program, inst);
+    }
+}
+
+enum TypeOrConditionCode {
+    Type(Type),
+    ConditionCode,
+}
+
+struct TypingContext;
+
+impl<'a> TypingContextTrait<'a> for TypingContext {
+    type TypeVariable = TypeOrConditionCode;
+
+    fn cc(&mut self, _: wast::Span) -> Self::TypeVariable {
+        TypeOrConditionCode::ConditionCode
+    }
+
+    fn bNN(&mut self, _: wast::Span) -> Self::TypeVariable {
+        TypeOrConditionCode::Type(Type::b1())
+    }
+
+    fn iNN(&mut self, _: wast::Span) -> Self::TypeVariable {
+        TypeOrConditionCode::Type(Type::i32())
+    }
+
+    fn iMM(&mut self, _: wast::Span) -> Self::TypeVariable {
+        TypeOrConditionCode::Type(Type::i32())
+    }
+
+    fn cpu_flags(&mut self, _: wast::Span) -> Self::TypeVariable {
+        TypeOrConditionCode::Type(Type::cpu_flags())
+    }
+
+    fn b1(&mut self, _: wast::Span) -> Self::TypeVariable {
+        TypeOrConditionCode::Type(Type::b1())
+    }
+
+    fn void(&mut self, _: wast::Span) -> Self::TypeVariable {
+        TypeOrConditionCode::Type(Type::void())
+    }
+
+    fn bool_or_int(&mut self, _: wast::Span) -> Self::TypeVariable {
+        TypeOrConditionCode::Type(Type::b1())
+    }
+
+    fn any_t(&mut self, _: wast::Span) -> Self::TypeVariable {
+        TypeOrConditionCode::Type(Type::i32())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn check_interp() {
+        crate::check(|s: Vec<u8>| interp(String::from_utf8_lossy(&s).as_bytes()));
+    }
+
+    #[test]
+    fn regression_0() {
+        interp(b"(=> (imul $x $x) $x)");
+    }
+
+    #[test]
+    fn regression_1() {
+        interp(b"(=> (when (imul $x $C) (is-power-of-two $C)) $x)");
+    }
+
+    #[test]
+    fn regression_2() {
+        interp(
+            b"
+            (=> (bor (bor $x $y) $x) (bor $x $y))
+            (=> (bor (bor $x $C) 5) $x)
+            ",
+        );
+    }
+
+    #[test]
+    fn regression_3() {
+        interp(
+            b"
+            (=> (bor $y (bor $x 9)) $x)
+            (=> (bor (bor $x $y) $x) $x)
+            ",
+        );
+    }
+
+    #[test]
+    fn regression_4() {
+        interp(
+            b"
+            (=> (bor $C 33) 0)
+            (=> (bor $x 22) 1)
+            (=> (bor $x 11) 2)
+            ",
+        );
+    }
+
+    #[test]
+    fn regression_5() {
+        interp(
+            b"
+            (=> (bor $y (bor $x $y)) (bor $x $y))
+            (=> (bor (bor $x $y) $z) $x)
+            (=> (bor (bor $x $y) $y) $x)
+            ",
+        );
+    }
+
+    #[test]
+    fn regression_6() {
+        interp(b"(=> (imul $x $f) of)");
+    }
+
+    #[test]
+    fn regression_7() {
+        interp(
+            b"
+            (=> (when (sdiv $x $C)
+                      (fits-in-native-word $y))
+                (sdiv $C $x))
+            ",
+        );
+    }
+
+    #[test]
+    fn regression_8() {
+        interp(
+            b"
+            (=> (adjust_sp_down $C) (adjust_sp_down_imm $C))
+            ",
+        );
+    }
+
+    #[test]
+    fn regression_9() {
+        interp(
+            b"
+            (=> (when $x) $x)
+            (=> (trapnz $x) (trapnz $x))
+            ",
+        );
+    }
+
+    #[test]
+    fn regression_10() {
+        interp(b"(=> (sshr{i1} $x 0) $x)");
+    }
+
+    #[test]
+    fn regression_11() {
+        interp(
+            b"
+            (=> (when (ushr_imm $x (ishl 4 3))
+                      (bit-width $x 64))
+                (sextend{i64} (ireduce{i32} $x)))
+            ",
+        );
+    }
+
+    #[test]
+    fn regression_12() {
+        interp(b"(=> (band $C1 (band_imm $C1 1)) 1)");
+    }
+
+    #[test]
+    fn regression_13() {
+        interp(b"(=> (brz (icmp eq 0 $x)) (brz (ireduce{i32} $x)))");
+    }
+
+    #[test]
+    fn regression_14() {
+        interp(b"(=> (brz (icmp $E 0 $x)) (brz $x))");
+    }
+}
--- a/cranelift/peepmatic/crates/fuzzing/src/lib.rs
+++ b/cranelift/peepmatic/crates/fuzzing/src/lib.rs
@@ -0,0 +1,119 @@
+//! Utilities for fuzzing.
+//!
+//! The actual fuzz targets are defined in `peepmatic/fuzz/*`. This crate just
+//! has oracles and generators for fuzzing.
+
+#![deny(missing_debug_implementations)]
+#![deny(missing_docs)]
+
+use arbitrary::{Arbitrary, Unstructured};
+use rand::prelude::*;
+use std::fmt::Debug;
+use std::panic;
+use std::time;
+
+pub mod automata;
+pub mod compile;
+pub mod interp;
+pub mod parser;
+
+/// A quickcheck-style runner for fuzz targets.
+///
+/// This is *not* intended to replace a long-running, coverage-guided fuzzing
+/// engine like libFuzzer! This is only for defining quick, purely random tests
+/// for use with `cargo test` and CI.
+pub fn check<A>(mut f: impl FnMut(A))
+where
+    A: Clone + Debug + Arbitrary,
+{
+    let seed = rand::thread_rng().gen();
+    let mut rng = rand::rngs::SmallRng::seed_from_u64(seed);
+
+    const INITIAL_LENGTH: usize = 16;
+    const MAX_LENGTH: usize = 4096;
+
+    let mut buf: Vec<u8> = (0..INITIAL_LENGTH).map(|_| rng.gen()).collect();
+    let mut num_checked = 0;
+
+    let time_budget = time::Duration::from_secs(2);
+    let then = time::Instant::now();
+
+    let (failing_input, panic_info) = loop {
+        if num_checked > 0 && time::Instant::now().duration_since(then) > time_budget {
+            eprintln!("Checked {} random inputs.", num_checked);
+            return;
+        }
+
+        match <A as Arbitrary>::arbitrary_take_rest(Unstructured::new(&buf)) {
+            Ok(input) => {
+                num_checked += 1;
+                eprintln!("Checking input: {:#?}", input);
+                if let Err(p) = panic::catch_unwind(panic::AssertUnwindSafe(|| f(input.clone()))) {
+                    break (input, p);
+                }
+            }
+            Err(e @ arbitrary::Error::NotEnoughData) => {
+                eprintln!("warning: {}", e);
+                if *buf.last().unwrap() == 0 {
+                    if buf.len() < MAX_LENGTH {
+                        let new_size = std::cmp::min(buf.len() * 2, MAX_LENGTH);
+                        eprintln!("Growing buffer size to {}", new_size);
+                        let delta = new_size - buf.len();
+                        buf.reserve(delta);
+                        for _ in 0..delta {
+                            buf.push(rng.gen());
+                        }
+                        continue;
+                    } else {
+                        // Regenerate `buf` in the loop below and see if that
+                        // fixes things...
+                        eprintln!("Regenerating buffer data.");
+                    }
+                } else {
+                    // Shrink values in the end of `buf`, which is where
+                    // `Arbitrary` pulls container lengths from. Then try again.
+                    eprintln!("Shrinking buffer's tail values.");
+                    let i = (buf.len() as f64).sqrt() as usize;
+                    for j in i..buf.len() {
+                        buf[j] /= 2;
+                    }
+                    continue;
+                }
+            }
+            Err(e) => {
+                eprintln!("warning: {}", e);
+                // Usually this happens because `A` requires a sequence of utf-8
+                // bytes but its given sequence wasn't valid utf-8. Just skip
+                // this iteration and try again after we've updated `buf` below.
+            }
+        };
+
+        // Double the size of the buffer every so often, so we don't only
+        // explore small inputs.
+        if num_checked == buf.len() {
+            buf.resize(std::cmp::min(buf.len() * 2, MAX_LENGTH), 0);
+        }
+
+        for i in 0..buf.len() {
+            buf[i] = rng.gen();
+        }
+    };
+
+    // Shrink the failing input.
+    let mut smallest_failing_input = failing_input;
+    let mut panic_info = panic_info;
+    'shrinking: loop {
+        eprintln!("Smallest failing input: {:#?}", smallest_failing_input);
+        for input in smallest_failing_input.shrink() {
+            if let Err(p) = panic::catch_unwind(panic::AssertUnwindSafe(|| f(input.clone()))) {
+                smallest_failing_input = input;
+                panic_info = p;
+                continue 'shrinking;
+            }
+        }
+        break;
+    }
+
+    // Resume the panic for the smallest input.
+    panic::resume_unwind(panic_info);
+}
--- a/cranelift/peepmatic/crates/fuzzing/src/parser.rs
+++ b/cranelift/peepmatic/crates/fuzzing/src/parser.rs
@@ -0,0 +1,29 @@
+//! Utilities for fuzzing our DSL's parser.
+
+use peepmatic::Optimizations;
+use std::str;
+
+/// Attempt to parse the given string as if it were a snippet of our DSL.
+pub fn parse(data: &[u8]) {
+    let source = match str::from_utf8(data) {
+        Ok(s) => s,
+        Err(_) => return,
+    };
+
+    let buf = match wast::parser::ParseBuffer::new(&source) {
+        Ok(buf) => buf,
+        Err(_) => return,
+    };
+
+    let _ = wast::parser::parse::<Optimizations>(&buf);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn check_parse() {
+        crate::check(|s: String| parse(s.as_bytes()));
+    }
+}