fuzzgen: Generate compiler flags (#5020)

* fuzzgen: Test compiler flags * cranelift: Generate `all()` function for all enum flags This allows a user to iterate all flags that exist. * fuzzgen: Minimize regalloc_checker compiles * fuzzgen: Limit the amount of test case inputs * fuzzgen: Add egraphs flag It's finally here! 🥳 * cranelift: Add fuzzing comment to settings * fuzzgen: Add riscv64 * fuzzgen: Unconditionally enable some flags
2022-10-21 00:40:50 +01:00
parent 0959f90755
commit 51d8734235
5 changed files with 138 additions and 29 deletions
--- a/cranelift/fuzzgen/src/config.rs
+++ b/cranelift/fuzzgen/src/config.rs
@@ -1,8 +1,13 @@
+use std::collections::HashMap;
 use std::ops::RangeInclusive;

 /// Holds the range of acceptable values to use during the generation of testcases
 pub struct Config {
-    pub test_case_inputs: RangeInclusive<usize>,
+    /// Maximum allowed test case inputs.
+    /// We build test case inputs from the rest of the bytes that the fuzzer provides us
+    /// so we allow the fuzzer to control this by feeding us more or less bytes.
+    /// The upper bound here is to prevent too many inputs that cause long test times
+    pub max_test_case_inputs: usize,
    pub signature_params: RangeInclusive<usize>,
    pub signature_rets: RangeInclusive<usize>,
    pub instructions_per_block: RangeInclusive<usize>,
@@ -51,12 +56,17 @@ pub struct Config {
    /// We insert a checking sequence to guarantee that those inputs never make
    /// it to the instruction, but sometimes we want to allow them.
    pub allowed_fcvt_traps_ratio: (usize, usize),
+
+    /// Some flags really impact compile performance, we still want to test
+    /// them, but probably at a lower rate, so that overall execution time isn't
+    /// impacted as much
+    pub compile_flag_ratio: HashMap<&'static str, (usize, usize)>,
 }

 impl Default for Config {
    fn default() -> Self {
        Config {
-            test_case_inputs: 1..=10,
+            max_test_case_inputs: 100,
            signature_params: 0..=16,
            signature_rets: 0..=16,
            instructions_per_block: 0..=64,
@@ -75,6 +85,7 @@ impl Default for Config {
            backwards_branch_ratio: (1, 1000),
            allowed_int_divz_ratio: (1, 1_000_000),
            allowed_fcvt_traps_ratio: (1, 1_000_000),
+            compile_flag_ratio: [("regalloc_checker", (1usize, 1000))].into_iter().collect(),
        }
    }
 }
--- a/cranelift/fuzzgen/src/lib.rs
+++ b/cranelift/fuzzgen/src/lib.rs
@@ -1,5 +1,6 @@
 use crate::config::Config;
 use crate::function_generator::FunctionGenerator;
+use crate::settings::{Flags, OptLevel};
 use anyhow::Result;
 use arbitrary::{Arbitrary, Unstructured};
 use cranelift::codegen::data_value::DataValue;
@@ -30,6 +31,9 @@ impl<'a> Arbitrary<'a> for SingleFunction {
 }

 pub struct TestCase {
+    /// [Flags] to use when compiling this test case
+    pub flags: Flags,
+    /// Function under test
    pub func: Function,
    /// Generate multiple test inputs for each test case.
    /// This allows us to get more coverage per compilation, which may be somewhat expensive.
@@ -38,19 +42,24 @@ pub struct TestCase {

 impl fmt::Debug for TestCase {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(
-            f,
-            r#";; Fuzzgen test case
+        writeln!(f, ";; Fuzzgen test case\n")?;
+        writeln!(f, "test interpret")?;
+        writeln!(f, "test run")?;

-test interpret
-test run
-set enable_llvm_abi_extensions
-target aarch64
-target s390x
-target x86_64
+        // Print only non default flags
+        let default_flags = Flags::new(settings::builder());
+        for (default, flag) in default_flags.iter().zip(self.flags.iter()) {
+            assert_eq!(default.name, flag.name);

-"#
-        )?;
+            if default.value_string() != flag.value_string() {
+                writeln!(f, "set {}={}", flag.name, flag.value_string())?;
+            }
+        }
+
+        writeln!(f, "target aarch64")?;
+        writeln!(f, "target s390x")?;
+        writeln!(f, "target riscv64")?;
+        writeln!(f, "target x86_64\n")?;

        writeln!(f, "{}", self.func)?;

@@ -140,7 +149,10 @@ where
    fn generate_test_inputs(mut self, signature: &Signature) -> Result<Vec<TestCaseInput>> {
        let mut inputs = Vec::new();

-        loop {
+        // Generate up to "max_test_case_inputs" inputs, we need an upper bound here since
+        // the fuzzer at some point starts trying to feed us way too many inputs. (I found one
+        // test case with 130k inputs!)
+        for _ in 0..self.config.max_test_case_inputs {
            let last_len = self.u.len();

            let test_args = signature
@@ -217,14 +229,82 @@ where
        self.run_func_passes(func)
    }

+    /// Generate a random set of cranelift flags.
+    /// Only semantics preserving flags are considered
+    fn generate_flags(&mut self) -> Result<Flags> {
+        let mut builder = settings::builder();
+
+        let opt = self.u.choose(OptLevel::all())?;
+        builder.set("opt_level", &format!("{}", opt)[..])?;
+
+        // Boolean flags
+        // TODO: probestack is semantics preserving, but only works inline and on x64
+        // TODO: enable_pinned_reg does not work with our current trampolines. See: #4376
+        // TODO: is_pic has issues:
+        //   x86: https://github.com/bytecodealliance/wasmtime/issues/5005
+        //   aarch64: https://github.com/bytecodealliance/wasmtime/issues/2735
+        let bool_settings = [
+            "enable_alias_analysis",
+            "enable_safepoints",
+            "unwind_info",
+            "preserve_frame_pointers",
+            "enable_jump_tables",
+            "enable_heap_access_spectre_mitigation",
+            "enable_table_access_spectre_mitigation",
+            "enable_incremental_compilation_cache_checks",
+            "regalloc_checker",
+            "enable_llvm_abi_extensions",
+            "use_egraphs",
+        ];
+        for flag_name in bool_settings {
+            let enabled = self
+                .config
+                .compile_flag_ratio
+                .get(&flag_name)
+                .map(|&(num, denum)| self.u.ratio(num, denum))
+                .unwrap_or_else(|| bool::arbitrary(self.u))?;
+
+            let value = format!("{}", enabled);
+            builder.set(flag_name, value.as_str())?;
+        }
+
+        // Fixed settings
+
+        // We need llvm ABI extensions for i128 values on x86, so enable it regardless of
+        // what we picked above.
+        if cfg!(target_arch = "x86_64") {
+            builder.enable("enable_llvm_abi_extensions")?;
+        }
+
+        // This is the default, but we should ensure that it wasn't accidentally turned off anywhere.
+        builder.enable("enable_verifier")?;
+
+        // These settings just panic when they're not enabled and we try to use their respective functionality
+        // so they aren't very interesting to be automatically generated.
+        builder.enable("enable_atomics")?;
+        builder.enable("enable_float")?;
+        builder.enable("enable_simd")?;
+
+        // `machine_code_cfg_info` generates additional metadata for the embedder but this doesn't feed back
+        // into compilation anywhere, we leave it on unconditionally to make sure the generation doesn't panic.
+        builder.enable("machine_code_cfg_info")?;
+
+        Ok(Flags::new(builder))
+    }
+
    pub fn generate_test(mut self) -> Result<TestCase> {
        // If we're generating test inputs as well as a function, then we're planning to execute
        // this function. That means that any function references in it need to exist. We don't yet
        // have infrastructure for generating multiple functions, so just don't generate funcrefs.
        self.config.funcrefs_per_function = 0..=0;

+        let flags = self.generate_flags()?;
        let func = self.generate_func()?;
        let inputs = self.generate_test_inputs(&func.signature)?;
-        Ok(TestCase { func, inputs })
+        Ok(TestCase {
+            flags,
+            func,
+            inputs,
+        })
    }
 }