fuzzgen: Avoid int_divz traps (#4932)

* fuzzgen: Insert `int_divz` sequence * fuzzgen: matches!
2022-09-23 18:19:42 +01:00
parent 6e76e925f4
commit bb6a8a717a
3 changed files with 96 additions and 3 deletions
--- a/cranelift/fuzzgen/src/config.rs
+++ b/cranelift/fuzzgen/src/config.rs
@@ -39,6 +39,14 @@ pub struct Config {
    /// Determines how often we generate a backwards branch
    /// Backwards branches are prone to infinite loops, and thus cause timeouts.
    pub backwards_branch_ratio: (usize, usize),
    /// How often should we allow integer division by zero traps.
    ///
    /// Some instructions, such as Srem and Udiv, can cause an `int_divz` trap
    /// for some inputs. We almost always insert a sequence of instructions
    /// that avoids these traps. However, we can still allow some `int_divz`
    /// traps by adjusting this ratio.
    pub allowed_int_divz_ratio: (usize, usize),
 }
 impl Default for Config {
@@ -62,6 +70,7 @@ impl Default for Config {
            // 0.1% allows us to explore this, while not causing enough timeouts to significantly
            // impact execs/s
            backwards_branch_ratio: (1, 1000),
            allowed_int_divz_ratio: (1, 1_000_000),
        }
    }
 }
--- a/cranelift/fuzzgen/src/lib.rs
+++ b/cranelift/fuzzgen/src/lib.rs
@@ -12,6 +12,7 @@ use std::fmt;
 mod config;
 mod function_generator;
 mod pass;
 pub type TestCaseInput = Vec<DataValue>;
@@ -166,7 +167,7 @@ where
        Ok(inputs)
    }
-    fn run_func_passes(&self, func: Function) -> Function {
+    fn run_func_passes(&mut self, func: Function) -> Result<Function> {
        // Do a NaN Canonicalization pass on the generated function.
        //
        // Both IEEE754 and the Wasm spec are somewhat loose about what is allowed
@@ -201,12 +202,17 @@ where
        ctx.canonicalize_nans(isa.as_ref())
            .expect("Failed NaN canonicalization pass");
-        ctx.func
+        // Run the int_divz pass
        //
        // This pass replaces divs and rems with sequences that do not trap
        pass::do_int_divz_pass(self, &mut ctx.func)?;
        Ok(ctx.func)
    }
    fn generate_func(&mut self) -> Result<Function> {
        let func = FunctionGenerator::new(&mut self.u, &self.config).generate()?;
-        Ok(self.run_func_passes(func))
+        self.run_func_passes(func)
    }
    pub fn generate_test(mut self) -> Result<TestCase> {
--- a/cranelift/fuzzgen/src/pass.rs
+++ b/cranelift/fuzzgen/src/pass.rs
@@ -0,0 +1,78 @@
 use crate::FuzzGen;
 use anyhow::Result;
 use cranelift::codegen::cursor::{Cursor, FuncCursor};
 use cranelift::codegen::ir::{Function, Inst, Opcode};
 use cranelift::prelude::{InstBuilder, IntCC};
 /// Guards the division and remainder instructions of `func` against `int_divz` traps.
 ///
 /// A single draw against `fuzz.config.allowed_int_divz_ratio` decides, for the whole
 /// function, whether traps are allowed. When the draw allows them, `func` is returned
 /// untouched; otherwise every instruction flagged by `can_int_divz` gets a guard
 /// sequence inserted in front of it.
 ///
 /// # Errors
 ///
 /// Propagates any error returned by the `ratio` draw on the fuzzer input.
 pub fn do_int_divz_pass(fuzz: &mut FuzzGen, func: &mut Function) -> Result<()> {
    // The choice is made once per function rather than once per instruction. Experimentally,
    // a per-instruction decision with a 0.1% allow rate still left 4.4% of runs trapping.
    // Deciding per function lowers the number of trapping runs and also consumes fewer
    // fuzzer input bytes.
    let (numerator, denominator) = fuzz.config.allowed_int_divz_ratio;
    let allow_traps = fuzz.u.ratio(numerator, denominator)?;
    if allow_traps {
        return Ok(());
    }
    // Walk every block, and every instruction inside it, patching the ones that may trap.
    let mut cursor = FuncCursor::new(func);
    while cursor.next_block().is_some() {
        while let Some(inst) = cursor.next_inst() {
            if can_int_divz(&cursor, inst) {
                insert_int_divz_sequence(&mut cursor, inst);
            }
        }
    }
    Ok(())
 }
 /// Reports whether `inst` is one of the opcodes that can raise an `int_divz` trap.
 ///
 /// Only the opcode is inspected: signed and unsigned division and remainder
 /// (`Udiv`, `Sdiv`, `Urem`, `Srem`) yield `true`; everything else yields `false`.
 fn can_int_divz(pos: &FuncCursor, inst: Inst) -> bool {
    matches!(
        pos.func.dfg[inst].opcode(),
        Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem
    )
 }
 /// Inserts a guard sequence in front of `inst` so that it can no longer hit an `int_divz` trap.
 ///
 /// The guard computes whether the current operands would trap and, if so, selects the
 /// constant `1` as the denominator instead of the original value. The second argument of
 /// `inst` is then rewired to that selected value.
 ///
 /// # Panics
 ///
 /// Panics if the two operands of `inst` do not share the same type.
 fn insert_int_divz_sequence(pos: &mut FuncCursor, inst: Inst) {
    let opcode = pos.func.dfg[inst].opcode();
    // Copy both operands out so the borrow of the data flow graph ends here.
    let (lhs, rhs) = {
        let operands = pos.func.dfg.inst_args(inst);
        (operands[0], operands[1])
    };
    let ty = pos.func.dfg.value_type(lhs);
    assert_eq!(ty, pos.func.dfg.value_type(rhs));
    // A zero denominator traps for every opcode handled here.
    let zero = pos.ins().iconst(ty, 0);
    let one = pos.ins().iconst(ty, 1);
    let rhs_is_zero = pos.ins().icmp(IntCC::Equal, rhs, zero);
    let needs_patch = match opcode {
        Opcode::Sdiv | Opcode::Srem => {
            // The signed variants can additionally trap on INT_MIN / -1.
            // INT_MIN is built as 1 << (lane bits - 1).
            let sign_bit = ty.lane_bits() as i64 - 1;
            let int_min = pos.ins().ishl_imm(one, sign_bit);
            // Build -1 as 0 - 1.
            // TODO: A iconst -1 would be clearer, but #2906 makes this impossible for i128
            let minus_one = pos.ins().isub(zero, one);
            let lhs_is_min = pos.ins().icmp(IntCC::Equal, lhs, int_min);
            let rhs_is_minus_one = pos.ins().icmp(IntCC::Equal, rhs, minus_one);
            let overflows = pos.ins().band(lhs_is_min, rhs_is_minus_one);
            // The zero-denominator case still applies, so combine both conditions.
            pos.ins().bor(rhs_is_zero, overflows)
        }
        _ => rhs_is_zero,
    };
    // When the operands would trap, divide by 1 instead of the original denominator.
    let safe_rhs = pos.ins().select(needs_patch, one, rhs);
    // Point the instruction at the guarded denominator.
    pos.func.dfg.inst_args_mut(inst)[1] = safe_rhs;
 }