fuzzgen: Avoid int_divz traps (#4932)

* fuzzgen: Insert `int_divz` sequence

* fuzzgen: matches!
This commit is contained in:
Afonso Bordado
2022-09-23 18:19:42 +01:00
committed by GitHub
parent 6e76e925f4
commit bb6a8a717a
3 changed files with 96 additions and 3 deletions

View File

@@ -39,6 +39,14 @@ pub struct Config {
/// Determines how often we generate a backwards branch /// Determines how often we generate a backwards branch
/// Backwards branches are prone to infinite loops, and thus cause timeouts. /// Backwards branches are prone to infinite loops, and thus cause timeouts.
pub backwards_branch_ratio: (usize, usize), pub backwards_branch_ratio: (usize, usize),
/// How often we should allow integer division by zero traps.
///
/// Some instructions, such as Srem and Udiv, can cause an `int_divz` trap
/// under some inputs. We almost always insert a sequence of instructions
/// that avoids these issues. However, we can allow some `int_divz` traps
/// by controlling this config.
pub allowed_int_divz_ratio: (usize, usize),
} }
impl Default for Config { impl Default for Config {
@@ -62,6 +70,7 @@ impl Default for Config {
// 0.1% allows us to explore this, while not causing enough timeouts to significantly // 0.1% allows us to explore this, while not causing enough timeouts to significantly
// impact execs/s // impact execs/s
backwards_branch_ratio: (1, 1000), backwards_branch_ratio: (1, 1000),
allowed_int_divz_ratio: (1, 1_000_000),
} }
} }
} }

View File

@@ -12,6 +12,7 @@ use std::fmt;
mod config; mod config;
mod function_generator; mod function_generator;
mod pass;
pub type TestCaseInput = Vec<DataValue>; pub type TestCaseInput = Vec<DataValue>;
@@ -166,7 +167,7 @@ where
Ok(inputs) Ok(inputs)
} }
fn run_func_passes(&self, func: Function) -> Function { fn run_func_passes(&mut self, func: Function) -> Result<Function> {
// Do a NaN Canonicalization pass on the generated function. // Do a NaN Canonicalization pass on the generated function.
// //
// Both IEEE754 and the Wasm spec are somewhat loose about what is allowed // Both IEEE754 and the Wasm spec are somewhat loose about what is allowed
@@ -201,12 +202,17 @@ where
ctx.canonicalize_nans(isa.as_ref()) ctx.canonicalize_nans(isa.as_ref())
.expect("Failed NaN canonicalization pass"); .expect("Failed NaN canonicalization pass");
ctx.func // Run the int_divz pass
//
// This pass replaces divs and rems with sequences that do not trap
pass::do_int_divz_pass(self, &mut ctx.func)?;
Ok(ctx.func)
} }
fn generate_func(&mut self) -> Result<Function> { fn generate_func(&mut self) -> Result<Function> {
let func = FunctionGenerator::new(&mut self.u, &self.config).generate()?; let func = FunctionGenerator::new(&mut self.u, &self.config).generate()?;
Ok(self.run_func_passes(func)) self.run_func_passes(func)
} }
pub fn generate_test(mut self) -> Result<TestCase> { pub fn generate_test(mut self) -> Result<TestCase> {

View File

@@ -0,0 +1,78 @@
use crate::FuzzGen;
use anyhow::Result;
use cranelift::codegen::cursor::{Cursor, FuncCursor};
use cranelift::codegen::ir::{Function, Inst, Opcode};
use cranelift::prelude::{InstBuilder, IntCC};
/// Guards every instruction in `func` that can raise an `int_divz` trap so that it no
/// longer traps.
///
/// The decision to guard is drawn once per function from `fuzz.u`, using the configured
/// `allowed_int_divz_ratio`. When the draw comes back true the function is returned
/// untouched and may therefore trap.
///
/// Deciding per function rather than per instruction is deliberate: experimentally, a
/// per-instruction decision with a 0.1% allow rate still left 4.4% of runs trapping.
/// Deciding per function lowers the number of trapping runs and also consumes fewer
/// fuzzer input bytes.
///
/// # Errors
///
/// Propagates any error produced while drawing the ratio decision from `fuzz.u`.
pub fn do_int_divz_pass(fuzz: &mut FuzzGen, func: &mut Function) -> Result<()> {
    let (numerator, denominator) = fuzz.config.allowed_int_divz_ratio;
    let allow_traps = fuzz.u.ratio(numerator, denominator)?;
    if allow_traps {
        // This function was picked to keep its potentially trapping instructions.
        return Ok(());
    }

    let mut cursor = FuncCursor::new(func);
    while cursor.next_block().is_some() {
        while let Some(inst) = cursor.next_inst() {
            if !can_int_divz(&cursor, inst) {
                continue;
            }
            insert_int_divz_sequence(&mut cursor, inst);
        }
    }

    Ok(())
}
/// Reports whether `inst` is an instruction that can raise an `int_divz` trap.
///
/// Only the opcode is inspected: `Sdiv`, `Udiv`, `Srem` and `Urem` yield `true`, every
/// other opcode yields `false`.
fn can_int_divz(pos: &FuncCursor, inst: Inst) -> bool {
    matches!(
        pos.func.dfg[inst].opcode(),
        Opcode::Sdiv | Opcode::Udiv | Opcode::Srem | Opcode::Urem
    )
}
/// Prepends a guard sequence to `inst` and rewires its denominator so that it cannot
/// raise an `int_divz` trap.
///
/// The guard replaces the denominator with `1` whenever the original operands would trap:
///
/// * for every handled opcode, when the denominator is zero;
/// * additionally for `Srem` / `Sdiv`, when the numerator is `INT_MIN` and the
///   denominator is `-1`.
///
/// # Panics
///
/// Panics if `inst` has fewer than two arguments, or if its first two arguments do not
/// share the same type.
fn insert_int_divz_sequence(pos: &mut FuncCursor, inst: Inst) {
    let is_signed = matches!(pos.func.dfg[inst].opcode(), Opcode::Srem | Opcode::Sdiv);

    let operands = pos.func.dfg.inst_args(inst);
    let lhs = operands[0];
    let rhs = operands[1];

    let ty = pos.func.dfg.value_type(lhs);
    assert_eq!(ty, pos.func.dfg.value_type(rhs));

    // Every handled opcode traps on a zero denominator.
    let zero = pos.ins().iconst(ty, 0);
    let one = pos.ins().iconst(ty, 1);
    let rhs_is_zero = pos.ins().icmp(IntCC::Equal, rhs, zero);

    let must_replace = if is_signed {
        // Signed division and remainder also trap on INT_MIN / -1, so that pair has to be
        // detected as well. INT_MIN is built as 1 << (bits - 1).
        let sign_bit = ty.lane_bits() as i64 - 1;
        let int_min = pos.ins().ishl_imm(one, sign_bit);

        // Build -1 as 0 - 1.
        // TODO: A iconst -1 would be clearer, but #2906 makes this impossible for i128
        let neg_one = pos.ins().isub(zero, one);

        let lhs_is_int_min = pos.ins().icmp(IntCC::Equal, lhs, int_min);
        let rhs_is_neg_one = pos.ins().icmp(IntCC::Equal, rhs, neg_one);
        let overflows = pos.ins().band(lhs_is_int_min, rhs_is_neg_one);

        // The zero-denominator case still applies to the signed opcodes.
        pos.ins().bor(rhs_is_zero, overflows)
    } else {
        rhs_is_zero
    };

    // Whenever the operation would trap, divide by 1 instead.
    let guarded_rhs = pos.ins().select(must_replace, one, rhs);

    // Point the original instruction at the guarded denominator.
    pos.func.dfg.inst_args_mut(inst)[1] = guarded_rhs;
}