fuzzgen: Refactor name and signature generation (#5764)

* fuzzgen: Move cranelift type generation into CraneliftArbitrary

* fuzzgen: Deduplicate DataValue generation

* fuzzgen: Remove unused code

* fuzzgen: Pass allowed function calls into `FunctionGenerator`
This commit is contained in:
Afonso Bordado
2023-02-17 20:48:12 +00:00
committed by GitHub
parent a7bd65d116
commit 853ff787f3
4 changed files with 214 additions and 162 deletions

View File

@@ -30,7 +30,8 @@ pub struct Config {
pub switch_cases: RangeInclusive<usize>, pub switch_cases: RangeInclusive<usize>,
pub switch_max_range_size: RangeInclusive<usize>, pub switch_max_range_size: RangeInclusive<usize>,
pub funcrefs_per_function: RangeInclusive<usize>, /// Number of distinct functions in the same testsuite that we allow calling per function.
pub usercalls: RangeInclusive<usize>,
/// Stack slots. /// Stack slots.
/// The combination of these two determines stack usage per function /// The combination of these two determines stack usage per function
@@ -79,7 +80,7 @@ impl Default for Config {
switch_cases: 0..=64, switch_cases: 0..=64,
// Ranges smaller than 2 don't make sense. // Ranges smaller than 2 don't make sense.
switch_max_range_size: 2..=32, switch_max_range_size: 2..=32,
funcrefs_per_function: 0..=8, usercalls: 0..=8,
static_stack_slots_per_function: 0..=8, static_stack_slots_per_function: 0..=8,
static_stack_slot_size: 0..=128, static_stack_slot_size: 0..=128,
// We need the mix of sizes that allows us to: // We need the mix of sizes that allows us to:

View File

@@ -0,0 +1,94 @@
use crate::codegen::ir::{ArgumentExtension, ArgumentPurpose};
use anyhow::Result;
use cranelift::codegen::data_value::DataValue;
use cranelift::codegen::ir::types::*;
use cranelift::codegen::ir::{AbiParam, Signature, Type};
use cranelift::codegen::isa::CallConv;
use arbitrary::Unstructured;
use cranelift::prelude::{Ieee32, Ieee64};
/// A trait for generating random Cranelift data structures.
///
/// Every method is fallible and returns an `anyhow::Result`, so a failure while
/// producing a value is reported to the caller instead of panicking.
pub trait CraneliftArbitrary {
/// Generates a Cranelift value [`Type`].
///
/// The `&mut Unstructured` implementation in this file only picks from the
/// scalar integer and float types (`I8`..`I128`, `F32`, `F64`).
fn _type(&mut self) -> Result<Type>;
/// Generates a calling convention.
///
/// The `&mut Unstructured` implementation in this file always returns
/// `CallConv::SystemV`.
fn callconv(&mut self) -> Result<CallConv>;
/// Generates a single [`AbiParam`] (value type, purpose and extension) suitable
/// for use as a parameter or a return value of a [`Signature`].
fn abi_param(&mut self) -> Result<AbiParam>;
/// Generates a function [`Signature`].
///
/// NOTE(review): despite the `max_` prefix, the `&mut Unstructured`
/// implementation in this file emits exactly `max_params` parameters and
/// exactly `max_rets` return values; callers are expected to pick the counts
/// themselves before calling.
fn signature(&mut self, max_params: usize, max_rets: usize) -> Result<Signature>;
/// Generates a [`DataValue`] of the requested type `ty`.
///
/// The `&mut Unstructured` implementation in this file supports the integer
/// types and `F32`/`F64`, and hits `unimplemented!()` for any other type.
fn datavalue(&mut self, ty: Type) -> Result<DataValue>;
}
impl<'a> CraneliftArbitrary for &mut Unstructured<'a> {
    /// Picks one of the scalar Cranelift types at random.
    fn _type(&mut self) -> Result<Type> {
        // TODO: It would be nice if we could get these directly from cranelift
        // TODO: vector types
        // Reference types (R32, R64) are intentionally left out of the pool.
        let candidates: [Type; 7] = [I8, I16, I32, I64, I128, F32, F64];
        let picked = self.choose(&candidates[..])?;
        Ok(*picked)
    }

    /// Returns the calling convention to use; currently always `SystemV`.
    fn callconv(&mut self) -> Result<CallConv> {
        // TODO: Generate random CallConvs per target
        let conv = CallConv::SystemV;
        Ok(conv)
    }

    /// Builds a random ABI parameter: a random type, the `Normal` purpose, and
    /// (for integer types only) a randomly chosen extension mode.
    fn abi_param(&mut self) -> Result<AbiParam> {
        let value_type = self._type()?;

        // Only integers get a random extension; everything else is never extended.
        let extension = match value_type.is_int() {
            true => {
                let modes = [
                    ArgumentExtension::Sext,
                    ArgumentExtension::Uext,
                    ArgumentExtension::None,
                ];
                *self.choose(&modes)?
            }
            false => ArgumentExtension::None,
        };

        Ok(AbiParam {
            value_type,
            // TODO: There are more argument purposes to be explored...
            purpose: ArgumentPurpose::Normal,
            extension,
        })
    }

    /// Builds a signature with exactly `max_params` parameters and exactly
    /// `max_rets` return values, each produced by `abi_param`.
    fn signature(&mut self, max_params: usize, max_rets: usize) -> Result<Signature> {
        let mut sig = Signature::new(self.callconv()?);

        // Parameters are generated before returns so that the input bytes are
        // consumed in a fixed order.
        sig.params = (0..max_params)
            .map(|_| self.abi_param())
            .collect::<Result<_>>()?;
        sig.returns = (0..max_rets)
            .map(|_| self.abi_param())
            .collect::<Result<_>>()?;

        Ok(sig)
    }

    /// Produces a random `DataValue` of type `ty`.
    ///
    /// Integer and `F32`/`F64` types are supported; any other type reaches
    /// `unimplemented!()`.
    fn datavalue(&mut self, ty: Type) -> Result<DataValue> {
        if ty.is_int() {
            // Draw a value of the exact width, then widen it (sign-extending)
            // to the i128 that `from_integer` expects.
            let widened: i128 = match ty {
                I8 => i128::from(self.arbitrary::<i8>()?),
                I16 => i128::from(self.arbitrary::<i16>()?),
                I32 => i128::from(self.arbitrary::<i32>()?),
                I64 => i128::from(self.arbitrary::<i64>()?),
                I128 => self.arbitrary::<i128>()?,
                _ => unreachable!(),
            };
            return Ok(DataValue::from_integer(widened, ty)?);
        }

        // f{32,64}::arbitrary does not generate a bunch of important values
        // such as Signaling NaN's / NaN's with payload, so generate floats from integers.
        let value = match ty {
            F32 => {
                let bits = self.arbitrary::<u32>()?;
                DataValue::F32(Ieee32::with_bits(bits))
            }
            F64 => {
                let bits = self.arbitrary::<u64>()?;
                DataValue::F64(Ieee64::with_bits(bits))
            }
            _ => unimplemented!(),
        };
        Ok(value)
    }
}

View File

@@ -1,12 +1,13 @@
use crate::codegen::ir::{ArgumentExtension, ArgumentPurpose};
use crate::config::Config; use crate::config::Config;
use crate::cranelift_arbitrary::CraneliftArbitrary;
use anyhow::Result; use anyhow::Result;
use arbitrary::{Arbitrary, Unstructured}; use arbitrary::{Arbitrary, Unstructured};
use cranelift::codegen::data_value::DataValue;
use cranelift::codegen::ir::instructions::InstructionFormat; use cranelift::codegen::ir::instructions::InstructionFormat;
use cranelift::codegen::ir::stackslot::StackSize; use cranelift::codegen::ir::stackslot::StackSize;
use cranelift::codegen::ir::{types::*, FuncRef, LibCall, UserExternalName, UserFuncName}; use cranelift::codegen::ir::{types::*, FuncRef, LibCall, UserExternalName, UserFuncName};
use cranelift::codegen::ir::{ use cranelift::codegen::ir::{
AbiParam, Block, ExternalName, Function, Opcode, Signature, StackSlot, Type, Value, Block, ExternalName, Function, Opcode, Signature, StackSlot, Type, Value,
}; };
use cranelift::codegen::isa::CallConv; use cranelift::codegen::isa::CallConv;
use cranelift::frontend::{FunctionBuilder, FunctionBuilderContext, Switch, Variable}; use cranelift::frontend::{FunctionBuilder, FunctionBuilderContext, Switch, Variable};
@@ -1279,16 +1280,6 @@ const OPCODE_SIGNATURES: &[OpcodeSignature] = &[
(Opcode::Call, &[], &[], insert_call), (Opcode::Call, &[], &[], insert_call),
]; ];
/// These libcalls need an interpreter implementation in `cranelift-fuzzgen.rs`
const ALLOWED_LIBCALLS: &'static [LibCall] = &[
LibCall::CeilF32,
LibCall::CeilF64,
LibCall::FloorF32,
LibCall::FloorF64,
LibCall::TruncF32,
LibCall::TruncF64,
];
pub struct FunctionGenerator<'r, 'data> pub struct FunctionGenerator<'r, 'data>
where where
'data: 'r, 'data: 'r,
@@ -1297,6 +1288,8 @@ where
config: &'r Config, config: &'r Config,
resources: Resources, resources: Resources,
target_triple: Triple, target_triple: Triple,
name: UserFuncName,
signature: Signature,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@@ -1325,6 +1318,8 @@ struct Resources {
block_terminators: Vec<BlockTerminator>, block_terminators: Vec<BlockTerminator>,
func_refs: Vec<(Signature, FuncRef)>, func_refs: Vec<(Signature, FuncRef)>,
stack_slots: Vec<(StackSlot, StackSize)>, stack_slots: Vec<(StackSlot, StackSize)>,
usercalls: Vec<(UserExternalName, Signature)>,
libcalls: Vec<LibCall>,
} }
impl Resources { impl Resources {
@@ -1359,12 +1354,26 @@ impl<'r, 'data> FunctionGenerator<'r, 'data>
where where
'data: 'r, 'data: 'r,
{ {
pub fn new(u: &'r mut Unstructured<'data>, config: &'r Config, target_triple: Triple) -> Self { pub fn new(
u: &'r mut Unstructured<'data>,
config: &'r Config,
target_triple: Triple,
name: UserFuncName,
signature: Signature,
usercalls: Vec<(UserExternalName, Signature)>,
libcalls: Vec<LibCall>,
) -> Self {
Self { Self {
u, u,
config, config,
resources: Resources::default(), resources: Resources {
usercalls,
libcalls,
..Resources::default()
},
target_triple, target_triple,
name,
signature,
} }
} }
@@ -1373,65 +1382,12 @@ where
Ok(self.u.int_in_range(param.clone())?) Ok(self.u.int_in_range(param.clone())?)
} }
fn generate_callconv(&mut self) -> Result<CallConv> {
// TODO: Generate random CallConvs per target
Ok(CallConv::SystemV)
}
fn system_callconv(&mut self) -> CallConv { fn system_callconv(&mut self) -> CallConv {
// TODO: This currently only runs on linux, so this is the only choice // TODO: This currently only runs on linux, so this is the only choice
// We should improve this once we generate flags and targets // We should improve this once we generate flags and targets
CallConv::SystemV CallConv::SystemV
} }
fn generate_type(&mut self) -> Result<Type> {
// TODO: It would be nice if we could get these directly from cranelift
let scalars = [
I8, I16, I32, I64, I128, F32, F64,
// R32, R64,
];
// TODO: vector types
let ty = self.u.choose(&scalars[..])?;
Ok(*ty)
}
fn generate_abi_param(&mut self) -> Result<AbiParam> {
let value_type = self.generate_type()?;
// TODO: There are more argument purposes to be explored...
let purpose = ArgumentPurpose::Normal;
let extension = if value_type.is_int() {
*self.u.choose(&[
ArgumentExtension::Sext,
ArgumentExtension::Uext,
ArgumentExtension::None,
])?
} else {
ArgumentExtension::None
};
Ok(AbiParam {
value_type,
purpose,
extension,
})
}
fn generate_signature(&mut self) -> Result<Signature> {
let callconv = self.generate_callconv()?;
let mut sig = Signature::new(callconv);
for _ in 0..self.param(&self.config.signature_params)? {
sig.params.push(self.generate_abi_param()?);
}
for _ in 0..self.param(&self.config.signature_rets)? {
sig.returns.push(self.generate_abi_param()?);
}
Ok(sig)
}
/// Finds a stack slot with size of at least n bytes /// Finds a stack slot with size of at least n bytes
fn stack_slot_with_size(&mut self, n: u32) -> Result<(StackSlot, StackSize)> { fn stack_slot_with_size(&mut self, n: u32) -> Result<(StackSlot, StackSize)> {
let first = self let first = self
@@ -1494,31 +1450,18 @@ where
/// Generates an instruction(`iconst`/`fconst`/etc...) to introduce a constant value /// Generates an instruction(`iconst`/`fconst`/etc...) to introduce a constant value
fn generate_const(&mut self, builder: &mut FunctionBuilder, ty: Type) -> Result<Value> { fn generate_const(&mut self, builder: &mut FunctionBuilder, ty: Type) -> Result<Value> {
Ok(match ty { Ok(match self.u.datavalue(ty)? {
I128 => { DataValue::I8(i) => builder.ins().iconst(ty, i as i64),
// See: https://github.com/bytecodealliance/wasmtime/issues/2906 DataValue::I16(i) => builder.ins().iconst(ty, i as i64),
let hi = builder.ins().iconst(I64, self.u.arbitrary::<i64>()?); DataValue::I32(i) => builder.ins().iconst(ty, i as i64),
let lo = builder.ins().iconst(I64, self.u.arbitrary::<i64>()?); DataValue::I64(i) => builder.ins().iconst(ty, i as i64),
DataValue::I128(i) => {
let hi = builder.ins().iconst(I64, (i >> 64) as i64);
let lo = builder.ins().iconst(I64, i as i64);
builder.ins().iconcat(lo, hi) builder.ins().iconcat(lo, hi)
} }
ty if ty.is_int() => { DataValue::F32(f) => builder.ins().f32const(f),
let imm64 = match ty { DataValue::F64(f) => builder.ins().f64const(f),
I8 => self.u.arbitrary::<i8>()? as i64,
I16 => self.u.arbitrary::<i16>()? as i64,
I32 => self.u.arbitrary::<i32>()? as i64,
I64 => self.u.arbitrary::<i64>()?,
_ => unreachable!(),
};
builder.ins().iconst(ty, imm64)
}
// f{32,64}::arbitrary does not generate a bunch of important values
// such as Signaling NaN's / NaN's with payload, so generate floats from integers.
F32 => builder
.ins()
.f32const(f32::from_bits(u32::arbitrary(self.u)?)),
F64 => builder
.ins()
.f64const(f64::from_bits(u64::arbitrary(self.u)?)),
_ => unimplemented!(), _ => unimplemented!(),
}) })
} }
@@ -1650,34 +1593,38 @@ where
} }
fn generate_funcrefs(&mut self, builder: &mut FunctionBuilder) -> Result<()> { fn generate_funcrefs(&mut self, builder: &mut FunctionBuilder) -> Result<()> {
let count = self.param(&self.config.funcrefs_per_function)?; let usercalls: Vec<(ExternalName, Signature)> = self
for func_index in 0..count.try_into().unwrap() { .resources
let (ext_name, sig) = if self.u.arbitrary::<bool>()? { .usercalls
let user_func_ref = builder .iter()
.func .map(|(name, signature)| {
.declare_imported_user_function(UserExternalName { let user_func_ref = builder.func.declare_imported_user_function(name.clone());
namespace: 0,
index: func_index,
});
let name = ExternalName::User(user_func_ref); let name = ExternalName::User(user_func_ref);
let signature = self.generate_signature()?; (name, signature.clone())
(name, signature) })
} else { .collect();
let libcall = *self.u.choose(ALLOWED_LIBCALLS)?;
// TODO: Use [CallConv::for_libcall] once we generate flags.
let callconv = self.system_callconv();
let signature = libcall.signature(callconv);
(ExternalName::LibCall(libcall), signature)
};
let sig_ref = builder.import_signature(sig.clone()); let lib_callconv = self.system_callconv();
let libcalls: Vec<(ExternalName, Signature)> = self
.resources
.libcalls
.iter()
.map(|libcall| {
let signature = libcall.signature(lib_callconv);
let name = ExternalName::LibCall(*libcall);
(name, signature)
})
.collect();
for (name, signature) in usercalls.into_iter().chain(libcalls) {
let sig_ref = builder.import_signature(signature.clone());
let func_ref = builder.import_function(ExtFuncData { let func_ref = builder.import_function(ExtFuncData {
name: ext_name, name,
signature: sig_ref, signature: sig_ref,
colocated: self.u.arbitrary()?, colocated: self.u.arbitrary()?,
}); });
self.resources.func_refs.push((sig, func_ref)); self.resources.func_refs.push((signature, func_ref));
} }
Ok(()) Ok(())
@@ -1727,7 +1674,7 @@ where
} }
/// Creates a random amount of blocks in this function /// Creates a random amount of blocks in this function
fn generate_blocks(&mut self, builder: &mut FunctionBuilder, sig: &Signature) -> Result<()> { fn generate_blocks(&mut self, builder: &mut FunctionBuilder) -> Result<()> {
let extra_block_count = self.param(&self.config.blocks_per_function)?; let extra_block_count = self.param(&self.config.blocks_per_function)?;
// We must always have at least one block, so we generate the "extra" blocks and add 1 for // We must always have at least one block, so we generate the "extra" blocks and add 1 for
@@ -1751,7 +1698,10 @@ where
// a random signature; // a random signature;
if is_entry { if is_entry {
builder.append_block_params_for_function_params(block); builder.append_block_params_for_function_params(block);
Ok((block, sig.params.iter().map(|a| a.value_type).collect())) Ok((
block,
self.signature.params.iter().map(|a| a.value_type).collect(),
))
} else { } else {
let sig = self.generate_block_signature()?; let sig = self.generate_block_signature()?;
sig.iter().for_each(|ty| { sig.iter().for_each(|ty| {
@@ -1882,7 +1832,7 @@ where
let mut params = Vec::with_capacity(param_count); let mut params = Vec::with_capacity(param_count);
for _ in 0..param_count { for _ in 0..param_count {
params.push(self.generate_type()?); params.push(self.u._type()?);
} }
Ok(params) Ok(params)
} }
@@ -1902,7 +1852,7 @@ where
// Create a pool of vars that are going to be used in this function // Create a pool of vars that are going to be used in this function
for _ in 0..self.param(&self.config.vars_per_function)? { for _ in 0..self.param(&self.config.vars_per_function)? {
let ty = self.generate_type()?; let ty = self.u._type()?;
let value = self.generate_const(builder, ty)?; let value = self.generate_const(builder, ty)?;
vars.push((ty, value)); vars.push((ty, value));
} }
@@ -1930,15 +1880,12 @@ where
/// Because we generate all blocks and variables up front we already know everything that /// Because we generate all blocks and variables up front we already know everything that
/// we need when generating instructions (i.e. jump targets / variables) /// we need when generating instructions (i.e. jump targets / variables)
pub fn generate(mut self) -> Result<Function> { pub fn generate(mut self) -> Result<Function> {
let sig = self.generate_signature()?;
let mut fn_builder_ctx = FunctionBuilderContext::new(); let mut fn_builder_ctx = FunctionBuilderContext::new();
// function name must be in a different namespace than TESTFILE_NAMESPACE (0) let mut func = Function::with_name_signature(self.name.clone(), self.signature.clone());
let mut func = Function::with_name_signature(UserFuncName::user(1, 0), sig.clone());
let mut builder = FunctionBuilder::new(&mut func, &mut fn_builder_ctx); let mut builder = FunctionBuilder::new(&mut func, &mut fn_builder_ctx);
self.generate_blocks(&mut builder, &sig)?; self.generate_blocks(&mut builder)?;
// Function preamble // Function preamble
self.generate_funcrefs(&mut builder)?; self.generate_funcrefs(&mut builder)?;

View File

@@ -4,34 +4,33 @@ use crate::settings::{Flags, OptLevel};
use anyhow::Result; use anyhow::Result;
use arbitrary::{Arbitrary, Unstructured}; use arbitrary::{Arbitrary, Unstructured};
use cranelift::codegen::data_value::DataValue; use cranelift::codegen::data_value::DataValue;
use cranelift::codegen::ir::types::*; use cranelift::codegen::ir::{types::*, UserExternalName, UserFuncName};
use cranelift::codegen::ir::Function; use cranelift::codegen::ir::{Function, LibCall};
use cranelift::codegen::Context; use cranelift::codegen::Context;
use cranelift::prelude::isa; use cranelift::prelude::isa;
use cranelift::prelude::*; use cranelift::prelude::*;
use cranelift_arbitrary::CraneliftArbitrary;
use cranelift_native::builder_with_options; use cranelift_native::builder_with_options;
use std::fmt; use std::fmt;
use target_lexicon::{Architecture, Triple}; use target_lexicon::{Architecture, Triple};
mod config; mod config;
mod cranelift_arbitrary;
mod function_generator; mod function_generator;
mod passes; mod passes;
/// These libcalls need an interpreter implementation in `cranelift-fuzzgen.rs`
const ALLOWED_LIBCALLS: &'static [LibCall] = &[
LibCall::CeilF32,
LibCall::CeilF64,
LibCall::FloorF32,
LibCall::FloorF64,
LibCall::TruncF32,
LibCall::TruncF64,
];
pub type TestCaseInput = Vec<DataValue>; pub type TestCaseInput = Vec<DataValue>;
/// Simple wrapper to generate a single Cranelift `Function`.
#[derive(Debug)]
pub struct SingleFunction(pub Function);
impl<'a> Arbitrary<'a> for SingleFunction {
fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result<Self> {
FuzzGen::new(u)
.generate_func(Triple::host())
.map_err(|_| arbitrary::Error::IncorrectFormat)
.map(Self)
}
}
/// Print only non default flags. /// Print only non default flags.
fn write_non_default_flags(f: &mut fmt::Formatter<'_>, flags: &settings::Flags) -> fmt::Result { fn write_non_default_flags(f: &mut fmt::Formatter<'_>, flags: &settings::Flags) -> fmt::Result {
let default_flags = settings::Flags::new(settings::builder()); let default_flags = settings::Flags::new(settings::builder());
@@ -176,27 +175,6 @@ where
} }
} }
fn generate_datavalue(&mut self, ty: Type) -> Result<DataValue> {
Ok(match ty {
ty if ty.is_int() => {
let imm = match ty {
I8 => self.u.arbitrary::<i8>()? as i128,
I16 => self.u.arbitrary::<i16>()? as i128,
I32 => self.u.arbitrary::<i32>()? as i128,
I64 => self.u.arbitrary::<i64>()? as i128,
I128 => self.u.arbitrary::<i128>()?,
_ => unreachable!(),
};
DataValue::from_integer(imm, ty)?
}
// f{32,64}::arbitrary does not generate a bunch of important values
// such as Signaling NaN's / NaN's with payload, so generate floats from integers.
F32 => DataValue::F32(Ieee32::with_bits(u32::arbitrary(self.u)?)),
F64 => DataValue::F64(Ieee64::with_bits(u64::arbitrary(self.u)?)),
_ => unimplemented!(),
})
}
fn generate_test_inputs(mut self, signature: &Signature) -> Result<Vec<TestCaseInput>> { fn generate_test_inputs(mut self, signature: &Signature) -> Result<Vec<TestCaseInput>> {
let mut inputs = Vec::new(); let mut inputs = Vec::new();
@@ -209,7 +187,7 @@ where
let test_args = signature let test_args = signature
.params .params
.iter() .iter()
.map(|p| self.generate_datavalue(p.value_type)) .map(|p| self.u.datavalue(p.value_type))
.collect::<Result<TestCaseInput>>()?; .collect::<Result<TestCaseInput>>()?;
inputs.push(test_args); inputs.push(test_args);
@@ -276,7 +254,38 @@ where
} }
fn generate_func(&mut self, target_triple: Triple) -> Result<Function> { fn generate_func(&mut self, target_triple: Triple) -> Result<Function> {
let func = FunctionGenerator::new(&mut self.u, &self.config, target_triple).generate()?; let max_params = self.u.int_in_range(self.config.signature_params.clone())?;
let max_rets = self.u.int_in_range(self.config.signature_rets.clone())?;
let sig = self.u.signature(max_params, max_rets)?;
// Function name must be in a different namespace than TESTFILE_NAMESPACE (0)
let fname = UserFuncName::user(1, 0);
// Generate the external functions that we allow calling in this function.
let usercalls = (0..self.u.int_in_range(self.config.usercalls.clone())?)
.map(|i| {
let max_params = self.u.int_in_range(self.config.signature_params.clone())?;
let max_rets = self.u.int_in_range(self.config.signature_rets.clone())?;
let sig = self.u.signature(max_params, max_rets)?;
let name = UserExternalName {
namespace: 2,
index: i as u32,
};
Ok((name, sig))
})
.collect::<Result<Vec<(UserExternalName, Signature)>>>()?;
let func = FunctionGenerator::new(
&mut self.u,
&self.config,
target_triple,
fname,
sig,
usercalls,
ALLOWED_LIBCALLS.to_vec(),
)
.generate()?;
self.run_func_passes(func) self.run_func_passes(func)
} }
@@ -364,8 +373,9 @@ where
pub fn generate_host_test(mut self) -> Result<TestCase> { pub fn generate_host_test(mut self) -> Result<TestCase> {
// If we're generating test inputs as well as a function, then we're planning to execute // If we're generating test inputs as well as a function, then we're planning to execute
// this function. That means that any function references in it need to exist. We don't yet // this function. That means that any function references in it need to exist. We don't yet
// have infrastructure for generating multiple functions, so just don't generate funcrefs. // have infrastructure for generating multiple functions, so just don't generate user call
self.config.funcrefs_per_function = 0..=0; // function references.
self.config.usercalls = 0..=0;
// TestCase is meant to be consumed by a runner, so we make the assumption here that we're // TestCase is meant to be consumed by a runner, so we make the assumption here that we're
// generating a TargetIsa for the host. // generating a TargetIsa for the host.