//! A verifier for ensuring that functions are well formed. //! It verifies: //! //! EBB integrity //! //! - All instructions reached from the `ebb_insts` iterator must belong to //! the EBB as reported by `inst_ebb()`. //! - Every EBB must end in a terminator instruction, and no other instruction //! can be a terminator. //! - Every value in the `ebb_args` iterator belongs to the EBB as reported by `value_ebb`. //! //! Instruction integrity //! //! - The instruction format must match the opcode. //! - All result values must be created for multi-valued instructions. //! - Instructions with no results must have a VOID `first_type()`. //! - All referenced entities must exist. (Values, EBBs, stack slots, ...) //! //! SSA form //! //! - Values must be defined by an instruction that exists and that is inserted in //! an EBB, or be an argument of an existing EBB. //! - Values used by an instruction must dominate the instruction. //! //! Control flow graph and dominator tree integrity: //! //! - All predecessors in the CFG must be branches to the EBB. //! - All branches to an EBB must be present in the CFG. //! - A recomputed dominator tree is identical to the existing one. //! //! Type checking //! //! - Compare input and output values against the opcode's type constraints. //! For polymorphic opcodes, determine the controlling type variable first. //! - Branches and jumps must pass arguments to destination EBBs that match the //! expected types exactly. The number of arguments must match. //! - All EBBs in a jump_table must take no arguments. //! - Function calls are type checked against their signature. //! - The entry block must take arguments that match the signature of the current //! function. //! - All return instructions must have return value operands matching the current //! function signature. //! //! TODO: //! Ad hoc checking //! //! - Stack slot loads and stores must be in-bounds. //! - Immediate constraints for certain opcodes, like `udiv_imm v3, 0`. //! - Extend / truncate instructions have more type constraints: Source type can't be //! larger / smaller than result type. //! - `Insertlane` and `extractlane` instructions have immediate lane numbers that must be in //! range for their polymorphic type. //! - Swizzle and shuffle instructions take a variable number of lane arguments. The number //! of arguments must match the destination type, and the lane indexes must be in range. use dominator_tree::DominatorTree; use flowgraph::ControlFlowGraph; use ir::entities::AnyEntity; use ir::instructions::{InstructionFormat, BranchInfo, ResolvedConstraint, CallInfo}; use ir::{types, Function, ValueDef, Ebb, Inst, SigRef, FuncRef, ValueList, JumpTable, Value, Type}; use Context; use std::fmt::{self, Display, Formatter}; use std::result; use std::collections::BTreeSet; /// A verifier error. #[derive(Debug, PartialEq, Eq)] pub struct Error { /// The entity causing the verifier error. pub location: AnyEntity, /// Error message. pub message: String, } impl Display for Error { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "{}: {}", self.location, self.message) } } /// Verifier result. pub type Result = result::Result; // Create an `Err` variant of `Result` from a location and `format!` arguments. macro_rules! err { ( $loc:expr, $msg:expr ) => { Err(Error { location: $loc.into(), message: String::from($msg), }) }; ( $loc:expr, $fmt:expr, $( $arg:expr ),+ ) => { Err(Error { location: $loc.into(), message: format!( $fmt, $( $arg ),+ ), }) }; } /// Verify `func`. pub fn verify_function(func: &Function) -> Result<()> { Verifier::new(func).run() } /// Verify `ctx`. pub fn verify_context(ctx: &Context) -> Result<()> { let verifier = Verifier::new(&ctx.func); verifier.domtree_integrity(&ctx.domtree)?; verifier.cfg_integrity(&ctx.cfg)?; verifier.run() } struct Verifier<'a> { func: &'a Function, cfg: ControlFlowGraph, domtree: DominatorTree, } impl<'a> Verifier<'a> { pub fn new(func: &'a Function) -> Verifier { let cfg = ControlFlowGraph::with_function(func); let domtree = DominatorTree::with_function(func, &cfg); Verifier { func: func, cfg: cfg, domtree: domtree, } } fn ebb_integrity(&self, ebb: Ebb, inst: Inst) -> Result<()> { let is_terminator = self.func.dfg[inst].opcode().is_terminator(); let is_last_inst = self.func.layout.last_inst(ebb) == Some(inst); if is_terminator && !is_last_inst { // Terminating instructions only occur at the end of blocks. return err!(inst, "a terminator instruction was encountered before the end of {}", ebb); } if is_last_inst && !is_terminator { return err!(ebb, "block does not end in a terminator instruction!"); } // Instructions belong to the correct ebb. let inst_ebb = self.func.layout.inst_ebb(inst); if inst_ebb != Some(ebb) { return err!(inst, "should belong to {} not {:?}", ebb, inst_ebb); } // Arguments belong to the correct ebb. for arg in self.func.dfg.ebb_args(ebb) { match self.func.dfg.value_def(arg) { ValueDef::Arg(arg_ebb, _) => { if ebb != arg_ebb { return err!(arg, "does not belong to {}", ebb); } } _ => { return err!(arg, "expected an argument, found a result"); } } } Ok(()) } fn instruction_integrity(&self, inst: Inst) -> Result<()> { let inst_data = &self.func.dfg[inst]; let dfg = &self.func.dfg; // The instruction format matches the opcode if inst_data.opcode().format() != InstructionFormat::from(inst_data) { return err!(inst, "instruction opcode doesn't match instruction format"); } let fixed_results = inst_data.opcode().constraints().fixed_results(); // var_results is 0 if we aren't a call instruction let var_results = dfg.call_signature(inst) .map(|sig| dfg.signatures[sig].return_types.len()) .unwrap_or(0); let total_results = fixed_results + var_results; if total_results == 0 { // Instructions with no results have a NULL `first_type()` let ret_type = inst_data.first_type(); if ret_type != types::VOID { return err!(inst, "instruction with no results expects NULL return type, found {}", ret_type); } } else { // All result values for multi-valued instructions are created let got_results = dfg.inst_results(inst).count(); if got_results != total_results { return err!(inst, "expected {} result values, found {}", total_results, got_results); } } self.verify_entity_references(inst) } fn verify_entity_references(&self, inst: Inst) -> Result<()> { use ir::instructions::InstructionData::*; for &arg in self.func.dfg.inst_args(inst) { self.verify_value(inst, arg)?; } for res in self.func.dfg.inst_results(inst) { self.verify_value(inst, res)?; } match &self.func.dfg[inst] { &MultiAry { ref args, .. } => { self.verify_value_list(inst, args)?; } &Jump { destination, ref args, .. } | &Branch { destination, ref args, .. } | &BranchIcmp { destination, ref args, .. } => { self.verify_ebb(inst, destination)?; self.verify_value_list(inst, args)?; } &BranchTable { table, .. } => { self.verify_jump_table(inst, table)?; } &Call { func_ref, ref args, .. } => { self.verify_func_ref(inst, func_ref)?; self.verify_value_list(inst, args)?; } &IndirectCall { sig_ref, ref args, .. } => { self.verify_sig_ref(inst, sig_ref)?; self.verify_value_list(inst, args)?; } // Exhaustive list so we can't forget to add new formats &Nullary { .. } | &Unary { .. } | &UnaryImm { .. } | &UnaryIeee32 { .. } | &UnaryIeee64 { .. } | &UnarySplit { .. } | &Binary { .. } | &BinaryImm { .. } | &BinaryOverflow { .. } | &Ternary { .. } | &InsertLane { .. } | &ExtractLane { .. } | &IntCompare { .. } | &IntCompareImm { .. } | &FloatCompare { .. } => {} } Ok(()) } fn verify_ebb(&self, inst: Inst, e: Ebb) -> Result<()> { if !self.func.dfg.ebb_is_valid(e) { err!(inst, "invalid ebb reference {}", e) } else { Ok(()) } } fn verify_sig_ref(&self, inst: Inst, s: SigRef) -> Result<()> { if !self.func.dfg.signatures.is_valid(s) { err!(inst, "invalid signature reference {}", s) } else { Ok(()) } } fn verify_func_ref(&self, inst: Inst, f: FuncRef) -> Result<()> { if !self.func.dfg.ext_funcs.is_valid(f) { err!(inst, "invalid function reference {}", f) } else { Ok(()) } } fn verify_value_list(&self, inst: Inst, l: &ValueList) -> Result<()> { if !l.is_valid(&self.func.dfg.value_lists) { err!(inst, "invalid value list reference {:?}", l) } else { Ok(()) } } fn verify_jump_table(&self, inst: Inst, j: JumpTable) -> Result<()> { if !self.func.jump_tables.is_valid(j) { err!(inst, "invalid jump table reference {}", j) } else { Ok(()) } } fn verify_value(&self, loc_inst: Inst, v: Value) -> Result<()> { let dfg = &self.func.dfg; if !dfg.value_is_valid(v) { return err!(loc_inst, "invalid value reference {}", v); } // SSA form match dfg.value_def(v) { ValueDef::Res(def_inst, _) => { // Value is defined by an instruction that exists. if !dfg.insts.is_valid(def_inst) { return err!(loc_inst, "{} is defined by invalid instruction {}", v, def_inst); } // Defining instruction is inserted in an EBB. if self.func.layout.inst_ebb(def_inst) == None { return err!(loc_inst, "{} is defined by {} which has no EBB", v, def_inst); } // Defining instruction dominates the instruction that uses the value. if !self.domtree .dominates(def_inst, loc_inst, &self.func.layout) { return err!(loc_inst, "uses value from non-dominating {}", def_inst); } } ValueDef::Arg(ebb, _) => { // Value is defined by an existing EBB. if !dfg.ebb_is_valid(ebb) { return err!(loc_inst, "{} is defined by invalid EBB {}", v, ebb); } // Defining EBB is inserted in the layout if !self.func.layout.is_ebb_inserted(ebb) { return err!(loc_inst, "{} is defined by {} which is not in the layout", v, ebb); } // The defining EBB dominates the instruction using this value. if !self.domtree .ebb_dominates(ebb, loc_inst, &self.func.layout) { return err!(loc_inst, "uses value arg from non-dominating {}", ebb); } } } Ok(()) } fn domtree_integrity(&self, domtree: &DominatorTree) -> Result<()> { // We consider two `DominatorTree`s to be equal if they return the same immediate // dominator for each EBB. Therefore the current domtree is valid if it matches the freshly // computed one. for ebb in self.func.layout.ebbs() { let expected = domtree.idom(ebb); let got = self.domtree.idom(ebb); if got != expected { return err!(ebb, "invalid domtree, expected idom({}) = {:?}, got {:?}", ebb, expected, got); } } Ok(()) } fn typecheck_entry_block_arguments(&self) -> Result<()> { if let Some(ebb) = self.func.layout.entry_block() { let expected_types = &self.func.signature.argument_types; let ebb_arg_count = self.func.dfg.num_ebb_args(ebb); if ebb_arg_count != expected_types.len() { return err!(ebb, "entry block arguments must match function signature"); } for (i, arg) in self.func.dfg.ebb_args(ebb).enumerate() { let arg_type = self.func.dfg.value_type(arg); if arg_type != expected_types[i].value_type { return err!(ebb, "entry block argument {} expected to have type {}, got {}", i, expected_types[i], arg_type); } } } Ok(()) } fn typecheck(&self, inst: Inst) -> Result<()> { let inst_data = &self.func.dfg[inst]; let constraints = inst_data.opcode().constraints(); let ctrl_type = if let Some(value_typeset) = constraints.ctrl_typeset() { // For polymorphic opcodes, determine the controlling type variable first. let ctrl_type = inst_data.ctrl_typevar(&self.func.dfg); if !value_typeset.contains(ctrl_type) { return err!(inst, "has an invalid controlling type {}", ctrl_type); } ctrl_type } else { // Non-polymorphic instructions don't check the controlling type variable, so `Option` // is unnecessary and we can just make it `VOID`. types::VOID }; self.typecheck_results(inst, ctrl_type)?; self.typecheck_fixed_args(inst, ctrl_type)?; self.typecheck_variable_args(inst)?; self.typecheck_return(inst)?; Ok(()) } fn typecheck_results(&self, inst: Inst, ctrl_type: Type) -> Result<()> { let mut i = 0; for result in self.func.dfg.inst_results(inst) { let result_type = self.func.dfg.value_type(result); let expected_type = self.func.dfg.compute_result_type(inst, i, ctrl_type); if let Some(expected_type) = expected_type { if result_type != expected_type { return err!(inst, "expected result {} ({}) to have type {}, found {}", i, result, expected_type, result_type); } } else { return err!(inst, "has more result values than expected"); } i += 1; } // There aren't any more result types left. if self.func.dfg.compute_result_type(inst, i, ctrl_type) != None { return err!(inst, "has fewer result values than expected"); } Ok(()) } fn typecheck_fixed_args(&self, inst: Inst, ctrl_type: Type) -> Result<()> { let constraints = self.func.dfg[inst].opcode().constraints(); for (i, &arg) in self.func.dfg.inst_fixed_args(inst).iter().enumerate() { let arg_type = self.func.dfg.value_type(arg); match constraints.value_argument_constraint(i, ctrl_type) { ResolvedConstraint::Bound(expected_type) => { if arg_type != expected_type { return err!(inst, "arg {} ({}) has type {}, expected {}", i, arg, arg_type, expected_type); } } ResolvedConstraint::Free(type_set) => { if !type_set.contains(arg_type) { return err!(inst, "arg {} ({}) with type {} failed to satisfy type set {:?}", i, arg, arg_type, type_set); } } } } Ok(()) } fn typecheck_variable_args(&self, inst: Inst) -> Result<()> { match self.func.dfg[inst].analyze_branch(&self.func.dfg.value_lists) { BranchInfo::SingleDest(ebb, _) => { let iter = self.func .dfg .ebb_args(ebb) .map(|v| self.func.dfg.value_type(v)); self.typecheck_variable_args_iterator(inst, iter)?; } BranchInfo::Table(table) => { for (_, ebb) in self.func.jump_tables[table].entries() { let arg_count = self.func.dfg.num_ebb_args(ebb); if arg_count != 0 { return err!(inst, "takes no arguments, but had target {} with {} arguments", ebb, arg_count); } } } BranchInfo::NotABranch => {} } match self.func.dfg[inst].analyze_call(&self.func.dfg.value_lists) { CallInfo::Direct(func_ref, _) => { let sig_ref = self.func.dfg.ext_funcs[func_ref].signature; let arg_types = self.func.dfg.signatures[sig_ref] .argument_types .iter() .map(|a| a.value_type); self.typecheck_variable_args_iterator(inst, arg_types)?; } CallInfo::Indirect(sig_ref, _) => { let arg_types = self.func.dfg.signatures[sig_ref] .argument_types .iter() .map(|a| a.value_type); self.typecheck_variable_args_iterator(inst, arg_types)?; } CallInfo::NotACall => {} } Ok(()) } fn typecheck_variable_args_iterator>(&self, inst: Inst, iter: I) -> Result<()> { let variable_args = self.func.dfg.inst_variable_args(inst); let mut i = 0; for expected_type in iter { if i >= variable_args.len() { // Result count mismatch handled below, we want the full argument count first though i += 1; continue; } let arg = variable_args[i]; let arg_type = self.func.dfg.value_type(arg); if expected_type != arg_type { return err!(inst, "arg {} ({}) has type {}, expected {}", i, variable_args[i], arg_type, expected_type); } i += 1; } if i != variable_args.len() { return err!(inst, "mismatched argument count, got {}, expected {}", variable_args.len(), i); } Ok(()) } fn typecheck_return(&self, inst: Inst) -> Result<()> { if self.func.dfg[inst].opcode().is_return() { let args = self.func.dfg.inst_variable_args(inst); let expected_types = &self.func.signature.return_types; if args.len() != expected_types.len() { return err!(inst, "arguments of return must match function signature"); } for (i, (&arg, &expected_type)) in args.iter().zip(expected_types).enumerate() { let arg_type = self.func.dfg.value_type(arg); if arg_type != expected_type.value_type { return err!(inst, "arg {} ({}) has type {}, must match function signature of {}", i, arg, arg_type, expected_type); } } } Ok(()) } fn cfg_integrity(&self, cfg: &ControlFlowGraph) -> Result<()> { let mut expected_succs = BTreeSet::::new(); let mut got_succs = BTreeSet::::new(); let mut expected_preds = BTreeSet::::new(); let mut got_preds = BTreeSet::::new(); for ebb in self.func.layout.ebbs() { expected_succs.extend(self.cfg.get_successors(ebb)); got_succs.extend(cfg.get_successors(ebb)); let missing_succs: Vec = expected_succs.difference(&got_succs).cloned().collect(); if missing_succs.len() != 0 { return err!(ebb, "cfg lacked the following successor(s) {:?}", missing_succs); } let excess_succs: Vec = got_succs.difference(&expected_succs).cloned().collect(); if excess_succs.len() != 0 { return err!(ebb, "cfg had unexpected successor(s) {:?}", excess_succs); } expected_preds.extend(self.cfg .get_predecessors(ebb) .iter() .map(|&(_, inst)| inst)); got_preds.extend(cfg.get_predecessors(ebb).iter().map(|&(_, inst)| inst)); let missing_preds: Vec = expected_preds.difference(&got_preds).cloned().collect(); if missing_preds.len() != 0 { return err!(ebb, "cfg lacked the following predecessor(s) {:?}", missing_preds); } let excess_preds: Vec = got_preds.difference(&expected_preds).cloned().collect(); if excess_preds.len() != 0 { return err!(ebb, "cfg had unexpected predecessor(s) {:?}", excess_preds); } expected_succs.clear(); got_succs.clear(); expected_preds.clear(); got_preds.clear(); } Ok(()) } pub fn run(&self) -> Result<()> { self.typecheck_entry_block_arguments()?; for ebb in self.func.layout.ebbs() { for inst in self.func.layout.ebb_insts(ebb) { self.ebb_integrity(ebb, inst)?; self.instruction_integrity(inst)?; self.typecheck(inst)?; } } Ok(()) } } #[cfg(test)] mod tests { use super::{Verifier, Error}; use ir::Function; use ir::instructions::{InstructionData, Opcode}; use ir::types; macro_rules! assert_err_with_msg { ($e:expr, $msg:expr) => ( match $e { Ok(_) => { panic!("Expected an error!") }, Err(Error { message, .. } ) => { if !message.contains($msg) { panic!(format!("'{}' did not contain the substring '{}'", message, $msg)); } } } ) } #[test] fn empty() { let func = Function::new(); let verifier = Verifier::new(&func); assert_eq!(verifier.run(), Ok(())); } #[test] fn bad_instruction_format() { let mut func = Function::new(); let ebb0 = func.dfg.make_ebb(); func.layout.append_ebb(ebb0); let nullary_with_bad_opcode = func.dfg .make_inst(InstructionData::Nullary { opcode: Opcode::Jump, ty: types::VOID, }); func.layout.append_inst(nullary_with_bad_opcode, ebb0); let verifier = Verifier::new(&func); assert_err_with_msg!(verifier.run(), "instruction format"); } }