Move the verifier into a verifier/mod.rs file.

Make room for verifier sub-modules in separate files.
2017-04-20 14:38:03 -07:00
parent 9c23196049
commit c621770507
1 changed files with 0 additions and 0 deletions
--- a/lib/cretonne/src/verifier/mod.rs
+++ b/lib/cretonne/src/verifier/mod.rs
@@ -0,0 +1,700 @@
+//! A verifier for ensuring that functions are well formed.
+//! It verifies:
+//!
+//!   EBB integrity
+//!
+//!    - All instructions reached from the `ebb_insts` iterator must belong to
+//!      the EBB as reported by `inst_ebb()`.
+//!    - Every EBB must end in a terminator instruction, and no other instruction
+//!      can be a terminator.
+//!    - Every value in the `ebb_args` iterator belongs to the EBB as reported by `value_ebb`.
+//!
+//!   Instruction integrity
+//!
+//!    - The instruction format must match the opcode.
+//!    - All result values must be created for multi-valued instructions.
+//!    - All referenced entities must exist. (Values, EBBs, stack slots, ...)
+//!
+//!   SSA form
+//!
+//!    - Values must be defined by an instruction that exists and that is inserted in
+//!      an EBB, or be an argument of an existing EBB.
+//!    - Values used by an instruction must dominate the instruction.
+//!
+//!   Control flow graph and dominator tree integrity:
+//!
+//!    - All predecessors in the CFG must be branches to the EBB.
+//!    - All branches to an EBB must be present in the CFG.
+//!    - A recomputed dominator tree is identical to the existing one.
+//!
+//!   Type checking
+//!
+//!    - Compare input and output values against the opcode's type constraints.
+//!      For polymorphic opcodes, determine the controlling type variable first.
+//!    - Branches and jumps must pass arguments to destination EBBs that match the
+//!      expected types exactly. The number of arguments must match.
+//!    - All EBBs in a jump_table must take no arguments.
+//!    - Function calls are type checked against their signature.
+//!    - The entry block must take arguments that match the signature of the current
+//!      function.
+//!    - All return instructions must have return value operands matching the current
+//!      function signature.
+//!
+//! TODO:
+//!   Ad hoc checking
+//!
+//!    - Stack slot loads and stores must be in-bounds.
+//!    - Immediate constraints for certain opcodes, like `udiv_imm v3, 0`.
+//!    - Extend / truncate instructions have more type constraints: Source type can't be
+//!      larger / smaller than result type.
+//!    - `Insertlane` and `extractlane` instructions have immediate lane numbers that must be in
+//!      range for their polymorphic type.
+//!    - Swizzle and shuffle instructions take a variable number of lane arguments. The number
+//!      of arguments must match the destination type, and the lane indexes must be in range.
+
+use dominator_tree::DominatorTree;
+use flowgraph::ControlFlowGraph;
+use ir::entities::AnyEntity;
+use ir::instructions::{InstructionFormat, BranchInfo, ResolvedConstraint, CallInfo};
+use ir::{types, Function, ValueDef, Ebb, Inst, SigRef, FuncRef, ValueList, JumpTable, StackSlot,
+         Value, Type};
+use Context;
+use std::fmt::{self, Display, Formatter};
+use std::result;
+use std::collections::BTreeSet;
+
+/// A verifier error.
+#[derive(Debug, PartialEq, Eq)]
+pub struct Error {
+    /// The entity causing the verifier error.
+    pub location: AnyEntity,
+    /// Error message.
+    pub message: String,
+}
+
+impl Display for Error {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        write!(f, "{}: {}", self.location, self.message)
+    }
+}
+
+/// Verifier result.
+pub type Result<T> = result::Result<T, Error>;
+
+// Create an `Err` variant of `Result<X>` from a location and `format!` arguments.
+macro_rules! err {
+    ( $loc:expr, $msg:expr ) => {
+        Err(Error {
+            location: $loc.into(),
+            message: String::from($msg),
+        })
+    };
+
+    ( $loc:expr, $fmt:expr, $( $arg:expr ),+ ) => {
+        Err(Error {
+            location: $loc.into(),
+            message: format!( $fmt, $( $arg ),+ ),
+        })
+    };
+}
+
+/// Verify `func`.
+pub fn verify_function(func: &Function) -> Result<()> {
+    Verifier::new(func).run()
+}
+
+/// Verify `ctx`.
+pub fn verify_context(ctx: &Context) -> Result<()> {
+    let verifier = Verifier::new(&ctx.func);
+    verifier.domtree_integrity(&ctx.domtree)?;
+    verifier.cfg_integrity(&ctx.cfg)?;
+    verifier.run()
+}
+
+struct Verifier<'a> {
+    func: &'a Function,
+    cfg: ControlFlowGraph,
+    domtree: DominatorTree,
+}
+
+impl<'a> Verifier<'a> {
+    pub fn new(func: &'a Function) -> Verifier {
+        let cfg = ControlFlowGraph::with_function(func);
+        let domtree = DominatorTree::with_function(func, &cfg);
+        Verifier {
+            func: func,
+            cfg: cfg,
+            domtree: domtree,
+        }
+    }
+
+    fn ebb_integrity(&self, ebb: Ebb, inst: Inst) -> Result<()> {
+
+        let is_terminator = self.func.dfg[inst].opcode().is_terminator();
+        let is_last_inst = self.func.layout.last_inst(ebb) == Some(inst);
+
+        if is_terminator && !is_last_inst {
+            // Terminating instructions only occur at the end of blocks.
+            return err!(inst,
+                        "a terminator instruction was encountered before the end of {}",
+                        ebb);
+        }
+        if is_last_inst && !is_terminator {
+            return err!(ebb, "block does not end in a terminator instruction!");
+        }
+
+        // Instructions belong to the correct ebb.
+        let inst_ebb = self.func.layout.inst_ebb(inst);
+        if inst_ebb != Some(ebb) {
+            return err!(inst, "should belong to {} not {:?}", ebb, inst_ebb);
+        }
+
+        // Arguments belong to the correct ebb.
+        for &arg in self.func.dfg.ebb_args(ebb) {
+            match self.func.dfg.value_def(arg) {
+                ValueDef::Arg(arg_ebb, _) => {
+                    if ebb != arg_ebb {
+                        return err!(arg, "does not belong to {}", ebb);
+                    }
+                }
+                _ => {
+                    return err!(arg, "expected an argument, found a result");
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    fn instruction_integrity(&self, inst: Inst) -> Result<()> {
+        let inst_data = &self.func.dfg[inst];
+        let dfg = &self.func.dfg;
+
+        // The instruction format matches the opcode
+        if inst_data.opcode().format() != InstructionFormat::from(inst_data) {
+            return err!(inst, "instruction opcode doesn't match instruction format");
+        }
+
+        let fixed_results = inst_data.opcode().constraints().fixed_results();
+        // var_results is 0 if we aren't a call instruction
+        let var_results = dfg.call_signature(inst)
+            .map(|sig| dfg.signatures[sig].return_types.len())
+            .unwrap_or(0);
+        let total_results = fixed_results + var_results;
+
+        // All result values for multi-valued instructions are created
+        let got_results = dfg.inst_results(inst).len();
+        if got_results != total_results {
+            return err!(inst,
+                        "expected {} result values, found {}",
+                        total_results,
+                        got_results);
+        }
+
+        self.verify_entity_references(inst)
+    }
+
+    fn verify_entity_references(&self, inst: Inst) -> Result<()> {
+        use ir::instructions::InstructionData::*;
+
+        for &arg in self.func.dfg.inst_args(inst) {
+            self.verify_value(inst, arg)?;
+
+            // All used values must be attached to something.
+            let original = self.func.dfg.resolve_aliases(arg);
+            if !self.func.dfg.value_is_attached(original) {
+                return err!(inst, "argument {} -> {} is not attached", arg, original);
+            }
+        }
+
+        for &res in self.func.dfg.inst_results(inst) {
+            self.verify_value(inst, res)?;
+        }
+
+        match &self.func.dfg[inst] {
+            &MultiAry { ref args, .. } => {
+                self.verify_value_list(inst, args)?;
+            }
+            &Jump {
+                 destination,
+                 ref args,
+                 ..
+             } |
+            &Branch {
+                 destination,
+                 ref args,
+                 ..
+             } |
+            &BranchIcmp {
+                 destination,
+                 ref args,
+                 ..
+             } => {
+                self.verify_ebb(inst, destination)?;
+                self.verify_value_list(inst, args)?;
+            }
+            &BranchTable { table, .. } => {
+                self.verify_jump_table(inst, table)?;
+            }
+            &Call { func_ref, ref args, .. } => {
+                self.verify_func_ref(inst, func_ref)?;
+                self.verify_value_list(inst, args)?;
+            }
+            &IndirectCall { sig_ref, ref args, .. } => {
+                self.verify_sig_ref(inst, sig_ref)?;
+                self.verify_value_list(inst, args)?;
+            }
+            &StackLoad { stack_slot, .. } |
+            &StackStore { stack_slot, .. } => {
+                self.verify_stack_slot(inst, stack_slot)?;
+            }
+
+            // Exhaustive list so we can't forget to add new formats
+            &Nullary { .. } |
+            &Unary { .. } |
+            &UnaryImm { .. } |
+            &UnaryIeee32 { .. } |
+            &UnaryIeee64 { .. } |
+            &Binary { .. } |
+            &BinaryImm { .. } |
+            &Ternary { .. } |
+            &InsertLane { .. } |
+            &ExtractLane { .. } |
+            &IntCompare { .. } |
+            &IntCompareImm { .. } |
+            &FloatCompare { .. } |
+            &HeapLoad { .. } |
+            &HeapStore { .. } |
+            &Load { .. } |
+            &Store { .. } => {}
+        }
+
+        Ok(())
+    }
+
+    fn verify_ebb(&self, inst: Inst, e: Ebb) -> Result<()> {
+        if !self.func.dfg.ebb_is_valid(e) {
+            err!(inst, "invalid ebb reference {}", e)
+        } else {
+            Ok(())
+        }
+    }
+
+    fn verify_sig_ref(&self, inst: Inst, s: SigRef) -> Result<()> {
+        if !self.func.dfg.signatures.is_valid(s) {
+            err!(inst, "invalid signature reference {}", s)
+        } else {
+            Ok(())
+        }
+    }
+
+    fn verify_func_ref(&self, inst: Inst, f: FuncRef) -> Result<()> {
+        if !self.func.dfg.ext_funcs.is_valid(f) {
+            err!(inst, "invalid function reference {}", f)
+        } else {
+            Ok(())
+        }
+    }
+
+    fn verify_stack_slot(&self, inst: Inst, ss: StackSlot) -> Result<()> {
+        if !self.func.stack_slots.is_valid(ss) {
+            err!(inst, "invalid stack slot {}", ss)
+        } else {
+            Ok(())
+        }
+    }
+
+    fn verify_value_list(&self, inst: Inst, l: &ValueList) -> Result<()> {
+        if !l.is_valid(&self.func.dfg.value_lists) {
+            err!(inst, "invalid value list reference {:?}", l)
+        } else {
+            Ok(())
+        }
+    }
+
+    fn verify_jump_table(&self, inst: Inst, j: JumpTable) -> Result<()> {
+        if !self.func.jump_tables.is_valid(j) {
+            err!(inst, "invalid jump table reference {}", j)
+        } else {
+            Ok(())
+        }
+    }
+
+    fn verify_value(&self, loc_inst: Inst, v: Value) -> Result<()> {
+        let dfg = &self.func.dfg;
+        if !dfg.value_is_valid(v) {
+            return err!(loc_inst, "invalid value reference {}", v);
+        }
+
+        // SSA form
+        match dfg.value_def(v) {
+            ValueDef::Res(def_inst, _) => {
+                // Value is defined by an instruction that exists.
+                if !dfg.inst_is_valid(def_inst) {
+                    return err!(loc_inst,
+                                "{} is defined by invalid instruction {}",
+                                v,
+                                def_inst);
+                }
+                // Defining instruction is inserted in an EBB.
+                if self.func.layout.inst_ebb(def_inst) == None {
+                    return err!(loc_inst,
+                                "{} is defined by {} which has no EBB",
+                                v,
+                                def_inst);
+                }
+                // Defining instruction dominates the instruction that uses the value.
+                if !self.domtree
+                        .dominates(def_inst, loc_inst, &self.func.layout) {
+                    return err!(loc_inst, "uses value from non-dominating {}", def_inst);
+                }
+            }
+            ValueDef::Arg(ebb, _) => {
+                // Value is defined by an existing EBB.
+                if !dfg.ebb_is_valid(ebb) {
+                    return err!(loc_inst, "{} is defined by invalid EBB {}", v, ebb);
+                }
+                // Defining EBB is inserted in the layout
+                if !self.func.layout.is_ebb_inserted(ebb) {
+                    return err!(loc_inst,
+                                "{} is defined by {} which is not in the layout",
+                                v,
+                                ebb);
+                }
+                // The defining EBB dominates the instruction using this value.
+                if !self.domtree
+                        .ebb_dominates(ebb, loc_inst, &self.func.layout) {
+                    return err!(loc_inst, "uses value arg from non-dominating {}", ebb);
+                }
+            }
+        }
+        Ok(())
+    }
+
+    fn domtree_integrity(&self, domtree: &DominatorTree) -> Result<()> {
+        // We consider two `DominatorTree`s to be equal if they return the same immediate
+        // dominator for each EBB. Therefore the current domtree is valid if it matches the freshly
+        // computed one.
+        for ebb in self.func.layout.ebbs() {
+            let expected = domtree.idom(ebb);
+            let got = self.domtree.idom(ebb);
+            if got != expected {
+                return err!(ebb,
+                            "invalid domtree, expected idom({}) = {:?}, got {:?}",
+                            ebb,
+                            expected,
+                            got);
+            }
+        }
+        Ok(())
+    }
+
+    fn typecheck_entry_block_arguments(&self) -> Result<()> {
+        if let Some(ebb) = self.func.layout.entry_block() {
+            let expected_types = &self.func.signature.argument_types;
+            let ebb_arg_count = self.func.dfg.num_ebb_args(ebb);
+
+            if ebb_arg_count != expected_types.len() {
+                return err!(ebb, "entry block arguments must match function signature");
+            }
+
+            for (i, &arg) in self.func.dfg.ebb_args(ebb).iter().enumerate() {
+                let arg_type = self.func.dfg.value_type(arg);
+                if arg_type != expected_types[i].value_type {
+                    return err!(ebb,
+                                "entry block argument {} expected to have type {}, got {}",
+                                i,
+                                expected_types[i],
+                                arg_type);
+                }
+            }
+        }
+        Ok(())
+    }
+
+    fn typecheck(&self, inst: Inst) -> Result<()> {
+        let inst_data = &self.func.dfg[inst];
+        let constraints = inst_data.opcode().constraints();
+
+        let ctrl_type = if let Some(value_typeset) = constraints.ctrl_typeset() {
+            // For polymorphic opcodes, determine the controlling type variable first.
+            let ctrl_type = self.func.dfg.ctrl_typevar(inst);
+
+            if !value_typeset.contains(ctrl_type) {
+                return err!(inst, "has an invalid controlling type {}", ctrl_type);
+            }
+
+            ctrl_type
+        } else {
+            // Non-polymorphic instructions don't check the controlling type variable, so `Option`
+            // is unnecessary and we can just make it `VOID`.
+            types::VOID
+        };
+
+        self.typecheck_results(inst, ctrl_type)?;
+        self.typecheck_fixed_args(inst, ctrl_type)?;
+        self.typecheck_variable_args(inst)?;
+        self.typecheck_return(inst)?;
+
+        Ok(())
+    }
+
+    fn typecheck_results(&self, inst: Inst, ctrl_type: Type) -> Result<()> {
+        let mut i = 0;
+        for &result in self.func.dfg.inst_results(inst) {
+            let result_type = self.func.dfg.value_type(result);
+            let expected_type = self.func.dfg.compute_result_type(inst, i, ctrl_type);
+            if let Some(expected_type) = expected_type {
+                if result_type != expected_type {
+                    return err!(inst,
+                                "expected result {} ({}) to have type {}, found {}",
+                                i,
+                                result,
+                                expected_type,
+                                result_type);
+                }
+            } else {
+                return err!(inst, "has more result values than expected");
+            }
+            i += 1;
+        }
+
+        // There aren't any more result types left.
+        if self.func.dfg.compute_result_type(inst, i, ctrl_type) != None {
+            return err!(inst, "has fewer result values than expected");
+        }
+        Ok(())
+    }
+
+    fn typecheck_fixed_args(&self, inst: Inst, ctrl_type: Type) -> Result<()> {
+        let constraints = self.func.dfg[inst].opcode().constraints();
+
+        for (i, &arg) in self.func.dfg.inst_fixed_args(inst).iter().enumerate() {
+            let arg_type = self.func.dfg.value_type(arg);
+            match constraints.value_argument_constraint(i, ctrl_type) {
+                ResolvedConstraint::Bound(expected_type) => {
+                    if arg_type != expected_type {
+                        return err!(inst,
+                                    "arg {} ({}) has type {}, expected {}",
+                                    i,
+                                    arg,
+                                    arg_type,
+                                    expected_type);
+                    }
+                }
+                ResolvedConstraint::Free(type_set) => {
+                    if !type_set.contains(arg_type) {
+                        return err!(inst,
+                                    "arg {} ({}) with type {} failed to satisfy type set {:?}",
+                                    i,
+                                    arg,
+                                    arg_type,
+                                    type_set);
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+
+    fn typecheck_variable_args(&self, inst: Inst) -> Result<()> {
+        match self.func.dfg[inst].analyze_branch(&self.func.dfg.value_lists) {
+            BranchInfo::SingleDest(ebb, _) => {
+                let iter = self.func
+                    .dfg
+                    .ebb_args(ebb)
+                    .iter()
+                    .map(|&v| self.func.dfg.value_type(v));
+                self.typecheck_variable_args_iterator(inst, iter)?;
+            }
+            BranchInfo::Table(table) => {
+                for (_, ebb) in self.func.jump_tables[table].entries() {
+                    let arg_count = self.func.dfg.num_ebb_args(ebb);
+                    if arg_count != 0 {
+                        return err!(inst,
+                                    "takes no arguments, but had target {} with {} arguments",
+                                    ebb,
+                                    arg_count);
+                    }
+                }
+            }
+            BranchInfo::NotABranch => {}
+        }
+
+        match self.func.dfg[inst].analyze_call(&self.func.dfg.value_lists) {
+            CallInfo::Direct(func_ref, _) => {
+                let sig_ref = self.func.dfg.ext_funcs[func_ref].signature;
+                let arg_types = self.func.dfg.signatures[sig_ref]
+                    .argument_types
+                    .iter()
+                    .map(|a| a.value_type);
+                self.typecheck_variable_args_iterator(inst, arg_types)?;
+            }
+            CallInfo::Indirect(sig_ref, _) => {
+                let arg_types = self.func.dfg.signatures[sig_ref]
+                    .argument_types
+                    .iter()
+                    .map(|a| a.value_type);
+                self.typecheck_variable_args_iterator(inst, arg_types)?;
+            }
+            CallInfo::NotACall => {}
+        }
+        Ok(())
+    }
+
+    fn typecheck_variable_args_iterator<I: Iterator<Item = Type>>(&self,
+                                                                  inst: Inst,
+                                                                  iter: I)
+                                                                  -> Result<()> {
+        let variable_args = self.func.dfg.inst_variable_args(inst);
+        let mut i = 0;
+
+        for expected_type in iter {
+            if i >= variable_args.len() {
+                // Result count mismatch handled below, we want the full argument count first though
+                i += 1;
+                continue;
+            }
+            let arg = variable_args[i];
+            let arg_type = self.func.dfg.value_type(arg);
+            if expected_type != arg_type {
+                return err!(inst,
+                            "arg {} ({}) has type {}, expected {}",
+                            i,
+                            variable_args[i],
+                            arg_type,
+                            expected_type);
+            }
+            i += 1;
+        }
+        if i != variable_args.len() {
+            return err!(inst,
+                        "mismatched argument count, got {}, expected {}",
+                        variable_args.len(),
+                        i);
+        }
+        Ok(())
+    }
+
+    fn typecheck_return(&self, inst: Inst) -> Result<()> {
+        if self.func.dfg[inst].opcode().is_return() {
+            let args = self.func.dfg.inst_variable_args(inst);
+            let expected_types = &self.func.signature.return_types;
+            if args.len() != expected_types.len() {
+                return err!(inst, "arguments of return must match function signature");
+            }
+            for (i, (&arg, &expected_type)) in args.iter().zip(expected_types).enumerate() {
+                let arg_type = self.func.dfg.value_type(arg);
+                if arg_type != expected_type.value_type {
+                    return err!(inst,
+                                "arg {} ({}) has type {}, must match function signature of {}",
+                                i,
+                                arg,
+                                arg_type,
+                                expected_type);
+                }
+            }
+        }
+        Ok(())
+    }
+
+    fn cfg_integrity(&self, cfg: &ControlFlowGraph) -> Result<()> {
+        let mut expected_succs = BTreeSet::<Ebb>::new();
+        let mut got_succs = BTreeSet::<Ebb>::new();
+        let mut expected_preds = BTreeSet::<Inst>::new();
+        let mut got_preds = BTreeSet::<Inst>::new();
+
+        for ebb in self.func.layout.ebbs() {
+            expected_succs.extend(self.cfg.get_successors(ebb));
+            got_succs.extend(cfg.get_successors(ebb));
+
+            let missing_succs: Vec<Ebb> = expected_succs.difference(&got_succs).cloned().collect();
+            if missing_succs.len() != 0 {
+                return err!(ebb,
+                            "cfg lacked the following successor(s) {:?}",
+                            missing_succs);
+            }
+
+            let excess_succs: Vec<Ebb> = got_succs.difference(&expected_succs).cloned().collect();
+            if excess_succs.len() != 0 {
+                return err!(ebb, "cfg had unexpected successor(s) {:?}", excess_succs);
+            }
+
+            expected_preds.extend(self.cfg
+                                      .get_predecessors(ebb)
+                                      .iter()
+                                      .map(|&(_, inst)| inst));
+            got_preds.extend(cfg.get_predecessors(ebb).iter().map(|&(_, inst)| inst));
+
+            let missing_preds: Vec<Inst> = expected_preds.difference(&got_preds).cloned().collect();
+            if missing_preds.len() != 0 {
+                return err!(ebb,
+                            "cfg lacked the following predecessor(s) {:?}",
+                            missing_preds);
+            }
+
+            let excess_preds: Vec<Inst> = got_preds.difference(&expected_preds).cloned().collect();
+            if excess_preds.len() != 0 {
+                return err!(ebb, "cfg had unexpected predecessor(s) {:?}", excess_preds);
+            }
+
+            expected_succs.clear();
+            got_succs.clear();
+            expected_preds.clear();
+            got_preds.clear();
+        }
+        Ok(())
+    }
+
+    pub fn run(&self) -> Result<()> {
+        self.typecheck_entry_block_arguments()?;
+        for ebb in self.func.layout.ebbs() {
+            for inst in self.func.layout.ebb_insts(ebb) {
+                self.ebb_integrity(ebb, inst)?;
+                self.instruction_integrity(inst)?;
+                self.typecheck(inst)?;
+            }
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{Verifier, Error};
+    use ir::Function;
+    use ir::instructions::{InstructionData, Opcode};
+
+    macro_rules! assert_err_with_msg {
+        ($e:expr, $msg:expr) => (
+            match $e {
+                Ok(_) => { panic!("Expected an error!") },
+                Err(Error { message, .. } ) => {
+                    if !message.contains($msg) {
+                       panic!(format!("'{}' did not contain the substring '{}'", message, $msg));
+                    }
+                }
+            }
+        )
+    }
+
+    #[test]
+    fn empty() {
+        let func = Function::new();
+        let verifier = Verifier::new(&func);
+        assert_eq!(verifier.run(), Ok(()));
+    }
+
+    #[test]
+    fn bad_instruction_format() {
+        let mut func = Function::new();
+        let ebb0 = func.dfg.make_ebb();
+        func.layout.append_ebb(ebb0);
+        let nullary_with_bad_opcode =
+            func.dfg
+                .make_inst(InstructionData::Nullary { opcode: Opcode::Jump });
+        func.layout.append_inst(nullary_with_bad_opcode, ebb0);
+        let verifier = Verifier::new(&func);
+        assert_err_with_msg!(verifier.run(), "instruction format");
+    }
+}