diff --git a/Cargo.lock b/Cargo.lock index ba59d05861..597d049030 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -628,6 +628,7 @@ version = "0.89.0" dependencies = [ "log", "miette", + "rayon", "tempfile", ] diff --git a/cranelift/isle/isle/Cargo.toml b/cranelift/isle/isle/Cargo.toml index 09cff1d75e..6052fb567b 100644 --- a/cranelift/isle/isle/Cargo.toml +++ b/cranelift/isle/isle/Cargo.toml @@ -11,6 +11,7 @@ version = "0.89.0" [dependencies] log = { version = "0.4", optional = true } miette = { version = "5.1.0", optional = true } +rayon = "^1.5" [dev-dependencies] tempfile = "3" @@ -18,5 +19,6 @@ tempfile = "3" [features] default = [] +overlap-errors = [] logging = ["log"] miette-errors = ["miette"] diff --git a/cranelift/isle/isle/src/compile.rs b/cranelift/isle/isle/src/compile.rs index 877b8a5cec..de0a334872 100644 --- a/cranelift/isle/isle/src/compile.rs +++ b/cranelift/isle/isle/src/compile.rs @@ -7,6 +7,7 @@ use crate::{ast, codegen, sema, trie}; pub fn compile(defs: &ast::Defs, options: &codegen::CodegenOptions) -> Result { let mut typeenv = sema::TypeEnv::from_ast(defs)?; let termenv = sema::TermEnv::from_ast(&mut typeenv, defs)?; + crate::overlap::check(&mut typeenv, &termenv)?; let tries = trie::build_tries(&typeenv, &termenv); Ok(codegen::codegen(&typeenv, &termenv, &tries, options)) } diff --git a/cranelift/isle/isle/src/error.rs b/cranelift/isle/isle/src/error.rs index 39b6adcf87..73c66cb870 100644 --- a/cranelift/isle/isle/src/error.rs +++ b/cranelift/isle/isle/src/error.rs @@ -42,6 +42,17 @@ pub enum Error { span: Span, }, + /// The rules mentioned overlap in the input they accept. + OverlapError { + /// The error message. + msg: String, + + /// The locations of all the rules that overlap. When there are more than two rules + /// present, the first rule is the one with the most overlaps (likely a fall-through + /// wildcard case). + rules: Vec<(Source, Span)>, + }, + /// Multiple errors. Errors(Vec), } @@ -108,6 +119,10 @@ impl std::fmt::Display for Error { #[cfg(feature = "miette-errors")] Error::TypeError { msg, .. } => write!(f, "type error: {}", msg), + Error::OverlapError { msg, rules, .. } => { + writeln!(f, "overlap error: {}\n{}", msg, OverlappingRules(&rules)) + } + Error::Errors(_) => write!( f, "found {} errors:\n\n{}", @@ -128,6 +143,16 @@ impl std::fmt::Display for DisplayErrors<'_> { } } +struct OverlappingRules<'a>(&'a [(Source, Span)]); +impl std::fmt::Display for OverlappingRules<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for (src, span) in self.0 { + writeln!(f, " {}", span.from.pretty_print_with_filename(&*src.name))?; + } + Ok(()) + } +} + /// A source file and its contents. #[derive(Clone)] pub struct Source { diff --git a/cranelift/isle/isle/src/lib.rs b/cranelift/isle/isle/src/lib.rs index 1164dafc3d..140ba0aff1 100644 --- a/cranelift/isle/isle/src/lib.rs +++ b/cranelift/isle/isle/src/lib.rs @@ -98,6 +98,7 @@ pub mod error; pub mod ir; pub mod lexer; mod log; +pub mod overlap; pub mod parser; pub mod sema; pub mod trie; diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs new file mode 100644 index 0000000000..fb826f68fc --- /dev/null +++ b/cranelift/isle/isle/src/overlap.rs @@ -0,0 +1,350 @@ +//! Overlap detection for rules in ISLE. + +use rayon::prelude::*; +use std::collections::hash_map::Entry; +use std::collections::{HashMap, HashSet}; + +use crate::error::{Error, Result, Source, Span}; +use crate::sema::{self, Rule, RuleId, Sym, TermEnv, TermId, TermKind, TypeEnv, VarId}; + +/// Check for overlap. +pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { + let mut errors = check_overlaps(termenv).report(tyenv, termenv); + if cfg!(feature = "overlap-errors") { + match errors.len() { + 0 => Ok(()), + 1 => Err(errors.pop().unwrap()), + _ => Err(Error::Errors(errors)), + } + } else { + use crate::log; + log!("found {} overlap errors", errors.len()); + Ok(()) + } +} + +/// A graph of rules that overlap in the ISLE source. The edges are undirected. +#[derive(Default)] +struct Errors { + /// Edges between rules indicating overlap. + nodes: HashMap>, +} + +impl Errors { + /// Merge together two Error graphs. + fn union(mut self, other: Self) -> Self { + for (id, edges) in other.nodes { + match self.nodes.entry(id) { + Entry::Occupied(entry) => entry.into_mut().extend(edges), + Entry::Vacant(entry) => _ = entry.insert(edges), + } + } + self + } + + /// Condense the overlap information down into individual errors. We iteratively remove the + /// nodes from the graph with the highest degree, reporting errors for them and their direct + /// connections. The goal with reporting errors this way is to prefer reporting rules that + /// overlap with many others first, and then report other more targeted overlaps later. + fn report(mut self, tyenv: &TypeEnv, termenv: &TermEnv) -> Vec { + let mut errors = Vec::new(); + + let get_info = |id: RuleId| { + let rule = &termenv.rules[id.0]; + let file = rule.pos.file; + let src = Source::new( + tyenv.filenames[file].clone(), + tyenv.file_texts[file].clone(), + ); + let span = Span::new_single(rule.pos); + (src, span) + }; + + while let Some((&id, _)) = self.nodes.iter().max_by_key(|(_, edges)| edges.len()) { + let node = self.nodes.remove(&id).unwrap(); + for other in node.iter() { + if let Entry::Occupied(mut entry) = self.nodes.entry(*other) { + let back_edges = entry.get_mut(); + back_edges.remove(&id); + if back_edges.is_empty() { + entry.remove(); + } + } + } + + // build the real error + let mut rules = vec![get_info(id)]; + + rules.extend(node.into_iter().map(get_info)); + + errors.push(Error::OverlapError { + msg: String::from("rules are overlapping"), + rules, + }); + } + + errors + } + + /// Add a bidirectional edge between two rules in the graph. + fn add_edge(&mut self, a: RuleId, b: RuleId) { + // edges are undirected + self.nodes.entry(a).or_default().insert(b); + self.nodes.entry(b).or_default().insert(a); + } +} + +/// Determine if any rules overlap in the input that they accept. This checkes every unique pair of +/// rules, as checking rules in aggregate tends to suffer from exponential explosion in the +/// presence of wildcard patterns. +fn check_overlaps(env: &TermEnv) -> Errors { + struct RulePatterns<'a> { + rule: &'a Rule, + pats: Box<[Pattern]>, + } + let mut by_term = HashMap::new(); + for rule in env.rules.iter() { + if let sema::Pattern::Term(_, tid, ref vars) = rule.lhs { + let mut binds = Vec::new(); + let rule = RulePatterns { + rule, + pats: vars + .iter() + .map(|pat| Pattern::from_sema(env, &mut binds, pat)) + .collect(), + }; + by_term.entry(tid).or_insert_with(Vec::new).push(rule); + } + } + + // Sequentially identify all rule pairs which are in the same term. We could make this a + // parallel iterator, but that's harder to read and this loop is fast. Also, Rayon can + // efficiently partition a vector across multiple CPUs, which it might have more trouble with + // if this were an iterator. + let mut pairs = Vec::new(); + for rows in by_term.values() { + let mut cursor = &rows[..]; + while let Some((row, rest)) = cursor.split_first() { + cursor = rest; + pairs.extend(rest.iter().map(|other| (row, other))); + } + } + + // Process rule pairs in parallel. Rayon makes this easy and we have independent bite-sized + // chunks of work, so we might as well take advantage of multiple CPUs if they're available. + pairs + .into_par_iter() + .fold(Errors::default, |mut errs, (left, right)| { + if left.rule.prio == right.rule.prio { + if check_overlap_pair(&left.pats, &right.pats) { + errs.add_edge(left.rule.id, right.rule.id); + } + } + errs + }) + .reduce(Errors::default, Errors::union) +} + +/// Check if two rules overlap in the inputs they accept. +fn check_overlap_pair(a: &[Pattern], b: &[Pattern]) -> bool { + debug_assert_eq!(a.len(), b.len()); + let mut worklist: Vec<_> = a.iter().zip(b.iter()).collect(); + + while let Some((a, b)) = worklist.pop() { + // Checking the cross-product of two and-patterns is O(n*m). Merging sorted lists or + // hash-maps might be faster in practice, but: + // - The alternatives are not asymptotically faster, because in theory all the subpatterns + // might have the same extractor or enum variant, and in that case any approach has to + // check all of the cross-product combinations anyway. + // - It's easier to reason about this doubly-nested loop than about merging sorted lists or + // picking the right hash keys. + // - These lists are always so small that performance doesn't matter. + for a in a.as_and_subpatterns() { + for b in b.as_and_subpatterns() { + let overlap = match (a, b) { + (Pattern::Int { value: a }, Pattern::Int { value: b }) => a == b, + (Pattern::Const { name: a }, Pattern::Const { name: b }) => a == b, + + // if it's the same variant or same extractor, check all pairs of subterms + ( + Pattern::Variant { + id: a, + pats: a_pats, + }, + Pattern::Variant { + id: b, + pats: b_pats, + }, + ) + | ( + Pattern::Extractor { + id: a, + pats: a_pats, + }, + Pattern::Extractor { + id: b, + pats: b_pats, + }, + ) if a == b => { + debug_assert_eq!(a_pats.len(), b_pats.len()); + worklist.extend(a_pats.iter().zip(b_pats.iter())); + true + } + + // different variants of the same enum definitely do not overlap + (Pattern::Variant { .. }, Pattern::Variant { .. }) => false, + + // an extractor which does not exactly match the other pattern might overlap + (Pattern::Extractor { .. }, _) | (_, Pattern::Extractor { .. }) => true, + + // a wildcard definitely overlaps + (Pattern::Wildcard, _) | (_, Pattern::Wildcard) => true, + + // these patterns can only be paired with patterns of the same type, or + // wildcards or extractors, and all those cases are covered above + (Pattern::Int { .. } | Pattern::Const { .. } | Pattern::Variant { .. }, _) => { + unreachable!() + } + + // and-patterns don't reach here due to as_and_subpatterns + (Pattern::And { .. }, _) => unreachable!(), + }; + + if !overlap { + return false; + } + } + } + } + true +} + +/// A version of [`sema::Pattern`] with some simplifications to make overlap checking easier. +#[derive(Debug, Clone)] +enum Pattern { + /// Integer literal patterns. + Int { + value: i128, + }, + + /// Constant literal patterns, such as `$F32`. + Const { + name: Sym, + }, + + /// Enum variant constructors. + Variant { + id: TermId, + pats: Box<[Pattern]>, + }, + + /// Conjunctions of patterns. + And { + pats: Box<[Pattern]>, + }, + + /// Extractor uses (both fallible and infallible). + Extractor { + id: TermId, + pats: Box<[Pattern]>, + }, + + Wildcard, +} + +impl Pattern { + /// Create a [`Pattern`] from a [`sema::Pattern`]. The major differences between these two + /// representations are as follows: + /// 1. Variable bindings are removed and turned into wildcards + /// 2. Equality constraints are removed and turned into inlined versions of the patterns they + /// would have introduced equalities with + /// 3. [`sema::Pattern::Term`] instances are turned into either [`Pattern::Variant`] or + /// [`Pattern::Extractor`] cases depending on their term kind. + fn from_sema(env: &TermEnv, binds: &mut Vec<(VarId, Pattern)>, pat: &sema::Pattern) -> Self { + match pat { + sema::Pattern::BindPattern(_, id, pat) => { + let pat = Self::from_sema(env, binds, pat); + binds.push((*id, pat.clone())); + pat + } + + sema::Pattern::Var(_, id) => { + for (vid, pat) in binds.iter().rev() { + if vid == id { + // We inline equality constraints for two reasons: we specialize on the + // spine of related patterns only, so more specific information about + // individual values isn't necessarily helpful; we consider overlap + // checking to be an over-approximation of overlapping rules, so handling + // equalities ends up being best-effort. As an approximation, we use + // whatever pattern happened to be at the binding of the variable for all + // of the cases where it's used for equality. For example, in the following + // rule: + // + // > (rule (example x @ (Enum.Variant y) x) ...) + // + // we will only specialize up to `(Enum.Variant _)`, so any more specific + // runtime values of `y` won't end up helping to identify overlap. As a + // result, we rewrite the patterns in the rule to look more like the + // following, as it greatly simplifies overlap checking. + // + // > (rule (example (Enum.Variant _) (Enum.Variant _)) ...) + // + // Cases that this scheme won't handle look like the following: + // + // > (rule (example2 2 3) ...) + // > (rule (example2 x x) ...) + // + // As in this case we'll not make use of the information that `2` and `3` + // aren't equal to know that the rules don't overlap. One approach that we + // could take here is delaying substitution to the point where a variable + // binding has been specialized, turning the rules into the following once + // specialization had occurred for `2`: + // + // > (rule (example2 2 3) ...) + // > (rule (example2 2 2) ...) + return pat.clone(); + } + } + + binds.push((*id, Pattern::Wildcard)); + Pattern::Wildcard + } + + sema::Pattern::ConstInt(_, value) => Pattern::Int { value: *value }, + sema::Pattern::ConstPrim(_, name) => Pattern::Const { name: *name }, + + &sema::Pattern::Term(_, id, ref pats) => { + let pats = pats + .iter() + .map(|pat| Pattern::from_sema(env, binds, pat)) + .collect(); + + match &env.terms[id.0].kind { + TermKind::EnumVariant { .. } => Pattern::Variant { id, pats }, + TermKind::Decl { .. } => Pattern::Extractor { id, pats }, + } + } + + sema::Pattern::Wildcard(_) => Pattern::Wildcard, + + sema::Pattern::And(_, pats) => { + let pats = pats + .iter() + .map(|pat| Pattern::from_sema(env, binds, pat)) + .collect(); + Pattern::And { pats } + } + } + } + + /// If this is an and-pattern, return its subpatterns. Otherwise pretend like there's an + /// and-pattern which has this as its only subpattern, and return self as a single-element + /// slice. + fn as_and_subpatterns(&self) -> &[Pattern] { + if let Pattern::And { pats } = self { + pats + } else { + std::slice::from_ref(self) + } + } +}