Add an overlap checker to ISLE (#4906)
https://github.com/bytecodealliance/wasmtime/pull/4906 Co-authored-by: Jamey Sharp <jsharp@fastly.com>
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -628,6 +628,7 @@ version = "0.89.0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"log",
|
"log",
|
||||||
"miette",
|
"miette",
|
||||||
|
"rayon",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ version = "0.89.0"
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
log = { version = "0.4", optional = true }
|
log = { version = "0.4", optional = true }
|
||||||
miette = { version = "5.1.0", optional = true }
|
miette = { version = "5.1.0", optional = true }
|
||||||
|
rayon = "^1.5"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tempfile = "3"
|
tempfile = "3"
|
||||||
@@ -18,5 +19,6 @@ tempfile = "3"
|
|||||||
[features]
|
[features]
|
||||||
default = []
|
default = []
|
||||||
|
|
||||||
|
overlap-errors = []
|
||||||
logging = ["log"]
|
logging = ["log"]
|
||||||
miette-errors = ["miette"]
|
miette-errors = ["miette"]
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ use crate::{ast, codegen, sema, trie};
|
|||||||
pub fn compile(defs: &ast::Defs, options: &codegen::CodegenOptions) -> Result<String> {
|
pub fn compile(defs: &ast::Defs, options: &codegen::CodegenOptions) -> Result<String> {
|
||||||
let mut typeenv = sema::TypeEnv::from_ast(defs)?;
|
let mut typeenv = sema::TypeEnv::from_ast(defs)?;
|
||||||
let termenv = sema::TermEnv::from_ast(&mut typeenv, defs)?;
|
let termenv = sema::TermEnv::from_ast(&mut typeenv, defs)?;
|
||||||
|
crate::overlap::check(&mut typeenv, &termenv)?;
|
||||||
let tries = trie::build_tries(&typeenv, &termenv);
|
let tries = trie::build_tries(&typeenv, &termenv);
|
||||||
Ok(codegen::codegen(&typeenv, &termenv, &tries, options))
|
Ok(codegen::codegen(&typeenv, &termenv, &tries, options))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -42,6 +42,17 @@ pub enum Error {
|
|||||||
span: Span,
|
span: Span,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// The rules mentioned overlap in the input they accept.
|
||||||
|
OverlapError {
|
||||||
|
/// The error message.
|
||||||
|
msg: String,
|
||||||
|
|
||||||
|
/// The locations of all the rules that overlap. When there are more than two rules
|
||||||
|
/// present, the first rule is the one with the most overlaps (likely a fall-through
|
||||||
|
/// wildcard case).
|
||||||
|
rules: Vec<(Source, Span)>,
|
||||||
|
},
|
||||||
|
|
||||||
/// Multiple errors.
|
/// Multiple errors.
|
||||||
Errors(Vec<Error>),
|
Errors(Vec<Error>),
|
||||||
}
|
}
|
||||||
@@ -108,6 +119,10 @@ impl std::fmt::Display for Error {
|
|||||||
#[cfg(feature = "miette-errors")]
|
#[cfg(feature = "miette-errors")]
|
||||||
Error::TypeError { msg, .. } => write!(f, "type error: {}", msg),
|
Error::TypeError { msg, .. } => write!(f, "type error: {}", msg),
|
||||||
|
|
||||||
|
Error::OverlapError { msg, rules, .. } => {
|
||||||
|
writeln!(f, "overlap error: {}\n{}", msg, OverlappingRules(&rules))
|
||||||
|
}
|
||||||
|
|
||||||
Error::Errors(_) => write!(
|
Error::Errors(_) => write!(
|
||||||
f,
|
f,
|
||||||
"found {} errors:\n\n{}",
|
"found {} errors:\n\n{}",
|
||||||
@@ -128,6 +143,16 @@ impl std::fmt::Display for DisplayErrors<'_> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct OverlappingRules<'a>(&'a [(Source, Span)]);
|
||||||
|
impl std::fmt::Display for OverlappingRules<'_> {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
for (src, span) in self.0 {
|
||||||
|
writeln!(f, " {}", span.from.pretty_print_with_filename(&*src.name))?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A source file and its contents.
|
/// A source file and its contents.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Source {
|
pub struct Source {
|
||||||
|
|||||||
@@ -98,6 +98,7 @@ pub mod error;
|
|||||||
pub mod ir;
|
pub mod ir;
|
||||||
pub mod lexer;
|
pub mod lexer;
|
||||||
mod log;
|
mod log;
|
||||||
|
pub mod overlap;
|
||||||
pub mod parser;
|
pub mod parser;
|
||||||
pub mod sema;
|
pub mod sema;
|
||||||
pub mod trie;
|
pub mod trie;
|
||||||
|
|||||||
350
cranelift/isle/isle/src/overlap.rs
Normal file
350
cranelift/isle/isle/src/overlap.rs
Normal file
@@ -0,0 +1,350 @@
|
|||||||
|
//! Overlap detection for rules in ISLE.
|
||||||
|
|
||||||
|
use rayon::prelude::*;
|
||||||
|
use std::collections::hash_map::Entry;
|
||||||
|
use std::collections::{HashMap, HashSet};
|
||||||
|
|
||||||
|
use crate::error::{Error, Result, Source, Span};
|
||||||
|
use crate::sema::{self, Rule, RuleId, Sym, TermEnv, TermId, TermKind, TypeEnv, VarId};
|
||||||
|
|
||||||
|
/// Check for overlap.
|
||||||
|
pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> {
|
||||||
|
let mut errors = check_overlaps(termenv).report(tyenv, termenv);
|
||||||
|
if cfg!(feature = "overlap-errors") {
|
||||||
|
match errors.len() {
|
||||||
|
0 => Ok(()),
|
||||||
|
1 => Err(errors.pop().unwrap()),
|
||||||
|
_ => Err(Error::Errors(errors)),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
use crate::log;
|
||||||
|
log!("found {} overlap errors", errors.len());
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A graph of rules that overlap in the ISLE source. The edges are undirected.
|
||||||
|
#[derive(Default)]
|
||||||
|
struct Errors {
|
||||||
|
/// Edges between rules indicating overlap.
|
||||||
|
nodes: HashMap<RuleId, HashSet<RuleId>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Errors {
|
||||||
|
/// Merge together two Error graphs.
|
||||||
|
fn union(mut self, other: Self) -> Self {
|
||||||
|
for (id, edges) in other.nodes {
|
||||||
|
match self.nodes.entry(id) {
|
||||||
|
Entry::Occupied(entry) => entry.into_mut().extend(edges),
|
||||||
|
Entry::Vacant(entry) => _ = entry.insert(edges),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Condense the overlap information down into individual errors. We iteratively remove the
|
||||||
|
/// nodes from the graph with the highest degree, reporting errors for them and their direct
|
||||||
|
/// connections. The goal with reporting errors this way is to prefer reporting rules that
|
||||||
|
/// overlap with many others first, and then report other more targeted overlaps later.
|
||||||
|
fn report(mut self, tyenv: &TypeEnv, termenv: &TermEnv) -> Vec<Error> {
|
||||||
|
let mut errors = Vec::new();
|
||||||
|
|
||||||
|
let get_info = |id: RuleId| {
|
||||||
|
let rule = &termenv.rules[id.0];
|
||||||
|
let file = rule.pos.file;
|
||||||
|
let src = Source::new(
|
||||||
|
tyenv.filenames[file].clone(),
|
||||||
|
tyenv.file_texts[file].clone(),
|
||||||
|
);
|
||||||
|
let span = Span::new_single(rule.pos);
|
||||||
|
(src, span)
|
||||||
|
};
|
||||||
|
|
||||||
|
while let Some((&id, _)) = self.nodes.iter().max_by_key(|(_, edges)| edges.len()) {
|
||||||
|
let node = self.nodes.remove(&id).unwrap();
|
||||||
|
for other in node.iter() {
|
||||||
|
if let Entry::Occupied(mut entry) = self.nodes.entry(*other) {
|
||||||
|
let back_edges = entry.get_mut();
|
||||||
|
back_edges.remove(&id);
|
||||||
|
if back_edges.is_empty() {
|
||||||
|
entry.remove();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// build the real error
|
||||||
|
let mut rules = vec![get_info(id)];
|
||||||
|
|
||||||
|
rules.extend(node.into_iter().map(get_info));
|
||||||
|
|
||||||
|
errors.push(Error::OverlapError {
|
||||||
|
msg: String::from("rules are overlapping"),
|
||||||
|
rules,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
errors
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a bidirectional edge between two rules in the graph.
|
||||||
|
fn add_edge(&mut self, a: RuleId, b: RuleId) {
|
||||||
|
// edges are undirected
|
||||||
|
self.nodes.entry(a).or_default().insert(b);
|
||||||
|
self.nodes.entry(b).or_default().insert(a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Determine if any rules overlap in the input that they accept. This checkes every unique pair of
|
||||||
|
/// rules, as checking rules in aggregate tends to suffer from exponential explosion in the
|
||||||
|
/// presence of wildcard patterns.
|
||||||
|
fn check_overlaps(env: &TermEnv) -> Errors {
|
||||||
|
struct RulePatterns<'a> {
|
||||||
|
rule: &'a Rule,
|
||||||
|
pats: Box<[Pattern]>,
|
||||||
|
}
|
||||||
|
let mut by_term = HashMap::new();
|
||||||
|
for rule in env.rules.iter() {
|
||||||
|
if let sema::Pattern::Term(_, tid, ref vars) = rule.lhs {
|
||||||
|
let mut binds = Vec::new();
|
||||||
|
let rule = RulePatterns {
|
||||||
|
rule,
|
||||||
|
pats: vars
|
||||||
|
.iter()
|
||||||
|
.map(|pat| Pattern::from_sema(env, &mut binds, pat))
|
||||||
|
.collect(),
|
||||||
|
};
|
||||||
|
by_term.entry(tid).or_insert_with(Vec::new).push(rule);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sequentially identify all rule pairs which are in the same term. We could make this a
|
||||||
|
// parallel iterator, but that's harder to read and this loop is fast. Also, Rayon can
|
||||||
|
// efficiently partition a vector across multiple CPUs, which it might have more trouble with
|
||||||
|
// if this were an iterator.
|
||||||
|
let mut pairs = Vec::new();
|
||||||
|
for rows in by_term.values() {
|
||||||
|
let mut cursor = &rows[..];
|
||||||
|
while let Some((row, rest)) = cursor.split_first() {
|
||||||
|
cursor = rest;
|
||||||
|
pairs.extend(rest.iter().map(|other| (row, other)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process rule pairs in parallel. Rayon makes this easy and we have independent bite-sized
|
||||||
|
// chunks of work, so we might as well take advantage of multiple CPUs if they're available.
|
||||||
|
pairs
|
||||||
|
.into_par_iter()
|
||||||
|
.fold(Errors::default, |mut errs, (left, right)| {
|
||||||
|
if left.rule.prio == right.rule.prio {
|
||||||
|
if check_overlap_pair(&left.pats, &right.pats) {
|
||||||
|
errs.add_edge(left.rule.id, right.rule.id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
errs
|
||||||
|
})
|
||||||
|
.reduce(Errors::default, Errors::union)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if two rules overlap in the inputs they accept.
|
||||||
|
fn check_overlap_pair(a: &[Pattern], b: &[Pattern]) -> bool {
|
||||||
|
debug_assert_eq!(a.len(), b.len());
|
||||||
|
let mut worklist: Vec<_> = a.iter().zip(b.iter()).collect();
|
||||||
|
|
||||||
|
while let Some((a, b)) = worklist.pop() {
|
||||||
|
// Checking the cross-product of two and-patterns is O(n*m). Merging sorted lists or
|
||||||
|
// hash-maps might be faster in practice, but:
|
||||||
|
// - The alternatives are not asymptotically faster, because in theory all the subpatterns
|
||||||
|
// might have the same extractor or enum variant, and in that case any approach has to
|
||||||
|
// check all of the cross-product combinations anyway.
|
||||||
|
// - It's easier to reason about this doubly-nested loop than about merging sorted lists or
|
||||||
|
// picking the right hash keys.
|
||||||
|
// - These lists are always so small that performance doesn't matter.
|
||||||
|
for a in a.as_and_subpatterns() {
|
||||||
|
for b in b.as_and_subpatterns() {
|
||||||
|
let overlap = match (a, b) {
|
||||||
|
(Pattern::Int { value: a }, Pattern::Int { value: b }) => a == b,
|
||||||
|
(Pattern::Const { name: a }, Pattern::Const { name: b }) => a == b,
|
||||||
|
|
||||||
|
// if it's the same variant or same extractor, check all pairs of subterms
|
||||||
|
(
|
||||||
|
Pattern::Variant {
|
||||||
|
id: a,
|
||||||
|
pats: a_pats,
|
||||||
|
},
|
||||||
|
Pattern::Variant {
|
||||||
|
id: b,
|
||||||
|
pats: b_pats,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
| (
|
||||||
|
Pattern::Extractor {
|
||||||
|
id: a,
|
||||||
|
pats: a_pats,
|
||||||
|
},
|
||||||
|
Pattern::Extractor {
|
||||||
|
id: b,
|
||||||
|
pats: b_pats,
|
||||||
|
},
|
||||||
|
) if a == b => {
|
||||||
|
debug_assert_eq!(a_pats.len(), b_pats.len());
|
||||||
|
worklist.extend(a_pats.iter().zip(b_pats.iter()));
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
// different variants of the same enum definitely do not overlap
|
||||||
|
(Pattern::Variant { .. }, Pattern::Variant { .. }) => false,
|
||||||
|
|
||||||
|
// an extractor which does not exactly match the other pattern might overlap
|
||||||
|
(Pattern::Extractor { .. }, _) | (_, Pattern::Extractor { .. }) => true,
|
||||||
|
|
||||||
|
// a wildcard definitely overlaps
|
||||||
|
(Pattern::Wildcard, _) | (_, Pattern::Wildcard) => true,
|
||||||
|
|
||||||
|
// these patterns can only be paired with patterns of the same type, or
|
||||||
|
// wildcards or extractors, and all those cases are covered above
|
||||||
|
(Pattern::Int { .. } | Pattern::Const { .. } | Pattern::Variant { .. }, _) => {
|
||||||
|
unreachable!()
|
||||||
|
}
|
||||||
|
|
||||||
|
// and-patterns don't reach here due to as_and_subpatterns
|
||||||
|
(Pattern::And { .. }, _) => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
if !overlap {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A version of [`sema::Pattern`] with some simplifications to make overlap checking easier.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
enum Pattern {
|
||||||
|
/// Integer literal patterns.
|
||||||
|
Int {
|
||||||
|
value: i128,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// Constant literal patterns, such as `$F32`.
|
||||||
|
Const {
|
||||||
|
name: Sym,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// Enum variant constructors.
|
||||||
|
Variant {
|
||||||
|
id: TermId,
|
||||||
|
pats: Box<[Pattern]>,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// Conjunctions of patterns.
|
||||||
|
And {
|
||||||
|
pats: Box<[Pattern]>,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// Extractor uses (both fallible and infallible).
|
||||||
|
Extractor {
|
||||||
|
id: TermId,
|
||||||
|
pats: Box<[Pattern]>,
|
||||||
|
},
|
||||||
|
|
||||||
|
Wildcard,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Pattern {
|
||||||
|
/// Create a [`Pattern`] from a [`sema::Pattern`]. The major differences between these two
|
||||||
|
/// representations are as follows:
|
||||||
|
/// 1. Variable bindings are removed and turned into wildcards
|
||||||
|
/// 2. Equality constraints are removed and turned into inlined versions of the patterns they
|
||||||
|
/// would have introduced equalities with
|
||||||
|
/// 3. [`sema::Pattern::Term`] instances are turned into either [`Pattern::Variant`] or
|
||||||
|
/// [`Pattern::Extractor`] cases depending on their term kind.
|
||||||
|
fn from_sema(env: &TermEnv, binds: &mut Vec<(VarId, Pattern)>, pat: &sema::Pattern) -> Self {
|
||||||
|
match pat {
|
||||||
|
sema::Pattern::BindPattern(_, id, pat) => {
|
||||||
|
let pat = Self::from_sema(env, binds, pat);
|
||||||
|
binds.push((*id, pat.clone()));
|
||||||
|
pat
|
||||||
|
}
|
||||||
|
|
||||||
|
sema::Pattern::Var(_, id) => {
|
||||||
|
for (vid, pat) in binds.iter().rev() {
|
||||||
|
if vid == id {
|
||||||
|
// We inline equality constraints for two reasons: we specialize on the
|
||||||
|
// spine of related patterns only, so more specific information about
|
||||||
|
// individual values isn't necessarily helpful; we consider overlap
|
||||||
|
// checking to be an over-approximation of overlapping rules, so handling
|
||||||
|
// equalities ends up being best-effort. As an approximation, we use
|
||||||
|
// whatever pattern happened to be at the binding of the variable for all
|
||||||
|
// of the cases where it's used for equality. For example, in the following
|
||||||
|
// rule:
|
||||||
|
//
|
||||||
|
// > (rule (example x @ (Enum.Variant y) x) ...)
|
||||||
|
//
|
||||||
|
// we will only specialize up to `(Enum.Variant _)`, so any more specific
|
||||||
|
// runtime values of `y` won't end up helping to identify overlap. As a
|
||||||
|
// result, we rewrite the patterns in the rule to look more like the
|
||||||
|
// following, as it greatly simplifies overlap checking.
|
||||||
|
//
|
||||||
|
// > (rule (example (Enum.Variant _) (Enum.Variant _)) ...)
|
||||||
|
//
|
||||||
|
// Cases that this scheme won't handle look like the following:
|
||||||
|
//
|
||||||
|
// > (rule (example2 2 3) ...)
|
||||||
|
// > (rule (example2 x x) ...)
|
||||||
|
//
|
||||||
|
// As in this case we'll not make use of the information that `2` and `3`
|
||||||
|
// aren't equal to know that the rules don't overlap. One approach that we
|
||||||
|
// could take here is delaying substitution to the point where a variable
|
||||||
|
// binding has been specialized, turning the rules into the following once
|
||||||
|
// specialization had occurred for `2`:
|
||||||
|
//
|
||||||
|
// > (rule (example2 2 3) ...)
|
||||||
|
// > (rule (example2 2 2) ...)
|
||||||
|
return pat.clone();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
binds.push((*id, Pattern::Wildcard));
|
||||||
|
Pattern::Wildcard
|
||||||
|
}
|
||||||
|
|
||||||
|
sema::Pattern::ConstInt(_, value) => Pattern::Int { value: *value },
|
||||||
|
sema::Pattern::ConstPrim(_, name) => Pattern::Const { name: *name },
|
||||||
|
|
||||||
|
&sema::Pattern::Term(_, id, ref pats) => {
|
||||||
|
let pats = pats
|
||||||
|
.iter()
|
||||||
|
.map(|pat| Pattern::from_sema(env, binds, pat))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
match &env.terms[id.0].kind {
|
||||||
|
TermKind::EnumVariant { .. } => Pattern::Variant { id, pats },
|
||||||
|
TermKind::Decl { .. } => Pattern::Extractor { id, pats },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sema::Pattern::Wildcard(_) => Pattern::Wildcard,
|
||||||
|
|
||||||
|
sema::Pattern::And(_, pats) => {
|
||||||
|
let pats = pats
|
||||||
|
.iter()
|
||||||
|
.map(|pat| Pattern::from_sema(env, binds, pat))
|
||||||
|
.collect();
|
||||||
|
Pattern::And { pats }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// If this is an and-pattern, return its subpatterns. Otherwise pretend like there's an
|
||||||
|
/// and-pattern which has this as its only subpattern, and return self as a single-element
|
||||||
|
/// slice.
|
||||||
|
fn as_and_subpatterns(&self) -> &[Pattern] {
|
||||||
|
if let Pattern::And { pats } = self {
|
||||||
|
pats
|
||||||
|
} else {
|
||||||
|
std::slice::from_ref(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user