cranelift-isle: New IR and revised overlap checks (#5195)

* cranelift-isle: New IR and revised overlap checks

* Improve error reporting

* Avoid "unused argument" warnings a nicer way

* Remove unused fields

* Minimize diff and "fix" error handling

I had tried to use Miette the "right" way and somehow made things worse. Among
other changes, revert all my changes to unrelated parts of `error.rs`
and `error_miette.rs`.

* Review comments: Rename "unmatchable" to "unreachable"

* Review comments: newtype wrappers, not type aliases

* Review comments: more comments on overlap checks

* Review comments: Clarify `normalize_equivalence_classes`

* Review comments: use union-find instead of linked list

This saves about 50 lines of code in the trie_again module. The
union-find implementation is about twice as long as that, counting
comments and doc-tests, but that's a worthwhile trade-off.

However, this makes `normalize_equivalence_classes` slower, because now
finding all elements of an equivalence class takes time linear in the
total size of all equivalence classes. If that ever turns out to be a
problem in practice we can find some way to optimize `remove_set_of`.

* Review comments: Hide constraints HashMap

We want to enforce that consumers of this representation can't observe
non-deterministic ordering in any of its public types.

* Review comments: Normalize equivalence classes incrementally

I'm not sure whether this is a good idea. It doesn't make the logic
particularly simpler, and I think it will do more work if three or more
binding sites with enum-variant constraints get set equal to each other.

* More comments and other clarifications

* Revert "Review comments: Normalize equivalence classes incrementally"

* Even more comments
This commit is contained in:
Jamey Sharp
2022-11-13 18:29:22 -08:00
committed by GitHub
parent 95ca72a37a
commit 70c72ee2a4
4 changed files with 850 additions and 269 deletions

View File

@@ -42,6 +42,18 @@ pub enum Error {
span: Span,
},
/// The rule can never match any input.
UnreachableError {
/// The error message.
msg: String,
/// The input ISLE source.
src: Source,
/// The location of the unreachable rule.
span: Span,
},
/// The rules mentioned overlap in the input they accept.
OverlapError {
/// The error message.
@@ -119,6 +131,15 @@ impl std::fmt::Display for Error {
#[cfg(feature = "miette-errors")]
Error::TypeError { msg, .. } => write!(f, "type error: {}", msg),
Error::UnreachableError { src, span, msg } => {
write!(
f,
"{}: unreachable rule: {}",
span.from.pretty_print_with_filename(&*src.name),
msg
)
}
Error::OverlapError { msg, rules, .. } => {
writeln!(f, "overlap error: {}\n{}", msg, OverlappingRules(&rules))
}

View File

@@ -91,6 +91,114 @@ impl<K: Hash + Eq, V> Index<&K> for StableMap<K, V> {
}
}
/// Stores disjoint sets and provides efficient operations to merge two sets, and to find a
/// representative member of a set given any member of that set. In this implementation, sets always
/// have at least two members, and can only be formed by the `merge` operation.
#[derive(Debug, Default)]
pub struct DisjointSets<T> {
    // Maps each merged item to its parent pointer and its union-by-rank rank. Items that were
    // never passed to `merge` are absent; a root points at itself.
    parent: HashMap<T, (T, u8)>,
}

impl<T: Copy + std::fmt::Debug + Eq + Hash> DisjointSets<T> {
    /// Find a representative member of the set containing `x`. If `x` has not been merged with any
    /// other items using `merge`, returns `None`. This method updates the data structure to make
    /// future queries faster, and takes amortized constant time.
    ///
    /// ```
    /// let mut sets = cranelift_isle::DisjointSets::default();
    /// sets.merge(1, 2);
    /// sets.merge(1, 3);
    /// sets.merge(2, 4);
    /// assert_eq!(sets.find_mut(3).unwrap(), sets.find_mut(4).unwrap());
    /// assert_eq!(sets.find_mut(10), None);
    /// ```
    pub fn find_mut(&mut self, mut x: T) -> Option<T> {
        loop {
            // Items never seen by `merge` have no entry at all.
            let (parent, _) = *self.parent.get(&x)?;
            if parent == x {
                return Some(x);
            }
            // Path halving: re-point `x` at its grandparent and continue from there. This
            // flattens the tree a little on every lookup without a second pass.
            let grandparent = self.parent[&parent].0;
            self.parent.get_mut(&x).unwrap().0 = grandparent;
            x = grandparent;
        }
    }

    // Return the `(root, rank)` entry for the set containing `x`, inserting `x` as a fresh
    // singleton root if it has never been merged before.
    fn root_entry(&mut self, x: T) -> (T, u8) {
        if let Some(root) = self.find_mut(x) {
            self.parent[&root]
        } else {
            self.parent.insert(x, (x, 0));
            (x, 0)
        }
    }

    /// Merge the set containing `x` with the set containing `y`. This method takes amortized
    /// constant time.
    pub fn merge(&mut self, x: T, y: T) {
        assert_ne!(x, y);
        let mut a = self.root_entry(x);
        let mut b = self.root_entry(y);
        if a == b {
            // Already members of the same set.
            return;
        }
        // Union by rank: attach the lower-ranked root beneath the higher-ranked one, so trees
        // stay shallow.
        if a.1 < b.1 {
            std::mem::swap(&mut a, &mut b);
        }
        self.parent.get_mut(&b.0).unwrap().0 = a.0;
        if a.1 == b.1 {
            let rank = &mut self.parent.get_mut(&a.0).unwrap().1;
            *rank = rank.saturating_add(1);
        }
    }

    /// Remove the set containing the given item, and return all members of that set. The set is
    /// returned in sorted order. This method takes time linear in the total size of all sets.
    ///
    /// ```
    /// let mut sets = cranelift_isle::DisjointSets::default();
    /// sets.merge(1, 2);
    /// sets.merge(1, 3);
    /// sets.merge(2, 4);
    /// assert_eq!(sets.remove_set_of(4), &[1, 2, 3, 4]);
    /// assert_eq!(sets.remove_set_of(1), &[]);
    /// assert!(sets.is_empty());
    /// ```
    pub fn remove_set_of(&mut self, x: T) -> Vec<T>
    where
        T: Ord,
    {
        let root = match self.find_mut(x) {
            Some(root) => root,
            None => return Vec::new(),
        };
        let mut members: Vec<T> = self.parent.keys().copied().collect();
        // It's important to use `find_mut` here to avoid quadratic worst-case time.
        members.retain(|&member| self.find_mut(member).unwrap() == root);
        for member in members.iter() {
            self.parent.remove(member);
        }
        members.sort_unstable();
        members
    }

    /// Returns true if there are no sets. This method takes constant time.
    ///
    /// ```
    /// let mut sets = cranelift_isle::DisjointSets::default();
    /// assert!(sets.is_empty());
    /// sets.merge(1, 2);
    /// assert!(!sets.is_empty());
    /// ```
    pub fn is_empty(&self) -> bool {
        self.parent.is_empty()
    }
}
pub mod ast;
pub mod codegen;
pub mod compile;
@@ -102,6 +210,7 @@ pub mod overlap;
pub mod parser;
pub mod sema;
pub mod trie;
pub mod trie_again;
#[cfg(feature = "miette-errors")]
mod error_miette;

View File

@@ -5,15 +5,14 @@ use std::collections::{HashMap, HashSet};
use crate::error::{Error, Result, Source, Span};
use crate::lexer::Pos;
use crate::sema::{self, Rule, RuleId, Sym, TermEnv, TermId, TermKind, TypeEnv, VarId};
use crate::sema::{TermEnv, TermId, TermKind, TypeEnv};
use crate::trie_again;
/// Check for overlap.
pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> {
let mut errors = check_overlaps(termenv).report(tyenv, termenv);
errors.sort_by_key(|err| match err {
Error::OverlapError { rules, .. } => rules.first().unwrap().1.from,
_ => Pos::default(),
});
let (terms, mut errors) = trie_again::build(termenv, tyenv);
errors.append(&mut check_overlaps(terms, termenv).report(tyenv));
match errors.len() {
0 => Ok(()),
1 => Err(errors.pop().unwrap()),
@@ -25,7 +24,7 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> {
#[derive(Default)]
struct Errors {
/// Edges between rules indicating overlap.
nodes: HashMap<RuleId, HashSet<RuleId>>,
nodes: HashMap<Pos, HashSet<Pos>>,
}
impl Errors {
@@ -33,30 +32,29 @@ impl Errors {
/// nodes from the graph with the highest degree, reporting errors for them and their direct
/// connections. The goal with reporting errors this way is to prefer reporting rules that
/// overlap with many others first, and then report other more targeted overlaps later.
fn report(mut self, tyenv: &TypeEnv, termenv: &TermEnv) -> Vec<Error> {
fn report(mut self, tyenv: &TypeEnv) -> Vec<Error> {
let mut errors = Vec::new();
let get_info = |id: RuleId| {
let rule = &termenv.rules[id.0];
let file = rule.pos.file;
let get_info = |pos: Pos| {
let file = pos.file;
let src = Source::new(
tyenv.filenames[file].clone(),
tyenv.file_texts[file].clone(),
);
let span = Span::new_single(rule.pos);
let span = Span::new_single(pos);
(src, span)
};
while let Some((&id, _)) = self
while let Some((&pos, _)) = self
.nodes
.iter()
.max_by_key(|(id, edges)| (edges.len(), *id))
.max_by_key(|(pos, edges)| (edges.len(), *pos))
{
let node = self.nodes.remove(&id).unwrap();
let node = self.nodes.remove(&pos).unwrap();
for other in node.iter() {
if let Entry::Occupied(mut entry) = self.nodes.entry(*other) {
let back_edges = entry.get_mut();
back_edges.remove(&id);
back_edges.remove(&pos);
if back_edges.is_empty() {
entry.remove();
}
@@ -64,7 +62,7 @@ impl Errors {
}
// build the real error
let mut rules = vec![get_info(id)];
let mut rules = vec![get_info(pos)];
rules.extend(node.into_iter().map(get_info));
@@ -74,28 +72,30 @@ impl Errors {
});
}
errors.sort_by_key(|err| match err {
Error::OverlapError { rules, .. } => rules.first().unwrap().1.from,
_ => Pos::default(),
});
errors
}
/// Add a bidirectional edge between two rules in the graph.
fn add_edge(&mut self, a: RuleId, b: RuleId) {
fn check_pair(&mut self, a: &trie_again::Rule, b: &trie_again::Rule) {
if let trie_again::Overlap::Yes { .. } = a.may_overlap(b) {
if a.prio == b.prio {
// edges are undirected
self.nodes.entry(a).or_default().insert(b);
self.nodes.entry(b).or_default().insert(a);
self.nodes.entry(a.pos).or_default().insert(b.pos);
self.nodes.entry(b.pos).or_default().insert(a.pos);
}
}
}
}
/// Determine if any rules overlap in the input that they accept. This checkes every unique pair of
/// Determine if any rules overlap in the input that they accept. This checks every unique pair of
/// rules, as checking rules in aggregate tends to suffer from exponential explosion in the
/// presence of wildcard patterns.
fn check_overlaps(env: &TermEnv) -> Errors {
struct RulePatterns<'a> {
rule: &'a Rule,
pats: Box<[Pattern]>,
}
let mut by_term = HashMap::new();
for rule in env.rules.iter() {
if let sema::Pattern::Term(_, tid, ref vars) = rule.lhs {
fn check_overlaps(terms: Vec<(TermId, trie_again::RuleSet)>, env: &TermEnv) -> Errors {
let mut errs = Errors::default();
for (tid, ruleset) in terms {
let is_multi_ctor = match &env.terms[tid.index()].kind {
&TermKind::Decl { multi, .. } => multi,
_ => false,
@@ -108,237 +108,12 @@ fn check_overlaps(env: &TermEnv) -> Errors {
continue;
}
// Group rules by term and priority. Only rules within the same group are checked to
// see if they overlap each other. If you want to change the scope of overlap checking,
// change this key.
let key = (tid, rule.prio);
let mut binds = Vec::new();
let rule = RulePatterns {
rule,
pats: vars
.iter()
.map(|pat| Pattern::from_sema(env, &mut binds, pat))
.collect(),
};
by_term.entry(key).or_insert_with(Vec::new).push(rule);
}
}
let mut errs = Errors::default();
for (_, rows) in by_term {
let mut cursor = rows.into_iter();
let mut cursor = ruleset.rules.iter();
while let Some(left) = cursor.next() {
for right in cursor.as_slice() {
if check_overlap_pair(&left.pats, &right.pats) {
errs.add_edge(left.rule.id, right.rule.id);
}
errs.check_pair(left, right);
}
}
}
errs
}
/// Check if two rules overlap in the inputs they accept. `a` and `b` are the two rules' argument
/// patterns, paired position by position; returns `true` if some input could match both.
fn check_overlap_pair(a: &[Pattern], b: &[Pattern]) -> bool {
    debug_assert_eq!(a.len(), b.len());
    // The rules overlap only if every paired position overlaps, so bail out on the first pair
    // proven disjoint.
    let mut worklist: Vec<_> = a.iter().zip(b.iter()).collect();
    while let Some((a, b)) = worklist.pop() {
        // Checking the cross-product of two and-patterns is O(n*m). Merging sorted lists or
        // hash-maps might be faster in practice, but:
        // - The alternatives are not asymptotically faster, because in theory all the subpatterns
        //   might have the same extractor or enum variant, and in that case any approach has to
        //   check all of the cross-product combinations anyway.
        // - It's easier to reason about this doubly-nested loop than about merging sorted lists
        //   or picking the right hash keys.
        // - These lists are always so small that performance doesn't matter.
        for a in a.as_and_subpatterns() {
            for b in b.as_and_subpatterns() {
                let overlap = match (a, b) {
                    (Pattern::Int { value: a }, Pattern::Int { value: b }) => a == b,
                    (Pattern::Const { name: a }, Pattern::Const { name: b }) => a == b,
                    // if it's the same variant or same extractor, check all pairs of subterms
                    (
                        Pattern::Variant {
                            id: a,
                            pats: a_pats,
                        },
                        Pattern::Variant {
                            id: b,
                            pats: b_pats,
                        },
                    )
                    | (
                        Pattern::Extractor {
                            id: a,
                            pats: a_pats,
                        },
                        Pattern::Extractor {
                            id: b,
                            pats: b_pats,
                        },
                    ) if a == b => {
                        debug_assert_eq!(a_pats.len(), b_pats.len());
                        worklist.extend(a_pats.iter().zip(b_pats.iter()));
                        true
                    }
                    // different variants of the same enum definitely do not overlap
                    (Pattern::Variant { .. }, Pattern::Variant { .. }) => false,
                    // an extractor which does not exactly match the other pattern might overlap
                    (Pattern::Extractor { .. }, _) | (_, Pattern::Extractor { .. }) => true,
                    // a wildcard definitely overlaps
                    (Pattern::Wildcard, _) | (_, Pattern::Wildcard) => true,
                    // these patterns can only be paired with patterns of the same type, or
                    // wildcards or extractors, and all those cases are covered above
                    (Pattern::Int { .. } | Pattern::Const { .. } | Pattern::Variant { .. }, _) => {
                        unreachable!()
                    }
                    // and-patterns don't reach here due to as_and_subpatterns
                    (Pattern::And { .. }, _) => unreachable!(),
                };
                if !overlap {
                    return false;
                }
            }
        }
    }
    true
}
/// A version of [`sema::Pattern`] with some simplifications to make overlap checking easier.
#[derive(Debug, Clone)]
enum Pattern {
    /// Integer literal patterns.
    Int {
        value: i128,
    },
    /// Constant literal patterns, such as `$F32`.
    Const {
        name: Sym,
    },
    /// Enum variant constructors.
    Variant {
        id: TermId,
        pats: Box<[Pattern]>,
    },
    /// Conjunctions of patterns.
    And {
        pats: Box<[Pattern]>,
    },
    /// Extractor uses (both fallible and infallible).
    Extractor {
        id: TermId,
        pats: Box<[Pattern]>,
    },
    /// Matches any value. Variable bindings are also lowered to wildcards (see
    /// [`Pattern::from_sema`]).
    Wildcard,
}
impl Pattern {
    /// Create a [`Pattern`] from a [`sema::Pattern`]. The major differences between these two
    /// representations are as follows:
    /// 1. Variable bindings are removed and turned into wildcards
    /// 2. Equality constraints are removed and turned into inlined versions of the patterns they
    ///    would have introduced equalities with
    /// 3. [`sema::Pattern::Term`] instances are turned into either [`Pattern::Variant`] or
    ///    [`Pattern::Extractor`] cases depending on their term kind.
    ///
    /// `binds` accumulates the variable bindings seen so far; it is shared across the recursion
    /// so later `Var` uses can inline the bound pattern.
    fn from_sema(env: &TermEnv, binds: &mut Vec<(VarId, Pattern)>, pat: &sema::Pattern) -> Self {
        match pat {
            sema::Pattern::BindPattern(_, id, pat) => {
                // Record the pattern bound to this variable so later uses can inline it.
                let pat = Self::from_sema(env, binds, pat);
                binds.push((*id, pat.clone()));
                pat
            }
            sema::Pattern::Var(_, id) => {
                // Scan the most recently recorded bindings first.
                for (vid, pat) in binds.iter().rev() {
                    if vid == id {
                        // We inline equality constraints for two reasons: we specialize on the
                        // spine of related patterns only, so more specific information about
                        // individual values isn't necessarily helpful; we consider overlap
                        // checking to be an over-approximation of overlapping rules, so handling
                        // equalities ends up being best-effort. As an approximation, we use
                        // whatever pattern happened to be at the binding of the variable for all
                        // of the cases where it's used for equality. For example, in the following
                        // rule:
                        //
                        // > (rule (example x @ (Enum.Variant y) x) ...)
                        //
                        // we will only specialize up to `(Enum.Variant _)`, so any more specific
                        // runtime values of `y` won't end up helping to identify overlap. As a
                        // result, we rewrite the patterns in the rule to look more like the
                        // following, as it greatly simplifies overlap checking.
                        //
                        // > (rule (example (Enum.Variant _) (Enum.Variant _)) ...)
                        //
                        // Cases that this scheme won't handle look like the following:
                        //
                        // > (rule (example2 2 3) ...)
                        // > (rule (example2 x x) ...)
                        //
                        // As in this case we'll not make use of the information that `2` and `3`
                        // aren't equal to know that the rules don't overlap. One approach that we
                        // could take here is delaying substitution to the point where a variable
                        // binding has been specialized, turning the rules into the following once
                        // specialization had occurred for `2`:
                        //
                        // > (rule (example2 2 3) ...)
                        // > (rule (example2 2 2) ...)
                        return pat.clone();
                    }
                }
                // First use of an unbound variable: it matches anything.
                binds.push((*id, Pattern::Wildcard));
                Pattern::Wildcard
            }
            sema::Pattern::ConstInt(_, value) => Pattern::Int { value: *value },
            sema::Pattern::ConstPrim(_, name) => Pattern::Const { name: *name },
            &sema::Pattern::Term(_, id, ref pats) => {
                let pats = pats
                    .iter()
                    .map(|pat| Pattern::from_sema(env, binds, pat))
                    .collect();
                // The term's kind decides whether this is a destructuring enum-variant match or
                // an extractor call.
                match &env.terms[id.0].kind {
                    TermKind::EnumVariant { .. } => Pattern::Variant { id, pats },
                    TermKind::Decl { .. } => Pattern::Extractor { id, pats },
                }
            }
            sema::Pattern::Wildcard(_) => Pattern::Wildcard,
            sema::Pattern::And(_, pats) => {
                let pats = pats
                    .iter()
                    .map(|pat| Pattern::from_sema(env, binds, pat))
                    .collect();
                Pattern::And { pats }
            }
        }
    }

    /// If this is an and-pattern, return its subpatterns. Otherwise pretend like there's an
    /// and-pattern which has this as its only subpattern, and return self as a single-element
    /// slice.
    fn as_and_subpatterns(&self) -> &[Pattern] {
        if let Pattern::And { pats } = self {
            pats
        } else {
            std::slice::from_ref(self)
        }
    }
}

View File

@@ -0,0 +1,676 @@
//! A strongly-normalizing intermediate representation for ISLE rules. This representation is chosen
//! to closely reflect the operations we can implement in Rust, to make code generation easy.
use crate::error::{Error, Source, Span};
use crate::lexer::Pos;
use crate::sema::{self, RuleVisitor};
use crate::DisjointSets;
use std::collections::{hash_map::Entry, HashMap};
/// A field index in a tuple or an enum variant.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct TupleIndex(u8);

/// A hash-consed identifier for a binding, stored in a [RuleSet].
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct BindingId(u16);

/// A hash-consed identifier for an expression, stored in a [RuleSet].
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct ExprId(u16);

impl BindingId {
    /// Get the index of this id.
    pub fn index(self) -> usize {
        // Widening `u16` -> `usize` is lossless on every supported target.
        usize::from(self.0)
    }
}

impl ExprId {
    /// Get the index of this id.
    pub fn index(self) -> usize {
        // Widening `u16` -> `usize` is lossless on every supported target.
        usize::from(self.0)
    }
}
/// Expressions construct new values. Rust pattern matching can only destructure existing values,
/// not call functions or construct new values. So `if-let` and external extractor invocations need
/// to interrupt pattern matching in order to evaluate a suitable expression. These expressions are
/// also used when evaluating the right-hand side of a rule.
///
/// `Eq` and `Hash` are derived so that structurally identical expressions can be de-duplicated
/// ("hash-consed") and shared through a single [ExprId] within a [RuleSet].
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum Expr {
    /// A binding from some sequence of pattern matches, used as an expression.
    Binding {
        /// Which binding site is being used as an expression?
        source: BindingId,
    },
    /// Evaluates to the given integer literal.
    ConstInt {
        /// The constant value.
        val: i128,
    },
    /// Evaluates to the given primitive Rust value.
    ConstPrim {
        /// The constant value.
        val: sema::Sym,
    },
    /// One of the arguments to the top-level function.
    Argument {
        /// Which of the function's arguments is this?
        index: TupleIndex,
    },
    /// The result of calling an external extractor.
    Extractor {
        /// Which extractor should be called?
        term: sema::TermId,
        /// What expression should be passed to the extractor?
        parameter: ExprId,
    },
    /// The result of calling an external constructor.
    Constructor {
        /// Which constructor should be called?
        term: sema::TermId,
        /// What expressions should be passed to the constructor?
        parameters: Box<[ExprId]>,
    },
    /// The result of constructing an enum variant.
    Variant {
        /// Which enum type should be constructed?
        ty: sema::TypeId,
        /// Which variant of that enum should be constructed?
        variant: sema::VariantId,
        /// What expressions should be provided for this variant's fields?
        fields: Box<[ExprId]>,
    },
}
/// Binding sites are the result of Rust pattern matching. This is the dual of an expression: while
/// expressions build up values, bindings take values apart.
///
/// `Eq` and `Hash` are derived so that structurally identical bindings can be hash-consed and
/// shared through a single [BindingId] within a [RuleSet].
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Binding {
    /// A match begins at the result of some expression that produces a Rust value.
    Expr {
        /// Which expression is being matched?
        constructor: ExprId,
    },
    /// After some sequence of matches, we'll match one of the previous bindings against an enum
    /// variant and produce a new binding from one of its fields. There must be a matching
    /// [Constraint] for each `source`/`variant` pair that appears in a binding.
    Variant {
        /// Which binding is being matched?
        source: BindingId,
        /// Which enum variant are we pulling binding sites from? This is somewhat redundant with
        /// information in a corresponding [Constraint]. However, it must be here so that different
        /// enum variants aren't hash-consed into the same binding site.
        variant: sema::VariantId,
        /// Which field of this enum variant are we projecting out? Although ISLE uses named fields,
        /// we track them by index for constant-time comparisons. The [sema::TypeEnv] can be used to
        /// get the field names.
        field: TupleIndex,
    },
    /// After some sequence of matches, we'll match one of the previous bindings against
    /// `Option::Some` and produce a new binding from its contents. (This currently only happens
    /// with external extractors.)
    Some {
        /// Which binding is being matched?
        source: BindingId,
    },
    /// After some sequence of matches, we'll match one of the previous bindings against a tuple and
    /// produce a new binding from one of its fields. (This currently only happens with external
    /// extractors.)
    Tuple {
        /// Which binding is being matched?
        source: BindingId,
        /// Which tuple field are we projecting out?
        field: TupleIndex,
    },
}
/// Pattern matches which can fail. Some binding sites are the result of successfully matching a
/// constraint. A rule applies constraints to binding sites to determine whether the rule matches.
///
/// Constraints are small `Copy` values; they are compared with `==` both when detecting
/// conflicting constraints within one rule and when checking two rules for overlap.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Constraint {
    /// The value must match this enum variant.
    Variant {
        /// Which enum type is being matched? This is implied by the binding where the constraint is
        /// applied, but recorded here for convenience.
        ty: sema::TypeId,
        /// Which enum variant must this binding site match to satisfy the rule?
        variant: sema::VariantId,
        /// Number of fields in this variant of this enum. This is recorded in the constraint for
        /// convenience, to avoid needing to look up the variant in a [sema::TypeEnv].
        fields: TupleIndex,
    },
    /// The value must equal this integer literal.
    ConstInt {
        /// The constant value.
        val: i128,
    },
    /// The value must equal this Rust primitive value.
    ConstPrim {
        /// The constant value.
        val: sema::Sym,
    },
    /// The value must be an `Option::Some`, from a fallible extractor.
    Some,
}
/// A term-rewriting rule. All [BindingId]s and [ExprId]s are only meaningful in the context of the
/// [RuleSet] that contains this rule.
#[derive(Debug, Default)]
pub struct Rule {
    /// Where was this rule defined?
    pub pos: Pos,
    /// All of these bindings must match for this rule to apply. Note that within a single rule, if
    /// a binding site must match two different constants, then the rule can never match.
    ///
    /// Kept private so that consumers of this type can't observe the `HashMap`'s
    /// non-deterministic iteration order; query individual binding sites through
    /// [Rule::get_constraint] instead.
    constraints: HashMap<BindingId, Constraint>,
    /// Sets of bindings which must be equal for this rule to match.
    pub equals: DisjointSets<BindingId>,
    /// If other rules apply along with this one, the one with the highest numeric priority is
    /// evaluated. If multiple applicable rules have the same priority, that's an overlap error.
    pub prio: i64,
    /// If this rule applies, the top-level term should evaluate to this expression.
    pub result: ExprId,
}
/// Records whether a given pair of rules can both match on some input. Returned by
/// [Rule::may_overlap].
pub enum Overlap {
    /// There is no input on which this pair of rules can both match.
    No,
    /// There is at least one input on which this pair of rules can both match.
    Yes {
        /// True if every input accepted by one rule is also accepted by the other. This does not
        /// indicate which rule is more general and in fact the rules could match exactly the same
        /// set of inputs. You can work out which by comparing the number of constraints in both
        /// rules: The more general rule has fewer constraints.
        subset: bool,
    },
}
/// A collection of [Rule]s, along with hash-consed [Binding]s and [Expr]s for all of them.
///
/// The public fields are plain `Vec`s, so consumers observe a deterministic order.
#[derive(Debug, Default)]
pub struct RuleSet {
    /// The [Rule]s for a single [sema::Term].
    pub rules: Vec<Rule>,
    /// The bindings identified by [BindingId]s within rules (see [BindingId::index]).
    pub bindings: Vec<Binding>,
    /// The expressions identified by [ExprId]s within rules (see [ExprId::index]).
    pub exprs: Vec<Expr>,
}
/// Construct a [RuleSet] for each term in `termenv` that has rules.
pub fn build(
termenv: &sema::TermEnv,
tyenv: &sema::TypeEnv,
) -> (Vec<(sema::TermId, RuleSet)>, Vec<Error>) {
let mut errors = Vec::new();
let mut term = HashMap::new();
for rule in termenv.rules.iter() {
term.entry(rule.lhs.root_term().unwrap())
.or_insert_with(RuleSetBuilder::default)
.add_rule(rule, termenv, tyenv, &mut errors);
}
// The `term` hash map may return terms in any order. Sort them to ensure that we produce the
// same output every time when given the same ISLE source. Rules are added to terms in `RuleId`
// order, so it's not necessary to sort within a `RuleSet`.
let mut result: Vec<_> = term
.into_iter()
.map(|(term, builder)| (term, builder.rules))
.collect();
result.sort_unstable_by_key(|(term, _)| *term);
(result, errors)
}
impl Rule {
    /// Returns whether a given pair of rules can both match on some input, and if so, whether
    /// either matches a subset of the other's inputs. If this function returns `No`, then the two
    /// rules definitely do not overlap. However, it may return `Yes` in cases where the rules can't
    /// overlap in practice, or where this analysis is not yet precise enough to decide.
    pub fn may_overlap(&self, other: &Rule) -> Overlap {
        // If some binding site is constrained differently by the two rules, no input can satisfy
        // both constraints at once and the rules can't overlap. If the rules do overlap, and one
        // rule's constraints are a subset of the other's, then the less-constrained rule accepts
        // every input the more-constrained rule does, and possibly more. Both questions are
        // answered in a single pass, using the observation that if the intersection of two sets
        // equals the smaller set, the smaller set is a subset of the larger. So the outer loop
        // must run over the rule with fewer constraints to identify the subset relationship
        // correctly — which also happens to be the faster order.
        let (small, big) = if self.constraints.len() <= other.constraints.len() {
            (self, other)
        } else {
            (other, self)
        };
        // TODO: nonlinear constraints complicate the subset check
        // Equality constraints also restrict which inputs a rule accepts, but without a concrete
        // pattern to compare, proving anything about them here is complicated. Approximate: if
        // `small` carries any of these nonlinear constraints, conservatively report that it is
        // not a subset of `big`.
        let mut subset = small.equals.is_empty();
        for (site, constraint) in small.constraints.iter() {
            match big.constraints.get(site) {
                // Both rules constrain this binding site identically; that doesn't rule out any
                // possibilities for what inputs the rules accept.
                Some(found) if found == constraint => {}
                // The rules constrain this binding site differently, so no input can match both.
                Some(_) => return Overlap::No,
                // `big`'s inputs are a subset of `small`'s only if every constraint in `small`
                // appears in `big`; this binding site is a counterexample.
                None => subset = false,
            }
        }
        Overlap::Yes { subset }
    }

    /// Returns the constraint that the given binding site must satisfy for this rule to match, if
    /// there is one.
    pub fn get_constraint(&self, source: BindingId) -> Option<Constraint> {
        self.constraints.get(&source).copied()
    }

    /// Record that `source` must satisfy `constraint` for this rule to match. Returns an error if
    /// `source` is already required to satisfy a different, conflicting constraint, since such a
    /// rule can never match any input.
    fn set_constraint(
        &mut self,
        source: BindingId,
        constraint: Constraint,
    ) -> Result<(), UnreachableError> {
        match self.constraints.entry(source) {
            Entry::Vacant(slot) => {
                slot.insert(constraint);
                Ok(())
            }
            Entry::Occupied(slot) if *slot.get() == constraint => Ok(()),
            Entry::Occupied(slot) => Err(UnreachableError {
                pos: self.pos,
                constraint_a: *slot.get(),
                constraint_b: constraint,
            }),
        }
    }
}
/// Internal record of why a rule can never match: one binding site was required to satisfy two
/// different constraints. Converted into [Error::UnreachableError] (with source context attached)
/// in `RuleSetBuilder::add_rule`.
#[derive(Debug)]
struct UnreachableError {
    /// Position of the offending rule, used to build the error span.
    pos: Pos,
    /// The constraint that was already recorded for the binding site.
    constraint_a: Constraint,
    /// The conflicting constraint that was subsequently requested.
    constraint_b: Constraint,
}
/// Accumulates the rules for one term into a [RuleSet], hash-consing bindings and expressions as
/// rules are translated.
#[derive(Debug, Default)]
struct RuleSetBuilder {
    /// The rule currently being translated; reset via `std::mem::take` in `add_rule`.
    current_rule: Rule,
    /// Hash-consing table: structurally equal [Binding]s map to the same [BindingId].
    binding_map: HashMap<Binding, BindingId>,
    /// Hash-consing table: structurally equal [Expr]s map to the same [ExprId].
    expr_map: HashMap<Expr, ExprId>,
    /// Conflicting-constraint errors collected while building the current rule.
    unreachable: Vec<UnreachableError>,
    /// The completed rules plus the bindings/expressions shared by all of them.
    rules: RuleSet,
}
impl RuleSetBuilder {
/// Translate one [sema::Rule] and add it to the [RuleSet] under construction. A rule that can
/// never match is dropped from the rule set and reported through `errors` instead.
fn add_rule(
    &mut self,
    rule: &sema::Rule,
    termenv: &sema::TermEnv,
    tyenv: &sema::TypeEnv,
    errors: &mut Vec<Error>,
) {
    // Set `pos` before any translation work: conflicting-constraint errors recorded along the
    // way (see `Rule::set_constraint`) capture `self.current_rule.pos`.
    self.current_rule.pos = rule.pos;
    self.current_rule.prio = rule.prio;
    // Walk the rule's patterns and right-hand side via the [sema::RuleVisitor] protocol; the
    // visit yields the expression the rule evaluates to when it applies.
    self.current_rule.result = rule.visit(self, termenv);
    self.normalize_equivalence_classes();
    let rule = std::mem::take(&mut self.current_rule);
    if self.unreachable.is_empty() {
        self.rules.rules.push(rule);
    } else {
        // If this rule can never match, drop it so it doesn't affect overlap checking.
        errors.extend(self.unreachable.drain(..).map(|err| {
            let src = Source::new(
                tyenv.filenames[err.pos.file].clone(),
                tyenv.file_texts[err.pos.file].clone(),
            );
            Error::UnreachableError {
                msg: format!(
                    "rule requires binding to match both {:?} and {:?}",
                    err.constraint_a, err.constraint_b
                ),
                src,
                span: Span::new_single(err.pos),
            }
        }))
    }
}
/// Establish the invariant that a binding site can have a concrete constraint in `constraints`,
/// or an equality constraint in `equals`, but not both. This is useful because overlap checking
/// is most effective on concrete constraints, and also because it exposes more rule structure
/// for codegen.
///
/// If a binding site is constrained and also required to be equal to another binding site, then
/// copy the constraint and push the equality inside it. For example:
/// - `(term x @ 2 x)` is rewritten to `(term 2 2)`
/// - `(term x @ (T.A _ _) x)` is rewritten to `(term (T.A y z) (T.A y z))`
/// In the latter case, note that every field of `T.A` has been replaced with a fresh variable
/// and each of the copies are set equal.
///
/// If several binding sites are supposed to be equal but they each have conflicting constraints
/// then this rule is unreachable. For example, `(term x @ 2 (and x 3))` requires both arguments
/// to be equal but also requires them to match both 2 and 3, which can't happen for any input.
///
/// We could do this incrementally, while building the rule. The implementation is nearly
/// identical but, having tried both ways, it's slightly easier to think about this as a
/// separate pass. Also, batching up this work should be slightly faster if there are multiple
/// binding sites set equal to each other.
fn normalize_equivalence_classes(&mut self) {
    // First, find all the constraints that need to be copied to other binding sites in their
    // respective equivalence classes. Note: do not remove these constraints here! Yes, we'll
    // put them back later, but we rely on still having them around so that
    // `set_constraint_or_error` can detect conflicting constraints.
    let mut deferred_constraints = Vec::new();
    for (&binding, &constraint) in self.current_rule.constraints.iter() {
        if let Some(root) = self.current_rule.equals.find_mut(binding) {
            // Key by the class representative; `remove_set_of` below accepts any member.
            deferred_constraints.push((root, constraint));
        }
    }
    // Pick one constraint and propagate it through its equivalence class. If there are no
    // errors then it doesn't matter what order we do this in, because that means that any
    // redundant constraints on an equivalence class were equal. We can write equal values into
    // the constraint map in any order and get the same result. If there were errors, we aren't
    // going to generate code from this rule, so order only affects how conflicts are reported.
    while let Some((current, constraint)) = deferred_constraints.pop() {
        // Remove the entire equivalence class and instead add copies of this constraint to
        // every binding site in the class. If there are constraints on other binding sites in
        // this class, then when we try to copy this constraint to those binding sites,
        // `set_constraint_or_error` will check that the constraints are equal and record an
        // appropriate error otherwise.
        //
        // Later, we'll re-visit those other binding sites because they're still in
        // `deferred_constraints`, but `set` will be empty because we already deleted the
        // equivalence class the first time we encountered it.
        let set = self.current_rule.equals.remove_set_of(current);
        match (constraint, set.split_first()) {
            // If the equivalence class was empty we don't have to do anything.
            (_, None) => continue,
            // If we removed an equivalence class with an enum variant constraint, make the
            // fields of the variant equal instead. Create a binding for every field of every
            // member of `set`. Arbitrarily pick one to set all the others equal to. If there
            // are existing constraints on the new fields, copy those around the new equivalence
            // classes too.
            (
                Constraint::Variant {
                    fields, variant, ..
                },
                Some((&base, rest)),
            ) => {
                let base_fields =
                    self.field_bindings(base, fields, variant, &mut deferred_constraints);
                for &binding in rest {
                    for (&x, &y) in self
                        .field_bindings(binding, fields, variant, &mut deferred_constraints)
                        .iter()
                        .zip(base_fields.iter())
                    {
                        self.current_rule.equals.merge(x, y);
                    }
                }
            }
            // These constraints don't introduce new binding sites.
            (Constraint::ConstInt { .. } | Constraint::ConstPrim { .. }, _) => {}
            // Currently, `Some` constraints are only introduced implicitly during the
            // translation from `sema`, so there's no way to set the corresponding binding
            // sites equal to each other. Instead, any equality constraints get applied on
            // the results of matching `Some()` or tuple patterns.
            (Constraint::Some, _) => unreachable!(),
        }
        for binding in set {
            self.set_constraint_or_error(binding, constraint);
        }
    }
}
fn field_bindings(
&mut self,
binding: BindingId,
fields: TupleIndex,
variant: sema::VariantId,
deferred_constraints: &mut Vec<(BindingId, Constraint)>,
) -> Box<[BindingId]> {
(0..fields.0)
.map(TupleIndex)
.map(move |field| {
let binding = self.dedup_binding(Binding::Variant {
source: binding,
variant,
field,
});
// We've just added an equality constraint to a binding site that may not have had
// one already. If that binding site already had a concrete constraint, then we need
// to "recursively" propagate that constraint through the new equivalence class too.
if let Some(constraint) = self.current_rule.get_constraint(binding) {
deferred_constraints.push((binding, constraint));
}
binding
})
.collect()
}
fn dedup_binding(&mut self, binding: Binding) -> BindingId {
if let Some(binding) = self.binding_map.get(&binding) {
*binding
} else {
let id = BindingId(self.rules.bindings.len().try_into().unwrap());
self.rules.bindings.push(binding.clone());
self.binding_map.insert(binding, id);
id
}
}
fn dedup_expr(&mut self, expr: Expr) -> ExprId {
if let Some(expr) = self.expr_map.get(&expr) {
*expr
} else {
let id = ExprId(self.rules.exprs.len().try_into().unwrap());
self.rules.exprs.push(expr.clone());
self.expr_map.insert(expr, id);
id
}
}
fn set_constraint(&mut self, input: Binding, constraint: Constraint) -> BindingId {
let input = self.dedup_binding(input);
self.set_constraint_or_error(input, constraint);
input
}
fn set_constraint_or_error(&mut self, input: BindingId, constraint: Constraint) {
if let Err(e) = self.current_rule.set_constraint(input, constraint) {
self.unreachable.push(e);
}
}
}
impl sema::PatternVisitor for RuleSetBuilder {
    /// Binding sites are identified by [Binding] here rather than [BindingId]. Either would work,
    /// but interning lazily means the [RuleSet] only records binding sites that some rule
    /// actually uses.
    type PatternId = Binding;

    fn add_match_equal(&mut self, a: Binding, b: Binding, _ty: sema::TypeId) {
        let lhs = self.dedup_binding(a);
        let rhs = self.dedup_binding(b);
        if lhs == rhs {
            // The same binding site is trivially equal to itself; nothing to record.
            return;
        }
        self.current_rule.equals.merge(lhs, rhs);
    }

    fn add_match_int(&mut self, input: Binding, _ty: sema::TypeId, val: i128) {
        self.set_constraint(input, Constraint::ConstInt { val });
    }

    fn add_match_prim(&mut self, input: Binding, _ty: sema::TypeId, val: sema::Sym) {
        self.set_constraint(input, Constraint::ConstPrim { val });
    }

    fn add_match_variant(
        &mut self,
        input: Binding,
        input_ty: sema::TypeId,
        arg_tys: &[sema::TypeId],
        variant: sema::VariantId,
    ) -> Vec<Binding> {
        let fields = TupleIndex(arg_tys.len().try_into().unwrap());
        // Constrain `input` to this enum variant, then expose each field as a binding site.
        let source = self.set_constraint(
            input,
            Constraint::Variant {
                fields,
                ty: input_ty,
                variant,
            },
        );
        let mut field_patterns = Vec::new();
        for field in (0..fields.0).map(TupleIndex) {
            field_patterns.push(Binding::Variant {
                source,
                variant,
                field,
            });
        }
        field_patterns
    }

    fn add_extract(
        &mut self,
        input: Binding,
        _input_ty: sema::TypeId,
        output_tys: Vec<sema::TypeId>,
        term: sema::TermId,
        infallible: bool,
        _multi: bool,
    ) -> Vec<Binding> {
        // ISLE treats external extractors as patterns, but Rust patterns can't call functions,
        // so in this representation they're expressions. Bridge the two worlds by round-tripping
        // the input through the expression representation.
        let parameter = self.pattern_as_expr(input);
        let call = self.dedup_expr(Expr::Extractor { term, parameter });
        let result = self.expr_as_pattern(call);
        // A fallible extractor yields an optional value: constrain it to `Some` and match inside.
        let source = if infallible {
            result
        } else {
            let source = self.set_constraint(result, Constraint::Some);
            Binding::Some { source }
        };
        // Multi-output extractors return a tuple; project out one binding per element.
        match output_tys.len().try_into().unwrap() {
            0 => Vec::new(),
            1 => vec![source],
            outputs => {
                let source = self.dedup_binding(source);
                let mut results = Vec::new();
                for field in (0..outputs).map(TupleIndex) {
                    results.push(Binding::Tuple { source, field });
                }
                results
            }
        }
    }
}
impl sema::ExprVisitor for RuleSetBuilder {
    /// Unlike the pattern side, intermediate expressions are identified by [ExprId], not [Expr]:
    /// every expression we visit ends up being used, so interning it eagerly is never wasted.
    type ExprId = ExprId;

    fn add_const_int(&mut self, _ty: sema::TypeId, val: i128) -> ExprId {
        self.dedup_expr(Expr::ConstInt { val })
    }

    fn add_const_prim(&mut self, _ty: sema::TypeId, val: sema::Sym) -> ExprId {
        self.dedup_expr(Expr::ConstPrim { val })
    }

    fn add_create_variant(
        &mut self,
        inputs: Vec<(ExprId, sema::TypeId)>,
        ty: sema::TypeId,
        variant: sema::VariantId,
    ) -> ExprId {
        // The field types are implied by `ty`/`variant`; keep only the expressions.
        let fields = inputs.into_iter().map(|(value, _ty)| value).collect();
        self.dedup_expr(Expr::Variant {
            ty,
            variant,
            fields,
        })
    }

    fn add_construct(
        &mut self,
        inputs: Vec<(ExprId, sema::TypeId)>,
        _ty: sema::TypeId,
        term: sema::TermId,
        _infallible: bool,
        _multi: bool,
    ) -> ExprId {
        // Argument types are implied by `term`; keep only the expressions.
        let parameters = inputs.into_iter().map(|(value, _ty)| value).collect();
        self.dedup_expr(Expr::Constructor { term, parameters })
    }
}
impl sema::RuleVisitor for RuleSetBuilder {
    type PatternVisitor = Self;
    type ExprVisitor = Self;
    type Expr = ExprId;

    fn add_arg(&mut self, index: usize, _ty: sema::TypeId) -> Binding {
        // Rule arguments can be referenced without pattern-matching, so model them as
        // expressions wrapped in a binding.
        let argument = self.dedup_expr(Expr::Argument {
            index: TupleIndex(index.try_into().unwrap()),
        });
        Binding::Expr {
            constructor: argument,
        }
    }

    fn add_pattern<F: FnOnce(&mut Self)>(&mut self, visitor: F) {
        visitor(self)
    }

    fn add_expr<F>(&mut self, visitor: F) -> ExprId
    where
        F: FnOnce(&mut Self) -> sema::VisitedExpr<Self>,
    {
        visitor(self).value
    }

    fn expr_as_pattern(&mut self, expr: ExprId) -> Binding {
        match &self.rules.exprs[expr.index()] {
            // Unwrap an expression that merely wraps a binding, rather than nesting another
            // adapter layer around it.
            &Expr::Binding { source } => self.rules.bindings[source.index()],
            _ => Binding::Expr { constructor: expr },
        }
    }

    fn pattern_as_expr(&mut self, pattern: Binding) -> ExprId {
        match pattern {
            // Unwrap a binding that merely wraps an expression, rather than nesting another
            // adapter layer around it.
            Binding::Expr { constructor } => constructor,
            other => {
                let source = self.dedup_binding(other);
                self.dedup_expr(Expr::Binding { source })
            }
        }
    }
}