cranelift-isle: New IR and revised overlap checks (#5195)
* cranelift-isle: New IR and revised overlap checks * Improve error reporting * Avoid "unused argument" warnings a nicer way * Remove unused fields * Minimize diff and "fix" error handling I had tried to use Miette "right" and made things worse somehow. Among other changes, revert all my changes to unrelated parts of `error.rs` and `error_miette.rs`. * Review comments: Rename "unmatchable" to "unreachable" * Review comments: newtype wrappers, not type aliases * Review comments: more comments on overlap checks * Review comments: Clarify `normalize_equivalence_classes` * Review comments: use union-find instead of linked list This saves about 50 lines of code in the trie_again module. The union-find implementation is about twice as long as that, counting comments and doc-tests, but that's a worth-while tradeoff. However, this makes `normalize_equivalence_classes` slower, because now finding all elements of an equivalence class takes time linear in the total size of all equivalence classes. If that ever turns out to be a problem in practice we can find some way to optimize `remove_set_of`. * Review comments: Hide constraints HashMap We want to enforce that consumers of this representation can't observe non-deterministic ordering in any of its public types. * Review comments: Normalize equivalence classes incrementally I'm not sure whether this is a good idea. It doesn't make the logic particularly simpler, and I think it will do more work if three or more binding sites with enum-variant constraints get set equal to each other. * More comments and other clarifications * Revert "Review comments: Normalize equivalence classes incrementally" * Even more comments
This commit is contained in:
@@ -42,6 +42,18 @@ pub enum Error {
|
|||||||
span: Span,
|
span: Span,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// The rule can never match any input.
|
||||||
|
UnreachableError {
|
||||||
|
/// The error message.
|
||||||
|
msg: String,
|
||||||
|
|
||||||
|
/// The input ISLE source.
|
||||||
|
src: Source,
|
||||||
|
|
||||||
|
/// The location of the unreachable rule.
|
||||||
|
span: Span,
|
||||||
|
},
|
||||||
|
|
||||||
/// The rules mentioned overlap in the input they accept.
|
/// The rules mentioned overlap in the input they accept.
|
||||||
OverlapError {
|
OverlapError {
|
||||||
/// The error message.
|
/// The error message.
|
||||||
@@ -119,6 +131,15 @@ impl std::fmt::Display for Error {
|
|||||||
#[cfg(feature = "miette-errors")]
|
#[cfg(feature = "miette-errors")]
|
||||||
Error::TypeError { msg, .. } => write!(f, "type error: {}", msg),
|
Error::TypeError { msg, .. } => write!(f, "type error: {}", msg),
|
||||||
|
|
||||||
|
Error::UnreachableError { src, span, msg } => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{}: unreachable rule: {}",
|
||||||
|
span.from.pretty_print_with_filename(&*src.name),
|
||||||
|
msg
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
Error::OverlapError { msg, rules, .. } => {
|
Error::OverlapError { msg, rules, .. } => {
|
||||||
writeln!(f, "overlap error: {}\n{}", msg, OverlappingRules(&rules))
|
writeln!(f, "overlap error: {}\n{}", msg, OverlappingRules(&rules))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -91,6 +91,114 @@ impl<K: Hash + Eq, V> Index<&K> for StableMap<K, V> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Stores disjoint sets and provides efficient operations to merge two sets, and to find a
|
||||||
|
/// representative member of a set given any member of that set. In this implementation, sets always
|
||||||
|
/// have at least two members, and can only be formed by the `merge` operation.
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
pub struct DisjointSets<T> {
|
||||||
|
parent: HashMap<T, (T, u8)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Copy + std::fmt::Debug + Eq + Hash> DisjointSets<T> {
|
||||||
|
/// Find a representative member of the set containing `x`. If `x` has not been merged with any
|
||||||
|
/// other items using `merge`, returns `None`. This method updates the data structure to make
|
||||||
|
/// future queries faster, and takes amortized constant time.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// let mut sets = cranelift_isle::DisjointSets::default();
|
||||||
|
/// sets.merge(1, 2);
|
||||||
|
/// sets.merge(1, 3);
|
||||||
|
/// sets.merge(2, 4);
|
||||||
|
/// assert_eq!(sets.find_mut(3).unwrap(), sets.find_mut(4).unwrap());
|
||||||
|
/// assert_eq!(sets.find_mut(10), None);
|
||||||
|
/// ```
|
||||||
|
pub fn find_mut(&mut self, mut x: T) -> Option<T> {
|
||||||
|
while let Some(node) = self.parent.get(&x) {
|
||||||
|
if node.0 == x {
|
||||||
|
return Some(x);
|
||||||
|
}
|
||||||
|
let grandparent = self.parent[&node.0].0;
|
||||||
|
// Re-do the lookup but take a mutable borrow this time
|
||||||
|
self.parent.get_mut(&x).unwrap().0 = grandparent;
|
||||||
|
x = grandparent;
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Merge the set containing `x` with the set containing `y`. This method takes amortized
|
||||||
|
/// constant time.
|
||||||
|
pub fn merge(&mut self, x: T, y: T) {
|
||||||
|
assert_ne!(x, y);
|
||||||
|
let mut x = if let Some(x) = self.find_mut(x) {
|
||||||
|
self.parent[&x]
|
||||||
|
} else {
|
||||||
|
self.parent.insert(x, (x, 0));
|
||||||
|
(x, 0)
|
||||||
|
};
|
||||||
|
let mut y = if let Some(y) = self.find_mut(y) {
|
||||||
|
self.parent[&y]
|
||||||
|
} else {
|
||||||
|
self.parent.insert(y, (y, 0));
|
||||||
|
(y, 0)
|
||||||
|
};
|
||||||
|
|
||||||
|
if x == y {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if x.1 < y.1 {
|
||||||
|
std::mem::swap(&mut x, &mut y);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.parent.get_mut(&y.0).unwrap().0 = x.0;
|
||||||
|
if x.1 == y.1 {
|
||||||
|
let x_rank = &mut self.parent.get_mut(&x.0).unwrap().1;
|
||||||
|
*x_rank = x_rank.saturating_add(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Remove the set containing the given item, and return all members of that set. The set is
|
||||||
|
/// returned in sorted order. This method takes time linear in the total size of all sets.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// let mut sets = cranelift_isle::DisjointSets::default();
|
||||||
|
/// sets.merge(1, 2);
|
||||||
|
/// sets.merge(1, 3);
|
||||||
|
/// sets.merge(2, 4);
|
||||||
|
/// assert_eq!(sets.remove_set_of(4), &[1, 2, 3, 4]);
|
||||||
|
/// assert_eq!(sets.remove_set_of(1), &[]);
|
||||||
|
/// assert!(sets.is_empty());
|
||||||
|
/// ```
|
||||||
|
pub fn remove_set_of(&mut self, x: T) -> Vec<T>
|
||||||
|
where
|
||||||
|
T: Ord,
|
||||||
|
{
|
||||||
|
let mut set = Vec::new();
|
||||||
|
if let Some(x) = self.find_mut(x) {
|
||||||
|
set.extend(self.parent.keys().copied());
|
||||||
|
// It's important to use `find_mut` here to avoid quadratic worst-case time.
|
||||||
|
set.retain(|&y| self.find_mut(y).unwrap() == x);
|
||||||
|
for y in set.iter() {
|
||||||
|
self.parent.remove(y);
|
||||||
|
}
|
||||||
|
set.sort_unstable();
|
||||||
|
}
|
||||||
|
set
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if there are no sets. This method takes constant time.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// let mut sets = cranelift_isle::DisjointSets::default();
|
||||||
|
/// assert!(sets.is_empty());
|
||||||
|
/// sets.merge(1, 2);
|
||||||
|
/// assert!(!sets.is_empty());
|
||||||
|
/// ```
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.parent.is_empty()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub mod ast;
|
pub mod ast;
|
||||||
pub mod codegen;
|
pub mod codegen;
|
||||||
pub mod compile;
|
pub mod compile;
|
||||||
@@ -102,6 +210,7 @@ pub mod overlap;
|
|||||||
pub mod parser;
|
pub mod parser;
|
||||||
pub mod sema;
|
pub mod sema;
|
||||||
pub mod trie;
|
pub mod trie;
|
||||||
|
pub mod trie_again;
|
||||||
|
|
||||||
#[cfg(feature = "miette-errors")]
|
#[cfg(feature = "miette-errors")]
|
||||||
mod error_miette;
|
mod error_miette;
|
||||||
|
|||||||
@@ -5,15 +5,14 @@ use std::collections::{HashMap, HashSet};
|
|||||||
|
|
||||||
use crate::error::{Error, Result, Source, Span};
|
use crate::error::{Error, Result, Source, Span};
|
||||||
use crate::lexer::Pos;
|
use crate::lexer::Pos;
|
||||||
use crate::sema::{self, Rule, RuleId, Sym, TermEnv, TermId, TermKind, TypeEnv, VarId};
|
use crate::sema::{TermEnv, TermId, TermKind, TypeEnv};
|
||||||
|
use crate::trie_again;
|
||||||
|
|
||||||
/// Check for overlap.
|
/// Check for overlap.
|
||||||
pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> {
|
pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> {
|
||||||
let mut errors = check_overlaps(termenv).report(tyenv, termenv);
|
let (terms, mut errors) = trie_again::build(termenv, tyenv);
|
||||||
errors.sort_by_key(|err| match err {
|
errors.append(&mut check_overlaps(terms, termenv).report(tyenv));
|
||||||
Error::OverlapError { rules, .. } => rules.first().unwrap().1.from,
|
|
||||||
_ => Pos::default(),
|
|
||||||
});
|
|
||||||
match errors.len() {
|
match errors.len() {
|
||||||
0 => Ok(()),
|
0 => Ok(()),
|
||||||
1 => Err(errors.pop().unwrap()),
|
1 => Err(errors.pop().unwrap()),
|
||||||
@@ -25,7 +24,7 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> {
|
|||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
struct Errors {
|
struct Errors {
|
||||||
/// Edges between rules indicating overlap.
|
/// Edges between rules indicating overlap.
|
||||||
nodes: HashMap<RuleId, HashSet<RuleId>>,
|
nodes: HashMap<Pos, HashSet<Pos>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Errors {
|
impl Errors {
|
||||||
@@ -33,30 +32,29 @@ impl Errors {
|
|||||||
/// nodes from the graph with the highest degree, reporting errors for them and their direct
|
/// nodes from the graph with the highest degree, reporting errors for them and their direct
|
||||||
/// connections. The goal with reporting errors this way is to prefer reporting rules that
|
/// connections. The goal with reporting errors this way is to prefer reporting rules that
|
||||||
/// overlap with many others first, and then report other more targeted overlaps later.
|
/// overlap with many others first, and then report other more targeted overlaps later.
|
||||||
fn report(mut self, tyenv: &TypeEnv, termenv: &TermEnv) -> Vec<Error> {
|
fn report(mut self, tyenv: &TypeEnv) -> Vec<Error> {
|
||||||
let mut errors = Vec::new();
|
let mut errors = Vec::new();
|
||||||
|
|
||||||
let get_info = |id: RuleId| {
|
let get_info = |pos: Pos| {
|
||||||
let rule = &termenv.rules[id.0];
|
let file = pos.file;
|
||||||
let file = rule.pos.file;
|
|
||||||
let src = Source::new(
|
let src = Source::new(
|
||||||
tyenv.filenames[file].clone(),
|
tyenv.filenames[file].clone(),
|
||||||
tyenv.file_texts[file].clone(),
|
tyenv.file_texts[file].clone(),
|
||||||
);
|
);
|
||||||
let span = Span::new_single(rule.pos);
|
let span = Span::new_single(pos);
|
||||||
(src, span)
|
(src, span)
|
||||||
};
|
};
|
||||||
|
|
||||||
while let Some((&id, _)) = self
|
while let Some((&pos, _)) = self
|
||||||
.nodes
|
.nodes
|
||||||
.iter()
|
.iter()
|
||||||
.max_by_key(|(id, edges)| (edges.len(), *id))
|
.max_by_key(|(pos, edges)| (edges.len(), *pos))
|
||||||
{
|
{
|
||||||
let node = self.nodes.remove(&id).unwrap();
|
let node = self.nodes.remove(&pos).unwrap();
|
||||||
for other in node.iter() {
|
for other in node.iter() {
|
||||||
if let Entry::Occupied(mut entry) = self.nodes.entry(*other) {
|
if let Entry::Occupied(mut entry) = self.nodes.entry(*other) {
|
||||||
let back_edges = entry.get_mut();
|
let back_edges = entry.get_mut();
|
||||||
back_edges.remove(&id);
|
back_edges.remove(&pos);
|
||||||
if back_edges.is_empty() {
|
if back_edges.is_empty() {
|
||||||
entry.remove();
|
entry.remove();
|
||||||
}
|
}
|
||||||
@@ -64,7 +62,7 @@ impl Errors {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// build the real error
|
// build the real error
|
||||||
let mut rules = vec![get_info(id)];
|
let mut rules = vec![get_info(pos)];
|
||||||
|
|
||||||
rules.extend(node.into_iter().map(get_info));
|
rules.extend(node.into_iter().map(get_info));
|
||||||
|
|
||||||
@@ -74,28 +72,30 @@ impl Errors {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
errors.sort_by_key(|err| match err {
|
||||||
|
Error::OverlapError { rules, .. } => rules.first().unwrap().1.from,
|
||||||
|
_ => Pos::default(),
|
||||||
|
});
|
||||||
errors
|
errors
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add a bidirectional edge between two rules in the graph.
|
fn check_pair(&mut self, a: &trie_again::Rule, b: &trie_again::Rule) {
|
||||||
fn add_edge(&mut self, a: RuleId, b: RuleId) {
|
if let trie_again::Overlap::Yes { .. } = a.may_overlap(b) {
|
||||||
|
if a.prio == b.prio {
|
||||||
// edges are undirected
|
// edges are undirected
|
||||||
self.nodes.entry(a).or_default().insert(b);
|
self.nodes.entry(a.pos).or_default().insert(b.pos);
|
||||||
self.nodes.entry(b).or_default().insert(a);
|
self.nodes.entry(b.pos).or_default().insert(a.pos);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determine if any rules overlap in the input that they accept. This checkes every unique pair of
|
/// Determine if any rules overlap in the input that they accept. This checks every unique pair of
|
||||||
/// rules, as checking rules in aggregate tends to suffer from exponential explosion in the
|
/// rules, as checking rules in aggregate tends to suffer from exponential explosion in the
|
||||||
/// presence of wildcard patterns.
|
/// presence of wildcard patterns.
|
||||||
fn check_overlaps(env: &TermEnv) -> Errors {
|
fn check_overlaps(terms: Vec<(TermId, trie_again::RuleSet)>, env: &TermEnv) -> Errors {
|
||||||
struct RulePatterns<'a> {
|
let mut errs = Errors::default();
|
||||||
rule: &'a Rule,
|
for (tid, ruleset) in terms {
|
||||||
pats: Box<[Pattern]>,
|
|
||||||
}
|
|
||||||
let mut by_term = HashMap::new();
|
|
||||||
for rule in env.rules.iter() {
|
|
||||||
if let sema::Pattern::Term(_, tid, ref vars) = rule.lhs {
|
|
||||||
let is_multi_ctor = match &env.terms[tid.index()].kind {
|
let is_multi_ctor = match &env.terms[tid.index()].kind {
|
||||||
&TermKind::Decl { multi, .. } => multi,
|
&TermKind::Decl { multi, .. } => multi,
|
||||||
_ => false,
|
_ => false,
|
||||||
@@ -108,237 +108,12 @@ fn check_overlaps(env: &TermEnv) -> Errors {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Group rules by term and priority. Only rules within the same group are checked to
|
let mut cursor = ruleset.rules.iter();
|
||||||
// see if they overlap each other. If you want to change the scope of overlap checking,
|
|
||||||
// change this key.
|
|
||||||
let key = (tid, rule.prio);
|
|
||||||
|
|
||||||
let mut binds = Vec::new();
|
|
||||||
let rule = RulePatterns {
|
|
||||||
rule,
|
|
||||||
pats: vars
|
|
||||||
.iter()
|
|
||||||
.map(|pat| Pattern::from_sema(env, &mut binds, pat))
|
|
||||||
.collect(),
|
|
||||||
};
|
|
||||||
by_term.entry(key).or_insert_with(Vec::new).push(rule);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut errs = Errors::default();
|
|
||||||
for (_, rows) in by_term {
|
|
||||||
let mut cursor = rows.into_iter();
|
|
||||||
while let Some(left) = cursor.next() {
|
while let Some(left) = cursor.next() {
|
||||||
for right in cursor.as_slice() {
|
for right in cursor.as_slice() {
|
||||||
if check_overlap_pair(&left.pats, &right.pats) {
|
errs.check_pair(left, right);
|
||||||
errs.add_edge(left.rule.id, right.rule.id);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
errs
|
errs
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check if two rules overlap in the inputs they accept.
|
|
||||||
fn check_overlap_pair(a: &[Pattern], b: &[Pattern]) -> bool {
|
|
||||||
debug_assert_eq!(a.len(), b.len());
|
|
||||||
let mut worklist: Vec<_> = a.iter().zip(b.iter()).collect();
|
|
||||||
|
|
||||||
while let Some((a, b)) = worklist.pop() {
|
|
||||||
// Checking the cross-product of two and-patterns is O(n*m). Merging sorted lists or
|
|
||||||
// hash-maps might be faster in practice, but:
|
|
||||||
// - The alternatives are not asymptotically faster, because in theory all the subpatterns
|
|
||||||
// might have the same extractor or enum variant, and in that case any approach has to
|
|
||||||
// check all of the cross-product combinations anyway.
|
|
||||||
// - It's easier to reason about this doubly-nested loop than about merging sorted lists or
|
|
||||||
// picking the right hash keys.
|
|
||||||
// - These lists are always so small that performance doesn't matter.
|
|
||||||
for a in a.as_and_subpatterns() {
|
|
||||||
for b in b.as_and_subpatterns() {
|
|
||||||
let overlap = match (a, b) {
|
|
||||||
(Pattern::Int { value: a }, Pattern::Int { value: b }) => a == b,
|
|
||||||
(Pattern::Const { name: a }, Pattern::Const { name: b }) => a == b,
|
|
||||||
|
|
||||||
// if it's the same variant or same extractor, check all pairs of subterms
|
|
||||||
(
|
|
||||||
Pattern::Variant {
|
|
||||||
id: a,
|
|
||||||
pats: a_pats,
|
|
||||||
},
|
|
||||||
Pattern::Variant {
|
|
||||||
id: b,
|
|
||||||
pats: b_pats,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
| (
|
|
||||||
Pattern::Extractor {
|
|
||||||
id: a,
|
|
||||||
pats: a_pats,
|
|
||||||
},
|
|
||||||
Pattern::Extractor {
|
|
||||||
id: b,
|
|
||||||
pats: b_pats,
|
|
||||||
},
|
|
||||||
) if a == b => {
|
|
||||||
debug_assert_eq!(a_pats.len(), b_pats.len());
|
|
||||||
worklist.extend(a_pats.iter().zip(b_pats.iter()));
|
|
||||||
true
|
|
||||||
}
|
|
||||||
|
|
||||||
// different variants of the same enum definitely do not overlap
|
|
||||||
(Pattern::Variant { .. }, Pattern::Variant { .. }) => false,
|
|
||||||
|
|
||||||
// an extractor which does not exactly match the other pattern might overlap
|
|
||||||
(Pattern::Extractor { .. }, _) | (_, Pattern::Extractor { .. }) => true,
|
|
||||||
|
|
||||||
// a wildcard definitely overlaps
|
|
||||||
(Pattern::Wildcard, _) | (_, Pattern::Wildcard) => true,
|
|
||||||
|
|
||||||
// these patterns can only be paired with patterns of the same type, or
|
|
||||||
// wildcards or extractors, and all those cases are covered above
|
|
||||||
(Pattern::Int { .. } | Pattern::Const { .. } | Pattern::Variant { .. }, _) => {
|
|
||||||
unreachable!()
|
|
||||||
}
|
|
||||||
|
|
||||||
// and-patterns don't reach here due to as_and_subpatterns
|
|
||||||
(Pattern::And { .. }, _) => unreachable!(),
|
|
||||||
};
|
|
||||||
|
|
||||||
if !overlap {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
true
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A version of [`sema::Pattern`] with some simplifications to make overlap checking easier.
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
enum Pattern {
|
|
||||||
/// Integer literal patterns.
|
|
||||||
Int {
|
|
||||||
value: i128,
|
|
||||||
},
|
|
||||||
|
|
||||||
/// Constant literal patterns, such as `$F32`.
|
|
||||||
Const {
|
|
||||||
name: Sym,
|
|
||||||
},
|
|
||||||
|
|
||||||
/// Enum variant constructors.
|
|
||||||
Variant {
|
|
||||||
id: TermId,
|
|
||||||
pats: Box<[Pattern]>,
|
|
||||||
},
|
|
||||||
|
|
||||||
/// Conjunctions of patterns.
|
|
||||||
And {
|
|
||||||
pats: Box<[Pattern]>,
|
|
||||||
},
|
|
||||||
|
|
||||||
/// Extractor uses (both fallible and infallible).
|
|
||||||
Extractor {
|
|
||||||
id: TermId,
|
|
||||||
pats: Box<[Pattern]>,
|
|
||||||
},
|
|
||||||
|
|
||||||
Wildcard,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Pattern {
|
|
||||||
/// Create a [`Pattern`] from a [`sema::Pattern`]. The major differences between these two
|
|
||||||
/// representations are as follows:
|
|
||||||
/// 1. Variable bindings are removed and turned into wildcards
|
|
||||||
/// 2. Equality constraints are removed and turned into inlined versions of the patterns they
|
|
||||||
/// would have introduced equalities with
|
|
||||||
/// 3. [`sema::Pattern::Term`] instances are turned into either [`Pattern::Variant`] or
|
|
||||||
/// [`Pattern::Extractor`] cases depending on their term kind.
|
|
||||||
fn from_sema(env: &TermEnv, binds: &mut Vec<(VarId, Pattern)>, pat: &sema::Pattern) -> Self {
|
|
||||||
match pat {
|
|
||||||
sema::Pattern::BindPattern(_, id, pat) => {
|
|
||||||
let pat = Self::from_sema(env, binds, pat);
|
|
||||||
binds.push((*id, pat.clone()));
|
|
||||||
pat
|
|
||||||
}
|
|
||||||
|
|
||||||
sema::Pattern::Var(_, id) => {
|
|
||||||
for (vid, pat) in binds.iter().rev() {
|
|
||||||
if vid == id {
|
|
||||||
// We inline equality constraints for two reasons: we specialize on the
|
|
||||||
// spine of related patterns only, so more specific information about
|
|
||||||
// individual values isn't necessarily helpful; we consider overlap
|
|
||||||
// checking to be an over-approximation of overlapping rules, so handling
|
|
||||||
// equalities ends up being best-effort. As an approximation, we use
|
|
||||||
// whatever pattern happened to be at the binding of the variable for all
|
|
||||||
// of the cases where it's used for equality. For example, in the following
|
|
||||||
// rule:
|
|
||||||
//
|
|
||||||
// > (rule (example x @ (Enum.Variant y) x) ...)
|
|
||||||
//
|
|
||||||
// we will only specialize up to `(Enum.Variant _)`, so any more specific
|
|
||||||
// runtime values of `y` won't end up helping to identify overlap. As a
|
|
||||||
// result, we rewrite the patterns in the rule to look more like the
|
|
||||||
// following, as it greatly simplifies overlap checking.
|
|
||||||
//
|
|
||||||
// > (rule (example (Enum.Variant _) (Enum.Variant _)) ...)
|
|
||||||
//
|
|
||||||
// Cases that this scheme won't handle look like the following:
|
|
||||||
//
|
|
||||||
// > (rule (example2 2 3) ...)
|
|
||||||
// > (rule (example2 x x) ...)
|
|
||||||
//
|
|
||||||
// As in this case we'll not make use of the information that `2` and `3`
|
|
||||||
// aren't equal to know that the rules don't overlap. One approach that we
|
|
||||||
// could take here is delaying substitution to the point where a variable
|
|
||||||
// binding has been specialized, turning the rules into the following once
|
|
||||||
// specialization had occurred for `2`:
|
|
||||||
//
|
|
||||||
// > (rule (example2 2 3) ...)
|
|
||||||
// > (rule (example2 2 2) ...)
|
|
||||||
return pat.clone();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
binds.push((*id, Pattern::Wildcard));
|
|
||||||
Pattern::Wildcard
|
|
||||||
}
|
|
||||||
|
|
||||||
sema::Pattern::ConstInt(_, value) => Pattern::Int { value: *value },
|
|
||||||
sema::Pattern::ConstPrim(_, name) => Pattern::Const { name: *name },
|
|
||||||
|
|
||||||
&sema::Pattern::Term(_, id, ref pats) => {
|
|
||||||
let pats = pats
|
|
||||||
.iter()
|
|
||||||
.map(|pat| Pattern::from_sema(env, binds, pat))
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
match &env.terms[id.0].kind {
|
|
||||||
TermKind::EnumVariant { .. } => Pattern::Variant { id, pats },
|
|
||||||
TermKind::Decl { .. } => Pattern::Extractor { id, pats },
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sema::Pattern::Wildcard(_) => Pattern::Wildcard,
|
|
||||||
|
|
||||||
sema::Pattern::And(_, pats) => {
|
|
||||||
let pats = pats
|
|
||||||
.iter()
|
|
||||||
.map(|pat| Pattern::from_sema(env, binds, pat))
|
|
||||||
.collect();
|
|
||||||
Pattern::And { pats }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// If this is an and-pattern, return its subpatterns. Otherwise pretend like there's an
|
|
||||||
/// and-pattern which has this as its only subpattern, and return self as a single-element
|
|
||||||
/// slice.
|
|
||||||
fn as_and_subpatterns(&self) -> &[Pattern] {
|
|
||||||
if let Pattern::And { pats } = self {
|
|
||||||
pats
|
|
||||||
} else {
|
|
||||||
std::slice::from_ref(self)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
676
cranelift/isle/isle/src/trie_again.rs
Normal file
676
cranelift/isle/isle/src/trie_again.rs
Normal file
@@ -0,0 +1,676 @@
|
|||||||
|
//! A strongly-normalizing intermediate representation for ISLE rules. This representation is chosen
|
||||||
|
//! to closely reflect the operations we can implement in Rust, to make code generation easy.
|
||||||
|
use crate::error::{Error, Source, Span};
|
||||||
|
use crate::lexer::Pos;
|
||||||
|
use crate::sema::{self, RuleVisitor};
|
||||||
|
use crate::DisjointSets;
|
||||||
|
use std::collections::{hash_map::Entry, HashMap};
|
||||||
|
|
||||||
|
/// A field index in a tuple or an enum variant.
|
||||||
|
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
|
||||||
|
pub struct TupleIndex(u8);
|
||||||
|
/// A hash-consed identifier for a binding, stored in a [RuleSet].
|
||||||
|
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
|
||||||
|
pub struct BindingId(u16);
|
||||||
|
/// A hash-consed identifier for an expression, stored in a [RuleSet].
|
||||||
|
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
|
||||||
|
pub struct ExprId(u16);
|
||||||
|
|
||||||
|
impl BindingId {
|
||||||
|
/// Get the index of this id.
|
||||||
|
pub fn index(self) -> usize {
|
||||||
|
self.0.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ExprId {
|
||||||
|
/// Get the index of this id.
|
||||||
|
pub fn index(self) -> usize {
|
||||||
|
self.0.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Expressions construct new values. Rust pattern matching can only destructure existing values,
|
||||||
|
/// not call functions or construct new values. So `if-let` and external extractor invocations need
|
||||||
|
/// to interrupt pattern matching in order to evaluate a suitable expression. These expressions are
|
||||||
|
/// also used when evaluating the right-hand side of a rule.
|
||||||
|
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
|
||||||
|
pub enum Expr {
|
||||||
|
/// A binding from some sequence of pattern matches, used as an expression.
|
||||||
|
Binding {
|
||||||
|
/// Which binding site is being used as an expression?
|
||||||
|
source: BindingId,
|
||||||
|
},
|
||||||
|
/// Evaluates to the given integer literal.
|
||||||
|
ConstInt {
|
||||||
|
/// The constant value.
|
||||||
|
val: i128,
|
||||||
|
},
|
||||||
|
/// Evaluates to the given primitive Rust value.
|
||||||
|
ConstPrim {
|
||||||
|
/// The constant value.
|
||||||
|
val: sema::Sym,
|
||||||
|
},
|
||||||
|
/// One of the arguments to the top-level function.
|
||||||
|
Argument {
|
||||||
|
/// Which of the function's arguments is this?
|
||||||
|
index: TupleIndex,
|
||||||
|
},
|
||||||
|
/// The result of calling an external extractor.
|
||||||
|
Extractor {
|
||||||
|
/// Which extractor should be called?
|
||||||
|
term: sema::TermId,
|
||||||
|
/// What expression should be passed to the extractor?
|
||||||
|
parameter: ExprId,
|
||||||
|
},
|
||||||
|
/// The result of calling an external constructor.
|
||||||
|
Constructor {
|
||||||
|
/// Which constructor should be called?
|
||||||
|
term: sema::TermId,
|
||||||
|
/// What expressions should be passed to the constructor?
|
||||||
|
parameters: Box<[ExprId]>,
|
||||||
|
},
|
||||||
|
/// The result of constructing an enum variant.
|
||||||
|
Variant {
|
||||||
|
/// Which enum type should be constructed?
|
||||||
|
ty: sema::TypeId,
|
||||||
|
/// Which variant of that enum should be constructed?
|
||||||
|
variant: sema::VariantId,
|
||||||
|
/// What expressions should be provided for this variant's fields?
|
||||||
|
fields: Box<[ExprId]>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Binding sites are the result of Rust pattern matching. This is the dual of an expression: while
|
||||||
|
/// expressions build up values, bindings take values apart.
|
||||||
|
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
|
||||||
|
pub enum Binding {
|
||||||
|
/// A match begins at the result of some expression that produces a Rust value.
|
||||||
|
Expr {
|
||||||
|
/// Which expression is being matched?
|
||||||
|
constructor: ExprId,
|
||||||
|
},
|
||||||
|
/// After some sequence of matches, we'll match one of the previous bindings against an enum
|
||||||
|
/// variant and produce a new binding from one of its fields. There must be a matching
|
||||||
|
/// [Constraint] for each `source`/`variant` pair that appears in a binding.
|
||||||
|
Variant {
|
||||||
|
/// Which binding is being matched?
|
||||||
|
source: BindingId,
|
||||||
|
/// Which enum variant are we pulling binding sites from? This is somewhat redundant with
|
||||||
|
/// information in a corresponding [Constraint]. However, it must be here so that different
|
||||||
|
/// enum variants aren't hash-consed into the same binding site.
|
||||||
|
variant: sema::VariantId,
|
||||||
|
/// Which field of this enum variant are we projecting out? Although ISLE uses named fields,
|
||||||
|
/// we track them by index for constant-time comparisons. The [sema::TypeEnv] can be used to
|
||||||
|
/// get the field names.
|
||||||
|
field: TupleIndex,
|
||||||
|
},
|
||||||
|
/// After some sequence of matches, we'll match one of the previous bindings against
|
||||||
|
/// `Option::Some` and produce a new binding from its contents. (This currently only happens
|
||||||
|
/// with external extractors.)
|
||||||
|
Some {
|
||||||
|
/// Which binding is being matched?
|
||||||
|
source: BindingId,
|
||||||
|
},
|
||||||
|
/// After some sequence of matches, we'll match one of the previous bindings against a tuple and
|
||||||
|
/// produce a new binding from one of its fields. (This currently only happens with external
|
||||||
|
/// extractors.)
|
||||||
|
Tuple {
|
||||||
|
/// Which binding is being matched?
|
||||||
|
source: BindingId,
|
||||||
|
/// Which tuple field are we projecting out?
|
||||||
|
field: TupleIndex,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Pattern matches which can fail. Some binding sites are the result of successfully matching a
|
||||||
|
/// constraint. A rule applies constraints to binding sites to determine whether the rule matches.
|
||||||
|
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
|
||||||
|
pub enum Constraint {
|
||||||
|
/// The value must match this enum variant.
|
||||||
|
Variant {
|
||||||
|
/// Which enum type is being matched? This is implied by the binding where the constraint is
|
||||||
|
/// applied, but recorded here for convenience.
|
||||||
|
ty: sema::TypeId,
|
||||||
|
/// Which enum variant must this binding site match to satisfy the rule?
|
||||||
|
variant: sema::VariantId,
|
||||||
|
/// Number of fields in this variant of this enum. This is recorded in the constraint for
|
||||||
|
/// convenience, to avoid needing to look up the variant in a [sema::TypeEnv].
|
||||||
|
fields: TupleIndex,
|
||||||
|
},
|
||||||
|
/// The value must equal this integer literal.
|
||||||
|
ConstInt {
|
||||||
|
/// The constant value.
|
||||||
|
val: i128,
|
||||||
|
},
|
||||||
|
/// The value must equal this Rust primitive value.
|
||||||
|
ConstPrim {
|
||||||
|
/// The constant value.
|
||||||
|
val: sema::Sym,
|
||||||
|
},
|
||||||
|
/// The value must be an `Option::Some`, from a fallible extractor.
|
||||||
|
Some,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A term-rewriting rule. All [BindingId]s and [ExprId]s are only meaningful in the context of the
/// [RuleSet] that contains this rule.
#[derive(Debug, Default)]
pub struct Rule {
    /// Where was this rule defined?
    pub pos: Pos,
    /// All of these bindings must match for this rule to apply. Note that within a single rule, if
    /// a binding site must match two different constants, then the rule can never match.
    ///
    /// Kept private so consumers can't observe the hash map's non-deterministic iteration order;
    /// access goes through [Rule::get_constraint].
    constraints: HashMap<BindingId, Constraint>,
    /// Sets of bindings which must be equal for this rule to match.
    pub equals: DisjointSets<BindingId>,
    /// If other rules apply along with this one, the one with the highest numeric priority is
    /// evaluated. If multiple applicable rules have the same priority, that's an overlap error.
    pub prio: i64,
    /// If this rule applies, the top-level term should evaluate to this expression.
    pub result: ExprId,
}
|
||||||
|
|
||||||
|
/// Records whether a given pair of rules can both match on some input.
pub enum Overlap {
    /// There is no input on which this pair of rules can both match.
    No,
    /// There is at least one input on which this pair of rules can both match.
    Yes {
        /// True if every input accepted by one rule is also accepted by the other. This does not
        /// indicate which rule is more general and in fact the rules could match exactly the same
        /// set of inputs. You can work out which by comparing the number of constraints in both
        /// rules: The more general rule has fewer constraints.
        subset: bool,
    },
}
|
||||||
|
|
||||||
|
/// A collection of [Rule]s, along with hash-consed [Binding]s and [Expr]s for all of them.
#[derive(Debug, Default)]
pub struct RuleSet {
    /// The [Rule]s for a single [sema::Term].
    pub rules: Vec<Rule>,
    /// The bindings identified by [BindingId]s within rules. A [BindingId] indexes this vector.
    pub bindings: Vec<Binding>,
    /// The expressions identified by [ExprId]s within rules. An [ExprId] indexes this vector.
    pub exprs: Vec<Expr>,
}
|
||||||
|
|
||||||
|
/// Construct a [RuleSet] for each term in `termenv` that has rules.
|
||||||
|
pub fn build(
|
||||||
|
termenv: &sema::TermEnv,
|
||||||
|
tyenv: &sema::TypeEnv,
|
||||||
|
) -> (Vec<(sema::TermId, RuleSet)>, Vec<Error>) {
|
||||||
|
let mut errors = Vec::new();
|
||||||
|
let mut term = HashMap::new();
|
||||||
|
for rule in termenv.rules.iter() {
|
||||||
|
term.entry(rule.lhs.root_term().unwrap())
|
||||||
|
.or_insert_with(RuleSetBuilder::default)
|
||||||
|
.add_rule(rule, termenv, tyenv, &mut errors);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The `term` hash map may return terms in any order. Sort them to ensure that we produce the
|
||||||
|
// same output every time when given the same ISLE source. Rules are added to terms in `RuleId`
|
||||||
|
// order, so it's not necessary to sort within a `RuleSet`.
|
||||||
|
let mut result: Vec<_> = term
|
||||||
|
.into_iter()
|
||||||
|
.map(|(term, builder)| (term, builder.rules))
|
||||||
|
.collect();
|
||||||
|
result.sort_unstable_by_key(|(term, _)| *term);
|
||||||
|
|
||||||
|
(result, errors)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Rule {
|
||||||
|
/// Returns whether a given pair of rules can both match on some input, and if so, whether
|
||||||
|
/// either matches a subset of the other's inputs. If this function returns `No`, then the two
|
||||||
|
/// rules definitely do not overlap. However, it may return `Yes` in cases where the rules can't
|
||||||
|
/// overlap in practice, or where this analysis is not yet precise enough to decide.
|
||||||
|
pub fn may_overlap(&self, other: &Rule) -> Overlap {
|
||||||
|
// Two rules can't overlap if, for some binding site in the intersection of their
|
||||||
|
// constraints, the rules have different constraints: an input can't possibly match both
|
||||||
|
// rules then. If the rules do overlap, and one has a subset of the constraints of the
|
||||||
|
// other, then the less-constrained rule matches every input that the more-constrained rule
|
||||||
|
// matches, and possibly more. We test for both conditions at once, with the observation
|
||||||
|
// that if the intersection of two sets is equal to the smaller set, then it's a subset. So
|
||||||
|
// the outer loop needs to go over the rule with fewer constraints in order to correctly
|
||||||
|
// identify if it's a subset of the other rule. Also, that way around is faster.
|
||||||
|
let (small, big) = if self.constraints.len() <= other.constraints.len() {
|
||||||
|
(self, other)
|
||||||
|
} else {
|
||||||
|
(other, self)
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO: nonlinear constraints complicate the subset check
|
||||||
|
// For the purpose of overlap checking, equality constraints act like other constraints, in
|
||||||
|
// that they can cause rules to not overlap. However, because we don't have a concrete
|
||||||
|
// pattern to compare, the analysis to prove that is complicated. For now, we approximate
|
||||||
|
// the result. If `small` has any of these nonlinear constraints, conservatively report that
|
||||||
|
// it is not a subset of `big`.
|
||||||
|
let mut subset = small.equals.is_empty();
|
||||||
|
|
||||||
|
for (binding, a) in small.constraints.iter() {
|
||||||
|
if let Some(b) = big.constraints.get(binding) {
|
||||||
|
if a != b {
|
||||||
|
// If any binding site is constrained differently by both rules then there is
|
||||||
|
// no input where both rules can match.
|
||||||
|
return Overlap::No;
|
||||||
|
}
|
||||||
|
// Otherwise both are constrained in the same way at this binding site. That doesn't
|
||||||
|
// rule out any possibilities for what inputs the rules accept.
|
||||||
|
} else {
|
||||||
|
// The `big` rule's inputs are a subset of the `small` rule's inputs if every
|
||||||
|
// constraint in `small` is exactly matched in `big`. But we found a counterexample.
|
||||||
|
subset = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Overlap::Yes { subset }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the constraint that the given binding site must satisfy for this rule to match, if
|
||||||
|
/// there is one.
|
||||||
|
pub fn get_constraint(&self, source: BindingId) -> Option<Constraint> {
|
||||||
|
self.constraints.get(&source).copied()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set_constraint(
|
||||||
|
&mut self,
|
||||||
|
source: BindingId,
|
||||||
|
constraint: Constraint,
|
||||||
|
) -> Result<(), UnreachableError> {
|
||||||
|
match self.constraints.entry(source) {
|
||||||
|
Entry::Occupied(entry) => {
|
||||||
|
if entry.get() != &constraint {
|
||||||
|
return Err(UnreachableError {
|
||||||
|
pos: self.pos,
|
||||||
|
constraint_a: *entry.get(),
|
||||||
|
constraint_b: constraint,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Entry::Vacant(entry) => {
|
||||||
|
entry.insert(constraint);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Internal record of a constraint conflict: one binding site was required to satisfy two
/// different concrete constraints, so the containing rule can never match any input. Converted
/// into [Error::UnreachableError] when the rule is finalized.
#[derive(Debug)]
struct UnreachableError {
    // Source position of the rule, used for error reporting.
    pos: Pos,
    // The constraint that was already recorded at the binding site.
    constraint_a: Constraint,
    // The conflicting constraint that was being added.
    constraint_b: Constraint,
}
|
||||||
|
|
||||||
|
/// Builder that accumulates the rules for a single term into a [RuleSet], hash-consing bindings
/// and expressions as it goes.
#[derive(Debug, Default)]
struct RuleSetBuilder {
    // The rule currently being translated; taken out (via `mem::take`) when it's finished.
    current_rule: Rule,
    // Hash-consing map from a [Binding] to its assigned [BindingId] in `rules.bindings`.
    binding_map: HashMap<Binding, BindingId>,
    // Hash-consing map from an [Expr] to its assigned [ExprId] in `rules.exprs`.
    expr_map: HashMap<Expr, ExprId>,
    // Constraint conflicts found while building `current_rule`; non-empty means the rule can
    // never match and should be reported rather than kept.
    unreachable: Vec<UnreachableError>,
    // The completed output.
    rules: RuleSet,
}
|
||||||
|
|
||||||
|
impl RuleSetBuilder {
    /// Translate one [sema::Rule] and add it to this builder's [RuleSet]. If the rule can never
    /// match (conflicting constraints were found), it is dropped and an [Error::UnreachableError]
    /// is pushed onto `errors` instead.
    fn add_rule(
        &mut self,
        rule: &sema::Rule,
        termenv: &sema::TermEnv,
        tyenv: &sema::TypeEnv,
        errors: &mut Vec<Error>,
    ) {
        self.current_rule.pos = rule.pos;
        self.current_rule.prio = rule.prio;
        // Visiting the rule populates `current_rule`'s constraints and equivalence classes as a
        // side effect, and yields the rule's result expression.
        self.current_rule.result = rule.visit(self, termenv);
        self.normalize_equivalence_classes();
        let rule = std::mem::take(&mut self.current_rule);

        if self.unreachable.is_empty() {
            self.rules.rules.push(rule);
        } else {
            // If this rule can never match, drop it so it doesn't affect overlap checking.
            errors.extend(self.unreachable.drain(..).map(|err| {
                let src = Source::new(
                    tyenv.filenames[err.pos.file].clone(),
                    tyenv.file_texts[err.pos.file].clone(),
                );
                Error::UnreachableError {
                    msg: format!(
                        "rule requires binding to match both {:?} and {:?}",
                        err.constraint_a, err.constraint_b
                    ),
                    src,
                    span: Span::new_single(err.pos),
                }
            }))
        }
    }

    /// Establish the invariant that a binding site can have a concrete constraint in `constraints`,
    /// or an equality constraint in `equals`, but not both. This is useful because overlap checking
    /// is most effective on concrete constraints, and also because it exposes more rule structure
    /// for codegen.
    ///
    /// If a binding site is constrained and also required to be equal to another binding site, then
    /// copy the constraint and push the equality inside it. For example:
    /// - `(term x @ 2 x)` is rewritten to `(term 2 2)`
    /// - `(term x @ (T.A _ _) x)` is rewritten to `(term (T.A y z) (T.A y z))`
    /// In the latter case, note that every field of `T.A` has been replaced with a fresh variable
    /// and each of the copies are set equal.
    ///
    /// If several binding sites are supposed to be equal but they each have conflicting constraints
    /// then this rule is unreachable. For example, `(term x @ 2 (and x 3))` requires both arguments
    /// to be equal but also requires them to match both 2 and 3, which can't happen for any input.
    ///
    /// We could do this incrementally, while building the rule. The implementation is nearly
    /// identical but, having tried both ways, it's slightly easier to think about this as a
    /// separate pass. Also, batching up this work should be slightly faster if there are multiple
    /// binding sites set equal to each other.
    fn normalize_equivalence_classes(&mut self) {
        // First, find all the constraints that need to be copied to other binding sites in their
        // respective equivalence classes. Note: do not remove these constraints here! Yes, we'll
        // put them back later, but we rely on still having them around so that
        // `set_constraint_or_error` can detect conflicting constraints.
        let mut deferred_constraints = Vec::new();
        for (&binding, &constraint) in self.current_rule.constraints.iter() {
            if let Some(root) = self.current_rule.equals.find_mut(binding) {
                deferred_constraints.push((root, constraint));
            }
        }

        // Pick one constraint and propagate it through its equivalence class. If there are no
        // errors then it doesn't matter what order we do this in, because that means that any
        // redundant constraints on an equivalence class were equal. We can write equal values into
        // the constraint map in any order and get the same result. If there were errors, we aren't
        // going to generate code from this rule, so order only affects how conflicts are reported.
        while let Some((current, constraint)) = deferred_constraints.pop() {
            // Remove the entire equivalence class and instead add copies of this constraint to
            // every binding site in the class. If there are constraints on other binding sites in
            // this class, then when we try to copy this constraint to those binding sites,
            // `set_constraint_or_error` will check that the constraints are equal and record an
            // appropriate error otherwise.
            //
            // Later, we'll re-visit those other binding sites because they're still in
            // `deferred_constraints`, but `set` will be empty because we already deleted the
            // equivalence class the first time we encountered it.
            let set = self.current_rule.equals.remove_set_of(current);
            match (constraint, set.split_first()) {
                // If the equivalence class was empty we don't have to do anything.
                (_, None) => continue,

                // If we removed an equivalence class with an enum variant constraint, make the
                // fields of the variant equal instead. Create a binding for every field of every
                // member of `set`. Arbitrarily pick one to set all the others equal to. If there
                // are existing constraints on the new fields, copy those around the new equivalence
                // classes too.
                (
                    Constraint::Variant {
                        fields, variant, ..
                    },
                    Some((&base, rest)),
                ) => {
                    let base_fields =
                        self.field_bindings(base, fields, variant, &mut deferred_constraints);
                    for &binding in rest {
                        for (&x, &y) in self
                            .field_bindings(binding, fields, variant, &mut deferred_constraints)
                            .iter()
                            .zip(base_fields.iter())
                        {
                            self.current_rule.equals.merge(x, y);
                        }
                    }
                }

                // These constraints don't introduce new binding sites.
                (Constraint::ConstInt { .. } | Constraint::ConstPrim { .. }, _) => {}

                // Currently, `Some` constraints are only introduced implicitly during the
                // translation from `sema`, so there's no way to set the corresponding binding
                // sites equal to each other. Instead, any equality constraints get applied on
                // the results of matching `Some()` or tuple patterns.
                (Constraint::Some, _) => unreachable!(),
            }

            for binding in set {
                self.set_constraint_or_error(binding, constraint);
            }
        }
    }

    /// Create (or reuse) a [Binding::Variant] for every field of `variant` projected out of
    /// `binding`, returning the field bindings in order. Any field that already carries a concrete
    /// constraint is pushed onto `deferred_constraints` so that constraint can be propagated
    /// through the field's (new) equivalence class as well.
    fn field_bindings(
        &mut self,
        binding: BindingId,
        fields: TupleIndex,
        variant: sema::VariantId,
        deferred_constraints: &mut Vec<(BindingId, Constraint)>,
    ) -> Box<[BindingId]> {
        (0..fields.0)
            .map(TupleIndex)
            .map(move |field| {
                let binding = self.dedup_binding(Binding::Variant {
                    source: binding,
                    variant,
                    field,
                });
                // We've just added an equality constraint to a binding site that may not have had
                // one already. If that binding site already had a concrete constraint, then we need
                // to "recursively" propagate that constraint through the new equivalence class too.
                if let Some(constraint) = self.current_rule.get_constraint(binding) {
                    deferred_constraints.push((binding, constraint));
                }
                binding
            })
            .collect()
    }

    /// Hash-cons `binding`: return its existing [BindingId] if it was seen before, otherwise
    /// assign the next id and record it in the [RuleSet].
    fn dedup_binding(&mut self, binding: Binding) -> BindingId {
        if let Some(binding) = self.binding_map.get(&binding) {
            *binding
        } else {
            let id = BindingId(self.rules.bindings.len().try_into().unwrap());
            self.rules.bindings.push(binding.clone());
            self.binding_map.insert(binding, id);
            id
        }
    }

    /// Hash-cons `expr`: return its existing [ExprId] if it was seen before, otherwise assign the
    /// next id and record it in the [RuleSet].
    fn dedup_expr(&mut self, expr: Expr) -> ExprId {
        if let Some(expr) = self.expr_map.get(&expr) {
            *expr
        } else {
            let id = ExprId(self.rules.exprs.len().try_into().unwrap());
            self.rules.exprs.push(expr.clone());
            self.expr_map.insert(expr, id);
            id
        }
    }

    /// Hash-cons `input` and apply `constraint` to it in the current rule, returning the binding
    /// site's id. Conflicting constraints are recorded for later error reporting.
    fn set_constraint(&mut self, input: Binding, constraint: Constraint) -> BindingId {
        let input = self.dedup_binding(input);
        self.set_constraint_or_error(input, constraint);
        input
    }

    /// Apply `constraint` to `input` in the current rule; if that conflicts with an existing
    /// constraint, stash the error so `add_rule` can report the rule as unreachable.
    fn set_constraint_or_error(&mut self, input: BindingId, constraint: Constraint) {
        if let Err(e) = self.current_rule.set_constraint(input, constraint) {
            self.unreachable.push(e);
        }
    }
}
|
||||||
|
|
||||||
|
impl sema::PatternVisitor for RuleSetBuilder {
    /// The "identifier" this visitor uses for binding sites is a [Binding], not a [BindingId].
    /// Either choice would work but this approach avoids adding bindings to the [RuleSet] if they
    /// are never used in any rule.
    type PatternId = Binding;

    /// Record that patterns `a` and `b` must match equal values for the current rule to apply.
    fn add_match_equal(&mut self, a: Binding, b: Binding, _ty: sema::TypeId) {
        let a = self.dedup_binding(a);
        let b = self.dedup_binding(b);
        // If both bindings represent the same binding site, they're implicitly equal.
        if a != b {
            self.current_rule.equals.merge(a, b);
        }
    }

    /// Constrain `input` to equal the integer literal `val`.
    fn add_match_int(&mut self, input: Binding, _ty: sema::TypeId, val: i128) {
        self.set_constraint(input, Constraint::ConstInt { val });
    }

    /// Constrain `input` to equal the Rust primitive constant `val`.
    fn add_match_prim(&mut self, input: Binding, _ty: sema::TypeId, val: sema::Sym) {
        self.set_constraint(input, Constraint::ConstPrim { val });
    }

    /// Constrain `input` to match enum variant `variant` of `input_ty`, returning one [Binding]
    /// per field of the variant for the sub-patterns to match against.
    fn add_match_variant(
        &mut self,
        input: Binding,
        input_ty: sema::TypeId,
        arg_tys: &[sema::TypeId],
        variant: sema::VariantId,
    ) -> Vec<Binding> {
        let fields = TupleIndex(arg_tys.len().try_into().unwrap());
        let source = self.set_constraint(
            input,
            Constraint::Variant {
                fields,
                ty: input_ty,
                variant,
            },
        );
        (0..fields.0)
            .map(TupleIndex)
            .map(|field| Binding::Variant {
                source,
                variant,
                field,
            })
            .collect()
    }

    /// Apply external extractor `term` to `input`, returning one [Binding] per extractor output.
    /// Fallible extractors additionally get a `Some` constraint on their result.
    fn add_extract(
        &mut self,
        input: Binding,
        _input_ty: sema::TypeId,
        output_tys: Vec<sema::TypeId>,
        term: sema::TermId,
        infallible: bool,
        _multi: bool,
    ) -> Vec<Binding> {
        // ISLE treats external extractors as patterns, but in this representation they're
        // expressions, because Rust doesn't support calling functions during pattern matching. To
        // glue the two representations together we have to introduce suitable adapter nodes.
        let input = self.pattern_as_expr(input);
        let input = self.dedup_expr(Expr::Extractor {
            term,
            parameter: input,
        });
        let input = self.expr_as_pattern(input);

        // If the extractor is fallible, build a pattern and constraint for `Some`
        let source = if infallible {
            input
        } else {
            let source = self.set_constraint(input, Constraint::Some);
            Binding::Some { source }
        };

        // If the extractor has multiple outputs, create a separate binding for each
        match output_tys.len().try_into().unwrap() {
            0 => vec![],
            1 => vec![source],
            outputs => {
                let source = self.dedup_binding(source);
                (0..outputs)
                    .map(TupleIndex)
                    .map(|field| Binding::Tuple { source, field })
                    .collect()
            }
        }
    }
}
|
||||||
|
|
||||||
|
impl sema::ExprVisitor for RuleSetBuilder {
|
||||||
|
/// Unlike the `PatternVisitor` implementation, we use [ExprId] to identify intermediate
|
||||||
|
/// expressions, not [Expr]. Visited expressions are always used so we might as well deduplicate
|
||||||
|
/// them eagerly.
|
||||||
|
type ExprId = ExprId;
|
||||||
|
|
||||||
|
fn add_const_int(&mut self, _ty: sema::TypeId, val: i128) -> ExprId {
|
||||||
|
self.dedup_expr(Expr::ConstInt { val })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_const_prim(&mut self, _ty: sema::TypeId, val: sema::Sym) -> ExprId {
|
||||||
|
self.dedup_expr(Expr::ConstPrim { val })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_create_variant(
|
||||||
|
&mut self,
|
||||||
|
inputs: Vec<(ExprId, sema::TypeId)>,
|
||||||
|
ty: sema::TypeId,
|
||||||
|
variant: sema::VariantId,
|
||||||
|
) -> ExprId {
|
||||||
|
self.dedup_expr(Expr::Variant {
|
||||||
|
ty,
|
||||||
|
variant,
|
||||||
|
fields: inputs.into_iter().map(|(expr, _)| expr).collect(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_construct(
|
||||||
|
&mut self,
|
||||||
|
inputs: Vec<(ExprId, sema::TypeId)>,
|
||||||
|
_ty: sema::TypeId,
|
||||||
|
term: sema::TermId,
|
||||||
|
_infallible: bool,
|
||||||
|
_multi: bool,
|
||||||
|
) -> ExprId {
|
||||||
|
self.dedup_expr(Expr::Constructor {
|
||||||
|
term,
|
||||||
|
parameters: inputs.into_iter().map(|(expr, _)| expr).collect(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl sema::RuleVisitor for RuleSetBuilder {
|
||||||
|
type PatternVisitor = Self;
|
||||||
|
type ExprVisitor = Self;
|
||||||
|
type Expr = ExprId;
|
||||||
|
|
||||||
|
fn add_arg(&mut self, index: usize, _ty: sema::TypeId) -> Binding {
|
||||||
|
// Arguments don't need to be pattern-matched to reference them, so they're expressions
|
||||||
|
let index = TupleIndex(index.try_into().unwrap());
|
||||||
|
let expr = self.dedup_expr(Expr::Argument { index });
|
||||||
|
Binding::Expr { constructor: expr }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_pattern<F: FnOnce(&mut Self)>(&mut self, visitor: F) {
|
||||||
|
visitor(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_expr<F>(&mut self, visitor: F) -> ExprId
|
||||||
|
where
|
||||||
|
F: FnOnce(&mut Self) -> sema::VisitedExpr<Self>,
|
||||||
|
{
|
||||||
|
visitor(self).value
|
||||||
|
}
|
||||||
|
|
||||||
|
fn expr_as_pattern(&mut self, expr: ExprId) -> Binding {
|
||||||
|
if let &Expr::Binding { source: binding } = &self.rules.exprs[expr.index()] {
|
||||||
|
// Short-circuit wrapping a binding around an expr from another binding
|
||||||
|
self.rules.bindings[binding.index()]
|
||||||
|
} else {
|
||||||
|
Binding::Expr { constructor: expr }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn pattern_as_expr(&mut self, pattern: Binding) -> ExprId {
|
||||||
|
if let Binding::Expr { constructor } = pattern {
|
||||||
|
// Short-circuit wrapping an expr around a binding from another expr
|
||||||
|
constructor
|
||||||
|
} else {
|
||||||
|
let binding = self.dedup_binding(pattern);
|
||||||
|
self.dedup_expr(Expr::Binding { source: binding })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user