Revert ISLE priority trie regression and fix ISLE rebuild CI job (#4102)

* Revert "ISLE compiler: fix priority-trie interval bug. (#4093)"

This reverts commit 2122337112.

* ci: Delete ISLE manifest files to force an ISLE rebuild

This makes it so that we actually rebuild ISLE rather than just check that the
manifests are up-to-date.
This commit is contained in:
Nick Fitzgerald
2022-05-05 11:43:22 -07:00
committed by GitHub
parent 9a6854456d
commit b525661d2f
4 changed files with 276 additions and 156 deletions

View File

@@ -693,11 +693,16 @@ impl<'a> Codegen<'a> {
&TrieNode::Decision { ref edges } => {
let subindent = format!("{} ", indent);
// If this is a decision node, generate each match op
// in turn (in priority order). Gather together
// adjacent MatchVariant ops with the same input and
// disjoint variants in order to create a `match`
// rather than a chain of if-lets.
// if this is a decision node, generate each match op
// in turn (in priority order). Sort the ops within
// each priority, and gather together adjacent
// MatchVariant ops with the same input and disjoint
// variants in order to create a `match` rather than a
// chain of if-lets.
let mut edges = edges.clone();
edges.sort_by(|e1, e2| {
(-e1.range.min, &e1.symbol).cmp(&(-e2.range.min, &e2.symbol))
});
let mut i = 0;
while i < edges.len() {
@@ -707,8 +712,8 @@ impl<'a> Codegen<'a> {
let mut adjacent_variants = BTreeSet::new();
let mut adjacent_variant_input = None;
log::trace!(
"edge: prio = {:?}, symbol = {:?}",
edges[i].prio,
"edge: range = {:?}, symbol = {:?}",
edges[i].range,
edges[i].symbol
);
while last < edges.len() {

View File

@@ -100,19 +100,67 @@ pub fn build_tries(typeenv: &TypeEnv, termenv: &TermEnv) -> BTreeMap<TermId, Tri
///
/// To build this trie, we construct nodes with edges to child nodes;
/// each edge consists of (i) one input token (a `PatternInst` or
/// EOM), and (ii) the priority of rules along this edge. We do not
/// merge rules of different priorities, because the logic to do so is
/// complex and error-prone, necessitating "splits" when we merge
/// together a set of rules over a priority range but later introduce
/// a new possible match op in the "middle" of the range. (E.g., match
/// op A at prio 10, B at prio 5, A at prio 0.) In fact, a previous
/// version of the ISLE compiler worked this way, but in practice the
/// complexity was unneeded.
/// EOM), and (ii) the minimum and maximum priorities of rules along
/// this edge. In a way this resembles an interval tree, though the
/// intervals of children need not be disjoint.
///
/// To add a rule to this trie, we perform the usual trie-insertion
/// logic, creating edges and subnodes where necessary. A new edge is
/// necessary whenever an edge does not exist for the (priority,
/// symbol) tuple.
/// logic, creating edges and subnodes where necessary, and updating
/// the priority-range of each edge that we traverse to include the
/// priority of the inserted rule.
///
/// However, we need to be a little bit careful, because with only
/// priority ranges in place and the potential for overlap, we have
/// something that resembles an NFA. For example, consider the case
/// where we reach a node in the trie and have two edges with two
/// match ops, one corresponding to a rule with priority 10, and the
/// other corresponding to two rules, with priorities 20 and 0. The
/// final match could lie along *either* path, so we have to traverse
/// both.
///
/// So, to avoid this, we perform a sort of moral equivalent to the
/// NFA-to-DFA conversion "on the fly" as we insert nodes by
/// duplicating subtrees. At any node, when inserting with a priority
/// P and when outgoing edges lie in a range [P_lo, P_hi] such that P
/// >= P_lo and P <= P_hi, we "priority-split the edges" at priority
/// P.
///
/// To priority-split the edges in a node at priority P:
///
/// - For each out-edge with priority [P_lo, P_hi] s.g. P \in [P_lo,
/// P_hi], and token T:
/// - Trim the subnode at P, yielding children C_lo and C_hi.
/// - Both children must be non-empty (have at least one leaf)
/// because the original node must have had a leaf at P_lo
/// and a leaf at P_hi.
/// - Replace the one edge with two edges, one for each child, with
/// the original match op, and with ranges calculated according to
/// the trimmed children.
///
/// To trim a node into range [P_lo, P_hi]:
///
/// - For a decision node:
/// - If any edges have a range outside the bounds of the trimming
/// range, trim the bounds of the edge, and trim the subtree under the
/// edge into the trimmed edge's range. If the subtree is trimmed
/// to `None`, remove the edge.
/// - If all edges are removed, the decision node becomes `None`.
/// - For a leaf node:
/// - If the priority is outside the range, the node becomes `None`.
///
/// As we descend a path to insert a leaf node, we (i) priority-split
/// if any edges' priority ranges overlap the insertion priority
/// range, and (ii) expand priority ranges on edges to include the new
/// leaf node's priority.
///
/// As long as we do this, we ensure the two key priority-trie
/// invariants:
///
/// 1. At a given node, no two edges exist with priority ranges R_1,
/// R_2 such that R_1 ∩ R_2 ≠ ∅, unless R_1 and R_2 are unit ranges
/// ([x, x]) and are on edges with different match-ops.
/// 2. Along the path from the root to any leaf node with priority P,
/// each edge has a priority range R such that P ∈ R.
///
/// Note that this means that multiple edges with a single match-op
/// may exist, with different priorities.
@@ -139,11 +187,78 @@ impl TrieSymbol {
/// A priority.
pub type Prio = i64;
/// An inclusive range of priorities.
#[derive(Clone, Copy, Debug)]
pub struct PrioRange {
/// The minimum of this range.
pub min: Prio,
/// The maximum of this range.
pub max: Prio,
}
impl PrioRange {
fn contains(&self, prio: Prio) -> bool {
prio >= self.min && prio <= self.max
}
fn is_unit(&self) -> bool {
self.min == self.max
}
fn overlaps(&self, other: PrioRange) -> bool {
// This can be derived via DeMorgan: !(self.begin > other.end
// OR other.begin > self.end).
self.min <= other.max && other.min <= self.max
}
fn intersect(&self, other: PrioRange) -> PrioRange {
PrioRange {
min: std::cmp::max(self.min, other.min),
max: std::cmp::min(self.max, other.max),
}
}
fn union(&self, other: PrioRange) -> PrioRange {
PrioRange {
min: std::cmp::min(self.min, other.min),
max: std::cmp::max(self.max, other.max),
}
}
fn split_at(&self, prio: Prio) -> (PrioRange, PrioRange) {
assert!(self.contains(prio));
assert!(!self.is_unit());
if prio == self.min {
(
PrioRange {
min: self.min,
max: self.min,
},
PrioRange {
min: self.min + 1,
max: self.max,
},
)
} else {
(
PrioRange {
min: self.min,
max: prio - 1,
},
PrioRange {
min: prio,
max: self.max,
},
)
}
}
}
/// An edge in our term trie.
#[derive(Clone, Debug)]
pub struct TrieEdge {
/// The priority for this edge's sub-trie.
pub prio: Prio,
/// The priority range for this edge's sub-trie.
pub range: PrioRange,
/// The match operation to perform for this edge.
pub symbol: TrieSymbol,
/// This edge's sub-trie.
@@ -204,19 +319,91 @@ impl TrieNode {
_ => panic!("insert on leaf node!"),
};
// Now find or insert the appropriate edge.
let edge = edges
// Do we need to split?
let needs_split = edges
.iter()
.position(|edge| edge.symbol == op && edge.prio == prio)
.unwrap_or_else(|| {
edges.push(TrieEdge {
prio,
symbol: op,
node: TrieNode::Empty,
});
edges.len() - 1
});
.any(|edge| edge.range.contains(prio) && !edge.range.is_unit());
// If so, pass over all edges/subnodes and split each.
if needs_split {
let mut new_edges = vec![];
for edge in std::mem::take(edges) {
if !edge.range.contains(prio) || edge.range.is_unit() {
new_edges.push(edge);
continue;
}
let (lo_range, hi_range) = edge.range.split_at(prio);
let lo = edge.node.trim(lo_range);
let hi = edge.node.trim(hi_range);
if let Some((node, range)) = lo {
new_edges.push(TrieEdge {
range,
symbol: edge.symbol.clone(),
node,
});
}
if let Some((node, range)) = hi {
new_edges.push(TrieEdge {
range,
symbol: edge.symbol,
node,
});
}
}
*edges = new_edges;
}
// Now find or insert the appropriate edge.
let mut edge: Option<usize> = None;
let mut last_edge_with_op: Option<usize> = None;
let mut last_edge_with_op_prio: Option<Prio> = None;
for i in 0..(edges.len() + 1) {
if i == edges.len() || prio > edges[i].range.max {
// We've passed all edges with overlapping priority
// ranges. Maybe the last edge we saw with the op
// we're inserting can have its range expanded,
// however.
if last_edge_with_op.is_some() {
// Move it to the end of the run of equal-unit-range ops.
edges.swap(last_edge_with_op.unwrap(), i - 1);
edge = Some(i - 1);
edges[i - 1].range.max = prio;
break;
}
edges.insert(
i,
TrieEdge {
range: PrioRange {
min: prio,
max: prio,
},
symbol: op.clone(),
node: TrieNode::Empty,
},
);
edge = Some(i);
break;
}
if i == edges.len() {
break;
}
if edges[i].symbol == op {
last_edge_with_op = Some(i);
last_edge_with_op_prio = Some(edges[i].range.max);
}
if last_edge_with_op_prio.is_some()
&& last_edge_with_op_prio.unwrap() < edges[i].range.max
{
last_edge_with_op = None;
last_edge_with_op_prio = None;
}
if edges[i].range.contains(prio) && edges[i].symbol == op {
edge = Some(i);
break;
}
}
let edge = edge.expect("Must have found an edge at least at last iter");
let edge = &mut edges[edge];
if is_last {
@@ -233,15 +420,57 @@ impl TrieNode {
}
}
/// Sort edges by priority.
pub fn sort(&mut self) {
fn trim(&self, range: PrioRange) -> Option<(TrieNode, PrioRange)> {
match self {
TrieNode::Decision { edges } => {
// Sort by priority, highest integer value first; then
// by trie symbol.
edges.sort_by_cached_key(|edge| (-edge.prio, edge.symbol.clone()));
&TrieNode::Empty => None,
&TrieNode::Leaf { prio, ref output } => {
if range.contains(prio) {
Some((
TrieNode::Leaf {
prio,
output: output.clone(),
},
PrioRange {
min: prio,
max: prio,
},
))
} else {
None
}
}
&TrieNode::Decision { ref edges } => {
let edges = edges
.iter()
.filter_map(|edge| {
if !edge.range.overlaps(range) {
None
} else {
let range = range.intersect(edge.range);
if let Some((node, range)) = edge.node.trim(range) {
Some(TrieEdge {
range,
symbol: edge.symbol.clone(),
node,
})
} else {
None
}
}
})
.collect::<Vec<_>>();
if edges.is_empty() {
None
} else {
let range = edges
.iter()
.map(|edge| edge.range)
.reduce(|a, b| a.union(b))
.expect("reduce on non-empty vec must not return None");
Some((TrieNode::Decision { edges }, range))
}
}
_ => {}
}
}
@@ -261,8 +490,8 @@ impl TrieNode {
for edge in edges {
s.push_str(indent);
s.push_str(&format!(
" edge: prio = {:?}, symbol: {:?}\n",
edge.prio, edge.symbol
" edge: range = {:?}, symbol: {:?}\n",
edge.range, edge.symbol
));
pretty_rec(s, &edge.node, &new_indent);
}
@@ -306,10 +535,6 @@ impl TermFunctionBuilder {
.chain(std::iter::once(TrieSymbol::EndOfMatch));
self.trie.insert(prio, symbols, expr_seq);
}
fn sort_trie(&mut self) {
self.trie.sort();
}
}
#[derive(Debug)]
@@ -349,10 +574,6 @@ impl<'a> TermFunctionsBuilder<'a> {
.or_insert_with(|| TermFunctionBuilder::new())
.add_rule(prio, pattern.clone(), expr.clone());
}
for builder in self.builders_by_term.values_mut() {
builder.sort_trie();
}
}
fn finalize(self) -> BTreeMap<TermId, TrieNode> {