trie insertion

This commit is contained in:
Chris Fallin
2021-09-04 12:36:04 -07:00
parent 77ed861857
commit 02ec77a45b
2 changed files with 216 additions and 35 deletions

View File

@@ -1,9 +1,9 @@
//! Generate Rust code from a series of Sequences.
use crate::error::Error;
use crate::ir::{lower_rule, ExprSequence, PatternInst, PatternSequence, Value};
use crate::ir::{lower_rule, ExprSequence, PatternInst, PatternSequence};
use crate::sema::{RuleId, TermEnv, TermId, TypeEnv};
use std::collections::{BTreeMap, HashMap};
use std::collections::HashMap;
/// One "input symbol" for the decision tree that handles matching on
/// a term. Each symbol represents one step: we either run a match op,
@@ -157,10 +157,13 @@ enum TrieSymbol {
EndOfMatch,
}
#[derive(Clone, Debug)]
struct TrieEdge {
key: (PrioRange, TrieSymbol),
node: Box<TrieNode>,
impl TrieSymbol {
    /// Reports whether this symbol is the `EndOfMatch` marker.
    fn is_eom(&self) -> bool {
        matches!(self, TrieSymbol::EndOfMatch)
    }
}
/// Priority value attached to a rule when it is inserted into the trie.
type Prio = i64;
@@ -168,49 +171,220 @@ type Prio = i64;
/// A range of priorities stored as `(low, high)`; both endpoints are
/// inclusive (see `PrioRange::contains`).
#[derive(Clone, Copy, Debug)]
struct PrioRange(Prio, Prio);
impl std::cmp::PartialOrd for PrioRange {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
impl PrioRange {
fn contains(&self, prio: Prio) -> bool {
prio >= self.0 && prio <= self.1
}
}
impl std::cmp::Ord for PrioRange {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
if self.1 < other.0 {
std::cmp::Ordering::Less
} else if self.0 > other.1 {
std::cmp::Ordering::Greater
fn is_unit(&self) -> bool {
self.0 == self.1
}
fn overlaps(&self, other: PrioRange) -> bool {
// This can be derived via DeMorgan: !(self.begin > other.end
// OR other.begin > self.end).
self.0 <= other.1 && other.0 <= self.1
}
fn intersect(&self, other: PrioRange) -> PrioRange {
PrioRange(
std::cmp::max(self.0, other.0),
std::cmp::min(self.1, other.1),
)
}
fn union(&self, other: PrioRange) -> PrioRange {
PrioRange(
std::cmp::min(self.0, other.0),
std::cmp::max(self.1, other.1),
)
}
fn split_at(&self, prio: Prio) -> (PrioRange, PrioRange) {
assert!(self.contains(prio));
assert!(!self.is_unit());
if prio == self.0 {
(PrioRange(self.0, self.0), PrioRange(self.0 + 1, self.1))
} else {
std::cmp::Ordering::Equal
(PrioRange(self.0, prio - 1), PrioRange(prio, self.1))
}
}
}
impl std::cmp::PartialEq for PrioRange {
fn eq(&self, other: &Self) -> bool {
self.cmp(other) == std::cmp::Ordering::Equal
}
/// One outgoing edge of a `TrieNode::Decision` node.
#[derive(Clone, Debug)]
struct TrieEdge {
/// Priorities for which this edge applies.
range: PrioRange,
/// Input symbol that selects this edge.
symbol: TrieSymbol,
/// Subtree reached by following this edge.
node: TrieNode,
}
impl std::cmp::Eq for PrioRange {}
#[derive(Clone, Debug)]
enum TrieNode {
Decision {
edges: BTreeMap<(PrioRange, TrieSymbol), TrieNode>,
},
Leaf {
prio: Prio,
output: Vec<ExprSequence>,
},
Decision { edges: Vec<TrieEdge> },
Leaf { prio: Prio, output: ExprSequence },
Empty,
}
impl TrieNode {
fn is_empty(&self) -> bool {
match self {
&TrieNode::Empty => true,
_ => false,
}
}
fn insert(
&mut self,
prio: Prio,
input: impl Iterator<Item = PatternInst>,
mut input: impl Iterator<Item = TrieSymbol>,
output: ExprSequence,
) {
unimplemented!()
) -> bool {
// Take one input symbol. There must be *at least* one, EOM if
// nothing else.
let op = input
.next()
.expect("Cannot insert into trie with empty input sequence");
let is_last = op.is_eom();
// If we are empty, turn into a decision node.
if self.is_empty() {
*self = TrieNode::Decision { edges: vec![] };
}
// We must be a decision node.
let edges = match self {
&mut TrieNode::Decision { ref mut edges } => edges,
_ => panic!("insert on leaf node!"),
};
// Do we need to split?
let needs_split = edges
.iter()
.any(|edge| edge.range.contains(prio) && !edge.range.is_unit());
// If so, pass over all edges/subnodes and split each.
if needs_split {
let mut new_edges = vec![];
for edge in std::mem::take(edges) {
if !edge.range.contains(prio) || edge.range.is_unit() {
new_edges.push(edge);
continue;
}
let (lo_range, hi_range) = edge.range.split_at(prio);
let lo = edge.node.trim(lo_range);
let hi = edge.node.trim(hi_range);
if let Some((node, range)) = lo {
new_edges.push(TrieEdge {
range,
symbol: edge.symbol.clone(),
node,
});
}
if let Some((node, range)) = hi {
new_edges.push(TrieEdge {
range,
symbol: edge.symbol,
node,
});
}
}
*edges = new_edges;
}
// Now find or insert the appropriate edge.
let mut edge: Option<usize> = None;
for i in 0..edges.len() {
if edges[i].range.contains(prio) && edges[i].symbol == op {
edge = Some(i);
break;
}
if prio > edges[i].range.1 {
edges.insert(
i,
TrieEdge {
range: PrioRange(prio, prio),
symbol: op.clone(),
node: TrieNode::Empty,
},
);
edge = Some(i);
break;
}
}
let edge = edge.unwrap_or_else(|| {
edges.push(TrieEdge {
range: PrioRange(prio, prio),
symbol: op.clone(),
node: TrieNode::Empty,
});
edges.len() - 1
});
let edge = &mut edges[edge];
if is_last {
if !edge.node.is_empty() {
// If a leaf node already exists at an overlapping
// prio for this op, there are two competing rules, so
// we can't insert this one.
return false;
}
edge.node = TrieNode::Leaf { prio, output };
true
} else {
edge.node.insert(prio, input, output)
}
}
fn trim(&self, range: PrioRange) -> Option<(TrieNode, PrioRange)> {
match self {
&TrieNode::Empty => None,
&TrieNode::Leaf { prio, ref output } => {
if range.contains(prio) {
Some((
TrieNode::Leaf {
prio,
output: output.clone(),
},
PrioRange(prio, prio),
))
} else {
None
}
}
&TrieNode::Decision { ref edges } => {
let edges = edges
.iter()
.filter_map(|edge| {
if !edge.range.overlaps(range) {
None
} else {
let range = range.intersect(edge.range);
if let Some((node, range)) = edge.node.trim(range) {
Some(TrieEdge {
range,
symbol: edge.symbol.clone(),
node,
})
} else {
None
}
}
})
.collect::<Vec<_>>();
if edges.is_empty() {
None
} else {
let range = edges
.iter()
.map(|edge| edge.range)
.reduce(|a, b| a.union(b))
.expect("reduce on non-empty vec must not return None");
Some((TrieNode::Decision { edges }, range))
}
}
}
}
}
@@ -224,6 +398,7 @@ impl TrieNode {
/// trying to match another rule's left-hand side against an input to
/// produce the term in question (when the term is used in the LHS of
/// the calling term).
#[derive(Debug)]
struct TermFunctionBuilder {
root_term: TermId,
trie: TrieNode,
@@ -238,11 +413,16 @@ impl TermFunctionBuilder {
}
/// Adds one rule to this term's trie.
///
/// Each instruction of `pattern_seq` becomes a `TrieSymbol::Match`
/// symbol, and a final `TrieSymbol::EndOfMatch` is appended so the
/// sequence is never empty. `expr_seq` is stored as the rule's output.
fn add_rule(&mut self, prio: Prio, pattern_seq: PatternSequence, expr_seq: ExprSequence) {
    let symbols = pattern_seq
        .insts
        .into_iter()
        .map(|op| TrieSymbol::Match { op })
        .chain(std::iter::once(TrieSymbol::EndOfMatch));
    // NOTE(review): `insert` returns `false` when a competing rule already
    // occupies the same position; that result is currently discarded here.
    self.trie.insert(prio, symbols, expr_seq);
}
}
#[derive(Debug)]
struct TermFunctionsBuilder<'a> {
typeenv: &'a TypeEnv,
termenv: &'a TermEnv,
@@ -302,6 +482,7 @@ impl Automata {
/// Builds the per-term function builders from the given type and term
/// environments and logs the result at trace level.
///
/// The automata construction itself is not implemented yet, so this
/// currently always returns `Ok` with a default `Automata`.
pub fn compile(typeenv: &TypeEnv, termenv: &TermEnv) -> Result<Automata, Error> {
    let mut fn_builder = TermFunctionsBuilder::new(typeenv, termenv);
    fn_builder.build();
    log::trace!("builder: {:?}", fn_builder);

    // TODO
    let automata = Automata::default();
    Ok(automata)
}

View File

@@ -1,7 +1,7 @@
//! Compilation process, from AST to Sema to Sequences of Insts.
use crate::{ast, sema, ir, codegen};
use crate::error::Error;
use crate::{ast, codegen, sema};
pub fn compile(defs: &ast::Defs) -> Result<codegen::Automata, Error> {
let mut typeenv = sema::TypeEnv::from_ast(defs)?;