diff --git a/cranelift/isle/src/ast.rs b/cranelift/isle/src/ast.rs index 97b7facffa..53a0698d95 100644 --- a/cranelift/isle/src/ast.rs +++ b/cranelift/isle/src/ast.rs @@ -122,6 +122,8 @@ pub enum Extern { func: Ident, /// The position of this decl. pos: Pos, + /// Whether this extractor is infallible (always matches). + infallible: bool, }, /// An external constructor: `(constructor Term rustfunc)` form. Constructor { diff --git a/cranelift/isle/src/codegen.rs b/cranelift/isle/src/codegen.rs index 92c4d94188..3c22ed44a9 100644 --- a/cranelift/isle/src/codegen.rs +++ b/cranelift/isle/src/codegen.rs @@ -6,13 +6,87 @@ use crate::sema::{RuleId, TermEnv, TermId, TypeEnv}; use peepmatic_automata::{Automaton, Builder as AutomatonBuilder}; use std::collections::HashMap; -// TODO: automata built by output term as well +/// One "input symbol" for the automaton that handles matching on a +/// term. Each symbol represents one step: we either run a match op, +/// or we get a result from it. +/// +/// Note that in the original Peepmatic scheme, the problem that this +/// solves was handled slightly differently. The automaton responded +/// to alphabet symbols that corresponded only to match results, and +/// the "extra state" was used at each automaton node to represent the +/// op to run next. This extra state differentiated nodes that would +/// otherwise be merged together by deduplication. That scheme works +/// well enough, but the "extra state" is slightly confusing and +/// diverges slightly from a pure automaton. +/// +/// Instead, here, we imagine that the user of the automaton can query +/// the possible transition edges out of the current state. Each of +/// these edges corresponds to one possible match op to run. After +/// running a match op, we reach a new state corresponding to +/// successful matches up to that point. +/// +/// However, it's a bit more subtle than this; we add one additional +/// dimension to each match op, and an additional alphabet symbol. +/// +/// First, consider the prioritization problem. We want to give the +/// DSL user the ability to change the order in which rules apply, for +/// example to have a tier of "fallback rules" that apply only if more +/// custom rules do not match. +/// +/// A somewhat simplistic answer to this problem is "more specific +/// rule wins". However, this implies the existence of a total +/// ordering of linearized match sequences that may not fully capture +/// the intuitive meaning of "more specific". Consider four left-hand +/// sides: +/// +/// - (A _ _) +/// - (A (B _) _) +/// - (A _ (B _)) +/// +/// Intuitively, the first is the least specific. Given the input `(A +/// (B 1) (B 2)`, we can say for sure that the first should not be +/// chosen, because either the second or third would match "more" of +/// the input tree. But which of the second and third should be +/// chosen? A "lexicographic ordering" rule would say that we sort +/// left-hand sides such that the `(B _)` sub-pattern comes before the +/// wildcard `_`, so the second rule wins. But that is arbitrarily +/// privileging one over the other based on the order of the +/// arguments. +/// +/// Instead, we add a priority to every rule (optionally specified in +/// the source and defaulting to `0` otherwise) that conceptually +/// augments match-ops. Then, when we examine out-edges from a state +/// to decide on the next match, we sort these by highest priority +/// first. +/// +/// This, too, sacrifices some deduplication, so we refine the idea a +/// bit. First, we add an "End of Match" alphabet symbol that +/// represents a successful match. Then we stipulate that priorities +/// are attached *only* to "End of Match"... +/// +/// -- ah, this doesn't work because we need the (min, max) priority +/// range on outbound edges. When we see a possible transition to EOM +/// at prio 10 or a match op that could lead to an EOM at prio 0 or +/// 20, we need to do both, NFA-style. +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +enum AutomataInput { + Match { op: PatternInst }, + EndOfMatch { prio: i32 }, +} /// Builder context for one function in generated code corresponding /// to one root input term. +/// +/// A `TermFunctionBuilder` can correspond to the matching +/// control-flow and operations that we execute either when evaluating +/// *forward* on a term, trying to match left-hand sides against it +/// and transforming it into another term; or *backward* on a term, +/// trying to match another rule's left-hand side against an input to +/// produce the term in question (when the term is used in the LHS of +/// the calling term). struct TermFunctionBuilder { root_term: TermId, - automaton: AutomatonBuilder, + automaton: AutomatonBuilder, } impl TermFunctionBuilder { diff --git a/cranelift/isle/src/parser.rs b/cranelift/isle/src/parser.rs index a7759cc306..7bd7c8d058 100644 --- a/cranelift/isle/src/parser.rs +++ b/cranelift/isle/src/parser.rs @@ -238,12 +238,19 @@ impl<'a> Parser<'a> { fn parse_etor(&mut self) -> ParseResult { let pos = self.pos(); + let infallible = if self.is_sym_str("infallible") { + self.symbol()?; + true + } else { + false + }; let term = self.parse_ident()?; let func = self.parse_ident()?; Ok(Extern::Extractor { term, func, pos: pos.unwrap(), + infallible, }) } diff --git a/cranelift/isle/src/sema.rs b/cranelift/isle/src/sema.rs index a11faccc49..9e2bab9fe0 100644 --- a/cranelift/isle/src/sema.rs +++ b/cranelift/isle/src/sema.rs @@ -90,13 +90,18 @@ pub struct Term { #[derive(Clone, Debug, PartialEq, Eq)] pub enum TermKind { EnumVariant { + /// Which variant of the enum: e.g. for enum type `A` if a + /// term is `(A.A1 ...)` then the variant ID corresponds to + /// `A1`. variant: VariantId, }, Regular { // Producer and consumer rules are catalogued separately after // building Sequences. Here we just record whether an // extractor and/or constructor is known. - extractor: Option, + /// Extractor func and `infallible` flag. + extractor: Option<(Sym, bool)>, + /// Constructor func. constructor: Option, }, } @@ -472,6 +477,7 @@ impl TermEnv { ref term, ref func, pos, + infallible, }) => { let term_sym = tyenv.intern_mut(term); let func_sym = tyenv.intern_mut(func); @@ -503,7 +509,7 @@ impl TermEnv { ), )); } - *extractor = Some(func_sym); + *extractor = Some((func_sym, infallible)); } } }