From b93304b327e1856483c95afb778372a18a0e8fde Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Tue, 28 Sep 2021 12:11:43 -0700 Subject: [PATCH] Add docs to all public exports and `deny(missing_docs)` going forward --- cranelift/isle/isle/README.md | 9 + cranelift/isle/isle/src/ast.rs | 13 +- cranelift/isle/isle/src/codegen.rs | 17 +- cranelift/isle/isle/src/compile.rs | 4 +- cranelift/isle/isle/src/error.rs | 7 + cranelift/isle/isle/src/ir.rs | 144 +++++++++++---- cranelift/isle/isle/src/lexer.rs | 31 ++++ cranelift/isle/isle/src/lib.rs | 21 ++- cranelift/isle/isle/src/parser.rs | 8 +- cranelift/isle/isle/src/sema.rs | 272 ++++++++++++++++++++++------- 10 files changed, 420 insertions(+), 106 deletions(-) create mode 100644 cranelift/isle/isle/README.md diff --git a/cranelift/isle/isle/README.md b/cranelift/isle/isle/README.md new file mode 100644 index 0000000000..fbd1d48e08 --- /dev/null +++ b/cranelift/isle/isle/README.md @@ -0,0 +1,9 @@ +# ISLE: Instruction Selection / Lowering Expressions + +ISLE is a domain specific language (DSL) for instruction selection and lowering +clif instructions to vcode's `MachInst`s in Cranelift. + +ISLE is a statically-typed term-rewriting language. You define rewriting rules +that map input terms (clif instructions) into output terms (`MachInst`s). These +rules get compiled down into Rust source code that uses a tree of `match` +expressions that is as good as or better than what you would have written by hand. diff --git a/cranelift/isle/isle/src/ast.rs b/cranelift/isle/isle/src/ast.rs index 97584b5627..489e0b81d1 100644 --- a/cranelift/isle/isle/src/ast.rs +++ b/cranelift/isle/isle/src/ast.rs @@ -1,3 +1,7 @@ +//! Abstract syntax tree (AST) created from parsed ISLE. + +#![allow(missing_docs)] + use crate::lexer::Pos; /// The parsed form of an ISLE file. @@ -356,12 +360,13 @@ pub enum Extern { Const { name: Ident, ty: Ident, pos: Pos }, } +/// Whether an argument is an input or an output. 
#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum ArgPolarity { - /// An arg that must be given an Expr in the pattern and passes - /// data *to* the extractor op. + /// An arg that must be given an Expr in the pattern and passes data *to* + /// the extractor op. Input, - /// An arg that must be given a regular pattern (not Expr) and - /// receives data *from* the extractor op. + /// An arg that must be given a regular pattern (not Expr) and receives data + /// *from* the extractor op. Output, } diff --git a/cranelift/isle/isle/src/codegen.rs b/cranelift/isle/isle/src/codegen.rs index cc5dda9994..576fc0aa75 100644 --- a/cranelift/isle/isle/src/codegen.rs +++ b/cranelift/isle/isle/src/codegen.rs @@ -6,6 +6,11 @@ use crate::sema::{RuleId, TermEnv, TermId, Type, TypeEnv, TypeId, Variant}; use std::collections::{HashMap, HashSet}; use std::fmt::Write; +/// Emit Rust source code for the given type and term environments. +pub fn codegen(typeenv: &TypeEnv, termenv: &TermEnv) -> String { + Codegen::compile(typeenv, termenv).generate_rust() +} + /// One "input symbol" for the decision tree that handles matching on /// a term. Each symbol represents one step: we either run a match op, /// or we finish the match. 
@@ -493,7 +498,7 @@ impl<'a> TermFunctionsBuilder<'a> { } #[derive(Clone, Debug)] -pub struct Codegen<'a> { +struct Codegen<'a> { typeenv: &'a TypeEnv, termenv: &'a TermEnv, functions_by_term: HashMap, @@ -506,7 +511,7 @@ struct BodyContext { } impl<'a> Codegen<'a> { - pub fn compile(typeenv: &'a TypeEnv, termenv: &'a TermEnv) -> Codegen<'a> { + fn compile(typeenv: &'a TypeEnv, termenv: &'a TermEnv) -> Codegen<'a> { let mut builder = TermFunctionsBuilder::new(typeenv, termenv); builder.build(); log::trace!("builder: {:?}", builder); @@ -518,7 +523,7 @@ impl<'a> Codegen<'a> { } } - pub fn generate_rust(&self) -> String { + fn generate_rust(&self) -> String { let mut code = String::new(); self.generate_header(&mut code); @@ -561,7 +566,7 @@ impl<'a> Codegen<'a> { "{}fn {}(&mut self, {}) -> {}({},){};", indent, sig.func_name, - sig.arg_tys + sig.param_tys .iter() .enumerate() .map(|(i, &ty)| format!("arg{}: {}", i, self.type_name(ty, /* by_ref = */ true))) @@ -728,7 +733,7 @@ impl<'a> Codegen<'a> { let sig = termdata.to_sig(self.typeenv).unwrap(); let args = sig - .arg_tys + .param_tys .iter() .enumerate() .map(|(i, &ty)| format!("arg{}: {}", i, self.type_name(ty, true))) @@ -874,7 +879,7 @@ impl<'a> Codegen<'a> { let outputname = self.value_name(&output); let termdata = &self.termenv.terms[term.index()]; let sig = termdata.to_sig(self.typeenv).unwrap(); - assert_eq!(input_exprs.len(), sig.arg_tys.len()); + assert_eq!(input_exprs.len(), sig.param_tys.len()); let fallible_try = if infallible { "" } else { "?" }; writeln!( code, diff --git a/cranelift/isle/isle/src/compile.rs b/cranelift/isle/isle/src/compile.rs index 69d7a2aa5e..68304852e7 100644 --- a/cranelift/isle/isle/src/compile.rs +++ b/cranelift/isle/isle/src/compile.rs @@ -3,9 +3,9 @@ use crate::error::Error; use crate::{ast, codegen, sema}; +/// Compile the given AST definitions into Rust source code. 
pub fn compile(defs: &ast::Defs) -> Result> { let mut typeenv = sema::TypeEnv::from_ast(defs)?; let termenv = sema::TermEnv::from_ast(&mut typeenv, defs)?; - let codegen = codegen::Codegen::compile(&typeenv, &termenv); - Ok(codegen.generate_rust()) + Ok(codegen::codegen(&typeenv, &termenv)) } diff --git a/cranelift/isle/isle/src/error.rs b/cranelift/isle/isle/src/error.rs index 70a2140401..b123511ddd 100644 --- a/cranelift/isle/isle/src/error.rs +++ b/cranelift/isle/isle/src/error.rs @@ -3,14 +3,21 @@ use crate::lexer::Pos; use std::fmt; +/// Errors produced by ISLE. #[derive(Clone, Debug)] pub enum Error { + /// The input ISLE source has an error. CompileError { + /// The error message. msg: String, + /// The ISLE source filename where the error occurs. filename: String, + /// The position within the file that the error occurs at. pos: Pos, }, + /// An error from elsewhere in the system. SystemError { + /// The error message. msg: String, }, } diff --git a/cranelift/isle/isle/src/ir.rs b/cranelift/isle/isle/src/ir.rs index 9c3171607e..3138537eb8 100644 --- a/cranelift/isle/isle/src/ir.rs +++ b/cranelift/isle/isle/src/ir.rs @@ -1,18 +1,31 @@ //! Lowered matching IR. -use crate::declare_id; use crate::lexer::Pos; use crate::sema::*; use std::collections::HashMap; -declare_id!(InstId); +declare_id!( + /// The id of an instruction in a `PatternSequence`. + InstId +); +/// A value produced by a LHS or RHS instruction. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Value { /// A value produced by an instruction in the Pattern (LHS). - Pattern { inst: InstId, output: usize }, + Pattern { + /// The instruction that produces this value. + inst: InstId, + /// This value is the `output`th value produced by this pattern. + output: usize, + }, /// A value produced by an instruction in the Expr (RHS). - Expr { inst: InstId, output: usize }, + Expr { + /// The instruction that produces this value. 
+ inst: InstId, + /// This value is the `output`th value produced by this expression. + output: usize, + }, } /// A single Pattern instruction. @@ -20,48 +33,81 @@ pub enum Value { pub enum PatternInst { /// Get the Nth input argument, which corresponds to the Nth field /// of the root term. - Arg { index: usize, ty: TypeId }, + Arg { + /// The index of the argument to get. + index: usize, + /// The type of the argument. + ty: TypeId, + }, /// Match a value as equal to another value. Produces no values. - MatchEqual { a: Value, b: Value, ty: TypeId }, + MatchEqual { + /// The first value. + a: Value, + /// The second value. + b: Value, + /// The type of the values. + ty: TypeId, + }, /// Try matching the given value as the given integer. Produces no values. MatchInt { + /// The value to match on. input: Value, + /// The value's type. ty: TypeId, + /// The integer to match against the value. int_val: i64, }, /// Try matching the given value as the given constant. Produces no values. - MatchPrim { input: Value, ty: TypeId, val: Sym }, - - /// Try matching the given value as the given variant, producing - /// `|arg_tys|` values as output. - MatchVariant { + MatchPrim { + /// The value to match on. input: Value, + /// The type of the value. + ty: TypeId, + /// The primitive to match against the value. + val: Sym, + }, + + /// Try matching the given value as the given variant, producing `|arg_tys|` + /// values as output. + MatchVariant { + /// The value to match on. + input: Value, + /// The type of the value. input_ty: TypeId, + /// The types of values produced upon a successful match. arg_tys: Vec, + /// The value type's variant that we are matching against. variant: VariantId, }, - /// Invoke an extractor, taking the given values as input (the - /// first is the value to extract, the other are the - /// `Input`-polarity extractor args) and producing an output valu - /// efor each `Output`-polarity extractor arg. 
+ /// Invoke an extractor, taking the given values as input (the first is the + value to extract, the other are the `Input`-polarity extractor args) and + producing an output value for each `Output`-polarity extractor arg. Extract { + /// The value to extract, followed by polarity extractor args. inputs: Vec, + /// The types of the inputs. input_tys: Vec, + /// The types of the output values produced upon a successful match. output_tys: Vec, + /// This extractor's term. term: TermId, + /// Whether this extraction is infallible or not. infallible: bool, }, - /// Evaluate an expression and provide the given value as the - /// result of this match instruction. The expression has access to - /// the pattern-values up to this point in the sequence. + /// Evaluate an expression and provide the given value as the result of this + /// match instruction. The expression has access to the pattern-values up to + /// this point in the sequence. Expr { + /// The expression to evaluate. seq: ExprSequence, + /// The value produced by the expression. output: Value, + /// The type of the output value. output_ty: TypeId, }, } @@ -70,35 +116,58 @@ pub enum PatternInst { #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum ExprInst { /// Produce a constant integer. - ConstInt { ty: TypeId, val: i64 }, + ConstInt { + /// This integer type. + ty: TypeId, + /// The integer value. Must fit within the type. + val: i64, + }, /// Produce a constant extern value. - ConstPrim { ty: TypeId, val: Sym }, + ConstPrim { + /// The primitive type. + ty: TypeId, + /// The primitive value. + val: Sym, + }, /// Create a variant. CreateVariant { + /// The input arguments that will make up this variant's fields. + /// + /// These must be in the same order as the variant's fields. inputs: Vec<(Value, TypeId)>, + /// The enum type. ty: TypeId, + /// The variant within the enum that we are constructing. variant: VariantId, }, /// Invoke a constructor. 
Construct { + /// The arguments to the constructor. inputs: Vec<(Value, TypeId)>, + /// The type of the constructor. ty: TypeId, + /// The constructor term. term: TermId, + /// Whether this constructor is infallible or not. infallible: bool, }, /// Set the Nth return value. Produces no values. Return { + /// The index of the return value to set. index: usize, + /// The type of the return value. ty: TypeId, + /// The value to set as the `index`th return value. value: Value, }, } impl ExprInst { + /// Invoke `f` for each value in this expression. pub fn visit_values(&self, mut f: F) { match self { &ExprInst::ConstInt { .. } => {} @@ -117,29 +186,34 @@ impl ExprInst { } /// A linear sequence of instructions that match on and destructure an -/// argument. A pattern is fallible (may not match). If it does not -/// fail, its result consists of the values produced by the -/// `PatternInst`s, which may be used by a subsequent `Expr`. +/// argument. A pattern is fallible (may not match). If it does not fail, its +/// result consists of the values produced by the `PatternInst`s, which may be +/// used by a subsequent `Expr`. #[derive(Clone, Debug, PartialEq, Eq, Hash, Default)] pub struct PatternSequence { - /// Instruction sequence for pattern. InstId indexes into this - /// sequence for `Value::Pattern` values. + /// Instruction sequence for pattern. + /// + /// `InstId` indexes into this sequence for `Value::Pattern` values. pub insts: Vec, } -/// A linear sequence of instructions that produce a new value from -/// the right-hand side of a rule, given bindings that come from a -/// `Pattern` derived from the left-hand side. +/// A linear sequence of instructions that produce a new value from the +/// right-hand side of a rule, given bindings that come from a `Pattern` derived +/// from the left-hand side. #[derive(Clone, Debug, PartialEq, Eq, Hash, Default, PartialOrd, Ord)] pub struct ExprSequence { - /// Instruction sequence for expression. 
InstId indexes into this - /// sequence for `Value::Expr` values. + /// Instruction sequence for expression. + /// + /// `InstId` indexes into this sequence for `Value::Expr` values. pub insts: Vec, /// Position at which the rule producing this sequence was located. pub pos: Pos, } impl ExprSequence { + /// Is this expression sequence producing a constant integer? + /// + /// If so, return the integer type and the constant. pub fn is_const_int(&self) -> Option<(TypeId, i64)> { if self.insts.len() == 2 && matches!(&self.insts[1], &ExprInst::Return { .. }) { match &self.insts[0] { @@ -499,13 +573,17 @@ impl ExprSequence { match expr { &Expr::ConstInt(ty, val) => self.add_const_int(ty, val), &Expr::ConstPrim(ty, val) => self.add_const_prim(ty, val), - &Expr::Let(_ty, ref bindings, ref subexpr) => { + &Expr::Let { + ty: _ty, + ref bindings, + ref body, + } => { let mut vars = vars.clone(); for &(var, _var_ty, ref var_expr) in bindings { let var_value = self.gen_expr(typeenv, termenv, &*var_expr, &vars); vars.insert(var, var_value); } - self.gen_expr(typeenv, termenv, &*subexpr, &vars) + self.gen_expr(typeenv, termenv, body, &vars) } &Expr::Var(_ty, var_id) => vars.get(&var_id).cloned().unwrap(), &Expr::Term(ty, term, ref arg_exprs) => { @@ -535,7 +613,7 @@ impl ExprSequence { /* infallible = */ true, ) } - _ => panic!("Should have been caught by typechecking"), + otherwise => panic!("Should have been caught by typechecking: {:?}", otherwise), } } } diff --git a/cranelift/isle/isle/src/lexer.rs b/cranelift/isle/isle/src/lexer.rs index 372d169585..9a8fa92bb8 100644 --- a/cranelift/isle/isle/src/lexer.rs +++ b/cranelift/isle/isle/src/lexer.rs @@ -3,8 +3,14 @@ use crate::error::Error; use std::borrow::Cow; +/// The lexer. +/// +/// Breaks source text up into a sequence of tokens (with source positions). #[derive(Clone, Debug)] pub struct Lexer<'a> { + /// Arena of filenames from the input source. + /// + /// Indexed via `Pos::file`. 
pub filenames: Vec, file_starts: Vec, buf: Cow<'a, [u8]>, @@ -12,34 +18,52 @@ pub struct Lexer<'a> { lookahead: Option<(Pos, Token)>, } +/// A source position. #[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Hash, PartialOrd, Ord)] pub struct Pos { + /// This source position's file. + /// + /// Indexes into `Lexer::filenames` early in the compiler pipeline, and + /// later into `TypeEnv::filenames` once we get into semantic analysis. pub file: usize, + /// This source position's byte offset in the file. pub offset: usize, + /// This source position's line number in the file. pub line: usize, + /// This source position's column number in the file. pub col: usize, } impl Pos { + /// Print this source position as `file.isle:12:34`. pub fn pretty_print(&self, filenames: &[String]) -> String { format!("{}:{}:{}", filenames[self.file], self.line, self.col) } + /// Print this source position as `file.isle line 12`. pub fn pretty_print_line(&self, filenames: &[String]) -> String { format!("{} line {}", filenames[self.file], self.line) } } +/// A token of ISLE source. #[derive(Clone, Debug, PartialEq, Eq)] pub enum Token { + /// Left paren. LParen, + /// Right paren. RParen, + /// A symbol, e.g. `Foo`. Symbol(String), + /// An integer. Int(i64), + /// `@` At, + /// `<` Lt, } impl<'a> Lexer<'a> { + /// Create a new lexer for the given source contents and filename. pub fn from_str(s: &'a str, filename: &'a str) -> Lexer<'a> { let mut l = Lexer { filenames: vec![filename.to_string()], @@ -57,6 +81,7 @@ impl<'a> Lexer<'a> { l } + /// Create a new lexer from the given files. pub fn from_files(filenames: Vec) -> Result, Error> { assert!(!filenames.is_empty()); let file_contents: Vec = filenames @@ -94,10 +119,12 @@ impl<'a> Lexer<'a> { Ok(l) } + /// Get the lexer's current file offset. pub fn offset(&self) -> usize { self.pos.offset } + /// Get the lexer's current source position. 
pub fn pos(&self) -> Pos { self.pos } @@ -218,10 +245,12 @@ impl<'a> Lexer<'a> { } } + /// Peek ahead at the next token. pub fn peek(&self) -> Option<&(Pos, Token)> { self.lookahead.as_ref() } + /// Are we at the end of the source input? pub fn eof(&self) -> bool { self.lookahead.is_none() } @@ -238,6 +267,7 @@ impl<'a> std::iter::Iterator for Lexer<'a> { } impl Token { + /// Is this an `Int` token? pub fn is_int(&self) -> bool { match self { Token::Int(_) => true, @@ -245,6 +275,7 @@ impl Token { } } + /// Is this a `Sym` token? pub fn is_sym(&self) -> bool { match self { Token::Symbol(_) => true, diff --git a/cranelift/isle/isle/src/lib.rs b/cranelift/isle/isle/src/lib.rs index 5d9dcb088a..b32fa349ff 100644 --- a/cranelift/isle/isle/src/lib.rs +++ b/cranelift/isle/isle/src/lib.rs @@ -1,3 +1,23 @@ +#![doc = include_str!("../README.md")] +#![deny(missing_docs)] + +macro_rules! declare_id { + ( + $(#[$attr:meta])* + $name:ident + ) => { + $(#[$attr])* + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct $name(pub usize); + impl $name { + /// Get the index of this id. + pub fn index(self) -> usize { + self.0 + } + } + }; +} + pub mod ast; pub mod codegen; pub mod compile; @@ -6,4 +26,3 @@ pub mod ir; pub mod lexer; pub mod parser; pub mod sema; - diff --git a/cranelift/isle/isle/src/parser.rs b/cranelift/isle/isle/src/parser.rs index 81cf81c2cf..ae0a3b0059 100644 --- a/cranelift/isle/isle/src/parser.rs +++ b/cranelift/isle/isle/src/parser.rs @@ -4,19 +4,24 @@ use crate::ast::*; use crate::error::*; use crate::lexer::{Lexer, Pos, Token}; +/// The ISLE parser. +/// +/// Takes in a lexer and creates an AST. #[derive(Clone, Debug)] pub struct Parser<'a> { lexer: Lexer<'a>, } +/// Either `Ok(T)` or an `Err(isle::Error)`. pub type ParseResult = std::result::Result; impl<'a> Parser<'a> { + /// Construct a new parser from the given lexer. 
pub fn new(lexer: Lexer<'a>) -> Parser<'a> { Parser { lexer } } - pub fn error(&self, pos: Pos, msg: String) -> Error { + fn error(&self, pos: Pos, msg: String) -> Error { Error::CompileError { filename: self.lexer.filenames[pos.file].clone(), pos, @@ -106,6 +111,7 @@ impl<'a> Parser<'a> { } } + /// Parse the top-level ISLE definitions and return their AST. pub fn parse_defs(&mut self) -> ParseResult { let mut defs = vec![]; while !self.lexer.eof() { diff --git a/cranelift/isle/isle/src/sema.rs b/cranelift/isle/isle/src/sema.rs index 71491f84c6..148a8c0c07 100644 --- a/cranelift/isle/isle/src/sema.rs +++ b/cranelift/isle/isle/src/sema.rs @@ -1,121 +1,226 @@ //! Semantic analysis. +//! +//! This module primarily contains the type environment and term environment. +//! +//! The type environment is constructed by analyzing an input AST. The type +//! environment records the types used in the input source and the types of our +//! various rules and symbols. ISLE's type system is intentionally easy to +//! check, only requires a single pass over the AST, and doesn't require any +//! unification or anything like that. +//! +//! The term environment is constructed from both the AST and type +//! environment. It is sort of a typed and reorganized AST that more directly +//! reflects ISLE semantics than the input ISLE source code (whereas the AST is +//! the opposite). use crate::ast; use crate::error::*; use crate::lexer::Pos; use std::collections::HashMap; +/// Either `Ok(T)` or one or more `Error`s. +/// +/// This allows us to return multiple type errors at the same time, for example. pub type SemaResult = std::result::Result>; -#[macro_export] -macro_rules! 
declare_id { - ($name:ident) => { - #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] - pub struct $name(pub usize); - impl $name { - pub fn index(self) -> usize { - self.0 - } - } - }; -} - -declare_id!(Sym); -declare_id!(TypeId); -declare_id!(VariantId); -declare_id!(FieldId); -declare_id!(TermId); -declare_id!(RuleId); -declare_id!(VarId); +declare_id!( + /// The id of an interned symbol. + Sym +); +declare_id!( + /// The id of an interned type inside the `TypeEnv`. + TypeId +); +declare_id!( + /// The id of a variant inside an enum. + VariantId +); +declare_id!( + /// The id of a field inside a variant. + FieldId +); +declare_id!( + /// The id of an interned term inside the `TermEnv`. + TermId +); +declare_id!( + /// The id of an interned rule inside the `TermEnv`. + RuleId +); +declare_id!( + /// The id of a bound variable inside a `Bindings`. + VarId +); +/// The type environment. +/// +/// Keeps track of which symbols and rules have which types. #[derive(Clone, Debug)] pub struct TypeEnv { + /// Arena of input ISLE source filenames. + /// + /// We refer to these indirectly through the `Pos::file` indices. pub filenames: Vec, + + /// Arena of interned symbol names. + /// + /// Referred to indirectly via `Sym` indices. pub syms: Vec, + + /// Map of already-interned symbol names to their `Sym` ids. pub sym_map: HashMap, + + /// Arena of type definitions. + /// + /// Referred to indirectly via `TypeId`s. pub types: Vec, + + /// A map from a type name symbol to its `TypeId`. pub type_map: HashMap, + + /// The types of constant symbols. pub const_types: HashMap, + + /// Type errors that we've found so far during type checking. pub errors: Vec, } +/// A type. #[derive(Clone, Debug, PartialEq, Eq)] pub enum Type { + /// A primitive, `Copy` type. + /// + /// These are always defined externally, and we allow literals of these + /// types to pass through from ISLE source code to the emitted Rust code. Primitive(TypeId, Sym), + + /// A sum type. 
+ /// + /// Note that enums with only one variant are equivalent to a "struct". Enum { + /// The name of this enum. name: Sym, + /// This `enum`'s type id. id: TypeId, + /// Is this `enum` defined in external Rust code? + /// + /// If so, ISLE will not emit a definition for it. If not, then it will + /// emit a Rust definition for it. is_extern: bool, + /// The different variants for this enum. variants: Vec, + /// The ISLE source position where this `enum` is defined. pos: Pos, }, } impl Type { + /// Get the name of this `Type`. pub fn name<'a>(&self, tyenv: &'a TypeEnv) -> &'a str { match self { Self::Primitive(_, name) | Self::Enum { name, .. } => &tyenv.syms[name.index()], } } + /// Is this a primitive type? pub fn is_prim(&self) -> bool { - match self { - &Type::Primitive(..) => true, - _ => false, - } + matches!(self, Type::Primitive(..)) } } +/// A variant of an enum. #[derive(Clone, Debug, PartialEq, Eq)] pub struct Variant { + /// The name of this variant. pub name: Sym, + + /// The full, prefixed-with-the-enum's-name name of this variant. + /// + /// E.g. if the enum is `Foo` and this variant is `Bar`, then the + /// `fullname` is `Foo.Bar`. pub fullname: Sym, + + /// The id of this variant, i.e. the index of this variant within its + /// enum's `Type::Enum::variants`. pub id: VariantId, + + /// The data fields of this enum variant. pub fields: Vec, } +/// A field of a `Variant`. #[derive(Clone, Debug, PartialEq, Eq)] pub struct Field { + /// The name of this field. pub name: Sym, + /// This field's id. pub id: FieldId, + /// The type of this field. pub ty: TypeId, } +/// The term environment. +/// +/// This is sort of a typed and reorganized AST that more directly reflects ISLE +/// semantics than the input ISLE source code (whereas the AST is the +/// opposite). #[derive(Clone, Debug)] pub struct TermEnv { + /// Arena of interned terms defined in this ISLE program. + /// + /// This is indexed by `TermId`. 
pub terms: Vec, + + /// A map from an interned `Term`'s name to its `TermId`. pub term_map: HashMap, + + /// Arena of interned rules defined in this ISLE program. + /// + /// This is indexed by `RuleId`. pub rules: Vec, } +/// A term. +/// +/// Maps parameter types to result types if this is a constructor term, or +/// result types to parameter types if this is an extractor term. Or both if +/// this term can be either a constructor or an extractor. #[derive(Clone, Debug, PartialEq, Eq)] pub struct Term { + /// This term's id. pub id: TermId, + /// The name of this term. pub name: Sym, + /// The parameter types to this term. pub arg_tys: Vec, + /// The result types of this term. pub ret_ty: TypeId, + /// The kind of this term. pub kind: TermKind, } +/// The kind of a term. #[derive(Clone, Debug, PartialEq, Eq)] pub enum TermKind { + /// An enum variant constructor or extractor. EnumVariant { - /// Which variant of the enum: e.g. for enum type `A` if a - /// term is `(A.A1 ...)` then the variant ID corresponds to - /// `A1`. + /// Which variant of the enum: e.g. for enum type `A` if a term is + /// `(A.A1 ...)` then the variant ID corresponds to `A1`. variant: VariantId, }, - /// A term with "internal" rules that work in the forward - /// direction. Becomes a compiled Rust function in the generated - /// code. + /// A term with "internal" rules that work in the forward direction. Becomes + /// a compiled Rust function in the generated code. InternalConstructor, - /// A term that defines an "extractor macro" in the LHS of a - /// pattern. Its arguments take patterns and are simply - /// substituted with the given patterns when used. - InternalExtractor { template: ast::Pattern }, + /// A term that defines an "extractor macro" in the LHS of a pattern. Its + /// arguments take patterns and are simply substituted with the given + /// patterns when used. + InternalExtractor { + /// This extractor's pattern. 
+ template: ast::Pattern, + }, /// A term defined solely by an external extractor function. ExternalExtractor { - /// Extractor func. + /// The external name of the extractor function. name: Sym, /// Which arguments of the extractor are inputs and which are outputs? arg_polarity: Vec, @@ -124,7 +229,7 @@ pub enum TermKind { }, /// A term defined solely by an external constructor function. ExternalConstructor { - /// Constructor func. + /// The external name of the constructor function. name: Sym, }, /// Declared but no body or externs associated (yet). @@ -133,27 +238,28 @@ pub enum TermKind { pub use crate::ast::ArgPolarity; +/// An external function signature. #[derive(Clone, Debug)] pub struct ExternalSig { + /// The name of the external function. pub func_name: String, + /// The name of the external function, prefixed with the context trait. pub full_name: String, - pub arg_tys: Vec, + /// The types of this function signature's parameters. + pub param_tys: Vec, + /// The types of this function signature's results. pub ret_tys: Vec, + /// Whether this signature is infallible or not. pub infallible: bool, } impl Term { + /// Get this term's type. pub fn ty(&self) -> TypeId { self.ret_ty } - pub fn to_variant(&self) -> Option { - match &self.kind { - &TermKind::EnumVariant { variant } => Some(variant), - _ => None, - } - } - + /// Is this term a constructor? pub fn is_constructor(&self) -> bool { match &self.kind { &TermKind::InternalConstructor { .. } | &TermKind::ExternalConstructor { .. } => true, @@ -161,13 +267,7 @@ impl Term { } } - pub fn is_extractor(&self) -> bool { - match &self.kind { - &TermKind::InternalExtractor { .. } | &TermKind::ExternalExtractor { .. } => true, - _ => false, - } - } - + /// Is this term external? pub fn is_external(&self) -> bool { match &self.kind { &TermKind::ExternalExtractor { .. } | &TermKind::ExternalConstructor { .. } => true, @@ -175,12 +275,13 @@ impl Term { } } + /// Get this term's external function signature, if any. 
pub fn to_sig(&self, tyenv: &TypeEnv) -> Option { match &self.kind { &TermKind::ExternalConstructor { name } => Some(ExternalSig { func_name: tyenv.syms[name.index()].clone(), full_name: format!("C::{}", tyenv.syms[name.index()]), - arg_tys: self.arg_tys.clone(), + param_tys: self.arg_tys.clone(), ret_tys: vec![self.ret_ty], infallible: true, }), @@ -205,7 +306,7 @@ impl Term { Some(ExternalSig { func_name: tyenv.syms[name.index()].clone(), full_name: format!("C::{}", tyenv.syms[name.index()]), - arg_tys, + param_tys: arg_tys, ret_tys, infallible, }) @@ -215,7 +316,7 @@ impl Term { Some(ExternalSig { func_name: name.clone(), full_name: name, - arg_tys: self.arg_tys.clone(), + param_tys: self.arg_tys.clone(), ret_tys: vec![self.ret_ty], infallible: false, }) @@ -225,42 +326,87 @@ impl Term { } } +/// A term rewrite rule. #[derive(Clone, Debug)] pub struct Rule { + /// This rule's id. pub id: RuleId, + /// The left-hand side pattern that this rule matches. pub lhs: Pattern, + /// The right-hand side expression that this rule evaluates upon successful + /// match. pub rhs: Expr, + /// The priority of this rule, if any. pub prio: Option, + /// The source position where this rule is defined. pub pos: Pos, } +/// A left-hand side pattern of some rule. #[derive(Clone, Debug, PartialEq, Eq)] pub enum Pattern { + /// Bind a variable of the given type from the current value. + /// + /// Keep matching on the value with the subpattern. BindPattern(TypeId, VarId, Box), + + /// Match the current value against an already bound variable with the given + /// type. Var(TypeId, VarId), + + /// Match the current value against a constant integer of the given integer + /// type. ConstInt(TypeId, i64), + + /// Match the current value against a constant primitive value of the given + /// primitive type. ConstPrim(TypeId, Sym), + + /// Match the current value against the given extractor term with the given + /// arguments. 
Term(TypeId, TermId, Vec), + + /// Match anything of the given type successfully. Wildcard(TypeId), + + /// Match all of the following patterns of the given type. And(TypeId, Vec), } +/// Arguments to a term inside a pattern (i.e. an extractor). #[derive(Clone, Debug, PartialEq, Eq)] pub enum TermArgPattern { + /// A pattern to match sub-values (i.e. the extractor's results) against. Pattern(Pattern), + /// An expression to generate a value that is passed into the extractor. Expr(Expr), } +/// A right-hand side expression of some rule. #[derive(Clone, Debug, PartialEq, Eq)] pub enum Expr { + /// Invoke this term constructor with the given arguments. Term(TypeId, TermId, Vec), + /// Get the value of a variable that was bound in the left-hand side. Var(TypeId, VarId), + /// Get a constant integer. ConstInt(TypeId, i64), + /// Get a constant primitive. ConstPrim(TypeId, Sym), - Let(TypeId, Vec<(VarId, TypeId, Box)>, Box), + /// Evaluate the nested expressions and bind their results to the given + /// variables, then evaluate the body expression. + Let { + /// The type of the result of this let expression. + ty: TypeId, + /// The expressions that are evaluated and bound to the given variables. + bindings: Vec<(VarId, TypeId, Box)>, + /// The body expression that is evaluated after the bindings. + body: Box, + }, } impl Pattern { + /// Get this pattern's type. pub fn ty(&self) -> TypeId { match self { &Self::BindPattern(t, ..) => t, @@ -273,6 +419,7 @@ impl Pattern { } } + /// Get the root term of this pattern, if any. pub fn root_term(&self) -> Option { match self { &Pattern::Term(_, term, _) => Some(term), @@ -283,18 +430,20 @@ impl Pattern { } impl Expr { + /// Get this expression's type. pub fn ty(&self) -> TypeId { match self { &Self::Term(t, ..) => t, &Self::Var(t, ..) => t, &Self::ConstInt(t, ..) => t, &Self::ConstPrim(t, ..) => t, - &Self::Let(t, ..) => t, + &Self::Let { ty: t, .. } => t, } } } impl TypeEnv { + /// Construct the type environment from the AST. 
pub fn from_ast(defs: &ast::Defs) -> SemaResult { let mut tyenv = TypeEnv { filenames: defs.filenames.clone(), @@ -467,7 +616,7 @@ impl TypeEnv { self.errors.push(err); } - pub fn intern_mut(&mut self, ident: &ast::Ident) -> Sym { + fn intern_mut(&mut self, ident: &ast::Ident) -> Sym { if let Some(s) = self.sym_map.get(&ident.0).cloned() { s } else { @@ -478,7 +627,7 @@ impl TypeEnv { } } - pub fn intern(&self, ident: &ast::Ident) -> Option { + fn intern(&self, ident: &ast::Ident) -> Option { self.sym_map.get(&ident.0).cloned() } } @@ -497,6 +646,7 @@ struct BoundVar { } impl TermEnv { + /// Construct the term environment from the AST and the type environment. pub fn from_ast(tyenv: &mut TypeEnv, defs: &ast::Defs) -> SemaResult { let mut env = TermEnv { terms: vec![], @@ -1274,7 +1424,11 @@ impl TermEnv { // Pop the bindings. bindings.vars.truncate(orig_binding_len); - Some(Expr::Let(body_ty, let_defs, body)) + Some(Expr::Let { + ty: body_ty, + bindings: let_defs, + body, + }) } } }