Add docs to all public exports and deny(missing_docs) going forward

Nick Fitzgerald
2021-09-28 12:11:43 -07:00
committed by Chris Fallin
parent 922a3886d5
commit b93304b327
10 changed files with 420 additions and 106 deletions

View File

@@ -0,0 +1,9 @@
# ISLE: Instruction Selection / Lowering Expressions
ISLE is a domain-specific language (DSL) for instruction selection in Cranelift:
lowering clif instructions to vcode's `MachInst`s.
ISLE is a statically-typed term-rewriting language. You define rewriting rules
that map input terms (clif instructions) into output terms (`MachInst`s). These
rules get compiled down into Rust source code that uses a tree of `match`
expressions that is as good as or better than what you would have written by hand.
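
For illustration only — the rule, instruction names, and types below are invented, not taken from this commit — a rule and the rough shape of the Rust it compiles into might look like this:

```rust
// A hypothetical ISLE rule such as
//
//   (rule (lower (iadd x y))
//         (add x y))
//
// conceptually becomes one arm of a generated match tree. A self-contained
// sketch of that shape (all names invented):
#[derive(Clone, Copy, Debug)]
enum Inst {
    Iadd(i32, i32),
    Isub(i32, i32),
}

#[derive(Debug)]
enum MachInst {
    Add(i32, i32),
    Sub(i32, i32),
}

fn lower(inst: Inst) -> Option<MachInst> {
    match inst {
        Inst::Iadd(x, y) => Some(MachInst::Add(x, y)),
        Inst::Isub(x, y) => Some(MachInst::Sub(x, y)),
    }
}

fn main() {
    println!("{:?}", lower(Inst::Iadd(1, 2)));
}
```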

View File

@@ -1,3 +1,7 @@
//! Abstract syntax tree (AST) created from parsed ISLE.
#![allow(missing_docs)]
use crate::lexer::Pos; use crate::lexer::Pos;
/// The parsed form of an ISLE file. /// The parsed form of an ISLE file.
@@ -356,12 +360,13 @@ pub enum Extern {
Const { name: Ident, ty: Ident, pos: Pos }, Const { name: Ident, ty: Ident, pos: Pos },
} }
/// Whether an argument is an input or an output.
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArgPolarity { pub enum ArgPolarity {
/// An arg that must be given an Expr in the pattern and passes /// An arg that must be given an Expr in the pattern and passes data *to*
/// data *to* the extractor op. /// the extractor op.
Input, Input,
/// An arg that must be given a regular pattern (not Expr) and /// An arg that must be given a regular pattern (not Expr) and receives data
/// receives data *from* the extractor op. /// *from* the extractor op.
Output, Output,
} }
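
A hedged sketch of how polarity tends to surface in the generated code (the trait and method below are invented for illustration, not the crate's actual output): `Input`-polarity arguments become parameters that the compiled rule passes *to* the extractor, while `Output`-polarity arguments come back *from* it as part of a fallible return value.

```rust
// Invented illustration of an external extractor's Rust-side shape: the first
// parameter is the value being destructured, further parameters correspond to
// Input-polarity args, and the Option-wrapped tuple holds the Output-polarity
// args produced on a successful match.
trait Context {
    fn base_and_offset(
        &mut self,
        value: u64,      // the value the pattern is matching on
        expected_ty: u8, // an Input-polarity argument
    ) -> Option<(u64, i32)>; // Output-polarity arguments
}

struct DummyCtx;

impl Context for DummyCtx {
    fn base_and_offset(&mut self, value: u64, expected_ty: u8) -> Option<(u64, i32)> {
        // Toy implementation so the sketch compiles and runs.
        if expected_ty == 0 {
            Some((value, 0))
        } else {
            None
        }
    }
}

fn main() {
    let mut ctx = DummyCtx;
    println!("{:?}", ctx.base_and_offset(0x1000, 0));
}
```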

View File

@@ -6,6 +6,11 @@ use crate::sema::{RuleId, TermEnv, TermId, Type, TypeEnv, TypeId, Variant};
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::fmt::Write; use std::fmt::Write;
/// Emit Rust source code for the given type and term environments.
pub fn codegen(typeenv: &TypeEnv, termenv: &TermEnv) -> String {
Codegen::compile(typeenv, termenv).generate_rust()
}
/// One "input symbol" for the decision tree that handles matching on /// One "input symbol" for the decision tree that handles matching on
/// a term. Each symbol represents one step: we either run a match op, /// a term. Each symbol represents one step: we either run a match op,
/// or we finish the match. /// or we finish the match.
@@ -493,7 +498,7 @@ impl<'a> TermFunctionsBuilder<'a> {
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Codegen<'a> { struct Codegen<'a> {
typeenv: &'a TypeEnv, typeenv: &'a TypeEnv,
termenv: &'a TermEnv, termenv: &'a TermEnv,
functions_by_term: HashMap<TermId, TrieNode>, functions_by_term: HashMap<TermId, TrieNode>,
@@ -506,7 +511,7 @@ struct BodyContext {
} }
impl<'a> Codegen<'a> { impl<'a> Codegen<'a> {
pub fn compile(typeenv: &'a TypeEnv, termenv: &'a TermEnv) -> Codegen<'a> { fn compile(typeenv: &'a TypeEnv, termenv: &'a TermEnv) -> Codegen<'a> {
let mut builder = TermFunctionsBuilder::new(typeenv, termenv); let mut builder = TermFunctionsBuilder::new(typeenv, termenv);
builder.build(); builder.build();
log::trace!("builder: {:?}", builder); log::trace!("builder: {:?}", builder);
@@ -518,7 +523,7 @@ impl<'a> Codegen<'a> {
} }
} }
pub fn generate_rust(&self) -> String { fn generate_rust(&self) -> String {
let mut code = String::new(); let mut code = String::new();
self.generate_header(&mut code); self.generate_header(&mut code);
@@ -561,7 +566,7 @@ impl<'a> Codegen<'a> {
"{}fn {}(&mut self, {}) -> {}({},){};", "{}fn {}(&mut self, {}) -> {}({},){};",
indent, indent,
sig.func_name, sig.func_name,
sig.arg_tys sig.param_tys
.iter() .iter()
.enumerate() .enumerate()
.map(|(i, &ty)| format!("arg{}: {}", i, self.type_name(ty, /* by_ref = */ true))) .map(|(i, &ty)| format!("arg{}: {}", i, self.type_name(ty, /* by_ref = */ true)))
@@ -728,7 +733,7 @@ impl<'a> Codegen<'a> {
let sig = termdata.to_sig(self.typeenv).unwrap(); let sig = termdata.to_sig(self.typeenv).unwrap();
let args = sig let args = sig
.arg_tys .param_tys
.iter() .iter()
.enumerate() .enumerate()
.map(|(i, &ty)| format!("arg{}: {}", i, self.type_name(ty, true))) .map(|(i, &ty)| format!("arg{}: {}", i, self.type_name(ty, true)))
@@ -874,7 +879,7 @@ impl<'a> Codegen<'a> {
let outputname = self.value_name(&output); let outputname = self.value_name(&output);
let termdata = &self.termenv.terms[term.index()]; let termdata = &self.termenv.terms[term.index()];
let sig = termdata.to_sig(self.typeenv).unwrap(); let sig = termdata.to_sig(self.typeenv).unwrap();
assert_eq!(input_exprs.len(), sig.arg_tys.len()); assert_eq!(input_exprs.len(), sig.param_tys.len());
let fallible_try = if infallible { "" } else { "?" }; let fallible_try = if infallible { "" } else { "?" };
writeln!( writeln!(
code, code,

View File

@@ -3,9 +3,9 @@
use crate::error::Error; use crate::error::Error;
use crate::{ast, codegen, sema}; use crate::{ast, codegen, sema};
/// Compile the given AST definitions into Rust source code.
pub fn compile(defs: &ast::Defs) -> Result<String, Vec<Error>> { pub fn compile(defs: &ast::Defs) -> Result<String, Vec<Error>> {
let mut typeenv = sema::TypeEnv::from_ast(defs)?; let mut typeenv = sema::TypeEnv::from_ast(defs)?;
let termenv = sema::TermEnv::from_ast(&mut typeenv, defs)?; let termenv = sema::TermEnv::from_ast(&mut typeenv, defs)?;
let codegen = codegen::Codegen::compile(&typeenv, &termenv); Ok(codegen::codegen(&typeenv, &termenv))
Ok(codegen.generate_rust())
} }
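
As a hedged sketch of how a consumer (for example a build script) might drive this public API end to end; the crate path `isle` and the file name are assumptions, and error handling is simplified to `expect`:

```rust
fn main() {
    // Lex and parse an ISLE file, then compile it to Rust source.
    let lexer = isle::lexer::Lexer::from_files(vec!["lower.isle".to_string()])
        .expect("failed to read ISLE input");
    let mut parser = isle::parser::Parser::new(lexer);
    let defs = parser.parse_defs().expect("failed to parse ISLE input");
    let rust_source = isle::compile::compile(&defs).expect("failed to compile ISLE rules");
    std::fs::write("isle_generated.rs", rust_source).expect("failed to write generated code");
}
```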

View File

@@ -3,14 +3,21 @@
use crate::lexer::Pos; use crate::lexer::Pos;
use std::fmt; use std::fmt;
/// Errors produced by ISLE.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum Error { pub enum Error {
/// The input ISLE source has an error.
CompileError { CompileError {
/// The error message.
msg: String, msg: String,
/// The ISLE source filename where the error occurs.
filename: String, filename: String,
/// The position within the file that the error occurs at.
pos: Pos, pos: Pos,
}, },
/// An error from elsewhere in the system.
SystemError { SystemError {
/// The error message.
msg: String, msg: String,
}, },
} }
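
A small hedged sketch (crate path assumed to be `isle`) of constructing one of these errors by hand, the way the parser's `error` helper does, and printing it with its `Debug` impl:

```rust
use isle::error::Error;
use isle::lexer::Pos;

fn main() {
    let err = Error::CompileError {
        msg: "unexpected token".to_string(),
        filename: "lower.isle".to_string(),
        pos: Pos {
            file: 0, // index into the lexer's filename arena
            offset: 42,
            line: 3,
            col: 7,
        },
    };
    println!("{:?}", err);
}
```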

View File

@@ -1,18 +1,31 @@
//! Lowered matching IR. //! Lowered matching IR.
use crate::declare_id;
use crate::lexer::Pos; use crate::lexer::Pos;
use crate::sema::*; use crate::sema::*;
use std::collections::HashMap; use std::collections::HashMap;
declare_id!(InstId); declare_id!(
/// The id of an instruction in a `PatternSequence`.
InstId
);
/// A value produced by an LHS or RHS instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Value { pub enum Value {
/// A value produced by an instruction in the Pattern (LHS). /// A value produced by an instruction in the Pattern (LHS).
Pattern { inst: InstId, output: usize }, Pattern {
/// The instruction that produces this value.
inst: InstId,
/// This value is the `output`th value produced by this pattern.
output: usize,
},
/// A value produced by an instruction in the Expr (RHS). /// A value produced by an instruction in the Expr (RHS).
Expr { inst: InstId, output: usize }, Expr {
/// The instruction that produces this value.
inst: InstId,
/// This value is the `output`th value produced by this expression.
output: usize,
},
} }
/// A single Pattern instruction. /// A single Pattern instruction.
@@ -20,48 +33,81 @@ pub enum Value {
pub enum PatternInst { pub enum PatternInst {
/// Get the Nth input argument, which corresponds to the Nth field /// Get the Nth input argument, which corresponds to the Nth field
/// of the root term. /// of the root term.
Arg { index: usize, ty: TypeId }, Arg {
/// The index of the argument to get.
index: usize,
/// The type of the argument.
ty: TypeId,
},
/// Match a value as equal to another value. Produces no values. /// Match a value as equal to another value. Produces no values.
MatchEqual { a: Value, b: Value, ty: TypeId }, MatchEqual {
/// The first value.
a: Value,
/// The second value.
b: Value,
/// The type of the values.
ty: TypeId,
},
/// Try matching the given value as the given integer. Produces no values. /// Try matching the given value as the given integer. Produces no values.
MatchInt { MatchInt {
/// The value to match on.
input: Value, input: Value,
/// The value's type.
ty: TypeId, ty: TypeId,
/// The integer to match against the value.
int_val: i64, int_val: i64,
}, },
/// Try matching the given value as the given constant. Produces no values. /// Try matching the given value as the given constant. Produces no values.
MatchPrim { input: Value, ty: TypeId, val: Sym }, MatchPrim {
/// The value to match on.
input: Value,
/// The type of the value.
ty: TypeId,
/// The primitive to match against the value.
val: Sym,
},
/// Try matching the given value as the given variant, producing /// Try matching the given value as the given variant, producing `|arg_tys|`
/// `|arg_tys|` values as output. /// values as output.
MatchVariant { MatchVariant {
/// The value to match on.
input: Value, input: Value,
/// The type of the value.
input_ty: TypeId, input_ty: TypeId,
/// The types of values produced upon a successful match.
arg_tys: Vec<TypeId>, arg_tys: Vec<TypeId>,
/// The value type's variant that we are matching against.
variant: VariantId, variant: VariantId,
}, },
/// Invoke an extractor, taking the given values as input (the /// Invoke an extractor, taking the given values as input (the first is the
/// first is the value to extract, the other are the /// value to extract, the other are the `Input`-polarity extractor args) and
/// `Input`-polarity extractor args) and producing an output valu /// producing an output value for each `Output`-polarity extractor arg.
/// efor each `Output`-polarity extractor arg.
Extract { Extract {
/// The value to extract, followed by polarity extractor args.
inputs: Vec<Value>, inputs: Vec<Value>,
/// The types of the inputs.
input_tys: Vec<TypeId>, input_tys: Vec<TypeId>,
/// The types of the output values produced upon a successful match.
output_tys: Vec<TypeId>, output_tys: Vec<TypeId>,
/// This extractor's term.
term: TermId, term: TermId,
/// Whether this extraction is infallible or not.
infallible: bool, infallible: bool,
}, },
/// Evaluate an expression and provide the given value as the /// Evaluate an expression and provide the given value as the result of this
/// result of this match instruction. The expression has access to /// match instruction. The expression has access to the pattern-values up to
/// the pattern-values up to this point in the sequence. /// this point in the sequence.
Expr { Expr {
/// The expression to evaluate.
seq: ExprSequence, seq: ExprSequence,
/// The value produced by the expression.
output: Value, output: Value,
/// The type of the output value.
output_ty: TypeId, output_ty: TypeId,
}, },
} }
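
To make the IR concrete, here is a hedged sketch (crate path `isle` and all id values are invented) of the kind of `PatternSequence` a pattern like `(iadd a b)` might lower into: first fetch the root term's argument, then destructure it as a particular enum variant.

```rust
use isle::ir::{InstId, PatternInst, PatternSequence, Value};
use isle::sema::{TypeId, VariantId};

fn main() {
    let seq = PatternSequence {
        insts: vec![
            // Get the root term's 0th argument.
            PatternInst::Arg {
                index: 0,
                ty: TypeId(0),
            },
            // Destructure that value as some two-field variant, producing two
            // new values that later instructions can match on.
            PatternInst::MatchVariant {
                input: Value::Pattern {
                    inst: InstId(0),
                    output: 0,
                },
                input_ty: TypeId(0),
                arg_tys: vec![TypeId(1), TypeId(1)],
                variant: VariantId(2),
            },
        ],
    };
    println!("{:?}", seq);
}
```
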
@@ -70,35 +116,58 @@ pub enum PatternInst {
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum ExprInst { pub enum ExprInst {
/// Produce a constant integer. /// Produce a constant integer.
ConstInt { ty: TypeId, val: i64 }, ConstInt {
/// The integer type.
ty: TypeId,
/// The integer value. Must fit within the type.
val: i64,
},
/// Produce a constant extern value. /// Produce a constant extern value.
ConstPrim { ty: TypeId, val: Sym }, ConstPrim {
/// The primitive type.
ty: TypeId,
/// The primitive value.
val: Sym,
},
/// Create a variant. /// Create a variant.
CreateVariant { CreateVariant {
/// The input arguments that will make up this variant's fields.
///
/// These must be in the same order as the variant's fields.
inputs: Vec<(Value, TypeId)>, inputs: Vec<(Value, TypeId)>,
/// The enum type.
ty: TypeId, ty: TypeId,
/// The variant within the enum that we are constructing.
variant: VariantId, variant: VariantId,
}, },
/// Invoke a constructor. /// Invoke a constructor.
Construct { Construct {
/// The arguments to the constructor.
inputs: Vec<(Value, TypeId)>, inputs: Vec<(Value, TypeId)>,
/// The type of the constructor.
ty: TypeId, ty: TypeId,
/// The constructor term.
term: TermId, term: TermId,
/// Whether this constructor is infallible or not.
infallible: bool, infallible: bool,
}, },
/// Set the Nth return value. Produces no values. /// Set the Nth return value. Produces no values.
Return { Return {
/// The index of the return value to set.
index: usize, index: usize,
/// The type of the return value.
ty: TypeId, ty: TypeId,
/// The value to set as the `index`th return value.
value: Value, value: Value,
}, },
} }
impl ExprInst { impl ExprInst {
/// Invoke `f` for each value in this expression.
pub fn visit_values<F: FnMut(Value)>(&self, mut f: F) { pub fn visit_values<F: FnMut(Value)>(&self, mut f: F) {
match self { match self {
&ExprInst::ConstInt { .. } => {} &ExprInst::ConstInt { .. } => {}
@@ -117,29 +186,34 @@ impl ExprInst {
} }
/// A linear sequence of instructions that match on and destructure an /// A linear sequence of instructions that match on and destructure an
/// argument. A pattern is fallible (may not match). If it does not /// argument. A pattern is fallible (may not match). If it does not fail, its
/// fail, its result consists of the values produced by the /// result consists of the values produced by the `PatternInst`s, which may be
/// `PatternInst`s, which may be used by a subsequent `Expr`. /// used by a subsequent `Expr`.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Default)] #[derive(Clone, Debug, PartialEq, Eq, Hash, Default)]
pub struct PatternSequence { pub struct PatternSequence {
/// Instruction sequence for pattern. InstId indexes into this /// Instruction sequence for pattern.
/// sequence for `Value::Pattern` values. ///
/// `InstId` indexes into this sequence for `Value::Pattern` values.
pub insts: Vec<PatternInst>, pub insts: Vec<PatternInst>,
} }
/// A linear sequence of instructions that produce a new value from /// A linear sequence of instructions that produce a new value from the
/// the right-hand side of a rule, given bindings that come from a /// right-hand side of a rule, given bindings that come from a `Pattern` derived
/// `Pattern` derived from the left-hand side. /// from the left-hand side.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Default, PartialOrd, Ord)] #[derive(Clone, Debug, PartialEq, Eq, Hash, Default, PartialOrd, Ord)]
pub struct ExprSequence { pub struct ExprSequence {
/// Instruction sequence for expression. InstId indexes into this /// Instruction sequence for expression.
/// sequence for `Value::Expr` values. ///
/// `InstId` indexes into this sequence for `Value::Expr` values.
pub insts: Vec<ExprInst>, pub insts: Vec<ExprInst>,
/// Position at which the rule producing this sequence was located. /// Position at which the rule producing this sequence was located.
pub pos: Pos, pub pos: Pos,
} }
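
As a hedged sketch (crate path and ids invented), the smallest interesting `ExprSequence` is one that materializes a constant and returns it; this two-instruction shape is exactly what the `is_const_int` helper just below looks for.

```rust
use isle::ir::{ExprInst, ExprSequence, InstId, Value};
use isle::lexer::Pos;
use isle::sema::TypeId;

fn main() {
    let seq = ExprSequence {
        insts: vec![
            // Produce the constant 42 of some integer type.
            ExprInst::ConstInt {
                ty: TypeId(0),
                val: 42,
            },
            // Return that constant as the sequence's 0th result.
            ExprInst::Return {
                index: 0,
                ty: TypeId(0),
                value: Value::Expr {
                    inst: InstId(0),
                    output: 0,
                },
            },
        ],
        pos: Pos::default(),
    };
    // Expected to report the constant, i.e. Some((TypeId(0), 42)).
    println!("{:?}", seq.is_const_int());
}
```
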
impl ExprSequence { impl ExprSequence {
/// Is this expression sequence producing a constant integer?
///
/// If so, return the integer type and the constant.
pub fn is_const_int(&self) -> Option<(TypeId, i64)> { pub fn is_const_int(&self) -> Option<(TypeId, i64)> {
if self.insts.len() == 2 && matches!(&self.insts[1], &ExprInst::Return { .. }) { if self.insts.len() == 2 && matches!(&self.insts[1], &ExprInst::Return { .. }) {
match &self.insts[0] { match &self.insts[0] {
@@ -499,13 +573,17 @@ impl ExprSequence {
match expr { match expr {
&Expr::ConstInt(ty, val) => self.add_const_int(ty, val), &Expr::ConstInt(ty, val) => self.add_const_int(ty, val),
&Expr::ConstPrim(ty, val) => self.add_const_prim(ty, val), &Expr::ConstPrim(ty, val) => self.add_const_prim(ty, val),
&Expr::Let(_ty, ref bindings, ref subexpr) => { &Expr::Let {
ty: _ty,
ref bindings,
ref body,
} => {
let mut vars = vars.clone(); let mut vars = vars.clone();
for &(var, _var_ty, ref var_expr) in bindings { for &(var, _var_ty, ref var_expr) in bindings {
let var_value = self.gen_expr(typeenv, termenv, &*var_expr, &vars); let var_value = self.gen_expr(typeenv, termenv, &*var_expr, &vars);
vars.insert(var, var_value); vars.insert(var, var_value);
} }
self.gen_expr(typeenv, termenv, &*subexpr, &vars) self.gen_expr(typeenv, termenv, body, &vars)
} }
&Expr::Var(_ty, var_id) => vars.get(&var_id).cloned().unwrap(), &Expr::Var(_ty, var_id) => vars.get(&var_id).cloned().unwrap(),
&Expr::Term(ty, term, ref arg_exprs) => { &Expr::Term(ty, term, ref arg_exprs) => {
@@ -535,7 +613,7 @@ impl ExprSequence {
/* infallible = */ true, /* infallible = */ true,
) )
} }
_ => panic!("Should have been caught by typechecking"), otherwise => panic!("Should have been caught by typechecking: {:?}", otherwise),
} }
} }
} }

View File

@@ -3,8 +3,14 @@
use crate::error::Error; use crate::error::Error;
use std::borrow::Cow; use std::borrow::Cow;
/// The lexer.
///
/// Breaks source text up into a sequence of tokens (with source positions).
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Lexer<'a> { pub struct Lexer<'a> {
/// Arena of filenames from the input source.
///
/// Indexed via `Pos::file`.
pub filenames: Vec<String>, pub filenames: Vec<String>,
file_starts: Vec<usize>, file_starts: Vec<usize>,
buf: Cow<'a, [u8]>, buf: Cow<'a, [u8]>,
@@ -12,34 +18,52 @@ pub struct Lexer<'a> {
lookahead: Option<(Pos, Token)>, lookahead: Option<(Pos, Token)>,
} }
/// A source position.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Hash, PartialOrd, Ord)] #[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Hash, PartialOrd, Ord)]
pub struct Pos { pub struct Pos {
/// This source position's file.
///
/// Indexes into `Lexer::filenames` early in the compiler pipeline, and
/// later into `TypeEnv::filenames` once we get into semantic analysis.
pub file: usize, pub file: usize,
/// This source position's byte offset in the file.
pub offset: usize, pub offset: usize,
/// This source position's line number in the file.
pub line: usize, pub line: usize,
/// This source position's column number in the file.
pub col: usize, pub col: usize,
} }
impl Pos { impl Pos {
/// Print this source position as `file.isle:12:34`.
pub fn pretty_print(&self, filenames: &[String]) -> String { pub fn pretty_print(&self, filenames: &[String]) -> String {
format!("{}:{}:{}", filenames[self.file], self.line, self.col) format!("{}:{}:{}", filenames[self.file], self.line, self.col)
} }
/// Print this source position as `file.isle line 12`.
pub fn pretty_print_line(&self, filenames: &[String]) -> String { pub fn pretty_print_line(&self, filenames: &[String]) -> String {
format!("{} line {}", filenames[self.file], self.line) format!("{} line {}", filenames[self.file], self.line)
} }
} }
/// A token of ISLE source.
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub enum Token { pub enum Token {
/// Left paren.
LParen, LParen,
/// Right paren.
RParen, RParen,
/// A symbol, e.g. `Foo`.
Symbol(String), Symbol(String),
/// An integer.
Int(i64), Int(i64),
/// `@`
At, At,
/// `<`
Lt, Lt,
} }
impl<'a> Lexer<'a> { impl<'a> Lexer<'a> {
/// Create a new lexer for the given source contents and filename.
pub fn from_str(s: &'a str, filename: &'a str) -> Lexer<'a> { pub fn from_str(s: &'a str, filename: &'a str) -> Lexer<'a> {
let mut l = Lexer { let mut l = Lexer {
filenames: vec![filename.to_string()], filenames: vec![filename.to_string()],
@@ -57,6 +81,7 @@ impl<'a> Lexer<'a> {
l l
} }
/// Create a new lexer from the given files.
pub fn from_files(filenames: Vec<String>) -> Result<Lexer<'a>, Error> { pub fn from_files(filenames: Vec<String>) -> Result<Lexer<'a>, Error> {
assert!(!filenames.is_empty()); assert!(!filenames.is_empty());
let file_contents: Vec<String> = filenames let file_contents: Vec<String> = filenames
@@ -94,10 +119,12 @@ impl<'a> Lexer<'a> {
Ok(l) Ok(l)
} }
/// Get the lexer's current file offset.
pub fn offset(&self) -> usize { pub fn offset(&self) -> usize {
self.pos.offset self.pos.offset
} }
/// Get the lexer's current source position.
pub fn pos(&self) -> Pos { pub fn pos(&self) -> Pos {
self.pos self.pos
} }
@@ -218,10 +245,12 @@ impl<'a> Lexer<'a> {
} }
} }
/// Peek ahead at the next token.
pub fn peek(&self) -> Option<&(Pos, Token)> { pub fn peek(&self) -> Option<&(Pos, Token)> {
self.lookahead.as_ref() self.lookahead.as_ref()
} }
/// Are we at the end of the source input?
pub fn eof(&self) -> bool { pub fn eof(&self) -> bool {
self.lookahead.is_none() self.lookahead.is_none()
} }
@@ -238,6 +267,7 @@ impl<'a> std::iter::Iterator for Lexer<'a> {
} }
impl Token { impl Token {
/// Is this an `Int` token?
pub fn is_int(&self) -> bool { pub fn is_int(&self) -> bool {
match self { match self {
Token::Int(_) => true, Token::Int(_) => true,
@@ -245,6 +275,7 @@ impl Token {
} }
} }
/// Is this a `Sym` token?
pub fn is_sym(&self) -> bool { pub fn is_sym(&self) -> bool {
match self { match self {
Token::Symbol(_) => true, Token::Symbol(_) => true,
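
A hedged sketch of driving the lexer by hand (the crate path `isle` is assumed, and the iterator item is assumed to be the same `(Pos, Token)` pair that `peek` exposes):

```rust
use isle::lexer::Lexer;

fn main() {
    let mut lexer = Lexer::from_str("(decl iadd (Value Value) Inst)", "example.isle");
    while let Some((pos, token)) = lexer.next() {
        // Prints each token with its `file:line:col` position.
        println!("{} {:?}", pos.pretty_print(&lexer.filenames), token);
    }
}
```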

View File

@@ -1,3 +1,23 @@
#![doc = include_str!("../README.md")]
#![deny(missing_docs)]
macro_rules! declare_id {
(
$(#[$attr:meta])*
$name:ident
) => {
$(#[$attr])*
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct $name(pub usize);
impl $name {
/// Get the index of this id.
pub fn index(self) -> usize {
self.0
}
}
};
}
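
For reference, the expansion is a plain newtype index; a hedged, self-contained sketch of what `declare_id!(/** ... */ FooId)` produces, with `FooId` as an invented example name:

```rust
/// An invented example id type, as the macro would declare it.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct FooId(pub usize);

impl FooId {
    /// Get the index of this id.
    pub fn index(self) -> usize {
        self.0
    }
}

fn main() {
    assert_eq!(FooId(7).index(), 7);
}
```
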
pub mod ast; pub mod ast;
pub mod codegen; pub mod codegen;
pub mod compile; pub mod compile;
@@ -6,4 +26,3 @@ pub mod ir;
pub mod lexer; pub mod lexer;
pub mod parser; pub mod parser;
pub mod sema; pub mod sema;

View File

@@ -4,19 +4,24 @@ use crate::ast::*;
use crate::error::*; use crate::error::*;
use crate::lexer::{Lexer, Pos, Token}; use crate::lexer::{Lexer, Pos, Token};
/// The ISLE parser.
///
/// Takes in a lexer and creates an AST.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Parser<'a> { pub struct Parser<'a> {
lexer: Lexer<'a>, lexer: Lexer<'a>,
} }
/// Either `Ok(T)` or an `Err(isle::Error)`.
pub type ParseResult<T> = std::result::Result<T, Error>; pub type ParseResult<T> = std::result::Result<T, Error>;
impl<'a> Parser<'a> { impl<'a> Parser<'a> {
/// Construct a new parser from the given lexer.
pub fn new(lexer: Lexer<'a>) -> Parser<'a> { pub fn new(lexer: Lexer<'a>) -> Parser<'a> {
Parser { lexer } Parser { lexer }
} }
pub fn error(&self, pos: Pos, msg: String) -> Error { fn error(&self, pos: Pos, msg: String) -> Error {
Error::CompileError { Error::CompileError {
filename: self.lexer.filenames[pos.file].clone(), filename: self.lexer.filenames[pos.file].clone(),
pos, pos,
@@ -106,6 +111,7 @@ impl<'a> Parser<'a> {
} }
} }
/// Parse the top-level ISLE definitions and return their AST.
pub fn parse_defs(&mut self) -> ParseResult<Defs> { pub fn parse_defs(&mut self) -> ParseResult<Defs> {
let mut defs = vec![]; let mut defs = vec![];
while !self.lexer.eof() { while !self.lexer.eof() {

View File

@@ -1,121 +1,226 @@
//! Semantic analysis. //! Semantic analysis.
//!
//! This module primarily contains the type environment and term environment.
//!
//! The type environment is constructed by analyzing an input AST. The type
//! environment records the types used in the input source and the types of our
//! various rules and symbols. ISLE's type system is intentionally easy to
//! check, only requires a single pass over the AST, and doesn't require any
//! unification or anything like that.
//!
//! The term environment is constructed from both the AST and type
//! environment. It is sort of a typed and reorganized AST that more directly
//! reflects ISLE semantics than the input ISLE source code (whereas the AST is
//! the opposite).
use crate::ast; use crate::ast;
use crate::error::*; use crate::error::*;
use crate::lexer::Pos; use crate::lexer::Pos;
use std::collections::HashMap; use std::collections::HashMap;
/// Either `Ok(T)` or one or more `Error`s.
///
/// This allows us to return multiple type errors at the same time, for example.
pub type SemaResult<T> = std::result::Result<T, Vec<Error>>; pub type SemaResult<T> = std::result::Result<T, Vec<Error>>;
#[macro_export] declare_id!(
macro_rules! declare_id { /// The id of an interned symbol.
($name:ident) => { Sym
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] );
pub struct $name(pub usize); declare_id!(
impl $name { /// The id of an interned type inside the `TypeEnv`.
pub fn index(self) -> usize { TypeId
self.0 );
} declare_id!(
} /// The id of a variant inside an enum.
}; VariantId
} );
declare_id!(
declare_id!(Sym); /// The id of a field inside a variant.
declare_id!(TypeId); FieldId
declare_id!(VariantId); );
declare_id!(FieldId); declare_id!(
declare_id!(TermId); /// The id of an interned term inside the `TermEnv`.
declare_id!(RuleId); TermId
declare_id!(VarId); );
declare_id!(
/// The id of an interned rule inside the `TermEnv`.
RuleId
);
declare_id!(
/// The id of a bound variable inside a `Bindings`.
VarId
);
/// The type environment.
///
/// Keeps track of which symbols and rules have which types.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct TypeEnv { pub struct TypeEnv {
/// Arena of input ISLE source filenames.
///
/// We refer to these indirectly through the `Pos::file` indices.
pub filenames: Vec<String>, pub filenames: Vec<String>,
/// Arena of interned symbol names.
///
/// Referred to indirectly via `Sym` indices.
pub syms: Vec<String>, pub syms: Vec<String>,
/// Map of already-interned symbol names to their `Sym` ids.
pub sym_map: HashMap<String, Sym>, pub sym_map: HashMap<String, Sym>,
/// Arena of type definitions.
///
/// Referred to indirectly via `TypeId`s.
pub types: Vec<Type>, pub types: Vec<Type>,
/// A map from a type name symbol to its `TypeId`.
pub type_map: HashMap<Sym, TypeId>, pub type_map: HashMap<Sym, TypeId>,
/// The types of constant symbols.
pub const_types: HashMap<Sym, TypeId>, pub const_types: HashMap<Sym, TypeId>,
/// Type errors that we've found so far during type checking.
pub errors: Vec<Error>, pub errors: Vec<Error>,
} }
/// A type.
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub enum Type { pub enum Type {
/// A primitive, `Copy` type.
///
/// These are always defined externally, and we allow literals of these
/// types to pass through from ISLE source code to the emitted Rust code.
Primitive(TypeId, Sym), Primitive(TypeId, Sym),
/// A sum type.
///
/// Note that enums with only one variant are equivalent to a "struct".
Enum { Enum {
/// The name of this enum.
name: Sym, name: Sym,
/// This `enum`'s type id.
id: TypeId, id: TypeId,
/// Is this `enum` defined in external Rust code?
///
/// If so, ISLE will not emit a definition for it. If not, then it will
/// emit a Rust definition for it.
is_extern: bool, is_extern: bool,
/// The different variants for this enum.
variants: Vec<Variant>, variants: Vec<Variant>,
/// The ISLE source position where this `enum` is defined.
pos: Pos, pos: Pos,
}, },
} }
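
A hedged sketch (crate path and all interned indices invented) of the in-memory `Type` that a definition along the lines of `(type Foo (enum (Bar (x u32))))` would produce:

```rust
use isle::lexer::Pos;
use isle::sema::{Field, FieldId, Sym, Type, TypeId, Variant, VariantId};

fn main() {
    let ty = Type::Enum {
        name: Sym(0), // "Foo" in the TypeEnv's symbol arena
        id: TypeId(0),
        is_extern: false, // ISLE would emit a Rust definition for this enum
        variants: vec![Variant {
            name: Sym(1),     // "Bar"
            fullname: Sym(2), // "Foo.Bar"
            id: VariantId(0),
            fields: vec![Field {
                name: Sym(3),  // "x"
                id: FieldId(0),
                ty: TypeId(1), // the primitive type of the field
            }],
        }],
        pos: Pos::default(),
    };
    println!("{:?}", ty.is_prim()); // false
}
```
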
impl Type { impl Type {
/// Get the name of this `Type`.
pub fn name<'a>(&self, tyenv: &'a TypeEnv) -> &'a str { pub fn name<'a>(&self, tyenv: &'a TypeEnv) -> &'a str {
match self { match self {
Self::Primitive(_, name) | Self::Enum { name, .. } => &tyenv.syms[name.index()], Self::Primitive(_, name) | Self::Enum { name, .. } => &tyenv.syms[name.index()],
} }
} }
/// Is this a primitive type?
pub fn is_prim(&self) -> bool { pub fn is_prim(&self) -> bool {
match self { matches!(self, Type::Primitive(..))
&Type::Primitive(..) => true,
_ => false,
}
} }
} }
/// A variant of an enum.
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct Variant { pub struct Variant {
/// The name of this variant.
pub name: Sym, pub name: Sym,
/// The full, prefixed-with-the-enum's-name name of this variant.
///
/// E.g. if the enum is `Foo` and this variant is `Bar`, then the
/// `fullname` is `Foo.Bar`.
pub fullname: Sym, pub fullname: Sym,
/// The id of this variant, i.e. the index of this variant within its
/// enum's `Type::Enum::variants`.
pub id: VariantId, pub id: VariantId,
/// The data fields of this enum variant.
pub fields: Vec<Field>, pub fields: Vec<Field>,
} }
/// A field of a `Variant`.
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct Field { pub struct Field {
/// The name of this field.
pub name: Sym, pub name: Sym,
/// This field's id.
pub id: FieldId, pub id: FieldId,
/// The type of this field.
pub ty: TypeId, pub ty: TypeId,
} }
/// The term environment.
///
/// This is sort of a typed and reorganized AST that more directly reflects ISLE
/// semantics than the input ISLE source code (whereas the AST is the
/// opposite).
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct TermEnv { pub struct TermEnv {
/// Arena of interned terms defined in this ISLE program.
///
/// This is indexed by `TermId`.
pub terms: Vec<Term>, pub terms: Vec<Term>,
/// A map from an interned `Term`'s name to its `TermId`.
pub term_map: HashMap<Sym, TermId>, pub term_map: HashMap<Sym, TermId>,
/// Arena of interned rules defined in this ISLE program.
///
/// This is indexed by `RuleId`.
pub rules: Vec<Rule>, pub rules: Vec<Rule>,
} }
/// A term.
///
/// Maps parameter types to result types if this is a constructor term, or
/// result types to parameter types if this is an extractor term. Or both if
/// this term can be either a constructor or an extractor.
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct Term { pub struct Term {
/// This term's id.
pub id: TermId, pub id: TermId,
/// The name of this term.
pub name: Sym, pub name: Sym,
/// The parameter types to this term.
pub arg_tys: Vec<TypeId>, pub arg_tys: Vec<TypeId>,
/// The result type of this term.
pub ret_ty: TypeId, pub ret_ty: TypeId,
/// The kind of this term.
pub kind: TermKind, pub kind: TermKind,
} }
/// The kind of a term.
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub enum TermKind { pub enum TermKind {
/// An enum variant constructor or extractor.
EnumVariant { EnumVariant {
/// Which variant of the enum: e.g. for enum type `A` if a /// Which variant of the enum: e.g. for enum type `A` if a term is
/// term is `(A.A1 ...)` then the variant ID corresponds to /// `(A.A1 ...)` then the variant ID corresponds to `A1`.
/// `A1`.
variant: VariantId, variant: VariantId,
}, },
/// A term with "internal" rules that work in the forward /// A term with "internal" rules that work in the forward direction. Becomes
/// direction. Becomes a compiled Rust function in the generated /// a compiled Rust function in the generated code.
/// code.
InternalConstructor, InternalConstructor,
/// A term that defines an "extractor macro" in the LHS of a /// A term that defines an "extractor macro" in the LHS of a pattern. Its
/// pattern. Its arguments take patterns and are simply /// arguments take patterns and are simply substituted with the given
/// substituted with the given patterns when used. /// patterns when used.
InternalExtractor { template: ast::Pattern }, InternalExtractor {
/// This extractor's pattern.
template: ast::Pattern,
},
/// A term defined solely by an external extractor function. /// A term defined solely by an external extractor function.
ExternalExtractor { ExternalExtractor {
/// Extractor func. /// The external name of the extractor function.
name: Sym, name: Sym,
/// Which arguments of the extractor are inputs and which are outputs? /// Which arguments of the extractor are inputs and which are outputs?
arg_polarity: Vec<ArgPolarity>, arg_polarity: Vec<ArgPolarity>,
@@ -124,7 +229,7 @@ pub enum TermKind {
}, },
/// A term defined solely by an external constructor function. /// A term defined solely by an external constructor function.
ExternalConstructor { ExternalConstructor {
/// Constructor func. /// The external name of the constructor function.
name: Sym, name: Sym,
}, },
/// Declared but no body or externs associated (yet). /// Declared but no body or externs associated (yet).
@@ -133,27 +238,28 @@ pub enum TermKind {
pub use crate::ast::ArgPolarity; pub use crate::ast::ArgPolarity;
/// An external function signature.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct ExternalSig { pub struct ExternalSig {
/// The name of the external function.
pub func_name: String, pub func_name: String,
/// The name of the external function, prefixed with the context trait.
pub full_name: String, pub full_name: String,
pub arg_tys: Vec<TypeId>, /// The types of this function signature's parameters.
pub param_tys: Vec<TypeId>,
/// The types of this function signature's results.
pub ret_tys: Vec<TypeId>, pub ret_tys: Vec<TypeId>,
/// Whether this signature is infallible or not.
pub infallible: bool, pub infallible: bool,
} }
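
A hedged illustration (names and type ids invented) of the signature that `to_sig`, shown below, would build for an external constructor whose Rust function is `lower_add`, taking two values and producing one result:

```rust
use isle::sema::{ExternalSig, TypeId};

fn main() {
    let sig = ExternalSig {
        func_name: "lower_add".to_string(),
        full_name: "C::lower_add".to_string(), // prefixed with the context trait
        param_tys: vec![TypeId(0), TypeId(0)],
        ret_tys: vec![TypeId(1)],
        infallible: true, // external constructors are treated as infallible
    };
    println!("{:?}", sig);
}
```
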
impl Term { impl Term {
/// Get this term's type.
pub fn ty(&self) -> TypeId { pub fn ty(&self) -> TypeId {
self.ret_ty self.ret_ty
} }
pub fn to_variant(&self) -> Option<VariantId> { /// Is this term a constructor?
match &self.kind {
&TermKind::EnumVariant { variant } => Some(variant),
_ => None,
}
}
pub fn is_constructor(&self) -> bool { pub fn is_constructor(&self) -> bool {
match &self.kind { match &self.kind {
&TermKind::InternalConstructor { .. } | &TermKind::ExternalConstructor { .. } => true, &TermKind::InternalConstructor { .. } | &TermKind::ExternalConstructor { .. } => true,
@@ -161,13 +267,7 @@ impl Term {
} }
} }
pub fn is_extractor(&self) -> bool { /// Is this term external?
match &self.kind {
&TermKind::InternalExtractor { .. } | &TermKind::ExternalExtractor { .. } => true,
_ => false,
}
}
pub fn is_external(&self) -> bool { pub fn is_external(&self) -> bool {
match &self.kind { match &self.kind {
&TermKind::ExternalExtractor { .. } | &TermKind::ExternalConstructor { .. } => true, &TermKind::ExternalExtractor { .. } | &TermKind::ExternalConstructor { .. } => true,
@@ -175,12 +275,13 @@ impl Term {
} }
} }
/// Get this term's external function signature, if any.
pub fn to_sig(&self, tyenv: &TypeEnv) -> Option<ExternalSig> { pub fn to_sig(&self, tyenv: &TypeEnv) -> Option<ExternalSig> {
match &self.kind { match &self.kind {
&TermKind::ExternalConstructor { name } => Some(ExternalSig { &TermKind::ExternalConstructor { name } => Some(ExternalSig {
func_name: tyenv.syms[name.index()].clone(), func_name: tyenv.syms[name.index()].clone(),
full_name: format!("C::{}", tyenv.syms[name.index()]), full_name: format!("C::{}", tyenv.syms[name.index()]),
arg_tys: self.arg_tys.clone(), param_tys: self.arg_tys.clone(),
ret_tys: vec![self.ret_ty], ret_tys: vec![self.ret_ty],
infallible: true, infallible: true,
}), }),
@@ -205,7 +306,7 @@ impl Term {
Some(ExternalSig { Some(ExternalSig {
func_name: tyenv.syms[name.index()].clone(), func_name: tyenv.syms[name.index()].clone(),
full_name: format!("C::{}", tyenv.syms[name.index()]), full_name: format!("C::{}", tyenv.syms[name.index()]),
arg_tys, param_tys: arg_tys,
ret_tys, ret_tys,
infallible, infallible,
}) })
@@ -215,7 +316,7 @@ impl Term {
Some(ExternalSig { Some(ExternalSig {
func_name: name.clone(), func_name: name.clone(),
full_name: name, full_name: name,
arg_tys: self.arg_tys.clone(), param_tys: self.arg_tys.clone(),
ret_tys: vec![self.ret_ty], ret_tys: vec![self.ret_ty],
infallible: false, infallible: false,
}) })
@@ -225,42 +326,87 @@ impl Term {
} }
} }
/// A term rewrite rule.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Rule { pub struct Rule {
/// This rule's id.
pub id: RuleId, pub id: RuleId,
/// The left-hand side pattern that this rule matches.
pub lhs: Pattern, pub lhs: Pattern,
/// The right-hand side expression that this rule evaluates upon successful
/// match.
pub rhs: Expr, pub rhs: Expr,
/// The priority of this rule, if any.
pub prio: Option<i64>, pub prio: Option<i64>,
/// The source position where this rule is defined.
pub pos: Pos, pub pos: Pos,
} }
/// A left-hand side pattern of some rule.
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub enum Pattern { pub enum Pattern {
/// Bind a variable of the given type from the current value.
///
/// Keep matching on the value with the subpattern.
BindPattern(TypeId, VarId, Box<Pattern>), BindPattern(TypeId, VarId, Box<Pattern>),
/// Match the current value against an already bound variable with the given
/// type.
Var(TypeId, VarId), Var(TypeId, VarId),
/// Match the current value against a constant integer of the given integer
/// type.
ConstInt(TypeId, i64), ConstInt(TypeId, i64),
/// Match the current value against a constant primitive value of the given
/// primitive type.
ConstPrim(TypeId, Sym), ConstPrim(TypeId, Sym),
/// Match the current value against the given extractor term with the given
/// arguments.
Term(TypeId, TermId, Vec<TermArgPattern>), Term(TypeId, TermId, Vec<TermArgPattern>),
/// Match anything of the given type successfully.
Wildcard(TypeId), Wildcard(TypeId),
/// Match all of the following patterns of the given type.
And(TypeId, Vec<Pattern>), And(TypeId, Vec<Pattern>),
} }
/// Arguments to a term inside a pattern (i.e. an extractor).
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub enum TermArgPattern { pub enum TermArgPattern {
/// A pattern to match sub-values (i.e. the extractor's results) against.
Pattern(Pattern), Pattern(Pattern),
/// An expression to generate a value that is passed into the extractor.
Expr(Expr), Expr(Expr),
} }
/// A right-hand side expression of some rule.
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub enum Expr { pub enum Expr {
/// Invoke this term constructor with the given arguments.
Term(TypeId, TermId, Vec<Expr>), Term(TypeId, TermId, Vec<Expr>),
/// Get the value of a variable that was bound in the left-hand side.
Var(TypeId, VarId), Var(TypeId, VarId),
/// Get a constant integer.
ConstInt(TypeId, i64), ConstInt(TypeId, i64),
/// Get a constant primitive.
ConstPrim(TypeId, Sym), ConstPrim(TypeId, Sym),
Let(TypeId, Vec<(VarId, TypeId, Box<Expr>)>, Box<Expr>), /// Evaluate the nested expressions and bind their results to the given
/// variables, then evaluate the body expression.
Let {
/// The type of the result of this let expression.
ty: TypeId,
/// The expressions that are evaluated and bound to the given variables.
bindings: Vec<(VarId, TypeId, Box<Expr>)>,
/// The body expression that is evaluated after the bindings.
body: Box<Expr>,
},
} }
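
A hedged sketch (ids invented) of how a right-hand side such as `(let ((x u32 42)) x)` is represented with the new struct-style `Expr::Let` variant:

```rust
use isle::sema::{Expr, TypeId, VarId};

fn main() {
    let expr = Expr::Let {
        ty: TypeId(0),
        bindings: vec![(
            VarId(0),
            TypeId(0),
            Box::new(Expr::ConstInt(TypeId(0), 42)),
        )],
        body: Box::new(Expr::Var(TypeId(0), VarId(0))),
    };
    // The let expression's type is the type of its body.
    println!("{:?}", expr.ty());
}
```
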
impl Pattern { impl Pattern {
/// Get this pattern's type.
pub fn ty(&self) -> TypeId { pub fn ty(&self) -> TypeId {
match self { match self {
&Self::BindPattern(t, ..) => t, &Self::BindPattern(t, ..) => t,
@@ -273,6 +419,7 @@ impl Pattern {
} }
} }
/// Get the root term of this pattern, if any.
pub fn root_term(&self) -> Option<TermId> { pub fn root_term(&self) -> Option<TermId> {
match self { match self {
&Pattern::Term(_, term, _) => Some(term), &Pattern::Term(_, term, _) => Some(term),
@@ -283,18 +430,20 @@ impl Pattern {
} }
impl Expr { impl Expr {
/// Get this expression's type.
pub fn ty(&self) -> TypeId { pub fn ty(&self) -> TypeId {
match self { match self {
&Self::Term(t, ..) => t, &Self::Term(t, ..) => t,
&Self::Var(t, ..) => t, &Self::Var(t, ..) => t,
&Self::ConstInt(t, ..) => t, &Self::ConstInt(t, ..) => t,
&Self::ConstPrim(t, ..) => t, &Self::ConstPrim(t, ..) => t,
&Self::Let(t, ..) => t, &Self::Let { ty: t, .. } => t,
} }
} }
} }
impl TypeEnv { impl TypeEnv {
/// Construct the type environment from the AST.
pub fn from_ast(defs: &ast::Defs) -> SemaResult<TypeEnv> { pub fn from_ast(defs: &ast::Defs) -> SemaResult<TypeEnv> {
let mut tyenv = TypeEnv { let mut tyenv = TypeEnv {
filenames: defs.filenames.clone(), filenames: defs.filenames.clone(),
@@ -467,7 +616,7 @@ impl TypeEnv {
self.errors.push(err); self.errors.push(err);
} }
pub fn intern_mut(&mut self, ident: &ast::Ident) -> Sym { fn intern_mut(&mut self, ident: &ast::Ident) -> Sym {
if let Some(s) = self.sym_map.get(&ident.0).cloned() { if let Some(s) = self.sym_map.get(&ident.0).cloned() {
s s
} else { } else {
@@ -478,7 +627,7 @@ impl TypeEnv {
} }
} }
pub fn intern(&self, ident: &ast::Ident) -> Option<Sym> { fn intern(&self, ident: &ast::Ident) -> Option<Sym> {
self.sym_map.get(&ident.0).cloned() self.sym_map.get(&ident.0).cloned()
} }
} }
@@ -497,6 +646,7 @@ struct BoundVar {
} }
impl TermEnv { impl TermEnv {
/// Construct the term environment from the AST and the type environment.
pub fn from_ast(tyenv: &mut TypeEnv, defs: &ast::Defs) -> SemaResult<TermEnv> { pub fn from_ast(tyenv: &mut TypeEnv, defs: &ast::Defs) -> SemaResult<TermEnv> {
let mut env = TermEnv { let mut env = TermEnv {
terms: vec![], terms: vec![],
@@ -1274,7 +1424,11 @@ impl TermEnv {
// Pop the bindings. // Pop the bindings.
bindings.vars.truncate(orig_binding_len); bindings.vars.truncate(orig_binding_len);
Some(Expr::Let(body_ty, let_defs, body)) Some(Expr::Let {
ty: body_ty,
bindings: let_defs,
body,
})
} }
} }
} }