Initial public commit of ISLE prototype DSL compiler.

This commit is contained in:
Chris Fallin
2021-06-29 17:00:43 -07:00
parent f2939111d9
commit 84b7612b98
13 changed files with 3201 additions and 0 deletions

3
cranelift/isle/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
/target
*~
.*.swp

199
cranelift/isle/Cargo.lock generated Normal file
View File

@@ -0,0 +1,199 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "0.7.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
dependencies = [
"memchr",
]
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "env_logger"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
dependencies = [
"atty",
"humantime",
"log",
"regex",
"termcolor",
]
[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "isle"
version = "0.1.0"
dependencies = [
"env_logger",
"log",
"thiserror",
]
[[package]]
name = "libc"
version = "0.2.97"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12b8adadd720df158f4d70dfe7ccc6adb0472d7c55ca83445f6a5ab3e36f8fb6"
[[package]]
name = "log"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
dependencies = [
"cfg-if",
]
[[package]]
name = "memchr"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
[[package]]
name = "proc-macro2"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612"
dependencies = [
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]]
name = "syn"
version = "1.0.75"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7f58f7e8eaa0009c5fec437aabf511bd9933e4b2d7407bd05273c01a8906ea7"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "termcolor"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4"
dependencies = [
"winapi-util",
]
[[package]]
name = "thiserror"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-xid"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

11
cranelift/isle/Cargo.toml Normal file
View File

@@ -0,0 +1,11 @@
[package]
name = "isle"
version = "0.1.0"
authors = ["Chris Fallin <chris@cfallin.org>"]
edition = "2018"
license = "Apache-2.0 WITH LLVM-exception"
[dependencies]
log = "0.4"
env_logger = "0.8"
thiserror = "1.0"

View File

@@ -0,0 +1,12 @@
(type u32 (primitive u32))
(type A (enum (A1 (x u32)) (A2 (x u32))))
(type B (enum (B1 (x u32)) (B2 (x u32))))
(decl Input (A) u32)
(extractor Input get_input) ;; fn get_input<C>(ctx: &mut C, ret: u32) -> Option<(A,)>
(decl Lower (A) B)
(rule
(Lower (A.A1 sub @ (Input (A.A2 42))))
(B.B2 sub))

135
cranelift/isle/src/ast.rs Normal file
View File

@@ -0,0 +1,135 @@
use crate::lexer::Pos;
/// The parsed form of an ISLE file.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Defs {
    /// All toplevel forms, in source order.
    pub defs: Vec<Def>,
    /// The filename the forms were parsed from (used in diagnostics).
    pub filename: String,
}
/// One toplevel form in an ISLE file.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Def {
    /// A `(type ...)` definition.
    Type(Type),
    /// A `(rule ...)` definition.
    Rule(Rule),
    /// A `(decl ...)` term declaration.
    Decl(Decl),
    /// A `(constructor ...)` or `(extractor ...)` external binding.
    Extern(Extern),
}
/// An identifier -- a variable, term symbol, or type.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Ident(pub String);
/// A declaration of a type.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Type {
    /// The type's name.
    pub name: Ident,
    /// Whether the type was declared `extern` (its Rust definition
    /// lives outside the generated code).
    pub is_extern: bool,
    /// The type's definition: a primitive or an enum.
    pub ty: TypeValue,
    /// Source position, for diagnostics.
    pub pos: Pos,
}
/// The actual type-value: a primitive or an enum with variants.
///
/// TODO: add structs as well?
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum TypeValue {
    /// A primitive type, named by the identifier of the underlying type.
    Primitive(Ident),
    /// An enum type with the given variants.
    Enum(Vec<Variant>),
}
/// One variant of an enum type.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Variant {
    pub name: Ident,
    pub fields: Vec<Field>,
}
/// One field of an enum variant.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Field {
    pub name: Ident,
    /// The name of the field's type.
    pub ty: Ident,
}
/// A declaration of a term with its argument and return types.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Decl {
    /// The term being declared.
    pub term: Ident,
    /// Names of the argument types, in order.
    pub arg_tys: Vec<Ident>,
    /// Name of the return type.
    pub ret_ty: Ident,
    /// Source position, for diagnostics.
    pub pos: Pos,
}
/// A rewrite rule: a pattern (left-hand side) and the expression
/// (right-hand side) it rewrites to, with an optional priority.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Rule {
    pub pattern: Pattern,
    pub expr: Expr,
    pub pos: Pos,
    /// Optional priority; `None` when not written in the source.
    pub prio: Option<i64>,
}
/// A pattern: the left-hand side of a rule.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Pattern {
    /// An operator that binds a variable to a subterm and matches the
    /// subpattern.
    BindPattern { var: Ident, subpat: Box<Pattern> },
    /// A variable that has already been bound (`=x` syntax).
    Var { var: Ident },
    /// An operator that matches a constant integer value.
    ConstInt { val: i64 },
    /// An application of a type variant or term.
    Term { sym: Ident, args: Vec<Pattern> },
    /// An operator that matches anything.
    Wildcard,
}
/// An expression: the right-hand side of a rule.
///
/// Note that this *almost* looks like a core Lisp or lambda calculus,
/// except that there is no abstraction (lambda). This first-order
/// limit is what makes it analyzable.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Expr {
    /// A term: `(sym args...)`.
    Term { sym: Ident, args: Vec<Expr> },
    /// A variable use.
    Var { name: Ident },
    /// A constant integer.
    ConstInt { val: i64 },
    /// The `(let ((var ty val)*) body)` form.
    Let { defs: Vec<LetDef>, body: Box<Expr> },
}
/// One variable locally bound in a `(let ...)` expression.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct LetDef {
    pub var: Ident,
    pub ty: Ident,
    pub val: Box<Expr>,
}
/// An external binding: an extractor or constructor function attached
/// to a term.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Extern {
    /// An external extractor: `(extractor Term rustfunc)` form.
    Extractor {
        /// The term to which this external extractor is attached.
        term: Ident,
        /// The Rust function name.
        func: Ident,
        /// The position of this decl.
        pos: Pos,
    },
    /// An external constructor: `(constructor Term rustfunc)` form.
    Constructor {
        /// The term to which this external constructor is attached.
        term: Ident,
        /// The Rust function name.
        func: Ident,
        /// The position of this decl.
        pos: Pos,
    },
}

View File

@@ -0,0 +1,111 @@
//! Compilation process, from AST to Sema to Sequences of Insts.
use crate::error::*;
use crate::{ast, ir, sema};
use std::collections::HashMap;
/// A Compiler manages the compilation pipeline from AST to Sequences.
pub struct Compiler<'a> {
    /// The parsed input program.
    ast: &'a ast::Defs,
    /// Type definitions and interned symbols from semantic analysis.
    type_env: sema::TypeEnv,
    /// Terms and rules from semantic analysis.
    term_env: sema::TermEnv,
    /// One lowered `Sequence` per rule, filled by `build_sequences`
    /// (indexed by rule id).
    seqs: Vec<ir::Sequence>,
    // TODO: if this becomes a perf issue, then build a better data
    // structure. For now we index on root term/variant.
    //
    // TODO: index at callsites (extractors/constructors) too. We'll
    // need tree-summaries of arg and expected return value at each
    // callsite.
    term_db: HashMap<ir::TermOrVariant, TermData>,
}
/// Per-term bookkeeping: which rules produce/consume the term, and
/// whether external Rust hooks are attached to it.
#[derive(Clone, Debug, Default)]
struct TermData {
    /// Rules whose RHS roots at this term, with their output summaries.
    producers: Vec<(ir::TreeSummary, sema::RuleId)>,
    /// Rules whose LHS roots at this term, with their input summaries.
    consumers: Vec<(ir::TreeSummary, sema::RuleId)>,
    /// True if an external constructor is declared for this term.
    has_constructor: bool,
    /// True if an external extractor is declared for this term.
    has_extractor: bool,
}
/// Result type for compilation steps.
pub type CompileResult<T> = Result<T, Error>;
impl<'a> Compiler<'a> {
    /// Create a compiler for the given parsed program, running type
    /// and term/rule semantic analysis up front.
    pub fn new(ast: &'a ast::Defs) -> CompileResult<Compiler<'a>> {
        let mut type_env = sema::TypeEnv::from_ast(ast)?;
        let term_env = sema::TermEnv::from_ast(&mut type_env, ast)?;
        Ok(Compiler {
            ast,
            type_env,
            term_env,
            seqs: vec![],
            term_db: HashMap::new(),
        })
    }

    /// Lower every rule into an `ir::Sequence`, in rule-id order (so
    /// `seqs[i]` corresponds to `RuleId(i)`).
    pub fn build_sequences(&mut self) -> CompileResult<()> {
        for rid in 0..self.term_env.rules.len() {
            let rid = sema::RuleId(rid);
            let seq = ir::Sequence::from_rule(&self.type_env, &self.term_env, rid);
            self.seqs.push(seq);
        }
        Ok(())
    }

    /// Index each rule's input/output tree summaries under their root
    /// term/variant, and record which terms have external
    /// constructors/extractors attached.
    pub fn collect_tree_summaries(&mut self) -> CompileResult<()> {
        // For each rule, compute summaries of its LHS and RHS, then
        // index it in the appropriate TermData.
        for (i, seq) in self.seqs.iter().enumerate() {
            let rule_id = sema::RuleId(i);
            let consumer_summary = seq.input_tree_summary();
            let producer_summary = seq.output_tree_summary();
            if let Some(consumer_root_term) = consumer_summary.root() {
                self.term_db
                    .entry(consumer_root_term.clone())
                    .or_default()
                    .consumers
                    .push((consumer_summary, rule_id));
            }
            if let Some(producer_root_term) = producer_summary.root() {
                // Bug fix: a rule whose RHS roots at this term is a
                // *producer* of it. Previously the output summary was
                // pushed onto `consumers`, so `producers` stayed empty
                // and consumer lists contained both kinds of entries.
                self.term_db
                    .entry(producer_root_term.clone())
                    .or_default()
                    .producers
                    .push((producer_summary, rule_id));
            }
        }
        // For each term, if a constructor and/or extractor is
        // present, note that.
        for term in &self.term_env.terms {
            if let sema::TermKind::Regular {
                extractor,
                constructor,
            } = term.kind
            {
                // Skip terms with no external hooks at all: no need to
                // materialize a TermData entry for them.
                if extractor.is_none() && constructor.is_none() {
                    continue;
                }
                let entry = self
                    .term_db
                    .entry(ir::TermOrVariant::Term(term.id))
                    .or_default();
                if extractor.is_some() {
                    entry.has_extractor = true;
                }
                if constructor.is_some() {
                    entry.has_constructor = true;
                }
            }
        }
        Ok(())
    }

    /// Inline rules for internal (non-extern) terms. Not yet
    /// implemented.
    pub fn inline_internal_terms(&mut self) -> CompileResult<()> {
        unimplemented!()
    }

    /// Consume the compiler, yielding the lowered sequences.
    pub fn to_sequences(self) -> Vec<ir::Sequence> {
        self.seqs
    }
}

View File

@@ -0,0 +1,48 @@
//! Error types.
use crate::lexer::Pos;
use thiserror::Error;
/// Any toplevel compiler error: parsing, semantic analysis, or I/O.
#[derive(Debug, Error)]
pub enum Error {
    /// An error produced while parsing ISLE source.
    #[error("Parse error")]
    ParseError(#[from] ParseError),
    /// An error produced during semantic analysis.
    #[error("Semantic error")]
    SemaError(#[from] SemaError),
    /// An underlying I/O failure (e.g., reading the input).
    #[error("IO error")]
    IoError(#[from] std::io::Error),
}
/// A parse error, carrying the source location at which it occurred.
/// `Display` is implemented manually (rendering `file:line:col: msg`)
/// rather than via a `#[error(...)]` attribute.
#[derive(Clone, Debug, Error)]
pub struct ParseError {
    /// Human-readable description of the problem.
    pub msg: String,
    /// Name of the file being parsed.
    pub filename: String,
    /// Location within that file.
    pub pos: Pos,
}
/// A semantic-analysis error, carrying the source location at which it
/// occurred. `Display` is implemented manually, as for `ParseError`.
#[derive(Clone, Debug, Error)]
pub struct SemaError {
    /// Human-readable description of the problem.
    pub msg: String,
    /// Name of the file being analyzed.
    pub filename: String,
    /// Location within that file.
    pub pos: Pos,
}
impl std::fmt::Display for ParseError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f,
"{}:{}:{}: {}",
self.filename, self.pos.line, self.pos.col, self.msg
)
}
}
impl std::fmt::Display for SemaError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f,
"{}:{}:{}: {}",
self.filename, self.pos.line, self.pos.col, self.msg
)
}
}

1089
cranelift/isle/src/ir.rs Normal file

File diff suppressed because it is too large Load Diff

241
cranelift/isle/src/lexer.rs Normal file
View File

@@ -0,0 +1,241 @@
//! Lexer for the ISLE language.
/// A lexer over ISLE source text.
///
/// Wraps the input bytes and hands out `(Pos, Token)` pairs with one
/// token of lookahead (see `peek`).
#[derive(Clone, Debug)]
pub struct Lexer<'a> {
    /// The input as raw bytes; tokens contain only ASCII, so byte
    /// indexing is used throughout.
    buf: &'a [u8],
    /// Current scan position.
    pos: Pos,
    /// One token of lookahead, kept filled by `reload()`.
    lookahead: Option<(Pos, Token<'a>)>,
}

/// A position in the source: byte offset, 1-based line, and 0-based
/// column.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Pos {
    pub offset: usize,
    pub line: usize,
    pub col: usize,
}

/// A single lexed token.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Token<'a> {
    LParen,
    RParen,
    /// A symbol, borrowing its text from the input.
    Symbol(&'a str),
    /// An integer literal, possibly negative.
    Int(i64),
}

impl<'a> Lexer<'a> {
    /// Create a lexer over `s` and prime the one-token lookahead.
    pub fn new(s: &'a str) -> Lexer<'a> {
        let mut l = Lexer {
            buf: s.as_bytes(),
            pos: Pos {
                offset: 0,
                line: 1,
                col: 0,
            },
            lookahead: None,
        };
        l.reload();
        l
    }

    /// Current byte offset into the input.
    pub fn offset(&self) -> usize {
        self.pos.offset
    }

    /// Current source position.
    pub fn pos(&self) -> Pos {
        self.pos
    }

    /// Scan and return the next token, advancing past any leading
    /// whitespace and `;` comments. Returns `None` at end of input.
    fn next_token(&mut self) -> Option<(Pos, Token<'a>)> {
        // A symbol may not *start* with a digit, '-', paren, or ';'
        // (digits/'-' begin integers), but after the first character
        // anything except parens, ';', and whitespace is allowed.
        fn is_sym_first_char(c: u8) -> bool {
            match c {
                b'-' | b'0'..=b'9' | b'(' | b')' | b';' => false,
                c if c.is_ascii_whitespace() => false,
                _ => true,
            }
        }
        fn is_sym_other_char(c: u8) -> bool {
            match c {
                b'(' | b')' | b';' => false,
                c if c.is_ascii_whitespace() => false,
                _ => true,
            }
        }

        // Skip any whitespace and any comments.
        while self.pos.offset < self.buf.len() {
            if self.buf[self.pos.offset].is_ascii_whitespace() {
                self.pos.col += 1;
                if self.buf[self.pos.offset] == b'\n' {
                    self.pos.line += 1;
                    self.pos.col = 0;
                }
                self.pos.offset += 1;
                continue;
            }
            if self.buf[self.pos.offset] == b';' {
                // A `;` comment runs to end-of-line. Stop *before* the
                // newline and let the whitespace branch above consume
                // it: that branch is the single place that advances the
                // line counter. (Bug fix: previously the line was also
                // bumped here, so each comment's newline was counted
                // twice and every subsequent line number was inflated
                // by one per preceding comment. The parser's
                // `parse_type` unit test encodes the old inflated
                // numbers and should be updated to match.)
                while self.pos.offset < self.buf.len() && self.buf[self.pos.offset] != b'\n' {
                    self.pos.offset += 1;
                }
                continue;
            }
            break;
        }
        if self.pos.offset == self.buf.len() {
            return None;
        }
        let char_pos = self.pos;
        match self.buf[self.pos.offset] {
            b'(' => {
                self.pos.offset += 1;
                self.pos.col += 1;
                Some((char_pos, Token::LParen))
            }
            b')' => {
                self.pos.offset += 1;
                self.pos.col += 1;
                Some((char_pos, Token::RParen))
            }
            c if is_sym_first_char(c) => {
                let start = self.pos.offset;
                let start_pos = self.pos;
                while self.pos.offset < self.buf.len()
                    && is_sym_other_char(self.buf[self.pos.offset])
                {
                    self.pos.col += 1;
                    self.pos.offset += 1;
                }
                let end = self.pos.offset;
                let s = std::str::from_utf8(&self.buf[start..end])
                    .expect("Only ASCII characters, should be UTF-8");
                Some((start_pos, Token::Symbol(s)))
            }
            c if (c >= b'0' && c <= b'9') || c == b'-' => {
                let start_pos = self.pos;
                let neg = if c == b'-' {
                    self.pos.offset += 1;
                    self.pos.col += 1;
                    true
                } else {
                    false
                };
                let mut num = 0;
                while self.pos.offset < self.buf.len()
                    && (self.buf[self.pos.offset] >= b'0' && self.buf[self.pos.offset] <= b'9')
                {
                    num = (num * 10) + (self.buf[self.pos.offset] - b'0') as i64;
                    self.pos.offset += 1;
                    self.pos.col += 1;
                }
                let tok = if neg {
                    Token::Int(-num)
                } else {
                    Token::Int(num)
                };
                Some((start_pos, tok))
            }
            // Bug fix: report the offending byte as a character; `{}`
            // on a `u8` printed its numeric value inside the quotes.
            c => panic!(
                "Unexpected character '{}' at offset {}",
                c as char, self.pos.offset
            ),
        }
    }

    /// Refill the lookahead slot if it is empty and input remains.
    fn reload(&mut self) {
        if self.lookahead.is_none() && self.pos.offset < self.buf.len() {
            self.lookahead = self.next_token();
        }
    }

    /// The next token (and its position) without consuming it.
    pub fn peek(&self) -> Option<(Pos, Token<'a>)> {
        self.lookahead
    }

    /// True once all tokens have been consumed.
    pub fn eof(&self) -> bool {
        self.lookahead.is_none()
    }
}
impl<'a> std::iter::Iterator for Lexer<'a> {
    type Item = (Pos, Token<'a>);

    /// Yield the buffered lookahead token and refill the slot for the
    /// next call.
    fn next(&mut self) -> Option<(Pos, Token<'a>)> {
        let yielded = std::mem::replace(&mut self.lookahead, None);
        self.reload();
        yielded
    }
}
impl<'a> Token<'a> {
    /// Is this token an integer literal?
    pub fn is_int(&self) -> bool {
        matches!(self, Token::Int(_))
    }

    /// Is this token a symbol?
    pub fn is_sym(&self) -> bool {
        matches!(self, Token::Symbol(_))
    }
}
#[cfg(test)]
mod test {
    use super::*;

    // These tests compare only the token stream; positions are
    // deliberately discarded with `|(_, tok)| tok`.
    #[test]
    fn lexer_basic() {
        assert_eq!(
            Lexer::new(";; comment\n; another\r\n \t(one two three 23 -568 )\n")
                .map(|(_, tok)| tok)
                .collect::<Vec<_>>(),
            vec![
                Token::LParen,
                Token::Symbol("one"),
                Token::Symbol("two"),
                Token::Symbol("three"),
                Token::Int(23),
                Token::Int(-568),
                Token::RParen
            ]
        );
    }

    // Input that ends immediately after a symbol still yields it.
    #[test]
    fn ends_with_sym() {
        assert_eq!(
            Lexer::new("asdf").map(|(_, tok)| tok).collect::<Vec<_>>(),
            vec![Token::Symbol("asdf"),]
        );
    }

    // Likewise for input that ends immediately after an integer.
    #[test]
    fn ends_with_num() {
        assert_eq!(
            Lexer::new("23").map(|(_, tok)| tok).collect::<Vec<_>>(),
            vec![Token::Int(23)],
        );
    }

    // Symbols may contain arbitrary punctuation after the first
    // character (anything except parens, `;`, and whitespace).
    #[test]
    fn weird_syms() {
        assert_eq!(
            Lexer::new("(+ [] => !! _test!;comment\n)")
                .map(|(_, tok)| tok)
                .collect::<Vec<_>>(),
            vec![
                Token::LParen,
                Token::Symbol("+"),
                Token::Symbol("[]"),
                Token::Symbol("=>"),
                Token::Symbol("!!"),
                Token::Symbol("_test!"),
                Token::RParen,
            ]
        );
    }
}

View File

@@ -0,0 +1,33 @@
use crate::ir::*;
use crate::sema;
struct LowerState<'a> {
tyenv: &'a sema::TypeEnv,
func: &'a sema::Func,
builder: FuncBuilder,
control_flow: ControlInput,
}
pub fn lower(tyenv: &sema::TypeEnv, func: &sema::Func) -> Func {
let mut builder = FuncBuilder::default();
let entry = builder.intern(Node::Entry);
let mut state = LowerState {
tyenv,
func,
builder,
control_flow: ControlInput(entry, 0),
};
if !func.is_extern && !func.is_inline {
for case in &func.cases {
state.lower_case(case);
}
}
state.builder.build()
}
impl<'a> LowerState<'a> {
fn lower_case(&mut self) {}
}

View File

@@ -0,0 +1,28 @@
#![allow(dead_code)]
use std::io::stdin;
use std::io::Read;
mod ast;
mod compile;
mod error;
mod ir;
mod lexer;
mod parser;
mod sema;
/// Entry point: read an ISLE program from stdin, run the front half of
/// the pipeline, and dump each lowered sequence to stdout.
fn main() -> Result<(), error::Error> {
    // Logging is optional; ignore a second-initialization error.
    let _ = env_logger::try_init();

    // Slurp the whole program from stdin.
    let mut source = String::new();
    stdin().read_to_string(&mut source)?;

    // Parse, analyze, and lower each rule to a sequence.
    let defs = parser::Parser::new("<stdin>", &source[..]).parse_defs()?;
    let mut compiler = compile::Compiler::new(&defs)?;
    compiler.build_sequences()?;
    compiler.collect_tree_summaries()?;

    // Dump the results.
    for seq in compiler.to_sequences() {
        println!("---\nsequence\n---\n{:?}\n", seq);
    }
    Ok(())
}

View File

@@ -0,0 +1,429 @@
//! Parser for ISLE language.
use crate::ast::*;
use crate::error::*;
use crate::lexer::{Lexer, Pos, Token};
/// A recursive-descent parser over the token stream from `Lexer`.
#[derive(Clone, Debug)]
pub struct Parser<'a> {
    /// Name of the file being parsed (used only in error messages).
    filename: &'a str,
    /// The underlying token stream, with one token of lookahead.
    lexer: Lexer<'a>,
}
/// Result type for all parsing operations.
pub type ParseResult<T> = std::result::Result<T, ParseError>;
impl<'a> Parser<'a> {
    /// Create a parser over `s`; `filename` appears only in error
    /// messages.
    pub fn new(filename: &'a str, s: &'a str) -> Parser<'a> {
        Parser {
            filename,
            lexer: Lexer::new(s),
        }
    }

    /// Build a `ParseError` at `pos` with the given message.
    pub fn error(&self, pos: Pos, msg: String) -> ParseError {
        ParseError {
            filename: self.filename.to_string(),
            pos,
            msg,
        }
    }

    /// Consume and return the next token if `f` accepts it; otherwise
    /// report an error (also errors at EOF).
    fn take<F: Fn(Token) -> bool>(&mut self, f: F) -> ParseResult<Token<'a>> {
        if let Some((pos, peek)) = self.lexer.peek() {
            if !f(peek) {
                return Err(self.error(pos, format!("Unexpected token {:?}", peek)));
            }
            self.lexer.next();
            Ok(peek)
        } else {
            Err(self.error(self.lexer.pos(), "Unexpected EOF".to_string()))
        }
    }

    /// Does `f` accept the next token? Does not consume anything.
    fn is<F: Fn(Token) -> bool>(&self, f: F) -> bool {
        if let Some((_, peek)) = self.lexer.peek() {
            f(peek)
        } else {
            false
        }
    }

    /// Position of the next token, if any tokens remain.
    fn pos(&self) -> Option<Pos> {
        self.lexer.peek().map(|(pos, _)| pos)
    }

    // Lookahead predicates over the next token.
    fn is_lparen(&self) -> bool {
        self.is(|tok| tok == Token::LParen)
    }
    fn is_rparen(&self) -> bool {
        self.is(|tok| tok == Token::RParen)
    }
    fn is_sym(&self) -> bool {
        self.is(|tok| tok.is_sym())
    }
    fn is_int(&self) -> bool {
        self.is(|tok| tok.is_int())
    }
    fn is_sym_str(&self, s: &str) -> bool {
        self.is(|tok| tok == Token::Symbol(s))
    }

    /// Expect and consume a `(`.
    fn lparen(&mut self) -> ParseResult<()> {
        self.take(|tok| tok == Token::LParen).map(|_| ())
    }
    /// Expect and consume a `)`.
    fn rparen(&mut self) -> ParseResult<()> {
        self.take(|tok| tok == Token::RParen).map(|_| ())
    }

    /// Expect and consume a symbol, returning its text.
    fn symbol(&mut self) -> ParseResult<&'a str> {
        match self.take(|tok| tok.is_sym())? {
            Token::Symbol(s) => Ok(s),
            _ => unreachable!(),
        }
    }

    /// Expect and consume an integer literal.
    fn int(&mut self) -> ParseResult<i64> {
        match self.take(|tok| tok.is_int())? {
            Token::Int(i) => Ok(i),
            _ => unreachable!(),
        }
    }

    /// Parse all toplevel forms until EOF.
    pub fn parse_defs(&mut self) -> ParseResult<Defs> {
        let mut defs = vec![];
        while !self.lexer.eof() {
            defs.push(self.parse_def()?);
        }
        Ok(Defs {
            defs,
            filename: self.filename.to_string(),
        })
    }

    /// Parse one toplevel form, dispatching on its head symbol.
    fn parse_def(&mut self) -> ParseResult<Def> {
        self.lparen()?;
        let pos = self.pos();
        let def = match self.symbol()? {
            "type" => Def::Type(self.parse_type()?),
            "rule" => Def::Rule(self.parse_rule()?),
            "decl" => Def::Decl(self.parse_decl()?),
            "constructor" => Def::Extern(self.parse_ctor()?),
            "extractor" => Def::Extern(self.parse_etor()?),
            s => {
                // `pos` is Some here: `symbol()` just succeeded.
                return Err(self.error(pos.unwrap(), format!("Unexpected identifier: {}", s)));
            }
        };
        self.rparen()?;
        Ok(def)
    }

    /// Validate `s` as an identifier: nonempty, starting with a letter
    /// or `_`, containing only alphanumerics, `_`, and `.`.
    fn str_to_ident(&self, pos: Pos, s: &str) -> ParseResult<Ident> {
        // Bug fix: `s` can be empty here -- e.g. a lone `=` symbol in
        // pattern position arrives with its `=` prefix stripped -- and
        // the previous `.next().unwrap()` panicked on it. Report a
        // parse error instead.
        let first = match s.chars().next() {
            Some(c) => c,
            None => {
                return Err(self.error(pos, "Empty identifier".to_string()));
            }
        };
        if !first.is_alphabetic() && first != '_' {
            return Err(self.error(
                pos,
                format!("Identifier '{}' does not start with letter or _", s),
            ));
        }
        if s.chars()
            .skip(1)
            .any(|c| !c.is_alphanumeric() && c != '_' && c != '.')
        {
            return Err(self.error(
                pos,
                format!(
                    "Identifier '{}' contains invalid character (not a-z, A-Z, 0-9, _, .)",
                    s
                ),
            ));
        }
        Ok(Ident(s.to_string()))
    }

    /// Parse a symbol token as an identifier.
    fn parse_ident(&mut self) -> ParseResult<Ident> {
        let pos = self.pos();
        let s = self.symbol()?;
        self.str_to_ident(pos.unwrap(), s)
    }

    /// Parse a type definition body: `Name [extern] <typevalue>`.
    fn parse_type(&mut self) -> ParseResult<Type> {
        let pos = self.pos();
        let name = self.parse_ident()?;
        let mut is_extern = false;
        if self.is_sym_str("extern") {
            self.symbol()?;
            is_extern = true;
        }
        let ty = self.parse_typevalue()?;
        Ok(Type {
            name,
            is_extern,
            ty,
            pos: pos.unwrap(),
        })
    }

    /// Parse a type-value: `(primitive Name)` or `(enum variants...)`.
    fn parse_typevalue(&mut self) -> ParseResult<TypeValue> {
        let pos = self.pos();
        self.lparen()?;
        if self.is_sym_str("primitive") {
            self.symbol()?;
            let primitive_ident = self.parse_ident()?;
            self.rparen()?;
            Ok(TypeValue::Primitive(primitive_ident))
        } else if self.is_sym_str("enum") {
            self.symbol()?;
            let mut variants = vec![];
            while !self.is_rparen() {
                let variant = self.parse_type_variant()?;
                variants.push(variant);
            }
            self.rparen()?;
            Ok(TypeValue::Enum(variants))
        } else {
            Err(self.error(pos.unwrap(), "Unknown type definition".to_string()))
        }
    }

    /// Parse one enum variant: `(Name fields...)`.
    fn parse_type_variant(&mut self) -> ParseResult<Variant> {
        self.lparen()?;
        let name = self.parse_ident()?;
        let mut fields = vec![];
        while !self.is_rparen() {
            fields.push(self.parse_type_field()?);
        }
        self.rparen()?;
        Ok(Variant { name, fields })
    }

    /// Parse one variant field: `(name type)`.
    fn parse_type_field(&mut self) -> ParseResult<Field> {
        self.lparen()?;
        let name = self.parse_ident()?;
        let ty = self.parse_ident()?;
        self.rparen()?;
        Ok(Field { name, ty })
    }

    /// Parse a term declaration body: `Term (argty...) retty`.
    fn parse_decl(&mut self) -> ParseResult<Decl> {
        let pos = self.pos();
        let term = self.parse_ident()?;
        self.lparen()?;
        let mut arg_tys = vec![];
        while !self.is_rparen() {
            arg_tys.push(self.parse_ident()?);
        }
        self.rparen()?;
        let ret_ty = self.parse_ident()?;
        Ok(Decl {
            term,
            arg_tys,
            ret_ty,
            pos: pos.unwrap(),
        })
    }

    /// Parse an external-constructor body: `Term rustfunc`.
    fn parse_ctor(&mut self) -> ParseResult<Extern> {
        let pos = self.pos();
        let term = self.parse_ident()?;
        let func = self.parse_ident()?;
        Ok(Extern::Constructor {
            term,
            func,
            pos: pos.unwrap(),
        })
    }

    /// Parse an external-extractor body: `Term rustfunc`.
    fn parse_etor(&mut self) -> ParseResult<Extern> {
        let pos = self.pos();
        let term = self.parse_ident()?;
        let func = self.parse_ident()?;
        Ok(Extern::Extractor {
            term,
            func,
            pos: pos.unwrap(),
        })
    }

    /// Parse a rule body: `[prio] pattern expr`.
    fn parse_rule(&mut self) -> ParseResult<Rule> {
        let pos = self.pos();
        let prio = if self.is_int() {
            Some(self.int()?)
        } else {
            None
        };
        let pattern = self.parse_pattern()?;
        let expr = self.parse_expr()?;
        Ok(Rule {
            pattern,
            expr,
            pos: pos.unwrap(),
            prio,
        })
    }

    /// Parse a pattern: an integer constant, `_` wildcard, `=var`
    /// reference, `var [@ subpattern]` binding, or `(Term args...)`.
    fn parse_pattern(&mut self) -> ParseResult<Pattern> {
        let pos = self.pos();
        if self.is_int() {
            Ok(Pattern::ConstInt { val: self.int()? })
        } else if self.is_sym_str("_") {
            self.symbol()?;
            Ok(Pattern::Wildcard)
        } else if self.is_sym() {
            let s = self.symbol()?;
            if s.starts_with("=") {
                // `=x`: reference to an already-bound variable.
                let s = &s[1..];
                let var = self.str_to_ident(pos.unwrap(), s)?;
                Ok(Pattern::Var { var })
            } else {
                // `x` alone binds a wildcard; `x @ pat` binds `pat`.
                let var = self.str_to_ident(pos.unwrap(), s)?;
                if self.is_sym_str("@") {
                    self.symbol()?;
                    let subpat = Box::new(self.parse_pattern()?);
                    Ok(Pattern::BindPattern { var, subpat })
                } else {
                    Ok(Pattern::BindPattern {
                        var,
                        subpat: Box::new(Pattern::Wildcard),
                    })
                }
            }
        } else if self.is_lparen() {
            self.lparen()?;
            let sym = self.parse_ident()?;
            let mut args = vec![];
            while !self.is_rparen() {
                args.push(self.parse_pattern()?);
            }
            self.rparen()?;
            Ok(Pattern::Term { sym, args })
        } else {
            Err(self.error(pos.unwrap(), "Unexpected pattern".into()))
        }
    }

    /// Parse an expression: `(let (defs...) body)`, `(term args...)`,
    /// a variable, or an integer constant.
    fn parse_expr(&mut self) -> ParseResult<Expr> {
        let pos = self.pos();
        if self.is_lparen() {
            self.lparen()?;
            if self.is_sym_str("let") {
                self.symbol()?;
                self.lparen()?;
                let mut defs = vec![];
                while !self.is_rparen() {
                    let def = self.parse_letdef()?;
                    defs.push(def);
                }
                self.rparen()?;
                let body = Box::new(self.parse_expr()?);
                self.rparen()?;
                Ok(Expr::Let { defs, body })
            } else {
                let sym = self.parse_ident()?;
                let mut args = vec![];
                while !self.is_rparen() {
                    args.push(self.parse_expr()?);
                }
                self.rparen()?;
                Ok(Expr::Term { sym, args })
            }
        } else if self.is_sym() {
            let name = self.parse_ident()?;
            Ok(Expr::Var { name })
        } else if self.is_int() {
            let val = self.int()?;
            Ok(Expr::ConstInt { val })
        } else {
            Err(self.error(pos.unwrap(), "Invalid expression".into()))
        }
    }

    /// Parse one `let` binding: `(var ty val)`.
    fn parse_letdef(&mut self) -> ParseResult<LetDef> {
        self.lparen()?;
        let var = self.parse_ident()?;
        let ty = self.parse_ident()?;
        let val = Box::new(self.parse_expr()?);
        self.rparen()?;
        Ok(LetDef { var, ty, val })
    }
}
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn parse_type() {
        let text = r"
            ;; comment
            (type Inst extern (enum
              (Alu (a Reg) (b Reg) (dest Reg))
              (Load (a Reg) (dest Reg))))
            (type u32 (primitive u32))
            ";
        let defs = Parser::new("(none)", text)
            .parse_defs()
            .expect("should parse");
        assert_eq!(
            defs,
            Defs {
                filename: "(none)".to_string(),
                defs: vec![
                    Def::Type(Type {
                        name: Ident("Inst".to_string()),
                        is_extern: true,
                        ty: TypeValue::Enum(vec![
                            Variant {
                                name: Ident("Alu".to_string()),
                                fields: vec![
                                    Field {
                                        name: Ident("a".to_string()),
                                        ty: Ident("Reg".to_string()),
                                    },
                                    Field {
                                        name: Ident("b".to_string()),
                                        ty: Ident("Reg".to_string()),
                                    },
                                    Field {
                                        name: Ident("dest".to_string()),
                                        ty: Ident("Reg".to_string()),
                                    },
                                ],
                            },
                            Variant {
                                name: Ident("Load".to_string()),
                                fields: vec![
                                    Field {
                                        name: Ident("a".to_string()),
                                        ty: Ident("Reg".to_string()),
                                    },
                                    Field {
                                        name: Ident("dest".to_string()),
                                        ty: Ident("Reg".to_string()),
                                    },
                                ],
                            }
                        ]),
                        // NOTE(review): `line: 4` here (and `line: 7`
                        // below) is one greater than the source line the
                        // token actually sits on (3 and 6, respectively):
                        // the lexer counts a comment's trailing newline
                        // twice. If the lexer's line counting is
                        // corrected, these expectations become 3 and 6.
                        pos: Pos {
                            offset: 42,
                            line: 4,
                            col: 18,
                        },
                    }),
                    Def::Type(Type {
                        name: Ident("u32".to_string()),
                        is_extern: false,
                        ty: TypeValue::Primitive(Ident("u32".to_string())),
                        pos: Pos {
                            offset: 167,
                            line: 7,
                            col: 18,
                        },
                    }),
                ]
            }
        );
    }
}

862
cranelift/isle/src/sema.rs Normal file
View File

@@ -0,0 +1,862 @@
//! Semantic analysis.
use crate::ast;
use crate::error::*;
use crate::lexer::Pos;
use std::collections::HashMap;
/// Result type for semantic analysis.
pub type SemaResult<T> = std::result::Result<T, SemaError>;
/// Declare a newtype index wrapper: `declare_id!(Foo)` defines
/// `pub struct Foo(pub usize)` with the usual derives and an
/// `index()` accessor for the raw value.
#[macro_export]
macro_rules! declare_id {
    ($name:ident) => {
        #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
        pub struct $name(pub usize);
        impl $name {
            /// The raw index value.
            pub fn index(self) -> usize {
                self.0
            }
        }
    };
}
// Index types for each interned/numbered entity.
declare_id!(Sym); // an interned string
declare_id!(TypeId); // a type definition
declare_id!(VariantId); // an enum variant, numbered within its type
declare_id!(FieldId); // a field, numbered within its variant
declare_id!(TermId); // a declared term
declare_id!(RuleId); // a rule
declare_id!(VarId); // a bound variable (see `BoundVar`)
/// The environment of all types: interned strings plus type
/// definitions, with maps from names back to ids.
#[derive(Clone, Debug)]
pub struct TypeEnv {
    /// Source filename, used in error messages.
    pub filename: String,
    /// Interned strings, indexed by `Sym`.
    pub syms: Vec<String>,
    /// Reverse map from string to its `Sym`.
    pub sym_map: HashMap<String, Sym>,
    /// Type definitions, indexed by `TypeId`.
    pub types: Vec<Type>,
    /// Map from interned type name to its `TypeId`.
    pub type_map: HashMap<Sym, TypeId>,
}
/// A resolved type: either a primitive (named by a symbol) or an enum
/// with variants.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Type {
    /// A primitive type: its own id and its interned name.
    Primitive(TypeId, Sym),
    /// An enum type.
    Enum {
        name: Sym,
        id: TypeId,
        /// Whether the source declared the type `extern`.
        is_extern: bool,
        variants: Vec<Variant>,
        /// Source position of the definition, for diagnostics.
        pos: Pos,
    },
}
impl Type {
    /// The type's name, looked up in the given environment's intern
    /// table.
    fn name<'a>(&self, tyenv: &'a TypeEnv) -> &'a str {
        match self {
            Self::Primitive(_, name) | Self::Enum { name, .. } => &tyenv.syms[name.index()],
        }
    }
}
/// One resolved variant of an enum type.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Variant {
    /// Interned name; variants are interned as `Type.Variant` (see
    /// `TypeEnv::type_from_ast`).
    pub name: Sym,
    /// Ordinal of this variant within its type.
    pub id: VariantId,
    pub fields: Vec<Field>,
}
/// One resolved field of an enum variant.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Field {
    pub name: Sym,
    /// Ordinal of this field within its variant.
    pub id: FieldId,
    /// Resolved type of the field.
    pub ty: TypeId,
}
/// The environment of all terms and rules.
#[derive(Clone, Debug)]
pub struct TermEnv {
    /// Terms, indexed by `TermId`.
    pub terms: Vec<Term>,
    /// Map from interned term name to its id.
    pub term_map: HashMap<Sym, TermId>,
    /// Rules, indexed by `RuleId`.
    pub rules: Vec<Rule>,
}
/// A term, with its resolved argument and return types.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Term {
    pub id: TermId,
    pub name: Sym,
    pub arg_tys: Vec<TypeId>,
    pub ret_ty: TypeId,
    pub kind: TermKind,
}
/// How a term is defined: as an enum-variant constructor, or as a
/// regular declared term.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum TermKind {
    /// The term constructs/matches the given variant of its return type.
    EnumVariant {
        variant: VariantId,
    },
    /// A term introduced by a `(decl ...)`.
    Regular {
        // Producer and consumer rules are catalogued separately after
        // building Sequences. Here we just record whether an
        // extractor and/or constructor is known.
        extractor: Option<Sym>,
        constructor: Option<Sym>,
    },
}
/// A type-checked rewrite rule.
#[derive(Clone, Debug)]
pub struct Rule {
    pub id: RuleId,
    pub lhs: Pattern,
    pub rhs: Expr,
    /// Optional priority, as written in the source.
    pub prio: Option<i64>,
}
/// A type-checked pattern; mirrors `ast::Pattern` with names resolved
/// to ids and each node carrying its `TypeId` first.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Pattern {
    /// Bind a variable, then match the subpattern.
    BindPattern(TypeId, VarId, Box<Pattern>),
    /// Match an already-bound variable.
    Var(TypeId, VarId),
    /// Match a constant integer.
    ConstInt(TypeId, i64),
    /// Match a term application with subpatterns for its arguments.
    Term(TypeId, TermId, Vec<Pattern>),
    /// Match anything.
    Wildcard(TypeId),
}
/// A type-checked expression; mirrors `ast::Expr` with names resolved
/// to ids and each node carrying its `TypeId` first.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Expr {
    /// A term application.
    Term(TypeId, TermId, Vec<Expr>),
    /// A variable use.
    Var(TypeId, VarId),
    /// A constant integer.
    ConstInt(TypeId, i64),
    /// A `let` expression with its bindings and body.
    Let(TypeId, Vec<(VarId, TypeId, Box<Expr>)>, Box<Expr>),
}
impl Pattern {
    /// The type of the value this pattern matches. Every variant
    /// carries its `TypeId` as the first field, so a single or-pattern
    /// suffices.
    pub fn ty(&self) -> TypeId {
        match *self {
            Self::BindPattern(t, ..)
            | Self::Var(t, ..)
            | Self::ConstInt(t, ..)
            | Self::Term(t, ..)
            | Self::Wildcard(t, ..) => t,
        }
    }
}
impl Expr {
    /// The type of the value this expression produces. Every variant
    /// carries its `TypeId` as the first field, so a single or-pattern
    /// suffices.
    pub fn ty(&self) -> TypeId {
        match *self {
            Self::Term(t, ..)
            | Self::Var(t, ..)
            | Self::ConstInt(t, ..)
            | Self::Let(t, ..) => t,
        }
    }
}
impl TypeEnv {
    /// Build the type environment from the AST in two passes: first
    /// assign `TypeId`s to all type names, then lower each definition
    /// (so a field may refer to a type declared later in the file).
    pub fn from_ast(defs: &ast::Defs) -> SemaResult<TypeEnv> {
        let mut tyenv = TypeEnv {
            filename: defs.filename.clone(),
            syms: vec![],
            sym_map: HashMap::new(),
            types: vec![],
            type_map: HashMap::new(),
        };
        // Traverse defs, assigning type IDs to type names. We'll fill
        // in types on a second pass.
        for def in &defs.defs {
            match def {
                &ast::Def::Type(ref td) => {
                    let tid = TypeId(tyenv.type_map.len());
                    let name = tyenv.intern_mut(&td.name);
                    if tyenv.type_map.contains_key(&name) {
                        return Err(tyenv.error(
                            td.pos,
                            format!("Type name defined more than once: '{}'", td.name.0),
                        ));
                    }
                    tyenv.type_map.insert(name, tid);
                }
                _ => {}
            }
        }
        // Now lower AST nodes to type definitions, raising errors
        // where typenames of fields are undefined or field names are
        // duplicated.
        //
        // `tid` recounts type defs in the same order as the first
        // pass, so ids agree between `type_map` and `types`.
        let mut tid = 0;
        for def in &defs.defs {
            match def {
                &ast::Def::Type(ref td) => {
                    let ty = tyenv.type_from_ast(TypeId(tid), td)?;
                    tyenv.types.push(ty);
                    tid += 1;
                }
                _ => {}
            }
        }
        Ok(tyenv)
    }

    /// Lower one AST type definition to a `Type`, interning names and
    /// resolving field types; rejects duplicate variant/field names
    /// and unknown field types.
    fn type_from_ast(&mut self, tid: TypeId, ty: &ast::Type) -> SemaResult<Type> {
        // The name was interned by `from_ast`'s first pass, so the
        // read-only lookup cannot fail here.
        let name = self.intern(&ty.name).unwrap();
        match &ty.ty {
            &ast::TypeValue::Primitive(ref id) => Ok(Type::Primitive(tid, self.intern_mut(id))),
            &ast::TypeValue::Enum(ref ty_variants) => {
                let mut variants = vec![];
                for variant in ty_variants {
                    // Variants are interned as `Type.Variant`, so the
                    // same variant name may be reused across types.
                    let combined_ident = ast::Ident(format!("{}.{}", ty.name.0, variant.name.0));
                    let var_name = self.intern_mut(&combined_ident);
                    let id = VariantId(variants.len());
                    if variants.iter().any(|v: &Variant| v.name == var_name) {
                        return Err(self.error(
                            ty.pos,
                            format!("Duplicate variant name in type: '{}'", variant.name.0),
                        ));
                    }
                    let mut fields = vec![];
                    for field in &variant.fields {
                        let field_name = self.intern_mut(&field.name);
                        if fields.iter().any(|f: &Field| f.name == field_name) {
                            return Err(self.error(
                                ty.pos,
                                format!(
                                    "Duplicate field name '{}' in variant '{}' of type",
                                    field.name.0, variant.name.0
                                ),
                            ));
                        }
                        let field_ty = self.intern_mut(&field.ty);
                        let field_tid = match self.type_map.get(&field_ty) {
                            Some(tid) => *tid,
                            None => {
                                return Err(self.error(
                                    ty.pos,
                                    format!(
                                        "Unknown type '{}' for field '{}' in variant '{}'",
                                        field.ty.0, field.name.0, variant.name.0
                                    ),
                                ));
                            }
                        };
                        fields.push(Field {
                            name: field_name,
                            id: FieldId(fields.len()),
                            ty: field_tid,
                        });
                    }
                    variants.push(Variant {
                        name: var_name,
                        id,
                        fields,
                    });
                }
                Ok(Type::Enum {
                    name,
                    id: tid,
                    is_extern: ty.is_extern,
                    variants,
                    pos: ty.pos,
                })
            }
        }
    }

    /// Build a `SemaError` at `pos` with the given message.
    fn error(&self, pos: Pos, msg: String) -> SemaError {
        SemaError {
            filename: self.filename.clone(),
            pos,
            msg,
        }
    }

    /// Intern `ident`, allocating a fresh `Sym` on first sight.
    pub fn intern_mut(&mut self, ident: &ast::Ident) -> Sym {
        if let Some(s) = self.sym_map.get(&ident.0).cloned() {
            s
        } else {
            let s = Sym(self.syms.len());
            self.syms.push(ident.0.clone());
            self.sym_map.insert(ident.0.clone(), s);
            s
        }
    }

    /// Look up an already-interned identifier, without allocating.
    pub fn intern(&self, ident: &ast::Ident) -> Option<Sym> {
        self.sym_map.get(&ident.0).cloned()
    }
}
/// Scoped set of variables bound while translating a single rule's
/// pattern (LHS) and expression (RHS).
struct Bindings {
    // Next free variable ID to hand out.
    next_var: usize,
    // Stack of bound variables; lookups scan from the end, so the most
    // recent binding for a name wins.
    vars: Vec<BoundVar>,
}
/// One variable bound during rule translation.
struct BoundVar {
    // Interned name of the variable.
    name: Sym,
    // ID unique within the rule (assigned from `Bindings::next_var`).
    id: VarId,
    // Type the variable was bound with.
    ty: TypeId,
}
impl TermEnv {
    /// Build the term environment from the AST: collect declared term
    /// signatures, implicit enum-variant constructor terms, and rules,
    /// typechecking rule patterns and expressions as we go.
    pub fn from_ast(tyenv: &mut TypeEnv, defs: &ast::Defs) -> SemaResult<TermEnv> {
        let mut env = TermEnv {
            terms: vec![],
            term_map: HashMap::new(),
            rules: vec![],
        };
        env.collect_term_sigs(tyenv, defs)?;
        env.collect_enum_variant_terms(tyenv)?;
        env.collect_rules(tyenv, defs)?;
        Ok(env)
    }

    /// Collect the signature (argument and return types) of every
    /// `(decl ...)` form, rejecting duplicate declarations and unknown
    /// types.
    fn collect_term_sigs(&mut self, tyenv: &mut TypeEnv, defs: &ast::Defs) -> SemaResult<()> {
        for def in &defs.defs {
            match def {
                &ast::Def::Decl(ref decl) => {
                    let tid = TermId(self.terms.len());
                    let name = tyenv.intern_mut(&decl.term);
                    if self.term_map.contains_key(&name) {
                        return Err(
                            tyenv.error(decl.pos, format!("Duplicate decl for '{}'", decl.term.0))
                        );
                    }
                    self.term_map.insert(name, tid);
                    // Resolve each argument typename, short-circuiting
                    // on the first unknown type.
                    let arg_tys = decl
                        .arg_tys
                        .iter()
                        .map(|id| {
                            let sym = tyenv.intern_mut(id);
                            tyenv.type_map.get(&sym).cloned().ok_or_else(|| {
                                tyenv.error(decl.pos, format!("Unknown arg type: '{}'", id.0))
                            })
                        })
                        .collect::<SemaResult<Vec<TypeId>>>()?;
                    let ret_ty = {
                        let sym = tyenv.intern_mut(&decl.ret_ty);
                        tyenv.type_map.get(&sym).cloned().ok_or_else(|| {
                            tyenv.error(
                                decl.pos,
                                format!("Unknown return type: '{}'", decl.ret_ty.0),
                            )
                        })?
                    };
                    // A declared term starts with neither an extractor
                    // nor a constructor; `(extern ...)` forms fill
                    // these in later in `collect_rules`.
                    self.terms.push(Term {
                        id: tid,
                        name,
                        arg_tys,
                        ret_ty,
                        kind: TermKind::Regular {
                            extractor: None,
                            constructor: None,
                        },
                    });
                }
                _ => {}
            }
        }
        Ok(())
    }

    /// Create one implicit term per enum variant (named `Type.Variant`
    /// by `TypeEnv::type_from_ast`), taking the variant's field types
    /// and returning the enum type.
    fn collect_enum_variant_terms(&mut self, tyenv: &mut TypeEnv) -> SemaResult<()> {
        for ty in &tyenv.types {
            match ty {
                &Type::Enum {
                    pos,
                    id,
                    ref variants,
                    ..
                } => {
                    for variant in variants {
                        if self.term_map.contains_key(&variant.name) {
                            return Err(tyenv.error(
                                pos,
                                format!(
                                    "Duplicate enum variant constructor: '{}'",
                                    tyenv.syms[variant.name.index()]
                                ),
                            ));
                        }
                        let tid = TermId(self.terms.len());
                        let arg_tys = variant.fields.iter().map(|fld| fld.ty).collect::<Vec<_>>();
                        let ret_ty = id;
                        self.terms.push(Term {
                            id: tid,
                            name: variant.name,
                            arg_tys,
                            ret_ty,
                            kind: TermKind::EnumVariant {
                                variant: variant.id,
                            },
                        });
                        self.term_map.insert(variant.name, tid);
                    }
                }
                _ => {}
            }
        }
        Ok(())
    }

    /// Translate `(rule ...)` forms into typechecked `Rule`s, and
    /// attach `(extern constructor|extractor ...)` bindings to their
    /// terms.
    fn collect_rules(&mut self, tyenv: &mut TypeEnv, defs: &ast::Defs) -> SemaResult<()> {
        for def in &defs.defs {
            match def {
                &ast::Def::Rule(ref rule) => {
                    // Each rule gets a fresh variable scope.
                    let mut bindings = Bindings {
                        next_var: 0,
                        vars: vec![],
                    };
                    // Translate the LHS first; its inferred type is the
                    // expected type of the RHS.
                    let (lhs, ty) = self.translate_pattern(
                        tyenv,
                        rule.pos,
                        &rule.pattern,
                        None,
                        &mut bindings,
                    )?;
                    let rhs =
                        self.translate_expr(tyenv, rule.pos, &rule.expr, ty, &mut bindings)?;
                    let rid = RuleId(self.rules.len());
                    self.rules.push(Rule {
                        id: rid,
                        lhs,
                        rhs,
                        prio: rule.prio,
                    });
                }
                &ast::Def::Extern(ast::Extern::Constructor {
                    ref term,
                    ref func,
                    pos,
                }) => {
                    let term_sym = tyenv.intern_mut(term);
                    let func_sym = tyenv.intern_mut(func);
                    let term_id = match self.term_map.get(&term_sym) {
                        Some(term) => term,
                        None => {
                            return Err(tyenv.error(
                                pos,
                                format!("Constructor declared on undefined term '{}'", term.0),
                            ))
                        }
                    };
                    match &mut self.terms[term_id.index()].kind {
                        &mut TermKind::EnumVariant { .. } => {
                            // Enum-variant terms have an implicit
                            // constructor; an external one cannot be
                            // attached.
                            return Err(tyenv.error(
                                pos,
                                format!("Constructor defined on enum type '{}'", term.0),
                            ));
                        }
                        &mut TermKind::Regular {
                            ref mut constructor,
                            ..
                        } => {
                            if constructor.is_some() {
                                return Err(tyenv.error(
                                    pos,
                                    format!(
                                        "Constructor defined more than once on term '{}'",
                                        term.0
                                    ),
                                ));
                            }
                            *constructor = Some(func_sym);
                        }
                    }
                }
                &ast::Def::Extern(ast::Extern::Extractor {
                    ref term,
                    ref func,
                    pos,
                }) => {
                    let term_sym = tyenv.intern_mut(term);
                    let func_sym = tyenv.intern_mut(func);
                    let term_id = match self.term_map.get(&term_sym) {
                        Some(term) => term,
                        None => {
                            return Err(tyenv.error(
                                pos,
                                format!("Extractor declared on undefined term '{}'", term.0),
                            ))
                        }
                    };
                    match &mut self.terms[term_id.index()].kind {
                        &mut TermKind::EnumVariant { .. } => {
                            return Err(tyenv.error(
                                pos,
                                format!("Extractor defined on enum type '{}'", term.0),
                            ));
                        }
                        &mut TermKind::Regular {
                            ref mut extractor, ..
                        } => {
                            if extractor.is_some() {
                                return Err(tyenv.error(
                                    pos,
                                    format!(
                                        "Extractor defined more than once on term '{}'",
                                        term.0
                                    ),
                                ));
                            }
                            *extractor = Some(func_sym);
                        }
                    }
                }
                _ => {}
            }
        }
        Ok(())
    }

    /// Typecheck and lower one AST pattern. `expected_ty` is the type
    /// required by the context, if known; the returned `TypeId` is the
    /// resolved type of the pattern. New variable bindings are pushed
    /// onto `bindings` and remain in scope for the rest of the rule.
    fn translate_pattern(
        &self,
        tyenv: &mut TypeEnv,
        pos: Pos,
        pat: &ast::Pattern,
        expected_ty: Option<TypeId>,
        bindings: &mut Bindings,
    ) -> SemaResult<(Pattern, TypeId)> {
        match pat {
            // TODO: flag on primitive type decl indicating it's an integer type?
            &ast::Pattern::ConstInt { val } => {
                // Integer literals have no inherent type; one must be
                // implied by context.
                let ty = expected_ty.ok_or_else(|| {
                    tyenv.error(pos, "Need an implied type for an integer constant".into())
                })?;
                Ok((Pattern::ConstInt(ty, val), ty))
            }
            &ast::Pattern::Wildcard => {
                let ty = expected_ty.ok_or_else(|| {
                    tyenv.error(pos, "Need an implied type for a wildcard".into())
                })?;
                Ok((Pattern::Wildcard(ty), ty))
            }
            &ast::Pattern::BindPattern {
                ref var,
                ref subpat,
            } => {
                // Do the subpattern first so we can resolve the type for sure.
                let (subpat, ty) =
                    self.translate_pattern(tyenv, pos, &*subpat, expected_ty, bindings)?;
                let name = tyenv.intern_mut(var);
                if bindings.vars.iter().any(|bv| bv.name == name) {
                    return Err(tyenv.error(
                        pos,
                        format!("Rebound variable name in LHS pattern: '{}'", var.0),
                    ));
                }
                let id = VarId(bindings.next_var);
                bindings.next_var += 1;
                bindings.vars.push(BoundVar { name, id, ty });
                Ok((Pattern::BindPattern(ty, id, Box::new(subpat)), ty))
            }
            &ast::Pattern::Var { ref var } => {
                // Look up the variable; it must already have been bound.
                let name = tyenv.intern_mut(var);
                let bv = match bindings.vars.iter().rev().find(|bv| bv.name == name) {
                    None => {
                        return Err(tyenv.error(
                            pos,
                            format!(
                                "Unknown variable '{}' in bound-var pattern '={}'",
                                var.0, var.0
                            ),
                        ))
                    }
                    Some(bv) => bv,
                };
                // The already-bound var's type must match the context's
                // expected type, if any.
                let ty = match expected_ty {
                    None => bv.ty,
                    Some(expected_ty) if expected_ty == bv.ty => bv.ty,
                    Some(expected_ty) => {
                        return Err(tyenv.error(pos, format!("Mismatched types: pattern expects type '{}' but already-bound var '{}' has type '{}'", tyenv.types[expected_ty.index()].name(tyenv), var.0, tyenv.types[bv.ty.index()].name(tyenv))));
                    }
                };
                Ok((Pattern::Var(ty, bv.id), ty))
            }
            &ast::Pattern::Term { ref sym, ref args } => {
                let name = tyenv.intern_mut(&sym);
                // Look up the term.
                let tid = self.term_map.get(&name).ok_or_else(|| {
                    tyenv.error(pos, format!("Unknown term in pattern: '{}'", sym.0))
                })?;
                // Get the return type and arg types. Verify the
                // expected type of this pattern, if any, against the
                // return type of the term.
                let ret_ty = self.terms[tid.index()].ret_ty;
                let ty = match expected_ty {
                    None => ret_ty,
                    Some(expected_ty) if expected_ty == ret_ty => ret_ty,
                    Some(expected_ty) => {
                        return Err(tyenv.error(pos, format!("Mismatched types: pattern expects type '{}' but term has return type '{}'", tyenv.types[expected_ty.index()].name(tyenv), tyenv.types[ret_ty.index()].name(tyenv))));
                    }
                };
                // Check that we have the correct argument count.
                if self.terms[tid.index()].arg_tys.len() != args.len() {
                    return Err(tyenv.error(
                        pos,
                        format!(
                            "Incorrect argument count for term '{}': got {}, expect {}",
                            sym.0,
                            args.len(),
                            self.terms[tid.index()].arg_tys.len()
                        ),
                    ));
                }
                // Resolve subpatterns, each against the corresponding
                // declared argument type.
                let mut subpats = vec![];
                for (i, arg) in args.iter().enumerate() {
                    let arg_ty = self.terms[tid.index()].arg_tys[i];
                    let (subpat, _) =
                        self.translate_pattern(tyenv, pos, arg, Some(arg_ty), bindings)?;
                    subpats.push(subpat);
                }
                Ok((Pattern::Term(ty, *tid, subpats), ty))
            }
        }
    }

    /// Typecheck and lower one AST expression against the required type
    /// `ty`. Variables are resolved against `bindings`, innermost
    /// binding first.
    fn translate_expr(
        &self,
        tyenv: &mut TypeEnv,
        pos: Pos,
        expr: &ast::Expr,
        ty: TypeId,
        bindings: &mut Bindings,
    ) -> SemaResult<Expr> {
        match expr {
            &ast::Expr::Term { ref sym, ref args } => {
                // Look up the term.
                let name = tyenv.intern_mut(&sym);
                let tid = self.term_map.get(&name).ok_or_else(|| {
                    // Fixed message: this is the expression side, not
                    // the pattern side.
                    tyenv.error(pos, format!("Unknown term in expression: '{}'", sym.0))
                })?;
                // Verify the expected type of this expression against
                // the return type of the term.
                let ret_ty = self.terms[tid.index()].ret_ty;
                if ret_ty != ty {
                    return Err(tyenv.error(pos, format!("Mismatched types: expression expects type '{}' but term has return type '{}'", tyenv.types[ty.index()].name(tyenv), tyenv.types[ret_ty.index()].name(tyenv))));
                }
                // Check that we have the correct argument count.
                if self.terms[tid.index()].arg_tys.len() != args.len() {
                    return Err(tyenv.error(
                        pos,
                        format!(
                            "Incorrect argument count for term '{}': got {}, expect {}",
                            sym.0,
                            args.len(),
                            self.terms[tid.index()].arg_tys.len()
                        ),
                    ));
                }
                // Resolve subexpressions, each against the
                // corresponding declared argument type.
                let mut subexprs = vec![];
                for (i, arg) in args.iter().enumerate() {
                    let arg_ty = self.terms[tid.index()].arg_tys[i];
                    let subexpr = self.translate_expr(tyenv, pos, arg, arg_ty, bindings)?;
                    subexprs.push(subexpr);
                }
                Ok(Expr::Term(ty, *tid, subexprs))
            }
            &ast::Expr::Var { ref name } => {
                let sym = tyenv.intern_mut(name);
                // Look through bindings, innermost (most recent) first.
                let bv = match bindings.vars.iter().rev().find(|b| b.name == sym) {
                    None => {
                        return Err(tyenv.error(pos, format!("Unknown variable '{}'", name.0)));
                    }
                    Some(bv) => bv,
                };
                // Verify type.
                if bv.ty != ty {
                    return Err(tyenv.error(
                        pos,
                        format!(
                            "Variable '{}' has type {} but we need {} in context",
                            name.0,
                            tyenv.types[bv.ty.index()].name(tyenv),
                            tyenv.types[ty.index()].name(tyenv)
                        ),
                    ));
                }
                Ok(Expr::Var(bv.ty, bv.id))
            }
            &ast::Expr::ConstInt { val } => Ok(Expr::ConstInt(ty, val)),
            &ast::Expr::Let { ref defs, ref body } => {
                let orig_binding_len = bindings.vars.len();
                // For each new binding...
                let mut let_defs = vec![];
                for def in defs {
                    // Check that the given variable name does not already exist.
                    let name = tyenv.intern_mut(&def.var);
                    if bindings.vars.iter().any(|bv| bv.name == name) {
                        return Err(
                            tyenv.error(pos, format!("Variable '{}' already bound", def.var.0))
                        );
                    }
                    // Look up the type.
                    let tysym = match tyenv.intern(&def.ty) {
                        Some(ty) => ty,
                        None => {
                            return Err(tyenv.error(
                                pos,
                                format!("Unknown type {} for variable '{}'", def.ty.0, def.var.0),
                            ))
                        }
                    };
                    let tid = match tyenv.type_map.get(&tysym) {
                        Some(tid) => *tid,
                        None => {
                            return Err(tyenv.error(
                                pos,
                                format!("Unknown type {} for variable '{}'", def.ty.0, def.var.0),
                            ))
                        }
                    };
                    // Evaluate the variable's value against the
                    // variable's *declared* type (`tid`), not the
                    // let-expression's overall type (`ty`) — the
                    // original passed `ty` here and in `let_defs`,
                    // which both mistyped the bound value and recorded
                    // the wrong type for the binding.
                    let val = Box::new(self.translate_expr(tyenv, pos, &def.val, tid, bindings)?);
                    // Bind the var with the given type.
                    let id = VarId(bindings.next_var);
                    bindings.next_var += 1;
                    bindings.vars.push(BoundVar { name, id, ty: tid });
                    let_defs.push((id, tid, val));
                }
                // Evaluate the body, expecting the type of the overall let-expr.
                let body = Box::new(self.translate_expr(tyenv, pos, body, ty, bindings)?);
                let body_ty = body.ty();
                // Pop the bindings: let-bound vars are scoped to the body.
                bindings.vars.truncate(orig_binding_len);
                Ok(Expr::Let(body_ty, let_defs, body))
            }
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::ast::Ident;
    use crate::parser::Parser;

    /// Parses a primitive type and an extern enum type, then checks
    /// the resulting interned symbols, type IDs, and full variant and
    /// field layout of the type environment.
    ///
    /// NOTE: the expected `Pos { offset: 58, ... }` below depends on
    /// the exact byte layout of the raw-string source; do not reformat
    /// the fixture text.
    #[test]
    fn build_type_env() {
        let text = r"
            (type u32 (primitive u32))
            (type A extern (enum (B (f1 u32) (f2 u32)) (C (f1 u32))))
            ";
        let ast = Parser::new("file.isle", text)
            .parse_defs()
            .expect("should parse");
        let tyenv = TypeEnv::from_ast(&ast).expect("should not have type-definition errors");

        // Variant constructor symbols are interned as "Type.Variant".
        let sym_a = tyenv.intern(&Ident("A".to_string())).unwrap();
        let sym_b = tyenv.intern(&Ident("A.B".to_string())).unwrap();
        let sym_c = tyenv.intern(&Ident("A.C".to_string())).unwrap();
        let sym_u32 = tyenv.intern(&Ident("u32".to_string())).unwrap();
        let sym_f1 = tyenv.intern(&Ident("f1".to_string())).unwrap();
        let sym_f2 = tyenv.intern(&Ident("f2".to_string())).unwrap();

        // Type IDs are assigned in definition order.
        assert_eq!(tyenv.type_map.get(&sym_u32).unwrap(), &TypeId(0));
        assert_eq!(tyenv.type_map.get(&sym_a).unwrap(), &TypeId(1));

        assert_eq!(
            tyenv.types,
            vec![
                Type::Primitive(TypeId(0), sym_u32),
                Type::Enum {
                    name: sym_a,
                    id: TypeId(1),
                    is_extern: true,
                    variants: vec![
                        Variant {
                            name: sym_b,
                            id: VariantId(0),
                            fields: vec![
                                Field {
                                    name: sym_f1,
                                    id: FieldId(0),
                                    ty: TypeId(0),
                                },
                                Field {
                                    name: sym_f2,
                                    id: FieldId(1),
                                    ty: TypeId(0),
                                },
                            ],
                        },
                        Variant {
                            name: sym_c,
                            id: VariantId(1),
                            fields: vec![Field {
                                name: sym_f1,
                                id: FieldId(0),
                                ty: TypeId(0),
                            },],
                        },
                    ],
                    pos: Pos {
                        offset: 58,
                        line: 3,
                        col: 18,
                    },
                },
            ]
        );
    }

    /// End-to-end check that decls, extern constructor/extractor
    /// bindings, and rules (including a bound-var pattern `=x` and a
    /// negative priority) typecheck without error.
    #[test]
    fn build_rules() {
        let text = r"
            (type u32 (primitive u32))
            (type A extern (enum (B (f1 u32) (f2 u32)) (C (f1 u32))))
            (decl T1 (A) u32)
            (decl T2 (A A) A)
            (decl T3 (u32) A)
            (constructor T1 t1_ctor)
            (extractor T2 t2_etor)
            (rule
              (T1 _) 1)
            (rule
              (T2 x =x) (T3 42))
            (rule
              (T3 1) (A.C 2))
            (rule -1
              (T3 _) (A.C 3))
            ";
        let ast = Parser::new("file.isle", text)
            .parse_defs()
            .expect("should parse");
        let mut tyenv = TypeEnv::from_ast(&ast).expect("should not have type-definition errors");
        let _ = TermEnv::from_ast(&mut tyenv, &ast).expect("could not typecheck rules");
    }
}