Split into isle and islec crates

Chris Fallin
2021-09-14 23:24:49 -07:00
parent e751f12ac5
commit 521010cc4f
14 changed files with 42 additions and 13 deletions


@@ -0,0 +1,9 @@
[package]
name = "isle"
version = "0.1.0"
authors = ["Chris Fallin <chris@cfallin.org>"]
edition = "2018"
license = "Apache-2.0 WITH LLVM-exception"
[dependencies]
log = "0.4"


@@ -0,0 +1,357 @@
use crate::lexer::Pos;
/// The parsed form of an ISLE file.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Defs {
pub defs: Vec<Def>,
pub filenames: Vec<String>,
}
/// One toplevel form in an ISLE file.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Def {
Type(Type),
Rule(Rule),
Extractor(Extractor),
Decl(Decl),
Extern(Extern),
}
/// An identifier -- a variable, term symbol, or type.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Ident(pub String, pub Pos);
/// A declaration of a type.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Type {
pub name: Ident,
pub is_extern: bool,
pub ty: TypeValue,
pub pos: Pos,
}
/// The actual type-value: a primitive or an enum with variants.
///
/// TODO: add structs as well?
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum TypeValue {
Primitive(Ident, Pos),
Enum(Vec<Variant>, Pos),
}
/// One variant of an enum type.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Variant {
pub name: Ident,
pub fields: Vec<Field>,
pub pos: Pos,
}
/// One field of an enum variant.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Field {
pub name: Ident,
pub ty: Ident,
pub pos: Pos,
}
/// A declaration of a term with its argument and return types.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Decl {
pub term: Ident,
pub arg_tys: Vec<Ident>,
pub ret_ty: Ident,
pub pos: Pos,
}
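/// A rule: a left-hand-side pattern and a right-hand-side expression,
/// with an optional priority.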
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Rule {
pub pattern: Pattern,
pub expr: Expr,
pub pos: Pos,
pub prio: Option<i64>,
}
/// An extractor macro: `(A x y)` becomes `(B x _ y ...)`. Expanded during
/// the AST-to-sema pass.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Extractor {
pub term: Ident,
pub args: Vec<Ident>,
pub template: Pattern,
pub pos: Pos,
}
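// Illustrative sketch (hypothetical ISLE source, not part of this commit):
// given `(extractor (A x y) (B x _ y))`, the argument names `x` and `y` in the
// template become `Pattern::MacroArg` placeholders via `make_macro_template`;
// when a rule's pattern uses `(A p q)`, `subst_macro_args` replaces those
// placeholders with the actual argument patterns, yielding `(B p _ q)`.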
/// A pattern: the left-hand side of a rule.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Pattern {
/// An operator that binds a variable to a subterm and matches the
/// subpattern.
BindPattern {
var: Ident,
subpat: Box<Pattern>,
pos: Pos,
},
/// A variable that has already been bound (`=x` syntax).
Var { var: Ident, pos: Pos },
/// An operator that matches a constant integer value.
ConstInt { val: i64, pos: Pos },
/// An application of a type variant or term.
Term {
sym: Ident,
args: Vec<TermArgPattern>,
pos: Pos,
},
/// An operator that matches anything.
Wildcard { pos: Pos },
/// N sub-patterns that must all match.
And { subpats: Vec<Pattern>, pos: Pos },
/// Internal use only: macro argument in a template.
MacroArg { index: usize, pos: Pos },
}
impl Pattern {
pub fn root_term(&self) -> Option<&Ident> {
match self {
&Pattern::BindPattern { ref subpat, .. } => subpat.root_term(),
&Pattern::Term { ref sym, .. } => Some(sym),
_ => None,
}
}
pub fn make_macro_template(&self, macro_args: &[Ident]) -> Pattern {
log::trace!("make_macro_template: {:?} with {:?}", self, macro_args);
match self {
&Pattern::BindPattern {
ref var,
ref subpat,
pos,
..
} if matches!(&**subpat, &Pattern::Wildcard { .. }) => {
if let Some(i) = macro_args.iter().position(|arg| arg.0 == var.0) {
Pattern::MacroArg { index: i, pos }
} else {
self.clone()
}
}
&Pattern::BindPattern {
ref var,
ref subpat,
pos,
} => Pattern::BindPattern {
var: var.clone(),
subpat: Box::new(subpat.make_macro_template(macro_args)),
pos,
},
&Pattern::And { ref subpats, pos } => {
let subpats = subpats
.iter()
.map(|subpat| subpat.make_macro_template(macro_args))
.collect::<Vec<_>>();
Pattern::And { subpats, pos }
}
&Pattern::Term {
ref sym,
ref args,
pos,
} => {
let args = args
.iter()
.map(|arg| arg.make_macro_template(macro_args))
.collect::<Vec<_>>();
Pattern::Term {
sym: sym.clone(),
args,
pos,
}
}
&Pattern::Var { .. } | &Pattern::Wildcard { .. } | &Pattern::ConstInt { .. } => {
self.clone()
}
&Pattern::MacroArg { .. } => unreachable!(),
}
}
pub fn subst_macro_args(&self, macro_args: &[Pattern]) -> Pattern {
log::trace!("subst_macro_args: {:?} with {:?}", self, macro_args);
match self {
&Pattern::BindPattern {
ref var,
ref subpat,
pos,
} => Pattern::BindPattern {
var: var.clone(),
subpat: Box::new(subpat.subst_macro_args(macro_args)),
pos,
},
&Pattern::And { ref subpats, pos } => {
let subpats = subpats
.iter()
.map(|subpat| subpat.subst_macro_args(macro_args))
.collect::<Vec<_>>();
Pattern::And { subpats, pos }
}
&Pattern::Term {
ref sym,
ref args,
pos,
} => {
let args = args
.iter()
.map(|arg| arg.subst_macro_args(macro_args))
.collect::<Vec<_>>();
Pattern::Term {
sym: sym.clone(),
args,
pos,
}
}
&Pattern::Var { .. } | &Pattern::Wildcard { .. } | &Pattern::ConstInt { .. } => {
self.clone()
}
&Pattern::MacroArg { index, .. } => macro_args[index].clone(),
}
}
pub fn pos(&self) -> Pos {
match self {
&Pattern::ConstInt { pos, .. }
| &Pattern::And { pos, .. }
| &Pattern::Term { pos, .. }
| &Pattern::BindPattern { pos, .. }
| &Pattern::Var { pos, .. }
| &Pattern::Wildcard { pos, .. }
| &Pattern::MacroArg { pos, .. } => pos,
}
}
}
/// A pattern in a term argument. Adds an "evaluated expression" form
/// in addition to all the options in `Pattern`.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum TermArgPattern {
/// A regular pattern that must match the existing value in the term's argument.
Pattern(Pattern),
/// An expression that is evaluated during the match phase and can
/// be given into an extractor. This is essentially a limited form
/// of unification or bidirectional argument flow (a la Prolog):
/// we can pass an arg *into* an extractor rather than getting the
/// arg *out of* it.
Expr(Expr),
}
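// Hedged example of the surface syntax (names are invented for illustration):
// in a pattern `(my_extractor <offset result)`, the `<offset` argument is an
// `Expr` evaluated at match time and passed *into* the extractor, while
// `result` is an ordinary `Pattern` that receives a value *out of* it.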
impl TermArgPattern {
fn make_macro_template(&self, args: &[Ident]) -> TermArgPattern {
log::trace!("make_macro_template: {:?} with {:?}", self, args);
match self {
&TermArgPattern::Pattern(ref pat) => {
TermArgPattern::Pattern(pat.make_macro_template(args))
}
&TermArgPattern::Expr(_) => self.clone(),
}
}
fn subst_macro_args(&self, args: &[Pattern]) -> TermArgPattern {
match self {
&TermArgPattern::Pattern(ref pat) => {
TermArgPattern::Pattern(pat.subst_macro_args(args))
}
&TermArgPattern::Expr(_) => self.clone(),
}
}
}
/// An expression: the right-hand side of a rule.
///
/// Note that this *almost* looks like a core Lisp or lambda calculus,
/// except that there is no abstraction (lambda). This first-order
/// limit is what makes it analyzable.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Expr {
/// A term: `(sym args...)`.
Term {
sym: Ident,
args: Vec<Expr>,
pos: Pos,
},
/// A variable use.
Var { name: Ident, pos: Pos },
/// A constant integer.
ConstInt { val: i64, pos: Pos },
/// The `(let ((var ty val)*) body)` form.
Let {
defs: Vec<LetDef>,
body: Box<Expr>,
pos: Pos,
},
}
impl Expr {
pub fn pos(&self) -> Pos {
match self {
&Expr::Term { pos, .. }
| &Expr::Var { pos, .. }
| &Expr::ConstInt { pos, .. }
| &Expr::Let { pos, .. } => pos,
}
}
}
/// One variable locally bound in a `(let ...)` expression.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct LetDef {
pub var: Ident,
pub ty: Ident,
pub val: Box<Expr>,
pub pos: Pos,
}
/// An external binding: an extractor or constructor function attached
/// to a term.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Extern {
/// An external extractor: `(extractor Term rustfunc)` form.
Extractor {
/// The term to which this external extractor is attached.
term: Ident,
/// The Rust function name.
func: Ident,
/// The position of this decl.
pos: Pos,
/// Polarity of args: whether values are inputs or outputs to
/// the external extractor function. This is a sort of
/// statically-defined approximation to Prolog-style
/// unification; we allow for the same flexible directionality
/// but fix it at DSL-definition time. By default, every arg
/// is an *output* from the extractor (and the "retval", or
/// more precisely the term value that we are extracting, is
/// an "input").
arg_polarity: Option<Vec<ArgPolarity>>,
/// Infallibility: if an external extractor returns `(T1, T2,
/// ...)` rather than `Option<(T1, T2, ...)>`, and hence can
/// never fail, it is declared as such and allows for slightly
/// better code to be generated.
infallible: bool,
},
/// An external constructor: `(constructor Term rustfunc)` form.
Constructor {
/// The term to which this external constructor is attached.
term: Ident,
/// The Rust function name.
func: Ident,
/// The position of this decl.
pos: Pos,
},
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArgPolarity {
/// An arg that must be given an Expr in the pattern and passes
/// data *to* the extractor op.
Input,
/// An arg that must be given a regular pattern (not Expr) and
/// receives data *from* the extractor op.
Output,
}
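// Hedged examples of the extern forms the parser accepts (identifiers are
// invented for illustration):
//
//   (extern constructor my_term my_rust_ctor)
//   (extern extractor infallible my_term my_rust_etor (out in))
//
// The optional polarity list marks each extractor argument as an input to or
// an output from the Rust function; with no list, every argument defaults to
// an output.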

File diff suppressed because it is too large.


@@ -0,0 +1,11 @@
//! Compilation process, from AST to Sema to Sequences of Insts.
use crate::error::Error;
use crate::{ast, codegen, sema};
pub fn compile(defs: &ast::Defs) -> Result<String, Vec<Error>> {
let mut typeenv = sema::TypeEnv::from_ast(defs)?;
let termenv = sema::TermEnv::from_ast(&mut typeenv, defs)?;
let codegen = codegen::Codegen::compile(&typeenv, &termenv).map_err(|e| vec![e])?;
codegen.generate_rust().map_err(|e| vec![e])
}
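// A minimal end-to-end sketch of how the pieces introduced in this crate fit
// together (illustrative only; this driver is not part of the commit):
//
//     use isle::{compile, error::Error, lexer::Lexer, parser::Parser};
//
//     fn build(filenames: Vec<String>) -> Result<String, Vec<Error>> {
//         let lexer = Lexer::from_files(filenames).map_err(|e| vec![e])?;
//         let mut parser = Parser::new(lexer);
//         let defs = parser.parse_defs().map_err(|e| vec![e])?;
//         compile::compile(&defs)
//     }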


@@ -0,0 +1,50 @@
//! Error types.
use crate::lexer::Pos;
use std::fmt;
#[derive(Clone, Debug)]
pub enum Error {
CompileError {
msg: String,
filename: String,
pos: Pos,
},
SystemError {
msg: String,
},
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
&Error::CompileError {
ref msg,
ref filename,
pos,
} => {
write!(f, "{}:{}:{}: error: {}", filename, pos.line, pos.col, msg)
}
&Error::SystemError { ref msg } => {
write!(f, "{}", msg)
}
}
}
}
impl std::error::Error for Error {}
impl std::convert::From<std::fmt::Error> for Error {
fn from(e: std::fmt::Error) -> Error {
Error::SystemError {
msg: format!("{}", e),
}
}
}
impl std::convert::From<std::io::Error> for Error {
fn from(e: std::io::Error) -> Error {
Error::SystemError {
msg: format!("{}", e),
}
}
}
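// These From impls are what let `?` convert `std::io::Error` (e.g. in
// `Lexer::from_files`) and `std::fmt::Error` into the crate's `Error` type.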


@@ -0,0 +1,571 @@
//! Lowered matching IR.
use crate::declare_id;
use crate::lexer::Pos;
use crate::sema::*;
use std::collections::HashMap;
declare_id!(InstId);
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Value {
/// A value produced by an instruction in the Pattern (LHS).
Pattern { inst: InstId, output: usize },
/// A value produced by an instruction in the Expr (RHS).
Expr { inst: InstId, output: usize },
}
/// A single Pattern instruction.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum PatternInst {
/// Get the Nth input argument, which corresponds to the Nth field
/// of the root term.
Arg { index: usize, ty: TypeId },
/// Match a value as equal to another value. Produces no values.
MatchEqual { a: Value, b: Value, ty: TypeId },
/// Try matching the given value as the given integer. Produces no values.
MatchInt {
input: Value,
ty: TypeId,
int_val: i64,
},
/// Try matching the given value as the given variant, producing
/// `|arg_tys|` values as output.
MatchVariant {
input: Value,
input_ty: TypeId,
arg_tys: Vec<TypeId>,
variant: VariantId,
},
/// Invoke an extractor, taking the given values as input (the
/// first is the value to extract, the others are the
/// `Input`-polarity extractor args) and producing an output
/// value for each `Output`-polarity extractor arg.
Extract {
inputs: Vec<Value>,
input_tys: Vec<TypeId>,
output_tys: Vec<TypeId>,
term: TermId,
infallible: bool,
},
/// Evaluate an expression and provide the given value as the
/// result of this match instruction. The expression has access to
/// the pattern-values up to this point in the sequence.
Expr {
seq: ExprSequence,
output: Value,
output_ty: TypeId,
},
}
/// A single Expr instruction.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum ExprInst {
/// Produce a constant integer.
ConstInt { ty: TypeId, val: i64 },
/// Create a variant.
CreateVariant {
inputs: Vec<(Value, TypeId)>,
ty: TypeId,
variant: VariantId,
},
/// Invoke a constructor.
Construct {
inputs: Vec<(Value, TypeId)>,
ty: TypeId,
term: TermId,
infallible: bool,
},
/// Set the Nth return value. Produces no values.
Return {
index: usize,
ty: TypeId,
value: Value,
},
}
impl ExprInst {
pub fn visit_values<F: FnMut(Value)>(&self, mut f: F) {
match self {
&ExprInst::ConstInt { .. } => {}
&ExprInst::Construct { ref inputs, .. }
| &ExprInst::CreateVariant { ref inputs, .. } => {
for (input, _ty) in inputs {
f(*input);
}
}
&ExprInst::Return { value, .. } => {
f(value);
}
}
}
}
/// A linear sequence of instructions that match on and destructure an
/// argument. A pattern is fallible (may not match). If it does not
/// fail, its result consists of the values produced by the
/// `PatternInst`s, which may be used by a subsequent `Expr`.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Default)]
pub struct PatternSequence {
/// Instruction sequence for pattern. InstId indexes into this
/// sequence for `Value::Pattern` values.
pub insts: Vec<PatternInst>,
}
/// A linear sequence of instructions that produce a new value from
/// the right-hand side of a rule, given bindings that come from a
/// `Pattern` derived from the left-hand side.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Default, PartialOrd, Ord)]
pub struct ExprSequence {
/// Instruction sequence for expression. InstId indexes into this
/// sequence for `Value::Expr` values.
pub insts: Vec<ExprInst>,
/// Position at which the rule producing this sequence was located.
pub pos: Pos,
}
impl ExprSequence {
pub fn is_const_int(&self) -> Option<(TypeId, i64)> {
if self.insts.len() == 2 && matches!(&self.insts[1], &ExprInst::Return { .. }) {
match &self.insts[0] {
&ExprInst::ConstInt { ty, val } => Some((ty, val)),
_ => None,
}
} else {
None
}
}
pub fn is_const_variant(&self) -> Option<(TypeId, VariantId)> {
if self.insts.len() == 2 && matches!(&self.insts[1], &ExprInst::Return { .. }) {
match &self.insts[0] {
&ExprInst::CreateVariant {
ref inputs,
ty,
variant,
} if inputs.len() == 0 => Some((ty, variant)),
_ => None,
}
} else {
None
}
}
}
#[derive(Clone, Copy, Debug)]
enum ValueOrArgs {
Value(Value),
ImplicitTermFromArgs(TermId),
}
impl ValueOrArgs {
fn to_value(&self) -> Option<Value> {
match self {
&ValueOrArgs::Value(v) => Some(v),
_ => None,
}
}
}
impl PatternSequence {
fn add_inst(&mut self, inst: PatternInst) -> InstId {
let id = InstId(self.insts.len());
self.insts.push(inst);
id
}
fn add_arg(&mut self, index: usize, ty: TypeId) -> Value {
let inst = InstId(self.insts.len());
self.add_inst(PatternInst::Arg { index, ty });
Value::Pattern { inst, output: 0 }
}
fn add_match_equal(&mut self, a: Value, b: Value, ty: TypeId) {
self.add_inst(PatternInst::MatchEqual { a, b, ty });
}
fn add_match_int(&mut self, input: Value, ty: TypeId, int_val: i64) {
self.add_inst(PatternInst::MatchInt { input, ty, int_val });
}
fn add_match_variant(
&mut self,
input: Value,
input_ty: TypeId,
arg_tys: &[TypeId],
variant: VariantId,
) -> Vec<Value> {
let inst = InstId(self.insts.len());
let mut outs = vec![];
for (i, _arg_ty) in arg_tys.iter().enumerate() {
let val = Value::Pattern { inst, output: i };
outs.push(val);
}
let arg_tys = arg_tys.iter().cloned().collect();
self.add_inst(PatternInst::MatchVariant {
input,
input_ty,
arg_tys,
variant,
});
outs
}
fn add_extract(
&mut self,
inputs: Vec<Value>,
input_tys: Vec<TypeId>,
output_tys: Vec<TypeId>,
term: TermId,
infallible: bool,
) -> Vec<Value> {
let inst = InstId(self.insts.len());
let mut outs = vec![];
for i in 0..output_tys.len() {
let val = Value::Pattern { inst, output: i };
outs.push(val);
}
let output_tys = output_tys.iter().cloned().collect();
self.add_inst(PatternInst::Extract {
inputs,
input_tys,
output_tys,
term,
infallible,
});
outs
}
fn add_expr_seq(&mut self, seq: ExprSequence, output: Value, output_ty: TypeId) -> Value {
let inst = self.add_inst(PatternInst::Expr {
seq,
output,
output_ty,
});
// The embedded expression sequence produces a single output value.
Value::Pattern { inst, output: 0 }
}
/// Generate PatternInsts to match the given (sub)pattern. Works
/// recursively down the AST.
fn gen_pattern(
&mut self,
input: ValueOrArgs,
typeenv: &TypeEnv,
termenv: &TermEnv,
pat: &Pattern,
vars: &mut HashMap<VarId, Value>,
) {
match pat {
&Pattern::BindPattern(_ty, var, ref subpat) => {
// Bind the appropriate variable and recurse.
assert!(!vars.contains_key(&var));
if let Some(v) = input.to_value() {
vars.insert(var, v);
}
self.gen_pattern(input, typeenv, termenv, &*subpat, vars);
}
&Pattern::Var(ty, var) => {
// Assert that the value matches the existing bound var.
let var_val = vars
.get(&var)
.cloned()
.expect("Variable should already be bound");
let input_val = input
.to_value()
.expect("Cannot match an =var pattern against root term");
self.add_match_equal(input_val, var_val, ty);
}
&Pattern::ConstInt(ty, value) => {
// Assert that the value matches the constant integer.
let input_val = input
.to_value()
.expect("Cannot match an =var pattern against root term");
self.add_match_int(input_val, ty, value);
}
&Pattern::Term(ty, term, ref args) => {
match input {
ValueOrArgs::ImplicitTermFromArgs(termid) => {
assert_eq!(
termid, term,
"Cannot match a different term against root pattern"
);
let termdata = &termenv.terms[term.index()];
let arg_tys = &termdata.arg_tys[..];
for (i, subpat) in args.iter().enumerate() {
let value = self.add_arg(i, arg_tys[i]);
let subpat = match subpat {
&TermArgPattern::Expr(..) => {
panic!("Should have been caught in typechecking")
}
&TermArgPattern::Pattern(ref pat) => pat,
};
self.gen_pattern(
ValueOrArgs::Value(value),
typeenv,
termenv,
subpat,
vars,
);
}
}
ValueOrArgs::Value(input) => {
// Determine whether the term has an external extractor or not.
let termdata = &termenv.terms[term.index()];
let arg_tys = &termdata.arg_tys[..];
match &termdata.kind {
&TermKind::Declared => {
panic!("Pattern invocation of undefined term body");
}
&TermKind::EnumVariant { variant } => {
let arg_values =
self.add_match_variant(input, ty, arg_tys, variant);
for (subpat, value) in args.iter().zip(arg_values.into_iter()) {
let subpat = match subpat {
&TermArgPattern::Pattern(ref pat) => pat,
_ => unreachable!("Should have been caught by sema"),
};
self.gen_pattern(
ValueOrArgs::Value(value),
typeenv,
termenv,
subpat,
vars,
);
}
}
&TermKind::InternalConstructor
| &TermKind::ExternalConstructor { .. } => {
panic!("Should not invoke constructor in pattern");
}
&TermKind::InternalExtractor { .. } => {
panic!("Should have been expanded away");
}
&TermKind::ExternalExtractor {
ref arg_polarity,
infallible,
..
} => {
// Evaluate all `input` args.
let mut inputs = vec![];
let mut input_tys = vec![];
let mut output_tys = vec![];
let mut output_pats = vec![];
inputs.push(input);
input_tys.push(termdata.ret_ty);
for (arg, pol) in args.iter().zip(arg_polarity.iter()) {
match pol {
&ArgPolarity::Input => {
let expr = match arg {
&TermArgPattern::Expr(ref expr) => expr,
_ => panic!(
"Should have been caught by typechecking"
),
};
let mut seq = ExprSequence::default();
let value = seq.gen_expr(typeenv, termenv, expr, vars);
seq.add_return(expr.ty(), value);
let value = self.add_expr_seq(seq, value, expr.ty());
inputs.push(value);
input_tys.push(expr.ty());
}
&ArgPolarity::Output => {
let pat = match arg {
&TermArgPattern::Pattern(ref pat) => pat,
_ => panic!(
"Should have been caught by typechecking"
),
};
output_tys.push(pat.ty());
output_pats.push(pat);
}
}
}
// Invoke the extractor.
let arg_values = self
.add_extract(inputs, input_tys, output_tys, term, infallible);
for (pat, &val) in output_pats.iter().zip(arg_values.iter()) {
self.gen_pattern(
ValueOrArgs::Value(val),
typeenv,
termenv,
pat,
vars,
);
}
}
}
}
}
}
&Pattern::And(_ty, ref children) => {
for child in children {
self.gen_pattern(input, typeenv, termenv, child, vars);
}
}
&Pattern::Wildcard(_ty) => {
// Nothing!
}
}
}
}
impl ExprSequence {
fn add_inst(&mut self, inst: ExprInst) -> InstId {
let id = InstId(self.insts.len());
self.insts.push(inst);
id
}
fn add_const_int(&mut self, ty: TypeId, val: i64) -> Value {
let inst = InstId(self.insts.len());
self.add_inst(ExprInst::ConstInt { ty, val });
Value::Expr { inst, output: 0 }
}
fn add_create_variant(
&mut self,
inputs: &[(Value, TypeId)],
ty: TypeId,
variant: VariantId,
) -> Value {
let inst = InstId(self.insts.len());
let inputs = inputs.iter().cloned().collect();
self.add_inst(ExprInst::CreateVariant {
inputs,
ty,
variant,
});
Value::Expr { inst, output: 0 }
}
fn add_construct(
&mut self,
inputs: &[(Value, TypeId)],
ty: TypeId,
term: TermId,
infallible: bool,
) -> Value {
let inst = InstId(self.insts.len());
let inputs = inputs.iter().cloned().collect();
self.add_inst(ExprInst::Construct {
inputs,
ty,
term,
infallible,
});
Value::Expr { inst, output: 0 }
}
fn add_return(&mut self, ty: TypeId, value: Value) {
self.add_inst(ExprInst::Return {
index: 0,
ty,
value,
});
}
/// Creates a sequence of ExprInsts to generate the given
/// expression value, and returns the root value of the
/// generated expression.
fn gen_expr(
&mut self,
typeenv: &TypeEnv,
termenv: &TermEnv,
expr: &Expr,
vars: &HashMap<VarId, Value>,
) -> Value {
log::trace!("gen_expr: expr {:?}", expr);
match expr {
&Expr::ConstInt(ty, val) => self.add_const_int(ty, val),
&Expr::Let(_ty, ref bindings, ref subexpr) => {
let mut vars = vars.clone();
for &(var, _var_ty, ref var_expr) in bindings {
let var_value = self.gen_expr(typeenv, termenv, &*var_expr, &vars);
vars.insert(var, var_value);
}
self.gen_expr(typeenv, termenv, &*subexpr, &vars)
}
&Expr::Var(_ty, var_id) => vars.get(&var_id).cloned().unwrap(),
&Expr::Term(ty, term, ref arg_exprs) => {
let termdata = &termenv.terms[term.index()];
let mut arg_values_tys = vec![];
for (arg_ty, arg_expr) in termdata.arg_tys.iter().cloned().zip(arg_exprs.iter()) {
arg_values_tys
.push((self.gen_expr(typeenv, termenv, &*arg_expr, &vars), arg_ty));
}
match &termdata.kind {
&TermKind::EnumVariant { variant } => {
self.add_create_variant(&arg_values_tys[..], ty, variant)
}
&TermKind::InternalConstructor => {
self.add_construct(
&arg_values_tys[..],
ty,
term,
/* infallible = */ false,
)
}
&TermKind::ExternalConstructor { .. } => {
self.add_construct(
&arg_values_tys[..],
ty,
term,
/* infallible = */ true,
)
}
_ => panic!("Should have been caught by typechecking"),
}
}
}
}
}
/// Build a sequence from a rule.
pub fn lower_rule(
tyenv: &TypeEnv,
termenv: &TermEnv,
rule: RuleId,
) -> (PatternSequence, ExprSequence) {
let mut pattern_seq: PatternSequence = Default::default();
let mut expr_seq: ExprSequence = Default::default();
expr_seq.pos = termenv.rules[rule.index()].pos;
let ruledata = &termenv.rules[rule.index()];
let mut vars = HashMap::new();
let root_term = ruledata
.lhs
.root_term()
.expect("Pattern must have a term at the root");
log::trace!("lower_rule: ruledata {:?}", ruledata);
// Lower the pattern, starting from the root input value.
pattern_seq.gen_pattern(
ValueOrArgs::ImplicitTermFromArgs(root_term),
tyenv,
termenv,
&ruledata.lhs,
&mut vars,
);
// Lower the expression, making use of the bound variables
// from the pattern.
let rhs_root_val = expr_seq.gen_expr(tyenv, termenv, &ruledata.rhs, &vars);
// Return the root RHS value.
let output_ty = ruledata.rhs.ty();
expr_seq.add_return(output_ty, rhs_root_val);
(pattern_seq, expr_seq)
}
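// Rough illustration of the lowering (hedged; term/variant names are invented
// and the exact instruction layout depends on sema): a rule such as
//
//     (rule (lower (iadd a b)) (isa_add a b))
//
// where `lower` is the root term and `iadd` is an enum variant, lowers to a
// PatternSequence along the lines of
//
//     Arg { index: 0, .. }                       ; fetch the root term's arg
//     MatchVariant { input, variant: iadd, .. }  ; destructure, binding a, b
//
// and an ExprSequence along the lines of
//
//     Construct { inputs: [a, b], term: isa_add, .. }
//     Return { index: 0, value, .. }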


@@ -0,0 +1,318 @@
//! Lexer for the ISLE language.
use crate::error::Error;
use std::borrow::Cow;
#[derive(Clone, Debug)]
pub struct Lexer<'a> {
pub filenames: Vec<String>,
file_starts: Vec<usize>,
buf: Cow<'a, [u8]>,
pos: Pos,
lookahead: Option<(Pos, Token)>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Hash, PartialOrd, Ord)]
pub struct Pos {
pub file: usize,
pub offset: usize,
pub line: usize,
pub col: usize,
}
impl Pos {
pub fn pretty_print(&self, filenames: &[String]) -> String {
format!("{}:{}:{}", filenames[self.file], self.line, self.col)
}
pub fn pretty_print_line(&self, filenames: &[String]) -> String {
format!("{} line {}", filenames[self.file], self.line)
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Token {
LParen,
RParen,
Symbol(String),
Int(i64),
At,
Lt,
}
impl<'a> Lexer<'a> {
pub fn from_str(s: &'a str, filename: &'a str) -> Lexer<'a> {
let mut l = Lexer {
filenames: vec![filename.to_string()],
file_starts: vec![0],
buf: Cow::Borrowed(s.as_bytes()),
pos: Pos {
file: 0,
offset: 0,
line: 1,
col: 0,
},
lookahead: None,
};
l.reload();
l
}
pub fn from_files(filenames: Vec<String>) -> Result<Lexer<'a>, Error> {
assert!(!filenames.is_empty());
let file_contents: Vec<String> = filenames
.iter()
.map(|f| {
use std::io::Read;
let mut f = std::fs::File::open(f)?;
let mut s = String::new();
f.read_to_string(&mut s)?;
Ok(s)
})
.collect::<Result<Vec<String>, Error>>()?;
let mut file_starts = vec![];
let mut buf = String::new();
for file in file_contents {
file_starts.push(buf.len());
buf += &file;
buf += "\n";
}
let mut l = Lexer {
filenames,
buf: Cow::Owned(buf.into_bytes()),
file_starts,
pos: Pos {
file: 0,
offset: 0,
line: 1,
col: 0,
},
lookahead: None,
};
l.reload();
Ok(l)
}
pub fn offset(&self) -> usize {
self.pos.offset
}
pub fn pos(&self) -> Pos {
self.pos
}
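// Advance one byte, updating line/column and, when the offset crosses the
// next entry in `file_starts`, moving to the next concatenated input file.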
fn advance_pos(&mut self) {
self.pos.col += 1;
if self.buf[self.pos.offset] == b'\n' {
self.pos.line += 1;
self.pos.col = 0;
}
self.pos.offset += 1;
if self.pos.file + 1 < self.file_starts.len() {
let next_start = self.file_starts[self.pos.file + 1];
if self.pos.offset >= next_start {
assert!(self.pos.offset == next_start);
self.pos.file += 1;
self.pos.line = 1;
}
}
}
fn next_token(&mut self) -> Option<(Pos, Token)> {
fn is_sym_first_char(c: u8) -> bool {
match c {
b'-' | b'0'..=b'9' | b'(' | b')' | b';' => false,
c if c.is_ascii_whitespace() => false,
_ => true,
}
}
fn is_sym_other_char(c: u8) -> bool {
match c {
b'(' | b')' | b';' | b'@' | b'<' => false,
c if c.is_ascii_whitespace() => false,
_ => true,
}
}
// Skip any whitespace and any comments.
while self.pos.offset < self.buf.len() {
if self.buf[self.pos.offset].is_ascii_whitespace() {
self.advance_pos();
continue;
}
if self.buf[self.pos.offset] == b';' {
while self.pos.offset < self.buf.len() && self.buf[self.pos.offset] != b'\n' {
self.advance_pos();
}
continue;
}
break;
}
if self.pos.offset == self.buf.len() {
return None;
}
let char_pos = self.pos;
match self.buf[self.pos.offset] {
b'(' => {
self.advance_pos();
Some((char_pos, Token::LParen))
}
b')' => {
self.advance_pos();
Some((char_pos, Token::RParen))
}
b'@' => {
self.advance_pos();
Some((char_pos, Token::At))
}
b'<' => {
self.advance_pos();
Some((char_pos, Token::Lt))
}
c if is_sym_first_char(c) => {
let start = self.pos.offset;
let start_pos = self.pos;
while self.pos.offset < self.buf.len()
&& is_sym_other_char(self.buf[self.pos.offset])
{
self.advance_pos();
}
let end = self.pos.offset;
let s = std::str::from_utf8(&self.buf[start..end])
.expect("Only ASCII characters, should be UTF-8");
Some((start_pos, Token::Symbol(s.to_string())))
}
c if (c >= b'0' && c <= b'9') || c == b'-' => {
let start_pos = self.pos;
let neg = if c == b'-' {
self.advance_pos();
true
} else {
false
};
let mut num = 0;
while self.pos.offset < self.buf.len()
&& (self.buf[self.pos.offset] >= b'0' && self.buf[self.pos.offset] <= b'9')
{
num = (num * 10) + (self.buf[self.pos.offset] - b'0') as i64;
self.advance_pos();
}
let tok = if neg {
Token::Int(-num)
} else {
Token::Int(num)
};
Some((start_pos, tok))
}
c => panic!("Unexpected character '{}' at offset {}", c as char, self.pos.offset),
}
}
fn reload(&mut self) {
if self.lookahead.is_none() && self.pos.offset < self.buf.len() {
self.lookahead = self.next_token();
}
}
pub fn peek(&self) -> Option<&(Pos, Token)> {
self.lookahead.as_ref()
}
pub fn eof(&self) -> bool {
self.lookahead.is_none()
}
}
impl<'a> std::iter::Iterator for Lexer<'a> {
type Item = (Pos, Token);
fn next(&mut self) -> Option<(Pos, Token)> {
let tok = self.lookahead.take();
self.reload();
tok
}
}
impl Token {
pub fn is_int(&self) -> bool {
match self {
Token::Int(_) => true,
_ => false,
}
}
pub fn is_sym(&self) -> bool {
match self {
Token::Symbol(_) => true,
_ => false,
}
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn lexer_basic() {
assert_eq!(
Lexer::from_str(
";; comment\n; another\r\n \t(one two three 23 -568 )\n",
"test"
)
.map(|(_, tok)| tok)
.collect::<Vec<_>>(),
vec![
Token::LParen,
Token::Symbol("one".to_string()),
Token::Symbol("two".to_string()),
Token::Symbol("three".to_string()),
Token::Int(23),
Token::Int(-568),
Token::RParen
]
);
}
#[test]
fn ends_with_sym() {
assert_eq!(
Lexer::from_str("asdf", "test")
.map(|(_, tok)| tok)
.collect::<Vec<_>>(),
vec![Token::Symbol("asdf".to_string()),]
);
}
#[test]
fn ends_with_num() {
assert_eq!(
Lexer::from_str("23", "test")
.map(|(_, tok)| tok)
.collect::<Vec<_>>(),
vec![Token::Int(23)],
);
}
#[test]
fn weird_syms() {
assert_eq!(
Lexer::from_str("(+ [] => !! _test!;comment\n)", "test")
.map(|(_, tok)| tok)
.collect::<Vec<_>>(),
vec![
Token::LParen,
Token::Symbol("+".to_string()),
Token::Symbol("[]".to_string()),
Token::Symbol("=>".to_string()),
Token::Symbol("!!".to_string()),
Token::Symbol("_test!".to_string()),
Token::RParen,
]
);
}
}


@@ -0,0 +1,9 @@
pub mod ast;
pub mod codegen;
pub mod compile;
pub mod error;
pub mod ir;
pub mod lexer;
pub mod parser;
pub mod sema;


@@ -0,0 +1,474 @@
//! Parser for the ISLE language.
use crate::ast::*;
use crate::error::*;
use crate::lexer::{Lexer, Pos, Token};
#[derive(Clone, Debug)]
pub struct Parser<'a> {
lexer: Lexer<'a>,
}
pub type ParseResult<T> = std::result::Result<T, Error>;
impl<'a> Parser<'a> {
pub fn new(lexer: Lexer<'a>) -> Parser<'a> {
Parser { lexer }
}
pub fn error(&self, pos: Pos, msg: String) -> Error {
Error::CompileError {
filename: self.lexer.filenames[pos.file].clone(),
pos,
msg,
}
}
fn take<F: Fn(&Token) -> bool>(&mut self, f: F) -> ParseResult<Token> {
if let Some(&(pos, ref peek)) = self.lexer.peek() {
if !f(peek) {
return Err(self.error(pos, format!("Unexpected token {:?}", peek)));
}
Ok(self.lexer.next().unwrap().1)
} else {
Err(self.error(self.lexer.pos(), "Unexpected EOF".to_string()))
}
}
fn is<F: Fn(&Token) -> bool>(&self, f: F) -> bool {
if let Some(&(_, ref peek)) = self.lexer.peek() {
f(peek)
} else {
false
}
}
fn pos(&self) -> Option<Pos> {
self.lexer.peek().map(|(pos, _)| *pos)
}
fn is_lparen(&self) -> bool {
self.is(|tok| *tok == Token::LParen)
}
fn is_rparen(&self) -> bool {
self.is(|tok| *tok == Token::RParen)
}
fn is_at(&self) -> bool {
self.is(|tok| *tok == Token::At)
}
fn is_lt(&self) -> bool {
self.is(|tok| *tok == Token::Lt)
}
fn is_sym(&self) -> bool {
self.is(|tok| tok.is_sym())
}
fn is_int(&self) -> bool {
self.is(|tok| tok.is_int())
}
fn is_sym_str(&self, s: &str) -> bool {
self.is(|tok| match tok {
&Token::Symbol(ref tok_s) if tok_s == s => true,
_ => false,
})
}
fn lparen(&mut self) -> ParseResult<()> {
self.take(|tok| *tok == Token::LParen).map(|_| ())
}
fn rparen(&mut self) -> ParseResult<()> {
self.take(|tok| *tok == Token::RParen).map(|_| ())
}
fn at(&mut self) -> ParseResult<()> {
self.take(|tok| *tok == Token::At).map(|_| ())
}
fn lt(&mut self) -> ParseResult<()> {
self.take(|tok| *tok == Token::Lt).map(|_| ())
}
fn symbol(&mut self) -> ParseResult<String> {
match self.take(|tok| tok.is_sym())? {
Token::Symbol(s) => Ok(s),
_ => unreachable!(),
}
}
fn int(&mut self) -> ParseResult<i64> {
match self.take(|tok| tok.is_int())? {
Token::Int(i) => Ok(i),
_ => unreachable!(),
}
}
pub fn parse_defs(&mut self) -> ParseResult<Defs> {
let mut defs = vec![];
while !self.lexer.eof() {
defs.push(self.parse_def()?);
}
Ok(Defs {
defs,
filenames: self.lexer.filenames.clone(),
})
}
fn parse_def(&mut self) -> ParseResult<Def> {
self.lparen()?;
let pos = self.pos();
let def = match &self.symbol()?[..] {
"type" => Def::Type(self.parse_type()?),
"decl" => Def::Decl(self.parse_decl()?),
"rule" => Def::Rule(self.parse_rule()?),
"extractor" => Def::Extractor(self.parse_etor()?),
"extern" => Def::Extern(self.parse_extern()?),
s => {
return Err(self.error(pos.unwrap(), format!("Unexpected identifier: {}", s)));
}
};
self.rparen()?;
Ok(def)
}
fn str_to_ident(&self, pos: Pos, s: &str) -> ParseResult<Ident> {
let first = s.chars().next().unwrap();
if !first.is_alphabetic() && first != '_' {
return Err(self.error(
pos,
format!("Identifier '{}' does not start with letter or _", s),
));
}
if s.chars()
.skip(1)
.any(|c| !c.is_alphanumeric() && c != '_' && c != '.')
{
return Err(self.error(
pos,
format!(
"Identifier '{}' contains invalid character (not a-z, A-Z, 0-9, _, .)",
s
),
));
}
Ok(Ident(s.to_string(), pos))
}
fn parse_ident(&mut self) -> ParseResult<Ident> {
let pos = self.pos();
let s = self.symbol()?;
self.str_to_ident(pos.unwrap(), &s)
}
fn parse_type(&mut self) -> ParseResult<Type> {
let pos = self.pos();
let name = self.parse_ident()?;
let mut is_extern = false;
if self.is_sym_str("extern") {
self.symbol()?;
is_extern = true;
}
let ty = self.parse_typevalue()?;
Ok(Type {
name,
is_extern,
ty,
pos: pos.unwrap(),
})
}
fn parse_typevalue(&mut self) -> ParseResult<TypeValue> {
let pos = self.pos();
self.lparen()?;
if self.is_sym_str("primitive") {
self.symbol()?;
let primitive_ident = self.parse_ident()?;
self.rparen()?;
let pos = pos.unwrap();
Ok(TypeValue::Primitive(primitive_ident, pos))
} else if self.is_sym_str("enum") {
self.symbol()?;
let mut variants = vec![];
while !self.is_rparen() {
let variant = self.parse_type_variant()?;
variants.push(variant);
}
self.rparen()?;
let pos = pos.unwrap();
Ok(TypeValue::Enum(variants, pos))
} else {
Err(self.error(pos.unwrap(), "Unknown type definition".to_string()))
}
}
fn parse_type_variant(&mut self) -> ParseResult<Variant> {
if self.is_sym() {
let pos = self.pos().unwrap();
let name = self.parse_ident()?;
Ok(Variant {
name,
fields: vec![],
pos,
})
} else {
let pos = self.pos();
self.lparen()?;
let name = self.parse_ident()?;
let mut fields = vec![];
while !self.is_rparen() {
fields.push(self.parse_type_field()?);
}
self.rparen()?;
let pos = pos.unwrap();
Ok(Variant { name, fields, pos })
}
}
fn parse_type_field(&mut self) -> ParseResult<Field> {
let pos = self.pos();
self.lparen()?;
let name = self.parse_ident()?;
let ty = self.parse_ident()?;
self.rparen()?;
let pos = pos.unwrap();
Ok(Field { name, ty, pos })
}
fn parse_decl(&mut self) -> ParseResult<Decl> {
let pos = self.pos();
let term = self.parse_ident()?;
self.lparen()?;
let mut arg_tys = vec![];
while !self.is_rparen() {
arg_tys.push(self.parse_ident()?);
}
self.rparen()?;
let ret_ty = self.parse_ident()?;
Ok(Decl {
term,
arg_tys,
ret_ty,
pos: pos.unwrap(),
})
}
fn parse_extern(&mut self) -> ParseResult<Extern> {
let pos = self.pos();
if self.is_sym_str("constructor") {
self.symbol()?;
let term = self.parse_ident()?;
let func = self.parse_ident()?;
Ok(Extern::Constructor {
term,
func,
pos: pos.unwrap(),
})
} else if self.is_sym_str("extractor") {
self.symbol()?;
let infallible = if self.is_sym_str("infallible") {
self.symbol()?;
true
} else {
false
};
let term = self.parse_ident()?;
let func = self.parse_ident()?;
let arg_polarity = if self.is_lparen() {
let mut pol = vec![];
self.lparen()?;
while !self.is_rparen() {
if self.is_sym_str("in") {
self.symbol()?;
pol.push(ArgPolarity::Input);
} else if self.is_sym_str("out") {
self.symbol()?;
pol.push(ArgPolarity::Output);
} else {
return Err(
self.error(pos.unwrap(), "Invalid argument polarity".to_string())
);
}
}
self.rparen()?;
Some(pol)
} else {
None
};
Ok(Extern::Extractor {
term,
func,
pos: pos.unwrap(),
arg_polarity,
infallible,
})
} else {
Err(self.error(
pos.unwrap(),
"Invalid extern: must be (extern constructor ...) or (extern extractor ...)"
.to_string(),
))
}
}
fn parse_etor(&mut self) -> ParseResult<Extractor> {
let pos = self.pos();
self.lparen()?;
let term = self.parse_ident()?;
let mut args = vec![];
while !self.is_rparen() {
args.push(self.parse_ident()?);
}
self.rparen()?;
let template = self.parse_pattern()?;
Ok(Extractor {
term,
args,
template,
pos: pos.unwrap(),
})
}
fn parse_rule(&mut self) -> ParseResult<Rule> {
let pos = self.pos();
let prio = if self.is_int() {
Some(self.int()?)
} else {
None
};
let pattern = self.parse_pattern()?;
let expr = self.parse_expr()?;
Ok(Rule {
pattern,
expr,
pos: pos.unwrap(),
prio,
})
}
fn parse_pattern(&mut self) -> ParseResult<Pattern> {
let pos = self.pos();
if self.is_int() {
let pos = pos.unwrap();
Ok(Pattern::ConstInt {
val: self.int()?,
pos,
})
} else if self.is_sym_str("_") {
let pos = pos.unwrap();
self.symbol()?;
Ok(Pattern::Wildcard { pos })
} else if self.is_sym() {
let pos = pos.unwrap();
let s = self.symbol()?;
if s.starts_with("=") {
let s = &s[1..];
let var = self.str_to_ident(pos, s)?;
Ok(Pattern::Var { var, pos })
} else {
let var = self.str_to_ident(pos, &s)?;
if self.is_at() {
self.at()?;
let subpat = Box::new(self.parse_pattern()?);
Ok(Pattern::BindPattern { var, subpat, pos })
} else {
Ok(Pattern::BindPattern {
var,
subpat: Box::new(Pattern::Wildcard { pos }),
pos,
})
}
}
} else if self.is_lparen() {
let pos = pos.unwrap();
self.lparen()?;
if self.is_sym_str("and") {
self.symbol()?;
let mut subpats = vec![];
while !self.is_rparen() {
subpats.push(self.parse_pattern()?);
}
self.rparen()?;
Ok(Pattern::And { subpats, pos })
} else {
let sym = self.parse_ident()?;
let mut args = vec![];
while !self.is_rparen() {
args.push(self.parse_pattern_term_arg()?);
}
self.rparen()?;
Ok(Pattern::Term { sym, args, pos })
}
} else {
Err(self.error(pos.unwrap(), "Unexpected pattern".into()))
}
}
fn parse_pattern_term_arg(&mut self) -> ParseResult<TermArgPattern> {
if self.is_lt() {
self.lt()?;
Ok(TermArgPattern::Expr(self.parse_expr()?))
} else {
Ok(TermArgPattern::Pattern(self.parse_pattern()?))
}
}
fn parse_expr(&mut self) -> ParseResult<Expr> {
let pos = self.pos();
if self.is_lparen() {
let pos = pos.unwrap();
self.lparen()?;
if self.is_sym_str("let") {
self.symbol()?;
self.lparen()?;
let mut defs = vec![];
while !self.is_rparen() {
let def = self.parse_letdef()?;
defs.push(def);
}
self.rparen()?;
let body = Box::new(self.parse_expr()?);
self.rparen()?;
Ok(Expr::Let { defs, body, pos })
} else {
let sym = self.parse_ident()?;
let mut args = vec![];
while !self.is_rparen() {
args.push(self.parse_expr()?);
}
self.rparen()?;
Ok(Expr::Term { sym, args, pos })
}
} else if self.is_sym_str("#t") {
let pos = pos.unwrap();
self.symbol()?;
Ok(Expr::ConstInt { val: 1, pos })
} else if self.is_sym_str("#f") {
let pos = pos.unwrap();
self.symbol()?;
Ok(Expr::ConstInt { val: 0, pos })
} else if self.is_sym() {
let pos = pos.unwrap();
let name = self.parse_ident()?;
Ok(Expr::Var { name, pos })
} else if self.is_int() {
let pos = pos.unwrap();
let val = self.int()?;
Ok(Expr::ConstInt { val, pos })
} else {
Err(self.error(pos.unwrap(), "Invalid expression".into()))
}
}
fn parse_letdef(&mut self) -> ParseResult<LetDef> {
let pos = self.pos();
self.lparen()?;
let pos = pos.unwrap();
let var = self.parse_ident()?;
let ty = self.parse_ident()?;
let val = Box::new(self.parse_expr()?);
self.rparen()?;
Ok(LetDef { var, ty, val, pos })
}
}
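// Hedged summary of the surface syntax accepted above (examples are
// illustrative, not taken from this commit):
//
//   (type u32 (primitive u32))
//   (decl my_term (u32 u32) u32)
//   (rule (my_term x (my_other_term =x)) (let ((y u32 x)) y))
//
// Patterns may be an integer literal, `_`, a variable binding `v`, a bound
// binding `v @ <pat>`, an equality match `=v` against a previously bound
// variable, `(and <pat> ...)`, or a term application whose arguments may be
// prefixed with `<` to pass an evaluated expression into an extractor.
// Expressions are integer literals (`#t`/`#f` lex as symbols and parse to the
// constants 1/0), variables, `(let ((var ty val) ...) body)`, or term
// applications.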

File diff suppressed because it is too large.