This commit is contained in:
Chris Fallin
2021-09-04 17:01:56 -07:00
parent 8c727b175a
commit e5d76db97a
3 changed files with 417 additions and 32 deletions

View File

@@ -1,9 +1,9 @@
//! Generate Rust code from a series of Sequences. //! Generate Rust code from a series of Sequences.
use crate::error::Error; use crate::error::Error;
use crate::ir::{lower_rule, ExprSequence, PatternInst, PatternSequence}; use crate::ir::{lower_rule, ExprInst, ExprSequence, InstId, PatternInst, PatternSequence, Value};
use crate::sema::{RuleId, TermEnv, TermId, TermKind, Type, TypeEnv, TypeId}; use crate::sema::{RuleId, Term, TermEnv, TermId, TermKind, Type, TypeEnv, TypeId};
use std::collections::HashMap; use std::collections::{HashMap, HashSet};
use std::fmt::Write; use std::fmt::Write;
/// One "input symbol" for the decision tree that handles matching on /// One "input symbol" for the decision tree that handles matching on
@@ -495,6 +495,11 @@ pub struct Codegen<'a> {
functions_by_output: HashMap<TermId, TrieNode>, functions_by_output: HashMap<TermId, TrieNode>,
} }
/// Bookkeeping carried along while the body of one generated function is
/// being emitted.
#[derive(Debug, Clone, Default)]
struct BodyContext {
    /// Values whose generated local name is already a reference.
    ///
    /// `define_val` adds a value here when it is introduced as a reference;
    /// `value_by_ref` and `value_by_val` consult the set to decide whether a
    /// `&` prefix or a `.clone()` suffix is needed when the value is used.
    borrowed_values: HashSet<Value>,
}
impl<'a> Codegen<'a> { impl<'a> Codegen<'a> {
pub fn compile(typeenv: &'a TypeEnv, termenv: &'a TermEnv) -> Result<Codegen<'a>, Error> { pub fn compile(typeenv: &'a TypeEnv, termenv: &'a TermEnv) -> Result<Codegen<'a>, Error> {
let mut builder = TermFunctionsBuilder::new(typeenv, termenv); let mut builder = TermFunctionsBuilder::new(typeenv, termenv);
@@ -591,19 +596,70 @@ impl<'a> Codegen<'a> {
} }
} }
fn type_name(&self, typeid: TypeId, by_ref: bool) -> String { fn extractor_name_and_infallible(&self, term: TermId) -> (String, bool) {
let termdata = &self.termenv.terms[term.index()];
match &termdata.kind {
&TermKind::EnumVariant { .. } => panic!("using enum variant as extractor"),
&TermKind::Regular {
extractor: Some((sym, infallible)),
..
} => (self.typeenv.syms[sym.index()].clone(), infallible),
&TermKind::Regular {
extractor: None, ..
} => (
format!("extractor_{}", self.typeenv.syms[termdata.name.index()]),
false,
),
}
}
fn type_name(&self, typeid: TypeId, by_ref: Option<&str>) -> String {
match &self.typeenv.types[typeid.index()] { match &self.typeenv.types[typeid.index()] {
&Type::Primitive(_, sym) => self.typeenv.syms[sym.index()].clone(), &Type::Primitive(_, sym) => self.typeenv.syms[sym.index()].clone(),
&Type::Enum { name, .. } => { &Type::Enum { name, .. } => {
let r = if by_ref { "&" } else { "" }; let r = by_ref.unwrap_or("");
format!("{}{}", r, self.typeenv.syms[name.index()]) format!("{}{}", r, self.typeenv.syms[name.index()])
} }
} }
} }
/// Returns the local variable name used in generated code for `value`:
/// `pattern<inst>_<output>` for pattern values and `expr<inst>_<output>`
/// for expression values.
fn value_name(&self, value: &Value) -> String {
    let (prefix, inst, output) = match *value {
        Value::Pattern { inst, output } => ("pattern", inst, output),
        Value::Expr { inst, output } => ("expr", inst, output),
    };
    format!("{}{}_{}", prefix, inst.index(), output)
}
/// Returns an expression that evaluates to a reference to `value`.
///
/// If `ctx` records the value as already borrowed, its bare name is
/// returned; otherwise the name is prefixed with `&`.
fn value_by_ref(&self, value: &Value, ctx: &BodyContext) -> String {
    let name = self.value_name(value);
    match ctx.borrowed_values.contains(value) {
        true => name,
        false => format!("&{}", name),
    }
}
/// Returns an expression that evaluates to `value` by value.
///
/// If `ctx` records the value as borrowed, the name is suffixed with
/// `.clone()`; otherwise the bare name is returned.
fn value_by_val(&self, value: &Value, ctx: &BodyContext) -> String {
    let name = self.value_name(value);
    match ctx.borrowed_values.contains(value) {
        true => format!("{}.clone()", name),
        false => name,
    }
}
/// Records how `value` was introduced in the generated code.
///
/// Only reference-typed definitions need tracking: when `is_ref` is true
/// the value is added to `ctx.borrowed_values`; otherwise this is a no-op.
fn define_val(&self, value: &Value, ctx: &mut BodyContext, is_ref: bool) {
    if !is_ref {
        return;
    }
    ctx.borrowed_values.insert(value.clone());
}
fn generate_internal_term_constructors(&self, code: &mut dyn Write) -> Result<(), Error> { fn generate_internal_term_constructors(&self, code: &mut dyn Write) -> Result<(), Error> {
for (&termid, trie) in &self.functions_by_input { for (&termid, trie) in &self.functions_by_input {
let termdata = &self.termenv.terms[termid.index()]; let termdata = &self.termenv.terms[termid.index()];
// Skip terms that are enum variants or that have external constructors. // Skip terms that are enum variants or that have external constructors.
match &termdata.kind { match &termdata.kind {
&TermKind::EnumVariant { .. } => continue, &TermKind::EnumVariant { .. } => continue,
@@ -618,7 +674,11 @@ impl<'a> Codegen<'a> {
.iter() .iter()
.enumerate() .enumerate()
.map(|(i, &arg_ty)| { .map(|(i, &arg_ty)| {
format!("arg{}: {}", i, self.type_name(arg_ty, /* by_ref = */ true)) format!(
"arg{}: {}",
i,
self.type_name(arg_ty, /* by_ref = */ Some("&"))
)
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
writeln!( writeln!(
@@ -631,10 +691,11 @@ impl<'a> Codegen<'a> {
"fn {}<C>(ctx: &mut C, {}) -> Option<{}> {{", "fn {}<C>(ctx: &mut C, {}) -> Option<{}> {{",
func_name, func_name,
args.join(", "), args.join(", "),
self.type_name(termdata.ret_ty, /* by_ref = */ false) self.type_name(termdata.ret_ty, /* by_ref = */ None)
)?; )?;
self.generate_body(code, termid, trie)?; let mut body_ctx = Default::default();
self.generate_body(code, /* depth = */ 0, trie, " ", &mut body_ctx)?;
writeln!(code, "}}")?; writeln!(code, "}}")?;
} }
@@ -642,16 +703,322 @@ impl<'a> Codegen<'a> {
Ok(()) Ok(())
} }
fn generate_internal_term_extractors(&self, _code: &mut dyn Write) -> Result<(), Error> { fn generate_internal_term_extractors(&self, code: &mut dyn Write) -> Result<(), Error> {
for (&termid, trie) in &self.functions_by_output {
let termdata = &self.termenv.terms[termid.index()];
// Skip terms that are enum variants or that have external extractors.
match &termdata.kind {
&TermKind::EnumVariant { .. } => continue,
&TermKind::Regular { extractor, .. } if extractor.is_some() => continue,
_ => {}
}
// Get the name of the term and build up the signature.
let (func_name, _) = self.extractor_name_and_infallible(termid);
let arg = format!(
"arg: {}",
self.type_name(termdata.ret_ty, /* by_ref = */ Some("&"))
);
let ret_tuple_tys = termdata
.arg_tys
.iter()
.map(|ty| {
self.type_name(*ty, /* by_ref = */ None)
})
.collect::<Vec<_>>();
writeln!(
code,
"\n// Generated as internal extractor for term {}.",
self.typeenv.syms[termdata.name.index()],
)?;
writeln!(
code,
"fn {}<'a, C>(ctx: &mut C, {}) -> Option<({})> {{",
func_name,
arg,
ret_tuple_tys.join(", "),
)?;
let mut body_ctx = Default::default();
self.generate_extractor_header(code, termdata, &mut body_ctx)?;
self.generate_body(code, /* depth = */ 0, trie, " ", &mut body_ctx)?;
writeln!(code, " }}")?;
writeln!(code, "}}")?;
}
Ok(())
}
fn generate_extractor_header(
&self,
code: &mut dyn Write,
termdata: &Term,
ctx: &mut BodyContext,
) -> Result<(), Error> {
writeln!(code, " {{")?;
todo!();
Ok(())
}
/// Emits the Rust statement(s) for a single expression (right-hand-side)
/// instruction.
///
/// * `code` - sink receiving the generated source text.
/// * `id` - index of `inst` within its sequence; it determines the name of
///   the local that holds the instruction's result.
/// * `inst` - the instruction to translate.
/// * `indent` - prefix written before every emitted line.
/// * `ctx` - tracks which values are references, so that inputs can be
///   spelled by value (cloning when necessary).
///
/// # Errors
///
/// Propagates a failure to write to `code`.
///
/// # Panics
///
/// Panics if a `CreateVariant` instruction names a primitive type.
fn generate_expr_inst(
    &self,
    code: &mut dyn Write,
    id: InstId,
    inst: &ExprInst,
    indent: &str,
    ctx: &mut BodyContext,
) -> Result<(), Error> {
    // Every value-producing instruction below has exactly one result, in
    // output slot 0.
    let result = Value::Expr {
        inst: id,
        output: 0,
    };

    match *inst {
        ExprInst::ConstInt { ty, val } => {
            let result_name = self.value_name(&result);
            let ty_name = self.type_name(ty, /* by_ref = */ None);
            self.define_val(&result, ctx, /* is_ref = */ false);
            writeln!(
                code,
                "{}let {}: {} = {};",
                indent, result_name, ty_name, val
            )?;
        }
        ExprInst::CreateVariant {
            ref inputs,
            ty,
            variant,
        } => {
            let variant_info = match self.typeenv.types[ty.index()] {
                Type::Primitive(..) => panic!("CreateVariant with primitive type"),
                Type::Enum { ref variants, .. } => &variants[variant.index()],
            };

            // Pair each input with the variant field it initializes.
            let field_inits: Vec<String> = inputs
                .iter()
                .zip(variant_info.fields.iter())
                .map(|((input_value, _), field)| {
                    let field_name = &self.typeenv.syms[field.name.index()];
                    format!("{}: {}", field_name, self.value_by_val(input_value, ctx))
                })
                .collect();

            let result_name = self.value_name(&result);
            let variant_path = format!(
                "{}::{}",
                self.type_name(ty, None),
                self.typeenv.syms[variant_info.name.index()]
            );

            writeln!(
                code,
                "{}let {} = {} {{",
                indent, result_name, variant_path
            )?;
            for field_init in &field_inits {
                writeln!(code, "{} {},", indent, field_init)?;
            }
            writeln!(code, "{}}};", indent)?;
            self.define_val(&result, ctx, /* is_ref = */ false);
        }
        ExprInst::Construct {
            ref inputs, term, ..
        } => {
            let call_args: Vec<String> = inputs
                .iter()
                .map(|(input_value, _)| self.value_by_val(input_value, ctx))
                .collect();

            let result_name = self.value_name(&result);
            let ctor_name = self.constructor_name(term);
            writeln!(
                code,
                "{}let {} = {}(ctx, {});",
                indent,
                result_name,
                ctor_name,
                call_args.join(", "),
            )?;
            self.define_val(&result, ctx, /* is_ref = */ false);
        }
        ExprInst::Return { ref value, .. } => {
            let returned = self.value_by_val(value, ctx);
            writeln!(code, "{}return Some({});", indent, returned)?;
        }
    }

    Ok(())
}
fn generate_pattern_inst(
&self,
code: &mut dyn Write,
id: InstId,
inst: &PatternInst,
indent: &str,
ctx: &mut BodyContext,
) -> Result<(), Error> {
match inst {
&PatternInst::Arg { index, .. } => {
let output = Value::Expr {
inst: id,
output: 0,
};
let outputname = self.value_name(&output);
writeln!(code, "{}let {} = arg{};", indent, outputname, index)?;
writeln!(code, "{}{{", indent)?;
}
&PatternInst::MatchEqual { ref a, ref b, .. } => {
let a = self.value_by_ref(a, ctx);
let b = self.value_by_ref(b, ctx);
writeln!(code, "{}if {} == {} {{", indent, a, b)?;
}
&PatternInst::MatchInt {
ref input, int_val, ..
} => {
let input = self.value_by_val(input, ctx);
writeln!(code, "{}if {} == {} {{", indent, input, int_val)?;
}
&PatternInst::MatchVariant {
ref input,
input_ty,
variant,
ref arg_tys,
} => {
let input = self.value_by_ref(input, ctx);
let variants = match &self.typeenv.types[input_ty.index()] {
&Type::Primitive(..) => panic!("primitive type input to MatchVariant"),
&Type::Enum { ref variants, .. } => variants,
};
let ty_name = self.type_name(input_ty, /* is_ref = */ Some("&"));
let variant = &variants[variant.index()];
let variantname = &self.typeenv.syms[variant.name.index()];
let args = arg_tys
.iter()
.enumerate()
.map(|(i, ty)| {
let value = Value::Pattern {
inst: id,
output: i,
};
let valuename = self.value_name(&value);
match &self.typeenv.types[ty.index()] {
&Type::Primitive(..) => {
self.define_val(&value, ctx, /* is_ref = */ false);
valuename
}
&Type::Enum { .. } => {
self.define_val(&value, ctx, /* is_ref = */ true);
format!("ref {}", valuename)
}
}
})
.collect::<Vec<_>>();
writeln!(
code,
"{}if let {}::{} {{ {} }} = {} {{",
indent,
ty_name,
variantname,
args.join(", "),
input
)?;
}
&PatternInst::Extract {
ref input,
input_ty,
ref arg_tys,
term,
} => {
let input = self.value_by_ref(input, ctx);
let (etor_name, infallible) = self.extractor_name_and_infallible(term);
let args = arg_tys
.iter()
.enumerate()
.map(|(i, ty)| {
let value = Value::Pattern {
inst: id,
output: i,
};
self.define_val(&value, ctx, /* is_ref = */ false);
self.value_name(&value)
})
.collect::<Vec<_>>();
if infallible {
writeln!(
code,
"{}let Some(({})) = {}(ctx, {});",
indent,
args.join(", "),
etor_name,
input
)?;
writeln!(code, "{}{{", indent)?;
} else {
writeln!(
code,
"{}if let Some(({})) = {}(ctx, {}) {{",
indent,
args.join(", "),
etor_name,
input
)?;
}
}
}
Ok(()) Ok(())
} }
fn generate_body( fn generate_body(
&self, &self,
_code: &mut dyn Write, code: &mut dyn Write,
_termid: TermId, depth: usize,
_trie: &TrieNode, trie: &TrieNode,
indent: &str,
ctx: &mut BodyContext,
) -> Result<(), Error> { ) -> Result<(), Error> {
match trie {
&TrieNode::Empty => {}
&TrieNode::Leaf { ref output, .. } => {
// If this is a leaf node, generate the ExprSequence and return.
for (id, inst) in output.insts.iter().enumerate() {
let id = InstId(id);
self.generate_expr_inst(code, id, inst, indent, ctx)?;
}
}
&TrieNode::Decision { ref edges } => {
let subindent = format!("{} ", indent);
// if this is a decision node, generate each match op
// in turn (in priority order).
for &TrieEdge {
ref symbol,
ref node,
..
} in edges
{
match symbol {
&TrieSymbol::EndOfMatch => {
self.generate_body(code, depth + 1, node, &subindent, ctx)?;
}
&TrieSymbol::Match { ref op } => {
let id = InstId(depth);
self.generate_pattern_inst(code, id, op, &subindent, ctx)?;
self.generate_body(code, depth + 1, node, &subindent, ctx)?;
writeln!(code, "{}}}", subindent)?;
}
}
}
}
}
writeln!(code, "{}return None;", indent)?;
Ok(()) Ok(())
} }
} }

View File

@@ -17,8 +17,9 @@ pub enum Value {
/// A single Pattern instruction. /// A single Pattern instruction.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum PatternInst { pub enum PatternInst {
/// Get the input root-term value. /// Get the Nth input argument, which corresponds to the Nth field
Arg { ty: TypeId }, /// of the root term.
Arg { index: usize, ty: TypeId },
/// Match a value as equal to another value. Produces no values. /// Match a value as equal to another value. Produces no values.
MatchEqual { a: Value, b: Value, ty: TypeId }, MatchEqual { a: Value, b: Value, ty: TypeId },
@@ -118,9 +119,9 @@ impl PatternSequence {
id id
} }
fn add_arg(&mut self, ty: TypeId) -> Value { fn add_arg(&mut self, index: usize, ty: TypeId) -> Value {
let inst = InstId(self.insts.len()); let inst = InstId(self.insts.len());
self.add_inst(PatternInst::Arg { ty }); self.add_inst(PatternInst::Arg { index, ty });
Value::Pattern { inst, output: 0 } Value::Pattern { inst, output: 0 }
} }
@@ -183,7 +184,9 @@ impl PatternSequence {
/// this pattern, if any. /// this pattern, if any.
fn gen_pattern( fn gen_pattern(
&mut self, &mut self,
input: Value, // If `input` is `None`, then this is the root pattern, and is
// implicitly an extraction with the N args as results.
input: Option<Value>,
typeenv: &TypeEnv, typeenv: &TypeEnv,
termenv: &TermEnv, termenv: &TermEnv,
pat: &Pattern, pat: &Pattern,
@@ -193,8 +196,9 @@ impl PatternSequence {
&Pattern::BindPattern(_ty, var, ref subpat) => { &Pattern::BindPattern(_ty, var, ref subpat) => {
// Bind the appropriate variable and recurse. // Bind the appropriate variable and recurse.
assert!(!vars.contains_key(&var)); assert!(!vars.contains_key(&var));
vars.insert(var, (None, input)); // bind first, so subpat can use it vars.insert(var, (None, input.unwrap())); // bind first, so subpat can use it
let root_term = self.gen_pattern(input, typeenv, termenv, &*subpat, vars); let root_term =
self.gen_pattern(input, typeenv, termenv, &*subpat, vars);
vars.get_mut(&var).unwrap().0 = root_term; vars.get_mut(&var).unwrap().0 = root_term;
root_term root_term
} }
@@ -204,30 +208,40 @@ impl PatternSequence {
.get(&var) .get(&var)
.cloned() .cloned()
.expect("Variable should already be bound"); .expect("Variable should already be bound");
self.add_match_equal(input, var_val, ty); self.add_match_equal(input.unwrap(), var_val, ty);
var_val_term var_val_term
} }
&Pattern::ConstInt(ty, value) => { &Pattern::ConstInt(ty, value) => {
// Assert that the value matches the constant integer. // Assert that the value matches the constant integer.
self.add_match_int(input, ty, value); self.add_match_int(input.unwrap(), ty, value);
None None
} }
&Pattern::Term(_, term, ref args) if input.is_none() => {
let termdata = &termenv.terms[term.index()];
let arg_tys = &termdata.arg_tys[..];
for (i, subpat) in args.iter().enumerate() {
let value = self.add_arg(i, arg_tys[i]);
self.gen_pattern(Some(value), typeenv, termenv, subpat, vars);
}
Some(term)
}
&Pattern::Term(ty, term, ref args) => { &Pattern::Term(ty, term, ref args) => {
// Determine whether the term has an external extractor or not. // Determine whether the term has an external extractor or not.
let termdata = &termenv.terms[term.index()]; let termdata = &termenv.terms[term.index()];
let arg_tys = &termdata.arg_tys[..]; let arg_tys = &termdata.arg_tys[..];
match &termdata.kind { match &termdata.kind {
&TermKind::EnumVariant { variant } => { &TermKind::EnumVariant { variant } => {
let arg_values = self.add_match_variant(input, ty, arg_tys, variant); let arg_values =
self.add_match_variant(input.unwrap(), ty, arg_tys, variant);
for (subpat, value) in args.iter().zip(arg_values.into_iter()) { for (subpat, value) in args.iter().zip(arg_values.into_iter()) {
self.gen_pattern(value, typeenv, termenv, subpat, vars); self.gen_pattern(Some(value), typeenv, termenv, subpat, vars);
} }
None None
} }
&TermKind::Regular { .. } => { &TermKind::Regular { .. } => {
let arg_values = self.add_extract(input, ty, arg_tys, term); let arg_values = self.add_extract(input.unwrap(), ty, arg_tys, term);
for (subpat, value) in args.iter().zip(arg_values.into_iter()) { for (subpat, value) in args.iter().zip(arg_values.into_iter()) {
self.gen_pattern(value, typeenv, termenv, subpat, vars); self.gen_pattern(Some(value), typeenv, termenv, subpat, vars);
} }
Some(term) Some(term)
} }
@@ -341,10 +355,8 @@ pub fn lower_rule(
// Lower the pattern, starting from the root input value. // Lower the pattern, starting from the root input value.
let ruledata = &termenv.rules[rule.index()]; let ruledata = &termenv.rules[rule.index()];
let input_ty = ruledata.lhs.ty();
let input = pattern_seq.add_arg(input_ty);
let mut vars = HashMap::new(); let mut vars = HashMap::new();
let lhs_root_term = pattern_seq.gen_pattern(input, tyenv, termenv, &ruledata.lhs, &mut vars); let lhs_root_term = pattern_seq.gen_pattern(None, tyenv, termenv, &ruledata.lhs, &mut vars);
// Lower the expression, making use of the bound variables // Lower the expression, making use of the bound variables
// from the pattern. // from the pattern.

View File

@@ -772,6 +772,7 @@ impl TermEnv {
mod test { mod test {
use super::*; use super::*;
use crate::ast::Ident; use crate::ast::Ident;
use crate::lexer::Lexer;
use crate::parser::Parser; use crate::parser::Parser;
#[test] #[test]
@@ -780,14 +781,16 @@ mod test {
(type u32 (primitive u32)) (type u32 (primitive u32))
(type A extern (enum (B (f1 u32) (f2 u32)) (C (f1 u32)))) (type A extern (enum (B (f1 u32) (f2 u32)) (C (f1 u32))))
"; ";
let ast = Parser::new("file.isle", text) let ast = Parser::new(Lexer::from_str(text, "file.isle"))
.parse_defs() .parse_defs()
.expect("should parse"); .expect("should parse");
let tyenv = TypeEnv::from_ast(&ast).expect("should not have type-definition errors"); let tyenv = TypeEnv::from_ast(&ast).expect("should not have type-definition errors");
let sym_a = tyenv.intern(&Ident("A".to_string())).unwrap(); let sym_a = tyenv.intern(&Ident("A".to_string())).unwrap();
let sym_b = tyenv.intern(&Ident("A.B".to_string())).unwrap(); let sym_b = tyenv.intern(&Ident("B".to_string())).unwrap();
let sym_c = tyenv.intern(&Ident("A.C".to_string())).unwrap(); let sym_c = tyenv.intern(&Ident("C".to_string())).unwrap();
let sym_a_b = tyenv.intern(&Ident("A.B".to_string())).unwrap();
let sym_a_c = tyenv.intern(&Ident("A.C".to_string())).unwrap();
let sym_u32 = tyenv.intern(&Ident("u32".to_string())).unwrap(); let sym_u32 = tyenv.intern(&Ident("u32".to_string())).unwrap();
let sym_f1 = tyenv.intern(&Ident("f1".to_string())).unwrap(); let sym_f1 = tyenv.intern(&Ident("f1".to_string())).unwrap();
let sym_f2 = tyenv.intern(&Ident("f2".to_string())).unwrap(); let sym_f2 = tyenv.intern(&Ident("f2".to_string())).unwrap();
@@ -806,6 +809,7 @@ mod test {
variants: vec![ variants: vec![
Variant { Variant {
name: sym_b, name: sym_b,
fullname: sym_a_b,
id: VariantId(0), id: VariantId(0),
fields: vec![ fields: vec![
Field { Field {
@@ -822,6 +826,7 @@ mod test {
}, },
Variant { Variant {
name: sym_c, name: sym_c,
fullname: sym_a_c,
id: VariantId(1), id: VariantId(1),
fields: vec![Field { fields: vec![Field {
name: sym_f1, name: sym_f1,
@@ -831,6 +836,7 @@ mod test {
}, },
], ],
pos: Pos { pos: Pos {
file: 0,
offset: 58, offset: 58,
line: 3, line: 3,
col: 18, col: 18,
@@ -862,7 +868,7 @@ mod test {
(rule -1 (rule -1
(T3 _) (A.C 3)) (T3 _) (A.C 3))
"; ";
let ast = Parser::new("file.isle", text) let ast = Parser::new(Lexer::from_str(text, "file.isle"))
.parse_defs() .parse_defs()
.expect("should parse"); .expect("should parse");
let mut tyenv = TypeEnv::from_ast(&ast).expect("should not have type-definition errors"); let mut tyenv = TypeEnv::from_ast(&ast).expect("should not have type-definition errors");