WIP: rip out a bunch of stuff and rework

This commit is contained in:
Chris Fallin
2021-09-02 19:30:28 -07:00
parent 84b7612b98
commit e08160845e
8 changed files with 331 additions and 993 deletions

3
cranelift/isle/.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "wasmtime"]
path = wasmtime
url = https://github.com/cfallin/wasmtime

View File

@@ -62,6 +62,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"env_logger", "env_logger",
"log", "log",
"peepmatic-automata",
"thiserror", "thiserror",
] ]
@@ -86,6 +87,10 @@ version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
[[package]]
name = "peepmatic-automata"
version = "0.75.0"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.28" version = "1.0.28"

View File

@@ -9,3 +9,4 @@ license = "Apache-2.0 WITH LLVM-exception"
log = "0.4" log = "0.4"
env_logger = "0.8" env_logger = "0.8"
thiserror = "1.0" thiserror = "1.0"
peepmatic-automata = { version = "*", path = "wasmtime/cranelift/peepmatic/crates/automata" }

View File

@@ -0,0 +1,123 @@
//! Generate Rust code from a series of Sequences.
use crate::ir::{lower_rule, ExprSequence, PatternInst, PatternSequence, Value};
use crate::sema::{RuleId, TermEnv, TermId, TypeEnv};
use peepmatic_automata::{Automaton, Builder as AutomatonBuilder};
use std::collections::HashMap;
// TODO: automata built by output term as well
/// Builder context for one function in generated code corresponding
/// to one root input term.
///
/// Accumulates every lowered rule rooted at `root_term` into a single
/// automaton whose input symbols are `PatternInst`s and whose
/// per-transition outputs are `ExprSequence`s.
struct TermFunctionBuilder {
// The root term this generated function matches on.
root_term: TermId,
// Automaton under construction; finished when the overall
// `Automata` result is produced.
automaton: AutomatonBuilder<PatternInst, (), ExprSequence>,
}
impl TermFunctionBuilder {
    /// Create a builder for the generated function rooted at `root_term`.
    fn new(root_term: TermId) -> Self {
        Self {
            root_term,
            automaton: AutomatonBuilder::new(),
        }
    }

    /// Insert one lowered rule into the automaton. The pattern
    /// instructions become the automaton's input symbols; after each
    /// symbol we attach the longest prefix of the expression sequence
    /// whose `Value::Pattern` operands are all defined by the pattern
    /// instructions consumed so far.
    fn add_rule(&mut self, pattern_seq: PatternSequence, expr_seq: ExprSequence) {
        let mut insertion = self.automaton.insert();
        // Index of the first expression inst not yet emitted.
        let mut emitted = 0;
        for (pat_idx, pat_inst) in pattern_seq.insts.into_iter().enumerate() {
            // Advance `ready` past every expression inst whose
            // pattern-value operands come only from insts 0..=pat_idx.
            let mut ready = emitted;
            while ready < expr_seq.insts.len() {
                let mut latest_dep = 0;
                expr_seq.insts[ready].visit_values(|val| {
                    if let Value::Pattern { inst, .. } = val {
                        latest_dep = std::cmp::max(latest_dep, inst.index());
                    }
                });
                if latest_dep > pat_idx {
                    break;
                }
                ready += 1;
            }
            // Package the now-emittable slice of the expression
            // sequence as the output on this transition.
            let out_seq = ExprSequence {
                insts: expr_seq.insts[emitted..ready].to_vec(),
            };
            emitted = ready;
            insertion.next(pat_inst, out_seq);
        }
        insertion.finish();
    }
}
/// Builds one `TermFunctionBuilder` per root term, indexing each
/// lowered rule both by its LHS (input) root term and by its RHS
/// (output) root term.
struct TermFunctionsBuilder<'a> {
// Type environment for the whole input program.
typeenv: &'a TypeEnv,
// Term environment (terms and rules) for the whole input program.
termenv: &'a TermEnv,
// Per-term builders keyed by each rule's LHS root term.
builders_by_input: HashMap<TermId, TermFunctionBuilder>,
// Per-term builders keyed by each rule's RHS root term.
builders_by_output: HashMap<TermId, TermFunctionBuilder>,
}
impl<'a> TermFunctionsBuilder<'a> {
fn new(typeenv: &'a TypeEnv, termenv: &'a TermEnv) -> Self {
Self {
builders_by_input: HashMap::new(),
builders_by_output: HashMap::new(),
typeenv,
termenv,
}
}
fn build(&mut self) {
for rule in 0..self.termenv.rules.len() {
let rule = RuleId(rule);
let (lhs_root, pattern, rhs_root, expr) = lower_rule(self.typeenv, self.termenv, rule);
if let Some(input_root_term) = lhs_root {
self.builders_by_input
.entry(input_root_term)
.or_insert_with(|| TermFunctionBuilder::new(input_root_term))
.add_rule(pattern.clone(), expr.clone());
}
if let Some(output_root_term) = rhs_root {
self.builders_by_output
.entry(output_root_term)
.or_insert_with(|| TermFunctionBuilder::new(output_root_term))
.add_rule(pattern, expr);
}
}
}
fn create_automata(self) -> Automata {
let automata_by_input = self
.builders_by_input
.into_iter()
.map(|(k, mut v)| (k, v.automaton.finish()))
.collect::<HashMap<_, _>>();
let automata_by_output = self
.builders_by_output
.into_iter()
.map(|(k, mut v)| (k, v.automaton.finish()))
.collect::<HashMap<_, _>>();
Automata {
automata_by_input,
automata_by_output,
}
}
}
/// Final result of this codegen phase: one automaton per root term,
/// indexed both by input-side (LHS) and output-side (RHS) root term.
pub struct Automata {
// Automata keyed by each rule's LHS (input) root term.
pub automata_by_input: HashMap<TermId, Automaton<PatternInst, (), ExprSequence>>,
// Automata keyed by each rule's RHS (output) root term.
pub automata_by_output: HashMap<TermId, Automaton<PatternInst, (), ExprSequence>>,
}
// NOTE(review): empty impl — presumably a placeholder for code
// generation methods to come (this is a WIP commit).
impl Automata {}

View File

@@ -1,111 +1 @@
//! Compilation process, from AST to Sema to Sequences of Insts. //! Compilation process, from AST to Sema to Sequences of Insts.
use crate::error::*;
use crate::{ast, ir, sema};
use std::collections::HashMap;
/// A Compiler manages the compilation pipeline from AST to Sequences.
pub struct Compiler<'a> {
// The parsed input program (retained for later phases).
ast: &'a ast::Defs,
// Type environment resolved from `ast`.
type_env: sema::TypeEnv,
// Term environment (terms and rules) resolved from `ast`.
term_env: sema::TermEnv,
// One lowered sequence per rule, in rule order (parallel to
// `term_env.rules`).
seqs: Vec<ir::Sequence>,
// TODO: if this becomes a perf issue, then build a better data
// structure. For now we index on root term/variant.
//
// TODO: index at callsites (extractors/constructors) too. We'll
// need tree-summaries of arg and expected return value at each
// callsite.
term_db: HashMap<ir::TermOrVariant, TermData>,
}
/// Per-term index: which rules produce or consume a term, and whether
/// the term has external constructor/extractor bindings.
#[derive(Clone, Debug, Default)]
struct TermData {
// Rules whose RHS (output) is rooted at this term, with the
// summary of the tree each produces.
producers: Vec<(ir::TreeSummary, sema::RuleId)>,
// Rules whose LHS (input) is rooted at this term, with the
// summary of the tree each matches.
consumers: Vec<(ir::TreeSummary, sema::RuleId)>,
// True if the term has a constructor binding.
has_constructor: bool,
// True if the term has an extractor binding.
has_extractor: bool,
}
/// Result type shared by all compilation phases.
pub type CompileResult<T> = Result<T, Error>;
impl<'a> Compiler<'a> {
    /// Build a compiler over the given AST, resolving the type and
    /// term environments up front.
    ///
    /// # Errors
    /// Returns any semantic-analysis error raised while building the
    /// type or term environments.
    pub fn new(ast: &'a ast::Defs) -> CompileResult<Compiler<'a>> {
        let mut type_env = sema::TypeEnv::from_ast(ast)?;
        let term_env = sema::TermEnv::from_ast(&mut type_env, ast)?;
        Ok(Compiler {
            ast,
            type_env,
            term_env,
            seqs: vec![],
            term_db: HashMap::new(),
        })
    }

    /// Lower every rule in the term environment into an
    /// `ir::Sequence`, in rule order.
    pub fn build_sequences(&mut self) -> CompileResult<()> {
        for rid in 0..self.term_env.rules.len() {
            let rid = sema::RuleId(rid);
            let seq = ir::Sequence::from_rule(&self.type_env, &self.term_env, rid);
            self.seqs.push(seq);
        }
        Ok(())
    }

    /// Index each rule's input/output tree summaries by root term, and
    /// record which terms have constructor/extractor bindings.
    pub fn collect_tree_summaries(&mut self) -> CompileResult<()> {
        // For each rule, compute summaries of its LHS and RHS, then
        // index it in the appropriate TermData.
        for (i, seq) in self.seqs.iter().enumerate() {
            let rule_id = sema::RuleId(i);
            let consumer_summary = seq.input_tree_summary();
            let producer_summary = seq.output_tree_summary();
            if let Some(consumer_root_term) = consumer_summary.root() {
                let consumer_termdb = self
                    .term_db
                    .entry(consumer_root_term.clone())
                    .or_default();
                consumer_termdb.consumers.push((consumer_summary, rule_id));
            }
            if let Some(producer_root_term) = producer_summary.root() {
                let producer_termdb = self
                    .term_db
                    .entry(producer_root_term.clone())
                    .or_default();
                // BUGFIX: this previously pushed onto `consumers`, so
                // producer summaries were never indexed under
                // `producers`.
                producer_termdb.producers.push((producer_summary, rule_id));
            }
        }
        // For each term, if a constructor and/or extractor is
        // present, note that.
        for term in &self.term_env.terms {
            if let sema::TermKind::Regular {
                extractor,
                constructor,
            } = term.kind
            {
                if extractor.is_none() && constructor.is_none() {
                    continue;
                }
                let entry = self
                    .term_db
                    .entry(ir::TermOrVariant::Term(term.id))
                    .or_default();
                if extractor.is_some() {
                    entry.has_extractor = true;
                }
                if constructor.is_some() {
                    entry.has_constructor = true;
                }
            }
        }
        Ok(())
    }

    /// Inline calls to internal terms. Not yet implemented.
    pub fn inline_internal_terms(&mut self) -> CompileResult<()> {
        unimplemented!()
    }

    /// Consume the compiler, yielding the lowered sequences.
    pub fn to_sequences(self) -> Vec<ir::Sequence> {
        self.seqs
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -4,6 +4,7 @@ use std::io::stdin;
use std::io::Read; use std::io::Read;
mod ast; mod ast;
mod codegen;
mod compile; mod compile;
mod error; mod error;
mod ir; mod ir;
@@ -16,13 +17,7 @@ fn main() -> Result<(), error::Error> {
let mut input = String::new(); let mut input = String::new();
stdin().read_to_string(&mut input)?; stdin().read_to_string(&mut input)?;
let mut parser = parser::Parser::new("<stdin>", &input[..]); let mut parser = parser::Parser::new("<stdin>", &input[..]);
let defs = parser.parse_defs()?; let _defs = parser.parse_defs()?;
let mut compiler = compile::Compiler::new(&defs)?;
compiler.build_sequences()?;
compiler.collect_tree_summaries()?;
for seq in compiler.to_sequences() {
println!("---\nsequence\n---\n{:?}\n", seq);
}
Ok(()) Ok(())
} }

Submodule cranelift/isle/wasmtime added at e4d4b09243