Extend fuzzing to semantic analysis and codegen

* Fix a panic when substituting macro args after an error involving the
  macro had already been reported.

* Fix a stack overflow when an internal extractor's definition is recursive
  (see the sketch below).
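
To make the recursive-extractor case concrete, here is an illustrative sketch (not part of the commit) that drives a hypothetical ISLE program through the same lexer, parser, and compiler entry points the new fuzz target uses; the ISLE snippet and the expected outcome are assumptions made for the example.

    // Sketch only: a self-recursive internal extractor fed through the
    // lexer -> parser -> compiler pipeline. The ISLE program is hypothetical
    // and the exact diagnostic produced is not asserted here.
    fn main() {
        let src = r#"
            (type u32 (primitive u32))
            (decl A (u32) u32)
            ;; The extractor body refers back to `A` itself, forming a cycle.
            (extractor (A x) (A x))
        "#;
        let lexer = isle::lexer::Lexer::from_str(src, "recursive-extractor.isle")
            .expect("lexing should succeed");
        let defs = isle::parser::parse(lexer).expect("parsing should succeed");
        // Previously, expanding the recursive extractor template could overflow
        // the stack; with this change, compilation reports an error instead.
        assert!(isle::compile::compile(&defs).is_err());
    }
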
Nick Fitzgerald
2021-09-29 17:18:18 -07:00
committed by Chris Fallin
parent 9be1942b11
commit a099b2b590
4 changed files with 177 additions and 104 deletions


@@ -15,7 +15,7 @@ libfuzzer-sys = "0.4"
log = "0.4.14" log = "0.4.14"
[[bin]] [[bin]]
name = "parse" name = "compile"
path = "fuzz_targets/parse.rs" path = "fuzz_targets/compile.rs"
test = false test = false
doc = false doc = false


@@ -7,9 +7,26 @@ fuzz_target!(|s: &str| {
     let lexer = isle::lexer::Lexer::from_str(s, "fuzz-input.isle");
     log::debug!("lexer = {:?}", lexer);
-    if let Ok(lexer) = lexer {
-        let defs = isle::parser::parse(lexer);
-        log::debug!("defs = {:?}", defs);
-    }
+    let lexer = match lexer {
+        Ok(l) => l,
+        Err(_) => return,
+    };
+
+    let defs = isle::parser::parse(lexer);
+    log::debug!("defs = {:?}", defs);
+    let defs = match defs {
+        Ok(d) => d,
+        Err(_) => return,
+    };
+
+    let code = isle::compile::compile(&defs);
+    log::debug!("code = {:?}", code);
+    let code = match code {
+        Ok(c) => c,
+        Err(_) => return,
+    };
+
+    // TODO: check that the generated code is valid Rust. This will require
+    // stubbing out extern types, extractors, and constructors.
+    drop(code);
 });
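
With the target renamed, the whole pipeline can presumably be exercised via cargo-fuzz as `cargo fuzz run compile` (assuming cargo-fuzz is installed and invoked from the crate that contains the fuzz directory); the old `parse` target name no longer exists.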


@@ -127,6 +127,33 @@ impl Pattern {
         }
     }

+    /// Call `f` for each of the terms in this pattern.
+    pub fn terms(&self, f: &mut dyn FnMut(&Ident)) {
+        match self {
+            Pattern::Term { sym, args, .. } => {
+                f(sym);
+                for arg in args {
+                    if let TermArgPattern::Pattern(p) = arg {
+                        p.terms(f);
+                    }
+                }
+            }
+            Pattern::And { subpats, .. } => {
+                for p in subpats {
+                    p.terms(f);
+                }
+            }
+            Pattern::BindPattern { subpat, .. } => {
+                subpat.terms(f);
+            }
+            Pattern::Var { .. }
+            | Pattern::ConstInt { .. }
+            | Pattern::ConstPrim { .. }
+            | Pattern::Wildcard { .. }
+            | Pattern::MacroArg { .. } => {}
+        }
+    }
+
     pub fn make_macro_template(&self, macro_args: &[Ident]) -> Pattern {
         log::trace!("make_macro_template: {:?} with {:?}", self, macro_args);
         match self {
@@ -182,24 +209,24 @@ impl Pattern {
         }
     }

-    pub fn subst_macro_args(&self, macro_args: &[Pattern]) -> Pattern {
+    pub fn subst_macro_args(&self, macro_args: &[Pattern]) -> Option<Pattern> {
         log::trace!("subst_macro_args: {:?} with {:?}", self, macro_args);
         match self {
             &Pattern::BindPattern {
                 ref var,
                 ref subpat,
                 pos,
-            } => Pattern::BindPattern {
+            } => Some(Pattern::BindPattern {
                 var: var.clone(),
-                subpat: Box::new(subpat.subst_macro_args(macro_args)),
+                subpat: Box::new(subpat.subst_macro_args(macro_args)?),
                 pos,
-            },
+            }),
             &Pattern::And { ref subpats, pos } => {
                 let subpats = subpats
                     .iter()
                     .map(|subpat| subpat.subst_macro_args(macro_args))
-                    .collect::<Vec<_>>();
-                Pattern::And { subpats, pos }
+                    .collect::<Option<Vec<_>>>()?;
+                Some(Pattern::And { subpats, pos })
             }
             &Pattern::Term {
                 ref sym,
@@ -209,19 +236,19 @@ impl Pattern {
                 let args = args
                     .iter()
                     .map(|arg| arg.subst_macro_args(macro_args))
-                    .collect::<Vec<_>>();
-                Pattern::Term {
+                    .collect::<Option<Vec<_>>>()?;
+                Some(Pattern::Term {
                     sym: sym.clone(),
                     args,
                     pos,
-                }
+                })
             }
             &Pattern::Var { .. }
             | &Pattern::Wildcard { .. }
             | &Pattern::ConstInt { .. }
-            | &Pattern::ConstPrim { .. } => self.clone(),
-            &Pattern::MacroArg { index, .. } => macro_args[index].clone(),
+            | &Pattern::ConstPrim { .. } => Some(self.clone()),
+            &Pattern::MacroArg { index, .. } => macro_args.get(index).cloned(),
         }
     }
@@ -264,12 +291,12 @@ impl TermArgPattern {
         }
     }

-    fn subst_macro_args(&self, args: &[Pattern]) -> TermArgPattern {
+    fn subst_macro_args(&self, args: &[Pattern]) -> Option<TermArgPattern> {
         match self {
             &TermArgPattern::Pattern(ref pat) => {
-                TermArgPattern::Pattern(pat.subst_macro_args(args))
+                Some(TermArgPattern::Pattern(pat.subst_macro_args(args)?))
             }
-            &TermArgPattern::Expr(_) => self.clone(),
+            &TermArgPattern::Expr(_) => Some(self.clone()),
         }
     }
 }
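
The `Option`-returning rewrite above leans on the standard library's `FromIterator` impl for `Option`: collecting an iterator of `Option<T>` into `Option<Vec<T>>` yields `None` as soon as any element is `None`. A small self-contained illustration (not from this commit):

    // Demonstrates the collect-into-Option idiom that `subst_macro_args` uses:
    // a single `None` element makes the whole collected result `None`.
    fn main() {
        let all_present = vec![Some(1), Some(2), Some(3)];
        let one_missing = vec![Some(1), None, Some(3)];

        let a: Option<Vec<i32>> = all_present.into_iter().collect();
        let b: Option<Vec<i32>> = one_missing.into_iter().collect();

        assert_eq!(a, Some(vec![1, 2, 3]));
        assert_eq!(b, None);
        println!("collect::<Option<Vec<_>>>() short-circuits on None");
    }
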


@@ -17,6 +17,7 @@ use crate::ast;
 use crate::error::*;
 use crate::lexer::Pos;
 use std::collections::HashMap;
+use std::collections::HashSet;
 use std::sync::Arc;

 declare_id!(
@@ -443,6 +444,22 @@ impl Expr {
     }
 }

+/// Given an `Option<T>`, unwrap the inner `T` value, or `continue` if it is
+/// `None`.
+///
+/// Useful for when we encountered an error earlier in our analysis but kept
+/// going to find more errors, and now we've run into some missing data that
+/// would have been filled in if we didn't hit that original error, but we want
+/// to keep going to find more errors.
+macro_rules! unwrap_or_continue {
+    ($e:expr) => {
+        match $e {
+            Some(x) => x,
+            None => continue,
+        }
+    };
+}
+
 impl TypeEnv {
     /// Construct the type environment from the AST.
     pub fn from_ast(defs: &ast::Defs) -> Result<TypeEnv> {
@@ -484,12 +501,7 @@ impl TypeEnv {
         for def in &defs.defs {
             match def {
                 &ast::Def::Type(ref td) => {
-                    let ty = match tyenv.type_from_ast(TypeId(tid), td) {
-                        Some(ty) => ty,
-                        None => {
-                            continue;
-                        }
-                    };
+                    let ty = unwrap_or_continue!(tyenv.type_from_ast(TypeId(tid), td));
                     tyenv.types.push(ty);
                     tid += 1;
                 }
@@ -606,14 +618,16 @@ impl TypeEnv {
     }

     fn error(&self, pos: Pos, msg: String) -> Error {
-        Error::TypeError {
+        let e = Error::TypeError {
             msg,
             src: Source::new(
                 self.filenames[pos.file].clone(),
                 self.file_texts[pos.file].clone(),
             ),
             span: miette::SourceSpan::from((pos.offset, 1)),
-        }
+        };
+        log::trace!("{}", e);
+        e
     }

     fn report_error(&mut self, pos: Pos, msg: String) {
@@ -661,6 +675,7 @@ impl TermEnv {
         env.collect_term_sigs(tyenv, defs);
         env.collect_enum_variant_terms(tyenv);
+        tyenv.return_errors()?;
         env.collect_constructors(tyenv, defs);
         env.collect_extractor_templates(tyenv, defs);
         tyenv.return_errors()?;
@@ -769,6 +784,7 @@ impl TermEnv {
     fn collect_constructors(&mut self, tyenv: &mut TypeEnv, defs: &ast::Defs) {
         for def in &defs.defs {
+            log::debug!("collect_constructors from def: {:?}", def);
             match def {
                 &ast::Def::Rule(ref rule) => {
                     let pos = rule.pos;
@@ -811,39 +827,68 @@ impl TermEnv {
     }

     fn collect_extractor_templates(&mut self, tyenv: &mut TypeEnv, defs: &ast::Defs) {
+        let mut extractor_call_graph = HashMap::new();
+
         for def in &defs.defs {
-            match def {
-                &ast::Def::Extractor(ref ext) => {
-                    let sym = tyenv.intern_mut(&ext.term);
-                    let term = match self.term_map.get(&sym) {
-                        Some(x) => x,
-                        None => {
-                            tyenv.report_error(
-                                ext.pos,
-                                "Extractor macro body definition on a non-existent term"
-                                    .to_string(),
-                            );
-                            return;
-                        }
-                    };
-                    let termdata = &mut self.terms[term.index()];
-                    let template = ext.template.make_macro_template(&ext.args[..]);
-                    log::trace!("extractor def: {:?} becomes template {:?}", def, template);
-                    match &termdata.kind {
-                        &TermKind::Declared => {
-                            termdata.kind = TermKind::InternalExtractor { template };
-                        }
-                        _ => {
-                            tyenv.report_error(
-                                ext.pos,
-                                "Extractor macro body defined on term of incorrect kind"
-                                    .to_string(),
-                            );
-                            continue;
-                        }
-                    }
-                }
-                _ => {}
+            if let &ast::Def::Extractor(ref ext) = def {
+                let sym = tyenv.intern_mut(&ext.term);
+                let term = match self.term_map.get(&sym) {
+                    Some(x) => x,
+                    None => {
+                        tyenv.report_error(
+                            ext.pos,
+                            "Extractor macro body definition on a non-existent term".to_string(),
+                        );
+                        return;
+                    }
+                };
+                let termdata = &mut self.terms[term.index()];
+                let template = ext.template.make_macro_template(&ext.args[..]);
+                log::trace!("extractor def: {:?} becomes template {:?}", def, template);
+
+                let mut callees = HashSet::new();
+                template.terms(&mut |t| {
+                    let t = tyenv.intern_mut(t);
+                    callees.insert(t);
+                });
+                extractor_call_graph.insert(sym, callees);
+
+                match &termdata.kind {
+                    &TermKind::Declared => {
+                        termdata.kind = TermKind::InternalExtractor { template };
+                    }
+                    _ => {
+                        tyenv.report_error(
+                            ext.pos,
+                            "Extractor macro body defined on term of incorrect kind".to_string(),
+                        );
+                        continue;
+                    }
+                }
             }
         }
+
+        // Check for cycles in the extractor call graph.
+        let mut seen = HashSet::new();
+        let mut stack = vec![];
+        'outer: for root in extractor_call_graph.keys().copied() {
+            seen.clear();
+            stack.clear();
+            stack.push(root);
+
+            while let Some(caller) = stack.pop() {
+                let already_seen = !seen.insert(caller);
+                if already_seen {
+                    let term = self.term_map[&caller];
+                    let pos = match &self.terms[term.index()].kind {
+                        TermKind::InternalExtractor { template } => template.pos(),
+                        _ => unreachable!(),
+                    };
+                    tyenv.report_error(pos, "extractor definition is recursive".into());
+                    continue 'outer;
+                } else {
+                    stack.extend(extractor_call_graph[&caller].iter().copied());
+                }
+            }
+        }
     }
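
The cycle check above walks the call graph from every extractor with an explicit stack and a per-walk `seen` set. A standalone sketch of the same idea (illustrative only; the function and graph here are made up, not the commit's code). Note that, as in the hunk above, any revisit during a walk counts, so a diamond-shaped but acyclic call graph would also be flagged.

    use std::collections::{HashMap, HashSet};

    // Returns true if a walk from `root` ever revisits a node.
    fn revisits_a_node<'a>(root: &'a str, graph: &HashMap<&'a str, HashSet<&'a str>>) -> bool {
        let mut seen = HashSet::new();
        let mut stack = vec![root];
        while let Some(node) = stack.pop() {
            // `insert` returns false when the node was already present.
            if !seen.insert(node) {
                return true;
            }
            if let Some(callees) = graph.get(node) {
                stack.extend(callees.iter().copied());
            }
        }
        false
    }

    fn main() {
        let mut graph: HashMap<&str, HashSet<&str>> = HashMap::new();
        graph.insert("A", ["B"].into_iter().collect());
        graph.insert("B", ["A"].into_iter().collect());
        graph.insert("C", HashSet::new());
        assert!(revisits_a_node("A", &graph));
        assert!(!revisits_a_node("C", &graph));
    }
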
@@ -858,20 +903,18 @@ impl TermEnv {
                     vars: vec![],
                 };

-                let (lhs, ty) =
-                    match self.translate_pattern(tyenv, &rule.pattern, None, &mut bindings) {
-                        Some(x) => x,
-                        None => {
-                            // Keep going to collect more errors.
-                            continue;
-                        }
-                    };
-                let rhs = match self.translate_expr(tyenv, &rule.expr, ty, &mut bindings) {
-                    Some(x) => x,
-                    None => {
-                        continue;
-                    }
-                };
+                let (lhs, ty) = unwrap_or_continue!(self.translate_pattern(
+                    tyenv,
+                    &rule.pattern,
+                    None,
+                    &mut bindings
+                ));
+                let rhs = unwrap_or_continue!(self.translate_expr(
+                    tyenv,
+                    &rule.expr,
+                    ty,
+                    &mut bindings
+                ));

                 let rid = RuleId(self.rules.len());
                 self.rules.push(Rule {
@@ -1021,14 +1064,12 @@ impl TermEnv {
                 let mut expected_ty = expected_ty;
                 let mut children = vec![];
                 for subpat in subpats {
-                    let (subpat, ty) =
-                        match self.translate_pattern(tyenv, &*subpat, expected_ty, bindings) {
-                            Some(x) => x,
-                            None => {
-                                // Try to keep going for more errors.
-                                continue;
-                            }
-                        };
+                    let (subpat, ty) = unwrap_or_continue!(self.translate_pattern(
+                        tyenv,
+                        &*subpat,
+                        expected_ty,
+                        bindings
+                    ));
                     expected_ty = expected_ty.or(Some(ty));
                     children.push(subpat);
                 }
@@ -1188,7 +1229,7 @@ impl TermEnv {
                     macro_args.push(sub_ast.clone());
                 }
                 log::trace!("internal extractor macro args = {:?}", args);
-                let pat = template.subst_macro_args(&macro_args[..]);
+                let pat = template.subst_macro_args(&macro_args[..])?;
                 return self.translate_pattern(tyenv, &pat, expected_ty, bindings);
             }
             &TermKind::ExternalConstructor { .. } | &TermKind::InternalConstructor => {
@@ -1205,20 +1246,15 @@ impl TermEnv {
                 // Resolve subpatterns.
                 let mut subpats = vec![];
                 for (i, arg) in args.iter().enumerate() {
-                    let arg_ty = self.terms[tid.index()].arg_tys[i];
-                    let (subpat, _) = match self.translate_pattern_term_arg(
+                    let term = unwrap_or_continue!(self.terms.get(tid.index()));
+                    let arg_ty = unwrap_or_continue!(term.arg_tys.get(i).copied());
+                    let (subpat, _) = unwrap_or_continue!(self.translate_pattern_term_arg(
                         tyenv,
                         pos,
                         arg,
                         Some(arg_ty),
                         bindings,
-                    ) {
-                        Some(x) => x,
-                        None => {
-                            // Try to keep going for more errors.
-                            continue;
-                        }
-                    };
+                    ));
                     subpats.push(subpat);
                 }
@@ -1305,13 +1341,10 @@ impl TermEnv {
                 // Resolve subexpressions.
                 let mut subexprs = vec![];
                 for (i, arg) in args.iter().enumerate() {
-                    let arg_ty = self.terms[tid.index()].arg_tys[i];
-                    let subexpr = match self.translate_expr(tyenv, arg, arg_ty, bindings) {
-                        Some(s) => s,
-                        None => {
-                            continue;
-                        }
-                    };
+                    let term = unwrap_or_continue!(self.terms.get(tid.index()));
+                    let arg_ty = unwrap_or_continue!(term.arg_tys.get(i).copied());
+                    let subexpr =
+                        unwrap_or_continue!(self.translate_expr(tyenv, arg, arg_ty, bindings));
                     subexprs.push(subexpr);
                 }
@@ -1406,13 +1439,9 @@ impl TermEnv {
                 };

                 // Evaluate the variable's value.
-                let val = Box::new(match self.translate_expr(tyenv, &def.val, tid, bindings) {
-                    Some(e) => e,
-                    None => {
-                        // Keep going for more errors.
-                        continue;
-                    }
-                });
+                let val = Box::new(unwrap_or_continue!(
+                    self.translate_expr(tyenv, &def.val, tid, bindings)
+                ));

                 // Bind the var with the given type.
                 let id = VarId(bindings.next_var);