Cranelift: do not check in generated ISLE code; regenerate on every compile. (#4143)

This PR fixes #4066: it modifies the Cranelift `build.rs` workflow to
invoke the ISLE DSL compiler on every compilation, rather than only
when the user specifies a special "rebuild ISLE" feature.

The main benefit of this change is that it vastly simplifies the mental
model required of developers, and removes a bunch of failure modes
we have tried to work around in other ways. There is now just one
"source of truth", the ISLE source itself, in the repository, and so there
is no need to understand a special "rebuild" step and how to handle
merge errors. There is no special process needed to develop the compiler
when modifying the DSL. And there is no "noise" in the git history produced
by constantly-regenerated files.

The two main downsides we discussed in #4066 are:
- Compile time could increase, by adding more to the "meta" step before the main build;
- It becomes less obvious where the source definitions are (everything becomes
  more "magic"), which makes exploration and debugging harder.

This PR addresses each of these concerns:

1. To maintain reasonable compile time, it includes work to cut down the
   dependencies of the `cranelift-isle` crate to *nothing* (only the Rust stdlib),
   in the default build. It does this by putting the error-reporting bits
   (`miette` crate) under an optional feature, and the logging (`log` crate) under
   a feature-controlled macro, and manually writing an `Error` impl rather than
   using `thiserror`. This completely avoids proc macros and the `syn` build slowness.

   The user can still get nice errors out of `miette`: this is enabled by specifying
   a Cargo feature `--features isle-errors`.

2. To allow the user to optionally inspect the generated source, which nominally
   lives in a hard-to-find path inside `target/` now, this PR adds a feature `isle-in-source-tree`
   that, as implied by the name, moves the target for ISLE generated source into
   the source tree, at `cranelift/codegen/isle_generated_source/`. It seems reasonable
   to do this when an explicit feature (opt-in) is specified because this is how ISLE regeneration
   currently works as well. To prevent surprises, if the feature is *not* specified, the
   build fails if this directory exists.
This commit is contained in:
Chris Fallin
2022-05-11 22:25:24 -07:00
committed by GitHub
parent 7c5a56b836
commit 5d671952ee
34 changed files with 395 additions and 35486 deletions

View File

@@ -1,6 +1,7 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use std::default::Default;
fuzz_target!(|s: &str| {
let _ = env_logger::try_init();
@@ -19,7 +20,7 @@ fuzz_target!(|s: &str| {
Err(_) => return,
};
let code = cranelift_isle::compile::compile(&defs);
let code = cranelift_isle::compile::compile(&defs, &Default::default());
log::debug!("code = {:?}", code);
let code = match code {
Ok(c) => c,

View File

@@ -9,9 +9,14 @@ repository = "https://github.com/bytecodealliance/wasmtime/tree/main/cranelift/i
version = "0.85.0"
[dependencies]
log = "0.4"
miette = "3.0.0"
thiserror = "1.0.29"
log = { version = "0.4", optional = true }
miette = { version = "4.7.0", optional = true }
[dev-dependencies]
tempfile = "3"
[features]
default = []
logging = ["log"]
miette-errors = ["miette"]

View File

@@ -3,6 +3,7 @@
#![allow(missing_docs)]
use crate::lexer::Pos;
use crate::log;
use std::sync::Arc;
/// The parsed form of an ISLE file.
@@ -172,7 +173,7 @@ impl Pattern {
}
pub fn make_macro_template(&self, macro_args: &[Ident]) -> Pattern {
log::trace!("make_macro_template: {:?} with {:?}", self, macro_args);
log!("make_macro_template: {:?} with {:?}", self, macro_args);
match self {
&Pattern::BindPattern {
ref var,
@@ -233,7 +234,7 @@ impl Pattern {
}
pub fn subst_macro_args(&self, macro_args: &[Pattern]) -> Option<Pattern> {
log::trace!("subst_macro_args: {:?} with {:?}", self, macro_args);
log!("subst_macro_args: {:?} with {:?}", self, macro_args);
match self {
&Pattern::BindPattern {
ref var,

View File

@@ -1,15 +1,29 @@
//! Generate Rust code from a series of Sequences.
use crate::ir::{ExprInst, InstId, PatternInst, Value};
use crate::log;
use crate::sema::ExternalSig;
use crate::sema::{TermEnv, TermId, Type, TypeEnv, TypeId, Variant};
use crate::trie::{TrieEdge, TrieNode, TrieSymbol};
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Write;
/// Options for code generation.
#[derive(Clone, Debug, Default)]
pub struct CodegenOptions {
/// Do not include the `#![allow(...)]` pragmas in the generated
/// source. Useful if it must be include!()'d elsewhere.
pub exclude_global_allow_pragmas: bool,
}
/// Emit Rust source code for the given type and term environments.
pub fn codegen(typeenv: &TypeEnv, termenv: &TermEnv, tries: &BTreeMap<TermId, TrieNode>) -> String {
Codegen::compile(typeenv, termenv, tries).generate_rust()
pub fn codegen(
typeenv: &TypeEnv,
termenv: &TermEnv,
tries: &BTreeMap<TermId, TrieNode>,
options: &CodegenOptions,
) -> String {
Codegen::compile(typeenv, termenv, tries).generate_rust(options)
}
#[derive(Clone, Debug)]
@@ -38,10 +52,10 @@ impl<'a> Codegen<'a> {
}
}
fn generate_rust(&self) -> String {
fn generate_rust(&self, options: &CodegenOptions) -> String {
let mut code = String::new();
self.generate_header(&mut code);
self.generate_header(&mut code, options);
self.generate_ctx_trait(&mut code);
self.generate_internal_types(&mut code);
self.generate_internal_term_constructors(&mut code);
@@ -49,7 +63,7 @@ impl<'a> Codegen<'a> {
code
}
fn generate_header(&self, code: &mut String) {
fn generate_header(&self, code: &mut String, options: &CodegenOptions) {
writeln!(code, "// GENERATED BY ISLE. DO NOT EDIT!").unwrap();
writeln!(code, "//").unwrap();
writeln!(
@@ -61,17 +75,19 @@ impl<'a> Codegen<'a> {
writeln!(code, "// - {}", file).unwrap();
}
writeln!(
code,
"\n#![allow(dead_code, unreachable_code, unreachable_patterns)]"
)
.unwrap();
writeln!(
code,
"#![allow(unused_imports, unused_variables, non_snake_case, unused_mut)]"
)
.unwrap();
writeln!(code, "#![allow(irrefutable_let_patterns)]").unwrap();
if !options.exclude_global_allow_pragmas {
writeln!(
code,
"\n#![allow(dead_code, unreachable_code, unreachable_patterns)]"
)
.unwrap();
writeln!(
code,
"#![allow(unused_imports, unused_variables, non_snake_case, unused_mut)]"
)
.unwrap();
writeln!(code, "#![allow(irrefutable_let_patterns)]").unwrap();
}
writeln!(code, "\nuse super::*; // Pulls in all external types.").unwrap();
}
@@ -311,7 +327,7 @@ impl<'a> Codegen<'a> {
ctx: &mut BodyContext,
returns: &mut Vec<(usize, String)>,
) {
log::trace!("generate_expr_inst: {:?}", inst);
log!("generate_expr_inst: {:?}", inst);
match inst {
&ExprInst::ConstInt { ty, val } => {
let value = Value::Expr {
@@ -665,7 +681,7 @@ impl<'a> Codegen<'a> {
indent: &str,
ctx: &mut BodyContext,
) -> bool {
log::trace!("generate_body:\n{}", trie.pretty());
log!("generate_body:\n{}", trie.pretty());
let mut returned = false;
match trie {
&TrieNode::Empty => {}
@@ -706,7 +722,7 @@ impl<'a> Codegen<'a> {
let mut last = i;
let mut adjacent_variants = BTreeSet::new();
let mut adjacent_variant_input = None;
log::trace!(
log!(
"edge: prio = {:?}, symbol = {:?}",
edges[i].prio,
edges[i].symbol

View File

@@ -4,9 +4,9 @@ use crate::error::Result;
use crate::{ast, codegen, sema, trie};
/// Compile the given AST definitions into Rust source code.
pub fn compile(defs: &ast::Defs) -> Result<String> {
pub fn compile(defs: &ast::Defs, options: &codegen::CodegenOptions) -> Result<String> {
let mut typeenv = sema::TypeEnv::from_ast(defs)?;
let termenv = sema::TermEnv::from_ast(&mut typeenv, defs)?;
let tries = trie::build_tries(&typeenv, &termenv);
Ok(codegen::codegen(&typeenv, &termenv, &tries))
Ok(codegen::codegen(&typeenv, &termenv, &tries, options))
}

View File

@@ -1,62 +1,47 @@
//! Error types.
use miette::{Diagnostic, SourceCode, SourceSpan};
use std::sync::Arc;
/// Either `Ok(T)` or `Err(isle::Error)`.
pub type Result<T> = std::result::Result<T, Error>;
/// Errors produced by ISLE.
#[derive(thiserror::Error, Diagnostic, Clone, Debug)]
#[derive(Clone, Debug)]
pub enum Error {
/// An I/O error.
#[error("{context}")]
IoError {
/// The underlying I/O error.
#[source]
error: Arc<std::io::Error>,
/// The context explaining what caused the I/O error.
context: String,
},
/// The input ISLE source has a parse error.
#[error("parse error: {msg}")]
#[diagnostic()]
ParseError {
/// The error message.
msg: String,
/// The input ISLE source.
#[source_code]
src: Source,
/// The location of the parse error.
#[label("{msg}")]
span: SourceSpan,
span: Span,
},
/// The input ISLE source has a type error.
#[error("type error: {msg}")]
#[diagnostic()]
TypeError {
/// The error message.
msg: String,
/// The input ISLE source.
#[source_code]
src: Source,
/// The location of the type error.
#[label("{msg}")]
span: SourceSpan,
span: Span,
},
/// Multiple errors.
#[error("Found {} errors:\n\n{}",
self.unwrap_errors().len(),
DisplayErrors(self.unwrap_errors()))]
#[diagnostic()]
Errors(#[related] Vec<Error>),
Errors(Vec<Error>),
}
impl Error {
@@ -84,6 +69,31 @@ impl Error {
}
}
impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Error::IoError { error, .. } => Some(&*error as &dyn std::error::Error),
_ => None,
}
}
}
impl std::fmt::Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Error::IoError { context, .. } => write!(f, "{}", context),
Error::ParseError { msg, .. } => write!(f, "parse error: {}", msg),
Error::TypeError { msg, .. } => write!(f, "type error: {}", msg),
Error::Errors(_) => write!(
f,
"found {} errors:\n\n{}",
self.unwrap_errors().len(),
DisplayErrors(self.unwrap_errors())
),
}
}
}
struct DisplayErrors<'a>(&'a [Error]);
impl std::fmt::Display for DisplayErrors<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -97,8 +107,11 @@ impl std::fmt::Display for DisplayErrors<'_> {
/// A source file and its contents.
#[derive(Clone)]
pub struct Source {
name: Arc<str>,
text: Arc<str>,
/// The name of this source file.
pub name: Arc<str>,
/// The text of this source file.
#[allow(unused)] // Used only when miette is enabled.
pub text: Arc<str>,
}
impl std::fmt::Debug for Source {
@@ -126,23 +139,21 @@ impl Source {
}
}
impl SourceCode for Source {
fn read_span<'a>(
&'a self,
span: &SourceSpan,
context_lines_before: usize,
context_lines_after: usize,
) -> std::result::Result<Box<dyn miette::SpanContents<'a> + 'a>, miette::MietteError> {
let contents = self
.text
.read_span(span, context_lines_before, context_lines_after)?;
Ok(Box::new(miette::MietteSpanContents::new_named(
self.name.to_string(),
contents.data(),
contents.span().clone(),
contents.line(),
contents.column(),
contents.line_count(),
)))
/// A span in a given source.
#[derive(Clone, Debug)]
pub struct Span {
/// The byte offset of the start of the span.
pub from: usize,
/// The byte offset of the end of the span.
pub to: usize,
}
impl Span {
/// Create a new span that covers one character at the given offset.
pub fn new_single(offset: usize) -> Span {
Span {
from: offset,
to: offset + 1,
}
}
}

View File

@@ -0,0 +1,65 @@
//! miette-specific trait implementations. This is kept separate so
//! that we can have a very lightweight build of the ISLE compiler as
//! part of the Cranelift build process without pulling in any
//! dependencies.
use crate::error::{Error, Source, Span};
use miette::{SourceCode, SourceSpan};
impl From<Span> for SourceSpan {
fn from(span: Span) -> Self {
SourceSpan::new(span.from.into(), span.to.into())
}
}
impl SourceCode for Source {
fn read_span<'a>(
&'a self,
span: &SourceSpan,
context_lines_before: usize,
context_lines_after: usize,
) -> std::result::Result<Box<dyn miette::SpanContents<'a> + 'a>, miette::MietteError> {
let contents = self
.text
.read_span(span, context_lines_before, context_lines_after)?;
Ok(Box::new(miette::MietteSpanContents::new_named(
self.name.to_string(),
contents.data(),
contents.span().clone(),
contents.line(),
contents.column(),
contents.line_count(),
)))
}
}
impl miette::Diagnostic for Error {
fn labels(&self) -> Option<Box<dyn Iterator<Item = miette::LabeledSpan> + '_>> {
match self {
Self::ParseError { msg, span, .. } | Self::TypeError { msg, span, .. } => {
Some(Box::new(
vec![miette::LabeledSpan::new_with_span(
Some(msg.clone()),
span.clone(),
)]
.into_iter(),
))
}
_ => None,
}
}
fn source_code(&self) -> std::option::Option<&dyn miette::SourceCode> {
match self {
Self::ParseError { src, .. } | Self::TypeError { src, .. } => Some(src),
_ => None,
}
}
fn related(&self) -> Option<Box<dyn Iterator<Item = &dyn miette::Diagnostic> + '_>> {
match self {
Self::Errors(errors) => Some(Box::new(
errors.iter().map(|x| x as &dyn miette::Diagnostic),
)),
_ => None,
}
}
}

View File

@@ -1,6 +1,7 @@
//! Lowered matching IR.
use crate::lexer::Pos;
use crate::log;
use crate::sema::*;
use std::collections::BTreeMap;
@@ -549,7 +550,7 @@ impl ExprSequence {
expr: &Expr,
vars: &BTreeMap<VarId, Value>,
) -> Value {
log::trace!("gen_expr: expr {:?}", expr);
log!("gen_expr: expr {:?}", expr);
match expr {
&Expr::ConstInt(ty, val) => self.add_const_int(ty, val),
&Expr::ConstPrim(ty, val) => self.add_const_prim(ty, val),
@@ -627,7 +628,7 @@ pub fn lower_rule(
.root_term()
.expect("Pattern must have a term at the root");
log::trace!("lower_rule: ruledata {:?}", ruledata,);
log!("lower_rule: ruledata {:?}", ruledata,);
// Lower the pattern, starting from the root input value.
pattern_seq.gen_pattern(

View File

@@ -1,6 +1,6 @@
//! Lexer for the ISLE language.
use crate::error::{Error, Result, Source};
use crate::error::{Error, Result, Source, Span};
use std::borrow::Cow;
use std::path::Path;
use std::sync::Arc;
@@ -167,7 +167,7 @@ impl<'a> Lexer<'a> {
self.filenames[pos.file].clone(),
self.file_texts[pos.file].clone(),
),
span: miette::SourceSpan::from((self.pos().offset, 1)),
span: Span::new_single(self.pos().offset),
}
}

View File

@@ -24,6 +24,10 @@ pub mod compile;
pub mod error;
pub mod ir;
pub mod lexer;
mod log;
pub mod parser;
pub mod sema;
pub mod trie;
#[cfg(feature = "miette-errors")]
mod error_miette;

View File

@@ -0,0 +1,17 @@
//! Debug logging from the ISLE compiler itself.
/// Log a compiler-internal message for debugging purposes.
#[cfg(feature = "logging")]
#[macro_export]
macro_rules! log {
($($msg:tt)*) => {
::log::trace!($($msg)*)
};
}
/// Log a compiler-internal message for debugging purposes.
#[cfg(not(feature = "logging"))]
#[macro_export]
macro_rules! log {
($($msg:tt)*) => {};
}

View File

@@ -39,7 +39,7 @@ impl<'a> Parser<'a> {
self.lexer.filenames[pos.file].clone(),
self.lexer.file_texts[pos.file].clone(),
),
span: miette::SourceSpan::from((pos.offset, 1)),
span: Span::new_single(pos.offset),
}
}

View File

@@ -17,6 +17,7 @@ use crate::ast;
use crate::ast::Ident;
use crate::error::*;
use crate::lexer::Pos;
use crate::log;
use std::collections::btree_map::Entry;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
@@ -725,9 +726,9 @@ impl TypeEnv {
self.filenames[pos.file].clone(),
self.file_texts[pos.file].clone(),
),
span: miette::SourceSpan::from((pos.offset, 1)),
span: Span::new_single(pos.offset),
};
log::trace!("{}", e);
log!("{}", e);
e
}
@@ -902,7 +903,7 @@ impl TermEnv {
fn collect_constructors(&mut self, tyenv: &mut TypeEnv, defs: &ast::Defs) {
for def in &defs.defs {
log::debug!("collect_constructors from def: {:?}", def);
log!("collect_constructors from def: {:?}", def);
match def {
&ast::Def::Rule(ref rule) => {
let pos = rule.pos;
@@ -982,7 +983,7 @@ impl TermEnv {
};
let template = ext.template.make_macro_template(&ext.args[..]);
log::trace!("extractor def: {:?} becomes template {:?}", def, template);
log!("extractor def: {:?} becomes template {:?}", def, template);
let mut callees = BTreeSet::new();
template.terms(&mut |pos, t| {
@@ -1451,8 +1452,8 @@ impl TermEnv {
bindings: &mut Bindings,
is_root: bool,
) -> Option<(Pattern, TypeId)> {
log::trace!("translate_pattern: {:?}", pat);
log::trace!("translate_pattern: bindings = {:?}", bindings);
log!("translate_pattern: {:?}", pat);
log!("translate_pattern: bindings = {:?}", bindings);
match pat {
// TODO: flag on primitive type decl indicating it's an integer type?
&ast::Pattern::ConstInt { val, pos } => {
@@ -1546,7 +1547,7 @@ impl TermEnv {
}
let id = VarId(bindings.next_var);
bindings.next_var += 1;
log::trace!("binding var {:?}", var.0);
log!("binding var {:?}", var.0);
bindings.vars.push(BoundVar { name, id, ty });
Some((Pattern::BindPattern(ty, id, Box::new(subpat)), ty))
@@ -1572,7 +1573,7 @@ impl TermEnv {
};
let id = VarId(bindings.next_var);
bindings.next_var += 1;
log::trace!("binding var {:?}", var.0);
log!("binding var {:?}", var.0);
bindings.vars.push(BoundVar { name, id, ty });
Some((
Pattern::BindPattern(ty, id, Box::new(Pattern::Wildcard(ty))),
@@ -1687,7 +1688,7 @@ impl TermEnv {
for template_arg in args {
macro_args.push(template_arg.clone());
}
log::trace!("internal extractor macro args = {:?}", args);
log!("internal extractor macro args = {:?}", args);
let pat = template.subst_macro_args(&macro_args[..])?;
return self.translate_pattern(
tyenv,
@@ -1767,7 +1768,7 @@ impl TermEnv {
bindings: &mut Bindings,
pure: bool,
) -> Option<Expr> {
log::trace!("translate_expr: {:?}", expr);
log!("translate_expr: {:?}", expr);
match expr {
&ast::Expr::Term {
ref sym,

View File

@@ -1,6 +1,7 @@
//! Trie construction.
use crate::ir::{lower_rule, ExprSequence, PatternInst, PatternSequence};
use crate::log;
use crate::sema::{RuleId, TermEnv, TermId, TypeEnv};
use std::collections::BTreeMap;
@@ -8,7 +9,7 @@ use std::collections::BTreeMap;
pub fn build_tries(typeenv: &TypeEnv, termenv: &TermEnv) -> BTreeMap<TermId, TrieNode> {
let mut builder = TermFunctionsBuilder::new(typeenv, termenv);
builder.build();
log::trace!("builder: {:?}", builder);
log!("builder: {:?}", builder);
builder.finalize()
}
@@ -324,8 +325,8 @@ struct TermFunctionsBuilder<'a> {
impl<'a> TermFunctionsBuilder<'a> {
fn new(typeenv: &'a TypeEnv, termenv: &'a TermEnv) -> Self {
log::trace!("typeenv: {:?}", typeenv);
log::trace!("termenv: {:?}", termenv);
log!("typeenv: {:?}", typeenv);
log!("termenv: {:?}", termenv);
Self {
builders_by_term: BTreeMap::new(),
typeenv,
@@ -341,7 +342,7 @@ impl<'a> TermFunctionsBuilder<'a> {
let (pattern, expr) = lower_rule(self.typeenv, self.termenv, rule);
let root_term = self.termenv.rules[rule.index()].lhs.root_term().unwrap();
log::trace!(
log!(
"build:\n- rule {:?}\n- pattern {:?}\n- expr {:?}",
self.termenv.rules[rule.index()],
pattern,

View File

@@ -2,11 +2,12 @@
use cranelift_isle::error::Result;
use cranelift_isle::{compile, lexer, parser};
use std::default::Default;
fn build(filename: &str) -> Result<String> {
let lexer = lexer::Lexer::from_files(vec![filename])?;
let defs = parser::parse(lexer)?;
compile::compile(&defs)
compile::compile(&defs, &Default::default())
}
pub fn run_pass(filename: &str) {

View File

@@ -7,7 +7,7 @@ license = "Apache-2.0 WITH LLVM-exception"
publish = false
[dependencies]
cranelift-isle = { version = "*", path = "../isle/" }
cranelift-isle = { version = "*", path = "../isle/", features = ["miette-errors", "logging"] }
env_logger = { version = "0.9", default-features = false }
miette = { version = "3.0.0", features = ["fancy"] }
miette = { version = "4.7.0", features = ["fancy"] }
clap = { version = "3.1.12", features = ["derive"] }

View File

@@ -2,6 +2,7 @@ use clap::Parser;
use cranelift_isle::{compile, lexer, parser};
use miette::{Context, IntoDiagnostic, Result};
use std::{
default::Default,
fs,
io::{self, Write},
path::PathBuf,
@@ -36,7 +37,7 @@ fn main() -> Result<()> {
let lexer = lexer::Lexer::from_files(opts.inputs)?;
let defs = parser::parse(lexer)?;
let code = compile::compile(&defs)?;
let code = compile::compile(&defs, &Default::default())?;
let stdout = io::stdout();
let (mut output, output_name): (Box<dyn Write>, _) = match &opts.output {