ISLE: guard against stale generated source in default build config.
Currently, the `build.rs` script that generates Rust source from the ISLE DSL will only do this generation if the `rebuild-isle` Cargo feature is specified. By default, it is not. This is based on the principle that we (the build script) do not modify the source tree as managed by git; git-managed files are strictly a human-managed and human-edited resource. By adding the opt-in Cargo feature, a developer is requesting the build script to perform an explicit action. (In my understanding at least, this principle comes from the general philosophy of hermetic builds: the output should be a pure function of the input, and part of this is that the input is read-only. If we modify the source tree, then all bets are off.) Unfortunately, requiring the opt-in feature also creates a footgun that is easy to hit: if a developer modifies the ISLE DSL source, but forgets to specify the Cargo feature, then the compiler will silently be built successfully with stale source, and will silently exclude any changes that were made. The generated source is checked into git for a good reason: we want DSL compiler to not affect build times for the overwhelmingly common case that Cranelift is used as a dependency but the backends are not being actively developed. (This overhead comes mainly from building `islec` itself.) So, what to do? This PR implements a middle ground first described in [this conversation](https://github.com/bytecodealliance/wasmtime/pull/3506#discussion_r743113351), in which we: - Generate a hash (SHA-512) of the ISLE DSL source and produce a "manifest" of ISLE inputs alongside the generated source; and - Always read the ISLE DSL source, and see if the manifest is still valid, on builds that do not have the opt-in "rebuild" feature. This allows us to know whether the ISLE compiler output would have been the same (modulo changes to the DSL compiler itself, which are out-of-scope here), without actually building the ISLE compiler and running it. If the compiler-backend developer modifies an ISLE source file and then tries to build `cranelift-codegen` without adding the `rebuild-isle` Cargo feature, they get the following output: ```text Error: the ISLE source files that resulted in the generated Rust source * src/isa/x64/lower/isle/generated_code.rs have changed but the generated source was not rebuilt! These ISLE source files are: * src/clif.isle * src/prelude.isle * src/isa/x64/inst.isle * src/isa/x64/lower.isle Please add `--features rebuild-isle` to your `cargo build` command if you wish to rebuild the generated source, then include these changes in any git commits you make that include the changes to the ISLE. For example: $ cargo build -p cranelift-codegen --features rebuild-isle (This build script cannot do this for you by default because we cannot modify checked-into-git source without your explicit opt-in.) ``` which will tell them exactly what they need to do to fix the problem! Note that I am leaving the "Rebuild ISLE" CI job alone for now, because otherwise, we are trusting whomever submits a PR to generate the correct generated source. In other words, the manifest is a communication from the checked-in tree to the developer, but we still need to verify that the checked-in generated Rust source and the manifest are correct with respect to the checked-in ISLE source.
This commit is contained in:
24
Cargo.lock
generated
24
Cargo.lock
generated
@@ -34,7 +34,7 @@ checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cipher",
|
||||
"cpufeatures 0.2.1",
|
||||
"cpufeatures",
|
||||
"opaque-debug",
|
||||
]
|
||||
|
||||
@@ -414,7 +414,7 @@ checksum = "01b72a433d0cf2aef113ba70f62634c56fddb0f244e6377185c56a7cadbd8f91"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cipher",
|
||||
"cpufeatures 0.2.1",
|
||||
"cpufeatures",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
@@ -506,15 +506,6 @@ dependencies = [
|
||||
"glob",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed00c67cb5d0a7d64a44f6ad2668db7e7530311dd53ea79bcd4fb022c64911c8"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.2.1"
|
||||
@@ -559,6 +550,7 @@ dependencies = [
|
||||
"peepmatic-traits",
|
||||
"regalloc",
|
||||
"serde",
|
||||
"sha2",
|
||||
"smallvec",
|
||||
"souper-ir",
|
||||
"target-lexicon",
|
||||
@@ -2178,7 +2170,7 @@ version = "0.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "048aeb476be11a4b6ca432ca569e375810de9294ae78f4774e78ea98a9246ede"
|
||||
dependencies = [
|
||||
"cpufeatures 0.2.1",
|
||||
"cpufeatures",
|
||||
"opaque-debug",
|
||||
"universal-hash",
|
||||
]
|
||||
@@ -2190,7 +2182,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8419d2b623c7c0896ff2d5d96e2cb4ede590fed28fcc34934f4c33c036e620a1"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cpufeatures 0.2.1",
|
||||
"cpufeatures",
|
||||
"opaque-debug",
|
||||
"universal-hash",
|
||||
]
|
||||
@@ -2694,13 +2686,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sha2"
|
||||
version = "0.9.5"
|
||||
version = "0.9.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b362ae5752fd2137731f9fa25fd4d9058af34666ca1966fb969119cc35719f12"
|
||||
checksum = "b69f9a4c9740d74c5baa3fd2e547f9525fa8088a8a958e0ca2409a514e33f5fa"
|
||||
dependencies = [
|
||||
"block-buffer",
|
||||
"cfg-if 1.0.0",
|
||||
"cpufeatures 0.1.4",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
"opaque-debug",
|
||||
]
|
||||
|
||||
@@ -41,6 +41,7 @@ criterion = "0.3"
|
||||
cranelift-codegen-meta = { path = "meta", version = "0.78.0" }
|
||||
isle = { path = "../isle/isle", version = "0.78.0", optional = true }
|
||||
miette = { version = "3", features = ["fancy"] }
|
||||
sha2 = "0.9.8"
|
||||
|
||||
[features]
|
||||
default = ["std", "unwind"]
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
use cranelift_codegen_meta as meta;
|
||||
|
||||
use sha2::{Digest, Sha512};
|
||||
use std::env;
|
||||
use std::io::Read;
|
||||
use std::process;
|
||||
@@ -77,18 +78,7 @@ fn main() {
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
#[cfg(feature = "rebuild-isle")]
|
||||
{
|
||||
if let Err(e) = rebuild_isle(crate_dir) {
|
||||
eprintln!("Error building ISLE files: {:?}", e);
|
||||
let mut source = e.source();
|
||||
while let Some(e) = source {
|
||||
eprintln!("{:?}", e);
|
||||
source = e.source();
|
||||
}
|
||||
std::process::abort();
|
||||
}
|
||||
}
|
||||
maybe_rebuild_isle(crate_dir).expect("Unhandled failure in ISLE rebuild");
|
||||
|
||||
let pkg_version = env::var("CARGO_PKG_VERSION").unwrap();
|
||||
let mut cmd = std::process::Command::new("git");
|
||||
@@ -127,12 +117,219 @@ fn main() {
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// Rebuild ISLE DSL source text into generated Rust code.
|
||||
/// Strip the current directory from the file paths, because `islec`
|
||||
/// includes them in the generated source, and this helps us maintain
|
||||
/// deterministic builds that don't include those local file paths.
|
||||
fn make_isle_source_path_relative(
|
||||
cur_dir: &std::path::PathBuf,
|
||||
filename: std::path::PathBuf,
|
||||
) -> std::path::PathBuf {
|
||||
if let Ok(suffix) = filename.strip_prefix(&cur_dir) {
|
||||
suffix.to_path_buf()
|
||||
} else {
|
||||
filename
|
||||
}
|
||||
}
|
||||
|
||||
/// A list of compilations (transformations from ISLE source to
|
||||
/// generated Rust source) that exist in the repository.
|
||||
///
|
||||
/// NB: This must happen *after* the `cranelift-codegen-meta` functions, since
|
||||
/// it consumes files generated by them.
|
||||
/// This list is used either to regenerate the Rust source in-tree (if
|
||||
/// the `rebuild-isle` feature is enabled), or to verify that the ISLE
|
||||
/// source in-tree corresponds to the ISLE source that was last used
|
||||
/// to rebuild the Rust source (if the `rebuild-isle` feature is not
|
||||
/// enabled).
|
||||
#[derive(Clone, Debug)]
|
||||
struct IsleCompilations {
|
||||
items: Vec<IsleCompilation>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct IsleCompilation {
|
||||
output: std::path::PathBuf,
|
||||
inputs: Vec<std::path::PathBuf>,
|
||||
}
|
||||
|
||||
impl IsleCompilation {
|
||||
/// Compute the manifest filename for the given generated Rust file.
|
||||
fn manifest_filename(&self) -> std::path::PathBuf {
|
||||
self.output.with_extension("manifest")
|
||||
}
|
||||
|
||||
/// Compute the content of the source manifest for all ISLE source
|
||||
/// files that go into the compilation of one Rust file.
|
||||
///
|
||||
/// We store this alongside the `<generated_filename>.rs` file as
|
||||
/// `<generated_filename>.manifest` and use it to verify that a
|
||||
/// rebuild was done if necessary.
|
||||
fn compute_manifest(&self) -> Result<String, Box<dyn std::error::Error + 'static>> {
|
||||
use std::fmt::Write;
|
||||
|
||||
let mut manifest = String::new();
|
||||
|
||||
for filename in &self.inputs {
|
||||
// Our source must be valid UTF-8 for this to work, else user
|
||||
// will get an error on build. This is not expected to be an
|
||||
// issue.
|
||||
let content = std::fs::read_to_string(filename)?;
|
||||
// On Windows, source is checked out with line-endings changed
|
||||
// to `\r\n`; canonicalize the source that we hash to
|
||||
// Unix-style (`\n`) so hashes will match.
|
||||
let content = content.replace("\r\n", "\n");
|
||||
// One line in the manifest: <filename> <sha-512 hash>.
|
||||
let mut hasher = Sha512::default();
|
||||
hasher.update(content.as_bytes());
|
||||
writeln!(
|
||||
&mut manifest,
|
||||
"{} {:x}",
|
||||
filename.display(),
|
||||
hasher.finalize()
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(manifest)
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct the list of compilations (transformations from ISLE
|
||||
/// source to generated Rust source) that exist in the repository.
|
||||
fn get_isle_compilations(crate_dir: &std::path::Path) -> Result<IsleCompilations, std::io::Error> {
|
||||
let cur_dir = std::env::current_dir()?;
|
||||
|
||||
let clif_isle =
|
||||
make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("clif.isle"));
|
||||
let prelude_isle =
|
||||
make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("prelude.isle"));
|
||||
let src_isa_x64 =
|
||||
make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("isa").join("x64"));
|
||||
|
||||
// This is a set of ISLE compilation units.
|
||||
//
|
||||
// The format of each entry is:
|
||||
//
|
||||
// (output Rust code file, input ISLE source files)
|
||||
//
|
||||
// There should be one entry for each backend that uses ISLE for lowering,
|
||||
// and if/when we replace our peephole optimization passes with ISLE, there
|
||||
// should be an entry for each of those as well.
|
||||
Ok(IsleCompilations {
|
||||
items: vec![
|
||||
// The x86-64 instruction selector.
|
||||
IsleCompilation {
|
||||
output: src_isa_x64
|
||||
.join("lower")
|
||||
.join("isle")
|
||||
.join("generated_code.rs"),
|
||||
inputs: vec![
|
||||
clif_isle,
|
||||
prelude_isle,
|
||||
src_isa_x64.join("inst.isle"),
|
||||
src_isa_x64.join("lower.isle"),
|
||||
],
|
||||
},
|
||||
],
|
||||
})
|
||||
}
|
||||
|
||||
/// Check the manifest for the ISLE generated code, which documents
|
||||
/// what ISLE source went into generating the Rust, and if there is a
|
||||
/// mismatch, either invoke the ISLE compiler (if we have the
|
||||
/// `rebuild-isle` feature) or exit with an error (if not).
|
||||
///
|
||||
/// We do this by computing a hash of the ISLE source and checking it
|
||||
/// against a "manifest" that is also checked into git, alongside the
|
||||
/// generated Rust.
|
||||
///
|
||||
/// (Why not include the `rebuild-isle` feature by default? Because
|
||||
/// the build process must not modify the checked-in source by
|
||||
/// default; any checked-in source is a human-managed bit of data, and
|
||||
/// we can only act as an agent of the human developer when explicitly
|
||||
/// requested to do so. This manifest check is a middle ground that
|
||||
/// ensures this explicit control while also avoiding the easy footgun
|
||||
/// of "I changed the ISLE, why isn't the compiler updated?!".)
|
||||
fn maybe_rebuild_isle(
|
||||
crate_dir: &std::path::Path,
|
||||
) -> Result<(), Box<dyn std::error::Error + 'static>> {
|
||||
let isle_compilations = get_isle_compilations(crate_dir)?;
|
||||
let mut rebuild_compilations = vec![];
|
||||
|
||||
for compilation in &isle_compilations.items {
|
||||
for file in &compilation.inputs {
|
||||
println!("cargo:rerun-if-changed={}", file.display());
|
||||
}
|
||||
|
||||
let manifest = std::fs::read_to_string(compilation.manifest_filename())?;
|
||||
let expected_manifest = compilation.compute_manifest()?;
|
||||
if manifest != expected_manifest {
|
||||
rebuild_compilations.push((compilation, expected_manifest));
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "rebuild-isle")]
|
||||
fn rebuild_isle(crate_dir: &std::path::Path) -> Result<(), Box<dyn std::error::Error + 'static>> {
|
||||
{
|
||||
if !rebuild_compilations.is_empty() {
|
||||
set_miette_hook();
|
||||
}
|
||||
let mut had_error = false;
|
||||
for (compilation, expected_manifest) in rebuild_compilations {
|
||||
if let Err(e) = rebuild_isle(compilation, &expected_manifest) {
|
||||
eprintln!("Error building ISLE files: {:?}", e);
|
||||
let mut source = e.source();
|
||||
while let Some(e) = source {
|
||||
eprintln!("{:?}", e);
|
||||
source = e.source();
|
||||
}
|
||||
had_error = true;
|
||||
}
|
||||
}
|
||||
|
||||
if had_error {
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "rebuild-isle"))]
|
||||
{
|
||||
if !rebuild_compilations.is_empty() {
|
||||
for (compilation, _) in rebuild_compilations {
|
||||
eprintln!("");
|
||||
eprintln!(
|
||||
"Error: the ISLE source files that resulted in the generated Rust source"
|
||||
);
|
||||
eprintln!("");
|
||||
eprintln!(" * {}", compilation.output.display());
|
||||
eprintln!("");
|
||||
eprintln!(
|
||||
"have changed but the generated source was not rebuilt! These ISLE source"
|
||||
);
|
||||
eprintln!("files are:");
|
||||
eprintln!("");
|
||||
for file in &compilation.inputs {
|
||||
eprintln!(" * {}", file.display());
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!("");
|
||||
eprintln!("Please add `--features rebuild-isle` to your `cargo build` command");
|
||||
eprintln!("if you wish to rebuild the generated source, then include these changes");
|
||||
eprintln!("in any git commits you make that include the changes to the ISLE.");
|
||||
eprintln!("");
|
||||
eprintln!("For example:");
|
||||
eprintln!("");
|
||||
eprintln!(" $ cargo build -p cranelift-codegen --features rebuild-isle");
|
||||
eprintln!("");
|
||||
eprintln!("(This build script cannot do this for you by default because we cannot");
|
||||
eprintln!("modify checked-into-git source without your explicit opt-in.)");
|
||||
eprintln!("");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(feature = "rebuild-isle")]
|
||||
fn set_miette_hook() {
|
||||
use std::sync::Once;
|
||||
static SET_MIETTE_HOOK: Once = Once::new();
|
||||
SET_MIETTE_HOOK.call_once(|| {
|
||||
@@ -146,51 +343,25 @@ fn rebuild_isle(crate_dir: &std::path::Path) -> Result<(), Box<dyn std::error::E
|
||||
)
|
||||
}));
|
||||
});
|
||||
|
||||
let clif_isle = crate_dir.join("src").join("clif.isle");
|
||||
let prelude_isle = crate_dir.join("src").join("prelude.isle");
|
||||
let src_isa_x64 = crate_dir.join("src").join("isa").join("x64");
|
||||
|
||||
// This is a set of ISLE compilation units.
|
||||
//
|
||||
// The format of each entry is:
|
||||
//
|
||||
// (output Rust code file, input ISLE source files)
|
||||
//
|
||||
// There should be one entry for each backend that uses ISLE for lowering,
|
||||
// and if/when we replace our peephole optimization passes with ISLE, there
|
||||
// should be an entry for each of those as well.
|
||||
let isle_compilations = vec![
|
||||
// The x86-64 instruction selector.
|
||||
(
|
||||
src_isa_x64
|
||||
.join("lower")
|
||||
.join("isle")
|
||||
.join("generated_code.rs"),
|
||||
vec![
|
||||
clif_isle,
|
||||
prelude_isle,
|
||||
src_isa_x64.join("inst.isle"),
|
||||
src_isa_x64.join("lower.isle"),
|
||||
],
|
||||
),
|
||||
];
|
||||
|
||||
let cur_dir = std::env::current_dir()?;
|
||||
for (out_file, mut files) in isle_compilations {
|
||||
for file in files.iter_mut() {
|
||||
println!("cargo:rerun-if-changed={}", file.display());
|
||||
|
||||
// Strip the current directory from the file paths, because `islec`
|
||||
// includes them in the generated source, and this helps us maintain
|
||||
// deterministic builds that don't include those local file paths.
|
||||
if let Ok(suffix) = file.strip_prefix(&cur_dir) {
|
||||
*file = suffix.to_path_buf();
|
||||
}
|
||||
}
|
||||
|
||||
/// Rebuild ISLE DSL source text into generated Rust code.
|
||||
///
|
||||
/// NB: This must happen *after* the `cranelift-codegen-meta` functions, since
|
||||
/// it consumes files generated by them.
|
||||
#[cfg(feature = "rebuild-isle")]
|
||||
fn rebuild_isle(
|
||||
compilation: &IsleCompilation,
|
||||
manifest: &str,
|
||||
) -> Result<(), Box<dyn std::error::Error + 'static>> {
|
||||
// First, remove the manifest, if any; we will recreate it
|
||||
// below if the compilation is successful. Ignore error if no
|
||||
// manifest was present.
|
||||
let manifest_filename = compilation.manifest_filename();
|
||||
let _ = std::fs::remove_file(&manifest_filename);
|
||||
|
||||
let code = (|| {
|
||||
let lexer = isle::lexer::Lexer::from_files(files)?;
|
||||
let lexer = isle::lexer::Lexer::from_files(&compilation.inputs[..])?;
|
||||
let defs = isle::parser::parse(lexer)?;
|
||||
isle::compile::compile(&defs)
|
||||
})()
|
||||
@@ -226,9 +397,18 @@ fn rebuild_isle(crate_dir: &std::path::Path) -> Result<(), Box<dyn std::error::E
|
||||
code
|
||||
});
|
||||
|
||||
println!("Writing ISLE-generated Rust code to {}", out_file.display());
|
||||
std::fs::write(out_file, code)?;
|
||||
}
|
||||
println!(
|
||||
"Writing ISLE-generated Rust code to {}",
|
||||
compilation.output.display()
|
||||
);
|
||||
std::fs::write(&compilation.output, code)?;
|
||||
|
||||
// Write the manifest so that, in the default build configuration
|
||||
// without the `rebuild-isle` feature, we can at least verify that
|
||||
// no changes were made that will not be picked up. Note that we
|
||||
// only write this *after* we write the source above, so no
|
||||
// manifest is produced if there was an error.
|
||||
std::fs::write(&manifest_filename, manifest)?;
|
||||
|
||||
return Ok(());
|
||||
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
src/clif.isle 9c0563583e5500de00ec5e226edc0547ac3ea789c8d76f1da0401c80ec619320fdc9a6f17fd76bbcac74a5894f85385c1f51c900c2b83bc9906d03d0f29bf5cb
|
||||
src/prelude.isle 21143c73c97885afd9c34acedb8dbc653dca3b5a3e2e73754a5761762f7fe6ce2eefa520227e64c2b18f5626ca0ffa0a6aff54971099e619a464a23adf365bd2
|
||||
src/isa/x64/inst.isle fdfbfc6dfad1fc5ed252e0a14ccc69baba51d0538e05cfb9916f6213e5a6fcfc9d22605a29bd684d6a66f6d5e1c8ec36a963660d52c2e8b3fb6e0758f7adb7b5
|
||||
src/isa/x64/lower.isle 8555abdae385431c96aaabc392b7b3a8b1bbe733be08b007ef776850860cb77e85a140db02f427586c155c0b0129f9ffd531abd2e4a772c72667535cc015e609
|
||||
Reference in New Issue
Block a user