diff --git a/Cargo.lock b/Cargo.lock index 6780b4cfc9..cd943a6a2e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -754,6 +754,7 @@ dependencies = [ "cranelift-reader", "cranelift-wasm", "filecheck", + "fxhash", "indicatif", "log", "pretty_env_logger", diff --git a/cranelift/Cargo.toml b/cranelift/Cargo.toml index 0dff273f87..03af48c318 100644 --- a/cranelift/Cargo.toml +++ b/cranelift/Cargo.toml @@ -48,6 +48,7 @@ clap = { workspace = true } similar = { workspace = true } toml = { workspace = true } serde = { workspace = true } +fxhash = "0.2.1" [features] default = ["disas", "wasm", "cranelift-codegen/all-arch", "cranelift-codegen/trace-log", "souper-harvest"] diff --git a/cranelift/src/souper_harvest.rs b/cranelift/src/souper_harvest.rs index 0a616023ea..cd8d1b9ebe 100644 --- a/cranelift/src/souper_harvest.rs +++ b/cranelift/src/souper_harvest.rs @@ -4,6 +4,8 @@ use cranelift_codegen::Context; use cranelift_reader::parse_sets_and_triple; use cranelift_wasm::DummyEnvironment; use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use std::collections::HashSet; +use std::io::Write; use std::path::{Path, PathBuf}; use std::{fs, io}; @@ -18,9 +20,10 @@ pub struct Options { /// Specify an input file to be used. Use '-' for stdin. input: PathBuf, - /// Specify the output file to be used. Use '-' for stdout. - #[clap(short, long, default_value("-"))] - output: PathBuf, + /// Specify the directory where harvested left-hand side files should be + /// written to. + #[clap(short, long)] + output_dir: PathBuf, /// Configure Cranelift settings #[clap(long = "set")] @@ -29,6 +32,12 @@ pub struct Options { /// Specify the Cranelift target #[clap(long = "target")] target: String, + + /// Add a comment from which CLIF variable and function each left-hand side + /// was harvested from. This prevents deduplicating harvested left-hand + /// sides. + #[clap(long)] + add_harvest_source: bool, } pub fn run(options: &Options) -> Result<()> { @@ -47,13 +56,25 @@ pub fn run(options: &Options) -> Result<()> { )) }; - let mut output: Box = if options.output == Path::new("-") { - Box::new(io::stdout()) - } else { - Box::new(io::BufWriter::new( - fs::File::create(&options.output).context("failed to create output file")?, - )) - }; + match std::fs::create_dir_all(&options.output_dir) { + Ok(_) => {} + Err(e) + if e.kind() == io::ErrorKind::AlreadyExists + && fs::metadata(&options.output_dir) + .with_context(|| { + format!( + "failed to read file metadata: {}", + options.output_dir.display(), + ) + })? + .is_dir() => {} + Err(e) => { + return Err(e).context(format!( + "failed to create output directory: {}", + options.output_dir.display() + )) + } + } let mut contents = vec![]; input @@ -77,13 +98,33 @@ pub fn run(options: &Options) -> Result<()> { let (send, recv) = std::sync::mpsc::channel::(); - let writing_thread = std::thread::spawn(move || -> Result<()> { - for lhs in recv { - output - .write_all(lhs.as_bytes()) - .context("failed to write to output file")?; + let writing_thread = std::thread::spawn({ + let output_dir = options.output_dir.clone(); + let keep_harvest_source = options.add_harvest_source; + move || -> Result<()> { + let mut already_harvested = HashSet::new(); + for lhs in recv { + let lhs = if keep_harvest_source { + &lhs + } else { + // Remove the first `;; Harvested from v12 in u:34` line. + let i = lhs.find('\n').unwrap(); + &lhs[i + 1..] + }; + let hash = fxhash::hash(lhs.as_bytes()); + if already_harvested.insert(hash) { + let output_path = output_dir.join(hash.to_string()); + let mut output = + io::BufWriter::new(fs::File::create(&output_path).with_context(|| { + format!("failed to create file: {}", output_path.display()) + })?); + output.write_all(lhs.as_bytes()).with_context(|| { + format!("failed to write to output file: {}", output_path.display()) + })?; + } + } + Ok(()) } - Ok(()) }); funcs @@ -92,9 +133,8 @@ pub fn run(options: &Options) -> Result<()> { let mut ctx = Context::new(); ctx.func = func; - ctx.compute_cfg(); - ctx.preopt(fisa.isa.unwrap()) - .context("failed to run preopt")?; + ctx.optimize(fisa.isa.unwrap()) + .context("failed to run optimizations")?; ctx.souper_harvest(send) .context("failed to run souper harvester")?; diff --git a/supply-chain/audits.toml b/supply-chain/audits.toml index 87eecfae99..db4c3c577a 100644 --- a/supply-chain/audits.toml +++ b/supply-chain/audits.toml @@ -24,6 +24,15 @@ criteria = "safe-to-deploy" version = "1.1.4" notes = "I am the author of this crate." +[[audits.arrayref]] +who = "Nick Fitzgerald " +criteria = "safe-to-deploy" +version = "0.3.6" +notes = """ +Unsafe code, but its logic looks good to me. Necessary given what it is +doing. Well tested, has quickchecks. +""" + [[audits.arrayvec]] who = "Nick Fitzgerald " criteria = "safe-to-deploy" @@ -167,6 +176,12 @@ criteria = "safe-to-deploy" version = "0.11.1" notes = "This library uses `forbid(unsafe_code)` and has no filesystem or network I/O." +[[audits.constant_time_eq]] +who = "Nick Fitzgerald " +criteria = "safe-to-deploy" +version = "0.2.4" +notes = "A few tiny blocks of `unsafe` but each of them is very obviously correct." + [[audits.criterion]] who = "Alex Crichton " criteria = "safe-to-run"