From 3a6dd832c0fcc51d72e7ef866cda246e8bafe849 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Wed, 2 Sep 2020 15:26:42 -0700 Subject: [PATCH 1/6] Harvest left-hand side superoptimization candidates. Given a clif function, harvest all its integer subexpressions, so that they can be fed into [Souper](https://github.com/google/souper) as candidates for superoptimization. For some of these candidates, Souper will successfully synthesize a right-hand side that is equivalent but has lower cost than the left-hand side. Then, we can combine these left- and right-hand sides into a complete optimization, and add it to our peephole passes. To harvest the expression that produced a given value `x`, we do a post-order traversal of the dataflow graph starting from `x`. As we do this traversal, we maintain a map from clif values to their translated Souper values. We stop traversing when we reach anything that can't be translated into Souper IR: a memory load, a float-to-int conversion, a block parameter, etc. For values produced by these instructions, we create a Souper `var`, which is an input variable to the optimization. For instructions that have a direct mapping into Souper IR, we get the Souper version of each of its operands and then create the Souper version of the instruction itself. It should now be clear why we do a post-order traversal: we need an instruction's translated operands in order to translate the instruction itself. Once this instruction is translated, we update the clif-to-souper map with this new translation so that any other instruction that uses this result as an operand has access to the translated value. When the traversal is complete we return the translation of `x` as the root of the left-hand side candidate. 
--- Cargo.lock | 2 + cranelift/Cargo.toml | 4 +- cranelift/codegen/Cargo.toml | 4 + cranelift/codegen/src/context.rs | 17 + cranelift/codegen/src/lib.rs | 3 + cranelift/codegen/src/souper_harvest.rs | 500 ++++++++++++++++++++++++ cranelift/src/clif-util.rs | 27 +- cranelift/src/souper_harvest.rs | 87 +++++ 8 files changed, 642 insertions(+), 2 deletions(-) create mode 100644 cranelift/codegen/src/souper_harvest.rs mode change 100644 => 100755 cranelift/src/clif-util.rs create mode 100644 cranelift/src/souper_harvest.rs diff --git a/Cargo.lock b/Cargo.lock index 96d40bf923..38cecde0ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -390,6 +390,7 @@ dependencies = [ "regalloc", "serde", "smallvec", + "souper-ir", "target-lexicon", "thiserror", "wast", @@ -566,6 +567,7 @@ dependencies = [ "log", "peepmatic-souper", "pretty_env_logger", + "rayon", "target-lexicon", "term", "thiserror", diff --git a/cranelift/Cargo.toml b/cranelift/Cargo.toml index 7347c75d96..5f29620e33 100644 --- a/cranelift/Cargo.toml +++ b/cranelift/Cargo.toml @@ -38,14 +38,16 @@ wat = { version = "1.0.18", optional = true } target-lexicon = "0.10" peepmatic-souper = { path = "./peepmatic/crates/souper", version = "0.66.0", optional = true } pretty_env_logger = "0.4.0" +rayon = { version = "1", optional = true } file-per-thread-logger = "0.1.2" indicatif = "0.13.0" thiserror = "1.0.15" walkdir = "2.2" [features] -default = ["disas", "wasm", "cranelift-codegen/all-arch", "peepmatic-souper"] +default = ["disas", "wasm", "cranelift-codegen/all-arch", "peepmatic-souper", "souper-harvest"] disas = ["capstone"] enable-peepmatic = ["cranelift-codegen/enable-peepmatic", "cranelift-filetests/enable-peepmatic"] wasm = ["wat", "cranelift-wasm"] experimental_x64 = ["cranelift-codegen/x64"] +souper-harvest = ["cranelift-codegen/souper-harvest", "rayon"] diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index cc9e4421ea..d0b7120ee1 100644 --- a/cranelift/codegen/Cargo.toml +++ 
b/cranelift/codegen/Cargo.toml @@ -29,6 +29,7 @@ peepmatic = { path = "../peepmatic", optional = true, version = "0.66.0" } peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, version = "0.66.0" } peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.66.0" } regalloc = "0.0.30" +souper-ir = { version = "1", optional = true } wast = { version = "22.0.0", optional = true } # It is a goal of the cranelift-codegen crate to have minimal external dependencies. # Please don't add any unless they are essential to the task of creating binary @@ -87,5 +88,8 @@ rebuild-peephole-optimizers = ["peepmatic", "peepmatic-traits", "wast"] # Enable the use of `peepmatic`-generated peephole optimizers. enable-peepmatic = ["peepmatic-runtime", "peepmatic-traits", "serde"] +# Enable support for the Souper harvester. +souper-harvest = ["souper-ir", "souper-ir/stringify"] + [badges] maintenance = { status = "experimental" } diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index 3d2595f270..81b9dcbd23 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -36,9 +36,14 @@ use crate::timing; use crate::unreachable_code::eliminate_unreachable_code; use crate::value_label::{build_value_labels_ranges, ComparableSourceLoc, ValueLabelsRanges}; use crate::verifier::{verify_context, verify_locations, VerifierErrors, VerifierResult}; +#[cfg(feature = "souper-harvest")] +use alloc::string::String; use alloc::vec::Vec; use log::debug; +#[cfg(feature = "souper-harvest")] +use crate::souper_harvest::do_souper_harvest; + /// Persistent data structures and compilation pipeline. pub struct Context { /// The function we're compiling. @@ -447,4 +452,16 @@ impl Context { isa, )) } + + /// Harvest candidate left-hand sides for superoptimization with Souper. 
+ #[cfg(feature = "souper-harvest")] + pub fn souper_harvest( + &mut self, + isa: &dyn TargetIsa, + out: &mut std::sync::mpsc::Sender, + ) -> CodegenResult<()> { + self.preopt(isa)?; + do_souper_harvest(&self.func, out); + Ok(()) + } } diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 38b173de13..053d7b979c 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -116,6 +116,9 @@ mod value_label; #[cfg(feature = "enable-peepmatic")] mod peepmatic; +#[cfg(feature = "souper-harvest")] +mod souper_harvest; + pub use crate::result::{CodegenError, CodegenResult}; /// Version number of this crate. diff --git a/cranelift/codegen/src/souper_harvest.rs b/cranelift/codegen/src/souper_harvest.rs new file mode 100644 index 0000000000..1e0d0eb382 --- /dev/null +++ b/cranelift/codegen/src/souper_harvest.rs @@ -0,0 +1,500 @@ +//! Harvest left-hand side superoptimization candidates. +//! +//! Given a clif function, harvest all its integer subexpressions, so that they +//! can be fed into [Souper](https://github.com/google/souper) as candidates for +//! superoptimization. For some of these candidates, Souper will successfully +//! synthesize a right-hand side that is equivalent but has lower cost than the +//! left-hand side. Then, we can combine these left- and right-hand sides into a +//! complete optimization, and add it to our peephole passes. +//! +//! To harvest the expression that produced a given value `x`, we do a +//! post-order traversal of the dataflow graph starting from `x`. As we do this +//! traversal, we maintain a map from clif values to their translated Souper +//! values. We stop traversing when we reach anything that can't be translated +//! into Souper IR: a memory load, a float-to-int conversion, a block parameter, +//! etc. For values produced by these instructions, we create a Souper `var`, +//! which is an input variable to the optimization. For instructions that have a +//! 
direct mapping into Souper IR, we get the Souper version of each of its +//! operands and then create the Souper version of the instruction itself. It +//! should now be clear why we do a post-order traversal: we need an +//! instruction's translated operands in order to translate the instruction +//! itself. Once this instruction is translated, we update the clif-to-souper +//! map with this new translation so that any other instruction that uses this +//! result as an operand has access to the translated value. When the traversal +//! is complete we return the translation of `x` as the root of left-hand side +//! candidate. + +use crate::ir; +use souper_ir::ast; +use std::collections::{HashMap, HashSet}; +use std::string::String; +use std::sync::mpsc; +use std::vec::Vec; + +/// Harvest Souper left-hand side candidates from the given function. +/// +/// Candidates are reported through the given MPSC sender. +pub fn do_souper_harvest(func: &ir::Function, out: &mut mpsc::Sender) { + let mut allocs = Allocs::default(); + + // Iterate over each instruction in each block and try and harvest a + // left-hand side from its result. + for block in func.layout.blocks() { + let mut option_inst = func.layout.first_inst(block); + while let Some(inst) = option_inst { + let results = func.dfg.inst_results(inst); + if results.len() == 1 { + let val = results[0]; + let ty = func.dfg.value_type(val); + if ty.is_int() && ty.lane_count() == 1 { + harvest_candidate_lhs(&mut allocs, func, val, out); + } + } + option_inst = func.layout.next_inst(inst); + } + } +} + +/// Allocations that we reuse across many LHS candidate harvests. +#[derive(Default)] +struct Allocs { + /// A map from cranelift IR to souper IR for values that we've already + /// translated into souper IR. + ir_to_souper_val: HashMap, + + /// Stack of to-visit and to-trace values for the post-order DFS. + dfs_stack: Vec, + + /// Set of values we've already seen in our post-order DFS. 
+ dfs_seen: HashSet, +} + +impl Allocs { + /// Reset the collections to their empty state (without deallocating their + /// backing data). + fn reset(&mut self) { + self.ir_to_souper_val.clear(); + self.dfs_stack.clear(); + self.dfs_seen.clear(); + } +} + +/// Harvest a candidate LHS for `val` from the dataflow graph. +fn harvest_candidate_lhs( + allocs: &mut Allocs, + func: &ir::Function, + val: ir::Value, + out: &mut mpsc::Sender, +) { + allocs.reset(); + let mut lhs = ast::LeftHandSideBuilder::default(); + let mut non_var_count = 0; + + // Should we keep tracing through the given `val`? Only if it is defined + // by an instruction that we can translate to Souper IR. + let should_trace = |val| match func.dfg.value_def(val) { + ir::ValueDef::Result(inst, 0) => match func.dfg[inst].opcode() { + ir::Opcode::Iadd + | ir::Opcode::IaddImm + | ir::Opcode::IrsubImm + | ir::Opcode::Imul + | ir::Opcode::ImulImm + | ir::Opcode::Udiv + | ir::Opcode::UdivImm + | ir::Opcode::Sdiv + | ir::Opcode::SdivImm + | ir::Opcode::Urem + | ir::Opcode::UremImm + | ir::Opcode::Srem + | ir::Opcode::SremImm + | ir::Opcode::Band + | ir::Opcode::BandImm + | ir::Opcode::Bor + | ir::Opcode::BorImm + | ir::Opcode::Bxor + | ir::Opcode::BxorImm + | ir::Opcode::Ishl + | ir::Opcode::IshlImm + | ir::Opcode::Sshr + | ir::Opcode::SshrImm + | ir::Opcode::Ushr + | ir::Opcode::UshrImm + | ir::Opcode::Select + | ir::Opcode::Uextend + | ir::Opcode::Sextend + | ir::Opcode::Trunc + | ir::Opcode::Icmp + | ir::Opcode::Popcnt + | ir::Opcode::Bitrev + | ir::Opcode::Clz + | ir::Opcode::Ctz + // TODO: ir::Opcode::IaddCarry + | ir::Opcode::SaddSat + | ir::Opcode::SsubSat + | ir::Opcode::UsubSat => true, + _ => false, + }, + _ => false, + }; + + post_order_dfs(allocs, &func.dfg, val, should_trace, |allocs, val| { + let souper_assignment_rhs = match func.dfg.value_def(val) { + ir::ValueDef::Result(inst, 0) => { + let args = func.dfg.inst_args(inst); + let arg = |allocs: &mut Allocs, n| 
allocs.ir_to_souper_val[&args[n]].into(); + + match (func.dfg[inst].opcode(), &func.dfg[inst]) { + (ir::Opcode::Iadd, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Add { a, b }.into() + } + (ir::Opcode::IaddImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Add { a, b }.into() + } + (ir::Opcode::IrsubImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let b = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let a = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Sub { a, b }.into() + } + (ir::Opcode::Imul, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Mul { a, b }.into() + } + (ir::Opcode::ImulImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Mul { a, b }.into() + } + (ir::Opcode::Udiv, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Udiv { a, b }.into() + } + (ir::Opcode::UdivImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Udiv { a, b }.into() + } + (ir::Opcode::Sdiv, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Sdiv { a, b }.into() + } + (ir::Opcode::SdivImm, ir::InstructionData::BinaryImm64 { imm, .. 
}) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Sdiv { a, b }.into() + } + (ir::Opcode::Urem, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Urem { a, b }.into() + } + (ir::Opcode::UremImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Urem { a, b }.into() + } + (ir::Opcode::Srem, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Srem { a, b }.into() + } + (ir::Opcode::SremImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Srem { a, b }.into() + } + (ir::Opcode::Band, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::And { a, b }.into() + } + (ir::Opcode::BandImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::And { a, b }.into() + } + (ir::Opcode::Bor, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Or { a, b }.into() + } + (ir::Opcode::BorImm, ir::InstructionData::BinaryImm64 { imm, .. 
}) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Or { a, b }.into() + } + (ir::Opcode::Bxor, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Xor { a, b }.into() + } + (ir::Opcode::BxorImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Xor { a, b }.into() + } + (ir::Opcode::Ishl, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Shl { a, b }.into() + } + (ir::Opcode::IshlImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Shl { a, b }.into() + } + (ir::Opcode::Sshr, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Ashr { a, b }.into() + } + (ir::Opcode::SshrImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Ashr { a, b }.into() + } + (ir::Opcode::Ushr, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Lshr { a, b }.into() + } + (ir::Opcode::UshrImm, ir::InstructionData::BinaryImm64 { imm, .. 
}) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Lshr { a, b }.into() + } + (ir::Opcode::Select, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + let c = arg(allocs, 2); + ast::Instruction::Select { a, b, c }.into() + } + (ir::Opcode::Uextend, _) => { + let a = arg(allocs, 0); + ast::Instruction::Zext { a }.into() + } + (ir::Opcode::Sextend, _) => { + let a = arg(allocs, 0); + ast::Instruction::Sext { a }.into() + } + (ir::Opcode::Trunc, _) => { + let a = arg(allocs, 0); + ast::Instruction::Trunc { a }.into() + } + (ir::Opcode::Popcnt, _) => { + let a = arg(allocs, 0); + ast::Instruction::Ctpop { a }.into() + } + (ir::Opcode::Bitrev, _) => { + let a = arg(allocs, 0); + ast::Instruction::BitReverse { a }.into() + } + (ir::Opcode::Clz, _) => { + let a = arg(allocs, 0); + ast::Instruction::Ctlz { a }.into() + } + (ir::Opcode::Ctz, _) => { + let a = arg(allocs, 0); + ast::Instruction::Cttz { a }.into() + } + // TODO: ir::Opcode::IaddCarry + (ir::Opcode::SaddSat, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::SaddSat { a, b }.into() + } + (ir::Opcode::SsubSat, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::SsubSat { a, b }.into() + } + (ir::Opcode::UsubSat, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::UsubSat { a, b }.into() + } + (ir::Opcode::Iconst, ir::InstructionData::UnaryImm { imm, .. }) => { + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into() + } + (ir::Opcode::Bconst, ir::InstructionData::UnaryBool { imm, .. 
}) => { + let value = *imm as i128; + ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into() + } + _ => ast::AssignmentRhs::Var, + } + } + _ => ast::AssignmentRhs::Var, + }; + + non_var_count += !matches!(souper_assignment_rhs, ast::AssignmentRhs::Var) as u32; + let souper_ty = souper_type_of(&func.dfg, val); + let souper_val = lhs.assignment(None, souper_ty, souper_assignment_rhs, vec![]); + let old_value = allocs.ir_to_souper_val.insert(val, souper_val); + assert!(old_value.is_none()); + }); + + // We end up harvesting a lot of candidates like: + // + // %0:i32 = var + // infer %0 + // + // and + // + // %0:i32 = var + // %1:i32 = var + // %2:i32 = add %0, %1 + // + // Both of these are useless. Only actually harvest the candidate if there + // are at least two actual operations. + if non_var_count >= 2 { + let lhs = lhs.finish(allocs.ir_to_souper_val[&val], None); + out.send(format!( + ";; Harvested from `{}` in `{}`\n{}\n", + val, func.name, lhs + )) + .unwrap(); + } +} + +fn souper_type_of(dfg: &ir::DataFlowGraph, val: ir::Value) -> Option { + let ty = dfg.value_type(val); + assert!(ty.is_int() || ty.is_bool()); + assert_eq!(ty.lane_count(), 1); + Some(ast::Type { width: ty.bits() }) +} + +#[derive(Debug)] +enum StackEntry { + Visit(ir::Value), + Trace(ir::Value), +} + +fn post_order_dfs( + allocs: &mut Allocs, + dfg: &ir::DataFlowGraph, + val: ir::Value, + should_trace: impl Fn(ir::Value) -> bool, + mut visit: impl FnMut(&mut Allocs, ir::Value), +) { + allocs.dfs_stack.push(StackEntry::Trace(val)); + + while let Some(entry) = allocs.dfs_stack.pop() { + match entry { + StackEntry::Visit(val) => { + let is_new = allocs.dfs_seen.insert(val); + if is_new { + visit(allocs, val); + } + } + StackEntry::Trace(val) => { + if allocs.dfs_seen.contains(&val) { + continue; + } + + allocs.dfs_stack.push(StackEntry::Visit(val)); + if should_trace(val) { + if let ir::ValueDef::Result(inst, 0) = dfg.value_def(val) { + let args = dfg.inst_args(inst); 
+ for v in args.iter().rev().copied() { + allocs.dfs_stack.push(StackEntry::Trace(v)); + } + } + } + } + } + } +} diff --git a/cranelift/src/clif-util.rs b/cranelift/src/clif-util.rs old mode 100644 new mode 100755 index 2948c544f9..fb07fba621 --- a/cranelift/src/clif-util.rs +++ b/cranelift/src/clif-util.rs @@ -27,6 +27,8 @@ mod disasm; mod interpret; mod print_cfg; mod run; +#[cfg(feature = "souper-harvest")] +mod souper_harvest; mod utils; #[cfg(feature = "peepmatic-souper")] @@ -265,6 +267,13 @@ fn main() { .about("Convert Souper optimizations into Peepmatic DSL.") .arg(add_single_input_file_arg()) .arg(add_output_arg()), + ) + .subcommand( + SubCommand::with_name("souper-harvest") + .arg(add_single_input_file_arg()) + .arg(add_output_arg()) + .arg(add_target_flag()) + .arg(add_set_flag()), ); let res_util = match app_cmds.get_matches().subcommand() { @@ -392,12 +401,28 @@ fn main() { #[cfg(not(feature = "peepmatic-souper"))] { Err( - "Error: clif-util was compiled without suport for the `souper-to-peepmatic` \ + "Error: clif-util was compiled without support for the `souper-to-peepmatic` \ subcommand" .into(), ) } } + ("souper-harvest", Some(rest_cmd)) => { + #[cfg(feature = "souper-harvest")] + { + souper_harvest::run( + rest_cmd.value_of("target").unwrap_or_default(), + rest_cmd.value_of("single-file").unwrap(), + rest_cmd.value_of("output").unwrap(), + &get_vec(rest_cmd.values_of("set")), + ) + } + + #[cfg(not(feature = "souper-harvest"))] + { + Err("clif-util was compiled without `souper-harvest` support".into()) + } + } _ => Err("Invalid subcommand.".to_owned()), }; diff --git a/cranelift/src/souper_harvest.rs b/cranelift/src/souper_harvest.rs new file mode 100644 index 0000000000..4167611b72 --- /dev/null +++ b/cranelift/src/souper_harvest.rs @@ -0,0 +1,87 @@ +use crate::utils::parse_sets_and_triple; +use cranelift_codegen::Context; +use cranelift_wasm::{DummyEnvironment, ReturnMode}; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use 
std::{fs, io}; + +static WASM_MAGIC: &[u8] = &[0x00, 0x61, 0x73, 0x6D]; + +pub fn run(target: &str, input: &str, output: &str, flag_set: &[String]) -> Result<(), String> { + let parsed = parse_sets_and_triple(flag_set, target)?; + let fisa = parsed.as_fisa(); + if fisa.isa.is_none() { + return Err("`souper-harvest` requires a target isa".into()); + } + + let stdin = io::stdin(); + let mut input: Box<dyn io::BufRead> = match input { + "-" => Box::new(stdin.lock()), + _ => Box::new(io::BufReader::new( + fs::File::open(input).map_err(|e| format!("failed to open input file: {}", e))?, + )), + }; + + let mut output: Box<dyn io::Write + Send> = match output { + "-" => Box::new(io::stdout()), + _ => Box::new(io::BufWriter::new( + fs::File::create(output).map_err(|e| format!("failed to create output file: {}", e))?, + )), + }; + + let mut contents = vec![]; + input + .read_to_end(&mut contents) + .map_err(|e| format!("failed to read from input file: {}", e))?; + + let funcs = if contents.starts_with(WASM_MAGIC) { + let mut dummy_environ = DummyEnvironment::new( + fisa.isa.unwrap().frontend_config(), + ReturnMode::NormalReturns, + false, + ); + cranelift_wasm::translate_module(&contents, &mut dummy_environ) + .map_err(|e| format!("failed to translate Wasm module to clif: {}", e))?; + dummy_environ + .info + .function_bodies + .iter() + .map(|(_, f)| f.clone()) + .collect() + } else { + let contents = String::from_utf8(contents) + .map_err(|e| format!("input is not a UTF-8 string: {}", e))?; + cranelift_reader::parse_functions(&contents) + .map_err(|e| format!("failed to parse clif: {}", e))? 
+ }; + + let (send, recv) = std::sync::mpsc::channel::<String>(); + + let writing_thread = std::thread::spawn(move || -> Result<(), String> { + for lhs in recv { + output + .write_all(lhs.as_bytes()) + .map_err(|e| format!("failed to write to output file: {}", e))?; + } + Ok(()) + }); + + funcs + .into_par_iter() + .map_with(send, move |send, func| { + let mut ctx = Context::new(); + ctx.func = func; + + ctx.souper_harvest(fisa.isa.unwrap(), send) + .map_err(|e| format!("failed to run souper harvester: {}", e))?; + + Ok(()) + }) + .collect::<Result<(), String>>()?; + + match writing_thread.join() { + Ok(result) => result?, + Err(e) => std::panic::resume_unwind(e), + } + + Ok(()) +} From 89f1e02f1fc0416459a64c6af4d9109d418b5a43 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Wed, 2 Sep 2020 15:28:07 -0700 Subject: [PATCH 2/6] Remove executable bits from a few Rust source files --- cranelift/codegen/meta/src/shared/instructions.rs | 0 crates/wasmtime/src/ref.rs | 0 fuzz/fuzz_targets/instantiate-wasm-smith.rs | 0 fuzz/fuzz_targets/table_ops.rs | 0 4 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 cranelift/codegen/meta/src/shared/instructions.rs mode change 100755 => 100644 crates/wasmtime/src/ref.rs mode change 100755 => 100644 fuzz/fuzz_targets/instantiate-wasm-smith.rs mode change 100755 => 100644 fuzz/fuzz_targets/table_ops.rs diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs old mode 100755 new mode 100644 diff --git a/crates/wasmtime/src/ref.rs b/crates/wasmtime/src/ref.rs old mode 100755 new mode 100644 diff --git a/fuzz/fuzz_targets/instantiate-wasm-smith.rs b/fuzz/fuzz_targets/instantiate-wasm-smith.rs old mode 100755 new mode 100644 diff --git a/fuzz/fuzz_targets/table_ops.rs b/fuzz/fuzz_targets/table_ops.rs old mode 100755 new mode 100644 From 5a871711215aac0623ec205c708559bf8ea62440 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Wed, 2 Sep 2020 15:57:18 -0700 Subject: [PATCH 3/6] 
Do not use the `matches!` macro so we work with older rustc versions --- cranelift/codegen/src/souper_harvest.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cranelift/codegen/src/souper_harvest.rs b/cranelift/codegen/src/souper_harvest.rs index 1e0d0eb382..1ca7aaa227 100644 --- a/cranelift/codegen/src/souper_harvest.rs +++ b/cranelift/codegen/src/souper_harvest.rs @@ -420,7 +420,10 @@ fn harvest_candidate_lhs( _ => ast::AssignmentRhs::Var, }; - non_var_count += !matches!(souper_assignment_rhs, ast::AssignmentRhs::Var) as u32; + non_var_count += match souper_assignment_rhs { + ast::AssignmentRhs::Var => 0, + _ => 1, + }; let souper_ty = souper_type_of(&func.dfg, val); let souper_val = lhs.assignment(None, souper_ty, souper_assignment_rhs, vec![]); let old_value = allocs.ir_to_souper_val.insert(val, souper_val); From b2acec116480509b0b5defa2802fdaa24e39d113 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Thu, 10 Sep 2020 11:33:24 -0700 Subject: [PATCH 4/6] Harvest integer comparisons into Souper left-hand side candidates --- cranelift/codegen/src/souper_harvest.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/cranelift/codegen/src/souper_harvest.rs b/cranelift/codegen/src/souper_harvest.rs index 1ca7aaa227..e97b8c0fa5 100644 --- a/cranelift/codegen/src/souper_harvest.rs +++ b/cranelift/codegen/src/souper_harvest.rs @@ -365,6 +365,28 @@ fn harvest_candidate_lhs( let a = arg(allocs, 0); ast::Instruction::Trunc { a }.into() } + (ir::Opcode::Icmp, ir::InstructionData::IntCompare { cond, .. }) + | (ir::Opcode::IcmpImm, ir::InstructionData::IntCompare { cond, .. 
}) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + match cond { + ir::condcodes::IntCC::Equal => ast::Instruction::Eq { a, b }.into(), + ir::condcodes::IntCC::NotEqual => ast::Instruction::Ne { a, b }.into(), + ir::condcodes::IntCC::UnsignedLessThan => { + ast::Instruction::Ult { a, b }.into() + } + ir::condcodes::IntCC::SignedLessThan => { + ast::Instruction::Slt { a, b }.into() + } + ir::condcodes::IntCC::UnsignedLessThanOrEqual => { + ast::Instruction::Ule { a, b }.into() + } + ir::condcodes::IntCC::SignedLessThanOrEqual => { + ast::Instruction::Sle { a, b }.into() + } + _ => ast::AssignmentRhs::Var, + } + } (ir::Opcode::Popcnt, _) => { + let a = arg(allocs, 0); + ast::Instruction::Ctpop { a }.into() From c87aaeeecea7c8a4f29114b7fa5bdda9e7c7f457 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Mon, 14 Sep 2020 08:41:56 -0700 Subject: [PATCH 5/6] cranelift_codegen::souper_harvest: Update TODOs to include more instructions --- cranelift/codegen/src/souper_harvest.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cranelift/codegen/src/souper_harvest.rs b/cranelift/codegen/src/souper_harvest.rs index e97b8c0fa5..fcf53b0ed5 100644 --- a/cranelift/codegen/src/souper_harvest.rs +++ b/cranelift/codegen/src/souper_harvest.rs @@ -129,6 +129,7 @@ fn harvest_candidate_lhs( | ir::Opcode::Clz | ir::Opcode::Ctz // TODO: ir::Opcode::IaddCarry + // TODO: ir::Opcode::IaddCout | ir::Opcode::SaddSat | ir::Opcode::SsubSat | ir::Opcode::UsubSat => true, @@ -404,6 +405,7 @@ fn harvest_candidate_lhs( ast::Instruction::Cttz { a }.into() } // TODO: ir::Opcode::IaddCarry + // TODO: ir::Opcode::IaddCout (ir::Opcode::SaddSat, _) => { let a = arg(allocs, 0); let b = arg(allocs, 1); From e1c8878b33a6a1908248f42069e407105618167e Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Mon, 14 Sep 2020 08:46:34 -0700 Subject: [PATCH 6/6] cranelift_codegen::souper_harvest: Move preopt out of `Context`, into `clif-util` This allows for more flexibility of when/where to harvest LHS 
candidates. For example, we could choose to harvest candidates that overlap with and supersede our current preopt peepholes. This commit also makes sure that we compute the CFG before running preopt, when harvesting LHS candidates via `clif-util souper-harvest`. --- cranelift/codegen/src/context.rs | 2 -- cranelift/src/souper_harvest.rs | 6 +++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index 81b9dcbd23..842a48ab21 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -457,10 +457,8 @@ impl Context { #[cfg(feature = "souper-harvest")] pub fn souper_harvest( &mut self, - isa: &dyn TargetIsa, out: &mut std::sync::mpsc::Sender, ) -> CodegenResult<()> { - self.preopt(isa)?; do_souper_harvest(&self.func, out); Ok(()) } diff --git a/cranelift/src/souper_harvest.rs b/cranelift/src/souper_harvest.rs index 4167611b72..6c15547ae1 100644 --- a/cranelift/src/souper_harvest.rs +++ b/cranelift/src/souper_harvest.rs @@ -71,7 +71,11 @@ pub fn run(target: &str, input: &str, output: &str, flag_set: &[String]) -> Resu let mut ctx = Context::new(); ctx.func = func; - ctx.souper_harvest(fisa.isa.unwrap(), send) + ctx.compute_cfg(); + ctx.preopt(fisa.isa.unwrap()) + .map_err(|e| format!("failed to run preopt: {}", e))?; + + ctx.souper_harvest(send) .map_err(|e| format!("failed to run souper harvester: {}", e))?; Ok(())