Cranelift: remove non-egraphs optimization pipeline and use_egraphs option. (#6167)
* Cranelift: remove non-egraphs optimization pipeline and `use_egraphs` option. This PR removes the LICM, GVN, and preopt passes, and associated support pieces, from `cranelift-codegen`. Not to worry, we still have optimizations: the egraph framework subsumes all of these, and has been on by default since #5181. A few decision points: - Filetests for the legacy LICM, GVN and simple_preopt were removed too. As we built optimizations in the egraph framework we wrote new tests for the equivalent functionality, and many of the old tests were testing specific behaviors in the old implementations that may not be relevant anymore. However if folks prefer I could take a different approach here and try to port over all of the tests. - The corresponding filetest modes (commands) were deleted too. The `test alias_analysis` mode remains, but no longer invokes a separate GVN first (since there is no separate GVN that will not also do alias analysis) so the tests were tweaked slightly to work with that. The egraph testsuite also covers alias analysis. - The `divconst_magic_numbers` module is removed since it's unused without `simple_preopt`, though this is the one remaining optimization we still need to build in the egraphs framework, pending #5908. The magic numbers will live forever in git history so removing this in the meantime is not a major issue IMHO. - The `use_egraphs` setting itself was removed at both the Cranelift and Wasmtime levels. It has been marked deprecated for a few releases now (Wasmtime 6.0, 7.0, upcoming 8.0, and corresponding Cranelift versions) so I think this is probably OK. As an alternative if anyone feels strongly, we could leave the setting and make it a no-op. * Update test outputs for remaining test differences.
This commit is contained in:
@@ -17,15 +17,12 @@ use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::Function;
|
||||
use crate::isa::TargetIsa;
|
||||
use crate::legalizer::simple_legalize;
|
||||
use crate::licm::do_licm;
|
||||
use crate::loop_analysis::LoopAnalysis;
|
||||
use crate::machinst::{CompiledCode, CompiledCodeStencil};
|
||||
use crate::nan_canonicalization::do_nan_canonicalization;
|
||||
use crate::remove_constant_phis::do_remove_constant_phis;
|
||||
use crate::result::{CodegenResult, CompileResult};
|
||||
use crate::settings::{FlagsOrIsa, OptLevel};
|
||||
use crate::simple_gvn::do_simple_gvn;
|
||||
use crate::simple_preopt::do_preopt;
|
||||
use crate::trace;
|
||||
use crate::unreachable_code::eliminate_unreachable_code;
|
||||
use crate::verifier::{verify_context, VerifierErrors, VerifierResult};
|
||||
@@ -172,22 +169,12 @@ impl Context {
|
||||
);
|
||||
|
||||
self.compute_cfg();
|
||||
if !isa.flags().use_egraphs() && opt_level != OptLevel::None {
|
||||
self.preopt(isa)?;
|
||||
}
|
||||
if isa.flags().enable_nan_canonicalization() {
|
||||
self.canonicalize_nans(isa)?;
|
||||
}
|
||||
|
||||
self.legalize(isa)?;
|
||||
|
||||
if !isa.flags().use_egraphs() && opt_level != OptLevel::None {
|
||||
self.compute_domtree();
|
||||
self.compute_loop_analysis();
|
||||
self.licm(isa)?;
|
||||
self.simple_gvn(isa)?;
|
||||
}
|
||||
|
||||
self.compute_domtree();
|
||||
self.eliminate_unreachable_code(isa)?;
|
||||
|
||||
@@ -198,14 +185,7 @@ impl Context {
|
||||
self.remove_constant_phis(isa)?;
|
||||
|
||||
if opt_level != OptLevel::None {
|
||||
if isa.flags().use_egraphs() {
|
||||
self.egraph_pass()?;
|
||||
} else if isa.flags().enable_alias_analysis() {
|
||||
for _ in 0..2 {
|
||||
self.replace_redundant_loads()?;
|
||||
self.simple_gvn(isa)?;
|
||||
}
|
||||
}
|
||||
self.egraph_pass()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -294,13 +274,6 @@ impl Context {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Perform pre-legalization rewrites on the function.
|
||||
pub fn preopt(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
|
||||
do_preopt(&mut self.func, isa);
|
||||
self.verify_if(isa)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Perform NaN canonicalizing rewrites on the function.
|
||||
pub fn canonicalize_nans(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
|
||||
do_nan_canonicalization(&mut self.func);
|
||||
@@ -341,23 +314,6 @@ impl Context {
|
||||
self.compute_domtree()
|
||||
}
|
||||
|
||||
/// Perform simple GVN on the function.
|
||||
pub fn simple_gvn<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CodegenResult<()> {
|
||||
do_simple_gvn(&mut self.func, &mut self.domtree);
|
||||
self.verify_if(fisa)
|
||||
}
|
||||
|
||||
/// Perform LICM on the function.
|
||||
pub fn licm(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
|
||||
do_licm(
|
||||
&mut self.func,
|
||||
&mut self.cfg,
|
||||
&mut self.domtree,
|
||||
&mut self.loop_analysis,
|
||||
);
|
||||
self.verify_if(isa)
|
||||
}
|
||||
|
||||
/// Perform unreachable code elimination.
|
||||
pub fn eliminate_unreachable_code<'a, FOI>(&mut self, fisa: FOI) -> CodegenResult<()>
|
||||
where
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -23,11 +23,6 @@ pub fn FxHashMap<K: Hash + Eq, V>() -> FxHashMap<K, V> {
|
||||
HashMap::default()
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn FxHashSet<V: Hash + Eq>() -> FxHashSet<V> {
|
||||
HashSet::default()
|
||||
}
|
||||
|
||||
/// A speedy hash algorithm for use within rustc. The hashmap in liballoc
|
||||
/// by default uses SipHash which isn't quite as speedy as we want. In the
|
||||
/// compiler we're not really worried about DOS attempts, so we use a fast
|
||||
|
||||
@@ -102,21 +102,17 @@ mod constant_hash;
|
||||
mod context;
|
||||
mod ctxhash;
|
||||
mod dce;
|
||||
mod divconst_magic_numbers;
|
||||
mod egraph;
|
||||
mod fx;
|
||||
mod inst_predicates;
|
||||
mod isle_prelude;
|
||||
mod iterators;
|
||||
mod legalizer;
|
||||
mod licm;
|
||||
mod nan_canonicalization;
|
||||
mod opts;
|
||||
mod remove_constant_phis;
|
||||
mod result;
|
||||
mod scoped_hash_map;
|
||||
mod simple_gvn;
|
||||
mod simple_preopt;
|
||||
mod unionfind;
|
||||
mod unreachable_code;
|
||||
mod value_label;
|
||||
|
||||
@@ -1,241 +0,0 @@
|
||||
//! A Loop Invariant Code Motion optimization pass
|
||||
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::entity::{EntityList, ListPool};
|
||||
use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
|
||||
use crate::fx::FxHashSet;
|
||||
use crate::ir::{
|
||||
Block, DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Layout, Opcode, Type, Value,
|
||||
};
|
||||
use crate::loop_analysis::{Loop, LoopAnalysis};
|
||||
use crate::timing;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
/// Performs the LICM pass by detecting loops within the CFG and moving
|
||||
/// loop-invariant instructions out of them.
|
||||
/// Changes the CFG and domtree in-place during the operation.
|
||||
pub fn do_licm(
|
||||
func: &mut Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
domtree: &mut DominatorTree,
|
||||
loop_analysis: &mut LoopAnalysis,
|
||||
) {
|
||||
let _tt = timing::licm();
|
||||
debug_assert!(cfg.is_valid());
|
||||
debug_assert!(domtree.is_valid());
|
||||
debug_assert!(loop_analysis.is_valid());
|
||||
|
||||
for lp in loop_analysis.loops() {
|
||||
// For each loop that we want to optimize we determine the set of loop-invariant
|
||||
// instructions
|
||||
let invariant_insts = remove_loop_invariant_instructions(lp, func, cfg, loop_analysis);
|
||||
// Then we create the loop's pre-header and fill it with the invariant instructions
|
||||
// Then we remove the invariant instructions from the loop body
|
||||
if !invariant_insts.is_empty() {
|
||||
// If the loop has a natural pre-header we use it, otherwise we create it.
|
||||
let mut pos;
|
||||
match has_pre_header(&func.layout, cfg, domtree, loop_analysis.loop_header(lp)) {
|
||||
None => {
|
||||
let pre_header =
|
||||
create_pre_header(loop_analysis.loop_header(lp), func, cfg, domtree);
|
||||
pos = FuncCursor::new(func).at_last_inst(pre_header);
|
||||
}
|
||||
// If there is a natural pre-header we insert new instructions just before the
|
||||
// related jumping instruction (which is not necessarily at the end).
|
||||
Some((_, last_inst)) => {
|
||||
pos = FuncCursor::new(func).at_inst(last_inst);
|
||||
}
|
||||
};
|
||||
// The last instruction of the pre-header is the termination instruction (usually
|
||||
// a jump) so we need to insert just before this.
|
||||
for inst in invariant_insts {
|
||||
pos.insert_inst(inst);
|
||||
}
|
||||
}
|
||||
}
|
||||
// We have to recompute the domtree to account for the changes
|
||||
cfg.compute(func);
|
||||
domtree.compute(func, cfg);
|
||||
}
|
||||
|
||||
/// Insert a pre-header before the header, modifying the function layout and CFG to reflect it.
|
||||
/// A jump instruction to the header is placed at the end of the pre-header.
|
||||
fn create_pre_header(
|
||||
header: Block,
|
||||
func: &mut Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
domtree: &DominatorTree,
|
||||
) -> Block {
|
||||
let pool = &mut ListPool::<Value>::new();
|
||||
let header_args_values = func.dfg.block_params(header).to_vec();
|
||||
let header_args_types: Vec<Type> = header_args_values
|
||||
.into_iter()
|
||||
.map(|val| func.dfg.value_type(val))
|
||||
.collect();
|
||||
let pre_header = func.dfg.make_block();
|
||||
let mut pre_header_args_value: EntityList<Value> = EntityList::new();
|
||||
for typ in header_args_types {
|
||||
pre_header_args_value.push(func.dfg.append_block_param(pre_header, typ), pool);
|
||||
}
|
||||
|
||||
for BlockPredecessor {
|
||||
inst: last_inst, ..
|
||||
} in cfg.pred_iter(header)
|
||||
{
|
||||
// We only follow normal edges (not the back edges)
|
||||
if !domtree.dominates(header, last_inst, &func.layout) {
|
||||
func.rewrite_branch_destination(last_inst, header, pre_header);
|
||||
}
|
||||
}
|
||||
|
||||
// Inserts the pre-header at the right place in the layout.
|
||||
let mut pos = FuncCursor::new(func).at_top(header);
|
||||
pos.insert_block(pre_header);
|
||||
pos.next_inst();
|
||||
pos.ins().jump(header, pre_header_args_value.as_slice(pool));
|
||||
|
||||
pre_header
|
||||
}
|
||||
|
||||
/// Detects if a loop header has a natural pre-header.
|
||||
///
|
||||
/// A loop header has a pre-header if there is only one predecessor that the header doesn't
|
||||
/// dominate.
|
||||
/// Returns the pre-header Block and the instruction jumping to the header.
|
||||
fn has_pre_header(
|
||||
layout: &Layout,
|
||||
cfg: &ControlFlowGraph,
|
||||
domtree: &DominatorTree,
|
||||
header: Block,
|
||||
) -> Option<(Block, Inst)> {
|
||||
let mut result = None;
|
||||
for BlockPredecessor {
|
||||
block: pred_block,
|
||||
inst: branch_inst,
|
||||
} in cfg.pred_iter(header)
|
||||
{
|
||||
// We only count normal edges (not the back edges)
|
||||
if !domtree.dominates(header, branch_inst, layout) {
|
||||
if result.is_some() {
|
||||
// We have already found one, there are more than one
|
||||
return None;
|
||||
}
|
||||
if branch_inst != layout.last_inst(pred_block).unwrap()
|
||||
|| cfg.succ_iter(pred_block).nth(1).is_some()
|
||||
{
|
||||
// It's along a critical edge, so don't use it.
|
||||
return None;
|
||||
}
|
||||
result = Some((pred_block, branch_inst));
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Test whether the given opcode is unsafe to even consider for LICM.
|
||||
fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
|
||||
opcode.can_store()
|
||||
|| opcode.is_call()
|
||||
|| opcode.is_branch()
|
||||
|| opcode.is_terminator()
|
||||
|| opcode.is_return()
|
||||
|| opcode.can_trap()
|
||||
|| opcode.other_side_effects()
|
||||
}
|
||||
|
||||
fn is_unsafe_load(inst_data: &InstructionData) -> bool {
|
||||
match *inst_data {
|
||||
InstructionData::Load { flags, .. } => !flags.readonly() || !flags.notrap(),
|
||||
_ => inst_data.opcode().can_load(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Test whether the given instruction is loop-invariant.
|
||||
fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &FxHashSet<Value>) -> bool {
|
||||
if trivially_unsafe_for_licm(dfg.insts[inst].opcode()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if is_unsafe_load(&dfg.insts[inst]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for arg in dfg.inst_values(inst) {
|
||||
let arg = dfg.resolve_aliases(arg);
|
||||
if loop_values.contains(&arg) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Traverses a loop in reverse post-order from a header block and identify loop-invariant
|
||||
/// instructions. These loop-invariant instructions are then removed from the code and returned
|
||||
/// (in reverse post-order) for later use.
|
||||
fn remove_loop_invariant_instructions(
|
||||
lp: Loop,
|
||||
func: &mut Function,
|
||||
cfg: &ControlFlowGraph,
|
||||
loop_analysis: &LoopAnalysis,
|
||||
) -> Vec<Inst> {
|
||||
let mut loop_values: FxHashSet<Value> = FxHashSet();
|
||||
let mut invariant_insts: Vec<Inst> = Vec::new();
|
||||
let mut pos = FuncCursor::new(func);
|
||||
// We traverse the loop block in reverse post-order.
|
||||
for block in postorder_blocks_loop(loop_analysis, cfg, lp).iter().rev() {
|
||||
// Arguments of the block are loop values
|
||||
for val in pos.func.dfg.block_params(*block) {
|
||||
loop_values.insert(*val);
|
||||
}
|
||||
pos.goto_top(*block);
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(clippy::block_in_if_condition_stmt))]
|
||||
while let Some(inst) = pos.next_inst() {
|
||||
if is_loop_invariant(inst, &pos.func.dfg, &loop_values) {
|
||||
// If all the instruction's argument are defined outside the loop
|
||||
// then this instruction is loop-invariant
|
||||
invariant_insts.push(inst);
|
||||
// We remove it from the loop
|
||||
pos.remove_inst_and_step_back();
|
||||
} else {
|
||||
// If the instruction is not loop-invariant we push its results in the set of
|
||||
// loop values
|
||||
for out in pos.func.dfg.inst_results(inst) {
|
||||
loop_values.insert(*out);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
invariant_insts
|
||||
}
|
||||
|
||||
/// Return blocks from a loop in post-order, starting from an entry point in the block.
|
||||
fn postorder_blocks_loop(
|
||||
loop_analysis: &LoopAnalysis,
|
||||
cfg: &ControlFlowGraph,
|
||||
lp: Loop,
|
||||
) -> Vec<Block> {
|
||||
let mut grey = FxHashSet();
|
||||
let mut black = FxHashSet();
|
||||
let mut stack = vec![loop_analysis.loop_header(lp)];
|
||||
let mut postorder = Vec::new();
|
||||
|
||||
while !stack.is_empty() {
|
||||
let node = stack.pop().unwrap();
|
||||
if !grey.contains(&node) {
|
||||
// This is a white node. Mark it as gray.
|
||||
grey.insert(node);
|
||||
stack.push(node);
|
||||
// Get any children we've never seen before.
|
||||
for child in cfg.succ_iter(node) {
|
||||
if loop_analysis.is_in_loop(child, lp) && !grey.contains(&child) {
|
||||
stack.push(child);
|
||||
}
|
||||
}
|
||||
} else if !black.contains(&node) {
|
||||
postorder.push(node);
|
||||
black.insert(node);
|
||||
}
|
||||
}
|
||||
postorder
|
||||
}
|
||||
@@ -128,29 +128,6 @@ macro_rules! isle_lower_prelude_methods {
|
||||
|
||||
#[inline]
|
||||
fn put_in_regs(&mut self, val: Value) -> ValueRegs {
|
||||
// If the value is a constant, then (re)materialize it at each
|
||||
// use. This lowers register pressure. (Only do this if we are
|
||||
// not using egraph-based compilation; the egraph framework
|
||||
// more efficiently rematerializes constants where needed.)
|
||||
if !(self.backend.flags().use_egraphs()
|
||||
&& self.backend.flags().opt_level() != OptLevel::None)
|
||||
{
|
||||
let inputs = self.lower_ctx.get_value_as_source_or_const(val);
|
||||
if inputs.constant.is_some() {
|
||||
let insn = match inputs.inst {
|
||||
InputSourceInst::UniqueUse(insn, 0) => Some(insn),
|
||||
InputSourceInst::Use(insn, 0) => Some(insn),
|
||||
_ => None,
|
||||
};
|
||||
if let Some(insn) = insn {
|
||||
if let Some(regs) = self.backend.lower(self.lower_ctx, insn) {
|
||||
assert!(regs.len() == 1);
|
||||
return regs[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.lower_ctx.put_value_in_regs(val)
|
||||
}
|
||||
|
||||
|
||||
@@ -528,7 +528,6 @@ probestack_strategy = "outline"
|
||||
regalloc_checker = false
|
||||
regalloc_verbose_logs = false
|
||||
enable_alias_analysis = true
|
||||
use_egraphs = true
|
||||
enable_verifier = true
|
||||
is_pic = false
|
||||
use_colocated_libcalls = false
|
||||
|
||||
@@ -1,149 +0,0 @@
|
||||
//! A simple GVN pass.
|
||||
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::ir::{Function, Inst, InstructionData, Opcode, Type};
|
||||
use crate::scoped_hash_map::ScopedHashMap;
|
||||
use crate::timing;
|
||||
use alloc::vec::Vec;
|
||||
use core::cell::{Ref, RefCell};
|
||||
use core::hash::{Hash, Hasher};
|
||||
|
||||
/// Test whether the given opcode is unsafe to even consider for GVN.
|
||||
fn trivially_unsafe_for_gvn(opcode: Opcode) -> bool {
|
||||
opcode.is_call()
|
||||
|| opcode.is_branch()
|
||||
|| opcode.is_terminator()
|
||||
|| opcode.is_return()
|
||||
|| opcode.can_store()
|
||||
|| (opcode.can_trap() && !opcode.side_effects_idempotent())
|
||||
|| (opcode.other_side_effects() && !opcode.side_effects_idempotent())
|
||||
}
|
||||
|
||||
/// Test that, if the specified instruction is a load, it doesn't have the `readonly` memflag.
|
||||
fn is_load_and_not_readonly(inst_data: &InstructionData) -> bool {
|
||||
match *inst_data {
|
||||
InstructionData::Load { flags, .. } => !flags.readonly(),
|
||||
_ => inst_data.opcode().can_load(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper around `InstructionData` which implements `Eq` and `Hash`
|
||||
#[derive(Clone)]
|
||||
struct HashKey<'a, 'f: 'a> {
|
||||
inst: InstructionData,
|
||||
ty: Type,
|
||||
pos: &'a RefCell<FuncCursor<'f>>,
|
||||
}
|
||||
impl<'a, 'f: 'a> Hash for HashKey<'a, 'f> {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
let pool = &self.pos.borrow().func.dfg.value_lists;
|
||||
self.inst.hash(state, pool, |value| value);
|
||||
self.ty.hash(state);
|
||||
}
|
||||
}
|
||||
impl<'a, 'f: 'a> PartialEq for HashKey<'a, 'f> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
let pool = &self.pos.borrow().func.dfg.value_lists;
|
||||
self.inst.eq(&other.inst, pool, |value| value) && self.ty == other.ty
|
||||
}
|
||||
}
|
||||
impl<'a, 'f: 'a> Eq for HashKey<'a, 'f> {}
|
||||
|
||||
/// Perform simple GVN on `func`.
|
||||
///
|
||||
pub fn do_simple_gvn(func: &mut Function, domtree: &mut DominatorTree) {
|
||||
let _tt = timing::gvn();
|
||||
debug_assert!(domtree.is_valid());
|
||||
|
||||
// Visit blocks in a reverse post-order.
|
||||
//
|
||||
// The RefCell here is a bit ugly since the HashKeys in the ScopedHashMap
|
||||
// need a reference to the function.
|
||||
let pos = RefCell::new(FuncCursor::new(func));
|
||||
|
||||
let mut visible_values: ScopedHashMap<HashKey, Inst> = ScopedHashMap::new();
|
||||
let mut scope_stack: Vec<Inst> = Vec::new();
|
||||
|
||||
for &block in domtree.cfg_postorder().iter().rev() {
|
||||
{
|
||||
// Pop any scopes that we just exited.
|
||||
let layout = &pos.borrow().func.layout;
|
||||
loop {
|
||||
if let Some(current) = scope_stack.last() {
|
||||
if domtree.dominates(*current, block, layout) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
scope_stack.pop();
|
||||
visible_values.decrement_depth();
|
||||
}
|
||||
|
||||
// Push a scope for the current block.
|
||||
scope_stack.push(layout.first_inst(block).unwrap());
|
||||
visible_values.increment_depth();
|
||||
}
|
||||
|
||||
pos.borrow_mut().goto_top(block);
|
||||
while let Some(inst) = {
|
||||
let mut pos = pos.borrow_mut();
|
||||
pos.next_inst()
|
||||
} {
|
||||
// Resolve aliases, particularly aliases we created earlier.
|
||||
pos.borrow_mut().func.dfg.resolve_aliases_in_arguments(inst);
|
||||
|
||||
let func = Ref::map(pos.borrow(), |pos| &pos.func);
|
||||
|
||||
let opcode = func.dfg.insts[inst].opcode();
|
||||
|
||||
if opcode.is_branch() && !opcode.is_terminator() {
|
||||
scope_stack.push(func.layout.next_inst(inst).unwrap());
|
||||
visible_values.increment_depth();
|
||||
}
|
||||
|
||||
if trivially_unsafe_for_gvn(opcode) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// These are split up to separate concerns.
|
||||
if is_load_and_not_readonly(&func.dfg.insts[inst]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let ctrl_typevar = func.dfg.ctrl_typevar(inst);
|
||||
let key = HashKey {
|
||||
inst: func.dfg.insts[inst],
|
||||
ty: ctrl_typevar,
|
||||
pos: &pos,
|
||||
};
|
||||
use crate::scoped_hash_map::Entry::*;
|
||||
match visible_values.entry(key) {
|
||||
Occupied(entry) => {
|
||||
#[allow(clippy::debug_assert_with_mut_call)]
|
||||
{
|
||||
// Clippy incorrectly believes `&func.layout` should not be used here:
|
||||
// https://github.com/rust-lang/rust-clippy/issues/4737
|
||||
debug_assert!(domtree.dominates(*entry.get(), inst, &func.layout));
|
||||
}
|
||||
|
||||
// If the redundant instruction is representing the current
|
||||
// scope, pick a new representative.
|
||||
let old = scope_stack.last_mut().unwrap();
|
||||
if *old == inst {
|
||||
*old = func.layout.next_inst(inst).unwrap();
|
||||
}
|
||||
// Replace the redundant instruction and remove it.
|
||||
drop(func);
|
||||
let mut pos = pos.borrow_mut();
|
||||
pos.func.dfg.replace_with_aliases(inst, *entry.get());
|
||||
pos.remove_inst_and_step_back();
|
||||
}
|
||||
Vacant(entry) => {
|
||||
entry.insert(inst);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,796 +0,0 @@
|
||||
//! A pre-legalization rewriting pass.
|
||||
//!
|
||||
//! This module provides early-stage optimizations. The optimizations found
|
||||
//! should be useful for already well-optimized code.
|
||||
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::divconst_magic_numbers::{magic_s32, magic_s64, magic_u32, magic_u64};
|
||||
use crate::divconst_magic_numbers::{MS32, MS64, MU32, MU64};
|
||||
use crate::ir::{
|
||||
condcodes::IntCC,
|
||||
instructions::Opcode,
|
||||
types::{I128, I32, I64},
|
||||
DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Type, Value,
|
||||
};
|
||||
use crate::isa::TargetIsa;
|
||||
use crate::timing;
|
||||
|
||||
#[inline]
|
||||
/// Replaces the unique result of the instruction inst to an alias of the given value, and
|
||||
/// replaces the instruction with a nop. Can be used only on instructions producing one unique
|
||||
/// result, otherwise will assert.
|
||||
fn replace_single_result_with_alias(dfg: &mut DataFlowGraph, inst: Inst, value: Value) {
|
||||
// Replace the result value by an alias.
|
||||
let results = dfg.detach_results(inst);
|
||||
debug_assert!(results.len(&dfg.value_lists) == 1);
|
||||
let result = results.get(0, &dfg.value_lists).unwrap();
|
||||
dfg.change_to_alias(result, value);
|
||||
|
||||
// Replace instruction by a nop.
|
||||
dfg.replace(inst).nop();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
//
|
||||
// Pattern-match helpers and transformation for div and rem by constants.
|
||||
|
||||
// Simple math helpers
|
||||
|
||||
/// if `x` is a power of two, or the negation thereof, return the power along
|
||||
/// with a boolean that indicates whether `x` is negative. Else return None.
|
||||
#[inline]
|
||||
fn i32_is_power_of_two(x: i32) -> Option<(bool, u32)> {
|
||||
// We have to special-case this because abs(x) isn't representable.
|
||||
if x == -0x8000_0000 {
|
||||
return Some((true, 31));
|
||||
}
|
||||
let abs_x = i32::wrapping_abs(x) as u32;
|
||||
if abs_x.is_power_of_two() {
|
||||
return Some((x < 0, abs_x.trailing_zeros()));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Same comments as for i32_is_power_of_two apply.
|
||||
#[inline]
|
||||
fn i64_is_power_of_two(x: i64) -> Option<(bool, u32)> {
|
||||
// We have to special-case this because abs(x) isn't representable.
|
||||
if x == -0x8000_0000_0000_0000 {
|
||||
return Some((true, 63));
|
||||
}
|
||||
let abs_x = i64::wrapping_abs(x) as u64;
|
||||
if abs_x.is_power_of_two() {
|
||||
return Some((x < 0, abs_x.trailing_zeros()));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Representation of an instruction that can be replaced by a single division/remainder operation
|
||||
/// between a left Value operand and a right immediate operand.
|
||||
#[derive(Debug)]
|
||||
enum DivRemByConstInfo {
|
||||
DivU32(Value, u32),
|
||||
DivU64(Value, u64),
|
||||
DivS32(Value, i32),
|
||||
DivS64(Value, i64),
|
||||
RemU32(Value, u32),
|
||||
RemU64(Value, u64),
|
||||
RemS32(Value, i32),
|
||||
RemS64(Value, i64),
|
||||
}
|
||||
|
||||
/// Possibly create a DivRemByConstInfo from the given components, by figuring out which, if any,
|
||||
/// of the 8 cases apply, and also taking care to sanity-check the immediate.
|
||||
fn package_up_divrem_info(
|
||||
value: Value,
|
||||
value_type: Type,
|
||||
imm_i64: i64,
|
||||
is_signed: bool,
|
||||
is_rem: bool,
|
||||
) -> Option<DivRemByConstInfo> {
|
||||
let imm_u64 = imm_i64 as u64;
|
||||
|
||||
match (is_signed, value_type) {
|
||||
(false, I32) => {
|
||||
if imm_u64 < 0x1_0000_0000 {
|
||||
if is_rem {
|
||||
Some(DivRemByConstInfo::RemU32(value, imm_u64 as u32))
|
||||
} else {
|
||||
Some(DivRemByConstInfo::DivU32(value, imm_u64 as u32))
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
(false, I64) => {
|
||||
// unsigned 64, no range constraint.
|
||||
if is_rem {
|
||||
Some(DivRemByConstInfo::RemU64(value, imm_u64))
|
||||
} else {
|
||||
Some(DivRemByConstInfo::DivU64(value, imm_u64))
|
||||
}
|
||||
}
|
||||
|
||||
(true, I32) => {
|
||||
if imm_u64 <= 0x7fff_ffff || imm_u64 >= 0xffff_ffff_8000_0000 {
|
||||
if is_rem {
|
||||
Some(DivRemByConstInfo::RemS32(value, imm_u64 as i32))
|
||||
} else {
|
||||
Some(DivRemByConstInfo::DivS32(value, imm_u64 as i32))
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
(true, I64) => {
|
||||
// signed 64, no range constraint.
|
||||
if is_rem {
|
||||
Some(DivRemByConstInfo::RemS64(value, imm_u64 as i64))
|
||||
} else {
|
||||
Some(DivRemByConstInfo::DivS64(value, imm_u64 as i64))
|
||||
}
|
||||
}
|
||||
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Examine `inst` to see if it is a div or rem by a constant, and if so return the operands,
|
||||
/// signedness, operation size and div-vs-rem-ness in a handy bundle.
|
||||
fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
|
||||
if let InstructionData::BinaryImm64 { opcode, arg, imm } = dfg.insts[inst] {
|
||||
let (is_signed, is_rem) = match opcode {
|
||||
Opcode::UdivImm => (false, false),
|
||||
Opcode::UremImm => (false, true),
|
||||
Opcode::SdivImm => (true, false),
|
||||
Opcode::SremImm => (true, true),
|
||||
_ => return None,
|
||||
};
|
||||
return package_up_divrem_info(arg, dfg.value_type(arg), imm.into(), is_signed, is_rem);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Actually do the transformation given a bundle containing the relevant information.
|
||||
/// `divrem_info` describes a div or rem by a constant, that `pos` currently points at, and `inst`
|
||||
/// is the associated instruction. `inst` is replaced by a sequence of other operations that
|
||||
/// calculate the same result. Note that there are various `divrem_info` cases where we cannot do
|
||||
/// any transformation, in which case `inst` is left unchanged.
|
||||
fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCursor, inst: Inst) {
|
||||
let is_rem = match *divrem_info {
|
||||
DivRemByConstInfo::DivU32(_, _)
|
||||
| DivRemByConstInfo::DivU64(_, _)
|
||||
| DivRemByConstInfo::DivS32(_, _)
|
||||
| DivRemByConstInfo::DivS64(_, _) => false,
|
||||
DivRemByConstInfo::RemU32(_, _)
|
||||
| DivRemByConstInfo::RemU64(_, _)
|
||||
| DivRemByConstInfo::RemS32(_, _)
|
||||
| DivRemByConstInfo::RemS64(_, _) => true,
|
||||
};
|
||||
|
||||
match *divrem_info {
|
||||
// -------------------- U32 --------------------
|
||||
|
||||
// U32 div, rem by zero: ignore
|
||||
DivRemByConstInfo::DivU32(_n1, 0) | DivRemByConstInfo::RemU32(_n1, 0) => {}
|
||||
|
||||
// U32 div by 1: identity
|
||||
// U32 rem by 1: zero
|
||||
DivRemByConstInfo::DivU32(n1, 1) | DivRemByConstInfo::RemU32(n1, 1) => {
|
||||
if is_rem {
|
||||
pos.func.dfg.replace(inst).iconst(I32, 0);
|
||||
} else {
|
||||
replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
|
||||
}
|
||||
}
|
||||
|
||||
// U32 div, rem by a power-of-2
|
||||
DivRemByConstInfo::DivU32(n1, d) | DivRemByConstInfo::RemU32(n1, d)
|
||||
if d.is_power_of_two() =>
|
||||
{
|
||||
debug_assert!(d >= 2);
|
||||
// compute k where d == 2^k
|
||||
let k = d.trailing_zeros();
|
||||
debug_assert!(k >= 1 && k <= 31);
|
||||
if is_rem {
|
||||
let mask = (1u64 << k) - 1;
|
||||
pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
|
||||
} else {
|
||||
pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
|
||||
}
|
||||
}
|
||||
|
||||
// U32 div, rem by non-power-of-2
|
||||
DivRemByConstInfo::DivU32(n1, d) | DivRemByConstInfo::RemU32(n1, d) => {
|
||||
debug_assert!(d >= 3);
|
||||
let MU32 {
|
||||
mul_by,
|
||||
do_add,
|
||||
shift_by,
|
||||
} = magic_u32(d);
|
||||
let qf; // final quotient
|
||||
let q0 = pos.ins().iconst(I32, mul_by as i64);
|
||||
let q1 = pos.ins().umulhi(n1, q0);
|
||||
if do_add {
|
||||
debug_assert!(shift_by >= 1 && shift_by <= 32);
|
||||
let t1 = pos.ins().isub(n1, q1);
|
||||
let t2 = pos.ins().ushr_imm(t1, 1);
|
||||
let t3 = pos.ins().iadd(t2, q1);
|
||||
// I never found any case where shift_by == 1 here.
|
||||
// So there's no attempt to fold out a zero shift.
|
||||
debug_assert_ne!(shift_by, 1);
|
||||
qf = pos.ins().ushr_imm(t3, (shift_by - 1) as i64);
|
||||
} else {
|
||||
debug_assert!(shift_by >= 0 && shift_by <= 31);
|
||||
// Whereas there are known cases here for shift_by == 0.
|
||||
if shift_by > 0 {
|
||||
qf = pos.ins().ushr_imm(q1, shift_by as i64);
|
||||
} else {
|
||||
qf = q1;
|
||||
}
|
||||
}
|
||||
// Now qf holds the final quotient. If necessary calculate the
|
||||
// remainder instead.
|
||||
if is_rem {
|
||||
let tt = pos.ins().imul_imm(qf, d as i64);
|
||||
pos.func.dfg.replace(inst).isub(n1, tt);
|
||||
} else {
|
||||
replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------- U64 --------------------
|
||||
|
||||
// U64 div, rem by zero: ignore
|
||||
DivRemByConstInfo::DivU64(_n1, 0) | DivRemByConstInfo::RemU64(_n1, 0) => {}
|
||||
|
||||
// U64 div by 1: identity
|
||||
// U64 rem by 1: zero
|
||||
DivRemByConstInfo::DivU64(n1, 1) | DivRemByConstInfo::RemU64(n1, 1) => {
|
||||
if is_rem {
|
||||
pos.func.dfg.replace(inst).iconst(I64, 0);
|
||||
} else {
|
||||
replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
|
||||
}
|
||||
}
|
||||
|
||||
// U64 div, rem by a power-of-2
|
||||
DivRemByConstInfo::DivU64(n1, d) | DivRemByConstInfo::RemU64(n1, d)
|
||||
if d.is_power_of_two() =>
|
||||
{
|
||||
debug_assert!(d >= 2);
|
||||
// compute k where d == 2^k
|
||||
let k = d.trailing_zeros();
|
||||
debug_assert!(k >= 1 && k <= 63);
|
||||
if is_rem {
|
||||
let mask = (1u64 << k) - 1;
|
||||
pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
|
||||
} else {
|
||||
pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
|
||||
}
|
||||
}
|
||||
|
||||
// U64 div, rem by non-power-of-2
|
||||
DivRemByConstInfo::DivU64(n1, d) | DivRemByConstInfo::RemU64(n1, d) => {
|
||||
debug_assert!(d >= 3);
|
||||
let MU64 {
|
||||
mul_by,
|
||||
do_add,
|
||||
shift_by,
|
||||
} = magic_u64(d);
|
||||
let qf; // final quotient
|
||||
let q0 = pos.ins().iconst(I64, mul_by as i64);
|
||||
let q1 = pos.ins().umulhi(n1, q0);
|
||||
if do_add {
|
||||
debug_assert!(shift_by >= 1 && shift_by <= 64);
|
||||
let t1 = pos.ins().isub(n1, q1);
|
||||
let t2 = pos.ins().ushr_imm(t1, 1);
|
||||
let t3 = pos.ins().iadd(t2, q1);
|
||||
// I never found any case where shift_by == 1 here.
|
||||
// So there's no attempt to fold out a zero shift.
|
||||
debug_assert_ne!(shift_by, 1);
|
||||
qf = pos.ins().ushr_imm(t3, (shift_by - 1) as i64);
|
||||
} else {
|
||||
debug_assert!(shift_by >= 0 && shift_by <= 63);
|
||||
// Whereas there are known cases here for shift_by == 0.
|
||||
if shift_by > 0 {
|
||||
qf = pos.ins().ushr_imm(q1, shift_by as i64);
|
||||
} else {
|
||||
qf = q1;
|
||||
}
|
||||
}
|
||||
// Now qf holds the final quotient. If necessary calculate the
|
||||
// remainder instead.
|
||||
if is_rem {
|
||||
let tt = pos.ins().imul_imm(qf, d as i64);
|
||||
pos.func.dfg.replace(inst).isub(n1, tt);
|
||||
} else {
|
||||
replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------- S32 --------------------
|
||||
|
||||
// S32 div, rem by zero or -1: ignore
|
||||
DivRemByConstInfo::DivS32(_n1, -1)
|
||||
| DivRemByConstInfo::RemS32(_n1, -1)
|
||||
| DivRemByConstInfo::DivS32(_n1, 0)
|
||||
| DivRemByConstInfo::RemS32(_n1, 0) => {}
|
||||
|
||||
// S32 div by 1: identity
|
||||
// S32 rem by 1: zero
|
||||
DivRemByConstInfo::DivS32(n1, 1) | DivRemByConstInfo::RemS32(n1, 1) => {
|
||||
if is_rem {
|
||||
pos.func.dfg.replace(inst).iconst(I32, 0);
|
||||
} else {
|
||||
replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
|
||||
}
|
||||
}
|
||||
|
||||
DivRemByConstInfo::DivS32(n1, d) | DivRemByConstInfo::RemS32(n1, d) => {
|
||||
if let Some((is_negative, k)) = i32_is_power_of_two(d) {
|
||||
// k can be 31 only in the case that d is -2^31.
|
||||
debug_assert!(k >= 1 && k <= 31);
|
||||
let t1 = if k - 1 == 0 {
|
||||
n1
|
||||
} else {
|
||||
pos.ins().sshr_imm(n1, (k - 1) as i64)
|
||||
};
|
||||
let t2 = pos.ins().ushr_imm(t1, (32 - k) as i64);
|
||||
let t3 = pos.ins().iadd(n1, t2);
|
||||
if is_rem {
|
||||
// S32 rem by a power-of-2
|
||||
let t4 = pos.ins().band_imm(t3, i32::wrapping_neg(1 << k) as i64);
|
||||
// Curiously, we don't care here what the sign of d is.
|
||||
pos.func.dfg.replace(inst).isub(n1, t4);
|
||||
} else {
|
||||
// S32 div by a power-of-2
|
||||
let t4 = pos.ins().sshr_imm(t3, k as i64);
|
||||
if is_negative {
|
||||
pos.func.dfg.replace(inst).irsub_imm(t4, 0);
|
||||
} else {
|
||||
replace_single_result_with_alias(&mut pos.func.dfg, inst, t4);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// S32 div, rem by a non-power-of-2
|
||||
debug_assert!(d < -2 || d > 2);
|
||||
let MS32 { mul_by, shift_by } = magic_s32(d);
|
||||
let q0 = pos.ins().iconst(I32, mul_by as i64);
|
||||
let q1 = pos.ins().smulhi(n1, q0);
|
||||
let q2 = if d > 0 && mul_by < 0 {
|
||||
pos.ins().iadd(q1, n1)
|
||||
} else if d < 0 && mul_by > 0 {
|
||||
pos.ins().isub(q1, n1)
|
||||
} else {
|
||||
q1
|
||||
};
|
||||
debug_assert!(shift_by >= 0 && shift_by <= 31);
|
||||
let q3 = if shift_by == 0 {
|
||||
q2
|
||||
} else {
|
||||
pos.ins().sshr_imm(q2, shift_by as i64)
|
||||
};
|
||||
let t1 = pos.ins().ushr_imm(q3, 31);
|
||||
let qf = pos.ins().iadd(q3, t1);
|
||||
// Now qf holds the final quotient. If necessary calculate
|
||||
// the remainder instead.
|
||||
if is_rem {
|
||||
let tt = pos.ins().imul_imm(qf, d as i64);
|
||||
pos.func.dfg.replace(inst).isub(n1, tt);
|
||||
} else {
|
||||
replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------- S64 --------------------
|
||||
|
||||
// S64 div, rem by zero or -1: ignore
|
||||
DivRemByConstInfo::DivS64(_n1, -1)
|
||||
| DivRemByConstInfo::RemS64(_n1, -1)
|
||||
| DivRemByConstInfo::DivS64(_n1, 0)
|
||||
| DivRemByConstInfo::RemS64(_n1, 0) => {}
|
||||
|
||||
// S64 div by 1: identity
|
||||
// S64 rem by 1: zero
|
||||
DivRemByConstInfo::DivS64(n1, 1) | DivRemByConstInfo::RemS64(n1, 1) => {
|
||||
if is_rem {
|
||||
pos.func.dfg.replace(inst).iconst(I64, 0);
|
||||
} else {
|
||||
replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
|
||||
}
|
||||
}
|
||||
|
||||
DivRemByConstInfo::DivS64(n1, d) | DivRemByConstInfo::RemS64(n1, d) => {
|
||||
if let Some((is_negative, k)) = i64_is_power_of_two(d) {
|
||||
// k can be 63 only in the case that d is -2^63.
|
||||
debug_assert!(k >= 1 && k <= 63);
|
||||
let t1 = if k - 1 == 0 {
|
||||
n1
|
||||
} else {
|
||||
pos.ins().sshr_imm(n1, (k - 1) as i64)
|
||||
};
|
||||
let t2 = pos.ins().ushr_imm(t1, (64 - k) as i64);
|
||||
let t3 = pos.ins().iadd(n1, t2);
|
||||
if is_rem {
|
||||
// S64 rem by a power-of-2
|
||||
let t4 = pos.ins().band_imm(t3, i64::wrapping_neg(1 << k));
|
||||
// Curiously, we don't care here what the sign of d is.
|
||||
pos.func.dfg.replace(inst).isub(n1, t4);
|
||||
} else {
|
||||
// S64 div by a power-of-2
|
||||
let t4 = pos.ins().sshr_imm(t3, k as i64);
|
||||
if is_negative {
|
||||
pos.func.dfg.replace(inst).irsub_imm(t4, 0);
|
||||
} else {
|
||||
replace_single_result_with_alias(&mut pos.func.dfg, inst, t4);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// S64 div, rem by a non-power-of-2
|
||||
debug_assert!(d < -2 || d > 2);
|
||||
let MS64 { mul_by, shift_by } = magic_s64(d);
|
||||
let q0 = pos.ins().iconst(I64, mul_by);
|
||||
let q1 = pos.ins().smulhi(n1, q0);
|
||||
let q2 = if d > 0 && mul_by < 0 {
|
||||
pos.ins().iadd(q1, n1)
|
||||
} else if d < 0 && mul_by > 0 {
|
||||
pos.ins().isub(q1, n1)
|
||||
} else {
|
||||
q1
|
||||
};
|
||||
debug_assert!(shift_by >= 0 && shift_by <= 63);
|
||||
let q3 = if shift_by == 0 {
|
||||
q2
|
||||
} else {
|
||||
pos.ins().sshr_imm(q2, shift_by as i64)
|
||||
};
|
||||
let t1 = pos.ins().ushr_imm(q3, 63);
|
||||
let qf = pos.ins().iadd(q3, t1);
|
||||
// Now qf holds the final quotient. If necessary calculate
|
||||
// the remainder instead.
|
||||
if is_rem {
|
||||
let tt = pos.ins().imul_imm(qf, d);
|
||||
pos.func.dfg.replace(inst).isub(n1, tt);
|
||||
} else {
|
||||
replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Simple peephole simplifications: folding constants into the `_imm`
/// instruction forms, collapsing stacked immediate operations, eliminating
/// no-op operations, and folding compare-against-zero into conditional
/// branches.
mod simplify {
    use super::*;
    use crate::ir::{
        dfg::ValueDef,
        immediates,
        instructions::Opcode,
        types::{I16, I32, I8},
    };
    use std::marker::PhantomData;

    /// Opaque state for the peephole optimizer.
    ///
    /// Currently stateless: the `PhantomData` only carries the two (unused)
    /// lifetimes so the historical interface shape is preserved.
    pub struct PeepholeOptimizer<'a, 'b> {
        phantom: PhantomData<(&'a (), &'b ())>,
    }

    /// Create a `PeepholeOptimizer`.
    ///
    /// The target ISA argument is accepted for interface compatibility but is
    /// not currently consulted.
    pub fn peephole_optimizer<'a, 'b>(_: &dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> {
        PeepholeOptimizer {
            phantom: PhantomData,
        }
    }

    /// Apply all peephole rewrites to `inst`: first local instruction
    /// simplification, then compare-into-branch folding.
    ///
    /// `native_word_width` is the target pointer width in bytes; rewrites
    /// that would create instructions wider than a machine word are skipped.
    pub fn apply_all<'a, 'b>(
        _optimizer: &mut PeepholeOptimizer<'a, 'b>,
        pos: &mut FuncCursor<'a>,
        inst: Inst,
        native_word_width: u32,
    ) {
        simplify(pos, inst, native_word_width);
        branch_opt(pos, inst);
    }

    /// If `value` is the result of an `iconst` instruction, return its
    /// immediate; otherwise return `None`.
    #[inline]
    fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> {
        if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) {
            if let InstructionData::UnaryImm {
                opcode: Opcode::Iconst,
                imm,
            } = dfg.insts[candidate_inst]
            {
                return Some(imm);
            }
        }
        None
    }

    /// Try to transform [(x << N) >> N] into a (un)signed-extending move.
    /// Returns true if the final instruction has been converted to such a move.
    ///
    /// `inst` is the shift-right (`ushr_imm`/`sshr_imm`), `arg` its input and
    /// `imm` its shift amount; the fold fires only when `arg` comes from an
    /// `ishl_imm` with the *same* shift amount and the same controlling type.
    fn try_fold_extended_move(
        pos: &mut FuncCursor,
        inst: Inst,
        opcode: Opcode,
        arg: Value,
        imm: immediates::Imm64,
    ) -> bool {
        if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
            if let InstructionData::BinaryImm64 {
                opcode: Opcode::IshlImm,
                arg: prev_arg,
                imm: prev_imm,
            } = &pos.func.dfg.insts[arg_inst]
            {
                // Both shifts must move by the same amount.
                if imm != *prev_imm {
                    return false;
                }

                // Types must agree and be integral for the extend rewrite.
                let dest_ty = pos.func.dfg.ctrl_typevar(inst);
                if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() {
                    return false;
                }

                // The surviving low bits (lane width minus shift amount) must
                // form a standard narrow integer type we can reduce to.
                let imm_bits: i64 = imm.into();
                let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) {
                    8 => I8,
                    16 => I16,
                    32 => I32,
                    _ => return false,
                };
                let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap();

                // This becomes a no-op, since ireduce_ty has a smaller lane width than
                // the argument type (also the destination type).
                let arg = *prev_arg;
                let narrower_arg = pos.ins().ireduce(ireduce_ty, arg);

                // Unsigned shift-right pair -> zero extend; signed -> sign extend.
                if opcode == Opcode::UshrImm {
                    pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg);
                } else {
                    pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg);
                }
                return true;
            }
        }
        false
    }

    /// Apply basic simplifications.
    ///
    /// This folds constants with arithmetic to form `_imm` instructions, and other minor
    /// simplifications.
    ///
    /// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the
    /// controlling type's width of the instruction. This would result in an illegal instruction that
    /// would likely be expanded back into an instruction on smaller types with the same initial
    /// opcode, creating unnecessary churn.
    fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) {
        match pos.func.dfg.insts[inst] {
            InstructionData::Binary { opcode, args } => {
                // Constant on the right-hand side: rewrite to the
                // binary-immediate form of the operation.
                if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
                    let new_opcode = match opcode {
                        Opcode::Iadd => Opcode::IaddImm,
                        Opcode::Imul => Opcode::ImulImm,
                        Opcode::Sdiv => Opcode::SdivImm,
                        Opcode::Udiv => Opcode::UdivImm,
                        Opcode::Srem => Opcode::SremImm,
                        Opcode::Urem => Opcode::UremImm,
                        Opcode::Band => Opcode::BandImm,
                        Opcode::Bor => Opcode::BorImm,
                        Opcode::Bxor => Opcode::BxorImm,
                        Opcode::Rotl => Opcode::RotlImm,
                        Opcode::Rotr => Opcode::RotrImm,
                        Opcode::Ishl => Opcode::IshlImm,
                        Opcode::Ushr => Opcode::UshrImm,
                        Opcode::Sshr => Opcode::SshrImm,
                        Opcode::Isub => {
                            // x - C == x + (-C), so reuse iadd_imm with the
                            // negated constant (wrapping, to handle MIN).
                            imm = imm.wrapping_neg();
                            Opcode::IaddImm
                        }
                        _ => return,
                    };
                    let ty = pos.func.dfg.ctrl_typevar(inst);
                    if ty.bytes() <= native_word_width {
                        pos.func
                            .dfg
                            .replace(inst)
                            .BinaryImm64(new_opcode, ty, imm, args[0]);

                        // Repeat for BinaryImm simplification.
                        simplify(pos, inst, native_word_width);
                    }
                } else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) {
                    // Constant on the left-hand side: only commutative ops
                    // (plus isub, which has a dedicated reverse-subtract
                    // immediate form) can be rewritten.
                    let new_opcode = match opcode {
                        Opcode::Iadd => Opcode::IaddImm,
                        Opcode::Imul => Opcode::ImulImm,
                        Opcode::Band => Opcode::BandImm,
                        Opcode::Bor => Opcode::BorImm,
                        Opcode::Bxor => Opcode::BxorImm,
                        Opcode::Isub => Opcode::IrsubImm,
                        _ => return,
                    };
                    let ty = pos.func.dfg.ctrl_typevar(inst);
                    if ty.bytes() <= native_word_width {
                        pos.func
                            .dfg
                            .replace(inst)
                            .BinaryImm64(new_opcode, ty, imm, args[1]);
                    }
                }
            }

            InstructionData::BinaryImm64 { opcode, arg, imm } => {
                let ty = pos.func.dfg.ctrl_typevar(inst);

                // `arg`/`imm` may be rewritten below if two stacked
                // immediate operations are folded into one.
                let mut arg = arg;
                let mut imm = imm;
                match opcode {
                    Opcode::IaddImm
                    | Opcode::ImulImm
                    | Opcode::BorImm
                    | Opcode::BandImm
                    | Opcode::BxorImm => {
                        // Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x)
                        if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
                            if let InstructionData::BinaryImm64 {
                                opcode: prev_opcode,
                                arg: prev_arg,
                                imm: prev_imm,
                            } = &pos.func.dfg.insts[arg_inst]
                            {
                                // Only fold when both are the same operation
                                // on the same controlling type.
                                if opcode == *prev_opcode
                                    && ty == pos.func.dfg.ctrl_typevar(arg_inst)
                                {
                                    let lhs: i64 = imm.into();
                                    let rhs: i64 = (*prev_imm).into();
                                    let new_imm = match opcode {
                                        Opcode::BorImm => lhs | rhs,
                                        Opcode::BandImm => lhs & rhs,
                                        Opcode::BxorImm => lhs ^ rhs,
                                        Opcode::IaddImm => lhs.wrapping_add(rhs),
                                        Opcode::ImulImm => lhs.wrapping_mul(rhs),
                                        _ => panic!("can't happen"),
                                    };
                                    let new_imm = immediates::Imm64::from(new_imm);
                                    let new_arg = *prev_arg;
                                    pos.func
                                        .dfg
                                        .replace(inst)
                                        .BinaryImm64(opcode, ty, new_imm, new_arg);
                                    // Keep locals in sync so the no-op check
                                    // below sees the folded operation.
                                    imm = new_imm;
                                    arg = new_arg;
                                }
                            }
                        }
                    }

                    Opcode::UshrImm | Opcode::SshrImm => {
                        // Try the (x << N) >> N -> extend fold; on success
                        // the instruction was replaced, so stop here.
                        if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width
                            && try_fold_extended_move(pos, inst, opcode, arg, imm)
                        {
                            return;
                        }
                    }

                    _ => {}
                };

                // Replace operations that are no-ops.
                match (opcode, imm.into(), ty) {
                    (Opcode::IaddImm, 0, _)
                    | (Opcode::ImulImm, 1, _)
                    | (Opcode::SdivImm, 1, _)
                    | (Opcode::UdivImm, 1, _)
                    | (Opcode::BorImm, 0, _)
                    | (Opcode::BandImm, -1, _)
                    | (Opcode::BxorImm, 0, _)
                    | (Opcode::RotlImm, 0, _)
                    | (Opcode::RotrImm, 0, _)
                    | (Opcode::IshlImm, 0, _)
                    | (Opcode::UshrImm, 0, _)
                    | (Opcode::SshrImm, 0, _) => {
                        // Alias the result value with the original argument.
                        replace_single_result_with_alias(&mut pos.func.dfg, inst, arg);
                    }
                    (Opcode::ImulImm, 0, ty) | (Opcode::BandImm, 0, ty) if ty != I128 => {
                        // Replace by zero.
                        pos.func.dfg.replace(inst).iconst(ty, 0);
                    }
                    (Opcode::BorImm, -1, ty) if ty != I128 => {
                        // Replace by minus one.
                        pos.func.dfg.replace(inst).iconst(ty, -1);
                    }
                    _ => {}
                }
            }

            InstructionData::IntCompare { opcode, cond, args } => {
                // icmp x, C -> icmp_imm, when the constant is on the right
                // and the type fits in a machine word.
                debug_assert_eq!(opcode, Opcode::Icmp);
                if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
                    if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width {
                        pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
                    }
                }
            }

            _ => {}
        }
    }

    /// Fold comparisons into branch operations when possible.
    ///
    /// This matches against operations which compare against zero, then use the
    /// result in a conditional branch.
    fn branch_opt(pos: &mut FuncCursor, inst: Inst) {
        // First, read-only phase: decide the replacement argument and branch
        // targets (or bail out). The actual mutation happens afterwards so we
        // never hold a mutable borrow of the DFG while inspecting it.
        let (cmp_arg, new_then, new_else) = if let InstructionData::Brif {
            arg: first_arg,
            blocks: [block_then, block_else],
            ..
        } = pos.func.dfg.insts[inst]
        {
            let icmp_inst =
                if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) {
                    icmp_inst
                } else {
                    return;
                };

            if let InstructionData::IntCompareImm {
                opcode: Opcode::IcmpImm,
                arg: cmp_arg,
                cond: cmp_cond,
                imm: cmp_imm,
            } = pos.func.dfg.insts[icmp_inst]
            {
                // Only comparisons against zero can be folded into the
                // branch's own truthiness test.
                let cmp_imm: i64 = cmp_imm.into();
                if cmp_imm != 0 {
                    return;
                }

                // brif (x == 0), A, B  ==  brif x, B, A  (targets swapped);
                // brif (x != 0), A, B  ==  brif x, A, B.
                let (new_then, new_else) = match cmp_cond {
                    IntCC::Equal => (block_else, block_then),
                    IntCC::NotEqual => (block_then, block_else),
                    _ => return,
                };

                (cmp_arg, new_then, new_else)
            } else {
                return;
            }
        } else {
            return;
        };

        // Second, mutation phase: rewrite the branch in place to test the
        // comparison's input directly.
        if let InstructionData::Brif { arg, blocks, .. } = &mut pos.func.dfg.insts[inst] {
            *arg = cmp_arg;
            blocks[0] = new_then;
            blocks[1] = new_else;
        } else {
            unreachable!();
        }
    }
}
|
||||
|
||||
/// The main pre-opt pass.
|
||||
pub fn do_preopt(func: &mut Function, isa: &dyn TargetIsa) {
|
||||
let _tt = timing::preopt();
|
||||
|
||||
let mut pos = FuncCursor::new(func);
|
||||
let native_word_width = isa.pointer_bytes() as u32;
|
||||
let mut optimizer = simplify::peephole_optimizer(isa);
|
||||
|
||||
while let Some(_) = pos.next_block() {
|
||||
while let Some(inst) = pos.next_inst() {
|
||||
simplify::apply_all(&mut optimizer, &mut pos, inst, native_word_width);
|
||||
|
||||
// Try to transform divide-by-constant into simpler operations.
|
||||
if let Some(divrem_info) = get_div_info(inst, &pos.func.dfg) {
|
||||
do_divrem_transformation(&divrem_info, &mut pos, inst);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user