Optimize immediates and compare and branch sequences (#286)

* Add a pre-opt optimization to change constants into immediates. This converts 'iadd' + 'iconst' into 'iadd_imm', and so on.
* Optimize away redundant `bint` instructions. Cretonne has a concept of "Testable" values, which can be either boolean or integer. When an instruction needing a "Testable" value receives the result of a `bint`, converting boolean to integer, eliminate the `bint`, as it's redundant.
* Postopt: Optimize using CPU flags. This introduces a post-legalization optimization pass which converts compare+branch sequences to use flags values on CPUs which support it.
* Define a form of x86's `urm` that doesn't clobber FLAGS. movzbl/movsbl/etc. don't clobber FLAGS; define a form of the `urm` recipe that represents this.
* Implement a DCE pass. This pass deletes instructions with no side effects and no results that are used.
* Clarify ambiguity about "32-bit" and "64-bit" in comments.
* Add x86 encodings for icmp_imm.
* Add a testcase for postopt CPU flags optimization. This covers the basic functionality of transforming compare+branch sequences to use CPU flags.
* Pattern-match irsub_imm in preopt.
2018-03-30 12:30:07 -07:00
parent 5377092e5b
commit 6606b88136
22 changed files with 921 additions and 109 deletions
--- a/lib/cretonne/src/context.rs
+++ b/lib/cretonne/src/context.rs
@@ -21,9 +21,11 @@ use result::{CtonError, CtonResult};
 use settings::{FlagsOrIsa, OptLevel};
 use unreachable_code::eliminate_unreachable_code;
 use verifier;
+use dce::do_dce;
 use simple_gvn::do_simple_gvn;
 use licm::do_licm;
 use preopt::do_preopt;
+use postopt::do_postopt;
 use timing;

 /// Persistent data structures and compilation pipeline.
@@ -92,6 +94,9 @@ impl Context {
            self.preopt(isa)?;
        }
        self.legalize(isa)?;
+        if isa.flags().opt_level() != OptLevel::Fastest {
+            self.postopt(isa)?;
+        }
        if isa.flags().opt_level() == OptLevel::Best {
            self.compute_domtree();
            self.compute_loop_analysis();
@@ -100,6 +105,7 @@ impl Context {
        }
        self.compute_domtree();
        self.eliminate_unreachable_code(isa)?;
+        self.dce(isa)?;
        self.regalloc(isa)?;
        self.prologue_epilogue(isa)?;
        self.relax_branches(isa)
@@ -153,6 +159,13 @@ impl Context {
        }
    }

+    /// Run the dead-code elimination pass over the function, then pass the
+    /// result through `verify_if`.
+    pub fn dce<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CtonResult {
+        do_dce(&mut self.func, &mut self.domtree);
+        // The verification outcome is the outcome of the whole step.
+        self.verify_if(fisa)
+    }
+
    /// Perform pre-legalization rewrites on the function.
    pub fn preopt(&mut self, isa: &TargetIsa) -> CtonResult {
        do_preopt(&mut self.func);
@@ -170,6 +183,13 @@ impl Context {
        self.verify_if(isa)
    }

+    /// Run the post-legalization rewriting pass over the function, then pass
+    /// the result through `verify_if`.
+    pub fn postopt(&mut self, isa: &TargetIsa) -> CtonResult {
+        do_postopt(&mut self.func, isa);
+        // The verification outcome is the outcome of the whole step.
+        self.verify_if(isa)
+    }
+
    /// Compute the control flow graph.
    pub fn compute_cfg(&mut self) {
        self.cfg.compute(&self.func)
--- a/lib/cretonne/src/dce.rs
+++ b/lib/cretonne/src/dce.rs
@@ -0,0 +1,68 @@
+//! A Dead-Code Elimination (DCE) pass.
+//!
+//! Dead code here means instructions that have no side effects and have no
+//! result values used by other instructions.
+
+use cursor::{Cursor, FuncCursor};
+use dominator_tree::DominatorTree;
+use entity::EntityRef;
+use ir::{Function, Inst, Opcode, DataFlowGraph};
+use ir::instructions::InstructionData;
+use timing;
+use std::vec::Vec;
+
+/// Report whether `opcode` belongs to a class of instructions that DCE must
+/// never remove, regardless of whether its results are used.
+fn trivially_unsafe_for_dce(opcode: Opcode) -> bool {
+    // Calls and anything that affects control flow.
+    if opcode.is_call() || opcode.is_branch() || opcode.is_terminator() || opcode.is_return() {
+        return true;
+    }
+    // Traps, other side effects, and stores.
+    opcode.can_trap() || opcode.other_side_effects() || opcode.can_store()
+}
+
+/// Return true if at least one result value of `inst` has been marked in
+/// `live`; such instructions must be preserved.
+fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool {
+    for result in dfg.inst_results(inst) {
+        if live[result.index()] {
+            return true;
+        }
+    }
+    false
+}
+
+/// Decide whether an instruction is a load that must be kept for its trap.
+///
+/// Load instructions without the `notrap` flag are defined to trap when
+/// operating on inaccessible memory, so they can't be removed even if the
+/// loaded value is unused. Stack loads are reported as not trapping, and any
+/// other loading instruction format is conservatively reported as trapping.
+fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool {
+    if opcode.can_load() {
+        match *data {
+            InstructionData::StackLoad { .. } => false,
+            InstructionData::Load { flags, .. } => !flags.notrap(),
+            _ => true,
+        }
+    } else {
+        // Not a load at all.
+        false
+    }
+}
+
+/// Perform DCE on `func`.
+///
+/// EBBs are visited in the CFG post-order recorded in `domtree`, and the
+/// instructions of each EBB are walked from bottom to top. An instruction is
+/// kept if it is unsafe to remove or if one of its results has already been
+/// marked live; the arguments of every kept instruction are marked live in
+/// turn. All other instructions are removed.
+pub fn do_dce(func: &mut Function, domtree: &mut DominatorTree) {
+    let _tt = timing::dce();
+    debug_assert!(domtree.is_valid());
+
+    // One liveness bit per value, indexed by the value's entity index.
+    let num_values = func.dfg.num_values();
+    let mut live = Vec::with_capacity(num_values);
+    live.resize(num_values, false);
+
+    for &ebb in domtree.cfg_postorder() {
+        let mut pos = FuncCursor::new(func).at_bottom(ebb);
+        while let Some(inst) = pos.prev_inst() {
+            // Decide first, in a scope of its own, so the borrow of the
+            // data flow graph has ended before the instruction is removed.
+            let keep = {
+                let dfg = &pos.func.dfg;
+                let data = &dfg[inst];
+                let opcode = data.opcode();
+                trivially_unsafe_for_dce(opcode) || is_load_with_defined_trapping(opcode, data) ||
+                    any_inst_results_used(inst, &live, dfg)
+            };
+
+            if keep {
+                // Everything a kept instruction reads is live as well.
+                for arg in pos.func.dfg.inst_args(inst) {
+                    let resolved = pos.func.dfg.resolve_aliases(*arg);
+                    live[resolved.index()] = true;
+                }
+            } else {
+                pos.remove_inst();
+            }
+        }
+    }
+}
--- a/lib/cretonne/src/ir/immediates.rs
+++ b/lib/cretonne/src/ir/immediates.rs
@@ -21,6 +21,11 @@ impl Imm64 {
    pub fn new(x: i64) -> Imm64 {
        Imm64(x)
    }
+
+    /// Return the negation of this immediate.
+    ///
+    /// The negation is performed with `i64::wrapping_neg`, so it wraps around
+    /// instead of overflowing.
+    pub fn wrapping_neg(self) -> Imm64 {
+        let Imm64(raw) = self;
+        Imm64(raw.wrapping_neg())
+    }
 }

 impl Into<i64> for Imm64 {
--- a/lib/cretonne/src/isa/intel/mod.rs
+++ b/lib/cretonne/src/isa/intel/mod.rs
@@ -58,6 +58,10 @@ impl TargetIsa for Isa {
        &self.shared_flags
    }

+    // Opt this ISA in to the CPU-flags rewriting that the post-legalization
+    // pass gates on `uses_cpu_flags()`.
+    fn uses_cpu_flags(&self) -> bool {
+        true
+    }
+
    fn register_info(&self) -> RegInfo {
        registers::INFO.clone()
    }
--- a/lib/cretonne/src/isa/mod.rs
+++ b/lib/cretonne/src/isa/mod.rs
@@ -158,6 +158,11 @@ pub trait TargetIsa: fmt::Display {
    /// Get the ISA-independent flags that were used to make this trait object.
    fn flags(&self) -> &settings::Flags;

+    /// Does the CPU implement scalar comparisons using a CPU flags register?
+    ///
+    /// The default answer is `false`; an ISA overrides this method to return
+    /// `true`. The post-legalization pass consults it before rewriting
+    /// compare+branch sequences to use flags values.
+    fn uses_cpu_flags(&self) -> bool {
+        false
+    }
+
    /// Get a data structure describing the registers in this ISA.
    fn register_info(&self) -> RegInfo;

--- a/lib/cretonne/src/lib.rs
+++ b/lib/cretonne/src/lib.rs
@@ -68,11 +68,13 @@ mod abi;
 mod bitset;
 mod constant_hash;
 mod context;
+mod dce;
 mod divconst_magic_numbers;
 mod iterators;
 mod legalizer;
 mod licm;
 mod partition_slice;
+mod postopt;
 mod predicates;
 mod preopt;
 mod ref_slice;
--- a/lib/cretonne/src/postopt.rs
+++ b/lib/cretonne/src/postopt.rs
@@ -0,0 +1,211 @@
+//! A post-legalization rewriting pass.
+
+#![allow(non_snake_case)]
+
+use cursor::{Cursor, EncCursor};
+use ir::dfg::ValueDef;
+use ir::{Function, InstructionData, Value, InstBuilder, Ebb, Inst};
+use ir::condcodes::{CondCode, IntCC, FloatCC};
+use ir::instructions::{Opcode, ValueList};
+use ir::immediates::Imm64;
+use isa::TargetIsa;
+use timing;
+
+/// Information collected about a compare+branch sequence.
+///
+/// Built by `optimize_cpu_flags` when it finds a `brz`/`brnz` whose condition
+/// is the result of an `icmp`, `icmp_imm`, or `fcmp`.
+struct CmpBrInfo {
+    /// The branch instruction.
+    br_inst: Inst,
+    /// The icmp, icmp_imm, or fcmp instruction.
+    cmp_inst: Inst,
+    /// The destination of the branch.
+    destination: Ebb,
+    /// The arguments of the branch. The first entry is the branch condition;
+    /// the remaining entries are forwarded to the rewritten branch.
+    args: ValueList,
+    /// The first argument to the comparison. The second is in the `kind` field.
+    cmp_arg: Value,
+    /// If the branch is `brz` rather than `brnz`, we need to invert the condition
+    /// before the branch.
+    invert_branch_cond: bool,
+    /// The kind of comparison, and the second argument.
+    kind: CmpBrKind,
+}
+
+/// The flavor of comparison feeding a branch, together with its condition
+/// code and its second operand.
+enum CmpBrKind {
+    /// An integer comparison of two values (`IntCompare` format).
+    Icmp { cond: IntCC, arg: Value },
+    /// An integer comparison of a value against an immediate (`IntCompareImm` format).
+    IcmpImm { cond: IntCC, imm: Imm64 },
+    /// A floating-point comparison of two values (`FloatCompare` format).
+    Fcmp { cond: FloatCC, arg: Value },
+}
+
+/// Optimize comparisons to use flags values, to avoid materializing conditions
+/// in integer registers.
+///
+/// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff
+/// sequences.
+///
+/// * `pos` - cursor over the function being rewritten; it is repositioned at
+///   the comparison instruction when a rewrite happens.
+/// * `inst` - the instruction to examine; only `Branch`-format instructions
+///   are candidates.
+/// * `last_flags_clobber` - the most recent flags-clobbering instruction seen
+///   by the caller in the current EBB, if any.
+/// * `isa` - the target ISA, used to re-encode the rewritten instructions.
+///
+/// The function returns without changing anything when no compare+branch
+/// pattern is found, or when the comparison is not the most recent
+/// flags-clobbering instruction.
+fn optimize_cpu_flags(
+    pos: &mut EncCursor,
+    inst: Inst,
+    last_flags_clobber: Option<Inst>,
+    isa: &TargetIsa,
+) {
+    // Look for compare and branch patterns.
+    // This code could be considerably simplified with non-lexical lifetimes.
+    let info = match pos.func.dfg[inst] {
+        InstructionData::Branch {
+            opcode,
+            destination,
+            ref args,
+        } => {
+            // The first branch argument is the condition being tested.
+            let first_arg = args.first(&pos.func.dfg.value_lists).unwrap();
+            let invert_branch_cond = match opcode {
+                Opcode::Brz => true,
+                Opcode::Brnz => false,
+                // NOTE(review): presumably `brz` and `brnz` are the only opcodes
+                // using the `Branch` format; any other opcode panics here.
+                _ => panic!(),
+            };
+            // Only conditions that are instruction results can be matched;
+            // anything else leaves the branch untouched.
+            if let ValueDef::Result(cond_inst, _) = pos.func.dfg.value_def(first_arg) {
+                match pos.func.dfg[cond_inst] {
+                    InstructionData::IntCompare {
+                        cond,
+                        args: cmp_args,
+                        ..
+                    } => {
+                        CmpBrInfo {
+                            br_inst: inst,
+                            cmp_inst: cond_inst,
+                            destination,
+                            args: args.clone(),
+                            cmp_arg: cmp_args[0],
+                            invert_branch_cond,
+                            kind: CmpBrKind::Icmp {
+                                cond,
+                                arg: cmp_args[1],
+                            },
+                        }
+                    }
+                    InstructionData::IntCompareImm {
+                        cond,
+                        arg: cmp_arg,
+                        imm: cmp_imm,
+                        ..
+                    } => {
+                        CmpBrInfo {
+                            br_inst: inst,
+                            cmp_inst: cond_inst,
+                            destination,
+                            args: args.clone(),
+                            cmp_arg,
+                            invert_branch_cond,
+                            kind: CmpBrKind::IcmpImm { cond, imm: cmp_imm },
+                        }
+                    }
+                    InstructionData::FloatCompare {
+                        cond,
+                        args: cmp_args,
+                        ..
+                    } => {
+                        CmpBrInfo {
+                            br_inst: inst,
+                            cmp_inst: cond_inst,
+                            destination,
+                            args: args.clone(),
+                            cmp_arg: cmp_args[0],
+                            invert_branch_cond,
+                            kind: CmpBrKind::Fcmp {
+                                cond,
+                                arg: cmp_args[1],
+                            },
+                        }
+                    }
+                    _ => return,
+                }
+            } else {
+                return;
+            }
+        }
+        // TODO: trapif, trueif, selectif, and their ff counterparts.
+        _ => return,
+    };
+
+    // If any instructions clobber the flags between the comparison and the branch,
+    // don't optimize them.
+    // The comparison itself must be the most recent clobber recorded by the
+    // caller; a `None` here also bails out.
+    if last_flags_clobber != Some(info.cmp_inst) {
+        return;
+    }
+
+    // We found a compare+branch pattern. Transform it to use flags.
+    // Skip element 0 of the branch arguments: it is the old condition value,
+    // which the flags-based branch does not take as an argument.
+    let args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec();
+    // Move to the comparison so the flags-producing instruction is inserted
+    // next to it (presumably just before it -- confirm against the cursor's
+    // insertion rules).
+    pos.goto_inst(info.cmp_inst);
+    match info.kind {
+        CmpBrKind::Icmp { mut cond, arg } => {
+            let flags = pos.ins().ifcmp(info.cmp_arg, arg);
+            // The original comparison is rewritten in place to derive its
+            // result from the flags, using the uninverted condition.
+            pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
+            // A `brz` branches when the comparison is false, so the flags
+            // branch tests the inverse condition.
+            if info.invert_branch_cond {
+                cond = cond.inverse();
+            }
+            pos.func.dfg.replace(info.br_inst).brif(
+                cond,
+                flags,
+                info.destination,
+                &args,
+            );
+        }
+        CmpBrKind::IcmpImm { mut cond, imm } => {
+            // Same rewrite as the `Icmp` case, with an immediate operand.
+            let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm);
+            pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
+            if info.invert_branch_cond {
+                cond = cond.inverse();
+            }
+            pos.func.dfg.replace(info.br_inst).brif(
+                cond,
+                flags,
+                info.destination,
+                &args,
+            );
+        }
+        CmpBrKind::Fcmp { mut cond, arg } => {
+            // Floating-point flavor: `ffcmp` / `trueff` / `brff`.
+            let flags = pos.ins().ffcmp(info.cmp_arg, arg);
+            pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags);
+            if info.invert_branch_cond {
+                cond = cond.inverse();
+            }
+            pos.func.dfg.replace(info.br_inst).brff(
+                cond,
+                flags,
+                info.destination,
+                &args,
+            );
+        }
+    }
+    // Re-encode the two instructions that were replaced in place.
+    // NOTE(review): the results are deliberately discarded through `.is_ok()`,
+    // so an encoding failure goes unnoticed here -- confirm this is intended.
+    pos.func.update_encoding(info.cmp_inst, isa).is_ok();
+    pos.func.update_encoding(info.br_inst, isa).is_ok();
+}
+
+
+//----------------------------------------------------------------------
+//
+// The main post-opt pass.
+
+/// Run the post-legalization rewrites over every instruction of `func`.
+///
+/// The only rewrite performed is the CPU-flags optimization, and it is applied
+/// only when `isa.uses_cpu_flags()` returns true.
+pub fn do_postopt(func: &mut Function, isa: &TargetIsa) {
+    let _tt = timing::postopt();
+    let mut pos = EncCursor::new(func, isa);
+    while let Some(_ebb) = pos.next_ebb() {
+        // Flags tracking starts afresh in every EBB.
+        let mut last_flags_clobber = None;
+        while let Some(inst) = pos.next_inst() {
+            if !isa.uses_cpu_flags() {
+                continue;
+            }
+
+            // Optimize instructions to make use of flags.
+            optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa);
+
+            // Remember this instruction if its encoding clobbers the flags;
+            // instructions without operand constraints are ignored.
+            let clobbers = isa.encoding_info()
+                .operand_constraints(pos.func.encodings[inst])
+                .map_or(false, |constraints| constraints.clobbers_flags);
+            if clobbers {
+                last_flags_clobber = Some(inst);
+            }
+        }
+    }
+}
--- a/lib/cretonne/src/preopt.rs
+++ b/lib/cretonne/src/preopt.rs
@@ -127,28 +127,6 @@ fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
        return package_up_divrem_info(arg, argL_ty, imm.into(), isSigned, isRem);
    }

-    // TODO: should we actually bother to do this (that is, manually match
-    // the case that the second argument is an iconst)? Or should we assume
-    // that some previous constant propagation pass has pushed all such
-    // immediates to their use points, creating BinaryImm instructions
-    // instead? For now we take the conservative approach.
-    if let InstructionData::Binary { opcode, args } = *idata {
-        let (isSigned, isRem) = match opcode {
-            Opcode::Udiv => (false, false),
-            Opcode::Urem => (false, true),
-            Opcode::Sdiv => (true, false),
-            Opcode::Srem => (true, true),
-            _other => return None,
-        };
-        let argR: Value = args[1];
-        if let Some(simm64) = get_const(argR, dfg) {
-            let argL: Value = args[0];
-            // Pull the operation size (type) from the left arg
-            let argL_ty = dfg.value_type(argL);
-            return package_up_divrem_info(argL, argL_ty, simm64, isSigned, isRem);
-        }
-    }
-
    None
 }

@@ -473,25 +451,106 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
    }
 }

-
-//----------------------------------------------------------------------
-//
-// General pattern-match helpers.
-
-/// Find out if `value` actually resolves to a constant, and if so what its
-/// value is.
-fn get_const(value: Value, dfg: &DataFlowGraph) -> Option<i64> {
-    match dfg.value_def(value) {
-        ValueDef::Result(definingInst, resultNo) => {
-            let definingIData: &InstructionData = &dfg[definingInst];
-            if let InstructionData::UnaryImm { opcode, imm } = *definingIData {
-                if opcode == Opcode::Iconst && resultNo == 0 {
-                    return Some(imm.into());
+/// Apply basic simplifications.
+///
+/// This folds constants with arithmetic to form `_imm` instructions, and other
+/// minor simplifications:
+///
+/// * A binary instruction whose second operand is an `iconst` becomes the
+///   matching `_imm` instruction. `isub` has no such form here, so it becomes
+///   `iadd_imm` with the negated constant.
+/// * An `isub` whose first operand is an `iconst` becomes `irsub_imm`.
+/// * An `icmp` whose second operand is an `iconst` becomes `icmp_imm`.
+/// * A `bint` feeding the first operand of a `CondTrap`- or `Branch`-format
+///   instruction, or of a `select`, is bypassed.
+///
+/// `inst` is rewritten in place; instructions matching none of these patterns
+/// are left untouched.
+fn simplify(pos: &mut FuncCursor, inst: Inst) {
+    match pos.func.dfg[inst] {
+        InstructionData::Binary { opcode, args } => {
+            if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[1]) {
+                if let InstructionData::UnaryImm {
+                    opcode: Opcode::Iconst,
+                    mut imm,
+                } = pos.func.dfg[iconst_inst]
+                {
+                    let new_opcode = match opcode {
+                        Opcode::Iadd => Opcode::IaddImm,
+                        Opcode::Imul => Opcode::ImulImm,
+                        Opcode::Sdiv => Opcode::SdivImm,
+                        Opcode::Udiv => Opcode::UdivImm,
+                        Opcode::Srem => Opcode::SremImm,
+                        Opcode::Urem => Opcode::UremImm,
+                        Opcode::Band => Opcode::BandImm,
+                        Opcode::Bor => Opcode::BorImm,
+                        Opcode::Bxor => Opcode::BxorImm,
+                        Opcode::Rotl => Opcode::RotlImm,
+                        Opcode::Rotr => Opcode::RotrImm,
+                        Opcode::Ishl => Opcode::IshlImm,
+                        Opcode::Ushr => Opcode::UshrImm,
+                        Opcode::Sshr => Opcode::SshrImm,
+                        Opcode::Isub => {
+                            // `x - c` is rewritten as `x + (-c)`.
+                            imm = imm.wrapping_neg();
+                            Opcode::IaddImm
+                        }
+                        _ => return,
+                    };
+                    let ty = pos.func.dfg.ctrl_typevar(inst);
+                    pos.func.dfg.replace(inst).BinaryImm(
+                        new_opcode,
+                        ty,
+                        imm,
+                        args[0],
+                    );
+                }
+            // Note: this arm is only reached when `args[1]` is not an
+            // instruction result at all.
+            } else if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[0]) {
+                if let InstructionData::UnaryImm {
+                    opcode: Opcode::Iconst,
+                    imm,
+                } = pos.func.dfg[iconst_inst]
+                {
+                    let new_opcode = match opcode {
+                        Opcode::Isub => Opcode::IrsubImm,
+                        _ => return,
+                    };
+                    let ty = pos.func.dfg.ctrl_typevar(inst);
+                    // `irsub_imm` computes `imm - arg`, so the value operand
+                    // must be the original second argument. Passing `args[0]`
+                    // would subtract the constant from itself.
+                    pos.func.dfg.replace(inst).BinaryImm(
+                        new_opcode,
+                        ty,
+                        imm,
+                        args[1],
+                    );
                }
            }
-            None
        }
-        ValueDef::Param(_definingEbb, _paramNo) => None,
+        InstructionData::IntCompare { opcode, cond, args } => {
+            debug_assert_eq!(opcode, Opcode::Icmp);
+            if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[1]) {
+                if let InstructionData::UnaryImm {
+                    opcode: Opcode::Iconst,
+                    imm,
+                } = pos.func.dfg[iconst_inst]
+                {
+                    pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
+                }
+            }
+        }
+        InstructionData::CondTrap { .. } |
+        InstructionData::Branch { .. } |
+        InstructionData::Ternary { opcode: Opcode::Select, .. } => {
+            // Fold away a redundant `bint`.
+            // The lookup runs in its own scope so the shared borrow of the
+            // data flow graph ends before the argument is overwritten.
+            let maybe = {
+                let args = pos.func.dfg.inst_args(inst);
+                if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
+                    if let InstructionData::Unary {
+                        opcode: Opcode::Bint,
+                        arg: bool_val,
+                    } = pos.func.dfg[def_inst]
+                    {
+                        Some(bool_val)
+                    } else {
+                        None
+                    }
+                } else {
+                    None
+                }
+            };
+            if let Some(bool_val) = maybe {
+                // Use the boolean input of the `bint` directly.
+                let args = pos.func.dfg.inst_args_mut(inst);
+                args[0] = bool_val;
+            }
+        }
+        _ => {}
    }
 }

@@ -503,6 +562,8 @@ pub fn do_preopt(func: &mut Function) {
    while let Some(_ebb) = pos.next_ebb() {

        while let Some(inst) = pos.next_inst() {
+            // Apply basic simplifications.
+            simplify(&mut pos, inst);

            //-- BEGIN -- division by constants ----------------

--- a/lib/cretonne/src/timing.rs
+++ b/lib/cretonne/src/timing.rs
@@ -55,7 +55,9 @@ define_passes!{
    flowgraph: "Control flow graph",
    domtree: "Dominator tree",
    loop_analysis: "Loop analysis",
+    postopt: "Post-legalization rewriting",
    preopt: "Pre-legalization rewriting",
+    dce: "Dead code elimination",
    legalize: "Legalization",
    gvn: "Global value numbering",
    licm: "Loop invariant code motion",