Optimize immediates and compare and branch sequences (#286)

* Add a pre-opt optimization to change constants into immediates.

This converts 'iadd' + 'iconst' into 'iadd_imm', and so on.

* Optimize away redundant `bint` instructions.

Cretonne has a concept of "Testable" values, which can be either boolean
or integer. When an instruction needing a "Testable" value receives
the result of a `bint`, converting boolean to integer, eliminate the
`bint`, as it's redundant.

* Postopt: Optimize using CPU flags.

This introduces a post-legalization optimization pass which converts
compare+branch sequences to use flags values on CPUs which support it.

* Define a form of x86's `urm` that doesn't clobber FLAGS.

movzbl/movsbl/etc. don't clobber FLAGS; define a form of the `urm`
recipe that represents this.

* Implement a DCE pass.

This pass deletes instructions with no side effects and no results that
are used.

* Clarify ambiguity about "32-bit" and "64-bit" in comments.

* Add x86 encodings for icmp_imm.

* Add a testcase for postopt CPU flags optimization.

This covers the basic functionality of transforming compare+branch
sequences to use CPU flags.

* Pattern-match irsub_imm in preopt.
This commit is contained in:
Dan Gohman
2018-03-30 12:30:07 -07:00
committed by GitHub
parent 5377092e5b
commit 6606b88136
22 changed files with 921 additions and 109 deletions

View File

@@ -21,9 +21,11 @@ use result::{CtonError, CtonResult};
use settings::{FlagsOrIsa, OptLevel};
use unreachable_code::eliminate_unreachable_code;
use verifier;
use dce::do_dce;
use simple_gvn::do_simple_gvn;
use licm::do_licm;
use preopt::do_preopt;
use postopt::do_postopt;
use timing;
/// Persistent data structures and compilation pipeline.
@@ -92,6 +94,9 @@ impl Context {
self.preopt(isa)?;
}
self.legalize(isa)?;
if isa.flags().opt_level() != OptLevel::Fastest {
self.postopt(isa)?;
}
if isa.flags().opt_level() == OptLevel::Best {
self.compute_domtree();
self.compute_loop_analysis();
@@ -100,6 +105,7 @@ impl Context {
}
self.compute_domtree();
self.eliminate_unreachable_code(isa)?;
self.dce(isa)?;
self.regalloc(isa)?;
self.prologue_epilogue(isa)?;
self.relax_branches(isa)
@@ -153,6 +159,13 @@ impl Context {
}
}
/// Perform dead-code elimination on the function.
///
/// Runs `do_dce` over the function with the context's dominator tree, then
/// hands `fisa` to `verify_if` and returns its result.
pub fn dce<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CtonResult {
    do_dce(&mut self.func, &mut self.domtree);
    self.verify_if(fisa)
}
/// Perform pre-legalization rewrites on the function.
pub fn preopt(&mut self, isa: &TargetIsa) -> CtonResult {
do_preopt(&mut self.func);
@@ -170,6 +183,13 @@ impl Context {
self.verify_if(isa)
}
/// Perform post-legalization rewrites on the function.
///
/// Runs `do_postopt` for the given ISA, then returns the result of
/// `verify_if`.
pub fn postopt(&mut self, isa: &TargetIsa) -> CtonResult {
    do_postopt(&mut self.func, isa);
    self.verify_if(isa)
}
/// Compute the control flow graph.
pub fn compute_cfg(&mut self) {
self.cfg.compute(&self.func)

68
lib/cretonne/src/dce.rs Normal file
View File

@@ -0,0 +1,68 @@
//! A Dead-Code Elimination (DCE) pass.
//!
//! Dead code here means instructions that have no side effects and have no
//! result values used by other instructions.
use cursor::{Cursor, FuncCursor};
use dominator_tree::DominatorTree;
use entity::EntityRef;
use ir::{Function, Inst, Opcode, DataFlowGraph};
use ir::instructions::InstructionData;
use timing;
use std::vec::Vec;
/// Test whether the given opcode is unsafe to even consider for DCE.
///
/// An opcode is rejected outright when it transfers control (call, branch,
/// terminator, return) or when it may trap, store, or have other side effects.
fn trivially_unsafe_for_dce(opcode: Opcode) -> bool {
    let transfers_control = opcode.is_call() || opcode.is_branch() || opcode.is_terminator() ||
        opcode.is_return();
    let has_effects = opcode.can_trap() || opcode.other_side_effects() || opcode.can_store();
    transfers_control || has_effects
}
/// Preserve instructions with used result values.
///
/// Returns `true` if at least one result of `inst` is marked in `live`, which
/// is indexed by value index.
fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool {
    for result in dfg.inst_results(inst) {
        if live[result.index()] {
            return true;
        }
    }
    false
}
/// Load instructions without the `notrap` flag are defined to trap when
/// operating on inaccessible memory, so we can't DCE them even if the
/// loaded value is unused.
///
/// Opcodes that cannot load never qualify. Among loading instructions, a
/// `StackLoad` is not treated as trapping, a `Load` is treated as trapping
/// unless its flags say `notrap`, and every other format is conservatively
/// treated as trapping.
fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool {
    opcode.can_load() &&
        match *data {
            InstructionData::StackLoad { .. } => false,
            InstructionData::Load { flags, .. } => !flags.notrap(),
            _ => true,
        }
}
/// Perform DCE on `func`.
///
/// EBBs are visited in the CFG post-order held by `domtree`, and the
/// instructions of each EBB from the bottom up. An instruction is kept when it
/// is unsafe to remove, is a load with defined trapping behaviour, or has a
/// result already marked live; the (alias-resolved) arguments of every kept
/// instruction are then marked live. Everything else is removed.
///
/// `domtree` must be valid on entry; this is checked with a debug assertion.
pub fn do_dce(func: &mut Function, domtree: &mut DominatorTree) {
    let _tt = timing::dce();
    debug_assert!(domtree.is_valid());

    // One liveness bit per value, indexed by value index; nothing is live yet.
    let num_values = func.dfg.num_values();
    let mut live = Vec::with_capacity(num_values);
    live.resize(num_values, false);

    for &ebb in domtree.cfg_postorder().iter() {
        let mut pos = FuncCursor::new(func).at_bottom(ebb);
        while let Some(inst) = pos.prev_inst() {
            // Decide in a nested scope so the borrow of the DFG ends before
            // the cursor is used to mutate the function.
            let must_keep = {
                let dfg = &pos.func.dfg;
                let data = &dfg[inst];
                let opcode = data.opcode();
                trivially_unsafe_for_dce(opcode) ||
                    is_load_with_defined_trapping(opcode, data) ||
                    any_inst_results_used(inst, &live, dfg)
            };

            if !must_keep {
                pos.remove_inst();
                continue;
            }

            // The instruction stays, so everything it reads is live.
            for arg in pos.func.dfg.inst_args(inst) {
                let v = pos.func.dfg.resolve_aliases(*arg);
                live[v.index()] = true;
            }
        }
    }
}

View File

@@ -21,6 +21,11 @@ impl Imm64 {
pub fn new(x: i64) -> Imm64 {
Imm64(x)
}
/// Return self negated.
pub fn wrapping_neg(self) -> Imm64 {
Imm64(self.0.wrapping_neg())
}
}
impl Into<i64> for Imm64 {

View File

@@ -58,6 +58,10 @@ impl TargetIsa for Isa {
&self.shared_flags
}
/// This ISA reports that scalar comparisons go through a CPU flags register,
/// which lets `do_postopt` apply its flags-based compare+branch rewrite.
fn uses_cpu_flags(&self) -> bool {
true
}
/// Return this ISA's register description, cloned from `registers::INFO`.
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}

View File

@@ -158,6 +158,11 @@ pub trait TargetIsa: fmt::Display {
/// Get the ISA-independent flags that were used to make this trait object.
fn flags(&self) -> &settings::Flags;
/// Does the CPU implement scalar comparisons using a CPU flags register?
///
/// The default answer is `false`; an ISA that has a flags register overrides
/// this method to return `true`.
fn uses_cpu_flags(&self) -> bool {
false
}
/// Get a data structure describing the registers in this ISA.
fn register_info(&self) -> RegInfo;

View File

@@ -68,11 +68,13 @@ mod abi;
mod bitset;
mod constant_hash;
mod context;
mod dce;
mod divconst_magic_numbers;
mod iterators;
mod legalizer;
mod licm;
mod partition_slice;
mod postopt;
mod predicates;
mod preopt;
mod ref_slice;

211
lib/cretonne/src/postopt.rs Normal file
View File

@@ -0,0 +1,211 @@
//! A post-legalization rewriting pass.
#![allow(non_snake_case)]
use cursor::{Cursor, EncCursor};
use ir::dfg::ValueDef;
use ir::{Function, InstructionData, Value, InstBuilder, Ebb, Inst};
use ir::condcodes::{CondCode, IntCC, FloatCC};
use ir::instructions::{Opcode, ValueList};
use ir::immediates::Imm64;
use isa::TargetIsa;
use timing;
/// Information collected about a compare+branch sequence.
///
/// Filled in by `optimize_cpu_flags` when a `brz`/`brnz` tests the result of
/// an integer or floating-point comparison.
struct CmpBrInfo {
/// The branch instruction.
br_inst: Inst,
/// The icmp, icmp_imm, or fcmp instruction.
cmp_inst: Inst,
/// The destination of the branch.
destination: Ebb,
/// The full argument list of the original branch. Its first element is the
/// tested condition value; only the elements after it are passed on to the
/// rewritten branch.
args: ValueList,
/// The first argument to the comparison. The second is in the `kind` field.
cmp_arg: Value,
/// If the branch is `brz` rather than `brnz`, we need to invert the condition
/// before the branch.
invert_branch_cond: bool,
/// The kind of comparison, and the second argument.
kind: CmpBrKind,
}
/// The comparison feeding a branch: its condition code plus its second
/// operand (a value, or an immediate for the `icmp_imm` form).
enum CmpBrKind {
// Integer comparison of two values.
Icmp { cond: IntCC, arg: Value },
// Integer comparison of a value against an immediate.
IcmpImm { cond: IntCC, imm: Imm64 },
// Floating-point comparison of two values.
Fcmp { cond: FloatCC, arg: Value },
}
/// Optimize comparisons to use flags values, to avoid materializing conditions
/// in integer registers.
///
/// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff
/// sequences.
///
/// `inst` is the instruction under inspection and `last_flags_clobber` is the
/// most recent flags-clobbering instruction the caller has seen in the current
/// EBB. The rewrite only happens when `inst` is a `Branch`-format instruction
/// whose tested value is the result of an integer or float comparison, and
/// that comparison is itself `last_flags_clobber`.
///
/// On success the cursor is left at the comparison instruction. The original
/// comparison is replaced by a `trueif`/`trueff` on the new flags value.
///
/// # Panics
///
/// Panics if a `Branch`-format instruction is neither `brz` nor `brnz`, or if
/// it has no arguments.
fn optimize_cpu_flags(
    pos: &mut EncCursor,
    inst: Inst,
    last_flags_clobber: Option<Inst>,
    isa: &TargetIsa,
) {
    // Look for compare and branch patterns.
    // This code could be considerably simplified with non-lexical lifetimes.
    let info = match pos.func.dfg[inst] {
        InstructionData::Branch {
            opcode,
            destination,
            ref args,
        } => {
            let tested = args.first(&pos.func.dfg.value_lists).unwrap();
            let invert_branch_cond = match opcode {
                Opcode::Brnz => false,
                Opcode::Brz => true,
                _ => panic!(),
            };

            // The tested value has to be produced by an instruction.
            let cmp_inst = match pos.func.dfg.value_def(tested) {
                ValueDef::Result(def_inst, _) => def_inst,
                _ => return,
            };

            // Pull out the first operand and describe the rest of the comparison.
            let (cmp_arg, kind) = match pos.func.dfg[cmp_inst] {
                InstructionData::IntCompare {
                    cond,
                    args: operands,
                    ..
                } => (
                    operands[0],
                    CmpBrKind::Icmp {
                        cond,
                        arg: operands[1],
                    },
                ),
                InstructionData::IntCompareImm { cond, arg, imm, .. } => {
                    (arg, CmpBrKind::IcmpImm { cond, imm })
                }
                InstructionData::FloatCompare {
                    cond,
                    args: operands,
                    ..
                } => (
                    operands[0],
                    CmpBrKind::Fcmp {
                        cond,
                        arg: operands[1],
                    },
                ),
                _ => return,
            };

            CmpBrInfo {
                br_inst: inst,
                cmp_inst,
                destination,
                args: args.clone(),
                cmp_arg,
                invert_branch_cond,
                kind,
            }
        }
        // TODO: trapif, trueif, selectif, and their ff counterparts.
        _ => return,
    };

    // If any instructions clobber the flags between the comparison and the branch,
    // don't optimize them.
    if last_flags_clobber != Some(info.cmp_inst) {
        return;
    }

    // We found a compare+branch pattern. Transform it to use flags.
    // The branch arguments proper are everything after the tested value.
    let br_args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec();
    pos.goto_inst(info.cmp_inst);
    match info.kind {
        CmpBrKind::Icmp { cond, arg } => {
            let flags = pos.ins().ifcmp(info.cmp_arg, arg);
            pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
            let br_cond = if info.invert_branch_cond {
                cond.inverse()
            } else {
                cond
            };
            pos.func.dfg.replace(info.br_inst).brif(
                br_cond,
                flags,
                info.destination,
                &br_args,
            );
        }
        CmpBrKind::IcmpImm { cond, imm } => {
            let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm);
            pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
            let br_cond = if info.invert_branch_cond {
                cond.inverse()
            } else {
                cond
            };
            pos.func.dfg.replace(info.br_inst).brif(
                br_cond,
                flags,
                info.destination,
                &br_args,
            );
        }
        CmpBrKind::Fcmp { cond, arg } => {
            let flags = pos.ins().ffcmp(info.cmp_arg, arg);
            pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags);
            let br_cond = if info.invert_branch_cond {
                cond.inverse()
            } else {
                cond
            };
            pos.func.dfg.replace(info.br_inst).brff(
                br_cond,
                flags,
                info.destination,
                &br_args,
            );
        }
    }

    // Re-encode both rewritten instructions; the outcome is deliberately not
    // inspected here.
    let _ = pos.func.update_encoding(info.cmp_inst, isa);
    let _ = pos.func.update_encoding(info.br_inst, isa);
}
//----------------------------------------------------------------------
//
// The main post-opt pass.
/// Run the post-legalization rewrites over every instruction of `func`.
///
/// When the ISA reports that it uses CPU flags, each instruction is offered to
/// `optimize_cpu_flags`, and the most recent instruction whose encoding
/// clobbers the flags is tracked per EBB.
pub fn do_postopt(func: &mut Function, isa: &TargetIsa) {
    let _tt = timing::postopt();
    let mut pos = EncCursor::new(func, isa);
    while let Some(_ebb) = pos.next_ebb() {
        // Flags tracking starts afresh in every EBB.
        let mut last_flags_clobber = None;
        while let Some(inst) = pos.next_inst() {
            if !isa.uses_cpu_flags() {
                continue;
            }

            // Optimize instructions to make use of flags.
            optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa);

            // Track the most recent seen instruction that clobbers the flags.
            let clobbers_flags =
                match isa.encoding_info().operand_constraints(
                    pos.func.encodings[inst],
                ) {
                    Some(constraints) => constraints.clobbers_flags,
                    None => false,
                };
            if clobbers_flags {
                last_flags_clobber = Some(inst);
            }
        }
    }
}

View File

@@ -127,28 +127,6 @@ fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
return package_up_divrem_info(arg, argL_ty, imm.into(), isSigned, isRem);
}
// TODO: should we actually bother to do this (that is, manually match
// the case that the second argument is an iconst)? Or should we assume
// that some previous constant propagation pass has pushed all such
// immediates to their use points, creating BinaryImm instructions
// instead? For now we take the conservative approach.
if let InstructionData::Binary { opcode, args } = *idata {
let (isSigned, isRem) = match opcode {
Opcode::Udiv => (false, false),
Opcode::Urem => (false, true),
Opcode::Sdiv => (true, false),
Opcode::Srem => (true, true),
_other => return None,
};
let argR: Value = args[1];
if let Some(simm64) = get_const(argR, dfg) {
let argL: Value = args[0];
// Pull the operation size (type) from the left arg
let argL_ty = dfg.value_type(argL);
return package_up_divrem_info(argL, argL_ty, simm64, isSigned, isRem);
}
}
None
}
@@ -473,25 +451,106 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
}
}
//----------------------------------------------------------------------
//
// General pattern-match helpers.
/// Find out if `value` actually resolves to a constant, and if so what its
/// value is.
fn get_const(value: Value, dfg: &DataFlowGraph) -> Option<i64> {
match dfg.value_def(value) {
ValueDef::Result(definingInst, resultNo) => {
let definingIData: &InstructionData = &dfg[definingInst];
if let InstructionData::UnaryImm { opcode, imm } = *definingIData {
if opcode == Opcode::Iconst && resultNo == 0 {
return Some(imm.into());
/// Apply basic simplifications.
///
/// This folds constants with arithmetic to form `_imm` instructions, and other
/// minor simplifications.
fn simplify(pos: &mut FuncCursor, inst: Inst) {
match pos.func.dfg[inst] {
InstructionData::Binary { opcode, args } => {
if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[1]) {
if let InstructionData::UnaryImm {
opcode: Opcode::Iconst,
mut imm,
} = pos.func.dfg[iconst_inst]
{
let new_opcode = match opcode {
Opcode::Iadd => Opcode::IaddImm,
Opcode::Imul => Opcode::ImulImm,
Opcode::Sdiv => Opcode::SdivImm,
Opcode::Udiv => Opcode::UdivImm,
Opcode::Srem => Opcode::SremImm,
Opcode::Urem => Opcode::UremImm,
Opcode::Band => Opcode::BandImm,
Opcode::Bor => Opcode::BorImm,
Opcode::Bxor => Opcode::BxorImm,
Opcode::Rotl => Opcode::RotlImm,
Opcode::Rotr => Opcode::RotrImm,
Opcode::Ishl => Opcode::IshlImm,
Opcode::Ushr => Opcode::UshrImm,
Opcode::Sshr => Opcode::SshrImm,
Opcode::Isub => {
imm = imm.wrapping_neg();
Opcode::IaddImm
}
_ => return,
};
let ty = pos.func.dfg.ctrl_typevar(inst);
pos.func.dfg.replace(inst).BinaryImm(
new_opcode,
ty,
imm,
args[0],
);
}
} else if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[0]) {
if let InstructionData::UnaryImm {
opcode: Opcode::Iconst,
mut imm,
} = pos.func.dfg[iconst_inst]
{
let new_opcode = match opcode {
Opcode::Isub => Opcode::IrsubImm,
_ => return,
};
let ty = pos.func.dfg.ctrl_typevar(inst);
pos.func.dfg.replace(inst).BinaryImm(
new_opcode,
ty,
imm,
args[0],
);
}
}
None
}
ValueDef::Param(_definingEbb, _paramNo) => None,
InstructionData::IntCompare { opcode, cond, args } => {
debug_assert_eq!(opcode, Opcode::Icmp);
if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[1]) {
if let InstructionData::UnaryImm {
opcode: Opcode::Iconst,
imm,
} = pos.func.dfg[iconst_inst]
{
pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
}
}
}
InstructionData::CondTrap { .. } |
InstructionData::Branch { .. } |
InstructionData::Ternary { opcode: Opcode::Select, .. } => {
// Fold away a redundant `bint`.
let maybe = {
let args = pos.func.dfg.inst_args(inst);
if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
if let InstructionData::Unary {
opcode: Opcode::Bint,
arg: bool_val,
} = pos.func.dfg[def_inst]
{
Some(bool_val)
} else {
None
}
} else {
None
}
};
if let Some(bool_val) = maybe {
let args = pos.func.dfg.inst_args_mut(inst);
args[0] = bool_val;
}
}
_ => {}
}
}
@@ -503,6 +562,8 @@ pub fn do_preopt(func: &mut Function) {
while let Some(_ebb) = pos.next_ebb() {
while let Some(inst) = pos.next_inst() {
// Apply basic simplifications.
simplify(&mut pos, inst);
//-- BEGIN -- division by constants ----------------

View File

@@ -55,7 +55,9 @@ define_passes!{
flowgraph: "Control flow graph",
domtree: "Dominator tree",
loop_analysis: "Loop analysis",
postopt: "Post-legalization rewriting",
preopt: "Pre-legalization rewriting",
dce: "Dead code elimination",
legalize: "Legalization",
gvn: "Global value numbering",
licm: "Loop invariant code motion",