From cad20745876e3f7c45b2801fdb2b29cd4f4c402b Mon Sep 17 00:00:00 2001
From: Benjamin Bouvier <public@benj.me>
Date: Mon, 9 Sep 2019 18:18:41 +0200
Subject: [PATCH] [codegen] Don't simplify an operation if it would result in
 unnecessary legalization;

Converting something like iadd.i64 on a 32-bits architecture into a
iadd_imm.i64 will result in the instruction being legalized back to an
iadd.i64 later on, creating unnecessary churn.

This commit implements avoid doing so, and changes the target ISA to a
64-bits platform for tests than ran into this, as well as making sure
this won't happen on 32-bits platforms.
---
 cranelift/codegen/src/context.rs              |  2 +-
 cranelift/codegen/src/simple_preopt.rs        | 49 +++++++++------
 .../simple_preopt/div_by_const_indirect.clif  |  2 +-
 .../filetests/simple_preopt/simplify32.clif   | 61 +++++++++++++++++++
 .../{simplify.clif => simplify64.clif}        |  4 +-
 5 files changed, 98 insertions(+), 20 deletions(-)
 create mode 100644 cranelift/filetests/filetests/simple_preopt/simplify32.clif
 rename cranelift/filetests/filetests/simple_preopt/{simplify.clif => simplify64.clif} (99%)

diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs
index 5350b3444c..0f4e9ad420 100644
--- a/cranelift/codegen/src/context.rs
+++ b/cranelift/codegen/src/context.rs
@@ -243,7 +243,7 @@ impl Context {
 
     /// Perform pre-legalization rewrites on the function.
     pub fn preopt(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
-        do_preopt(&mut self.func, &mut self.cfg);
+        do_preopt(&mut self.func, &mut self.cfg, isa);
         self.verify_if(isa)?;
         Ok(())
     }
diff --git a/cranelift/codegen/src/simple_preopt.rs b/cranelift/codegen/src/simple_preopt.rs
index 8219f6627c..221e8a57b0 100644
--- a/cranelift/codegen/src/simple_preopt.rs
+++ b/cranelift/codegen/src/simple_preopt.rs
@@ -16,6 +16,7 @@ use crate::ir::{
     types::{I16, I32, I64, I8},
     DataFlowGraph, Ebb, Function, Inst, InstBuilder, InstructionData, Type, Value,
 };
+use crate::isa::TargetIsa;
 use crate::timing;
 
 #[inline]
@@ -533,9 +534,14 @@ fn try_fold_extended_move(
 
 /// Apply basic simplifications.
 ///
-/// This folds constants with arithmetic to form `_imm` instructions, and other
-/// minor simplifications.
-fn simplify(pos: &mut FuncCursor, inst: Inst) {
+/// This folds constants with arithmetic to form `_imm` instructions, and other minor
+/// simplifications.
+///
+/// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the
+/// controlling type's width of the instruction. This would result in an illegal instruction that
+/// would likely be expanded back into an instruction on smaller types with the same initial
+/// opcode, creating unnecessary churn.
+fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) {
     match pos.func.dfg[inst] {
         InstructionData::Binary { opcode, args } => {
             if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
@@ -562,13 +568,15 @@ fn simplify(pos: &mut FuncCursor, inst: Inst) {
                     _ => return,
                 };
                 let ty = pos.func.dfg.ctrl_typevar(inst);
-                pos.func
-                    .dfg
-                    .replace(inst)
-                    .BinaryImm(new_opcode, ty, imm, args[0]);
+                if ty.bytes() <= native_word_width {
+                    pos.func
+                        .dfg
+                        .replace(inst)
+                        .BinaryImm(new_opcode, ty, imm, args[0]);
 
-                // Repeat for BinaryImm simplification.
-                simplify(pos, inst);
+                    // Repeat for BinaryImm simplification.
+                    simplify(pos, inst, native_word_width);
+                }
             } else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) {
                 let new_opcode = match opcode {
                     Opcode::Iadd => Opcode::IaddImm,
@@ -580,10 +588,12 @@ fn simplify(pos: &mut FuncCursor, inst: Inst) {
                     _ => return,
                 };
                 let ty = pos.func.dfg.ctrl_typevar(inst);
-                pos.func
-                    .dfg
-                    .replace(inst)
-                    .BinaryImm(new_opcode, ty, imm, args[1]);
+                if ty.bytes() <= native_word_width {
+                    pos.func
+                        .dfg
+                        .replace(inst)
+                        .BinaryImm(new_opcode, ty, imm, args[1]);
+                }
             }
         }
 
@@ -643,7 +653,9 @@ fn simplify(pos: &mut FuncCursor, inst: Inst) {
                 }
 
                 Opcode::UshrImm | Opcode::SshrImm => {
-                    if try_fold_extended_move(pos, inst, opcode, arg, imm) {
+                    if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width
+                        && try_fold_extended_move(pos, inst, opcode, arg, imm)
+                    {
                         return;
                     }
                 }
@@ -686,7 +698,9 @@ fn simplify(pos: &mut FuncCursor, inst: Inst) {
         InstructionData::IntCompare { opcode, cond, args } => {
             debug_assert_eq!(opcode, Opcode::Icmp);
             if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
-                pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
+                if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width {
+                    pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
+                }
             }
         }
 
@@ -937,13 +951,14 @@ fn branch_order(pos: &mut FuncCursor, cfg: &mut ControlFlowGraph, ebb: Ebb, inst
 }
 
 /// The main pre-opt pass.
-pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph) {
+pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) {
     let _tt = timing::preopt();
     let mut pos = FuncCursor::new(func);
+    let native_word_width = isa.pointer_bytes();
     while let Some(ebb) = pos.next_ebb() {
         while let Some(inst) = pos.next_inst() {
             // Apply basic simplifications.
-            simplify(&mut pos, inst);
+            simplify(&mut pos, inst, native_word_width as u32);
 
             // Try to transform divide-by-constant into simpler operations.
             if let Some(divrem_info) = get_div_info(inst, &pos.func.dfg) {
diff --git a/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif b/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif
index 5833b0f371..c111113197 100644
--- a/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif
+++ b/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif
@@ -1,5 +1,5 @@
 test simple_preopt
-target i686 baseline
+target x86_64 baseline
 
 ; Cases where the denominator is created by an iconst
 
diff --git a/cranelift/filetests/filetests/simple_preopt/simplify32.clif b/cranelift/filetests/filetests/simple_preopt/simplify32.clif
new file mode 100644
index 0000000000..45add1b7a3
--- /dev/null
+++ b/cranelift/filetests/filetests/simple_preopt/simplify32.clif
@@ -0,0 +1,61 @@
+test simple_preopt
+target i686
+
+;; 32-bits platforms.
+
+function %iadd_imm(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iconst.i32 2
+    v2 = iadd v0, v1
+    return v2
+}
+; sameln: function %iadd_imm
+; nextln: ebb0(v0: i32):
+; nextln:     v1 = iconst.i32 2
+; nextln:     v2 = iadd_imm v0, 2
+; nextln:     return v2
+; nextln: }
+
+function %isub_imm(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iconst.i32 2
+    v2 = isub v0, v1
+    return v2
+}
+; sameln: function %isub_imm
+; nextln: ebb0(v0: i32):
+; nextln:     v1 = iconst.i32 2
+; nextln:     v2 = iadd_imm v0, -2
+; nextln:     return v2
+; nextln: }
+
+function %icmp_imm(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iconst.i32 2
+    v2 = icmp slt v0, v1
+    v3 = bint.i32 v2
+    return v3
+}
+; sameln: function %icmp_imm
+; nextln: ebb0(v0: i32):
+; nextln:     v1 = iconst.i32 2
+; nextln:     v2 = icmp_imm slt v0, 2
+; nextln:     v3 = bint.i32 v2
+; nextln:     return v3
+; nextln: }
+
+;; Don't simplify operations that would get illegal because of lack of native
+;; support.
+function %iadd_imm(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = iconst.i64 2
+    v2 = iadd v0, v1
+    return v2
+}
+; sameln: function %iadd_imm
+; nextln: ebb0(v0: i64):
+; nextln:     v1 = iconst.i64 2
+; nextln:     v2 = iadd v0, v1
+; nextln:     return v2
+; nextln: }
+
diff --git a/cranelift/filetests/filetests/simple_preopt/simplify.clif b/cranelift/filetests/filetests/simple_preopt/simplify64.clif
similarity index 99%
rename from cranelift/filetests/filetests/simple_preopt/simplify.clif
rename to cranelift/filetests/filetests/simple_preopt/simplify64.clif
index e74be4738a..db485ce773 100644
--- a/cranelift/filetests/filetests/simple_preopt/simplify.clif
+++ b/cranelift/filetests/filetests/simple_preopt/simplify64.clif
@@ -1,5 +1,7 @@
 test simple_preopt
-target i686
+target x86_64
+
+;; 64-bits platforms.
 
 function %iadd_imm(i32) -> i32 {
 ebb0(v0: i32):