Simple preopt: try to fold right-shift of left-shift into an extended move

Benjamin Bouvier
2019-07-05 17:21:17 +02:00
parent 141b45e0e1
commit 2fef2eef67
2 changed files with 257 additions and 1 deletion
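
For context (not part of the patch): the fold relies on a standard bit-manipulation identity. Shifting a value left by N and then right by N keeps only the low `lane_bits - N` bits, zero-extended when the right shift is logical and sign-extended when it is arithmetic. A minimal standalone Rust sketch of the 32-bit, N = 24 case:

    fn main() {
        let x: u32 = 0x1234_56f8;

        // Logical shift pair: same as zero-extending the low 8 bits (the ushr_imm case).
        assert_eq!((x << 24) >> 24, x as u8 as u32);

        // Arithmetic shift pair: same as sign-extending the low 8 bits (the sshr_imm case).
        assert_eq!((((x as i32) << 24) >> 24) as u32, x as i8 as i32 as u32);
    }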


@@ -13,7 +13,7 @@ use crate::ir::{
dfg::ValueDef,
immediates,
instructions::{Opcode, ValueList},
types::{I32, I64},
types::{I16, I32, I64, I8},
DataFlowGraph, Ebb, Function, Inst, InstBuilder, InstructionData, Type, Value,
};
use crate::timing;
@@ -466,6 +466,56 @@ fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::
None
}
/// Try to transform [(x << N) >> N] into a zero- or sign-extending move.
/// Returns true if the final instruction has been converted to such a move.
fn try_fold_extended_move(
    pos: &mut FuncCursor,
    inst: Inst,
    opcode: Opcode,
    arg: Value,
    imm: immediates::Imm64,
) -> bool {
    if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
        if let InstructionData::BinaryImm {
            opcode: Opcode::IshlImm,
            arg: prev_arg,
            imm: prev_imm,
        } = &pos.func.dfg[arg_inst]
        {
            if imm != *prev_imm {
                return false;
            }

            let dest_ty = pos.func.dfg.ctrl_typevar(inst);
            if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() {
                return false;
            }

            let imm_bits: i64 = imm.into();
            let ireduce_ty = match dest_ty.lane_bits() as i64 - imm_bits {
                8 => I8,
                16 => I16,
                32 => I32,
                _ => return false,
            };
            let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap();

            let arg = *prev_arg;
            // The ireduce is a no-op for code generation, since ireduce_ty has a smaller
            // lane width than the argument type (which is also the destination type).
            let narrower_arg = pos.ins().ireduce(ireduce_ty, arg);
            if opcode == Opcode::UshrImm {
                pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg);
            } else {
                pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg);
            }
            return true;
        }
    }
    false
}
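
As a concrete instance of the width arithmetic above: an i64 value shifted left and then right by 48 keeps 64 - 48 = 16 bits, so the pair becomes ireduce.i16 followed by uextend.i64 or sextend.i64 (the corresponding tests appear in the second file below). A minimal standalone sketch of that selection step, using an assumed helper name that is not taken from the patch or the Cranelift API:

    // Map the retained width `dest lane bits - shift amount` to the lane width of the
    // intermediate ireduce, mirroring the match in try_fold_extended_move above.
    fn ireduce_lane_bits(dest_lane_bits: i64, shift: i64) -> Option<i64> {
        match dest_lane_bits - shift {
            8 => Some(8),
            16 => Some(16),
            32 => Some(32),
            // Any other width is not a whole integer lane, so the shift pair is left alone.
            _ => None,
        }
    }

    fn main() {
        assert_eq!(ireduce_lane_bits(64, 48), Some(16)); // i64 shifted by 48 -> ireduce.i16
        assert_eq!(ireduce_lane_bits(32, 24), Some(8));  // i32 shifted by 24 -> ireduce.i8
        assert_eq!(ireduce_lane_bits(32, 20), None);     // 12 bits left: not foldable
    }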
/// Apply basic simplifications.
///
/// This folds constants with arithmetic to form `_imm` instructions, and other
@@ -565,6 +615,12 @@ fn simplify(pos: &mut FuncCursor, inst: Inst) {
                    }
                }
            }
            Opcode::UshrImm | Opcode::SshrImm => {
                if try_fold_extended_move(pos, inst, opcode, arg, imm) {
                    return;
                }
            }
            _ => {}
        },


@@ -78,3 +78,203 @@ ebb0(v0: i32):
; nextln: v2 = irsub_imm v0, 2
; nextln: return v2
; nextln: }
;; Zero- and sign-extensions.
;; 8 -> 16
function %uextend_8_16() -> i16 {
ebb0:
v0 = iconst.i16 37
v1 = ishl_imm v0, 8
v2 = ushr_imm v1, 8
return v2
}
; sameln: function %uextend_8_16
; nextln: ebb0:
; nextln: v0 = iconst.i16 37
; nextln: v1 = ishl_imm v0, 8
; nextln: v3 = ireduce.i8 v0
; nextln: v2 = uextend.i16 v3
; nextln: return v2
; nextln: }
function %sextend_8_16() -> i16 {
ebb0:
v0 = iconst.i16 37
v1 = ishl_imm v0, 8
v2 = sshr_imm v1, 8
return v2
}
; sameln: function %sextend_8_16
; nextln: ebb0:
; nextln: v0 = iconst.i16 37
; nextln: v1 = ishl_imm v0, 8
; nextln: v3 = ireduce.i8 v0
; nextln: v2 = sextend.i16 v3
; nextln: return v2
; nextln: }
;; 8 -> 32
function %uextend_8_32() -> i32 {
ebb0:
v0 = iconst.i32 37
v1 = ishl_imm v0, 24
v2 = ushr_imm v1, 24
return v2
}
; sameln: function %uextend_8_32
; nextln: ebb0:
; nextln: v0 = iconst.i32 37
; nextln: v1 = ishl_imm v0, 24
; nextln: v3 = ireduce.i8 v0
; nextln: v2 = uextend.i32 v3
; nextln: return v2
; nextln: }
function %sextend_8_32() -> i32 {
ebb0:
v0 = iconst.i32 37
v1 = ishl_imm v0, 24
v2 = sshr_imm v1, 24
return v2
}
; sameln: function %sextend_8_32
; nextln: ebb0:
; nextln: v0 = iconst.i32 37
; nextln: v1 = ishl_imm v0, 24
; nextln: v3 = ireduce.i8 v0
; nextln: v2 = sextend.i32 v3
; nextln: return v2
; nextln: }
;; 16 -> 32
function %uextend_16_32() -> i32 {
ebb0:
v0 = iconst.i32 37
v1 = ishl_imm v0, 16
v2 = ushr_imm v1, 16
return v2
}
; sameln: function %uextend_16_32
; nextln: ebb0:
; nextln: v0 = iconst.i32 37
; nextln: v1 = ishl_imm v0, 16
; nextln: v3 = ireduce.i16 v0
; nextln: v2 = uextend.i32 v3
; nextln: return v2
; nextln: }
function %sextend_16_32() -> i32 {
ebb0:
v0 = iconst.i32 37
v1 = ishl_imm v0, 16
v2 = sshr_imm v1, 16
return v2
}
; sameln: function %sextend_16_32
; nextln: ebb0:
; nextln: v0 = iconst.i32 37
; nextln: v1 = ishl_imm v0, 16
; nextln: v3 = ireduce.i16 v0
; nextln: v2 = sextend.i32 v3
; nextln: return v2
; nextln: }
;; 8 -> 64
function %uextend_8_64() -> i64 {
ebb0:
v0 = iconst.i64 37
v1 = ishl_imm v0, 56
v2 = ushr_imm v1, 56
return v2
}
; sameln: function %uextend_8_64
; nextln: ebb0:
; nextln: v0 = iconst.i64 37
; nextln: v1 = ishl_imm v0, 56
; nextln: v3 = ireduce.i8 v0
; nextln: v2 = uextend.i64 v3
; nextln: return v2
; nextln: }
function %sextend_8_64() -> i64 {
ebb0:
v0 = iconst.i64 37
v1 = ishl_imm v0, 56
v2 = sshr_imm v1, 56
return v2
}
; sameln: function %sextend_8_64
; nextln: ebb0:
; nextln: v0 = iconst.i64 37
; nextln: v1 = ishl_imm v0, 56
; nextln: v3 = ireduce.i8 v0
; nextln: v2 = sextend.i64 v3
; nextln: return v2
; nextln: }
;; 16 -> 64
function %uextend_16_64() -> i64 {
ebb0:
v0 = iconst.i64 37
v1 = ishl_imm v0, 48
v2 = ushr_imm v1, 48
return v2
}
; sameln: function %uextend_16_64
; nextln: ebb0:
; nextln: v0 = iconst.i64 37
; nextln: v1 = ishl_imm v0, 48
; nextln: v3 = ireduce.i16 v0
; nextln: v2 = uextend.i64 v3
; nextln: return v2
; nextln: }
function %sextend_16_64() -> i64 {
ebb0:
v0 = iconst.i64 37
v1 = ishl_imm v0, 48
v2 = sshr_imm v1, 48
return v2
}
; sameln: function %sextend_16_64
; nextln: ebb0:
; nextln: v0 = iconst.i64 37
; nextln: v1 = ishl_imm v0, 48
; nextln: v3 = ireduce.i16 v0
; nextln: v2 = sextend.i64 v3
; nextln: return v2
; nextln: }
;; 32 -> 64
function %uextend_32_64() -> i64 {
ebb0:
v0 = iconst.i64 37
v1 = ishl_imm v0, 32
v2 = ushr_imm v1, 32
return v2
}
; sameln: function %uextend_32_64
; nextln: ebb0:
; nextln: v0 = iconst.i64 37
; nextln: v1 = ishl_imm v0, 32
; nextln: v3 = ireduce.i32 v0
; nextln: v2 = uextend.i64 v3
; nextln: return v2
; nextln: }
function %sextend_32_64() -> i64 {
ebb0:
v0 = iconst.i64 37
v1 = ishl_imm v0, 32
v2 = sshr_imm v1, 32
return v2
}
; sameln: function %sextend_32_64
; nextln: ebb0:
; nextln: v0 = iconst.i64 37
; nextln: v1 = ishl_imm v0, 32
; nextln: v3 = ireduce.i32 v0
; nextln: v2 = sextend.i64 v3
; nextln: return v2
; nextln: }