From 2fef2eef67881ef2c921e4f2387ed1d854d387b2 Mon Sep 17 00:00:00 2001
From: Benjamin Bouvier
Date: Fri, 5 Jul 2019 17:21:17 +0200
Subject: [PATCH] Simple preopt: try to fold right-shift of left-shift into an extended move;

---
 cranelift/codegen/src/simple_preopt.rs        |  75 +++++-
 .../filetests/simple_preopt/simplify.clif     | 200 ++++++++++++++++++
 2 files changed, 274 insertions(+), 1 deletion(-)

diff --git a/cranelift/codegen/src/simple_preopt.rs b/cranelift/codegen/src/simple_preopt.rs
index de31036477..fc1592099a 100644
--- a/cranelift/codegen/src/simple_preopt.rs
+++ b/cranelift/codegen/src/simple_preopt.rs
@@ -13,7 +13,7 @@ use crate::ir::{
     dfg::ValueDef,
     immediates,
     instructions::{Opcode, ValueList},
-    types::{I32, I64},
+    types::{I16, I32, I64, I8},
     DataFlowGraph, Ebb, Function, Inst, InstBuilder, InstructionData, Type, Value,
 };
 use crate::timing;
@@ -466,6 +466,73 @@ fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::
     None
 }
 
+/// Try to fold `(x << N) >> N` into a zero- or sign-extending move.
+///
+/// `inst` is a right shift (`ushr_imm` or `sshr_imm`, as given by `opcode`) of `arg` by `imm`.
+/// The fold applies when `arg` is the result of an `ishl_imm` by the same immediate on the
+/// same integer type, and the bits surviving both shifts form an 8-, 16- or 32-bit lane that
+/// is strictly narrower than that type. `inst` then becomes `uextend(ireduce(x))` for
+/// `ushr_imm` and `sextend(ireduce(x))` otherwise; the `ireduce` is inserted at the cursor.
+///
+/// Returns true if the final instruction has been converted to such a move.
+fn try_fold_extended_move(
+    pos: &mut FuncCursor,
+    inst: Inst,
+    opcode: Opcode,
+    arg: Value,
+    imm: immediates::Imm64,
+) -> bool {
+    if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
+        if let InstructionData::BinaryImm {
+            opcode: Opcode::IshlImm,
+            arg: prev_arg,
+            imm: prev_imm,
+        } = &pos.func.dfg[arg_inst]
+        {
+            if imm != *prev_imm {
+                return false;
+            }
+
+            let dest_ty = pos.func.dfg.ctrl_typevar(inst);
+            if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() {
+                return false;
+            }
+
+            // `imm` is an arbitrary 64-bit value: subtract with wrapping so that an extreme
+            // immediate (e.g. `i64::MIN`) cannot overflow and panic in debug builds.
+            let imm_bits: i64 = imm.into();
+            let dest_bits = dest_ty.lane_bits() as i64;
+            let ireduce_ty = match dest_bits.wrapping_sub(imm_bits) {
+                8 => I8,
+                16 => I16,
+                32 => I32,
+                _ => return false,
+            };
+
+            // Only fold when the kept lane is strictly narrower than the destination: an
+            // immediate of 0 would give a same-width reduce/extend pair that saves nothing,
+            // and a negative immediate would ask `ireduce` for a wider type.
+            if ireduce_ty.lane_bits() >= dest_ty.lane_bits() {
+                return false;
+            }
+            let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap();
+
+            // Narrow the original, unshifted operand; the extension that replaces `inst`
+            // then rebuilds the high bits the shift pair would have produced.
+            let arg = *prev_arg;
+            let narrower_arg = pos.ins().ireduce(ireduce_ty, arg);
+
+            if opcode == Opcode::UshrImm {
+                pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg);
+            } else {
+                pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg);
+            }
+            return true;
+        }
+    }
+    false
+}
+
 /// Apply basic simplifications.
 ///
 /// This folds constants with arithmetic to form `_imm` instructions, and other
@@ -565,6 +632,12 @@ fn simplify(pos: &mut FuncCursor, inst: Inst) {
                     }
                 }
             }
+
+            Opcode::UshrImm | Opcode::SshrImm => {
+                if try_fold_extended_move(pos, inst, opcode, arg, imm) {
+                    return;
+                }
+            }
             _ => {}
         },
 
diff --git a/cranelift/filetests/filetests/simple_preopt/simplify.clif b/cranelift/filetests/filetests/simple_preopt/simplify.clif
index 1592defe2f..587f64617d 100644
--- a/cranelift/filetests/filetests/simple_preopt/simplify.clif
+++ b/cranelift/filetests/filetests/simple_preopt/simplify.clif
@@ -78,3 +78,203 @@ ebb0(v0: i32):
 ; nextln:     v2 = irsub_imm v0, 2
 ; nextln:     return v2
 ; nextln: }
+
+;; Sign-extensions.
+
+;; 8 -> 16
+function %uextend_8_16() -> i16 {
+ebb0:
+    v0 = iconst.i16 37
+    v1 = ishl_imm v0, 8
+    v2 = ushr_imm v1, 8
+    return v2
+}
+; sameln: function %uextend_8_16
+; nextln: ebb0:
+; nextln:     v0 = iconst.i16 37
+; nextln:     v1 = ishl_imm v0, 8
+; nextln:     v3 = ireduce.i8 v0
+; nextln:     v2 = uextend.i16 v3
+; nextln:     return v2
+; nextln: }
+
+function %sextend_8_16() -> i16 {
+ebb0:
+    v0 = iconst.i16 37
+    v1 = ishl_imm v0, 8
+    v2 = sshr_imm v1, 8
+    return v2
+}
+; sameln: function %sextend_8_16
+; nextln: ebb0:
+; nextln:     v0 = iconst.i16 37
+; nextln:     v1 = ishl_imm v0, 8
+; nextln:     v3 = ireduce.i8 v0
+; nextln:     v2 = sextend.i16 v3
+; nextln:     return v2
+; nextln: }
+
+;; 8 -> 32
+function %uextend_8_32() -> i32 {
+ebb0:
+    v0 = iconst.i32 37
+    v1 = ishl_imm v0, 24
+    v2 = ushr_imm v1, 24
+    return v2
+}
+; sameln: function %uextend_8_32
+; nextln: ebb0:
+; nextln:     v0 = iconst.i32 37
+; nextln:     v1 = ishl_imm v0, 24
+; nextln:     v3 = ireduce.i8 v0
+; nextln:     v2 = uextend.i32 v3
+; nextln:     return v2
+; nextln: }
+
+function %sextend_8_32() -> i32 {
+ebb0:
+    v0 = iconst.i32 37
+    v1 = ishl_imm v0, 24
+    v2 = sshr_imm v1, 24
+    return v2
+}
+; sameln: function %sextend_8_32
+; nextln: ebb0:
+; nextln:     v0 = iconst.i32 37
+; nextln:     v1 = ishl_imm v0, 24
+; nextln:     v3 = ireduce.i8 v0
+; nextln:     v2 = sextend.i32 v3
+; nextln:     return v2
+; nextln: }
+
+;; 16 -> 32
+function %uextend_16_32() -> i32 {
+ebb0:
+    v0 = iconst.i32 37
+    v1 = ishl_imm v0, 16
+    v2 = ushr_imm v1, 16
+    return v2
+}
+; sameln: function %uextend_16_32
+; nextln: ebb0:
+; nextln:     v0 = iconst.i32 37
+; nextln:     v1 = ishl_imm v0, 16
+; nextln:     v3 = ireduce.i16 v0
+; nextln:     v2 = uextend.i32 v3
+; nextln:     return v2
+; nextln: }
+
+function %sextend_16_32() -> i32 {
+ebb0:
+    v0 = iconst.i32 37
+    v1 = ishl_imm v0, 16
+    v2 = sshr_imm v1, 16
+    return v2
+}
+; sameln: function %sextend_16_32
+; nextln: ebb0:
+; nextln:     v0 = iconst.i32 37
+; nextln:     v1 = ishl_imm v0, 16
+; nextln:     v3 = ireduce.i16 v0
+; nextln:     v2 = sextend.i32 v3
+; nextln:     return v2
+; nextln: }
+
+;; 8 -> 64
+function %uextend_8_64() -> i64 {
+ebb0:
+    v0 = iconst.i64 37
+    v1 = ishl_imm v0, 56
+    v2 = ushr_imm v1, 56
+    return v2
+}
+; sameln: function %uextend_8_64
+; nextln: ebb0:
+; nextln:     v0 = iconst.i64 37
+; nextln:     v1 = ishl_imm v0, 56
+; nextln:     v3 = ireduce.i8 v0
+; nextln:     v2 = uextend.i64 v3
+; nextln:     return v2
+; nextln: }
+
+function %sextend_8_64() -> i64 {
+ebb0:
+    v0 = iconst.i64 37
+    v1 = ishl_imm v0, 56
+    v2 = sshr_imm v1, 56
+    return v2
+}
+; sameln: function %sextend_8_64
+; nextln: ebb0:
+; nextln:     v0 = iconst.i64 37
+; nextln:     v1 = ishl_imm v0, 56
+; nextln:     v3 = ireduce.i8 v0
+; nextln:     v2 = sextend.i64 v3
+; nextln:     return v2
+; nextln: }
+
+;; 16 -> 64
+function %uextend_16_64() -> i64 {
+ebb0:
+    v0 = iconst.i64 37
+    v1 = ishl_imm v0, 48
+    v2 = ushr_imm v1, 48
+    return v2
+}
+; sameln: function %uextend_16_64
+; nextln: ebb0:
+; nextln:     v0 = iconst.i64 37
+; nextln:     v1 = ishl_imm v0, 48
+; nextln:     v3 = ireduce.i16 v0
+; nextln:     v2 = uextend.i64 v3
+; nextln:     return v2
+; nextln: }
+
+function %sextend_16_64() -> i64 {
+ebb0:
+    v0 = iconst.i64 37
+    v1 = ishl_imm v0, 48
+    v2 = sshr_imm v1, 48
+    return v2
+}
+; sameln: function %sextend_16_64
+; nextln: ebb0:
+; nextln:     v0 = iconst.i64 37
+; nextln:     v1 = ishl_imm v0, 48
+; nextln:     v3 = ireduce.i16 v0
+; nextln:     v2 = sextend.i64 v3
+; nextln:     return v2
+; nextln: }
+
+;; 32 -> 64
+function %uextend_32_64() -> i64 {
+ebb0:
+    v0 = iconst.i64 37
+    v1 = ishl_imm v0, 32
+    v2 = ushr_imm v1, 32
+    return v2
+}
+; sameln: function %uextend_32_64
+; nextln: ebb0:
+; nextln:     v0 = iconst.i64 37
+; nextln:     v1 = ishl_imm v0, 32
+; nextln:     v3 = ireduce.i32 v0
+; nextln:     v2 = uextend.i64 v3
+; nextln:     return v2
+; nextln: }
+
+function %sextend_32_64() -> i64 {
+ebb0:
+    v0 = iconst.i64 37
+    v1 = ishl_imm v0, 32
+    v2 = sshr_imm v1, 32
+    return v2
+}
+; sameln: function %sextend_32_64
+; nextln: ebb0:
+; nextln:     v0 = iconst.i64 37
+; nextln:     v1 = ishl_imm v0, 32
+; nextln:     v3 = ireduce.i32 v0
+; nextln:     v2 = sextend.i64 v3
+; nextln:     return v2
+; nextln: }