diff --git a/cranelift/Cargo.toml b/cranelift/Cargo.toml index f2e4ea9607..335a69c808 100644 --- a/cranelift/Cargo.toml +++ b/cranelift/Cargo.toml @@ -47,4 +47,5 @@ walkdir = "2.2" [features] default = ["disas", "wasm", "cranelift-codegen/all-arch"] disas = ["capstone"] +enable-peepmatic = ["cranelift-codegen/enable-peepmatic", "cranelift-filetests/enable-peepmatic"] wasm = ["wat", "cranelift-wasm"] diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 4737904eb3..0bc1c32006 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -24,7 +24,7 @@ gimli = { version = "0.20.0", default-features = false, features = ["write"], op smallvec = { version = "1.0.0" } thiserror = "1.0.4" byteorder = { version = "1.3.2", default-features = false } -peepmatic-runtime = { path = "../peepmatic/crates/runtime" } +peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true } regalloc = "0.0.23" # It is a goal of the cranelift-codegen crate to have minimal external dependencies. # Please don't add any unless they are essential to the task of creating binary @@ -74,9 +74,12 @@ all-arch = [ # For dependent crates that want to serialize some parts of cranelift enable-serde = ["serde"] -# Recompile our optimizations that are written in the peepmatic DSL into a +# Recompile our optimizations that are written in the `peepmatic` DSL into a # compact finite-state transducer automaton. rebuild-peephole-optimizers = ["peepmatic"] +# Enable the use of `peepmatic`-generated peephole optimizers. +enable-peepmatic = ["peepmatic-runtime"] + [badges] maintenance = { status = "experimental" } diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 05c5583f5b..3483219fea 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -101,7 +101,6 @@ mod licm; mod nan_canonicalization; mod num_uses; mod partition_slice; -mod peepmatic; mod postopt; mod predicates; mod redundant_reload_remover; @@ -116,6 +115,9 @@ mod topo_order; mod unreachable_code; mod value_label; +#[cfg(feature = "enable-peepmatic")] +mod peepmatic; + pub use crate::result::{CodegenError, CodegenResult}; /// Version number of this crate. 
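The feature plumbing above follows a standard Rust pattern: `enable-peepmatic` in each crate does nothing but enable the same feature in its dependencies, and `#[cfg(feature = ...)]` then selects one of two implementations at compile time. Below is a minimal, self-contained sketch of that pattern as `simple_preopt.rs` uses it further down in this diff; the module name `simplify` matches the real code, but the function bodies here are illustrative placeholders only, not code from this change.

#[cfg(feature = "enable-peepmatic")]
mod simplify {
    /// Stand-in for the peepmatic-driven peephole optimizer.
    pub fn apply_all(x: u32) -> u32 {
        x.wrapping_mul(2)
    }
}

#[cfg(not(feature = "enable-peepmatic"))]
mod simplify {
    /// Stand-in for the hand-written peephole optimizations.
    pub fn apply_all(x: u32) -> u32 {
        x << 1
    }
}

fn main() {
    // Call sites are identical regardless of which variant was compiled in,
    // which is why `do_preopt` below needs no cfg logic of its own.
    println!("{}", simplify::apply_all(21));
}

Because exactly one of the two `simplify` modules exists in any given build, both must expose the same public interface (`peephole_optimizer` and `apply_all` in the real code).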
diff --git a/cranelift/codegen/src/simple_preopt.rs b/cranelift/codegen/src/simple_preopt.rs index a77c8cb19a..7413b01e90 100644 --- a/cranelift/codegen/src/simple_preopt.rs +++ b/cranelift/codegen/src/simple_preopt.rs @@ -15,7 +15,6 @@ use crate::ir::{ Block, DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Type, Value, }; use crate::isa::TargetIsa; -use crate::peepmatic::ValueOrInst; use crate::timing; #[inline] @@ -182,8 +181,12 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso // U32 div by 1: identity // U32 rem by 1: zero - DivRemByConstInfo::DivU32(_, 1) | DivRemByConstInfo::RemU32(_, 1) => { - unreachable!("unsigned division and remainder by one is handled in `preopt.peepmatic`"); + DivRemByConstInfo::DivU32(n1, 1) | DivRemByConstInfo::RemU32(n1, 1) => { + if is_rem { + pos.func.dfg.replace(inst).iconst(I32, 0); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); + } } // U32 div, rem by a power-of-2 @@ -198,10 +201,7 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso let mask = (1u64 << k) - 1; pos.func.dfg.replace(inst).band_imm(n1, mask as i64); } else { - unreachable!( - "unsigned division by a power of two is handled in \ - `preopt.peepmatic`" - ); + pos.func.dfg.replace(inst).ushr_imm(n1, k as i64); } } @@ -251,8 +251,12 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso // U64 div by 1: identity // U64 rem by 1: zero - DivRemByConstInfo::DivU64(_, 1) | DivRemByConstInfo::RemU64(_, 1) => { - unreachable!("unsigned division and remainder by one is handled in `preopt.peepmatic`"); + DivRemByConstInfo::DivU64(n1, 1) | DivRemByConstInfo::RemU64(n1, 1) => { + if is_rem { + pos.func.dfg.replace(inst).iconst(I64, 0); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); + } } // U64 div, rem by a power-of-2 @@ -267,9 +271,7 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso let mask = (1u64 << k) - 1; pos.func.dfg.replace(inst).band_imm(n1, mask as i64); } else { - unreachable!( - "unsigned division by a power of two is handled in `preopt.peepmatic`" - ); + pos.func.dfg.replace(inst).ushr_imm(n1, k as i64); } } @@ -322,8 +324,12 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso // S32 div by 1: identity // S32 rem by 1: zero - DivRemByConstInfo::DivS32(_, 1) | DivRemByConstInfo::RemS32(_, 1) => { - unreachable!("signed division and remainder by one is handled in `preopt.peepmatic`"); + DivRemByConstInfo::DivS32(n1, 1) | DivRemByConstInfo::RemS32(n1, 1) => { + if is_rem { + pos.func.dfg.replace(inst).iconst(I32, 0); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); + } } DivRemByConstInfo::DivS32(n1, d) | DivRemByConstInfo::RemS32(n1, d) => { @@ -393,8 +399,12 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso // S64 div by 1: identity // S64 rem by 1: zero - DivRemByConstInfo::DivS64(_, 1) | DivRemByConstInfo::RemS64(_, 1) => { - unreachable!("division and remaineder by one are handled in `preopt.peepmatic`"); + DivRemByConstInfo::DivS64(n1, 1) | DivRemByConstInfo::RemS64(n1, 1) => { + if is_rem { + pos.func.dfg.replace(inst).iconst(I64, 0); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); + } } DivRemByConstInfo::DivS64(n1, d) | DivRemByConstInfo::RemS64(n1, d) => { @@ -598,6 +608,416 @@ fn branch_order(pos: &mut FuncCursor, cfg: &mut ControlFlowGraph, block: Block, 
cfg.recompute_block(pos.func, block); } +#[cfg(feature = "enable-peepmatic")] +mod simplify { + use super::*; + use crate::peepmatic::ValueOrInst; + + pub type PeepholeOptimizer<'a, 'b> = + peepmatic_runtime::optimizer::PeepholeOptimizer<'static, 'a, &'b dyn TargetIsa>; + + pub fn peephole_optimizer<'a, 'b>(isa: &'b dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> { + crate::peepmatic::preopt(isa) + } + + pub fn apply_all<'a, 'b>( + optimizer: &mut PeepholeOptimizer<'a, 'b>, + pos: &mut FuncCursor<'a>, + inst: Inst, + _native_word_width: u32, + ) { + // After we apply one optimization, that might make another + // optimization applicable. Keep running the peephole optimizer + // until either: + // + // * No optimization applied, and therefore it doesn't make sense to + // try again, because no optimization will apply again. + // + // * Or when we replaced an instruction with an alias to an existing + // value, because we already ran the peephole optimizer over the + // aliased value's instruction in an early part of the traversal + // over the function. + while let Some(ValueOrInst::Inst(new_inst)) = + optimizer.apply_one(pos, ValueOrInst::Inst(inst)) + { + // We transplanted a new instruction into the current + // instruction, so the "new" instruction is actually the same + // one, just with different data. + debug_assert_eq!(new_inst, inst); + } + debug_assert_eq!(pos.current_inst(), Some(inst)); + } +} + +#[cfg(not(feature = "enable-peepmatic"))] +mod simplify { + use super::*; + use crate::ir::{ + dfg::ValueDef, + immediates, + instructions::{Opcode, ValueList}, + types::{I16, I32, I8}, + }; + use std::marker::PhantomData; + + pub struct PeepholeOptimizer<'a, 'b> { + phantom: PhantomData<(&'a (), &'b ())>, + } + + pub fn peephole_optimizer<'a, 'b>(_: &dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> { + PeepholeOptimizer { + phantom: PhantomData, + } + } + + pub fn apply_all<'a, 'b>( + _optimizer: &mut PeepholeOptimizer<'a, 'b>, + pos: &mut FuncCursor<'a>, + inst: Inst, + native_word_width: u32, + ) { + simplify(pos, inst, native_word_width); + branch_opt(pos, inst); + } + + #[inline] + fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> { + if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) { + if let InstructionData::UnaryImm { + opcode: Opcode::Iconst, + imm, + } = dfg[candidate_inst] + { + return Some(imm); + } + } + None + } + + /// Try to transform [(x << N) >> N] into a (un)signed-extending move. + /// Returns true if the final instruction has been converted to such a move. + fn try_fold_extended_move( + pos: &mut FuncCursor, + inst: Inst, + opcode: Opcode, + arg: Value, + imm: immediates::Imm64, + ) -> bool { + if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) { + if let InstructionData::BinaryImm { + opcode: Opcode::IshlImm, + arg: prev_arg, + imm: prev_imm, + } = &pos.func.dfg[arg_inst] + { + if imm != *prev_imm { + return false; + } + + let dest_ty = pos.func.dfg.ctrl_typevar(inst); + if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() { + return false; + } + + let imm_bits: i64 = imm.into(); + let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) { + 8 => I8, + 16 => I16, + 32 => I32, + _ => return false, + }; + let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap(); + + // This becomes a no-op, since ireduce_ty has a smaller lane width than + // the argument type (also the destination type).
+ let arg = *prev_arg; + let narrower_arg = pos.ins().ireduce(ireduce_ty, arg); + + if opcode == Opcode::UshrImm { + pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg); + } else { + pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg); + } + return true; + } + } + false + } + + /// Apply basic simplifications. + /// + /// This folds constants with arithmetic to form `_imm` instructions, and other minor + /// simplifications. + /// + /// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the + /// controlling type's width of the instruction. This would result in an illegal instruction that + /// would likely be expanded back into an instruction on smaller types with the same initial + /// opcode, creating unnecessary churn. + fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) { + match pos.func.dfg[inst] { + InstructionData::Binary { opcode, args } => { + if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) { + let new_opcode = match opcode { + Opcode::Iadd => Opcode::IaddImm, + Opcode::Imul => Opcode::ImulImm, + Opcode::Sdiv => Opcode::SdivImm, + Opcode::Udiv => Opcode::UdivImm, + Opcode::Srem => Opcode::SremImm, + Opcode::Urem => Opcode::UremImm, + Opcode::Band => Opcode::BandImm, + Opcode::Bor => Opcode::BorImm, + Opcode::Bxor => Opcode::BxorImm, + Opcode::Rotl => Opcode::RotlImm, + Opcode::Rotr => Opcode::RotrImm, + Opcode::Ishl => Opcode::IshlImm, + Opcode::Ushr => Opcode::UshrImm, + Opcode::Sshr => Opcode::SshrImm, + Opcode::Isub => { + imm = imm.wrapping_neg(); + Opcode::IaddImm + } + Opcode::Ifcmp => Opcode::IfcmpImm, + _ => return, + }; + let ty = pos.func.dfg.ctrl_typevar(inst); + if ty.bytes() <= native_word_width { + pos.func + .dfg + .replace(inst) + .BinaryImm(new_opcode, ty, imm, args[0]); + + // Repeat for BinaryImm simplification. + simplify(pos, inst, native_word_width); + } + } else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) { + let new_opcode = match opcode { + Opcode::Iadd => Opcode::IaddImm, + Opcode::Imul => Opcode::ImulImm, + Opcode::Band => Opcode::BandImm, + Opcode::Bor => Opcode::BorImm, + Opcode::Bxor => Opcode::BxorImm, + Opcode::Isub => Opcode::IrsubImm, + _ => return, + }; + let ty = pos.func.dfg.ctrl_typevar(inst); + if ty.bytes() <= native_word_width { + pos.func + .dfg + .replace(inst) + .BinaryImm(new_opcode, ty, imm, args[1]); + } + } + } + + InstructionData::Unary { opcode, arg } => { + if let Opcode::AdjustSpDown = opcode { + if let Some(imm) = resolve_imm64_value(&pos.func.dfg, arg) { + // Note this works for both positive and negative immediate values. 
+ pos.func.dfg.replace(inst).adjust_sp_down_imm(imm); + } + } + } + + InstructionData::BinaryImm { opcode, arg, imm } => { + let ty = pos.func.dfg.ctrl_typevar(inst); + + let mut arg = arg; + let mut imm = imm; + match opcode { + Opcode::IaddImm + | Opcode::ImulImm + | Opcode::BorImm + | Opcode::BandImm + | Opcode::BxorImm => { + // Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x) + if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) { + if let InstructionData::BinaryImm { + opcode: prev_opcode, + arg: prev_arg, + imm: prev_imm, + } = &pos.func.dfg[arg_inst] + { + if opcode == *prev_opcode + && ty == pos.func.dfg.ctrl_typevar(arg_inst) + { + let lhs: i64 = imm.into(); + let rhs: i64 = (*prev_imm).into(); + let new_imm = match opcode { + Opcode::BorImm => lhs | rhs, + Opcode::BandImm => lhs & rhs, + Opcode::BxorImm => lhs ^ rhs, + Opcode::IaddImm => lhs.wrapping_add(rhs), + Opcode::ImulImm => lhs.wrapping_mul(rhs), + _ => panic!("can't happen"), + }; + let new_imm = immediates::Imm64::from(new_imm); + let new_arg = *prev_arg; + pos.func + .dfg + .replace(inst) + .BinaryImm(opcode, ty, new_imm, new_arg); + imm = new_imm; + arg = new_arg; + } + } + } + } + + Opcode::UshrImm | Opcode::SshrImm => { + if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width + && try_fold_extended_move(pos, inst, opcode, arg, imm) + { + return; + } + } + + _ => {} + }; + + // Replace operations that are no-ops. + match (opcode, imm.into()) { + (Opcode::IaddImm, 0) + | (Opcode::ImulImm, 1) + | (Opcode::SdivImm, 1) + | (Opcode::UdivImm, 1) + | (Opcode::BorImm, 0) + | (Opcode::BandImm, -1) + | (Opcode::BxorImm, 0) + | (Opcode::RotlImm, 0) + | (Opcode::RotrImm, 0) + | (Opcode::IshlImm, 0) + | (Opcode::UshrImm, 0) + | (Opcode::SshrImm, 0) => { + // Alias the result value with the original argument. + replace_single_result_with_alias(&mut pos.func.dfg, inst, arg); + } + (Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => { + // Replace by zero. + pos.func.dfg.replace(inst).iconst(ty, 0); + } + (Opcode::BorImm, -1) => { + // Replace by minus one. + pos.func.dfg.replace(inst).iconst(ty, -1); + } + _ => {} + } + } + + InstructionData::IntCompare { opcode, cond, args } => { + debug_assert_eq!(opcode, Opcode::Icmp); + if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) { + if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width { + pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm); + } + } + } + + InstructionData::CondTrap { .. } + | InstructionData::Branch { .. } + | InstructionData::Ternary { + opcode: Opcode::Select, + .. + } => { + // Fold away a redundant `bint`. + let condition_def = { + let args = pos.func.dfg.inst_args(inst); + pos.func.dfg.value_def(args[0]) + }; + if let ValueDef::Result(def_inst, _) = condition_def { + if let InstructionData::Unary { + opcode: Opcode::Bint, + arg: bool_val, + } = pos.func.dfg[def_inst] + { + let args = pos.func.dfg.inst_args_mut(inst); + args[0] = bool_val; + } + } + } + + _ => {} + } + } + + struct BranchOptInfo { + br_inst: Inst, + cmp_arg: Value, + args: ValueList, + new_opcode: Opcode, + } + + /// Fold comparisons into branch operations when possible. + /// + /// This matches against operations which compare against zero, then use the + /// result in a `brz` or `brnz` branch. It folds those two operations into a + /// single `brz` or `brnz`. + fn branch_opt(pos: &mut FuncCursor, inst: Inst) { + let mut info = if let InstructionData::Branch { + opcode: br_opcode, + args: ref br_args, + .. 
+ } = pos.func.dfg[inst] + { + let first_arg = { + let args = pos.func.dfg.inst_args(inst); + args[0] + }; + + let icmp_inst = + if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) { + icmp_inst + } else { + return; + }; + + if let InstructionData::IntCompareImm { + opcode: Opcode::IcmpImm, + arg: cmp_arg, + cond: cmp_cond, + imm: cmp_imm, + } = pos.func.dfg[icmp_inst] + { + let cmp_imm: i64 = cmp_imm.into(); + if cmp_imm != 0 { + return; + } + + // icmp_imm returns non-zero when the comparison is true. So, if + // we're branching on zero, we need to invert the condition. + let cond = match br_opcode { + Opcode::Brz => cmp_cond.inverse(), + Opcode::Brnz => cmp_cond, + _ => return, + }; + + let new_opcode = match cond { + IntCC::Equal => Opcode::Brz, + IntCC::NotEqual => Opcode::Brnz, + _ => return, + }; + + BranchOptInfo { + br_inst: inst, + cmp_arg, + args: br_args.clone(), + new_opcode, + } + } else { + return; + } + } else { + return; + }; + + info.args.as_mut_slice(&mut pos.func.dfg.value_lists)[0] = info.cmp_arg; + if let InstructionData::Branch { ref mut opcode, .. } = pos.func.dfg[info.br_inst] { + *opcode = info.new_opcode; + } else { + panic!(); + } + } +} + /// The main pre-opt pass. pub fn do_preopt<'func, 'isa>( func: &'func mut Function, @@ -607,30 +1027,12 @@ pub fn do_preopt<'func, 'isa>( let _tt = timing::preopt(); let mut pos = FuncCursor::new(func); - let mut preopt = crate::peepmatic::preopt(isa); + let native_word_width = isa.pointer_bytes() as u32; + let mut optimizer = simplify::peephole_optimizer(isa); while let Some(block) = pos.next_block() { while let Some(inst) = pos.next_inst() { - // After we apply one optimization, that might make another - // optimization applicable. Keep running the peephole optimizer - // until either: - // - // * No optimization applied, and therefore it doesn't make sense to - // try again, because no optimization will apply again. - // - // * Or when we replaced an instruction with an alias to an existing - // value, because we already ran the peephole optimizer over the - // aliased value's instruction in an early part of the traversal - // over the function. - while let Some(ValueOrInst::Inst(new_inst)) = - preopt.apply_one(&mut pos, ValueOrInst::Inst(inst)) - { - // We transplanted a new instruction into the current - // instruction, so the "new" instruction is actually the same - // one, just with different data. - debug_assert_eq!(new_inst, inst); - } - debug_assert_eq!(pos.current_inst(), Some(inst)); + simplify::apply_all(&mut optimizer, &mut pos, inst, native_word_width); // Try to transform divide-by-constant into simpler operations. 
if let Some(divrem_info) = get_div_info(inst, &pos.func.dfg) { diff --git a/cranelift/filetests/Cargo.toml b/cranelift/filetests/Cargo.toml index 481401cf8a..705c31fc61 100644 --- a/cranelift/filetests/Cargo.toml +++ b/cranelift/filetests/Cargo.toml @@ -26,3 +26,6 @@ num_cpus = "1.8.0" region = "2.1.2" target-lexicon = "0.10" thiserror = "1.0.15" + +[features] +enable-peepmatic = [] diff --git a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif b/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif index e59226c7de..dcf6c77e9a 100644 --- a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif @@ -6,9 +6,9 @@ function u0:0(i8) -> i8 fast { block0(v0: i8): v1 = iconst.i8 0 v2 = isub v1, v0 - ; check: v4 = uextend.i32 v0 - ; nextln: v6 = iconst.i32 0 - ; nextln: v5 = isub v6, v4 - ; nextln: v2 = ireduce.i8 v5 + ; check: uextend.i32 + ; nextln: iconst.i32 + ; nextln: isub + ; nextln: ireduce.i8 return v2 } diff --git a/cranelift/filetests/filetests/peepmatic/branch.clif b/cranelift/filetests/filetests/peepmatic/branch.clif new file mode 100644 index 0000000000..0f68bbe9cb --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/branch.clif @@ -0,0 +1,81 @@ +test peepmatic +target x86_64 + +function %icmp_to_brz_fold(i32) -> i32 { +block0(v0: i32): + v1 = icmp_imm eq v0, 0 + brnz v1, block1 + jump block2 +block1: + v3 = iconst.i32 1 + return v3 +block2: + v4 = iconst.i32 2 + return v4 +} +; sameln: function %icmp_to_brz_fold +; nextln: block0(v0: i32): +; nextln: v1 = icmp_imm eq v0, 0 +; nextln: brnz v0, block2 +; nextln: jump block1 +; nextln: +; nextln: block1: +; nextln: v3 = iconst.i32 1 +; nextln: return v3 +; nextln: +; nextln: block2: +; nextln: v4 = iconst.i32 2 +; nextln: return v4 +; nextln: } + +function %icmp_to_brz_inverted_fold(i32) -> i32 { +block0(v0: i32): + v1 = icmp_imm ne v0, 0 + brz v1, block1 + jump block2 +block1: + v3 = iconst.i32 1 + return v3 +block2: + v4 = iconst.i32 2 + return v4 +} +; sameln: function %icmp_to_brz_inve +; nextln: block0(v0: i32): +; nextln: v1 = icmp_imm ne v0, 0 +; nextln: brnz v0, block2 +; nextln: jump block1 +; nextln: +; nextln: block1: +; nextln: v3 = iconst.i32 1 +; nextln: return v3 +; nextln: +; nextln: block2: +; nextln: v4 = iconst.i32 2 +; nextln: return v4 +; nextln: } + +function %br_icmp_inversion(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + br_icmp ugt v0, v1, block1 + jump block2 +block1: + v2 = iconst.i32 1 + return v2 +block2: + v3 = iconst.i32 2 + return v3 +} +; sameln: function %br_icmp_inversio +; nextln: block0(v0: i32, v1: i32): +; nextln: br_icmp ule v0, v1, block2 +; nextln: jump block1 +; nextln: +; nextln: block1: +; nextln: v2 = iconst.i32 1 +; nextln: return v2 +; nextln: +; nextln: block2: +; nextln: v3 = iconst.i32 2 +; nextln: return v3 +; nextln: } diff --git a/cranelift/filetests/filetests/peepmatic/div_by_const_indirect.clif b/cranelift/filetests/filetests/peepmatic/div_by_const_indirect.clif new file mode 100644 index 0000000000..ba65b2418c --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/div_by_const_indirect.clif @@ -0,0 +1,55 @@ +test peepmatic +target x86_64 baseline + +; Cases where the denominator is created by an iconst + +function %indir_udiv32(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 7 + v2 = udiv v0, v1 + ; check: v4 = iconst.i32 0x2492_4925 + ; nextln: v5 = umulhi v0, v4 + ; nextln: v6 = isub v0, v5 + ; nextln: v7 = ushr_imm v6, 1 + ; nextln: v8 = iadd v7, v5 + ; nextln: v9 = ushr_imm v8, 2 + ; nextln: v2 
-> v9 + return v2 +} + +function %indir_sdiv32(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 -17 + v2 = sdiv v0, v1 + ; check: v4 = iconst.i32 0xffff_ffff_8787_8787 + ; nextln: v5 = smulhi v0, v4 + ; nextln: v6 = sshr_imm v5, 3 + ; nextln: v7 = ushr_imm v6, 31 + ; nextln: v8 = iadd v6, v7 + ; nextln: v2 -> v8 + return v2 +} + +function %indir_udiv64(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 1337 + v2 = udiv v0, v1 + ; check: v4 = iconst.i64 0xc411_9d95_2866_a139 + ; nextln: v5 = umulhi v0, v4 + ; nextln: v6 = ushr_imm v5, 10 + ; nextln: v2 -> v6 + return v2 +} + +function %indir_sdiv64(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 -90210 + v2 = sdiv v0, v1 + ; check: v4 = iconst.i64 0xd181_4ee8_939c_b8bb + ; nextln: v5 = smulhi v0, v4 + ; nextln: v6 = sshr_imm v5, 14 + ; nextln: v7 = ushr_imm v6, 63 + ; nextln: v8 = iadd v6, v7 + ; nextln: v2 -> v8 + return v2 +} diff --git a/cranelift/filetests/filetests/peepmatic/div_by_const_non_power_of_2.clif b/cranelift/filetests/filetests/peepmatic/div_by_const_non_power_of_2.clif new file mode 100644 index 0000000000..0759f92ca9 --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/div_by_const_non_power_of_2.clif @@ -0,0 +1,266 @@ +test peepmatic +target i686 baseline + +; -------- U32 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_udiv32_p7(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 7 + ; check: iconst.i32 0x2492_4925 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: v7 = ushr_imm v6, 2 + ; check: v1 -> v7 + return v1 +} + +; simple case (mul, shift) +function %t_udiv32_p125(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 125 + ; check: iconst.i32 0x1062_4dd3 + ; check: umulhi v0, v2 + ; check: v4 = ushr_imm v3, 3 + ; check: v1 -> v4 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_udiv32_p641(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 641 + ; check: iconst.i32 0x0066_3d81 + ; check: v3 = umulhi v0, v2 + ; check: v1 -> v3 + return v1 +} + + +; -------- S32 -------- + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_sdiv32_n6(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -6 + ; check: iconst.i32 0xffff_ffff_d555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv32_n5(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -5 + ; check: iconst.i32 0xffff_ffff_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 31 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_sdiv32_n3(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -3 + ; check: iconst.i32 0x5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 31 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 + return v1 +} + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_sdiv32_p6(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 6 + ; check: iconst.i32 0x2aaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 + return v1 +} + +; case d > 0 && M < 0 (mul, add, shift, add-sign-bit) +function %t_sdiv32_p7(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 7 + ; check: iconst.i32 0xffff_ffff_9249_2493 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 2 + ;
check: ushr_imm v5, 31 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv32_p625(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 625 + ; check: iconst.i32 0x68db_8bad + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 8 + ; check: ushr_imm v4, 31 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 + return v1 +} + + +; -------- U64 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_udiv64_p7(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 7 + ; check: iconst.i64 0x2492_4924_9249_2493 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: v7 = ushr_imm v6, 2 + ; check: v1 -> v7 + return v1 +} + +; simple case (mul, shift) +function %t_udiv64_p9(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 9 + ; check: iconst.i64 0xe38e_38e3_8e38_e38f + ; check: umulhi v0, v2 + ; check: v4 = ushr_imm v3, 3 + ; check: v1 -> v4 + return v1 +} + +; complex case (mul, sub, shift, add, shift) +function %t_udiv64_p125(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 125 + ; check: iconst.i64 0x0624_dd2f_1a9f_be77 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: v7 = ushr_imm v6, 6 + ; check: v1 -> v7 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_udiv64_p274177(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 274177 + ; check: iconst.i64 0x3d30_f19c_d101 + ; check: v3 = umulhi v0, v2 + ; check: v1 -> v3 + return v1 +} + + +; -------- S64 -------- + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv64_n625(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -625 + ; check: iconst.i64 0xcb92_3a29_c779_a6b5 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_sdiv64_n6(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -6 + ; check: iconst.i64 0xd555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_sdiv64_n5(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -5 + ; check: iconst.i64 0x9999_9999_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 63 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_sdiv64_n3(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -3 + ; check: iconst.i64 0x5555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 63 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_sdiv64_p6(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 6 + ; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 + return v1 +} + +; case d > 0 && M < 0 (mul, add, shift, add-sign-bit) +function %t_sdiv64_p15(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 15 + ; check: iconst.i64 0x8888_8888_8888_8889 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 3 + ; check: ushr_imm v5, 63 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv64_p625(i64) -> i64 { 
+block0(v0: i64): + v1 = sdiv_imm v0, 625 + ; check: iconst.i64 0x346d_c5d6_3886_594b + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 + return v1 +} diff --git a/cranelift/filetests/filetests/peepmatic/div_by_const_power_of_2.clif b/cranelift/filetests/filetests/peepmatic/div_by_const_power_of_2.clif new file mode 100644 index 0000000000..a2110a5a75 --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/div_by_const_power_of_2.clif @@ -0,0 +1,292 @@ +test peepmatic +target i686 baseline + +; -------- U32 -------- + +; ignored +function %t_udiv32_p0(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 0 + ; check: udiv_imm v0, 0 + return v1 +} + +; converted to a nop +function %t_udiv32_p1(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 1 + ; check: nop + return v1 +} + +; shift +function %t_udiv32_p2(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 2 + ; check: ushr_imm v0, 1 + return v1 +} + +; shift +function %t_udiv32_p2p31(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 0x8000_0000 + ; check: ushr_imm v0, 31 + return v1 +} + + +; -------- U64 -------- + +; ignored +function %t_udiv64_p0(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 0 + ; check: udiv_imm v0, 0 + return v1 +} + +; converted to a nop +function %t_udiv64_p1(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 1 + ; check: nop + return v1 +} + +; shift +function %t_udiv64_p2(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 2 + ; check: ushr_imm v0, 1 + return v1 +} + +; shift +function %t_udiv64_p2p63(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 0x8000_0000_0000_0000 + ; check: ushr_imm v0, 63 + return v1 +} + + +; -------- S32 -------- + +; ignored +function %t_sdiv32_p0(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 0 + ; check: sdiv_imm v0, 0 + return v1 +} + +; converted to a nop +function %t_sdiv32_p1(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 1 + ; check: nop + return v1 +} + +; ignored +function %t_sdiv32_n1(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -1 + ; check: sdiv_imm v0, -1 + return v1 +} + +; shift +function %t_sdiv32_p2(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 2 + ; check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: sshr_imm v3, 1 + ; check: v1 -> v4 + return v1 +} + +; shift +function %t_sdiv32_n2(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -2 + ; check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: sshr_imm v3, 1 + ; check: irsub_imm v4, 0 + return v1 +} + +; shift +function %t_sdiv32_p4(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 4 + ; check: v2 = sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; check: v5 = sshr_imm v4, 2 + ; check: v1 -> v5 + + return v1 +} + +; shift +function %t_sdiv32_n4(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 2 + ; check: irsub_imm v5, 0 + return v1 +} + +; shift +function %t_sdiv32_p2p30(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: v5 = sshr_imm v4, 30 + ; check: v1 -> v5 + return v1 +} + +; shift +function %t_sdiv32_n2p30(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 30 + ; check: irsub_imm v5, 0 + return v1 +} + +; there's no positive version of this, since 
-(-0x8000_0000) isn't +; representable. +function %t_sdiv32_n2p31(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -0x8000_0000 + ; check: sshr_imm v0, 30 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 31 + ; check: irsub_imm v5, 0 + return v1 +} + + +; -------- S64 -------- + +; ignored +function %t_sdiv64_p0(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 0 + ; check: sdiv_imm v0, 0 + return v1 +} + +; converted to a nop +function %t_sdiv64_p1(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 1 + ; check: nop + return v1 +} + +; ignored +function %t_sdiv64_n1(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -1 + ; check: sdiv_imm v0, -1 + return v1 +} + +; shift +function %t_sdiv64_p2(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: v4 = sshr_imm v3, 1 + ; check: v1 -> v4 + return v1 +} + +; shift +function %t_sdiv64_n2(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: sshr_imm v3, 1 + ; check: irsub_imm v4, 0 + return v1 +} + +; shift +function %t_sdiv64_p4(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 + ; check: v5 = sshr_imm v4, 2 + ; check: v1 -> v5 + return v1 +} + +; shift +function %t_sdiv64_n4(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 2 + ; check: irsub_imm v5, 0 + return v1 +} + +; shift +function %t_sdiv64_p2p62(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: v5 = sshr_imm v4, 62 + ; check: v1 -> v5 + return v1 +} + +; shift +function %t_sdiv64_n2p62(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 62 + ; check: irsub_imm v5, 0 + return v1 +} + +; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't +; representable. 
+function %t_sdiv64_n2p63(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -0x8000_0000_0000_0000 + ; check: sshr_imm v0, 62 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 63 + ; check: irsub_imm v5, 0 + return v1 +} diff --git a/cranelift/filetests/filetests/peepmatic/do_not_keep_applying_optimizations_after_replacing_with_an_alias.clif b/cranelift/filetests/filetests/peepmatic/do_not_keep_applying_optimizations_after_replacing_with_an_alias.clif index ceefd5bd1c..cc24167267 100644 --- a/cranelift/filetests/filetests/peepmatic/do_not_keep_applying_optimizations_after_replacing_with_an_alias.clif +++ b/cranelift/filetests/filetests/peepmatic/do_not_keep_applying_optimizations_after_replacing_with_an_alias.clif @@ -1,4 +1,4 @@ -test simple_preopt +test peepmatic target x86_64 ;; This file used to trigger assertions where we would keep trying to diff --git a/cranelift/filetests/filetests/peepmatic/do_not_reorder_instructions_when_transplanting.clif b/cranelift/filetests/filetests/peepmatic/do_not_reorder_instructions_when_transplanting.clif new file mode 100644 index 0000000000..7fc95f0fdb --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/do_not_reorder_instructions_when_transplanting.clif @@ -0,0 +1,22 @@ +test peepmatic +target x86_64 + +;; Test that although v5 can be replaced with v1, we don't transplant `load.i32 +;; v0` on top of `iadd v3, v4`, because that would move the load past other uses +;; of its result. + +function %foo(i64) -> i32 { +block0(v0: i64): + v1 = load.i32 v0 + v2 = iconst.i32 16 + v3 = iadd_imm v1, -16 + v4 = iconst.i32 16 + v5 = iadd v3, v4 + ; check: v1 = load.i32 v0 + ; nextln: v5 -> v1 + ; nextln: v2 = iconst.i32 16 + ; nextln: v3 = iadd_imm v1, -16 + ; nextln: v4 = iconst.i32 16 + ; nextln: nop + return v5 +} diff --git a/cranelift/filetests/filetests/peepmatic/fold-extended-move-wraparound.clif b/cranelift/filetests/filetests/peepmatic/fold-extended-move-wraparound.clif new file mode 100644 index 0000000000..e48b91a4b1 --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/fold-extended-move-wraparound.clif @@ -0,0 +1,14 @@ +test peepmatic +target x86_64 + +function %wraparound(i64 vmctx) -> f32 system_v { + gv0 = vmctx + gv1 = iadd_imm.i64 gv0, 48 + +block35(v0: i64): + v88 = iconst.i64 0 + v89 = iconst.i64 0x8000_0000_0000_0000 + v90 = ishl_imm v88, 0x8000_0000_0000_0000 + v91 = sshr v90, v89; check: sshr_imm v90, 0x8000_0000_0000_0000 + trap user0 +} diff --git a/cranelift/filetests/filetests/peepmatic/rem_by_const_non_power_of_2.clif b/cranelift/filetests/filetests/peepmatic/rem_by_const_non_power_of_2.clif new file mode 100644 index 0000000000..7df5baf4e3 --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/rem_by_const_non_power_of_2.clif @@ -0,0 +1,285 @@ +test peepmatic +target i686 baseline + +; -------- U32 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_urem32_p7(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 7 + ; check: iconst.i32 0x2492_4925 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: ushr_imm v6, 2 + ; check: imul_imm v7, 7 + ; check: isub v0, v8 + return v1 +} + +; simple case (mul, shift) +function %t_urem32_p125(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 125 + ; check: iconst.i32 0x1062_4dd3 + ; check: umulhi v0, v2 + ; check: ushr_imm v3, 3 + ; check: imul_imm v4, 125 + ; check: isub v0, v5 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_urem32_p641(i32) -> i32 { 
+block0(v0: i32): + v1 = urem_imm v0, 641 + ; check: iconst.i32 0x0066_3d81 + ; check: umulhi v0, v2 + ; check: imul_imm v3, 641 + ; check: isub v0, v4 + return v1 +} + + +; -------- S32 -------- + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_srem32_n6(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -6 + ; check: iconst.i32 0xffff_ffff_d555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: iadd v3, v4 + ; check: imul_imm v5, -6 + ; check: isub v0, v6 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_srem32_n5(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -5 + ; check: iconst.i32 0xffff_ffff_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 31 + ; check: iadd v4, v5 + ; check: imul_imm v6, -5 + ; check: isub v0, v7 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_srem32_n3(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -3 + ; check: iconst.i32 0x5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 31 + ; check: iadd v5, v6 + ; check: imul_imm v7, -3 + ; check: isub v0, v8 + return v1 +} + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_srem32_p6(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 6 + ; check: iconst.i32 0x2aaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: iadd v3, v4 + ; check: imul_imm v5, 6 + ; check: isub v0, v6 + return v1 +} + +; case d > 0 && M < 0 (mul, add, shift, add-sign-bit) +function %t_srem32_p7(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 7 + ; check: iconst.i32 0xffff_ffff_9249_2493 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 2 + ; check: ushr_imm v5, 31 + ; check: iadd v5, v6 + ; check: imul_imm v7, 7 + ; check: isub v0, v8 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_srem32_p625(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 625 + ; check: iconst.i32 0x68db_8bad + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 8 + ; check: ushr_imm v4, 31 + ; check: iadd v4, v5 + ; check: imul_imm v6, 625 + ; check: isub v0, v7 + return v1 +} + + +; -------- U64 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_urem64_p7(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 7 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: ushr_imm v6, 2 + ; check: imul_imm v7, 7 + ; check: isub v0, v8 + return v1 +} + +; simple case (mul, shift) +function %t_urem64_p9(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 9 + ; check: iconst.i64 0xe38e_38e3_8e38_e38f + ; check: umulhi v0, v2 + ; check: ushr_imm v3, 3 + ; check: imul_imm v4, 9 + ; check: isub v0, v5 + return v1 +} + +; complex case (mul, sub, shift, add, shift) +function %t_urem64_p125(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 125 + ; check: iconst.i64 0x0624_dd2f_1a9f_be77 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: ushr_imm v6, 6 + ; check: imul_imm v7, 125 + ; check: isub v0, v8 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_urem64_p274177(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 274177 + ; check: iconst.i64 0x3d30_f19c_d101 + ; check: umulhi v0, v2 + ; check: imul_imm v3, 0x0004_2f01 + ; check: isub v0, v4 + return v1 +} + + +; -------- S64 -------- + +; simple case (mul, shift, add-sign-bit) +function %t_srem64_n625(i64) -> i64 {
+block0(v0: i64): + v1 = srem_imm v0, -625 + ; check: iconst.i64 0xcb92_3a29_c779_a6b5 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: iadd v4, v5 + ; check: imul_imm v6, -625 + ; check: isub v0, v7 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_srem64_n6(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -6 + ; check: iconst.i64 0xd555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: iadd v3, v4 + ; check: imul_imm v5, -6 + ; check: isub v0, v6 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_srem64_n5(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -5 + ; check: iconst.i64 0x9999_9999_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 63 + ; check: iadd v4, v5 + ; check: imul_imm v6, -5 + ; check: isub v0, v7 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_srem64_n3(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -3 + ; check: iconst.i64 0x5555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 63 + ; check: iadd v5, v6 + ; check: imul_imm v7, -3 + ; check: isub v0, v8 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_srem64_p6(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 6 + ; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: iadd v3, v4 + ; check: imul_imm v5, 6 + ; check: isub v0, v6 + return v1 +} + +; case d > 0 && M < 0 (mul, add, shift, add-sign-bit) +function %t_srem64_p15(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 15 + ; check: iconst.i64 0x8888_8888_8888_8889 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 3 + ; check: ushr_imm v5, 63 + ; check: iadd v5, v6 + ; check: imul_imm v7, 15 + ; check: isub v0, v8 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_srem64_p625(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 625 + ; check: iconst.i64 0x346d_c5d6_3886_594b + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: iadd v4, v5 + ; check: imul_imm v6, 625 + ; check: isub v0, v7 + return v1 +} diff --git a/cranelift/filetests/filetests/peepmatic/rem_by_const_power_of_2.clif b/cranelift/filetests/filetests/peepmatic/rem_by_const_power_of_2.clif new file mode 100644 index 0000000000..c795b73c19 --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/rem_by_const_power_of_2.clif @@ -0,0 +1,291 @@ +test peepmatic +target i686 baseline + +; -------- U32 -------- + +; ignored +function %t_urem32_p0(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 0 + ; check: urem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_urem32_p1(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 1 + ; check: iconst.i32 0 + return v1 +} + +; shift +function %t_urem32_p2(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 2 + ; check: band_imm v0, 1 + return v1 +} + +; shift +function %t_urem32_p2p31(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 0x8000_0000 + ; check: band_imm v0, 0x7fff_ffff + return v1 +} + + +; -------- U64 -------- + +; ignored +function %t_urem64_p0(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 0 + ; check: urem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_urem64_p1(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 1 + ; check: iconst.i64 0 + return v1 +} + +; shift +function 
%t_urem64_p2(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 2 + ; check: band_imm v0, 1 + return v1 +} + +; shift +function %t_urem64_p2p63(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 0x8000_0000_0000_0000 + ; check: band_imm v0, 0x7fff_ffff_ffff_ffff + return v1 +} + + +; -------- S32 -------- + +; ignored +function %t_srem32_n1(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -1 + ; check: srem_imm v0, -1 + return v1 +} + +; ignored +function %t_srem32_p0(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 0 + ; check: srem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_srem32_p1(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 1 + ; check: iconst.i32 0 + return v1 +} + +; shift +function %t_srem32_p2(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 2 + ; check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem32_n2(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -2 + ; check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem32_p4(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem32_n4(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem32_p2p30(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xffff_ffff_c000_0000 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem32_n2p30(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xffff_ffff_c000_0000 + ; check: isub v0, v5 + return v1 +} + +; there's no positive version of this, since -(-0x8000_0000) isn't +; representable. 
+function %t_srem32_n2p31(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -0x8000_0000 + ; check: sshr_imm v0, 30 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xffff_ffff_8000_0000 + ; check: isub v0, v5 + return v1 +} + + +; -------- S64 -------- + +; ignored +function %t_srem64_n1(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -1 + ; check: srem_imm v0, -1 + return v1 +} + +; ignored +function %t_srem64_p0(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 0 + ; check: srem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_srem64_p1(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 1 + ; check: iconst.i64 0 + return v1 +} + +; shift +function %t_srem64_p2(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem64_n2(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem64_p4(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 + ; check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem64_n4(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 + ; check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem64_p2p62(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xc000_0000_0000_0000 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem64_n2p62(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xc000_0000_0000_0000 + ; check: isub v0, v5 + return v1 +} + +; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't +; representable. 
+function %t_srem64_n2p63(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -0x8000_0000_0000_0000 + ; check: sshr_imm v0, 62 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: band_imm v4, 0x8000_0000_0000_0000 + ; check: isub v0, v5 + return v1 +} diff --git a/cranelift/filetests/filetests/peepmatic/replace_branching_instructions_and_cfg_predecessors.clif b/cranelift/filetests/filetests/peepmatic/replace_branching_instructions_and_cfg_predecessors.clif new file mode 100644 index 0000000000..17ca472b7e --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/replace_branching_instructions_and_cfg_predecessors.clif @@ -0,0 +1,22 @@ +test peepmatic +target x86_64 + +function u0:2(i64 , i64) { + gv1 = load.i64 notrap aligned gv0 + heap0 = static gv1 + block0(v0: i64, v1: i64): + v16 = iconst.i32 6 + v17 = heap_addr.i64 heap0, v16, 1 + v18 = load.i32 v17 + v19 = iconst.i32 4 + v20 = icmp ne v18, v19 + v21 = bint.i32 v20 + brnz v21, block2 + jump block4 + block4: + jump block1 + block2: + jump block1 + block1: + return +} diff --git a/cranelift/filetests/filetests/peepmatic/simplify32.clif b/cranelift/filetests/filetests/peepmatic/simplify32.clif new file mode 100644 index 0000000000..b1c6786a05 --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/simplify32.clif @@ -0,0 +1,60 @@ +test peepmatic +target i686 + +;; 32-bits platforms. + +function %iadd_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = iadd v0, v1 + return v2 +} +; sameln: function %iadd_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = iadd_imm v0, 2 +; nextln: return v2 +; nextln: } + +function %isub_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = isub v0, v1 + return v2 +} +; sameln: function %isub_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = iadd_imm v0, -2 +; nextln: return v2 +; nextln: } + +function %icmp_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = icmp slt v0, v1 + v3 = bint.i32 v2 + return v3 +} +; sameln: function %icmp_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = icmp_imm slt v0, 2 +; nextln: v3 = bint.i32 v2 +; nextln: return v3 +; nextln: } + +;; Don't simplify operations that would get illegal because of lack of native +;; support. +function %iadd_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 2 + v2 = iadd v0, v1 + return v2 +} +; sameln: function %iadd_imm +; nextln: block0(v0: i64): +; nextln: v1 = iconst.i64 2 +; nextln: v2 = iadd v0, v1 +; nextln: return v2 +; nextln: } diff --git a/cranelift/filetests/filetests/peepmatic/simplify64.clif b/cranelift/filetests/filetests/peepmatic/simplify64.clif new file mode 100644 index 0000000000..93c289ccdd --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/simplify64.clif @@ -0,0 +1,326 @@ +test peepmatic +target x86_64 + +;; 64-bits platforms. 
+ +function %iadd_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = iadd v0, v1 + return v2 +} +; sameln: function %iadd_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = iadd_imm v0, 2 +; nextln: return v2 +; nextln: } + +function %isub_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = isub v0, v1 + return v2 +} +; sameln: function %isub_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = iadd_imm v0, -2 +; nextln: return v2 +; nextln: } + +function %icmp_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = icmp slt v0, v1 + v3 = bint.i32 v2 + return v3 +} +; sameln: function %icmp_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = icmp_imm slt v0, 2 +; nextln: v3 = bint.i32 v2 +; nextln: return v3 +; nextln: } + +function %ifcmp_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = ifcmp v0, v1 + brif eq v2, block1 + jump block2 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} +; sameln: function %ifcmp_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = ifcmp_imm v0, 2 +; nextln: brif eq v2, block1 +; nextln: jump block2 +; nextln: +; nextln: block1: +; nextln: v3 = iconst.i32 1 +; nextln: return v3 +; nextln: +; nextln: block2: +; nextln: v4 = iconst.i32 2 +; nextln: return v4 +; nextln: } + +function %brz_bint(i32) { +block0(v0: i32): + v3 = icmp_imm slt v0, 0 + v1 = bint.i32 v3 + v2 = select v1, v1, v1 + trapz v1, user0 + brz v1, block1 + jump block2 + +block1: + return + +block2: + return +} +; sameln: function %brz_bint +; nextln: (v0: i32): +; nextln: v3 = icmp_imm slt v0, 0 +; nextln: v1 = bint.i32 v3 +; nextln: v2 = select v3, v1, v1 +; nextln: trapz v3, user0 +; nextln: brnz v3, block2 +; nextln: jump block1 + +function %irsub_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = isub v1, v0 + return v2 +} +; sameln: function %irsub_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = irsub_imm v0, 2 +; nextln: return v2 +; nextln: } + +;; Sign-extensions. 
+ +;; 8 -> 16 +function %uextend_8_16() -> i16 { +block0: + v0 = iconst.i16 37 + v1 = ishl_imm v0, 8 + v2 = ushr_imm v1, 8 + return v2 +} +; sameln: function %uextend_8_16 +; nextln: block0: +; nextln: v0 = iconst.i16 37 +; nextln: v1 = ishl_imm v0, 8 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = uextend.i16 v3 +; nextln: return v2 +; nextln: } + +function %sextend_8_16() -> i16 { +block0: + v0 = iconst.i16 37 + v1 = ishl_imm v0, 8 + v2 = sshr_imm v1, 8 + return v2 +} +; sameln: function %sextend_8_16 +; nextln: block0: +; nextln: v0 = iconst.i16 37 +; nextln: v1 = ishl_imm v0, 8 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = sextend.i16 v3 +; nextln: return v2 +; nextln: } + +;; 8 -> 32 +function %uextend_8_32() -> i32 { +block0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 24 + v2 = ushr_imm v1, 24 + return v2 +} +; sameln: function %uextend_8_32 +; nextln: block0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 24 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = uextend.i32 v3 +; nextln: return v2 +; nextln: } + +function %sextend_8_32() -> i32 { +block0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 24 + v2 = sshr_imm v1, 24 + return v2 +} +; sameln: function %sextend_8_32 +; nextln: block0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 24 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = sextend.i32 v3 +; nextln: return v2 +; nextln: } + +;; 16 -> 32 +function %uextend_16_32() -> i32 { +block0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 16 + v2 = ushr_imm v1, 16 + return v2 +} +; sameln: function %uextend_16_32 +; nextln: block0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 16 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = uextend.i32 v3 +; nextln: return v2 +; nextln: } + +function %sextend_16_32() -> i32 { +block0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 16 + v2 = sshr_imm v1, 16 + return v2 +} +; sameln: function %sextend_16_32 +; nextln: block0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 16 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = sextend.i32 v3 +; nextln: return v2 +; nextln: } + +;; 8 -> 64 +function %uextend_8_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 56 + v2 = ushr_imm v1, 56 + return v2 +} +; sameln: function %uextend_8_64 +; nextln: block0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 56 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = uextend.i64 v3 +; nextln: return v2 +; nextln: } + +function %sextend_8_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 56 + v2 = sshr_imm v1, 56 + return v2 +} +; sameln: function %sextend_8_64 +; nextln: block0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 56 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = sextend.i64 v3 +; nextln: return v2 +; nextln: } + +;; 16 -> 64 +function %uextend_16_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 48 + v2 = ushr_imm v1, 48 + return v2 +} +; sameln: function %uextend_16_64 +; nextln: block0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 48 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = uextend.i64 v3 +; nextln: return v2 +; nextln: } + +function %sextend_16_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 48 + v2 = sshr_imm v1, 48 + return v2 +} +; sameln: function %sextend_16_64 +; nextln: block0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 48 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = sextend.i64 v3 +; nextln: return v2 +; nextln: } + +;; 32 -> 64 +function %uextend_32_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 32 + 
+;; 32 -> 64
+function %uextend_32_64() -> i64 {
+block0:
+    v0 = iconst.i64 37
+    v1 = ishl_imm v0, 32
+    v2 = ushr_imm v1, 32
+    return v2
+}
+; sameln: function %uextend_32_64
+; nextln: block0:
+; nextln: v0 = iconst.i64 37
+; nextln: v1 = ishl_imm v0, 32
+; nextln: v3 = ireduce.i32 v0
+; nextln: v2 = uextend.i64 v3
+; nextln: return v2
+; nextln: }
+
+function %sextend_32_64() -> i64 {
+block0:
+    v0 = iconst.i64 37
+    v1 = ishl_imm v0, 32
+    v2 = sshr_imm v1, 32
+    return v2
+}
+; sameln: function %sextend_32_64
+; nextln: block0:
+; nextln: v0 = iconst.i64 37
+; nextln: v1 = ishl_imm v0, 32
+; nextln: v3 = ireduce.i32 v0
+; nextln: v2 = sextend.i64 v3
+; nextln: return v2
+; nextln: }
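+
+;; The test below checks folding of offsetting immediates: adding 42 and then
+;; -42 cancels out, so the second `iadd_imm` becomes a `nop` and its result
+;; `v2` is aliased to the original `v0`; the now-unused `v1` is left behind,
+;; presumably for a later dead-code pass to remove.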
+function %add_imm_fold(i32) -> i32 {
+block0(v0: i32):
+    v1 = iadd_imm v0, 42
+    v2 = iadd_imm v1, -42
+    return v2
+}
+; sameln: function %add_imm_fold(i32)
+; nextln: block0(v0: i32):
+; nextln: v2 -> v0
+; nextln: v1 = iadd_imm v0, 42
+; nextln: nop
+; nextln: return v2
diff --git a/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif b/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif
new file mode 100644
index 0000000000..3000369bb5
--- /dev/null
+++ b/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif
@@ -0,0 +1,17 @@
+test peepmatic
+target x86_64
+
+;; The `isub` is a no-op, but we can't replace the whole `isub` instruction
+;; with the instruction that defines its `v2` operand, because `v2` is only
+;; one of that instruction's results. Instead, we make an alias `v3 -> v2`.
+
+function %replace_inst_with_alias() -> i32 {
+block0:
+    v0 = iconst.i32 0
+    v1, v2 = x86_smulx v0, v0
+    v3 = isub v2, v0
+    ; check: v0 = iconst.i32 0
+    ; nextln: v1, v2 = x86_smulx v0, v0
+    ; nextln: v3 -> v2
+    return v3
+}
diff --git a/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif b/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif
index 4a4b7a80b6..101e4eb201 100644
--- a/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif
+++ b/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif
@@ -7,13 +7,14 @@ function %indir_udiv32(i32) -> i32 {
 block0(v0: i32):
     v1 = iconst.i32 7
     v2 = udiv v0, v1
-    ; check: v4 = iconst.i32 0x2492_4925
-    ; nextln: v5 = umulhi v0, v4
-    ; nextln: v6 = isub v0, v5
-    ; nextln: v7 = ushr_imm v6, 1
-    ; nextln: v8 = iadd v7, v5
-    ; nextln: v9 = ushr_imm v8, 2
-    ; nextln: v2 -> v9
+    ; check: iconst.i32 7
+    ; check: iconst.i32 0x2492_4925
+    ; check: umulhi v0, v3
+    ; check: isub v0, v4
+    ; check: ushr_imm v5, 1
+    ; check: iadd v6, v4
+    ; check: v8 = ushr_imm v7, 2
+    ; check: v2 -> v8
     return v2
 }
 
@@ -21,12 +22,13 @@ function %indir_sdiv32(i32) -> i32 {
 block0(v0: i32):
     v1 = iconst.i32 -17
     v2 = sdiv v0, v1
-    ; check: v4 = iconst.i32 0xffff_ffff_8787_8787
-    ; nextln: v5 = smulhi v0, v4
-    ; nextln: v6 = sshr_imm v5, 3
-    ; nextln: v7 = ushr_imm v6, 31
-    ; nextln: v8 = iadd v6, v7
-    ; nextln: v2 -> v8
+    ; check: iconst.i32 -17
+    ; check: iconst.i32 0xffff_ffff_8787_8787
+    ; check: smulhi v0, v3
+    ; check: sshr_imm v4, 3
+    ; check: ushr_imm v5, 31
+    ; check: v7 = iadd v5, v6
+    ; check: v2 -> v7
     return v2
 }
 
@@ -34,10 +36,11 @@ function %indir_udiv64(i64) -> i64 {
 block0(v0: i64):
     v1 = iconst.i64 1337
     v2 = udiv v0, v1
-    ; check: v4 = iconst.i64 0xc411_9d95_2866_a139
-    ; nextln: v5 = umulhi v0, v4
-    ; nextln: v6 = ushr_imm v5, 10
-    ; nextln: v2 -> v6
+    ; check: iconst.i64 1337
+    ; check: iconst.i64 0xc411_9d95_2866_a139
+    ; check: umulhi v0, v3
+    ; check: v5 = ushr_imm v4, 10
+    ; check: v2 -> v5
     return v2
 }
 
@@ -45,11 +48,12 @@ function %indir_sdiv64(i64) -> i64 {
 block0(v0: i64):
     v1 = iconst.i64 -90210
     v2 = sdiv v0, v1
-    ; check: v4 = iconst.i64 0xd181_4ee8_939c_b8bb
-    ; nextln: v5 = smulhi v0, v4
-    ; nextln: v6 = sshr_imm v5, 14
-    ; nextln: v7 = ushr_imm v6, 63
-    ; nextln: v8 = iadd v6, v7
-    ; nextln: v2 -> v8
+    ; check: iconst.i64 0xffff_ffff_fffe_9f9e
+    ; check: iconst.i64 0xd181_4ee8_939c_b8bb
+    ; check: smulhi v0, v3
+    ; check: sshr_imm v4, 14
+    ; check: ushr_imm v5, 63
+    ; check: v7 = iadd v5, v6
+    ; check: v2 -> v7
     return v2
 }
diff --git a/cranelift/filetests/filetests/simple_preopt/simplify32.clif b/cranelift/filetests/filetests/simple_preopt/simplify32.clif
index cf238fb5ed..2582fd69aa 100644
--- a/cranelift/filetests/filetests/simple_preopt/simplify32.clif
+++ b/cranelift/filetests/filetests/simple_preopt/simplify32.clif
@@ -58,3 +58,4 @@ block0(v0: i64):
 ; nextln: v2 = iadd v0, v1
 ; nextln: return v2
 ; nextln: }
+
diff --git a/cranelift/filetests/filetests/simple_preopt/simplify64.clif b/cranelift/filetests/filetests/simple_preopt/simplify64.clif
index 6489c3bd1e..4ceabdc335 100644
--- a/cranelift/filetests/filetests/simple_preopt/simplify64.clif
+++ b/cranelift/filetests/filetests/simple_preopt/simplify64.clif
@@ -44,37 +44,6 @@ block0(v0: i32):
 ; nextln: return v3
 ; nextln: }
 
-function %ifcmp_imm(i32) -> i32 {
-block0(v0: i32):
-    v1 = iconst.i32 2
-    v2 = ifcmp v0, v1
-    brif eq v2, block1
-    jump block2
-
-block1:
-    v3 = iconst.i32 1
-    return v3
-
-block2:
-    v4 = iconst.i32 2
-    return v4
-}
-; sameln: function %ifcmp_imm
-; nextln: block0(v0: i32):
-; nextln: v1 = iconst.i32 2
-; nextln: v2 = ifcmp_imm v0, 2
-; nextln: brif eq v2, block1
-; nextln: jump block2
-; nextln:
-; nextln: block1:
-; nextln: v3 = iconst.i32 1
-; nextln: return v3
-; nextln:
-; nextln: block2:
-; nextln: v4 = iconst.i32 2
-; nextln: return v4
-; nextln: }
-
 function %brz_bint(i32) {
 block0(v0: i32):
     v3 = icmp_imm slt v0, 0
diff --git a/cranelift/filetests/src/lib.rs b/cranelift/filetests/src/lib.rs
index bc1a6df1e2..5cf7331225 100644
--- a/cranelift/filetests/src/lib.rs
+++ b/cranelift/filetests/src/lib.rs
@@ -45,6 +45,7 @@ mod test_domtree;
 mod test_interpret;
 mod test_legalizer;
 mod test_licm;
+mod test_peepmatic;
 mod test_postopt;
 mod test_preopt;
 mod test_print_cfg;
@@ -128,6 +129,7 @@ fn new_subtest(parsed: &TestCommand) -> subtest::SubtestResult<Box<dyn SubTest>
         "interpret" => test_interpret::subtest(parsed),
         "legalizer" => test_legalizer::subtest(parsed),
         "licm" => test_licm::subtest(parsed),
+        "peepmatic" => test_peepmatic::subtest(parsed),
         "postopt" => test_postopt::subtest(parsed),
         "preopt" => test_preopt::subtest(parsed),
         "print-cfg" => test_print_cfg::subtest(parsed),
diff --git a/cranelift/filetests/src/test_peepmatic.rs b/cranelift/filetests/src/test_peepmatic.rs
new file mode 100644
index 0000000000..fc701c7046
--- /dev/null
+++ b/cranelift/filetests/src/test_peepmatic.rs
@@ -0,0 +1,65 @@
+//! Test command for `peepmatic`-generated peephole optimizers.
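+//!
+//! A `.clif` file opts into this runner with a header like the one in
+//! `simplify_instruction_into_alias_of_value.clif` above; the `target`
+//! line matters because this subtest requires an ISA:
+//!
+//! ```text
+//! test peepmatic
+//! target x86_64
+//! ```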
+
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen;
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+
+struct TestPreopt;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "peepmatic");
+    if parsed.options.is_empty() {
+        Ok(Box::new(TestPreopt))
+    } else {
+        Err(format!("No options allowed on {}", parsed))
+    }
+}
+
+impl SubTest for TestPreopt {
+    fn name(&self) -> &'static str {
+        "peepmatic"
+    }
+
+    fn is_mutating(&self) -> bool {
+        true
+    }
+
+    fn needs_isa(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+        let isa = context.isa.expect("peepmatic needs an ISA");
+
+        comp_ctx.compute_cfg();
+        comp_ctx
+            .preopt(isa)
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, Into::into(e)))?;
+        let text = &comp_ctx.func.display(isa).to_string();
+        log::debug!("After peepmatic-based simple_preopt:\n{}", text);
+
+        // Only actually run the filecheck if peepmatic is enabled, because it
+        // can generate slightly different code (alias a result vs. replace an
+        // instruction) than the non-peepmatic versions of these peephole
+        // optimizations. Note that the non-`peepmatic` results can be tested
+        // with the `test simple_preopt` subtest.
+        if cfg!(feature = "enable-peepmatic") {
+            run_filecheck(&text, context)
+        } else {
+            Ok(())
+        }
+    }
+}
diff --git a/cranelift/filetests/src/test_simple_preopt.rs b/cranelift/filetests/src/test_simple_preopt.rs
index 1463d1c69a..f6cdec391f 100644
--- a/cranelift/filetests/src/test_simple_preopt.rs
+++ b/cranelift/filetests/src/test_simple_preopt.rs
@@ -39,6 +39,16 @@ impl SubTest for TestSimplePreopt {
             .map_err(|e| pretty_error(&comp_ctx.func, context.isa, Into::into(e)))?;
         let text = &comp_ctx.func.display(isa).to_string();
         log::debug!("After simple_preopt:\n{}", text);
-        run_filecheck(&text, context)
+
+        // Only actually run the filecheck if peepmatic is *not* enabled,
+        // because it can generate slightly different code (alias a result vs.
+        // replace an instruction) than the non-peepmatic versions of these
+        // peephole optimizations. Note that the `peepmatic`-based results can
+        // be tested with the `test peepmatic` subtest.
+        if cfg!(feature = "enable-peepmatic") {
+            Ok(())
+        } else {
+            run_filecheck(&text, context)
+        }
     }
 }
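
Note on the `div_by_const_indirect.clif` expectations above: the checked
sequences are the standard round-up multiplicative-inverse ("magic number")
expansions of division by a constant. Here is a minimal stand-alone sketch of
the `udiv`-by-7 case (the helper name is invented for illustration; only the
constant and the operation sequence come from the test), using
`0x2492_4925 == ceil(2^32 / 7)`:

fn udiv7_via_magic(n: u32) -> u32 {
    // ceil(2^32 / 7); matches `; check: iconst.i32 0x2492_4925`.
    const M: u64 = 0x2492_4925;
    let q = ((n as u64 * M) >> 32) as u32; // umulhi v0, v3
    let t = (n - q) >> 1;                  // ushr_imm (isub v0, v4), 1
    (t + q) >> 2                           // ushr_imm (iadd v6, v4), 2
}

fn main() {
    // Spot-check the identity against plain hardware division.
    for n in [0u32, 1, 6, 7, 8, 1000, u32::MAX] {
        assert_eq!(udiv7_via_magic(n), n / 7);
    }
}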