diff --git a/cranelift/codegen/src/opts.rs b/cranelift/codegen/src/opts.rs
index fa6fa600f9..3a78cac5c5 100644
--- a/cranelift/codegen/src/opts.rs
+++ b/cranelift/codegen/src/opts.rs
@@ -4,7 +4,7 @@ use crate::egraph::{NewOrExistingInst, OptimizeCtx};
 use crate::ir::condcodes;
 pub use crate::ir::condcodes::{FloatCC, IntCC};
 use crate::ir::dfg::ValueDef;
-pub use crate::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64, Uimm8};
+pub use crate::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64, Uimm8, V128Imm};
 pub use crate::ir::types::*;
 pub use crate::ir::{
     dynamic_to_fixed, AtomicRmwOp, Block, BlockCall, Constant, DataFlowGraph, DynamicStackSlot,
@@ -128,4 +128,11 @@ impl<'a, 'b, 'c> generated_code::Context for IsleContext<'a, 'b, 'c> {
         self.ctx.stats.subsume += 1;
         value
     }
+
+    fn splat64(&mut self, val: u64) -> Constant { // builds a 128-bit constant holding `val` in both 64-bit halves
+        let val = u128::from(val); // widen to 128 bits so the shift by 64 below stays in range
+        let val = val | (val << 64); // low half and high half now both equal the original `val`
+        let imm = V128Imm(val.to_le_bytes()); // wrap the 16 bytes, least-significant byte first
+        self.ctx.func.dfg.constants.insert(imm.into()) // store in `dfg.constants`; the value `insert` returns is the result
+    }
 }
diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle
index f0def3acca..79c88d8a3a 100644
--- a/cranelift/codegen/src/opts/cprop.isle
+++ b/cranelift/codegen/src/opts/cprop.isle
@@ -171,3 +171,27 @@
       y)
 
 ;; TODO: fadd, fsub, fmul, fdiv, fneg, fabs
+
+;; A splat of a constant can become a direct `vconst` with the appropriate bit
+;; pattern.
+(rule (simplify (splat dst (iconst $I8 n)))
+      (vconst dst (splat8 (u64_uextend_imm64 $I8 n)))) ;; NOTE(review): presumably the extend leaves bits above the lane width zero - confirm
+(rule (simplify (splat dst (iconst $I16 n)))
+      (vconst dst (splat16 (u64_uextend_imm64 $I16 n))))
+(rule (simplify (splat dst (iconst $I32 n)))
+      (vconst dst (splat32 (u64_uextend_imm64 $I32 n))))
+(rule (simplify (splat dst (iconst $I64 n)))
+      (vconst dst (splat64 (u64_uextend_imm64 $I64 n))))
+(rule (simplify (splat dst (f32const _ (u32_from_ieee32 n))))
+      (vconst dst (splat32 n))) ;; float lanes are splatted by their raw bit pattern
+(rule (simplify (splat dst (f64const _ (u64_from_ieee64 n))))
+      (vconst dst (splat64 n)))
+
+(decl splat8 (u64) Constant) ;; splatN helpers: each one hands a doubled pattern to the next wider helper
+(rule (splat8 n) (splat16 (u64_or n (u64_shl n 8)))) ;; ORs `n` with `n` shifted left by 8, then defers to splat16
+(decl splat16 (u64) Constant)
+(rule (splat16 n) (splat32 (u64_or n (u64_shl n 16)))) ;; ORs `n` with `n` shifted left by 16, then defers to splat32
+(decl splat32 (u64) Constant)
+(rule (splat32 n) (splat64 (u64_or n (u64_shl n 32)))) ;; ORs `n` with `n` shifted left by 32, then defers to splat64
+(decl splat64 (u64) Constant)
+(extern constructor splat64 splat64) ;; end of the chain: external constructor (see `splat64` in opts.rs)
diff --git a/cranelift/filetests/filetests/egraph/cprop-splat.clif b/cranelift/filetests/filetests/egraph/cprop-splat.clif
new file mode 100644
index 0000000000..549663480e
--- /dev/null
+++ b/cranelift/filetests/filetests/egraph/cprop-splat.clif
@@ -0,0 +1,181 @@
+test optimize precise-output
+set opt_level=speed
+set use_egraphs=true
+target x86_64
+
+function %i8x16_1() -> i8x16 {
+block0:
+    v1 = iconst.i8 0x33
+    v2 = splat.i8x16 v1
+    return v2
+}
+
+; function %i8x16_1() -> i8x16 fast {
+;     const0 = 0x33333333333333333333333333333333
+;
+; block0:
+;     v3 = vconst.i8x16 const0
+;     v4 -> v3
+;     return v3  ; v3 = const0
+; }
+
+function %i8x16_2() -> i8x16 {
+block0:
+    v1 = iconst.i8 0x80
+    v2 = splat.i8x16 v1
+    return v2
+}
+
+; function %i8x16_2() -> i8x16 fast {
+;     const0 = 0x80808080808080808080808080808080
+;
+; block0:
+;     v3 = vconst.i8x16 const0
+;     v4 -> v3
+;     return v3  ; v3 = const0
+; }
+
+function %i16x8_1() -> i16x8 {
+block0:
+    v1 = iconst.i16 0x1234
+    v2 = splat.i16x8 v1
+    return v2
+}
+
+; function %i16x8_1() -> i16x8 fast {
+;     const0 = 0x12341234123412341234123412341234
+;
+; block0:
+;     v3 = vconst.i16x8 const0
+;     v4 -> v3
+;     return v3  ; v3 = const0
+; }
+
+function %i16x8_2() -> i16x8 {
+block0:
+    v1 = iconst.i16 0x8765
+    v2 = splat.i16x8 v1
+    return v2
+}
+
+; function %i16x8_2() -> i16x8 fast {
+;     const0 = 0x87658765876587658765876587658765
+;
+; block0:
+;     v3 = vconst.i16x8 const0
+;     v4 -> v3
+;     return v3  ; v3 = const0
+; }
+
+function %i32x4_1() -> i32x4 {
+block0:
+    v1 = iconst.i32 0x12345678
+    v2 = splat.i32x4 v1
+    return v2
+}
+
+; function %i32x4_1() -> i32x4 fast {
+;     const0 = 0x12345678123456781234567812345678
+;
+; block0:
+;     v3 = vconst.i32x4 const0
+;     v4 -> v3
+;     return v3  ; v3 = const0
+; }
+
+function %i32x4_2() -> i32x4 {
+block0:
+    v1 = iconst.i32 0x87654321
+    v2 = splat.i32x4 v1
+    return v2
+}
+
+; function %i32x4_2() -> i32x4 fast {
+;     const0 = 0x87654321876543218765432187654321
+;
+; block0:
+;     v3 = vconst.i32x4 const0
+;     v4 -> v3
+;     return v3  ; v3 = const0
+; }
+
+function %i64x2_1() -> i64x2 {
+block0:
+    v1 = iconst.i64 0x0123456789abcdef
+    v2 = splat.i64x2 v1
+    return v2
+}
+
+; function %i64x2_1() -> i64x2 fast {
+;     const0 = 0x0123456789abcdef0123456789abcdef
+;
+; block0:
+;     v3 = vconst.i64x2 const0
+;     v4 -> v3
+;     return v3  ; v3 = const0
+; }
+
+function %i64x2_2() -> i64x2 {
+block0:
+    v1 = iconst.i64 0xfedcba9876543210
+    v2 = splat.i64x2 v1
+    return v2
+}
+
+; function %i64x2_2() -> i64x2 fast {
+;     const0 = 0xfedcba9876543210fedcba9876543210
+;
+; block0:
+;     v3 = vconst.i64x2 const0
+;     v4 -> v3
+;     return v3  ; v3 = const0
+; }
+
+function %i8x16_3() -> i8x16 {
+block0:
+    v1 = iconst.i8 -2
+    v2 = splat.i8x16 v1
+    return v2
+}
+
+; function %i8x16_3() -> i8x16 fast {
+;     const0 = 0xfefefefefefefefefefefefefefefefe
+;
+; block0:
+;     v3 = vconst.i8x16 const0
+;     v4 -> v3
+;     return v3  ; v3 = const0
+; }
+
+function %f32x4() -> f32x4 {
+block0:
+    v1 = f32const 0x3.0
+    v2 = splat.f32x4 v1
+    return v2
+}
+
+; function %f32x4() -> f32x4 fast {
+;     const0 = 0x40400000404000004040000040400000
+;
+; block0:
+;     v3 = vconst.f32x4 const0
+;     v4 -> v3
+;     return v3  ; v3 = const0
+; }
+
+function %f64x2() -> f64x2 {
+block0:
+    v1 = f64const 0x4.0
+    v2 = splat.f64x2 v1
+    return v2
+}
+
+; function %f64x2() -> f64x2 fast {
+;     const0 = 0x40100000000000004010000000000000
+;
+; block0:
+;     v3 = vconst.f64x2 const0
+;     v4 -> v3
+;     return v3  ; v3 = const0
+; }
+