Add egraph cprop optimizations for splat (#6148)

This commit adds constant-propagation optimizations that turn a
`splat`-of-constant into a `vconst` node. This should help with later
hoisting of these constants out of loops if this pattern shows up in wasm.
This commit is contained in:
Alex Crichton
2023-04-05 11:10:45 -05:00
committed by GitHub
parent 3275c45993
commit d45cbba83f
3 changed files with 213 additions and 1 deletions

View File

@@ -4,7 +4,7 @@ use crate::egraph::{NewOrExistingInst, OptimizeCtx};
use crate::ir::condcodes;
pub use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::dfg::ValueDef;
pub use crate::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64, Uimm8};
pub use crate::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64, Uimm8, V128Imm};
pub use crate::ir::types::*;
pub use crate::ir::{
dynamic_to_fixed, AtomicRmwOp, Block, BlockCall, Constant, DataFlowGraph, DynamicStackSlot,
@@ -128,4 +128,11 @@ impl<'a, 'b, 'c> generated_code::Context for IsleContext<'a, 'b, 'c> {
self.ctx.stats.subsume += 1;
value
}
/// Creates a 128-bit constant whose low and high 64-bit halves both hold
/// `val`, inserts it into `self.ctx.func.dfg.constants`, and returns the
/// resulting `Constant` handle.
fn splat64(&mut self, val: u64) -> Constant {
    // The 16 bytes are laid out little-endian, so the little-endian encoding
    // of `val` is written once into each 8-byte half.
    let lane = val.to_le_bytes();
    let mut bytes = [0u8; 16];
    for half in bytes.chunks_exact_mut(8) {
        half.copy_from_slice(&lane);
    }
    self.ctx.func.dfg.constants.insert(V128Imm(bytes).into())
}
}

View File

@@ -171,3 +171,27 @@
y)
;; TODO: fadd, fsub, fmul, fdiv, fneg, fabs
;; A splat of a constant can become a direct `vconst` with the appropriate bit
;; pattern.
;;
;; Integer lanes: the `iconst` immediate is passed through `u64_uextend_imm64`
;; together with its lane type before being replicated by the `splatN` helper
;; of matching width.
;; NOTE(review): presumably this zero-extends the immediate so that no bits
;; above the lane width are set -- the `splatN` helpers below rely on that.
(rule (simplify (splat dst (iconst $I8 n)))
(vconst dst (splat8 (u64_uextend_imm64 $I8 n))))
(rule (simplify (splat dst (iconst $I16 n)))
(vconst dst (splat16 (u64_uextend_imm64 $I16 n))))
(rule (simplify (splat dst (iconst $I32 n)))
(vconst dst (splat32 (u64_uextend_imm64 $I32 n))))
(rule (simplify (splat dst (iconst $I64 n)))
(vconst dst (splat64 (u64_uextend_imm64 $I64 n))))
;; Float lanes: the raw bits of the float constant are extracted with
;; `u32_from_ieee32` / `u64_from_ieee64` and replicated like an integer of the
;; same width.
(rule (simplify (splat dst (f32const _ (u32_from_ieee32 n))))
(vconst dst (splat32 n)))
(rule (simplify (splat dst (f64const _ (u64_from_ieee64 n))))
(vconst dst (splat64 n)))
;; Helpers that widen a replicated pattern step by step. Each one ORs `n` with
;; a copy of itself shifted left by the current lane width (8, 16 or 32 bits)
;; and hands the result to the next wider helper, ending in `splat64`.
;; Because the combination is a plain OR, any bits of `n` set above the
;; current lane width would leak into the neighbouring lane.
(decl splat8 (u64) Constant)
(rule (splat8 n) (splat16 (u64_or n (u64_shl n 8))))
(decl splat16 (u64) Constant)
(rule (splat16 n) (splat32 (u64_or n (u64_shl n 16))))
(decl splat32 (u64) Constant)
(rule (splat32 n) (splat64 (u64_or n (u64_shl n 32))))
;; `splat64` is not defined by ISLE rules; it is an external constructor bound
;; to the function named `splat64`.
(decl splat64 (u64) Constant)
(extern constructor splat64 splat64)

View File

@@ -0,0 +1,181 @@
test optimize precise-output
set opt_level=speed
set use_egraphs=true
target x86_64
function %i8x16_1() -> i8x16 {
block0:
v1 = iconst.i8 0x33
v2 = splat.i8x16 v1
return v2
}
; function %i8x16_1() -> i8x16 fast {
; const0 = 0x33333333333333333333333333333333
;
; block0:
; v3 = vconst.i8x16 const0
; v4 -> v3
; return v3 ; v3 = const0
; }
function %i8x16_2() -> i8x16 {
block0:
v1 = iconst.i8 0x80
v2 = splat.i8x16 v1
return v2
}
; function %i8x16_2() -> i8x16 fast {
; const0 = 0x80808080808080808080808080808080
;
; block0:
; v3 = vconst.i8x16 const0
; v4 -> v3
; return v3 ; v3 = const0
; }
function %i16x8_1() -> i16x8 {
block0:
v1 = iconst.i16 0x1234
v2 = splat.i16x8 v1
return v2
}
; function %i16x8_1() -> i16x8 fast {
; const0 = 0x12341234123412341234123412341234
;
; block0:
; v3 = vconst.i16x8 const0
; v4 -> v3
; return v3 ; v3 = const0
; }
function %i16x8_2() -> i16x8 {
block0:
v1 = iconst.i16 0x8765
v2 = splat.i16x8 v1
return v2
}
; function %i16x8_2() -> i16x8 fast {
; const0 = 0x87658765876587658765876587658765
;
; block0:
; v3 = vconst.i16x8 const0
; v4 -> v3
; return v3 ; v3 = const0
; }
function %i32x4_1() -> i32x4 {
block0:
v1 = iconst.i32 0x12345678
v2 = splat.i32x4 v1
return v2
}
; function %i32x4_1() -> i32x4 fast {
; const0 = 0x12345678123456781234567812345678
;
; block0:
; v3 = vconst.i32x4 const0
; v4 -> v3
; return v3 ; v3 = const0
; }
function %i32x4_2() -> i32x4 {
block0:
v1 = iconst.i32 0x87654321
v2 = splat.i32x4 v1
return v2
}
; function %i32x4_2() -> i32x4 fast {
; const0 = 0x87654321876543218765432187654321
;
; block0:
; v3 = vconst.i32x4 const0
; v4 -> v3
; return v3 ; v3 = const0
; }
function %i64x2_1() -> i64x2 {
block0:
v1 = iconst.i64 0x0123456789abcdef
v2 = splat.i64x2 v1
return v2
}
; function %i64x2_1() -> i64x2 fast {
; const0 = 0x0123456789abcdef0123456789abcdef
;
; block0:
; v3 = vconst.i64x2 const0
; v4 -> v3
; return v3 ; v3 = const0
; }
function %i64x2_2() -> i64x2 {
block0:
v1 = iconst.i64 0xfedcba9876543210
v2 = splat.i64x2 v1
return v2
}
; function %i64x2_2() -> i64x2 fast {
; const0 = 0xfedcba9876543210fedcba9876543210
;
; block0:
; v3 = vconst.i64x2 const0
; v4 -> v3
; return v3 ; v3 = const0
; }
function %i8x16_3() -> i8x16 {
block0:
v1 = iconst.i8 -2
v2 = splat.i8x16 v1
return v2
}
; function %i8x16_3() -> i8x16 fast {
; const0 = 0xfefefefefefefefefefefefefefefefe
;
; block0:
; v3 = vconst.i8x16 const0
; v4 -> v3
; return v3 ; v3 = const0
; }
function %f32x4() -> f32x4 {
block0:
v1 = f32const 0x3.0
v2 = splat.f32x4 v1
return v2
}
; function %f32x4() -> f32x4 fast {
; const0 = 0x40400000404000004040000040400000
;
; block0:
; v3 = vconst.f32x4 const0
; v4 -> v3
; return v3 ; v3 = const0
; }
function %f64x2() -> f64x2 {
block0:
v1 = f64const 0x4.0
v2 = splat.f64x2 v1
return v2
}
; function %f64x2() -> f64x2 fast {
; const0 = 0x40100000000000004010000000000000
;
; block0:
; v3 = vconst.f64x2 const0
; v4 -> v3
; return v3 ; v3 = const0
; }