Rather than outright replacing parts of our existing peephole optimization passes, this makes peepmatic an optional cargo feature that can be enabled. This allows us to take a conservative approach to enabling peepmatic everywhere, while also allowing us to get it in-tree and making it easier to collaborate on improving it quickly.
56 lines
1.3 KiB
Plaintext
56 lines
1.3 KiB
Plaintext
; Filetest for the peepmatic pass: each function below divides its argument
; by a constant and lists, as filecheck directives, the multiply-high and
; shift sequence expected in place of the division.
test peepmatic
target x86_64 baseline

; Cases where the denominator is created by an iconst

; Unsigned 32-bit division by 7, with the divisor supplied through a separate
; iconst rather than as an immediate operand of the division.
function %indir_udiv32(i32) -> i32 {
block0(v0: i32):
    v1 = iconst.i32 7
    v2 = udiv v0, v1
    ; Expected rewrite: take the high half of v0 * 0x2492_4925, then apply the
    ; subtract / shift-by-1 / add-back fixup before the final shift by 2.
    ; The original result v2 becomes an alias of the last new value.
    ; check: v4 = iconst.i32 0x2492_4925
    ; nextln: v5 = umulhi v0, v4
    ; nextln: v6 = isub v0, v5
    ; nextln: v7 = ushr_imm v6, 1
    ; nextln: v8 = iadd v7, v5
    ; nextln: v9 = ushr_imm v8, 2
    ; nextln: v2 -> v9
    return v2
}

; Signed 32-bit division by -17, with the divisor supplied through a separate
; iconst rather than as an immediate operand of the division.
function %indir_sdiv32(i32) -> i32 {
block0(v0: i32):
    v1 = iconst.i32 -17
    v2 = sdiv v0, v1
    ; Expected rewrite: signed multiply-high by the magic constant, arithmetic
    ; shift right by 3, then add the sign bit of that quotient (extracted with
    ; a logical shift by 31). The i32 immediate appears here in its 64-bit
    ; sign-extended form. The original result v2 becomes an alias of the sum.
    ; check: v4 = iconst.i32 0xffff_ffff_8787_8787
    ; nextln: v5 = smulhi v0, v4
    ; nextln: v6 = sshr_imm v5, 3
    ; nextln: v7 = ushr_imm v6, 31
    ; nextln: v8 = iadd v6, v7
    ; nextln: v2 -> v8
    return v2
}

; Unsigned 64-bit division by 1337, with the divisor supplied through a
; separate iconst rather than as an immediate operand of the division.
function %indir_udiv64(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i64 1337
    v2 = udiv v0, v1
    ; Expected rewrite: unsigned multiply-high by the magic constant followed
    ; by a single logical shift right by 10; no subtract/add-back fixup is
    ; expected for this divisor. The original result v2 becomes an alias of
    ; the shifted value.
    ; check: v4 = iconst.i64 0xc411_9d95_2866_a139
    ; nextln: v5 = umulhi v0, v4
    ; nextln: v6 = ushr_imm v5, 10
    ; nextln: v2 -> v6
    return v2
}

; Signed 64-bit division by -90210, with the divisor supplied through a
; separate iconst rather than as an immediate operand of the division.
function %indir_sdiv64(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i64 -90210
    v2 = sdiv v0, v1
    ; Expected rewrite: signed multiply-high by the magic constant, arithmetic
    ; shift right by 14, then add the sign bit of that quotient (extracted
    ; with a logical shift by 63). The original result v2 becomes an alias of
    ; the sum.
    ; check: v4 = iconst.i64 0xd181_4ee8_939c_b8bb
    ; nextln: v5 = smulhi v0, v4
    ; nextln: v6 = sshr_imm v5, 14
    ; nextln: v7 = ushr_imm v6, 63
    ; nextln: v8 = iadd v6, v7
    ; nextln: v2 -> v8
    return v2
}