peepmatic: Make peepmatic optional to enable

Rather than outright replacing parts of our existing peephole optimization
passes, this makes peepmatic an optional cargo feature that can be enabled. This
allows us to take a conservative approach to enabling peepmatic everywhere,
while also allowing us to get it in-tree and make it easier to collaborate on
improving it quickly.
This commit is contained in:
Nick Fitzgerald
2020-05-08 12:15:23 -07:00
parent 6e135b3aea
commit 52c6ece5f3
25 changed files with 2284 additions and 100 deletions

View File

@@ -6,9 +6,9 @@ function u0:0(i8) -> i8 fast {
block0(v0: i8):
v1 = iconst.i8 0
v2 = isub v1, v0
; check: v4 = uextend.i32 v0
; nextln: v6 = iconst.i32 0
; nextln: v5 = isub v6, v4
; nextln: v2 = ireduce.i8 v5
; check: uextend.i32
; nextln: iconst.i32
; nextln: isub
; nextln: ireduce.i8
return v2
}

View File

@@ -0,0 +1,81 @@
test peepmatic
target x86_64
function %icmp_to_brz_fold(i32) -> i32 {
block0(v0: i32):
v1 = icmp_imm eq v0, 0
brnz v1, block1
jump block2
block1:
v3 = iconst.i32 1
return v3
block2:
v4 = iconst.i32 2
return v4
}
; sameln: function %icmp_to_brz_fold
; nextln: block0(v0: i32):
; nextln: v1 = icmp_imm eq v0, 0
; nextln: brnz v0, block2
; nextln: jump block1
; nextln:
; nextln: block1:
; nextln: v3 = iconst.i32 1
; nextln: return v3
; nextln:
; nextln: block2:
; nextln: v4 = iconst.i32 2
; nextln: return v4
; nextln: }
function %icmp_to_brz_inverted_fold(i32) -> i32 {
block0(v0: i32):
v1 = icmp_imm ne v0, 0
brz v1, block1
jump block2
block1:
v3 = iconst.i32 1
return v3
block2:
v4 = iconst.i32 2
return v4
}
; sameln: function %icmp_to_brz_inve
; nextln: block0(v0: i32):
; nextln: v1 = icmp_imm ne v0, 0
; nextln: brnz v0, block2
; nextln: jump block1
; nextln:
; nextln: block1:
; nextln: v3 = iconst.i32 1
; nextln: return v3
; nextln:
; nextln: block2:
; nextln: v4 = iconst.i32 2
; nextln: return v4
; nextln: }
function %br_icmp_inversion(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
br_icmp ugt v0, v1, block1
jump block2
block1:
v2 = iconst.i32 1
return v2
block2:
v3 = iconst.i32 2
return v3
}
; sameln: function %br_icmp_inversio
; nextln: block0(v0: i32, v1: i32):
; nextln: br_icmp ule v0, v1, block2
; nextln: jump block1
; nextln:
; nextln: block1:
; nextln: v2 = iconst.i32 1
; nextln: return v2
; nextln:
; nextln: block2:
; nextln: v3 = iconst.i32 2
; nextln: return v3
; nextln: }

View File

@@ -0,0 +1,55 @@
test peepmatic
target x86_64 baseline
; Cases where the denominator is created by an iconst
function %indir_udiv32(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 7
v2 = udiv v0, v1
; check: v4 = iconst.i32 0x2492_4925
; nextln: v5 = umulhi v0, v4
; nextln: v6 = isub v0, v5
; nextln: v7 = ushr_imm v6, 1
; nextln: v8 = iadd v7, v5
; nextln: v9 = ushr_imm v8, 2
; nextln: v2 -> v9
return v2
}
function %indir_sdiv32(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 -17
v2 = sdiv v0, v1
; check: v4 = iconst.i32 0xffff_ffff_8787_8787
; nextln: v5 = smulhi v0, v4
; nextln: v6 = sshr_imm v5, 3
; nextln: v7 = ushr_imm v6, 31
; nextln: v8 = iadd v6, v7
; nextln: v2 -> v8
return v2
}
function %indir_udiv64(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 1337
v2 = udiv v0, v1
; check: v4 = iconst.i64 0xc411_9d95_2866_a139
; nextln: v5 = umulhi v0, v4
; nextln: v6 = ushr_imm v5, 10
; nextln: v2 -> v6
return v2
}
function %indir_sdiv64(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 -90210
v2 = sdiv v0, v1
; check: v4 = iconst.i64 0xd181_4ee8_939c_b8bb
; nextln: v5 = smulhi v0, v4
; nextln: v6 = sshr_imm v5, 14
; nextln: v7 = ushr_imm v6, 63
; nextln: v8 = iadd v6, v7
; nextln: v2 -> v8
return v2
}

View File

@@ -0,0 +1,266 @@
test peepmatic
target i686 baseline
; -------- U32 --------
; complex case (mul, sub, shift, add, shift)
function %t_udiv32_p7(i32) -> i32 {
block0(v0: i32):
v1 = udiv_imm v0, 7
; check: iconst.i32 0x2492_4925
; check: umulhi v0, v2
; check: isub v0, v3
; check: ushr_imm v4, 1
; check: iadd v5, v3
; check: v7 = ushr_imm v6, 2
; check: v1 -> v7
return v1
}
; simple case (mul, shift)
function %t_udiv32_p125(i32) -> i32 {
block0(v0: i32):
v1 = udiv_imm v0, 125
; check: iconst.i32 0x1062_4dd3
; check: umulhi v0, v2
; check: v4 = ushr_imm v3, 3
; check: v1 -> v4
return v1
}
; simple case w/ shift by zero (mul)
function %t_udiv32_p641(i32) -> i32 {
block0(v0: i32):
v1 = udiv_imm v0, 641
; check: iconst.i32 0x0066_3d81
; check: v3 = umulhi v0, v2
; check: v1 -> v3
return v1
}
; -------- S32 --------
; simple case w/ shift by zero (mul, add-sign-bit)
function %t_sdiv32_n6(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, -6
; check: iconst.i32 0xffff_ffff_d555_5555
; check: smulhi v0, v2
; check: ushr_imm v3, 31
; check: v5 = iadd v3, v4
; check: v1 -> v5
return v1
}
; simple case (mul, shift, add-sign-bit)
function %t_sdiv32_n5(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, -5
; check: iconst.i32 0xffff_ffff_9999_9999
; check: smulhi v0, v2
; check: sshr_imm v3, 1
; check: ushr_imm v4, 31
; check: v6 = iadd v4, v5
; check: v1 -> v6
return v1
}
; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit)
function %t_sdiv32_n3(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, -3
; check: iconst.i32 0x5555_5555
; check: smulhi v0, v2
; check: isub v3, v0
; check: sshr_imm v4, 1
; check: ushr_imm v5, 31
; check: v7 = iadd v5, v6
; check: v1 -> v7
return v1
}
; simple case w/ shift by zero (mul, add-sign-bit)
function %t_sdiv32_p6(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, 6
; check: iconst.i32 0x2aaa_aaab
; check: smulhi v0, v2
; check: ushr_imm v3, 31
; check: v5 = iadd v3, v4
; check: v1 -> v5
return v1
}
; case d > 0 && M < 0 (mull, add, shift, add-sign-bit)
function %t_sdiv32_p7(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, 7
; check: iconst.i32 0xffff_ffff_9249_2493
; check: smulhi v0, v2
; check: iadd v3, v0
; check: sshr_imm v4, 2
; check: ushr_imm v5, 31
; check: v7 = iadd v5, v6
; check: v1 -> v7
return v1
}
; simple case (mul, shift, add-sign-bit)
function %t_sdiv32_p625(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, 625
; check: iconst.i32 0x68db_8bad
; check: smulhi v0, v2
; check: sshr_imm v3, 8
; check: ushr_imm v4, 31
; check: v6 = iadd v4, v5
; check: v1 -> v6
return v1
}
; -------- U64 --------
; complex case (mul, sub, shift, add, shift)
function %t_udiv64_p7(i64) -> i64 {
block0(v0: i64):
v1 = udiv_imm v0, 7
; check: iconst.i64 0x2492_4924_9249_2493
; check: umulhi v0, v2
; check: isub v0, v3
; check: ushr_imm v4, 1
; check: iadd v5, v3
; check: v7 = ushr_imm v6, 2
; check: v1 -> v7
return v1
}
; simple case (mul, shift)
function %t_udiv64_p9(i64) -> i64 {
block0(v0: i64):
v1 = udiv_imm v0, 9
; check: iconst.i64 0xe38e_38e3_8e38_e38f
; check: umulhi v0, v2
; check: v4 = ushr_imm v3, 3
; check: v1 -> v4
return v1
}
; complex case (mul, sub, shift, add, shift)
function %t_udiv64_p125(i64) -> i64 {
block0(v0: i64):
v1 = udiv_imm v0, 125
; check: iconst.i64 0x0624_dd2f_1a9f_be77
; check: umulhi v0, v2
; check: isub v0, v3
; check: ushr_imm v4, 1
; check: iadd v5, v3
; check: v7 = ushr_imm v6, 6
; check: v1 -> v7
return v1
}
; simple case w/ shift by zero (mul)
function %t_udiv64_p274177(i64) -> i64 {
block0(v0: i64):
v1 = udiv_imm v0, 274177
; check: iconst.i64 0x3d30_f19c_d101
; check: v3 = umulhi v0, v2
; check: v1 -> v3
return v1
}
; -------- S64 --------
; simple case (mul, shift, add-sign-bit)
function %t_sdiv64_n625(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, -625
; check: iconst.i64 0xcb92_3a29_c779_a6b5
; check: smulhi v0, v2
; check: sshr_imm v3, 7
; check: ushr_imm v4, 63
; check: v6 = iadd v4, v5
; check: v1 -> v6
return v1
}
; simple case w/ zero shift (mul, add-sign-bit)
function %t_sdiv64_n6(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, -6
; check: iconst.i64 0xd555_5555_5555_5555
; check: smulhi v0, v2
; check: ushr_imm v3, 63
; check: v5 = iadd v3, v4
; check: v1 -> v5
return v1
}
; simple case w/ zero shift (mul, add-sign-bit)
function %t_sdiv64_n5(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, -5
; check: iconst.i64 0x9999_9999_9999_9999
; check: smulhi v0, v2
; check: sshr_imm v3, 1
; check: ushr_imm v4, 63
; check: v6 = iadd v4, v5
; check: v1 -> v6
return v1
}
; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit)
function %t_sdiv64_n3(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, -3
; check: iconst.i64 0x5555_5555_5555_5555
; check: smulhi v0, v2
; check: isub v3, v0
; check: sshr_imm v4, 1
; check: ushr_imm v5, 63
; check: v7 = iadd v5, v6
; check: v1 -> v7
return v1
}
; simple case w/ zero shift (mul, add-sign-bit)
function %t_sdiv64_p6(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, 6
; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab
; check: smulhi v0, v2
; check: ushr_imm v3, 63
; check: v5 = iadd v3, v4
; check: v1 -> v5
return v1
}
; case d > 0 && M < 0 (mul, add, shift, add-sign-bit)
function %t_sdiv64_p15(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, 15
; check: iconst.i64 0x8888_8888_8888_8889
; check: smulhi v0, v2
; check: iadd v3, v0
; check: sshr_imm v4, 3
; check: ushr_imm v5, 63
; check: v7 = iadd v5, v6
; check: v1 -> v7
return v1
}
; simple case (mul, shift, add-sign-bit)
function %t_sdiv64_p625(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, 625
; check: iconst.i64 0x346d_c5d6_3886_594b
; check: smulhi v0, v2
; check: sshr_imm v3, 7
; check: ushr_imm v4, 63
; check: v6 = iadd v4, v5
; check: v1 -> v6
return v1
}

View File

@@ -0,0 +1,292 @@
test peepmatic
target i686 baseline
; -------- U32 --------
; ignored
function %t_udiv32_p0(i32) -> i32 {
block0(v0: i32):
v1 = udiv_imm v0, 0
; check: udiv_imm v0, 0
return v1
}
; converted to a nop
function %t_udiv32_p1(i32) -> i32 {
block0(v0: i32):
v1 = udiv_imm v0, 1
; check: nop
return v1
}
; shift
function %t_udiv32_p2(i32) -> i32 {
block0(v0: i32):
v1 = udiv_imm v0, 2
; check: ushr_imm v0, 1
return v1
}
; shift
function %t_udiv32_p2p31(i32) -> i32 {
block0(v0: i32):
v1 = udiv_imm v0, 0x8000_0000
; check: ushr_imm v0, 31
return v1
}
; -------- U64 --------
; ignored
function %t_udiv64_p0(i64) -> i64 {
block0(v0: i64):
v1 = udiv_imm v0, 0
; check: udiv_imm v0, 0
return v1
}
; converted to a nop
function %t_udiv64_p1(i64) -> i64 {
block0(v0: i64):
v1 = udiv_imm v0, 1
; check: nop
return v1
}
; shift
function %t_udiv64_p2(i64) -> i64 {
block0(v0: i64):
v1 = udiv_imm v0, 2
; check: ushr_imm v0, 1
return v1
}
; shift
function %t_udiv64_p2p63(i64) -> i64 {
block0(v0: i64):
v1 = udiv_imm v0, 0x8000_0000_0000_0000
; check: ushr_imm v0, 63
return v1
}
; -------- S32 --------
; ignored
function %t_sdiv32_p0(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, 0
; check: sdiv_imm v0, 0
return v1
}
; converted to a nop
function %t_sdiv32_p1(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, 1
; check: nop
return v1
}
; ignored
function %t_sdiv32_n1(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, -1
; check: sdiv_imm v0, -1
return v1
}
; shift
function %t_sdiv32_p2(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, 2
; check: ushr_imm v0, 31
; check: iadd v0, v2
; check: sshr_imm v3, 1
; check: v1 -> v4
return v1
}
; shift
function %t_sdiv32_n2(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, -2
; check: ushr_imm v0, 31
; check: iadd v0, v2
; check: sshr_imm v3, 1
; check: irsub_imm v4, 0
return v1
}
; shift
function %t_sdiv32_p4(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, 4
; check: v2 = sshr_imm v0, 1
; check: ushr_imm v2, 30
; check: iadd v0, v3
; check: v5 = sshr_imm v4, 2
; check: v1 -> v5
return v1
}
; shift
function %t_sdiv32_n4(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, -4
; check: sshr_imm v0, 1
; check: ushr_imm v2, 30
; check: iadd v0, v3
; check: sshr_imm v4, 2
; check: irsub_imm v5, 0
return v1
}
; shift
function %t_sdiv32_p2p30(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, 0x4000_0000
; check: sshr_imm v0, 29
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: v5 = sshr_imm v4, 30
; check: v1 -> v5
return v1
}
; shift
function %t_sdiv32_n2p30(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, -0x4000_0000
; check: sshr_imm v0, 29
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: sshr_imm v4, 30
; check: irsub_imm v5, 0
return v1
}
; there's no positive version of this, since -(-0x8000_0000) isn't
; representable.
function %t_sdiv32_n2p31(i32) -> i32 {
block0(v0: i32):
v1 = sdiv_imm v0, -0x8000_0000
; check: sshr_imm v0, 30
; check: ushr_imm v2, 1
; check: iadd v0, v3
; check: sshr_imm v4, 31
; check: irsub_imm v5, 0
return v1
}
; -------- S64 --------
; ignored
function %t_sdiv64_p0(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, 0
; check: sdiv_imm v0, 0
return v1
}
; converted to a nop
function %t_sdiv64_p1(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, 1
; check: nop
return v1
}
; ignored
function %t_sdiv64_n1(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, -1
; check: sdiv_imm v0, -1
return v1
}
; shift
function %t_sdiv64_p2(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, 2
; check: ushr_imm v0, 63
; check: iadd v0, v2
; check: v4 = sshr_imm v3, 1
; check: v1 -> v4
return v1
}
; shift
function %t_sdiv64_n2(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, -2
; check: ushr_imm v0, 63
; check: iadd v0, v2
; check: sshr_imm v3, 1
; check: irsub_imm v4, 0
return v1
}
; shift
function %t_sdiv64_p4(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, 4
; check: sshr_imm v0, 1
; check: ushr_imm v2, 62
; check: iadd v0, v3
; check: v5 = sshr_imm v4, 2
; check: v1 -> v5
return v1
}
; shift
function %t_sdiv64_n4(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, -4
; check: sshr_imm v0, 1
; check: ushr_imm v2, 62
; check: iadd v0, v3
; check: sshr_imm v4, 2
; check: irsub_imm v5, 0
return v1
}
; shift
function %t_sdiv64_p2p62(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, 0x4000_0000_0000_0000
; check: sshr_imm v0, 61
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: v5 = sshr_imm v4, 62
; check: v1 -> v5
return v1
}
; shift
function %t_sdiv64_n2p62(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, -0x4000_0000_0000_0000
; check: sshr_imm v0, 61
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: sshr_imm v4, 62
; check: irsub_imm v5, 0
return v1
}
; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't
; representable.
function %t_sdiv64_n2p63(i64) -> i64 {
block0(v0: i64):
v1 = sdiv_imm v0, -0x8000_0000_0000_0000
; check: sshr_imm v0, 62
; check: ushr_imm v2, 1
; check: iadd v0, v3
; check: sshr_imm v4, 63
; check: irsub_imm v5, 0
return v1
}

View File

@@ -1,4 +1,4 @@
test simple_preopt
test peepmatic
target x86_64
;; This file used to trigger assertions where we would keep trying to

View File

@@ -0,0 +1,22 @@
test peepmatic
target x86_64
;; Test that although v5 can be replaced with v1, we don't transplant `load.i32
;; v0` on top of `iadd v3, v4`, because that would move the load past other uses
;; of its result.
function %foo(i64) -> i32 {
block0(v0: i64):
v1 = load.i32 v0
v2 = iconst.i32 16
v3 = iadd_imm v1, -16
v4 = iconst.i32 16
v5 = iadd v3, v4
; check: v1 = load.i32 v0
; nextln: v5 -> v1
; nextln: v2 = iconst.i32 16
; nextln: v3 = iadd_imm v1, -16
; nextln: v4 = iconst.i32 16
; nextln: nop
return v5
}

View File

@@ -0,0 +1,14 @@
test peepmatic
target x86_64
function %wraparound(i64 vmctx) -> f32 system_v {
gv0 = vmctx
gv1 = iadd_imm.i64 gv0, 48
block35(v0: i64):
v88 = iconst.i64 0
v89 = iconst.i64 0x8000_0000_0000_0000
v90 = ishl_imm v88, 0x8000_0000_0000_0000
v91 = sshr v90, v89; check: sshr_imm v90, 0x8000_0000_0000_0000
trap user0
}

View File

@@ -0,0 +1,285 @@
test peepmatic
target i686 baseline
; -------- U32 --------
; complex case (mul, sub, shift, add, shift)
function %t_urem32_p7(i32) -> i32 {
block0(v0: i32):
v1 = urem_imm v0, 7
; check: iconst.i32 0x2492_4925
; check: umulhi v0, v2
; check: isub v0, v3
; check: ushr_imm v4, 1
; check: iadd v5, v3
; check: ushr_imm v6, 2
; check: imul_imm v7, 7
; check: isub v0, v8
return v1
}
; simple case (mul, shift)
function %t_urem32_p125(i32) -> i32 {
block0(v0: i32):
v1 = urem_imm v0, 125
; check: iconst.i32 0x1062_4dd3
; check: umulhi v0, v2
; check: ushr_imm v3, 3
; check: imul_imm v4, 125
; check: isub v0, v5
return v1
}
; simple case w/ shift by zero (mul)
function %t_urem32_p641(i32) -> i32 {
block0(v0: i32):
v1 = urem_imm v0, 641
; check: iconst.i32 0x0066_3d81
; check: umulhi v0, v2
; check: imul_imm v3, 641
; check: isub v0, v4
return v1
}
; -------- S32 --------
; simple case w/ shift by zero (mul, add-sign-bit)
function %t_srem32_n6(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, -6
; check: iconst.i32 0xffff_ffff_d555_5555
; check: smulhi v0, v2
; check: ushr_imm v3, 31
; check: iadd v3, v4
; check: imul_imm v5, -6
; check: isub v0, v6
return v1
}
; simple case (mul, shift, add-sign-bit)
function %t_srem32_n5(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, -5
; check: iconst.i32 0xffff_ffff_9999_9999
; check: smulhi v0, v2
; check: sshr_imm v3, 1
; check: ushr_imm v4, 31
; check: iadd v4, v5
; check: imul_imm v6, -5
; check: isub v0, v7
return v1
}
; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit)
function %t_srem32_n3(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, -3
; check: iconst.i32 0x5555_5555
; check: smulhi v0, v2
; check: isub v3, v0
; check: sshr_imm v4, 1
; check: ushr_imm v5, 31
; check: iadd v5, v6
; check: imul_imm v7, -3
; check: isub v0, v8
return v1
}
; simple case w/ shift by zero (mul, add-sign-bit)
function %t_srem32_p6(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, 6
; check: iconst.i32 0x2aaa_aaab
; check: smulhi v0, v2
; check: ushr_imm v3, 31
; check: iadd v3, v4
; check: imul_imm v5, 6
; check: isub v0, v6
return v1
}
; case d > 0 && M < 0 (mull, add, shift, add-sign-bit)
function %t_srem32_p7(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, 7
; check: iconst.i32 0xffff_ffff_9249_2493
; check: smulhi v0, v2
; check: iadd v3, v0
; check: sshr_imm v4, 2
; check: ushr_imm v5, 31
; check: iadd v5, v6
; check: imul_imm v7, 7
; check: isub v0, v8
return v1
}
; simple case (mul, shift, add-sign-bit)
function %t_srem32_p625(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, 625
; check: iconst.i32 0x68db_8bad
; check: smulhi v0, v2
; check: sshr_imm v3, 8
; check: ushr_imm v4, 31
; check: iadd v4, v5
; check: imul_imm v6, 625
; check: isub v0, v7
return v1
}
; -------- U64 --------
; complex case (mul, sub, shift, add, shift)
function %t_urem64_p7(i64) -> i64 {
block0(v0: i64):
v1 = urem_imm v0, 7
; check: iconst.i64 0x2492_4924_9249_2493
; check: umulhi v0, v2
; check: isub v0, v3
; check: ushr_imm v4, 1
; check: iadd v5, v3
; check: ushr_imm v6, 2
; check: imul_imm v7, 7
; check: isub v0, v8
return v1
}
; simple case (mul, shift)
function %t_urem64_p9(i64) -> i64 {
block0(v0: i64):
v1 = urem_imm v0, 9
; check: iconst.i64 0xe38e_38e3_8e38_e38f
; check: umulhi v0, v2
; check: ushr_imm v3, 3
; check: imul_imm v4, 9
; check: isub v0, v5
return v1
}
; complex case (mul, sub, shift, add, shift)
function %t_urem64_p125(i64) -> i64 {
block0(v0: i64):
v1 = urem_imm v0, 125
; check: iconst.i64 0x0624_dd2f_1a9f_be77
; check: umulhi v0, v2
; check: isub v0, v3
; check: ushr_imm v4, 1
; check: iadd v5, v3
; check: ushr_imm v6, 6
; check: imul_imm v7, 125
; check: isub v0, v8
return v1
}
; simple case w/ shift by zero (mul)
function %t_urem64_p274177(i64) -> i64 {
block0(v0: i64):
v1 = urem_imm v0, 274177
; check: iconst.i64 0x3d30_f19c_d101
; check: umulhi v0, v2
; check: imul_imm v3, 0x0004_2f01
; check: isub v0, v4
return v1
}
; -------- S64 --------
; simple case (mul, shift, add-sign-bit)
function %t_srem64_n625(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, -625
; check: iconst.i64 0xcb92_3a29_c779_a6b5
; check: smulhi v0, v2
; check: sshr_imm v3, 7
; check: ushr_imm v4, 63
; check: iadd v4, v5
; check: imul_imm v6, -625
; check: isub v0, v7
return v1
}
; simple case w/ zero shift (mul, add-sign-bit)
function %t_srem64_n6(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, -6
; check: iconst.i64 0xd555_5555_5555_5555
; check: smulhi v0, v2
; check: ushr_imm v3, 63
; check: iadd v3, v4
; check: imul_imm v5, -6
; check: isub v0, v6
return v1
}
; simple case w/ zero shift (mul, add-sign-bit)
function %t_srem64_n5(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, -5
; check: iconst.i64 0x9999_9999_9999_9999
; check: smulhi v0, v2
; check: sshr_imm v3, 1
; check: ushr_imm v4, 63
; check: iadd v4, v5
; check: imul_imm v6, -5
; check: isub v0, v7
return v1
}
; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit)
function %t_srem64_n3(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, -3
; check: iconst.i64 0x5555_5555_5555_5555
; check: smulhi v0, v2
; check: isub v3, v0
; check: sshr_imm v4, 1
; check: ushr_imm v5, 63
; check: iadd v5, v6
; check: imul_imm v7, -3
; check: isub v0, v8
return v1
}
; simple case w/ zero shift (mul, add-sign-bit)
function %t_srem64_p6(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, 6
; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab
; check: smulhi v0, v2
; check: ushr_imm v3, 63
; check: iadd v3, v4
; check: imul_imm v5, 6
; check: isub v0, v6
return v1
}
; case d > 0 && M < 0 (mul, add, shift, add-sign-bit)
function %t_srem64_p15(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, 15
; check: iconst.i64 0x8888_8888_8888_8889
; check: smulhi v0, v2
; check: iadd v3, v0
; check: sshr_imm v4, 3
; check: ushr_imm v5, 63
; check: iadd v5, v6
; check: imul_imm v7, 15
; check: isub v0, v8
return v1
}
; simple case (mul, shift, add-sign-bit)
function %t_srem64_p625(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, 625
; check: iconst.i64 0x346d_c5d6_3886_594b
; check: smulhi v0, v2
; check: sshr_imm v3, 7
; check: ushr_imm v4, 63
; check: iadd v4, v5
; check: imul_imm v6, 625
; check: isub v0, v7
return v1
}

View File

@@ -0,0 +1,291 @@
test peepmatic
target i686 baseline
; -------- U32 --------
; ignored
function %t_urem32_p0(i32) -> i32 {
block0(v0: i32):
v1 = urem_imm v0, 0
; check: urem_imm v0, 0
return v1
}
; converted to constant zero
function %t_urem32_p1(i32) -> i32 {
block0(v0: i32):
v1 = urem_imm v0, 1
; check: iconst.i32 0
return v1
}
; shift
function %t_urem32_p2(i32) -> i32 {
block0(v0: i32):
v1 = urem_imm v0, 2
; check: band_imm v0, 1
return v1
}
; shift
function %t_urem32_p2p31(i32) -> i32 {
block0(v0: i32):
v1 = urem_imm v0, 0x8000_0000
; check: band_imm v0, 0x7fff_ffff
return v1
}
; -------- U64 --------
; ignored
function %t_urem64_p0(i64) -> i64 {
block0(v0: i64):
v1 = urem_imm v0, 0
; check: urem_imm v0, 0
return v1
}
; converted to constant zero
function %t_urem64_p1(i64) -> i64 {
block0(v0: i64):
v1 = urem_imm v0, 1
; check: iconst.i64 0
return v1
}
; shift
function %t_urem64_p2(i64) -> i64 {
block0(v0: i64):
v1 = urem_imm v0, 2
; check: band_imm v0, 1
return v1
}
; shift
function %t_urem64_p2p63(i64) -> i64 {
block0(v0: i64):
v1 = urem_imm v0, 0x8000_0000_0000_0000
; check: band_imm v0, 0x7fff_ffff_ffff_ffff
return v1
}
; -------- S32 --------
; ignored
function %t_srem32_n1(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, -1
; check: srem_imm v0, -1
return v1
}
; ignored
function %t_srem32_p0(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, 0
; check: srem_imm v0, 0
return v1
}
; converted to constant zero
function %t_srem32_p1(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, 1
; check: iconst.i32 0
return v1
}
; shift
function %t_srem32_p2(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, 2
; check: ushr_imm v0, 31
; check: iadd v0, v2
; check: band_imm v3, -2
; check: isub v0, v4
return v1
}
; shift
function %t_srem32_n2(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, -2
; check: ushr_imm v0, 31
; check: iadd v0, v2
; check: band_imm v3, -2
; check: isub v0, v4
return v1
}
; shift
function %t_srem32_p4(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, 4
; check: sshr_imm v0, 1
; check: ushr_imm v2, 30
; check: iadd v0, v3
; check: band_imm v4, -4
; check: isub v0, v5
return v1
}
; shift
function %t_srem32_n4(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, -4
; check: sshr_imm v0, 1
; check: ushr_imm v2, 30
; check: iadd v0, v3
; check: band_imm v4, -4
; check: isub v0, v5
return v1
}
; shift
function %t_srem32_p2p30(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, 0x4000_0000
; check: sshr_imm v0, 29
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: band_imm v4, 0xffff_ffff_c000_0000
; check: isub v0, v5
return v1
}
; shift
function %t_srem32_n2p30(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, -0x4000_0000
; check: sshr_imm v0, 29
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: band_imm v4, 0xffff_ffff_c000_0000
; check: isub v0, v5
return v1
}
; there's no positive version of this, since -(-0x8000_0000) isn't
; representable.
function %t_srem32_n2p31(i32) -> i32 {
block0(v0: i32):
v1 = srem_imm v0, -0x8000_0000
; check: sshr_imm v0, 30
; check: ushr_imm v2, 1
; check: iadd v0, v3
; check: band_imm v4, 0xffff_ffff_8000_0000
; check: isub v0, v5
return v1
}
; -------- S64 --------
; ignored
function %t_srem64_n1(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, -1
; check: srem_imm v0, -1
return v1
}
; ignored
function %t_srem64_p0(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, 0
; check: srem_imm v0, 0
return v1
}
; converted to constant zero
function %t_srem64_p1(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, 1
; check: iconst.i64 0
return v1
}
; shift
function %t_srem64_p2(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, 2
; check: ushr_imm v0, 63
; check: iadd v0, v2
; check: band_imm v3, -2
; check: isub v0, v4
return v1
}
; shift
function %t_srem64_n2(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, -2
; check: ushr_imm v0, 63
; check: iadd v0, v2
; check: band_imm v3, -2
; check: isub v0, v4
return v1
}
; shift
function %t_srem64_p4(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, 4
; check: sshr_imm v0, 1
; check: ushr_imm v2, 62
; check: iadd v0, v3
; check: band_imm v4, -4
; check: isub v0, v5
return v1
}
; shift
function %t_srem64_n4(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, -4
; check: sshr_imm v0, 1
; check: ushr_imm v2, 62
; check: iadd v0, v3
; check: band_imm v4, -4
; check: isub v0, v5
return v1
}
; shift
function %t_srem64_p2p62(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, 0x4000_0000_0000_0000
; check: sshr_imm v0, 61
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: band_imm v4, 0xc000_0000_0000_0000
; check: isub v0, v5
return v1
}
; shift
function %t_srem64_n2p62(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, -0x4000_0000_0000_0000
; check: sshr_imm v0, 61
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: band_imm v4, 0xc000_0000_0000_0000
; check: isub v0, v5
return v1
}
; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't
; representable.
function %t_srem64_n2p63(i64) -> i64 {
block0(v0: i64):
v1 = srem_imm v0, -0x8000_0000_0000_0000
; check: sshr_imm v0, 62
; check: ushr_imm v2, 1
; check: iadd v0, v3
; check: band_imm v4, 0x8000_0000_0000_0000
; check: isub v0, v5
return v1
}

View File

@@ -0,0 +1,22 @@
test peepmatic
target x86_64
function u0:2(i64 , i64) {
gv1 = load.i64 notrap aligned gv0
heap0 = static gv1
block0(v0: i64, v1: i64):
v16 = iconst.i32 6
v17 = heap_addr.i64 heap0, v16, 1
v18 = load.i32 v17
v19 = iconst.i32 4
v20 = icmp ne v18, v19
v21 = bint.i32 v20
brnz v21, block2
jump block4
block4:
jump block1
block2:
jump block1
block1:
return
}

View File

@@ -0,0 +1,60 @@
test peepmatic
target i686
;; 32-bits platforms.
function %iadd_imm(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = iadd v0, v1
return v2
}
; sameln: function %iadd_imm
; nextln: block0(v0: i32):
; nextln: v1 = iconst.i32 2
; nextln: v2 = iadd_imm v0, 2
; nextln: return v2
; nextln: }
function %isub_imm(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = isub v0, v1
return v2
}
; sameln: function %isub_imm
; nextln: block0(v0: i32):
; nextln: v1 = iconst.i32 2
; nextln: v2 = iadd_imm v0, -2
; nextln: return v2
; nextln: }
function %icmp_imm(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = icmp slt v0, v1
v3 = bint.i32 v2
return v3
}
; sameln: function %icmp_imm
; nextln: block0(v0: i32):
; nextln: v1 = iconst.i32 2
; nextln: v2 = icmp_imm slt v0, 2
; nextln: v3 = bint.i32 v2
; nextln: return v3
; nextln: }
;; Don't simplify operations that would get illegal because of lack of native
;; support.
function %iadd_imm(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 2
v2 = iadd v0, v1
return v2
}
; sameln: function %iadd_imm
; nextln: block0(v0: i64):
; nextln: v1 = iconst.i64 2
; nextln: v2 = iadd v0, v1
; nextln: return v2
; nextln: }

View File

@@ -0,0 +1,326 @@
test peepmatic
target x86_64
;; 64-bits platforms.
function %iadd_imm(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = iadd v0, v1
return v2
}
; sameln: function %iadd_imm
; nextln: block0(v0: i32):
; nextln: v1 = iconst.i32 2
; nextln: v2 = iadd_imm v0, 2
; nextln: return v2
; nextln: }
function %isub_imm(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = isub v0, v1
return v2
}
; sameln: function %isub_imm
; nextln: block0(v0: i32):
; nextln: v1 = iconst.i32 2
; nextln: v2 = iadd_imm v0, -2
; nextln: return v2
; nextln: }
function %icmp_imm(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = icmp slt v0, v1
v3 = bint.i32 v2
return v3
}
; sameln: function %icmp_imm
; nextln: block0(v0: i32):
; nextln: v1 = iconst.i32 2
; nextln: v2 = icmp_imm slt v0, 2
; nextln: v3 = bint.i32 v2
; nextln: return v3
; nextln: }
function %ifcmp_imm(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = ifcmp v0, v1
brif eq v2, block1
jump block2
block1:
v3 = iconst.i32 1
return v3
block2:
v4 = iconst.i32 2
return v4
}
; sameln: function %ifcmp_imm
; nextln: block0(v0: i32):
; nextln: v1 = iconst.i32 2
; nextln: v2 = ifcmp_imm v0, 2
; nextln: brif eq v2, block1
; nextln: jump block2
; nextln:
; nextln: block1:
; nextln: v3 = iconst.i32 1
; nextln: return v3
; nextln:
; nextln: block2:
; nextln: v4 = iconst.i32 2
; nextln: return v4
; nextln: }
function %brz_bint(i32) {
block0(v0: i32):
v3 = icmp_imm slt v0, 0
v1 = bint.i32 v3
v2 = select v1, v1, v1
trapz v1, user0
brz v1, block1
jump block2
block1:
return
block2:
return
}
; sameln: function %brz_bint
; nextln: block0(v0: i32):
; nextln: v3 = icmp_imm slt v0, 0
; nextln: v1 = bint.i32 v3
; nextln: v2 = select v3, v1, v1
; nextln: trapz v3, user0
; nextln: brnz v3, block2
; nextln: jump block1
function %irsub_imm(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = isub v1, v0
return v2
}
; sameln: function %irsub_imm
; nextln: block0(v0: i32):
; nextln: v1 = iconst.i32 2
; nextln: v2 = irsub_imm v0, 2
; nextln: return v2
; nextln: }
;; Sign-extensions.
;; 8 -> 16
function %uextend_8_16() -> i16 {
block0:
v0 = iconst.i16 37
v1 = ishl_imm v0, 8
v2 = ushr_imm v1, 8
return v2
}
; sameln: function %uextend_8_16
; nextln: block0:
; nextln: v0 = iconst.i16 37
; nextln: v1 = ishl_imm v0, 8
; nextln: v3 = ireduce.i8 v0
; nextln: v2 = uextend.i16 v3
; nextln: return v2
; nextln: }
function %sextend_8_16() -> i16 {
block0:
v0 = iconst.i16 37
v1 = ishl_imm v0, 8
v2 = sshr_imm v1, 8
return v2
}
; sameln: function %sextend_8_16
; nextln: block0:
; nextln: v0 = iconst.i16 37
; nextln: v1 = ishl_imm v0, 8
; nextln: v3 = ireduce.i8 v0
; nextln: v2 = sextend.i16 v3
; nextln: return v2
; nextln: }
;; 8 -> 32
function %uextend_8_32() -> i32 {
block0:
v0 = iconst.i32 37
v1 = ishl_imm v0, 24
v2 = ushr_imm v1, 24
return v2
}
; sameln: function %uextend_8_32
; nextln: block0:
; nextln: v0 = iconst.i32 37
; nextln: v1 = ishl_imm v0, 24
; nextln: v3 = ireduce.i8 v0
; nextln: v2 = uextend.i32 v3
; nextln: return v2
; nextln: }
function %sextend_8_32() -> i32 {
block0:
v0 = iconst.i32 37
v1 = ishl_imm v0, 24
v2 = sshr_imm v1, 24
return v2
}
; sameln: function %sextend_8_32
; nextln: block0:
; nextln: v0 = iconst.i32 37
; nextln: v1 = ishl_imm v0, 24
; nextln: v3 = ireduce.i8 v0
; nextln: v2 = sextend.i32 v3
; nextln: return v2
; nextln: }
;; 16 -> 32
function %uextend_16_32() -> i32 {
block0:
v0 = iconst.i32 37
v1 = ishl_imm v0, 16
v2 = ushr_imm v1, 16
return v2
}
; sameln: function %uextend_16_32
; nextln: block0:
; nextln: v0 = iconst.i32 37
; nextln: v1 = ishl_imm v0, 16
; nextln: v3 = ireduce.i16 v0
; nextln: v2 = uextend.i32 v3
; nextln: return v2
; nextln: }
;; i32 shl/sshr by 16 sign-extends the low 16 bits: rewritten to `ireduce.i16`
;; + `sextend.i32`.
function %sextend_16_32() -> i32 {
block0:
v0 = iconst.i32 37
v1 = ishl_imm v0, 16
v2 = sshr_imm v1, 16
return v2
}
; sameln: function %sextend_16_32
; nextln: block0:
; nextln: v0 = iconst.i32 37
; nextln: v1 = ishl_imm v0, 16
; nextln: v3 = ireduce.i16 v0
; nextln: v2 = sextend.i32 v3
; nextln: return v2
; nextln: }
;; 8 -> 64
;; i64 shl/ushr by 56 keeps the low 8 bits: rewritten to `ireduce.i8` +
;; `uextend.i64`.
function %uextend_8_64() -> i64 {
block0:
v0 = iconst.i64 37
v1 = ishl_imm v0, 56
v2 = ushr_imm v1, 56
return v2
}
; sameln: function %uextend_8_64
; nextln: block0:
; nextln: v0 = iconst.i64 37
; nextln: v1 = ishl_imm v0, 56
; nextln: v3 = ireduce.i8 v0
; nextln: v2 = uextend.i64 v3
; nextln: return v2
; nextln: }
;; i64 shl/sshr by 56 sign-extends the low 8 bits: rewritten to `ireduce.i8` +
;; `sextend.i64`.
function %sextend_8_64() -> i64 {
block0:
v0 = iconst.i64 37
v1 = ishl_imm v0, 56
v2 = sshr_imm v1, 56
return v2
}
; sameln: function %sextend_8_64
; nextln: block0:
; nextln: v0 = iconst.i64 37
; nextln: v1 = ishl_imm v0, 56
; nextln: v3 = ireduce.i8 v0
; nextln: v2 = sextend.i64 v3
; nextln: return v2
; nextln: }
;; 16 -> 64
;; i64 shl/ushr by 48 keeps the low 16 bits: rewritten to `ireduce.i16` +
;; `uextend.i64`.
function %uextend_16_64() -> i64 {
block0:
v0 = iconst.i64 37
v1 = ishl_imm v0, 48
v2 = ushr_imm v1, 48
return v2
}
; sameln: function %uextend_16_64
; nextln: block0:
; nextln: v0 = iconst.i64 37
; nextln: v1 = ishl_imm v0, 48
; nextln: v3 = ireduce.i16 v0
; nextln: v2 = uextend.i64 v3
; nextln: return v2
; nextln: }
;; i64 shl/sshr by 48 sign-extends the low 16 bits: rewritten to `ireduce.i16`
;; + `sextend.i64`.
function %sextend_16_64() -> i64 {
block0:
v0 = iconst.i64 37
v1 = ishl_imm v0, 48
v2 = sshr_imm v1, 48
return v2
}
; sameln: function %sextend_16_64
; nextln: block0:
; nextln: v0 = iconst.i64 37
; nextln: v1 = ishl_imm v0, 48
; nextln: v3 = ireduce.i16 v0
; nextln: v2 = sextend.i64 v3
; nextln: return v2
; nextln: }
;; 32 -> 64
;; i64 shl/ushr by 32 keeps the low 32 bits: rewritten to `ireduce.i32` +
;; `uextend.i64`.
function %uextend_32_64() -> i64 {
block0:
v0 = iconst.i64 37
v1 = ishl_imm v0, 32
v2 = ushr_imm v1, 32
return v2
}
; sameln: function %uextend_32_64
; nextln: block0:
; nextln: v0 = iconst.i64 37
; nextln: v1 = ishl_imm v0, 32
; nextln: v3 = ireduce.i32 v0
; nextln: v2 = uextend.i64 v3
; nextln: return v2
; nextln: }
;; i64 shl/sshr by 32 sign-extends the low 32 bits: rewritten to `ireduce.i32`
;; + `sextend.i64`.
function %sextend_32_64() -> i64 {
block0:
v0 = iconst.i64 37
v1 = ishl_imm v0, 32
v2 = sshr_imm v1, 32
return v2
}
; sameln: function %sextend_32_64
; nextln: block0:
; nextln: v0 = iconst.i64 37
; nextln: v1 = ishl_imm v0, 32
; nextln: v3 = ireduce.i32 v0
; nextln: v2 = sextend.i64 v3
; nextln: return v2
; nextln: }
;; Offsetting `iadd_imm` chain (+42 then -42) folds away: the second add is
;; replaced by a `nop` and its result is aliased back to the input (v2 -> v0).
function %add_imm_fold(i32) -> i32 {
block0(v0: i32):
v1 = iadd_imm v0, 42
v2 = iadd_imm v1, -42
return v2
}
; sameln: function %add_imm_fold(i32)
; nextln: block0(v0: i32):
; nextln: v2 -> v0
; nextln: v1 = iadd_imm v0, 42
; nextln: nop
; nextln: return v2

View File

@@ -0,0 +1,17 @@
test peepmatic
target x86_64
;; The `isub` is a no-op, but we can't replace the whole `isub` instruction with
;; its `v2` operand's instruction because `v2` is one of many results. Instead,
;; we need to make an alias `v3 -> v2`.
;; `x86_smulx` defines two results (v1, v2), so the redundant `isub v2, v0`
;; (v0 is zero) is eliminated via the value alias `v3 -> v2` instead of by
;; replacing the instruction that defines v2.
function %replace_inst_with_alias() -> i32 {
block0:
v0 = iconst.i32 0
v1, v2 = x86_smulx v0, v0
v3 = isub v2, v0
; check: v0 = iconst.i32 0
; nextln: v1, v2 = x86_smulx v0, v0
; nextln: v3 -> v2
return v3
}

View File

@@ -7,13 +7,14 @@ function %indir_udiv32(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 7
v2 = udiv v0, v1
; check: v4 = iconst.i32 0x2492_4925
; nextln: v5 = umulhi v0, v4
; nextln: v6 = isub v0, v5
; nextln: v7 = ushr_imm v6, 1
; nextln: v8 = iadd v7, v5
; nextln: v9 = ushr_imm v8, 2
; nextln: v2 -> v9
; check: iconst.i32 7
; check: iconst.i32 0x2492_4925
; check: umulhi v0, v3
; check: isub v0, v4
; check: ushr_imm v5, 1
; check: iadd v6, v4
; check: v8 = ushr_imm v7, 2
; check: v2 -> v8
return v2
}
@@ -21,12 +22,13 @@ function %indir_sdiv32(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 -17
v2 = sdiv v0, v1
; check: v4 = iconst.i32 0xffff_ffff_8787_8787
; nextln: v5 = smulhi v0, v4
; nextln: v6 = sshr_imm v5, 3
; nextln: v7 = ushr_imm v6, 31
; nextln: v8 = iadd v6, v7
; nextln: v2 -> v8
; check: iconst.i32 -17
; check: iconst.i32 0xffff_ffff_8787_8787
; check: smulhi v0, v3
; check: sshr_imm v4, 3
; check: ushr_imm v5, 31
; check: v7 = iadd v5, v6
; check: v2 -> v7
return v2
}
@@ -34,10 +36,11 @@ function %indir_udiv64(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 1337
v2 = udiv v0, v1
; check: v4 = iconst.i64 0xc411_9d95_2866_a139
; nextln: v5 = umulhi v0, v4
; nextln: v6 = ushr_imm v5, 10
; nextln: v2 -> v6
; check: iconst.i64 1337
; check: iconst.i64 0xc411_9d95_2866_a139
; check: umulhi v0, v3
; check: v5 = ushr_imm v4, 10
; check: v2 -> v5
return v2
}
@@ -45,11 +48,12 @@ function %indir_sdiv64(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 -90210
v2 = sdiv v0, v1
; check: v4 = iconst.i64 0xd181_4ee8_939c_b8bb
; nextln: v5 = smulhi v0, v4
; nextln: v6 = sshr_imm v5, 14
; nextln: v7 = ushr_imm v6, 63
; nextln: v8 = iadd v6, v7
; nextln: v2 -> v8
; check: iconst.i64 0xffff_ffff_fffe_9f9e
; check: iconst.i64 0xd181_4ee8_939c_b8bb
; check: smulhi v0, v3
; check: sshr_imm v4, 14
; check: ushr_imm v5, 63
; check: v7 = iadd v5, v6
; check: v2 -> v7
return v2
}

View File

@@ -58,3 +58,4 @@ block0(v0: i64):
; nextln: v2 = iadd v0, v1
; nextln: return v2
; nextln: }

View File

@@ -44,37 +44,6 @@ block0(v0: i32):
; nextln: return v3
; nextln: }
;; `ifcmp` against a value defined by an `iconst` is rewritten to `ifcmp_imm`;
;; the branch structure is left unchanged.
function %ifcmp_imm(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = ifcmp v0, v1
brif eq v2, block1
jump block2
block1:
v3 = iconst.i32 1
return v3
block2:
v4 = iconst.i32 2
return v4
}
; sameln: function %ifcmp_imm
; nextln: block0(v0: i32):
; nextln: v1 = iconst.i32 2
; nextln: v2 = ifcmp_imm v0, 2
; nextln: brif eq v2, block1
; nextln: jump block2
; nextln:
; nextln: block1:
; nextln: v3 = iconst.i32 1
; nextln: return v3
; nextln:
; nextln: block2:
; nextln: v4 = iconst.i32 2
; nextln: return v4
; nextln: }
function %brz_bint(i32) {
block0(v0: i32):
v3 = icmp_imm slt v0, 0