Cranelift(Aarch64): Optimize lowering of icmps with immediates (#5252)

We can encode more constants into 12-bit immediates if we do the following
rewrite for comparisons with odd constants:

        A >= B + 1
    ==> A - 1 >= B
    ==> A > B
This commit is contained in:
Nick Fitzgerald
2022-11-15 09:18:55 -08:00
committed by GitHub
parent 6dcdabf37e
commit 9967782726
7 changed files with 236 additions and 51 deletions

View File

@@ -83,13 +83,13 @@ block0(v0: i64, v1: i32):
; block0:
; mov w9, w1
-; movz x10, #65512
+; movz w10, #65512
; subs xzr, x9, x10
; b.ls label1 ; b label2
; block1:
; add x11, x0, x1, UXTW
; add x11, x11, #16
-; movz x10, #65512
+; movz w10, #65512
; movz x12, #0
; subs xzr, x9, x10
; csel x0, x12, x11, hi

View File

@@ -0,0 +1,111 @@
;; Test our lowerings that do things like `A >= B + 1 ==> A > B` to make better
;; use of immediate encodings.
test compile precise-output
set unwind_info=false
target aarch64
;; %a: unsigned `uge` against the odd constant 0x111001 (= 0x111000 + 1).
;; The expected output compares against #1118208 (0x111000) and selects on
;; `hi` instead of `hs`, i.e. the `A >= B + 1 ==> A > B` rewrite was applied.
function %a(i32) -> i8 {
block0(v0: i32):
v1 = iconst.i32 0x111001
v2 = icmp.i32 uge v0, v1
return v2
}
; block0:
; subs wzr, w0, #1118208
; cset x0, hi
; ret
;; %b: unsigned `uge` against 0x111000 (decimal 1118208).
;; The expected output uses the constant unchanged as the `subs` immediate and
;; keeps the `hs` condition, so no `>=` to `>` rewrite shows up here.
function %b(i32) -> i8 {
block0(v0: i32):
v1 = iconst.i32 0x111000
v2 = icmp.i32 uge v0, v1
return v2
}
; block0:
; subs wzr, w0, #1118208
; cset x0, hs
; ret
;; %c: unsigned `uge` against the odd constant 0x111111.
;; The expected output builds the constant in w2 (movz #4369 = 0x1111, then
;; movk #17 = 0x11 shifted left 16) and does a register-register `subs` with
;; the original `hs` condition.
;; NOTE(review): presumably neither 0x111111 nor 0x111110 fits the immediate
;; form, so the rewrite does not help here -- confirm against the encoding rules.
function %c(i32) -> i8 {
block0(v0: i32):
v1 = iconst.i32 0x111111
v2 = icmp.i32 uge v0, v1
return v2
}
; block0:
; movz w2, #4369
; movk w2, w2, #17, LSL #16
; subs wzr, w0, w2
; cset x0, hs
; ret
;; %d: unsigned `uge` against the even constant 0x111110.
;; The expected output builds the constant in w2 (movz #4368 = 0x1110, then
;; movk #17 = 0x11 shifted left 16) and does a register-register `subs` with
;; the `hs` condition.
;; NOTE(review): presumably the constant does not fit the immediate form --
;; confirm against the encoding rules.
function %d(i32) -> i8 {
block0(v0: i32):
v1 = iconst.i32 0x111110
v2 = icmp.i32 uge v0, v1
return v2
}
; block0:
; movz w2, #4368
; movk w2, w2, #17, LSL #16
; subs wzr, w0, w2
; cset x0, hs
; ret
;; %e: signed counterpart of %a -- `sge` against the odd constant 0x111001.
;; The expected output compares against #1118208 (0x111000) and selects on
;; `gt` instead of `ge`, i.e. the `A >= B + 1 ==> A > B` rewrite was applied.
function %e(i32) -> i8 {
block0(v0: i32):
v1 = iconst.i32 0x111001
v2 = icmp.i32 sge v0, v1
return v2
}
; block0:
; subs wzr, w0, #1118208
; cset x0, gt
; ret
;; %f: signed counterpart of %b -- `sge` against 0x111000 (decimal 1118208).
;; The expected output uses the constant unchanged as the `subs` immediate and
;; keeps the `ge` condition.
function %f(i32) -> i8 {
block0(v0: i32):
v1 = iconst.i32 0x111000
v2 = icmp.i32 sge v0, v1
return v2
}
; block0:
; subs wzr, w0, #1118208
; cset x0, ge
; ret
;; %g: signed counterpart of %c -- `sge` against the odd constant 0x111111.
;; The expected output builds the constant in w2 (movz #4369 = 0x1111, then
;; movk #17 = 0x11 shifted left 16) and does a register-register `subs` with
;; the original `ge` condition.
;; NOTE(review): presumably neither 0x111111 nor 0x111110 fits the immediate
;; form, so the rewrite does not help here -- confirm against the encoding rules.
function %g(i32) -> i8 {
block0(v0: i32):
v1 = iconst.i32 0x111111
v2 = icmp.i32 sge v0, v1
return v2
}
; block0:
; movz w2, #4369
; movk w2, w2, #17, LSL #16
; subs wzr, w0, w2
; cset x0, ge
; ret
;; %h: signed counterpart of %d -- `sge` against the even constant 0x111110.
;; The expected output builds the constant in w2 (movz #4368 = 0x1110, then
;; movk #17 = 0x11 shifted left 16) and does a register-register `subs` with
;; the `ge` condition.
;; NOTE(review): presumably the constant does not fit the immediate form --
;; confirm against the encoding rules.
function %h(i32) -> i8 {
block0(v0: i32):
v1 = iconst.i32 0x111110
v2 = icmp.i32 sge v0, v1
return v2
}
; block0:
; movz w2, #4368
; movk w2, w2, #17, LSL #16
; subs wzr, w0, w2
; cset x0, ge
; ret