aarch64: Migrate {s,u}{div,rem} to ISLE (#3572)

* aarch64: Migrate {s,u}{div,rem} to ISLE

This commit migrates four different instructions at once to ISLE:

* `sdiv`
* `udiv`
* `srem`
* `urem`

These all share similar codegen centered around the hardware `div`
instruction used internally. The main work was modeling the traps
manually, since the AArch64 `div` instruction doesn't trap on division
by zero or overflow; explicit checks are required to adhere to the
semantics of the CLIF instructions themselves.
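
As a rough illustration (this is not the actual ISLE lowering), the runtime checks those semantics require can be sketched in Rust as follows; the function names are made up for the example, while `int_divz` and `int_ovf` refer to the corresponding Cranelift trap codes:

```rust
// A minimal sketch of the checks Cranelift's division semantics require
// on AArch64, where the hardware `sdiv`/`udiv` never trap on their own.

fn sdiv64(lhs: i64, rhs: i64) -> Result<i64, &'static str> {
    if rhs == 0 {
        // Modeled in the generated code by `cbnz x1, 8 ; udf`.
        return Err("int_divz");
    }
    if lhs == i64::MIN && rhs == -1 {
        // Modeled by the `adds` / `ccmp` / `b.vc 8 ; udf` sequence.
        return Err("int_ovf");
    }
    Ok(lhs / rhs)
}

fn udiv64(lhs: u64, rhs: u64) -> Result<u64, &'static str> {
    // `udiv`, `srem`, and `urem` only need the divide-by-zero check.
    if rhs == 0 {
        return Err("int_divz");
    }
    Ok(lhs / rhs)
}
```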

While I was here I also implemented an optimization for these
instructions when the right-hand side is a constant with a known value.
For `udiv`, `srem`, and `urem`, if the right-hand side is a nonzero
constant then the trap checks can be skipped entirely. For `sdiv`, if
the constant is neither 0 nor -1 then all checks can likewise be
elided. Finally, if the right-hand side of `sdiv` is -1 the zero check
is elided, but a check for `i64::MIN` on the left-hand side is still
needed, and currently there's a TODO where `-1` is checked again as well.
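
The constant-divisor special casing boils down to a small decision about which checks remain; here's a sketch under the same caveat (illustrative names, not the backend's actual helpers):

```rust
// Which runtime checks are still needed when the divisor is a known constant.
enum Checks {
    None,         // nonzero constant (and not -1 for `sdiv`): elide everything
    OverflowOnly, // `sdiv` by -1: only the lhs == i64::MIN check remains
    Full,         // constant 0: keep the checks (the division always traps)
}

fn checks_for_const_divisor(is_sdiv: bool, divisor: i64) -> Checks {
    match divisor {
        0 => Checks::Full,
        -1 if is_sdiv => Checks::OverflowOnly,
        _ => Checks::None,
    }
}
```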

* Rebasing and review conflicts
Author: Alex Crichton
Date: 2021-12-13 17:27:11 -06:00
Committed by: GitHub
Parent: f1225dfd93
Commit: 20e090b114
12 changed files with 567 additions and 215 deletions


@@ -54,12 +54,11 @@ block0(v0: i64, v1: i64):
return v2
}
; check: sdiv x2, x0, x1
; nextln: cbnz x1, 8 ; udf
; check: cbnz x1, 8 ; udf
; nextln: adds xzr, x1, #1
; nextln: ccmp x0, #1, #nzcv, eq
; nextln: b.vc 8 ; udf
; nextln: mov x0, x2
; nextln: sdiv x0, x0, x1
; nextln: ret
function %f7(i64) -> i64 {
@@ -69,13 +68,8 @@ block0(v0: i64):
return v2
}
; check: movz x2, #2
; nextln: sdiv x1, x0, x2
; nextln: cbnz x2, 8 ; udf
; nextln: adds xzr, x2, #1
; nextln: ccmp x0, #1, #nzcv, eq
; nextln: b.vc 8 ; udf
; nextln: mov x0, x1
; check: orr x1, xzr, #2
; nextln: sdiv x0, x0, x1
; nextln: ret
function %f8(i64, i64) -> i64 {
@@ -84,8 +78,8 @@ block0(v0: i64, v1: i64):
return v2
}
; check: udiv x0, x0, x1
; nextln: cbnz x1, 8 ; udf
; check: cbnz x1, 8 ; udf
; nextln: udiv x0, x0, x1
; nextln: ret
function %f9(i64) -> i64 {
@@ -95,9 +89,8 @@ block0(v0: i64):
return v2
}
; check: movz x1, #2
; check: orr x1, xzr, #2
; nextln: udiv x0, x0, x1
; nextln: cbnz x1, 8 ; udf
; nextln: ret
function %f10(i64, i64) -> i64 {
@@ -106,8 +99,8 @@ block0(v0: i64, v1: i64):
return v2
}
; check: sdiv x2, x0, x1
; nextln: cbnz x1, 8 ; udf
; check: cbnz x1, 8 ; udf
; nextln: sdiv x2, x0, x1
; nextln: msub x0, x2, x1, x0
; nextln: ret
@@ -117,8 +110,8 @@ block0(v0: i64, v1: i64):
return v2
}
; check: udiv x2, x0, x1
; nextln: cbnz x1, 8 ; udf
; check: cbnz x1, 8 ; udf
; nextln: udiv x2, x0, x1
; nextln: msub x0, x2, x1, x0
; nextln: ret
@@ -129,13 +122,13 @@ block0(v0: i32, v1: i32):
return v2
}
; check: sxtw x3, w0
; nextln: sxtw x2, w1
; nextln: sdiv x0, x3, x2
; nextln: cbnz x2, 8 ; udf
; nextln: adds wzr, w2, #1
; nextln: ccmp w3, #1, #nzcv, eq
; check: sxtw x0, w0
; nextln: sxtw x1, w1
; nextln: cbnz x1, 8 ; udf
; nextln: adds wzr, w1, #1
; nextln: ccmp w0, #1, #nzcv, eq
; nextln: b.vc 8 ; udf
; nextln: sdiv x0, x0, x1
; nextln: ret
function %f13(i32) -> i32 {
@@ -145,15 +138,9 @@ block0(v0: i32):
return v2
}
; check: sxtw x0, w0
; nextln: movz x1, #2
; nextln: sxtw x2, w1
; nextln: sdiv x1, x0, x2
; nextln: cbnz x2, 8 ; udf
; nextln: adds wzr, w2, #1
; nextln: ccmp w0, #1, #nzcv, eq
; nextln: b.vc 8 ; udf
; nextln: mov x0, x1
; check: sxtw x0, w0
; nextln: orr x1, xzr, #2
; nextln: sdiv x0, x0, x1
; nextln: ret
function %f14(i32, i32) -> i32 {
@@ -164,8 +151,8 @@ block0(v0: i32, v1: i32):
; check: mov w0, w0
; nextln: mov w1, w1
; nextln: udiv x0, x0, x1
; nextln: cbnz x1, 8 ; udf
; nextln: udiv x0, x0, x1
; nextln: ret
@@ -176,10 +163,9 @@ block0(v0: i32):
return v2
}
; check: mov w0, w0
; nextln: movz x1, #2
; check: mov w0, w0
; nextln: orr x1, xzr, #2
; nextln: udiv x0, x0, x1
; nextln: cbnz x1, 8 ; udf
; nextln: ret
function %f16(i32, i32) -> i32 {
@@ -190,8 +176,8 @@ block0(v0: i32, v1: i32):
; check: sxtw x0, w0
; nextln: sxtw x1, w1
; nextln: sdiv x2, x0, x1
; nextln: cbnz x1, 8 ; udf
; nextln: sdiv x2, x0, x1
; nextln: msub x0, x2, x1, x0
; nextln: ret
@@ -203,8 +189,8 @@ block0(v0: i32, v1: i32):
; check: mov w0, w0
; nextln: mov w1, w1
; nextln: udiv x2, x0, x1
; nextln: cbnz x1, 8 ; udf
; nextln: udiv x2, x0, x1
; nextln: msub x0, x2, x1, x0
; nextln: ret
@@ -389,3 +375,40 @@ block0(v0: i32, v1: i32, v2: i32):
; check: madd w0, w1, w2, w0
; nextln: ret
function %srem_const (i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 2
v2 = srem.i64 v0, v1
return v2
}
; check: orr x1, xzr, #2
; nextln: sdiv x2, x0, x1
; nextln: msub x0, x2, x1, x0
; nextln: ret
function %urem_const (i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 2
v2 = urem.i64 v0, v1
return v2
}
; check: orr x1, xzr, #2
; nextln: udiv x2, x0, x1
; nextln: msub x0, x2, x1, x0
; nextln: ret
function %sdiv_minus_one(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 -1
v2 = sdiv.i64 v0, v1
return v2
}
; check: movn x1, #0
; nextln: adds xzr, x1, #1
; nextln: ccmp x0, #1, #nzcv, eq
; nextln: b.vc 8 ; udf
; nextln: sdiv x0, x0, x1
; nextln: ret