aarch64: Migrate {s,u}{div,rem} to ISLE (#3572)

* aarch64: Migrate {s,u}{div,rem} to ISLE

  This commit migrates four different instructions at once to ISLE:

  * `sdiv`
  * `udiv`
  * `srem`
  * `urem`

  These all share similar codegen and are centered around the `div` instruction, which they use internally. The main feature of these was to model the manual traps since the `div` instruction doesn't trap on overflow, instead requiring manual checks to adhere to the semantics of the instruction itself.

  While I was here I went ahead and implemented an optimization for these instructions when the right-hand-side is a constant with a known value. For `udiv`, `srem`, and `urem` if the right-hand-side is a nonzero constant then the checks for traps can be skipped entirely. For `sdiv` if the constant is not 0 and not -1 then additionally all checks can be elided. Finally if the right-hand-side of `sdiv` is -1 the zero-check is elided, but it still needs a check for `i64::MIN` on the left-hand-side and currently there's a TODO where `-1` is still checked too.

* Rebasing and review conflicts
2021-12-13 17:27:11 -06:00
parent f1225dfd93
commit 20e090b114
12 changed files with 567 additions and 215 deletions
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -398,3 +398,107 @@
        )
        (value_reg result)))

+;;;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Unsigned division for every integer type that fits in 64 bits.
+;;
+;; TODO: Add UDiv32 to implement 32-bit directly, rather than extending the
+;; input.
+;;
+;; aarch64's `udiv` doesn't trap, so to respect the semantics of CLIF's `udiv`
+;; the check for a zero divisor has to be performed manually. That check is
+;; delegated to `put_nonzero_in_reg_zext64`.
+(rule (lower (has_type (fits_in_64 ty) (udiv x y)))
+      (let (
+          (dividend Reg (put_in_reg_zext64 x))
+          (divisor Reg (put_nonzero_in_reg_zext64 y))
+          (quotient Reg (alu_rrr (ALUOp.UDiv64) dividend divisor))
+        )
+        (value_reg quotient)))
+
+;; Places a `Value` into a `Reg` with `put_in_reg_zext64` and then passes that
+;; register through `trap_if_zero_divisor` to validate that it's nonzero.
+(decl put_nonzero_in_reg_zext64 (Value) Reg)
+(rule (put_nonzero_in_reg_zext64 divisor)
+      (let ((extended Reg (put_in_reg_zext64 divisor)))
+        (trap_if_zero_divisor extended)))
+
+;; Special case where if a `Value` is known to be nonzero we can trivially
+;; move it into a register.
+;;
+;; This matches a divisor defined by an `iconst` whose immediate is accepted by
+;; `nonzero_u64_from_imm64`. The constant is materialized with `imm` and
+;; `trap_if_zero_divisor` is not used on this path.
+;;
+;; NOTE(review): `n` is handed to `imm` as-is, whereas the generic rule goes
+;; through `put_in_reg_zext64`. Confirm that for types narrower than 64 bits
+;; both paths leave the same value in the register.
+(rule (put_nonzero_in_reg_zext64 (and (value_type ty)
+                                      (def_inst (iconst (nonzero_u64_from_imm64 n)))))
+      (imm ty n))
+
+;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Signed division for every integer type that fits in 64 bits.
+;;
+;; TODO: Add SDiv32 to implement 32-bit directly, rather than extending the
+;; input.
+;;
+;; Note that the div instruction does not trap on divide by zero or overflow,
+;; so checks need to be manually inserted. The sequence of checks here should
+;; look like:
+;;
+;;   cbnz rm, #8
+;;   udf ; divide by zero
+;;   cmn rm, 1
+;;   ccmp rn, 1, #nzcv, eq
+;;   b.vc #8
+;;   udf ; signed overflow
+;;
+;; TODO: if `y` is -1 then a check that `x` is not INT_MIN is all that's
+;; necessary, but right now `y` is checked to not be -1 as well.
+(rule (lower (has_type (fits_in_64 ty) (sdiv x y)))
+      (let (
+          (dividend Reg (put_in_reg_sext64 x))
+          (divisor Reg (put_nonzero_in_reg_sext64 y))
+        )
+        ;; The dividend handed to the division is the register returned by the
+        ;; overflow check, not the unchecked `dividend`.
+        (value_reg (alu_rrr (ALUOp.SDiv64)
+                            (trap_if_div_overflow ty dividend divisor)
+                            divisor))))
+
+;; Helper for extracting an immediate that's not 0 and not -1 from an imm64.
+;;
+;; This is an external extractor: only the declaration appears here. In the
+;; `sdiv` constant-divisor rule it is applied to the immediate of an `iconst`
+;; and binds the divisor as a `u64`.
+(decl safe_divisor_from_imm64 (u64) Imm64)
+(extern extractor safe_divisor_from_imm64 safe_divisor_from_imm64)
+
+;; Special case for `sdiv` by a constant accepted by `safe_divisor_from_imm64`
+;; (not 0 and not -1): the trap checks would always pass, so none are emitted
+;; and the constant is materialized directly with `imm`.
+;;
+;; NOTE(review): `y` is handed to `imm` as-is while the dividend goes through
+;; `put_in_reg_sext64`. Confirm that for types narrower than 64 bits the
+;; constant ends up extended consistently with the dividend.
+(rule (lower (has_type (fits_in_64 ty) (sdiv x (def_inst (iconst (safe_divisor_from_imm64 y))))))
+      (let (
+          (dividend Reg (put_in_reg_sext64 x))
+          (divisor Reg (imm ty y))
+          (quotient Reg (alu_rrr (ALUOp.SDiv64) dividend divisor))
+        )
+        (value_reg quotient)))
+
+;; Places a `Value` into a `Reg` with `put_in_reg_sext64` and then passes that
+;; register through `trap_if_zero_divisor` to validate that it's nonzero.
+(decl put_nonzero_in_reg_sext64 (Value) Reg)
+(rule (put_nonzero_in_reg_sext64 divisor)
+      (let ((extended Reg (put_in_reg_sext64 divisor)))
+        (trap_if_zero_divisor extended)))
+
+;; Note that this has a special case where if the `Value` is a constant that's
+;; not zero we can skip the zero check.
+;;
+;; This matches a divisor defined by an `iconst` whose immediate is accepted by
+;; `nonzero_u64_from_imm64`. The constant is materialized with `imm` and
+;; `trap_if_zero_divisor` is not used on this path.
+;;
+;; NOTE(review): `n` is handed to `imm` as-is, whereas the generic rule goes
+;; through `put_in_reg_sext64`. Confirm that for types narrower than 64 bits
+;; both paths leave the same value in the register.
+(rule (put_nonzero_in_reg_sext64 (and (value_type ty)
+                                      (def_inst (iconst (nonzero_u64_from_imm64 n)))))
+      (imm ty n))
+
+;;;; Rules for `urem` and `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Remainder (x % y) is implemented as:
+;;
+;;   tmp = x / y
+;;   result = x - (tmp*y)
+;;
+;; use 'result' for tmp and you have:
+;;
+;;   cbnz y, #8         ; branch over trap
+;;   udf                ; divide by zero
+;;   div rd, x, y       ; rd = x / y
+;;   msub rd, rd, y, x  ; rd = x - rd * y
+
+;; Unsigned remainder: divide with `UDiv64`, then use `MSub64` to compute
+;; `x - (x / y) * y`. The divisor comes from `put_nonzero_in_reg_zext64`, which
+;; is where the divide-by-zero handling lives.
+(rule (lower (has_type (fits_in_64 ty) (urem x y)))
+      (let (
+          (dividend Reg (put_in_reg_zext64 x))
+          (divisor Reg (put_nonzero_in_reg_zext64 y))
+          (quotient Reg (alu_rrr (ALUOp.UDiv64) dividend divisor))
+        )
+        (value_reg (alu_rrrr (ALUOp3.MSub64) quotient divisor dividend))))
+
+;; Signed remainder: divide with `SDiv64`, then use `MSub64` to compute
+;; `x - (x / y) * y`. The divisor comes from `put_nonzero_in_reg_sext64`, which
+;; is where the divide-by-zero handling lives; no overflow check is inserted
+;; by this rule.
+(rule (lower (has_type (fits_in_64 ty) (srem x y)))
+      (let (
+          (dividend Reg (put_in_reg_sext64 x))
+          (divisor Reg (put_nonzero_in_reg_sext64 y))
+          (quotient Reg (alu_rrr (ALUOp.SDiv64) dividend divisor))
+        )
+        (value_reg (alu_rrrr (ALUOp3.MSub64) quotient divisor dividend))))