aarch64: Migrate bitrev/clz/cls/ctz to ISLE (#3658)

This commit migrates these existing instructions to ISLE from the manual lowerings implemented today. This was mostly straightforward, but while I was at it I fixed what appeared to be broken translations for I{8,16} for `clz`, `cls`, and `ctz`. Previously the lowerings would produce results as if the input were 32 bits wide, but now I believe they all correctly account for the bit width.
2022-01-06 15:18:32 -06:00
parent 7fd78da23f
commit 72e2b7fe80
9 changed files with 1040 additions and 608 deletions
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -1004,3 +1004,128 @@
          (lo Reg (orr64 (value_regs_get rshift 0) (value_regs_get lshift 0)))
        )
        (value_regs lo hi)))
+
+;;;; Rules for `bitrev` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; A 32-bit `rbit` leaves the reversed 8-bit value in bits 31..24 of the
+;; result, so a logical shift right by 24 moves it back down to bits 7..0.
+(rule (lower (has_type $I8 (bitrev x)))
+      (let ((reversed Reg (rbit32 (put_in_reg x))))
+        (value_reg (lsr32_imm reversed (imm_shift_from_u8 24)))))
+
+;; A 32-bit `rbit` leaves the reversed 16-bit value in bits 31..16 of the
+;; result, so a logical shift right by 16 moves it back down to bits 15..0.
+(rule (lower (has_type $I16 (bitrev x)))
+      (let ((reversed Reg (rbit32 (put_in_reg x))))
+        (value_reg (lsr32_imm reversed (imm_shift_from_u8 16)))))
+
+;; Register-width types need no fix-up: reverse the bits at the matching
+;; operand size and use the result directly.
+(rule (lower (has_type $I32 (bitrev x)))
+      (let ((src Reg (put_in_reg x)))
+        (value_reg (rbit32 src))))
+
+(rule (lower (has_type $I64 (bitrev x)))
+      (let ((src Reg (put_in_reg x)))
+        (value_reg (rbit64 src))))
+
+;; A 128-bit reversal reverses each 64-bit half and then swaps the halves: the
+;; reversed high half becomes the new low half and vice versa.
+(rule (lower (has_type $I128 (bitrev x)))
+      (let (
+          (src ValueRegs (put_in_regs x))
+          (new_hi Reg (rbit64 (value_regs_get src 0)))
+          (new_lo Reg (rbit64 (value_regs_get src 1)))
+        )
+        (value_regs new_lo new_hi)))
+
+;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Narrow inputs are zero-extended to 32 bits, which adds 24 (for I8) or 16
+;; (for I16) leading zeros to the 32-bit count; subtract them back out.
+(rule (lower (has_type $I8 (clz x)))
+      (let ((wide_count Reg (clz32 (put_in_reg_zext32 x))))
+        (value_reg (sub32_imm wide_count (u8_into_imm12 24)))))
+
+(rule (lower (has_type $I16 (clz x)))
+      (let ((wide_count Reg (clz32 (put_in_reg_zext32 x))))
+        (value_reg (sub32_imm wide_count (u8_into_imm12 16)))))
+
+;; Register-width types use the count-leading-zeros helper of the matching
+;; operand size directly.
+(rule (lower (has_type $I32 (clz x)))
+      (let ((src Reg (put_in_reg x)))
+        (value_reg (clz32 src))))
+
+(rule (lower (has_type $I64 (clz x)))
+      (let ((src Reg (put_in_reg x)))
+        (value_reg (clz64 src))))
+
+;; 128-bit inputs are fetched as a multi-register value and handed to the
+;; `lower_clz128` helper, which produces the two-register result.
+(rule (lower (has_type $I128 (clz x)))
+      (let ((pair ValueRegs (put_in_regs x)))
+        (lower_clz128 pair)))
+
+;; Counts the leading zeros of a 128-bit value held in two registers (index 0
+;; is the low half, index 1 the high half). The low half only contributes when
+;; the high half is entirely zero, i.e. when `clz(hi)` is 64 -- the only count
+;; that has bit 6 set:
+;;
+;; clz  hi_count, hi
+;; clz  lo_count, lo
+;; lsr  hi_is_zero, hi_count, #6
+;; madd dst_lo, lo_count, hi_is_zero, hi_count
+;; mov  dst_hi, 0
+(decl lower_clz128 (ValueRegs) ValueRegs)
+(rule (lower_clz128 val)
+      (let (
+        (hi_count Reg (clz64 (value_regs_get val 1)))
+        (lo_count Reg (clz64 (value_regs_get val 0)))
+        (hi_is_zero Reg (lsr64_imm hi_count (imm_shift_from_u8 6)))
+        (total Reg (madd64 lo_count hi_is_zero hi_count))
+        (upper Reg (imm $I64 0))
+      )
+      (value_regs total upper)))
+
+;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Note that all `ctz` instructions are implemented by reversing the bits and
+;; then using a `clz` instruction, since the trailing zeros of a value are the
+;; leading zeros of its bit-reversed form.
+
+;; After a 32-bit `rbit` the narrow value sits in the top 8 (or 16) bits.
+;; OR-ing in the bit just below that field (bit 23 for I8, bit 15 for I16)
+;; stops the 32-bit `clz` there, so the count never exceeds the type's width
+;; and whatever was in the upper bits of the input register is ignored.
+(rule (lower (has_type $I8 (ctz x)))
+      (let (
+        (reversed Reg (rbit32 (put_in_reg x)))
+        (capped Reg (orr32_imm reversed (u64_into_imm_logic $I32 0x800000)))
+      )
+      (value_reg (clz32 capped))))
+
+(rule (lower (has_type $I16 (ctz x)))
+      (let (
+        (reversed Reg (rbit32 (put_in_reg x)))
+        (capped Reg (orr32_imm reversed (u64_into_imm_logic $I32 0x8000)))
+      )
+      (value_reg (clz32 capped))))
+
+;; Register-width types: reverse the bits, then count the leading zeros of the
+;; reversed value at the same operand size.
+(rule (lower (has_type $I32 (ctz x)))
+      (let ((reversed Reg (rbit32 (put_in_reg x))))
+        (value_reg (clz32 reversed))))
+
+(rule (lower (has_type $I64 (ctz x)))
+      (let ((reversed Reg (rbit64 (put_in_reg x))))
+        (value_reg (clz64 reversed))))
+
+;; Bit-reverse the full 128-bit value (reverse each half, then swap the
+;; halves) and count the leading zeros of the result with `lower_clz128`.
+(rule (lower (has_type $I128 (ctz x)))
+      (let (
+        (src ValueRegs (put_in_regs x))
+        (lo_rev Reg (rbit64 (value_regs_get src 0)))
+        (hi_rev Reg (rbit64 (value_regs_get src 1)))
+        (reversed ValueRegs (value_regs hi_rev lo_rev))
+      )
+      (lower_clz128 reversed)))
+
+;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; `cls` counts the bits below the sign bit that are copies of it, so a narrow
+;; input must be sign-extended (not zero-extended) to 32 bits before using the
+;; 32-bit instruction. Sign extension adds 24 extra copies of the sign bit for
+;; I8, which are subtracted afterwards. Zero-extending instead would treat
+;; every negative input as non-negative, e.g. `cls.i8 0xff` would come out as
+;; 23 - 24 = -1 rather than 7.
+;;
+;; NOTE(review): relies on a `put_in_reg_sext32` helper being declared next to
+;; `put_in_reg_zext32` -- confirm it is available in this ISLE prelude.
+(rule (lower (has_type $I8 (cls x)))
+      (value_reg (sub32_imm (cls32 (put_in_reg_sext32 x)) (u8_into_imm12 24))))
+
+;; Same scheme for I16: sign-extend, count, then subtract the 16 extra copies
+;; of the sign bit that the extension introduced.
+(rule (lower (has_type $I16 (cls x)))
+      (value_reg (sub32_imm (cls32 (put_in_reg_sext32 x)) (u8_into_imm12 16))))
+
+;; Register-width types use the count-leading-sign-bits helper of the matching
+;; operand size directly.
+(rule (lower (has_type $I32 (cls x)))
+      (let ((src Reg (put_in_reg x)))
+        (value_reg (cls32 src))))
+
+(rule (lower (has_type $I64 (cls x)))
+      (let ((src Reg (put_in_reg x)))
+        (value_reg (cls64 src))))
+
+;; The result is `cls(hi)` plus a contribution from the low half that is only
+;; added when the `cls(hi) == 63` comparison succeeds:
+;;
+;; cls  lo_count, lo
+;; cls  hi_count, hi
+;; eon  same_sign_raw, hi, lo
+;; lsr  same_sign, same_sign_raw, #63
+;; madd lo_extra, lo_count, same_sign, same_sign
+;; cmp  hi_count, #63
+;; csel lo_part, lo_extra, xzr, eq
+;; add  out_lo, lo_part, hi_count
+;; mov  out_hi, 0
+;;
+;; `same_sign` is 1 when the top bits of `hi` and `lo` agree and 0 otherwise,
+;; so `lo_extra` is `lo_count + 1` in the former case and 0 in the latter.
+(rule (lower (has_type $I128 (cls x)))
+      (let (
+          (halves ValueRegs (put_in_regs x))
+          (lo Reg (value_regs_get halves 0))
+          (hi Reg (value_regs_get halves 1))
+          (lo_count Reg (cls64 lo))
+          (hi_count Reg (cls64 hi))
+          (same_sign_raw Reg (eon64 hi lo))
+          (same_sign Reg (lsr64_imm same_sign_raw (imm_shift_from_u8 63)))
+          (lo_extra Reg (madd64 lo_count same_sign same_sign))
+          (lo_part Reg (with_flags_1
+            (cmp64_imm hi_count (u8_into_imm12 63))
+            (csel (Cond.Eq) lo_extra (zero_reg))))
+          (total Reg (add64 lo_part hi_count))
+          (zero_hi Reg (imm $I64 0))
+        )
+        (value_regs total zero_hi)))