aarch64: Migrate bitrev/clz/cls/ctz to ISLE (#3658)

This commit migrates these existing instructions to ISLE from the manual
lowerings implemented today. This was mostly straightforward but while I
was at it I fixed what appeared to be broken translations for I{8,16}
for `clz`, `cls`, and `ctz`. Previously the lowerings would produce
results as if the input were 32 bits wide, but now I believe they all
correctly account for the bit-width.
This commit is contained in:
Alex Crichton
2022-01-06 15:18:32 -06:00
committed by GitHub
parent 7fd78da23f
commit 72e2b7fe80
9 changed files with 1040 additions and 608 deletions

View File

@@ -1004,3 +1004,128 @@
(lo Reg (orr64 (value_regs_get rshift 0) (value_regs_get lshift 0)))
)
(value_regs lo hi)))
;;;; Rules for `bitrev` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 8-bit `bitrev`: a 32-bit `rbit` leaves the reversed byte in bits 31..24 of
;; the register, so a logical right shift by 24 moves it back down to bits 7..0.
(rule (lower (has_type $I8 (bitrev x)))
      (let (
          (reversed Reg (rbit32 (put_in_reg x)))
        )
        (value_reg (lsr32_imm reversed (imm_shift_from_u8 24)))))
;; 16-bit `bitrev`: a 32-bit `rbit` leaves the reversed halfword in bits 31..16
;; of the register, so a logical right shift by 16 moves it back down to
;; bits 15..0.
(rule (lower (has_type $I16 (bitrev x)))
      (let (
          (reversed Reg (rbit32 (put_in_reg x)))
        )
        (value_reg (lsr32_imm reversed (imm_shift_from_u8 16)))))
;; 32-bit `bitrev` maps directly onto a single 32-bit `rbit`.
(rule (lower (has_type $I32 (bitrev x)))
      (let (
          (src Reg (put_in_reg x))
        )
        (value_reg (rbit32 src))))
;; 64-bit `bitrev` maps directly onto a single 64-bit `rbit`.
(rule (lower (has_type $I64 (bitrev x)))
      (let (
          (src Reg (put_in_reg x))
        )
        (value_reg (rbit64 src))))
;; 128-bit `bitrev`: reverse each 64-bit half on its own, then swap the halves
;; so that the reversed high half becomes the low half of the result.
(rule (lower (has_type $I128 (bitrev x)))
      (let (
          (input ValueRegs (put_in_regs x))
          (rev_of_lo Reg (rbit64 (value_regs_get input 0)))
          (rev_of_hi Reg (rbit64 (value_regs_get input 1)))
        )
        ;; Result register 0 (low) is the reversed high half, and vice versa.
        (value_regs rev_of_hi rev_of_lo)))
;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 8-bit `clz`: count on the value widened to 32 bits via `put_in_reg_zext32`,
;; then subtract the 24 extra leading zeros that the widening introduced.
(rule (lower (has_type $I8 (clz x)))
      (let (
          (wide_count Reg (clz32 (put_in_reg_zext32 x)))
        )
        (value_reg (sub32_imm wide_count (u8_into_imm12 24)))))
;; 16-bit `clz`: count on the value widened to 32 bits via `put_in_reg_zext32`,
;; then subtract the 16 extra leading zeros that the widening introduced.
(rule (lower (has_type $I16 (clz x)))
      (let (
          (wide_count Reg (clz32 (put_in_reg_zext32 x)))
        )
        (value_reg (sub32_imm wide_count (u8_into_imm12 16)))))
;; 32-bit `clz` maps directly onto a single 32-bit `clz`.
(rule (lower (has_type $I32 (clz x)))
      (let (
          (src Reg (put_in_reg x))
        )
        (value_reg (clz32 src))))
;; 64-bit `clz` maps directly onto a single 64-bit `clz`.
(rule (lower (has_type $I64 (clz x)))
      (let (
          (src Reg (put_in_reg x))
        )
        (value_reg (clz64 src))))
;; 128-bit `clz` is delegated to the `lower_clz128` helper, which is also used
;; by the 128-bit `ctz` lowering.
(rule (lower (has_type $I128 (clz x)))
      (let (
          (input ValueRegs (put_in_regs x))
        )
        (lower_clz128 input)))
;; Helper: count leading zeros of a 128-bit value held in a register pair
;; (index 0 = low half, index 1 = high half). Emits:
;;
;;   clz  hi_clz, hi
;;   clz  lo_clz, lo
;;   lsr  tmp, hi_clz, #6
;;   madd dst_lo, lo_clz, tmp, hi_clz
;;   mov  dst_hi, 0
;;
;; `hi_clz` is at most 64, so `hi_clz >> 6` is 1 exactly when the high half is
;; all zeros and 0 otherwise. The multiply-add therefore yields
;; `hi_clz + lo_clz` when the high half is zero and plain `hi_clz` when it is
;; not. The upper 64 bits of the result are always zero.
(decl lower_clz128 (ValueRegs) ValueRegs)
(rule (lower_clz128 val)
      (let (
          (zeros_in_hi Reg (clz64 (value_regs_get val 1)))
          (zeros_in_lo Reg (clz64 (value_regs_get val 0)))
          (hi_is_zero Reg (lsr64_imm zeros_in_hi (imm_shift_from_u8 6)))
        )
        (value_regs (madd64 zeros_in_lo hi_is_zero zeros_in_hi) (imm $I64 0))))
;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Note that all `ctz` instructions are implemented by reversing the bits and
;; then using a `clz` instruction since the tail zeros are the same as the
;; leading zeros of the reversed value.
;; 8-bit `ctz`: after a 32-bit `rbit` the original byte occupies bits 31..24.
;; OR-ing in bit 23 (0x800000) plants a sentinel just below it so that the
;; following `clz` stops at 8 when the byte itself is zero.
(rule (lower (has_type $I8 (ctz x)))
      (let (
          (reversed Reg (rbit32 (put_in_reg x)))
          (guarded Reg (orr32_imm reversed (u64_into_imm_logic $I32 0x800000)))
        )
        (value_reg (clz32 guarded))))
;; 16-bit `ctz`: after a 32-bit `rbit` the original halfword occupies bits
;; 31..16. OR-ing in bit 15 (0x8000) plants a sentinel just below it so that
;; the following `clz` stops at 16 when the halfword itself is zero.
(rule (lower (has_type $I16 (ctz x)))
      (let (
          (reversed Reg (rbit32 (put_in_reg x)))
          (guarded Reg (orr32_imm reversed (u64_into_imm_logic $I32 0x8000)))
        )
        (value_reg (clz32 guarded))))
;; 32-bit `ctz`: reverse the bits, then count leading zeros.
(rule (lower (has_type $I32 (ctz x)))
      (let (
          (reversed Reg (rbit32 (put_in_reg x)))
        )
        (value_reg (clz32 reversed))))
;; 64-bit `ctz`: reverse the bits, then count leading zeros.
(rule (lower (has_type $I64 (ctz x)))
      (let (
          (reversed Reg (rbit64 (put_in_reg x)))
        )
        (value_reg (clz64 reversed))))
;; 128-bit `ctz`: bit-reverse the whole 128-bit value (reverse each half and
;; swap them), then reuse the 128-bit leading-zero helper on the result.
(rule (lower (has_type $I128 (ctz x)))
      (let (
          (input ValueRegs (put_in_regs x))
          (rev_of_lo Reg (rbit64 (value_regs_get input 0)))
          (rev_of_hi Reg (rbit64 (value_regs_get input 1)))
        )
        ;; The reversed high half becomes the new low half (index 0) and the
        ;; reversed low half becomes the new high half (index 1).
        (lower_clz128 (value_regs rev_of_hi rev_of_lo))))
;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Narrow `cls` (8- and 16-bit): widen the input to 32 bits, count with the
;; 32-bit `cls`, then subtract the number of bits the widening added.
;;
;; The widening must be a *sign* extension. `cls` counts the bits below the
;; sign bit that are equal to the sign bit, so the narrow value's sign has to be
;; replicated into bit 31. With a zero extension every negative narrow input
;; would be seen as a positive 32-bit value: for the i8 value 0xFF the 32-bit
;; count would be 23, giving 23 - 24 = -1 instead of the correct 7. With sign
;; extension the 32-bit count is 31 and 31 - 24 = 7.
(rule (lower (has_type $I8 (cls x)))
      (value_reg (sub32_imm (cls32 (put_in_reg_sext32 x)) (u8_into_imm12 24))))

(rule (lower (has_type $I16 (cls x)))
      (value_reg (sub32_imm (cls32 (put_in_reg_sext32 x)) (u8_into_imm12 16))))
;; 32-bit `cls` maps directly onto a single 32-bit `cls`.
(rule (lower (has_type $I32 (cls x)))
      (let (
          (src Reg (put_in_reg x))
        )
        (value_reg (cls32 src))))
;; 64-bit `cls` maps directly onto a single 64-bit `cls`.
(rule (lower (has_type $I64 (cls x)))
      (let (
          (src Reg (put_in_reg x))
        )
        (value_reg (cls64 src))))
;; 128-bit `cls`. Emits:
;;
;;   cls  lo_cls, lo
;;   cls  hi_cls, hi
;;   eon  eq_bits, hi, lo
;;   lsr  same_sign, eq_bits, #63
;;   madd lo_extra, lo_cls, same_sign, same_sign
;;   cmp  hi_cls, #63
;;   csel lo_part, lo_extra, xzr, eq
;;   add  out_lo, lo_part, hi_cls
;;   mov  out_hi, 0
;;
;; `same_sign` is 1 when the top bit of `lo` equals the top bit of `hi` and 0
;; otherwise, so `lo_extra` is `lo_cls + 1` when the run of sign bits can
;; continue into the low half and 0 when it cannot. That contribution is only
;; added when `hi_cls` is 63, i.e. when every bit of the high half matches the
;; sign bit. The upper 64 bits of the result are always zero.
(rule (lower (has_type $I128 (cls x)))
      (let (
          (input ValueRegs (put_in_regs x))
          (lo_half Reg (value_regs_get input 0))
          (hi_half Reg (value_regs_get input 1))
          (lo_count Reg (cls64 lo_half))
          (hi_count Reg (cls64 hi_half))
          (eq_bits Reg (eon64 hi_half lo_half))
          (same_sign Reg (lsr64_imm eq_bits (imm_shift_from_u8 63)))
          (lo_extra Reg (madd64 lo_count same_sign same_sign))
          (lo_part Reg (with_flags_1
            (cmp64_imm hi_count (u8_into_imm12 63))
            (csel (Cond.Eq) lo_extra (zero_reg))
          ))
        )
        (value_regs (add64 lo_part hi_count) (imm $I64 0))))