aarch64: Migrate bitrev/clz/cls/ctz to ISLE (#3658)
This commit migrates these existing instructions to ISLE from the manual
lowerings implemented today. This was mostly straightforward but while I
was at it I fixed what appeared to be broken translations for I{8,16}
for `clz`, `cls`, and `ctz`. Previously the lowerings would produce
results as if the input were 32 bits wide, but now I believe they all
correctly account for the bit width.
This commit is contained in:
@@ -1004,3 +1004,128 @@
|
||||
(lo Reg (orr64 (value_regs_get rshift 0) (value_regs_get lshift 0)))
|
||||
)
|
||||
(value_regs lo hi)))
|
||||
|
||||
;;;; Rules for `bitrev` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; An 8-bit value reversed with the 32-bit `rbit` ends up in the top 8 bits of
;; the register, so a logical right shift by 24 moves it back into bits 7..0.
(rule (lower (has_type $I8 (bitrev x)))
      (let ((reversed Reg (rbit32 (put_in_reg x))))
        (value_reg (lsr32_imm reversed (imm_shift_from_u8 24)))))
|
||||
|
||||
;; A 16-bit value reversed with the 32-bit `rbit` ends up in the top 16 bits of
;; the register, so a logical right shift by 16 moves it back into bits 15..0.
(rule (lower (has_type $I16 (bitrev x)))
      (let ((reversed Reg (rbit32 (put_in_reg x))))
        (value_reg (lsr32_imm reversed (imm_shift_from_u8 16)))))
|
||||
|
||||
;; 32-bit reversal maps directly onto a single 32-bit `rbit`.
(rule (lower (has_type $I32 (bitrev x)))
      (let ((src Reg (put_in_reg x)))
        (value_reg (rbit32 src))))
|
||||
|
||||
;; 64-bit reversal maps directly onto a single 64-bit `rbit`.
(rule (lower (has_type $I64 (bitrev x)))
      (let ((src Reg (put_in_reg x)))
        (value_reg (rbit64 src))))
|
||||
|
||||
;; Reverse each 64-bit register on its own, then swap the two registers:
;; result register 0 is built from input register 1 and vice versa.
(rule (lower (has_type $I128 (bitrev x)))
      (let ((src ValueRegs (put_in_regs x))
            (rev_of_reg0 Reg (rbit64 (value_regs_get src 0)))
            (rev_of_reg1 Reg (rbit64 (value_regs_get src 1))))
        (value_regs rev_of_reg1 rev_of_reg0)))
|
||||
|
||||
;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Count leading zeros on the zero-extended 32-bit value, then subtract the 24
;; zero bits that the extension itself contributes.
(rule (lower (has_type $I8 (clz x)))
      (let ((wide_count Reg (clz32 (put_in_reg_zext32 x))))
        (value_reg (sub32_imm wide_count (u8_into_imm12 24)))))
|
||||
|
||||
;; Count leading zeros on the zero-extended 32-bit value, then subtract the 16
;; zero bits that the extension itself contributes.
(rule (lower (has_type $I16 (clz x)))
      (let ((wide_count Reg (clz32 (put_in_reg_zext32 x))))
        (value_reg (sub32_imm wide_count (u8_into_imm12 16)))))
|
||||
|
||||
;; 32-bit count-leading-zeros maps directly onto a single 32-bit `clz`.
(rule (lower (has_type $I32 (clz x)))
      (let ((src Reg (put_in_reg x)))
        (value_reg (clz32 src))))
|
||||
|
||||
;; 64-bit count-leading-zeros maps directly onto a single 64-bit `clz`.
(rule (lower (has_type $I64 (clz x)))
      (let ((src Reg (put_in_reg x)))
        (value_reg (clz64 src))))
|
||||
|
||||
;; 128-bit inputs are delegated to the `lower_clz128` helper.
(rule (lower (has_type $I128 (clz x)))
      (let ((parts ValueRegs (put_in_regs x)))
        (lower_clz128 parts)))
|
||||
|
||||
;; Count the leading zeros of a 128-bit value held as two 64-bit registers
;; (register 1 is treated as the high half, register 0 as the low half).
;;
;; Intended instruction sequence:
;;
;;   clz   hi_clz, hi
;;   clz   lo_clz, lo
;;   lsr   tmp, hi_clz, #6
;;   madd  dst_lo, lo_clz, tmp, hi_clz
;;   mov   dst_hi, 0
;;
;; `hi_clz >> 6` is 1 only when `hi_clz` is 64, so the `madd` computes
;; `lo_clz * flag + hi_clz`, adding the low count only in that case.
(decl lower_clz128 (ValueRegs) ValueRegs)
(rule (lower_clz128 val)
      (let ((high_count Reg (clz64 (value_regs_get val 1)))
            (low_count Reg (clz64 (value_regs_get val 0)))
            (use_low Reg (lsr64_imm high_count (imm_shift_from_u8 6)))
            (total Reg (madd64 low_count use_low high_count)))
        (value_regs total (imm $I64 0))))
|
||||
|
||||
;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Note that all `ctz` instructions are implemented by reversing the bits and
|
||||
;; then using a `clz` instruction since the trailing zeros are the same as the
|
||||
;; leading zeros of the reversed value.
|
||||
|
||||
;; After the 32-bit `rbit` the 8 meaningful bits sit in bits 31..24. Bit 23
;; (0x800000) is forced on before the `clz` so the count can never exceed 8.
(rule (lower (has_type $I8 (ctz x)))
      (let ((reversed Reg (rbit32 (put_in_reg x)))
            (guarded Reg (orr32_imm reversed (u64_into_imm_logic $I32 0x800000))))
        (value_reg (clz32 guarded))))
|
||||
|
||||
;; After the 32-bit `rbit` the 16 meaningful bits sit in bits 31..16. Bit 15
;; (0x8000) is forced on before the `clz` so the count can never exceed 16.
(rule (lower (has_type $I16 (ctz x)))
      (let ((reversed Reg (rbit32 (put_in_reg x)))
            (guarded Reg (orr32_imm reversed (u64_into_imm_logic $I32 0x8000))))
        (value_reg (clz32 guarded))))
|
||||
|
||||
;; Trailing zeros of `x` are the leading zeros of the bit-reversed `x`.
(rule (lower (has_type $I32 (ctz x)))
      (let ((reversed Reg (rbit32 (put_in_reg x))))
        (value_reg (clz32 reversed))))
|
||||
|
||||
;; Trailing zeros of `x` are the leading zeros of the bit-reversed `x`.
(rule (lower (has_type $I64 (ctz x)))
      (let ((reversed Reg (rbit64 (put_in_reg x))))
        (value_reg (clz64 reversed))))
|
||||
|
||||
;; Bit-reverse the full 128-bit value (reverse each 64-bit register, then swap
;; the two registers) and reuse the 128-bit leading-zero helper on the result.
(rule (lower (has_type $I128 (ctz x)))
      (let ((src ValueRegs (put_in_regs x))
            (rev_of_reg0 Reg (rbit64 (value_regs_get src 0)))
            (rev_of_reg1 Reg (rbit64 (value_regs_get src 1))))
        (lower_clz128 (value_regs rev_of_reg1 rev_of_reg0))))
|
||||
|
||||
;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `cls` counts how many bits below the sign bit are equal to the sign bit, so
;; the 8-bit input has to be *sign*-extended to 32 bits before using the 32-bit
;; instruction; the 24 extra copies of the sign bit are then subtracted.
;;
;; Zero-extension is wrong here: every negative i8 would look non-negative in
;; 32 bits (e.g. 0xFF -> 0x000000FF gives 23 - 24 = -1 instead of 7).
(rule (lower (has_type $I8 (cls x)))
      (value_reg (sub32_imm (cls32 (put_in_reg_sext32 x)) (u8_into_imm12 24))))
|
||||
|
||||
;; `cls` counts how many bits below the sign bit are equal to the sign bit, so
;; the 16-bit input has to be *sign*-extended to 32 bits before using the
;; 32-bit instruction; the 16 extra copies of the sign bit are then subtracted.
;;
;; Zero-extension is wrong here: every negative i16 would look non-negative in
;; 32 bits (e.g. 0xFFFF -> 0x0000FFFF gives 15 - 16 = -1 instead of 15).
(rule (lower (has_type $I16 (cls x)))
      (value_reg (sub32_imm (cls32 (put_in_reg_sext32 x)) (u8_into_imm12 16))))
|
||||
|
||||
;; 32-bit count-leading-sign-bits maps directly onto a single 32-bit `cls`.
(rule (lower (has_type $I32 (cls x)))
      (let ((src Reg (put_in_reg x)))
        (value_reg (cls32 src))))
|
||||
|
||||
;; 64-bit count-leading-sign-bits maps directly onto a single 64-bit `cls`.
(rule (lower (has_type $I64 (cls x)))
      (let ((src Reg (put_in_reg x)))
        (value_reg (cls64 src))))
|
||||
|
||||
;; 128-bit `cls`, built from the two 64-bit halves. Intended sequence:
;;
;;   cls   lo_cls, lo
;;   cls   hi_cls, hi
;;   eon   sign_eq_eon, hi, lo
;;   lsr   sign_eq, sign_eq_eon, #63
;;   madd  lo_sign_bits, lo_cls, sign_eq, sign_eq
;;   cmp   hi_cls, #63
;;   csel  maybe_lo, lo_sign_bits, xzr, eq
;;   add   out_lo, maybe_lo, hi_cls
;;   mov   out_hi, 0
;;
;; `sign_eq` is bit 63 of `eon hi, lo`, i.e. 1 when the top bits of the two
;; halves agree. The `madd` then yields `lo_cls + 1` in that case and 0
;; otherwise. That contribution is only added when `hi_cls` is 63; in every
;; other case the result is just `hi_cls`.
(rule (lower (has_type $I128 (cls x)))
      (let ((src ValueRegs (put_in_regs x))
            (low Reg (value_regs_get src 0))
            (high Reg (value_regs_get src 1))
            (low_count Reg (cls64 low))
            (high_count Reg (cls64 high))
            (same_sign_bits Reg (eon64 high low))
            (same_sign Reg (lsr64_imm same_sign_bits (imm_shift_from_u8 63)))
            (low_contrib Reg (madd64 low_count same_sign same_sign))
            (extra Reg (with_flags_1
                         (cmp64_imm high_count (u8_into_imm12 63))
                         (csel (Cond.Eq) low_contrib (zero_reg))))
            (total Reg (add64 extra high_count)))
        (value_regs total (imm $I64 0))))
|
||||
|
||||
Reference in New Issue
Block a user