;; aarch64 instruction selection and CLIF-to-MachInst lowering.

;; Top-level lowering constructor. Given a CLIF `Inst`, it yields the
;; register(s) that hold the result value(s) of the lowered instruction.
(decl lower (Inst) ValueRegs)

;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; The constant is pulled out with `u64_from_imm64` and handed to `imm`.
(rule (lower (has_type ty (iconst (u64_from_imm64 bits))))
      (let ((dst Reg (imm ty bits)))
        (value_reg dst)))

;;;; Rules for `bconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `false` becomes the immediate 0 ...
(rule (lower (has_type ty (bconst $false)))
      (let ((dst Reg (imm ty 0)))
        (value_reg dst)))

;; ... and `true` becomes the immediate 1.
(rule (lower (has_type ty (bconst $true)))
      (let ((dst Reg (imm ty 1)))
        (value_reg dst)))

;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `null` is lowered to the immediate 0 of the requested type.
(rule (lower (has_type ty (null)))
      (let ((dst Reg (imm ty 0)))
        (value_reg dst)))

;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `i64` and smaller

;; Fallback: both operands are placed in registers and added.
(rule (lower (has_type (fits_in_64 ty) (iadd lhs rhs)))
      (let ((sum Reg (add ty (put_in_reg lhs) (put_in_reg rhs))))
        (value_reg sum)))

;; One operand is an immediate that fits in 12 bits: use the add-immediate
;; form. Both operand positions are matched.
(rule (lower (has_type (fits_in_64 ty) (iadd lhs (imm12_from_value k))))
      (let ((sum Reg (add_imm ty (put_in_reg lhs) k)))
        (value_reg sum)))

(rule (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value k) rhs)))
      (let ((sum Reg (add_imm ty (put_in_reg rhs) k)))
        (value_reg sum)))

;; The negated immediate fits in 12 bits: turn the addition into a
;; subtraction of that negated immediate. Both operand positions are matched.
(rule (lower (has_type (fits_in_64 ty) (iadd lhs (imm12_from_negated_value k))))
      (let ((diff Reg (sub_imm ty (put_in_reg lhs) k)))
        (value_reg diff)))

(rule (lower (has_type (fits_in_64 ty) (iadd (imm12_from_negated_value k) rhs)))
      (let ((diff Reg (sub_imm ty (put_in_reg rhs) k)))
        (value_reg diff)))

;; Special cases for when we're adding an extended register where the extending
;; operation can get folded into the add itself.
(rule (lower (has_type (fits_in_64 ty) (iadd x (extended_value_from_value y))))
      (value_reg (add_extend ty (put_in_reg x) y)))

(rule (lower (has_type (fits_in_64 ty) (iadd (extended_value_from_value x) y)))
      (value_reg (add_extend ty (put_in_reg y) x)))

;; Special cases for when we're adding the shift of a different
;; register by a constant amount and the shift can get folded into the add.
;;
;; NOTE(review): this rule was cut off after `lshl_from_imm64` in the text this
;; was recovered from (everything inside `<...>` was stripped). The extractor
;; arguments and the `add_shift` body below are a reconstruction -- confirm
;; against the original file.
(rule (lower (has_type (fits_in_64 ty)
                       (iadd x (def_inst (ishl y (def_inst (iconst (lshl_from_imm64 <ty amt))))))))
      (value_reg (add_shift ty (put_in_reg x) (put_in_reg y) amt)))

;; NOTE(review): the definitions that originally sat between the rule above and
;; the `bnot` rules below were lost together with the stripped text and are not
;; recoverable from here -- restore them from the original file.

;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Base case: bitwise negation is emitted as
;;
;;      ORR_NOT rd, zero, rm
(rule (lower (has_type (fits_in_64 ty) (bnot x)))
      (value_reg (orr_not ty (zero_reg) (put_in_reg x))))

;; Special case to use `orr_not_shift` if it's a `bnot` of a const-left-shifted
;; value.
;;
;; NOTE(review): this rule was also cut off after `lshl_from_imm64`; the
;; extractor arguments and body are reconstructed to mirror the base case with
;; the shift amount appended -- confirm against the original file.
(rule (lower (has_type (fits_in_64 ty)
                       (bnot (def_inst (ishl x (def_inst (iconst (lshl_from_imm64 <ty amt))))))))
      (value_reg (orr_not_shift ty (zero_reg) (put_in_reg x) amt)))

;; NOTE(review): the definitions that originally sat between the rule above and
;; the rotate helpers below (including whatever rules call `small_rotr` and
;; `small_rotr_imm`) are likewise missing -- restore them from the original file.

;; Helper that expands a rotate-right of `val` by a register amount for type
;; `ty` using 32-bit operations:
;;
;;     rotr rd, val, amt
;;
;;       =>
;;
;;     and masked_amt, amt, <rotr_mask(ty)>
;;     sub tmp_sub, masked_amt, <ty_bits(ty)>
;;     sub neg_amt, zero, tmp_sub  ; neg
;;     lsr val_rshift, val, masked_amt
;;     lsl val_lshift, val, neg_amt
;;     orr rd, val_lshift, val_rshift
(decl small_rotr (Type Reg Reg) Reg)
(rule (small_rotr ty val amt)
      (let (
          (masked_amt Reg (and32_imm amt (rotr_mask ty)))
          (tmp_sub Reg (sub32_imm masked_amt (u8_into_imm12 (ty_bits ty))))
          (neg_amt Reg (sub32 (zero_reg) tmp_sub))
          (val_rshift Reg (lsr32 val masked_amt))
          (val_lshift Reg (lsl32 val neg_amt))
        )
        (orr32 val_lshift val_rshift)))

;; Produces the `ImmLogic` mask applied to the rotate amount for `ty`.
;; Implemented outside of ISLE (extern constructor).
(decl rotr_mask (Type) ImmLogic)
(extern constructor rotr_mask rotr_mask)

;; For a constant amount, we can instead do:
;;
;;     rotr rd, val, #amt
;;
;;       =>
;;
;;     lsr val_rshift, val, #<amt>
;;     lsl val_lshift, val, #<rotr_opposite_amount(ty, amt)>
;;     orr rd, val_lshift, val_rshift
(decl small_rotr_imm (Type Reg ImmShift) Reg)
(rule (small_rotr_imm ty val amt)
      (let (
          (val_rshift Reg (lsr32_imm val amt))
          (val_lshift Reg (lsl32_imm val (rotr_opposite_amount ty amt)))
        )
        (orr32 val_lshift val_rshift)))

;; Computes the left-shift amount that complements a right shift by `amt` for
;; `ty` (presumably `bitwidth - amt` -- confirm in the extern implementation).
(decl rotr_opposite_amount (Type ImmShift) ImmShift)
(extern constructor rotr_opposite_amount rotr_opposite_amount)

;; General 128-bit case.
;;
;; TODO: much better codegen is possible with a constant amount.
;;
;; The rotate is assembled from two 128-bit shifts whose halves are OR-ed
;; together: a logical right shift by `amt` and a left shift by `128 - amt`.
(rule (lower (has_type $I128 (rotr x y)))
      (let (
          (src ValueRegs (put_in_regs x))
          ;; Only the low register of the amount operand is used.
          (count Reg (value_regs_get (put_in_regs y) 0))
          (inv_count Reg (sub64 (imm $I64 128) count))
          (shifted_right ValueRegs (lower_ushr128 src count))
          (shifted_left ValueRegs (lower_shl128 src inv_count))
          (out_hi Reg (orr64 (value_regs_get shifted_right 1)
                             (value_regs_get shifted_left 1)))
          (out_lo Reg (orr64 (value_regs_get shifted_right 0)
                             (value_regs_get shifted_left 0)))
        )
        (value_regs out_lo out_hi)))

;;;; Rules for `bitrev` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; A 32-bit bit-reverse of an 8-bit value leaves the reversed bits in the top
;; 8 bits of the register, so they are shifted right by 24 to land in place.
(rule (lower (has_type $I8 (bitrev x)))
      (let ((reversed Reg (rbit32 (put_in_reg x))))
        (value_reg (lsr32_imm reversed (imm_shift_from_u8 24)))))

;; Reversing a 16-bit value with a 32-bit bitrev instruction will place
;; the reversed result in the highest 16 bits, so we need to shift them down into
;; place.
(rule (lower (has_type $I16 (bitrev x)))
      (let ((reversed Reg (rbit32 (put_in_reg x))))
        (value_reg (lsr32_imm reversed (imm_shift_from_u8 16)))))

;; 32- and 64-bit values map directly onto the matching-width bit-reverse.
(rule (lower (has_type $I32 (bitrev x)))
      (let ((reversed Reg (rbit32 (put_in_reg x))))
        (value_reg reversed)))

(rule (lower (has_type $I64 (bitrev x)))
      (let ((reversed Reg (rbit64 (put_in_reg x))))
        (value_reg reversed)))

;; 128-bit: bit-reverse each 64-bit half, then swap the halves.
(rule (lower (has_type $I128 (bitrev x)))
      (let (
          (src ValueRegs (put_in_regs x))
          (rev_of_lo Reg (rbit64 (value_regs_get src 0)))
          (rev_of_hi Reg (rbit64 (value_regs_get src 1)))
        )
        (value_regs rev_of_hi rev_of_lo)))

;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Narrow types: zero-extend to 32 bits, count leading zeros there, then
;; subtract the number of extra bits introduced by the extension.
(rule (lower (has_type $I8 (clz x)))
      (let ((wide_count Reg (clz32 (put_in_reg_zext32 x))))
        (value_reg (sub32_imm wide_count (u8_into_imm12 24)))))

(rule (lower (has_type $I16 (clz x)))
      (let ((wide_count Reg (clz32 (put_in_reg_zext32 x))))
        (value_reg (sub32_imm wide_count (u8_into_imm12 16)))))

(rule (lower (has_type $I32 (clz x)))
      (let ((count Reg (clz32 (put_in_reg x))))
        (value_reg count)))

(rule (lower (has_type $I64 (clz x)))
      (let ((count Reg (clz64 (put_in_reg x))))
        (value_reg count)))

(rule (lower (has_type $I128 (clz x)))
      (let ((src ValueRegs (put_in_regs x)))
        (lower_clz128 src)))

;; Count leading zeros of a 128-bit value held in two registers:
;;
;;     clz hi_clz, hi
;;     clz lo_clz, lo
;;     lsr tmp, hi_clz, #6
;;     madd dst_lo, lo_clz, tmp, hi_clz
;;     mov dst_hi, 0
;;
;; `hi_clz >> 6` is 1 only when `hi_clz` is 64, so the multiply-add includes
;; the low half's count only when the high half is entirely zero.
(decl lower_clz128 (ValueRegs) ValueRegs)
(rule (lower_clz128 src)
      (let (
          (zeros_hi Reg (clz64 (value_regs_get src 1)))
          (zeros_lo Reg (clz64 (value_regs_get src 0)))
          (hi_is_zero Reg (lsr64_imm zeros_hi (imm_shift_from_u8 6)))
        )
        (value_regs (madd64 zeros_lo hi_is_zero zeros_hi) (imm $I64 0))))

;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Note that all `ctz` instructions are implemented by reversing the bits and
;; then using a `clz` instruction since the tail zeros are the same as the
;; leading zeros of the reversed value.
;; For the narrow types the reversed bits end up at the top of the 32-bit
;; register; OR-ing in the bit just below them (bit 23 for `i8`, bit 15 for
;; `i16`) stops the leading-zero count at the type's width.
(rule (lower (has_type $I8 (ctz x)))
      (value_reg (clz32 (orr32_imm (rbit32 (put_in_reg x))
                                   (u64_into_imm_logic $I32 0x800000)))))

(rule (lower (has_type $I16 (ctz x)))
      (value_reg (clz32 (orr32_imm (rbit32 (put_in_reg x))
                                   (u64_into_imm_logic $I32 0x8000)))))

(rule (lower (has_type $I32 (ctz x)))
      (value_reg (clz32 (rbit32 (put_in_reg x)))))

(rule (lower (has_type $I64 (ctz x)))
      (value_reg (clz64 (rbit64 (put_in_reg x)))))

;; 128-bit: bit-reverse each half, swap the halves, and count leading zeros of
;; the resulting 128-bit value.
(rule (lower (has_type $I128 (ctz x)))
      (let (
          (val ValueRegs (put_in_regs x))
          (lo Reg (rbit64 (value_regs_get val 0)))
          (hi Reg (rbit64 (value_regs_get val 1)))
        )
        (lower_clz128 (value_regs hi lo))))

;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Narrow types: widen to 32 bits, count leading sign bits there, then subtract
;; the number of extra bits introduced by the extension.
;;
;; The operand must be *sign*-extended. With a zero-extension a negative input
;; looks like a positive 32-bit value, so `cls32` counts the padding zeros
;; instead of copies of the sign bit (e.g. the `i8` value 0x80 would produce
;; 23 - 24 = -1 rather than 0).
;;
;; NOTE(review): assumes `put_in_reg_sext32` is declared alongside
;; `put_in_reg_zext32` -- confirm it is in scope.
(rule (lower (has_type $I8 (cls x)))
      (value_reg (sub32_imm (cls32 (put_in_reg_sext32 x)) (u8_into_imm12 24))))

(rule (lower (has_type $I16 (cls x)))
      (value_reg (sub32_imm (cls32 (put_in_reg_sext32 x)) (u8_into_imm12 16))))

(rule (lower (has_type $I32 (cls x)))
      (value_reg (cls32 (put_in_reg x))))

(rule (lower (has_type $I64 (cls x)))
      (value_reg (cls64 (put_in_reg x))))

;; 128-bit case:
;;
;;     cls lo_cls, lo
;;     cls hi_cls, hi
;;     eon sign_eq_eon, hi, lo
;;     lsr sign_eq, sign_eq_eon, #63
;;     madd lo_sign_bits, lo_cls, sign_eq, sign_eq
;;     cmp hi_cls, #63
;;     csel maybe_lo, lo_sign_bits, xzr, eq
;;     add out_lo, maybe_lo, hi_cls
;;     mov out_hi, 0
;;
;; `sign_eq` is 1 when the top bits of `hi` and `lo` agree. In that case the
;; low half contributes `lo_cls + 1` bits (its own top bit plus the run after
;; it), otherwise 0. That contribution only counts when the whole high half is
;; sign bits, i.e. `hi_cls == 63`.
(rule (lower (has_type $I128 (cls x)))
      (let (
          (val ValueRegs (put_in_regs x))
          (lo Reg (value_regs_get val 0))
          (hi Reg (value_regs_get val 1))
          (lo_cls Reg (cls64 lo))
          (hi_cls Reg (cls64 hi))
          (sign_eq_eon Reg (eon64 hi lo))
          (sign_eq Reg (lsr64_imm sign_eq_eon (imm_shift_from_u8 63)))
          (lo_sign_bits Reg (madd64 lo_cls sign_eq sign_eq))
          (maybe_lo Reg (with_flags_1
                          (cmp64_imm hi_cls (u8_into_imm12 63))
                          (csel (Cond.Eq) lo_sign_bits (zero_reg))
          ))
        )
        (value_regs (add64 maybe_lo hi_cls) (imm $I64 0))))