[AArch64] Merge 32- and 64-bit ALUOps (#3802)
Combine the two opcodes into one and add an OperandSize field to these instructions, as well as an ISLE helper to perform the conversion from Type. This saves us from having to write ISLE helpers to select the correct opcode based on type, and reduces the amount of code needed for emission. Copyright (c) 2022, Arm Limited.
This commit is contained in:
@@ -91,8 +91,8 @@
|
||||
;; the actual addition is `adds` followed by `adc` which comprises the
|
||||
;; low/high bits of the result
|
||||
(with_flags
|
||||
(add64_with_flags x_lo y_lo)
|
||||
(adc64 x_hi y_hi))))
|
||||
(add_with_flags $I64 x_lo y_lo)
|
||||
(adc $I64 x_hi y_hi))))
|
||||
|
||||
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -142,8 +142,8 @@
|
||||
;; the actual subtraction is `subs` followed by `sbc` which comprises
|
||||
;; the low/high bits of the result
|
||||
(with_flags
|
||||
(sub64_with_flags x_lo y_lo)
|
||||
(sbc64 x_hi y_hi))))
|
||||
(sub_with_flags $I64 x_lo y_lo)
|
||||
(sbc $I64 x_hi y_hi))))
|
||||
|
||||
;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -203,7 +203,7 @@
|
||||
;; madd dst_hi, x_lo, y_hi, dst_hi
|
||||
;; madd dst_hi, x_hi, y_lo, dst_hi
|
||||
;; madd dst_lo, x_lo, y_lo, zero
|
||||
(dst_hi1 Reg (umulh x_lo y_lo))
|
||||
(dst_hi1 Reg (umulh $I64 x_lo y_lo))
|
||||
(dst_hi2 Reg (madd64 x_lo y_hi dst_hi1))
|
||||
(dst_hi Reg (madd64 x_hi y_lo dst_hi2))
|
||||
(dst_lo Reg (madd64 x_lo y_lo (zero_reg)))
|
||||
@@ -358,28 +358,28 @@
|
||||
;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type $I64 (smulhi x y)))
|
||||
(value_reg (smulh (put_in_reg x) (put_in_reg y))))
|
||||
(value_reg (smulh $I64 (put_in_reg x) (put_in_reg y))))
|
||||
|
||||
(rule (lower (has_type (fits_in_32 ty) (smulhi x y)))
|
||||
(let (
|
||||
(x64 Reg (put_in_reg_sext64 x))
|
||||
(y64 Reg (put_in_reg_sext64 y))
|
||||
(mul Reg (madd64 x64 y64 (zero_reg)))
|
||||
(result Reg (asr64_imm mul (imm_shift_from_u8 (ty_bits ty))))
|
||||
(result Reg (asr_imm $I64 mul (imm_shift_from_u8 (ty_bits ty))))
|
||||
)
|
||||
(value_reg result)))
|
||||
|
||||
;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type $I64 (umulhi x y)))
|
||||
(value_reg (umulh (put_in_reg x) (put_in_reg y))))
|
||||
(value_reg (umulh $I64 (put_in_reg x) (put_in_reg y))))
|
||||
|
||||
(rule (lower (has_type (fits_in_32 ty) (umulhi x y)))
|
||||
(let (
|
||||
(x64 Reg (put_in_reg_zext64 x))
|
||||
(y64 Reg (put_in_reg_zext64 y))
|
||||
(mul Reg (madd64 x64 y64 (zero_reg)))
|
||||
(result Reg (lsr64_imm mul (imm_shift_from_u8 (ty_bits ty))))
|
||||
(result Reg (lsr_imm $I64 mul (imm_shift_from_u8 (ty_bits ty))))
|
||||
)
|
||||
(value_reg result)))
|
||||
|
||||
@@ -391,7 +391,7 @@
|
||||
;; Note that aarch64's `udiv` doesn't trap so to respect the semantics of
|
||||
;; CLIF's `udiv` the check for zero needs to be manually performed.
|
||||
(rule (lower (has_type (fits_in_64 ty) (udiv x y)))
|
||||
(value_reg (udiv64 (put_in_reg_zext64 x) (put_nonzero_in_reg_zext64 y))))
|
||||
(value_reg (a64_udiv $I64 (put_in_reg_zext64 x) (put_nonzero_in_reg_zext64 y))))
|
||||
|
||||
;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
|
||||
(decl put_nonzero_in_reg_zext64 (Value) Reg)
|
||||
@@ -428,7 +428,7 @@
|
||||
(x64 Reg (put_in_reg_sext64 x))
|
||||
(y64 Reg (put_nonzero_in_reg_sext64 y))
|
||||
(valid_x64 Reg (trap_if_div_overflow ty x64 y64))
|
||||
(result Reg (sdiv64 valid_x64 y64))
|
||||
(result Reg (a64_sdiv $I64 valid_x64 y64))
|
||||
)
|
||||
(value_reg result)))
|
||||
|
||||
@@ -439,7 +439,7 @@
|
||||
;; Special case for `sdiv` where no checks are needed due to division by a
|
||||
;; constant meaning the checks are always passed.
|
||||
(rule (lower (has_type (fits_in_64 ty) (sdiv x (def_inst (iconst (safe_divisor_from_imm64 y))))))
|
||||
(value_reg (sdiv64 (put_in_reg_sext64 x) (imm ty y))))
|
||||
(value_reg (a64_sdiv $I64 (put_in_reg_sext64 x) (imm ty y))))
|
||||
|
||||
;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
|
||||
(decl put_nonzero_in_reg_sext64 (Value) Reg)
|
||||
@@ -470,7 +470,7 @@
|
||||
(let (
|
||||
(x64 Reg (put_in_reg_zext64 x))
|
||||
(y64 Reg (put_nonzero_in_reg_zext64 y))
|
||||
(div Reg (udiv64 x64 y64))
|
||||
(div Reg (a64_udiv $I64 x64 y64))
|
||||
(result Reg (msub64 div y64 x64))
|
||||
)
|
||||
(value_reg result)))
|
||||
@@ -479,7 +479,7 @@
|
||||
(let (
|
||||
(x64 Reg (put_in_reg_sext64 x))
|
||||
(y64 Reg (put_nonzero_in_reg_sext64 y))
|
||||
(div Reg (sdiv64 x64 y64))
|
||||
(div Reg (a64_sdiv $I64 x64 y64))
|
||||
(result Reg (msub64 div y64 x64))
|
||||
)
|
||||
(value_reg result)))
|
||||
@@ -537,7 +537,7 @@
|
||||
(rule (lower (has_type $I128 (sextend x)))
|
||||
(let (
|
||||
(lo Reg (put_in_reg_sext64 x))
|
||||
(hi Reg (asr64_imm lo (imm_shift_from_u8 63)))
|
||||
(hi Reg (asr_imm $I64 lo (imm_shift_from_u8 63)))
|
||||
)
|
||||
(value_regs lo hi)))
|
||||
|
||||
@@ -554,7 +554,7 @@
|
||||
lane
|
||||
(vector_size in)
|
||||
(size_from_ty $I64)))
|
||||
(hi Reg (asr64_imm lo (imm_shift_from_u8 63)))
|
||||
(hi Reg (asr_imm $I64 lo (imm_shift_from_u8 63)))
|
||||
)
|
||||
(value_regs lo hi)))
|
||||
|
||||
@@ -566,7 +566,7 @@
|
||||
(lo Reg (mov_from_vec (put_in_reg vec)
|
||||
lane
|
||||
(VectorSize.Size64x2)))
|
||||
(hi Reg (asr64_imm lo (imm_shift_from_u8 63)))
|
||||
(hi Reg (asr_imm $I64 lo (imm_shift_from_u8 63)))
|
||||
)
|
||||
(value_regs lo hi)))
|
||||
|
||||
@@ -592,8 +592,8 @@
|
||||
(x_regs ValueRegs (put_in_regs x))
|
||||
(x_lo Reg (value_regs_get x_regs 0))
|
||||
(x_hi Reg (value_regs_get x_regs 1))
|
||||
(new_lo Reg (orr_not64 (zero_reg) x_lo))
|
||||
(new_hi Reg (orr_not64 (zero_reg) x_hi))
|
||||
(new_lo Reg (orr_not $I64 (zero_reg) x_lo))
|
||||
(new_hi Reg (orr_not $I64 (zero_reg) x_hi))
|
||||
)
|
||||
(value_regs new_lo new_hi)))
|
||||
|
||||
@@ -604,12 +604,12 @@
|
||||
;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (fits_in_32 ty) (band x y)))
|
||||
(value_reg (alu_rs_imm_logic_commutative (ALUOp.And32) ty x y)))
|
||||
(value_reg (alu_rs_imm_logic_commutative (ALUOp.And) ty x y)))
|
||||
|
||||
(rule (lower (has_type $I64 (band x y)))
|
||||
(value_reg (alu_rs_imm_logic_commutative (ALUOp.And64) $I64 x y)))
|
||||
(value_reg (alu_rs_imm_logic_commutative (ALUOp.And) $I64 x y)))
|
||||
|
||||
(rule (lower (has_type $I128 (band x y))) (i128_alu_bitop (ALUOp.And64) x y))
|
||||
(rule (lower (has_type $I128 (band x y))) (i128_alu_bitop (ALUOp.And) $I64 x y))
|
||||
|
||||
(rule (lower (has_type (vec128 ty) (band x y)))
|
||||
(value_reg (and_vec (put_in_reg x) (put_in_reg y) (vector_size ty))))
|
||||
@@ -617,12 +617,12 @@
|
||||
;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (fits_in_32 ty) (bor x y)))
|
||||
(value_reg (alu_rs_imm_logic_commutative (ALUOp.Orr32) ty x y)))
|
||||
(value_reg (alu_rs_imm_logic_commutative (ALUOp.Orr) ty x y)))
|
||||
|
||||
(rule (lower (has_type $I64 (bor x y)))
|
||||
(value_reg (alu_rs_imm_logic_commutative (ALUOp.Orr64) $I64 x y)))
|
||||
(value_reg (alu_rs_imm_logic_commutative (ALUOp.Orr) $I64 x y)))
|
||||
|
||||
(rule (lower (has_type $I128 (bor x y))) (i128_alu_bitop (ALUOp.Orr64) x y))
|
||||
(rule (lower (has_type $I128 (bor x y))) (i128_alu_bitop (ALUOp.Orr) $I64 x y))
|
||||
|
||||
(rule (lower (has_type (vec128 ty) (bor x y)))
|
||||
(value_reg (orr_vec (put_in_reg x) (put_in_reg y) (vector_size ty))))
|
||||
@@ -630,12 +630,12 @@
|
||||
;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (fits_in_32 ty) (bxor x y)))
|
||||
(value_reg (alu_rs_imm_logic_commutative (ALUOp.Eor32) ty x y)))
|
||||
(value_reg (alu_rs_imm_logic_commutative (ALUOp.Eor) ty x y)))
|
||||
|
||||
(rule (lower (has_type $I64 (bxor x y)))
|
||||
(value_reg (alu_rs_imm_logic_commutative (ALUOp.Eor64) $I64 x y)))
|
||||
(value_reg (alu_rs_imm_logic_commutative (ALUOp.Eor) $I64 x y)))
|
||||
|
||||
(rule (lower (has_type $I128 (bxor x y))) (i128_alu_bitop (ALUOp.Eor64) x y))
|
||||
(rule (lower (has_type $I128 (bxor x y))) (i128_alu_bitop (ALUOp.Eor) $I64 x y))
|
||||
|
||||
(rule (lower (has_type (vec128 ty) (bxor x y)))
|
||||
(value_reg (eor_vec (put_in_reg x) (put_in_reg y) (vector_size ty))))
|
||||
@@ -643,12 +643,12 @@
|
||||
;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (fits_in_32 ty) (band_not x y)))
|
||||
(value_reg (alu_rs_imm_logic (ALUOp.AndNot32) ty x y)))
|
||||
(value_reg (alu_rs_imm_logic (ALUOp.AndNot) ty x y)))
|
||||
|
||||
(rule (lower (has_type $I64 (band_not x y)))
|
||||
(value_reg (alu_rs_imm_logic (ALUOp.AndNot64) $I64 x y)))
|
||||
(value_reg (alu_rs_imm_logic (ALUOp.AndNot) $I64 x y)))
|
||||
|
||||
(rule (lower (has_type $I128 (band_not x y))) (i128_alu_bitop (ALUOp.AndNot64) x y))
|
||||
(rule (lower (has_type $I128 (band_not x y))) (i128_alu_bitop (ALUOp.AndNot) $I64 x y))
|
||||
|
||||
(rule (lower (has_type (vec128 ty) (band_not x y)))
|
||||
(value_reg (bic_vec (put_in_reg x) (put_in_reg y) (vector_size ty))))
|
||||
@@ -656,32 +656,32 @@
|
||||
;;;; Rules for `bor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (fits_in_32 ty) (bor_not x y)))
|
||||
(value_reg (alu_rs_imm_logic (ALUOp.OrrNot32) ty x y)))
|
||||
(value_reg (alu_rs_imm_logic (ALUOp.OrrNot) ty x y)))
|
||||
|
||||
(rule (lower (has_type $I64 (bor_not x y)))
|
||||
(value_reg (alu_rs_imm_logic (ALUOp.OrrNot64) $I64 x y)))
|
||||
(value_reg (alu_rs_imm_logic (ALUOp.OrrNot) $I64 x y)))
|
||||
|
||||
(rule (lower (has_type $I128 (bor_not x y))) (i128_alu_bitop (ALUOp.OrrNot64) x y))
|
||||
(rule (lower (has_type $I128 (bor_not x y))) (i128_alu_bitop (ALUOp.OrrNot) $I64 x y))
|
||||
|
||||
;;;; Rules for `bxor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (fits_in_32 ty) (bxor_not x y)))
|
||||
(value_reg (alu_rs_imm_logic (ALUOp.EorNot32) ty x y)))
|
||||
(value_reg (alu_rs_imm_logic (ALUOp.EorNot) $I32 x y)))
|
||||
|
||||
(rule (lower (has_type $I64 (bxor_not x y)))
|
||||
(value_reg (alu_rs_imm_logic (ALUOp.EorNot64) $I64 x y)))
|
||||
(value_reg (alu_rs_imm_logic (ALUOp.EorNot) $I64 x y)))
|
||||
|
||||
(rule (lower (has_type $I128 (bxor_not x y))) (i128_alu_bitop (ALUOp.EorNot64) x y))
|
||||
(rule (lower (has_type $I128 (bxor_not x y))) (i128_alu_bitop (ALUOp.EorNot) $I64 x y))
|
||||
|
||||
;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Shift for i8/i16/i32.
|
||||
(rule (lower (has_type (fits_in_32 ty) (ishl x y)))
|
||||
(value_reg (do_shift (ALUOp.Lsl32) ty (put_in_reg x) y)))
|
||||
(value_reg (do_shift (ALUOp.Lsl) ty (put_in_reg x) y)))
|
||||
|
||||
;; Shift for i64.
|
||||
(rule (lower (has_type $I64 (ishl x y)))
|
||||
(value_reg (do_shift (ALUOp.Lsl64) $I64 (put_in_reg x) y)))
|
||||
(value_reg (do_shift (ALUOp.Lsl) $I64 (put_in_reg x) y)))
|
||||
|
||||
;; Shift for i128.
|
||||
(rule (lower (has_type $I128 (ishl x y)))
|
||||
@@ -701,15 +701,15 @@
|
||||
(let (
|
||||
(src_lo Reg (value_regs_get src 0))
|
||||
(src_hi Reg (value_regs_get src 1))
|
||||
(lo_lshift Reg (lsl64 src_lo amt))
|
||||
(hi_lshift Reg (lsl64 src_hi amt))
|
||||
(inv_amt Reg (orr_not32 (zero_reg) amt))
|
||||
(lo_rshift Reg (lsr64 (lsr64_imm src_lo (imm_shift_from_u8 1))
|
||||
(lo_lshift Reg (lsl $I64 src_lo amt))
|
||||
(hi_lshift Reg (lsl $I64 src_hi amt))
|
||||
(inv_amt Reg (orr_not $I32 (zero_reg) amt))
|
||||
(lo_rshift Reg (lsr $I64 (lsr_imm $I64 src_lo (imm_shift_from_u8 1))
|
||||
inv_amt))
|
||||
(maybe_hi Reg (orr64 hi_lshift lo_rshift))
|
||||
(maybe_hi Reg (orr $I64 hi_lshift lo_rshift))
|
||||
)
|
||||
(with_flags_2
|
||||
(tst64_imm amt (u64_into_imm_logic $I64 64))
|
||||
(tst_imm $I64 amt (u64_into_imm_logic $I64 64))
|
||||
(csel (Cond.Ne) (zero_reg) lo_lshift)
|
||||
(csel (Cond.Ne) lo_lshift maybe_hi))))
|
||||
|
||||
@@ -741,16 +741,16 @@
|
||||
(rule (do_shift op (fits_in_16 ty) x y)
|
||||
(let (
|
||||
(shift_amt Reg (value_regs_get (put_in_regs y) 0))
|
||||
(masked_shift_amt Reg (and32_imm shift_amt (shift_mask ty)))
|
||||
(masked_shift_amt Reg (and_imm $I32 shift_amt (shift_mask ty)))
|
||||
)
|
||||
(alu_rrr op x masked_shift_amt)))
|
||||
(alu_rrr op $I32 x masked_shift_amt)))
|
||||
|
||||
(decl shift_mask (Type) ImmLogic)
|
||||
(extern constructor shift_mask shift_mask)
|
||||
|
||||
;; 32/64-bit shift base cases.
|
||||
(rule (do_shift op $I32 x y) (alu_rrr op x (value_regs_get (put_in_regs y) 0)))
|
||||
(rule (do_shift op $I64 x y) (alu_rrr op x (value_regs_get (put_in_regs y) 0)))
|
||||
(rule (do_shift op $I32 x y) (alu_rrr op $I32 x (value_regs_get (put_in_regs y) 0)))
|
||||
(rule (do_shift op $I64 x y) (alu_rrr op $I64 x (value_regs_get (put_in_regs y) 0)))
|
||||
|
||||
;; Special case for shifting by a constant value where the value can fit into an
|
||||
;; `ImmShift`.
|
||||
@@ -759,17 +759,17 @@
|
||||
;; to ensure it's attempted first, otherwise the type-based filters on the
|
||||
;; previous rules seem to take priority over this rule.
|
||||
(rule 1 (do_shift op ty x (def_inst (iconst (imm_shift_from_imm64 <ty shift))))
|
||||
(alu_rr_imm_shift op x shift))
|
||||
(alu_rr_imm_shift op ty x shift))
|
||||
|
||||
;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Shift for i8/i16/i32.
|
||||
(rule (lower (has_type (fits_in_32 ty) (ushr x y)))
|
||||
(value_reg (do_shift (ALUOp.Lsr32) ty (put_in_reg_zext32 x) y)))
|
||||
(value_reg (do_shift (ALUOp.Lsr) ty (put_in_reg_zext32 x) y)))
|
||||
|
||||
;; Shift for i64.
|
||||
(rule (lower (has_type $I64 (ushr x y)))
|
||||
(value_reg (do_shift (ALUOp.Lsr64) $I64 (put_in_reg_zext64 x) y)))
|
||||
(value_reg (do_shift (ALUOp.Lsr) $I64 (put_in_reg_zext64 x) y)))
|
||||
|
||||
;; Shift for i128.
|
||||
(rule (lower (has_type $I128 (ushr x y)))
|
||||
@@ -779,7 +779,7 @@
|
||||
(rule (lower (has_type (vec128 ty) (ushr x y)))
|
||||
(let (
|
||||
(size VectorSize (vector_size ty))
|
||||
(shift Reg (vec_dup (sub32 (zero_reg) (put_in_reg y)) size))
|
||||
(shift Reg (vec_dup (sub $I32 (zero_reg) (put_in_reg y)) size))
|
||||
)
|
||||
(value_reg (ushl (put_in_reg x) shift size))))
|
||||
|
||||
@@ -797,16 +797,16 @@
|
||||
(let (
|
||||
(src_lo Reg (value_regs_get src 0))
|
||||
(src_hi Reg (value_regs_get src 1))
|
||||
(lo_rshift Reg (lsr64 src_lo amt))
|
||||
(hi_rshift Reg (lsr64 src_hi amt))
|
||||
(lo_rshift Reg (lsr $I64 src_lo amt))
|
||||
(hi_rshift Reg (lsr $I64 src_hi amt))
|
||||
|
||||
(inv_amt Reg (orr_not32 (zero_reg) amt))
|
||||
(hi_lshift Reg (lsl64 (lsl64_imm src_hi (imm_shift_from_u8 1))
|
||||
(inv_amt Reg (orr_not $I32 (zero_reg) amt))
|
||||
(hi_lshift Reg (lsl $I64 (lsl_imm $I64 src_hi (imm_shift_from_u8 1))
|
||||
inv_amt))
|
||||
(maybe_lo Reg (orr64 lo_rshift hi_lshift))
|
||||
(maybe_lo Reg (orr $I64 lo_rshift hi_lshift))
|
||||
)
|
||||
(with_flags_2
|
||||
(tst64_imm amt (u64_into_imm_logic $I64 64))
|
||||
(tst_imm $I64 amt (u64_into_imm_logic $I64 64))
|
||||
(csel (Cond.Ne) hi_rshift maybe_lo)
|
||||
(csel (Cond.Ne) (zero_reg) hi_rshift))))
|
||||
|
||||
@@ -814,11 +814,11 @@
|
||||
|
||||
;; Shift for i8/i16/i32.
|
||||
(rule (lower (has_type (fits_in_32 ty) (sshr x y)))
|
||||
(value_reg (do_shift (ALUOp.Asr32) ty (put_in_reg_sext32 x) y)))
|
||||
(value_reg (do_shift (ALUOp.Asr) ty (put_in_reg_sext32 x) y)))
|
||||
|
||||
;; Shift for i64.
|
||||
(rule (lower (has_type $I64 (sshr x y)))
|
||||
(value_reg (do_shift (ALUOp.Asr64) $I64 (put_in_reg_sext64 x) y)))
|
||||
(value_reg (do_shift (ALUOp.Asr) $I64 (put_in_reg_sext64 x) y)))
|
||||
|
||||
;; Shift for i128.
|
||||
(rule (lower (has_type $I128 (sshr x y)))
|
||||
@@ -830,7 +830,7 @@
|
||||
(rule (lower (has_type (vec128 ty) (sshr x y)))
|
||||
(let (
|
||||
(size VectorSize (vector_size ty))
|
||||
(shift Reg (vec_dup (sub32 (zero_reg) (put_in_reg y)) size))
|
||||
(shift Reg (vec_dup (sub $I32 (zero_reg) (put_in_reg y)) size))
|
||||
)
|
||||
(value_reg (sshl (put_in_reg x) shift size))))
|
||||
|
||||
@@ -849,17 +849,17 @@
|
||||
(let (
|
||||
(src_lo Reg (value_regs_get src 0))
|
||||
(src_hi Reg (value_regs_get src 1))
|
||||
(lo_rshift Reg (lsr64 src_lo amt))
|
||||
(hi_rshift Reg (asr64 src_hi amt))
|
||||
(lo_rshift Reg (lsr $I64 src_lo amt))
|
||||
(hi_rshift Reg (asr $I64 src_hi amt))
|
||||
|
||||
(inv_amt Reg (orr_not32 (zero_reg) amt))
|
||||
(hi_lshift Reg (lsl64 (lsl64_imm src_hi (imm_shift_from_u8 1))
|
||||
(inv_amt Reg (orr_not $I32 (zero_reg) amt))
|
||||
(hi_lshift Reg (lsl $I64 (lsl_imm $I64 src_hi (imm_shift_from_u8 1))
|
||||
inv_amt))
|
||||
(hi_sign Reg (asr64_imm src_hi (imm_shift_from_u8 63)))
|
||||
(maybe_lo Reg (orr64 lo_rshift hi_lshift))
|
||||
(hi_sign Reg (asr_imm $I64 src_hi (imm_shift_from_u8 63)))
|
||||
(maybe_lo Reg (orr $I64 lo_rshift hi_lshift))
|
||||
)
|
||||
(with_flags_2
|
||||
(tst64_imm amt (u64_into_imm_logic $I64 64))
|
||||
(tst_imm $I64 amt (u64_into_imm_logic $I64 64))
|
||||
(csel (Cond.Ne) hi_rshift maybe_lo)
|
||||
(csel (Cond.Ne) hi_sign hi_rshift))))
|
||||
|
||||
@@ -867,7 +867,7 @@
|
||||
|
||||
;; General 8/16-bit case.
|
||||
(rule (lower (has_type (fits_in_16 ty) (rotl x y)))
|
||||
(let ((neg_shift Reg (sub32 (zero_reg) (put_in_reg y))))
|
||||
(let ((neg_shift Reg (sub $I32 (zero_reg) (put_in_reg y))))
|
||||
(value_reg (small_rotr ty (put_in_reg_zext32 x) neg_shift))))
|
||||
|
||||
;; Specialization for the 8/16-bit case when the rotation amount is an immediate.
|
||||
@@ -884,21 +884,21 @@
|
||||
|
||||
;; General 32-bit case.
|
||||
(rule (lower (has_type $I32 (rotl x y)))
|
||||
(let ((neg_shift Reg (sub32 (zero_reg) (put_in_reg y))))
|
||||
(value_reg (rotr32 (put_in_reg x) neg_shift))))
|
||||
(let ((neg_shift Reg (sub $I32 (zero_reg) (put_in_reg y))))
|
||||
(value_reg (a64_rotr $I32 (put_in_reg x) neg_shift))))
|
||||
|
||||
;; General 64-bit case.
|
||||
(rule (lower (has_type $I64 (rotl x y)))
|
||||
(let ((neg_shift Reg (sub64 (zero_reg) (put_in_reg y))))
|
||||
(value_reg (rotr64 (put_in_reg x) neg_shift))))
|
||||
(let ((neg_shift Reg (sub $I64 (zero_reg) (put_in_reg y))))
|
||||
(value_reg (a64_rotr $I64 (put_in_reg x) neg_shift))))
|
||||
|
||||
;; Specialization for the 32-bit case when the rotation amount is an immediate.
|
||||
(rule (lower (has_type $I32 (rotl x (def_inst (iconst (imm_shift_from_imm64 <$I32 n))))))
|
||||
(value_reg (rotr32_imm (put_in_reg x) (negate_imm_shift $I32 n))))
|
||||
(value_reg (a64_rotr_imm $I32 (put_in_reg x) (negate_imm_shift $I32 n))))
|
||||
|
||||
;; Specialization for the 64-bit case when the rotation amount is an immediate.
|
||||
(rule (lower (has_type $I64 (rotl x (def_inst (iconst (imm_shift_from_imm64 <$I64 n))))))
|
||||
(value_reg (rotr64_imm (put_in_reg x) (negate_imm_shift $I64 n))))
|
||||
(value_reg (a64_rotr_imm $I64 (put_in_reg x) (negate_imm_shift $I64 n))))
|
||||
|
||||
(decl negate_imm_shift (Type ImmShift) ImmShift)
|
||||
(extern constructor negate_imm_shift negate_imm_shift)
|
||||
@@ -910,13 +910,13 @@
|
||||
(let (
|
||||
(val ValueRegs (put_in_regs x))
|
||||
(amt Reg (value_regs_get (put_in_regs y) 0))
|
||||
(neg_amt Reg (sub64 (imm $I64 128) amt))
|
||||
(neg_amt Reg (sub $I64 (imm $I64 128) amt))
|
||||
(lshift ValueRegs (lower_shl128 val amt))
|
||||
(rshift ValueRegs (lower_ushr128 val neg_amt))
|
||||
)
|
||||
(value_regs
|
||||
(orr64 (value_regs_get lshift 0) (value_regs_get rshift 0))
|
||||
(orr64 (value_regs_get lshift 1) (value_regs_get rshift 1)))))
|
||||
(orr $I64 (value_regs_get lshift 0) (value_regs_get rshift 0))
|
||||
(orr $I64 (value_regs_get lshift 1) (value_regs_get rshift 1)))))
|
||||
|
||||
;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -926,11 +926,11 @@
|
||||
|
||||
;; General 32-bit case.
|
||||
(rule (lower (has_type $I32 (rotr x y)))
|
||||
(value_reg (rotr32 (put_in_reg x) (put_in_reg y))))
|
||||
(value_reg (a64_rotr $I32 (put_in_reg x) (put_in_reg y))))
|
||||
|
||||
;; General 64-bit case.
|
||||
(rule (lower (has_type $I64 (rotr x y)))
|
||||
(value_reg (rotr64 (put_in_reg x) (put_in_reg y))))
|
||||
(value_reg (a64_rotr $I64 (put_in_reg x) (put_in_reg y))))
|
||||
|
||||
;; Specialization for the 8/16-bit case when the rotation amount is an immediate.
|
||||
(rule (lower (has_type (fits_in_16 ty) (rotr x (def_inst (iconst (imm_shift_from_imm64 <ty n))))))
|
||||
@@ -938,11 +938,11 @@
|
||||
|
||||
;; Specialization for the 32-bit case when the rotation amount is an immediate.
|
||||
(rule (lower (has_type $I32 (rotr x (def_inst (iconst (imm_shift_from_imm64 <$I32 n))))))
|
||||
(value_reg (rotr32_imm (put_in_reg x) n)))
|
||||
(value_reg (a64_rotr_imm $I32 (put_in_reg x) n)))
|
||||
|
||||
;; Specialization for the 64-bit case when the rotation amount is an immediate.
|
||||
(rule (lower (has_type $I64 (rotr x (def_inst (iconst (imm_shift_from_imm64 <$I64 n))))))
|
||||
(value_reg (rotr64_imm (put_in_reg x) n)))
|
||||
(value_reg (a64_rotr_imm $I64 (put_in_reg x) n)))
|
||||
|
||||
;; For a < 32-bit rotate-right, we synthesize this as:
|
||||
;;
|
||||
@@ -959,13 +959,13 @@
|
||||
(decl small_rotr (Type Reg Reg) Reg)
|
||||
(rule (small_rotr ty val amt)
|
||||
(let (
|
||||
(masked_amt Reg (and32_imm amt (rotr_mask ty)))
|
||||
(tmp_sub Reg (sub32_imm masked_amt (u8_into_imm12 (ty_bits ty))))
|
||||
(neg_amt Reg (sub32 (zero_reg) tmp_sub))
|
||||
(val_rshift Reg (lsr32 val masked_amt))
|
||||
(val_lshift Reg (lsl32 val neg_amt))
|
||||
(masked_amt Reg (and_imm $I32 amt (rotr_mask ty)))
|
||||
(tmp_sub Reg (sub_imm $I32 masked_amt (u8_into_imm12 (ty_bits ty))))
|
||||
(neg_amt Reg (sub $I32 (zero_reg) tmp_sub))
|
||||
(val_rshift Reg (lsr $I32 val masked_amt))
|
||||
(val_lshift Reg (lsl $I32 val neg_amt))
|
||||
)
|
||||
(orr32 val_lshift val_rshift)))
|
||||
(orr $I32 val_lshift val_rshift)))
|
||||
|
||||
(decl rotr_mask (Type) ImmLogic)
|
||||
(extern constructor rotr_mask rotr_mask)
|
||||
@@ -982,10 +982,10 @@
|
||||
(decl small_rotr_imm (Type Reg ImmShift) Reg)
|
||||
(rule (small_rotr_imm ty val amt)
|
||||
(let (
|
||||
(val_rshift Reg (lsr32_imm val amt))
|
||||
(val_lshift Reg (lsl32_imm val (rotr_opposite_amount ty amt)))
|
||||
(val_rshift Reg (lsr_imm $I32 val amt))
|
||||
(val_lshift Reg (lsl_imm $I32 val (rotr_opposite_amount ty amt)))
|
||||
)
|
||||
(orr32 val_lshift val_rshift)))
|
||||
(orr $I32 val_lshift val_rshift)))
|
||||
|
||||
(decl rotr_opposite_amount (Type ImmShift) ImmShift)
|
||||
(extern constructor rotr_opposite_amount rotr_opposite_amount)
|
||||
@@ -997,11 +997,11 @@
|
||||
(let (
|
||||
(val ValueRegs (put_in_regs x))
|
||||
(amt Reg (value_regs_get (put_in_regs y) 0))
|
||||
(neg_amt Reg (sub64 (imm $I64 128) amt))
|
||||
(neg_amt Reg (sub $I64 (imm $I64 128) amt))
|
||||
(rshift ValueRegs (lower_ushr128 val amt))
|
||||
(lshift ValueRegs (lower_shl128 val neg_amt))
|
||||
(hi Reg (orr64 (value_regs_get rshift 1) (value_regs_get lshift 1)))
|
||||
(lo Reg (orr64 (value_regs_get rshift 0) (value_regs_get lshift 0)))
|
||||
(hi Reg (orr $I64 (value_regs_get rshift 1) (value_regs_get lshift 1)))
|
||||
(lo Reg (orr $I64 (value_regs_get rshift 0) (value_regs_get lshift 0)))
|
||||
)
|
||||
(value_regs lo hi)))
|
||||
|
||||
@@ -1011,13 +1011,13 @@
|
||||
;; the reversed result in the highest 8 bits, so we need to shift them down into
|
||||
;; place.
|
||||
(rule (lower (has_type $I8 (bitrev x)))
|
||||
(value_reg (lsr32_imm (rbit32 (put_in_reg x)) (imm_shift_from_u8 24))))
|
||||
(value_reg (lsr_imm $I32 (rbit32 (put_in_reg x)) (imm_shift_from_u8 24))))
|
||||
|
||||
;; Reversing a 16-bit value with a 32-bit bitrev instruction will place
|
||||
;; the reversed result in the highest 16 bits, so we need to shift them down into
|
||||
;; place.
|
||||
(rule (lower (has_type $I16 (bitrev x)))
|
||||
(value_reg (lsr32_imm (rbit32 (put_in_reg x)) (imm_shift_from_u8 16))))
|
||||
(value_reg (lsr_imm $I32 (rbit32 (put_in_reg x)) (imm_shift_from_u8 16))))
|
||||
|
||||
(rule (lower (has_type $I32 (bitrev x)))
|
||||
(value_reg (rbit32 (put_in_reg x))))
|
||||
@@ -1036,10 +1036,10 @@
|
||||
;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type $I8 (clz x)))
|
||||
(value_reg (sub32_imm (clz32 (put_in_reg_zext32 x)) (u8_into_imm12 24))))
|
||||
(value_reg (sub_imm $I32 (clz32 (put_in_reg_zext32 x)) (u8_into_imm12 24))))
|
||||
|
||||
(rule (lower (has_type $I16 (clz x)))
|
||||
(value_reg (sub32_imm (clz32 (put_in_reg_zext32 x)) (u8_into_imm12 16))))
|
||||
(value_reg (sub_imm $I32 (clz32 (put_in_reg_zext32 x)) (u8_into_imm12 16))))
|
||||
|
||||
(rule (lower (has_type $I32 (clz x)))
|
||||
(value_reg (clz32 (put_in_reg x))))
|
||||
@@ -1060,7 +1060,7 @@
|
||||
(let (
|
||||
(hi_clz Reg (clz64 (value_regs_get val 1)))
|
||||
(lo_clz Reg (clz64 (value_regs_get val 0)))
|
||||
(tmp Reg (lsr64_imm hi_clz (imm_shift_from_u8 6)))
|
||||
(tmp Reg (lsr_imm $I64 hi_clz (imm_shift_from_u8 6)))
|
||||
)
|
||||
(value_regs (madd64 lo_clz tmp hi_clz) (imm $I64 0))))
|
||||
|
||||
@@ -1071,10 +1071,10 @@
|
||||
;; leading zeros of the reversed value.
|
||||
|
||||
(rule (lower (has_type $I8 (ctz x)))
|
||||
(value_reg (clz32 (orr32_imm (rbit32 (put_in_reg x)) (u64_into_imm_logic $I32 0x800000)))))
|
||||
(value_reg (clz32 (orr_imm $I32 (rbit32 (put_in_reg x)) (u64_into_imm_logic $I32 0x800000)))))
|
||||
|
||||
(rule (lower (has_type $I16 (ctz x)))
|
||||
(value_reg (clz32 (orr32_imm (rbit32 (put_in_reg x)) (u64_into_imm_logic $I32 0x8000)))))
|
||||
(value_reg (clz32 (orr_imm $I32 (rbit32 (put_in_reg x)) (u64_into_imm_logic $I32 0x8000)))))
|
||||
|
||||
(rule (lower (has_type $I32 (ctz x)))
|
||||
(value_reg (clz32 (rbit32 (put_in_reg x)))))
|
||||
@@ -1093,10 +1093,10 @@
|
||||
;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type $I8 (cls x)))
|
||||
(value_reg (sub32_imm (cls32 (put_in_reg_zext32 x)) (u8_into_imm12 24))))
|
||||
(value_reg (sub_imm $I32 (cls32 (put_in_reg_zext32 x)) (u8_into_imm12 24))))
|
||||
|
||||
(rule (lower (has_type $I16 (cls x)))
|
||||
(value_reg (sub32_imm (cls32 (put_in_reg_zext32 x)) (u8_into_imm12 16))))
|
||||
(value_reg (sub_imm $I32 (cls32 (put_in_reg_zext32 x)) (u8_into_imm12 16))))
|
||||
|
||||
(rule (lower (has_type $I32 (cls x)))
|
||||
(value_reg (cls32 (put_in_reg x))))
|
||||
@@ -1120,15 +1120,15 @@
|
||||
(hi Reg (value_regs_get val 1))
|
||||
(lo_cls Reg (cls64 lo))
|
||||
(hi_cls Reg (cls64 hi))
|
||||
(sign_eq_eon Reg (eon64 hi lo))
|
||||
(sign_eq Reg (lsr64_imm sign_eq_eon (imm_shift_from_u8 63)))
|
||||
(sign_eq_eon Reg (eon $I64 hi lo))
|
||||
(sign_eq Reg (lsr_imm $I64 sign_eq_eon (imm_shift_from_u8 63)))
|
||||
(lo_sign_bits Reg (madd64 lo_cls sign_eq sign_eq))
|
||||
(maybe_lo Reg (with_flags_1
|
||||
(cmp64_imm hi_cls (u8_into_imm12 63))
|
||||
(csel (Cond.Eq) lo_sign_bits (zero_reg))
|
||||
))
|
||||
)
|
||||
(value_regs (add64 maybe_lo hi_cls) (imm $I64 0))))
|
||||
(value_regs (add $I64 maybe_lo hi_cls) (imm $I64 0))))
|
||||
|
||||
;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user