When adding some optimization rules for `icmp` in the egraph infrastructure, we ended up creating a path to CLIF that is legal but uses patterns unsupported by three of our four backends: specifically, `select_spectre_guard` with a general truthy input, rather than an `icmp`. In #5206 we discussed replacing `select_spectre_guard` with something more specific, and that could still be a long-term solution here, but doing so now would interfere with ongoing refactoring of heap access lowering, so I've opted not to. (In that issue I was concerned about complexity and didn't see the need, but with this fuzzbug I'm starting to feel a bit differently; maybe we should remove this non-orthogonal op in the long run.) Fixes #5417.
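To make the shape concrete, here is a minimal, made-up CLIF sketch (not taken from the actual fuzz input; the value numbers and surrounding operations are invented) contrasting the form every backend already handles with the previously unreachable form the new `icmp` rules could produce:

```
;; Supported everywhere: the guard's condition comes directly from an `icmp`.
v10 = icmp uge v1, v2
v11 = select_spectre_guard v10, v3, v4

;; Previously unreachable shape: the condition is a general truthy value.
v20 = band v1, v2
v21 = select_spectre_guard v20, v3, v4
```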
;; aarch64 instruction selection and CLIF-to-MachInst lowering.

;; The main lowering constructor term: takes a clif `Inst` and returns the
;; register(s) within which the lowered instruction's result values live.
(decl partial lower (Inst) InstOutput)

;; Variant of the main lowering constructor term, which receives an
;; additional argument (a vector of branch targets to be used) for
;; implementing branches.
;; For two-branch instructions, the first target is `taken` and the second
;; `not_taken`, even if it is a Fallthrough instruction: because we reorder
;; blocks while we lower, the fallthrough in the new order is not (necessarily)
;; the same as the fallthrough in CLIF. So, we use the explicitly-provided
;; target.
(decl partial lower_branch (Inst VecMachLabel) InstOutput)

;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type ty (iconst (u64_from_imm64 n))))
|
|
(imm ty (ImmExtend.Zero) n))
|
|
|
|
;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type ty (null)))
|
|
(imm ty (ImmExtend.Zero) 0))
|
|
|
|
;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; `i64` and smaller
|
|
|
|
;; Base case, simply adding things in registers.
|
|
(rule -1 (lower (has_type (fits_in_64 ty) (iadd x y)))
|
|
(add ty x y))
|
|
|
|
;; Special cases for when one operand is an immediate that fits in 12 bits.
|
|
(rule 4 (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y))))
|
|
(add_imm ty x y))
|
|
|
|
(rule 5 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y)))
|
|
(add_imm ty y x))
|
|
|
|
;; Same as the previous special cases, except we can switch the addition to a
|
|
;; subtraction if the negated immediate fits in 12 bits.
|
|
(rule 2 (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_negated_value y))))
|
|
(sub_imm ty x y))
|
|
|
|
(rule 3 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_negated_value x) y)))
|
|
(sub_imm ty y x))
|
|
|
|
;; Special cases for when we're adding an extended register where the extending
|
|
;; operation can get folded into the add itself.
|
|
(rule 0 (lower (has_type (fits_in_64 ty) (iadd x (extended_value_from_value y))))
|
|
(add_extend ty x y))
|
|
|
|
(rule 1 (lower (has_type (fits_in_64 ty) (iadd (extended_value_from_value x) y)))
|
|
(add_extend ty y x))
|
|
|
|
;; Special cases for when we're adding the shift of a different
|
|
;; register by a constant amount and the shift can get folded into the add.
|
|
(rule 7 (lower (has_type (fits_in_64 ty)
|
|
(iadd x (ishl y (iconst k)))))
|
|
(if-let amt (lshl_from_imm64 ty k))
|
|
(add_shift ty x y amt))
|
|
|
|
(rule 6 (lower (has_type (fits_in_64 ty)
|
|
(iadd (ishl x (iconst k)) y)))
|
|
(if-let amt (lshl_from_imm64 ty k))
|
|
(add_shift ty y x amt))
|
|
|
|
;; Fold an `iadd` and `imul` combination into a `madd` instruction.
|
|
(rule 7 (lower (has_type (fits_in_64 ty) (iadd x (imul y z))))
|
|
(madd ty y z x))
|
|
|
|
(rule 6 (lower (has_type (fits_in_64 ty) (iadd (imul x y) z)))
|
|
(madd ty x y z))
|
|
|
|
;; Fold an `isub` and `imul` combination into a `msub` instruction.
|
|
(rule (lower (has_type (fits_in_64 ty) (isub x (imul y z))))
|
|
(msub ty y z x))
|
|
|
|
;; vectors
|
|
|
|
(rule -2 (lower (has_type ty @ (multi_lane _ _) (iadd x y)))
|
|
(add_vec x y (vector_size ty)))
|
|
|
|
;; `i128`
|
|
(rule -3 (lower (has_type $I128 (iadd x y)))
|
|
(let
|
|
;; Get the high/low registers for `x`.
|
|
((x_regs ValueRegs x)
|
|
(x_lo Reg (value_regs_get x_regs 0))
|
|
(x_hi Reg (value_regs_get x_regs 1))
|
|
|
|
;; Get the high/low registers for `y`.
|
|
(y_regs ValueRegs y)
|
|
(y_lo Reg (value_regs_get y_regs 0))
|
|
(y_hi Reg (value_regs_get y_regs 1)))
|
|
;; The actual addition is `adds` followed by `adc`, which produce the
;; low and high halves of the result.
|
|
(with_flags
|
|
(add_with_flags_paired $I64 x_lo y_lo)
|
|
(adc_paired $I64 x_hi y_hi))))
|
|
|
|
;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type ty (shuffle rn rn2 (u128_from_immediate mask))))
|
|
(let ((mask_reg Reg (constant_f128 mask)))
|
|
(vec_tbl2 rn rn2 mask_reg ty)))
|
|
|
|
;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type vec_i128_ty (swizzle rn rm)))
|
|
(vec_tbl rn rm))
|
|
|
|
;;;; Rules for `isplit` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (isplit x @ (value_type $I128)))
|
|
(let
|
|
((x_regs ValueRegs x)
|
|
(x_lo ValueRegs (value_regs_get x_regs 0))
|
|
(x_hi ValueRegs (value_regs_get x_regs 1)))
|
|
(output_pair x_lo x_hi)))
|
|
|
|
;;;; Rules for `iconcat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type $I128 (iconcat lo hi)))
|
|
(output (value_regs lo hi)))
|
|
|
|
;;;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type $F32X4 (scalar_to_vector x)))
|
|
(fpu_extend x (ScalarSize.Size32)))
|
|
|
|
(rule (lower (has_type $F64X2 (scalar_to_vector x)))
|
|
(fpu_extend x (ScalarSize.Size64)))
|
|
|
|
(rule -1 (lower (scalar_to_vector x @ (value_type $I64)))
|
|
(mov_to_fpu x (ScalarSize.Size64)))
|
|
|
|
(rule -2 (lower (scalar_to_vector x @ (value_type (int_fits_in_32 _))))
|
|
(mov_to_fpu (put_in_reg_zext32 x) (ScalarSize.Size32)))
|
|
|
|
;;;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; cmeq vtmp.2d, vm.2d, #0
|
|
;; addp dtmp, vtmp.2d
|
|
;; fcmp dtmp, dtmp
|
|
;; cset xd, eq
|
|
;;
|
|
;; Note that after the ADDP the value of the temporary register will be either
|
|
;; 0 when all input elements are true, i.e. non-zero, or a NaN otherwise
|
|
;; (either -1 or -2 when represented as an integer); NaNs are the only
|
|
;; floating-point numbers that compare unequal to themselves.
|
|
(rule (lower (vall_true x @ (value_type (multi_lane 64 2))))
|
|
(let ((x1 Reg (cmeq0 x (VectorSize.Size64x2)))
|
|
(x2 Reg (addp x1 x1 (VectorSize.Size64x2))))
|
|
(with_flags (fpu_cmp (ScalarSize.Size64) x2 x2)
|
|
(materialize_bool_result (Cond.Eq)))))
|
|
|
|
(rule (lower (vall_true x @ (value_type (multi_lane 32 2))))
|
|
(let ((x1 Reg (mov_from_vec x 0 (ScalarSize.Size64))))
|
|
(with_flags (cmp_rr_shift (OperandSize.Size64) (zero_reg) x1 32)
|
|
(ccmp_imm
|
|
(OperandSize.Size32)
|
|
x1
|
|
(u8_into_uimm5 0)
|
|
(nzcv $false $true $false $false)
|
|
(Cond.Ne)))))
|
|
|
|
;; This operation is implemented by using uminv to create a scalar value, which
|
|
;; is then compared against zero.
|
|
;;
|
|
;; uminv bn, vm.16b
|
|
;; mov xm, vn.d[0]
|
|
;; cmp xm, #0
|
|
;; cset xm, ne
|
|
(rule -1 (lower (vall_true x @ (value_type (lane_fits_in_32 ty))))
|
|
(if (not_vec32x2 ty))
|
|
(let ((x1 Reg (vec_lanes (VecLanesOp.Uminv) x (vector_size ty)))
|
|
(x2 Reg (mov_from_vec x1 0 (ScalarSize.Size64))))
|
|
(with_flags (cmp_imm (OperandSize.Size64) x2 (u8_into_imm12 0))
|
|
(materialize_bool_result (Cond.Ne)))))
|
|
|
|
;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (vany_true x @ (value_type in_ty)))
|
|
(with_flags (vanytrue x in_ty)
|
|
(materialize_bool_result (Cond.Ne))))
|
|
|
|
;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type $I16X8 (iadd_pairwise (swiden_low x) (swiden_high x))))
|
|
(saddlp8 x))
|
|
|
|
(rule (lower (has_type $I32X4 (iadd_pairwise (swiden_low x) (swiden_high x))))
|
|
(saddlp16 x))
|
|
|
|
(rule (lower (has_type $I16X8 (iadd_pairwise (uwiden_low x) (uwiden_high x))))
|
|
(uaddlp8 x))
|
|
|
|
(rule (lower (has_type $I32X4 (iadd_pairwise (uwiden_low x) (uwiden_high x))))
|
|
(uaddlp16 x))
|
|
|
|
(rule -1 (lower (has_type ty (iadd_pairwise x y)))
|
|
(addp x y (vector_size ty)))
|
|
|
|
;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type ty @ (multi_lane _ _) (iabs x)))
|
|
(vec_abs x (vector_size ty)))
|
|
|
|
(rule 2 (lower (has_type $I64 (iabs x)))
|
|
(abs (OperandSize.Size64) x))
|
|
|
|
(rule 1 (lower (has_type (fits_in_32 ty) (iabs x)))
|
|
(abs (OperandSize.Size32) (put_in_reg_sext32 x)))
|
|
|
|
;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type $I64X2 (avg_round x y)))
|
|
(let ((one Reg (splat_const 1 (VectorSize.Size64x2)))
|
|
(c Reg (orr_vec x y (VectorSize.Size64x2)))
|
|
(c Reg (and_vec c one (VectorSize.Size64x2)))
|
|
(x Reg (vec_shift_imm (VecShiftImmOp.Ushr) 1 x
|
|
(VectorSize.Size64x2)))
|
|
(y Reg (vec_shift_imm (VecShiftImmOp.Ushr) 1 y
|
|
(VectorSize.Size64x2)))
|
|
(sum Reg (add_vec x y (VectorSize.Size64x2))))
|
|
(add_vec c sum (VectorSize.Size64x2))))
|
|
|
|
(rule -1 (lower (has_type (lane_fits_in_32 ty) (avg_round x y)))
|
|
(vec_rrr (VecALUOp.Urhadd) x y (vector_size ty)))
|
|
|
|
;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type ty @ (multi_lane _ _) (sqmul_round_sat x y)))
|
|
(vec_rrr (VecALUOp.Sqrdmulh) x y (vector_size ty)))
|
|
|
|
;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane _ _) (fadd rn rm)))
|
|
(vec_rrr (VecALUOp.Fadd) rn rm (vector_size ty)))
|
|
|
|
(rule (lower (has_type (ty_scalar_float ty) (fadd rn rm)))
|
|
(fpu_rrr (FPUOp2.Add) rn rm (scalar_size ty)))
|
|
|
|
;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane _ _) (fsub rn rm)))
|
|
(vec_rrr (VecALUOp.Fsub) rn rm (vector_size ty)))
|
|
|
|
(rule (lower (has_type (ty_scalar_float ty) (fsub rn rm)))
|
|
(fpu_rrr (FPUOp2.Sub) rn rm (scalar_size ty)))
|
|
|
|
;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane _ _) (fmul rn rm)))
|
|
(vec_rrr (VecALUOp.Fmul) rn rm (vector_size ty)))
|
|
|
|
(rule (lower (has_type (ty_scalar_float ty) (fmul rn rm)))
|
|
(fpu_rrr (FPUOp2.Mul) rn rm (scalar_size ty)))
|
|
|
|
;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane _ _) (fdiv rn rm)))
|
|
(vec_rrr (VecALUOp.Fdiv) rn rm (vector_size ty)))
|
|
|
|
(rule (lower (has_type (ty_scalar_float ty) (fdiv rn rm)))
|
|
(fpu_rrr (FPUOp2.Div) rn rm (scalar_size ty)))
|
|
|
|
;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane _ _) (fmin rn rm)))
|
|
(vec_rrr (VecALUOp.Fmin) rn rm (vector_size ty)))
|
|
|
|
(rule (lower (has_type (ty_scalar_float ty) (fmin rn rm)))
|
|
(fpu_rrr (FPUOp2.Min) rn rm (scalar_size ty)))
|
|
|
|
;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane _ _) (fmax rn rm)))
|
|
(vec_rrr (VecALUOp.Fmax) rn rm (vector_size ty)))
|
|
|
|
(rule (lower (has_type (ty_scalar_float ty) (fmax rn rm)))
|
|
(fpu_rrr (FPUOp2.Max) rn rm (scalar_size ty)))
|
|
|
|
;;;; Rules for `fmin_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane _ _) (fmin_pseudo rm rn)))
|
|
(bsl ty (vec_rrr (VecALUOp.Fcmgt) rm rn (vector_size ty)) rn rm))
|
|
|
|
(rule (lower (has_type (ty_scalar_float ty) (fmin_pseudo rm rn)))
|
|
(with_flags (fpu_cmp (scalar_size ty) rm rn)
|
|
(fpu_csel ty (Cond.Gt) rn rm)))
|
|
|
|
;;;; Rules for `fmax_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane _ _) (fmax_pseudo rm rn)))
|
|
(bsl ty (vec_rrr (VecALUOp.Fcmgt) rn rm (vector_size ty)) rn rm))
|
|
|
|
(rule (lower (has_type (ty_scalar_float ty) (fmax_pseudo rm rn)))
|
|
(with_flags (fpu_cmp (scalar_size ty) rn rm)
|
|
(fpu_csel ty (Cond.Gt) rn rm)))
|
|
|
|
;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane _ _) (sqrt x)))
|
|
(vec_misc (VecMisc2.Fsqrt) x (vector_size ty)))
|
|
|
|
(rule (lower (has_type (ty_scalar_float ty) (sqrt x)))
|
|
(fpu_rr (FPUOp1.Sqrt) x (scalar_size ty)))
|
|
|
|
;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane _ _) (fneg x)))
|
|
(vec_misc (VecMisc2.Fneg) x (vector_size ty)))
|
|
|
|
(rule (lower (has_type (ty_scalar_float ty) (fneg x)))
|
|
(fpu_rr (FPUOp1.Neg) x (scalar_size ty)))
|
|
|
|
;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane _ _) (fabs x)))
|
|
(vec_misc (VecMisc2.Fabs) x (vector_size ty)))
|
|
|
|
(rule (lower (has_type (ty_scalar_float ty) (fabs x)))
|
|
(fpu_rr (FPUOp1.Abs) x (scalar_size ty)))
|
|
|
|
;;;; Rules for `fpromote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type $F64 (fpromote x)))
|
|
(fpu_rr (FPUOp1.Cvt32To64) x (ScalarSize.Size32)))
|
|
|
|
;;;; Rules for `fdemote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type $F32 (fdemote x)))
|
|
(fpu_rr (FPUOp1.Cvt64To32) x (ScalarSize.Size64)))
|
|
|
|
;;;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane _ _) (ceil x)))
|
|
(vec_misc (VecMisc2.Frintp) x (vector_size ty)))
|
|
|
|
(rule (lower (has_type $F32 (ceil x)))
|
|
(fpu_round (FpuRoundMode.Plus32) x))
|
|
|
|
(rule (lower (has_type $F64 (ceil x)))
|
|
(fpu_round (FpuRoundMode.Plus64) x))
|
|
|
|
;;;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane _ _) (floor x)))
|
|
(vec_misc (VecMisc2.Frintm) x (vector_size ty)))
|
|
|
|
(rule (lower (has_type $F32 (floor x)))
|
|
(fpu_round (FpuRoundMode.Minus32) x))
|
|
|
|
(rule (lower (has_type $F64 (floor x)))
|
|
(fpu_round (FpuRoundMode.Minus64) x))
|
|
|
|
;;;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane _ _) (trunc x)))
|
|
(vec_misc (VecMisc2.Frintz) x (vector_size ty)))
|
|
|
|
(rule (lower (has_type $F32 (trunc x)))
|
|
(fpu_round (FpuRoundMode.Zero32) x))
|
|
|
|
(rule (lower (has_type $F64 (trunc x)))
|
|
(fpu_round (FpuRoundMode.Zero64) x))
|
|
|
|
;;;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane _ _) (nearest x)))
|
|
(vec_misc (VecMisc2.Frintn) x (vector_size ty)))
|
|
|
|
(rule (lower (has_type $F32 (nearest x)))
|
|
(fpu_round (FpuRoundMode.Nearest32) x))
|
|
|
|
(rule (lower (has_type $F64 (nearest x)))
|
|
(fpu_round (FpuRoundMode.Nearest64) x))
|
|
|
|
;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type ty @ (multi_lane _ _) (fma x y z)))
|
|
(vec_rrr_mod (VecALUModOp.Fmla) z x y (vector_size ty)))
|
|
|
|
(rule 1 (lower (has_type (ty_scalar_float ty) (fma x y z)))
|
|
(fpu_rrrr (FPUOp3.MAdd) (scalar_size ty) x y z))
|
|
|
|
;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type ty (fcopysign x y)))
|
|
(fcopy_sign x y ty))
|
|
|
|
;;;; Rules for `fcvt_to_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint x @ (value_type $F32))))
|
|
(fpu_to_int_cvt (FpuToIntOp.F32ToU32) x $false $F32 out_ty))
|
|
|
|
(rule 1 (lower (has_type $I64 (fcvt_to_uint x @ (value_type $F32))))
|
|
(fpu_to_int_cvt (FpuToIntOp.F32ToU64) x $false $F32 $I64))
|
|
|
|
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint x @ (value_type $F64))))
|
|
(fpu_to_int_cvt (FpuToIntOp.F64ToU32) x $false $F64 out_ty))
|
|
|
|
(rule 1 (lower (has_type $I64 (fcvt_to_uint x @ (value_type $F64))))
|
|
(fpu_to_int_cvt (FpuToIntOp.F64ToU64) x $false $F64 $I64))
|
|
|
|
;;;; Rules for `fcvt_to_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint x @ (value_type $F32))))
|
|
(fpu_to_int_cvt (FpuToIntOp.F32ToI32) x $true $F32 out_ty))
|
|
|
|
(rule 1 (lower (has_type $I64 (fcvt_to_sint x @ (value_type $F32))))
|
|
(fpu_to_int_cvt (FpuToIntOp.F32ToI64) x $true $F32 $I64))
|
|
|
|
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint x @ (value_type $F64))))
|
|
(fpu_to_int_cvt (FpuToIntOp.F64ToI32) x $true $F64 out_ty))
|
|
|
|
(rule 1 (lower (has_type $I64 (fcvt_to_sint x @ (value_type $F64))))
|
|
(fpu_to_int_cvt (FpuToIntOp.F64ToI64) x $true $F64 $I64))
|
|
|
|
;;;; Rules for `fcvt_from_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane 32 _) (fcvt_from_uint x @ (value_type (multi_lane 32 _)))))
|
|
(vec_misc (VecMisc2.Ucvtf) x (vector_size ty)))
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane 64 _) (fcvt_from_uint x @ (value_type (multi_lane 64 _)))))
|
|
(vec_misc (VecMisc2.Ucvtf) x (vector_size ty)))
|
|
|
|
(rule (lower (has_type $F32 (fcvt_from_uint x @ (value_type (fits_in_32 _)))))
|
|
(int_to_fpu (IntToFpuOp.U32ToF32) (put_in_reg_zext32 x)))
|
|
|
|
(rule (lower (has_type $F64 (fcvt_from_uint x @ (value_type (fits_in_32 _)))))
|
|
(int_to_fpu (IntToFpuOp.U32ToF64) (put_in_reg_zext32 x)))
|
|
|
|
(rule 1 (lower (has_type $F32 (fcvt_from_uint x @ (value_type $I64))))
|
|
(int_to_fpu (IntToFpuOp.U64ToF32) x))
|
|
|
|
(rule 1 (lower (has_type $F64 (fcvt_from_uint x @ (value_type $I64))))
|
|
(int_to_fpu (IntToFpuOp.U64ToF64) x))
|
|
|
|
;;;; Rules for `fcvt_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane 32 _) (fcvt_from_sint x @ (value_type (multi_lane 32 _)))))
|
|
(vec_misc (VecMisc2.Scvtf) x (vector_size ty)))
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane 64 _) (fcvt_from_sint x @ (value_type (multi_lane 64 _)))))
|
|
(vec_misc (VecMisc2.Scvtf) x (vector_size ty)))
|
|
|
|
(rule (lower (has_type $F32 (fcvt_from_sint x @ (value_type (fits_in_32 _)))))
|
|
(int_to_fpu (IntToFpuOp.I32ToF32) (put_in_reg_sext32 x)))
|
|
|
|
(rule (lower (has_type $F64 (fcvt_from_sint x @ (value_type (fits_in_32 _)))))
|
|
(int_to_fpu (IntToFpuOp.I32ToF64) (put_in_reg_sext32 x)))
|
|
|
|
(rule 1 (lower (has_type $F32 (fcvt_from_sint x @ (value_type $I64))))
|
|
(int_to_fpu (IntToFpuOp.I64ToF32) x))
|
|
|
|
(rule 1 (lower (has_type $F64 (fcvt_from_sint x @ (value_type $I64))))
|
|
(int_to_fpu (IntToFpuOp.I64ToF64) x))
|
|
|
|
;;;; Rules for `fcvt_to_uint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane 32 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 32 _)))))
|
|
(vec_misc (VecMisc2.Fcvtzu) x (vector_size ty)))
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 64 _)))))
|
|
(vec_misc (VecMisc2.Fcvtzu) x (vector_size ty)))
|
|
|
|
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint_sat x @ (value_type $F32))))
|
|
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU32) x $false out_ty))
|
|
|
|
(rule 1 (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F32))))
|
|
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU64) x $false $I64))
|
|
|
|
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint_sat x @ (value_type $F64))))
|
|
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU32) x $false out_ty))
|
|
|
|
(rule 1 (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F64))))
|
|
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU64) x $false $I64))
|
|
|
|
;;;; Rules for `fcvt_to_sint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane 32 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 32 _)))))
|
|
(vec_misc (VecMisc2.Fcvtzs) x (vector_size ty)))
|
|
|
|
(rule -1 (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 64 _)))))
|
|
(vec_misc (VecMisc2.Fcvtzs) x (vector_size ty)))
|
|
|
|
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint_sat x @ (value_type $F32))))
|
|
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI32) x $true out_ty))
|
|
|
|
(rule 1 (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F32))))
|
|
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI64) x $true $I64))
|
|
|
|
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint_sat x @ (value_type $F64))))
|
|
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI32) x $true out_ty))
|
|
|
|
(rule 1 (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F64))))
|
|
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI64) x $true $I64))
|
|
|
|
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; `i64` and smaller
|
|
|
|
;; Base case, simply subtracting things in registers.
|
|
(rule -4 (lower (has_type (fits_in_64 ty) (isub x y)))
|
|
(sub ty x y))
|
|
|
|
;; Special case for when one operand is an immediate that fits in 12 bits.
|
|
(rule 0 (lower (has_type (fits_in_64 ty) (isub x (imm12_from_value y))))
|
|
(sub_imm ty x y))
|
|
|
|
;; Same as the previous special case, except we can switch the subtraction to an
|
|
;; addition if the negated immediate fits in 12 bits.
|
|
(rule 2 (lower (has_type (fits_in_64 ty) (isub x (imm12_from_negated_value y))))
|
|
(add_imm ty x y))
|
|
|
|
;; Special cases for when we're subtracting an extended register where the
|
|
;; extending operation can get folded into the sub itself.
|
|
(rule 1 (lower (has_type (fits_in_64 ty) (isub x (extended_value_from_value y))))
|
|
(sub_extend ty x y))
|
|
|
|
;; Finally a special case for when we're subtracting the shift of a different
|
|
;; register by a constant amount and the shift can get folded into the sub.
|
|
(rule -3 (lower (has_type (fits_in_64 ty)
|
|
(isub x (ishl y (iconst k)))))
|
|
(if-let amt (lshl_from_imm64 ty k))
|
|
(sub_shift ty x y amt))
|
|
|
|
;; vectors
|
|
(rule -2 (lower (has_type ty @ (multi_lane _ _) (isub x y)))
|
|
(sub_vec x y (vector_size ty)))
|
|
|
|
;; `i128`
|
|
(rule -1 (lower (has_type $I128 (isub x y)))
|
|
(sub_i128 x y))
|
|
|
|
;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type (ty_vec128 ty) (uadd_sat x y)))
|
|
(uqadd x y (vector_size ty)))
|
|
|
|
;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type (ty_vec128 ty) (sadd_sat x y)))
|
|
(sqadd x y (vector_size ty)))
|
|
|
|
;;;; Rules for `usub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type (ty_vec128 ty) (usub_sat x y)))
|
|
(uqsub x y (vector_size ty)))
|
|
|
|
;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type (ty_vec128 ty) (ssub_sat x y)))
|
|
(sqsub x y (vector_size ty)))
|
|
|
|
;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; `i64` and smaller.
|
|
(rule 1 (lower (has_type (fits_in_64 ty) (ineg x)))
|
|
(sub ty (zero_reg) x))
|
|
|
|
;; `i128`
|
|
(rule 2 (lower (has_type $I128 (ineg x)))
|
|
(sub_i128 (value_regs_zero) x))
|
|
|
|
;; vectors.
|
|
(rule (lower (has_type (ty_vec128 ty) (ineg x)))
|
|
(neg x (vector_size ty)))
|
|
|
|
;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; `i64` and smaller.
|
|
(rule -3 (lower (has_type (fits_in_64 ty) (imul x y)))
|
|
(madd ty x y (zero_reg)))
|
|
|
|
;; `i128`.
|
|
(rule -1 (lower (has_type $I128 (imul x y)))
|
|
(let
|
|
;; Get the high/low registers for `x`.
|
|
((x_regs ValueRegs x)
|
|
(x_lo Reg (value_regs_get x_regs 0))
|
|
(x_hi Reg (value_regs_get x_regs 1))
|
|
|
|
;; Get the high/low registers for `y`.
|
|
(y_regs ValueRegs y)
|
|
(y_lo Reg (value_regs_get y_regs 0))
|
|
(y_hi Reg (value_regs_get y_regs 1))
|
|
|
|
;; 128bit mul formula:
|
|
;; dst_lo = x_lo * y_lo
|
|
;; dst_hi = umulhi(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo)
|
|
;;
|
|
;; We can convert the above formula into the following
|
|
;; umulh dst_hi, x_lo, y_lo
|
|
;; madd dst_hi, x_lo, y_hi, dst_hi
|
|
;; madd dst_hi, x_hi, y_lo, dst_hi
|
|
;; madd dst_lo, x_lo, y_lo, zero
|
|
(dst_hi1 Reg (umulh $I64 x_lo y_lo))
|
|
(dst_hi2 Reg (madd $I64 x_lo y_hi dst_hi1))
|
|
(dst_hi Reg (madd $I64 x_hi y_lo dst_hi2))
|
|
(dst_lo Reg (madd $I64 x_lo y_lo (zero_reg))))
|
|
(value_regs dst_lo dst_hi)))
|
|
|
|
;; Case for i8x16, i16x8, and i32x4.
|
|
(rule -2 (lower (has_type (ty_vec128 ty @ (not_i64x2)) (imul x y)))
|
|
(mul x y (vector_size ty)))
|
|
|
|
;; Special lowering for i64x2.
|
|
;;
|
|
;; This I64X2 multiplication is performed with several 32-bit
|
|
;; operations.
|
|
;;
|
|
;; 64-bit numbers x and y can be represented as:
|
|
;; x = a + 2^32(b)
|
|
;; y = c + 2^32(d)
|
|
;;
|
|
;; A 64-bit multiplication is:
|
|
;; x * y = ac + 2^32(ad + bc) + 2^64(bd)
|
|
;; note: `2^64(bd)` can be ignored; the value is too large to fit in
|
|
;; 64 bits.
|
|
;;
|
|
;; This sequence implements an I64X2 multiply, where the registers
|
|
;; `rn` and `rm` are split up into 32-bit components:
|
|
;; rn = |d|c|b|a|
|
|
;; rm = |h|g|f|e|
|
|
;;
|
|
;; rn * rm = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
|
|
;;
|
|
;; The sequence is:
|
|
;; rev64 rd.4s, rm.4s
|
|
;; mul rd.4s, rd.4s, rn.4s
|
|
;; xtn tmp1.2s, rn.2d
|
|
;; addp rd.4s, rd.4s, rd.4s
|
|
;; xtn tmp2.2s, rm.2d
|
|
;; shll rd.2d, rd.2s, #32
|
|
;; umlal rd.2d, tmp2.2s, tmp1.2s
|
|
(rule -1 (lower (has_type $I64X2 (imul x y)))
|
|
(let ((rn Reg x)
|
|
(rm Reg y)
|
|
;; Reverse the 32-bit elements in the 64-bit words.
|
|
;; rd = |g|h|e|f|
|
|
(rev Reg (rev64 rm (VectorSize.Size32x4)))
|
|
|
|
;; Calculate the high half components.
|
|
;; rd = |dg|ch|be|af|
|
|
;;
|
|
;; Note that this 32-bit multiply of the high half
|
|
;; discards the bits that would overflow, same as
|
|
;; if 64-bit operations were used. Also the Shll
|
|
;; below would shift out the overflow bits anyway.
|
|
(mul Reg (mul rev rn (VectorSize.Size32x4)))
|
|
|
|
;; Extract the low half components of rn.
|
|
;; tmp1 = |c|a|
|
|
(tmp1 Reg (xtn rn (ScalarSize.Size32)))
|
|
|
|
;; Sum the respective high half components.
|
|
;; rd = |dg+ch|be+af||dg+ch|be+af|
|
|
(sum Reg (addp mul mul (VectorSize.Size32x4)))
|
|
|
|
;; Extract the low half components of rm.
|
|
;; tmp2 = |g|e|
|
|
(tmp2 Reg (xtn rm (ScalarSize.Size32)))
|
|
|
|
;; Shift the high half components, into the high half.
|
|
;; rd = |dg+ch << 32|be+af << 32|
|
|
(shift Reg (shll32 sum $false))
|
|
|
|
;; Multiply the low components together, and accumulate with the high
|
|
;; half.
|
|
;; rd = |rd[1] + cg|rd[0] + ae|
|
|
(result Reg (umlal32 shift tmp2 tmp1 $false)))
|
|
result))
|
|
|
|
;; Special case for `i16x8.extmul_low_i8x16_s`.
|
|
(rule (lower (has_type $I16X8
|
|
(imul (swiden_low x @ (value_type $I8X16))
|
|
(swiden_low y @ (value_type $I8X16)))))
|
|
(smull8 x y $false))
|
|
|
|
;; Special case for `i16x8.extmul_high_i8x16_s`.
|
|
(rule (lower (has_type $I16X8
|
|
(imul (swiden_high x @ (value_type $I8X16))
|
|
(swiden_high y @ (value_type $I8X16)))))
|
|
(smull8 x y $true))
|
|
|
|
;; Special case for `i16x8.extmul_low_i8x16_u`.
|
|
(rule (lower (has_type $I16X8
|
|
(imul (uwiden_low x @ (value_type $I8X16))
|
|
(uwiden_low y @ (value_type $I8X16)))))
|
|
(umull8 x y $false))
|
|
|
|
;; Special case for `i16x8.extmul_high_i8x16_u`.
|
|
(rule (lower (has_type $I16X8
|
|
(imul (uwiden_high x @ (value_type $I8X16))
|
|
(uwiden_high y @ (value_type $I8X16)))))
|
|
(umull8 x y $true))
|
|
|
|
;; Special case for `i32x4.extmul_low_i16x8_s`.
|
|
(rule (lower (has_type $I32X4
|
|
(imul (swiden_low x @ (value_type $I16X8))
|
|
(swiden_low y @ (value_type $I16X8)))))
|
|
(smull16 x y $false))
|
|
|
|
;; Special case for `i32x4.extmul_high_i16x8_s`.
|
|
(rule (lower (has_type $I32X4
|
|
(imul (swiden_high x @ (value_type $I16X8))
|
|
(swiden_high y @ (value_type $I16X8)))))
|
|
(smull16 x y $true))
|
|
|
|
;; Special case for `i32x4.extmul_low_i16x8_u`.
|
|
(rule (lower (has_type $I32X4
|
|
(imul (uwiden_low x @ (value_type $I16X8))
|
|
(uwiden_low y @ (value_type $I16X8)))))
|
|
(umull16 x y $false))
|
|
|
|
;; Special case for `i32x4.extmul_high_i16x8_u`.
|
|
(rule (lower (has_type $I32X4
|
|
(imul (uwiden_high x @ (value_type $I16X8))
|
|
(uwiden_high y @ (value_type $I16X8)))))
|
|
(umull16 x y $true))
|
|
|
|
;; Special case for `i64x2.extmul_low_i32x4_s`.
|
|
(rule (lower (has_type $I64X2
|
|
(imul (swiden_low x @ (value_type $I32X4))
|
|
(swiden_low y @ (value_type $I32X4)))))
|
|
(smull32 x y $false))
|
|
|
|
;; Special case for `i64x2.extmul_high_i32x4_s`.
|
|
(rule (lower (has_type $I64X2
|
|
(imul (swiden_high x @ (value_type $I32X4))
|
|
(swiden_high y @ (value_type $I32X4)))))
|
|
(smull32 x y $true))
|
|
|
|
;; Special case for `i64x2.extmul_low_i32x4_u`.
|
|
(rule (lower (has_type $I64X2
|
|
(imul (uwiden_low x @ (value_type $I32X4))
|
|
(uwiden_low y @ (value_type $I32X4)))))
|
|
(umull32 x y $false))
|
|
|
|
;; Special case for `i64x2.extmul_high_i32x4_u`.
|
|
(rule (lower (has_type $I64X2
|
|
(imul (uwiden_high x @ (value_type $I32X4))
|
|
(uwiden_high y @ (value_type $I32X4)))))
|
|
(umull32 x y $true))
|
|
|
|
;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule 1 (lower (has_type $I64 (smulhi x y)))
|
|
(smulh $I64 x y))
|
|
|
|
(rule (lower (has_type (fits_in_32 ty) (smulhi x y)))
|
|
(let ((x64 Reg (put_in_reg_sext64 x))
|
|
(y64 Reg (put_in_reg_sext64 y))
|
|
(mul Reg (madd $I64 x64 y64 (zero_reg)))
|
|
(result Reg (asr_imm $I64 mul (imm_shift_from_u8 (ty_bits ty)))))
|
|
result))
|
|
|
|
;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule 1 (lower (has_type $I64 (umulhi x y)))
|
|
(umulh $I64 x y))
|
|
|
|
(rule (lower (has_type (fits_in_32 ty) (umulhi x y)))
|
|
(let (
|
|
(x64 Reg (put_in_reg_zext64 x))
|
|
(y64 Reg (put_in_reg_zext64 y))
|
|
(mul Reg (madd $I64 x64 y64 (zero_reg)))
|
|
(result Reg (lsr_imm $I64 mul (imm_shift_from_u8 (ty_bits ty))))
|
|
)
|
|
(value_reg result)))
|
|
|
|
;;;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; TODO: Add UDiv32 to implement 32-bit directly, rather
|
|
;; than extending the input.
|
|
;;
|
|
;; Note that aarch64's `udiv` doesn't trap so to respect the semantics of
|
|
;; CLIF's `udiv` the check for zero needs to be manually performed.
|
|
(rule (lower (has_type (fits_in_64 ty) (udiv x y)))
|
|
(a64_udiv $I64 (put_in_reg_zext64 x) (put_nonzero_in_reg_zext64 y)))
|
|
|
|
;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
|
|
(decl put_nonzero_in_reg_zext64 (Value) Reg)
|
|
(rule -1 (put_nonzero_in_reg_zext64 val)
|
|
(trap_if_zero_divisor (put_in_reg_zext64 val)))
|
|
|
|
;; Special case: if a `Value` is known to be nonzero we can trivially
|
|
;; move it into a register.
|
|
(rule (put_nonzero_in_reg_zext64 (and (value_type ty)
|
|
(iconst (nonzero_u64_from_imm64 n))))
|
|
(imm ty (ImmExtend.Zero) n))
|
|
|
|
;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; TODO: Add SDiv32 to implement 32-bit directly, rather
|
|
;; than extending the input.
|
|
;;
|
|
;; The sequence of checks here should look like:
|
|
;;
|
|
;; cbnz rm, #8
|
|
;; udf ; divide by zero
|
|
;; cmn rm, 1
|
|
;; ccmp rn, 1, #nzcv, eq
|
|
;; b.vc #8
|
|
;; udf ; signed overflow
|
|
;;
|
|
;; Note: the div instruction does not trap on divide by zero or overflow, so
|
|
;; checks need to be manually inserted.
|
|
;;
|
|
;; TODO: if `y` is -1 then a check that `x` is not INT_MIN is all that's
|
|
;; necessary, but right now `y` is checked to not be -1 as well.
|
|
(rule (lower (has_type (fits_in_64 ty) (sdiv x y)))
|
|
(let ((x64 Reg (put_in_reg_sext64 x))
|
|
(y64 Reg (put_nonzero_in_reg_sext64 y))
|
|
(valid_x64 Reg (trap_if_div_overflow ty x64 y64))
|
|
(result Reg (a64_sdiv $I64 valid_x64 y64)))
|
|
result))
|
|
|
|
;; Helper for extracting an immediate that's not 0 and not -1 from an imm64.
|
|
(decl safe_divisor_from_imm64 (u64) Imm64)
|
|
(extern extractor safe_divisor_from_imm64 safe_divisor_from_imm64)
|
|
|
|
;; Special case for `sdiv` where no checks are needed: dividing by a constant
;; that is neither 0 nor -1 means the checks always pass.
|
|
(rule 1 (lower (has_type (fits_in_64 ty) (sdiv x (iconst (safe_divisor_from_imm64 y)))))
|
|
(a64_sdiv $I64 (put_in_reg_sext64 x) (imm ty (ImmExtend.Sign) y)))
|
|
|
|
;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
|
|
(decl put_nonzero_in_reg_sext64 (Value) Reg)
|
|
(rule -1 (put_nonzero_in_reg_sext64 val)
|
|
(trap_if_zero_divisor (put_in_reg_sext64 val)))
|
|
|
|
;; Note the special case here: if the `Value` is a constant that's
|
|
;; not zero we can skip the zero check.
|
|
(rule (put_nonzero_in_reg_sext64 (and (value_type ty)
|
|
(iconst (nonzero_u64_from_imm64 n))))
|
|
(imm ty (ImmExtend.Sign) n))
|
|
|
|
;;;; Rules for `urem` and `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; Remainder (x % y) is implemented as:
|
|
;;
|
|
;; tmp = x / y
|
|
;; result = x - (tmp*y)
|
|
;;
|
|
;; use 'result' for tmp and you have:
|
|
;;
|
|
;; cbnz y, #8 ; branch over trap
|
|
;; udf ; divide by zero
|
|
;; div rd, x, y ; rd = x / y
|
|
;; msub rd, rd, y, x ; rd = x - rd * y
|
|
|
|
(rule (lower (has_type (fits_in_64 ty) (urem x y)))
|
|
(let ((x64 Reg (put_in_reg_zext64 x))
|
|
(y64 Reg (put_nonzero_in_reg_zext64 y))
|
|
(div Reg (a64_udiv $I64 x64 y64))
|
|
(result Reg (msub $I64 div y64 x64)))
|
|
result))
|
|
|
|
(rule (lower (has_type (fits_in_64 ty) (srem x y)))
|
|
(let ((x64 Reg (put_in_reg_sext64 x))
|
|
(y64 Reg (put_nonzero_in_reg_sext64 y))
|
|
(div Reg (a64_sdiv $I64 x64 y64))
|
|
(result Reg (msub $I64 div y64 x64)))
|
|
result))
|
|
|
|
;;; Rules for integer min/max: umin, smin, umax, smax ;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type ty @ (not_i64x2) (smin x y)))
|
|
(vec_rrr (VecALUOp.Smin) x y (vector_size ty)))
|
|
|
|
(rule 1 (lower (has_type $I64X2 (smin x y)))
|
|
(bsl $I64X2 (vec_rrr (VecALUOp.Cmgt) y x (VectorSize.Size64x2)) x y))
|
|
|
|
(rule (lower (has_type ty @ (not_i64x2) (umin x y)))
|
|
(vec_rrr (VecALUOp.Umin) x y (vector_size ty)))
|
|
|
|
(rule 1 (lower (has_type $I64X2 (umin x y)))
|
|
(bsl $I64X2 (vec_rrr (VecALUOp.Cmhi) y x (VectorSize.Size64x2)) x y))
|
|
|
|
(rule (lower (has_type ty @ (not_i64x2) (smax x y)))
|
|
(vec_rrr (VecALUOp.Smax) x y (vector_size ty)))
|
|
|
|
(rule 1 (lower (has_type $I64X2 (smax x y)))
|
|
(bsl $I64X2 (vec_rrr (VecALUOp.Cmgt) x y (VectorSize.Size64x2)) x y))
|
|
|
|
(rule (lower (has_type ty @ (not_i64x2) (umax x y)))
|
|
(vec_rrr (VecALUOp.Umax) x y (vector_size ty)))
|
|
|
|
(rule 1 (lower (has_type $I64X2 (umax x y)))
|
|
(bsl $I64X2 (vec_rrr (VecALUOp.Cmhi) x y (VectorSize.Size64x2)) x y))
|
|
|
|
;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; General rule for extending input to an output which fits in a single
|
|
;; register.
|
|
(rule -2 (lower (has_type (fits_in_64 out) (uextend x @ (value_type in))))
|
|
(extend x $false (ty_bits in) (ty_bits out)))
|
|
|
|
;; Extraction of a vector lane automatically extends as necessary, so we can
|
|
;; skip an explicit extending instruction.
|
|
(rule 1 (lower (has_type (fits_in_64 out)
|
|
(uextend (extractlane vec @ (value_type in)
|
|
(u8_from_uimm8 lane)))))
|
|
(mov_from_vec (put_in_reg vec) lane (lane_size in)))
|
|
|
|
;; Atomic loads will also automatically zero their upper bits so the `uextend`
|
|
;; instruction can effectively get skipped here.
|
|
(rule 1 (lower (has_type (fits_in_64 out)
|
|
(uextend x @ (and (value_type in) (atomic_load flags _)))))
|
|
(if-let mem_op (is_sinkable_inst x))
|
|
(load_acquire in flags (sink_atomic_load mem_op)))
|
|
|
|
;; Conversion to 128-bit needs a zero-extension of the lower bits, while the
;; upper bits are all zero.
|
|
(rule -1 (lower (has_type $I128 (uextend x)))
|
|
(value_regs (put_in_reg_zext64 x) (imm $I64 (ImmExtend.Zero) 0)))
|
|
|
|
;; Like above, where vector extraction automatically zero-extends, extending to
|
|
;; i128 only requires generating a 0 constant for the upper bits.
|
|
(rule (lower (has_type $I128
|
|
(uextend (extractlane vec @ (value_type in)
|
|
(u8_from_uimm8 lane)))))
|
|
(value_regs (mov_from_vec (put_in_reg vec) lane (lane_size in)) (imm $I64 (ImmExtend.Zero) 0)))
|
|
|
|
;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; General rule for extending input to an output which fits in a single
|
|
;; register.
|
|
(rule -4 (lower (has_type (fits_in_64 out) (sextend x @ (value_type in))))
|
|
(extend x $true (ty_bits in) (ty_bits out)))
|
|
|
|
;; Extraction of a vector lane automatically extends as necessary, so we can
|
|
;; skip an explicit extending instruction.
|
|
(rule -3 (lower (has_type (fits_in_64 out)
|
|
(sextend (extractlane vec @ (value_type in)
|
|
(u8_from_uimm8 lane)))))
|
|
(mov_from_vec_signed (put_in_reg vec)
|
|
lane
|
|
(vector_size in)
|
|
(size_from_ty out)))
|
|
|
|
;; 64-bit to 128-bit only needs to sign-extend the input to the upper bits.
|
|
(rule -2 (lower (has_type $I128 (sextend x)))
|
|
(let ((lo Reg (put_in_reg_sext64 x))
|
|
(hi Reg (asr_imm $I64 lo (imm_shift_from_u8 63))))
|
|
(value_regs lo hi)))
|
|
|
|
;; Like above, vector lane extraction is used for the low half; unlike the
;; `uextend` case, the upper 64 bits are then filled with the sign bit of
;; that low half rather than with a zero constant.
|
|
;;
|
|
;; Note that `mov_from_vec_signed` doesn't exist for i64x2, so that's
|
|
;; specifically excluded here.
|
|
(rule (lower (has_type $I128
|
|
(sextend (extractlane vec @ (value_type in @ (not_i64x2))
|
|
(u8_from_uimm8 lane)))))
|
|
(let ((lo Reg (mov_from_vec_signed (put_in_reg vec)
|
|
lane
|
|
(vector_size in)
|
|
(size_from_ty $I64)))
|
|
(hi Reg (asr_imm $I64 lo (imm_shift_from_u8 63))))
|
|
(value_regs lo hi)))
|
|
|
|
;; Extension from an extraction of i64x2 into i128.
|
|
(rule -1 (lower (has_type $I128
|
|
(sextend (extractlane vec @ (value_type $I64X2)
|
|
(u8_from_uimm8 lane)))))
|
|
(let ((lo Reg (mov_from_vec (put_in_reg vec)
|
|
lane
|
|
(ScalarSize.Size64)))
|
|
(hi Reg (asr_imm $I64 lo (imm_shift_from_u8 63))))
|
|
(value_regs lo hi)))
|
|
|
|
;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; Base case using `orn` between two registers.
|
|
;;
|
|
;; Note that bitwise negation is implemented here as
|
|
;;
|
|
;; NOT rd, rm ==> ORR_NOT rd, zero, rm
|
|
(rule -1 (lower (has_type (fits_in_64 ty) (bnot x)))
|
|
(orr_not ty (zero_reg) x))
|
|
|
|
;; Special case to use `orr_not_shift` if it's a `bnot` of a const-left-shifted
|
|
;; value.
|
|
(rule 1 (lower (has_type (fits_in_64 ty)
|
|
(bnot (ishl x (iconst k)))))
|
|
(if-let amt (lshl_from_imm64 ty k))
|
|
(orr_not_shift ty (zero_reg) x amt))
|
|
|
|
;; Implementation of `bnot` for `i128`.
|
|
(rule (lower (has_type $I128 (bnot x)))
|
|
(let ((x_regs ValueRegs x)
|
|
(x_lo Reg (value_regs_get x_regs 0))
|
|
(x_hi Reg (value_regs_get x_regs 1))
|
|
(new_lo Reg (orr_not $I64 (zero_reg) x_lo))
|
|
(new_hi Reg (orr_not $I64 (zero_reg) x_hi)))
|
|
(value_regs new_lo new_hi)))
|
|
|
|
;; Implementation of `bnot` for vector types.
|
|
(rule -2 (lower (has_type (ty_vec128 ty) (bnot x)))
|
|
(not x (vector_size ty)))
|
|
|
|
;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type (fits_in_32 ty) (band x y)))
|
|
(alu_rs_imm_logic_commutative (ALUOp.And) ty x y))
|
|
|
|
(rule (lower (has_type $I64 (band x y)))
|
|
(alu_rs_imm_logic_commutative (ALUOp.And) $I64 x y))
|
|
|
|
(rule (lower (has_type $I128 (band x y))) (i128_alu_bitop (ALUOp.And) $I64 x y))
|
|
|
|
(rule -2 (lower (has_type (ty_vec128 ty) (band x y)))
|
|
(and_vec x y (vector_size ty)))
|
|
|
|
;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type (fits_in_32 ty) (bor x y)))
|
|
(alu_rs_imm_logic_commutative (ALUOp.Orr) ty x y))
|
|
|
|
(rule (lower (has_type $I64 (bor x y)))
|
|
(alu_rs_imm_logic_commutative (ALUOp.Orr) $I64 x y))
|
|
|
|
(rule (lower (has_type $I128 (bor x y))) (i128_alu_bitop (ALUOp.Orr) $I64 x y))
|
|
|
|
(rule -2 (lower (has_type (ty_vec128 ty) (bor x y)))
|
|
(orr_vec x y (vector_size ty)))
|
|
|
|
;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type (fits_in_32 ty) (bxor x y)))
|
|
(alu_rs_imm_logic_commutative (ALUOp.Eor) ty x y))
|
|
|
|
(rule (lower (has_type $I64 (bxor x y)))
|
|
(alu_rs_imm_logic_commutative (ALUOp.Eor) $I64 x y))
|
|
|
|
(rule (lower (has_type $I128 (bxor x y))) (i128_alu_bitop (ALUOp.Eor) $I64 x y))
|
|
|
|
(rule -2 (lower (has_type (ty_vec128 ty) (bxor x y)))
|
|
(eor_vec x y (vector_size ty)))
|
|
|
|
;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type (fits_in_32 ty) (band_not x y)))
|
|
(alu_rs_imm_logic (ALUOp.AndNot) ty x y))
|
|
|
|
(rule (lower (has_type $I64 (band_not x y)))
|
|
(alu_rs_imm_logic (ALUOp.AndNot) $I64 x y))
|
|
|
|
(rule (lower (has_type $I128 (band_not x y))) (i128_alu_bitop (ALUOp.AndNot) $I64 x y))
|
|
|
|
(rule -2 (lower (has_type (ty_vec128 ty) (band_not x y)))
|
|
(bic_vec x y (vector_size ty)))
|
|
|
|
;;;; Rules for `bor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type (fits_in_32 ty) (bor_not x y)))
|
|
(alu_rs_imm_logic (ALUOp.OrrNot) ty x y))
|
|
|
|
(rule (lower (has_type $I64 (bor_not x y)))
|
|
(alu_rs_imm_logic (ALUOp.OrrNot) $I64 x y))
|
|
|
|
(rule (lower (has_type $I128 (bor_not x y))) (i128_alu_bitop (ALUOp.OrrNot) $I64 x y))
|
|
|
|
;;;; Rules for `bxor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule -1 (lower (has_type (fits_in_32 ty) (bxor_not x y)))
|
|
(alu_rs_imm_logic (ALUOp.EorNot) $I32 x y))
|
|
|
|
(rule (lower (has_type $I64 (bxor_not x y)))
|
|
(alu_rs_imm_logic (ALUOp.EorNot) $I64 x y))
|
|
|
|
(rule (lower (has_type $I128 (bxor_not x y))) (i128_alu_bitop (ALUOp.EorNot) $I64 x y))
|
|
|
|
;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; Shift for i8/i16/i32.
|
|
(rule -1 (lower (has_type (fits_in_32 ty) (ishl x y)))
|
|
(do_shift (ALUOp.Lsl) ty x y))
|
|
|
|
;; Shift for i64.
|
|
(rule (lower (has_type $I64 (ishl x y)))
|
|
(do_shift (ALUOp.Lsl) $I64 x y))
|
|
|
|
;; Shift for i128.
|
|
(rule (lower (has_type $I128 (ishl x y)))
|
|
(lower_shl128 x (value_regs_get y 0)))
|
|
|
|
;; lsl lo_lshift, src_lo, amt
|
|
;; lsl hi_lshift, src_hi, amt
|
|
;; mvn inv_amt, amt
|
|
;; lsr lo_rshift, src_lo, #1
|
|
;; lsr lo_rshift, lo_rshift, inv_amt
|
|
;; orr maybe_hi, hi_lshift, lo_rshift
|
|
;; tst amt, #0x40
|
|
;; csel dst_hi, lo_lshift, maybe_hi, ne
|
|
;; csel dst_lo, xzr, lo_lshift, ne
|
|
(decl lower_shl128 (ValueRegs Reg) ValueRegs)
|
|
(rule (lower_shl128 src amt)
|
|
(let ((src_lo Reg (value_regs_get src 0))
|
|
(src_hi Reg (value_regs_get src 1))
|
|
(lo_lshift Reg (lsl $I64 src_lo amt))
|
|
(hi_lshift Reg (lsl $I64 src_hi amt))
|
|
(inv_amt Reg (orr_not $I32 (zero_reg) amt))
|
|
(lo_rshift Reg (lsr $I64 (lsr_imm $I64 src_lo (imm_shift_from_u8 1))
|
|
inv_amt))
|
|
(maybe_hi Reg (orr $I64 hi_lshift lo_rshift))
|
|
)
|
|
(with_flags
|
|
(tst_imm $I64 amt (u64_into_imm_logic $I64 64))
|
|
(consumes_flags_concat
|
|
(csel (Cond.Ne) (zero_reg) lo_lshift)
|
|
(csel (Cond.Ne) lo_lshift maybe_hi)))))
|
|
|
|
;; Shift for vector types.
|
|
(rule -2 (lower (has_type (ty_vec128 ty) (ishl x y)))
|
|
(let ((size VectorSize (vector_size ty))
|
|
(masked_shift_amt Reg (and_imm $I32 y (shift_mask ty)))
|
|
(shift Reg (vec_dup masked_shift_amt size)))
|
|
(sshl x shift size)))
|
|
|
|
;; Helper function to emit a shift operation with the opcode specified and
|
|
;; the output type specified. The `Reg` provided is shifted by the `Value`
|
|
;; given.
|
|
;;
|
|
;; Note that this automatically handles the clif semantics of masking the
|
|
;; shift amount where necessary.
|
|
(decl do_shift (ALUOp Type Reg Value) Reg)
|
|
|
|
;; 8/16-bit shift base case.
|
|
;;
|
|
;; When shifting for amounts larger than the size of the type, the CLIF shift
|
|
;; instructions implement a "wrapping" behaviour, such that an i8 << 8 is
|
|
;; equivalent to i8 << 0
|
|
;;
|
|
;; On i32 and i64 types this matches what the aarch64 spec does, but on smaller
|
|
;; types (i16, i8) we need to do this manually, so we wrap the shift amount
|
|
;; with an AND instruction
|
|
(rule -1 (do_shift op (fits_in_16 ty) x y)
|
|
(let ((shift_amt Reg (value_regs_get y 0))
|
|
(masked_shift_amt Reg (and_imm $I32 shift_amt (shift_mask ty))))
|
|
(alu_rrr op $I32 x masked_shift_amt)))
|
|
|
|
(decl shift_mask (Type) ImmLogic)
|
|
(extern constructor shift_mask shift_mask)
|
|
|
|
;; 32/64-bit shift base cases.
|
|
(rule (do_shift op $I32 x y) (alu_rrr op $I32 x (value_regs_get y 0)))
|
|
(rule (do_shift op $I64 x y) (alu_rrr op $I64 x (value_regs_get y 0)))
|
|
|
|
;; Special case for shifting by a constant value where the value can fit into an
|
|
;; `ImmShift`.
|
|
;;
|
|
;; Note that this rule explicitly has a higher priority than the others
|
|
;; to ensure it's attempted first; otherwise the type-based filters on the
|
|
;; previous rules seem to take priority over this rule.
|
|
(rule 1 (do_shift op ty x (iconst k))
|
|
(if-let shift (imm_shift_from_imm64 ty k))
|
|
(alu_rr_imm_shift op ty x shift))
|
|
|
|
;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; Shift for i8/i16/i32.
|
|
(rule -1 (lower (has_type (fits_in_32 ty) (ushr x y)))
|
|
(do_shift (ALUOp.Lsr) ty (put_in_reg_zext32 x) y))
|
|
|
|
;; Shift for i64.
|
|
(rule (lower (has_type $I64 (ushr x y)))
|
|
(do_shift (ALUOp.Lsr) $I64 (put_in_reg_zext64 x) y))
|
|
|
|
;; Shift for i128.
|
|
(rule (lower (has_type $I128 (ushr x y)))
|
|
(lower_ushr128 x (value_regs_get y 0)))
|
|
|
|
;; Vector shifts.
|
|
(rule -2 (lower (has_type (ty_vec128 ty) (ushr x y)))
|
|
(let ((size VectorSize (vector_size ty))
|
|
(masked_shift_amt Reg (and_imm $I32 y (shift_mask ty)))
|
|
(shift Reg (vec_dup (sub $I64 (zero_reg) masked_shift_amt) size)))
|
|
(ushl x shift size)))
|
|
|
|
;; lsr lo_rshift, src_lo, amt
|
|
;; lsr hi_rshift, src_hi, amt
|
|
;; mvn inv_amt, amt
|
|
;; lsl hi_lshift, src_hi, #1
|
|
;; lsl hi_lshift, hi_lshift, inv_amt
|
|
;; tst amt, #0x40
|
|
;; orr maybe_lo, lo_rshift, hi_lshift
|
|
;; csel dst_hi, xzr, hi_rshift, ne
|
|
;; csel dst_lo, hi_rshift, maybe_lo, ne
|
|
(decl lower_ushr128 (ValueRegs Reg) ValueRegs)
|
|
(rule (lower_ushr128 src amt)
|
|
(let ((src_lo Reg (value_regs_get src 0))
|
|
(src_hi Reg (value_regs_get src 1))
|
|
(lo_rshift Reg (lsr $I64 src_lo amt))
|
|
(hi_rshift Reg (lsr $I64 src_hi amt))
|
|
|
|
(inv_amt Reg (orr_not $I32 (zero_reg) amt))
|
|
(hi_lshift Reg (lsl $I64 (lsl_imm $I64 src_hi (imm_shift_from_u8 1))
|
|
inv_amt))
|
|
(maybe_lo Reg (orr $I64 lo_rshift hi_lshift))
|
|
)
|
|
(with_flags
|
|
(tst_imm $I64 amt (u64_into_imm_logic $I64 64))
|
|
(consumes_flags_concat
|
|
(csel (Cond.Ne) hi_rshift maybe_lo)
|
|
(csel (Cond.Ne) (zero_reg) hi_rshift)))))
|
|
|
|
;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; Shift for i8/i16/i32.
|
|
(rule -2 (lower (has_type (fits_in_32 ty) (sshr x y)))
|
|
(do_shift (ALUOp.Asr) ty (put_in_reg_sext32 x) y))
|
|
|
|
;; Shift for i64.
|
|
(rule (lower (has_type $I64 (sshr x y)))
|
|
(do_shift (ALUOp.Asr) $I64 (put_in_reg_sext64 x) y))
|
|
|
|
;; Shift for i128.
|
|
(rule (lower (has_type $I128 (sshr x y)))
|
|
(lower_sshr128 x (value_regs_get y 0)))
|
|
|
|
;; Vector shifts.
|
|
;;
|
|
;; Note that right shifts are implemented with a negative left shift.
|
|
(rule -1 (lower (has_type (ty_vec128 ty) (sshr x y)))
|
|
(let ((size VectorSize (vector_size ty))
|
|
(masked_shift_amt Reg (and_imm $I32 y (shift_mask ty)))
|
|
(shift Reg (vec_dup (sub $I64 (zero_reg) masked_shift_amt) size)))
|
|
(sshl x shift size)))
|
|
|
|
;; lsr lo_rshift, src_lo, amt
|
|
;; asr hi_rshift, src_hi, amt
|
|
;; mvn inv_amt, amt
|
|
;; lsl hi_lshift, src_hi, #1
|
|
;; lsl hi_lshift, hi_lshift, inv_amt
|
|
;; asr hi_sign, src_hi, #63
|
|
;; orr maybe_lo, lo_rshift, hi_lshift
|
|
;; tst amt, #0x40
|
|
;; csel dst_hi, hi_sign, hi_rshift, ne
|
|
;; csel dst_lo, hi_rshift, maybe_lo, ne
|
|
(decl lower_sshr128 (ValueRegs Reg) ValueRegs)
|
|
(rule (lower_sshr128 src amt)
|
|
(let ((src_lo Reg (value_regs_get src 0))
|
|
(src_hi Reg (value_regs_get src 1))
|
|
(lo_rshift Reg (lsr $I64 src_lo amt))
|
|
(hi_rshift Reg (asr $I64 src_hi amt))
|
|
|
|
(inv_amt Reg (orr_not $I32 (zero_reg) amt))
|
|
(hi_lshift Reg (lsl $I64 (lsl_imm $I64 src_hi (imm_shift_from_u8 1))
|
|
inv_amt))
|
|
(hi_sign Reg (asr_imm $I64 src_hi (imm_shift_from_u8 63)))
|
|
(maybe_lo Reg (orr $I64 lo_rshift hi_lshift))
|
|
)
|
|
(with_flags
|
|
(tst_imm $I64 amt (u64_into_imm_logic $I64 64))
|
|
(consumes_flags_concat
|
|
(csel (Cond.Ne) hi_rshift maybe_lo)
|
|
(csel (Cond.Ne) hi_sign hi_rshift)))))
|
|
|
|
;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; General 8/16-bit case.
|
|
(rule -2 (lower (has_type (fits_in_16 ty) (rotl x y)))
|
|
(let ((amt Reg (value_regs_get y 0))
|
|
(neg_shift Reg (sub $I32 (zero_reg) amt)))
|
|
(small_rotr ty (put_in_reg_zext32 x) neg_shift)))
|
|
|
|
;; Specialization for the 8/16-bit case when the rotation amount is an immediate.
|
|
(rule -1 (lower (has_type (fits_in_16 ty) (rotl x (iconst k))))
|
|
(if-let n (imm_shift_from_imm64 ty k))
|
|
(small_rotr_imm ty (put_in_reg_zext32 x) (negate_imm_shift ty n)))
|
|
|
|
;; aarch64 doesn't have a left-rotate instruction, but a left rotation of K
|
|
;; places is effectively a right rotation of N - K places, if N is the integer's
|
|
;; bit size. We implement left rotations with this trick.
|
|
;;
|
|
;; Note that when negating the shift amount here the upper bits are ignored
|
|
;; by the rotr instruction, meaning that we'll still rotate left by the desired
|
|
;; amount.
|
|
|
|
;; General 32-bit case.
|
|
(rule (lower (has_type $I32 (rotl x y)))
|
|
(let ((amt Reg (value_regs_get y 0))
|
|
(neg_shift Reg (sub $I32 (zero_reg) amt)))
|
|
(a64_rotr $I32 x neg_shift)))
|
|
|
|
;; General 64-bit case.
|
|
(rule (lower (has_type $I64 (rotl x y)))
|
|
(let ((amt Reg (value_regs_get y 0))
|
|
(neg_shift Reg (sub $I64 (zero_reg) amt)))
|
|
(a64_rotr $I64 x neg_shift)))
|
|
|
|
;; Specialization for the 32-bit case when the rotation amount is an immediate.
|
|
(rule 1 (lower (has_type $I32 (rotl x (iconst k))))
|
|
(if-let n (imm_shift_from_imm64 $I32 k))
|
|
(a64_rotr_imm $I32 x (negate_imm_shift $I32 n)))
|
|
|
|
;; Specialization for the 64-bit case when the rotation amount is an immediate.
|
|
(rule 1 (lower (has_type $I64 (rotl x (iconst k))))
|
|
(if-let n (imm_shift_from_imm64 $I64 k))
|
|
(a64_rotr_imm $I64 x (negate_imm_shift $I64 n)))
|
|
|
|
(decl negate_imm_shift (Type ImmShift) ImmShift)
|
|
(extern constructor negate_imm_shift negate_imm_shift)
|
|
|
|
;; General 128-bit case.
|
|
;;
|
|
;; TODO: much better codegen is possible with a constant amount.
|
|
(rule (lower (has_type $I128 (rotl x y)))
|
|
(let ((val ValueRegs x)
|
|
(amt Reg (value_regs_get y 0))
|
|
(neg_amt Reg (sub $I64 (imm $I64 (ImmExtend.Zero) 128) amt))
|
|
(lshift ValueRegs (lower_shl128 val amt))
|
|
(rshift ValueRegs (lower_ushr128 val neg_amt)))
|
|
(value_regs
|
|
(orr $I64 (value_regs_get lshift 0) (value_regs_get rshift 0))
|
|
(orr $I64 (value_regs_get lshift 1) (value_regs_get rshift 1)))))
|
|
|
|
;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; General 8/16-bit case.
|
|
(rule -3 (lower (has_type (fits_in_16 ty) (rotr x y)))
|
|
(small_rotr ty (put_in_reg_zext32 x) (value_regs_get y 0)))
|
|
|
|
;; General 32-bit case.
|
|
(rule -1 (lower (has_type $I32 (rotr x y)))
|
|
(a64_rotr $I32 x (value_regs_get y 0)))
|
|
|
|
;; General 64-bit case.
|
|
(rule -1 (lower (has_type $I64 (rotr x y)))
|
|
(a64_rotr $I64 x (value_regs_get y 0)))
|
|
|
|
;; Specialization for the 8/16-bit case when the rotation amount is an immediate.
|
|
(rule -2 (lower (has_type (fits_in_16 ty) (rotr x (iconst k))))
|
|
(if-let n (imm_shift_from_imm64 ty k))
|
|
(small_rotr_imm ty (put_in_reg_zext32 x) n))
|
|
|
|
;; Specialization for the 32-bit case when the rotation amount is an immediate.
|
|
(rule (lower (has_type $I32 (rotr x (iconst k))))
|
|
(if-let n (imm_shift_from_imm64 $I32 k))
|
|
(a64_rotr_imm $I32 x n))
|
|
|
|
;; Specialization for the 64-bit case when the rotation amount is an immediate.
|
|
(rule (lower (has_type $I64 (rotr x (iconst k))))
|
|
(if-let n (imm_shift_from_imm64 $I64 k))
|
|
(a64_rotr_imm $I64 x n))
|
|
|
|
;; For a < 32-bit rotate-right, we synthesize this as:
|
|
;;
|
|
;; rotr rd, val, amt
|
|
;;
|
|
;; =>
|
|
;;
|
|
;; and masked_amt, amt, <bitwidth - 1>
|
|
;; sub tmp_sub, masked_amt, <bitwidth>
|
|
;; sub neg_amt, zero, tmp_sub ; neg
|
|
;; lsr val_rshift, val, masked_amt
|
|
;; lsl val_lshift, val, neg_amt
|
|
;; orr rd, val_lshift, val_rshift
|
|
(decl small_rotr (Type Reg Reg) Reg)
|
|
(rule (small_rotr ty val amt)
|
|
(let ((masked_amt Reg (and_imm $I32 amt (rotr_mask ty)))
|
|
(tmp_sub Reg (sub_imm $I32 masked_amt (u8_into_imm12 (ty_bits ty))))
|
|
(neg_amt Reg (sub $I32 (zero_reg) tmp_sub))
|
|
(val_rshift Reg (lsr $I32 val masked_amt))
|
|
(val_lshift Reg (lsl $I32 val neg_amt)))
|
|
(orr $I32 val_lshift val_rshift)))
|
|
|
|
(decl rotr_mask (Type) ImmLogic)
|
|
(extern constructor rotr_mask rotr_mask)
|
|
|
|
;; For a constant amount, we can instead do:
|
|
;;
|
|
;; rotr rd, val, #amt
|
|
;;
|
|
;; =>
|
|
;;
|
|
;; lsr val_rshift, val, #<amt>
|
|
;; lsl val_lshift, val, <bitwidth - amt>
|
|
;; orr rd, val_lshift, val_rshift
|
|
(decl small_rotr_imm (Type Reg ImmShift) Reg)
|
|
(rule (small_rotr_imm ty val amt)
|
|
(let ((val_rshift Reg (lsr_imm $I32 val amt))
|
|
(val_lshift Reg (lsl_imm $I32 val (rotr_opposite_amount ty amt))))
|
|
(orr $I32 val_lshift val_rshift)))
|
|
|
|
(decl rotr_opposite_amount (Type ImmShift) ImmShift)
|
|
(extern constructor rotr_opposite_amount rotr_opposite_amount)
|
|
|
|
;; General 128-bit case.
|
|
;;
|
|
;; TODO: much better codegen is possible with a constant amount.
|
|
(rule (lower (has_type $I128 (rotr x y)))
|
|
(let ((val ValueRegs x)
|
|
(amt Reg (value_regs_get y 0))
|
|
(neg_amt Reg (sub $I64 (imm $I64 (ImmExtend.Zero) 128) amt))
|
|
(rshift ValueRegs (lower_ushr128 val amt))
|
|
(lshift ValueRegs (lower_shl128 val neg_amt))
|
|
(hi Reg (orr $I64 (value_regs_get rshift 1) (value_regs_get lshift 1)))
|
|
(lo Reg (orr $I64 (value_regs_get rshift 0) (value_regs_get lshift 0))))
|
|
(value_regs lo hi)))
|
|
|
|
;;;; Rules for `bitrev` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; Reversing an 8-bit value with a 32-bit bitrev instruction will place
|
|
;; the reversed result in the highest 8 bits, so we need to shift them down into
|
|
;; place.
|
|
(rule (lower (has_type $I8 (bitrev x)))
|
|
(lsr_imm $I32 (rbit $I32 x) (imm_shift_from_u8 24)))
|
|
|
|
;; Reversing a 16-bit value with a 32-bit bitrev instruction will place
|
|
;; the reversed result in the highest 16 bits, so we need to shift them down into
|
|
;; place.
|
|
(rule (lower (has_type $I16 (bitrev x)))
|
|
(lsr_imm $I32 (rbit $I32 x) (imm_shift_from_u8 16)))
|
|
|
|
(rule (lower (has_type $I128 (bitrev x)))
|
|
(let ((val ValueRegs x)
|
|
(lo_rev Reg (rbit $I64 (value_regs_get val 0)))
|
|
(hi_rev Reg (rbit $I64 (value_regs_get val 1))))
|
|
(value_regs hi_rev lo_rev)))
|
|
|
|
(rule -1 (lower (has_type ty (bitrev x)))
|
|
(rbit ty x))
|
|
|
|
|
|
;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(rule (lower (has_type $I8 (clz x)))
|
|
(sub_imm $I32 (a64_clz $I32 (put_in_reg_zext32 x)) (u8_into_imm12 24)))
|
|
|
|
(rule (lower (has_type $I16 (clz x)))
|
|
(sub_imm $I32 (a64_clz $I32 (put_in_reg_zext32 x)) (u8_into_imm12 16)))
|
|
|
|
(rule (lower (has_type $I128 (clz x)))
|
|
(lower_clz128 x))
|
|
|
|
(rule -1 (lower (has_type ty (clz x)))
|
|
(a64_clz ty x))
|
|
|
|
;; clz hi_clz, hi
|
|
;; clz lo_clz, lo
|
|
;; lsr tmp, hi_clz, #6
|
|
;; madd dst_lo, lo_clz, tmp, hi_clz
|
|
;; mov dst_hi, 0
|
|
(decl lower_clz128 (ValueRegs) ValueRegs)
|
|
(rule (lower_clz128 val)
|
|
(let ((hi_clz Reg (a64_clz $I64 (value_regs_get val 1)))
|
|
(lo_clz Reg (a64_clz $I64 (value_regs_get val 0)))
|
|
(tmp Reg (lsr_imm $I64 hi_clz (imm_shift_from_u8 6))))
|
|
(value_regs (madd $I64 lo_clz tmp hi_clz) (imm $I64 (ImmExtend.Zero) 0))))
|
|
|
|
;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; Note that all `ctz` instructions are implemented by reversing the bits and
|
|
;; then using a `clz` instruction since the tail zeros are the same as the
|
|
;; leading zeros of the reversed value.
|
|
|
|
(rule (lower (has_type $I8 (ctz x)))
|
|
(a64_clz $I32 (orr_imm $I32 (rbit $I32 x) (u64_into_imm_logic $I32 0x800000))))
|
|
|
|
(rule (lower (has_type $I16 (ctz x)))
|
|
(a64_clz $I32 (orr_imm $I32 (rbit $I32 x) (u64_into_imm_logic $I32 0x8000))))
|
|
|
|
(rule (lower (has_type $I128 (ctz x)))
|
|
(let ((val ValueRegs x)
|
|
(lo Reg (rbit $I64 (value_regs_get val 0)))
|
|
(hi Reg (rbit $I64 (value_regs_get val 1))))
|
|
(lower_clz128 (value_regs hi lo))))
|
|
|
|
(rule -1 (lower (has_type ty (ctz x)))
|
|
(a64_clz ty (rbit ty x)))
|
|
|
|

;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I8 (cls x)))
      (sub_imm $I32 (a64_cls $I32 (put_in_reg_sext32 x)) (u8_into_imm12 24)))

(rule (lower (has_type $I16 (cls x)))
      (sub_imm $I32 (a64_cls $I32 (put_in_reg_sext32 x)) (u8_into_imm12 16)))

;; cls lo_cls, lo
;; cls hi_cls, hi
;; eon sign_eq_eon, hi, lo
;; lsr sign_eq, sign_eq_eon, #63
;; madd lo_sign_bits, lo_cls, sign_eq, sign_eq
;; cmp hi_cls, #63
;; csel maybe_lo, lo_sign_bits, xzr, eq
;; add out_lo, maybe_lo, hi_cls
;; mov out_hi, 0
(rule (lower (has_type $I128 (cls x)))
      (let ((val ValueRegs x)
            (lo Reg (value_regs_get val 0))
            (hi Reg (value_regs_get val 1))
            (lo_cls Reg (a64_cls $I64 lo))
            (hi_cls Reg (a64_cls $I64 hi))
            (sign_eq_eon Reg (eon $I64 hi lo))
            (sign_eq Reg (lsr_imm $I64 sign_eq_eon (imm_shift_from_u8 63)))
            (lo_sign_bits Reg (madd $I64 lo_cls sign_eq sign_eq))
            (maybe_lo Reg (with_flags_reg
                            (cmp64_imm hi_cls (u8_into_imm12 63))
                            (csel (Cond.Eq) lo_sign_bits (zero_reg)))))
        (value_regs (add $I64 maybe_lo hi_cls) (imm $I64 (ImmExtend.Zero) 0))))

(rule -1 (lower (has_type ty (cls x)))
      (a64_cls ty x))

;;;; Rules for `bswap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
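
;; `bswap` maps directly onto the AArch64 byte-reverse instructions: `rev16`
;; for 16-bit values, `rev32` for 32-bit values, and `rev64` for 64-bit values.
;; For `i128` we byte-reverse each 64-bit half and swap the halves.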

(rule (lower (has_type $I16 (bswap x)))
      (a64_rev16 $I16 x))

(rule (lower (has_type $I32 (bswap x)))
      (a64_rev32 $I32 x))

(rule (lower (has_type $I64 (bswap x)))
      (a64_rev64 $I64 x))

(rule (lower (has_type $I128 (bswap x)))
      (value_regs
        (a64_rev64 $I64 (value_regs_get x 1))
        (a64_rev64 $I64 (value_regs_get x 0))))

;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Bmask tests the value against zero, and uses `csetm` to assert the result.
(rule (lower (has_type out_ty (bmask x @ (value_type in_ty))))
      (lower_bmask out_ty in_ty x))

;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; The implementation of `popcnt` for scalar types is done by moving the value
;; into a vector register, using the `cnt` instruction, and then collating the
;; result back into a normal register.
;;
;; The general sequence emitted here is
;;
;;   fmov tmp, in_lo
;;   if ty == i128:
;;       mov tmp.d[1], in_hi
;;
;;   cnt tmp.16b, tmp.16b / cnt tmp.8b, tmp.8b
;;   addv tmp, tmp.16b / addv tmp, tmp.8b / addp tmp.8b, tmp.8b, tmp.8b / (no instruction for 8-bit inputs)
;;
;;   umov out_lo, tmp.b[0]
;;   if ty == i128:
;;       mov out_hi, 0

(rule (lower (has_type $I8 (popcnt x)))
      (let ((tmp Reg (mov_to_fpu x (ScalarSize.Size32)))
            (nbits Reg (vec_cnt tmp (VectorSize.Size8x8))))
        (mov_from_vec nbits 0 (ScalarSize.Size8))))

;; Note that this uses `addp` instead of `addv` as it's usually cheaper.
(rule (lower (has_type $I16 (popcnt x)))
      (let ((tmp Reg (mov_to_fpu x (ScalarSize.Size32)))
            (nbits Reg (vec_cnt tmp (VectorSize.Size8x8)))
            (added Reg (addp nbits nbits (VectorSize.Size8x8))))
        (mov_from_vec added 0 (ScalarSize.Size8))))

(rule (lower (has_type $I32 (popcnt x)))
      (let ((tmp Reg (mov_to_fpu x (ScalarSize.Size32)))
            (nbits Reg (vec_cnt tmp (VectorSize.Size8x8)))
            (added Reg (addv nbits (VectorSize.Size8x8))))
        (mov_from_vec added 0 (ScalarSize.Size8))))

(rule (lower (has_type $I64 (popcnt x)))
      (let ((tmp Reg (mov_to_fpu x (ScalarSize.Size64)))
            (nbits Reg (vec_cnt tmp (VectorSize.Size8x8)))
            (added Reg (addv nbits (VectorSize.Size8x8))))
        (mov_from_vec added 0 (ScalarSize.Size8))))

(rule (lower (has_type $I128 (popcnt x)))
      (let ((val ValueRegs x)
            (tmp_half Reg (mov_to_fpu (value_regs_get val 0) (ScalarSize.Size64)))
            (tmp Reg (mov_to_vec tmp_half (value_regs_get val 1) 1 (VectorSize.Size64x2)))
            (nbits Reg (vec_cnt tmp (VectorSize.Size8x16)))
            (added Reg (addv nbits (VectorSize.Size8x16))))
        (value_regs (mov_from_vec added 0 (ScalarSize.Size8)) (imm $I64 (ImmExtend.Zero) 0))))

(rule (lower (has_type $I8X16 (popcnt x)))
      (vec_cnt x (VectorSize.Size8x16)))

;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
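
;; For scalar types the three bitwise operations are emitted directly:
;; `tmp1 = x & c`, `tmp2 = y & ~c`, and the result is `tmp1 | tmp2`. Vector
;; types can use the single `bsl` (bit select) instruction instead.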

(rule (lower (has_type ty (bitselect c x y)))
      (if (ty_int_ref_scalar_64 ty))
      (let ((tmp1 Reg (and_reg ty x c))
            (tmp2 Reg (bic ty y c)))
        (orr ty tmp1 tmp2)))

(rule 1 (lower (has_type (ty_vec128 ty) (bitselect c x y)))
      (bsl ty c x y))

;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (ty_vec128 ty) (vselect c x y)))
      (bsl ty c x y))

;;;; Rules for `ireduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; T -> I{64,32,16,8}: We can simply pass through the value: values
;; are always stored with high bits undefined, so we can just leave
;; them be.
(rule (lower (has_type ty (ireduce src)))
      (if (ty_int_ref_scalar_64 ty))
      (value_regs_get src 0))

;;;; Rules for `fcmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
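
;; Vector `fcmp` against a zero operand can use the compare-with-zero forms of
;; the instructions. The rules below match a zero operand on either side: a
;; not-equal comparison is lowered as the negation of `fcmeq0`, and the
;; remaining conditions go through `float_cmp_zero`/`float_cmp_zero_swap`.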

(rule 4 (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x y)))
      (if (zero_value y))
      (let ((rn Reg x)
            (vec_size VectorSize (vector_size ty)))
        (value_reg (not (fcmeq0 rn vec_size) vec_size))))

(rule 3 (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) x y)))
      (if (zero_value y))
      (let ((rn Reg x)
            (vec_size VectorSize (vector_size ty)))
        (value_reg (float_cmp_zero cond rn vec_size))))

(rule 2 (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x y)))
      (if (zero_value x))
      (let ((rn Reg y)
            (vec_size VectorSize (vector_size ty)))
        (value_reg (not (fcmeq0 rn vec_size) vec_size))))

(rule 1 (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) x y)))
      (if (zero_value x))
      (let ((rn Reg y)
            (vec_size VectorSize (vector_size ty)))
        (value_reg (float_cmp_zero_swap cond rn vec_size))))

(rule 0 (lower (has_type out_ty
                         (fcmp cond x @ (value_type (ty_scalar_float in_ty)) y)))
      (with_flags (fpu_cmp (scalar_size in_ty) x y)
                  (materialize_bool_result (fp_cond_code cond))))

(rule -1 (lower (has_type out_ty (fcmp cond x @ (value_type in_ty) y)))
      (if (ty_vector_float in_ty))
      (vec_cmp x y in_ty (fp_cond_code cond)))

;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
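
;; As with `fcmp`, vector `icmp` against zero is special-cased: not-equal is
;; lowered as the negation of `cmeq0`, and the other conditions use
;; `int_cmp_zero`/`int_cmp_zero_swap` depending on which operand is zero.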

(rule 3 (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond_not_eq cond) x y)))
      (if (zero_value y))
      (let ((rn Reg x)
            (vec_size VectorSize (vector_size ty)))
        (value_reg (not (cmeq0 rn vec_size) vec_size))))

(rule 2 (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond cond) x y)))
      (if (zero_value y))
      (let ((rn Reg x)
            (vec_size VectorSize (vector_size ty)))
        (value_reg (int_cmp_zero cond rn vec_size))))

(rule 1 (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond_not_eq cond) x y)))
      (if (zero_value x))
      (let ((rn Reg y)
            (vec_size VectorSize (vector_size ty)))
        (value_reg (not (cmeq0 rn vec_size) vec_size))))

(rule 0 (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond cond) x y)))
      (if (zero_value x))
      (let ((rn Reg y)
            (vec_size VectorSize (vector_size ty)))
        (value_reg (int_cmp_zero_swap cond rn vec_size))))

(rule -1 (lower (icmp cond x @ (value_type in_ty) y))
      (lower_icmp_into_reg cond x y in_ty $I8))

;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (trap trap_code))
      (side_effect (udf trap_code)))

;;;; Rules for `resumable_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (resumable_trap trap_code))
      (side_effect (udf trap_code)))

;;;; Rules for `select` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty
                       (select (icmp cc
                                     x @ (value_type in_ty)
                                     y)
                               rn
                               rm)))
      (let ((comparison FlagsAndCC (lower_icmp_into_flags cc x y in_ty)))
        (lower_select (flags_and_cc_flags comparison)
                      (cond_code (flags_and_cc_cc comparison))
                      ty
                      rn
                      rm)))

(rule (lower (has_type ty
                       (select (fcmp cc x @ (value_type in_ty) y)
                               rn
                               rm)))
      (let ((cond Cond (fp_cond_code cc)))
        (lower_select
          (fpu_cmp (scalar_size in_ty) x y)
          cond ty rn rm)))
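
;; When the condition is not an `icmp`/`fcmp` that can be fused with the
;; select, fall back to testing the truthy value directly: an `i8` condition
;; is tested with `tst` against 0xff, other conditions are zero-extended and
;; compared against zero, and the result is selected on `ne`.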

(rule -1 (lower (has_type ty (select rcond @ (value_type $I8) rn rm)))
      (let ((rcond Reg rcond))
        (lower_select
          (tst_imm $I32 rcond (u64_into_imm_logic $I32 255))
          (Cond.Ne) ty rn rm)))

(rule -2 (lower (has_type ty (select rcond @ (value_type (fits_in_32 _)) rn rm)))
      (let ((rcond Reg (put_in_reg_zext32 rcond)))
        (lower_select
          (cmp (OperandSize.Size32) rcond (zero_reg))
          (Cond.Ne) ty rn rm)))

(rule -3 (lower (has_type ty (select rcond rn rm)))
      (let ((rcond Reg (put_in_reg_zext64 rcond)))
        (lower_select
          (cmp (OperandSize.Size64) rcond (zero_reg))
          (Cond.Ne) ty rn rm)))

;;;; Rules for `select_spectre_guard` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty
                       (select_spectre_guard (icmp cc x @ (value_type in_ty) y)
                                             if_true
                                             if_false)))
      (let ((comparison FlagsAndCC (lower_icmp_into_flags cc x y in_ty))
            (dst ValueRegs (lower_select
                             (flags_and_cc_flags comparison)
                             (cond_code (flags_and_cc_cc comparison))
                             ty
                             if_true
                             if_false))
            (_ InstOutput (side_effect (csdb))))
        dst))
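
;; Fallback for a general truthy condition (not an `icmp`): zero-extend the
;; condition, compare it against zero, and select on `ne`.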

(rule -1 (lower (has_type ty (select_spectre_guard rcond rn rm)))
      (let ((rcond Reg (put_in_reg_zext64 rcond)))
        (lower_select
          (cmp (OperandSize.Size64) rcond (zero_reg))
          (Cond.Ne) ty rn rm)))

;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (ty_vec128 _) (vconst (u128_from_constant x))))
      (constant_f128 x))

(rule 1 (lower (has_type ty (vconst (u64_from_constant x))))
      (if (ty_vec64 ty))
      (constant_f64 x))

;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule -1 (lower (has_type ty (splat x @ (value_type in_ty))))
      (if (ty_int_ref_scalar_64 in_ty))
      (vec_dup x (vector_size ty)))

(rule -2 (lower (has_type ty (splat x @ (value_type (ty_scalar_float _)))))
      (vec_dup_from_fpu x (vector_size ty)))

(rule (lower (has_type ty (splat (f32const (u64_from_ieee32 n)))))
      (splat_const n (vector_size ty)))

(rule (lower (has_type ty (splat (f64const (u64_from_ieee64 n)))))
      (splat_const n (vector_size ty)))

(rule (lower (has_type ty (splat (iconst (u64_from_imm64 n)))))
      (splat_const n (vector_size ty)))

(rule (lower (has_type ty (splat (ireduce (iconst (u64_from_imm64 n))))))
      (splat_const n (vector_size ty)))
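
;; A `splat` of a load can sink the load and use a replicating load (`ld1r`)
;; instead of loading to a scalar register and duplicating it.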

(rule (lower (has_type ty (splat x @ (load flags _ _))))
      (if-let mem_op (is_sinkable_inst x))
      (let ((addr AMode (sink_load_into_amode (lane_type ty) mem_op))
            (address Reg (load_addr addr)))
        (ld1r address (vector_size ty) flags)))

;;;; Rules for `AtomicLoad` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (valid_atomic_transaction ty) (atomic_load flags addr)))
      (load_acquire ty flags addr))


;;;; Rules for `AtomicStore` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (atomic_store flags
                           src @ (value_type (valid_atomic_transaction ty))
                           addr))
      (side_effect (store_release ty flags src addr)))

;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
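
;; When the LSE (Large System Extensions) atomics are available (`use_lse`),
;; read-modify-write ops lower to single atomic instructions. `Sub` and `And`
;; have no direct encoding, so they are lowered as an atomic add of the negated
;; operand and an atomic clear (`Clr`) of the inverted operand, respectively.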

(rule 1 (lower (and (use_lse)
                    (has_type (valid_atomic_transaction ty)
                              (atomic_rmw flags (AtomicRmwOp.Add) addr src))))
      (lse_atomic_rmw (AtomicRMWOp.Add) addr src ty flags))
(rule 1 (lower (and (use_lse)
                    (has_type (valid_atomic_transaction ty)
                              (atomic_rmw flags (AtomicRmwOp.Xor) addr src))))
      (lse_atomic_rmw (AtomicRMWOp.Eor) addr src ty flags))
(rule 1 (lower (and (use_lse)
                    (has_type (valid_atomic_transaction ty)
                              (atomic_rmw flags (AtomicRmwOp.Or) addr src))))
      (lse_atomic_rmw (AtomicRMWOp.Set) addr src ty flags))
(rule 1 (lower (and (use_lse)
                    (has_type (valid_atomic_transaction ty)
                              (atomic_rmw flags (AtomicRmwOp.Smax) addr src))))
      (lse_atomic_rmw (AtomicRMWOp.Smax) addr src ty flags))
(rule 1 (lower (and (use_lse)
                    (has_type (valid_atomic_transaction ty)
                              (atomic_rmw flags (AtomicRmwOp.Smin) addr src))))
      (lse_atomic_rmw (AtomicRMWOp.Smin) addr src ty flags))
(rule 1 (lower (and (use_lse)
                    (has_type (valid_atomic_transaction ty)
                              (atomic_rmw flags (AtomicRmwOp.Umax) addr src))))
      (lse_atomic_rmw (AtomicRMWOp.Umax) addr src ty flags))
(rule 1 (lower (and (use_lse)
                    (has_type (valid_atomic_transaction ty)
                              (atomic_rmw flags (AtomicRmwOp.Umin) addr src))))
      (lse_atomic_rmw (AtomicRMWOp.Umin) addr src ty flags))
(rule 1 (lower (and (use_lse)
                    (has_type (valid_atomic_transaction ty)
                              (atomic_rmw flags (AtomicRmwOp.Sub) addr src))))
      (lse_atomic_rmw (AtomicRMWOp.Add) addr (sub ty (zero_reg) src) ty flags))
(rule 1 (lower (and (use_lse)
                    (has_type (valid_atomic_transaction ty)
                              (atomic_rmw flags (AtomicRmwOp.And) addr src))))
      (lse_atomic_rmw (AtomicRMWOp.Clr) addr (eon ty src (zero_reg)) ty flags))
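
;; Without LSE, read-modify-write ops fall back to `atomic_rmw_loop`, which
;; expands to a load-exclusive/store-exclusive retry loop.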

(rule (lower (has_type (valid_atomic_transaction ty)
                       (atomic_rmw flags (AtomicRmwOp.Add) addr src)))
      (atomic_rmw_loop (AtomicRMWLoopOp.Add) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
                       (atomic_rmw flags (AtomicRmwOp.Sub) addr src)))
      (atomic_rmw_loop (AtomicRMWLoopOp.Sub) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
                       (atomic_rmw flags (AtomicRmwOp.And) addr src)))
      (atomic_rmw_loop (AtomicRMWLoopOp.And) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
                       (atomic_rmw flags (AtomicRmwOp.Nand) addr src)))
      (atomic_rmw_loop (AtomicRMWLoopOp.Nand) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
                       (atomic_rmw flags (AtomicRmwOp.Or) addr src)))
      (atomic_rmw_loop (AtomicRMWLoopOp.Orr) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
                       (atomic_rmw flags (AtomicRmwOp.Xor) addr src)))
      (atomic_rmw_loop (AtomicRMWLoopOp.Eor) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
                       (atomic_rmw flags (AtomicRmwOp.Smin) addr src)))
      (atomic_rmw_loop (AtomicRMWLoopOp.Smin) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
                       (atomic_rmw flags (AtomicRmwOp.Smax) addr src)))
      (atomic_rmw_loop (AtomicRMWLoopOp.Smax) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
                       (atomic_rmw flags (AtomicRmwOp.Umin) addr src)))
      (atomic_rmw_loop (AtomicRMWLoopOp.Umin) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
                       (atomic_rmw flags (AtomicRmwOp.Umax) addr src)))
      (atomic_rmw_loop (AtomicRMWLoopOp.Umax) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
                       (atomic_rmw flags (AtomicRmwOp.Xchg) addr src)))
      (atomic_rmw_loop (AtomicRMWLoopOp.Xchg) addr src ty flags))

;;;; Rules for `AtomicCAS` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
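
;; Compare-and-swap uses the LSE `cas` instructions when available; otherwise
;; it falls back to a load-exclusive/store-exclusive loop.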

(rule 1 (lower (and (use_lse)
                    (has_type (valid_atomic_transaction ty)
                              (atomic_cas flags addr src1 src2))))
      (lse_atomic_cas addr src1 src2 ty flags))

(rule (lower (and (has_type (valid_atomic_transaction ty)
                            (atomic_cas flags addr src1 src2))))
      (atomic_cas_loop addr src1 src2 ty flags))

;;;; Rules for 'fvdemote' ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (fvdemote x))
      (fcvtn x (ScalarSize.Size32)))


;;;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
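
;; The saturating-narrow rules share a common shape: when the second operand
;; is all zeros only the low half needs narrowing; 64-bit vector results first
;; move the second operand into the upper half; otherwise both halves are
;; narrowed with the paired low/high instructions (`sqxtn`/`sqxtn2` here, and
;; the `sqxtun`/`uqxtn` variants in the sections below).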

(rule 1 (lower (has_type (ty_vec128_int ty) (snarrow x y)))
      (if (zero_value y))
      (sqxtn x (lane_size ty)))

(rule 2 (lower (has_type (ty_vec64_int ty) (snarrow x y)))
      (let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2))))
        (sqxtn dst (lane_size ty))))

(rule 0 (lower (has_type (ty_vec128_int ty) (snarrow x y)))
      (let ((low_half Reg (sqxtn x (lane_size ty)))
            (result Reg (sqxtn2 low_half y (lane_size ty))))
        result))


;;;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 1 (lower (has_type (ty_vec128_int ty) (unarrow x y)))
      (if (zero_value y))
      (sqxtun x (lane_size ty)))

(rule 2 (lower (has_type (ty_vec64_int ty) (unarrow x y)))
      (let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2))))
        (sqxtun dst (lane_size ty))))

(rule 0 (lower (has_type (ty_vec128_int ty) (unarrow x y)))
      (let ((low_half Reg (sqxtun x (lane_size ty)))
            (result Reg (sqxtun2 low_half y (lane_size ty))))
        result))


;;;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 1 (lower (has_type (ty_vec128_int ty) (uunarrow x y)))
      (if (zero_value y))
      (uqxtn x (lane_size ty)))

(rule 2 (lower (has_type (ty_vec64_int ty) (uunarrow x y)))
      (let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2))))
        (uqxtn dst (lane_size ty))))

(rule 0 (lower (has_type (ty_vec128_int ty) (uunarrow x y)))
      (let ((low_half Reg (uqxtn x (lane_size ty)))
            (result Reg (uqxtn2 low_half y (lane_size ty))))
        result))

;;;; Rules for `swiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty (swiden_low x)))
      (vec_extend (VecExtendOp.Sxtl) x $false (lane_size ty)))

;;;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
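
;; For a 128-bit vector, the high half is extended in place (the `$true` flag
;; selects the high-half form of the extend); for a 64-bit vector the high half
;; is first moved down to lane 0 and then widened as a "low" extend.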

(rule 1 (lower (has_type (ty_vec128 ty) (swiden_high x)))
      (vec_extend (VecExtendOp.Sxtl) x $true (lane_size ty)))

(rule (lower (has_type ty (swiden_high x)))
      (if (ty_vec64 ty))
      (let ((tmp Reg (fpu_move_from_vec x 1 (VectorSize.Size32x2))))
        (vec_extend (VecExtendOp.Sxtl) tmp $false (lane_size ty))))

;;;; Rules for `uwiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty (uwiden_low x)))
      (vec_extend (VecExtendOp.Uxtl) x $false (lane_size ty)))

;;;; Rules for `uwiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 1 (lower (has_type (ty_vec128 ty) (uwiden_high x)))
      (vec_extend (VecExtendOp.Uxtl) x $true (lane_size ty)))

(rule (lower (has_type ty (uwiden_high x)))
      (if (ty_vec64 ty))
      (let ((tmp Reg (fpu_move_from_vec x 1 (VectorSize.Size32x2))))
        (vec_extend (VecExtendOp.Uxtl) tmp $false (lane_size ty))))

;;;; Rules for `widening_pairwise_dot_product_s` ;;;;;;;;;;;;;;;;;;;;;;

;; The args have type I16X8.
;; "dst = i32x4.dot_i16x8_s(x, y)"
;; =>  smull  tmp, x, y
;;     smull2 dst, x, y
;;     addp   dst, tmp, dst
(rule (lower (has_type $I32X4 (widening_pairwise_dot_product_s x y)))
      (let ((tmp Reg (vec_rrr_long (VecRRRLongOp.Smull16) x y $false))
            (dst Reg (vec_rrr_long (VecRRRLongOp.Smull16) x y $true)))
        (vec_rrr (VecALUOp.Addp) tmp dst (VectorSize.Size32x4))))

;;;; Rules for `Fence` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (fence))
      (side_effect (aarch64_fence)))

;;;; Rules for `IsNull` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (is_null x @ (value_type ty)))
      (with_flags (cmp_imm (operand_size ty) x (u8_into_imm12 0))
                  (materialize_bool_result (Cond.Eq))))

;;;; Rules for `IsInvalid` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (is_invalid x @ (value_type ty)))
      (with_flags (cmn_imm (operand_size ty) x (u8_into_imm12 1))
                  (materialize_bool_result (Cond.Eq))))

;;;; Rules for `Debugtrap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (debugtrap))
      (side_effect (brk)))

;;;; Rules for `func_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (func_addr (func_ref_data _ extname _)))
      (load_ext_name (box_external_name extname) 0))

;;;; Rules for `symbol_value` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (symbol_value (symbol_value_data extname _ offset)))
      (load_ext_name (box_external_name extname) offset))

;;; Rules for `get_{frame,stack}_pointer` and `get_return_address` ;;;;;;;;;;;;;

(rule (lower (get_frame_pointer))
      (aarch64_fp))

(rule (lower (get_stack_pointer))
      (aarch64_sp))

(rule (lower (get_return_address))
      (aarch64_link))

;;;; Rules for calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (call (func_ref_data sig_ref extname dist) inputs))
      (gen_call sig_ref extname dist inputs))

(rule (lower (call_indirect sig_ref val inputs))
      (gen_call_indirect sig_ref val inputs))

;;;; Rules for `return` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; N.B.: the Ret itself is generated by the ABI.
(rule (lower (return args))
      (lower_return (range 0 (value_slice_len args)) args))

;;;; Rules for loads ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
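
;; Scalar and vector loads pick an addressing mode with `amode`; `i128` loads
;; use a 64-bit load-pair, and SIMD/FP types load directly into vector
;; registers.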

(rule (lower
       (has_type $I8 (load flags address offset)))
      (aarch64_uload8 (amode $I8 address offset) flags))
(rule (lower
       (has_type $I16 (load flags address offset)))
      (aarch64_uload16 (amode $I16 address offset) flags))
(rule (lower
       (has_type $I32 (load flags address offset)))
      (aarch64_uload32 (amode $I32 address offset) flags))
(rule (lower
       (has_type $I64 (load flags address offset)))
      (aarch64_uload64 (amode $I64 address offset) flags))
(rule (lower
       (has_type $R64 (load flags address offset)))
      (aarch64_uload64 (amode $I64 address offset) flags))
(rule (lower
       (has_type $F32 (load flags address offset)))
      (aarch64_fpuload32 (amode $F32 address offset) flags))
(rule (lower
       (has_type $F64 (load flags address offset)))
      (aarch64_fpuload64 (amode $F64 address offset) flags))
(rule (lower
       (has_type $I128 (load flags address offset)))
      (aarch64_loadp64 (pair_amode address offset) flags))
(rule -1 (lower
       (has_type (ty_vec64 _)
                 (load flags address offset)))
      (aarch64_fpuload64 (amode $F64 address offset) flags))
(rule -3 (lower
       (has_type (ty_vec128 _)
                 (load flags address offset)))
      (aarch64_fpuload128 (amode $I8X16 address offset) flags))
(rule -2 (lower
       (has_type (ty_dyn_vec64 _)
                 (load flags address offset)))
      (aarch64_fpuload64 (amode $F64 address offset) flags))
(rule -4 (lower
       (has_type (ty_dyn_vec128 _)
                 (load flags address offset)))
      (aarch64_fpuload128 (amode $I8X16 address offset) flags))

(rule (lower
       (uload8 flags address offset))
      (aarch64_uload8 (amode $I8 address offset) flags))
(rule (lower
       (sload8 flags address offset))
      (aarch64_sload8 (amode $I8 address offset) flags))
(rule (lower
       (uload16 flags address offset))
      (aarch64_uload16 (amode $I16 address offset) flags))
(rule (lower
       (sload16 flags address offset))
      (aarch64_sload16 (amode $I16 address offset) flags))
(rule (lower
       (uload32 flags address offset))
      (aarch64_uload32 (amode $I32 address offset) flags))
(rule (lower
       (sload32 flags address offset))
      (aarch64_sload32 (amode $I32 address offset) flags))
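
;; The wide extending loads (`sload8x8`, `uload16x4`, etc.) load 64 bits into
;; a vector register and then widen the lanes with `sxtl`/`uxtl`.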

(rule (lower
       (sload8x8 flags address offset))
      (vec_extend (VecExtendOp.Sxtl)
                  (aarch64_fpuload64 (amode $F64 address offset) flags)
                  $false
                  (ScalarSize.Size16)))
(rule (lower
       (uload8x8 flags address offset))
      (vec_extend (VecExtendOp.Uxtl)
                  (aarch64_fpuload64 (amode $F64 address offset) flags)
                  $false
                  (ScalarSize.Size16)))
(rule (lower
       (sload16x4 flags address offset))
      (vec_extend (VecExtendOp.Sxtl)
                  (aarch64_fpuload64 (amode $F64 address offset) flags)
                  $false
                  (ScalarSize.Size32)))
(rule (lower
       (uload16x4 flags address offset))
      (vec_extend (VecExtendOp.Uxtl)
                  (aarch64_fpuload64 (amode $F64 address offset) flags)
                  $false
                  (ScalarSize.Size32)))
(rule (lower
       (sload32x2 flags address offset))
      (vec_extend (VecExtendOp.Sxtl)
                  (aarch64_fpuload64 (amode $F64 address offset) flags)
                  $false
                  (ScalarSize.Size64)))
(rule (lower
       (uload32x2 flags address offset))
      (vec_extend (VecExtendOp.Uxtl)
                  (aarch64_fpuload64 (amode $F64 address offset) flags)
                  $false
                  (ScalarSize.Size64)))

;;;; Rules for stores ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
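
;; Stores mirror the load rules: the value type selects the store width and
;; register class, and `i128` stores use a 64-bit store-pair.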

(rule (lower
       (store flags value @ (value_type $I8) address offset))
      (side_effect
        (aarch64_store8 (amode $I8 address offset) flags value)))
(rule (lower
       (store flags value @ (value_type $I16) address offset))
      (side_effect
        (aarch64_store16 (amode $I16 address offset) flags value)))
(rule (lower
       (store flags value @ (value_type $I32) address offset))
      (side_effect
        (aarch64_store32 (amode $I32 address offset) flags value)))
(rule (lower
       (store flags value @ (value_type $I64) address offset))
      (side_effect
        (aarch64_store64 (amode $I64 address offset) flags value)))
(rule (lower
       (store flags value @ (value_type $R64) address offset))
      (side_effect
        (aarch64_store64 (amode $I64 address offset) flags value)))

(rule (lower
       (istore8 flags value address offset))
      (side_effect
        (aarch64_store8 (amode $I8 address offset) flags value)))
(rule (lower
       (istore16 flags value address offset))
      (side_effect
        (aarch64_store16 (amode $I16 address offset) flags value)))
(rule (lower
       (istore32 flags value address offset))
      (side_effect
        (aarch64_store32 (amode $I32 address offset) flags value)))

(rule (lower
       (store flags value @ (value_type $F32) address offset))
      (side_effect
        (aarch64_fpustore32 (amode $F32 address offset) flags value)))
(rule (lower
       (store flags value @ (value_type $F64) address offset))
      (side_effect
        (aarch64_fpustore64 (amode $F64 address offset) flags value)))

(rule (lower
       (store flags value @ (value_type $I128) address offset))
      (side_effect
        (aarch64_storep64 (pair_amode address offset) flags
                          (value_regs_get value 0)
                          (value_regs_get value 1))))

(rule -1 (lower
       (store flags value @ (value_type (ty_vec64 _)) address offset))
      (side_effect
        (aarch64_fpustore64 (amode $F64 address offset) flags value)))
(rule -3 (lower
       (store flags value @ (value_type (ty_vec128 _)) address offset))
      (side_effect
        (aarch64_fpustore128 (amode $I8X16 address offset) flags value)))
(rule -2 (lower
       (store flags value @ (value_type (ty_dyn_vec64 _)) address offset))
      (side_effect
        (aarch64_fpustore64 (amode $F64 address offset) flags value)))
(rule -4 (lower
       (store flags value @ (value_type (ty_dyn_vec128 _)) address offset))
      (side_effect
        (aarch64_fpustore128 (amode $I8X16 address offset) flags value)))

;;; Rules for `{get,set}_pinned_reg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (get_pinned_reg))
      (mov_from_preg (preg_pinned)))

(rule (lower (set_pinned_reg val))
      (side_effect (write_pinned_reg val)))

;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; SIMD&FP <=> SIMD&FP
(rule 5 (lower (has_type (ty_float_or_vec _) (bitcast _ x @ (value_type (ty_float_or_vec _)))))
      x)

; GPR => SIMD&FP
(rule 4 (lower (has_type (ty_float_or_vec _) (bitcast _ x @ (value_type in_ty))))
      (if (ty_int_ref_scalar_64 in_ty))
      (mov_to_fpu x (scalar_size in_ty)))

; SIMD&FP => GPR
(rule 3 (lower (has_type out_ty (bitcast _ x @ (value_type (fits_in_64 (ty_float_or_vec _))))))
      (if (ty_int_ref_scalar_64 out_ty))
      (mov_from_vec x 0 (scalar_size out_ty)))

; GPR <=> GPR
(rule 2 (lower (has_type out_ty (bitcast _ x @ (value_type in_ty))))
      (if (ty_int_ref_scalar_64 out_ty))
      (if (ty_int_ref_scalar_64 in_ty))
      x)
(rule 1 (lower (has_type $I128 (bitcast _ x @ (value_type $I128)))) x)

;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; extractlane with lane 0 can pass through the value unchanged; upper
;; bits are undefined when a narrower type is in a wider register.
(rule 2 (lower (has_type (ty_scalar_float _) (extractlane val (u8_from_uimm8 0))))
      val)

(rule 0 (lower (has_type (ty_int ty)
                         (extractlane val
                                      (u8_from_uimm8 lane))))
      (mov_from_vec val lane (scalar_size ty)))

(rule 1 (lower (has_type (ty_scalar_float ty)
                         (extractlane val @ (value_type vty)
                                      (u8_from_uimm8 lane))))
      (fpu_move_from_vec val lane (vector_size vty)))

;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
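
;; Integer lanes are inserted from a general-purpose register with
;; `mov_to_vec`; float lanes are inserted with a vector element-to-element
;; move (`mov_vec_elem`).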

(rule 1 (lower (insertlane vec @ (value_type vty)
                           val @ (value_type (ty_int _))
                           (u8_from_uimm8 lane)))
      (mov_to_vec vec val lane (vector_size vty)))

(rule (lower (insertlane vec @ (value_type vty)
                         val @ (value_type (ty_scalar_float _))
                         (u8_from_uimm8 lane)))
      (mov_vec_elem vec val lane 0 (vector_size vty)))

;;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (stack_addr stack_slot offset))
      (compute_stack_addr stack_slot offset))

;;; Rules for `vhigh_bits` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; All three sequences use one integer temporary and two vector
;; temporaries. The shift is done early so as to give the register
;; allocator the possibility of using the same reg for `tmp_v1` and
;; `src_v` in the case that this is the last use of `src_v`. See
;; https://github.com/WebAssembly/simd/pull/201 for the background and
;; derivation of these sequences. Alternative sequences are discussed
;; in https://github.com/bytecodealliance/wasmtime/issues/2296,
;; although they are not used here.

(rule (lower (vhigh_bits vec @ (value_type $I8X16)))
      (let (
            ;; Replicate the MSB of each of the 16 byte lanes across
            ;; the whole lane (sshr is an arithmetic right shift).
            (shifted Reg (vec_shift_imm (VecShiftImmOp.Sshr) 7 vec (VectorSize.Size8x16)))
            ;; Bitwise-and with a mask
            ;; `0x80402010_08040201_80402010_08040201` to get the bit
            ;; in the proper location for each group of 8 lanes.
            (anded Reg (and_vec shifted (constant_f128 0x80402010_08040201_80402010_08040201) (VectorSize.Size8x16)))
            ;; Produce a version of `anded` with upper 8 lanes and
            ;; lower 8 lanes swapped.
            (anded_swapped Reg (vec_extract anded anded 8))
            ;; Zip together the two; with the above this produces the lane permutation:
            ;; 15 7 14 6 13 5 12 4 11 3 10 2 9 1 8 0
            (zipped Reg (zip1 anded anded_swapped (VectorSize.Size8x16)))
            ;; Add 16-bit lanes together ("add across vector"), so we
            ;; get, in the low 16 bits, 15+14+...+8 in the high byte
            ;; and 7+6+...+0 in the low byte. This effectively puts
            ;; the 16 MSBs together, giving our results.
            ;;
            ;; N.B.: `Size16x8` is not a typo!
            (result Reg (addv zipped (VectorSize.Size16x8))))
        (mov_from_vec result 0 (ScalarSize.Size16))))

(rule (lower (vhigh_bits vec @ (value_type $I16X8)))
      (let (
            ;; Replicate the MSB of each of the 8 16-bit lanes across
            ;; the whole lane (sshr is an arithmetic right shift).
            (shifted Reg (vec_shift_imm (VecShiftImmOp.Sshr) 15 vec (VectorSize.Size16x8)))
            ;; Bitwise-and with a mask
            ;; `0x0080_0040_0020_0010_0008_0004_0002_0001` to get the
            ;; bit in the proper location for each group of 4 lanes.
            (anded Reg (and_vec shifted (constant_f128 0x0080_0040_0020_0010_0008_0004_0002_0001) (VectorSize.Size16x8)))
            ;; Add lanes together to get the 8 MSBs in the low byte.
            (result Reg (addv anded (VectorSize.Size16x8))))
        (mov_from_vec result 0 (ScalarSize.Size16))))

(rule (lower (vhigh_bits vec @ (value_type $I32X4)))
      (let (
            ;; Replicate the MSB of each of the 4 32-bit lanes across
            ;; the whole lane (sshr is an arithmetic right shift).
            (shifted Reg (vec_shift_imm (VecShiftImmOp.Sshr) 31 vec (VectorSize.Size32x4)))
            ;; Bitwise-and with a mask
            ;; `0x00000008_00000004_00000002_00000001` to get the bit
            ;; in the proper location for each group of 4 lanes.
            (anded Reg (and_vec shifted (constant_f128 0x00000008_00000004_00000002_00000001) (VectorSize.Size32x4)))
            ;; Add lanes together to get the 4 MSBs in the low byte.
            (result Reg (addv anded (VectorSize.Size32x4))))
        (mov_from_vec result 0 (ScalarSize.Size32))))

(rule (lower (vhigh_bits vec @ (value_type $I64X2)))
      (let (
            ;; Grab the MSB out of each of the two lanes, right-shift
            ;; each down to the LSB, and add the upper lane's bit back
            ;; in at bit 1 with a shifted add.
            (upper_msb Reg (mov_from_vec vec 1 (ScalarSize.Size64)))
            (lower_msb Reg (mov_from_vec vec 0 (ScalarSize.Size64)))
            (upper_msb Reg (lsr_imm $I64 upper_msb (imm_shift_from_u8 63)))
            (lower_msb Reg (lsr_imm $I64 lower_msb (imm_shift_from_u8 63))))
        (add_shift $I64 lower_msb upper_msb (lshl_from_u64 $I64 1))))

;;; Rules for `iadd_cout` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; For values smaller than a register, we do a normal `add` with both arguments
;; sign-extended. We then check if the output sign bit has flipped.
(rule 0 (lower (has_type (fits_in_16 ty) (iadd_cout a b)))
      (let ((extend ExtendOp (lower_extend_op ty $true))

            ;; Instead of emitting two `sxt{b,h}` we do one as an instruction and
            ;; the other as an extend operation in the `add` instruction.
            ;;
            ;;   sxtb a_sext, a
            ;;   add out, a_sext, b, sxtb
            ;;   cmp out, out, sxtb
            ;;   cset out_carry, ne
            (a_sext Reg (put_in_reg_sext32 a))
            (out Reg (add_extend_op ty a_sext b extend))
            (out_carry Reg (with_flags_reg
                             (cmp_extend (OperandSize.Size32) out out extend)
                             (cset (Cond.Ne)))))
        (output_pair
          (value_reg out)
          (value_reg out_carry))))


;; For register-sized adds we just emit an adds+cset, without further masking.
;;
;;   adds out, a, b
;;   cset carry, vs
(rule 1 (lower (has_type (ty_32_or_64 ty) (iadd_cout a b)))
      (let ((out ValueRegs
                 (with_flags
                   (add_with_flags_paired ty a b)
                   (cset_paired (Cond.Vs)))))
        (output_pair
          (value_regs_get out 0)
          (value_regs_get out 1))))

;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (fits_in_64 ty) (uadd_overflow_trap a b tc)))
      (trap_if_overflow (add_with_flags_paired ty a b) tc))

;;; Rules for `tls_value` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (tls_value (symbol_value_data name _ _)))
      (if (tls_model_is_elf_gd))
      (elf_tls_get_addr name))

;;; Rules for `fcvt_low_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $F64X2 (fcvt_low_from_sint val)))
      (let ((extended Reg (vec_extend (VecExtendOp.Sxtl) val $false (ScalarSize.Size64)))
            (converted Reg (vec_misc (VecMisc2.Scvtf) extended (VectorSize.Size64x2))))
        converted))

;;; Rules for `fvpromote_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (fvpromote_low val))
      (vec_rr_long (VecRRLongOp.Fcvtl32) val $false))

;;; Rules for `brz`/`brnz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `brz` following `icmp`
(rule (lower_branch (brz (icmp cc x @ (value_type ty) y) _ _) targets)
      (let ((comparison FlagsAndCC (lower_icmp_into_flags cc x y ty))
            ;; Negate the condition for `brz`.
            (cond Cond (invert_cond (cond_code (flags_and_cc_cc comparison))))
            (taken BranchTarget (branch_target targets 0))
            (not_taken BranchTarget (branch_target targets 1)))
        (side_effect
          (with_flags_side_effect (flags_and_cc_flags comparison)
                                  (cond_br taken
                                           not_taken
                                           (cond_br_cond cond))))))
;; `brnz` following `icmp`
(rule (lower_branch (brnz (icmp cc x @ (value_type ty) y) _ _) targets)
      (let ((comparison FlagsAndCC (lower_icmp_into_flags cc x y ty))
            (cond Cond (cond_code (flags_and_cc_cc comparison)))
            (taken BranchTarget (branch_target targets 0))
            (not_taken BranchTarget (branch_target targets 1)))
        (side_effect
          (with_flags_side_effect (flags_and_cc_flags comparison)
                                  (cond_br taken
                                           not_taken
                                           (cond_br_cond cond))))))
;; `brz` following `fcmp`
(rule (lower_branch (brz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets)
      (let ((cond Cond (fp_cond_code cc))
            (cond Cond (invert_cond cond)) ;; negate for `brz`
            (taken BranchTarget (branch_target targets 0))
            (not_taken BranchTarget (branch_target targets 1)))
        (side_effect
          (with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
                                  (cond_br taken not_taken
                                           (cond_br_cond cond))))))
;; `brnz` following `fcmp`
(rule (lower_branch (brnz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets)
      (let ((cond Cond (fp_cond_code cc))
            (taken BranchTarget (branch_target targets 0))
            (not_taken BranchTarget (branch_target targets 1)))
        (side_effect
          (with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
                                  (cond_br taken not_taken
                                           (cond_br_cond cond))))))
;; standard `brz`
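;; An `i128` condition is truthy if either half is non-zero, so the two halves
;; are `orr`ed together and a compare-and-branch on (non-)zero is used; other
;; scalar conditions are zero-extended and branched on directly.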
(rule -1 (lower_branch (brz c @ (value_type $I128) _ _) targets)
      (let ((flags ProducesFlags (flags_to_producesflags c))
            (c ValueRegs (put_in_regs c))
            (c_lo Reg (value_regs_get c 0))
            (c_hi Reg (value_regs_get c 1))
            (rt Reg (orr $I64 c_lo c_hi))
            (taken BranchTarget (branch_target targets 0))
            (not_taken BranchTarget (branch_target targets 1)))
        (side_effect
          (with_flags_side_effect flags
                                  (cond_br taken not_taken (cond_br_zero rt))))))
(rule -2 (lower_branch (brz c @ (value_type ty) _ _) targets)
      (if (ty_int_ref_scalar_64 ty))
      (let ((flags ProducesFlags (flags_to_producesflags c))
            (rt Reg (put_in_reg_zext64 c))
            (taken BranchTarget (branch_target targets 0))
            (not_taken BranchTarget (branch_target targets 1)))
        (side_effect
          (with_flags_side_effect flags
                                  (cond_br taken not_taken (cond_br_zero rt))))))
;; standard `brnz`
(rule -1 (lower_branch (brnz c @ (value_type $I128) _ _) targets)
      (let ((flags ProducesFlags (flags_to_producesflags c))
            (c ValueRegs (put_in_regs c))
            (c_lo Reg (value_regs_get c 0))
            (c_hi Reg (value_regs_get c 1))
            (rt Reg (orr $I64 c_lo c_hi))
            (taken BranchTarget (branch_target targets 0))
            (not_taken BranchTarget (branch_target targets 1)))
        (side_effect
          (with_flags_side_effect flags
                                  (cond_br taken not_taken (cond_br_not_zero rt))))))
(rule -2 (lower_branch (brnz c @ (value_type ty) _ _) targets)
      (if (ty_int_ref_scalar_64 ty))
      (let ((flags ProducesFlags (flags_to_producesflags c))
            (rt Reg (put_in_reg_zext64 c))
            (taken BranchTarget (branch_target targets 0))
            (not_taken BranchTarget (branch_target targets 1)))
        (side_effect
          (with_flags_side_effect flags
                                  (cond_br taken not_taken (cond_br_not_zero rt))))))

;;; Rules for `jump` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower_branch (jump _ _) targets)
      (side_effect (aarch64_jump (branch_target targets 0))))

;;; Rules for `br_table` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `targets` contains the default target with the list of branch targets
;; concatenated.
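;;
;; The index is zero-extended to 32 bits and an island is emitted to reserve
;; space for the jump-table sequence; `br_table_impl` then performs the bounds
;; check against `jt_size` and the indirect branch.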
(rule (lower_branch (br_table idx _ _) targets)
      (let ((jt_size u32 (targets_jt_size targets))
            (_ InstOutput (side_effect
                            (emit_island (targets_jt_space targets))))
            (ridx Reg (put_in_reg_zext32 idx)))
        (br_table_impl (u32_as_u64 jt_size) ridx targets)))