* aarch64: Migrate some bit-ops to ISLE

This commit migrates these instructions to ISLE:

* `bnot`
* `band`
* `bor`
* `bxor`
* `band_not`
* `bor_not`
* `bxor_not`

The translations were relatively straightforward, but the interesting part was reducing the duplication between all of these instructions. I opted for a route similar to what the lowering does today: a `decl` which takes the `ALUOp` and performs further pattern matching internally. This keeps each instruction's lowering simple while still handling all the fancy cases of shifts, constants, etc. for each instruction.

* Actually delete previous lowerings

* Remove dead code
;; aarch64 instruction selection and CLIF-to-MachInst lowering.

;; The main lowering constructor term: takes a clif `Inst` and returns the
;; register(s) within which the lowered instruction's result values live.
(decl lower (Inst) ValueRegs)

;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty (iconst (u64_from_imm64 n))))
      (value_reg (imm ty n)))

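;; For example (register choice illustrative), a 32-bit `(iconst 42)` can be
;; materialized with a single `movz w0, #42`; the `imm` helper is assumed to
;; pick an appropriate move-immediate sequence for wider constants.
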
;;;; Rules for `bconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty (bconst $false)))
      (value_reg (imm ty 0)))

(rule (lower (has_type ty (bconst $true)))
      (value_reg (imm ty 1)))

;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty (null)))
      (value_reg (imm ty 0)))

;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `i64` and smaller

;; Base case, simply adding things in registers.
(rule (lower (has_type (fits_in_64 ty) (iadd x y)))
      (value_reg (alu_rrr (iadd_op ty) (put_in_reg x) (put_in_reg y))))

;; Special cases for when one operand is an immediate that fits in 12 bits.
(rule (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y))))
      (value_reg (alu_rr_imm12 (iadd_op ty) (put_in_reg x) y)))

(rule (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y)))
      (value_reg (alu_rr_imm12 (iadd_op ty) (put_in_reg y) x)))

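;; For example, `(iadd v0 (iconst 16))` at `$I64` should fold the constant
;; into the instruction's 12-bit immediate field, producing something like
;; `add x0, x1, #16` (register names here are illustrative).
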
;; Same as the previous special cases, except we can switch the addition to a
;; subtraction if the negated immediate fits in 12 bits.
(rule (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_negated_value y))))
      (value_reg (alu_rr_imm12 (isub_op ty) (put_in_reg x) y)))

(rule (lower (has_type (fits_in_64 ty) (iadd (imm12_from_negated_value x) y)))
      (value_reg (alu_rr_imm12 (isub_op ty) (put_in_reg y) x)))

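;; For example, adding the constant -16 can't use `add`'s unsigned 12-bit
;; immediate, but its negation 16 can, so `(iadd v0 (iconst -16))` should
;; become roughly `sub x0, x1, #16`.
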
;; Special cases for when we're adding an extended register where the extending
;; operation can get folded into the add itself.
(rule (lower (has_type (fits_in_64 ty) (iadd x (extended_value_from_value y))))
      (value_reg (alu_rr_extend_reg (iadd_op ty) (put_in_reg x) y)))

(rule (lower (has_type (fits_in_64 ty) (iadd (extended_value_from_value x) y)))
      (value_reg (alu_rr_extend_reg (iadd_op ty) (put_in_reg y) x)))

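;; For example, `(iadd v0 (uextend v1))` where `v1` is an `i32` can use the
;; add-extended-register form, e.g. `add x0, x1, w2, uxtw`, instead of
;; emitting a separate zero-extension instruction (registers illustrative).
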
;; Special cases for when we're adding the shift of a different
;; register by a constant amount and the shift can get folded into the add.
(rule (lower (has_type (fits_in_64 ty)
                       (iadd x (def_inst (ishl y (def_inst (iconst (lshl_from_imm64 <ty amt))))))))
      (value_reg (alu_rrr_shift (iadd_op ty) (put_in_reg x) (put_in_reg y) amt)))

(rule (lower (has_type (fits_in_64 ty)
                       (iadd (def_inst (ishl x (def_inst (iconst (lshl_from_imm64 <ty amt))))) y)))
      (value_reg (alu_rrr_shift (iadd_op ty) (put_in_reg y) (put_in_reg x) amt)))

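;; For example, `(iadd v0 (ishl v1 (iconst 3)))` can fold the shift into the
;; add's shifted-register form, e.g. `add x0, x1, x2, lsl #3` (registers
;; illustrative).
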
;; Fold an `iadd` and `imul` combination into a `madd` instruction.
(rule (lower (has_type (fits_in_64 ty) (iadd x (def_inst (imul y z)))))
      (value_reg (alu_rrrr (madd_op ty) (put_in_reg y) (put_in_reg z) (put_in_reg x))))

(rule (lower (has_type (fits_in_64 ty) (iadd (def_inst (imul x y)) z)))
      (value_reg (alu_rrrr (madd_op ty) (put_in_reg x) (put_in_reg y) (put_in_reg z))))

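;; For example, `(iadd v0 (imul v1 v2))` becomes a single multiply-add:
;; `madd x0, x1, x2, x3` computes `x1 * x2 + x3` in one instruction
;; (registers illustrative).
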
;; Helper to use either a 32 or 64-bit add depending on the input type.
(decl iadd_op (Type) ALUOp)
(rule (iadd_op (fits_in_32 _ty)) (ALUOp.Add32))
(rule (iadd_op $I64) (ALUOp.Add64))

;; Helper to use either a 32 or 64-bit sub depending on the input type.
(decl isub_op (Type) ALUOp)
(rule (isub_op (fits_in_32 _ty)) (ALUOp.Sub32))
(rule (isub_op $I64) (ALUOp.Sub64))

;; Helper to use either a 32 or 64-bit madd depending on the input type.
(decl madd_op (Type) ALUOp3)
(rule (madd_op (fits_in_32 _ty)) (ALUOp3.MAdd32))
(rule (madd_op $I64) (ALUOp3.MAdd64))

;; vectors

(rule (lower (has_type ty @ (multi_lane _ _) (iadd x y)))
      (value_reg (vec_rrr (VecALUOp.Add) (put_in_reg x) (put_in_reg y) (vector_size ty))))

;; `i128`
(rule (lower (has_type $I128 (iadd x y)))
      (let (
            ;; Get the high/low registers for `x`.
            (x_regs ValueRegs (put_in_regs x))
            (x_lo Reg (value_regs_get x_regs 0))
            (x_hi Reg (value_regs_get x_regs 1))

            ;; Get the high/low registers for `y`.
            (y_regs ValueRegs (put_in_regs y))
            (y_lo Reg (value_regs_get y_regs 0))
            (y_hi Reg (value_regs_get y_regs 1))
           )
        ;; The actual addition is an `adds` producing the low 64 bits of the
        ;; result (and setting the carry flag), followed by an `adc` which
        ;; folds that carry into the high 64 bits.
        (with_flags
          (add64_with_flags x_lo y_lo)
          (adc64 x_hi y_hi))))

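;; The generated code should look roughly like (registers illustrative):
;;
;;   adds x0, x2, x4   ; low halves; sets the carry flag
;;   adc  x1, x3, x5   ; high halves plus carry
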
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `i64` and smaller

;; Base case, simply subtracting things in registers.
(rule (lower (has_type (fits_in_64 ty) (isub x y)))
      (value_reg (alu_rrr (isub_op ty) (put_in_reg x) (put_in_reg y))))

;; Special case for when one operand is an immediate that fits in 12 bits.
(rule (lower (has_type (fits_in_64 ty) (isub x (imm12_from_value y))))
      (value_reg (alu_rr_imm12 (isub_op ty) (put_in_reg x) y)))

;; Same as the previous special case, except we can switch the subtraction to
;; an addition if the negated immediate fits in 12 bits.
(rule (lower (has_type (fits_in_64 ty) (isub x (imm12_from_negated_value y))))
      (value_reg (alu_rr_imm12 (iadd_op ty) (put_in_reg x) y)))

;; Special case for when we're subtracting an extended register where the
;; extending operation can get folded into the sub itself.
(rule (lower (has_type (fits_in_64 ty) (isub x (extended_value_from_value y))))
      (value_reg (alu_rr_extend_reg (isub_op ty) (put_in_reg x) y)))

;; Finally a special case for when we're subtracting the shift of a different
;; register by a constant amount and the shift can get folded into the sub.
(rule (lower (has_type (fits_in_64 ty)
                       (isub x (def_inst (ishl y (def_inst (iconst (lshl_from_imm64 <ty amt))))))))
      (value_reg (alu_rrr_shift (isub_op ty) (put_in_reg x) (put_in_reg y) amt)))

;; vectors
(rule (lower (has_type ty @ (multi_lane _ _) (isub x y)))
      (value_reg (vec_rrr (VecALUOp.Sub) (put_in_reg x) (put_in_reg y) (vector_size ty))))

;; `i128`
(rule (lower (has_type $I128 (isub x y)))
      (let (
            ;; Get the high/low registers for `x`.
            (x_regs ValueRegs (put_in_regs x))
            (x_lo Reg (value_regs_get x_regs 0))
            (x_hi Reg (value_regs_get x_regs 1))

            ;; Get the high/low registers for `y`.
            (y_regs ValueRegs (put_in_regs y))
            (y_lo Reg (value_regs_get y_regs 0))
            (y_hi Reg (value_regs_get y_regs 1))
           )
        ;; The actual subtraction is a `subs` producing the low 64 bits of the
        ;; result (and setting the flags), followed by an `sbc` which folds
        ;; the borrow into the high 64 bits.
        (with_flags
          (sub64_with_flags x_lo y_lo)
          (sbc64 x_hi y_hi))))

;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (vec128 ty) (uadd_sat x y)))
      (value_reg (vec_rrr (VecALUOp.Uqadd) (put_in_reg x) (put_in_reg y) (vector_size ty))))

;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (vec128 ty) (sadd_sat x y)))
      (value_reg (vec_rrr (VecALUOp.Sqadd) (put_in_reg x) (put_in_reg y) (vector_size ty))))

;;;; Rules for `usub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (vec128 ty) (usub_sat x y)))
      (value_reg (vec_rrr (VecALUOp.Uqsub) (put_in_reg x) (put_in_reg y) (vector_size ty))))

;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (vec128 ty) (ssub_sat x y)))
      (value_reg (vec_rrr (VecALUOp.Sqsub) (put_in_reg x) (put_in_reg y) (vector_size ty))))

;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `i64` and smaller.
(rule (lower (has_type (fits_in_64 ty) (ineg x)))
      (value_reg (alu_rrr (isub_op ty) (zero_reg) (put_in_reg x))))

;; vectors.
(rule (lower (has_type (vec128 ty) (ineg x)))
      (value_reg (vec_misc (VecMisc2.Neg) (put_in_reg x) (vector_size ty))))

;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `i64` and smaller.
(rule (lower (has_type (fits_in_64 ty) (imul x y)))
      (value_reg (alu_rrrr (madd_op ty) (put_in_reg x) (put_in_reg y) (zero_reg))))

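;; A plain multiply is a multiply-add with a zero addend: aarch64's
;; `mul x0, x1, x2` is an alias of `madd x0, x1, x2, xzr`, so reusing the
;; `madd` helper here costs nothing.
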
;; `i128`.
(rule (lower (has_type $I128 (imul x y)))
      (let (
            ;; Get the high/low registers for `x`.
            (x_regs ValueRegs (put_in_regs x))
            (x_lo Reg (value_regs_get x_regs 0))
            (x_hi Reg (value_regs_get x_regs 1))

            ;; Get the high/low registers for `y`.
            (y_regs ValueRegs (put_in_regs y))
            (y_lo Reg (value_regs_get y_regs 0))
            (y_hi Reg (value_regs_get y_regs 1))

            ;; 128-bit multiplication formula:
            ;;   dst_lo = x_lo * y_lo
            ;;   dst_hi = umulhi(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo)
            ;;
            ;; (The `x_hi * y_hi` term is shifted entirely past bit 127 and so
            ;; never contributes to the truncated 128-bit result.)
            ;;
            ;; We can convert the above formula into the following instruction
            ;; sequence:
            ;;   umulh dst_hi, x_lo, y_lo
            ;;   madd dst_hi, x_lo, y_hi, dst_hi
            ;;   madd dst_hi, x_hi, y_lo, dst_hi
            ;;   madd dst_lo, x_lo, y_lo, zero
            (dst_hi1 Reg (alu_rrr (ALUOp.UMulH) x_lo y_lo))
            (dst_hi2 Reg (alu_rrrr (ALUOp3.MAdd64) x_lo y_hi dst_hi1))
            (dst_hi Reg (alu_rrrr (ALUOp3.MAdd64) x_hi y_lo dst_hi2))
            (dst_lo Reg (alu_rrrr (ALUOp3.MAdd64) x_lo y_lo (zero_reg)))
           )
        (value_regs dst_lo dst_hi)))

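;; As a sanity check of the formula above, the same schoolbook decomposition
;; works in base 10 with one-digit "halves": for 47 * 59, keeping two digits,
;; dst_lo = (7*9) mod 10 = 3 and dst_hi = carry(7*9) + 7*5 + 4*9
;; = 6 + 35 + 36 = 77, truncated to 7 -- giving 73, the low two digits of 2773.
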
;; Case for i8x16, i16x8, and i32x4.
(rule (lower (has_type (vec128 ty @ (not_i64x2)) (imul x y)))
      (value_reg (vec_rrr (VecALUOp.Mul) (put_in_reg x) (put_in_reg y) (vector_size ty))))

;; Special lowering for i64x2.
;;
;; This I64X2 multiplication is performed with several 32-bit
;; operations.
;;
;; The 64-bit numbers x and y can be represented as:
;;   x = a + 2^32(b)
;;   y = c + 2^32(d)
;;
;; A 64-bit multiplication is then:
;;   x * y = ac + 2^32(ad + bc) + 2^64(bd)
;; Note that the `2^64(bd)` term can be ignored: it lies entirely outside the
;; 64 bits of the result.
;;
;; This sequence implements an I64X2 multiply, where the registers
;; `rn` and `rm` are split up into 32-bit components:
;;   rn = |d|c|b|a|
;;   rm = |h|g|f|e|
;;
;;   rn * rm = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
;;
;; The sequence is:
;;   rev64 rd.4s, rm.4s
;;   mul rd.4s, rd.4s, rn.4s
;;   xtn tmp1.2s, rn.2d
;;   addp rd.4s, rd.4s, rd.4s
;;   xtn tmp2.2s, rm.2d
;;   shll rd.2d, rd.2s, #32
;;   umlal rd.2d, tmp2.2s, tmp1.2s

(rule (lower (has_type $I64X2 (imul x y)))
      (let (
            (rn Reg (put_in_reg x))
            (rm Reg (put_in_reg y))

            ;; Reverse the 32-bit elements in the 64-bit words.
            ;;   rd = |g|h|e|f|
            (rev Reg (vec_misc (VecMisc2.Rev64) rm (VectorSize.Size32x4)))

            ;; Calculate the high half components.
            ;;   rd = |dg|ch|be|af|
            ;;
            ;; Note that this 32-bit multiply of the high half components
            ;; discards the bits that would overflow, just as 64-bit
            ;; operations would; the `Shll` below would shift those overflow
            ;; bits out anyway.
            (mul Reg (vec_rrr (VecALUOp.Mul) rev rn (VectorSize.Size32x4)))

            ;; Extract the low half components of rn.
            ;;   tmp1 = |c|a|
            (tmp1 Reg (vec_rr_narrow (VecRRNarrowOp.Xtn64) rn $false))

            ;; Sum the respective high half components.
            ;;   rd = |dg+ch|be+af|dg+ch|be+af|
            (sum Reg (vec_rrr (VecALUOp.Addp) mul mul (VectorSize.Size32x4)))

            ;; Extract the low half components of rm.
            ;;   tmp2 = |g|e|
            (tmp2 Reg (vec_rr_narrow (VecRRNarrowOp.Xtn64) rm $false))

            ;; Shift the high half components into the high half.
            ;;   rd = |(dg+ch) << 32|(be+af) << 32|
            (shift Reg (vec_rr_long (VecRRLongOp.Shll32) sum $false))

            ;; Multiply the low components together, and accumulate with the
            ;; high half.
            ;;   rd = |rd[1] + cg|rd[0] + ae|
            (result Reg (vec_rrrr_long (VecRRRLongOp.Umlal32) shift tmp2 tmp1 $false))
           )
        (value_reg result)))

;; Special case for `i16x8.extmul_low_i8x16_s`.
(rule (lower (has_type $I16X8
                       (imul (def_inst (swiden_low x @ (value_type $I8X16)))
                             (def_inst (swiden_low y @ (value_type $I8X16))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Smull8) (put_in_reg x) (put_in_reg y) $false)))

;; Special case for `i16x8.extmul_high_i8x16_s`.
(rule (lower (has_type $I16X8
                       (imul (def_inst (swiden_high x @ (value_type $I8X16)))
                             (def_inst (swiden_high y @ (value_type $I8X16))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Smull8) (put_in_reg x) (put_in_reg y) $true)))

;; Special case for `i16x8.extmul_low_i8x16_u`.
(rule (lower (has_type $I16X8
                       (imul (def_inst (uwiden_low x @ (value_type $I8X16)))
                             (def_inst (uwiden_low y @ (value_type $I8X16))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Umull8) (put_in_reg x) (put_in_reg y) $false)))

;; Special case for `i16x8.extmul_high_i8x16_u`.
(rule (lower (has_type $I16X8
                       (imul (def_inst (uwiden_high x @ (value_type $I8X16)))
                             (def_inst (uwiden_high y @ (value_type $I8X16))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Umull8) (put_in_reg x) (put_in_reg y) $true)))

;; Special case for `i32x4.extmul_low_i16x8_s`.
(rule (lower (has_type $I32X4
                       (imul (def_inst (swiden_low x @ (value_type $I16X8)))
                             (def_inst (swiden_low y @ (value_type $I16X8))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Smull16) (put_in_reg x) (put_in_reg y) $false)))

;; Special case for `i32x4.extmul_high_i16x8_s`.
(rule (lower (has_type $I32X4
                       (imul (def_inst (swiden_high x @ (value_type $I16X8)))
                             (def_inst (swiden_high y @ (value_type $I16X8))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Smull16) (put_in_reg x) (put_in_reg y) $true)))

;; Special case for `i32x4.extmul_low_i16x8_u`.
(rule (lower (has_type $I32X4
                       (imul (def_inst (uwiden_low x @ (value_type $I16X8)))
                             (def_inst (uwiden_low y @ (value_type $I16X8))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Umull16) (put_in_reg x) (put_in_reg y) $false)))

;; Special case for `i32x4.extmul_high_i16x8_u`.
(rule (lower (has_type $I32X4
                       (imul (def_inst (uwiden_high x @ (value_type $I16X8)))
                             (def_inst (uwiden_high y @ (value_type $I16X8))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Umull16) (put_in_reg x) (put_in_reg y) $true)))

;; Special case for `i64x2.extmul_low_i32x4_s`.
(rule (lower (has_type $I64X2
                       (imul (def_inst (swiden_low x @ (value_type $I32X4)))
                             (def_inst (swiden_low y @ (value_type $I32X4))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Smull32) (put_in_reg x) (put_in_reg y) $false)))

;; Special case for `i64x2.extmul_high_i32x4_s`.
(rule (lower (has_type $I64X2
                       (imul (def_inst (swiden_high x @ (value_type $I32X4)))
                             (def_inst (swiden_high y @ (value_type $I32X4))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Smull32) (put_in_reg x) (put_in_reg y) $true)))

;; Special case for `i64x2.extmul_low_i32x4_u`.
(rule (lower (has_type $I64X2
                       (imul (def_inst (uwiden_low x @ (value_type $I32X4)))
                             (def_inst (uwiden_low y @ (value_type $I32X4))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Umull32) (put_in_reg x) (put_in_reg y) $false)))

;; Special case for `i64x2.extmul_high_i32x4_u`.
(rule (lower (has_type $I64X2
                       (imul (def_inst (uwiden_high x @ (value_type $I32X4)))
                             (def_inst (uwiden_high y @ (value_type $I32X4))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Umull32) (put_in_reg x) (put_in_reg y) $true)))

;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I64 (smulhi x y)))
      (value_reg (alu_rrr (ALUOp.SMulH) (put_in_reg x) (put_in_reg y))))

(rule (lower (has_type (fits_in_32 ty) (smulhi x y)))
      (let (
            (x64 Reg (put_in_reg_sext64 x))
            (y64 Reg (put_in_reg_sext64 y))
            (mul Reg (alu_rrrr (ALUOp3.MAdd64) x64 y64 (zero_reg)))
            (result Reg (alu_rr_imm_shift (ALUOp.Asr64) mul (imm_shift_from_u8 (ty_bits ty))))
           )
        (value_reg result)))

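;; The narrow case above sign-extends both operands to 64 bits, forms the full
;; 64-bit product, and shifts the high `ty` bits down arithmetically. For
;; example, at `$I32` with x = -2 and y = 3 the product is -6
;; (0xFFFF_FFFF_FFFF_FFFA), and `asr #32` leaves -1, the correct high half.
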
;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I64 (umulhi x y)))
      (value_reg (alu_rrr (ALUOp.UMulH) (put_in_reg x) (put_in_reg y))))

(rule (lower (has_type (fits_in_32 ty) (umulhi x y)))
      (let (
            (x64 Reg (put_in_reg_zext64 x))
            (y64 Reg (put_in_reg_zext64 y))
            (mul Reg (alu_rrrr (ALUOp3.MAdd64) x64 y64 (zero_reg)))
            (result Reg (alu_rr_imm_shift (ALUOp.Lsr64) mul (imm_shift_from_u8 (ty_bits ty))))
           )
        (value_reg result)))

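;; Same shape as `smulhi`, but with zero-extension and a logical shift. For
;; example, at `$I32` with x = 0x8000_0000 and y = 2 the 64-bit product is
;; 0x1_0000_0000, and `lsr #32` leaves 1, the correct unsigned high half.
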
;;;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; TODO: Add UDiv32 to implement 32-bit directly, rather
;; than extending the input.
;;
;; Note that aarch64's `udiv` doesn't trap, so to respect the semantics of
;; CLIF's `udiv` the check for a zero divisor needs to be performed manually.
(rule (lower (has_type (fits_in_64 ty) (udiv x y)))
      (value_reg (alu_rrr (ALUOp.UDiv64)
                          (put_in_reg_zext64 x)
                          (put_nonzero_in_reg_zext64 y))))

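;; Analogous to the `sdiv` check sequence shown below, the emitted code should
;; look roughly like (registers illustrative):
;;
;;   cbnz x1, #8
;;   udf            ; divide by zero
;;   udiv x0, x0, x1
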
;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
(decl put_nonzero_in_reg_zext64 (Value) Reg)
(rule (put_nonzero_in_reg_zext64 val)
      (trap_if_zero_divisor (put_in_reg_zext64 val)))

;; Special case: when a `Value` is a constant that's known to be nonzero we can
;; move it into a register directly, skipping the zero check entirely.
(rule (put_nonzero_in_reg_zext64 (and (value_type ty)
                                      (def_inst (iconst (nonzero_u64_from_imm64 n)))))
      (imm ty n))

;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; TODO: Add SDiv32 to implement 32-bit directly, rather
;; than extending the input.
;;
;; The sequence of checks here should look like:
;;
;;   cbnz rm, #8
;;   udf ; divide by zero
;;   cmn rm, 1
;;   ccmp rn, 1, #nzcv, eq
;;   b.vc #8
;;   udf ; signed overflow
;;
;; Note that the `div` instruction does not trap on divide by zero or signed
;; overflow, so those checks need to be inserted manually.
;;
;; TODO: if `y` is -1 then a check that `x` is not INT_MIN is all that's
;; necessary, but right now `y` is checked to not be -1 as well.
(rule (lower (has_type (fits_in_64 ty) (sdiv x y)))
      (let (
            (x64 Reg (put_in_reg_sext64 x))
            (y64 Reg (put_nonzero_in_reg_sext64 y))
            (valid_x64 Reg (trap_if_div_overflow ty x64 y64))
            (result Reg (alu_rrr (ALUOp.SDiv64) valid_x64 y64))
           )
        (value_reg result)))

;; Helper for extracting an immediate that's neither 0 nor -1 from an imm64.
(decl safe_divisor_from_imm64 (u64) Imm64)
(extern extractor safe_divisor_from_imm64 safe_divisor_from_imm64)

;; Special case for `sdiv` with a constant divisor: when the divisor is known
;; to be neither 0 nor -1 the checks above always pass, so none of them need
;; to be emitted.
(rule (lower (has_type (fits_in_64 ty) (sdiv x (def_inst (iconst (safe_divisor_from_imm64 y))))))
      (value_reg (alu_rrr (ALUOp.SDiv64)
                          (put_in_reg_sext64 x)
                          (imm ty y))))

;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
(decl put_nonzero_in_reg_sext64 (Value) Reg)
(rule (put_nonzero_in_reg_sext64 val)
      (trap_if_zero_divisor (put_in_reg_sext64 val)))

;; Special case: when the `Value` is a constant that's known to be nonzero we
;; can skip the zero check.
(rule (put_nonzero_in_reg_sext64 (and (value_type ty)
                                      (def_inst (iconst (nonzero_u64_from_imm64 n)))))
      (imm ty n))

;;;; Rules for `urem` and `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Remainder (x % y) is implemented as:
;;
;;   tmp = x / y
;;   result = x - (tmp * y)
;;
;; Reusing the result register for `tmp`, the sequence is:
;;
;;   cbnz y, #8 ; branch over trap
;;   udf ; divide by zero
;;   div rd, x, y ; rd = x / y
;;   msub rd, rd, y, x ; rd = x - rd * y

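;; A quick worked example of the `msub` step: for `urem` with x = 7, y = 3 the
;; division gives 2 and `msub` computes 7 - 2*3 = 1. For `srem` with x = -7,
;; y = 3, `sdiv` truncates toward zero giving -2, and -7 - (-2*3) = -1, which
;; matches CLIF's truncating remainder semantics.
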
(rule (lower (has_type (fits_in_64 ty) (urem x y)))
      (let (
            (x64 Reg (put_in_reg_zext64 x))
            (y64 Reg (put_nonzero_in_reg_zext64 y))
            (div Reg (alu_rrr (ALUOp.UDiv64) x64 y64))
            (result Reg (alu_rrrr (ALUOp3.MSub64) div y64 x64))
           )
        (value_reg result)))

(rule (lower (has_type (fits_in_64 ty) (srem x y)))
      (let (
            (x64 Reg (put_in_reg_sext64 x))
            (y64 Reg (put_nonzero_in_reg_sext64 y))
            (div Reg (alu_rrr (ALUOp.SDiv64) x64 y64))
            (result Reg (alu_rrrr (ALUOp3.MSub64) div y64 x64))
           )
        (value_reg result)))

;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; General rule for extending input to an output which fits in a single
;; register.
(rule (lower (has_type (fits_in_64 out) (uextend x @ (value_type in))))
      (value_reg (extend (put_in_reg x) $false (ty_bits in) (ty_bits out))))

;; Extraction of a vector lane automatically extends as necessary, so we can
;; skip an explicit extending instruction.
(rule (lower (has_type (fits_in_64 out)
                       (uextend (def_inst (extractlane vec @ (value_type in)
                                                       (u8_from_uimm8 lane))))))
      (value_reg (mov_from_vec (put_in_reg vec) lane (vector_size in))))

;; Atomic loads also automatically zero their upper bits, so the `uextend`
;; instruction can effectively be skipped here as well.
(rule (lower (has_type (fits_in_64 out)
                       (uextend (and (value_type in) (sinkable_atomic_load addr)))))
      (value_reg (load_acquire in (sink_atomic_load addr))))

;; Conversion to 128 bits zero-extends the value into the low register; the
;; upper register is simply zero.
(rule (lower (has_type $I128 (uextend x)))
      (value_regs (put_in_reg_zext64 x) (imm $I64 0)))

;; As above, vector lane extraction automatically zero-extends, so extending
;; to i128 only requires generating a 0 constant for the upper bits.
(rule (lower (has_type $I128
                       (uextend (def_inst (extractlane vec @ (value_type in)
                                                       (u8_from_uimm8 lane))))))
      (value_regs (mov_from_vec (put_in_reg vec) lane (vector_size in)) (imm $I64 0)))

;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; General rule for extending input to an output which fits in a single
;; register.
(rule (lower (has_type (fits_in_64 out) (sextend x @ (value_type in))))
      (value_reg (extend (put_in_reg x) $true (ty_bits in) (ty_bits out))))

;; Extraction of a vector lane automatically extends as necessary, so we can
;; skip an explicit extending instruction.
(rule (lower (has_type (fits_in_64 out)
                       (sextend (def_inst (extractlane vec @ (value_type in)
                                                       (u8_from_uimm8 lane))))))
      (value_reg (mov_from_vec_signed (put_in_reg vec)
                                      lane
                                      (vector_size in)
                                      (size_from_ty out))))

;; Extension from 64 to 128 bits sign-extends the input into the low register
;; and then materializes the upper register by replicating the sign bit with
;; an arithmetic shift right by 63.
(rule (lower (has_type $I128 (sextend x)))
      (let (
            (lo Reg (put_in_reg_sext64 x))
            (hi Reg (alu_rr_imm_shift (ALUOp.Asr64) lo (imm_shift_from_u8 63)))
           )
        (value_regs lo hi)))

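;; For example, for x = -5 the low register holds 0xFFFF_FFFF_FFFF_FFFB, and
;; `asr #63` of that yields all ones, the correct upper 64 bits of -5 as an
;; i128.
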
;; Like the uextend case above, but for sign extension: the lane is extracted
;; with a sign-extending move, and the upper 64 bits are generated by
;; replicating the sign bit with an arithmetic shift right.
;;
;; Note that `mov_from_vec_signed` doesn't exist for i64x2, so that case is
;; specifically excluded here.
(rule (lower (has_type $I128
                       (sextend (def_inst (extractlane vec @ (value_type in @ (not_i64x2))
                                                       (u8_from_uimm8 lane))))))
      (let (
            (lo Reg (mov_from_vec_signed (put_in_reg vec)
                                         lane
                                         (vector_size in)
                                         (size_from_ty $I64)))
            (hi Reg (alu_rr_imm_shift (ALUOp.Asr64) lo (imm_shift_from_u8 63)))
           )
        (value_regs lo hi)))

;; Extension from an extraction of i64x2 into i128.
(rule (lower (has_type $I128
                       (sextend (def_inst (extractlane vec @ (value_type $I64X2)
                                                       (u8_from_uimm8 lane))))))
      (let (
            (lo Reg (mov_from_vec (put_in_reg vec)
                                  lane
                                  (VectorSize.Size64x2)))
            (hi Reg (alu_rr_imm_shift (ALUOp.Asr64) lo (imm_shift_from_u8 63)))
           )
        (value_regs lo hi)))

;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Helper to use either a 32 or 64-bit `orn` depending on the input type.
(decl orr_not_op (Type) ALUOp)
(rule (orr_not_op (fits_in_32 _ty)) (ALUOp.OrrNot32))
(rule (orr_not_op $I64) (ALUOp.OrrNot64))

;; Base case using `orn` between two registers.
;;
;; Note that bitwise negation is implemented here as
;;
;;   NOT rd, rm ==> ORR_NOT rd, zero, rm
(rule (lower (has_type (fits_in_64 ty) (bnot x)))
      (value_reg (alu_rrr (orr_not_op ty) (zero_reg) (put_in_reg x))))

;; Special case to use `AluRRRShift` if it's a `bnot` of a const-left-shifted
;; value.
(rule (lower (has_type (fits_in_64 ty)
                       (bnot (def_inst (ishl x (def_inst (iconst (lshl_from_imm64 <ty amt))))))))
      (value_reg (alu_rrr_shift (orr_not_op ty) (zero_reg) (put_in_reg x) amt)))

;; Implementation of `bnot` for `i128`: negate each half independently.
(rule (lower (has_type $I128 (bnot x)))
      (let (
            (x_regs ValueRegs (put_in_regs x))
            (x_lo Reg (value_regs_get x_regs 0))
            (x_hi Reg (value_regs_get x_regs 1))
            (new_lo Reg (alu_rrr (ALUOp.OrrNot64) (zero_reg) x_lo))
            (new_hi Reg (alu_rrr (ALUOp.OrrNot64) (zero_reg) x_hi))
           )
        (value_regs new_lo new_hi)))

;; Implementation of `bnot` for vector types.
(rule (lower (has_type (vec128 ty) (bnot x)))
      (value_reg (vec_misc (VecMisc2.Not) (put_in_reg x) (vector_size ty))))

;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (fits_in_32 ty) (band x y)))
      (value_reg (alu_rs_imm_logic_commutative (ALUOp.And32) ty x y)))

(rule (lower (has_type $I64 (band x y)))
      (value_reg (alu_rs_imm_logic_commutative (ALUOp.And64) $I64 x y)))

(rule (lower (has_type $I128 (band x y))) (i128_alu_bitop (ALUOp.And64) x y))

(rule (lower (has_type (vec128 ty) (band x y)))
      (value_reg (vec_rrr (VecALUOp.And) (put_in_reg x) (put_in_reg y) (vector_size ty))))

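;; As its name suggests, `alu_rs_imm_logic_commutative` is the shared helper
;; described in the commit message: it takes the `ALUOp` and pattern-matches
;; internally on either operand for a logical immediate or a constant-shifted
;; value before falling back to the plain register-register form. For example,
;; `(band v0 (iconst 0xff))` should become a single `and x0, x1, #0xff`, since
;; 0xff is an encodable logical immediate.
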
;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (fits_in_32 ty) (bor x y)))
      (value_reg (alu_rs_imm_logic_commutative (ALUOp.Orr32) ty x y)))

(rule (lower (has_type $I64 (bor x y)))
      (value_reg (alu_rs_imm_logic_commutative (ALUOp.Orr64) $I64 x y)))

(rule (lower (has_type $I128 (bor x y))) (i128_alu_bitop (ALUOp.Orr64) x y))

(rule (lower (has_type (vec128 ty) (bor x y)))
      (value_reg (vec_rrr (VecALUOp.Orr) (put_in_reg x) (put_in_reg y) (vector_size ty))))

;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (fits_in_32 ty) (bxor x y)))
      (value_reg (alu_rs_imm_logic_commutative (ALUOp.Eor32) ty x y)))

(rule (lower (has_type $I64 (bxor x y)))
      (value_reg (alu_rs_imm_logic_commutative (ALUOp.Eor64) $I64 x y)))

(rule (lower (has_type $I128 (bxor x y))) (i128_alu_bitop (ALUOp.Eor64) x y))

(rule (lower (has_type (vec128 ty) (bxor x y)))
      (value_reg (vec_rrr (VecALUOp.Eor) (put_in_reg x) (put_in_reg y) (vector_size ty))))

;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (fits_in_32 ty) (band_not x y)))
      (value_reg (alu_rs_imm_logic (ALUOp.AndNot32) ty x y)))

(rule (lower (has_type $I64 (band_not x y)))
      (value_reg (alu_rs_imm_logic (ALUOp.AndNot64) $I64 x y)))

(rule (lower (has_type $I128 (band_not x y))) (i128_alu_bitop (ALUOp.AndNot64) x y))

(rule (lower (has_type (vec128 ty) (band_not x y)))
      (value_reg (vec_rrr (VecALUOp.Bic) (put_in_reg x) (put_in_reg y) (vector_size ty))))

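;; Note that `band_not` (and the `bor_not`/`bxor_not` rules below) use the
;; non-commutative `alu_rs_imm_logic` helper, since `x & ~y` is not symmetric
;; in its operands. The scalar forms map onto aarch64's `bic`/`orn`/`eon`
;; instructions, e.g. `bic x0, x1, x2` computes `x1 & ~x2`.
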
;;;; Rules for `bor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (fits_in_32 ty) (bor_not x y)))
      (value_reg (alu_rs_imm_logic (ALUOp.OrrNot32) ty x y)))

(rule (lower (has_type $I64 (bor_not x y)))
      (value_reg (alu_rs_imm_logic (ALUOp.OrrNot64) $I64 x y)))

(rule (lower (has_type $I128 (bor_not x y))) (i128_alu_bitop (ALUOp.OrrNot64) x y))

;;;; Rules for `bxor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (fits_in_32 ty) (bxor_not x y)))
      (value_reg (alu_rs_imm_logic (ALUOp.EorNot32) ty x y)))

(rule (lower (has_type $I64 (bxor_not x y)))
      (value_reg (alu_rs_imm_logic (ALUOp.EorNot64) $I64 x y)))

(rule (lower (has_type $I128 (bxor_not x y))) (i128_alu_bitop (ALUOp.EorNot64) x y))