wasmtime/cranelift/codegen/src/isa/aarch64/lower.isle
Alex Crichton · 25b380d5fc · aarch64: Migrate {s,u}mulhi to ISLE (2021-11-29 18:11:42 -08:00)

This starts moving over some sign/zero-extend helpers that are also present
in the Rust lowering code. Otherwise this is a relatively unsurprising
transition, with the various cases of the instructions mapping well to ISLE
utilities.


;; aarch64 instruction selection and CLIF-to-MachInst lowering.
;; The main lowering constructor term: takes a clif `Inst` and returns the
;; register(s) within which the lowered instruction's result values live.
(decl lower (Inst) ValueRegs)
;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (iconst (u64_from_imm64 n))))
      (value_reg (imm ty n)))
;;;; Rules for `bconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (bconst $false)))
      (value_reg (imm ty 0)))
(rule (lower (has_type ty (bconst $true)))
      (value_reg (imm ty 1)))
;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (null)))
      (value_reg (imm ty 0)))
;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller
;; Base case, simply adding things in registers.
(rule (lower (has_type (fits_in_64 ty) (iadd x y)))
      (value_reg (alu_rrr (iadd_op ty) (put_in_reg x) (put_in_reg y))))
;; Special cases for when one operand is an immediate that fits in 12 bits.
(rule (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y))))
      (value_reg (alu_rr_imm12 (iadd_op ty) (put_in_reg x) y)))
(rule (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y)))
      (value_reg (alu_rr_imm12 (iadd_op ty) (put_in_reg y) x)))
;; Same as the previous special cases, except we can switch the addition to a
;; subtraction if the negated immediate fits in 12 bits.
(rule (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_negated_value y))))
      (value_reg (alu_rr_imm12 (isub_op ty) (put_in_reg x) y)))
(rule (lower (has_type (fits_in_64 ty) (iadd (imm12_from_negated_value x) y)))
      (value_reg (alu_rr_imm12 (isub_op ty) (put_in_reg y) x)))
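;; For example, an `iadd` with the constant -4 cannot use the immediate form
;; directly, but since `x + (-4) == x - 4` and 4 fits in 12 bits, it can be
;; emitted as a `sub` with the immediate 4.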
;; Special cases for when we're adding an extended register where the extending
;; operation can get folded into the add itself.
(rule (lower (has_type (fits_in_64 ty) (iadd x (extended_value_from_value y))))
      (value_reg (alu_rr_extend_reg (iadd_op ty) (put_in_reg x) y)))
(rule (lower (has_type (fits_in_64 ty) (iadd (extended_value_from_value x) y)))
      (value_reg (alu_rr_extend_reg (iadd_op ty) (put_in_reg y) x)))
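;; For example, adding a value that was zero-extended from 16 bits can be
;; emitted as a single add-with-extend (roughly `add x0, x1, w2, uxth`) rather
;; than a separate extend followed by an add.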
;; Special cases for when we're adding the shift of a different
;; register by a constant amount and the shift can get folded into the add.
(rule (lower (has_type (fits_in_64 ty)
                       (iadd x (def_inst (ishl y (def_inst (iconst (lshl_from_imm64 <ty amt))))))))
      (value_reg (alu_rrr_shift (iadd_op ty) (put_in_reg x) (put_in_reg y) amt)))
(rule (lower (has_type (fits_in_64 ty)
                       (iadd (def_inst (ishl x (def_inst (iconst (lshl_from_imm64 <ty amt))))) y)))
      (value_reg (alu_rrr_shift (iadd_op ty) (put_in_reg y) (put_in_reg x) amt)))
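;; For example, `x + (y << 3)` can be emitted as a single shifted-register add
;; (roughly `add x0, x1, x2, lsl #3`) rather than a separate `lsl` plus `add`.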
;; Fold an `iadd` and `imul` combination into a `madd` instruction.
(rule (lower (has_type (fits_in_64 ty) (iadd x (def_inst (imul y z)))))
      (value_reg (alu_rrrr (madd_op ty) (put_in_reg y) (put_in_reg z) (put_in_reg x))))
(rule (lower (has_type (fits_in_64 ty) (iadd (def_inst (imul x y)) z)))
      (value_reg (alu_rrrr (madd_op ty) (put_in_reg x) (put_in_reg y) (put_in_reg z))))
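;; `madd` computes `ra + rn * rm` in a single instruction, so the multiply
;; needs no instruction of its own; note that the addend is passed as the last
;; operand of `alu_rrrr` in the rules above.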
;; Helper to use either a 32 or 64-bit add depending on the input type.
(decl iadd_op (Type) ALUOp)
(rule (iadd_op (fits_in_32 _ty)) (ALUOp.Add32))
(rule (iadd_op $I64) (ALUOp.Add64))
;; Helper to use either a 32 or 64-bit sub depending on the input type.
(decl isub_op (Type) ALUOp)
(rule (isub_op (fits_in_32 _ty)) (ALUOp.Sub32))
(rule (isub_op $I64) (ALUOp.Sub64))
;; Helper to use either a 32 or 64-bit madd depending on the input type.
(decl madd_op (Type) ALUOp3)
(rule (madd_op (fits_in_32 _ty)) (ALUOp3.MAdd32))
(rule (madd_op $I64) (ALUOp3.MAdd64))
;; vectors
(rule (lower (has_type ty @ (multi_lane _ _) (iadd x y)))
      (value_reg (vec_rrr (VecALUOp.Add) (put_in_reg x) (put_in_reg y) (vector_size ty))))
;; `i128`
(rule (lower (has_type $I128 (iadd x y)))
      (let (
            ;; Get the high/low registers for `x`.
            (x_regs ValueRegs (put_in_regs x))
            (x_lo Reg (value_regs_get x_regs 0))
            (x_hi Reg (value_regs_get x_regs 1))
            ;; Get the high/low registers for `y`.
            (y_regs ValueRegs (put_in_regs y))
            (y_lo Reg (value_regs_get y_regs 0))
            (y_hi Reg (value_regs_get y_regs 1))
           )
        ;; The actual addition is an `adds` followed by an `adc`, which produce
        ;; the low and high halves of the result, respectively.
        (with_flags
          (add64_with_flags x_lo y_lo)
          (adc64 x_hi y_hi))))
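;; As a scalar sketch of what the `adds`/`adc` pair computes (illustrative
;; Rust, not part of this file; `add128` is a made-up name):
;;
;;   fn add128(x_lo: u64, x_hi: u64, y_lo: u64, y_hi: u64) -> (u64, u64) {
;;       // `adds`: low-half addition which records the carry-out in the flags.
;;       let (lo, carry) = x_lo.overflowing_add(y_lo);
;;       // `adc`: high-half addition which adds the carry back in.
;;       let hi = x_hi.wrapping_add(y_hi).wrapping_add(carry as u64);
;;       (lo, hi)
;;   }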
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller
;; Base case, simply subtracting things in registers.
(rule (lower (has_type (fits_in_64 ty) (isub x y)))
      (value_reg (alu_rrr (isub_op ty) (put_in_reg x) (put_in_reg y))))
;; Special case for when one operand is an immediate that fits in 12 bits.
(rule (lower (has_type (fits_in_64 ty) (isub x (imm12_from_value y))))
      (value_reg (alu_rr_imm12 (isub_op ty) (put_in_reg x) y)))
;; Same as the previous special case, except we can switch the subtraction to an
;; addition if the negated immediate fits in 12 bits.
(rule (lower (has_type (fits_in_64 ty) (isub x (imm12_from_negated_value y))))
      (value_reg (alu_rr_imm12 (iadd_op ty) (put_in_reg x) y)))
;; Special case for when we're subtracting an extended register where the
;; extending operation can get folded into the sub itself.
(rule (lower (has_type (fits_in_64 ty) (isub x (extended_value_from_value y))))
      (value_reg (alu_rr_extend_reg (isub_op ty) (put_in_reg x) y)))
;; Finally a special case for when we're subtracting the shift of a different
;; register by a constant amount and the shift can get folded into the sub.
(rule (lower (has_type (fits_in_64 ty)
                       (isub x (def_inst (ishl y (def_inst (iconst (lshl_from_imm64 <ty amt))))))))
      (value_reg (alu_rrr_shift (isub_op ty) (put_in_reg x) (put_in_reg y) amt)))
;; vectors
(rule (lower (has_type ty @ (multi_lane _ _) (isub x y)))
      (value_reg (vec_rrr (VecALUOp.Sub) (put_in_reg x) (put_in_reg y) (vector_size ty))))
;; `i128`
(rule (lower (has_type $I128 (isub x y)))
      (let (
            ;; Get the high/low registers for `x`.
            (x_regs ValueRegs (put_in_regs x))
            (x_lo Reg (value_regs_get x_regs 0))
            (x_hi Reg (value_regs_get x_regs 1))
            ;; Get the high/low registers for `y`.
            (y_regs ValueRegs (put_in_regs y))
            (y_lo Reg (value_regs_get y_regs 0))
            (y_hi Reg (value_regs_get y_regs 1))
           )
        ;; The actual subtraction is a `subs` followed by an `sbc`, which
        ;; produce the low and high halves of the result, respectively.
        (with_flags
          (sub64_with_flags x_lo y_lo)
          (sbc64 x_hi y_hi))))
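;; This mirrors the `i128` addition above: `subs` produces the low half and
;; records the borrow in the flags, and `sbc` folds that borrow into the
;; high-half difference, i.e. (illustrative Rust):
;;
;;   let (lo, borrow) = x_lo.overflowing_sub(y_lo);
;;   let hi = x_hi.wrapping_sub(y_hi).wrapping_sub(borrow as u64);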
;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (vec128 ty) (uadd_sat x y)))
      (value_reg (vec_rrr (VecALUOp.Uqadd) (put_in_reg x) (put_in_reg y) (vector_size ty))))
;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (vec128 ty) (sadd_sat x y)))
      (value_reg (vec_rrr (VecALUOp.Sqadd) (put_in_reg x) (put_in_reg y) (vector_size ty))))
;;;; Rules for `usub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (vec128 ty) (usub_sat x y)))
      (value_reg (vec_rrr (VecALUOp.Uqsub) (put_in_reg x) (put_in_reg y) (vector_size ty))))
;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (vec128 ty) (ssub_sat x y)))
      (value_reg (vec_rrr (VecALUOp.Sqsub) (put_in_reg x) (put_in_reg y) (vector_size ty))))
;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.
(rule (lower (has_type (fits_in_64 ty) (ineg x)))
      (value_reg (alu_rrr (isub_op ty) (zero_reg) (put_in_reg x))))
;; vectors.
(rule (lower (has_type (vec128 ty) (ineg x)))
      (value_reg (vec_misc (VecMisc2.Neg) (put_in_reg x) (vector_size ty))))
;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.
(rule (lower (has_type (fits_in_64 ty) (imul x y)))
      (value_reg (alu_rrrr (madd_op ty) (put_in_reg x) (put_in_reg y) (zero_reg))))
;; `i128`.
(rule (lower (has_type $I128 (imul x y)))
      (let (
            ;; Get the high/low registers for `x`.
            (x_regs ValueRegs (put_in_regs x))
            (x_lo Reg (value_regs_get x_regs 0))
            (x_hi Reg (value_regs_get x_regs 1))
            ;; Get the high/low registers for `y`.
            (y_regs ValueRegs (put_in_regs y))
            (y_lo Reg (value_regs_get y_regs 0))
            (y_hi Reg (value_regs_get y_regs 1))
            ;; 128-bit multiplication formula:
            ;;   dst_lo = x_lo * y_lo
            ;;   dst_hi = umulhi(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo)
            ;;
            ;; We can convert the above formula into the following instruction
            ;; sequence:
            ;;   umulh dst_hi, x_lo, y_lo
            ;;   madd  dst_hi, x_lo, y_hi, dst_hi
            ;;   madd  dst_hi, x_hi, y_lo, dst_hi
            ;;   madd  dst_lo, x_lo, y_lo, zero
            (dst_hi1 Reg (alu_rrr (ALUOp.UMulH) x_lo y_lo))
            (dst_hi2 Reg (alu_rrrr (ALUOp3.MAdd64) x_lo y_hi dst_hi1))
            (dst_hi Reg (alu_rrrr (ALUOp3.MAdd64) x_hi y_lo dst_hi2))
            (dst_lo Reg (alu_rrrr (ALUOp3.MAdd64) x_lo y_lo (zero_reg)))
           )
        (value_regs dst_lo dst_hi)))
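;; A scalar sketch of the formula above (illustrative Rust, not part of this
;; file; `mul128` is a made-up name):
;;
;;   fn mul128(x_lo: u64, x_hi: u64, y_lo: u64, y_hi: u64) -> (u64, u64) {
;;       let full = (x_lo as u128) * (y_lo as u128);
;;       let dst_lo = full as u64;                       // madd dst_lo, x_lo, y_lo, xzr
;;       let dst_hi = ((full >> 64) as u64)              // umulh dst_hi, x_lo, y_lo
;;           .wrapping_add(x_lo.wrapping_mul(y_hi))      // madd dst_hi, x_lo, y_hi, dst_hi
;;           .wrapping_add(x_hi.wrapping_mul(y_lo));     // madd dst_hi, x_hi, y_lo, dst_hi
;;       (dst_lo, dst_hi)
;;   }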
;; Case for i8x16, i16x8, and i32x4.
(rule (lower (has_type (vec128 ty @ (not_i64x2)) (imul x y)))
      (value_reg (vec_rrr (VecALUOp.Mul) (put_in_reg x) (put_in_reg y) (vector_size ty))))
;; Special lowering for i64x2.
;;
;; This I64X2 multiplication is performed with several 32-bit
;; operations.
;;
;; 64-bit numbers x and y can be represented as:
;;   x = a + 2^32(b)
;;   y = c + 2^32(d)
;;
;; A 64-bit multiplication is:
;;   x * y = ac + 2^32(ad + bc) + 2^64(bd)
;; note: the `2^64(bd)` term can be ignored since it lies entirely outside
;; the low 64 bits of the result.
;;
;; This sequence implements an I64X2 multiply, where the registers
;; `rn` and `rm` are split up into 32-bit components:
;; rn = |d|c|b|a|
;; rm = |h|g|f|e|
;;
;; rn * rm = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
;;
;; The sequence is:
;; rev64 rd.4s, rm.4s
;; mul rd.4s, rd.4s, rn.4s
;; xtn tmp1.2s, rn.2d
;; addp rd.4s, rd.4s, rd.4s
;; xtn tmp2.2s, rm.2d
;; shll rd.2d, rd.2s, #32
;; umlal rd.2d, tmp2.2s, tmp1.2s
(rule (lower (has_type $I64X2 (imul x y)))
      (let (
            (rn Reg (put_in_reg x))
            (rm Reg (put_in_reg y))
            ;; Reverse the 32-bit elements in the 64-bit words.
            ;;   rd = |g|h|e|f|
            (rev Reg (vec_misc (VecMisc2.Rev64) rm (VectorSize.Size32x4)))
            ;; Calculate the high half components.
            ;;   rd = |dg|ch|be|af|
            ;;
            ;; Note that this 32-bit multiply of the high half discards the
            ;; bits that would overflow, just as if 64-bit operations were
            ;; used. The `Shll` below would shift out the overflow bits anyway.
            (mul Reg (vec_rrr (VecALUOp.Mul) rev rn (VectorSize.Size32x4)))
            ;; Extract the low half components of rn.
            ;;   tmp1 = |c|a|
            (tmp1 Reg (vec_rr_narrow (VecRRNarrowOp.Xtn64) rn $false))
            ;; Sum the respective high half components.
            ;;   rd = |dg+ch|be+af||dg+ch|be+af|
            (sum Reg (vec_rrr (VecALUOp.Addp) mul mul (VectorSize.Size32x4)))
            ;; Extract the low half components of rm.
            ;;   tmp2 = |g|e|
            (tmp2 Reg (vec_rr_narrow (VecRRNarrowOp.Xtn64) rm $false))
            ;; Shift the high-half components into the high half.
            ;;   rd = |(dg+ch) << 32|(be+af) << 32|
            (shift Reg (vec_rr_long (VecRRLongOp.Shll32) sum $false))
            ;; Multiply the low components together, and accumulate with the
            ;; high half.
            ;;   rd = |rd[1] + cg|rd[0] + ae|
            (result Reg (vec_rrrr_long (VecRRRLongOp.Umlal32) shift tmp2 tmp1 $false))
           )
        (value_reg result)))
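;; Per 64-bit lane, the sequence above is equivalent to the following scalar
;; computation (illustrative Rust, not part of this file; `mul64_via_32` is a
;; made-up name):
;;
;;   fn mul64_via_32(x: u64, y: u64) -> u64 {
;;       let (a, b) = (x as u32 as u64, x >> 32);  // low/high 32 bits of x
;;       let (e, f) = (y as u32 as u64, y >> 32);  // low/high 32 bits of y
;;       // Cross terms, truncated to 32 bits (the `mul` + `addp` steps above).
;;       let cross = a.wrapping_mul(f).wrapping_add(b.wrapping_mul(e)) as u32 as u64;
;;       // `shll` moves the cross terms into the high half; `umlal` adds a*e.
;;       (cross << 32).wrapping_add(a * e)
;;   }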
;; Special case for `i16x8.extmul_low_i8x16_s`.
(rule (lower (has_type $I16X8
                       (imul (def_inst (swiden_low x @ (value_type $I8X16)))
                             (def_inst (swiden_low y @ (value_type $I8X16))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Smull8) (put_in_reg x) (put_in_reg y) $false)))
;; Special case for `i16x8.extmul_high_i8x16_s`.
(rule (lower (has_type $I16X8
                       (imul (def_inst (swiden_high x @ (value_type $I8X16)))
                             (def_inst (swiden_high y @ (value_type $I8X16))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Smull8) (put_in_reg x) (put_in_reg y) $true)))
;; Special case for `i16x8.extmul_low_i8x16_u`.
(rule (lower (has_type $I16X8
                       (imul (def_inst (uwiden_low x @ (value_type $I8X16)))
                             (def_inst (uwiden_low y @ (value_type $I8X16))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Umull8) (put_in_reg x) (put_in_reg y) $false)))
;; Special case for `i16x8.extmul_high_i8x16_u`.
(rule (lower (has_type $I16X8
                       (imul (def_inst (uwiden_high x @ (value_type $I8X16)))
                             (def_inst (uwiden_high y @ (value_type $I8X16))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Umull8) (put_in_reg x) (put_in_reg y) $true)))
;; Special case for `i32x4.extmul_low_i16x8_s`.
(rule (lower (has_type $I32X4
                       (imul (def_inst (swiden_low x @ (value_type $I16X8)))
                             (def_inst (swiden_low y @ (value_type $I16X8))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Smull16) (put_in_reg x) (put_in_reg y) $false)))
;; Special case for `i32x4.extmul_high_i16x8_s`.
(rule (lower (has_type $I32X4
                       (imul (def_inst (swiden_high x @ (value_type $I16X8)))
                             (def_inst (swiden_high y @ (value_type $I16X8))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Smull16) (put_in_reg x) (put_in_reg y) $true)))
;; Special case for `i32x4.extmul_low_i16x8_u`.
(rule (lower (has_type $I32X4
                       (imul (def_inst (uwiden_low x @ (value_type $I16X8)))
                             (def_inst (uwiden_low y @ (value_type $I16X8))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Umull16) (put_in_reg x) (put_in_reg y) $false)))
;; Special case for `i32x4.extmul_high_i16x8_u`.
(rule (lower (has_type $I32X4
                       (imul (def_inst (uwiden_high x @ (value_type $I16X8)))
                             (def_inst (uwiden_high y @ (value_type $I16X8))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Umull16) (put_in_reg x) (put_in_reg y) $true)))
;; Special case for `i64x2.extmul_low_i32x4_s`.
(rule (lower (has_type $I64X2
                       (imul (def_inst (swiden_low x @ (value_type $I32X4)))
                             (def_inst (swiden_low y @ (value_type $I32X4))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Smull32) (put_in_reg x) (put_in_reg y) $false)))
;; Special case for `i64x2.extmul_high_i32x4_s`.
(rule (lower (has_type $I64X2
                       (imul (def_inst (swiden_high x @ (value_type $I32X4)))
                             (def_inst (swiden_high y @ (value_type $I32X4))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Smull32) (put_in_reg x) (put_in_reg y) $true)))
;; Special case for `i64x2.extmul_low_i32x4_u`.
(rule (lower (has_type $I64X2
                       (imul (def_inst (uwiden_low x @ (value_type $I32X4)))
                             (def_inst (uwiden_low y @ (value_type $I32X4))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Umull32) (put_in_reg x) (put_in_reg y) $false)))
;; Special case for `i64x2.extmul_high_i32x4_u`.
(rule (lower (has_type $I64X2
                       (imul (def_inst (uwiden_high x @ (value_type $I32X4)))
                             (def_inst (uwiden_high y @ (value_type $I32X4))))))
      (value_reg (vec_rrr_long (VecRRRLongOp.Umull32) (put_in_reg x) (put_in_reg y) $true)))
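;; In all of the `extmul` cases above, `smull`/`umull` (and their `2` variants
;; for the high halves, selected by the final `$true`/`$false` argument) widen
;; each input lane before multiplying, so e.g. an `i16x8.extmul_low_i8x16_s`
;; lane is computed as `(a as i16) * (b as i16)`, which cannot overflow.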
;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I64 (smulhi x y)))
      (value_reg (alu_rrr (ALUOp.SMulH) (put_in_reg x) (put_in_reg y))))
(rule (lower (has_type (fits_in_32 ty) (smulhi x y)))
      (let (
            (x64 Reg (put_in_reg_sext64 x))
            (y64 Reg (put_in_reg_sext64 y))
            (mul Reg (alu_rrrr (ALUOp3.MAdd64) x64 y64 (zero_reg)))
            (result Reg (alu_rr_imm_shift (ALUOp.Asr64) mul (imm_shift_from_u8 (ty_bits ty))))
           )
        (value_reg result)))
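;; For a 32-bit `smulhi` the sequence above amounts to (illustrative Rust):
;;
;;   let prod = (x as i64) * (y as i64); // sign-extend, then 64-bit multiply
;;   let hi = (prod >> 32) as i32;       // arithmetic shift extracts the high half
;;
;; The full product of two sign-extended narrow values always fits in 64 bits,
;; so a single 64-bit multiply suffices; for `i8`/`i16` the shift amount is
;; `ty_bits`, i.e. 8 or 16.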
;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I64 (umulhi x y)))
      (value_reg (alu_rrr (ALUOp.UMulH) (put_in_reg x) (put_in_reg y))))
(rule (lower (has_type (fits_in_32 ty) (umulhi x y)))
      (let (
            (x64 Reg (put_in_reg_zext64 x))
            (y64 Reg (put_in_reg_zext64 y))
            (mul Reg (alu_rrrr (ALUOp3.MAdd64) x64 y64 (zero_reg)))
            (result Reg (alu_rr_imm_shift (ALUOp.Lsr64) mul (imm_shift_from_u8 (ty_bits ty))))
           )
        (value_reg result)))
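;; The unsigned narrow case is analogous (illustrative Rust):
;;
;;   let prod = (x as u64) * (y as u64); // zero-extend, then 64-bit multiply
;;   let hi = (prod >> 32) as u32;       // logical shift extracts the high half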