aarch64: fix up regalloc2 semantics. (#4830)
This PR removes all uses of modify-operands in the aarch64 backend, replacing them with reused-input operands instead. This has the nice effect of removing a bunch of move instructions and more clearly representing inputs and outputs. This PR also removes the explicit use of pinned vregs in the aarch64 backend, instead using fixed-register constraints on the operands when insts or pseudo-inst sequences require certain registers. This is the second PR in the regalloc-semantics cleanup series; after the remaining backend (s390x) and the ABI code are cleaned up as well, we'll be able to simplify the regalloc2 frontend.
This commit is contained in:
@@ -171,13 +171,23 @@
|
||||
(rd WritableReg)
|
||||
(rm PReg))
|
||||
|
||||
;; A MOV[Z,N,K] with a 16-bit immediate.
|
||||
;; A MOV[Z,N] with a 16-bit immediate.
|
||||
(MovWide
|
||||
(op MoveWideOp)
|
||||
(rd WritableReg)
|
||||
(imm MoveWideConst)
|
||||
(size OperandSize))
|
||||
|
||||
;; A MOVK with a 16-bit immediate. Modifies its register; we
|
||||
;; model this with a separate input `rn` and output `rd` virtual
|
||||
;; register, with a regalloc constraint to tie them together.
|
||||
(MovK
|
||||
(rd WritableReg)
|
||||
(rn Reg)
|
||||
(imm MoveWideConst)
|
||||
(size OperandSize))
|
||||
|
||||
|
||||
;; A sign- or zero-extend operation.
|
||||
(Extend
|
||||
(rd WritableReg)
|
||||
@@ -240,7 +250,12 @@
|
||||
;; x28 (wr) scratch reg; value afterwards has no meaning
|
||||
(AtomicRMWLoop
|
||||
(ty Type) ;; I8, I16, I32 or I64
|
||||
(op AtomicRMWLoopOp))
|
||||
(op AtomicRMWLoopOp)
|
||||
(addr Reg)
|
||||
(operand Reg)
|
||||
(oldval WritableReg)
|
||||
(scratch1 WritableReg)
|
||||
(scratch2 WritableReg))
|
||||
|
||||
;; Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked
|
||||
;; store-conditional loop, with acquire-release semantics.
|
||||
@@ -253,7 +268,11 @@
|
||||
;; x24 (wr) scratch reg; value afterwards has no meaning
|
||||
(AtomicCASLoop
|
||||
(ty Type) ;; I8, I16, I32 or I64
|
||||
)
|
||||
(addr Reg)
|
||||
(expected Reg)
|
||||
(replacement Reg)
|
||||
(oldval WritableReg)
|
||||
(scratch WritableReg))
|
||||
|
||||
;; An atomic read-modify-write operation. These instructions require the
|
||||
;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have
|
||||
@@ -269,7 +288,10 @@
|
||||
;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have
|
||||
;; acquire-release semantics.
|
||||
(AtomicCAS
|
||||
(rs WritableReg)
|
||||
;; `rd` is really `rs` in the encoded instruction (so `rd` == `rs`); we separate
|
||||
;; them here to have separate use and def vregs for regalloc.
|
||||
(rd WritableReg)
|
||||
(rs Reg)
|
||||
(rt Reg)
|
||||
(rn Reg)
|
||||
(ty Type))
|
||||
@@ -342,6 +364,16 @@
|
||||
(rd WritableReg)
|
||||
(rn Reg))
|
||||
|
||||
;; Variant of FpuRRI that modifies its `rd`, and so we name the
|
||||
;; input state `ri` (for "input") and constrain the two
|
||||
;; together.
|
||||
(FpuRRIMod
|
||||
(fpu_op FPUOpRIMod)
|
||||
(rd WritableReg)
|
||||
(ri Reg)
|
||||
(rn Reg))
|
||||
|
||||
|
||||
;; 3-op FPU instruction.
|
||||
;; 16-bit scalars require half-precision floating-point support (FEAT_FP16).
|
||||
(FpuRRRR
|
||||
@@ -479,6 +511,7 @@
|
||||
;; Move to a vector element from a GPR.
|
||||
(MovToVec
|
||||
(rd WritableReg)
|
||||
(ri Reg)
|
||||
(rn Reg)
|
||||
(idx u8)
|
||||
(size VectorSize))
|
||||
@@ -534,6 +567,7 @@
|
||||
;; Move vector element to another vector element.
|
||||
(VecMovElement
|
||||
(rd WritableReg)
|
||||
(ri Reg)
|
||||
(rn Reg)
|
||||
(dest_idx u8)
|
||||
(src_idx u8)
|
||||
@@ -546,12 +580,19 @@
|
||||
(rn Reg)
|
||||
(high_half bool))
|
||||
|
||||
;; Vector narrowing operation.
|
||||
(VecRRNarrow
|
||||
;; Vector narrowing operation -- low half.
|
||||
(VecRRNarrowLow
|
||||
(op VecRRNarrowOp)
|
||||
(rd WritableReg)
|
||||
(rn Reg)
|
||||
(high_half bool)
|
||||
(lane_size ScalarSize))
|
||||
|
||||
;; Vector narrowing operation -- high half.
|
||||
(VecRRNarrowHigh
|
||||
(op VecRRNarrowOp)
|
||||
(rd WritableReg)
|
||||
(ri Reg)
|
||||
(rn Reg)
|
||||
(lane_size ScalarSize))
|
||||
|
||||
;; 1-operand vector instruction that operates on a pair of elements.
|
||||
@@ -569,6 +610,17 @@
|
||||
(rm Reg)
|
||||
(high_half bool))
|
||||
|
||||
;; 2-operand vector instruction that produces a result with
|
||||
;; twice the lane width and half the number of lanes. Variant
|
||||
;; that modifies `rd` (so takes its initial state as `ri`).
|
||||
(VecRRRLongMod
|
||||
(alu_op VecRRRLongModOp)
|
||||
(rd WritableReg)
|
||||
(ri Reg)
|
||||
(rn Reg)
|
||||
(rm Reg)
|
||||
(high_half bool))
|
||||
|
||||
;; 1-operand vector instruction that extends elements of the input
|
||||
;; register and operates on a pair of elements. The output lane width
|
||||
;; is double that of the input.
|
||||
@@ -589,6 +641,7 @@
|
||||
(VecRRRMod
|
||||
(alu_op VecALUModOp)
|
||||
(rd WritableReg)
|
||||
(ri Reg)
|
||||
(rn Reg)
|
||||
(rm Reg)
|
||||
(size VectorSize))
|
||||
@@ -623,6 +676,7 @@
|
||||
(VecShiftImmMod
|
||||
(op VecShiftImmModOp)
|
||||
(rd WritableReg)
|
||||
(ri Reg)
|
||||
(rn Reg)
|
||||
(size VectorSize)
|
||||
(imm u8))
|
||||
@@ -635,29 +689,55 @@
|
||||
(rm Reg)
|
||||
(imm4 u8))
|
||||
|
||||
;; Table vector lookup - single register table. The table consists of 8-bit elements and is
|
||||
;; stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether
|
||||
;; to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination
|
||||
;; vector that correspond to out-of-range indices (greater than 15) unmodified or to set them
|
||||
;; to 0.
|
||||
;; Table vector lookup - single register table. The table
|
||||
;; consists of 8-bit elements and is stored in `rn`, while `rm`
|
||||
;; contains 8-bit element indices. This variant emits `TBL`,
|
||||
;; which sets elements that correspond to out-of-range indices
|
||||
;; (greater than 15) to 0.
|
||||
(VecTbl
|
||||
(rd WritableReg)
|
||||
(rn Reg)
|
||||
(rm Reg)
|
||||
(is_extension bool))
|
||||
(rm Reg))
|
||||
|
||||
;; Table vector lookup - two register table. The table consists of 8-bit elements and is
|
||||
;; stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension`
|
||||
;; specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in
|
||||
;; the destination vector that correspond to out-of-range indices (greater than 31) unmodified
|
||||
;; or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers
|
||||
;; modulo 32, that is v31 and v0 (in that order) are consecutive registers.
|
||||
;; Table vector lookup - single register table. The table
|
||||
;; consists of 8-bit elements and is stored in `rn`, while `rm`
|
||||
;; contains 8-bit element indices. This variant emits `TBX`,
|
||||
;; which leaves elements that correspond to out-of-range indices
|
||||
;; (greater than 15) unmodified. Hence, it takes an input vreg in
|
||||
;; `ri` that is constrained to the same allocation as `rd`.
|
||||
(VecTblExt
|
||||
(rd WritableReg)
|
||||
(ri Reg)
|
||||
(rn Reg)
|
||||
(rm Reg))
|
||||
|
||||
;; Table vector lookup - two register table. The table consists
|
||||
;; of 8-bit elements and is stored in `rn` and `rn2`, while
|
||||
;; `rm` contains 8-bit element indices. The table registers
|
||||
;; `rn` and `rn2` must have consecutive numbers modulo 32, that
|
||||
;; is v31 and v0 (in that order) are consecutive registers.
|
||||
;; This variant emits `TBL`, which sets out-of-range results to
|
||||
;; 0.
|
||||
(VecTbl2
|
||||
(rd WritableReg)
|
||||
(rn Reg)
|
||||
(rn2 Reg)
|
||||
(rm Reg)
|
||||
(is_extension bool))
|
||||
(rm Reg))
|
||||
|
||||
;; Table vector lookup - two register table. The table consists
|
||||
;; of 8-bit elements and is stored in `rn` and `rn2`, while
|
||||
;; `rm` contains 8-bit element indices. The table registers
|
||||
;; `rn` and `rn2` must have consecutive numbers modulo 32, that
|
||||
;; is v31 and v0 (in that order) are consecutive registers.
|
||||
;; This variant emits `TBX`, which leaves out-of-range results
|
||||
;; unmodified, hence takes the initial state of the result
|
||||
;; register in vreg `ri`.
|
||||
(VecTbl2Ext
|
||||
(rd WritableReg)
|
||||
(ri Reg)
|
||||
(rn Reg)
|
||||
(rn2 Reg)
|
||||
(rm Reg))
|
||||
|
||||
;; Load an element and replicate to all lanes of a vector.
|
||||
(VecLoadReplicate
|
||||
@@ -888,7 +968,6 @@
|
||||
(enum
|
||||
(MovZ)
|
||||
(MovN)
|
||||
(MovK)
|
||||
))
|
||||
|
||||
(type UImm5 (primitive UImm5))
|
||||
@@ -934,6 +1013,7 @@
|
||||
(type AMode extern (enum))
|
||||
(type PairAMode extern (enum))
|
||||
(type FPUOpRI extern (enum))
|
||||
(type FPUOpRIMod extern (enum))
|
||||
|
||||
(type OperandSize extern
|
||||
(enum Size32
|
||||
@@ -1287,6 +1367,10 @@
|
||||
(Umull8)
|
||||
(Umull16)
|
||||
(Umull32)
|
||||
))
|
||||
|
||||
(type VecRRRLongModOp
|
||||
(enum
|
||||
;; Unsigned multiply add long
|
||||
(Umlal8)
|
||||
(Umlal16)
|
||||
@@ -1447,9 +1531,9 @@
|
||||
(decl fpu_op_ri_ushr (u8 u8) FPUOpRI)
|
||||
(extern constructor fpu_op_ri_ushr fpu_op_ri_ushr)
|
||||
|
||||
;; Constructs an FPUOpRI.Sli* given the size in bits of the value (or lane)
|
||||
;; Constructs an FPUOpRIMod.Sli* given the size in bits of the value (or lane)
|
||||
;; and the amount to shift by.
|
||||
(decl fpu_op_ri_sli (u8 u8) FPUOpRI)
|
||||
(decl fpu_op_ri_sli (u8 u8) FPUOpRIMod)
|
||||
(extern constructor fpu_op_ri_sli fpu_op_ri_sli)
|
||||
|
||||
(decl imm12_from_negated_u64 (Imm12) u64)
|
||||
@@ -1524,29 +1608,6 @@
|
||||
(decl writable_zero_reg () WritableReg)
|
||||
(extern constructor writable_zero_reg writable_zero_reg)
|
||||
|
||||
;; Helpers for getting a particular real register
|
||||
(decl xreg (u8) Reg)
|
||||
(extern constructor xreg xreg)
|
||||
|
||||
(decl writable_vreg (u8) WritableReg)
|
||||
(extern constructor writable_vreg writable_vreg)
|
||||
|
||||
(decl writable_xreg (u8) WritableReg)
|
||||
(extern constructor writable_xreg writable_xreg)
|
||||
|
||||
;; Helper for emitting `MInst.Mov64` instructions.
|
||||
(decl mov64_to_real (u8 Reg) Reg)
|
||||
(rule (mov64_to_real num src)
|
||||
(let ((dst WritableReg (writable_xreg num))
|
||||
(_ Unit (emit (MInst.Mov (operand_size $I64) dst src))))
|
||||
dst))
|
||||
|
||||
(decl mov64_from_real (u8) Reg)
|
||||
(rule (mov64_from_real num)
|
||||
(let ((dst WritableReg (temp_writable_reg $I64))
|
||||
(_ Unit (emit (MInst.Mov (operand_size $I64) dst (xreg num)))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.MovZ` instructions.
|
||||
(decl movz (MoveWideConst OperandSize) Reg)
|
||||
(rule (movz imm size)
|
||||
@@ -1601,8 +1662,7 @@
|
||||
(decl vec_rrr_mod (VecALUModOp Reg Reg Reg VectorSize) Reg)
|
||||
(rule (vec_rrr_mod op src1 src2 src3 size)
|
||||
(let ((dst WritableReg (temp_writable_reg $I8X16))
|
||||
(_1 Unit (emit (MInst.FpuMove128 dst src1)))
|
||||
(_2 Unit (emit (MInst.VecRRRMod op dst src2 src3 size))))
|
||||
(_1 Unit (emit (MInst.VecRRRMod op dst src1 src2 src3 size))))
|
||||
dst))
|
||||
|
||||
(decl fpu_rri (FPUOpRI Reg) Reg)
|
||||
@@ -1611,6 +1671,12 @@
|
||||
(_ Unit (emit (MInst.FpuRRI op dst src))))
|
||||
dst))
|
||||
|
||||
(decl fpu_rri_mod (FPUOpRIMod Reg Reg) Reg)
|
||||
(rule (fpu_rri_mod op dst_src src)
|
||||
(let ((dst WritableReg (temp_writable_reg $F64))
|
||||
(_ Unit (emit (MInst.FpuRRIMod op dst dst_src src))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.FpuRRR` instructions.
|
||||
(decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg)
|
||||
(rule (fpu_rrr op src1 src2 size)
|
||||
@@ -1790,29 +1856,33 @@
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.VecTbl` instructions.
|
||||
(decl vec_tbl (Reg Reg bool) Reg)
|
||||
(rule (vec_tbl rn rm is_extension)
|
||||
(decl vec_tbl (Reg Reg) Reg)
|
||||
(rule (vec_tbl rn rm)
|
||||
(let ((dst WritableReg (temp_writable_reg $I8X16))
|
||||
(_ Unit (emit (MInst.VecTbl dst rn rm is_extension))))
|
||||
(_ Unit (emit (MInst.VecTbl dst rn rm))))
|
||||
dst))
|
||||
|
||||
(decl vec_tbl_ext (Reg Reg Reg) Reg)
|
||||
(rule (vec_tbl_ext ri rn rm)
|
||||
(let ((dst WritableReg (temp_writable_reg $I8X16))
|
||||
(_ Unit (emit (MInst.VecTblExt dst ri rn rm))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.VecTbl2` instructions.
|
||||
;; - 2 register table vector lookups require consecutive table registers;
|
||||
;; we satisfy this constraint by hardcoding the usage of v30 and v31.
|
||||
;; - Make sure that both args are in virtual regs, since it is not guaranteed
|
||||
;; that we can get them safely to the temporaries if either is in a real
|
||||
;; register.
|
||||
(decl vec_tbl2 (Reg Reg Reg bool Type) Reg)
|
||||
(rule (vec_tbl2 rn rn2 rm is_extension ty)
|
||||
(decl vec_tbl2 (Reg Reg Reg Type) Reg)
|
||||
(rule (vec_tbl2 rn rn2 rm ty)
|
||||
(let (
|
||||
(temp WritableReg (writable_vreg 30))
|
||||
(temp2 WritableReg (writable_vreg 31))
|
||||
(dst WritableReg (temp_writable_reg $I8X16))
|
||||
(rn Reg (ensure_in_vreg rn ty))
|
||||
(rn2 Reg (ensure_in_vreg rn2 ty))
|
||||
(_ Unit (emit (MInst.FpuMove128 temp rn)))
|
||||
(_ Unit (emit (MInst.FpuMove128 temp2 rn2)))
|
||||
(_ Unit (emit (MInst.VecTbl2 dst temp temp2 rm is_extension)))
|
||||
(_ Unit (emit (MInst.VecTbl2 dst rn rn2 rm)))
|
||||
)
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.VecTbl2Ext` instructions.
|
||||
(decl vec_tbl2_ext (Reg Reg Reg Reg Type) Reg)
|
||||
(rule (vec_tbl2_ext ri rn rn2 rm ty)
|
||||
(let (
|
||||
(dst WritableReg (temp_writable_reg $I8X16))
|
||||
(_ Unit (emit (MInst.VecTbl2Ext dst ri rn rn2 rm)))
|
||||
)
|
||||
dst))
|
||||
|
||||
@@ -1830,22 +1900,18 @@
|
||||
(_ Unit (emit (MInst.VecRRPairLong op dst src))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.VecRRRLong` instructions, but for variants
|
||||
;; where the operation both reads and modifies the destination register.
|
||||
;;
|
||||
;; Currently this is only used for `VecRRRLongOp.Umlal*`
|
||||
(decl vec_rrrr_long (VecRRRLongOp Reg Reg Reg bool) Reg)
|
||||
;; Helper for emitting `MInst.VecRRRLongMod` instructions.
|
||||
(decl vec_rrrr_long (VecRRRLongModOp Reg Reg Reg bool) Reg)
|
||||
(rule (vec_rrrr_long op src1 src2 src3 high_half)
|
||||
(let ((dst WritableReg (temp_writable_reg $I8X16))
|
||||
(_ Unit (emit (MInst.FpuMove128 dst src1)))
|
||||
(_ Unit (emit (MInst.VecRRRLong op dst src2 src3 high_half))))
|
||||
(_ Unit (emit (MInst.VecRRRLongMod op dst src1 src2 src3 high_half))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.VecRRNarrowLow` instructions.
|
||||
(decl vec_rr_narrow (VecRRNarrowOp Reg ScalarSize) Reg)
|
||||
(rule (vec_rr_narrow op src size)
|
||||
(decl vec_rr_narrow_low (VecRRNarrowOp Reg ScalarSize) Reg)
|
||||
(rule (vec_rr_narrow_low op src size)
|
||||
(let ((dst WritableReg (temp_writable_reg $I8X16))
|
||||
(_ Unit (emit (MInst.VecRRNarrow op dst src $false size))))
|
||||
(_ Unit (emit (MInst.VecRRNarrowLow op dst src size))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.VecRRNarrowHigh` instructions which update the
|
||||
@@ -1853,8 +1919,7 @@
|
||||
(decl vec_rr_narrow_high (VecRRNarrowOp Reg Reg ScalarSize) Reg)
|
||||
(rule (vec_rr_narrow_high op mod src size)
|
||||
(let ((dst WritableReg (temp_writable_reg $I8X16))
|
||||
(_ Unit (emit (MInst.FpuMove128 dst mod)))
|
||||
(_ Unit (emit (MInst.VecRRNarrow op dst src $true size))))
|
||||
(_ Unit (emit (MInst.VecRRNarrowHigh op dst mod src size))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.VecRRLong` instructions.
|
||||
@@ -1897,16 +1962,14 @@
|
||||
(decl mov_to_vec (Reg Reg u8 VectorSize) Reg)
|
||||
(rule (mov_to_vec src1 src2 lane size)
|
||||
(let ((dst WritableReg (temp_writable_reg $I8X16))
|
||||
(_ Unit (emit (MInst.FpuMove128 dst src1)))
|
||||
(_ Unit (emit (MInst.MovToVec dst src2 lane size))))
|
||||
(_ Unit (emit (MInst.MovToVec dst src1 src2 lane size))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.VecMovElement` instructions.
|
||||
(decl mov_vec_elem (Reg Reg u8 u8 VectorSize) Reg)
|
||||
(rule (mov_vec_elem src1 src2 dst_idx src_idx size)
|
||||
(let ((dst WritableReg (temp_writable_reg $I8X16))
|
||||
(_ Unit (emit (MInst.FpuMove128 dst src1)))
|
||||
(_ Unit (emit (MInst.VecMovElement dst src2 dst_idx src_idx size))))
|
||||
(_ Unit (emit (MInst.VecMovElement dst src1 src2 dst_idx src_idx size))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.MovFromVec` instructions.
|
||||
@@ -2104,15 +2167,15 @@
|
||||
|
||||
;; Helper for generating `xtn` instructions.
|
||||
(decl xtn (Reg ScalarSize) Reg)
|
||||
(rule (xtn x size) (vec_rr_narrow (VecRRNarrowOp.Xtn) x size))
|
||||
(rule (xtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Xtn) x size))
|
||||
|
||||
;; Helper for generating `fcvtn` instructions.
|
||||
(decl fcvtn (Reg ScalarSize) Reg)
|
||||
(rule (fcvtn x size) (vec_rr_narrow (VecRRNarrowOp.Fcvtn) x size))
|
||||
(rule (fcvtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Fcvtn) x size))
|
||||
|
||||
;; Helper for generating `sqxtn` instructions.
|
||||
(decl sqxtn (Reg ScalarSize) Reg)
|
||||
(rule (sqxtn x size) (vec_rr_narrow (VecRRNarrowOp.Sqxtn) x size))
|
||||
(rule (sqxtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Sqxtn) x size))
|
||||
|
||||
;; Helper for generating `sqxtn2` instructions.
|
||||
(decl sqxtn2 (Reg Reg ScalarSize) Reg)
|
||||
@@ -2120,7 +2183,7 @@
|
||||
|
||||
;; Helper for generating `sqxtun` instructions.
|
||||
(decl sqxtun (Reg ScalarSize) Reg)
|
||||
(rule (sqxtun x size) (vec_rr_narrow (VecRRNarrowOp.Sqxtun) x size))
|
||||
(rule (sqxtun x size) (vec_rr_narrow_low (VecRRNarrowOp.Sqxtun) x size))
|
||||
|
||||
;; Helper for generating `sqxtun2` instructions.
|
||||
(decl sqxtun2 (Reg Reg ScalarSize) Reg)
|
||||
@@ -2128,7 +2191,7 @@
|
||||
|
||||
;; Helper for generating `uqxtn` instructions.
|
||||
(decl uqxtn (Reg ScalarSize) Reg)
|
||||
(rule (uqxtn x size) (vec_rr_narrow (VecRRNarrowOp.Uqxtn) x size))
|
||||
(rule (uqxtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Uqxtn) x size))
|
||||
|
||||
;; Helper for generating `uqxtn2` instructions.
|
||||
(decl uqxtn2 (Reg Reg ScalarSize) Reg)
|
||||
@@ -2187,7 +2250,7 @@
|
||||
|
||||
;; Helper for generating `umlal32` instructions.
|
||||
(decl umlal32 (Reg Reg Reg bool) Reg)
|
||||
(rule (umlal32 x y z high_half) (vec_rrrr_long (VecRRRLongOp.Umlal32) x y z high_half))
|
||||
(rule (umlal32 x y z high_half) (vec_rrrr_long (VecRRRLongModOp.Umlal32) x y z high_half))
|
||||
|
||||
;; Helper for generating `smull8` instructions.
|
||||
(decl smull8 (Reg Reg bool) Reg)
|
||||
@@ -2719,8 +2782,7 @@
|
||||
(rule (lse_atomic_cas addr expect replace ty)
|
||||
(let (
|
||||
(dst WritableReg (temp_writable_reg ty))
|
||||
(_ Unit (emit (MInst.Mov (operand_size ty) dst expect)))
|
||||
(_ Unit (emit (MInst.AtomicCAS dst replace addr ty)))
|
||||
(_ Unit (emit (MInst.AtomicCAS dst expect replace addr ty)))
|
||||
)
|
||||
dst))
|
||||
|
||||
@@ -2730,16 +2792,13 @@
|
||||
;; regs, and that's not guaranteed safe if either is in a real reg.
|
||||
;; - Move the args to the preordained AtomicRMW input regs
|
||||
;; - And finally, copy the preordained AtomicRMW output reg to its destination.
|
||||
(decl atomic_rmw_loop (AtomicRMWLoopOp Value Value Type) Reg)
|
||||
(rule (atomic_rmw_loop op p arg2 ty)
|
||||
(let (
|
||||
(v_addr Reg (ensure_in_vreg p $I64))
|
||||
(v_arg2 Reg (ensure_in_vreg arg2 $I64))
|
||||
(r_addr Reg (mov64_to_real 25 v_addr))
|
||||
(r_arg2 Reg (mov64_to_real 26 v_arg2))
|
||||
(_ Unit (emit (MInst.AtomicRMWLoop ty op)))
|
||||
)
|
||||
(mov64_from_real 27)))
|
||||
(decl atomic_rmw_loop (AtomicRMWLoopOp Reg Reg Type) Reg)
|
||||
(rule (atomic_rmw_loop op addr operand ty)
|
||||
(let ((dst WritableReg (temp_writable_reg $I64))
|
||||
(scratch1 WritableReg (temp_writable_reg $I64))
|
||||
(scratch2 WritableReg (temp_writable_reg $I64))
|
||||
(_ Unit (emit (MInst.AtomicRMWLoop ty op addr operand dst scratch1 scratch2))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.AtomicCASLoop` instructions.
|
||||
;; This is very similar to, but not identical to, the AtomicRmw case. Note
|
||||
@@ -2749,21 +2808,10 @@
|
||||
;; for `atomic_rmw_loop` above.
|
||||
(decl atomic_cas_loop (Reg Reg Reg Type) Reg)
|
||||
(rule (atomic_cas_loop addr expect replace ty)
|
||||
(let (
|
||||
(v_addr Reg (ensure_in_vreg addr $I64))
|
||||
(v_exp Reg (ensure_in_vreg expect $I64))
|
||||
(v_rep Reg (ensure_in_vreg replace $I64))
|
||||
;; Move the args to the preordained AtomicCASLoop input regs
|
||||
(r_addr Reg (mov64_to_real 25 v_addr))
|
||||
(r_exp Reg (mov64_to_real 26 v_exp))
|
||||
(r_rep Reg (mov64_to_real 28 v_rep))
|
||||
;; Now the AtomicCASLoop itself, implemented in the normal way, with a
|
||||
;; load-exclusive, store-exclusive loop
|
||||
(_ Unit (emit (MInst.AtomicCASLoop ty)))
|
||||
)
|
||||
;; And finally, copy the preordained AtomicCASLoop output reg to its destination.
|
||||
;; Also, x24 and x28 are trashed.
|
||||
(mov64_from_real 27)))
|
||||
(let ((dst WritableReg (temp_writable_reg $I64))
|
||||
(scratch WritableReg (temp_writable_reg $I64))
|
||||
(_ Unit (emit (MInst.AtomicCASLoop ty addr expect replace dst scratch))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.MovPReg` instructions.
|
||||
(decl mov_preg (PReg) Reg)
|
||||
@@ -2811,15 +2859,13 @@
|
||||
(decl fcopy_sign (Reg Reg Type) Reg)
|
||||
(rule (fcopy_sign x y (ty_scalar_float ty))
|
||||
(let ((dst WritableReg (temp_writable_reg $F64))
|
||||
(_ Unit (emit (MInst.FpuMove64 dst x)))
|
||||
(tmp Reg (fpu_rri (fpu_op_ri_ushr (ty_bits ty) (max_shift ty)) y))
|
||||
(_ Unit (emit (MInst.FpuRRI (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst tmp))))
|
||||
(_ Unit (emit (MInst.FpuRRIMod (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst x tmp))))
|
||||
dst))
|
||||
(rule (fcopy_sign x y ty @ (multi_lane _ _))
|
||||
(let ((dst WritableReg (temp_writable_reg $I8X16))
|
||||
(_ Unit (emit (MInst.FpuMove128 dst x)))
|
||||
(tmp Reg (vec_shift_imm (VecShiftImmOp.Ushr) (max_shift (lane_type ty)) y (vector_size ty)))
|
||||
(_ Unit (emit (MInst.VecShiftImmMod (VecShiftImmModOp.Sli) dst tmp (vector_size ty) (max_shift (lane_type ty))))))
|
||||
(_ Unit (emit (MInst.VecShiftImmMod (VecShiftImmModOp.Sli) dst x tmp (vector_size ty) (max_shift (lane_type ty))))))
|
||||
dst))
|
||||
|
||||
;; Helpers for generating `MInst.FpuToInt` instructions.
|
||||
|
||||
Reference in New Issue
Block a user