aarch64: fix up regalloc2 semantics. (#4830)

This PR removes all uses of modify-operands in the aarch64 backend,
replacing them with reused-input operands instead. This has the nice
effect of removing a bunch of move instructions and more clearly
representing inputs and outputs.
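For instance, an instruction that used to take a single modify-operand now reports a separate input vreg and ties it to the output with a reuse constraint. A minimal before/after sketch of the operand-collection change, using the `MovToVec` case from this diff:

    // Before: `rd` is a modify-operand, so lowering had to emit an explicit
    // move of the incoming value into `rd` first.
    &Inst::MovToVec { rd, rn, .. } => {
        collector.reg_mod(rd);
        collector.reg_use(rn);
    }

    // After: `ri` carries the incoming value as an ordinary use, and `rd` is
    // a def constrained to reuse the allocation of operand 1 (`ri`).
    &Inst::MovToVec { rd, ri, rn, .. } => {
        collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
        collector.reg_use(ri);
        collector.reg_use(rn);
    }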

This PR also removes the explicit use of pinned vregs in the aarch64
backend, instead using fixed-register constraints on the operands when
insts or pseudo-inst sequences require certain registers.
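Concretely, a pseudo-instruction that needs particular registers now names ordinary vreg operands and pins them with fixed-register constraints, rather than having lowering move values into pinned physical vregs. A sketch taken from the `AtomicRMWLoop` operand collection in this diff:

    // Before: hard-coded physical registers, with moves emitted around the
    // pseudo-inst to shuttle values in and out of x25/x26/x27.
    collector.reg_use(xreg(25));
    collector.reg_use(xreg(26));
    collector.reg_def(writable_xreg(27));

    // After: the operands are plain vregs, each constrained to the register
    // the loop expansion expects.
    collector.reg_fixed_use(addr, xreg(25));
    collector.reg_fixed_use(operand, xreg(26));
    collector.reg_fixed_def(oldval, xreg(27));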

This is the second PR in the regalloc-semantics cleanup series; after
the remaining backend (s390x) and the ABI code are cleaned up as well,
we'll be able to simplify the regalloc2 frontend.
Chris Fallin
2022-09-01 14:25:20 -07:00
committed by GitHub
parent ac2d4c4818
commit ae5fe8a728
25 changed files with 1098 additions and 886 deletions


@@ -560,10 +560,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts.push(Inst::StoreP64 { insts.push(Inst::StoreP64 {
rt: fp_reg(), rt: fp_reg(),
rt2: link_reg(), rt2: link_reg(),
mem: PairAMode::PreIndexed( mem: PairAMode::SPPreIndexed(SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap()),
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
),
flags: MemFlags::trusted(), flags: MemFlags::trusted(),
}); });
@@ -601,10 +598,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts.push(Inst::LoadP64 { insts.push(Inst::LoadP64 {
rt: writable_fp_reg(), rt: writable_fp_reg(),
rt2: writable_link_reg(), rt2: writable_link_reg(),
mem: PairAMode::PostIndexed( mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(16, types::I64).unwrap()),
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
),
flags: MemFlags::trusted(), flags: MemFlags::trusted(),
}); });
insts insts
@@ -676,10 +670,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
// str rd, [sp, #-16]! // str rd, [sp, #-16]!
insts.push(Inst::Store64 { insts.push(Inst::Store64 {
rd, rd,
mem: AMode::PreIndexed( mem: AMode::SPPreIndexed(SImm9::maybe_from_i64(-clobber_offset_change).unwrap()),
writable_stack_reg(),
SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
),
flags: MemFlags::trusted(), flags: MemFlags::trusted(),
}); });
@@ -708,8 +699,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts.push(Inst::StoreP64 { insts.push(Inst::StoreP64 {
rt, rt,
rt2, rt2,
mem: PairAMode::PreIndexed( mem: PairAMode::SPPreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(), SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(),
), ),
flags: MemFlags::trusted(), flags: MemFlags::trusted(),
@@ -734,10 +724,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
let store_vec_reg = |rd| Inst::FpuStore64 { let store_vec_reg = |rd| Inst::FpuStore64 {
rd, rd,
mem: AMode::PreIndexed( mem: AMode::SPPreIndexed(SImm9::maybe_from_i64(-clobber_offset_change).unwrap()),
writable_stack_reg(),
SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
),
flags: MemFlags::trusted(), flags: MemFlags::trusted(),
}; };
let iter = clobbered_vec.chunks_exact(2); let iter = clobbered_vec.chunks_exact(2);
@@ -766,8 +753,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
Inst::FpuStoreP64 { Inst::FpuStoreP64 {
rt, rt,
rt2, rt2,
mem: PairAMode::PreIndexed( mem: PairAMode::SPPreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(), SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(),
), ),
flags: MemFlags::trusted(), flags: MemFlags::trusted(),
@@ -831,16 +817,13 @@ impl ABIMachineSpec for AArch64MachineDeps {
let load_vec_reg = |rd| Inst::FpuLoad64 { let load_vec_reg = |rd| Inst::FpuLoad64 {
rd, rd,
mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()), mem: AMode::SPPostIndexed(SImm9::maybe_from_i64(16).unwrap()),
flags: MemFlags::trusted(), flags: MemFlags::trusted(),
}; };
let load_vec_reg_pair = |rt, rt2| Inst::FpuLoadP64 { let load_vec_reg_pair = |rt, rt2| Inst::FpuLoadP64 {
rt, rt,
rt2, rt2,
mem: PairAMode::PostIndexed( mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(16, F64).unwrap()),
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
),
flags: MemFlags::trusted(), flags: MemFlags::trusted(),
}; };
@@ -876,10 +859,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts.push(Inst::LoadP64 { insts.push(Inst::LoadP64 {
rt, rt,
rt2, rt2,
mem: PairAMode::PostIndexed( mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(16, I64).unwrap()),
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
),
flags: MemFlags::trusted(), flags: MemFlags::trusted(),
}); });
} }
@@ -893,7 +873,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
// ldr rd, [sp], #16 // ldr rd, [sp], #16
insts.push(Inst::ULoad64 { insts.push(Inst::ULoad64 {
rd, rd,
mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()), mem: AMode::SPPostIndexed(SImm9::maybe_from_i64(16).unwrap()),
flags: MemFlags::trusted(), flags: MemFlags::trusted(),
}); });
} }


@@ -171,13 +171,23 @@
(rd WritableReg) (rd WritableReg)
(rm PReg)) (rm PReg))
;; A MOV[Z,N,K] with a 16-bit immediate. ;; A MOV[Z,N] with a 16-bit immediate.
(MovWide (MovWide
(op MoveWideOp) (op MoveWideOp)
(rd WritableReg) (rd WritableReg)
(imm MoveWideConst) (imm MoveWideConst)
(size OperandSize)) (size OperandSize))
;; A MOVK with a 16-bit immediate. Modifies its register; we
;; model this with a seprate input `rn` and output `rd` virtual
;; register, with a regalloc constraint to tie them together.
(MovK
(rd WritableReg)
(rn Reg)
(imm MoveWideConst)
(size OperandSize))
;; A sign- or zero-extend operation. ;; A sign- or zero-extend operation.
(Extend (Extend
(rd WritableReg) (rd WritableReg)
@@ -240,7 +250,12 @@
;; x28 (wr) scratch reg; value afterwards has no meaning ;; x28 (wr) scratch reg; value afterwards has no meaning
(AtomicRMWLoop (AtomicRMWLoop
(ty Type) ;; I8, I16, I32 or I64 (ty Type) ;; I8, I16, I32 or I64
(op AtomicRMWLoopOp)) (op AtomicRMWLoopOp)
(addr Reg)
(operand Reg)
(oldval WritableReg)
(scratch1 WritableReg)
(scratch2 WritableReg))
;; Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked ;; Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked
;; store-conditional loop, with acquire-release semantics. ;; store-conditional loop, with acquire-release semantics.
@@ -253,7 +268,11 @@
;; x24 (wr) scratch reg; value afterwards has no meaning ;; x24 (wr) scratch reg; value afterwards has no meaning
(AtomicCASLoop (AtomicCASLoop
(ty Type) ;; I8, I16, I32 or I64 (ty Type) ;; I8, I16, I32 or I64
) (addr Reg)
(expected Reg)
(replacement Reg)
(oldval WritableReg)
(scratch WritableReg))
;; An atomic read-modify-write operation. These instructions require the ;; An atomic read-modify-write operation. These instructions require the
;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have ;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have
@@ -269,7 +288,10 @@
;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have ;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have
;; acquire-release semantics. ;; acquire-release semantics.
(AtomicCAS (AtomicCAS
(rs WritableReg) ;; `rd` is really `rs` in the encoded instruction (so `rd` == `rs`); we separate
;; them here to have separate use and def vregs for regalloc.
(rd WritableReg)
(rs Reg)
(rt Reg) (rt Reg)
(rn Reg) (rn Reg)
(ty Type)) (ty Type))
@@ -342,6 +364,16 @@
(rd WritableReg) (rd WritableReg)
(rn Reg)) (rn Reg))
;; Variant of FpuRRI that modifies its `rd`, and so we name the
;; input state `ri` (for "input") and constrain the two
;; together.
(FpuRRIMod
(fpu_op FPUOpRIMod)
(rd WritableReg)
(ri Reg)
(rn Reg))
;; 3-op FPU instruction. ;; 3-op FPU instruction.
;; 16-bit scalars require half-precision floating-point support (FEAT_FP16). ;; 16-bit scalars require half-precision floating-point support (FEAT_FP16).
(FpuRRRR (FpuRRRR
@@ -479,6 +511,7 @@
;; Move to a vector element from a GPR. ;; Move to a vector element from a GPR.
(MovToVec (MovToVec
(rd WritableReg) (rd WritableReg)
(ri Reg)
(rn Reg) (rn Reg)
(idx u8) (idx u8)
(size VectorSize)) (size VectorSize))
@@ -534,6 +567,7 @@
;; Move vector element to another vector element. ;; Move vector element to another vector element.
(VecMovElement (VecMovElement
(rd WritableReg) (rd WritableReg)
(ri Reg)
(rn Reg) (rn Reg)
(dest_idx u8) (dest_idx u8)
(src_idx u8) (src_idx u8)
@@ -546,12 +580,19 @@
 (rn Reg)
 (high_half bool))
-;; Vector narrowing operation.
-(VecRRNarrow
+;; Vector narrowing operation -- low half.
+(VecRRNarrowLow
   (op VecRRNarrowOp)
   (rd WritableReg)
   (rn Reg)
-  (high_half bool)
+  (lane_size ScalarSize))
+;; Vector narrowing operation -- high half.
+(VecRRNarrowHigh
+  (op VecRRNarrowOp)
+  (rd WritableReg)
+  (ri Reg)
+  (rn Reg)
   (lane_size ScalarSize))
 ;; 1-operand vector instruction that operates on a pair of elements.
@@ -569,6 +610,17 @@
(rm Reg) (rm Reg)
(high_half bool)) (high_half bool))
;; 2-operand vector instruction that produces a result with
;; twice the lane width and half the number of lanes. Variant
;; that modifies `rd` (so takes its initial state as `ri`).
(VecRRRLongMod
(alu_op VecRRRLongModOp)
(rd WritableReg)
(ri Reg)
(rn Reg)
(rm Reg)
(high_half bool))
;; 1-operand vector instruction that extends elements of the input ;; 1-operand vector instruction that extends elements of the input
;; register and operates on a pair of elements. The output lane width ;; register and operates on a pair of elements. The output lane width
;; is double that of the input. ;; is double that of the input.
@@ -589,6 +641,7 @@
(VecRRRMod (VecRRRMod
(alu_op VecALUModOp) (alu_op VecALUModOp)
(rd WritableReg) (rd WritableReg)
(ri Reg)
(rn Reg) (rn Reg)
(rm Reg) (rm Reg)
(size VectorSize)) (size VectorSize))
@@ -623,6 +676,7 @@
(VecShiftImmMod (VecShiftImmMod
(op VecShiftImmModOp) (op VecShiftImmModOp)
(rd WritableReg) (rd WritableReg)
(ri Reg)
(rn Reg) (rn Reg)
(size VectorSize) (size VectorSize)
(imm u8)) (imm u8))
@@ -635,29 +689,55 @@
 (rm Reg)
 (imm4 u8))
-;; Table vector lookup - single register table. The table consists of 8-bit elements and is
-;; stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether
-;; to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination
-;; vector that correspond to out-of-range indices (greater than 15) unmodified or to set them
-;; to 0.
+;; Table vector lookup - single register table. The table
+;; consists of 8-bit elements and is stored in `rn`, while `rm`
+;; contains 8-bit element indices. This variant emits `TBL`,
+;; which sets elements that correspond to out-of-range indices
+;; (greater than 15) to 0.
 (VecTbl
   (rd WritableReg)
   (rn Reg)
-  (rm Reg)
-  (is_extension bool))
+  (rm Reg))
-;; Table vector lookup - two register table. The table consists of 8-bit elements and is
-;; stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension`
-;; specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in
-;; the destination vector that correspond to out-of-range indices (greater than 31) unmodified
-;; or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers
-;; modulo 32, that is v31 and v0 (in that order) are consecutive registers.
+;; Table vector lookup - single register table. The table
+;; consists of 8-bit elements and is stored in `rn`, while `rm`
+;; contains 8-bit element indices. This variant emits `TBX`,
+;; which leaves elements that correspond to out-of-range indices
+;; (greater than 15) unmodified. Hence, it takes an input vreg in
+;; `ri` that is constrained to the same allocation as `rd`.
+(VecTblExt
+  (rd WritableReg)
+  (ri Reg)
+  (rn Reg)
+  (rm Reg))
+;; Table vector lookup - two register table. The table consists
+;; of 8-bit elements and is stored in `rn` and `rn2`, while
+;; `rm` contains 8-bit element indices. The table registers
+;; `rn` and `rn2` must have consecutive numbers modulo 32, that
+;; is v31 and v0 (in that order) are consecutive registers.
+;; This variant emits `TBL`, which sets out-of-range results to
+;; 0.
 (VecTbl2
   (rd WritableReg)
   (rn Reg)
   (rn2 Reg)
-  (rm Reg)
-  (is_extension bool))
+  (rm Reg))
+;; Table vector lookup - two register table. The table consists
+;; of 8-bit elements and is stored in `rn` and `rn2`, while
+;; `rm` contains 8-bit element indices. The table registers
+;; `rn` and `rn2` must have consecutive numbers modulo 32, that
+;; is v31 and v0 (in that order) are consecutive registers.
+;; This variant emits `TBX`, which leaves out-of-range results
+;; unmodified, hence takes the initial state of the result
+;; register in vreg `ri`.
+(VecTbl2Ext
+  (rd WritableReg)
+  (ri Reg)
+  (rn Reg)
+  (rn2 Reg)
+  (rm Reg))
 ;; Load an element and replicate to all lanes of a vector.
 (VecLoadReplicate
@@ -888,7 +968,6 @@
(enum (enum
(MovZ) (MovZ)
(MovN) (MovN)
(MovK)
)) ))
(type UImm5 (primitive UImm5)) (type UImm5 (primitive UImm5))
@@ -934,6 +1013,7 @@
(type AMode extern (enum)) (type AMode extern (enum))
(type PairAMode extern (enum)) (type PairAMode extern (enum))
(type FPUOpRI extern (enum)) (type FPUOpRI extern (enum))
(type FPUOpRIMod extern (enum))
(type OperandSize extern (type OperandSize extern
(enum Size32 (enum Size32
@@ -1287,6 +1367,10 @@
(Umull8) (Umull8)
(Umull16) (Umull16)
(Umull32) (Umull32)
))
(type VecRRRLongModOp
(enum
;; Unsigned multiply add long ;; Unsigned multiply add long
(Umlal8) (Umlal8)
(Umlal16) (Umlal16)
@@ -1447,9 +1531,9 @@
(decl fpu_op_ri_ushr (u8 u8) FPUOpRI) (decl fpu_op_ri_ushr (u8 u8) FPUOpRI)
(extern constructor fpu_op_ri_ushr fpu_op_ri_ushr) (extern constructor fpu_op_ri_ushr fpu_op_ri_ushr)
;; Constructs an FPUOpRI.Sli* given the size in bits of the value (or lane) ;; Constructs an FPUOpRIMod.Sli* given the size in bits of the value (or lane)
;; and the amount to shift by. ;; and the amount to shift by.
(decl fpu_op_ri_sli (u8 u8) FPUOpRI) (decl fpu_op_ri_sli (u8 u8) FPUOpRIMod)
(extern constructor fpu_op_ri_sli fpu_op_ri_sli) (extern constructor fpu_op_ri_sli fpu_op_ri_sli)
(decl imm12_from_negated_u64 (Imm12) u64) (decl imm12_from_negated_u64 (Imm12) u64)
@@ -1524,29 +1608,6 @@
(decl writable_zero_reg () WritableReg) (decl writable_zero_reg () WritableReg)
(extern constructor writable_zero_reg writable_zero_reg) (extern constructor writable_zero_reg writable_zero_reg)
;; Helpers for getting a particular real register
(decl xreg (u8) Reg)
(extern constructor xreg xreg)
(decl writable_vreg (u8) WritableReg)
(extern constructor writable_vreg writable_vreg)
(decl writable_xreg (u8) WritableReg)
(extern constructor writable_xreg writable_xreg)
;; Helper for emitting `MInst.Mov64` instructions.
(decl mov64_to_real (u8 Reg) Reg)
(rule (mov64_to_real num src)
(let ((dst WritableReg (writable_xreg num))
(_ Unit (emit (MInst.Mov (operand_size $I64) dst src))))
dst))
(decl mov64_from_real (u8) Reg)
(rule (mov64_from_real num)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.Mov (operand_size $I64) dst (xreg num)))))
dst))
;; Helper for emitting `MInst.MovZ` instructions. ;; Helper for emitting `MInst.MovZ` instructions.
(decl movz (MoveWideConst OperandSize) Reg) (decl movz (MoveWideConst OperandSize) Reg)
(rule (movz imm size) (rule (movz imm size)
@@ -1601,8 +1662,7 @@
(decl vec_rrr_mod (VecALUModOp Reg Reg Reg VectorSize) Reg) (decl vec_rrr_mod (VecALUModOp Reg Reg Reg VectorSize) Reg)
(rule (vec_rrr_mod op src1 src2 src3 size) (rule (vec_rrr_mod op src1 src2 src3 size)
(let ((dst WritableReg (temp_writable_reg $I8X16)) (let ((dst WritableReg (temp_writable_reg $I8X16))
(_1 Unit (emit (MInst.FpuMove128 dst src1))) (_1 Unit (emit (MInst.VecRRRMod op dst src1 src2 src3 size))))
(_2 Unit (emit (MInst.VecRRRMod op dst src2 src3 size))))
dst)) dst))
(decl fpu_rri (FPUOpRI Reg) Reg) (decl fpu_rri (FPUOpRI Reg) Reg)
@@ -1611,6 +1671,12 @@
(_ Unit (emit (MInst.FpuRRI op dst src)))) (_ Unit (emit (MInst.FpuRRI op dst src))))
dst)) dst))
(decl fpu_rri_mod (FPUOpRIMod Reg Reg) Reg)
(rule (fpu_rri_mod op dst_src src)
(let ((dst WritableReg (temp_writable_reg $F64))
(_ Unit (emit (MInst.FpuRRIMod op dst dst_src src))))
dst))
;; Helper for emitting `MInst.FpuRRR` instructions. ;; Helper for emitting `MInst.FpuRRR` instructions.
(decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg) (decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg)
(rule (fpu_rrr op src1 src2 size) (rule (fpu_rrr op src1 src2 size)
@@ -1790,29 +1856,33 @@
     dst))
 ;; Helper for emitting `MInst.VecTbl` instructions.
-(decl vec_tbl (Reg Reg bool) Reg)
-(rule (vec_tbl rn rm is_extension)
+(decl vec_tbl (Reg Reg) Reg)
+(rule (vec_tbl rn rm)
   (let ((dst WritableReg (temp_writable_reg $I8X16))
-        (_ Unit (emit (MInst.VecTbl dst rn rm is_extension))))
+        (_ Unit (emit (MInst.VecTbl dst rn rm))))
+    dst))
+(decl vec_tbl_ext (Reg Reg Reg) Reg)
+(rule (vec_tbl_ext ri rn rm)
+  (let ((dst WritableReg (temp_writable_reg $I8X16))
+        (_ Unit (emit (MInst.VecTblExt dst ri rn rm))))
     dst))
 ;; Helper for emitting `MInst.VecTbl2` instructions.
-;; - 2 register table vector lookups require consecutive table registers;
-;;   we satisfy this constraint by hardcoding the usage of v30 and v31.
-;; - Make sure that both args are in virtual regs, since it is not guaranteed
-;;   that we can get them safely to the temporaries if either is in a real
-;;   register.
-(decl vec_tbl2 (Reg Reg Reg bool Type) Reg)
-(rule (vec_tbl2 rn rn2 rm is_extension ty)
+(decl vec_tbl2 (Reg Reg Reg Type) Reg)
+(rule (vec_tbl2 rn rn2 rm ty)
   (let (
-      (temp WritableReg (writable_vreg 30))
-      (temp2 WritableReg (writable_vreg 31))
       (dst WritableReg (temp_writable_reg $I8X16))
-      (rn Reg (ensure_in_vreg rn ty))
-      (rn2 Reg (ensure_in_vreg rn2 ty))
-      (_ Unit (emit (MInst.FpuMove128 temp rn)))
-      (_ Unit (emit (MInst.FpuMove128 temp2 rn2)))
-      (_ Unit (emit (MInst.VecTbl2 dst temp temp2 rm is_extension)))
+      (_ Unit (emit (MInst.VecTbl2 dst rn rn2 rm)))
+    )
+    dst))
+;; Helper for emitting `MInst.VecTbl2Ext` instructions.
+(decl vec_tbl2_ext (Reg Reg Reg Reg Type) Reg)
+(rule (vec_tbl2_ext ri rn rn2 rm ty)
+  (let (
+      (dst WritableReg (temp_writable_reg $I8X16))
+      (_ Unit (emit (MInst.VecTbl2Ext dst ri rn rn2 rm)))
     )
     dst))
@@ -1830,22 +1900,18 @@
         (_ Unit (emit (MInst.VecRRPairLong op dst src))))
     dst))
-;; Helper for emitting `MInst.VecRRRLong` instructions, but for variants
-;; where the operation both reads and modifies the destination register.
-;;
-;; Currently this is only used for `VecRRRLongOp.Umlal*`
-(decl vec_rrrr_long (VecRRRLongOp Reg Reg Reg bool) Reg)
+;; Helper for emitting `MInst.VecRRRLongMod` instructions.
+(decl vec_rrrr_long (VecRRRLongModOp Reg Reg Reg bool) Reg)
 (rule (vec_rrrr_long op src1 src2 src3 high_half)
   (let ((dst WritableReg (temp_writable_reg $I8X16))
-        (_ Unit (emit (MInst.FpuMove128 dst src1)))
-        (_ Unit (emit (MInst.VecRRRLong op dst src2 src3 high_half))))
+        (_ Unit (emit (MInst.VecRRRLongMod op dst src1 src2 src3 high_half))))
     dst))
 ;; Helper for emitting `MInst.VecRRNarrow` instructions.
-(decl vec_rr_narrow (VecRRNarrowOp Reg ScalarSize) Reg)
-(rule (vec_rr_narrow op src size)
+(decl vec_rr_narrow_low (VecRRNarrowOp Reg ScalarSize) Reg)
+(rule (vec_rr_narrow_low op src size)
   (let ((dst WritableReg (temp_writable_reg $I8X16))
-        (_ Unit (emit (MInst.VecRRNarrow op dst src $false size))))
+        (_ Unit (emit (MInst.VecRRNarrowLow op dst src size))))
     dst))
 ;; Helper for emitting `MInst.VecRRNarrow` instructions which update the
@@ -1853,8 +1919,7 @@
 (decl vec_rr_narrow_high (VecRRNarrowOp Reg Reg ScalarSize) Reg)
 (rule (vec_rr_narrow_high op mod src size)
   (let ((dst WritableReg (temp_writable_reg $I8X16))
-        (_ Unit (emit (MInst.FpuMove128 dst mod)))
-        (_ Unit (emit (MInst.VecRRNarrow op dst src $true size))))
+        (_ Unit (emit (MInst.VecRRNarrowHigh op dst mod src size))))
     dst))
;; Helper for emitting `MInst.VecRRLong` instructions. ;; Helper for emitting `MInst.VecRRLong` instructions.
@@ -1897,16 +1962,14 @@
(decl mov_to_vec (Reg Reg u8 VectorSize) Reg) (decl mov_to_vec (Reg Reg u8 VectorSize) Reg)
(rule (mov_to_vec src1 src2 lane size) (rule (mov_to_vec src1 src2 lane size)
(let ((dst WritableReg (temp_writable_reg $I8X16)) (let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.FpuMove128 dst src1))) (_ Unit (emit (MInst.MovToVec dst src1 src2 lane size))))
(_ Unit (emit (MInst.MovToVec dst src2 lane size))))
dst)) dst))
;; Helper for emitting `MInst.VecMovElement` instructions. ;; Helper for emitting `MInst.VecMovElement` instructions.
(decl mov_vec_elem (Reg Reg u8 u8 VectorSize) Reg) (decl mov_vec_elem (Reg Reg u8 u8 VectorSize) Reg)
(rule (mov_vec_elem src1 src2 dst_idx src_idx size) (rule (mov_vec_elem src1 src2 dst_idx src_idx size)
(let ((dst WritableReg (temp_writable_reg $I8X16)) (let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.FpuMove128 dst src1))) (_ Unit (emit (MInst.VecMovElement dst src1 src2 dst_idx src_idx size))))
(_ Unit (emit (MInst.VecMovElement dst src2 dst_idx src_idx size))))
dst)) dst))
;; Helper for emitting `MInst.MovFromVec` instructions. ;; Helper for emitting `MInst.MovFromVec` instructions.
@@ -2104,15 +2167,15 @@
;; Helper for generating `xtn` instructions. ;; Helper for generating `xtn` instructions.
(decl xtn (Reg ScalarSize) Reg) (decl xtn (Reg ScalarSize) Reg)
(rule (xtn x size) (vec_rr_narrow (VecRRNarrowOp.Xtn) x size)) (rule (xtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Xtn) x size))
;; Helper for generating `fcvtn` instructions. ;; Helper for generating `fcvtn` instructions.
(decl fcvtn (Reg ScalarSize) Reg) (decl fcvtn (Reg ScalarSize) Reg)
(rule (fcvtn x size) (vec_rr_narrow (VecRRNarrowOp.Fcvtn) x size)) (rule (fcvtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Fcvtn) x size))
;; Helper for generating `sqxtn` instructions. ;; Helper for generating `sqxtn` instructions.
(decl sqxtn (Reg ScalarSize) Reg) (decl sqxtn (Reg ScalarSize) Reg)
(rule (sqxtn x size) (vec_rr_narrow (VecRRNarrowOp.Sqxtn) x size)) (rule (sqxtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Sqxtn) x size))
;; Helper for generating `sqxtn2` instructions. ;; Helper for generating `sqxtn2` instructions.
(decl sqxtn2 (Reg Reg ScalarSize) Reg) (decl sqxtn2 (Reg Reg ScalarSize) Reg)
@@ -2120,7 +2183,7 @@
;; Helper for generating `sqxtun` instructions. ;; Helper for generating `sqxtun` instructions.
(decl sqxtun (Reg ScalarSize) Reg) (decl sqxtun (Reg ScalarSize) Reg)
(rule (sqxtun x size) (vec_rr_narrow (VecRRNarrowOp.Sqxtun) x size)) (rule (sqxtun x size) (vec_rr_narrow_low (VecRRNarrowOp.Sqxtun) x size))
;; Helper for generating `sqxtun2` instructions. ;; Helper for generating `sqxtun2` instructions.
(decl sqxtun2 (Reg Reg ScalarSize) Reg) (decl sqxtun2 (Reg Reg ScalarSize) Reg)
@@ -2128,7 +2191,7 @@
;; Helper for generating `uqxtn` instructions. ;; Helper for generating `uqxtn` instructions.
(decl uqxtn (Reg ScalarSize) Reg) (decl uqxtn (Reg ScalarSize) Reg)
(rule (uqxtn x size) (vec_rr_narrow (VecRRNarrowOp.Uqxtn) x size)) (rule (uqxtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Uqxtn) x size))
;; Helper for generating `uqxtn2` instructions. ;; Helper for generating `uqxtn2` instructions.
(decl uqxtn2 (Reg Reg ScalarSize) Reg) (decl uqxtn2 (Reg Reg ScalarSize) Reg)
@@ -2187,7 +2250,7 @@
;; Helper for generating `umlal32` instructions. ;; Helper for generating `umlal32` instructions.
(decl umlal32 (Reg Reg Reg bool) Reg) (decl umlal32 (Reg Reg Reg bool) Reg)
(rule (umlal32 x y z high_half) (vec_rrrr_long (VecRRRLongOp.Umlal32) x y z high_half)) (rule (umlal32 x y z high_half) (vec_rrrr_long (VecRRRLongModOp.Umlal32) x y z high_half))
;; Helper for generating `smull8` instructions. ;; Helper for generating `smull8` instructions.
(decl smull8 (Reg Reg bool) Reg) (decl smull8 (Reg Reg bool) Reg)
@@ -2719,8 +2782,7 @@
(rule (lse_atomic_cas addr expect replace ty) (rule (lse_atomic_cas addr expect replace ty)
(let ( (let (
(dst WritableReg (temp_writable_reg ty)) (dst WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.Mov (operand_size ty) dst expect))) (_ Unit (emit (MInst.AtomicCAS dst expect replace addr ty)))
(_ Unit (emit (MInst.AtomicCAS dst replace addr ty)))
) )
dst)) dst))
@@ -2730,16 +2792,13 @@
 ;; regs, and that's not guaranteed safe if either is in a real reg.
 ;; - Move the args to the preordained AtomicRMW input regs
 ;; - And finally, copy the preordained AtomicRMW output reg to its destination.
-(decl atomic_rmw_loop (AtomicRMWLoopOp Value Value Type) Reg)
-(rule (atomic_rmw_loop op p arg2 ty)
-  (let (
-      (v_addr Reg (ensure_in_vreg p $I64))
-      (v_arg2 Reg (ensure_in_vreg arg2 $I64))
-      (r_addr Reg (mov64_to_real 25 v_addr))
-      (r_arg2 Reg (mov64_to_real 26 v_arg2))
-      (_ Unit (emit (MInst.AtomicRMWLoop ty op)))
-    )
-    (mov64_from_real 27)))
+(decl atomic_rmw_loop (AtomicRMWLoopOp Reg Reg Type) Reg)
+(rule (atomic_rmw_loop op addr operand ty)
+  (let ((dst WritableReg (temp_writable_reg $I64))
+        (scratch1 WritableReg (temp_writable_reg $I64))
+        (scratch2 WritableReg (temp_writable_reg $I64))
+        (_ Unit (emit (MInst.AtomicRMWLoop ty op addr operand dst scratch1 scratch2))))
+    dst))
 ;; Helper for emitting `MInst.AtomicCASLoop` instructions.
 ;; This is very similar to, but not identical to, the AtomicRmw case. Note
@@ -2749,21 +2808,10 @@
 ;; for `atomic_rmw_loop` above.
 (decl atomic_cas_loop (Reg Reg Reg Type) Reg)
 (rule (atomic_cas_loop addr expect replace ty)
-  (let (
-      (v_addr Reg (ensure_in_vreg addr $I64))
-      (v_exp Reg (ensure_in_vreg expect $I64))
-      (v_rep Reg (ensure_in_vreg replace $I64))
-      ;; Move the args to the preordained AtomicCASLoop input regs
-      (r_addr Reg (mov64_to_real 25 v_addr))
-      (r_exp Reg (mov64_to_real 26 v_exp))
-      (r_rep Reg (mov64_to_real 28 v_rep))
-      ;; Now the AtomicCASLoop itself, implemented in the normal way, with a
-      ;; load-exclusive, store-exclusive loop
-      (_ Unit (emit (MInst.AtomicCASLoop ty)))
-    )
-    ;; And finally, copy the preordained AtomicCASLoop output reg to its destination.
-    ;; Also, x24 and x28 are trashed.
-    (mov64_from_real 27)))
+  (let ((dst WritableReg (temp_writable_reg $I64))
+        (scratch WritableReg (temp_writable_reg $I64))
+        (_ Unit (emit (MInst.AtomicCASLoop ty addr expect replace dst scratch))))
+    dst))
 ;; Helper for emitting `MInst.MovPReg` instructions.
 (decl mov_preg (PReg) Reg)
@@ -2811,15 +2859,13 @@
(decl fcopy_sign (Reg Reg Type) Reg) (decl fcopy_sign (Reg Reg Type) Reg)
(rule (fcopy_sign x y (ty_scalar_float ty)) (rule (fcopy_sign x y (ty_scalar_float ty))
(let ((dst WritableReg (temp_writable_reg $F64)) (let ((dst WritableReg (temp_writable_reg $F64))
(_ Unit (emit (MInst.FpuMove64 dst x)))
(tmp Reg (fpu_rri (fpu_op_ri_ushr (ty_bits ty) (max_shift ty)) y)) (tmp Reg (fpu_rri (fpu_op_ri_ushr (ty_bits ty) (max_shift ty)) y))
(_ Unit (emit (MInst.FpuRRI (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst tmp)))) (_ Unit (emit (MInst.FpuRRIMod (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst x tmp))))
dst)) dst))
(rule (fcopy_sign x y ty @ (multi_lane _ _)) (rule (fcopy_sign x y ty @ (multi_lane _ _))
(let ((dst WritableReg (temp_writable_reg $I8X16)) (let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.FpuMove128 dst x)))
(tmp Reg (vec_shift_imm (VecShiftImmOp.Ushr) (max_shift (lane_type ty)) y (vector_size ty))) (tmp Reg (vec_shift_imm (VecShiftImmOp.Ushr) (max_shift (lane_type ty)) y (vector_size ty)))
(_ Unit (emit (MInst.VecShiftImmMod (VecShiftImmModOp.Sli) dst tmp (vector_size ty) (max_shift (lane_type ty)))))) (_ Unit (emit (MInst.VecShiftImmMod (VecShiftImmModOp.Sli) dst x tmp (vector_size ty) (max_shift (lane_type ty))))))
dst)) dst))
;; Helpers for generating `MInst.FpuToInt` instructions. ;; Helpers for generating `MInst.FpuToInt` instructions.


@@ -3,7 +3,7 @@
use crate::ir::types::*; use crate::ir::types::*;
use crate::ir::Type; use crate::ir::Type;
use crate::isa::aarch64::inst::*; use crate::isa::aarch64::inst::*;
use crate::machinst::{ty_bits, MachLabel, PrettyPrint, Reg, Writable}; use crate::machinst::{ty_bits, MachLabel, PrettyPrint, Reg};
use core::convert::Into; use core::convert::Into;
use std::string::String; use std::string::String;
@@ -122,9 +122,11 @@ pub enum AMode {
// Real ARM64 addressing modes: // Real ARM64 addressing modes:
// //
/// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation. /// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation.
PostIndexed(Writable<Reg>, SImm9), /// Specialized here to SP so we don't have to emit regalloc metadata.
SPPostIndexed(SImm9),
/// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation. /// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation.
PreIndexed(Writable<Reg>, SImm9), /// Specialized here to SP so we don't have to emit regalloc metadata.
SPPreIndexed(SImm9),
// N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to // N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to
// what the ISA calls the "register offset" addressing mode. We split out // what the ISA calls the "register offset" addressing mode. We split out
@@ -220,10 +222,12 @@ impl AMode {
&AMode::RegExtended(r1, r2, ext) => { &AMode::RegExtended(r1, r2, ext) => {
AMode::RegExtended(allocs.next(r1), allocs.next(r2), ext) AMode::RegExtended(allocs.next(r1), allocs.next(r2), ext)
} }
&AMode::PreIndexed(reg, simm9) => AMode::PreIndexed(allocs.next_writable(reg), simm9), // Note that SP is not managed by regalloc, so there is no register to report in the
&AMode::PostIndexed(reg, simm9) => AMode::PostIndexed(allocs.next_writable(reg), simm9), // pre/post-indexed amodes.
&AMode::RegOffset(r, off, ty) => AMode::RegOffset(allocs.next(r), off, ty), &AMode::RegOffset(r, off, ty) => AMode::RegOffset(allocs.next(r), off, ty),
&AMode::FPOffset(..) &AMode::SPPreIndexed(..)
| &AMode::SPPostIndexed(..)
| &AMode::FPOffset(..)
| &AMode::SPOffset(..) | &AMode::SPOffset(..)
| &AMode::NominalSPOffset(..) | &AMode::NominalSPOffset(..)
| AMode::Label(..) => self.clone(), | AMode::Label(..) => self.clone(),
@@ -235,8 +239,8 @@ impl AMode {
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum PairAMode { pub enum PairAMode {
SignedOffset(Reg, SImm7Scaled), SignedOffset(Reg, SImm7Scaled),
PreIndexed(Writable<Reg>, SImm7Scaled), SPPreIndexed(SImm7Scaled),
PostIndexed(Writable<Reg>, SImm7Scaled), SPPostIndexed(SImm7Scaled),
} }
impl PairAMode { impl PairAMode {
@@ -246,12 +250,7 @@ impl PairAMode {
&PairAMode::SignedOffset(reg, simm7scaled) => { &PairAMode::SignedOffset(reg, simm7scaled) => {
PairAMode::SignedOffset(allocs.next(reg), simm7scaled) PairAMode::SignedOffset(allocs.next(reg), simm7scaled)
} }
&PairAMode::PreIndexed(reg, simm7scaled) => { &PairAMode::SPPreIndexed(..) | &PairAMode::SPPostIndexed(..) => self.clone(),
PairAMode::PreIndexed(allocs.next_writable(reg), simm7scaled)
}
&PairAMode::PostIndexed(reg, simm7scaled) => {
PairAMode::PostIndexed(allocs.next_writable(reg), simm7scaled)
}
} }
} }
} }
@@ -470,15 +469,13 @@ impl PrettyPrint for AMode {
format!("[{}, {}, {}]", r1, r2, op) format!("[{}, {}, {}]", r1, r2, op)
} }
&AMode::Label(ref label) => label.pretty_print(0, allocs), &AMode::Label(ref label) => label.pretty_print(0, allocs),
&AMode::PreIndexed(r, simm9) => { &AMode::SPPreIndexed(simm9) => {
let r = pretty_print_reg(r.to_reg(), allocs);
let simm9 = simm9.pretty_print(8, allocs); let simm9 = simm9.pretty_print(8, allocs);
format!("[{}, {}]!", r, simm9) format!("[sp, {}]!", simm9)
} }
&AMode::PostIndexed(r, simm9) => { &AMode::SPPostIndexed(simm9) => {
let r = pretty_print_reg(r.to_reg(), allocs);
let simm9 = simm9.pretty_print(8, allocs); let simm9 = simm9.pretty_print(8, allocs);
format!("[{}], {}", r, simm9) format!("[sp], {}", simm9)
} }
// Eliminated by `mem_finalize()`. // Eliminated by `mem_finalize()`.
&AMode::SPOffset(..) &AMode::SPOffset(..)
@@ -503,15 +500,13 @@ impl PrettyPrint for PairAMode {
format!("[{}]", reg) format!("[{}]", reg)
} }
} }
&PairAMode::PreIndexed(reg, simm7) => { &PairAMode::SPPreIndexed(simm7) => {
let reg = pretty_print_reg(reg.to_reg(), allocs);
let simm7 = simm7.pretty_print(8, allocs); let simm7 = simm7.pretty_print(8, allocs);
format!("[{}, {}]!", reg, simm7) format!("[sp, {}]!", simm7)
} }
&PairAMode::PostIndexed(reg, simm7) => { &PairAMode::SPPostIndexed(simm7) => {
let reg = pretty_print_reg(reg.to_reg(), allocs);
let simm7 = simm7.pretty_print(8, allocs); let simm7 = simm7.pretty_print(8, allocs);
format!("[{}], {}", reg, simm7) format!("[sp], {}", simm7)
} }
} }
} }


@@ -184,7 +184,6 @@ fn enc_move_wide(op: MoveWideOp, rd: Writable<Reg>, imm: MoveWideConst, size: Op
let op = match op { let op = match op {
MoveWideOp::MovN => 0b00, MoveWideOp::MovN => 0b00,
MoveWideOp::MovZ => 0b10, MoveWideOp::MovZ => 0b10,
MoveWideOp::MovK => 0b11,
}; };
0x12800000 0x12800000
| size.sf_bit() << 31 | size.sf_bit() << 31
@@ -194,6 +193,15 @@ fn enc_move_wide(op: MoveWideOp, rd: Writable<Reg>, imm: MoveWideConst, size: Op
| machreg_to_gpr(rd.to_reg()) | machreg_to_gpr(rd.to_reg())
} }
fn enc_movk(rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32 {
assert!(imm.shift <= 0b11);
0x72800000
| size.sf_bit() << 31
| u32::from(imm.shift) << 21
| u32::from(imm.bits) << 5
| machreg_to_gpr(rd.to_reg())
}
fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 { fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
(op_31_22 << 22) (op_31_22 << 22)
| (simm7.bits() << 15) | (simm7.bits() << 15)
@@ -1040,12 +1048,12 @@ impl MachInstEmit for Inst {
_ => panic!("Unspported size for LDR from constant pool!"), _ => panic!("Unspported size for LDR from constant pool!"),
} }
} }
&AMode::PreIndexed(reg, simm9) => { &AMode::SPPreIndexed(simm9) => {
let reg = allocs.next(reg.to_reg()); let reg = stack_reg();
sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd)); sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
} }
&AMode::PostIndexed(reg, simm9) => { &AMode::SPPostIndexed(simm9) => {
let reg = allocs.next(reg.to_reg()); let reg = stack_reg();
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd)); sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
} }
// Eliminated by `mem_finalize()` above. // Eliminated by `mem_finalize()` above.
@@ -1134,12 +1142,12 @@ impl MachInstEmit for Inst {
&AMode::Label(..) => { &AMode::Label(..) => {
panic!("Store to a MemLabel not implemented!"); panic!("Store to a MemLabel not implemented!");
} }
&AMode::PreIndexed(reg, simm9) => { &AMode::SPPreIndexed(simm9) => {
let reg = allocs.next(reg.to_reg()); let reg = stack_reg();
sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd)); sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
} }
&AMode::PostIndexed(reg, simm9) => { &AMode::SPPostIndexed(simm9) => {
let reg = allocs.next(reg.to_reg()); let reg = stack_reg();
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd)); sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
} }
// Eliminated by `mem_finalize()` above. // Eliminated by `mem_finalize()` above.
@@ -1170,14 +1178,14 @@ impl MachInstEmit for Inst {
let reg = allocs.next(reg); let reg = allocs.next(reg);
sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2)); sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
} }
&PairAMode::PreIndexed(reg, simm7) => { &PairAMode::SPPreIndexed(simm7) => {
assert_eq!(simm7.scale_ty, I64); assert_eq!(simm7.scale_ty, I64);
let reg = allocs.next(reg.to_reg()); let reg = stack_reg();
sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2)); sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2));
} }
&PairAMode::PostIndexed(reg, simm7) => { &PairAMode::SPPostIndexed(simm7) => {
assert_eq!(simm7.scale_ty, I64); assert_eq!(simm7.scale_ty, I64);
let reg = allocs.next(reg.to_reg()); let reg = stack_reg();
sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2)); sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2));
} }
} }
@@ -1203,14 +1211,14 @@ impl MachInstEmit for Inst {
let reg = allocs.next(reg); let reg = allocs.next(reg);
sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2)); sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
} }
&PairAMode::PreIndexed(reg, simm7) => { &PairAMode::SPPreIndexed(simm7) => {
assert_eq!(simm7.scale_ty, I64); assert_eq!(simm7.scale_ty, I64);
let reg = allocs.next(reg.to_reg()); let reg = stack_reg();
sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2)); sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2));
} }
&PairAMode::PostIndexed(reg, simm7) => { &PairAMode::SPPostIndexed(simm7) => {
assert_eq!(simm7.scale_ty, I64); assert_eq!(simm7.scale_ty, I64);
let reg = allocs.next(reg.to_reg()); let reg = stack_reg();
sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2)); sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2));
} }
} }
@@ -1249,14 +1257,14 @@ impl MachInstEmit for Inst {
let reg = allocs.next(reg); let reg = allocs.next(reg);
sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2)); sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
} }
&PairAMode::PreIndexed(reg, simm7) => { &PairAMode::SPPreIndexed(simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
let reg = allocs.next(reg.to_reg()); let reg = stack_reg();
sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2)); sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2));
} }
&PairAMode::PostIndexed(reg, simm7) => { &PairAMode::SPPostIndexed(simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
let reg = allocs.next(reg.to_reg()); let reg = stack_reg();
sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2)); sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2));
} }
} }
@@ -1295,14 +1303,14 @@ impl MachInstEmit for Inst {
let reg = allocs.next(reg); let reg = allocs.next(reg);
sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2)); sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
} }
&PairAMode::PreIndexed(reg, simm7) => { &PairAMode::SPPreIndexed(simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
let reg = allocs.next(reg.to_reg()); let reg = stack_reg();
sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2)); sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2));
} }
&PairAMode::PostIndexed(reg, simm7) => { &PairAMode::SPPostIndexed(simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
let reg = allocs.next(reg.to_reg()); let reg = stack_reg();
sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2)); sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2));
} }
} }
@@ -1356,6 +1364,12 @@ impl MachInstEmit for Inst {
let rd = allocs.next_writable(rd); let rd = allocs.next_writable(rd);
sink.put4(enc_move_wide(op, rd, imm, size)); sink.put4(enc_move_wide(op, rd, imm, size));
} }
&Inst::MovK { rd, rn, imm, size } => {
let rn = allocs.next(rn);
let rd = allocs.next_writable(rd);
debug_assert_eq!(rn, rd.to_reg());
sink.put4(enc_movk(rd, imm, size));
}
&Inst::CSel { rd, rn, rm, cond } => { &Inst::CSel { rd, rn, rm, cond } => {
let rd = allocs.next_writable(rd); let rd = allocs.next_writable(rd);
let rn = allocs.next(rn); let rn = allocs.next(rn);
@@ -1403,7 +1417,7 @@ impl MachInstEmit for Inst {
let rn = allocs.next(rn); let rn = allocs.next(rn);
sink.put4(enc_acq_rel(ty, op, rs, rt, rn)); sink.put4(enc_acq_rel(ty, op, rs, rt, rn));
} }
&Inst::AtomicRMWLoop { ty, op } => { &Inst::AtomicRMWLoop { ty, op, .. } => {
/* Emit this: /* Emit this:
again: again:
ldaxr{,b,h} x/w27, [x25] ldaxr{,b,h} x/w27, [x25]
@@ -1581,8 +1595,10 @@ impl MachInstEmit for Inst {
)); ));
sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19); sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
} }
&Inst::AtomicCAS { rs, rt, rn, ty } => { &Inst::AtomicCAS { rd, rs, rt, rn, ty } => {
let rs = allocs.next_writable(rs); let rd = allocs.next_writable(rd);
let rs = allocs.next(rs);
debug_assert_eq!(rd.to_reg(), rs);
let rt = allocs.next(rt); let rt = allocs.next(rt);
let rn = allocs.next(rn); let rn = allocs.next(rn);
let size = match ty { let size = match ty {
@@ -1593,9 +1609,9 @@ impl MachInstEmit for Inst {
_ => panic!("Unsupported type: {}", ty), _ => panic!("Unsupported type: {}", ty),
}; };
sink.put4(enc_cas(size, rs, rt, rn)); sink.put4(enc_cas(size, rd, rt, rn));
} }
&Inst::AtomicCASLoop { ty } => { &Inst::AtomicCASLoop { ty, .. } => {
/* Emit this: /* Emit this:
again: again:
ldaxr{,b,h} x/w27, [x25] ldaxr{,b,h} x/w27, [x25]
@@ -1788,7 +1804,15 @@ impl MachInstEmit for Inst {
| machreg_to_vec(rd.to_reg()), | machreg_to_vec(rd.to_reg()),
) )
} }
FPUOpRI::Sli64(imm) => { }
}
&Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
let rd = allocs.next_writable(rd);
let ri = allocs.next(ri);
let rn = allocs.next(rn);
debug_assert_eq!(rd.to_reg(), ri);
match fpu_op {
FPUOpRIMod::Sli64(imm) => {
debug_assert_eq!(64, imm.lane_size_in_bits); debug_assert_eq!(64, imm.lane_size_in_bits);
sink.put4( sink.put4(
0b01_1_111110_0000000_010101_00000_00000 0b01_1_111110_0000000_010101_00000_00000
@@ -1797,7 +1821,7 @@ impl MachInstEmit for Inst {
| machreg_to_vec(rd.to_reg()), | machreg_to_vec(rd.to_reg()),
) )
} }
FPUOpRI::Sli32(imm) => { FPUOpRIMod::Sli32(imm) => {
debug_assert_eq!(32, imm.lane_size_in_bits); debug_assert_eq!(32, imm.lane_size_in_bits);
sink.put4( sink.put4(
0b0_0_1_011110_0000000_010101_00000_00000 0b0_0_1_011110_0000000_010101_00000_00000
@@ -2036,11 +2060,14 @@ impl MachInstEmit for Inst {
&Inst::VecShiftImmMod { &Inst::VecShiftImmMod {
op, op,
rd, rd,
ri,
rn, rn,
size, size,
imm, imm,
} => { } => {
let rd = allocs.next_writable(rd); let rd = allocs.next_writable(rd);
let ri = allocs.next(ri);
debug_assert_eq!(rd.to_reg(), ri);
let rn = allocs.next(rn); let rn = allocs.next(rn);
let (is_shr, mut template) = match op { let (is_shr, mut template) = match op {
VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32), VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
@@ -2096,30 +2123,43 @@ impl MachInstEmit for Inst {
); );
} }
} }
&Inst::VecTbl { &Inst::VecTbl { rd, rn, rm } => {
rd,
rn,
rm,
is_extension,
} => {
let rn = allocs.next(rn); let rn = allocs.next(rn);
let rm = allocs.next(rm); let rm = allocs.next(rm);
let rd = allocs.next_writable(rd); let rd = allocs.next_writable(rd);
sink.put4(enc_tbl(is_extension, 0b00, rd, rn, rm)); sink.put4(enc_tbl(/* is_extension = */ false, 0b00, rd, rn, rm));
} }
&Inst::VecTbl2 { &Inst::VecTblExt { rd, ri, rn, rm } => {
rd, let rn = allocs.next(rn);
rn, let rm = allocs.next(rm);
rn2, let rd = allocs.next_writable(rd);
rm, let ri = allocs.next(ri);
is_extension, debug_assert_eq!(rd.to_reg(), ri);
} => { sink.put4(enc_tbl(/* is_extension = */ true, 0b00, rd, rn, rm));
}
&Inst::VecTbl2 { rd, rn, rn2, rm } => {
let rn = allocs.next(rn); let rn = allocs.next(rn);
let rn2 = allocs.next(rn2); let rn2 = allocs.next(rn2);
let rm = allocs.next(rm); let rm = allocs.next(rm);
let rd = allocs.next_writable(rd); let rd = allocs.next_writable(rd);
assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32); assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm)); sink.put4(enc_tbl(/* is_extension = */ false, 0b01, rd, rn, rm));
}
&Inst::VecTbl2Ext {
rd,
ri,
rn,
rn2,
rm,
} => {
let rn = allocs.next(rn);
let rn2 = allocs.next(rn2);
let rm = allocs.next(rm);
let rd = allocs.next_writable(rd);
let ri = allocs.next(ri);
debug_assert_eq!(rd.to_reg(), ri);
assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
sink.put4(enc_tbl(/* is_extension = */ true, 0b01, rd, rn, rm));
} }
&Inst::FpuCmp { size, rn, rm } => { &Inst::FpuCmp { size, rn, rm } => {
let rn = allocs.next(rn); let rn = allocs.next(rn);
@@ -2254,8 +2294,16 @@ impl MachInstEmit for Inst {
| machreg_to_vec(rd.to_reg()), | machreg_to_vec(rd.to_reg()),
); );
} }
&Inst::MovToVec { rd, rn, idx, size } => { &Inst::MovToVec {
rd,
ri,
rn,
idx,
size,
} => {
let rd = allocs.next_writable(rd); let rd = allocs.next_writable(rd);
let ri = allocs.next(ri);
debug_assert_eq!(rd.to_reg(), ri);
let rn = allocs.next(rn); let rn = allocs.next(rn);
let (imm5, shift) = match size.lane_size() { let (imm5, shift) = match size.lane_size() {
ScalarSize::Size8 => (0b00001, 1), ScalarSize::Size8 => (0b00001, 1),
@@ -2475,15 +2523,26 @@ impl MachInstEmit for Inst {
rn, rn,
)); ));
} }
&Inst::VecRRNarrow { &Inst::VecRRNarrowLow {
op, op,
rd, rd,
rn, rn,
high_half,
lane_size, lane_size,
}
| &Inst::VecRRNarrowHigh {
op,
rd,
rn,
lane_size,
..
} => { } => {
let rn = allocs.next(rn); let rn = allocs.next(rn);
let rd = allocs.next_writable(rd); let rd = allocs.next_writable(rd);
let high_half = match self {
&Inst::VecRRNarrowLow { .. } => false,
&Inst::VecRRNarrowHigh { .. } => true,
_ => unreachable!(),
};
let size = match lane_size { let size = match lane_size {
ScalarSize::Size8 => 0b00, ScalarSize::Size8 => 0b00,
@@ -2516,12 +2575,15 @@ impl MachInstEmit for Inst {
} }
&Inst::VecMovElement { &Inst::VecMovElement {
rd, rd,
ri,
rn, rn,
dest_idx, dest_idx,
src_idx, src_idx,
size, size,
} => { } => {
let rd = allocs.next_writable(rd); let rd = allocs.next_writable(rd);
let ri = allocs.next(ri);
debug_assert_eq!(rd.to_reg(), ri);
let rn = allocs.next(rn); let rn = allocs.next(rn);
let (imm5, shift) = match size.lane_size() { let (imm5, shift) = match size.lane_size() {
ScalarSize::Size8 => (0b00001, 1), ScalarSize::Size8 => (0b00001, 1),
@@ -2569,9 +2631,34 @@ impl MachInstEmit for Inst {
VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1), VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1),
VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1), VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1),
VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1), VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1),
VecRRRLongOp::Umlal8 => (0b1, 0b00, 0b0), };
VecRRRLongOp::Umlal16 => (0b1, 0b01, 0b0), sink.put4(enc_vec_rrr_long(
VecRRRLongOp::Umlal32 => (0b1, 0b10, 0b0), high_half as u32,
u,
size,
bit14,
rm,
rn,
rd,
));
}
&Inst::VecRRRLongMod {
rd,
ri,
rn,
rm,
alu_op,
high_half,
} => {
let rd = allocs.next_writable(rd);
let ri = allocs.next(ri);
debug_assert_eq!(rd.to_reg(), ri);
let rn = allocs.next(rn);
let rm = allocs.next(rm);
let (u, size, bit14) = match alu_op {
VecRRRLongModOp::Umlal8 => (0b1, 0b00, 0b0),
VecRRRLongModOp::Umlal16 => (0b1, 0b01, 0b0),
VecRRRLongModOp::Umlal32 => (0b1, 0b10, 0b0),
}; };
sink.put4(enc_vec_rrr_long( sink.put4(enc_vec_rrr_long(
high_half as u32, high_half as u32,
@@ -2702,12 +2789,15 @@ impl MachInstEmit for Inst {
} }
&Inst::VecRRRMod { &Inst::VecRRRMod {
rd, rd,
ri,
rn, rn,
rm, rm,
alu_op, alu_op,
size, size,
} => { } => {
let rd = allocs.next_writable(rd); let rd = allocs.next_writable(rd);
let ri = allocs.next(ri);
debug_assert_eq!(rd.to_reg(), ri);
let rn = allocs.next(rn); let rn = allocs.next(rn);
let rm = allocs.next(rm); let rm = allocs.next(rm);
let (q, _enc_size) = size.enc_size(); let (q, _enc_size) = size.enc_size();

File diff suppressed because it is too large.


@@ -39,7 +39,7 @@ pub use crate::isa::aarch64::lower::isle::generated_code::{
ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3, ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3,
FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp, FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp,
VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp,
VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp, VecRRRLongModOp, VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
}; };
/// A floating-point unit (FPU) operation with two args, a register and an immediate. /// A floating-point unit (FPU) operation with two args, a register and an immediate.
@@ -49,6 +49,13 @@ pub enum FPUOpRI {
UShr32(FPURightShiftImm), UShr32(FPURightShiftImm),
/// Unsigned right shift. Rd = Rn << #imm /// Unsigned right shift. Rd = Rn << #imm
UShr64(FPURightShiftImm), UShr64(FPURightShiftImm),
}
/// A floating-point unit (FPU) operation with two args, a register and
/// an immediate that modifies its dest (so takes that input value as a
/// separate virtual register).
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRIMod {
/// Shift left and insert. Rd |= Rn << #imm /// Shift left and insert. Rd |= Rn << #imm
Sli32(FPULeftShiftImm), Sli32(FPULeftShiftImm),
/// Shift left and insert. Rd |= Rn << #imm /// Shift left and insert. Rd |= Rn << #imm
@@ -197,9 +204,9 @@ impl Inst {
} }
} else { } else {
let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap(); let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
insts.push(Inst::MovWide { insts.push(Inst::MovK {
op: MoveWideOp::MovK,
rd, rd,
rn: rd.to_reg(), // Redef the same virtual register.
imm, imm,
size, size,
}); });
@@ -550,9 +557,7 @@ fn memarg_operands<F: Fn(VReg) -> VReg>(memarg: &AMode, collector: &mut OperandC
collector.reg_use(r2); collector.reg_use(r2);
} }
&AMode::Label(..) => {} &AMode::Label(..) => {}
&AMode::PreIndexed(reg, ..) | &AMode::PostIndexed(reg, ..) => { &AMode::SPPreIndexed(..) | &AMode::SPPostIndexed(..) => {}
collector.reg_mod(reg);
}
&AMode::FPOffset(..) => {} &AMode::FPOffset(..) => {}
&AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => {} &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => {}
&AMode::RegOffset(r, ..) => { &AMode::RegOffset(r, ..) => {
@@ -570,9 +575,7 @@ fn pairmemarg_operands<F: Fn(VReg) -> VReg>(
&PairAMode::SignedOffset(reg, ..) => { &PairAMode::SignedOffset(reg, ..) => {
collector.reg_use(reg); collector.reg_use(reg);
} }
&PairAMode::PreIndexed(reg, ..) | &PairAMode::PostIndexed(reg, ..) => { &PairAMode::SPPreIndexed(..) | &PairAMode::SPPostIndexed(..) => {}
collector.reg_mod(reg);
}
} }
} }
@@ -657,10 +660,13 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
debug_assert!(rd.to_reg().is_virtual()); debug_assert!(rd.to_reg().is_virtual());
collector.reg_def(rd); collector.reg_def(rd);
} }
&Inst::MovWide { op, rd, .. } => match op { &Inst::MovK { rd, rn, .. } => {
MoveWideOp::MovK => collector.reg_mod(rd), collector.reg_use(rn);
_ => collector.reg_def(rd), collector.reg_reuse_def(rd, 0); // `rn` == `rd`.
}, }
&Inst::MovWide { rd, .. } => {
collector.reg_def(rd);
}
&Inst::CSel { rd, rn, rm, .. } => { &Inst::CSel { rd, rn, rm, .. } => {
collector.reg_def(rd); collector.reg_def(rd);
collector.reg_use(rn); collector.reg_use(rn);
@@ -681,13 +687,21 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
&Inst::CCmpImm { rn, .. } => { &Inst::CCmpImm { rn, .. } => {
collector.reg_use(rn); collector.reg_use(rn);
} }
&Inst::AtomicRMWLoop { op, .. } => { &Inst::AtomicRMWLoop {
collector.reg_use(xreg(25)); op,
collector.reg_use(xreg(26)); addr,
collector.reg_def(writable_xreg(24)); operand,
collector.reg_def(writable_xreg(27)); oldval,
scratch1,
scratch2,
..
} => {
collector.reg_fixed_use(addr, xreg(25));
collector.reg_fixed_use(operand, xreg(26));
collector.reg_fixed_def(oldval, xreg(27));
collector.reg_fixed_def(scratch1, xreg(24));
if op != AtomicRMWLoopOp::Xchg { if op != AtomicRMWLoopOp::Xchg {
collector.reg_def(writable_xreg(28)); collector.reg_fixed_def(scratch2, xreg(28));
} }
} }
&Inst::AtomicRMW { rs, rt, rn, .. } => { &Inst::AtomicRMW { rs, rt, rn, .. } => {
@@ -695,17 +709,25 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_def(rt); collector.reg_def(rt);
collector.reg_use(rn); collector.reg_use(rn);
} }
&Inst::AtomicCAS { rs, rt, rn, .. } => { &Inst::AtomicCAS { rd, rs, rt, rn, .. } => {
collector.reg_mod(rs); collector.reg_reuse_def(rd, 1); // reuse `rs`.
collector.reg_use(rs);
collector.reg_use(rt); collector.reg_use(rt);
collector.reg_use(rn); collector.reg_use(rn);
} }
&Inst::AtomicCASLoop { .. } => { &Inst::AtomicCASLoop {
collector.reg_use(xreg(25)); addr,
collector.reg_use(xreg(26)); expected,
collector.reg_use(xreg(28)); replacement,
collector.reg_def(writable_xreg(24)); oldval,
collector.reg_def(writable_xreg(27)); scratch,
..
} => {
collector.reg_fixed_use(addr, xreg(25));
collector.reg_fixed_use(expected, xreg(26));
collector.reg_fixed_use(replacement, xreg(28));
collector.reg_fixed_def(oldval, xreg(24));
collector.reg_fixed_def(scratch, xreg(27));
} }
&Inst::LoadAcquire { rt, rn, .. } => { &Inst::LoadAcquire { rt, rn, .. } => {
collector.reg_use(rn); collector.reg_use(rn);
@@ -741,11 +763,13 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_use(rn); collector.reg_use(rn);
collector.reg_use(rm); collector.reg_use(rm);
} }
&Inst::FpuRRI { fpu_op, rd, rn, .. } => { &Inst::FpuRRI { rd, rn, .. } => {
match fpu_op { collector.reg_def(rd);
FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => collector.reg_def(rd), collector.reg_use(rn);
FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => collector.reg_mod(rd), }
} &Inst::FpuRRIMod { rd, ri, rn, .. } => {
collector.reg_reuse_def(rd, 1); // reuse `ri`.
collector.reg_use(ri);
collector.reg_use(rn); collector.reg_use(rn);
} }
&Inst::FpuRRRR { rd, rn, rm, ra, .. } => { &Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
@@ -767,8 +791,9 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_def(rd); collector.reg_def(rd);
collector.reg_use(rn); collector.reg_use(rn);
} }
&Inst::VecShiftImmMod { rd, rn, .. } => { &Inst::VecShiftImmMod { rd, ri, rn, .. } => {
collector.reg_mod(rd); collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
collector.reg_use(ri);
collector.reg_use(rn); collector.reg_use(rn);
} }
&Inst::VecExtract { rd, rn, rm, .. } => { &Inst::VecExtract { rd, rn, rm, .. } => {
@@ -776,37 +801,42 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_use(rn); collector.reg_use(rn);
collector.reg_use(rm); collector.reg_use(rm);
} }
&Inst::VecTbl { &Inst::VecTbl { rd, rn, rm } => {
rd,
rn,
rm,
is_extension,
} => {
collector.reg_use(rn); collector.reg_use(rn);
collector.reg_use(rm); collector.reg_use(rm);
collector.reg_def(rd);
if is_extension {
collector.reg_mod(rd);
} else {
collector.reg_def(rd);
}
} }
&Inst::VecTbl2 { &Inst::VecTblExt { rd, ri, rn, rm } => {
collector.reg_use(rn);
collector.reg_use(rm);
collector.reg_reuse_def(rd, 3); // `rd` == `ri`.
collector.reg_use(ri);
}
&Inst::VecTbl2 { rd, rn, rn2, rm } => {
// Constrain to v30 / v31 so that we satisfy the "adjacent
// registers" constraint without use of pinned vregs in
// lowering.
collector.reg_fixed_use(rn, vreg(30));
collector.reg_fixed_use(rn2, vreg(31));
collector.reg_use(rm);
collector.reg_def(rd);
}
&Inst::VecTbl2Ext {
rd, rd,
ri,
rn, rn,
rn2, rn2,
rm, rm,
is_extension,
} => { } => {
collector.reg_use(rn); // Constrain to v30 / v31 so that we satisfy the "adjacent
collector.reg_use(rn2); // registers" constraint without use of pinned vregs in
// lowering.
collector.reg_fixed_use(rn, vreg(30));
collector.reg_fixed_use(rn2, vreg(31));
collector.reg_use(rm); collector.reg_use(rm);
collector.reg_reuse_def(rd, 4); // `rd` == `ri`.
if is_extension { collector.reg_use(ri);
collector.reg_mod(rd);
} else {
collector.reg_def(rd);
}
} }
&Inst::VecLoadReplicate { rd, rn, .. } => { &Inst::VecLoadReplicate { rd, rn, .. } => {
collector.reg_def(rd); collector.reg_def(rd);
@@ -900,8 +930,9 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
&Inst::FpuMoveFPImm { rd, .. } => { &Inst::FpuMoveFPImm { rd, .. } => {
collector.reg_def(rd); collector.reg_def(rd);
} }
&Inst::MovToVec { rd, rn, .. } => { &Inst::MovToVec { rd, ri, rn, .. } => {
collector.reg_mod(rd); collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
collector.reg_use(ri);
collector.reg_use(rn); collector.reg_use(rn);
} }
&Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => { &Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => {
@@ -926,38 +957,36 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_def(rd); collector.reg_def(rd);
collector.reg_use(rn); collector.reg_use(rn);
} }
&Inst::VecMovElement { rd, rn, .. } => { &Inst::VecMovElement { rd, ri, rn, .. } => {
collector.reg_mod(rd); collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
collector.reg_use(ri);
collector.reg_use(rn); collector.reg_use(rn);
} }
&Inst::VecRRLong { rd, rn, .. } => { &Inst::VecRRLong { rd, rn, .. } => {
collector.reg_def(rd); collector.reg_def(rd);
collector.reg_use(rn); collector.reg_use(rn);
} }
&Inst::VecRRNarrow { &Inst::VecRRNarrowLow { rd, rn, .. } => {
rd, rn, high_half, ..
} => {
collector.reg_use(rn); collector.reg_use(rn);
collector.reg_def(rd);
if high_half { }
collector.reg_mod(rd); &Inst::VecRRNarrowHigh { rd, ri, rn, .. } => {
} else { collector.reg_use(rn);
collector.reg_def(rd); collector.reg_reuse_def(rd, 2); // `rd` == `ri`.
} collector.reg_use(ri);
} }
&Inst::VecRRPair { rd, rn, .. } => { &Inst::VecRRPair { rd, rn, .. } => {
collector.reg_def(rd); collector.reg_def(rd);
collector.reg_use(rn); collector.reg_use(rn);
} }
&Inst::VecRRRLong { &Inst::VecRRRLong { rd, rn, rm, .. } => {
alu_op, rd, rn, rm, .. collector.reg_def(rd);
} => { collector.reg_use(rn);
match alu_op { collector.reg_use(rm);
VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => { }
collector.reg_mod(rd) &Inst::VecRRRLongMod { rd, ri, rn, rm, .. } => {
} collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
_ => collector.reg_def(rd), collector.reg_use(ri);
};
collector.reg_use(rn); collector.reg_use(rn);
collector.reg_use(rm); collector.reg_use(rm);
} }
@@ -970,8 +999,9 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_use(rn); collector.reg_use(rn);
collector.reg_use(rm); collector.reg_use(rm);
} }
&Inst::VecRRRMod { rd, rn, rm, .. } => { &Inst::VecRRRMod { rd, ri, rn, rm, .. } => {
collector.reg_mod(rd); collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
collector.reg_use(ri);
collector.reg_use(rn); collector.reg_use(rn);
collector.reg_use(rm); collector.reg_use(rm);
} }
@@ -1508,12 +1538,22 @@ impl Inst {
let op_str = match op { let op_str = match op {
MoveWideOp::MovZ => "movz", MoveWideOp::MovZ => "movz",
MoveWideOp::MovN => "movn", MoveWideOp::MovN => "movn",
MoveWideOp::MovK => "movk",
}; };
let rd = pretty_print_ireg(rd.to_reg(), size, allocs); let rd = pretty_print_ireg(rd.to_reg(), size, allocs);
let imm = imm.pretty_print(0, allocs); let imm = imm.pretty_print(0, allocs);
format!("{} {}, {}", op_str, rd, imm) format!("{} {}, {}", op_str, rd, imm)
} }
&Inst::MovK {
rd,
rn,
ref imm,
size,
} => {
let rn = pretty_print_ireg(rn, size, allocs);
let rd = pretty_print_ireg(rd.to_reg(), size, allocs);
let imm = imm.pretty_print(0, allocs);
format!("movk {}, {}, {}", rd, rn, imm)
}
&Inst::CSel { rd, rn, rm, cond } => { &Inst::CSel { rd, rn, rm, cond } => {
let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs);
let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs);
@@ -1589,75 +1629,45 @@ impl Inst {
}; };
format!("{}{} {}, {}, [{}]", op, ty_suffix, rs, rt, rn) format!("{}{} {}, {}, [{}]", op, ty_suffix, rs, rt, rn)
} }
&Inst::AtomicRMWLoop { ty, op, .. } => { &Inst::AtomicRMWLoop {
let ty_suffix = match ty { ty,
I8 => "b", op,
I16 => "h", addr,
_ => "", operand,
}; oldval,
let size = OperandSize::from_ty(ty); scratch1,
let r_addr = pretty_print_ireg(xreg(25), OperandSize::Size64, allocs); scratch2,
let r_arg2 = pretty_print_ireg(xreg(26), size, allocs); } => {
let r_status = pretty_print_ireg(xreg(24), OperandSize::Size32, allocs); let op = match op {
let r_tmp = pretty_print_ireg(xreg(27), size, allocs);
let mut r_dst = pretty_print_ireg(xreg(28), size, allocs);
let mut loop_str: String = "1: ".to_string();
loop_str.push_str(&format!("ldaxr{} {}, [{}]; ", ty_suffix, r_tmp, r_addr));
let op_str = match op {
AtomicRMWLoopOp::Add => "add", AtomicRMWLoopOp::Add => "add",
AtomicRMWLoopOp::Sub => "sub", AtomicRMWLoopOp::Sub => "sub",
AtomicRMWLoopOp::Eor => "eor", AtomicRMWLoopOp::Eor => "eor",
AtomicRMWLoopOp::Orr => "orr", AtomicRMWLoopOp::Orr => "orr",
AtomicRMWLoopOp::And => "and", AtomicRMWLoopOp::And => "and",
_ => "", AtomicRMWLoopOp::Nand => "nand",
AtomicRMWLoopOp::Smin => "smin",
AtomicRMWLoopOp::Smax => "smax",
AtomicRMWLoopOp::Umin => "umin",
AtomicRMWLoopOp::Umax => "umax",
AtomicRMWLoopOp::Xchg => "xchg",
}; };
let addr = pretty_print_ireg(addr, OperandSize::Size64, allocs);
if op_str.is_empty() { let operand = pretty_print_ireg(operand, OperandSize::Size64, allocs);
match op { let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64, allocs);
AtomicRMWLoopOp::Xchg => r_dst = r_arg2, let scratch1 = pretty_print_ireg(scratch1.to_reg(), OperandSize::Size64, allocs);
AtomicRMWLoopOp::Nand => { let scratch2 = pretty_print_ireg(scratch2.to_reg(), OperandSize::Size64, allocs);
loop_str.push_str(&format!("and {}, {}, {}; ", r_dst, r_tmp, r_arg2)); format!(
loop_str.push_str(&format!("mvn {}, {}; ", r_dst, r_dst)); "atomic_rmw_loop_{}_{} addr={} operand={} oldval={} scratch1={} scratch2={}",
} op,
_ => { ty.bits(),
if (op == AtomicRMWLoopOp::Smin || op == AtomicRMWLoopOp::Smax) addr,
&& (ty == I8 || ty == I16) operand,
{ oldval,
loop_str scratch1,
.push_str(&format!("sxt{} {}, {}; ", ty_suffix, r_tmp, r_tmp)); scratch2,
loop_str.push_str(&format!( )
"cmp {}, {}, sxt{}; ",
r_tmp, r_arg2, ty_suffix
));
} else {
loop_str.push_str(&format!("cmp {}, {}; ", r_tmp, r_arg2));
}
let cond = match op {
AtomicRMWLoopOp::Smin => "lt",
AtomicRMWLoopOp::Smax => "gt",
AtomicRMWLoopOp::Umin => "lo",
AtomicRMWLoopOp::Umax => "hi",
_ => unreachable!(),
};
loop_str.push_str(&format!(
"csel {}, {}, {}, {}; ",
r_dst, r_tmp, r_arg2, cond
));
}
};
} else {
loop_str.push_str(&format!("{} {}, {}, {}; ", op_str, r_dst, r_tmp, r_arg2));
}
loop_str.push_str(&format!(
"stlxr{} {}, {}, [{}]; ",
ty_suffix, r_status, r_dst, r_addr
));
loop_str.push_str(&format!("cbnz {}, 1b", r_status));
loop_str
} }
&Inst::AtomicCAS { rs, rt, rn, ty } => { &Inst::AtomicCAS { rd, rs, rt, rn, ty } => {
let op = match ty { let op = match ty {
I8 => "casalb", I8 => "casalb",
I16 => "casalh", I16 => "casalh",
@@ -1665,16 +1675,35 @@ impl Inst {
_ => panic!("Unsupported type: {}", ty), _ => panic!("Unsupported type: {}", ty),
}; };
let size = OperandSize::from_ty(ty); let size = OperandSize::from_ty(ty);
let rs = pretty_print_ireg(rs.to_reg(), size, allocs); let rd = pretty_print_ireg(rd.to_reg(), size, allocs);
let rs = pretty_print_ireg(rs, size, allocs);
let rt = pretty_print_ireg(rt, size, allocs); let rt = pretty_print_ireg(rt, size, allocs);
let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs);
format!("{} {}, {}, [{}]", op, rs, rt, rn) format!("{} {}, {}, {}, [{}]", op, rd, rs, rt, rn)
} }
&Inst::AtomicCASLoop { ty } => { &Inst::AtomicCASLoop {
ty,
addr,
expected,
replacement,
oldval,
scratch,
} => {
let addr = pretty_print_ireg(addr, OperandSize::Size64, allocs);
let expected = pretty_print_ireg(expected, OperandSize::Size64, allocs);
let replacement = pretty_print_ireg(replacement, OperandSize::Size64, allocs);
let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64, allocs);
let scratch = pretty_print_ireg(scratch.to_reg(), OperandSize::Size64, allocs);
format!( format!(
"atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}", "atomic_cas_loop_{} addr={}, expect={}, replacement={}, oldval={}, scratch={}",
ty.bits()) ty.bits(),
addr,
expected,
replacement,
oldval,
scratch,
)
} }
&Inst::LoadAcquire { &Inst::LoadAcquire {
access_ty, rt, rn, .. access_ty, rt, rn, ..
@@ -1777,8 +1806,6 @@ impl Inst {
let (op, imm, vector) = match fpu_op { let (op, imm, vector) = match fpu_op {
FPUOpRI::UShr32(imm) => ("ushr", imm.pretty_print(0, allocs), true), FPUOpRI::UShr32(imm) => ("ushr", imm.pretty_print(0, allocs), true),
FPUOpRI::UShr64(imm) => ("ushr", imm.pretty_print(0, allocs), false), FPUOpRI::UShr64(imm) => ("ushr", imm.pretty_print(0, allocs), false),
FPUOpRI::Sli32(imm) => ("sli", imm.pretty_print(0, allocs), true),
FPUOpRI::Sli64(imm) => ("sli", imm.pretty_print(0, allocs), false),
}; };
let (rd, rn) = if vector { let (rd, rn) = if vector {
@@ -1794,6 +1821,27 @@ impl Inst {
}; };
format!("{} {}, {}, {}", op, rd, rn, imm) format!("{} {}, {}, {}", op, rd, rn, imm)
} }
&Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
let (op, imm, vector) = match fpu_op {
FPUOpRIMod::Sli32(imm) => ("sli", imm.pretty_print(0, allocs), true),
FPUOpRIMod::Sli64(imm) => ("sli", imm.pretty_print(0, allocs), false),
};
let (rd, ri, rn) = if vector {
(
pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2, allocs),
pretty_print_vreg_vector(ri, VectorSize::Size32x2, allocs),
pretty_print_vreg_vector(rn, VectorSize::Size32x2, allocs),
)
} else {
(
pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs),
pretty_print_vreg_scalar(ri, ScalarSize::Size64, allocs),
pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs),
)
};
format!("{} {}, {}, {}, {}", op, rd, ri, rn, imm)
}
&Inst::FpuRRRR { &Inst::FpuRRRR {
fpu_op, fpu_op,
size, size,
@@ -1983,11 +2031,18 @@ impl Inst {
format!("fmov {}, {}", rd, imm) format!("fmov {}, {}", rd, imm)
} }
&Inst::MovToVec { rd, rn, idx, size } => { &Inst::MovToVec {
rd,
ri,
rn,
idx,
size,
} => {
let rd = let rd =
pretty_print_vreg_element(rd.to_reg(), idx as usize, size.lane_size(), allocs); pretty_print_vreg_element(rd.to_reg(), idx as usize, size.lane_size(), allocs);
let ri = pretty_print_vreg_element(ri, idx as usize, size.lane_size(), allocs);
let rn = pretty_print_ireg(rn, size.operand_size(), allocs); let rn = pretty_print_ireg(rn, size.operand_size(), allocs);
format!("mov {}, {}", rd, rn) format!("mov {}, {}, {}", rd, ri, rn)
} }
&Inst::MovFromVec { rd, rn, idx, size } => { &Inst::MovFromVec { rd, rn, idx, size } => {
let op = match size { let op = match size {
@@ -2062,6 +2117,7 @@ impl Inst {
} }
&Inst::VecMovElement { &Inst::VecMovElement {
rd, rd,
ri,
rn, rn,
dest_idx, dest_idx,
src_idx, src_idx,
@@ -2073,8 +2129,9 @@ impl Inst {
size.lane_size(), size.lane_size(),
allocs, allocs,
); );
let ri = pretty_print_vreg_element(ri, dest_idx as usize, size.lane_size(), allocs);
let rn = pretty_print_vreg_element(rn, src_idx as usize, size.lane_size(), allocs); let rn = pretty_print_vreg_element(rn, src_idx as usize, size.lane_size(), allocs);
format!("mov {}, {}", rd, rn) format!("mov {}, {}, {}", rd, ri, rn)
} }
&Inst::VecRRLong { &Inst::VecRRLong {
op, op,
@@ -2119,16 +2176,28 @@ impl Inst {
format!("{} {}, {}{}", op, rd, rn, suffix) format!("{} {}, {}{}", op, rd, rn, suffix)
} }
&Inst::VecRRNarrow { &Inst::VecRRNarrowLow {
op, op,
rd, rd,
rn, rn,
high_half,
lane_size, lane_size,
..
}
| &Inst::VecRRNarrowHigh {
op,
rd,
rn,
lane_size,
..
} => { } => {
let vec64 = VectorSize::from_lane_size(lane_size, false); let vec64 = VectorSize::from_lane_size(lane_size, false);
let vec128 = VectorSize::from_lane_size(lane_size, true); let vec128 = VectorSize::from_lane_size(lane_size, true);
let rn_size = VectorSize::from_lane_size(lane_size.widen(), true); let rn_size = VectorSize::from_lane_size(lane_size.widen(), true);
let high_half = match self {
&Inst::VecRRNarrowLow { .. } => false,
&Inst::VecRRNarrowHigh { .. } => true,
_ => unreachable!(),
};
let (op, rd_size) = match (op, high_half) { let (op, rd_size) = match (op, high_half) {
(VecRRNarrowOp::Xtn, false) => ("xtn", vec64), (VecRRNarrowOp::Xtn, false) => ("xtn", vec64),
(VecRRNarrowOp::Xtn, true) => ("xtn2", vec128), (VecRRNarrowOp::Xtn, true) => ("xtn2", vec128),
@@ -2143,8 +2212,15 @@ impl Inst {
}; };
let rn = pretty_print_vreg_vector(rn, rn_size, allocs); let rn = pretty_print_vreg_vector(rn, rn_size, allocs);
let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size, allocs);
let ri = match self {
&Inst::VecRRNarrowLow { .. } => "".to_string(),
&Inst::VecRRNarrowHigh { ri, .. } => {
format!("{}, ", pretty_print_vreg_vector(ri, rd_size, allocs))
}
_ => unreachable!(),
};
format!("{} {}, {}", op, rd, rn) format!("{} {}, {}{}", op, rd, ri, rn)
} }
&Inst::VecRRPair { op, rd, rn } => { &Inst::VecRRPair { op, rd, rn } => {
let op = match op { let op = match op {
@@ -2227,6 +2303,7 @@ impl Inst {
} }
&Inst::VecRRRMod { &Inst::VecRRRMod {
rd, rd,
ri,
rn, rn,
rm, rm,
alu_op, alu_op,
@@ -2237,9 +2314,10 @@ impl Inst {
VecALUModOp::Fmla => ("fmla", size), VecALUModOp::Fmla => ("fmla", size),
}; };
let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
let ri = pretty_print_vreg_vector(ri, size, allocs);
let rn = pretty_print_vreg_vector(rn, size, allocs); let rn = pretty_print_vreg_vector(rn, size, allocs);
let rm = pretty_print_vreg_vector(rm, size, allocs); let rm = pretty_print_vreg_vector(rm, size, allocs);
format!("{} {}, {}, {}", op, rd, rn, rm) format!("{} {}, {}, {}, {}", op, rd, ri, rn, rm)
} }
&Inst::VecRRRLong { &Inst::VecRRRLong {
rd, rd,
@@ -2285,30 +2363,46 @@ impl Inst {
(VecRRRLongOp::Umull32, true) => { (VecRRRLongOp::Umull32, true) => {
("umull2", VectorSize::Size64x2, VectorSize::Size32x4) ("umull2", VectorSize::Size64x2, VectorSize::Size32x4)
} }
(VecRRRLongOp::Umlal8, false) => {
("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
}
(VecRRRLongOp::Umlal8, true) => {
("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
}
(VecRRRLongOp::Umlal16, false) => {
("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
}
(VecRRRLongOp::Umlal16, true) => {
("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
}
(VecRRRLongOp::Umlal32, false) => {
("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
}
(VecRRRLongOp::Umlal32, true) => {
("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
}
}; };
let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size, allocs);
let rn = pretty_print_vreg_vector(rn, src_size, allocs); let rn = pretty_print_vreg_vector(rn, src_size, allocs);
let rm = pretty_print_vreg_vector(rm, src_size, allocs); let rm = pretty_print_vreg_vector(rm, src_size, allocs);
format!("{} {}, {}, {}", op, rd, rn, rm) format!("{} {}, {}, {}", op, rd, rn, rm)
} }
&Inst::VecRRRLongMod {
rd,
ri,
rn,
rm,
alu_op,
high_half,
} => {
let (op, dest_size, src_size) = match (alu_op, high_half) {
(VecRRRLongModOp::Umlal8, false) => {
("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
}
(VecRRRLongModOp::Umlal8, true) => {
("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
}
(VecRRRLongModOp::Umlal16, false) => {
("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
}
(VecRRRLongModOp::Umlal16, true) => {
("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
}
(VecRRRLongModOp::Umlal32, false) => {
("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
}
(VecRRRLongModOp::Umlal32, true) => {
("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
}
};
let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size, allocs);
let ri = pretty_print_vreg_vector(ri, dest_size, allocs);
let rn = pretty_print_vreg_vector(rn, src_size, allocs);
let rm = pretty_print_vreg_vector(rm, src_size, allocs);
format!("{} {}, {}, {}, {}", op, rd, ri, rn, rm)
}
&Inst::VecMisc { op, rd, rn, size } => { &Inst::VecMisc { op, rd, rn, size } => {
let (op, size, suffix) = match op { let (op, size, suffix) = match op {
VecMisc2::Not => ( VecMisc2::Not => (
@@ -2378,6 +2472,7 @@ impl Inst {
&Inst::VecShiftImmMod { &Inst::VecShiftImmMod {
op, op,
rd, rd,
ri,
rn, rn,
size, size,
imm, imm,
@@ -2386,8 +2481,9 @@ impl Inst {
VecShiftImmModOp::Sli => "sli", VecShiftImmModOp::Sli => "sli",
}; };
let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
let ri = pretty_print_vreg_vector(ri, size, allocs);
let rn = pretty_print_vreg_vector(rn, size, allocs); let rn = pretty_print_vreg_vector(rn, size, allocs);
format!("{} {}, {}, #{}", op, rd, rn, imm) format!("{} {}, {}, {}, #{}", op, rd, ri, rn, imm)
} }
&Inst::VecExtract { rd, rn, rm, imm4 } => { &Inst::VecExtract { rd, rn, rm, imm4 } => {
let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
@@ -2395,31 +2491,39 @@ impl Inst {
let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs);
format!("ext {}, {}, {}, #{}", rd, rn, rm, imm4) format!("ext {}, {}, {}, #{}", rd, rn, rm, imm4)
} }
&Inst::VecTbl { &Inst::VecTbl { rd, rn, rm } => {
rd,
rn,
rm,
is_extension,
} => {
let op = if is_extension { "tbx" } else { "tbl" };
let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs);
let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs);
let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
format!("{} {}, {{ {} }}, {}", op, rd, rn, rm) format!("tbl {}, {{ {} }}, {}", rd, rn, rm)
} }
&Inst::VecTbl2 { &Inst::VecTblExt { rd, ri, rn, rm } => {
rd, let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs);
rn, let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs);
rn2, let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
rm, let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16, allocs);
is_extension, format!("tbx {}, {}, {{ {} }}, {}", rd, ri, rn, rm)
} => { }
let op = if is_extension { "tbx" } else { "tbl" }; &Inst::VecTbl2 { rd, rn, rn2, rm } => {
let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs);
let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16, allocs); let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16, allocs);
let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs);
let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
format!("{} {}, {{ {}, {} }}, {}", op, rd, rn, rn2, rm) format!("tbl {}, {{ {}, {} }}, {}", rd, rn, rn2, rm)
}
&Inst::VecTbl2Ext {
rd,
ri,
rn,
rn2,
rm,
} => {
let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs);
let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16, allocs);
let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs);
let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16, allocs);
format!("tbx {}, {}, {{ {}, {} }}, {}", rd, ri, rn, rn2, rm)
} }
&Inst::VecLoadReplicate { rd, rn, size, .. } => { &Inst::VecLoadReplicate { rd, rn, size, .. } => {
let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);

@@ -50,6 +50,7 @@ pub(crate) const fn vreg_preg(num: u8) -> PReg {
} }
/// Get a writable reference to a V-register. /// Get a writable reference to a V-register.
#[cfg(test)] // Used only in test code.
pub fn writable_vreg(num: u8) -> Writable<Reg> { pub fn writable_vreg(num: u8) -> Writable<Reg> {
Writable::from_reg(vreg(num)) Writable::from_reg(vreg(num))
} }

@@ -103,12 +103,12 @@
(rule (lower (has_type ty (shuffle rn rn2 (u128_from_immediate mask)))) (rule (lower (has_type ty (shuffle rn rn2 (u128_from_immediate mask))))
(let ((mask_reg Reg (constant_f128 mask))) (let ((mask_reg Reg (constant_f128 mask)))
(vec_tbl2 rn rn2 mask_reg $false ty))) (vec_tbl2 rn rn2 mask_reg ty)))
;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type vec_i128_ty (swizzle rn rm))) (rule (lower (has_type vec_i128_ty (swizzle rn rm)))
(vec_tbl rn rm #f)) (vec_tbl rn rm))
;;;; Rules for `isplit` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `isplit` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -8,7 +8,7 @@ use generated_code::Context;
use super::{ use super::{
lower_constant_f128, lower_constant_f32, lower_constant_f64, lower_fp_condcode, lower_constant_f128, lower_constant_f32, lower_constant_f64, lower_fp_condcode,
writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo,
CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FPUOpRIMod, FloatCC, Imm12, ImmLogic, ImmShift,
Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode,
Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize,
NZCV, NZCV,
@@ -28,7 +28,6 @@ use crate::{
}, },
isa::aarch64::abi::AArch64Caller, isa::aarch64::abi::AArch64Caller,
isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm}, isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm},
isa::aarch64::lower::{writable_vreg, writable_xreg, xreg},
isa::unwind::UnwindInst, isa::unwind::UnwindInst,
machinst::{ty_bits, InsnOutput, Lower, MachInst, VCodeConstant, VCodeConstantData}, machinst::{ty_bits, InsnOutput, Lower, MachInst, VCodeConstant, VCodeConstantData},
}; };
@@ -209,9 +208,9 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
}); });
if upper_halfword != 0 { if upper_halfword != 0 {
self.emit(&MInst::MovWide { self.emit(&MInst::MovK {
op: MoveWideOp::MovK,
rd, rd,
rn: rd.to_reg(),
imm: MoveWideConst::maybe_with_shift(upper_halfword, 16).unwrap(), imm: MoveWideConst::maybe_with_shift(upper_halfword, 16).unwrap(),
size, size,
}); });
@@ -263,9 +262,9 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
} }
} else { } else {
let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap(); let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
self.emit(&MInst::MovWide { self.emit(&MInst::MovK {
op: MoveWideOp::MovK,
rd, rd,
rn: rd.to_reg(),
imm, imm,
size, size,
}); });
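With `MovK` split out of `MovWide`, the partially built constant becomes an explicit `rn` input (here simply `rd.to_reg()` from the previous step), which is why the filetest expectations further down change from `movk x0, #imm` to `movk x0, x0, #imm`. The small standalone sketch below shows how a 64-bit constant decomposes into that movz/movk sequence; it is an illustration only (names like `materialize_x0` are made up, and it ignores the movn path and other shortcuts the real lowering applies).

// Break a 64-bit constant into one movz plus up to three movk steps, printed
// in the new pretty-print form `movk rd, rn, #imm, LSL #shift`.
fn materialize_x0(value: u64) -> Vec<String> {
    let mut insts = Vec::new();
    let mut started = false;
    for i in 0..4 {
        let imm16 = (value >> (16 * i)) & 0xffff;
        if imm16 == 0 {
            continue;
        }
        let shift = if i == 0 {
            String::new()
        } else {
            format!(", LSL #{}", 16 * i)
        };
        if !started {
            insts.push(format!("movz x0, #{}{}", imm16, shift));
            started = true;
        } else {
            // The previous value of x0 is the explicit rn input of each movk.
            insts.push(format!("movk x0, x0, #{}{}", imm16, shift));
        }
    }
    if !started {
        // Every halfword was zero.
        insts.push("movz x0, #0".to_string());
    }
    insts
}

fn main() {
    // For example, 0xf34b_f0a3_1212_003a yields the movz/movk sequence seen
    // in the constants filetest below.
    for line in materialize_x0(0xf34b_f0a3_1212_003a) {
        println!("{}", line);
    }
}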
@@ -294,18 +293,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
zero_reg() zero_reg()
} }
fn xreg(&mut self, index: u8) -> Reg {
xreg(index)
}
fn writable_xreg(&mut self, index: u8) -> WritableReg {
writable_xreg(index)
}
fn writable_vreg(&mut self, index: u8) -> WritableReg {
writable_vreg(index)
}
fn extended_value_from_value(&mut self, val: Value) -> Option<ExtendedValue> { fn extended_value_from_value(&mut self, val: Value) -> Option<ExtendedValue> {
let (val, extend) = let (val, extend) =
super::get_as_extended_value(self.lower_ctx, val, NarrowValueMode::None)?; super::get_as_extended_value(self.lower_ctx, val, NarrowValueMode::None)?;
@@ -718,11 +705,11 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
} }
} }
fn fpu_op_ri_sli(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI { fn fpu_op_ri_sli(&mut self, ty_bits: u8, shift: u8) -> FPUOpRIMod {
if ty_bits == 32 { if ty_bits == 32 {
FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap()) FPUOpRIMod::Sli32(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
} else if ty_bits == 64 { } else if ty_bits == 64 {
FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap()) FPUOpRIMod::Sli64(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
} else { } else {
unimplemented!( unimplemented!(
"unexpected input size for fpu_op_ri_sli: {} (shift: {})", "unexpected input size for fpu_op_ri_sli: {} (shift: {})",

@@ -139,7 +139,7 @@ block0(v0: i64):
; block0: ; block0:
; movz w3, #51712 ; movz w3, #51712
; movk w3, #15258, LSL #16 ; movk w3, w3, #15258, LSL #16
; add x3, x3, x0 ; add x3, x3, x0
; ldr w0, [x3] ; ldr w0, [x3]
; ret ; ret

@@ -142,9 +142,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_nand_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -164,9 +163,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_nand_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -186,9 +184,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_nand_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -208,9 +205,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_nand_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16

@@ -14,9 +14,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_add_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr x27, [x25]; add x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -36,9 +35,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_add_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr w27, [x25]; add w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -58,9 +56,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_add_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrh w27, [x25]; add w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -80,9 +77,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_add_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -102,9 +98,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_sub_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr x27, [x25]; sub x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -124,9 +119,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_sub_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr w27, [x25]; sub w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -146,9 +140,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_sub_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrh w27, [x25]; sub w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -168,9 +161,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_sub_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrb w27, [x25]; sub w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -190,9 +182,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_and_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -212,9 +203,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_and_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr w27, [x25]; and w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -234,9 +224,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_and_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrh w27, [x25]; and w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -256,9 +245,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_and_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrb w27, [x25]; and w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -278,9 +266,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_nand_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -300,9 +287,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_nand_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -322,9 +308,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_nand_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -344,9 +329,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_nand_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -366,9 +350,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_orr_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr x27, [x25]; orr x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -388,9 +371,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_orr_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -410,9 +392,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_orr_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrh w27, [x25]; orr w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -432,9 +413,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_orr_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrb w27, [x25]; orr w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -454,9 +434,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_eor_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr x27, [x25]; eor x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -476,9 +455,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_eor_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr w27, [x25]; eor w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -498,9 +476,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_eor_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -520,9 +497,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_eor_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrb w27, [x25]; eor w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -542,9 +518,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_smax_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -564,9 +539,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_smax_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxr w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -586,9 +560,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_smax_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, gt; stlxrh w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -608,9 +581,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_smax_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -630,9 +602,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_umax_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, hi; stlxr w24, x28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -652,9 +623,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_umax_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxr w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -674,9 +644,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_umax_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -696,9 +665,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_umax_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrb w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -718,9 +686,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_smin_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lt; stlxr w24, x28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -740,9 +707,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_smin_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -762,9 +728,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_smin_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -784,9 +749,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_smin_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, lt; stlxrb w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -806,9 +770,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_umin_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lo; stlxr w24, x28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -828,9 +791,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_umin_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxr w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -850,9 +812,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_umin_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrh w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16
@@ -872,9 +833,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]! ; stp x24, x25, [sp, #-16]!
; block0: ; block0:
; mov x25, x0 ; mov x25, x0
; mov x4, x1 ; mov x26, x1
; mov x26, x4 ; atomic_rmw_loop_umin_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b
; ldp x24, x25, [sp], #16 ; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16 ; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16 ; ldr x28, [sp], #16

@@ -245,11 +245,11 @@ block0(v0: i128):
} }
; block0: ; block0:
; fmov d6, x0 ; fmov d7, x0
; mov v6.d[1], x1 ; mov v7.d[1], v7.d[1], x1
; cnt v19.16b, v6.16b ; cnt v18.16b, v7.16b
; addv b21, v19.16b ; addv b20, v18.16b
; umov w0, v21.b[0] ; umov w0, v20.b[0]
; movz w1, #0 ; movz w1, #0
; ret ; ret

@@ -130,9 +130,9 @@ block0:
; block0: ; block0:
; movz x0, #58 ; movz x0, #58
; movk x0, #4626, LSL #16 ; movk x0, x0, #4626, LSL #16
; movk x0, #61603, LSL #32 ; movk x0, x0, #61603, LSL #32
; movk x0, #62283, LSL #48 ; movk x0, x0, #62283, LSL #48
; ret ; ret
function %f() -> i64 { function %f() -> i64 {
@@ -143,7 +143,7 @@ block0:
; block0: ; block0:
; movz x0, #7924, LSL #16 ; movz x0, #7924, LSL #16
; movk x0, #4841, LSL #48 ; movk x0, x0, #4841, LSL #48
; ret ; ret
function %f() -> i64 { function %f() -> i64 {
@@ -154,7 +154,7 @@ block0:
; block0: ; block0:
; movn x0, #57611, LSL #16 ; movn x0, #57611, LSL #16
; movk x0, #4841, LSL #48 ; movk x0, x0, #4841, LSL #48
; ret ; ret
function %f() -> i32 { function %f() -> i32 {

@@ -15,10 +15,9 @@ block0(v0: i16):
} }
; block0: ; block0:
; dup v6.4h, w0 ; dup v4.4h, w0
; mov v7.16b, v6.16b ; mov v4.d[1], v4.d[1], v4.d[0]
; mov v7.d[1], v6.d[0] ; sqxtn v0.8b, v4.8h
; sqxtn v0.8b, v7.8h
; ret ; ret
function %snarrow_i16x8(i16) -> i8x16 { function %snarrow_i16x8(i16) -> i8x16 {
@@ -37,7 +36,7 @@ block0(v0: i16):
; block0: ; block0:
; dup v6.8h, w0 ; dup v6.8h, w0
; sqxtn v0.8b, v6.8h ; sqxtn v0.8b, v6.8h
; sqxtn2 v0.16b, v6.8h ; sqxtn2 v0.16b, v0.16b, v6.8h
; ret ; ret
function %snarrow_i32x2(i32) -> i16x4 { function %snarrow_i32x2(i32) -> i16x4 {
@@ -54,10 +53,9 @@ block0(v0: i32):
} }
; block0: ; block0:
; dup v6.2s, w0 ; dup v4.2s, w0
; mov v7.16b, v6.16b ; mov v4.d[1], v4.d[1], v4.d[0]
; mov v7.d[1], v6.d[0] ; sqxtn v0.4h, v4.4s
; sqxtn v0.4h, v7.4s
; ret ; ret
function %snarrow_i32x4(i32) -> i16x8 { function %snarrow_i32x4(i32) -> i16x8 {
@@ -76,7 +74,7 @@ block0(v0: i32):
; block0: ; block0:
; dup v6.4s, w0 ; dup v6.4s, w0
; sqxtn v0.4h, v6.4s ; sqxtn v0.4h, v6.4s
; sqxtn2 v0.8h, v6.4s ; sqxtn2 v0.8h, v0.8h, v6.4s
; ret ; ret
function %snarrow_i64x2(i64) -> i32x4 { function %snarrow_i64x2(i64) -> i32x4 {
@@ -95,7 +93,7 @@ block0(v0: i64):
; block0: ; block0:
; dup v6.2d, x0 ; dup v6.2d, x0
; sqxtn v0.2s, v6.2d ; sqxtn v0.2s, v6.2d
; sqxtn2 v0.4s, v6.2d ; sqxtn2 v0.4s, v0.4s, v6.2d
; ret ; ret
function %unarrow_i16x4(i16) -> i8x8 { function %unarrow_i16x4(i16) -> i8x8 {
@@ -112,10 +110,9 @@ block0(v0: i16):
} }
; block0: ; block0:
; dup v6.4h, w0 ; dup v4.4h, w0
; mov v7.16b, v6.16b ; mov v4.d[1], v4.d[1], v4.d[0]
; mov v7.d[1], v6.d[0] ; sqxtun v0.8b, v4.8h
; sqxtun v0.8b, v7.8h
; ret ; ret
function %unarrow_i16x8(i16) -> i8x16 { function %unarrow_i16x8(i16) -> i8x16 {
@@ -134,7 +131,7 @@ block0(v0: i16):
; block0: ; block0:
; dup v6.8h, w0 ; dup v6.8h, w0
; sqxtun v0.8b, v6.8h ; sqxtun v0.8b, v6.8h
; sqxtun2 v0.16b, v6.8h ; sqxtun2 v0.16b, v0.16b, v6.8h
; ret ; ret
function %unarrow_i32x2(i32) -> i16x4 { function %unarrow_i32x2(i32) -> i16x4 {
@@ -151,10 +148,9 @@ block0(v0: i32):
} }
; block0: ; block0:
; dup v6.2s, w0 ; dup v4.2s, w0
; mov v7.16b, v6.16b ; mov v4.d[1], v4.d[1], v4.d[0]
; mov v7.d[1], v6.d[0] ; sqxtun v0.4h, v4.4s
; sqxtun v0.4h, v7.4s
; ret ; ret
function %unarrow_i32x4(i32) -> i16x8 { function %unarrow_i32x4(i32) -> i16x8 {
@@ -173,7 +169,7 @@ block0(v0: i32):
; block0: ; block0:
; dup v6.4s, w0 ; dup v6.4s, w0
; sqxtun v0.4h, v6.4s ; sqxtun v0.4h, v6.4s
; sqxtun2 v0.8h, v6.4s ; sqxtun2 v0.8h, v0.8h, v6.4s
; ret ; ret
function %unarrow_i64x2(i64) -> i32x4 { function %unarrow_i64x2(i64) -> i32x4 {
@@ -192,7 +188,7 @@ block0(v0: i64):
; block0: ; block0:
; dup v6.2d, x0 ; dup v6.2d, x0
; sqxtun v0.2s, v6.2d ; sqxtun v0.2s, v6.2d
; sqxtun2 v0.4s, v6.2d ; sqxtun2 v0.4s, v0.4s, v6.2d
; ret ; ret
function %uunarrow_i16x4(i16) -> i8x8 { function %uunarrow_i16x4(i16) -> i8x8 {
@@ -209,10 +205,9 @@ block0(v0: i16):
} }
; block0: ; block0:
; dup v6.4h, w0 ; dup v4.4h, w0
; mov v7.16b, v6.16b ; mov v4.d[1], v4.d[1], v4.d[0]
; mov v7.d[1], v6.d[0] ; uqxtn v0.8b, v4.8h
; uqxtn v0.8b, v7.8h
; ret ; ret
function %uunarrow_i16x8(i16) -> i8x16 { function %uunarrow_i16x8(i16) -> i8x16 {
@@ -231,7 +226,7 @@ block0(v0: i16):
; block0: ; block0:
; dup v6.8h, w0 ; dup v6.8h, w0
; uqxtn v0.8b, v6.8h ; uqxtn v0.8b, v6.8h
; uqxtn2 v0.16b, v6.8h ; uqxtn2 v0.16b, v0.16b, v6.8h
; ret ; ret
function %uunarrow_i32x2(i32) -> i16x4 { function %uunarrow_i32x2(i32) -> i16x4 {
@@ -248,10 +243,9 @@ block0(v0: i32):
} }
; block0: ; block0:
; dup v6.2s, w0 ; dup v4.2s, w0
; mov v7.16b, v6.16b ; mov v4.d[1], v4.d[1], v4.d[0]
; mov v7.d[1], v6.d[0] ; uqxtn v0.4h, v4.4s
; uqxtn v0.4h, v7.4s
; ret ; ret
function %uunarrow_i32x4(i32) -> i16x8 { function %uunarrow_i32x4(i32) -> i16x8 {
@@ -270,7 +264,7 @@ block0(v0: i32):
; block0: ; block0:
; dup v6.4s, w0 ; dup v6.4s, w0
; uqxtn v0.4h, v6.4s ; uqxtn v0.4h, v6.4s
; uqxtn2 v0.8h, v6.4s ; uqxtn2 v0.8h, v0.8h, v6.4s
; ret ; ret
function %uunarrow_i64x2(i64) -> i32x4 { function %uunarrow_i64x2(i64) -> i32x4 {
@@ -289,5 +283,6 @@ block0(v0: i64):
; block0: ; block0:
; dup v6.2d, x0 ; dup v6.2d, x0
; uqxtn v0.2s, v6.2d ; uqxtn v0.2s, v6.2d
; uqxtn2 v0.4s, v6.2d ; uqxtn2 v0.4s, v0.4s, v6.2d
; ret ; ret

@@ -197,7 +197,7 @@ block0(v0: f64, v1: f64):
; dup v17.2d, v0.d[0] ; dup v17.2d, v0.d[0]
; dup v18.2d, v1.d[0] ; dup v18.2d, v1.d[0]
; fcmgt v0.2d, v17.2d, v18.2d ; fcmgt v0.2d, v17.2d, v18.2d
; bsl v0.16b, v18.16b, v17.16b ; bsl v0.16b, v0.16b, v18.16b, v17.16b
; ret ; ret
function %f64x2_splat_max_pseudo(f64, f64) -> f64x2 { function %f64x2_splat_max_pseudo(f64, f64) -> f64x2 {
@@ -216,5 +216,6 @@ block0(v0: f64, v1: f64):
; dup v17.2d, v0.d[0] ; dup v17.2d, v0.d[0]
; dup v18.2d, v1.d[0] ; dup v18.2d, v1.d[0]
; fcmgt v0.2d, v18.2d, v17.2d ; fcmgt v0.2d, v18.2d, v17.2d
; bsl v0.16b, v18.16b, v17.16b ; bsl v0.16b, v0.16b, v18.16b, v17.16b
; ret ; ret

@@ -309,8 +309,8 @@ block0(v0: f32, v1: f32):
} }
; block0: ; block0:
; ushr v7.2s, v1.2s, #31 ; ushr v6.2s, v1.2s, #31
; sli v0.2s, v7.2s, #31 ; sli v0.2s, v0.2s, v6.2s, #31
; ret ; ret
function %f32(f64, f64) -> f64 { function %f32(f64, f64) -> f64 {
@@ -320,8 +320,8 @@ block0(v0: f64, v1: f64):
} }
; block0: ; block0:
; ushr d7, d1, #63 ; ushr d6, d1, #63
; sli d0, d7, #63 ; sli d0, d0, d6, #63
; ret ; ret
function %f33(f32) -> i32 { function %f33(f32) -> i32 {
@@ -918,9 +918,8 @@ block0(v0: f32x4, v1: f32x4, v2: f32x4):
} }
; block0: ; block0:
; mov v17.16b, v0.16b ; fmla v2.4s, v2.4s, v0.4s, v1.4s
; mov v0.16b, v2.16b ; mov v0.16b, v2.16b
; fmla v0.4s, v17.4s, v1.4s
; ret ; ret
function %f79(f32x2, f32x2, f32x2) -> f32x2 { function %f79(f32x2, f32x2, f32x2) -> f32x2 {
@@ -930,9 +929,8 @@ block0(v0: f32x2, v1: f32x2, v2: f32x2):
} }
; block0: ; block0:
; mov v17.16b, v0.16b ; fmla v2.2s, v2.2s, v0.2s, v1.2s
; mov v0.16b, v2.16b ; mov v0.16b, v2.16b
; fmla v0.2s, v17.2s, v1.2s
; ret ; ret
function %f80(f64x2, f64x2, f64x2) -> f64x2 { function %f80(f64x2, f64x2, f64x2) -> f64x2 {
@@ -942,9 +940,8 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2):
} }
; block0: ; block0:
; mov v17.16b, v0.16b ; fmla v2.2d, v2.2d, v0.2d, v1.2d
; mov v0.16b, v2.16b ; mov v0.16b, v2.16b
; fmla v0.2d, v17.2d, v1.2d
; ret ; ret
function %f81(f32x2, f32x2) -> f32x2 { function %f81(f32x2, f32x2) -> f32x2 {
@@ -954,8 +951,8 @@ block0(v0: f32x2, v1: f32x2):
} }
; block0: ; block0:
; ushr v7.2s, v1.2s, #31 ; ushr v6.2s, v1.2s, #31
; sli v0.2s, v7.2s, #31 ; sli v0.2s, v0.2s, v6.2s, #31
; ret ; ret
function %f82(f32x4, f32x4) -> f32x4 { function %f82(f32x4, f32x4) -> f32x4 {
@@ -965,8 +962,8 @@ block0(v0: f32x4, v1: f32x4):
} }
; block0: ; block0:
; ushr v7.4s, v1.4s, #31 ; ushr v6.4s, v1.4s, #31
; sli v0.4s, v7.4s, #31 ; sli v0.4s, v0.4s, v6.4s, #31
; ret ; ret
function %f83(f64x2, f64x2) -> f64x2 { function %f83(f64x2, f64x2) -> f64x2 {
@@ -976,6 +973,7 @@ block0(v0: f64x2, v1: f64x2):
} }
; block0: ; block0:
; ushr v7.2d, v1.2d, #63 ; ushr v6.2d, v1.2d, #63
; sli v0.2d, v7.2d, #63 ; sli v0.2d, v0.2d, v6.2d, #63
; ret ; ret

@@ -105,7 +105,7 @@ block0:
; movi v0.16b, #0 ; movi v0.16b, #0
; movi v4.16b, #0 ; movi v4.16b, #0
; movi v5.16b, #0 ; movi v5.16b, #0
; bsl v0.16b, v4.16b, v5.16b ; bsl v0.16b, v0.16b, v4.16b, v5.16b
; ret ; ret
function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 { function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 {
@@ -115,7 +115,7 @@ block0(v0: b16x8, v1: i16x8, v2: i16x8):
} }
; block0: ; block0:
; bsl v0.16b, v1.16b, v2.16b ; bsl v0.16b, v0.16b, v1.16b, v2.16b
; ret ; ret
function %vselect_f32x4(b32x4, f32x4, f32x4) -> f32x4 { function %vselect_f32x4(b32x4, f32x4, f32x4) -> f32x4 {
@@ -125,7 +125,7 @@ block0(v0: b32x4, v1: f32x4, v2: f32x4):
} }
; block0: ; block0:
; bsl v0.16b, v1.16b, v2.16b ; bsl v0.16b, v0.16b, v1.16b, v2.16b
; ret ; ret
function %vselect_f64x2(b64x2, f64x2, f64x2) -> f64x2 { function %vselect_f64x2(b64x2, f64x2, f64x2) -> f64x2 {
@@ -135,7 +135,7 @@ block0(v0: b64x2, v1: f64x2, v2: f64x2):
} }
; block0: ; block0:
; bsl v0.16b, v1.16b, v2.16b ; bsl v0.16b, v0.16b, v1.16b, v2.16b
; ret ; ret
function %ishl_i8x16(i32) -> i8x16 { function %ishl_i8x16(i32) -> i8x16 {
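
All of the vselect cases lower to bsl, whose destination initially holds the selection mask: for every bit, a 1 picks the corresponding bit of the first source and a 0 picks the second. That is why the mask operand is the one tied to the output above. A minimal bitwise model:

    // Minimal model of BSL (bitwise select) on one 64-bit chunk.
    fn bsl(mask: u64, if_true: u64, if_false: u64) -> u64 {
        (mask & if_true) | (!mask & if_false)
    }
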

@@ -29,9 +29,9 @@ block0:
; block0: ; block0:
; movz x4, #1 ; movz x4, #1
; fmov s30, w4 ; fmov s31, w4
; ldr q3, pc+8 ; b 20 ; data.f128 0x13000000000000000000000000000000 ; ldr q3, pc+8 ; b 20 ; data.f128 0x13000000000000000000000000000000
; mov v31.16b, v30.16b ; mov v30.16b, v31.16b
; tbl v0.16b, { v30.16b, v31.16b }, v3.16b ; tbl v0.16b, { v30.16b, v31.16b }, v3.16b
; ret ; ret
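
The two-register tbl form needs its table registers to be architecturally consecutive, and the output above keeps them in the fixed pair v30/v31; only the direction of the setup mov changed. Below is a hedged sketch of expressing that requirement as fixed-register operand constraints rather than by writing into pinned vregs; PReg::new, RegClass::Float, and Operand::reg_fixed_use are assumed from regalloc2's public API, and the concrete register pair is simply the one visible in the test output.

    // Hedged sketch: pin the two table inputs of a tbl pseudo-inst to v30/v31.
    use regalloc2::{Operand, PReg, RegClass, VReg};

    fn tbl2_operands(dst: VReg, table_lo: VReg, table_hi: VReg, index: VReg) -> Vec<Operand> {
        let v30 = PReg::new(30, RegClass::Float);
        let v31 = PReg::new(31, RegClass::Float);
        vec![
            Operand::reg_fixed_use(table_lo, v30), // table list registers must be consecutive
            Operand::reg_fixed_use(table_hi, v31),
            Operand::reg_use(index),
            Operand::reg_def(dst),
        ]
    }
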

@@ -9,7 +9,7 @@ block0(v0: i16x4, v1: i16x4):
} }
; block0: ; block0:
; mov v0.d[1], v1.d[0] ; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtn v0.8b, v0.8h ; sqxtn v0.8b, v0.8h
; ret ; ret
@@ -21,7 +21,7 @@ block0(v0: i16x8, v1: i16x8):
; block0: ; block0:
; sqxtn v0.8b, v0.8h ; sqxtn v0.8b, v0.8h
; sqxtn2 v0.16b, v1.8h ; sqxtn2 v0.16b, v0.16b, v1.8h
; ret ; ret
function %snarrow_i32x2(i32x2, i32x2) -> i16x4 { function %snarrow_i32x2(i32x2, i32x2) -> i16x4 {
@@ -31,7 +31,7 @@ block0(v0: i32x2, v1: i32x2):
} }
; block0: ; block0:
; mov v0.d[1], v1.d[0] ; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtn v0.4h, v0.4s ; sqxtn v0.4h, v0.4s
; ret ; ret
@@ -43,7 +43,7 @@ block0(v0: i32x4, v1: i32x4):
; block0: ; block0:
; sqxtn v0.4h, v0.4s ; sqxtn v0.4h, v0.4s
; sqxtn2 v0.8h, v1.4s ; sqxtn2 v0.8h, v0.8h, v1.4s
; ret ; ret
function %snarrow_i64x2(i64x2, i64x2) -> i32x4 { function %snarrow_i64x2(i64x2, i64x2) -> i32x4 {
@@ -54,7 +54,7 @@ block0(v0: i64x2, v1: i64x2):
; block0: ; block0:
; sqxtn v0.2s, v0.2d ; sqxtn v0.2s, v0.2d
; sqxtn2 v0.4s, v1.2d ; sqxtn2 v0.4s, v0.4s, v1.2d
; ret ; ret
function %unarrow_i16x4(i16x4, i16x4) -> i8x8 { function %unarrow_i16x4(i16x4, i16x4) -> i8x8 {
@@ -64,7 +64,7 @@ block0(v0: i16x4, v1: i16x4):
} }
; block0: ; block0:
; mov v0.d[1], v1.d[0] ; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtun v0.8b, v0.8h ; sqxtun v0.8b, v0.8h
; ret ; ret
@@ -76,7 +76,7 @@ block0(v0: i16x8, v1: i16x8):
; block0: ; block0:
; sqxtun v0.8b, v0.8h ; sqxtun v0.8b, v0.8h
; sqxtun2 v0.16b, v1.8h ; sqxtun2 v0.16b, v0.16b, v1.8h
; ret ; ret
function %unarrow_i32x2(i32x2, i32x2) -> i16x4 { function %unarrow_i32x2(i32x2, i32x2) -> i16x4 {
@@ -86,7 +86,7 @@ block0(v0: i32x2, v1: i32x2):
} }
; block0: ; block0:
; mov v0.d[1], v1.d[0] ; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtun v0.4h, v0.4s ; sqxtun v0.4h, v0.4s
; ret ; ret
@@ -98,7 +98,7 @@ block0(v0: i32x4, v1: i32x4):
; block0: ; block0:
; sqxtun v0.4h, v0.4s ; sqxtun v0.4h, v0.4s
; sqxtun2 v0.8h, v1.4s ; sqxtun2 v0.8h, v0.8h, v1.4s
; ret ; ret
function %unarrow_i64x2(i64x2, i64x2) -> i32x4 { function %unarrow_i64x2(i64x2, i64x2) -> i32x4 {
@@ -109,7 +109,7 @@ block0(v0: i64x2, v1: i64x2):
; block0: ; block0:
; sqxtun v0.2s, v0.2d ; sqxtun v0.2s, v0.2d
; sqxtun2 v0.4s, v1.2d ; sqxtun2 v0.4s, v0.4s, v1.2d
; ret ; ret
function %uunarrow_i16x4(i16x4, i16x4) -> i8x8 { function %uunarrow_i16x4(i16x4, i16x4) -> i8x8 {
@@ -119,7 +119,7 @@ block0(v0: i16x4, v1: i16x4):
} }
; block0: ; block0:
; mov v0.d[1], v1.d[0] ; mov v0.d[1], v0.d[1], v1.d[0]
; uqxtn v0.8b, v0.8h ; uqxtn v0.8b, v0.8h
; ret ; ret
@@ -131,7 +131,7 @@ block0(v0: i16x8, v1: i16x8):
; block0: ; block0:
; uqxtn v0.8b, v0.8h ; uqxtn v0.8b, v0.8h
; uqxtn2 v0.16b, v1.8h ; uqxtn2 v0.16b, v0.16b, v1.8h
; ret ; ret
function %uunarrow_i32x2(i32x2, i32x2) -> i16x4 { function %uunarrow_i32x2(i32x2, i32x2) -> i16x4 {
@@ -141,7 +141,7 @@ block0(v0: i32x2, v1: i32x2):
} }
; block0: ; block0:
; mov v0.d[1], v1.d[0] ; mov v0.d[1], v0.d[1], v1.d[0]
; uqxtn v0.4h, v0.4s ; uqxtn v0.4h, v0.4s
; ret ; ret
@@ -153,7 +153,7 @@ block0(v0: i32x4, v1: i32x4):
; block0: ; block0:
; uqxtn v0.4h, v0.4s ; uqxtn v0.4h, v0.4s
; uqxtn2 v0.8h, v1.4s ; uqxtn2 v0.8h, v0.8h, v1.4s
; ret ; ret
function %uunarrow_i64x2(i64x2, i64x2) -> i32x4 { function %uunarrow_i64x2(i64x2, i64x2) -> i32x4 {
@@ -164,7 +164,7 @@ block0(v0: i64x2, v1: i64x2):
; block0: ; block0:
; uqxtn v0.2s, v0.2d ; uqxtn v0.2s, v0.2d
; uqxtn2 v0.4s, v1.2d ; uqxtn2 v0.4s, v0.4s, v1.2d
; ret ; ret
function %snarrow_i16x8_zero(i16x8) -> i8x16 { function %snarrow_i16x8_zero(i16x8) -> i8x16 {
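
The sqxtn2/sqxtun2/uqxtn2 instructions write only the upper half of their destination and keep the lower half produced by the preceding narrow, so the destination is inherently read-modify-write; the new form lists it as an input accordingly. A minimal model of the signed i16x8 case:

    // Minimal model of SQXTN2: saturate-narrow `src` into the upper eight
    // lanes of `dst`, preserving the lower eight lanes.
    fn sqxtn2_i16x8(dst: [i8; 16], src: [i16; 8]) -> [i8; 16] {
        let mut out = dst; // low half kept as-is
        for (i, &v) in src.iter().enumerate() {
            out[8 + i] = v.clamp(i8::MIN as i16, i8::MAX as i16) as i8;
        }
        out
    }
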

@@ -11,7 +11,7 @@ block0:
; block0: ; block0:
; movz x2, #1 ; movz x2, #1
; movk x2, #1, LSL #48 ; movk x2, x2, #1, LSL #48
; dup v0.2d, x2 ; dup v0.2d, x2
; ret ; ret
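
movk replaces only one 16-bit field of the register and leaves the other 48 bits alone, so it reads its destination; the updated printing shows the register on both sides. A minimal model, using the constant built in this test (1 | 1 << 48):

    // Minimal model of MOVK: insert a 16-bit immediate at `shift`, keeping
    // the remaining bits of the old value.
    fn movk(old: u64, imm16: u16, shift: u32) -> u64 {
        (old & !(0xffff_u64 << shift)) | ((imm16 as u64) << shift)
    }

    fn main() {
        let x = 1u64;            // movz x2, #1
        let x = movk(x, 1, 48);  // movk x2, x2, #1, LSL #48
        assert_eq!(x, 0x0001_0000_0000_0001);
    }
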

@@ -11,7 +11,7 @@ block0:
; block0: ; block0:
; movz x1, #1 ; movz x1, #1
; movk x1, #1, LSL #48 ; movk x1, x1, #1, LSL #48
; fmov d0, x1 ; fmov d0, x1
; ret ; ret

@@ -98,16 +98,16 @@ block0(v0: i64):
; subs xzr, sp, x0, UXTX ; subs xzr, sp, x0, UXTX
; b.hs 8 ; udf ; b.hs 8 ; udf
; movz w17, #6784 ; movz w17, #6784
; movk w17, #6, LSL #16 ; movk w17, w17, #6, LSL #16
; add x16, x0, x17, UXTX ; add x16, x0, x17, UXTX
; subs xzr, sp, x16, UXTX ; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf ; b.hs 8 ; udf
; movz w16, #6784 ; movz w16, #6784
; movk w16, #6, LSL #16 ; movk w16, w16, #6, LSL #16
; sub sp, sp, x16, UXTX ; sub sp, sp, x16, UXTX
; block0: ; block0:
; movz w16, #6784 ; movz w16, #6784
; movk w16, #6, LSL #16 ; movk w16, w16, #6, LSL #16
; add sp, sp, x16, UXTX ; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16 ; ldp fp, lr, [sp], #16
; ret ; ret
@@ -152,16 +152,16 @@ block0(v0: i64):
; subs xzr, sp, x16, UXTX ; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf ; b.hs 8 ; udf
; movz w17, #6784 ; movz w17, #6784
; movk w17, #6, LSL #16 ; movk w17, w17, #6, LSL #16
; add x16, x16, x17, UXTX ; add x16, x16, x17, UXTX
; subs xzr, sp, x16, UXTX ; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf ; b.hs 8 ; udf
; movz w16, #6784 ; movz w16, #6784
; movk w16, #6, LSL #16 ; movk w16, w16, #6, LSL #16
; sub sp, sp, x16, UXTX ; sub sp, sp, x16, UXTX
; block0: ; block0:
; movz w16, #6784 ; movz w16, #6784
; movk w16, #6, LSL #16 ; movk w16, w16, #6, LSL #16
; add sp, sp, x16, UXTX ; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16 ; ldp fp, lr, [sp], #16
; ret ; ret
@@ -177,7 +177,7 @@ block0(v0: i64):
; stp fp, lr, [sp, #-16]! ; stp fp, lr, [sp, #-16]!
; mov fp, sp ; mov fp, sp
; movz w16, #6784 ; movk w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16] ; movz w16, #6784 ; movk w16, w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16]
; add x16, x16, #32 ; add x16, x16, #32
; subs xzr, sp, x16, UXTX ; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf ; b.hs 8 ; udf
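
The movz/movk pairs in these prologues materialize the stack adjustment that is checked against the stack limit and then subtracted from (and later added back to) sp: 6784 + (6 << 16) = 400,000 bytes. A one-line check of that arithmetic:

    fn main() {
        let adjustment: u64 = 6784 + (6 << 16); // movz w16, #6784 ; movk w16, w16, #6, LSL #16
        assert_eq!(adjustment, 400_000);
    }
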

@@ -31,12 +31,12 @@ block0:
; stp fp, lr, [sp, #-16]! ; stp fp, lr, [sp, #-16]!
; mov fp, sp ; mov fp, sp
; movz w16, #34480 ; movz w16, #34480
; movk w16, #1, LSL #16 ; movk w16, w16, #1, LSL #16
; sub sp, sp, x16, UXTX ; sub sp, sp, x16, UXTX
; block0: ; block0:
; mov x0, sp ; mov x0, sp
; movz w16, #34480 ; movz w16, #34480
; movk w16, #1, LSL #16 ; movk w16, w16, #1, LSL #16
; add sp, sp, x16, UXTX ; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16 ; ldp fp, lr, [sp], #16
; ret ; ret
@@ -71,13 +71,13 @@ block0:
; stp fp, lr, [sp, #-16]! ; stp fp, lr, [sp, #-16]!
; mov fp, sp ; mov fp, sp
; movz w16, #34480 ; movz w16, #34480
; movk w16, #1, LSL #16 ; movk w16, w16, #1, LSL #16
; sub sp, sp, x16, UXTX ; sub sp, sp, x16, UXTX
; block0: ; block0:
; mov x2, sp ; mov x2, sp
; ldr x0, [x2] ; ldr x0, [x2]
; movz w16, #34480 ; movz w16, #34480
; movk w16, #1, LSL #16 ; movk w16, w16, #1, LSL #16
; add sp, sp, x16, UXTX ; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16 ; ldp fp, lr, [sp], #16
; ret ; ret
@@ -112,13 +112,13 @@ block0(v0: i64):
; stp fp, lr, [sp, #-16]! ; stp fp, lr, [sp, #-16]!
; mov fp, sp ; mov fp, sp
; movz w16, #34480 ; movz w16, #34480
; movk w16, #1, LSL #16 ; movk w16, w16, #1, LSL #16
; sub sp, sp, x16, UXTX ; sub sp, sp, x16, UXTX
; block0: ; block0:
; mov x2, sp ; mov x2, sp
; str x0, [x2] ; str x0, [x2]
; movz w16, #34480 ; movz w16, #34480
; movk w16, #1, LSL #16 ; movk w16, w16, #1, LSL #16
; add sp, sp, x16, UXTX ; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16 ; ldp fp, lr, [sp], #16
; ret ; ret
@@ -479,13 +479,13 @@ block0(v0: i128):
; stp fp, lr, [sp, #-16]! ; stp fp, lr, [sp, #-16]!
; mov fp, sp ; mov fp, sp
; movz w16, #34480 ; movz w16, #34480
; movk w16, #1, LSL #16 ; movk w16, w16, #1, LSL #16
; sub sp, sp, x16, UXTX ; sub sp, sp, x16, UXTX
; block0: ; block0:
; mov x5, sp ; mov x5, sp
; stp x0, x1, [x5] ; stp x0, x1, [x5]
; movz w16, #34480 ; movz w16, #34480
; movk w16, #1, LSL #16 ; movk w16, w16, #1, LSL #16
; add sp, sp, x16, UXTX ; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16 ; ldp fp, lr, [sp], #16
; ret ; ret
@@ -539,13 +539,13 @@ block0:
; stp fp, lr, [sp, #-16]! ; stp fp, lr, [sp, #-16]!
; mov fp, sp ; mov fp, sp
; movz w16, #34480 ; movz w16, #34480
; movk w16, #1, LSL #16 ; movk w16, w16, #1, LSL #16
; sub sp, sp, x16, UXTX ; sub sp, sp, x16, UXTX
; block0: ; block0:
; mov x5, sp ; mov x5, sp
; ldp x0, x1, [x5] ; ldp x0, x1, [x5]
; movz w16, #34480 ; movz w16, #34480
; movk w16, #1, LSL #16 ; movk w16, w16, #1, LSL #16
; add sp, sp, x16, UXTX ; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16 ; ldp fp, lr, [sp], #16
; ret ; ret

@@ -10,9 +10,9 @@ block0(v0: i8x16):
; block0: ; block0:
; sshr v3.16b, v0.16b, #7 ; sshr v3.16b, v0.16b, #7
; movz x6, #513 ; movz x6, #513
; movk x6, #2052, LSL #16 ; movk x6, x6, #2052, LSL #16
; movk x6, #8208, LSL #32 ; movk x6, x6, #8208, LSL #32
; movk x6, #32832, LSL #48 ; movk x6, x6, #32832, LSL #48
; dup v17.2d, x6 ; dup v17.2d, x6
; and v20.16b, v3.16b, v17.16b ; and v20.16b, v3.16b, v17.16b
; ext v22.16b, v20.16b, v20.16b, #8 ; ext v22.16b, v20.16b, v20.16b, #8
@@ -30,9 +30,9 @@ block0(v0: i8x16):
; block0: ; block0:
; sshr v3.16b, v0.16b, #7 ; sshr v3.16b, v0.16b, #7
; movz x6, #513 ; movz x6, #513
; movk x6, #2052, LSL #16 ; movk x6, x6, #2052, LSL #16
; movk x6, #8208, LSL #32 ; movk x6, x6, #8208, LSL #32
; movk x6, #32832, LSL #48 ; movk x6, x6, #32832, LSL #48
; dup v17.2d, x6 ; dup v17.2d, x6
; and v20.16b, v3.16b, v17.16b ; and v20.16b, v3.16b, v17.16b
; ext v22.16b, v20.16b, v20.16b, #8 ; ext v22.16b, v20.16b, v20.16b, #8
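
The movz/movk chain here builds the byte-lane constant 0x8040_2010_0804_0201, which is and-ed with the result of the sshr #7 sign-splat so that each lane contributes a distinct bit (presumably to be reduced into a scalar lane mask later in the lowering); each movk reads the partially built value, hence the repeated x6. The constant, reassembled from the four immediates:

    fn main() {
        let mut m: u64 = 513;    // movz x6, #513                -> 0x0201
        m |= 2052u64 << 16;      // movk x6, x6, #2052, LSL #16  -> 0x0804
        m |= 8208u64 << 32;      // movk x6, x6, #8208, LSL #32  -> 0x2010
        m |= 32832u64 << 48;     // movk x6, x6, #32832, LSL #48 -> 0x8040
        assert_eq!(m, 0x8040_2010_0804_0201);
    }
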