aarch64: fix up regalloc2 semantics. (#4830)
This PR removes all uses of modify-operands in the aarch64 backend, replacing them with reused-input operands instead. This has the nice effect of removing a bunch of move instructions and more clearly representing inputs and outputs. This PR also removes the explicit use of pinned vregs in the aarch64 backend, instead using fixed-register constraints on the operands when insts or pseudo-inst sequences require certain registers. This is the second PR in the regalloc-semantics cleanup series; after the remaining backend (s390x) and the ABI code are cleaned up as well, we'll be able to simplify the regalloc2 frontend.
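To make the two constraint styles concrete, here is a minimal sketch at the regalloc2 operand level; it is illustrative only (Cranelift expresses the same constraints through its `OperandCollector` during operand collection), and the MOVK example mirrors the new modeling in the diff below:

    use regalloc2::{Operand, VReg};

    // Illustrative sketch, not code from this PR: how a read-modify-write
    // instruction such as MOVK can describe its registers to regalloc2.
    fn movk_operands(rd: VReg, rn: VReg) -> Vec<Operand> {
        vec![
            // Old style (removed by this PR): a single "modify" operand,
            // which the allocator treats as a use and a def of one vreg.
            //
            // New style: a separate input vreg and output vreg, with the
            // def constrained to reuse the allocation of operand 0, so no
            // move instruction is needed and rd == rn in the encoding.
            Operand::reg_use(rn),
            Operand::reg_reuse_def(rd, 0),
        ]
    }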
@@ -560,10 +560,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
         insts.push(Inst::StoreP64 {
             rt: fp_reg(),
             rt2: link_reg(),
-            mem: PairAMode::PreIndexed(
-                writable_stack_reg(),
-                SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
-            ),
+            mem: PairAMode::SPPreIndexed(SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap()),
             flags: MemFlags::trusted(),
         });

@@ -601,10 +598,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
         insts.push(Inst::LoadP64 {
             rt: writable_fp_reg(),
             rt2: writable_link_reg(),
-            mem: PairAMode::PostIndexed(
-                writable_stack_reg(),
-                SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
-            ),
+            mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(16, types::I64).unwrap()),
             flags: MemFlags::trusted(),
         });
         insts
@@ -676,10 +670,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
                 // str rd, [sp, #-16]!
                 insts.push(Inst::Store64 {
                     rd,
-                    mem: AMode::PreIndexed(
-                        writable_stack_reg(),
-                        SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
-                    ),
+                    mem: AMode::SPPreIndexed(SImm9::maybe_from_i64(-clobber_offset_change).unwrap()),
                     flags: MemFlags::trusted(),
                 });

@@ -708,8 +699,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
                 insts.push(Inst::StoreP64 {
                     rt,
                     rt2,
-                    mem: PairAMode::PreIndexed(
-                        writable_stack_reg(),
+                    mem: PairAMode::SPPreIndexed(
                         SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(),
                     ),
                     flags: MemFlags::trusted(),
@@ -734,10 +724,7 @@ impl ABIMachineSpec for AArch64MachineDeps {

                 let store_vec_reg = |rd| Inst::FpuStore64 {
                     rd,
-                    mem: AMode::PreIndexed(
-                        writable_stack_reg(),
-                        SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
-                    ),
+                    mem: AMode::SPPreIndexed(SImm9::maybe_from_i64(-clobber_offset_change).unwrap()),
                     flags: MemFlags::trusted(),
                 };
                 let iter = clobbered_vec.chunks_exact(2);
@@ -766,8 +753,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
                     Inst::FpuStoreP64 {
                         rt,
                         rt2,
-                        mem: PairAMode::PreIndexed(
-                            writable_stack_reg(),
+                        mem: PairAMode::SPPreIndexed(
                             SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(),
                         ),
                         flags: MemFlags::trusted(),
@@ -831,16 +817,13 @@ impl ABIMachineSpec for AArch64MachineDeps {

                 let load_vec_reg = |rd| Inst::FpuLoad64 {
                     rd,
-                    mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()),
+                    mem: AMode::SPPostIndexed(SImm9::maybe_from_i64(16).unwrap()),
                     flags: MemFlags::trusted(),
                 };
                 let load_vec_reg_pair = |rt, rt2| Inst::FpuLoadP64 {
                     rt,
                     rt2,
-                    mem: PairAMode::PostIndexed(
-                        writable_stack_reg(),
-                        SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
-                    ),
+                    mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(16, F64).unwrap()),
                     flags: MemFlags::trusted(),
                 };

@@ -876,10 +859,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
                 insts.push(Inst::LoadP64 {
                     rt,
                     rt2,
-                    mem: PairAMode::PostIndexed(
-                        writable_stack_reg(),
-                        SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
-                    ),
+                    mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(16, I64).unwrap()),
                     flags: MemFlags::trusted(),
                 });
             }
@@ -893,7 +873,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
                 // ldr rd, [sp], #16
                 insts.push(Inst::ULoad64 {
                     rd,
-                    mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()),
+                    mem: AMode::SPPostIndexed(SImm9::maybe_from_i64(16).unwrap()),
                     flags: MemFlags::trusted(),
                 });
             }
@@ -171,13 +171,23 @@
       (rd WritableReg)
       (rm PReg))

-    ;; A MOV[Z,N,K] with a 16-bit immediate.
+    ;; A MOV[Z,N] with a 16-bit immediate.
     (MovWide
       (op MoveWideOp)
       (rd WritableReg)
       (imm MoveWideConst)
       (size OperandSize))

+    ;; A MOVK with a 16-bit immediate. Modifies its register; we
+    ;; model this with a separate input `rn` and output `rd` virtual
+    ;; register, with a regalloc constraint to tie them together.
+    (MovK
+      (rd WritableReg)
+      (rn Reg)
+      (imm MoveWideConst)
+      (size OperandSize))
+
+
     ;; A sign- or zero-extend operation.
     (Extend
       (rd WritableReg)
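As a usage sketch of the new `MovK` modeling (assumed helper names, not code from this PR): a wide constant can be materialized as a MOVZ followed by MOVK steps, each MOVK reading the previous vreg and defining a fresh one that regalloc ties to the same physical register, so the chain encodes with `rd == rn` and no extra moves:

    // Sketch only: `temp_writable_reg` and the immediate values are
    // assumed; field names match the `MovWide`/`MovK` definitions above.
    let v0 = temp_writable_reg(I64);
    insts.push(Inst::MovWide {
        op: MoveWideOp::MovZ,
        rd: v0,
        imm: imm_lo16,
        size: OperandSize::Size64,
    });
    let v1 = temp_writable_reg(I64);
    insts.push(Inst::MovK {
        rd: v1,
        rn: v0.to_reg(),
        imm: imm_hi16,
        size: OperandSize::Size64,
    });
    // regalloc2 allocates v1 to the same register as v0 (reused-input
    // constraint), so the MOVK modifies the MOVZ result in place.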
@@ -240,7 +250,12 @@
     ;; x28 (wr) scratch reg; value afterwards has no meaning
     (AtomicRMWLoop
       (ty Type) ;; I8, I16, I32 or I64
-      (op AtomicRMWLoopOp))
+      (op AtomicRMWLoopOp)
+      (addr Reg)
+      (operand Reg)
+      (oldval WritableReg)
+      (scratch1 WritableReg)
+      (scratch2 WritableReg))

     ;; Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked
     ;; store-conditional loop, with acquire-release semantics.
@@ -253,7 +268,11 @@
     ;; x24 (wr) scratch reg; value afterwards has no meaning
     (AtomicCASLoop
       (ty Type) ;; I8, I16, I32 or I64
-    )
+      (addr Reg)
+      (expected Reg)
+      (replacement Reg)
+      (oldval WritableReg)
+      (scratch WritableReg))

     ;; An atomic read-modify-write operation. These instructions require the
     ;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have
@@ -269,7 +288,10 @@
     ;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have
     ;; acquire-release semantics.
     (AtomicCAS
-      (rs WritableReg)
+      ;; `rd` is really `rs` in the encoded instruction (so `rd` == `rs`); we separate
+      ;; them here to have separate use and def vregs for regalloc.
+      (rd WritableReg)
+      (rs Reg)
       (rt Reg)
       (rn Reg)
       (ty Type))
@@ -342,6 +364,16 @@
       (rd WritableReg)
       (rn Reg))

+    ;; Variant of FpuRRI that modifies its `rd`, and so we name the
+    ;; input state `ri` (for "input") and constrain the two
+    ;; together.
+    (FpuRRIMod
+      (fpu_op FPUOpRIMod)
+      (rd WritableReg)
+      (ri Reg)
+      (rn Reg))
+
+
     ;; 3-op FPU instruction.
     ;; 16-bit scalars require half-precision floating-point support (FEAT_FP16).
     (FpuRRRR
@@ -479,6 +511,7 @@
     ;; Move to a vector element from a GPR.
     (MovToVec
       (rd WritableReg)
+      (ri Reg)
       (rn Reg)
       (idx u8)
       (size VectorSize))
@@ -534,6 +567,7 @@
     ;; Move vector element to another vector element.
     (VecMovElement
       (rd WritableReg)
+      (ri Reg)
       (rn Reg)
       (dest_idx u8)
       (src_idx u8)
@@ -546,12 +580,19 @@
       (rn Reg)
       (high_half bool))

-    ;; Vector narrowing operation.
-    (VecRRNarrow
+    ;; Vector narrowing operation -- low half.
+    (VecRRNarrowLow
       (op VecRRNarrowOp)
       (rd WritableReg)
       (rn Reg)
-      (high_half bool)
+      (lane_size ScalarSize))
+
+    ;; Vector narrowing operation -- high half.
+    (VecRRNarrowHigh
+      (op VecRRNarrowOp)
+      (rd WritableReg)
+      (ri Reg)
+      (rn Reg)
       (lane_size ScalarSize))

     ;; 1-operand vector instruction that operates on a pair of elements.
@@ -569,6 +610,17 @@
       (rm Reg)
       (high_half bool))

+    ;; 2-operand vector instruction that produces a result with
+    ;; twice the lane width and half the number of lanes. Variant
+    ;; that modifies `rd` (so takes its initial state as `ri`).
+    (VecRRRLongMod
+      (alu_op VecRRRLongModOp)
+      (rd WritableReg)
+      (ri Reg)
+      (rn Reg)
+      (rm Reg)
+      (high_half bool))
+
     ;; 1-operand vector instruction that extends elements of the input
     ;; register and operates on a pair of elements. The output lane width
     ;; is double that of the input.
@@ -589,6 +641,7 @@
     (VecRRRMod
       (alu_op VecALUModOp)
       (rd WritableReg)
+      (ri Reg)
       (rn Reg)
       (rm Reg)
       (size VectorSize))
@@ -623,6 +676,7 @@
     (VecShiftImmMod
       (op VecShiftImmModOp)
       (rd WritableReg)
+      (ri Reg)
       (rn Reg)
       (size VectorSize)
       (imm u8))
@@ -635,29 +689,55 @@
       (rm Reg)
       (imm4 u8))

-    ;; Table vector lookup - single register table. The table consists of 8-bit elements and is
-    ;; stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether
-    ;; to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination
-    ;; vector that correspond to out-of-range indices (greater than 15) unmodified or to set them
-    ;; to 0.
+    ;; Table vector lookup - single register table. The table
+    ;; consists of 8-bit elements and is stored in `rn`, while `rm`
+    ;; contains 8-bit element indices. This variant emits `TBL`,
+    ;; which sets elements that correspond to out-of-range indices
+    ;; (greater than 15) to 0.
     (VecTbl
       (rd WritableReg)
       (rn Reg)
-      (rm Reg)
-      (is_extension bool))
+      (rm Reg))

-    ;; Table vector lookup - two register table. The table consists of 8-bit elements and is
-    ;; stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension`
-    ;; specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in
-    ;; the destination vector that correspond to out-of-range indices (greater than 31) unmodified
-    ;; or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers
-    ;; modulo 32, that is v31 and v0 (in that order) are consecutive registers.
+    ;; Table vector lookup - single register table. The table
+    ;; consists of 8-bit elements and is stored in `rn`, while `rm`
+    ;; contains 8-bit element indices. This variant emits `TBX`,
+    ;; which leaves elements that correspond to out-of-range indices
+    ;; (greater than 15) unmodified. Hence, it takes an input vreg in
+    ;; `ri` that is constrained to the same allocation as `rd`.
+    (VecTblExt
+      (rd WritableReg)
+      (ri Reg)
+      (rn Reg)
+      (rm Reg))
+
+    ;; Table vector lookup - two register table. The table consists
+    ;; of 8-bit elements and is stored in `rn` and `rn2`, while
+    ;; `rm` contains 8-bit element indices. The table registers
+    ;; `rn` and `rn2` must have consecutive numbers modulo 32, that
+    ;; is v31 and v0 (in that order) are consecutive registers.
+    ;; This variant emits `TBL`, which sets out-of-range results to
+    ;; 0.
     (VecTbl2
       (rd WritableReg)
       (rn Reg)
       (rn2 Reg)
-      (rm Reg)
-      (is_extension bool))
+      (rm Reg))
+
+    ;; Table vector lookup - two register table. The table consists
+    ;; of 8-bit elements and is stored in `rn` and `rn2`, while
+    ;; `rm` contains 8-bit element indices. The table registers
+    ;; `rn` and `rn2` must have consecutive numbers modulo 32, that
+    ;; is v31 and v0 (in that order) are consecutive registers.
+    ;; This variant emits `TBX`, which leaves out-of-range results
+    ;; unmodified, hence takes the initial state of the result
+    ;; register in vreg `ri`.
+    (VecTbl2Ext
+      (rd WritableReg)
+      (ri Reg)
+      (rn Reg)
+      (rn2 Reg)
+      (rm Reg))

     ;; Load an element and replicate to all lanes of a vector.
     (VecLoadReplicate
@@ -888,7 +968,6 @@
   (enum
     (MovZ)
     (MovN)
-    (MovK)
 ))

 (type UImm5 (primitive UImm5))
@@ -934,6 +1013,7 @@
 (type AMode extern (enum))
 (type PairAMode extern (enum))
 (type FPUOpRI extern (enum))
+(type FPUOpRIMod extern (enum))

 (type OperandSize extern
   (enum Size32
@@ -1287,6 +1367,10 @@
     (Umull8)
     (Umull16)
     (Umull32)
+))
+
+(type VecRRRLongModOp
+  (enum
     ;; Unsigned multiply add long
     (Umlal8)
     (Umlal16)
@@ -1447,9 +1531,9 @@
 (decl fpu_op_ri_ushr (u8 u8) FPUOpRI)
 (extern constructor fpu_op_ri_ushr fpu_op_ri_ushr)

-;; Constructs an FPUOpRI.Sli* given the size in bits of the value (or lane)
+;; Constructs an FPUOpRIMod.Sli* given the size in bits of the value (or lane)
 ;; and the amount to shift by.
-(decl fpu_op_ri_sli (u8 u8) FPUOpRI)
+(decl fpu_op_ri_sli (u8 u8) FPUOpRIMod)
 (extern constructor fpu_op_ri_sli fpu_op_ri_sli)

 (decl imm12_from_negated_u64 (Imm12) u64)
@@ -1524,29 +1608,6 @@
 (decl writable_zero_reg () WritableReg)
 (extern constructor writable_zero_reg writable_zero_reg)

-;; Helpers for getting a particular real register
-(decl xreg (u8) Reg)
-(extern constructor xreg xreg)
-
-(decl writable_vreg (u8) WritableReg)
-(extern constructor writable_vreg writable_vreg)
-
-(decl writable_xreg (u8) WritableReg)
-(extern constructor writable_xreg writable_xreg)
-
-;; Helper for emitting `MInst.Mov64` instructions.
-(decl mov64_to_real (u8 Reg) Reg)
-(rule (mov64_to_real num src)
-      (let ((dst WritableReg (writable_xreg num))
-            (_ Unit (emit (MInst.Mov (operand_size $I64) dst src))))
-        dst))
-
-(decl mov64_from_real (u8) Reg)
-(rule (mov64_from_real num)
-      (let ((dst WritableReg (temp_writable_reg $I64))
-            (_ Unit (emit (MInst.Mov (operand_size $I64) dst (xreg num)))))
-        dst))
-
 ;; Helper for emitting `MInst.MovZ` instructions.
 (decl movz (MoveWideConst OperandSize) Reg)
 (rule (movz imm size)
@@ -1601,8 +1662,7 @@
 (decl vec_rrr_mod (VecALUModOp Reg Reg Reg VectorSize) Reg)
 (rule (vec_rrr_mod op src1 src2 src3 size)
       (let ((dst WritableReg (temp_writable_reg $I8X16))
-            (_1 Unit (emit (MInst.FpuMove128 dst src1)))
-            (_2 Unit (emit (MInst.VecRRRMod op dst src2 src3 size))))
+            (_1 Unit (emit (MInst.VecRRRMod op dst src1 src2 src3 size))))
        dst))

 (decl fpu_rri (FPUOpRI Reg) Reg)
@@ -1611,6 +1671,12 @@
             (_ Unit (emit (MInst.FpuRRI op dst src))))
         dst))

+(decl fpu_rri_mod (FPUOpRIMod Reg Reg) Reg)
+(rule (fpu_rri_mod op dst_src src)
+      (let ((dst WritableReg (temp_writable_reg $F64))
+            (_ Unit (emit (MInst.FpuRRIMod op dst dst_src src))))
+        dst))
+
 ;; Helper for emitting `MInst.FpuRRR` instructions.
 (decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg)
 (rule (fpu_rrr op src1 src2 size)
@@ -1790,29 +1856,33 @@
         dst))

 ;; Helper for emitting `MInst.VecTbl` instructions.
-(decl vec_tbl (Reg Reg bool) Reg)
-(rule (vec_tbl rn rm is_extension)
+(decl vec_tbl (Reg Reg) Reg)
+(rule (vec_tbl rn rm)
       (let ((dst WritableReg (temp_writable_reg $I8X16))
-            (_ Unit (emit (MInst.VecTbl dst rn rm is_extension))))
+            (_ Unit (emit (MInst.VecTbl dst rn rm))))
+        dst))
+
+(decl vec_tbl_ext (Reg Reg Reg) Reg)
+(rule (vec_tbl_ext ri rn rm)
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_ Unit (emit (MInst.VecTblExt dst ri rn rm))))
         dst))

 ;; Helper for emitting `MInst.VecTbl2` instructions.
-;; - 2 register table vector lookups require consecutive table registers;
-;; we satisfy this constraint by hardcoding the usage of v30 and v31.
-;; - Make sure that both args are in virtual regs, since it is not guaranteed
-;; that we can get them safely to the temporaries if either is in a real
-;; register.
-(decl vec_tbl2 (Reg Reg Reg bool Type) Reg)
-(rule (vec_tbl2 rn rn2 rm is_extension ty)
+(decl vec_tbl2 (Reg Reg Reg Type) Reg)
+(rule (vec_tbl2 rn rn2 rm ty)
       (let (
-        (temp WritableReg (writable_vreg 30))
-        (temp2 WritableReg (writable_vreg 31))
        (dst WritableReg (temp_writable_reg $I8X16))
-        (rn Reg (ensure_in_vreg rn ty))
-        (rn2 Reg (ensure_in_vreg rn2 ty))
-        (_ Unit (emit (MInst.FpuMove128 temp rn)))
-        (_ Unit (emit (MInst.FpuMove128 temp2 rn2)))
-        (_ Unit (emit (MInst.VecTbl2 dst temp temp2 rm is_extension)))
+        (_ Unit (emit (MInst.VecTbl2 dst rn rn2 rm)))
       )
        dst))

+;; Helper for emitting `MInst.VecTbl2Ext` instructions.
+(decl vec_tbl2_ext (Reg Reg Reg Reg Type) Reg)
+(rule (vec_tbl2_ext ri rn rn2 rm ty)
+      (let (
+        (dst WritableReg (temp_writable_reg $I8X16))
+        (_ Unit (emit (MInst.VecTbl2Ext dst ri rn rn2 rm)))
+      )
+        dst))
+
@@ -1830,22 +1900,18 @@
             (_ Unit (emit (MInst.VecRRPairLong op dst src))))
         dst))

-;; Helper for emitting `MInst.VecRRRLong` instructions, but for variants
-;; where the operation both reads and modifies the destination register.
-;;
-;; Currently this is only used for `VecRRRLongOp.Umlal*`
-(decl vec_rrrr_long (VecRRRLongOp Reg Reg Reg bool) Reg)
+;; Helper for emitting `MInst.VecRRRLongMod` instructions.
+(decl vec_rrrr_long (VecRRRLongModOp Reg Reg Reg bool) Reg)
 (rule (vec_rrrr_long op src1 src2 src3 high_half)
       (let ((dst WritableReg (temp_writable_reg $I8X16))
-            (_ Unit (emit (MInst.FpuMove128 dst src1)))
-            (_ Unit (emit (MInst.VecRRRLong op dst src2 src3 high_half))))
+            (_ Unit (emit (MInst.VecRRRLongMod op dst src1 src2 src3 high_half))))
         dst))

 ;; Helper for emitting `MInst.VecRRNarrow` instructions.
-(decl vec_rr_narrow (VecRRNarrowOp Reg ScalarSize) Reg)
-(rule (vec_rr_narrow op src size)
+(decl vec_rr_narrow_low (VecRRNarrowOp Reg ScalarSize) Reg)
+(rule (vec_rr_narrow_low op src size)
       (let ((dst WritableReg (temp_writable_reg $I8X16))
-            (_ Unit (emit (MInst.VecRRNarrow op dst src $false size))))
+            (_ Unit (emit (MInst.VecRRNarrowLow op dst src size))))
         dst))

 ;; Helper for emitting `MInst.VecRRNarrow` instructions which update the
@@ -1853,8 +1919,7 @@
 (decl vec_rr_narrow_high (VecRRNarrowOp Reg Reg ScalarSize) Reg)
 (rule (vec_rr_narrow_high op mod src size)
       (let ((dst WritableReg (temp_writable_reg $I8X16))
-            (_ Unit (emit (MInst.FpuMove128 dst mod)))
-            (_ Unit (emit (MInst.VecRRNarrow op dst src $true size))))
+            (_ Unit (emit (MInst.VecRRNarrowHigh op dst mod src size))))
         dst))

 ;; Helper for emitting `MInst.VecRRLong` instructions.
@@ -1897,16 +1962,14 @@
 (decl mov_to_vec (Reg Reg u8 VectorSize) Reg)
 (rule (mov_to_vec src1 src2 lane size)
       (let ((dst WritableReg (temp_writable_reg $I8X16))
-            (_ Unit (emit (MInst.FpuMove128 dst src1)))
-            (_ Unit (emit (MInst.MovToVec dst src2 lane size))))
+            (_ Unit (emit (MInst.MovToVec dst src1 src2 lane size))))
         dst))

 ;; Helper for emitting `MInst.VecMovElement` instructions.
 (decl mov_vec_elem (Reg Reg u8 u8 VectorSize) Reg)
 (rule (mov_vec_elem src1 src2 dst_idx src_idx size)
       (let ((dst WritableReg (temp_writable_reg $I8X16))
-            (_ Unit (emit (MInst.FpuMove128 dst src1)))
-            (_ Unit (emit (MInst.VecMovElement dst src2 dst_idx src_idx size))))
+            (_ Unit (emit (MInst.VecMovElement dst src1 src2 dst_idx src_idx size))))
         dst))

 ;; Helper for emitting `MInst.MovFromVec` instructions.
@@ -2104,15 +2167,15 @@

 ;; Helper for generating `xtn` instructions.
 (decl xtn (Reg ScalarSize) Reg)
-(rule (xtn x size) (vec_rr_narrow (VecRRNarrowOp.Xtn) x size))
+(rule (xtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Xtn) x size))

 ;; Helper for generating `fcvtn` instructions.
 (decl fcvtn (Reg ScalarSize) Reg)
-(rule (fcvtn x size) (vec_rr_narrow (VecRRNarrowOp.Fcvtn) x size))
+(rule (fcvtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Fcvtn) x size))

 ;; Helper for generating `sqxtn` instructions.
 (decl sqxtn (Reg ScalarSize) Reg)
-(rule (sqxtn x size) (vec_rr_narrow (VecRRNarrowOp.Sqxtn) x size))
+(rule (sqxtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Sqxtn) x size))

 ;; Helper for generating `sqxtn2` instructions.
 (decl sqxtn2 (Reg Reg ScalarSize) Reg)
@@ -2120,7 +2183,7 @@

 ;; Helper for generating `sqxtun` instructions.
 (decl sqxtun (Reg ScalarSize) Reg)
-(rule (sqxtun x size) (vec_rr_narrow (VecRRNarrowOp.Sqxtun) x size))
+(rule (sqxtun x size) (vec_rr_narrow_low (VecRRNarrowOp.Sqxtun) x size))

 ;; Helper for generating `sqxtun2` instructions.
 (decl sqxtun2 (Reg Reg ScalarSize) Reg)
@@ -2128,7 +2191,7 @@

 ;; Helper for generating `uqxtn` instructions.
 (decl uqxtn (Reg ScalarSize) Reg)
-(rule (uqxtn x size) (vec_rr_narrow (VecRRNarrowOp.Uqxtn) x size))
+(rule (uqxtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Uqxtn) x size))

 ;; Helper for generating `uqxtn2` instructions.
 (decl uqxtn2 (Reg Reg ScalarSize) Reg)
@@ -2187,7 +2250,7 @@

 ;; Helper for generating `umlal32` instructions.
 (decl umlal32 (Reg Reg Reg bool) Reg)
-(rule (umlal32 x y z high_half) (vec_rrrr_long (VecRRRLongOp.Umlal32) x y z high_half))
+(rule (umlal32 x y z high_half) (vec_rrrr_long (VecRRRLongModOp.Umlal32) x y z high_half))

 ;; Helper for generating `smull8` instructions.
 (decl smull8 (Reg Reg bool) Reg)
@@ -2719,8 +2782,7 @@
 (rule (lse_atomic_cas addr expect replace ty)
       (let (
        (dst WritableReg (temp_writable_reg ty))
-        (_ Unit (emit (MInst.Mov (operand_size ty) dst expect)))
-        (_ Unit (emit (MInst.AtomicCAS dst replace addr ty)))
+        (_ Unit (emit (MInst.AtomicCAS dst expect replace addr ty)))
       )
        dst))

@@ -2730,16 +2792,13 @@
 ;; regs, and that's not guaranteed safe if either is in a real reg.
 ;; - Move the args to the preordained AtomicRMW input regs
 ;; - And finally, copy the preordained AtomicRMW output reg to its destination.
-(decl atomic_rmw_loop (AtomicRMWLoopOp Value Value Type) Reg)
-(rule (atomic_rmw_loop op p arg2 ty)
-      (let (
-        (v_addr Reg (ensure_in_vreg p $I64))
-        (v_arg2 Reg (ensure_in_vreg arg2 $I64))
-        (r_addr Reg (mov64_to_real 25 v_addr))
-        (r_arg2 Reg (mov64_to_real 26 v_arg2))
-        (_ Unit (emit (MInst.AtomicRMWLoop ty op)))
-      )
-        (mov64_from_real 27)))
+(decl atomic_rmw_loop (AtomicRMWLoopOp Reg Reg Type) Reg)
+(rule (atomic_rmw_loop op addr operand ty)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (scratch1 WritableReg (temp_writable_reg $I64))
+            (scratch2 WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.AtomicRMWLoop ty op addr operand dst scratch1 scratch2))))
+        dst))

 ;; Helper for emitting `MInst.AtomicCASLoop` instructions.
 ;; This is very similar to, but not identical to, the AtomicRmw case. Note
@@ -2749,21 +2808,10 @@
 ;; for `atomic_rmw_loop` above.
 (decl atomic_cas_loop (Reg Reg Reg Type) Reg)
 (rule (atomic_cas_loop addr expect replace ty)
-      (let (
-        (v_addr Reg (ensure_in_vreg addr $I64))
-        (v_exp Reg (ensure_in_vreg expect $I64))
-        (v_rep Reg (ensure_in_vreg replace $I64))
-        ;; Move the args to the preordained AtomicCASLoop input regs
-        (r_addr Reg (mov64_to_real 25 v_addr))
-        (r_exp Reg (mov64_to_real 26 v_exp))
-        (r_rep Reg (mov64_to_real 28 v_rep))
-        ;; Now the AtomicCASLoop itself, implemented in the normal way, with a
-        ;; load-exclusive, store-exclusive loop
-        (_ Unit (emit (MInst.AtomicCASLoop ty)))
-      )
-        ;; And finally, copy the preordained AtomicCASLoop output reg to its destination.
-        ;; Also, x24 and x28 are trashed.
-        (mov64_from_real 27)))
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (scratch WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.AtomicCASLoop ty addr expect replace dst scratch))))
+        dst))

 ;; Helper for emitting `MInst.MovPReg` instructions.
 (decl mov_preg (PReg) Reg)
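The pinned-register dance removed above (`mov64_to_real` into x25/x26/x28, result copied out of x27) is replaced by constraints reported straight to the allocator. A sketch of what that can look like at the regalloc2 level, using the register assignments documented in the comments (illustrative only; Cranelift's `OperandCollector` wraps this):

    use regalloc2::{Operand, PReg, RegClass, VReg};

    // Sketch only: operand constraints for an AtomicCASLoop, mirroring
    // the old preordained registers (x25/x26/x28 in, x27 out, x24 scratch).
    fn atomic_cas_loop_operands(
        addr: VReg,
        expected: VReg,
        replacement: VReg,
        oldval: VReg,
        scratch: VReg,
    ) -> Vec<Operand> {
        let x = |n| PReg::new(n, RegClass::Int);
        vec![
            Operand::reg_fixed_use(addr, x(25)),
            Operand::reg_fixed_use(expected, x(26)),
            Operand::reg_fixed_use(replacement, x(28)),
            Operand::reg_fixed_def(oldval, x(27)),
            Operand::reg_fixed_def(scratch, x(24)),
        ]
    }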
@@ -2811,15 +2859,13 @@
 (decl fcopy_sign (Reg Reg Type) Reg)
 (rule (fcopy_sign x y (ty_scalar_float ty))
       (let ((dst WritableReg (temp_writable_reg $F64))
-            (_ Unit (emit (MInst.FpuMove64 dst x)))
             (tmp Reg (fpu_rri (fpu_op_ri_ushr (ty_bits ty) (max_shift ty)) y))
-            (_ Unit (emit (MInst.FpuRRI (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst tmp))))
+            (_ Unit (emit (MInst.FpuRRIMod (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst x tmp))))
         dst))
 (rule (fcopy_sign x y ty @ (multi_lane _ _))
       (let ((dst WritableReg (temp_writable_reg $I8X16))
-            (_ Unit (emit (MInst.FpuMove128 dst x)))
             (tmp Reg (vec_shift_imm (VecShiftImmOp.Ushr) (max_shift (lane_type ty)) y (vector_size ty)))
-            (_ Unit (emit (MInst.VecShiftImmMod (VecShiftImmModOp.Sli) dst tmp (vector_size ty) (max_shift (lane_type ty))))))
+            (_ Unit (emit (MInst.VecShiftImmMod (VecShiftImmModOp.Sli) dst x tmp (vector_size ty) (max_shift (lane_type ty))))))
         dst))

 ;; Helpers for generating `MInst.FpuToInt` instructions.
@@ -3,7 +3,7 @@
 use crate::ir::types::*;
 use crate::ir::Type;
 use crate::isa::aarch64::inst::*;
-use crate::machinst::{ty_bits, MachLabel, PrettyPrint, Reg, Writable};
+use crate::machinst::{ty_bits, MachLabel, PrettyPrint, Reg};
 use core::convert::Into;
 use std::string::String;

@@ -122,9 +122,11 @@ pub enum AMode {
     // Real ARM64 addressing modes:
     //
     /// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation.
-    PostIndexed(Writable<Reg>, SImm9),
+    /// Specialized here to SP so we don't have to emit regalloc metadata.
+    SPPostIndexed(SImm9),
     /// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation.
-    PreIndexed(Writable<Reg>, SImm9),
+    /// Specialized here to SP so we don't have to emit regalloc metadata.
+    SPPreIndexed(SImm9),

     // N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to
     // what the ISA calls the "register offset" addressing mode. We split out
@@ -220,10 +222,12 @@ impl AMode {
             &AMode::RegExtended(r1, r2, ext) => {
                 AMode::RegExtended(allocs.next(r1), allocs.next(r2), ext)
             }
-            &AMode::PreIndexed(reg, simm9) => AMode::PreIndexed(allocs.next_writable(reg), simm9),
-            &AMode::PostIndexed(reg, simm9) => AMode::PostIndexed(allocs.next_writable(reg), simm9),
+            // Note that SP is not managed by regalloc, so there is no register to report in the
+            // pre/post-indexed amodes.
             &AMode::RegOffset(r, off, ty) => AMode::RegOffset(allocs.next(r), off, ty),
-            &AMode::FPOffset(..)
+            &AMode::SPPreIndexed(..)
+            | &AMode::SPPostIndexed(..)
+            | &AMode::FPOffset(..)
             | &AMode::SPOffset(..)
             | &AMode::NominalSPOffset(..)
             | AMode::Label(..) => self.clone(),
@@ -235,8 +239,8 @@ impl AMode {
 #[derive(Clone, Debug)]
 pub enum PairAMode {
     SignedOffset(Reg, SImm7Scaled),
-    PreIndexed(Writable<Reg>, SImm7Scaled),
-    PostIndexed(Writable<Reg>, SImm7Scaled),
+    SPPreIndexed(SImm7Scaled),
+    SPPostIndexed(SImm7Scaled),
 }

 impl PairAMode {
@@ -246,12 +250,7 @@ impl PairAMode {
             &PairAMode::SignedOffset(reg, simm7scaled) => {
                 PairAMode::SignedOffset(allocs.next(reg), simm7scaled)
             }
-            &PairAMode::PreIndexed(reg, simm7scaled) => {
-                PairAMode::PreIndexed(allocs.next_writable(reg), simm7scaled)
-            }
-            &PairAMode::PostIndexed(reg, simm7scaled) => {
-                PairAMode::PostIndexed(allocs.next_writable(reg), simm7scaled)
-            }
+            &PairAMode::SPPreIndexed(..) | &PairAMode::SPPostIndexed(..) => self.clone(),
         }
     }
 }
@@ -470,15 +469,13 @@ impl PrettyPrint for AMode {
                 format!("[{}, {}, {}]", r1, r2, op)
             }
             &AMode::Label(ref label) => label.pretty_print(0, allocs),
-            &AMode::PreIndexed(r, simm9) => {
-                let r = pretty_print_reg(r.to_reg(), allocs);
+            &AMode::SPPreIndexed(simm9) => {
                 let simm9 = simm9.pretty_print(8, allocs);
-                format!("[{}, {}]!", r, simm9)
+                format!("[sp, {}]!", simm9)
             }
-            &AMode::PostIndexed(r, simm9) => {
-                let r = pretty_print_reg(r.to_reg(), allocs);
+            &AMode::SPPostIndexed(simm9) => {
                 let simm9 = simm9.pretty_print(8, allocs);
-                format!("[{}], {}", r, simm9)
+                format!("[sp], {}", simm9)
             }
             // Eliminated by `mem_finalize()`.
             &AMode::SPOffset(..)
@@ -503,15 +500,13 @@ impl PrettyPrint for PairAMode {
                     format!("[{}]", reg)
                 }
             }
-            &PairAMode::PreIndexed(reg, simm7) => {
-                let reg = pretty_print_reg(reg.to_reg(), allocs);
+            &PairAMode::SPPreIndexed(simm7) => {
                 let simm7 = simm7.pretty_print(8, allocs);
-                format!("[{}, {}]!", reg, simm7)
+                format!("[sp, {}]!", simm7)
             }
-            &PairAMode::PostIndexed(reg, simm7) => {
-                let reg = pretty_print_reg(reg.to_reg(), allocs);
+            &PairAMode::SPPostIndexed(simm7) => {
                 let simm7 = simm7.pretty_print(8, allocs);
-                format!("[{}], {}", reg, simm7)
+                format!("[sp], {}", simm7)
             }
         }
     }
 }
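Because SP is not under regalloc's control, the specialized amodes carry no register operand at all, so `with_allocs` above can simply clone them. For example (a sketch reusing the types from this diff, matching the ABI change at the top), the prologue's frame save becomes:

    // Sketch: the same construction as in gen_prologue above; no
    // writable register operand, and no regalloc metadata for SP.
    let push_fp_lr = Inst::StoreP64 {
        rt: fp_reg(),
        rt2: link_reg(),
        mem: PairAMode::SPPreIndexed(SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap()),
        flags: MemFlags::trusted(),
    };
    // Per the format strings above, this pretty-prints with SP implicit,
    // roughly: stp fp, lr, [sp, #-16]!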
|
|||||||
@@ -184,7 +184,6 @@ fn enc_move_wide(op: MoveWideOp, rd: Writable<Reg>, imm: MoveWideConst, size: Op
|
|||||||
let op = match op {
|
let op = match op {
|
||||||
MoveWideOp::MovN => 0b00,
|
MoveWideOp::MovN => 0b00,
|
||||||
MoveWideOp::MovZ => 0b10,
|
MoveWideOp::MovZ => 0b10,
|
||||||
MoveWideOp::MovK => 0b11,
|
|
||||||
};
|
};
|
||||||
0x12800000
|
0x12800000
|
||||||
| size.sf_bit() << 31
|
| size.sf_bit() << 31
|
||||||
@@ -194,6 +193,15 @@ fn enc_move_wide(op: MoveWideOp, rd: Writable<Reg>, imm: MoveWideConst, size: Op
|
|||||||
| machreg_to_gpr(rd.to_reg())
|
| machreg_to_gpr(rd.to_reg())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn enc_movk(rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32 {
|
||||||
|
assert!(imm.shift <= 0b11);
|
||||||
|
0x72800000
|
||||||
|
| size.sf_bit() << 31
|
||||||
|
| u32::from(imm.shift) << 21
|
||||||
|
| u32::from(imm.bits) << 5
|
||||||
|
| machreg_to_gpr(rd.to_reg())
|
||||||
|
}
|
||||||
|
|
||||||
fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
|
fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
|
||||||
(op_31_22 << 22)
|
(op_31_22 << 22)
|
||||||
| (simm7.bits() << 15)
|
| (simm7.bits() << 15)
|
||||||
@@ -1040,12 +1048,12 @@ impl MachInstEmit for Inst {
|
|||||||
_ => panic!("Unspported size for LDR from constant pool!"),
|
_ => panic!("Unspported size for LDR from constant pool!"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
&AMode::PreIndexed(reg, simm9) => {
|
&AMode::SPPreIndexed(simm9) => {
|
||||||
let reg = allocs.next(reg.to_reg());
|
let reg = stack_reg();
|
||||||
sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
|
sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
|
||||||
}
|
}
|
||||||
&AMode::PostIndexed(reg, simm9) => {
|
&AMode::SPPostIndexed(simm9) => {
|
||||||
let reg = allocs.next(reg.to_reg());
|
let reg = stack_reg();
|
||||||
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
|
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
|
||||||
}
|
}
|
||||||
// Eliminated by `mem_finalize()` above.
|
// Eliminated by `mem_finalize()` above.
|
||||||
@@ -1134,12 +1142,12 @@ impl MachInstEmit for Inst {
|
|||||||
&AMode::Label(..) => {
|
&AMode::Label(..) => {
|
||||||
panic!("Store to a MemLabel not implemented!");
|
panic!("Store to a MemLabel not implemented!");
|
||||||
}
|
}
|
||||||
&AMode::PreIndexed(reg, simm9) => {
|
&AMode::SPPreIndexed(simm9) => {
|
||||||
let reg = allocs.next(reg.to_reg());
|
let reg = stack_reg();
|
||||||
sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
|
sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
|
||||||
}
|
}
|
||||||
&AMode::PostIndexed(reg, simm9) => {
|
&AMode::SPPostIndexed(simm9) => {
|
||||||
let reg = allocs.next(reg.to_reg());
|
let reg = stack_reg();
|
||||||
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
|
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
|
||||||
}
|
}
|
||||||
// Eliminated by `mem_finalize()` above.
|
// Eliminated by `mem_finalize()` above.
|
||||||
@@ -1170,14 +1178,14 @@ impl MachInstEmit for Inst {
|
|||||||
let reg = allocs.next(reg);
|
let reg = allocs.next(reg);
|
||||||
sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
|
sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
|
||||||
}
|
}
|
||||||
&PairAMode::PreIndexed(reg, simm7) => {
|
&PairAMode::SPPreIndexed(simm7) => {
|
||||||
assert_eq!(simm7.scale_ty, I64);
|
assert_eq!(simm7.scale_ty, I64);
|
||||||
let reg = allocs.next(reg.to_reg());
|
let reg = stack_reg();
|
||||||
sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2));
|
sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2));
|
||||||
}
|
}
|
||||||
&PairAMode::PostIndexed(reg, simm7) => {
|
&PairAMode::SPPostIndexed(simm7) => {
|
||||||
assert_eq!(simm7.scale_ty, I64);
|
assert_eq!(simm7.scale_ty, I64);
|
||||||
let reg = allocs.next(reg.to_reg());
|
let reg = stack_reg();
|
||||||
sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2));
|
sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1203,14 +1211,14 @@ impl MachInstEmit for Inst {
|
|||||||
let reg = allocs.next(reg);
|
let reg = allocs.next(reg);
|
||||||
sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
|
sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
|
||||||
}
|
}
|
||||||
&PairAMode::PreIndexed(reg, simm7) => {
|
&PairAMode::SPPreIndexed(simm7) => {
|
||||||
assert_eq!(simm7.scale_ty, I64);
|
assert_eq!(simm7.scale_ty, I64);
|
||||||
let reg = allocs.next(reg.to_reg());
|
let reg = stack_reg();
|
||||||
sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2));
|
sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2));
|
||||||
}
|
}
|
||||||
&PairAMode::PostIndexed(reg, simm7) => {
|
&PairAMode::SPPostIndexed(simm7) => {
|
||||||
assert_eq!(simm7.scale_ty, I64);
|
assert_eq!(simm7.scale_ty, I64);
|
||||||
let reg = allocs.next(reg.to_reg());
|
let reg = stack_reg();
|
||||||
sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2));
|
sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1249,14 +1257,14 @@ impl MachInstEmit for Inst {
|
|||||||
let reg = allocs.next(reg);
|
let reg = allocs.next(reg);
|
||||||
sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
|
sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
|
||||||
}
|
}
|
||||||
&PairAMode::PreIndexed(reg, simm7) => {
|
&PairAMode::SPPreIndexed(simm7) => {
|
||||||
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
|
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
|
||||||
let reg = allocs.next(reg.to_reg());
|
let reg = stack_reg();
|
||||||
sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2));
|
sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2));
|
||||||
}
|
}
|
||||||
&PairAMode::PostIndexed(reg, simm7) => {
|
&PairAMode::SPPostIndexed(simm7) => {
|
||||||
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
|
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
|
||||||
let reg = allocs.next(reg.to_reg());
|
let reg = stack_reg();
|
||||||
sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2));
|
sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1295,14 +1303,14 @@ impl MachInstEmit for Inst {
|
|||||||
let reg = allocs.next(reg);
|
let reg = allocs.next(reg);
|
||||||
sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
|
sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
|
||||||
}
|
}
|
||||||
&PairAMode::PreIndexed(reg, simm7) => {
|
&PairAMode::SPPreIndexed(simm7) => {
|
||||||
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
|
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
|
||||||
let reg = allocs.next(reg.to_reg());
|
let reg = stack_reg();
|
||||||
sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2));
|
sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2));
|
||||||
}
|
}
|
||||||
&PairAMode::PostIndexed(reg, simm7) => {
|
&PairAMode::SPPostIndexed(simm7) => {
|
||||||
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
|
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
|
||||||
let reg = allocs.next(reg.to_reg());
|
let reg = stack_reg();
|
||||||
sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2));
|
sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1356,6 +1364,12 @@ impl MachInstEmit for Inst {
|
|||||||
let rd = allocs.next_writable(rd);
|
let rd = allocs.next_writable(rd);
|
||||||
sink.put4(enc_move_wide(op, rd, imm, size));
|
sink.put4(enc_move_wide(op, rd, imm, size));
|
||||||
}
|
}
|
||||||
|
&Inst::MovK { rd, rn, imm, size } => {
|
||||||
|
let rn = allocs.next(rn);
|
||||||
|
let rd = allocs.next_writable(rd);
|
||||||
|
debug_assert_eq!(rn, rd.to_reg());
|
||||||
|
sink.put4(enc_movk(rd, imm, size));
|
||||||
|
}
|
||||||
&Inst::CSel { rd, rn, rm, cond } => {
|
&Inst::CSel { rd, rn, rm, cond } => {
|
||||||
let rd = allocs.next_writable(rd);
|
let rd = allocs.next_writable(rd);
|
||||||
let rn = allocs.next(rn);
|
let rn = allocs.next(rn);
|
||||||
@@ -1403,7 +1417,7 @@ impl MachInstEmit for Inst {
|
|||||||
let rn = allocs.next(rn);
|
let rn = allocs.next(rn);
|
||||||
sink.put4(enc_acq_rel(ty, op, rs, rt, rn));
|
sink.put4(enc_acq_rel(ty, op, rs, rt, rn));
|
||||||
}
|
}
|
||||||
&Inst::AtomicRMWLoop { ty, op } => {
|
&Inst::AtomicRMWLoop { ty, op, .. } => {
|
||||||
/* Emit this:
|
/* Emit this:
|
||||||
again:
|
again:
|
||||||
ldaxr{,b,h} x/w27, [x25]
|
ldaxr{,b,h} x/w27, [x25]
|
||||||
@@ -1581,8 +1595,10 @@ impl MachInstEmit for Inst {
|
|||||||
));
|
));
|
||||||
sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
|
sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
|
||||||
}
|
}
|
||||||
&Inst::AtomicCAS { rs, rt, rn, ty } => {
|
&Inst::AtomicCAS { rd, rs, rt, rn, ty } => {
|
||||||
let rs = allocs.next_writable(rs);
|
let rd = allocs.next_writable(rd);
|
||||||
|
let rs = allocs.next(rs);
|
||||||
|
debug_assert_eq!(rd.to_reg(), rs);
|
||||||
let rt = allocs.next(rt);
|
let rt = allocs.next(rt);
|
||||||
let rn = allocs.next(rn);
|
let rn = allocs.next(rn);
|
||||||
let size = match ty {
|
let size = match ty {
|
||||||
@@ -1593,9 +1609,9 @@ impl MachInstEmit for Inst {
|
|||||||
_ => panic!("Unsupported type: {}", ty),
|
_ => panic!("Unsupported type: {}", ty),
|
||||||
};
|
};
|
||||||
|
|
||||||
sink.put4(enc_cas(size, rs, rt, rn));
|
sink.put4(enc_cas(size, rd, rt, rn));
|
||||||
}
|
}
|
||||||
&Inst::AtomicCASLoop { ty } => {
|
&Inst::AtomicCASLoop { ty, .. } => {
|
||||||
/* Emit this:
|
/* Emit this:
|
||||||
again:
|
again:
|
||||||
ldaxr{,b,h} x/w27, [x25]
|
ldaxr{,b,h} x/w27, [x25]
|
||||||
@@ -1788,7 +1804,15 @@ impl MachInstEmit for Inst {
|
|||||||
| machreg_to_vec(rd.to_reg()),
|
| machreg_to_vec(rd.to_reg()),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
FPUOpRI::Sli64(imm) => {
|
}
|
||||||
|
}
|
||||||
|
&Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
|
||||||
|
let rd = allocs.next_writable(rd);
|
||||||
|
let ri = allocs.next(ri);
|
||||||
|
let rn = allocs.next(rn);
|
||||||
|
debug_assert_eq!(rd.to_reg(), ri);
|
||||||
|
match fpu_op {
|
||||||
|
FPUOpRIMod::Sli64(imm) => {
|
||||||
debug_assert_eq!(64, imm.lane_size_in_bits);
|
debug_assert_eq!(64, imm.lane_size_in_bits);
|
||||||
sink.put4(
|
sink.put4(
|
||||||
0b01_1_111110_0000000_010101_00000_00000
|
0b01_1_111110_0000000_010101_00000_00000
|
||||||
@@ -1797,7 +1821,7 @@ impl MachInstEmit for Inst {
|
|||||||
| machreg_to_vec(rd.to_reg()),
|
| machreg_to_vec(rd.to_reg()),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
FPUOpRI::Sli32(imm) => {
|
FPUOpRIMod::Sli32(imm) => {
|
||||||
debug_assert_eq!(32, imm.lane_size_in_bits);
|
debug_assert_eq!(32, imm.lane_size_in_bits);
|
||||||
sink.put4(
|
sink.put4(
|
||||||
0b0_0_1_011110_0000000_010101_00000_00000
|
0b0_0_1_011110_0000000_010101_00000_00000
|
||||||
@@ -2036,11 +2060,14 @@ impl MachInstEmit for Inst {
|
|||||||
&Inst::VecShiftImmMod {
|
&Inst::VecShiftImmMod {
|
||||||
op,
|
op,
|
||||||
rd,
|
rd,
|
||||||
|
ri,
|
||||||
rn,
|
rn,
|
||||||
size,
|
size,
|
||||||
imm,
|
imm,
|
||||||
} => {
|
} => {
|
||||||
let rd = allocs.next_writable(rd);
|
let rd = allocs.next_writable(rd);
|
||||||
|
let ri = allocs.next(ri);
|
||||||
|
debug_assert_eq!(rd.to_reg(), ri);
|
||||||
let rn = allocs.next(rn);
|
let rn = allocs.next(rn);
|
||||||
let (is_shr, mut template) = match op {
|
let (is_shr, mut template) = match op {
|
||||||
VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
|
VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
|
||||||
@@ -2096,30 +2123,43 @@ impl MachInstEmit for Inst {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
&Inst::VecTbl {
|
&Inst::VecTbl { rd, rn, rm } => {
|
||||||
rd,
|
|
||||||
rn,
|
|
||||||
rm,
|
|
||||||
is_extension,
|
|
||||||
-            } => {
                 let rn = allocs.next(rn);
                 let rm = allocs.next(rm);
                 let rd = allocs.next_writable(rd);
-                sink.put4(enc_tbl(is_extension, 0b00, rd, rn, rm));
+                sink.put4(enc_tbl(/* is_extension = */ false, 0b00, rd, rn, rm));
             }
-            &Inst::VecTbl2 {
-                rd,
-                rn,
-                rn2,
-                rm,
-                is_extension,
-            } => {
+            &Inst::VecTblExt { rd, ri, rn, rm } => {
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+                let rd = allocs.next_writable(rd);
+                let ri = allocs.next(ri);
+                debug_assert_eq!(rd.to_reg(), ri);
+                sink.put4(enc_tbl(/* is_extension = */ true, 0b00, rd, rn, rm));
+            }
+            &Inst::VecTbl2 { rd, rn, rn2, rm } => {
                 let rn = allocs.next(rn);
                 let rn2 = allocs.next(rn2);
                 let rm = allocs.next(rm);
                 let rd = allocs.next_writable(rd);
                 assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
-                sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm));
+                sink.put4(enc_tbl(/* is_extension = */ false, 0b01, rd, rn, rm));
+            }
+            &Inst::VecTbl2Ext {
+                rd,
+                ri,
+                rn,
+                rn2,
+                rm,
+            } => {
+                let rn = allocs.next(rn);
+                let rn2 = allocs.next(rn2);
+                let rm = allocs.next(rm);
+                let rd = allocs.next_writable(rd);
+                let ri = allocs.next(ri);
+                debug_assert_eq!(rd.to_reg(), ri);
+                assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
+                sink.put4(enc_tbl(/* is_extension = */ true, 0b01, rd, rn, rm));
             }
             &Inst::FpuCmp { size, rn, rm } => {
                 let rn = allocs.next(rn);
@@ -2254,8 +2294,16 @@ impl MachInstEmit for Inst {
                     | machreg_to_vec(rd.to_reg()),
                 );
             }
-            &Inst::MovToVec { rd, rn, idx, size } => {
+            &Inst::MovToVec {
+                rd,
+                ri,
+                rn,
+                idx,
+                size,
+            } => {
                 let rd = allocs.next_writable(rd);
+                let ri = allocs.next(ri);
+                debug_assert_eq!(rd.to_reg(), ri);
                 let rn = allocs.next(rn);
                 let (imm5, shift) = match size.lane_size() {
                     ScalarSize::Size8 => (0b00001, 1),
@@ -2475,15 +2523,26 @@ impl MachInstEmit for Inst {
                     rn,
                 ));
             }
-            &Inst::VecRRNarrow {
+            &Inst::VecRRNarrowLow {
                 op,
                 rd,
                 rn,
-                high_half,
                 lane_size,
+            }
+            | &Inst::VecRRNarrowHigh {
+                op,
+                rd,
+                rn,
+                lane_size,
+                ..
             } => {
                 let rn = allocs.next(rn);
                 let rd = allocs.next_writable(rd);
+                let high_half = match self {
+                    &Inst::VecRRNarrowLow { .. } => false,
+                    &Inst::VecRRNarrowHigh { .. } => true,
+                    _ => unreachable!(),
+                };
+
                 let size = match lane_size {
                     ScalarSize::Size8 => 0b00,
@@ -2516,12 +2575,15 @@ impl MachInstEmit for Inst {
             }
             &Inst::VecMovElement {
                 rd,
+                ri,
                 rn,
                 dest_idx,
                 src_idx,
                 size,
             } => {
                 let rd = allocs.next_writable(rd);
+                let ri = allocs.next(ri);
+                debug_assert_eq!(rd.to_reg(), ri);
                 let rn = allocs.next(rn);
                 let (imm5, shift) = match size.lane_size() {
                     ScalarSize::Size8 => (0b00001, 1),
@@ -2569,9 +2631,34 @@ impl MachInstEmit for Inst {
                     VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1),
                     VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1),
                     VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1),
-                    VecRRRLongOp::Umlal8 => (0b1, 0b00, 0b0),
-                    VecRRRLongOp::Umlal16 => (0b1, 0b01, 0b0),
-                    VecRRRLongOp::Umlal32 => (0b1, 0b10, 0b0),
+                };
+                sink.put4(enc_vec_rrr_long(
+                    high_half as u32,
+                    u,
+                    size,
+                    bit14,
+                    rm,
+                    rn,
+                    rd,
+                ));
+            }
+            &Inst::VecRRRLongMod {
+                rd,
+                ri,
+                rn,
+                rm,
+                alu_op,
+                high_half,
+            } => {
+                let rd = allocs.next_writable(rd);
+                let ri = allocs.next(ri);
+                debug_assert_eq!(rd.to_reg(), ri);
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+                let (u, size, bit14) = match alu_op {
+                    VecRRRLongModOp::Umlal8 => (0b1, 0b00, 0b0),
+                    VecRRRLongModOp::Umlal16 => (0b1, 0b01, 0b0),
+                    VecRRRLongModOp::Umlal32 => (0b1, 0b10, 0b0),
                 };
                 sink.put4(enc_vec_rrr_long(
                     high_half as u32,
@@ -2702,12 +2789,15 @@ impl MachInstEmit for Inst {
             }
             &Inst::VecRRRMod {
                 rd,
+                ri,
                 rn,
                 rm,
                 alu_op,
                 size,
             } => {
                 let rd = allocs.next_writable(rd);
+                let ri = allocs.next(ri);
+                debug_assert_eq!(rd.to_reg(), ri);
                 let rn = allocs.next(rn);
                 let rm = allocs.next(rm);
                 let (q, _enc_size) = size.enc_size();
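The `debug_assert_eq!(rd.to_reg(), ri)` checks in the arms above encode the contract a reused-input operand gives the emitter: by the time `emit` runs, regalloc2 must have assigned the reused input `ri` and the def `rd` to the same physical register, so the encoding only ever mentions `rd`. A minimal sketch of that invariant, with bare integers standing in for real `Reg` values:

    // Sketch only: once a reused-input constraint has been honored, the
    // def and the input it reuses resolve to one physical register.
    fn assert_reuse_invariant(rd_phys: u32, ri_phys: u32) {
        debug_assert_eq!(rd_phys, ri_phys);
    }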
(File diff suppressed because it is too large.)
@@ -39,7 +39,7 @@ pub use crate::isa::aarch64::lower::isle::generated_code::{
     ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3,
     FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp,
     VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp,
-    VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
+    VecRRRLongModOp, VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
 };

 /// A floating-point unit (FPU) operation with two args, a register and an immediate.
@@ -49,6 +49,13 @@ pub enum FPUOpRI {
     UShr32(FPURightShiftImm),
     /// Unsigned right shift. Rd = Rn << #imm
     UShr64(FPURightShiftImm),
+}
+
+/// A floating-point unit (FPU) operation with two args, a register and
+/// an immediate that modifies its dest (so takes that input value as a
+/// separate virtual register).
+#[derive(Copy, Clone, Debug)]
+pub enum FPUOpRIMod {
     /// Shift left and insert. Rd |= Rn << #imm
     Sli32(FPULeftShiftImm),
     /// Shift left and insert. Rd |= Rn << #imm
@@ -197,9 +204,9 @@ impl Inst {
             }
         } else {
             let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
-            insts.push(Inst::MovWide {
-                op: MoveWideOp::MovK,
+            insts.push(Inst::MovK {
                 rd,
+                rn: rd.to_reg(), // Redef the same virtual register.
                 imm,
                 size,
             });
@@ -550,9 +557,7 @@ fn memarg_operands<F: Fn(VReg) -> VReg>(memarg: &AMode, collector: &mut OperandC
            collector.reg_use(r2);
        }
        &AMode::Label(..) => {}
-        &AMode::PreIndexed(reg, ..) | &AMode::PostIndexed(reg, ..) => {
-            collector.reg_mod(reg);
-        }
+        &AMode::SPPreIndexed(..) | &AMode::SPPostIndexed(..) => {}
        &AMode::FPOffset(..) => {}
        &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => {}
        &AMode::RegOffset(r, ..) => {
@@ -570,9 +575,7 @@ fn pairmemarg_operands<F: Fn(VReg) -> VReg>(
        &PairAMode::SignedOffset(reg, ..) => {
            collector.reg_use(reg);
        }
-        &PairAMode::PreIndexed(reg, ..) | &PairAMode::PostIndexed(reg, ..) => {
-            collector.reg_mod(reg);
-        }
+        &PairAMode::SPPreIndexed(..) | &PairAMode::SPPostIndexed(..) => {}
    }
 }

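The pre/post-indexed addressing modes can drop their register operand entirely because the writeback always targets the stack pointer, which regalloc2 never allocates. A minimal sketch of the idea, with simplified stand-in types rather than the real `AMode`:

    // Simplified stand-ins; the real AMode carries scaled immediates and flags.
    enum Addr {
        SpPreIndexed(i64),   // writeback on SP: no allocatable register involved
        RegOffset(u32, i64), // a virtual register that must be reported as a use
    }

    fn collect_addr_operands(addr: &Addr, uses: &mut Vec<u32>) {
        match addr {
            Addr::SpPreIndexed(..) => {} // nothing for the allocator to see
            Addr::RegOffset(vreg, ..) => uses.push(*vreg),
        }
    }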
@@ -657,10 +660,13 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
             debug_assert!(rd.to_reg().is_virtual());
             collector.reg_def(rd);
         }
-        &Inst::MovWide { op, rd, .. } => match op {
-            MoveWideOp::MovK => collector.reg_mod(rd),
-            _ => collector.reg_def(rd),
-        },
+        &Inst::MovK { rd, rn, .. } => {
+            collector.reg_use(rn);
+            collector.reg_reuse_def(rd, 0); // `rn` == `rd`.
+        }
+        &Inst::MovWide { rd, .. } => {
+            collector.reg_def(rd);
+        }
         &Inst::CSel { rd, rn, rm, .. } => {
             collector.reg_def(rd);
             collector.reg_use(rn);
@@ -681,13 +687,21 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
         &Inst::CCmpImm { rn, .. } => {
             collector.reg_use(rn);
         }
-        &Inst::AtomicRMWLoop { op, .. } => {
-            collector.reg_use(xreg(25));
-            collector.reg_use(xreg(26));
-            collector.reg_def(writable_xreg(24));
-            collector.reg_def(writable_xreg(27));
+        &Inst::AtomicRMWLoop {
+            op,
+            addr,
+            operand,
+            oldval,
+            scratch1,
+            scratch2,
+            ..
+        } => {
+            collector.reg_fixed_use(addr, xreg(25));
+            collector.reg_fixed_use(operand, xreg(26));
+            collector.reg_fixed_def(oldval, xreg(27));
+            collector.reg_fixed_def(scratch1, xreg(24));
             if op != AtomicRMWLoopOp::Xchg {
-                collector.reg_def(writable_xreg(28));
+                collector.reg_fixed_def(scratch2, xreg(28));
             }
         }
         &Inst::AtomicRMW { rs, rt, rn, .. } => {
@@ -695,17 +709,25 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
             collector.reg_def(rt);
             collector.reg_use(rn);
         }
-        &Inst::AtomicCAS { rs, rt, rn, .. } => {
-            collector.reg_mod(rs);
+        &Inst::AtomicCAS { rd, rs, rt, rn, .. } => {
+            collector.reg_reuse_def(rd, 1); // reuse `rs`.
+            collector.reg_use(rs);
             collector.reg_use(rt);
             collector.reg_use(rn);
         }
-        &Inst::AtomicCASLoop { .. } => {
-            collector.reg_use(xreg(25));
-            collector.reg_use(xreg(26));
-            collector.reg_use(xreg(28));
-            collector.reg_def(writable_xreg(24));
-            collector.reg_def(writable_xreg(27));
+        &Inst::AtomicCASLoop {
+            addr,
+            expected,
+            replacement,
+            oldval,
+            scratch,
+            ..
+        } => {
+            collector.reg_fixed_use(addr, xreg(25));
+            collector.reg_fixed_use(expected, xreg(26));
+            collector.reg_fixed_use(replacement, xreg(28));
+            collector.reg_fixed_def(oldval, xreg(24));
+            collector.reg_fixed_def(scratch, xreg(27));
         }
         &Inst::LoadAcquire { rt, rn, .. } => {
             collector.reg_use(rn);
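The atomic pseudo-instruction sequences now name their registers as ordinary virtual-register fields and pin each one with a fixed-register constraint, instead of reading and writing hard-coded physical registers behind the allocator's back. A rough sketch of what a fixed constraint records, using hypothetical types rather than the real `OperandCollector`:

    // Hypothetical mini-collector: a fixed constraint pairs a vreg with the
    // one physical register it must occupy at this instruction; the
    // allocator, not the lowering, becomes responsible for any moves.
    struct FixedConstraints {
        uses: Vec<(u32 /* vreg */, u8 /* preg */)>,
        defs: Vec<(u32, u8)>,
    }

    impl FixedConstraints {
        fn reg_fixed_use(&mut self, vreg: u32, preg: u8) {
            self.uses.push((vreg, preg));
        }
        fn reg_fixed_def(&mut self, vreg: u32, preg: u8) {
            self.defs.push((vreg, preg));
        }
    }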
@@ -741,11 +763,13 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
             collector.reg_use(rn);
             collector.reg_use(rm);
         }
-        &Inst::FpuRRI { fpu_op, rd, rn, .. } => {
-            match fpu_op {
-                FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => collector.reg_def(rd),
-                FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => collector.reg_mod(rd),
-            }
+        &Inst::FpuRRI { rd, rn, .. } => {
+            collector.reg_def(rd);
+            collector.reg_use(rn);
+        }
+        &Inst::FpuRRIMod { rd, ri, rn, .. } => {
+            collector.reg_reuse_def(rd, 1); // reuse `ri`.
+            collector.reg_use(ri);
             collector.reg_use(rn);
         }
         &Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
@@ -767,8 +791,9 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
             collector.reg_def(rd);
             collector.reg_use(rn);
         }
-        &Inst::VecShiftImmMod { rd, rn, .. } => {
-            collector.reg_mod(rd);
+        &Inst::VecShiftImmMod { rd, ri, rn, .. } => {
+            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
+            collector.reg_use(ri);
             collector.reg_use(rn);
         }
         &Inst::VecExtract { rd, rn, rm, .. } => {
@@ -776,37 +801,42 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
             collector.reg_use(rn);
             collector.reg_use(rm);
         }
-        &Inst::VecTbl {
-            rd,
-            rn,
-            rm,
-            is_extension,
-        } => {
+        &Inst::VecTbl { rd, rn, rm } => {
             collector.reg_use(rn);
             collector.reg_use(rm);
-            if is_extension {
-                collector.reg_mod(rd);
-            } else {
-                collector.reg_def(rd);
-            }
+            collector.reg_def(rd);
         }
-        &Inst::VecTbl2 {
+        &Inst::VecTblExt { rd, ri, rn, rm } => {
+            collector.reg_use(rn);
+            collector.reg_use(rm);
+            collector.reg_reuse_def(rd, 3); // `rd` == `ri`.
+            collector.reg_use(ri);
+        }
+
+        &Inst::VecTbl2 { rd, rn, rn2, rm } => {
+            // Constrain to v30 / v31 so that we satisfy the "adjacent
+            // registers" constraint without use of pinned vregs in
+            // lowering.
+            collector.reg_fixed_use(rn, vreg(30));
+            collector.reg_fixed_use(rn2, vreg(31));
+            collector.reg_use(rm);
+            collector.reg_def(rd);
+        }
+        &Inst::VecTbl2Ext {
             rd,
+            ri,
             rn,
             rn2,
             rm,
-            is_extension,
         } => {
-            collector.reg_use(rn);
-            collector.reg_use(rn2);
+            // Constrain to v30 / v31 so that we satisfy the "adjacent
+            // registers" constraint without use of pinned vregs in
+            // lowering.
+            collector.reg_fixed_use(rn, vreg(30));
+            collector.reg_fixed_use(rn2, vreg(31));
             collector.reg_use(rm);
-            if is_extension {
-                collector.reg_mod(rd);
-            } else {
-                collector.reg_def(rd);
-            }
+            collector.reg_reuse_def(rd, 4); // `rd` == `ri`.
+            collector.reg_use(ri);
         }
         &Inst::VecLoadReplicate { rd, rn, .. } => {
             collector.reg_def(rd);
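The v30/v31 pinning above works around an ISA requirement that regalloc2 has no direct vocabulary for: `tbl` with a two-register table needs the table registers to be consecutive. Fixing both inputs to one concrete adjacent pair expresses "adjacent" as two independent fixed constraints. A sketch of the shape of that choice (illustrative vreg/preg numbering only; one plausible reason to pick v30/v31 is that they sit far from the argument registers v0-v7):

    // Sketch: choose a concrete adjacent pair once, then let the allocator
    // move the two table values into it.
    fn pin_table_pair(rn: u32, rn2: u32, fixed_uses: &mut Vec<(u32, u8)>) {
        const TABLE_LO: u8 = 30; // illustrative indices for v30/v31
        const TABLE_HI: u8 = 31;
        fixed_uses.push((rn, TABLE_LO));
        fixed_uses.push((rn2, TABLE_HI));
    }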
@@ -900,8 +930,9 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
         &Inst::FpuMoveFPImm { rd, .. } => {
             collector.reg_def(rd);
         }
-        &Inst::MovToVec { rd, rn, .. } => {
-            collector.reg_mod(rd);
+        &Inst::MovToVec { rd, ri, rn, .. } => {
+            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
+            collector.reg_use(ri);
             collector.reg_use(rn);
         }
         &Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => {
@@ -926,38 +957,36 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
             collector.reg_def(rd);
             collector.reg_use(rn);
         }
-        &Inst::VecMovElement { rd, rn, .. } => {
-            collector.reg_mod(rd);
+        &Inst::VecMovElement { rd, ri, rn, .. } => {
+            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
+            collector.reg_use(ri);
             collector.reg_use(rn);
         }
         &Inst::VecRRLong { rd, rn, .. } => {
             collector.reg_def(rd);
             collector.reg_use(rn);
         }
-        &Inst::VecRRNarrow {
-            rd, rn, high_half, ..
-        } => {
+        &Inst::VecRRNarrowLow { rd, rn, .. } => {
             collector.reg_use(rn);
-            if high_half {
-                collector.reg_mod(rd);
-            } else {
-                collector.reg_def(rd);
-            }
+            collector.reg_def(rd);
+        }
+        &Inst::VecRRNarrowHigh { rd, ri, rn, .. } => {
+            collector.reg_use(rn);
+            collector.reg_reuse_def(rd, 2); // `rd` == `ri`.
+            collector.reg_use(ri);
         }
         &Inst::VecRRPair { rd, rn, .. } => {
             collector.reg_def(rd);
             collector.reg_use(rn);
         }
-        &Inst::VecRRRLong {
-            alu_op, rd, rn, rm, ..
-        } => {
-            match alu_op {
-                VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => {
-                    collector.reg_mod(rd)
-                }
-                _ => collector.reg_def(rd),
-            };
+        &Inst::VecRRRLong { rd, rn, rm, .. } => {
+            collector.reg_def(rd);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
+        }
+        &Inst::VecRRRLongMod { rd, ri, rn, rm, .. } => {
+            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
+            collector.reg_use(ri);
             collector.reg_use(rn);
             collector.reg_use(rm);
         }
@@ -970,8 +999,9 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
             collector.reg_use(rn);
             collector.reg_use(rm);
         }
-        &Inst::VecRRRMod { rd, rn, rm, .. } => {
-            collector.reg_mod(rd);
+        &Inst::VecRRRMod { rd, ri, rn, rm, .. } => {
+            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
+            collector.reg_use(ri);
             collector.reg_use(rn);
             collector.reg_use(rm);
         }
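In every `reg_reuse_def(rd, k)` call above, `k` is an operand index, counted in the order operands are reported for the instruction, and it must point at the operand being reused. For `VecRRRMod` the def itself is operand 0, so the reused input `ri` lands at index 1. A small sketch with a hypothetical operand encoding:

    // Hypothetical operand list to show what the index refers to.
    enum Operand {
        Use(u32),
        ReuseDef { vreg: u32, reuses_input: usize },
    }

    fn vec_rrr_mod_operands(rd: u32, ri: u32, rn: u32, rm: u32) -> Vec<Operand> {
        vec![
            Operand::ReuseDef { vreg: rd, reuses_input: 1 }, // operand 0
            Operand::Use(ri),                                // operand 1: reused
            Operand::Use(rn),                                // operand 2
            Operand::Use(rm),                                // operand 3
        ]
    }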
@@ -1508,12 +1538,22 @@ impl Inst {
                 let op_str = match op {
                     MoveWideOp::MovZ => "movz",
                     MoveWideOp::MovN => "movn",
-                    MoveWideOp::MovK => "movk",
                 };
                 let rd = pretty_print_ireg(rd.to_reg(), size, allocs);
                 let imm = imm.pretty_print(0, allocs);
                 format!("{} {}, {}", op_str, rd, imm)
             }
+            &Inst::MovK {
+                rd,
+                rn,
+                ref imm,
+                size,
+            } => {
+                let rn = pretty_print_ireg(rn, size, allocs);
+                let rd = pretty_print_ireg(rd.to_reg(), size, allocs);
+                let imm = imm.pretty_print(0, allocs);
+                format!("movk {}, {}, {}", rd, rn, imm)
+            }
             &Inst::CSel { rd, rn, rm, cond } => {
                 let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs);
                 let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs);
@@ -1589,75 +1629,45 @@ impl Inst {
                 };
                 format!("{}{} {}, {}, [{}]", op, ty_suffix, rs, rt, rn)
             }
-            &Inst::AtomicRMWLoop { ty, op, .. } => {
-                let ty_suffix = match ty {
-                    I8 => "b",
-                    I16 => "h",
-                    _ => "",
-                };
-                let size = OperandSize::from_ty(ty);
-                let r_addr = pretty_print_ireg(xreg(25), OperandSize::Size64, allocs);
-                let r_arg2 = pretty_print_ireg(xreg(26), size, allocs);
-                let r_status = pretty_print_ireg(xreg(24), OperandSize::Size32, allocs);
-                let r_tmp = pretty_print_ireg(xreg(27), size, allocs);
-                let mut r_dst = pretty_print_ireg(xreg(28), size, allocs);
-
-                let mut loop_str: String = "1: ".to_string();
-                loop_str.push_str(&format!("ldaxr{} {}, [{}]; ", ty_suffix, r_tmp, r_addr));
-
-                let op_str = match op {
+            &Inst::AtomicRMWLoop {
+                ty,
+                op,
+                addr,
+                operand,
+                oldval,
+                scratch1,
+                scratch2,
+            } => {
+                let op = match op {
                     AtomicRMWLoopOp::Add => "add",
                     AtomicRMWLoopOp::Sub => "sub",
                     AtomicRMWLoopOp::Eor => "eor",
                     AtomicRMWLoopOp::Orr => "orr",
                     AtomicRMWLoopOp::And => "and",
-                    _ => "",
+                    AtomicRMWLoopOp::Nand => "nand",
+                    AtomicRMWLoopOp::Smin => "smin",
+                    AtomicRMWLoopOp::Smax => "smax",
+                    AtomicRMWLoopOp::Umin => "umin",
+                    AtomicRMWLoopOp::Umax => "umax",
+                    AtomicRMWLoopOp::Xchg => "xchg",
                 };
-                if op_str.is_empty() {
-                    match op {
-                        AtomicRMWLoopOp::Xchg => r_dst = r_arg2,
-                        AtomicRMWLoopOp::Nand => {
-                            loop_str.push_str(&format!("and {}, {}, {}; ", r_dst, r_tmp, r_arg2));
-                            loop_str.push_str(&format!("mvn {}, {}; ", r_dst, r_dst));
-                        }
-                        _ => {
-                            if (op == AtomicRMWLoopOp::Smin || op == AtomicRMWLoopOp::Smax)
-                                && (ty == I8 || ty == I16)
-                            {
-                                loop_str
-                                    .push_str(&format!("sxt{} {}, {}; ", ty_suffix, r_tmp, r_tmp));
-                                loop_str.push_str(&format!(
-                                    "cmp {}, {}, sxt{}; ",
-                                    r_tmp, r_arg2, ty_suffix
-                                ));
-                            } else {
-                                loop_str.push_str(&format!("cmp {}, {}; ", r_tmp, r_arg2));
-                            }
-                            let cond = match op {
-                                AtomicRMWLoopOp::Smin => "lt",
-                                AtomicRMWLoopOp::Smax => "gt",
-                                AtomicRMWLoopOp::Umin => "lo",
-                                AtomicRMWLoopOp::Umax => "hi",
-                                _ => unreachable!(),
-                            };
-                            loop_str.push_str(&format!(
-                                "csel {}, {}, {}, {}; ",
-                                r_dst, r_tmp, r_arg2, cond
-                            ));
-                        }
-                    };
-                } else {
-                    loop_str.push_str(&format!("{} {}, {}, {}; ", op_str, r_dst, r_tmp, r_arg2));
-                }
-                loop_str.push_str(&format!(
-                    "stlxr{} {}, {}, [{}]; ",
-                    ty_suffix, r_status, r_dst, r_addr
-                ));
-                loop_str.push_str(&format!("cbnz {}, 1b", r_status));
-                loop_str
+                let addr = pretty_print_ireg(addr, OperandSize::Size64, allocs);
+                let operand = pretty_print_ireg(operand, OperandSize::Size64, allocs);
+                let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64, allocs);
+                let scratch1 = pretty_print_ireg(scratch1.to_reg(), OperandSize::Size64, allocs);
+                let scratch2 = pretty_print_ireg(scratch2.to_reg(), OperandSize::Size64, allocs);
+                format!(
+                    "atomic_rmw_loop_{}_{} addr={} operand={} oldval={} scratch1={} scratch2={}",
+                    op,
+                    ty.bits(),
+                    addr,
+                    operand,
+                    oldval,
+                    scratch1,
+                    scratch2,
+                )
             }
-            &Inst::AtomicCAS { rs, rt, rn, ty } => {
+            &Inst::AtomicCAS { rd, rs, rt, rn, ty } => {
                 let op = match ty {
                     I8 => "casalb",
                     I16 => "casalh",
@@ -1665,16 +1675,35 @@ impl Inst {
                     _ => panic!("Unsupported type: {}", ty),
                 };
                 let size = OperandSize::from_ty(ty);
-                let rs = pretty_print_ireg(rs.to_reg(), size, allocs);
+                let rd = pretty_print_ireg(rd.to_reg(), size, allocs);
+                let rs = pretty_print_ireg(rs, size, allocs);
                 let rt = pretty_print_ireg(rt, size, allocs);
                 let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs);

-                format!("{} {}, {}, [{}]", op, rs, rt, rn)
+                format!("{} {}, {}, {}, [{}]", op, rd, rs, rt, rn)
             }
-            &Inst::AtomicCASLoop { ty } => {
+            &Inst::AtomicCASLoop {
+                ty,
+                addr,
+                expected,
+                replacement,
+                oldval,
+                scratch,
+            } => {
+                let addr = pretty_print_ireg(addr, OperandSize::Size64, allocs);
+                let expected = pretty_print_ireg(expected, OperandSize::Size64, allocs);
+                let replacement = pretty_print_ireg(replacement, OperandSize::Size64, allocs);
+                let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64, allocs);
+                let scratch = pretty_print_ireg(scratch.to_reg(), OperandSize::Size64, allocs);
                 format!(
-                    "atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}",
-                    ty.bits())
+                    "atomic_cas_loop_{} addr={}, expect={}, replacement={}, oldval={}, scratch={}",
+                    ty.bits(),
+                    addr,
+                    expected,
+                    replacement,
+                    oldval,
+                    scratch,
+                )
             }
             &Inst::LoadAcquire {
                 access_ty, rt, rn, ..
@@ -1777,8 +1806,6 @@ impl Inst {
                 let (op, imm, vector) = match fpu_op {
                     FPUOpRI::UShr32(imm) => ("ushr", imm.pretty_print(0, allocs), true),
                     FPUOpRI::UShr64(imm) => ("ushr", imm.pretty_print(0, allocs), false),
-                    FPUOpRI::Sli32(imm) => ("sli", imm.pretty_print(0, allocs), true),
-                    FPUOpRI::Sli64(imm) => ("sli", imm.pretty_print(0, allocs), false),
                 };

                 let (rd, rn) = if vector {
@@ -1794,6 +1821,27 @@ impl Inst {
                 };
                 format!("{} {}, {}, {}", op, rd, rn, imm)
             }
+            &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
+                let (op, imm, vector) = match fpu_op {
+                    FPUOpRIMod::Sli32(imm) => ("sli", imm.pretty_print(0, allocs), true),
+                    FPUOpRIMod::Sli64(imm) => ("sli", imm.pretty_print(0, allocs), false),
+                };
+
+                let (rd, ri, rn) = if vector {
+                    (
+                        pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2, allocs),
+                        pretty_print_vreg_vector(ri, VectorSize::Size32x2, allocs),
+                        pretty_print_vreg_vector(rn, VectorSize::Size32x2, allocs),
+                    )
+                } else {
+                    (
+                        pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs),
+                        pretty_print_vreg_scalar(ri, ScalarSize::Size64, allocs),
+                        pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs),
+                    )
+                };
+                format!("{} {}, {}, {}, {}", op, rd, ri, rn, imm)
+            }
             &Inst::FpuRRRR {
                 fpu_op,
                 size,
@@ -1983,11 +2031,18 @@ impl Inst {

                 format!("fmov {}, {}", rd, imm)
             }
-            &Inst::MovToVec { rd, rn, idx, size } => {
+            &Inst::MovToVec {
+                rd,
+                ri,
+                rn,
+                idx,
+                size,
+            } => {
                 let rd =
                     pretty_print_vreg_element(rd.to_reg(), idx as usize, size.lane_size(), allocs);
+                let ri = pretty_print_vreg_element(ri, idx as usize, size.lane_size(), allocs);
                 let rn = pretty_print_ireg(rn, size.operand_size(), allocs);
-                format!("mov {}, {}", rd, rn)
+                format!("mov {}, {}, {}", rd, ri, rn)
             }
             &Inst::MovFromVec { rd, rn, idx, size } => {
                 let op = match size {
@@ -2062,6 +2117,7 @@ impl Inst {
             }
             &Inst::VecMovElement {
                 rd,
+                ri,
                 rn,
                 dest_idx,
                 src_idx,
@@ -2073,8 +2129,9 @@ impl Inst {
                     size.lane_size(),
                     allocs,
                 );
+                let ri = pretty_print_vreg_element(ri, dest_idx as usize, size.lane_size(), allocs);
                 let rn = pretty_print_vreg_element(rn, src_idx as usize, size.lane_size(), allocs);
-                format!("mov {}, {}", rd, rn)
+                format!("mov {}, {}, {}", rd, ri, rn)
             }
             &Inst::VecRRLong {
                 op,
@@ -2119,16 +2176,28 @@ impl Inst {

                 format!("{} {}, {}{}", op, rd, rn, suffix)
             }
-            &Inst::VecRRNarrow {
+            &Inst::VecRRNarrowLow {
                 op,
                 rd,
                 rn,
-                high_half,
                 lane_size,
+                ..
+            }
+            | &Inst::VecRRNarrowHigh {
+                op,
+                rd,
+                rn,
+                lane_size,
+                ..
             } => {
                 let vec64 = VectorSize::from_lane_size(lane_size, false);
                 let vec128 = VectorSize::from_lane_size(lane_size, true);
                 let rn_size = VectorSize::from_lane_size(lane_size.widen(), true);
+                let high_half = match self {
+                    &Inst::VecRRNarrowLow { .. } => false,
+                    &Inst::VecRRNarrowHigh { .. } => true,
+                    _ => unreachable!(),
+                };
                 let (op, rd_size) = match (op, high_half) {
                     (VecRRNarrowOp::Xtn, false) => ("xtn", vec64),
                     (VecRRNarrowOp::Xtn, true) => ("xtn2", vec128),
@@ -2143,8 +2212,15 @@ impl Inst {
                 };
                 let rn = pretty_print_vreg_vector(rn, rn_size, allocs);
                 let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size, allocs);
+                let ri = match self {
+                    &Inst::VecRRNarrowLow { .. } => "".to_string(),
+                    &Inst::VecRRNarrowHigh { ri, .. } => {
+                        format!("{}, ", pretty_print_vreg_vector(ri, rd_size, allocs))
+                    }
+                    _ => unreachable!(),
+                };

-                format!("{} {}, {}", op, rd, rn)
+                format!("{} {}, {}{}", op, rd, ri, rn)
             }
             &Inst::VecRRPair { op, rd, rn } => {
                 let op = match op {
@@ -2227,6 +2303,7 @@ impl Inst {
             }
             &Inst::VecRRRMod {
                 rd,
+                ri,
                 rn,
                 rm,
                 alu_op,
@@ -2237,9 +2314,10 @@ impl Inst {
                     VecALUModOp::Fmla => ("fmla", size),
                 };
                 let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
+                let ri = pretty_print_vreg_vector(ri, size, allocs);
                 let rn = pretty_print_vreg_vector(rn, size, allocs);
                 let rm = pretty_print_vreg_vector(rm, size, allocs);
-                format!("{} {}, {}, {}", op, rd, rn, rm)
+                format!("{} {}, {}, {}, {}", op, rd, ri, rn, rm)
             }
             &Inst::VecRRRLong {
                 rd,
@@ -2285,30 +2363,46 @@ impl Inst {
                     (VecRRRLongOp::Umull32, true) => {
                         ("umull2", VectorSize::Size64x2, VectorSize::Size32x4)
                     }
-                    (VecRRRLongOp::Umlal8, false) => {
-                        ("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
-                    }
-                    (VecRRRLongOp::Umlal8, true) => {
-                        ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
-                    }
-                    (VecRRRLongOp::Umlal16, false) => {
-                        ("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
-                    }
-                    (VecRRRLongOp::Umlal16, true) => {
-                        ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
-                    }
-                    (VecRRRLongOp::Umlal32, false) => {
-                        ("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
-                    }
-                    (VecRRRLongOp::Umlal32, true) => {
-                        ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
-                    }
                 };
                 let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size, allocs);
                 let rn = pretty_print_vreg_vector(rn, src_size, allocs);
                 let rm = pretty_print_vreg_vector(rm, src_size, allocs);
                 format!("{} {}, {}, {}", op, rd, rn, rm)
             }
+            &Inst::VecRRRLongMod {
+                rd,
+                ri,
+                rn,
+                rm,
+                alu_op,
+                high_half,
+            } => {
+                let (op, dest_size, src_size) = match (alu_op, high_half) {
+                    (VecRRRLongModOp::Umlal8, false) => {
+                        ("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
+                    }
+                    (VecRRRLongModOp::Umlal8, true) => {
+                        ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
+                    }
+                    (VecRRRLongModOp::Umlal16, false) => {
+                        ("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
+                    }
+                    (VecRRRLongModOp::Umlal16, true) => {
+                        ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
+                    }
+                    (VecRRRLongModOp::Umlal32, false) => {
+                        ("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
+                    }
+                    (VecRRRLongModOp::Umlal32, true) => {
+                        ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
+                    }
+                };
+                let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size, allocs);
+                let ri = pretty_print_vreg_vector(ri, dest_size, allocs);
+                let rn = pretty_print_vreg_vector(rn, src_size, allocs);
+                let rm = pretty_print_vreg_vector(rm, src_size, allocs);
+                format!("{} {}, {}, {}, {}", op, rd, ri, rn, rm)
+            }
             &Inst::VecMisc { op, rd, rn, size } => {
                 let (op, size, suffix) = match op {
                     VecMisc2::Not => (
@@ -2378,6 +2472,7 @@ impl Inst {
             &Inst::VecShiftImmMod {
                 op,
                 rd,
+                ri,
                 rn,
                 size,
                 imm,
@@ -2386,8 +2481,9 @@ impl Inst {
                     VecShiftImmModOp::Sli => "sli",
                 };
                 let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
+                let ri = pretty_print_vreg_vector(ri, size, allocs);
                 let rn = pretty_print_vreg_vector(rn, size, allocs);
-                format!("{} {}, {}, #{}", op, rd, rn, imm)
+                format!("{} {}, {}, {}, #{}", op, rd, ri, rn, imm)
             }
             &Inst::VecExtract { rd, rn, rm, imm4 } => {
                 let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
@@ -2395,31 +2491,39 @@ impl Inst {
                 let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs);
                 format!("ext {}, {}, {}, #{}", rd, rn, rm, imm4)
             }
-            &Inst::VecTbl {
-                rd,
-                rn,
-                rm,
-                is_extension,
-            } => {
-                let op = if is_extension { "tbx" } else { "tbl" };
+            &Inst::VecTbl { rd, rn, rm } => {
                 let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs);
                 let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs);
                 let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
-                format!("{} {}, {{ {} }}, {}", op, rd, rn, rm)
+                format!("tbl {}, {{ {} }}, {}", rd, rn, rm)
             }
-            &Inst::VecTbl2 {
-                rd,
-                rn,
-                rn2,
-                rm,
-                is_extension,
-            } => {
-                let op = if is_extension { "tbx" } else { "tbl" };
+            &Inst::VecTblExt { rd, ri, rn, rm } => {
+                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs);
+                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs);
+                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
+                let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16, allocs);
+                format!("tbx {}, {}, {{ {} }}, {}", rd, ri, rn, rm)
+            }
+            &Inst::VecTbl2 { rd, rn, rn2, rm } => {
                 let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs);
                 let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16, allocs);
                 let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs);
                 let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
-                format!("{} {}, {{ {}, {} }}, {}", op, rd, rn, rn2, rm)
+                format!("tbl {}, {{ {}, {} }}, {}", rd, rn, rn2, rm)
+            }
+            &Inst::VecTbl2Ext {
+                rd,
+                ri,
+                rn,
+                rn2,
+                rm,
+            } => {
+                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs);
+                let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16, allocs);
+                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs);
+                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
+                let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16, allocs);
+                format!("tbx {}, {}, {{ {}, {} }}, {}", rd, ri, rn, rn2, rm)
             }
             &Inst::VecLoadReplicate { rd, rn, size, .. } => {
                 let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
@@ -50,6 +50,7 @@ pub(crate) const fn vreg_preg(num: u8) -> PReg {
 }

 /// Get a writable reference to a V-register.
+#[cfg(test)] // Used only in test code.
 pub fn writable_vreg(num: u8) -> Writable<Reg> {
     Writable::from_reg(vreg(num))
 }
@@ -103,12 +103,12 @@

 (rule (lower (has_type ty (shuffle rn rn2 (u128_from_immediate mask))))
   (let ((mask_reg Reg (constant_f128 mask)))
-    (vec_tbl2 rn rn2 mask_reg $false ty)))
+    (vec_tbl2 rn rn2 mask_reg ty)))

 ;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (has_type vec_i128_ty (swizzle rn rm)))
-  (vec_tbl rn rm #f))
+  (vec_tbl rn rm))

 ;;;; Rules for `isplit` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -8,7 +8,7 @@ use generated_code::Context;
 use super::{
     lower_constant_f128, lower_constant_f32, lower_constant_f64, lower_fp_condcode,
     writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo,
-    CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
+    CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FPUOpRIMod, FloatCC, Imm12, ImmLogic, ImmShift,
     Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode,
     Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize,
     NZCV,
@@ -28,7 +28,6 @@ use crate::{
     },
     isa::aarch64::abi::AArch64Caller,
     isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm},
-    isa::aarch64::lower::{writable_vreg, writable_xreg, xreg},
     isa::unwind::UnwindInst,
     machinst::{ty_bits, InsnOutput, Lower, MachInst, VCodeConstant, VCodeConstantData},
 };
@@ -209,9 +208,9 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
         });

         if upper_halfword != 0 {
-            self.emit(&MInst::MovWide {
-                op: MoveWideOp::MovK,
+            self.emit(&MInst::MovK {
                 rd,
+                rn: rd.to_reg(),
                 imm: MoveWideConst::maybe_with_shift(upper_halfword, 16).unwrap(),
                 size,
             });
@@ -263,9 +262,9 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
             }
         } else {
             let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
-            self.emit(&MInst::MovWide {
-                op: MoveWideOp::MovK,
+            self.emit(&MInst::MovK {
                 rd,
+                rn: rd.to_reg(),
                 imm,
                 size,
             });
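With `MovK` reading the partial value through `rn` and defining `rd` fresh, a multi-part constant materialization becomes a plain def-use chain over one virtual register. A sketch of the overall shape, with a hypothetical emit callback rather than the real lowering helper:

    // Sketch: build a 64-bit constant 16 bits at a time. Each movk uses the
    // previous partial value (the same vreg, passed as `rn`) and defines it
    // again, so no modify-operand is needed.
    fn materialize_imm64(value: u64, mut emit: impl FnMut(String)) {
        if value == 0 {
            emit("movz x0, #0".to_string());
            return;
        }
        let mut first = true;
        for i in 0..4 {
            let half = (value >> (i * 16)) & 0xffff;
            if half == 0 {
                continue; // all-zero halves need no instruction
            }
            if first {
                emit(format!("movz x0, #{}, LSL #{}", half, i * 16));
                first = false;
            } else {
                emit(format!("movk x0, x0, #{}, LSL #{}", half, i * 16));
            }
        }
    }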
@@ -294,18 +293,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
         zero_reg()
     }

-    fn xreg(&mut self, index: u8) -> Reg {
-        xreg(index)
-    }
-
-    fn writable_xreg(&mut self, index: u8) -> WritableReg {
-        writable_xreg(index)
-    }
-
-    fn writable_vreg(&mut self, index: u8) -> WritableReg {
-        writable_vreg(index)
-    }
-
     fn extended_value_from_value(&mut self, val: Value) -> Option<ExtendedValue> {
         let (val, extend) =
             super::get_as_extended_value(self.lower_ctx, val, NarrowValueMode::None)?;
@@ -718,11 +705,11 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
         }
     }

-    fn fpu_op_ri_sli(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI {
+    fn fpu_op_ri_sli(&mut self, ty_bits: u8, shift: u8) -> FPUOpRIMod {
         if ty_bits == 32 {
-            FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
+            FPUOpRIMod::Sli32(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
         } else if ty_bits == 64 {
-            FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
+            FPUOpRIMod::Sli64(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
         } else {
             unimplemented!(
                 "unexpected input size for fpu_op_ri_sli: {} (shift: {})",
@@ -139,7 +139,7 @@ block0(v0: i64):

 ; block0:
 ; movz w3, #51712
-; movk w3, #15258, LSL #16
+; movk w3, w3, #15258, LSL #16
 ; add x3, x3, x0
 ; ldr w0, [x3]
 ; ret
@@ -142,9 +142,8 @@ block0(v0: i64, v1: i64):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_nand_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -164,9 +163,8 @@ block0(v0: i64, v1: i32):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_nand_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -186,9 +184,8 @@ block0(v0: i64, v1: i16):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_nand_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -208,9 +205,8 @@ block0(v0: i64, v1: i8):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_nand_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -14,9 +14,8 @@ block0(v0: i64, v1: i64):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxr x27, [x25]; add x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_add_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -36,9 +35,8 @@ block0(v0: i64, v1: i32):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxr w27, [x25]; add w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_add_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -58,9 +56,8 @@ block0(v0: i64, v1: i16):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxrh w27, [x25]; add w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_add_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -80,9 +77,8 @@ block0(v0: i64, v1: i8):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_add_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -102,9 +98,8 @@ block0(v0: i64, v1: i64):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxr x27, [x25]; sub x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_sub_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -124,9 +119,8 @@ block0(v0: i64, v1: i32):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxr w27, [x25]; sub w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_sub_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -146,9 +140,8 @@ block0(v0: i64, v1: i16):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxrh w27, [x25]; sub w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_sub_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -168,9 +161,8 @@ block0(v0: i64, v1: i8):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxrb w27, [x25]; sub w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_sub_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -190,9 +182,8 @@ block0(v0: i64, v1: i64):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_and_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -212,9 +203,8 @@ block0(v0: i64, v1: i32):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxr w27, [x25]; and w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_and_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -234,9 +224,8 @@ block0(v0: i64, v1: i16):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxrh w27, [x25]; and w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_and_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -256,9 +245,8 @@ block0(v0: i64, v1: i8):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxrb w27, [x25]; and w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_and_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -278,9 +266,8 @@ block0(v0: i64, v1: i64):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_nand_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -300,9 +287,8 @@ block0(v0: i64, v1: i32):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_nand_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -322,9 +308,8 @@ block0(v0: i64, v1: i16):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_nand_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -344,9 +329,8 @@ block0(v0: i64, v1: i8):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_nand_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -366,9 +350,8 @@ block0(v0: i64, v1: i64):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxr x27, [x25]; orr x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_orr_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -388,9 +371,8 @@ block0(v0: i64, v1: i32):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_orr_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -410,9 +392,8 @@ block0(v0: i64, v1: i16):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxrh w27, [x25]; orr w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_orr_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -432,9 +413,8 @@ block0(v0: i64, v1: i8):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxrb w27, [x25]; orr w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_orr_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -454,9 +434,8 @@ block0(v0: i64, v1: i64):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
-; 1: ldaxr x27, [x25]; eor x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
+; mov x26, x1
+; atomic_rmw_loop_eor_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
 ; ldp x24, x25, [sp], #16
 ; ldp x26, x27, [sp], #16
 ; ldr x28, [sp], #16
@@ -476,9 +455,8 @@ block0(v0: i64, v1: i32):
 ; stp x24, x25, [sp, #-16]!
 ; block0:
 ; mov x25, x0
-; mov x4, x1
-; mov x26, x4
+; mov x26, x1
+; atomic_rmw_loop_eor_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxr w27, [x25]; eor w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -498,9 +476,8 @@ block0(v0: i64, v1: i16):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_eor_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -520,9 +497,8 @@ block0(v0: i64, v1: i8):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_eor_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxrb w27, [x25]; eor w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -542,9 +518,8 @@ block0(v0: i64, v1: i64):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_smax_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -564,9 +539,8 @@ block0(v0: i64, v1: i32):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_smax_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxr w24, w28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -586,9 +560,8 @@ block0(v0: i64, v1: i16):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_smax_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, gt; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -608,9 +581,8 @@ block0(v0: i64, v1: i8):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_smax_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -630,9 +602,8 @@ block0(v0: i64, v1: i64):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_umax_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, hi; stlxr w24, x28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -652,9 +623,8 @@ block0(v0: i64, v1: i32):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_umax_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxr w24, w28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -674,9 +644,8 @@ block0(v0: i64, v1: i16):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_umax_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -696,9 +665,8 @@ block0(v0: i64, v1: i8):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_umax_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -718,9 +686,8 @@ block0(v0: i64, v1: i64):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_smin_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lt; stlxr w24, x28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -740,9 +707,8 @@ block0(v0: i64, v1: i32):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_smin_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -762,9 +728,8 @@ block0(v0: i64, v1: i16):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_smin_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -784,9 +749,8 @@ block0(v0: i64, v1: i8):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_smin_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, lt; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -806,9 +770,8 @@ block0(v0: i64, v1: i64):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_umin_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lo; stlxr w24, x28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -828,9 +791,8 @@ block0(v0: i64, v1: i32):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_umin_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxr w24, w28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -850,9 +812,8 @@ block0(v0: i64, v1: i16):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_umin_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
@@ -872,9 +833,8 @@ block0(v0: i64, v1: i8):
|
|||||||
; stp x24, x25, [sp, #-16]!
|
; stp x24, x25, [sp, #-16]!
|
||||||
; block0:
|
; block0:
|
||||||
; mov x25, x0
|
; mov x25, x0
|
||||||
; mov x4, x1
|
; mov x26, x1
|
||||||
; mov x26, x4
|
; atomic_rmw_loop_umin_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
|
||||||
; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
|
||||||
; ldp x24, x25, [sp], #16
|
; ldp x24, x25, [sp], #16
|
||||||
; ldp x26, x27, [sp], #16
|
; ldp x26, x27, [sp], #16
|
||||||
; ldr x28, [sp], #16
|
; ldr x28, [sp], #16
|
||||||
|
|||||||
@@ -245,11 +245,11 @@ block0(v0: i128):
 }

 ; block0:
-; fmov d6, x0
-; mov v6.d[1], x1
-; cnt v19.16b, v6.16b
-; addv b21, v19.16b
-; umov w0, v21.b[0]
+; fmov d7, x0
+; mov v7.d[1], v7.d[1], x1
+; cnt v18.16b, v7.16b
+; addv b20, v18.16b
+; umov w0, v20.b[0]
 ; movz w1, #0
 ; ret

@@ -130,9 +130,9 @@ block0:

 ; block0:
 ; movz x0, #58
-; movk x0, #4626, LSL #16
-; movk x0, #61603, LSL #32
-; movk x0, #62283, LSL #48
+; movk x0, x0, #4626, LSL #16
+; movk x0, x0, #61603, LSL #32
+; movk x0, x0, #62283, LSL #48
 ; ret

 function %f() -> i64 {
@@ -143,7 +143,7 @@ block0:

 ; block0:
 ; movz x0, #7924, LSL #16
-; movk x0, #4841, LSL #48
+; movk x0, x0, #4841, LSL #48
 ; ret

 function %f() -> i64 {
@@ -154,7 +154,7 @@ block0:

 ; block0:
 ; movn x0, #57611, LSL #16
-; movk x0, #4841, LSL #48
+; movk x0, x0, #4841, LSL #48
 ; ret

 function %f() -> i32 {
@@ -15,10 +15,9 @@ block0(v0: i16):
 }

 ; block0:
-; dup v6.4h, w0
-; mov v7.16b, v6.16b
-; mov v7.d[1], v6.d[0]
-; sqxtn v0.8b, v7.8h
+; dup v4.4h, w0
+; mov v4.d[1], v4.d[1], v4.d[0]
+; sqxtn v0.8b, v4.8h
 ; ret

 function %snarrow_i16x8(i16) -> i8x16 {
@@ -37,7 +36,7 @@ block0(v0: i16):
 ; block0:
 ; dup v6.8h, w0
 ; sqxtn v0.8b, v6.8h
-; sqxtn2 v0.16b, v6.8h
+; sqxtn2 v0.16b, v0.16b, v6.8h
 ; ret

 function %snarrow_i32x2(i32) -> i16x4 {
@@ -54,10 +53,9 @@ block0(v0: i32):
 }

 ; block0:
-; dup v6.2s, w0
-; mov v7.16b, v6.16b
-; mov v7.d[1], v6.d[0]
-; sqxtn v0.4h, v7.4s
+; dup v4.2s, w0
+; mov v4.d[1], v4.d[1], v4.d[0]
+; sqxtn v0.4h, v4.4s
 ; ret

 function %snarrow_i32x4(i32) -> i16x8 {
@@ -76,7 +74,7 @@ block0(v0: i32):
 ; block0:
 ; dup v6.4s, w0
 ; sqxtn v0.4h, v6.4s
-; sqxtn2 v0.8h, v6.4s
+; sqxtn2 v0.8h, v0.8h, v6.4s
 ; ret

 function %snarrow_i64x2(i64) -> i32x4 {
@@ -95,7 +93,7 @@ block0(v0: i64):
 ; block0:
 ; dup v6.2d, x0
 ; sqxtn v0.2s, v6.2d
-; sqxtn2 v0.4s, v6.2d
+; sqxtn2 v0.4s, v0.4s, v6.2d
 ; ret

 function %unarrow_i16x4(i16) -> i8x8 {
@@ -112,10 +110,9 @@ block0(v0: i16):
 }

 ; block0:
-; dup v6.4h, w0
-; mov v7.16b, v6.16b
-; mov v7.d[1], v6.d[0]
-; sqxtun v0.8b, v7.8h
+; dup v4.4h, w0
+; mov v4.d[1], v4.d[1], v4.d[0]
+; sqxtun v0.8b, v4.8h
 ; ret

 function %unarrow_i16x8(i16) -> i8x16 {
@@ -134,7 +131,7 @@ block0(v0: i16):
 ; block0:
 ; dup v6.8h, w0
 ; sqxtun v0.8b, v6.8h
-; sqxtun2 v0.16b, v6.8h
+; sqxtun2 v0.16b, v0.16b, v6.8h
 ; ret

 function %unarrow_i32x2(i32) -> i16x4 {
@@ -151,10 +148,9 @@ block0(v0: i32):
 }

 ; block0:
-; dup v6.2s, w0
-; mov v7.16b, v6.16b
-; mov v7.d[1], v6.d[0]
-; sqxtun v0.4h, v7.4s
+; dup v4.2s, w0
+; mov v4.d[1], v4.d[1], v4.d[0]
+; sqxtun v0.4h, v4.4s
 ; ret

 function %unarrow_i32x4(i32) -> i16x8 {
@@ -173,7 +169,7 @@ block0(v0: i32):
 ; block0:
 ; dup v6.4s, w0
 ; sqxtun v0.4h, v6.4s
-; sqxtun2 v0.8h, v6.4s
+; sqxtun2 v0.8h, v0.8h, v6.4s
 ; ret

 function %unarrow_i64x2(i64) -> i32x4 {
@@ -192,7 +188,7 @@ block0(v0: i64):
 ; block0:
 ; dup v6.2d, x0
 ; sqxtun v0.2s, v6.2d
-; sqxtun2 v0.4s, v6.2d
+; sqxtun2 v0.4s, v0.4s, v6.2d
 ; ret

 function %uunarrow_i16x4(i16) -> i8x8 {
@@ -209,10 +205,9 @@ block0(v0: i16):
 }

 ; block0:
-; dup v6.4h, w0
-; mov v7.16b, v6.16b
-; mov v7.d[1], v6.d[0]
-; uqxtn v0.8b, v7.8h
+; dup v4.4h, w0
+; mov v4.d[1], v4.d[1], v4.d[0]
+; uqxtn v0.8b, v4.8h
 ; ret

 function %uunarrow_i16x8(i16) -> i8x16 {
@@ -231,7 +226,7 @@ block0(v0: i16):
 ; block0:
 ; dup v6.8h, w0
 ; uqxtn v0.8b, v6.8h
-; uqxtn2 v0.16b, v6.8h
+; uqxtn2 v0.16b, v0.16b, v6.8h
 ; ret

 function %uunarrow_i32x2(i32) -> i16x4 {
@@ -248,10 +243,9 @@ block0(v0: i32):
 }

 ; block0:
-; dup v6.2s, w0
-; mov v7.16b, v6.16b
-; mov v7.d[1], v6.d[0]
-; uqxtn v0.4h, v7.4s
+; dup v4.2s, w0
+; mov v4.d[1], v4.d[1], v4.d[0]
+; uqxtn v0.4h, v4.4s
 ; ret

 function %uunarrow_i32x4(i32) -> i16x8 {
@@ -270,7 +264,7 @@ block0(v0: i32):
 ; block0:
 ; dup v6.4s, w0
 ; uqxtn v0.4h, v6.4s
-; uqxtn2 v0.8h, v6.4s
+; uqxtn2 v0.8h, v0.8h, v6.4s
 ; ret

 function %uunarrow_i64x2(i64) -> i32x4 {
@@ -289,5 +283,6 @@ block0(v0: i64):
 ; block0:
 ; dup v6.2d, x0
 ; uqxtn v0.2s, v6.2d
-; uqxtn2 v0.4s, v6.2d
+; uqxtn2 v0.4s, v0.4s, v6.2d
 ; ret
+
@@ -197,7 +197,7 @@ block0(v0: f64, v1: f64):
 ; dup v17.2d, v0.d[0]
 ; dup v18.2d, v1.d[0]
 ; fcmgt v0.2d, v17.2d, v18.2d
-; bsl v0.16b, v18.16b, v17.16b
+; bsl v0.16b, v0.16b, v18.16b, v17.16b
 ; ret

 function %f64x2_splat_max_pseudo(f64, f64) -> f64x2 {
@@ -216,5 +216,6 @@ block0(v0: f64, v1: f64):
 ; dup v17.2d, v0.d[0]
 ; dup v18.2d, v1.d[0]
 ; fcmgt v0.2d, v18.2d, v17.2d
-; bsl v0.16b, v18.16b, v17.16b
+; bsl v0.16b, v0.16b, v18.16b, v17.16b
 ; ret
+
@@ -309,8 +309,8 @@ block0(v0: f32, v1: f32):
 }

 ; block0:
-; ushr v7.2s, v1.2s, #31
-; sli v0.2s, v7.2s, #31
+; ushr v6.2s, v1.2s, #31
+; sli v0.2s, v0.2s, v6.2s, #31
 ; ret

 function %f32(f64, f64) -> f64 {
@@ -320,8 +320,8 @@ block0(v0: f64, v1: f64):
 }

 ; block0:
-; ushr d7, d1, #63
-; sli d0, d7, #63
+; ushr d6, d1, #63
+; sli d0, d0, d6, #63
 ; ret

 function %f33(f32) -> i32 {
@@ -918,9 +918,8 @@ block0(v0: f32x4, v1: f32x4, v2: f32x4):
 }

 ; block0:
-; mov v17.16b, v0.16b
+; fmla v2.4s, v2.4s, v0.4s, v1.4s
 ; mov v0.16b, v2.16b
-; fmla v0.4s, v17.4s, v1.4s
 ; ret

 function %f79(f32x2, f32x2, f32x2) -> f32x2 {
@@ -930,9 +929,8 @@ block0(v0: f32x2, v1: f32x2, v2: f32x2):
 }

 ; block0:
-; mov v17.16b, v0.16b
+; fmla v2.2s, v2.2s, v0.2s, v1.2s
 ; mov v0.16b, v2.16b
-; fmla v0.2s, v17.2s, v1.2s
 ; ret

 function %f80(f64x2, f64x2, f64x2) -> f64x2 {
@@ -942,9 +940,8 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2):
 }

 ; block0:
-; mov v17.16b, v0.16b
+; fmla v2.2d, v2.2d, v0.2d, v1.2d
 ; mov v0.16b, v2.16b
-; fmla v0.2d, v17.2d, v1.2d
 ; ret

 function %f81(f32x2, f32x2) -> f32x2 {
@@ -954,8 +951,8 @@ block0(v0: f32x2, v1: f32x2):
 }

 ; block0:
-; ushr v7.2s, v1.2s, #31
-; sli v0.2s, v7.2s, #31
+; ushr v6.2s, v1.2s, #31
+; sli v0.2s, v0.2s, v6.2s, #31
 ; ret

 function %f82(f32x4, f32x4) -> f32x4 {
@@ -965,8 +962,8 @@ block0(v0: f32x4, v1: f32x4):
 }

 ; block0:
-; ushr v7.4s, v1.4s, #31
-; sli v0.4s, v7.4s, #31
+; ushr v6.4s, v1.4s, #31
+; sli v0.4s, v0.4s, v6.4s, #31
 ; ret

 function %f83(f64x2, f64x2) -> f64x2 {
@@ -976,6 +973,7 @@ block0(v0: f64x2, v1: f64x2):
 }

 ; block0:
-; ushr v7.2d, v1.2d, #63
-; sli v0.2d, v7.2d, #63
+; ushr v6.2d, v1.2d, #63
+; sli v0.2d, v0.2d, v6.2d, #63
 ; ret
+
@@ -105,7 +105,7 @@ block0:
 ; movi v0.16b, #0
 ; movi v4.16b, #0
 ; movi v5.16b, #0
-; bsl v0.16b, v4.16b, v5.16b
+; bsl v0.16b, v0.16b, v4.16b, v5.16b
 ; ret

 function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 {
@@ -115,7 +115,7 @@ block0(v0: b16x8, v1: i16x8, v2: i16x8):
 }

 ; block0:
-; bsl v0.16b, v1.16b, v2.16b
+; bsl v0.16b, v0.16b, v1.16b, v2.16b
 ; ret

 function %vselect_f32x4(b32x4, f32x4, f32x4) -> f32x4 {
@@ -125,7 +125,7 @@ block0(v0: b32x4, v1: f32x4, v2: f32x4):
 }

 ; block0:
-; bsl v0.16b, v1.16b, v2.16b
+; bsl v0.16b, v0.16b, v1.16b, v2.16b
 ; ret

 function %vselect_f64x2(b64x2, f64x2, f64x2) -> f64x2 {
@@ -135,7 +135,7 @@ block0(v0: b64x2, v1: f64x2, v2: f64x2):
 }

 ; block0:
-; bsl v0.16b, v1.16b, v2.16b
+; bsl v0.16b, v0.16b, v1.16b, v2.16b
 ; ret

 function %ishl_i8x16(i32) -> i8x16 {
@@ -29,9 +29,9 @@ block0:

 ; block0:
 ; movz x4, #1
-; fmov s30, w4
+; fmov s31, w4
 ; ldr q3, pc+8 ; b 20 ; data.f128 0x13000000000000000000000000000000
-; mov v31.16b, v30.16b
+; mov v30.16b, v31.16b
 ; tbl v0.16b, { v30.16b, v31.16b }, v3.16b
 ; ret

@@ -9,7 +9,7 @@ block0(v0: i16x4, v1: i16x4):
 }

 ; block0:
-; mov v0.d[1], v1.d[0]
+; mov v0.d[1], v0.d[1], v1.d[0]
 ; sqxtn v0.8b, v0.8h
 ; ret

@@ -21,7 +21,7 @@ block0(v0: i16x8, v1: i16x8):

 ; block0:
 ; sqxtn v0.8b, v0.8h
-; sqxtn2 v0.16b, v1.8h
+; sqxtn2 v0.16b, v0.16b, v1.8h
 ; ret

 function %snarrow_i32x2(i32x2, i32x2) -> i16x4 {
@@ -31,7 +31,7 @@ block0(v0: i32x2, v1: i32x2):
 }

 ; block0:
-; mov v0.d[1], v1.d[0]
+; mov v0.d[1], v0.d[1], v1.d[0]
 ; sqxtn v0.4h, v0.4s
 ; ret

@@ -43,7 +43,7 @@ block0(v0: i32x4, v1: i32x4):

 ; block0:
 ; sqxtn v0.4h, v0.4s
-; sqxtn2 v0.8h, v1.4s
+; sqxtn2 v0.8h, v0.8h, v1.4s
 ; ret

 function %snarrow_i64x2(i64x2, i64x2) -> i32x4 {
@@ -54,7 +54,7 @@ block0(v0: i64x2, v1: i64x2):

 ; block0:
 ; sqxtn v0.2s, v0.2d
-; sqxtn2 v0.4s, v1.2d
+; sqxtn2 v0.4s, v0.4s, v1.2d
 ; ret

 function %unarrow_i16x4(i16x4, i16x4) -> i8x8 {
@@ -64,7 +64,7 @@ block0(v0: i16x4, v1: i16x4):
 }

 ; block0:
-; mov v0.d[1], v1.d[0]
+; mov v0.d[1], v0.d[1], v1.d[0]
 ; sqxtun v0.8b, v0.8h
 ; ret

@@ -76,7 +76,7 @@ block0(v0: i16x8, v1: i16x8):

 ; block0:
 ; sqxtun v0.8b, v0.8h
-; sqxtun2 v0.16b, v1.8h
+; sqxtun2 v0.16b, v0.16b, v1.8h
 ; ret

 function %unarrow_i32x2(i32x2, i32x2) -> i16x4 {
@@ -86,7 +86,7 @@ block0(v0: i32x2, v1: i32x2):
 }

 ; block0:
-; mov v0.d[1], v1.d[0]
+; mov v0.d[1], v0.d[1], v1.d[0]
 ; sqxtun v0.4h, v0.4s
 ; ret

@@ -98,7 +98,7 @@ block0(v0: i32x4, v1: i32x4):

 ; block0:
 ; sqxtun v0.4h, v0.4s
-; sqxtun2 v0.8h, v1.4s
+; sqxtun2 v0.8h, v0.8h, v1.4s
 ; ret

 function %unarrow_i64x2(i64x2, i64x2) -> i32x4 {
@@ -109,7 +109,7 @@ block0(v0: i64x2, v1: i64x2):

 ; block0:
 ; sqxtun v0.2s, v0.2d
-; sqxtun2 v0.4s, v1.2d
+; sqxtun2 v0.4s, v0.4s, v1.2d
 ; ret

 function %uunarrow_i16x4(i16x4, i16x4) -> i8x8 {
@@ -119,7 +119,7 @@ block0(v0: i16x4, v1: i16x4):
 }

 ; block0:
-; mov v0.d[1], v1.d[0]
+; mov v0.d[1], v0.d[1], v1.d[0]
 ; uqxtn v0.8b, v0.8h
 ; ret

@@ -131,7 +131,7 @@ block0(v0: i16x8, v1: i16x8):

 ; block0:
 ; uqxtn v0.8b, v0.8h
-; uqxtn2 v0.16b, v1.8h
+; uqxtn2 v0.16b, v0.16b, v1.8h
 ; ret

 function %uunarrow_i32x2(i32x2, i32x2) -> i16x4 {
@@ -141,7 +141,7 @@ block0(v0: i32x2, v1: i32x2):
 }

 ; block0:
-; mov v0.d[1], v1.d[0]
+; mov v0.d[1], v0.d[1], v1.d[0]
 ; uqxtn v0.4h, v0.4s
 ; ret

@@ -153,7 +153,7 @@ block0(v0: i32x4, v1: i32x4):

 ; block0:
 ; uqxtn v0.4h, v0.4s
-; uqxtn2 v0.8h, v1.4s
+; uqxtn2 v0.8h, v0.8h, v1.4s
 ; ret

 function %uunarrow_i64x2(i64x2, i64x2) -> i32x4 {
@@ -164,7 +164,7 @@ block0(v0: i64x2, v1: i64x2):

 ; block0:
 ; uqxtn v0.2s, v0.2d
-; uqxtn2 v0.4s, v1.2d
+; uqxtn2 v0.4s, v0.4s, v1.2d
 ; ret

 function %snarrow_i16x8_zero(i16x8) -> i8x16 {
@@ -11,7 +11,7 @@ block0:

 ; block0:
 ; movz x2, #1
-; movk x2, #1, LSL #48
+; movk x2, x2, #1, LSL #48
 ; dup v0.2d, x2
 ; ret

@@ -11,7 +11,7 @@ block0:

 ; block0:
 ; movz x1, #1
-; movk x1, #1, LSL #48
+; movk x1, x1, #1, LSL #48
 ; fmov d0, x1
 ; ret

@@ -98,16 +98,16 @@ block0(v0: i64):
 ; subs xzr, sp, x0, UXTX
 ; b.hs 8 ; udf
 ; movz w17, #6784
-; movk w17, #6, LSL #16
+; movk w17, w17, #6, LSL #16
 ; add x16, x0, x17, UXTX
 ; subs xzr, sp, x16, UXTX
 ; b.hs 8 ; udf
 ; movz w16, #6784
-; movk w16, #6, LSL #16
+; movk w16, w16, #6, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
 ; movz w16, #6784
-; movk w16, #6, LSL #16
+; movk w16, w16, #6, LSL #16
 ; add sp, sp, x16, UXTX
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -152,16 +152,16 @@ block0(v0: i64):
 ; subs xzr, sp, x16, UXTX
 ; b.hs 8 ; udf
 ; movz w17, #6784
-; movk w17, #6, LSL #16
+; movk w17, w17, #6, LSL #16
 ; add x16, x16, x17, UXTX
 ; subs xzr, sp, x16, UXTX
 ; b.hs 8 ; udf
 ; movz w16, #6784
-; movk w16, #6, LSL #16
+; movk w16, w16, #6, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
 ; movz w16, #6784
-; movk w16, #6, LSL #16
+; movk w16, w16, #6, LSL #16
 ; add sp, sp, x16, UXTX
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -177,7 +177,7 @@ block0(v0: i64):

 ; stp fp, lr, [sp, #-16]!
 ; mov fp, sp
-; movz w16, #6784 ; movk w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16]
+; movz w16, #6784 ; movk w16, w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16]
 ; add x16, x16, #32
 ; subs xzr, sp, x16, UXTX
 ; b.hs 8 ; udf
@@ -31,12 +31,12 @@ block0:
 ; stp fp, lr, [sp, #-16]!
 ; mov fp, sp
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
 ; mov x0, sp
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; add sp, sp, x16, UXTX
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -71,13 +71,13 @@ block0:
 ; stp fp, lr, [sp, #-16]!
 ; mov fp, sp
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
 ; mov x2, sp
 ; ldr x0, [x2]
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; add sp, sp, x16, UXTX
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -112,13 +112,13 @@ block0(v0: i64):
 ; stp fp, lr, [sp, #-16]!
 ; mov fp, sp
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
 ; mov x2, sp
 ; str x0, [x2]
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; add sp, sp, x16, UXTX
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -479,13 +479,13 @@ block0(v0: i128):
 ; stp fp, lr, [sp, #-16]!
 ; mov fp, sp
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
 ; mov x5, sp
 ; stp x0, x1, [x5]
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; add sp, sp, x16, UXTX
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -539,13 +539,13 @@ block0:
 ; stp fp, lr, [sp, #-16]!
 ; mov fp, sp
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
 ; mov x5, sp
 ; ldp x0, x1, [x5]
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; add sp, sp, x16, UXTX
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -10,9 +10,9 @@ block0(v0: i8x16):
 ; block0:
 ; sshr v3.16b, v0.16b, #7
 ; movz x6, #513
-; movk x6, #2052, LSL #16
-; movk x6, #8208, LSL #32
-; movk x6, #32832, LSL #48
+; movk x6, x6, #2052, LSL #16
+; movk x6, x6, #8208, LSL #32
+; movk x6, x6, #32832, LSL #48
 ; dup v17.2d, x6
 ; and v20.16b, v3.16b, v17.16b
 ; ext v22.16b, v20.16b, v20.16b, #8
@@ -30,9 +30,9 @@ block0(v0: i8x16):
 ; block0:
 ; sshr v3.16b, v0.16b, #7
 ; movz x6, #513
-; movk x6, #2052, LSL #16
-; movk x6, #8208, LSL #32
-; movk x6, #32832, LSL #48
+; movk x6, x6, #2052, LSL #16
+; movk x6, x6, #8208, LSL #32
+; movk x6, x6, #32832, LSL #48
 ; dup v17.2d, x6
 ; and v20.16b, v3.16b, v17.16b
 ; ext v22.16b, v20.16b, v20.16b, #8