diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 8052874603..3f5aeb784f 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -560,10 +560,7 @@ impl ABIMachineSpec for AArch64MachineDeps { insts.push(Inst::StoreP64 { rt: fp_reg(), rt2: link_reg(), - mem: PairAMode::PreIndexed( - writable_stack_reg(), - SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(), - ), + mem: PairAMode::SPPreIndexed(SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap()), flags: MemFlags::trusted(), }); @@ -601,10 +598,7 @@ impl ABIMachineSpec for AArch64MachineDeps { insts.push(Inst::LoadP64 { rt: writable_fp_reg(), rt2: writable_link_reg(), - mem: PairAMode::PostIndexed( - writable_stack_reg(), - SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(), - ), + mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(16, types::I64).unwrap()), flags: MemFlags::trusted(), }); insts @@ -676,10 +670,7 @@ impl ABIMachineSpec for AArch64MachineDeps { // str rd, [sp, #-16]! insts.push(Inst::Store64 { rd, - mem: AMode::PreIndexed( - writable_stack_reg(), - SImm9::maybe_from_i64(-clobber_offset_change).unwrap(), - ), + mem: AMode::SPPreIndexed(SImm9::maybe_from_i64(-clobber_offset_change).unwrap()), flags: MemFlags::trusted(), }); @@ -708,8 +699,7 @@ impl ABIMachineSpec for AArch64MachineDeps { insts.push(Inst::StoreP64 { rt, rt2, - mem: PairAMode::PreIndexed( - writable_stack_reg(), + mem: PairAMode::SPPreIndexed( SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(), ), flags: MemFlags::trusted(), @@ -734,10 +724,7 @@ impl ABIMachineSpec for AArch64MachineDeps { let store_vec_reg = |rd| Inst::FpuStore64 { rd, - mem: AMode::PreIndexed( - writable_stack_reg(), - SImm9::maybe_from_i64(-clobber_offset_change).unwrap(), - ), + mem: AMode::SPPreIndexed(SImm9::maybe_from_i64(-clobber_offset_change).unwrap()), flags: MemFlags::trusted(), }; let iter = clobbered_vec.chunks_exact(2); @@ -766,8 +753,7 @@ impl ABIMachineSpec for AArch64MachineDeps { Inst::FpuStoreP64 { rt, rt2, - mem: PairAMode::PreIndexed( - writable_stack_reg(), + mem: PairAMode::SPPreIndexed( SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(), ), flags: MemFlags::trusted(), @@ -831,16 +817,13 @@ impl ABIMachineSpec for AArch64MachineDeps { let load_vec_reg = |rd| Inst::FpuLoad64 { rd, - mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()), + mem: AMode::SPPostIndexed(SImm9::maybe_from_i64(16).unwrap()), flags: MemFlags::trusted(), }; let load_vec_reg_pair = |rt, rt2| Inst::FpuLoadP64 { rt, rt2, - mem: PairAMode::PostIndexed( - writable_stack_reg(), - SImm7Scaled::maybe_from_i64(16, F64).unwrap(), - ), + mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(16, F64).unwrap()), flags: MemFlags::trusted(), }; @@ -876,10 +859,7 @@ impl ABIMachineSpec for AArch64MachineDeps { insts.push(Inst::LoadP64 { rt, rt2, - mem: PairAMode::PostIndexed( - writable_stack_reg(), - SImm7Scaled::maybe_from_i64(16, I64).unwrap(), - ), + mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(16, I64).unwrap()), flags: MemFlags::trusted(), }); } @@ -893,7 +873,7 @@ impl ABIMachineSpec for AArch64MachineDeps { // ldr rd, [sp], #16 insts.push(Inst::ULoad64 { rd, - mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()), + mem: AMode::SPPostIndexed(SImm9::maybe_from_i64(16).unwrap()), flags: MemFlags::trusted(), }); } diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle 
b/cranelift/codegen/src/isa/aarch64/inst.isle index a49f8872d1..fb2a81ba13 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -171,13 +171,23 @@ (rd WritableReg) (rm PReg)) - ;; A MOV[Z,N,K] with a 16-bit immediate. + ;; A MOV[Z,N] with a 16-bit immediate. (MovWide (op MoveWideOp) (rd WritableReg) (imm MoveWideConst) (size OperandSize)) + ;; A MOVK with a 16-bit immediate. Modifies its register; we + ;; model this with a separate input `rn` and output `rd` virtual + ;; register, with a regalloc constraint to tie them together. + (MovK + (rd WritableReg) + (rn Reg) + (imm MoveWideConst) + (size OperandSize)) + + ;; A sign- or zero-extend operation. (Extend (rd WritableReg) @@ -240,7 +250,12 @@ ;; x28 (wr) scratch reg; value afterwards has no meaning (AtomicRMWLoop (ty Type) ;; I8, I16, I32 or I64 - (op AtomicRMWLoopOp)) + (op AtomicRMWLoopOp) + (addr Reg) + (operand Reg) + (oldval WritableReg) + (scratch1 WritableReg) + (scratch2 WritableReg)) ;; Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked ;; store-conditional loop, with acquire-release semantics. @@ -253,7 +268,11 @@ ;; x24 (wr) scratch reg; value afterwards has no meaning (AtomicCASLoop (ty Type) ;; I8, I16, I32 or I64 - ) + (addr Reg) + (expected Reg) + (replacement Reg) + (oldval WritableReg) + (scratch WritableReg)) ;; An atomic read-modify-write operation. These instructions require the ;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have @@ -269,7 +288,10 @@ ;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have ;; acquire-release semantics. (AtomicCAS - (rs WritableReg) + ;; `rd` is really `rs` in the encoded instruction (so `rd` == `rs`); we separate + ;; them here to have separate use and def vregs for regalloc. + (rd WritableReg) + (rs Reg) (rt Reg) (rn Reg) (ty Type)) @@ -342,6 +364,16 @@ (rd WritableReg) (rn Reg)) + ;; Variant of FpuRRI that modifies its `rd`, and so we name the + ;; input state `ri` (for "input") and constrain the two + ;; together. + (FpuRRIMod + (fpu_op FPUOpRIMod) + (rd WritableReg) + (ri Reg) + (rn Reg)) + + ;; 3-op FPU instruction. ;; 16-bit scalars require half-precision floating-point support (FEAT_FP16). (FpuRRRR @@ -479,6 +511,7 @@ ;; Move to a vector element from a GPR. (MovToVec (rd WritableReg) + (ri Reg) (rn Reg) (idx u8) (size VectorSize)) @@ -534,6 +567,7 @@ ;; Move vector element to another vector element. (VecMovElement (rd WritableReg) + (ri Reg) (rn Reg) (dest_idx u8) (src_idx u8) @@ -546,12 +580,19 @@ (rn Reg) (high_half bool)) - ;; Vector narrowing operation. - (VecRRNarrow + ;; Vector narrowing operation -- low half. + (VecRRNarrowLow (op VecRRNarrowOp) (rd WritableReg) (rn Reg) - (high_half bool) + (lane_size ScalarSize)) + + ;; Vector narrowing operation -- high half. + (VecRRNarrowHigh + (op VecRRNarrowOp) + (rd WritableReg) + (ri Reg) + (rn Reg) + (lane_size ScalarSize)) ;; 1-operand vector instruction that operates on a pair of elements. @@ -569,6 +610,17 @@ (rm Reg) (high_half bool)) + ;; 2-operand vector instruction that produces a result with + ;; twice the lane width and half the number of lanes. Variant + ;; that modifies `rd` (so takes its initial state as `ri`). + (VecRRRLongMod + (alu_op VecRRRLongModOp) + (rd WritableReg) + (ri Reg) + (rn Reg) + (rm Reg) + (high_half bool)) + ;; 1-operand vector instruction that extends elements of the input ;; register and operates on a pair of elements. 
The output lane width ;; is double that of the input. @@ -589,6 +641,7 @@ (VecRRRMod (alu_op VecALUModOp) (rd WritableReg) + (ri Reg) (rn Reg) (rm Reg) (size VectorSize)) @@ -623,6 +676,7 @@ (VecShiftImmMod (op VecShiftImmModOp) (rd WritableReg) + (ri Reg) (rn Reg) (size VectorSize) (imm u8)) @@ -635,29 +689,55 @@ (rm Reg) (imm4 u8)) - ;; Table vector lookup - single register table. The table consists of 8-bit elements and is - ;; stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether - ;; to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination - ;; vector that correspond to out-of-range indices (greater than 15) unmodified or to set them - ;; to 0. + ;; Table vector lookup - single register table. The table + ;; consists of 8-bit elements and is stored in `rn`, while `rm` + ;; contains 8-bit element indices. This variant emits `TBL`, + ;; which sets elements that correspond to out-of-range indices + ;; (greater than 15) to 0. (VecTbl (rd WritableReg) (rn Reg) - (rm Reg) - (is_extension bool)) + (rm Reg)) - ;; Table vector lookup - two register table. The table consists of 8-bit elements and is - ;; stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension` - ;; specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in - ;; the destination vector that correspond to out-of-range indices (greater than 31) unmodified - ;; or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers - ;; modulo 32, that is v31 and v0 (in that order) are consecutive registers. + ;; Table vector lookup - single register table. The table + ;; consists of 8-bit elements and is stored in `rn`, while `rm` + ;; contains 8-bit element indices. This variant emits `TBX`, + ;; which leaves elements that correspond to out-of-range indices + ;; (greater than 15) unmodified. Hence, it takes an input vreg in + ;; `ri` that is constrained to the same allocation as `rd`. + (VecTblExt + (rd WritableReg) + (ri Reg) + (rn Reg) + (rm Reg)) + + ;; Table vector lookup - two register table. The table consists + ;; of 8-bit elements and is stored in `rn` and `rn2`, while + ;; `rm` contains 8-bit element indices. The table registers + ;; `rn` and `rn2` must have consecutive numbers modulo 32, that + ;; is v31 and v0 (in that order) are consecutive registers. + ;; This variant emits `TBL`, which sets out-of-range results to + ;; 0. (VecTbl2 (rd WritableReg) (rn Reg) (rn2 Reg) - (rm Reg) - (is_extension bool)) + (rm Reg)) + + ;; Table vector lookup - two register table. The table consists + ;; of 8-bit elements and is stored in `rn` and `rn2`, while + ;; `rm` contains 8-bit element indices. The table registers + ;; `rn` and `rn2` must have consecutive numbers modulo 32, that + ;; is v31 and v0 (in that order) are consecutive registers. + ;; This variant emits `TBX`, which leaves out-of-range results + ;; unmodified, hence takes the initial state of the result + ;; register in vreg `ri`. + (VecTbl2Ext + (rd WritableReg) + (ri Reg) + (rn Reg) + (rn2 Reg) + (rm Reg)) ;; Load an element and replicate to all lanes of a vector. 
(VecLoadReplicate @@ -888,7 +968,6 @@ (enum (MovZ) (MovN) - (MovK) )) (type UImm5 (primitive UImm5)) @@ -934,6 +1013,7 @@ (type AMode extern (enum)) (type PairAMode extern (enum)) (type FPUOpRI extern (enum)) +(type FPUOpRIMod extern (enum)) (type OperandSize extern (enum Size32 @@ -1287,6 +1367,10 @@ (Umull8) (Umull16) (Umull32) +)) + +(type VecRRRLongModOp + (enum ;; Unsigned multiply add long (Umlal8) (Umlal16) @@ -1447,9 +1531,9 @@ (decl fpu_op_ri_ushr (u8 u8) FPUOpRI) (extern constructor fpu_op_ri_ushr fpu_op_ri_ushr) -;; Constructs an FPUOpRI.Sli* given the size in bits of the value (or lane) +;; Constructs an FPUOpRIMod.Sli* given the size in bits of the value (or lane) ;; and the amount to shift by. -(decl fpu_op_ri_sli (u8 u8) FPUOpRI) +(decl fpu_op_ri_sli (u8 u8) FPUOpRIMod) (extern constructor fpu_op_ri_sli fpu_op_ri_sli) (decl imm12_from_negated_u64 (Imm12) u64) @@ -1524,29 +1608,6 @@ (decl writable_zero_reg () WritableReg) (extern constructor writable_zero_reg writable_zero_reg) -;; Helpers for getting a particular real register -(decl xreg (u8) Reg) -(extern constructor xreg xreg) - -(decl writable_vreg (u8) WritableReg) -(extern constructor writable_vreg writable_vreg) - -(decl writable_xreg (u8) WritableReg) -(extern constructor writable_xreg writable_xreg) - -;; Helper for emitting `MInst.Mov64` instructions. -(decl mov64_to_real (u8 Reg) Reg) -(rule (mov64_to_real num src) - (let ((dst WritableReg (writable_xreg num)) - (_ Unit (emit (MInst.Mov (operand_size $I64) dst src)))) - dst)) - -(decl mov64_from_real (u8) Reg) -(rule (mov64_from_real num) - (let ((dst WritableReg (temp_writable_reg $I64)) - (_ Unit (emit (MInst.Mov (operand_size $I64) dst (xreg num))))) - dst)) - ;; Helper for emitting `MInst.MovZ` instructions. (decl movz (MoveWideConst OperandSize) Reg) (rule (movz imm size) @@ -1601,8 +1662,7 @@ (decl vec_rrr_mod (VecALUModOp Reg Reg Reg VectorSize) Reg) (rule (vec_rrr_mod op src1 src2 src3 size) (let ((dst WritableReg (temp_writable_reg $I8X16)) - (_1 Unit (emit (MInst.FpuMove128 dst src1))) - (_2 Unit (emit (MInst.VecRRRMod op dst src2 src3 size)))) + (_1 Unit (emit (MInst.VecRRRMod op dst src1 src2 src3 size)))) dst)) (decl fpu_rri (FPUOpRI Reg) Reg) @@ -1611,6 +1671,12 @@ (_ Unit (emit (MInst.FpuRRI op dst src)))) dst)) +(decl fpu_rri_mod (FPUOpRIMod Reg Reg) Reg) +(rule (fpu_rri_mod op dst_src src) + (let ((dst WritableReg (temp_writable_reg $F64)) + (_ Unit (emit (MInst.FpuRRIMod op dst dst_src src)))) + dst)) + ;; Helper for emitting `MInst.FpuRRR` instructions. (decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg) (rule (fpu_rrr op src1 src2 size) @@ -1790,29 +1856,33 @@ dst)) ;; Helper for emitting `MInst.VecTbl` instructions. -(decl vec_tbl (Reg Reg bool) Reg) -(rule (vec_tbl rn rm is_extension) +(decl vec_tbl (Reg Reg) Reg) +(rule (vec_tbl rn rm) (let ((dst WritableReg (temp_writable_reg $I8X16)) - (_ Unit (emit (MInst.VecTbl dst rn rm is_extension)))) + (_ Unit (emit (MInst.VecTbl dst rn rm)))) + dst)) + +(decl vec_tbl_ext (Reg Reg Reg) Reg) +(rule (vec_tbl_ext ri rn rm) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecTblExt dst ri rn rm)))) dst)) ;; Helper for emitting `MInst.VecTbl2` instructions. -;; - 2 register table vector lookups require consecutive table registers; -;; we satisfy this constraint by hardcoding the usage of v30 and v31. -;; - Make sure that both args are in virtual regs, since it is not guaranteed -;; that we can get them safely to the temporaries if either is in a real -;; register. 
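Stepping back to the `MovWide`/`MovK` split above: MOVZ/MOVN fully define their destination, but MOVK only replaces one 16-bit field and keeps the remaining bits, so it must read the destination's previous value — that is the `rn` input the new `MInst.MovK` carries and ties to `rd`. A minimal host-side Rust model of the semantics (a sketch for illustration, not code from this patch):

```rust
// Host-side model of AArch64 MOVZ/MOVK semantics. `shift` is the
// hw field scaled to bits: 0, 16, 32, or 48.
fn movz(imm16: u16, shift: u32) -> u64 {
    (imm16 as u64) << shift
}

// MOVK reads the old value: it replaces one 16-bit field, keeps the rest.
fn movk(old: u64, imm16: u16, shift: u32) -> u64 {
    (old & !(0xffffu64 << shift)) | ((imm16 as u64) << shift)
}

fn main() {
    // Build 0x1234_5678_9abc_def0 the way `Inst::load_constant` chains
    // one MOVZ with MOVKs over the remaining half-words.
    let mut v = movz(0xdef0, 0);
    v = movk(v, 0x9abc, 16);
    v = movk(v, 0x5678, 32);
    v = movk(v, 0x1234, 48);
    assert_eq!(v, 0x1234_5678_9abc_def0);
}
```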
-(decl vec_tbl2 (Reg Reg Reg bool Type) Reg) -(rule (vec_tbl2 rn rn2 rm is_extension ty) +(decl vec_tbl2 (Reg Reg Reg Type) Reg) +(rule (vec_tbl2 rn rn2 rm ty) (let ( - (temp WritableReg (writable_vreg 30)) - (temp2 WritableReg (writable_vreg 31)) (dst WritableReg (temp_writable_reg $I8X16)) - (rn Reg (ensure_in_vreg rn ty)) - (rn2 Reg (ensure_in_vreg rn2 ty)) - (_ Unit (emit (MInst.FpuMove128 temp rn))) - (_ Unit (emit (MInst.FpuMove128 temp2 rn2))) - (_ Unit (emit (MInst.VecTbl2 dst temp temp2 rm is_extension))) + (_ Unit (emit (MInst.VecTbl2 dst rn rn2 rm))) + ) + dst)) + +;; Helper for emitting `MInst.VecTbl2Ext` instructions. +(decl vec_tbl2_ext (Reg Reg Reg Reg Type) Reg) +(rule (vec_tbl2_ext ri rn rn2 rm ty) + (let ( + (dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecTbl2Ext dst ri rn rn2 rm))) ) dst)) @@ -1830,22 +1900,18 @@ (_ Unit (emit (MInst.VecRRPairLong op dst src)))) dst)) -;; Helper for emitting `MInst.VecRRRLong` instructions, but for variants -;; where the operation both reads and modifies the destination register. -;; -;; Currently this is only used for `VecRRRLongOp.Umlal*` -(decl vec_rrrr_long (VecRRRLongOp Reg Reg Reg bool) Reg) +;; Helper for emitting `MInst.VecRRRLongMod` instructions. +(decl vec_rrrr_long (VecRRRLongModOp Reg Reg Reg bool) Reg) (rule (vec_rrrr_long op src1 src2 src3 high_half) (let ((dst WritableReg (temp_writable_reg $I8X16)) - (_ Unit (emit (MInst.FpuMove128 dst src1))) - (_ Unit (emit (MInst.VecRRRLong op dst src2 src3 high_half)))) + (_ Unit (emit (MInst.VecRRRLongMod op dst src1 src2 src3 high_half)))) dst)) ;; Helper for emitting `MInst.VecRRNarrow` instructions. -(decl vec_rr_narrow (VecRRNarrowOp Reg ScalarSize) Reg) -(rule (vec_rr_narrow op src size) +(decl vec_rr_narrow_low (VecRRNarrowOp Reg ScalarSize) Reg) +(rule (vec_rr_narrow_low op src size) (let ((dst WritableReg (temp_writable_reg $I8X16)) - (_ Unit (emit (MInst.VecRRNarrow op dst src $false size)))) + (_ Unit (emit (MInst.VecRRNarrowLow op dst src size)))) dst)) ;; Helper for emitting `MInst.VecRRNarrow` instructions which update the @@ -1853,8 +1919,7 @@ (decl vec_rr_narrow_high (VecRRNarrowOp Reg Reg ScalarSize) Reg) (rule (vec_rr_narrow_high op mod src size) (let ((dst WritableReg (temp_writable_reg $I8X16)) - (_ Unit (emit (MInst.FpuMove128 dst mod))) - (_ Unit (emit (MInst.VecRRNarrow op dst src $true size)))) + (_ Unit (emit (MInst.VecRRNarrowHigh op dst mod src size)))) dst)) ;; Helper for emitting `MInst.VecRRLong` instructions. @@ -1897,16 +1962,14 @@ (decl mov_to_vec (Reg Reg u8 VectorSize) Reg) (rule (mov_to_vec src1 src2 lane size) (let ((dst WritableReg (temp_writable_reg $I8X16)) - (_ Unit (emit (MInst.FpuMove128 dst src1))) - (_ Unit (emit (MInst.MovToVec dst src2 lane size)))) + (_ Unit (emit (MInst.MovToVec dst src1 src2 lane size)))) dst)) ;; Helper for emitting `MInst.VecMovElement` instructions. (decl mov_vec_elem (Reg Reg u8 u8 VectorSize) Reg) (rule (mov_vec_elem src1 src2 dst_idx src_idx size) (let ((dst WritableReg (temp_writable_reg $I8X16)) - (_ Unit (emit (MInst.FpuMove128 dst src1))) - (_ Unit (emit (MInst.VecMovElement dst src2 dst_idx src_idx size)))) + (_ Unit (emit (MInst.VecMovElement dst src1 src2 dst_idx src_idx size)))) dst)) ;; Helper for emitting `MInst.MovFromVec` instructions. @@ -2104,15 +2167,15 @@ ;; Helper for generating `xtn` instructions. 
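Before the `xtn` helpers below: the low/high split mirrors the hardware. `xtn` writes the low 64 bits of the destination and zeroes the high half, while `xtn2` writes only the high half and must preserve the low one — hence the extra `ri` input on `VecRRNarrowHigh`. A host-side sketch of the two behaviors (illustrative only, not code from this patch):

```rust
// Narrow four 32-bit lanes to 16 bits each (like XTN/XTN2 on a .4s input).
// The "low" form defines the whole destination (high half zeroed)...
fn xtn(src: [u32; 4]) -> [u16; 8] {
    let mut out = [0u16; 8];
    for (i, &lane) in src.iter().enumerate() {
        out[i] = lane as u16; // truncate each lane
    }
    out
}

// ...while the "2" (high-half) form merges into an existing destination,
// so it consumes the destination's previous value `prev` -- the `ri` vreg.
fn xtn2(prev: [u16; 8], src: [u32; 4]) -> [u16; 8] {
    let mut out = prev;
    for (i, &lane) in src.iter().enumerate() {
        out[4 + i] = lane as u16;
    }
    out
}

fn main() {
    let low = xtn([1, 2, 3, 4]);
    let full = xtn2(low, [5, 6, 7, 8]);
    assert_eq!(full, [1, 2, 3, 4, 5, 6, 7, 8]);
}
```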
(decl xtn (Reg ScalarSize) Reg) -(rule (xtn x size) (vec_rr_narrow (VecRRNarrowOp.Xtn) x size)) +(rule (xtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Xtn) x size)) ;; Helper for generating `fcvtn` instructions. (decl fcvtn (Reg ScalarSize) Reg) -(rule (fcvtn x size) (vec_rr_narrow (VecRRNarrowOp.Fcvtn) x size)) +(rule (fcvtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Fcvtn) x size)) ;; Helper for generating `sqxtn` instructions. (decl sqxtn (Reg ScalarSize) Reg) -(rule (sqxtn x size) (vec_rr_narrow (VecRRNarrowOp.Sqxtn) x size)) +(rule (sqxtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Sqxtn) x size)) ;; Helper for generating `sqxtn2` instructions. (decl sqxtn2 (Reg Reg ScalarSize) Reg) @@ -2120,7 +2183,7 @@ ;; Helper for generating `sqxtun` instructions. (decl sqxtun (Reg ScalarSize) Reg) -(rule (sqxtun x size) (vec_rr_narrow (VecRRNarrowOp.Sqxtun) x size)) +(rule (sqxtun x size) (vec_rr_narrow_low (VecRRNarrowOp.Sqxtun) x size)) ;; Helper for generating `sqxtun2` instructions. (decl sqxtun2 (Reg Reg ScalarSize) Reg) @@ -2128,7 +2191,7 @@ ;; Helper for generating `uqxtn` instructions. (decl uqxtn (Reg ScalarSize) Reg) -(rule (uqxtn x size) (vec_rr_narrow (VecRRNarrowOp.Uqxtn) x size)) +(rule (uqxtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Uqxtn) x size)) ;; Helper for generating `uqxtn2` instructions. (decl uqxtn2 (Reg Reg ScalarSize) Reg) @@ -2187,7 +2250,7 @@ ;; Helper for generating `umlal32` instructions. (decl umlal32 (Reg Reg Reg bool) Reg) -(rule (umlal32 x y z high_half) (vec_rrrr_long (VecRRRLongOp.Umlal32) x y z high_half)) +(rule (umlal32 x y z high_half) (vec_rrrr_long (VecRRRLongModOp.Umlal32) x y z high_half)) ;; Helper for generating `smull8` instructions. (decl smull8 (Reg Reg bool) Reg) @@ -2719,8 +2782,7 @@ (rule (lse_atomic_cas addr expect replace ty) (let ( (dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.Mov (operand_size ty) dst expect))) - (_ Unit (emit (MInst.AtomicCAS dst replace addr ty))) + (_ Unit (emit (MInst.AtomicCAS dst expect replace addr ty))) ) dst)) @@ -2730,16 +2792,13 @@ ;; regs, and that's not guaranteed safe if either is in a real reg. ;; - Move the args to the preordained AtomicRMW input regs ;; - And finally, copy the preordained AtomicRMW output reg to its destination. -(decl atomic_rmw_loop (AtomicRMWLoopOp Value Value Type) Reg) -(rule (atomic_rmw_loop op p arg2 ty) - (let ( - (v_addr Reg (ensure_in_vreg p $I64)) - (v_arg2 Reg (ensure_in_vreg arg2 $I64)) - (r_addr Reg (mov64_to_real 25 v_addr)) - (r_arg2 Reg (mov64_to_real 26 v_arg2)) - (_ Unit (emit (MInst.AtomicRMWLoop ty op))) - ) - (mov64_from_real 27))) +(decl atomic_rmw_loop (AtomicRMWLoopOp Reg Reg Type) Reg) +(rule (atomic_rmw_loop op addr operand ty) + (let ((dst WritableReg (temp_writable_reg $I64)) + (scratch1 WritableReg (temp_writable_reg $I64)) + (scratch2 WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.AtomicRMWLoop ty op addr operand dst scratch1 scratch2)))) + dst)) ;; Helper for emitting `MInst.AtomicCASLoop` instructions. ;; This is very similar to, but not identical to, the AtomicRmw case. Note @@ -2749,21 +2808,10 @@ ;; for `atomic_rmw_loop` above. 
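Both this helper and `atomic_cas_loop` below now pass ordinary virtual registers instead of pinning x25/x26/x27, so the `mov64_to_real`/`mov64_from_real` shuffling disappears. For reference, the semantics the ldaxr/stlxr expansion implements, modeled on the host with a compare-exchange retry loop (a sketch of the semantics, not the emitted code):

```rust
use std::sync::atomic::{AtomicU64, Ordering};

// Host-side model of the load-linked/store-conditional retry loop that
// AtomicRMWLoop expands to: returns the *old* value (the `oldval` def),
// retrying until the store-conditional succeeds.
fn atomic_rmw_loop(addr: &AtomicU64, operand: u64, op: impl Fn(u64, u64) -> u64) -> u64 {
    let mut old = addr.load(Ordering::Acquire); // models ldaxr
    loop {
        let new = op(old, operand); // models the ALU op on the scratch regs
        // compare_exchange_weak models the stlxr + cbnz-retry pair.
        match addr.compare_exchange_weak(old, new, Ordering::AcqRel, Ordering::Acquire) {
            Ok(_) => return old,
            Err(cur) => old = cur,
        }
    }
}

fn main() {
    let v = AtomicU64::new(40);
    let old = atomic_rmw_loop(&v, 2, |a, b| a.wrapping_add(b));
    assert_eq!((old, v.load(Ordering::Relaxed)), (40, 42));
}
```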
(decl atomic_cas_loop (Reg Reg Reg Type) Reg) (rule (atomic_cas_loop addr expect replace ty) - (let ( - (v_addr Reg (ensure_in_vreg addr $I64)) - (v_exp Reg (ensure_in_vreg expect $I64)) - (v_rep Reg (ensure_in_vreg replace $I64)) - ;; Move the args to the preordained AtomicCASLoop input regs - (r_addr Reg (mov64_to_real 25 v_addr)) - (r_exp Reg (mov64_to_real 26 v_exp)) - (r_rep Reg (mov64_to_real 28 v_rep)) - ;; Now the AtomicCASLoop itself, implemented in the normal way, with a - ;; load-exclusive, store-exclusive loop - (_ Unit (emit (MInst.AtomicCASLoop ty))) - ) - ;; And finally, copy the preordained AtomicCASLoop output reg to its destination. - ;; Also, x24 and x28 are trashed. - (mov64_from_real 27))) + (let ((dst WritableReg (temp_writable_reg $I64)) + (scratch WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.AtomicCASLoop ty addr expect replace dst scratch)))) + dst)) ;; Helper for emitting `MInst.MovPReg` instructions. (decl mov_preg (PReg) Reg) @@ -2811,15 +2859,13 @@ (decl fcopy_sign (Reg Reg Type) Reg) (rule (fcopy_sign x y (ty_scalar_float ty)) (let ((dst WritableReg (temp_writable_reg $F64)) - (_ Unit (emit (MInst.FpuMove64 dst x))) (tmp Reg (fpu_rri (fpu_op_ri_ushr (ty_bits ty) (max_shift ty)) y)) - (_ Unit (emit (MInst.FpuRRI (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst tmp)))) + (_ Unit (emit (MInst.FpuRRIMod (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst x tmp)))) dst)) (rule (fcopy_sign x y ty @ (multi_lane _ _)) (let ((dst WritableReg (temp_writable_reg $I8X16)) - (_ Unit (emit (MInst.FpuMove128 dst x))) (tmp Reg (vec_shift_imm (VecShiftImmOp.Ushr) (max_shift (lane_type ty)) y (vector_size ty))) - (_ Unit (emit (MInst.VecShiftImmMod (VecShiftImmModOp.Sli) dst tmp (vector_size ty) (max_shift (lane_type ty)))))) + (_ Unit (emit (MInst.VecShiftImmMod (VecShiftImmModOp.Sli) dst x tmp (vector_size ty) (max_shift (lane_type ty)))))) dst)) ;; Helpers for generating `MInst.FpuToInt` instructions. diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 4428be2a83..57869e1c32 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -3,7 +3,7 @@ use crate::ir::types::*; use crate::ir::Type; use crate::isa::aarch64::inst::*; -use crate::machinst::{ty_bits, MachLabel, PrettyPrint, Reg, Writable}; +use crate::machinst::{ty_bits, MachLabel, PrettyPrint, Reg}; use core::convert::Into; use std::string::String; @@ -122,9 +122,11 @@ pub enum AMode { // Real ARM64 addressing modes: // /// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation. - PostIndexed(Writable<Reg>, SImm9), + /// Specialized here to SP so we don't have to emit regalloc metadata. + SPPostIndexed(SImm9), /// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation. - PreIndexed(Writable<Reg>, SImm9), + /// Specialized here to SP so we don't have to emit regalloc metadata. + SPPreIndexed(SImm9), // N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to // what the ISA calls the "register offset" addressing mode. 
We split out @@ -220,10 +222,12 @@ impl AMode { &AMode::RegExtended(r1, r2, ext) => { AMode::RegExtended(allocs.next(r1), allocs.next(r2), ext) } - &AMode::PreIndexed(reg, simm9) => AMode::PreIndexed(allocs.next_writable(reg), simm9), - &AMode::PostIndexed(reg, simm9) => AMode::PostIndexed(allocs.next_writable(reg), simm9), + // Note that SP is not managed by regalloc, so there is no register to report in the + // pre/post-indexed amodes. &AMode::RegOffset(r, off, ty) => AMode::RegOffset(allocs.next(r), off, ty), - &AMode::FPOffset(..) + &AMode::SPPreIndexed(..) + | &AMode::SPPostIndexed(..) + | &AMode::FPOffset(..) | &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) | AMode::Label(..) => self.clone(), @@ -235,8 +239,8 @@ #[derive(Clone, Debug)] pub enum PairAMode { SignedOffset(Reg, SImm7Scaled), - PreIndexed(Writable<Reg>, SImm7Scaled), - PostIndexed(Writable<Reg>, SImm7Scaled), + SPPreIndexed(SImm7Scaled), + SPPostIndexed(SImm7Scaled), } impl PairAMode { @@ -246,12 +250,7 @@ &PairAMode::SignedOffset(reg, simm7scaled) => { PairAMode::SignedOffset(allocs.next(reg), simm7scaled) } - &PairAMode::PreIndexed(reg, simm7scaled) => { - PairAMode::PreIndexed(allocs.next_writable(reg), simm7scaled) - } - &PairAMode::PostIndexed(reg, simm7scaled) => { - PairAMode::PostIndexed(allocs.next_writable(reg), simm7scaled) - } + &PairAMode::SPPreIndexed(..) | &PairAMode::SPPostIndexed(..) => self.clone(), } } } @@ -470,15 +469,13 @@ impl PrettyPrint for AMode { format!("[{}, {}, {}]", r1, r2, op) } &AMode::Label(ref label) => label.pretty_print(0, allocs), - &AMode::PreIndexed(r, simm9) => { - let r = pretty_print_reg(r.to_reg(), allocs); + &AMode::SPPreIndexed(simm9) => { let simm9 = simm9.pretty_print(8, allocs); - format!("[{}, {}]!", r, simm9) + format!("[sp, {}]!", simm9) } - &AMode::PostIndexed(r, simm9) => { - let r = pretty_print_reg(r.to_reg(), allocs); + &AMode::SPPostIndexed(simm9) => { let simm9 = simm9.pretty_print(8, allocs); - format!("[{}], {}", r, simm9) + format!("[sp], {}", simm9) } // Eliminated by `mem_finalize()`. &AMode::SPOffset(..) 
@@ -503,15 +500,13 @@ impl PrettyPrint for PairAMode { format!("[{}]", reg) } } - &PairAMode::PreIndexed(reg, simm7) => { - let reg = pretty_print_reg(reg.to_reg(), allocs); + &PairAMode::SPPreIndexed(simm7) => { let simm7 = simm7.pretty_print(8, allocs); - format!("[{}, {}]!", reg, simm7) + format!("[sp, {}]!", simm7) } - &PairAMode::PostIndexed(reg, simm7) => { - let reg = pretty_print_reg(reg.to_reg(), allocs); + &PairAMode::SPPostIndexed(simm7) => { let simm7 = simm7.pretty_print(8, allocs); - format!("[{}], {}", reg, simm7) + format!("[sp], {}", simm7) } } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 57e1bfb488..3fb53f81f9 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -184,7 +184,6 @@ fn enc_move_wide(op: MoveWideOp, rd: Writable<Reg>, imm: MoveWideConst, size: Op let op = match op { MoveWideOp::MovN => 0b00, MoveWideOp::MovZ => 0b10, - MoveWideOp::MovK => 0b11, }; 0x12800000 | size.sf_bit() << 31 @@ -194,6 +193,15 @@ | machreg_to_gpr(rd.to_reg()) } +fn enc_movk(rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32 { + assert!(imm.shift <= 0b11); + 0x72800000 + | size.sf_bit() << 31 + | u32::from(imm.shift) << 21 + | u32::from(imm.bits) << 5 + | machreg_to_gpr(rd.to_reg()) +} + fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 { (op_31_22 << 22) | (simm7.bits() << 15) @@ -1040,12 +1048,12 @@ impl MachInstEmit for Inst { _ => panic!("Unsupported size for LDR from constant pool!"), } } - &AMode::PreIndexed(reg, simm9) => { - let reg = allocs.next(reg.to_reg()); + &AMode::SPPreIndexed(simm9) => { + let reg = stack_reg(); sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd)); } - &AMode::PostIndexed(reg, simm9) => { - let reg = allocs.next(reg.to_reg()); + &AMode::SPPostIndexed(simm9) => { + let reg = stack_reg(); sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd)); } // Eliminated by `mem_finalize()` above. @@ -1134,12 +1142,12 @@ &AMode::Label(..) => { panic!("Store to a MemLabel not implemented!"); } - &AMode::PreIndexed(reg, simm9) => { - let reg = allocs.next(reg.to_reg()); + &AMode::SPPreIndexed(simm9) => { + let reg = stack_reg(); sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd)); } - &AMode::PostIndexed(reg, simm9) => { - let reg = allocs.next(reg.to_reg()); + &AMode::SPPostIndexed(simm9) => { + let reg = stack_reg(); sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd)); } // Eliminated by `mem_finalize()` above. 
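A note on the rename: pre-indexed mode updates the base register before using it as the address, post-indexed mode after, and since SP is not an allocatable register there is nothing for regalloc to track — which is what lets these arms drop their `allocs.next(..)` calls. The update semantics, sketched on the host (illustrative, not this patch's code):

```rust
// `str x1, [sp, #-16]!`  (pre-indexed): base is updated first, then used.
fn pre_indexed(sp: &mut u64, simm9: i64) -> u64 {
    *sp = sp.wrapping_add(simm9 as u64);
    *sp
}

// `ldr x1, [sp], #16`  (post-indexed): base is used first, then updated.
fn post_indexed(sp: &mut u64, simm9: i64) -> u64 {
    let addr = *sp;
    *sp = sp.wrapping_add(simm9 as u64);
    addr
}

fn main() {
    let mut sp: u64 = 0x1000;
    assert_eq!(pre_indexed(&mut sp, -16), 0x0ff0); // push-style store
    assert_eq!(post_indexed(&mut sp, 16), 0x0ff0); // pop-style load
    assert_eq!(sp, 0x1000);
}
```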
@@ -1170,14 +1178,14 @@ impl MachInstEmit for Inst { let reg = allocs.next(reg); sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2)); } - &PairAMode::PreIndexed(reg, simm7) => { + &PairAMode::SPPreIndexed(simm7) => { assert_eq!(simm7.scale_ty, I64); - let reg = allocs.next(reg.to_reg()); + let reg = stack_reg(); sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2)); } - &PairAMode::PostIndexed(reg, simm7) => { + &PairAMode::SPPostIndexed(simm7) => { assert_eq!(simm7.scale_ty, I64); - let reg = allocs.next(reg.to_reg()); + let reg = stack_reg(); sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2)); } } @@ -1203,14 +1211,14 @@ impl MachInstEmit for Inst { let reg = allocs.next(reg); sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2)); } - &PairAMode::PreIndexed(reg, simm7) => { + &PairAMode::SPPreIndexed(simm7) => { assert_eq!(simm7.scale_ty, I64); - let reg = allocs.next(reg.to_reg()); + let reg = stack_reg(); sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2)); } - &PairAMode::PostIndexed(reg, simm7) => { + &PairAMode::SPPostIndexed(simm7) => { assert_eq!(simm7.scale_ty, I64); - let reg = allocs.next(reg.to_reg()); + let reg = stack_reg(); sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2)); } } @@ -1249,14 +1257,14 @@ impl MachInstEmit for Inst { let reg = allocs.next(reg); sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2)); } - &PairAMode::PreIndexed(reg, simm7) => { + &PairAMode::SPPreIndexed(simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); - let reg = allocs.next(reg.to_reg()); + let reg = stack_reg(); sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2)); } - &PairAMode::PostIndexed(reg, simm7) => { + &PairAMode::SPPostIndexed(simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); - let reg = allocs.next(reg.to_reg()); + let reg = stack_reg(); sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2)); } } @@ -1295,14 +1303,14 @@ impl MachInstEmit for Inst { let reg = allocs.next(reg); sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2)); } - &PairAMode::PreIndexed(reg, simm7) => { + &PairAMode::SPPreIndexed(simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); - let reg = allocs.next(reg.to_reg()); + let reg = stack_reg(); sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2)); } - &PairAMode::PostIndexed(reg, simm7) => { + &PairAMode::SPPostIndexed(simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); - let reg = allocs.next(reg.to_reg()); + let reg = stack_reg(); sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2)); } } @@ -1356,6 +1364,12 @@ impl MachInstEmit for Inst { let rd = allocs.next_writable(rd); sink.put4(enc_move_wide(op, rd, imm, size)); } + &Inst::MovK { rd, rn, imm, size } => { + let rn = allocs.next(rn); + let rd = allocs.next_writable(rd); + debug_assert_eq!(rn, rd.to_reg()); + sink.put4(enc_movk(rd, imm, size)); + } &Inst::CSel { rd, rn, rm, cond } => { let rd = allocs.next_writable(rd); let rn = allocs.next(rn); @@ -1403,7 +1417,7 @@ impl MachInstEmit for Inst { let rn = allocs.next(rn); sink.put4(enc_acq_rel(ty, op, rs, rt, rn)); } - &Inst::AtomicRMWLoop { ty, op } => { + &Inst::AtomicRMWLoop { ty, op, .. 
} => { /* Emit this: again: ldaxr{,b,h} x/w27, [x25] @@ -1581,8 +1595,10 @@ impl MachInstEmit for Inst { )); sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19); } - &Inst::AtomicCAS { rs, rt, rn, ty } => { - let rs = allocs.next_writable(rs); + &Inst::AtomicCAS { rd, rs, rt, rn, ty } => { + let rd = allocs.next_writable(rd); + let rs = allocs.next(rs); + debug_assert_eq!(rd.to_reg(), rs); let rt = allocs.next(rt); let rn = allocs.next(rn); let size = match ty { @@ -1593,9 +1609,9 @@ impl MachInstEmit for Inst { _ => panic!("Unsupported type: {}", ty), }; - sink.put4(enc_cas(size, rs, rt, rn)); + sink.put4(enc_cas(size, rd, rt, rn)); } - &Inst::AtomicCASLoop { ty } => { + &Inst::AtomicCASLoop { ty, .. } => { /* Emit this: again: ldaxr{,b,h} x/w27, [x25] @@ -1788,7 +1804,15 @@ impl MachInstEmit for Inst { | machreg_to_vec(rd.to_reg()), ) } - FPUOpRI::Sli64(imm) => { + } + } + &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => { + let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + let rn = allocs.next(rn); + debug_assert_eq!(rd.to_reg(), ri); + match fpu_op { + FPUOpRIMod::Sli64(imm) => { debug_assert_eq!(64, imm.lane_size_in_bits); sink.put4( 0b01_1_111110_0000000_010101_00000_00000 @@ -1797,7 +1821,7 @@ impl MachInstEmit for Inst { | machreg_to_vec(rd.to_reg()), ) } - FPUOpRI::Sli32(imm) => { + FPUOpRIMod::Sli32(imm) => { debug_assert_eq!(32, imm.lane_size_in_bits); sink.put4( 0b0_0_1_011110_0000000_010101_00000_00000 @@ -2036,11 +2060,14 @@ impl MachInstEmit for Inst { &Inst::VecShiftImmMod { op, rd, + ri, rn, size, imm, } => { let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let rn = allocs.next(rn); let (is_shr, mut template) = match op { VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32), @@ -2096,30 +2123,43 @@ impl MachInstEmit for Inst { ); } } - &Inst::VecTbl { - rd, - rn, - rm, - is_extension, - } => { + &Inst::VecTbl { rd, rn, rm } => { let rn = allocs.next(rn); let rm = allocs.next(rm); let rd = allocs.next_writable(rd); - sink.put4(enc_tbl(is_extension, 0b00, rd, rn, rm)); + sink.put4(enc_tbl(/* is_extension = */ false, 0b00, rd, rn, rm)); } - &Inst::VecTbl2 { - rd, - rn, - rn2, - rm, - is_extension, - } => { + &Inst::VecTblExt { rd, ri, rn, rm } => { + let rn = allocs.next(rn); + let rm = allocs.next(rm); + let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); + sink.put4(enc_tbl(/* is_extension = */ true, 0b00, rd, rn, rm)); + } + &Inst::VecTbl2 { rd, rn, rn2, rm } => { let rn = allocs.next(rn); let rn2 = allocs.next(rn2); let rm = allocs.next(rm); let rd = allocs.next_writable(rd); assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32); - sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm)); + sink.put4(enc_tbl(/* is_extension = */ false, 0b01, rd, rn, rm)); + } + &Inst::VecTbl2Ext { + rd, + ri, + rn, + rn2, + rm, + } => { + let rn = allocs.next(rn); + let rn2 = allocs.next(rn2); + let rm = allocs.next(rm); + let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); + assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32); + sink.put4(enc_tbl(/* is_extension = */ true, 0b01, rd, rn, rm)); } &Inst::FpuCmp { size, rn, rm } => { let rn = allocs.next(rn); @@ -2254,8 +2294,16 @@ impl MachInstEmit for Inst { | machreg_to_vec(rd.to_reg()), ); } - &Inst::MovToVec { rd, rn, idx, size } => { + &Inst::MovToVec { + rd, + ri, + rn, + idx, + size, + } => { let rd = 
allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let rn = allocs.next(rn); let (imm5, shift) = match size.lane_size() { ScalarSize::Size8 => (0b00001, 1), @@ -2475,15 +2523,26 @@ impl MachInstEmit for Inst { rn, )); } - &Inst::VecRRNarrow { + &Inst::VecRRNarrowLow { op, rd, rn, - high_half, lane_size, + } + | &Inst::VecRRNarrowHigh { + op, + rd, + rn, + lane_size, + .. } => { let rn = allocs.next(rn); let rd = allocs.next_writable(rd); + let high_half = match self { + &Inst::VecRRNarrowLow { .. } => false, + &Inst::VecRRNarrowHigh { .. } => true, + _ => unreachable!(), + }; let size = match lane_size { ScalarSize::Size8 => 0b00, @@ -2516,12 +2575,15 @@ impl MachInstEmit for Inst { } &Inst::VecMovElement { rd, + ri, rn, dest_idx, src_idx, size, } => { let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let rn = allocs.next(rn); let (imm5, shift) = match size.lane_size() { ScalarSize::Size8 => (0b00001, 1), @@ -2569,9 +2631,34 @@ impl MachInstEmit for Inst { VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1), VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1), VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1), - VecRRRLongOp::Umlal8 => (0b1, 0b00, 0b0), - VecRRRLongOp::Umlal16 => (0b1, 0b01, 0b0), - VecRRRLongOp::Umlal32 => (0b1, 0b10, 0b0), + }; + sink.put4(enc_vec_rrr_long( + high_half as u32, + u, + size, + bit14, + rm, + rn, + rd, + )); + } + &Inst::VecRRRLongMod { + rd, + ri, + rn, + rm, + alu_op, + high_half, + } => { + let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); + let rn = allocs.next(rn); + let rm = allocs.next(rm); + let (u, size, bit14) = match alu_op { + VecRRRLongModOp::Umlal8 => (0b1, 0b00, 0b0), + VecRRRLongModOp::Umlal16 => (0b1, 0b01, 0b0), + VecRRRLongModOp::Umlal32 => (0b1, 0b10, 0b0), }; sink.put4(enc_vec_rrr_long( high_half as u32, @@ -2702,12 +2789,15 @@ impl MachInstEmit for Inst { } &Inst::VecRRRMod { rd, + ri, rn, rm, alu_op, size, } => { let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let rn = allocs.next(rn); let rm = allocs.next(rm); let (q, _enc_size) = size.enc_size(); diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index b3dc56d568..a3eaabd68e 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -1614,20 +1614,20 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: AMode::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + mem: AMode::SPPreIndexed(SImm9::maybe_from_i64(16).unwrap()), flags: MemFlags::trusted(), }, - "410C41F8", - "ldr x1, [x2, #16]!", + "E10F41F8", + "ldr x1, [sp, #16]!", )); insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: AMode::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + mem: AMode::SPPostIndexed(SImm9::maybe_from_i64(16).unwrap()), flags: MemFlags::trusted(), }, - "410441F8", - "ldr x1, [x2], #16", + "E10741F8", + "ldr x1, [sp], #16", )); insns.push(( Inst::ULoad64 { @@ -1663,7 +1663,7 @@ fn test_aarch64_binemit() { flags: MemFlags::trusted(), }, "300080521002A072B063308B010240F9", - "movz w16, #1 ; movk w16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]", + "movz w16, #1 ; movk w16, w16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]", )); insns.push(( @@ -1807,20 +1807,20 @@ fn test_aarch64_binemit() { insns.push(( 
Inst::Store64 { rd: xreg(1), - mem: AMode::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + mem: AMode::SPPreIndexed(SImm9::maybe_from_i64(16).unwrap()), flags: MemFlags::trusted(), }, - "410C01F8", - "str x1, [x2, #16]!", + "E10F01F8", + "str x1, [sp, #16]!", )); insns.push(( Inst::Store64 { rd: xreg(1), - mem: AMode::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + mem: AMode::SPPostIndexed(SImm9::maybe_from_i64(16).unwrap()), flags: MemFlags::trusted(), }, - "410401F8", - "str x1, [x2], #16", + "E10701F8", + "str x1, [sp], #16", )); insns.push(( @@ -1867,27 +1867,21 @@ fn test_aarch64_binemit() { Inst::StoreP64 { rt: xreg(8), rt2: xreg(9), - mem: PairAMode::PreIndexed( - writable_xreg(10), - SImm7Scaled::maybe_from_i64(-64, I64).unwrap(), - ), + mem: PairAMode::SPPreIndexed(SImm7Scaled::maybe_from_i64(-64, I64).unwrap()), flags: MemFlags::trusted(), }, - "4825BCA9", - "stp x8, x9, [x10, #-64]!", + "E827BCA9", + "stp x8, x9, [sp, #-64]!", )); insns.push(( Inst::StoreP64 { rt: xreg(15), rt2: xreg(16), - mem: PairAMode::PostIndexed( - writable_xreg(20), - SImm7Scaled::maybe_from_i64(504, I64).unwrap(), - ), + mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(504, I64).unwrap()), flags: MemFlags::trusted(), }, - "8FC29FA8", - "stp x15, x16, [x20], #504", + "EFC39FA8", + "stp x15, x16, [sp], #504", )); insns.push(( @@ -1934,27 +1928,21 @@ fn test_aarch64_binemit() { Inst::LoadP64 { rt: writable_xreg(8), rt2: writable_xreg(9), - mem: PairAMode::PreIndexed( - writable_xreg(10), - SImm7Scaled::maybe_from_i64(-64, I64).unwrap(), - ), + mem: PairAMode::SPPreIndexed(SImm7Scaled::maybe_from_i64(-64, I64).unwrap()), flags: MemFlags::trusted(), }, - "4825FCA9", - "ldp x8, x9, [x10, #-64]!", + "E827FCA9", + "ldp x8, x9, [sp, #-64]!", )); insns.push(( Inst::LoadP64 { rt: writable_xreg(8), rt2: writable_xreg(25), - mem: PairAMode::PostIndexed( - writable_xreg(12), - SImm7Scaled::maybe_from_i64(504, I64).unwrap(), - ), + mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(504, I64).unwrap()), flags: MemFlags::trusted(), }, - "88E5DFA8", - "ldp x8, x25, [x12], #504", + "E8E7DFA8", + "ldp x8, x25, [sp], #504", )); insns.push(( @@ -2079,64 +2067,64 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::MovWide { - op: MoveWideOp::MovK, + Inst::MovK { rd: writable_xreg(12), + rn: xreg(12), imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_0000).unwrap(), size: OperandSize::Size64, }, "0C0080F2", - "movk x12, #0", + "movk x12, x12, #0", )); insns.push(( - Inst::MovWide { - op: MoveWideOp::MovK, + Inst::MovK { rd: writable_xreg(19), + rn: xreg(19), imm: MoveWideConst::maybe_with_shift(0x0000, 16).unwrap(), size: OperandSize::Size64, }, "1300A0F2", - "movk x19, #0, LSL #16", + "movk x19, x19, #0, LSL #16", )); insns.push(( - Inst::MovWide { - op: MoveWideOp::MovK, + Inst::MovK { rd: writable_xreg(3), + rn: xreg(3), imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), size: OperandSize::Size64, }, "E3FF9FF2", - "movk x3, #65535", + "movk x3, x3, #65535", )); insns.push(( - Inst::MovWide { - op: MoveWideOp::MovK, + Inst::MovK { rd: writable_xreg(8), + rn: xreg(8), imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), size: OperandSize::Size64, }, "E8FFBFF2", - "movk x8, #65535, LSL #16", + "movk x8, x8, #65535, LSL #16", )); insns.push(( - Inst::MovWide { - op: MoveWideOp::MovK, + Inst::MovK { rd: writable_xreg(8), + rn: xreg(8), imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), size: OperandSize::Size64, }, 
"E8FFDFF2", - "movk x8, #65535, LSL #32", + "movk x8, x8, #65535, LSL #32", )); insns.push(( - Inst::MovWide { - op: MoveWideOp::MovK, + Inst::MovK { rd: writable_xreg(8), + rn: xreg(8), imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), size: OperandSize::Size64, }, "E8FFFFF2", - "movk x8, #65535, LSL #48", + "movk x8, x8, #65535, LSL #48", )); insns.push(( @@ -2267,22 +2255,24 @@ fn test_aarch64_binemit() { insns.push(( Inst::MovToVec { rd: writable_vreg(0), + ri: vreg(0), rn: xreg(0), idx: 7, size: VectorSize::Size8x8, }, "001C0F4E", - "mov v0.b[7], w0", + "mov v0.b[7], v0.b[7], w0", )); insns.push(( Inst::MovToVec { rd: writable_vreg(20), + ri: vreg(20), rn: xreg(21), idx: 0, size: VectorSize::Size64x2, }, "B41E084E", - "mov v20.d[0], x21", + "mov v20.d[0], v20.d[0], x21", )); insns.push(( Inst::MovFromVec { @@ -2649,25 +2639,27 @@ fn test_aarch64_binemit() { insns.push(( Inst::VecMovElement { rd: writable_vreg(0), + ri: vreg(0), rn: vreg(31), dest_idx: 7, src_idx: 7, size: VectorSize::Size16x8, }, "E0771E6E", - "mov v0.h[7], v31.h[7]", + "mov v0.h[7], v0.h[7], v31.h[7]", )); insns.push(( Inst::VecMovElement { rd: writable_vreg(31), + ri: vreg(31), rn: vreg(16), dest_idx: 1, src_idx: 0, size: VectorSize::Size32x2, }, "1F060C6E", - "mov v31.s[1], v16.s[0]", + "mov v31.s[1], v31.s[1], v16.s[0]", )); insns.push(( @@ -2726,11 +2718,10 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRNarrow { + Inst::VecRRNarrowLow { op: VecRRNarrowOp::Xtn, rd: writable_vreg(25), rn: vreg(17), - high_half: false, lane_size: ScalarSize::Size8, }, "392A210E", @@ -2738,23 +2729,22 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRNarrow { + Inst::VecRRNarrowHigh { op: VecRRNarrowOp::Xtn, rd: writable_vreg(3), + ri: vreg(3), rn: vreg(10), - high_half: true, lane_size: ScalarSize::Size16, }, "4329614E", - "xtn2 v3.8h, v10.4s", + "xtn2 v3.8h, v3.8h, v10.4s", )); insns.push(( - Inst::VecRRNarrow { + Inst::VecRRNarrowLow { op: VecRRNarrowOp::Xtn, rd: writable_vreg(22), rn: vreg(8), - high_half: false, lane_size: ScalarSize::Size32, }, "1629A10E", @@ -2762,35 +2752,34 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRNarrow { + Inst::VecRRNarrowHigh { op: VecRRNarrowOp::Sqxtn, rd: writable_vreg(7), + ri: vreg(7), rn: vreg(22), - high_half: true, lane_size: ScalarSize::Size8, }, "C74A214E", - "sqxtn2 v7.16b, v22.8h", + "sqxtn2 v7.16b, v7.16b, v22.8h", )); insns.push(( - Inst::VecRRNarrow { + Inst::VecRRNarrowHigh { op: VecRRNarrowOp::Sqxtn, rd: writable_vreg(31), + ri: vreg(31), rn: vreg(0), - high_half: true, lane_size: ScalarSize::Size16, }, "1F48614E", - "sqxtn2 v31.8h, v0.4s", + "sqxtn2 v31.8h, v31.8h, v0.4s", )); insns.push(( - Inst::VecRRNarrow { + Inst::VecRRNarrowLow { op: VecRRNarrowOp::Sqxtn, rd: writable_vreg(14), rn: vreg(20), - high_half: false, lane_size: ScalarSize::Size32, }, "8E4AA10E", @@ -2798,11 +2787,10 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRNarrow { + Inst::VecRRNarrowLow { op: VecRRNarrowOp::Sqxtun, rd: writable_vreg(16), rn: vreg(23), - high_half: false, lane_size: ScalarSize::Size8, }, "F02A212E", @@ -2810,23 +2798,22 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRNarrow { + Inst::VecRRNarrowHigh { op: VecRRNarrowOp::Sqxtun, rd: writable_vreg(28), + ri: vreg(28), rn: vreg(9), - high_half: true, lane_size: ScalarSize::Size16, }, "3C29616E", - "sqxtun2 v28.8h, v9.4s", + "sqxtun2 v28.8h, v28.8h, v9.4s", )); insns.push(( - Inst::VecRRNarrow { + Inst::VecRRNarrowLow { op: VecRRNarrowOp::Sqxtun, rd: 
writable_vreg(15), rn: vreg(15), - high_half: false, lane_size: ScalarSize::Size32, }, "EF29A12E", @@ -2834,23 +2821,22 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRNarrow { + Inst::VecRRNarrowHigh { op: VecRRNarrowOp::Uqxtn, rd: writable_vreg(21), + ri: vreg(21), rn: vreg(4), - high_half: true, lane_size: ScalarSize::Size8, }, "9548216E", - "uqxtn2 v21.16b, v4.8h", + "uqxtn2 v21.16b, v21.16b, v4.8h", )); insns.push(( - Inst::VecRRNarrow { + Inst::VecRRNarrowLow { op: VecRRNarrowOp::Uqxtn, rd: writable_vreg(31), rn: vreg(31), - high_half: false, lane_size: ScalarSize::Size16, }, "FF4B612E", @@ -2858,23 +2844,22 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRNarrow { + Inst::VecRRNarrowHigh { op: VecRRNarrowOp::Uqxtn, rd: writable_vreg(11), + ri: vreg(11), rn: vreg(12), - high_half: true, lane_size: ScalarSize::Size32, }, "8B49A16E", - "uqxtn2 v11.4s, v12.2d", + "uqxtn2 v11.4s, v11.4s, v12.2d", )); insns.push(( - Inst::VecRRNarrow { + Inst::VecRRNarrowLow { op: VecRRNarrowOp::Fcvtn, rd: writable_vreg(0), rn: vreg(0), - high_half: false, lane_size: ScalarSize::Size16, }, "0068210E", @@ -2882,11 +2867,10 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRNarrow { + Inst::VecRRNarrowLow { op: VecRRNarrowOp::Fcvtn, rd: writable_vreg(2), rn: vreg(7), - high_half: false, lane_size: ScalarSize::Size32, }, "E268610E", @@ -2894,15 +2878,15 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRNarrow { + Inst::VecRRNarrowHigh { op: VecRRNarrowOp::Fcvtn, rd: writable_vreg(31), + ri: vreg(31), rn: vreg(30), - high_half: true, lane_size: ScalarSize::Size32, }, "DF6B614E", - "fcvtn2 v31.4s, v30.2d", + "fcvtn2 v31.4s, v31.4s, v30.2d", )); insns.push(( @@ -3415,12 +3399,13 @@ fn test_aarch64_binemit() { Inst::VecRRRMod { alu_op: VecALUModOp::Bsl, rd: writable_vreg(8), + ri: vreg(8), rn: vreg(9), rm: vreg(1), size: VectorSize::Size8x16, }, "281D616E", - "bsl v8.16b, v9.16b, v1.16b", + "bsl v8.16b, v8.16b, v9.16b, v1.16b", )); insns.push(( @@ -4123,36 +4108,39 @@ fn test_aarch64_binemit() { Inst::VecRRRMod { alu_op: VecALUModOp::Fmla, rd: writable_vreg(2), + ri: vreg(2), rn: vreg(0), rm: vreg(5), size: VectorSize::Size32x2, }, "02CC250E", - "fmla v2.2s, v0.2s, v5.2s", + "fmla v2.2s, v2.2s, v0.2s, v5.2s", )); insns.push(( Inst::VecRRRMod { alu_op: VecALUModOp::Fmla, rd: writable_vreg(2), + ri: vreg(2), rn: vreg(0), rm: vreg(5), size: VectorSize::Size32x4, }, "02CC254E", - "fmla v2.4s, v0.4s, v5.4s", + "fmla v2.4s, v2.4s, v0.4s, v5.4s", )); insns.push(( Inst::VecRRRMod { alu_op: VecALUModOp::Fmla, rd: writable_vreg(2), + ri: vreg(2), rn: vreg(0), rm: vreg(5), size: VectorSize::Size64x2, }, "02CC654E", - "fmla v2.2d, v0.2d, v5.2d", + "fmla v2.2d, v2.2d, v0.2d, v5.2d", )); insns.push(( @@ -4276,15 +4264,16 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRRLong { - alu_op: VecRRRLongOp::Umlal8, + Inst::VecRRRLongMod { + alu_op: VecRRRLongModOp::Umlal8, rd: writable_vreg(4), + ri: vreg(4), rn: vreg(8), rm: vreg(16), high_half: false, }, "0481302E", - "umlal v4.8h, v8.8b, v16.8b", + "umlal v4.8h, v4.8h, v8.8b, v16.8b", )); insns.push(( @@ -4312,15 +4301,16 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRRLong { - alu_op: VecRRRLongOp::Umlal16, + Inst::VecRRRLongMod { + alu_op: VecRRRLongModOp::Umlal16, rd: writable_vreg(7), + ri: vreg(7), rn: vreg(14), rm: vreg(21), high_half: false, }, "C781752E", - "umlal v7.4s, v14.4h, v21.4h", + "umlal v7.4s, v7.4s, v14.4h, v21.4h", )); insns.push(( @@ -4348,15 +4338,16 @@ fn test_aarch64_binemit() { )); 
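The `umlal`/`umlal2` expectations around here now print the accumulator explicitly (e.g. `umlal v4.8h, v4.8h, ...`), matching the fact that multiply-add-long reads its destination. A host-side model of what one `umlal32`-style lane computation does (a sketch for illustration, not code from this patch):

```rust
// Host-side model of UMLAL .2s -> .2d lanes: each destination lane
// accumulates a widening unsigned product into its previous contents,
// which is why VecRRRLongMod carries the accumulator input `ri`.
fn umlal32(acc: [u64; 2], rn: [u32; 2], rm: [u32; 2]) -> [u64; 2] {
    [
        acc[0].wrapping_add(rn[0] as u64 * rm[0] as u64),
        acc[1].wrapping_add(rn[1] as u64 * rm[1] as u64),
    ]
}

fn main() {
    assert_eq!(umlal32([10, 20], [3, 4], [5, 6]), [25, 44]);
}
```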
insns.push(( - Inst::VecRRRLong { - alu_op: VecRRRLongOp::Umlal32, + Inst::VecRRRLongMod { + alu_op: VecRRRLongModOp::Umlal32, rd: writable_vreg(9), + ri: vreg(9), rn: vreg(20), rm: vreg(17), high_half: false, }, "8982B12E", - "umlal v9.2d, v20.2s, v17.2s", + "umlal v9.2d, v9.2d, v20.2s, v17.2s", )); insns.push(( @@ -4384,15 +4375,16 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRRLong { - alu_op: VecRRRLongOp::Umlal8, + Inst::VecRRRLongMod { + alu_op: VecRRRLongModOp::Umlal8, rd: writable_vreg(1), + ri: vreg(1), rn: vreg(5), rm: vreg(15), high_half: true, }, "A1802F6E", - "umlal2 v1.8h, v5.16b, v15.16b", + "umlal2 v1.8h, v1.8h, v5.16b, v15.16b", )); insns.push(( @@ -4420,15 +4412,16 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRRLong { - alu_op: VecRRRLongOp::Umlal16, + Inst::VecRRRLongMod { + alu_op: VecRRRLongModOp::Umlal16, rd: writable_vreg(11), + ri: vreg(11), rn: vreg(10), rm: vreg(12), high_half: true, }, "4B816C6E", - "umlal2 v11.4s, v10.8h, v12.8h", + "umlal2 v11.4s, v11.4s, v10.8h, v12.8h", )); insns.push(( @@ -4456,15 +4449,16 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRRLong { - alu_op: VecRRRLongOp::Umlal32, + Inst::VecRRRLongMod { + alu_op: VecRRRLongModOp::Umlal32, rd: writable_vreg(10), + ri: vreg(10), rn: vreg(29), rm: vreg(2), high_half: true, }, "AA83A26E", - "umlal2 v10.2d, v29.4s, v2.4s", + "umlal2 v10.2d, v10.2d, v29.4s, v2.4s", )); insns.push(( @@ -5418,21 +5412,20 @@ fn test_aarch64_binemit() { rd: writable_vreg(0), rn: vreg(31), rm: vreg(16), - is_extension: false, }, "E003104E", "tbl v0.16b, { v31.16b }, v16.16b", )); insns.push(( - Inst::VecTbl { + Inst::VecTblExt { rd: writable_vreg(4), + ri: vreg(4), rn: vreg(12), rm: vreg(23), - is_extension: true, }, "8411174E", - "tbx v4.16b, { v12.16b }, v23.16b", + "tbx v4.16b, v4.16b, { v12.16b }, v23.16b", )); insns.push(( @@ -5441,22 +5434,21 @@ fn test_aarch64_binemit() { rn: vreg(31), rn2: vreg(0), rm: vreg(26), - is_extension: false, }, "F0231A4E", "tbl v16.16b, { v31.16b, v0.16b }, v26.16b", )); insns.push(( - Inst::VecTbl2 { + Inst::VecTbl2Ext { rd: writable_vreg(3), + ri: vreg(3), rn: vreg(11), rn2: vreg(12), rm: vreg(19), - is_extension: true, }, "6331134E", - "tbx v3.16b, { v11.16b, v12.16b }, v19.16b", + "tbx v3.16b, v3.16b, { v11.16b, v12.16b }, v19.16b", )); insns.push(( @@ -6201,23 +6193,25 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::FpuRRI { - fpu_op: FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(31, 32).unwrap()), + Inst::FpuRRIMod { + fpu_op: FPUOpRIMod::Sli32(FPULeftShiftImm::maybe_from_u8(31, 32).unwrap()), rd: writable_vreg(4), + ri: vreg(4), rn: vreg(10), }, "44553F2F", - "sli v4.2s, v10.2s, #31", + "sli v4.2s, v4.2s, v10.2s, #31", )); insns.push(( - Inst::FpuRRI { - fpu_op: FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(63, 64).unwrap()), + Inst::FpuRRIMod { + fpu_op: FPUOpRIMod::Sli64(FPULeftShiftImm::maybe_from_u8(63, 64).unwrap()), rd: writable_vreg(4), + ri: vreg(4), rn: vreg(10), }, "44557F7F", - "sli d4, d10, #63", + "sli d4, d4, d10, #63", )); insns.push(( @@ -6505,24 +6499,18 @@ fn test_aarch64_binemit() { Inst::FpuLoadP64 { rt: writable_vreg(19), rt2: writable_vreg(11), - mem: PairAMode::PreIndexed( - writable_xreg(25), - SImm7Scaled::maybe_from_i64(-512, F64).unwrap(), - ), + mem: PairAMode::SPPreIndexed(SImm7Scaled::maybe_from_i64(-512, F64).unwrap()), flags: MemFlags::trusted(), }, - "332FE06D", - "ldp d19, d11, [x25, #-512]!", + "F32FE06D", + "ldp d19, d11, [sp, #-512]!", )); insns.push(( Inst::FpuLoadP64 { rt: writable_vreg(7), 
rt2: writable_vreg(20), - mem: PairAMode::PostIndexed( - writable_stack_reg(), - SImm7Scaled::maybe_from_i64(64, F64).unwrap(), - ), + mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(64, F64).unwrap()), flags: MemFlags::trusted(), }, "E753C46C", @@ -6547,28 +6535,22 @@ fn test_aarch64_binemit() { Inst::FpuStoreP64 { rt: vreg(16), rt2: vreg(8), - mem: PairAMode::PreIndexed( - writable_xreg(15), - SImm7Scaled::maybe_from_i64(48, F64).unwrap(), - ), + mem: PairAMode::SPPreIndexed(SImm7Scaled::maybe_from_i64(48, F64).unwrap()), flags: MemFlags::trusted(), }, - "F021836D", - "stp d16, d8, [x15, #48]!", + "F023836D", + "stp d16, d8, [sp, #48]!", )); insns.push(( Inst::FpuStoreP64 { rt: vreg(5), rt2: vreg(6), - mem: PairAMode::PostIndexed( - writable_xreg(28), - SImm7Scaled::maybe_from_i64(-32, F64).unwrap(), - ), + mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(-32, F64).unwrap()), flags: MemFlags::trusted(), }, - "851BBE6C", - "stp d5, d6, [x28], #-32", + "E51BBE6C", + "stp d5, d6, [sp], #-32", )); insns.push(( @@ -6586,28 +6568,22 @@ fn test_aarch64_binemit() { Inst::FpuLoadP128 { rt: writable_vreg(29), rt2: writable_vreg(9), - mem: PairAMode::PreIndexed( - writable_xreg(16), - SImm7Scaled::maybe_from_i64(-1024, I8X16).unwrap(), - ), + mem: PairAMode::SPPreIndexed(SImm7Scaled::maybe_from_i64(-1024, I8X16).unwrap()), flags: MemFlags::trusted(), }, - "1D26E0AD", - "ldp q29, q9, [x16, #-1024]!", + "FD27E0AD", + "ldp q29, q9, [sp, #-1024]!", )); insns.push(( Inst::FpuLoadP128 { rt: writable_vreg(10), rt2: writable_vreg(20), - mem: PairAMode::PostIndexed( - writable_xreg(26), - SImm7Scaled::maybe_from_i64(256, I8X16).unwrap(), - ), + mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(256, I8X16).unwrap()), flags: MemFlags::trusted(), }, - "4A53C8AC", - "ldp q10, q20, [x26], #256", + "EA53C8AC", + "ldp q10, q20, [sp], #256", )); insns.push(( @@ -6628,10 +6604,7 @@ fn test_aarch64_binemit() { Inst::FpuStoreP128 { rt: vreg(27), rt2: vreg(13), - mem: PairAMode::PreIndexed( - writable_stack_reg(), - SImm7Scaled::maybe_from_i64(-192, I8X16).unwrap(), - ), + mem: PairAMode::SPPreIndexed(SImm7Scaled::maybe_from_i64(-192, I8X16).unwrap()), flags: MemFlags::trusted(), }, "FB37BAAD", @@ -6642,14 +6615,11 @@ fn test_aarch64_binemit() { Inst::FpuStoreP128 { rt: vreg(18), rt2: vreg(22), - mem: PairAMode::PostIndexed( - writable_xreg(13), - SImm7Scaled::maybe_from_i64(304, I8X16).unwrap(), - ), + mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(304, I8X16).unwrap()), flags: MemFlags::trusted(), }, - "B2D989AC", - "stp q18, q22, [x13], #304", + "F2DB89AC", + "stp q18, q22, [sp], #304", )); insns.push(( @@ -6769,105 +6739,170 @@ fn test_aarch64_binemit() { Inst::AtomicRMWLoop { ty: I8, op: AtomicRMWLoopOp::Sub, + addr: xreg(25), + operand: xreg(26), + oldval: writable_xreg(27), + scratch1: writable_xreg(24), + scratch2: writable_xreg(28), }, "3BFF5F087C031A4B3CFF1808B8FFFFB5", - "1: ldaxrb w27, [x25]; sub w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b", + "atomic_rmw_loop_sub_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28", )); insns.push(( Inst::AtomicRMWLoop { ty: I16, op: AtomicRMWLoopOp::Eor, + addr: xreg(25), + operand: xreg(26), + oldval: writable_xreg(27), + scratch1: writable_xreg(24), + scratch2: writable_xreg(28), }, "3BFF5F487C031A4A3CFF1848B8FFFFB5", - "1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b", + "atomic_rmw_loop_eor_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28", )); insns.push(( 
Inst::AtomicRMWLoop { ty: I8, op: AtomicRMWLoopOp::Add, + addr: xreg(25), + operand: xreg(26), + oldval: writable_xreg(27), + scratch1: writable_xreg(24), + scratch2: writable_xreg(28), }, "3BFF5F087C031A0B3CFF1808B8FFFFB5", - "1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b", + "atomic_rmw_loop_add_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28", )); insns.push(( Inst::AtomicRMWLoop { ty: I32, op: AtomicRMWLoopOp::Orr, + addr: xreg(25), + operand: xreg(26), + oldval: writable_xreg(27), + scratch1: writable_xreg(24), + scratch2: writable_xreg(28), }, "3BFF5F887C031A2A3CFF1888B8FFFFB5", - "1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b", + "atomic_rmw_loop_orr_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28", )); insns.push(( Inst::AtomicRMWLoop { ty: I64, op: AtomicRMWLoopOp::And, + addr: xreg(25), + operand: xreg(26), + oldval: writable_xreg(27), + scratch1: writable_xreg(24), + scratch2: writable_xreg(28), }, "3BFF5FC87C031A8A3CFF18C8B8FFFFB5", - "1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b", + "atomic_rmw_loop_and_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28", )); insns.push(( Inst::AtomicRMWLoop { ty: I8, op: AtomicRMWLoopOp::Xchg, + addr: xreg(25), + operand: xreg(26), + oldval: writable_xreg(27), + scratch1: writable_xreg(24), + scratch2: writable_xreg(28), }, "3BFF5F083AFF1808D8FFFFB5", - "1: ldaxrb w27, [x25]; stlxrb w24, w26, [x25]; cbnz w24, 1b", + "atomic_rmw_loop_xchg_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28", )); insns.push(( Inst::AtomicRMWLoop { ty: I16, op: AtomicRMWLoopOp::Nand, + addr: xreg(25), + operand: xreg(26), + oldval: writable_xreg(27), + scratch1: writable_xreg(24), + scratch2: writable_xreg(28), }, "3BFF5F487C031A0AFC033C2A3CFF184898FFFFB5", - "1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b", + "atomic_rmw_loop_nand_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28", )); insns.push(( Inst::AtomicRMWLoop { ty: I16, op: AtomicRMWLoopOp::Smin, + addr: xreg(25), + operand: xreg(26), + oldval: writable_xreg(27), + scratch1: writable_xreg(24), + scratch2: writable_xreg(28), }, "3BFF5F487B3F00137FA33A6B7CB39A9A3CFF184878FFFFB5", - "1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b", + "atomic_rmw_loop_smin_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28", )); insns.push(( Inst::AtomicRMWLoop { ty: I32, op: AtomicRMWLoopOp::Smin, + addr: xreg(25), + operand: xreg(26), + oldval: writable_xreg(27), + scratch1: writable_xreg(24), + scratch2: writable_xreg(28), }, "3BFF5F887F031A6B7CB39A9A3CFF188898FFFFB5", - "1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b", + "atomic_rmw_loop_smin_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28", )); insns.push(( Inst::AtomicRMWLoop { ty: I64, op: AtomicRMWLoopOp::Smax, + addr: xreg(25), + operand: xreg(26), + oldval: writable_xreg(27), + scratch1: writable_xreg(24), + scratch2: writable_xreg(28), }, "3BFF5FC87F031AEB7CC39A9A3CFF18C898FFFFB5", - "1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b", + "atomic_rmw_loop_smax_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28", )); insns.push(( Inst::AtomicRMWLoop { ty: I8, op: AtomicRMWLoopOp::Smax, + addr: xreg(25), + operand: xreg(26), + oldval: writable_xreg(27), + scratch1: writable_xreg(24), 
+ scratch2: writable_xreg(28), }, "3BFF5F087B1F00137F833A6B7CC39A9A3CFF180878FFFFB5", - "1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b", + "atomic_rmw_loop_smax_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28", )); insns.push(( Inst::AtomicRMWLoop { ty: I8, op: AtomicRMWLoopOp::Umin, + addr: xreg(25), + operand: xreg(26), + oldval: writable_xreg(27), + scratch1: writable_xreg(24), + scratch2: writable_xreg(28), }, "3BFF5F087F031A6B7C339A9A3CFF180898FFFFB5", - "1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b", + "atomic_rmw_loop_umin_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28", )); insns.push(( Inst::AtomicRMWLoop { ty: I16, op: AtomicRMWLoopOp::Umax, + addr: xreg(25), + operand: xreg(26), + oldval: writable_xreg(27), + scratch1: writable_xreg(24), + scratch2: writable_xreg(28), }, "3BFF5F487F031A6B7C839A9A3CFF184898FFFFB5", - "1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b", + "atomic_rmw_loop_umax_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28", )); insns.push(( @@ -7269,74 +7304,98 @@ fn test_aarch64_binemit() { insns.push(( Inst::AtomicCAS { - rs: writable_xreg(28), + rd: writable_xreg(28), + rs: xreg(28), rt: xreg(20), rn: xreg(10), ty: I8, }, "54FDFC08", - "casalb w28, w20, [x10]", + "casalb w28, w28, w20, [x10]", )); insns.push(( Inst::AtomicCAS { - rs: writable_xreg(2), + rd: writable_xreg(2), + rs: xreg(2), rt: xreg(19), rn: xreg(23), ty: I16, }, "F3FEE248", - "casalh w2, w19, [x23]", + "casalh w2, w2, w19, [x23]", )); insns.push(( Inst::AtomicCAS { - rs: writable_xreg(0), + rd: writable_xreg(0), + rs: xreg(0), rt: zero_reg(), rn: stack_reg(), ty: I32, }, "FFFFE088", - "casal w0, wzr, [sp]", + "casal w0, w0, wzr, [sp]", )); insns.push(( Inst::AtomicCAS { - rs: writable_xreg(7), + rd: writable_xreg(7), + rs: xreg(7), rt: xreg(15), rn: xreg(27), ty: I64, }, "6FFFE7C8", - "casal x7, x15, [x27]", + "casal x7, x7, x15, [x27]", )); insns.push(( Inst::AtomicCASLoop { ty: I8, + addr: xreg(25), + expected: xreg(26), + replacement: xreg(28), + oldval: writable_xreg(27), + scratch: writable_xreg(24), }, "3BFF5F087F033AEB610000543CFF180898FFFFB5", - "atomically { compare-and-swap(8_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }" + "atomic_cas_loop_8 addr=x25, expect=x26, replacement=x28, oldval=x27, scratch=x24", )); insns.push(( Inst::AtomicCASLoop { ty: I16, + addr: xreg(25), + expected: xreg(26), + replacement: xreg(28), + oldval: writable_xreg(27), + scratch: writable_xreg(24), }, "3BFF5F487F233AEB610000543CFF184898FFFFB5", - "atomically { compare-and-swap(16_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }" + "atomic_cas_loop_16 addr=x25, expect=x26, replacement=x28, oldval=x27, scratch=x24", )); insns.push(( Inst::AtomicCASLoop { ty: I32, + addr: xreg(25), + expected: xreg(26), + replacement: xreg(28), + oldval: writable_xreg(27), + scratch: writable_xreg(24), }, "3BFF5F887F031AEB610000543CFF188898FFFFB5", - "atomically { compare-and-swap(32_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }" + "atomic_cas_loop_32 addr=x25, expect=x26, replacement=x28, oldval=x27, scratch=x24", )); insns.push(( Inst::AtomicCASLoop { ty: I64, + addr: xreg(25), + expected: xreg(26), + replacement: xreg(28), + oldval: writable_xreg(27), + scratch: writable_xreg(24), }, "3BFF5FC87F031AEB610000543CFF18C898FFFFB5", - "atomically { 
compare-and-swap(64_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }" + "atomic_cas_loop_64 addr=x25, expect=x26, replacement=x28, oldval=x27, scratch=x24", )); insns.push(( diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 3d55806cf5..8add4a18ec 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -39,7 +39,7 @@ pub use crate::isa::aarch64::lower::isle::generated_code::{ ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, - VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp, + VecRRRLongModOp, VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp, }; /// A floating-point unit (FPU) operation with two args, a register and an immediate. @@ -49,6 +49,13 @@ pub enum FPUOpRI { UShr32(FPURightShiftImm), /// Unsigned right shift. Rd = Rn << #imm UShr64(FPURightShiftImm), +} + +/// A floating-point unit (FPU) operation with two args, a register and +/// an immediate that modifies its dest (so takes that input value as a +/// separate virtual register). +#[derive(Copy, Clone, Debug)] +pub enum FPUOpRIMod { /// Shift left and insert. Rd |= Rn << #imm Sli32(FPULeftShiftImm), /// Shift left and insert. Rd |= Rn << #imm @@ -197,9 +204,9 @@ impl Inst { } } else { let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap(); - insts.push(Inst::MovWide { - op: MoveWideOp::MovK, + insts.push(Inst::MovK { rd, + rn: rd.to_reg(), // Redef the same virtual register. imm, size, }); @@ -550,9 +557,7 @@ fn memarg_operands<F: Fn(VReg) -> VReg>(memarg: &AMode, collector: &mut OperandC collector.reg_use(r2); } &AMode::Label(..) => {} - &AMode::PreIndexed(reg, ..) | &AMode::PostIndexed(reg, ..) => { - collector.reg_mod(reg); - } + &AMode::SPPreIndexed(..) | &AMode::SPPostIndexed(..) => {} &AMode::FPOffset(..) => {} &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => {} &AMode::RegOffset(r, ..) => { @@ -570,9 +575,7 @@ fn pairmemarg_operands<F: Fn(VReg) -> VReg>( &PairAMode::SignedOffset(reg, ..) => { collector.reg_use(reg); } - &PairAMode::PreIndexed(reg, ..) | &PairAMode::PostIndexed(reg, ..) => { - collector.reg_mod(reg); - } + &PairAMode::SPPreIndexed(..) | &PairAMode::SPPostIndexed(..) => {} } } @@ -657,10 +660,13 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan debug_assert!(rd.to_reg().is_virtual()); collector.reg_def(rd); } - &Inst::MovWide { op, rd, .. } => match op { - MoveWideOp::MovK => collector.reg_mod(rd), - _ => collector.reg_def(rd), - }, + &Inst::MovK { rd, rn, .. } => { + collector.reg_use(rn); + collector.reg_reuse_def(rd, 0); // `rn` == `rd`. + } + &Inst::MovWide { rd, .. } => { + collector.reg_def(rd); + } &Inst::CSel { rd, rn, rm, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); } @@ -681,13 +687,21 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan &Inst::CCmpImm { rn, .. } => { collector.reg_use(rn); } - &Inst::AtomicRMWLoop { op, .. } => { - collector.reg_use(xreg(25)); - collector.reg_use(xreg(26)); - collector.reg_def(writable_xreg(24)); - collector.reg_def(writable_xreg(27)); + &Inst::AtomicRMWLoop { + op, + addr, + operand, + oldval, + scratch1, + scratch2, + ..
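// Note: the `MovK` arm above is the pattern this patch repeats for every
// instruction that both reads and writes a register: the old single
// `reg_mod` operand becomes a separate input vreg plus an output vreg,
// tied together with a reuse-def constraint. A minimal sketch, assuming
// the `OperandCollector` API used in this file (`SomeRmwInst` is a
// hypothetical placeholder, not a real instruction):
//
//     &Inst::SomeRmwInst { rd, ri, rn, .. } => {
//         collector.reg_use(ri);          // operand 0: incoming value of the dest
//         collector.reg_use(rn);          // operand 1: ordinary source
//         collector.reg_reuse_def(rd, 0); // `rd` shares operand 0's register
//     }
//
// The index passed to `reg_reuse_def` is the position of the tied operand
// in the collection order, which is why the arms in this patch variously
// use 0, 1, 2, 3, or 4.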
+ } => { + collector.reg_fixed_use(addr, xreg(25)); + collector.reg_fixed_use(operand, xreg(26)); + collector.reg_fixed_def(oldval, xreg(27)); + collector.reg_fixed_def(scratch1, xreg(24)); if op != AtomicRMWLoopOp::Xchg { - collector.reg_def(writable_xreg(28)); + collector.reg_fixed_def(scratch2, xreg(28)); } } &Inst::AtomicRMW { rs, rt, rn, .. } => { @@ -695,17 +709,25 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan collector.reg_def(rt); collector.reg_use(rn); } - &Inst::AtomicCAS { rs, rt, rn, .. } => { - collector.reg_mod(rs); + &Inst::AtomicCAS { rd, rs, rt, rn, .. } => { + collector.reg_reuse_def(rd, 1); // reuse `rs`. + collector.reg_use(rs); collector.reg_use(rt); collector.reg_use(rn); } - &Inst::AtomicCASLoop { .. } => { - collector.reg_use(xreg(25)); - collector.reg_use(xreg(26)); - collector.reg_use(xreg(28)); - collector.reg_def(writable_xreg(24)); - collector.reg_def(writable_xreg(27)); + &Inst::AtomicCASLoop { + addr, + expected, + replacement, + oldval, + scratch, + .. + } => { + collector.reg_fixed_use(addr, xreg(25)); + collector.reg_fixed_use(expected, xreg(26)); + collector.reg_fixed_use(replacement, xreg(28)); + collector.reg_fixed_def(oldval, xreg(27)); + collector.reg_fixed_def(scratch, xreg(24)); } &Inst::LoadAcquire { rt, rn, .. } => { collector.reg_use(rn); @@ -741,11 +763,13 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan collector.reg_use(rn); collector.reg_use(rm); } - &Inst::FpuRRI { fpu_op, rd, rn, .. } => { - match fpu_op { - FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => collector.reg_def(rd), - FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => collector.reg_mod(rd), - } + &Inst::FpuRRI { rd, rn, .. } => { + collector.reg_def(rd); + collector.reg_use(rn); + } + &Inst::FpuRRIMod { rd, ri, rn, .. } => { + collector.reg_reuse_def(rd, 1); // reuse `ri`. + collector.reg_use(ri); collector.reg_use(rn); } &Inst::FpuRRRR { rd, rn, rm, ra, .. } => { @@ -767,8 +791,9 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan collector.reg_def(rd); collector.reg_use(rn); } - &Inst::VecShiftImmMod { rd, rn, .. } => { - collector.reg_mod(rd); + &Inst::VecShiftImmMod { rd, ri, rn, .. } => { + collector.reg_reuse_def(rd, 1); // `rd` == `ri`. + collector.reg_use(ri); collector.reg_use(rn); } &Inst::VecExtract { rd, rn, rm, .. } => { @@ -776,37 +801,42 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan collector.reg_use(rn); collector.reg_use(rm); } - &Inst::VecTbl { - rd, - rn, - rm, - is_extension, - } => { + &Inst::VecTbl { rd, rn, rm } => { collector.reg_use(rn); collector.reg_use(rm); - - if is_extension { - collector.reg_mod(rd); - } else { - collector.reg_def(rd); - } + collector.reg_def(rd); } - &Inst::VecTbl2 { + &Inst::VecTblExt { rd, ri, rn, rm } => { + collector.reg_use(rn); + collector.reg_use(rm); + collector.reg_reuse_def(rd, 3); // `rd` == `ri`. + collector.reg_use(ri); + } + + &Inst::VecTbl2 { rd, rn, rn2, rm } => { + // Constrain to v30 / v31 so that we satisfy the "adjacent + // registers" constraint without use of pinned vregs in + // lowering. + collector.reg_fixed_use(rn, vreg(30)); + collector.reg_fixed_use(rn2, vreg(31)); + collector.reg_use(rm); + collector.reg_def(rd); + } + &Inst::VecTbl2Ext { rd, + ri, rn, rn2, rm, - is_extension, } => { - collector.reg_use(rn); - collector.reg_use(rn2); + // Constrain to v30 / v31 so that we satisfy the "adjacent + // registers" constraint without use of pinned vregs in + // lowering.
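// Note: `reg_fixed_use(vreg, preg)` / `reg_fixed_def(vreg, preg)` pin an
// ordinary virtual register to one specific physical register at this one
// instruction; the allocator inserts whatever moves are needed to satisfy
// the constraint. Lowering therefore no longer copies values into pinned
// registers itself, and the move coalescer can elide the copies entirely --
// compare the atomic-rmw filetest diffs further down, where
// `mov x4, x1; mov x26, x4` collapses to a single `mov x26, x1`.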
+ collector.reg_fixed_use(rn, vreg(30)); + collector.reg_fixed_use(rn2, vreg(31)); collector.reg_use(rm); - - if is_extension { - collector.reg_mod(rd); - } else { - collector.reg_def(rd); - } + collector.reg_reuse_def(rd, 4); // `rd` == `ri`. + collector.reg_use(ri); } &Inst::VecLoadReplicate { rd, rn, .. } => { collector.reg_def(rd); @@ -900,8 +930,9 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan &Inst::FpuMoveFPImm { rd, .. } => { collector.reg_def(rd); } - &Inst::MovToVec { rd, rn, .. } => { - collector.reg_mod(rd); + &Inst::MovToVec { rd, ri, rn, .. } => { + collector.reg_reuse_def(rd, 1); // `rd` == `ri`. + collector.reg_use(ri); collector.reg_use(rn); } &Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => { @@ -926,38 +957,36 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan collector.reg_def(rd); collector.reg_use(rn); } - &Inst::VecMovElement { rd, rn, .. } => { - collector.reg_mod(rd); + &Inst::VecMovElement { rd, ri, rn, .. } => { + collector.reg_reuse_def(rd, 1); // `rd` == `ri`. + collector.reg_use(ri); collector.reg_use(rn); } &Inst::VecRRLong { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } - &Inst::VecRRNarrow { - rd, rn, high_half, .. - } => { collector.reg_use(rn); - - if high_half { - collector.reg_mod(rd); - } else { - collector.reg_def(rd); - } + &Inst::VecRRNarrowLow { rd, rn, .. } => { + collector.reg_use(rn); + collector.reg_def(rd); + } + &Inst::VecRRNarrowHigh { rd, ri, rn, .. } => { + collector.reg_use(rn); + collector.reg_reuse_def(rd, 2); // `rd` == `ri`. + collector.reg_use(ri); } &Inst::VecRRPair { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } - &Inst::VecRRRLong { - alu_op, rd, rn, rm, .. - } => { - match alu_op { - VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => { - collector.reg_mod(rd) - } - _ => collector.reg_def(rd), - }; + &Inst::VecRRRLong { rd, rn, rm, .. } => { + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); + } + &Inst::VecRRRLongMod { rd, ri, rn, rm, .. } => { + collector.reg_reuse_def(rd, 1); // `rd` == `ri`. + collector.reg_use(ri); collector.reg_use(rn); collector.reg_use(rm); } @@ -970,8 +999,9 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan collector.reg_use(rn); collector.reg_use(rm); } - &Inst::VecRRRMod { rd, rn, rm, .. } => { - collector.reg_mod(rd); + &Inst::VecRRRMod { rd, ri, rn, rm, .. } => { + collector.reg_reuse_def(rd, 1); // `rd` == `ri`. + collector.reg_use(ri); collector.reg_use(rn); collector.reg_use(rm); } @@ -1508,12 +1538,22 @@ impl Inst { let op_str = match op { MoveWideOp::MovZ => "movz", MoveWideOp::MovN => "movn", - MoveWideOp::MovK => "movk", }; let rd = pretty_print_ireg(rd.to_reg(), size, allocs); let imm = imm.pretty_print(0, allocs); format!("{} {}, {}", op_str, rd, imm) } + &Inst::MovK { + rd, + rn, + ref imm, + size, + } => { + let rn = pretty_print_ireg(rn, size, allocs); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let imm = imm.pretty_print(0, allocs); + format!("movk {}, {}, {}", rd, rn, imm) + } &Inst::CSel { rd, rn, rm, cond } => { let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); @@ -1589,75 +1629,45 @@ impl Inst { }; format!("{}{} {}, {}, [{}]", op, ty_suffix, rs, rt, rn) } - &Inst::AtomicRMWLoop { ty, op, ..
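// Note: because the tied input is now a real operand, the pretty-printer
// shows it explicitly -- `movk rd, rn, imm` above, and likewise
// `sli d0, d0, d6, #63` or `sqxtn2 v0.16b, v0.16b, v6.8h` in the filetests
// below. Only the VCode text gains the extra register: after regalloc the
// tied input and the destination occupy the same physical register, and
// the encoded instruction is unchanged.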
} => { - let ty_suffix = match ty { - I8 => "b", - I16 => "h", - _ => "", - }; - let size = OperandSize::from_ty(ty); - let r_addr = pretty_print_ireg(xreg(25), OperandSize::Size64, allocs); - let r_arg2 = pretty_print_ireg(xreg(26), size, allocs); - let r_status = pretty_print_ireg(xreg(24), OperandSize::Size32, allocs); - let r_tmp = pretty_print_ireg(xreg(27), size, allocs); - let mut r_dst = pretty_print_ireg(xreg(28), size, allocs); - - let mut loop_str: String = "1: ".to_string(); - loop_str.push_str(&format!("ldaxr{} {}, [{}]; ", ty_suffix, r_tmp, r_addr)); - - let op_str = match op { + &Inst::AtomicRMWLoop { + ty, + op, + addr, + operand, + oldval, + scratch1, + scratch2, + } => { + let op = match op { AtomicRMWLoopOp::Add => "add", AtomicRMWLoopOp::Sub => "sub", AtomicRMWLoopOp::Eor => "eor", AtomicRMWLoopOp::Orr => "orr", AtomicRMWLoopOp::And => "and", - _ => "", + AtomicRMWLoopOp::Nand => "nand", + AtomicRMWLoopOp::Smin => "smin", + AtomicRMWLoopOp::Smax => "smax", + AtomicRMWLoopOp::Umin => "umin", + AtomicRMWLoopOp::Umax => "umax", + AtomicRMWLoopOp::Xchg => "xchg", }; - - if op_str.is_empty() { - match op { - AtomicRMWLoopOp::Xchg => r_dst = r_arg2, - AtomicRMWLoopOp::Nand => { - loop_str.push_str(&format!("and {}, {}, {}; ", r_dst, r_tmp, r_arg2)); - loop_str.push_str(&format!("mvn {}, {}; ", r_dst, r_dst)); - } - _ => { - if (op == AtomicRMWLoopOp::Smin || op == AtomicRMWLoopOp::Smax) - && (ty == I8 || ty == I16) - { - loop_str - .push_str(&format!("sxt{} {}, {}; ", ty_suffix, r_tmp, r_tmp)); - loop_str.push_str(&format!( - "cmp {}, {}, sxt{}; ", - r_tmp, r_arg2, ty_suffix - )); - } else { - loop_str.push_str(&format!("cmp {}, {}; ", r_tmp, r_arg2)); - } - let cond = match op { - AtomicRMWLoopOp::Smin => "lt", - AtomicRMWLoopOp::Smax => "gt", - AtomicRMWLoopOp::Umin => "lo", - AtomicRMWLoopOp::Umax => "hi", - _ => unreachable!(), - }; - loop_str.push_str(&format!( - "csel {}, {}, {}, {}; ", - r_dst, r_tmp, r_arg2, cond - )); - } - }; - } else { - loop_str.push_str(&format!("{} {}, {}, {}; ", op_str, r_dst, r_tmp, r_arg2)); - } - loop_str.push_str(&format!( - "stlxr{} {}, {}, [{}]; ", - ty_suffix, r_status, r_dst, r_addr - )); - loop_str.push_str(&format!("cbnz {}, 1b", r_status)); - loop_str + let addr = pretty_print_ireg(addr, OperandSize::Size64, allocs); + let operand = pretty_print_ireg(operand, OperandSize::Size64, allocs); + let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64, allocs); + let scratch1 = pretty_print_ireg(scratch1.to_reg(), OperandSize::Size64, allocs); + let scratch2 = pretty_print_ireg(scratch2.to_reg(), OperandSize::Size64, allocs); + format!( + "atomic_rmw_loop_{}_{} addr={} operand={} oldval={} scratch1={} scratch2={}", + op, + ty.bits(), + addr, + operand, + oldval, + scratch1, + scratch2, + ) } - &Inst::AtomicCAS { rs, rt, rn, ty } => { + &Inst::AtomicCAS { rd, rs, rt, rn, ty } => { let op = match ty { I8 => "casalb", I16 => "casalh", @@ -1665,16 +1675,35 @@ impl Inst { _ => panic!("Unsupported type: {}", ty), }; let size = OperandSize::from_ty(ty); - let rs = pretty_print_ireg(rs.to_reg(), size, allocs); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rs = pretty_print_ireg(rs, size, allocs); let rt = pretty_print_ireg(rt, size, allocs); let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); - format!("{} {}, {}, [{}]", op, rs, rt, rn) + format!("{} {}, {}, {}, [{}]", op, rd, rs, rt, rn) } - &Inst::AtomicCASLoop { ty } => { + &Inst::AtomicCASLoop { + ty, + addr, + expected, + replacement, + oldval, + scratch, 
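// Note: only the textual form of the pseudo-instruction changes here. The
// removed branch spelled out the ldaxr/op/stlxr/cbnz loop as if it were a
// disassembly; the new code prints a single `atomic_rmw_loop_<op>_<bits>`
// line with named operands. The unchanged hex encodings in the binemit
// tests above indicate that the emitted machine code itself is untouched.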
+ } => { + let addr = pretty_print_ireg(addr, OperandSize::Size64, allocs); + let expected = pretty_print_ireg(expected, OperandSize::Size64, allocs); + let replacement = pretty_print_ireg(replacement, OperandSize::Size64, allocs); + let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64, allocs); + let scratch = pretty_print_ireg(scratch.to_reg(), OperandSize::Size64, allocs); format!( - "atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}", - ty.bits()) + "atomic_cas_loop_{} addr={}, expect={}, replacement={}, oldval={}, scratch={}", + ty.bits(), + addr, + expected, + replacement, + oldval, + scratch, + ) } &Inst::LoadAcquire { access_ty, rt, rn, .. @@ -1777,8 +1806,6 @@ impl Inst { let (op, imm, vector) = match fpu_op { FPUOpRI::UShr32(imm) => ("ushr", imm.pretty_print(0, allocs), true), FPUOpRI::UShr64(imm) => ("ushr", imm.pretty_print(0, allocs), false), - FPUOpRI::Sli32(imm) => ("sli", imm.pretty_print(0, allocs), true), - FPUOpRI::Sli64(imm) => ("sli", imm.pretty_print(0, allocs), false), }; let (rd, rn) = if vector { @@ -1794,6 +1821,27 @@ impl Inst { }; format!("{} {}, {}, {}", op, rd, rn, imm) } + &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => { + let (op, imm, vector) = match fpu_op { + FPUOpRIMod::Sli32(imm) => ("sli", imm.pretty_print(0, allocs), true), + FPUOpRIMod::Sli64(imm) => ("sli", imm.pretty_print(0, allocs), false), + }; + + let (rd, ri, rn) = if vector { + ( + pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2, allocs), + pretty_print_vreg_vector(ri, VectorSize::Size32x2, allocs), + pretty_print_vreg_vector(rn, VectorSize::Size32x2, allocs), + ) + } else { + ( + pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs), + pretty_print_vreg_scalar(ri, ScalarSize::Size64, allocs), + pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs), + ) + }; + format!("{} {}, {}, {}, {}", op, rd, ri, rn, imm) + } &Inst::FpuRRRR { fpu_op, size, @@ -1983,11 +2031,18 @@ impl Inst { format!("fmov {}, {}", rd, imm) } - &Inst::MovToVec { rd, rn, idx, size } => { + &Inst::MovToVec { + rd, + ri, + rn, + idx, + size, + } => { let rd = pretty_print_vreg_element(rd.to_reg(), idx as usize, size.lane_size(), allocs); + let ri = pretty_print_vreg_element(ri, idx as usize, size.lane_size(), allocs); let rn = pretty_print_ireg(rn, size.operand_size(), allocs); - format!("mov {}, {}", rd, rn) + format!("mov {}, {}, {}", rd, ri, rn) } &Inst::MovFromVec { rd, rn, idx, size } => { let op = match size { @@ -2062,6 +2117,7 @@ impl Inst { } &Inst::VecMovElement { rd, + ri, rn, dest_idx, src_idx, @@ -2073,8 +2129,9 @@ impl Inst { size.lane_size(), allocs, ); + let ri = pretty_print_vreg_element(ri, dest_idx as usize, size.lane_size(), allocs); let rn = pretty_print_vreg_element(rn, src_idx as usize, size.lane_size(), allocs); - format!("mov {}, {}", rd, rn) + format!("mov {}, {}, {}", rd, ri, rn) } &Inst::VecRRLong { op, @@ -2119,16 +2176,28 @@ impl Inst { format!("{} {}, {}{}", op, rd, rn, suffix) } - &Inst::VecRRNarrow { + &Inst::VecRRNarrowLow { op, rd, rn, - high_half, lane_size, + .. + } + | &Inst::VecRRNarrowHigh { + op, + rd, + rn, + lane_size, + .. } => { let vec64 = VectorSize::from_lane_size(lane_size, false); let vec128 = VectorSize::from_lane_size(lane_size, true); let rn_size = VectorSize::from_lane_size(lane_size.widen(), true); + let high_half = match self { + &Inst::VecRRNarrowLow { .. } => false, + &Inst::VecRRNarrowHigh { .. 
} => true, + _ => unreachable!(), + }; let (op, rd_size) = match (op, high_half) { (VecRRNarrowOp::Xtn, false) => ("xtn", vec64), (VecRRNarrowOp::Xtn, true) => ("xtn2", vec128), @@ -2143,8 +2212,15 @@ impl Inst { }; let rn = pretty_print_vreg_vector(rn, rn_size, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size, allocs); + let ri = match self { + &Inst::VecRRNarrowLow { .. } => "".to_string(), + &Inst::VecRRNarrowHigh { ri, .. } => { + format!("{}, ", pretty_print_vreg_vector(ri, rd_size, allocs)) + } + _ => unreachable!(), + }; - format!("{} {}, {}", op, rd, rn) + format!("{} {}, {}{}", op, rd, ri, rn) } &Inst::VecRRPair { op, rd, rn } => { let op = match op { @@ -2227,6 +2303,7 @@ impl Inst { } &Inst::VecRRRMod { rd, + ri, rn, rm, alu_op, @@ -2237,9 +2314,10 @@ impl Inst { VecALUModOp::Fmla => ("fmla", size), }; let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); + let ri = pretty_print_vreg_vector(ri, size, allocs); let rn = pretty_print_vreg_vector(rn, size, allocs); let rm = pretty_print_vreg_vector(rm, size, allocs); - format!("{} {}, {}, {}", op, rd, rn, rm) + format!("{} {}, {}, {}, {}", op, rd, ri, rn, rm) } &Inst::VecRRRLong { rd, @@ -2285,30 +2363,46 @@ impl Inst { (VecRRRLongOp::Umull32, true) => { ("umull2", VectorSize::Size64x2, VectorSize::Size32x4) } - (VecRRRLongOp::Umlal8, false) => { - ("umlal", VectorSize::Size16x8, VectorSize::Size8x8) - } - (VecRRRLongOp::Umlal8, true) => { - ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16) - } - (VecRRRLongOp::Umlal16, false) => { - ("umlal", VectorSize::Size32x4, VectorSize::Size16x4) - } - (VecRRRLongOp::Umlal16, true) => { - ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8) - } - (VecRRRLongOp::Umlal32, false) => { - ("umlal", VectorSize::Size64x2, VectorSize::Size32x2) - } - (VecRRRLongOp::Umlal32, true) => { - ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4) - } }; let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size, allocs); let rn = pretty_print_vreg_vector(rn, src_size, allocs); let rm = pretty_print_vreg_vector(rm, src_size, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } + &Inst::VecRRRLongMod { + rd, + ri, + rn, + rm, + alu_op, + high_half, + } => { + let (op, dest_size, src_size) = match (alu_op, high_half) { + (VecRRRLongModOp::Umlal8, false) => { + ("umlal", VectorSize::Size16x8, VectorSize::Size8x8) + } + (VecRRRLongModOp::Umlal8, true) => { + ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16) + } + (VecRRRLongModOp::Umlal16, false) => { + ("umlal", VectorSize::Size32x4, VectorSize::Size16x4) + } + (VecRRRLongModOp::Umlal16, true) => { + ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8) + } + (VecRRRLongModOp::Umlal32, false) => { + ("umlal", VectorSize::Size64x2, VectorSize::Size32x2) + } + (VecRRRLongModOp::Umlal32, true) => { + ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4) + } + }; + let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size, allocs); + let ri = pretty_print_vreg_vector(ri, dest_size, allocs); + let rn = pretty_print_vreg_vector(rn, src_size, allocs); + let rm = pretty_print_vreg_vector(rm, src_size, allocs); + format!("{} {}, {}, {}, {}", op, rd, ri, rn, rm) + } &Inst::VecMisc { op, rd, rn, size } => { let (op, size, suffix) = match op { VecMisc2::Not => ( @@ -2378,6 +2472,7 @@ impl Inst { &Inst::VecShiftImmMod { op, rd, + ri, rn, size, imm, @@ -2386,8 +2481,9 @@ impl Inst { VecShiftImmModOp::Sli => "sli", }; let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); + let ri = pretty_print_vreg_vector(ri, size, allocs); let rn = 
pretty_print_vreg_vector(rn, size, allocs); - format!("{} {}, {}, #{}", op, rd, rn, imm) + format!("{} {}, {}, {}, #{}", op, rd, ri, rn, imm) } &Inst::VecExtract { rd, rn, rm, imm4 } => { let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); @@ -2395,31 +2491,39 @@ impl Inst { let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); format!("ext {}, {}, {}, #{}", rd, rn, rm, imm4) } - &Inst::VecTbl { - rd, - rn, - rm, - is_extension, - } => { - let op = if is_extension { "tbx" } else { "tbl" }; + &Inst::VecTbl { rd, rn, rm } => { let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); - format!("{} {}, {{ {} }}, {}", op, rd, rn, rm) + format!("tbl {}, {{ {} }}, {}", rd, rn, rm) } - &Inst::VecTbl2 { - rd, - rn, - rn2, - rm, - is_extension, - } => { - let op = if is_extension { "tbx" } else { "tbl" }; + &Inst::VecTblExt { rd, ri, rn, rm } => { + let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); + let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); + let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); + let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16, allocs); + format!("tbx {}, {}, {{ {} }}, {}", rd, ri, rn, rm) + } + &Inst::VecTbl2 { rd, rn, rn2, rm } => { let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16, allocs); let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); - format!("{} {}, {{ {}, {} }}, {}", op, rd, rn, rn2, rm) + format!("tbl {}, {{ {}, {} }}, {}", rd, rn, rn2, rm) + } + &Inst::VecTbl2Ext { + rd, + ri, + rn, + rn2, + rm, + } => { + let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); + let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16, allocs); + let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); + let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); + let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16, allocs); + format!("tbx {}, {}, {{ {}, {} }}, {}", rd, ri, rn, rn2, rm) } &Inst::VecLoadReplicate { rd, rn, size, .. } => { let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); diff --git a/cranelift/codegen/src/isa/aarch64/inst/regs.rs b/cranelift/codegen/src/isa/aarch64/inst/regs.rs index 3c1114a515..eacd0b4330 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/regs.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/regs.rs @@ -50,6 +50,7 @@ pub(crate) const fn vreg_preg(num: u8) -> PReg { } /// Get a writable reference to a V-register. +#[cfg(test)] // Used only in test code. 
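// Note: with operand constraints applied during operand collection,
// non-test code no longer needs to construct writable physical V-registers
// by number; the ISLE context helpers `xreg`, `writable_xreg`, and
// `writable_vreg` are deleted in lower/isle.rs below, leaving the encoding
// tests as the only remaining caller of this function.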
pub fn writable_vreg(num: u8) -> Writable<Reg> { Writable::from_reg(vreg(num)) } diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index b0bb4e6f1c..8b97f64fda 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -103,12 +103,12 @@ (rule (lower (has_type ty (shuffle rn rn2 (u128_from_immediate mask)))) (let ((mask_reg Reg (constant_f128 mask))) - (vec_tbl2 rn rn2 mask_reg $false ty))) + (vec_tbl2 rn rn2 mask_reg ty))) ;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type vec_i128_ty (swizzle rn rm))) - (vec_tbl rn rm #f)) + (vec_tbl rn rm)) ;;;; Rules for `isplit` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index e4fb6ea6f5..20606c4588 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -8,7 +8,7 @@ use generated_code::Context; use super::{ lower_constant_f128, lower_constant_f32, lower_constant_f64, lower_fp_condcode, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, - CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, + CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FPUOpRIMod, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV, @@ -28,7 +28,6 @@ use crate::{ }, isa::aarch64::abi::AArch64Caller, isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm}, - isa::aarch64::lower::{writable_vreg, writable_xreg, xreg}, isa::unwind::UnwindInst, machinst::{ty_bits, InsnOutput, Lower, MachInst, VCodeConstant, VCodeConstantData}, }; @@ -209,9 +208,9 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { }); if upper_halfword != 0 { - self.emit(&MInst::MovWide { - op: MoveWideOp::MovK, + self.emit(&MInst::MovK { rd, + rn: rd.to_reg(), imm: MoveWideConst::maybe_with_shift(upper_halfword, 16).unwrap(), size, }); @@ -263,9 +262,9 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { } } else { let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap(); - self.emit(&MInst::MovWide { - op: MoveWideOp::MovK, + self.emit(&MInst::MovK { rd, + rn: rd.to_reg(), imm, size, }); @@ -294,18 +293,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { zero_reg() } - fn xreg(&mut self, index: u8) -> Reg { - xreg(index) - } - - fn writable_xreg(&mut self, index: u8) -> WritableReg { - writable_xreg(index) - } - - fn writable_vreg(&mut self, index: u8) -> WritableReg { - writable_vreg(index) - } - fn extended_value_from_value(&mut self, val: Value) -> Option<ExtendedValue> { let (val, extend) = super::get_as_extended_value(self.lower_ctx, val, NarrowValueMode::None)?; @@ -718,11 +705,11 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { } } - fn fpu_op_ri_sli(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI { + fn fpu_op_ri_sli(&mut self, ty_bits: u8, shift: u8) -> FPUOpRIMod { if ty_bits == 32 { - FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap()) + FPUOpRIMod::Sli32(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap()) } else if ty_bits == 64 { - FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap()) +
FPUOpRIMod::Sli64(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap()) } else { unimplemented!( "unexpected input size for fpu_op_ri_sli: {} (shift: {})", diff --git a/cranelift/filetests/filetests/isa/aarch64/amodes.clif b/cranelift/filetests/filetests/isa/aarch64/amodes.clif index 83b7b96bc2..92dfe41526 100644 --- a/cranelift/filetests/filetests/isa/aarch64/amodes.clif +++ b/cranelift/filetests/filetests/isa/aarch64/amodes.clif @@ -139,7 +139,7 @@ block0(v0: i64): ; block0: ; movz w3, #51712 -; movk w3, #15258, LSL #16 +; movk w3, w3, #15258, LSL #16 ; add x3, x3, x0 ; ldr w0, [x3] ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif index 0e33718593..9ca6b575cf 100644 --- a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif +++ b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif @@ -142,9 +142,8 @@ block0(v0: i64, v1: i64): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_nand_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -164,9 +163,8 @@ block0(v0: i64, v1: i32): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_nand_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -186,9 +184,8 @@ block0(v0: i64, v1: i16): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_nand_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -208,9 +205,8 @@ block0(v0: i64, v1: i8): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_nand_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw.clif b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw.clif index 0b017ad3df..d2ba234244 100644 --- a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw.clif +++ b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw.clif @@ -14,9 +14,8 @@ block0(v0: i64, v1: i64): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr x27, [x25]; add x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_add_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -36,9 +35,8 @@ block0(v0: i64, v1: i32): ; stp x24, x25, [sp, #-16]! 
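;; Note: the recurring change in these expected outputs. Previously the
;; operand was copied through a scratch register into the pinned x26
;; (`mov x4, x1; mov x26, x4`); with `reg_fixed_use` the allocator sees the
;; constraint directly and a single `mov x26, x1` suffices. The loop itself
;; is now printed as one `atomic_rmw_loop_*` pseudo-instruction line rather
;; than the expanded load-linked/store-conditional sequence.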
; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr w27, [x25]; add w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_add_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -58,9 +56,8 @@ block0(v0: i64, v1: i16): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrh w27, [x25]; add w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_add_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -80,9 +77,8 @@ block0(v0: i64, v1: i8): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_add_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -102,9 +98,8 @@ block0(v0: i64, v1: i64): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr x27, [x25]; sub x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_sub_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -124,9 +119,8 @@ block0(v0: i64, v1: i32): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr w27, [x25]; sub w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_sub_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -146,9 +140,8 @@ block0(v0: i64, v1: i16): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrh w27, [x25]; sub w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_sub_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -168,9 +161,8 @@ block0(v0: i64, v1: i8): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrb w27, [x25]; sub w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_sub_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -190,9 +182,8 @@ block0(v0: i64, v1: i64): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_and_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -212,9 +203,8 @@ block0(v0: i64, v1: i32): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr w27, [x25]; and w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_and_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -234,9 +224,8 @@ block0(v0: i64, v1: i16): ; stp x24, x25, [sp, #-16]! 
; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrh w27, [x25]; and w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_and_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -256,9 +245,8 @@ block0(v0: i64, v1: i8): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrb w27, [x25]; and w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_and_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -278,9 +266,8 @@ block0(v0: i64, v1: i64): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_nand_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -300,9 +287,8 @@ block0(v0: i64, v1: i32): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_nand_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -322,9 +308,8 @@ block0(v0: i64, v1: i16): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_nand_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -344,9 +329,8 @@ block0(v0: i64, v1: i8): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_nand_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -366,9 +350,8 @@ block0(v0: i64, v1: i64): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr x27, [x25]; orr x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_orr_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -388,9 +371,8 @@ block0(v0: i64, v1: i32): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_orr_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -410,9 +392,8 @@ block0(v0: i64, v1: i16): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrh w27, [x25]; orr w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_orr_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -432,9 +413,8 @@ block0(v0: i64, v1: i8): ; stp x24, x25, [sp, #-16]! 
; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrb w27, [x25]; orr w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_orr_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -454,9 +434,8 @@ block0(v0: i64, v1: i64): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr x27, [x25]; eor x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_eor_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -476,9 +455,8 @@ block0(v0: i64, v1: i32): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr w27, [x25]; eor w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_eor_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -498,9 +476,8 @@ block0(v0: i64, v1: i16): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_eor_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -520,9 +497,8 @@ block0(v0: i64, v1: i8): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrb w27, [x25]; eor w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_eor_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -542,9 +518,8 @@ block0(v0: i64, v1: i64): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_smax_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -564,9 +539,8 @@ block0(v0: i64, v1: i32): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxr w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_smax_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -586,9 +560,8 @@ block0(v0: i64, v1: i16): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, gt; stlxrh w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_smax_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -608,9 +581,8 @@ block0(v0: i64, v1: i8): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_smax_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -630,9 +602,8 @@ block0(v0: i64, v1: i64): ; stp x24, x25, [sp, #-16]! 
; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, hi; stlxr w24, x28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_umax_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -652,9 +623,8 @@ block0(v0: i64, v1: i32): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxr w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_umax_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -674,9 +644,8 @@ block0(v0: i64, v1: i16): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_umax_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -696,9 +665,8 @@ block0(v0: i64, v1: i8): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrb w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_umax_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -718,9 +686,8 @@ block0(v0: i64, v1: i64): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lt; stlxr w24, x28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_smin_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -740,9 +707,8 @@ block0(v0: i64, v1: i32): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_smin_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -762,9 +728,8 @@ block0(v0: i64, v1: i16): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_smin_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -784,9 +749,8 @@ block0(v0: i64, v1: i8): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, lt; stlxrb w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_smin_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -806,9 +770,8 @@ block0(v0: i64, v1: i64): ; stp x24, x25, [sp, #-16]! 
; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lo; stlxr w24, x28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_umin_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -828,9 +791,8 @@ block0(v0: i64, v1: i32): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxr w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_umin_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -850,9 +812,8 @@ block0(v0: i64, v1: i16): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrh w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_umin_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 @@ -872,9 +833,8 @@ block0(v0: i64, v1: i8): ; stp x24, x25, [sp, #-16]! ; block0: ; mov x25, x0 -; mov x4, x1 -; mov x26, x4 -; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b +; mov x26, x1 +; atomic_rmw_loop_umin_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28 ; ldp x24, x25, [sp], #16 ; ldp x26, x27, [sp], #16 ; ldr x28, [sp], #16 diff --git a/cranelift/filetests/filetests/isa/aarch64/bitops.clif b/cranelift/filetests/filetests/isa/aarch64/bitops.clif index 5419d077b8..38cb3ed290 100644 --- a/cranelift/filetests/filetests/isa/aarch64/bitops.clif +++ b/cranelift/filetests/filetests/isa/aarch64/bitops.clif @@ -245,11 +245,11 @@ block0(v0: i128): } ; block0: -; fmov d6, x0 -; mov v6.d[1], x1 -; cnt v19.16b, v6.16b -; addv b21, v19.16b -; umov w0, v21.b[0] +; fmov d7, x0 +; mov v7.d[1], v7.d[1], x1 +; cnt v18.16b, v7.16b +; addv b20, v18.16b +; umov w0, v20.b[0] ; movz w1, #0 ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/constants.clif b/cranelift/filetests/filetests/isa/aarch64/constants.clif index a6a7a95549..a7fa74698f 100644 --- a/cranelift/filetests/filetests/isa/aarch64/constants.clif +++ b/cranelift/filetests/filetests/isa/aarch64/constants.clif @@ -130,9 +130,9 @@ block0: ; block0: ; movz x0, #58 -; movk x0, #4626, LSL #16 -; movk x0, #61603, LSL #32 -; movk x0, #62283, LSL #48 +; movk x0, x0, #4626, LSL #16 +; movk x0, x0, #61603, LSL #32 +; movk x0, x0, #62283, LSL #48 ; ret function %f() -> i64 { @@ -143,7 +143,7 @@ block0: ; block0: ; movz x0, #7924, LSL #16 -; movk x0, #4841, LSL #48 +; movk x0, x0, #4841, LSL #48 ; ret function %f() -> i64 { @@ -154,7 +154,7 @@ block0: ; block0: ; movn x0, #57611, LSL #16 -; movk x0, #4841, LSL #48 +; movk x0, x0, #4841, LSL #48 ; ret function %f() -> i32 { diff --git a/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-narrow.clif b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-narrow.clif index 7b041b5a14..303e030ab1 100644 --- a/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-narrow.clif +++ b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-narrow.clif @@ -15,10 +15,9 @@ block0(v0: i16): } ; block0: -; dup v6.4h, w0 -; mov v7.16b, v6.16b -; mov v7.d[1], v6.d[0] -; sqxtn v0.8b, v7.8h +; dup v4.4h, w0 +; mov v4.d[1], v4.d[1], v4.d[0] +; sqxtn v0.8b, v4.8h ; ret function %snarrow_i16x8(i16) -> i8x16 { @@ -37,7 +36,7 @@ block0(v0: i16): ; 
 ; block0:
 ; dup v6.8h, w0
 ; sqxtn v0.8b, v6.8h
-; sqxtn2 v0.16b, v6.8h
+; sqxtn2 v0.16b, v0.16b, v6.8h
 ; ret
 
 function %snarrow_i32x2(i32) -> i16x4 {
@@ -54,10 +53,9 @@ block0(v0: i32):
 }
 
 ; block0:
-; dup v6.2s, w0
-; mov v7.16b, v6.16b
-; mov v7.d[1], v6.d[0]
-; sqxtn v0.4h, v7.4s
+; dup v4.2s, w0
+; mov v4.d[1], v4.d[1], v4.d[0]
+; sqxtn v0.4h, v4.4s
 ; ret
 
 function %snarrow_i32x4(i32) -> i16x8 {
@@ -76,7 +74,7 @@ block0(v0: i32):
 ; block0:
 ; dup v6.4s, w0
 ; sqxtn v0.4h, v6.4s
-; sqxtn2 v0.8h, v6.4s
+; sqxtn2 v0.8h, v0.8h, v6.4s
 ; ret
 
 function %snarrow_i64x2(i64) -> i32x4 {
@@ -95,7 +93,7 @@ block0(v0: i64):
 ; block0:
 ; dup v6.2d, x0
 ; sqxtn v0.2s, v6.2d
-; sqxtn2 v0.4s, v6.2d
+; sqxtn2 v0.4s, v0.4s, v6.2d
 ; ret
 
 function %unarrow_i16x4(i16) -> i8x8 {
@@ -112,10 +110,9 @@ block0(v0: i16):
 }
 
 ; block0:
-; dup v6.4h, w0
-; mov v7.16b, v6.16b
-; mov v7.d[1], v6.d[0]
-; sqxtun v0.8b, v7.8h
+; dup v4.4h, w0
+; mov v4.d[1], v4.d[1], v4.d[0]
+; sqxtun v0.8b, v4.8h
 ; ret
 
 function %unarrow_i16x8(i16) -> i8x16 {
@@ -134,7 +131,7 @@ block0(v0: i16):
 ; block0:
 ; dup v6.8h, w0
 ; sqxtun v0.8b, v6.8h
-; sqxtun2 v0.16b, v6.8h
+; sqxtun2 v0.16b, v0.16b, v6.8h
 ; ret
 
 function %unarrow_i32x2(i32) -> i16x4 {
@@ -151,10 +148,9 @@ block0(v0: i32):
 }
 
 ; block0:
-; dup v6.2s, w0
-; mov v7.16b, v6.16b
-; mov v7.d[1], v6.d[0]
-; sqxtun v0.4h, v7.4s
+; dup v4.2s, w0
+; mov v4.d[1], v4.d[1], v4.d[0]
+; sqxtun v0.4h, v4.4s
 ; ret
 
 function %unarrow_i32x4(i32) -> i16x8 {
@@ -173,7 +169,7 @@ block0(v0: i32):
 ; block0:
 ; dup v6.4s, w0
 ; sqxtun v0.4h, v6.4s
-; sqxtun2 v0.8h, v6.4s
+; sqxtun2 v0.8h, v0.8h, v6.4s
 ; ret
 
 function %unarrow_i64x2(i64) -> i32x4 {
@@ -192,7 +188,7 @@ block0(v0: i64):
 ; block0:
 ; dup v6.2d, x0
 ; sqxtun v0.2s, v6.2d
-; sqxtun2 v0.4s, v6.2d
+; sqxtun2 v0.4s, v0.4s, v6.2d
 ; ret
 
 function %uunarrow_i16x4(i16) -> i8x8 {
@@ -209,10 +205,9 @@ block0(v0: i16):
 }
 
 ; block0:
-; dup v6.4h, w0
-; mov v7.16b, v6.16b
-; mov v7.d[1], v6.d[0]
-; uqxtn v0.8b, v7.8h
+; dup v4.4h, w0
+; mov v4.d[1], v4.d[1], v4.d[0]
+; uqxtn v0.8b, v4.8h
 ; ret
 
 function %uunarrow_i16x8(i16) -> i8x16 {
@@ -231,7 +226,7 @@ block0(v0: i16):
 ; block0:
 ; dup v6.8h, w0
 ; uqxtn v0.8b, v6.8h
-; uqxtn2 v0.16b, v6.8h
+; uqxtn2 v0.16b, v0.16b, v6.8h
 ; ret
 
 function %uunarrow_i32x2(i32) -> i16x4 {
@@ -248,10 +243,9 @@ block0(v0: i32):
 }
 
 ; block0:
-; dup v6.2s, w0
-; mov v7.16b, v6.16b
-; mov v7.d[1], v6.d[0]
-; uqxtn v0.4h, v7.4s
+; dup v4.2s, w0
+; mov v4.d[1], v4.d[1], v4.d[0]
+; uqxtn v0.4h, v4.4s
 ; ret
 
 function %uunarrow_i32x4(i32) -> i16x8 {
@@ -270,7 +264,7 @@ block0(v0: i32):
 ; block0:
 ; dup v6.4s, w0
 ; uqxtn v0.4h, v6.4s
-; uqxtn2 v0.8h, v6.4s
+; uqxtn2 v0.8h, v0.8h, v6.4s
 ; ret
 
 function %uunarrow_i64x2(i64) -> i32x4 {
@@ -289,5 +283,6 @@ block0(v0: i64):
 ; block0:
 ; dup v6.2d, x0
 ; uqxtn v0.2s, v6.2d
-; uqxtn2 v0.4s, v6.2d
+; uqxtn2 v0.4s, v0.4s, v6.2d
 ; ret
+
diff --git a/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-neon.clif b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-neon.clif
index 0fbcf700bd..722bf860d5 100644
--- a/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-neon.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-neon.clif
@@ -197,7 +197,7 @@ block0(v0: f64, v1: f64):
 ; dup v17.2d, v0.d[0]
 ; dup v18.2d, v1.d[0]
 ; fcmgt v0.2d, v17.2d, v18.2d
-; bsl v0.16b, v18.16b, v17.16b
+; bsl v0.16b, v0.16b, v18.16b, v17.16b
 ; ret
 
 function %f64x2_splat_max_pseudo(f64, f64) -> f64x2 {
@@ -216,5 +216,6 @@ block0(v0: f64, v1: f64):
 ; dup v17.2d, v0.d[0]
 ; dup v18.2d, v1.d[0]
 ; fcmgt v0.2d, v18.2d, v17.2d
-; bsl v0.16b, v18.16b, v17.16b
+; bsl v0.16b, v0.16b, v18.16b, v17.16b
 ; ret
+
diff --git a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif
index 16f38886a2..eac5fddaac 100644
--- a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif
@@ -309,8 +309,8 @@ block0(v0: f32, v1: f32):
 }
 
 ; block0:
-; ushr v7.2s, v1.2s, #31
-; sli v0.2s, v7.2s, #31
+; ushr v6.2s, v1.2s, #31
+; sli v0.2s, v0.2s, v6.2s, #31
 ; ret
 
 function %f32(f64, f64) -> f64 {
@@ -320,8 +320,8 @@ block0(v0: f64, v1: f64):
 }
 
 ; block0:
-; ushr d7, d1, #63
-; sli d0, d7, #63
+; ushr d6, d1, #63
+; sli d0, d0, d6, #63
 ; ret
 
 function %f33(f32) -> i32 {
@@ -918,9 +918,8 @@ block0(v0: f32x4, v1: f32x4, v2: f32x4):
 }
 
 ; block0:
-; mov v17.16b, v0.16b
+; fmla v2.4s, v2.4s, v0.4s, v1.4s
 ; mov v0.16b, v2.16b
-; fmla v0.4s, v17.4s, v1.4s
 ; ret
 
 function %f79(f32x2, f32x2, f32x2) -> f32x2 {
@@ -930,9 +929,8 @@ block0(v0: f32x2, v1: f32x2, v2: f32x2):
 }
 
 ; block0:
-; mov v17.16b, v0.16b
+; fmla v2.2s, v2.2s, v0.2s, v1.2s
 ; mov v0.16b, v2.16b
-; fmla v0.2s, v17.2s, v1.2s
 ; ret
 
 function %f80(f64x2, f64x2, f64x2) -> f64x2 {
@@ -942,9 +940,8 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2):
 }
 
 ; block0:
-; mov v17.16b, v0.16b
+; fmla v2.2d, v2.2d, v0.2d, v1.2d
 ; mov v0.16b, v2.16b
-; fmla v0.2d, v17.2d, v1.2d
 ; ret
 
 function %f81(f32x2, f32x2) -> f32x2 {
@@ -954,8 +951,8 @@ block0(v0: f32x2, v1: f32x2):
 }
 
 ; block0:
-; ushr v7.2s, v1.2s, #31
-; sli v0.2s, v7.2s, #31
+; ushr v6.2s, v1.2s, #31
+; sli v0.2s, v0.2s, v6.2s, #31
 ; ret
 
 function %f82(f32x4, f32x4) -> f32x4 {
@@ -965,8 +962,8 @@ block0(v0: f32x4, v1: f32x4):
 }
 
 ; block0:
-; ushr v7.4s, v1.4s, #31
-; sli v0.4s, v7.4s, #31
+; ushr v6.4s, v1.4s, #31
+; sli v0.4s, v0.4s, v6.4s, #31
 ; ret
 
 function %f83(f64x2, f64x2) -> f64x2 {
@@ -976,6 +973,7 @@ block0(v0: f64x2, v1: f64x2):
 }
 
 ; block0:
-; ushr v7.2d, v1.2d, #63
-; sli v0.2d, v7.2d, #63
+; ushr v6.2d, v1.2d, #63
+; sli v0.2d, v0.2d, v6.2d, #63
 ; ret
+
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
index f9e9967ffe..cdd5d92b46 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
@@ -105,7 +105,7 @@ block0:
 ; movi v0.16b, #0
 ; movi v4.16b, #0
 ; movi v5.16b, #0
-; bsl v0.16b, v4.16b, v5.16b
+; bsl v0.16b, v0.16b, v4.16b, v5.16b
 ; ret
 
 function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 {
@@ -115,7 +115,7 @@ block0(v0: b16x8, v1: i16x8, v2: i16x8):
 }
 
 ; block0:
-; bsl v0.16b, v1.16b, v2.16b
+; bsl v0.16b, v0.16b, v1.16b, v2.16b
 ; ret
 
 function %vselect_f32x4(b32x4, f32x4, f32x4) -> f32x4 {
@@ -125,7 +125,7 @@ block0(v0: b32x4, v1: f32x4, v2: f32x4):
 }
 
 ; block0:
-; bsl v0.16b, v1.16b, v2.16b
+; bsl v0.16b, v0.16b, v1.16b, v2.16b
 ; ret
 
 function %vselect_f64x2(b64x2, f64x2, f64x2) -> f64x2 {
@@ -135,7 +135,7 @@ block0(v0: b64x2, v1: f64x2, v2: f64x2):
 }
 
 ; block0:
-; bsl v0.16b, v1.16b, v2.16b
+; bsl v0.16b, v0.16b, v1.16b, v2.16b
 ; ret
 
 function %ishl_i8x16(i32) -> i8x16 {
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif
index 2f4f35f574..a6968ab206 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif
@@ -29,9 +29,9 @@ block0:
 
 ; block0:
 ; movz x4, #1
-; fmov s30, w4
+; fmov s31, w4
 ; ldr q3, pc+8 ; b 20 ; data.f128 0x13000000000000000000000000000000
-; mov v31.16b, v30.16b
+; mov v30.16b, v31.16b
 ; tbl v0.16b, { v30.16b, v31.16b }, v3.16b
 ; ret
 
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-narrow.clif b/cranelift/filetests/filetests/isa/aarch64/simd-narrow.clif
index dcf23e1cfe..50b147adff 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-narrow.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-narrow.clif
@@ -9,7 +9,7 @@ block0(v0: i16x4, v1: i16x4):
 }
 
 ; block0:
-; mov v0.d[1], v1.d[0]
+; mov v0.d[1], v0.d[1], v1.d[0]
 ; sqxtn v0.8b, v0.8h
 ; ret
 
@@ -21,7 +21,7 @@ block0(v0: i16x8, v1: i16x8):
 
 ; block0:
 ; sqxtn v0.8b, v0.8h
-; sqxtn2 v0.16b, v1.8h
+; sqxtn2 v0.16b, v0.16b, v1.8h
 ; ret
 
 function %snarrow_i32x2(i32x2, i32x2) -> i16x4 {
@@ -31,7 +31,7 @@ block0(v0: i32x2, v1: i32x2):
 }
 
 ; block0:
-; mov v0.d[1], v1.d[0]
+; mov v0.d[1], v0.d[1], v1.d[0]
 ; sqxtn v0.4h, v0.4s
 ; ret
 
@@ -43,7 +43,7 @@ block0(v0: i32x4, v1: i32x4):
 
 ; block0:
 ; sqxtn v0.4h, v0.4s
-; sqxtn2 v0.8h, v1.4s
+; sqxtn2 v0.8h, v0.8h, v1.4s
 ; ret
 
 function %snarrow_i64x2(i64x2, i64x2) -> i32x4 {
@@ -54,7 +54,7 @@ block0(v0: i64x2, v1: i64x2):
 
 ; block0:
 ; sqxtn v0.2s, v0.2d
-; sqxtn2 v0.4s, v1.2d
+; sqxtn2 v0.4s, v0.4s, v1.2d
 ; ret
 
 function %unarrow_i16x4(i16x4, i16x4) -> i8x8 {
@@ -64,7 +64,7 @@ block0(v0: i16x4, v1: i16x4):
 }
 
 ; block0:
-; mov v0.d[1], v1.d[0]
+; mov v0.d[1], v0.d[1], v1.d[0]
 ; sqxtun v0.8b, v0.8h
 ; ret
 
@@ -76,7 +76,7 @@ block0(v0: i16x8, v1: i16x8):
 
 ; block0:
 ; sqxtun v0.8b, v0.8h
-; sqxtun2 v0.16b, v1.8h
+; sqxtun2 v0.16b, v0.16b, v1.8h
 ; ret
 
 function %unarrow_i32x2(i32x2, i32x2) -> i16x4 {
@@ -86,7 +86,7 @@ block0(v0: i32x2, v1: i32x2):
 }
 
 ; block0:
-; mov v0.d[1], v1.d[0]
+; mov v0.d[1], v0.d[1], v1.d[0]
 ; sqxtun v0.4h, v0.4s
 ; ret
 
@@ -98,7 +98,7 @@ block0(v0: i32x4, v1: i32x4):
 
 ; block0:
 ; sqxtun v0.4h, v0.4s
-; sqxtun2 v0.8h, v1.4s
+; sqxtun2 v0.8h, v0.8h, v1.4s
 ; ret
 
 function %unarrow_i64x2(i64x2, i64x2) -> i32x4 {
@@ -109,7 +109,7 @@ block0(v0: i64x2, v1: i64x2):
 
 ; block0:
 ; sqxtun v0.2s, v0.2d
-; sqxtun2 v0.4s, v1.2d
+; sqxtun2 v0.4s, v0.4s, v1.2d
 ; ret
 
 function %uunarrow_i16x4(i16x4, i16x4) -> i8x8 {
@@ -119,7 +119,7 @@ block0(v0: i16x4, v1: i16x4):
 }
 
 ; block0:
-; mov v0.d[1], v1.d[0]
+; mov v0.d[1], v0.d[1], v1.d[0]
 ; uqxtn v0.8b, v0.8h
 ; ret
 
@@ -131,7 +131,7 @@ block0(v0: i16x8, v1: i16x8):
 
 ; block0:
 ; uqxtn v0.8b, v0.8h
-; uqxtn2 v0.16b, v1.8h
+; uqxtn2 v0.16b, v0.16b, v1.8h
 ; ret
 
 function %uunarrow_i32x2(i32x2, i32x2) -> i16x4 {
@@ -141,7 +141,7 @@ block0(v0: i32x2, v1: i32x2):
 }
 
 ; block0:
-; mov v0.d[1], v1.d[0]
+; mov v0.d[1], v0.d[1], v1.d[0]
 ; uqxtn v0.4h, v0.4s
 ; ret
 
@@ -153,7 +153,7 @@ block0(v0: i32x4, v1: i32x4):
 
 ; block0:
 ; uqxtn v0.4h, v0.4s
-; uqxtn2 v0.8h, v1.4s
+; uqxtn2 v0.8h, v0.8h, v1.4s
 ; ret
 
 function %uunarrow_i64x2(i64x2, i64x2) -> i32x4 {
@@ -164,7 +164,7 @@ block0(v0: i64x2, v1: i64x2):
 
 ; block0:
 ; uqxtn v0.2s, v0.2d
-; uqxtn2 v0.4s, v1.2d
+; uqxtn2 v0.4s, v0.4s, v1.2d
 ; ret
 
 function %snarrow_i16x8_zero(i16x8) -> i8x16 {
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd.clif b/cranelift/filetests/filetests/isa/aarch64/simd.clif
index b26811e6fa..e66fcd2101 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd.clif
@@ -11,7 +11,7 @@ block0:
 
 ; block0:
 ; movz x2, #1
-; movk x2, #1, LSL #48
+; movk x2, x2, #1, LSL #48
 ; dup v0.2d, x2
 ; ret
 
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif b/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif
index 70ceecd6db..388168c5e2 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif
@@ -11,7 +11,7 @@ block0:
 
 ; block0:
 ; movz x1, #1
-; movk x1, #1, LSL #48
+; movk x1, x1, #1, LSL #48
 ; fmov d0, x1
 ; ret
 
diff --git a/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif b/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif
index 2fe23da047..703d8c4b35 100644
--- a/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif
@@ -98,16 +98,16 @@ block0(v0: i64):
 ; subs xzr, sp, x0, UXTX
 ; b.hs 8 ; udf
 ; movz w17, #6784
-; movk w17, #6, LSL #16
+; movk w17, w17, #6, LSL #16
 ; add x16, x0, x17, UXTX
 ; subs xzr, sp, x16, UXTX
 ; b.hs 8 ; udf
 ; movz w16, #6784
-; movk w16, #6, LSL #16
+; movk w16, w16, #6, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
 ; movz w16, #6784
-; movk w16, #6, LSL #16
+; movk w16, w16, #6, LSL #16
 ; add sp, sp, x16, UXTX
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -152,16 +152,16 @@ block0(v0: i64):
 ; subs xzr, sp, x16, UXTX
 ; b.hs 8 ; udf
 ; movz w17, #6784
-; movk w17, #6, LSL #16
+; movk w17, w17, #6, LSL #16
 ; add x16, x16, x17, UXTX
 ; subs xzr, sp, x16, UXTX
 ; b.hs 8 ; udf
 ; movz w16, #6784
-; movk w16, #6, LSL #16
+; movk w16, w16, #6, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
 ; movz w16, #6784
-; movk w16, #6, LSL #16
+; movk w16, w16, #6, LSL #16
 ; add sp, sp, x16, UXTX
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -177,7 +177,7 @@ block0(v0: i64):
 
 ; stp fp, lr, [sp, #-16]!
 ; mov fp, sp
-; movz w16, #6784 ; movk w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16]
+; movz w16, #6784 ; movk w16, w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16]
 ; add x16, x16, #32
 ; subs xzr, sp, x16, UXTX
 ; b.hs 8 ; udf
diff --git a/cranelift/filetests/filetests/isa/aarch64/stack.clif b/cranelift/filetests/filetests/isa/aarch64/stack.clif
index a1478dccda..ac723f088b 100644
--- a/cranelift/filetests/filetests/isa/aarch64/stack.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/stack.clif
@@ -31,12 +31,12 @@ block0:
 ; stp fp, lr, [sp, #-16]!
 ; mov fp, sp
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
 ; mov x0, sp
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; add sp, sp, x16, UXTX
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -71,13 +71,13 @@ block0:
 ; stp fp, lr, [sp, #-16]!
 ; mov fp, sp
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
 ; mov x2, sp
 ; ldr x0, [x2]
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; add sp, sp, x16, UXTX
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -112,13 +112,13 @@ block0(v0: i64):
 ; stp fp, lr, [sp, #-16]!
 ; mov fp, sp
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
 ; mov x2, sp
 ; str x0, [x2]
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; add sp, sp, x16, UXTX
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -479,13 +479,13 @@ block0(v0: i128):
 ; stp fp, lr, [sp, #-16]!
 ; mov fp, sp
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
 ; mov x5, sp
 ; stp x0, x1, [x5]
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; add sp, sp, x16, UXTX
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -539,13 +539,13 @@ block0:
 ; stp fp, lr, [sp, #-16]!
 ; mov fp, sp
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
 ; mov x5, sp
 ; ldp x0, x1, [x5]
 ; movz w16, #34480
-; movk w16, #1, LSL #16
+; movk w16, w16, #1, LSL #16
 ; add sp, sp, x16, UXTX
 ; ldp fp, lr, [sp], #16
 ; ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/vhigh_bits.clif b/cranelift/filetests/filetests/isa/aarch64/vhigh_bits.clif
index 53a99fe2c8..75fc84903b 100644
--- a/cranelift/filetests/filetests/isa/aarch64/vhigh_bits.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/vhigh_bits.clif
@@ -10,9 +10,9 @@ block0(v0: i8x16):
 ; block0:
 ; sshr v3.16b, v0.16b, #7
 ; movz x6, #513
-; movk x6, #2052, LSL #16
-; movk x6, #8208, LSL #32
-; movk x6, #32832, LSL #48
+; movk x6, x6, #2052, LSL #16
+; movk x6, x6, #8208, LSL #32
+; movk x6, x6, #32832, LSL #48
 ; dup v17.2d, x6
 ; and v20.16b, v3.16b, v17.16b
 ; ext v22.16b, v20.16b, v20.16b, #8
@@ -30,9 +30,9 @@ block0(v0: i8x16):
 ; block0:
 ; sshr v3.16b, v0.16b, #7
 ; movz x6, #513
-; movk x6, #2052, LSL #16
-; movk x6, #8208, LSL #32
-; movk x6, #32832, LSL #48
+; movk x6, x6, #2052, LSL #16
+; movk x6, x6, #8208, LSL #32
+; movk x6, x6, #32832, LSL #48
 ; dup v17.2d, x6
 ; and v20.16b, v3.16b, v17.16b
 ; ext v22.16b, v20.16b, v20.16b, #8