aarch64: fix up regalloc2 semantics. (#4830)

This PR removes all uses of modify-operands in the aarch64 backend,
replacing them with reused-input operands. This has the nice effect of
removing a bunch of move instructions and more clearly representing
inputs and outputs.
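
To make the distinction concrete, here is a minimal toy sketch (not the real
Cranelift/regalloc2 API; the type and constructor names are simplified
assumptions) of how a two-address instruction's operands look under the old
modify style versus the new reused-input style:

```rust
// Toy model of the two operand kinds, for illustration only. The real
// interface is Cranelift's `OperandCollector` / regalloc2's `Operand`
// constructors; the names below are simplified assumptions.
#[derive(Clone, Copy, Debug)]
struct VReg(u32);

#[derive(Debug)]
enum Operand {
    /// Read-only input.
    Use(VReg),
    /// Old style: one register that is both read and written ("modify").
    Mod(VReg),
    /// New style: a fresh output constrained to the same physical register
    /// as the input operand at the given index.
    ReuseDef(VReg, usize),
}

fn main() {
    let src = VReg(1);
    let dst = VReg(2);

    // Before: a two-address instruction such as `movk` carried a single
    // modify operand, hiding the fact that there is a distinct SSA output.
    let old_operands = vec![Operand::Mod(src)];

    // After: input and output are separate operands, with the output marked
    // as reusing input 0. This is why the printed vcode now shows the source
    // register explicitly, e.g. `movk w3, w3, #15258, LSL #16`.
    let new_operands = vec![Operand::Use(src), Operand::ReuseDef(dst, 0)];

    println!("old: {:?}", old_operands);
    println!("new: {:?}", new_operands);
}
```

A reuse constraint still lets the allocator place the value pair in any
register; it only requires that the use and the def land in the same one,
which is exactly what the two-address machine encoding needs.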

This PR also removes the explicit use of pinned vregs in the aarch64
backend, instead using fixed-register constraints on the operands when
insts or pseudo-inst sequences require certain registers.
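
As a rough sketch of what a fixed-register constraint expresses, again with
toy types rather than the real API, consider the atomic read-modify-write
helper whose expansion needs particular registers:

```rust
// Toy stand-ins for virtual/physical registers and fixed-register operand
// constraints, for illustration only; the real constraint constructors live
// in regalloc2 and Cranelift's operand collector, and the names here are
// assumptions.
#[derive(Clone, Copy, Debug)]
struct VReg(u32);

#[derive(Clone, Copy, Debug)]
struct PReg(&'static str);

#[derive(Debug)]
enum Operand {
    /// Input that must be allocated to the given physical register.
    FixedUse(VReg, PReg),
    /// Output that must be allocated to the given physical register.
    FixedDef(VReg, PReg),
}

fn main() {
    // The aarch64 atomic read-modify-write pseudo-instruction expands to a
    // hard-coded ldaxr/.../stlxr loop, so its values must land in specific
    // registers (compare the `atomic_rmw_loop_* addr=x25 operand=x26
    // oldval=x27 ...` lines in the expected output below). Attaching the
    // constraint to the operand lets regalloc2 insert only the moves that
    // are actually needed, instead of the backend shuffling values into
    // pinned vregs around the instruction.
    let (addr, operand, oldval) = (VReg(10), VReg(11), VReg(12));
    let operands = vec![
        Operand::FixedUse(addr, PReg("x25")),
        Operand::FixedUse(operand, PReg("x26")),
        Operand::FixedDef(oldval, PReg("x27")),
    ];
    println!("constraints: {:?}", operands);
}
```

The filetest expectations below reflect this: the pseudo-instruction now
prints its fixed register assignments directly, and the extra
`mov x4, x1` / `mov x26, x4` shuffles collapse to a single `mov x26, x1`.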

This is the second PR in the regalloc-semantics cleanup series; after
the remaining backend (s390x) and the ABI code are cleaned up as well,
we'll be able to simplify the regalloc2 frontend.
Author: Chris Fallin
Date: 2022-09-01 14:25:20 -07:00
Committed by: GitHub
Parent: ac2d4c4818
Commit: ae5fe8a728

25 changed files with 1098 additions and 886 deletions


@@ -139,7 +139,7 @@ block0(v0: i64):
; block0:
; movz w3, #51712
; movk w3, #15258, LSL #16
; movk w3, w3, #15258, LSL #16
; add x3, x3, x0
; ldr w0, [x3]
; ret


@@ -142,9 +142,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_nand_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -164,9 +163,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_nand_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -186,9 +184,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_nand_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -208,9 +205,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_nand_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16


@@ -14,9 +14,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr x27, [x25]; add x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_add_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -36,9 +35,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr w27, [x25]; add w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_add_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -58,9 +56,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrh w27, [x25]; add w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_add_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -80,9 +77,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_add_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -102,9 +98,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr x27, [x25]; sub x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_sub_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -124,9 +119,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr w27, [x25]; sub w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_sub_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -146,9 +140,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrh w27, [x25]; sub w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_sub_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -168,9 +161,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrb w27, [x25]; sub w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_sub_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -190,9 +182,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_and_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -212,9 +203,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr w27, [x25]; and w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_and_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -234,9 +224,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrh w27, [x25]; and w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_and_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -256,9 +245,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrb w27, [x25]; and w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_and_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -278,9 +266,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_nand_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -300,9 +287,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_nand_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -322,9 +308,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_nand_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -344,9 +329,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_nand_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -366,9 +350,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr x27, [x25]; orr x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_orr_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -388,9 +371,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_orr_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -410,9 +392,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrh w27, [x25]; orr w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_orr_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -432,9 +413,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrb w27, [x25]; orr w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_orr_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -454,9 +434,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr x27, [x25]; eor x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_eor_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -476,9 +455,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr w27, [x25]; eor w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_eor_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -498,9 +476,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_eor_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -520,9 +497,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrb w27, [x25]; eor w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_eor_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -542,9 +518,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_smax_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -564,9 +539,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxr w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_smax_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -586,9 +560,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, gt; stlxrh w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_smax_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -608,9 +581,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_smax_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -630,9 +602,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, hi; stlxr w24, x28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_umax_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -652,9 +623,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxr w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_umax_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -674,9 +644,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_umax_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -696,9 +665,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrb w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_umax_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -718,9 +686,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lt; stlxr w24, x28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_smin_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -740,9 +707,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_smin_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -762,9 +728,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_smin_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -784,9 +749,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, lt; stlxrb w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_smin_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -806,9 +770,8 @@ block0(v0: i64, v1: i64):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lo; stlxr w24, x28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_umin_64 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -828,9 +791,8 @@ block0(v0: i64, v1: i32):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxr w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_umin_32 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -850,9 +812,8 @@ block0(v0: i64, v1: i16):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrh w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_umin_16 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16
@@ -872,9 +833,8 @@ block0(v0: i64, v1: i8):
; stp x24, x25, [sp, #-16]!
; block0:
; mov x25, x0
; mov x4, x1
; mov x26, x4
; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b
; mov x26, x1
; atomic_rmw_loop_umin_8 addr=x25 operand=x26 oldval=x27 scratch1=x24 scratch2=x28
; ldp x24, x25, [sp], #16
; ldp x26, x27, [sp], #16
; ldr x28, [sp], #16


@@ -245,11 +245,11 @@ block0(v0: i128):
}
; block0:
; fmov d6, x0
; mov v6.d[1], x1
; cnt v19.16b, v6.16b
; addv b21, v19.16b
; umov w0, v21.b[0]
; fmov d7, x0
; mov v7.d[1], v7.d[1], x1
; cnt v18.16b, v7.16b
; addv b20, v18.16b
; umov w0, v20.b[0]
; movz w1, #0
; ret


@@ -130,9 +130,9 @@ block0:
; block0:
; movz x0, #58
; movk x0, #4626, LSL #16
; movk x0, #61603, LSL #32
; movk x0, #62283, LSL #48
; movk x0, x0, #4626, LSL #16
; movk x0, x0, #61603, LSL #32
; movk x0, x0, #62283, LSL #48
; ret
function %f() -> i64 {
@@ -143,7 +143,7 @@ block0:
; block0:
; movz x0, #7924, LSL #16
; movk x0, #4841, LSL #48
; movk x0, x0, #4841, LSL #48
; ret
function %f() -> i64 {
@@ -154,7 +154,7 @@ block0:
; block0:
; movn x0, #57611, LSL #16
; movk x0, #4841, LSL #48
; movk x0, x0, #4841, LSL #48
; ret
function %f() -> i32 {


@@ -15,10 +15,9 @@ block0(v0: i16):
}
; block0:
; dup v6.4h, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; sqxtn v0.8b, v7.8h
; dup v4.4h, w0
; mov v4.d[1], v4.d[1], v4.d[0]
; sqxtn v0.8b, v4.8h
; ret
function %snarrow_i16x8(i16) -> i8x16 {
@@ -37,7 +36,7 @@ block0(v0: i16):
; block0:
; dup v6.8h, w0
; sqxtn v0.8b, v6.8h
; sqxtn2 v0.16b, v6.8h
; sqxtn2 v0.16b, v0.16b, v6.8h
; ret
function %snarrow_i32x2(i32) -> i16x4 {
@@ -54,10 +53,9 @@ block0(v0: i32):
}
; block0:
; dup v6.2s, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; sqxtn v0.4h, v7.4s
; dup v4.2s, w0
; mov v4.d[1], v4.d[1], v4.d[0]
; sqxtn v0.4h, v4.4s
; ret
function %snarrow_i32x4(i32) -> i16x8 {
@@ -76,7 +74,7 @@ block0(v0: i32):
; block0:
; dup v6.4s, w0
; sqxtn v0.4h, v6.4s
; sqxtn2 v0.8h, v6.4s
; sqxtn2 v0.8h, v0.8h, v6.4s
; ret
function %snarrow_i64x2(i64) -> i32x4 {
@@ -95,7 +93,7 @@ block0(v0: i64):
; block0:
; dup v6.2d, x0
; sqxtn v0.2s, v6.2d
; sqxtn2 v0.4s, v6.2d
; sqxtn2 v0.4s, v0.4s, v6.2d
; ret
function %unarrow_i16x4(i16) -> i8x8 {
@@ -112,10 +110,9 @@ block0(v0: i16):
}
; block0:
; dup v6.4h, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; sqxtun v0.8b, v7.8h
; dup v4.4h, w0
; mov v4.d[1], v4.d[1], v4.d[0]
; sqxtun v0.8b, v4.8h
; ret
function %unarrow_i16x8(i16) -> i8x16 {
@@ -134,7 +131,7 @@ block0(v0: i16):
; block0:
; dup v6.8h, w0
; sqxtun v0.8b, v6.8h
; sqxtun2 v0.16b, v6.8h
; sqxtun2 v0.16b, v0.16b, v6.8h
; ret
function %unarrow_i32x2(i32) -> i16x4 {
@@ -151,10 +148,9 @@ block0(v0: i32):
}
; block0:
; dup v6.2s, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; sqxtun v0.4h, v7.4s
; dup v4.2s, w0
; mov v4.d[1], v4.d[1], v4.d[0]
; sqxtun v0.4h, v4.4s
; ret
function %unarrow_i32x4(i32) -> i16x8 {
@@ -173,7 +169,7 @@ block0(v0: i32):
; block0:
; dup v6.4s, w0
; sqxtun v0.4h, v6.4s
; sqxtun2 v0.8h, v6.4s
; sqxtun2 v0.8h, v0.8h, v6.4s
; ret
function %unarrow_i64x2(i64) -> i32x4 {
@@ -192,7 +188,7 @@ block0(v0: i64):
; block0:
; dup v6.2d, x0
; sqxtun v0.2s, v6.2d
; sqxtun2 v0.4s, v6.2d
; sqxtun2 v0.4s, v0.4s, v6.2d
; ret
function %uunarrow_i16x4(i16) -> i8x8 {
@@ -209,10 +205,9 @@ block0(v0: i16):
}
; block0:
; dup v6.4h, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; uqxtn v0.8b, v7.8h
; dup v4.4h, w0
; mov v4.d[1], v4.d[1], v4.d[0]
; uqxtn v0.8b, v4.8h
; ret
function %uunarrow_i16x8(i16) -> i8x16 {
@@ -231,7 +226,7 @@ block0(v0: i16):
; block0:
; dup v6.8h, w0
; uqxtn v0.8b, v6.8h
; uqxtn2 v0.16b, v6.8h
; uqxtn2 v0.16b, v0.16b, v6.8h
; ret
function %uunarrow_i32x2(i32) -> i16x4 {
@@ -248,10 +243,9 @@ block0(v0: i32):
}
; block0:
; dup v6.2s, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; uqxtn v0.4h, v7.4s
; dup v4.2s, w0
; mov v4.d[1], v4.d[1], v4.d[0]
; uqxtn v0.4h, v4.4s
; ret
function %uunarrow_i32x4(i32) -> i16x8 {
@@ -270,7 +264,7 @@ block0(v0: i32):
; block0:
; dup v6.4s, w0
; uqxtn v0.4h, v6.4s
; uqxtn2 v0.8h, v6.4s
; uqxtn2 v0.8h, v0.8h, v6.4s
; ret
function %uunarrow_i64x2(i64) -> i32x4 {
@@ -289,5 +283,6 @@ block0(v0: i64):
; block0:
; dup v6.2d, x0
; uqxtn v0.2s, v6.2d
; uqxtn2 v0.4s, v6.2d
; uqxtn2 v0.4s, v0.4s, v6.2d
; ret


@@ -197,7 +197,7 @@ block0(v0: f64, v1: f64):
; dup v17.2d, v0.d[0]
; dup v18.2d, v1.d[0]
; fcmgt v0.2d, v17.2d, v18.2d
; bsl v0.16b, v18.16b, v17.16b
; bsl v0.16b, v0.16b, v18.16b, v17.16b
; ret
function %f64x2_splat_max_pseudo(f64, f64) -> f64x2 {
@@ -216,5 +216,6 @@ block0(v0: f64, v1: f64):
; dup v17.2d, v0.d[0]
; dup v18.2d, v1.d[0]
; fcmgt v0.2d, v18.2d, v17.2d
; bsl v0.16b, v18.16b, v17.16b
; bsl v0.16b, v0.16b, v18.16b, v17.16b
; ret


@@ -309,8 +309,8 @@ block0(v0: f32, v1: f32):
}
; block0:
; ushr v7.2s, v1.2s, #31
; sli v0.2s, v7.2s, #31
; ushr v6.2s, v1.2s, #31
; sli v0.2s, v0.2s, v6.2s, #31
; ret
function %f32(f64, f64) -> f64 {
@@ -320,8 +320,8 @@ block0(v0: f64, v1: f64):
}
; block0:
; ushr d7, d1, #63
; sli d0, d7, #63
; ushr d6, d1, #63
; sli d0, d0, d6, #63
; ret
function %f33(f32) -> i32 {
@@ -918,9 +918,8 @@ block0(v0: f32x4, v1: f32x4, v2: f32x4):
}
; block0:
; mov v17.16b, v0.16b
; fmla v2.4s, v2.4s, v0.4s, v1.4s
; mov v0.16b, v2.16b
; fmla v0.4s, v17.4s, v1.4s
; ret
function %f79(f32x2, f32x2, f32x2) -> f32x2 {
@@ -930,9 +929,8 @@ block0(v0: f32x2, v1: f32x2, v2: f32x2):
}
; block0:
; mov v17.16b, v0.16b
; fmla v2.2s, v2.2s, v0.2s, v1.2s
; mov v0.16b, v2.16b
; fmla v0.2s, v17.2s, v1.2s
; ret
function %f80(f64x2, f64x2, f64x2) -> f64x2 {
@@ -942,9 +940,8 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2):
}
; block0:
; mov v17.16b, v0.16b
; fmla v2.2d, v2.2d, v0.2d, v1.2d
; mov v0.16b, v2.16b
; fmla v0.2d, v17.2d, v1.2d
; ret
function %f81(f32x2, f32x2) -> f32x2 {
@@ -954,8 +951,8 @@ block0(v0: f32x2, v1: f32x2):
}
; block0:
; ushr v7.2s, v1.2s, #31
; sli v0.2s, v7.2s, #31
; ushr v6.2s, v1.2s, #31
; sli v0.2s, v0.2s, v6.2s, #31
; ret
function %f82(f32x4, f32x4) -> f32x4 {
@@ -965,8 +962,8 @@ block0(v0: f32x4, v1: f32x4):
}
; block0:
; ushr v7.4s, v1.4s, #31
; sli v0.4s, v7.4s, #31
; ushr v6.4s, v1.4s, #31
; sli v0.4s, v0.4s, v6.4s, #31
; ret
function %f83(f64x2, f64x2) -> f64x2 {
@@ -976,6 +973,7 @@ block0(v0: f64x2, v1: f64x2):
}
; block0:
; ushr v7.2d, v1.2d, #63
; sli v0.2d, v7.2d, #63
; ushr v6.2d, v1.2d, #63
; sli v0.2d, v0.2d, v6.2d, #63
; ret


@@ -105,7 +105,7 @@ block0:
; movi v0.16b, #0
; movi v4.16b, #0
; movi v5.16b, #0
; bsl v0.16b, v4.16b, v5.16b
; bsl v0.16b, v0.16b, v4.16b, v5.16b
; ret
function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 {
@@ -115,7 +115,7 @@ block0(v0: b16x8, v1: i16x8, v2: i16x8):
}
; block0:
; bsl v0.16b, v1.16b, v2.16b
; bsl v0.16b, v0.16b, v1.16b, v2.16b
; ret
function %vselect_f32x4(b32x4, f32x4, f32x4) -> f32x4 {
@@ -125,7 +125,7 @@ block0(v0: b32x4, v1: f32x4, v2: f32x4):
}
; block0:
; bsl v0.16b, v1.16b, v2.16b
; bsl v0.16b, v0.16b, v1.16b, v2.16b
; ret
function %vselect_f64x2(b64x2, f64x2, f64x2) -> f64x2 {
@@ -135,7 +135,7 @@ block0(v0: b64x2, v1: f64x2, v2: f64x2):
}
; block0:
; bsl v0.16b, v1.16b, v2.16b
; bsl v0.16b, v0.16b, v1.16b, v2.16b
; ret
function %ishl_i8x16(i32) -> i8x16 {


@@ -29,9 +29,9 @@ block0:
; block0:
; movz x4, #1
; fmov s30, w4
; fmov s31, w4
; ldr q3, pc+8 ; b 20 ; data.f128 0x13000000000000000000000000000000
; mov v31.16b, v30.16b
; mov v30.16b, v31.16b
; tbl v0.16b, { v30.16b, v31.16b }, v3.16b
; ret


@@ -9,7 +9,7 @@ block0(v0: i16x4, v1: i16x4):
}
; block0:
; mov v0.d[1], v1.d[0]
; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtn v0.8b, v0.8h
; ret
@@ -21,7 +21,7 @@ block0(v0: i16x8, v1: i16x8):
; block0:
; sqxtn v0.8b, v0.8h
; sqxtn2 v0.16b, v1.8h
; sqxtn2 v0.16b, v0.16b, v1.8h
; ret
function %snarrow_i32x2(i32x2, i32x2) -> i16x4 {
@@ -31,7 +31,7 @@ block0(v0: i32x2, v1: i32x2):
}
; block0:
; mov v0.d[1], v1.d[0]
; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtn v0.4h, v0.4s
; ret
@@ -43,7 +43,7 @@ block0(v0: i32x4, v1: i32x4):
; block0:
; sqxtn v0.4h, v0.4s
; sqxtn2 v0.8h, v1.4s
; sqxtn2 v0.8h, v0.8h, v1.4s
; ret
function %snarrow_i64x2(i64x2, i64x2) -> i32x4 {
@@ -54,7 +54,7 @@ block0(v0: i64x2, v1: i64x2):
; block0:
; sqxtn v0.2s, v0.2d
; sqxtn2 v0.4s, v1.2d
; sqxtn2 v0.4s, v0.4s, v1.2d
; ret
function %unarrow_i16x4(i16x4, i16x4) -> i8x8 {
@@ -64,7 +64,7 @@ block0(v0: i16x4, v1: i16x4):
}
; block0:
; mov v0.d[1], v1.d[0]
; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtun v0.8b, v0.8h
; ret
@@ -76,7 +76,7 @@ block0(v0: i16x8, v1: i16x8):
; block0:
; sqxtun v0.8b, v0.8h
; sqxtun2 v0.16b, v1.8h
; sqxtun2 v0.16b, v0.16b, v1.8h
; ret
function %unarrow_i32x2(i32x2, i32x2) -> i16x4 {
@@ -86,7 +86,7 @@ block0(v0: i32x2, v1: i32x2):
}
; block0:
; mov v0.d[1], v1.d[0]
; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtun v0.4h, v0.4s
; ret
@@ -98,7 +98,7 @@ block0(v0: i32x4, v1: i32x4):
; block0:
; sqxtun v0.4h, v0.4s
; sqxtun2 v0.8h, v1.4s
; sqxtun2 v0.8h, v0.8h, v1.4s
; ret
function %unarrow_i64x2(i64x2, i64x2) -> i32x4 {
@@ -109,7 +109,7 @@ block0(v0: i64x2, v1: i64x2):
; block0:
; sqxtun v0.2s, v0.2d
; sqxtun2 v0.4s, v1.2d
; sqxtun2 v0.4s, v0.4s, v1.2d
; ret
function %uunarrow_i16x4(i16x4, i16x4) -> i8x8 {
@@ -119,7 +119,7 @@ block0(v0: i16x4, v1: i16x4):
}
; block0:
; mov v0.d[1], v1.d[0]
; mov v0.d[1], v0.d[1], v1.d[0]
; uqxtn v0.8b, v0.8h
; ret
@@ -131,7 +131,7 @@ block0(v0: i16x8, v1: i16x8):
; block0:
; uqxtn v0.8b, v0.8h
; uqxtn2 v0.16b, v1.8h
; uqxtn2 v0.16b, v0.16b, v1.8h
; ret
function %uunarrow_i32x2(i32x2, i32x2) -> i16x4 {
@@ -141,7 +141,7 @@ block0(v0: i32x2, v1: i32x2):
}
; block0:
; mov v0.d[1], v1.d[0]
; mov v0.d[1], v0.d[1], v1.d[0]
; uqxtn v0.4h, v0.4s
; ret
@@ -153,7 +153,7 @@ block0(v0: i32x4, v1: i32x4):
; block0:
; uqxtn v0.4h, v0.4s
; uqxtn2 v0.8h, v1.4s
; uqxtn2 v0.8h, v0.8h, v1.4s
; ret
function %uunarrow_i64x2(i64x2, i64x2) -> i32x4 {
@@ -164,7 +164,7 @@ block0(v0: i64x2, v1: i64x2):
; block0:
; uqxtn v0.2s, v0.2d
; uqxtn2 v0.4s, v1.2d
; uqxtn2 v0.4s, v0.4s, v1.2d
; ret
function %snarrow_i16x8_zero(i16x8) -> i8x16 {


@@ -11,7 +11,7 @@ block0:
; block0:
; movz x2, #1
; movk x2, #1, LSL #48
; movk x2, x2, #1, LSL #48
; dup v0.2d, x2
; ret


@@ -11,7 +11,7 @@ block0:
; block0:
; movz x1, #1
; movk x1, #1, LSL #48
; movk x1, x1, #1, LSL #48
; fmov d0, x1
; ret


@@ -98,16 +98,16 @@ block0(v0: i64):
; subs xzr, sp, x0, UXTX
; b.hs 8 ; udf
; movz w17, #6784
; movk w17, #6, LSL #16
; movk w17, w17, #6, LSL #16
; add x16, x0, x17, UXTX
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; movz w16, #6784
; movk w16, #6, LSL #16
; movk w16, w16, #6, LSL #16
; sub sp, sp, x16, UXTX
; block0:
; movz w16, #6784
; movk w16, #6, LSL #16
; movk w16, w16, #6, LSL #16
; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16
; ret
@@ -152,16 +152,16 @@ block0(v0: i64):
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; movz w17, #6784
; movk w17, #6, LSL #16
; movk w17, w17, #6, LSL #16
; add x16, x16, x17, UXTX
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf
; movz w16, #6784
; movk w16, #6, LSL #16
; movk w16, w16, #6, LSL #16
; sub sp, sp, x16, UXTX
; block0:
; movz w16, #6784
; movk w16, #6, LSL #16
; movk w16, w16, #6, LSL #16
; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16
; ret
@@ -177,7 +177,7 @@ block0(v0: i64):
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; movz w16, #6784 ; movk w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16]
; movz w16, #6784 ; movk w16, w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16]
; add x16, x16, #32
; subs xzr, sp, x16, UXTX
; b.hs 8 ; udf


@@ -31,12 +31,12 @@ block0:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; movz w16, #34480
; movk w16, #1, LSL #16
; movk w16, w16, #1, LSL #16
; sub sp, sp, x16, UXTX
; block0:
; mov x0, sp
; movz w16, #34480
; movk w16, #1, LSL #16
; movk w16, w16, #1, LSL #16
; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16
; ret
@@ -71,13 +71,13 @@ block0:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; movz w16, #34480
; movk w16, #1, LSL #16
; movk w16, w16, #1, LSL #16
; sub sp, sp, x16, UXTX
; block0:
; mov x2, sp
; ldr x0, [x2]
; movz w16, #34480
; movk w16, #1, LSL #16
; movk w16, w16, #1, LSL #16
; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16
; ret
@@ -112,13 +112,13 @@ block0(v0: i64):
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; movz w16, #34480
; movk w16, #1, LSL #16
; movk w16, w16, #1, LSL #16
; sub sp, sp, x16, UXTX
; block0:
; mov x2, sp
; str x0, [x2]
; movz w16, #34480
; movk w16, #1, LSL #16
; movk w16, w16, #1, LSL #16
; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16
; ret
@@ -479,13 +479,13 @@ block0(v0: i128):
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; movz w16, #34480
; movk w16, #1, LSL #16
; movk w16, w16, #1, LSL #16
; sub sp, sp, x16, UXTX
; block0:
; mov x5, sp
; stp x0, x1, [x5]
; movz w16, #34480
; movk w16, #1, LSL #16
; movk w16, w16, #1, LSL #16
; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16
; ret
@@ -539,13 +539,13 @@ block0:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; movz w16, #34480
; movk w16, #1, LSL #16
; movk w16, w16, #1, LSL #16
; sub sp, sp, x16, UXTX
; block0:
; mov x5, sp
; ldp x0, x1, [x5]
; movz w16, #34480
; movk w16, #1, LSL #16
; movk w16, w16, #1, LSL #16
; add sp, sp, x16, UXTX
; ldp fp, lr, [sp], #16
; ret


@@ -10,9 +10,9 @@ block0(v0: i8x16):
; block0:
; sshr v3.16b, v0.16b, #7
; movz x6, #513
; movk x6, #2052, LSL #16
; movk x6, #8208, LSL #32
; movk x6, #32832, LSL #48
; movk x6, x6, #2052, LSL #16
; movk x6, x6, #8208, LSL #32
; movk x6, x6, #32832, LSL #48
; dup v17.2d, x6
; and v20.16b, v3.16b, v17.16b
; ext v22.16b, v20.16b, v20.16b, #8
@@ -30,9 +30,9 @@ block0(v0: i8x16):
; block0:
; sshr v3.16b, v0.16b, #7
; movz x6, #513
; movk x6, #2052, LSL #16
; movk x6, #8208, LSL #32
; movk x6, #32832, LSL #48
; movk x6, x6, #2052, LSL #16
; movk x6, x6, #8208, LSL #32
; movk x6, x6, #32832, LSL #48
; dup v17.2d, x6
; and v20.16b, v3.16b, v17.16b
; ext v22.16b, v20.16b, v20.16b, #8