ABI: implement register arguments with constraints. (#4858)
* ABI: implement register arguments with constraints.

  Currently, Cranelift's ABI code emits a sequence of moves from physical
  registers into vregs at the top of the function body, one for every
  register-carried argument. For a number of reasons, we want to move to
  operand constraints instead, and remove the use of explicitly-named
  "pinned vregs"; this allows for better regalloc in theory, as it removes
  the need to "reverse-engineer" the sequence of moves.

  This PR alters the ABI code so that it generates a single "args"
  pseudo-instruction as the first instruction in the function body. This
  pseudo-inst defs all register arguments and constrains them to the
  appropriate registers at the def-point; the regalloc can subsequently
  move them wherever it needs to.

  Some care was taken not to have this pseudo-inst show up in
  post-regalloc disassemblies, but the change did cause a general regalloc
  "shift" in many tests, so the precise-output updates are a bit noisy.
  Sorry about that!

  A subsequent PR will handle the other half of the ABI code, namely the
  callsite case, with a similar preg-to-constraint conversion.

* Update based on review feedback.

* Review feedback.
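To make the mechanism concrete, the sketch below contrasts the two
prologue-lowering strategies the message describes. It is an illustrative
sketch in plain Rust, not Cranelift's actual code: `PReg`, `VReg`, `Inst`,
and the `lower_prologue_*` helpers are simplified stand-ins for the real
machinst and regalloc2 types.

    // Simplified stand-ins for illustration; the real Cranelift types differ.
    #[derive(Clone, Copy, Debug)]
    struct VReg(u32); // virtual register
    #[derive(Clone, Copy, Debug)]
    struct PReg(u8); // physical register (e.g. x0..x7 for aarch64 int args)

    #[derive(Debug)]
    enum Inst {
        // Old approach: one real move per register-carried argument, from a
        // "pinned" vreg known to live in a fixed physical register.
        MovFromPreg { dst: VReg, src: PReg },
        // New approach: a single pseudo-inst that defs every argument vreg,
        // each constrained to its ABI register at the def-point. It emits
        // no machine code, so it never appears in post-regalloc disassembly.
        Args { defs: Vec<(VReg, PReg)> },
    }

    fn lower_prologue_old(abi_args: &[(VReg, PReg)]) -> Vec<Inst> {
        abi_args
            .iter()
            .map(|&(vreg, preg)| Inst::MovFromPreg { dst: vreg, src: preg })
            .collect()
    }

    fn lower_prologue_new(abi_args: &[(VReg, PReg)]) -> Vec<Inst> {
        // One instruction and no moves: the allocator sees fixed-register
        // def constraints and is free to place each value wherever it is
        // needed next.
        vec![Inst::Args { defs: abi_args.to_vec() }]
    }

    fn main() {
        let abi_args = [(VReg(0), PReg(0)), (VReg(1), PReg(1))];
        println!("old: {:?}", lower_prologue_old(&abi_args));
        println!("new: {:?}", lower_prologue_new(&abi_args));
    }

Because the fixed-register requirement is now a constraint at the
def-point rather than a move to be reverse-engineered, the allocator can
assign each argument directly to wherever it is used next; that extra
freedom is what produces the register-numbering "shift" visible
throughout the test expectation diffs below.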
@@ -36,8 +36,8 @@ block0(v0: i32, v1: i32):
}

; block0:
; mov w5, w0
; ldr w0, [x5, w1, UXTW]
; mov w4, w0
; ldr w0, [x4, w1, UXTW]
; ret

function %f8(i64, i32) -> i32 {
@@ -52,10 +52,10 @@ block0(v0: i64, v1: i32):
}

; block0:
; add x5, x0, #68
; add x5, x5, x0
; add x5, x5, x1, SXTW
; ldr w0, [x5, w1, SXTW]
; add x4, x0, #68
; add x4, x4, x0
; add x4, x4, x1, SXTW
; ldr w0, [x4, w1, SXTW]
; ret

function %f9(i64, i64, i64) -> i32 {
@@ -85,10 +85,10 @@ block0(v0: i64, v1: i64, v2: i64):
}

; block0:
; movz x7, #4100
; add x7, x7, x1
; add x7, x7, x2
; ldr w0, [x7, x0]
; movz x5, #4100
; add x5, x5, x1
; add x5, x5, x2
; ldr w0, [x5, x0]
; ret

function %f10() -> i32 {
@@ -166,8 +166,8 @@ block0(v0: i32, v1: i32):
}

; block0:
; sxtw x5, w0
; ldr w0, [x5, w1, SXTW]
; sxtw x4, w0
; ldr w0, [x4, w1, SXTW]
; ret

function %f18(i64, i64, i64) -> i32 {
@@ -179,8 +179,8 @@ block0(v0: i64, v1: i64, v2: i64):
}

; block0:
; movn w7, #4097
; ldrsh x0, [x7]
; movn w5, #4097
; ldrsh x0, [x5]
; ret

function %f19(i64, i64, i64) -> i32 {
@@ -192,8 +192,8 @@ block0(v0: i64, v1: i64, v2: i64):
}

; block0:
; movz x7, #4098
; ldrsh x0, [x7]
; movz x5, #4098
; ldrsh x0, [x5]
; ret

function %f20(i64, i64, i64) -> i32 {
@@ -205,9 +205,9 @@ block0(v0: i64, v1: i64, v2: i64):
}

; block0:
; movn w7, #4097
; sxtw x9, w7
; ldrsh x0, [x9]
; movn w5, #4097
; sxtw x7, w5
; ldrsh x0, [x7]
; ret

function %f21(i64, i64, i64) -> i32 {
@@ -219,9 +219,9 @@ block0(v0: i64, v1: i64, v2: i64):
}

; block0:
; movz x7, #4098
; sxtw x9, w7
; ldrsh x0, [x9]
; movz x5, #4098
; sxtw x7, w5
; ldrsh x0, [x7]
; ret

function %i128(i64) -> i128 {
@@ -327,13 +327,13 @@ block0(v0: i64, v1: i32):
}

; block0:
; mov x8, x0
; add x8, x8, x1, SXTW
; ldp x10, x11, [x8, #24]
; mov x7, x0
; add x7, x7, x1, SXTW
; ldp x9, x10, [x7, #24]
; add x0, x0, x1, SXTW
; mov x15, x10
; mov x1, x11
; stp x15, x1, [x0, #24]
; mov x0, x10
; mov x14, x9
; mov x1, x10
; stp x14, x1, [x0, #24]
; mov x0, x9
; ret

@@ -109,8 +109,8 @@ block0(v0: i64, v1: i64):

; block0:
; cbnz x1, 8 ; udf
; sdiv x6, x0, x1
; msub x0, x6, x1, x0
; sdiv x5, x0, x1
; msub x0, x5, x1, x0
; ret

function %f11(i64, i64) -> i64 {
@@ -121,8 +121,8 @@ block0(v0: i64, v1: i64):

; block0:
; cbnz x1, 8 ; udf
; udiv x6, x0, x1
; msub x0, x6, x1, x0
; udiv x5, x0, x1
; msub x0, x5, x1, x0
; ret

function %f12(i32, i32) -> i32 {
@@ -132,13 +132,13 @@ block0(v0: i32, v1: i32):
}

; block0:
; sxtw x5, w0
; sxtw x7, w1
; cbnz x7, 8 ; udf
; adds wzr, w7, #1
; ccmp w5, #1, #nzcv, eq
; sxtw x4, w0
; sxtw x6, w1
; cbnz x6, 8 ; udf
; adds wzr, w6, #1
; ccmp w4, #1, #nzcv, eq
; b.vc 8 ; udf
; sdiv x0, x5, x7
; sdiv x0, x4, x6
; ret

function %f13(i32) -> i32 {
@@ -161,10 +161,10 @@ block0(v0: i32, v1: i32):
}

; block0:
; mov w5, w0
; mov w7, w1
; cbnz x7, 8 ; udf
; udiv x0, x5, x7
; mov w4, w0
; mov w6, w1
; cbnz x6, 8 ; udf
; udiv x0, x4, x6
; ret

function %f15(i32) -> i32 {
@@ -187,11 +187,11 @@ block0(v0: i32, v1: i32):
}

; block0:
; sxtw x5, w0
; sxtw x7, w1
; cbnz x7, 8 ; udf
; sdiv x10, x5, x7
; msub x0, x10, x7, x5
; sxtw x4, w0
; sxtw x6, w1
; cbnz x6, 8 ; udf
; sdiv x9, x4, x6
; msub x0, x9, x6, x4
; ret

function %f17(i32, i32) -> i32 {
@@ -201,11 +201,11 @@ block0(v0: i32, v1: i32):
}

; block0:
; mov w5, w0
; mov w7, w1
; cbnz x7, 8 ; udf
; udiv x10, x5, x7
; msub x0, x10, x7, x5
; mov w4, w0
; mov w6, w1
; cbnz x6, 8 ; udf
; udiv x9, x4, x6
; msub x0, x9, x6, x4
; ret

function %f18(i64, i64) -> i64 {
@@ -379,9 +379,9 @@ block0(v0: i128, v1: i128):
}

; block0:
; umulh x10, x0, x2
; madd x12, x0, x3, x10
; madd x1, x1, x2, x12
; umulh x7, x0, x2
; madd x9, x0, x3, x7
; madd x1, x1, x2, x9
; madd x0, x0, x2, xzr
; ret

@@ -437,8 +437,8 @@ block0(v0: i32, v1: i32, v2: i32):
}

; block0:
; madd w8, w1, w2, wzr
; sub w0, w8, w0
; madd w6, w1, w2, wzr
; sub w0, w6, w0
; ret

function %imul_sub_i64(i64, i64, i64) -> i64 {
@@ -449,8 +449,8 @@ block0(v0: i64, v1: i64, v2: i64):
}

; block0:
; madd x8, x1, x2, xzr
; sub x0, x8, x0
; madd x6, x1, x2, xzr
; sub x0, x6, x0
; ret

function %srem_const (i64) -> i64 {

@@ -8,7 +8,7 @@ block0(v0: i64, v1: i64):
}

; block0:
; ldaddal x1, x4, [x0]
; ldaddal x1, x3, [x0]
; ret

function %atomic_rmw_add_i32(i64, i32) {
@@ -18,7 +18,7 @@ block0(v0: i64, v1: i32):
}

; block0:
; ldaddal w1, w4, [x0]
; ldaddal w1, w3, [x0]
; ret

function %atomic_rmw_add_i16(i64, i16) {
@@ -28,7 +28,7 @@ block0(v0: i64, v1: i16):
}

; block0:
; ldaddalh w1, w4, [x0]
; ldaddalh w1, w3, [x0]
; ret

function %atomic_rmw_add_i8(i64, i8) {
@@ -38,7 +38,7 @@ block0(v0: i64, v1: i8):
}

; block0:
; ldaddalb w1, w4, [x0]
; ldaddalb w1, w3, [x0]
; ret

function %atomic_rmw_sub_i64(i64, i64) {
@@ -48,8 +48,8 @@ block0(v0: i64, v1: i64):
}

; block0:
; sub x4, xzr, x1
; ldaddal x4, x6, [x0]
; sub x3, xzr, x1
; ldaddal x3, x5, [x0]
; ret

function %atomic_rmw_sub_i32(i64, i32) {
@@ -59,8 +59,8 @@ block0(v0: i64, v1: i32):
}

; block0:
; sub w4, wzr, w1
; ldaddal w4, w6, [x0]
; sub w3, wzr, w1
; ldaddal w3, w5, [x0]
; ret

function %atomic_rmw_sub_i16(i64, i16) {
@@ -70,8 +70,8 @@ block0(v0: i64, v1: i16):
}

; block0:
; sub w4, wzr, w1
; ldaddalh w4, w6, [x0]
; sub w3, wzr, w1
; ldaddalh w3, w5, [x0]
; ret

function %atomic_rmw_sub_i8(i64, i8) {
@@ -81,8 +81,8 @@ block0(v0: i64, v1: i8):
}

; block0:
; sub w4, wzr, w1
; ldaddalb w4, w6, [x0]
; sub w3, wzr, w1
; ldaddalb w3, w5, [x0]
; ret

function %atomic_rmw_and_i64(i64, i64) {
@@ -92,8 +92,8 @@ block0(v0: i64, v1: i64):
}

; block0:
; eon x4, x1, xzr
; ldclral x4, x6, [x0]
; eon x3, x1, xzr
; ldclral x3, x5, [x0]
; ret

function %atomic_rmw_and_i32(i64, i32) {
@@ -103,8 +103,8 @@ block0(v0: i64, v1: i32):
}

; block0:
; eon w4, w1, wzr
; ldclral w4, w6, [x0]
; eon w3, w1, wzr
; ldclral w3, w5, [x0]
; ret

function %atomic_rmw_and_i16(i64, i16) {
@@ -114,8 +114,8 @@ block0(v0: i64, v1: i16):
}

; block0:
; eon w4, w1, wzr
; ldclralh w4, w6, [x0]
; eon w3, w1, wzr
; ldclralh w3, w5, [x0]
; ret

function %atomic_rmw_and_i8(i64, i8) {
@@ -125,8 +125,8 @@ block0(v0: i64, v1: i8):
}

; block0:
; eon w4, w1, wzr
; ldclralb w4, w6, [x0]
; eon w3, w1, wzr
; ldclralb w3, w5, [x0]
; ret

function %atomic_rmw_nand_i64(i64, i64) {
@@ -220,7 +220,7 @@ block0(v0: i64, v1: i64):
}

; block0:
; ldsetal x1, x4, [x0]
; ldsetal x1, x3, [x0]
; ret

function %atomic_rmw_or_i32(i64, i32) {
@@ -230,7 +230,7 @@ block0(v0: i64, v1: i32):
}

; block0:
; ldsetal w1, w4, [x0]
; ldsetal w1, w3, [x0]
; ret

function %atomic_rmw_or_i16(i64, i16) {
@@ -240,7 +240,7 @@ block0(v0: i64, v1: i16):
}

; block0:
; ldsetalh w1, w4, [x0]
; ldsetalh w1, w3, [x0]
; ret

function %atomic_rmw_or_i8(i64, i8) {
@@ -250,7 +250,7 @@ block0(v0: i64, v1: i8):
}

; block0:
; ldsetalb w1, w4, [x0]
; ldsetalb w1, w3, [x0]
; ret

function %atomic_rmw_xor_i64(i64, i64) {
@@ -260,7 +260,7 @@ block0(v0: i64, v1: i64):
}

; block0:
; ldeoral x1, x4, [x0]
; ldeoral x1, x3, [x0]
; ret

function %atomic_rmw_xor_i32(i64, i32) {
@@ -270,7 +270,7 @@ block0(v0: i64, v1: i32):
}

; block0:
; ldeoral w1, w4, [x0]
; ldeoral w1, w3, [x0]
; ret

function %atomic_rmw_xor_i16(i64, i16) {
@@ -280,7 +280,7 @@ block0(v0: i64, v1: i16):
}

; block0:
; ldeoralh w1, w4, [x0]
; ldeoralh w1, w3, [x0]
; ret

function %atomic_rmw_xor_i8(i64, i8) {
@@ -290,7 +290,7 @@ block0(v0: i64, v1: i8):
}

; block0:
; ldeoralb w1, w4, [x0]
; ldeoralb w1, w3, [x0]
; ret

function %atomic_rmw_smax_i64(i64, i64) {
@@ -300,7 +300,7 @@ block0(v0: i64, v1: i64):
}

; block0:
; ldsmaxal x1, x4, [x0]
; ldsmaxal x1, x3, [x0]
; ret

function %atomic_rmw_smax_i32(i64, i32) {
@@ -310,7 +310,7 @@ block0(v0: i64, v1: i32):
}

; block0:
; ldsmaxal w1, w4, [x0]
; ldsmaxal w1, w3, [x0]
; ret

function %atomic_rmw_smax_i16(i64, i16) {
@@ -320,7 +320,7 @@ block0(v0: i64, v1: i16):
}

; block0:
; ldsmaxalh w1, w4, [x0]
; ldsmaxalh w1, w3, [x0]
; ret

function %atomic_rmw_smax_i8(i64, i8) {
@@ -330,7 +330,7 @@ block0(v0: i64, v1: i8):
}

; block0:
; ldsmaxalb w1, w4, [x0]
; ldsmaxalb w1, w3, [x0]
; ret

function %atomic_rmw_umax_i64(i64, i64) {
@@ -340,7 +340,7 @@ block0(v0: i64, v1: i64):
}

; block0:
; ldumaxal x1, x4, [x0]
; ldumaxal x1, x3, [x0]
; ret

function %atomic_rmw_umax_i32(i64, i32) {
@@ -350,7 +350,7 @@ block0(v0: i64, v1: i32):
}

; block0:
; ldumaxal w1, w4, [x0]
; ldumaxal w1, w3, [x0]
; ret

function %atomic_rmw_umax_i16(i64, i16) {
@@ -360,7 +360,7 @@ block0(v0: i64, v1: i16):
}

; block0:
; ldumaxalh w1, w4, [x0]
; ldumaxalh w1, w3, [x0]
; ret

function %atomic_rmw_umax_i8(i64, i8) {
@@ -370,7 +370,7 @@ block0(v0: i64, v1: i8):
}

; block0:
; ldumaxalb w1, w4, [x0]
; ldumaxalb w1, w3, [x0]
; ret

function %atomic_rmw_smin_i64(i64, i64) {
@@ -380,7 +380,7 @@ block0(v0: i64, v1: i64):
}

; block0:
; ldsminal x1, x4, [x0]
; ldsminal x1, x3, [x0]
; ret

function %atomic_rmw_smin_i32(i64, i32) {
@@ -390,7 +390,7 @@ block0(v0: i64, v1: i32):
}

; block0:
; ldsminal w1, w4, [x0]
; ldsminal w1, w3, [x0]
; ret

function %atomic_rmw_smin_i16(i64, i16) {
@@ -400,7 +400,7 @@ block0(v0: i64, v1: i16):
}

; block0:
; ldsminalh w1, w4, [x0]
; ldsminalh w1, w3, [x0]
; ret

function %atomic_rmw_smin_i8(i64, i8) {
@@ -410,7 +410,7 @@ block0(v0: i64, v1: i8):
}

; block0:
; ldsminalb w1, w4, [x0]
; ldsminalb w1, w3, [x0]
; ret

function %atomic_rmw_umin_i64(i64, i64) {
@@ -420,7 +420,7 @@ block0(v0: i64, v1: i64):
}

; block0:
; lduminal x1, x4, [x0]
; lduminal x1, x3, [x0]
; ret

function %atomic_rmw_umin_i32(i64, i32) {
@@ -430,7 +430,7 @@ block0(v0: i64, v1: i32):
}

; block0:
; lduminal w1, w4, [x0]
; lduminal w1, w3, [x0]
; ret

function %atomic_rmw_umin_i16(i64, i16) {
@@ -440,7 +440,7 @@ block0(v0: i64, v1: i16):
}

; block0:
; lduminalh w1, w4, [x0]
; lduminalh w1, w3, [x0]
; ret

function %atomic_rmw_umin_i8(i64, i8) {
@@ -450,6 +450,6 @@ block0(v0: i64, v1: i8):
}

; block0:
; lduminalb w1, w4, [x0]
; lduminalb w1, w3, [x0]
; ret

@@ -51,9 +51,9 @@ block0(v0: i128):
}

; block0:
; rbit x6, x0
; rbit x5, x0
; rbit x0, x1
; mov x1, x6
; mov x1, x5
; ret

function %b(i8) -> i8 {
@@ -107,10 +107,10 @@ block0(v0: i128):
}

; block0:
; clz x6, x1
; clz x8, x0
; lsr x10, x6, #6
; madd x0, x8, x10, x6
; clz x5, x1
; clz x7, x0
; lsr x9, x5, #6
; madd x0, x7, x9, x5
; movz w1, #0
; ret

@@ -165,14 +165,14 @@ block0(v0: i128):
}

; block0:
; cls x6, x0
; cls x8, x1
; eon x10, x1, x0
; lsr x12, x10, #63
; madd x14, x6, x12, x12
; subs xzr, x8, #63
; csel x1, x14, xzr, eq
; add x0, x1, x8
; cls x5, x0
; cls x7, x1
; eon x9, x1, x0
; lsr x11, x9, #63
; madd x13, x5, x11, x11
; subs xzr, x7, #63
; csel x0, x13, xzr, eq
; add x0, x0, x7
; movz w1, #0
; ret

@@ -229,12 +229,12 @@ block0(v0: i128):
}

; block0:
; rbit x6, x0
; rbit x8, x1
; clz x10, x6
; clz x12, x8
; lsr x14, x10, #6
; madd x0, x12, x14, x10
; rbit x5, x0
; rbit x7, x1
; clz x9, x5
; clz x11, x7
; lsr x13, x9, #6
; madd x0, x11, x13, x9
; movz w1, #0
; ret

@@ -245,11 +245,11 @@ block0(v0: i128):
}

; block0:
; fmov d7, x0
; mov v7.d[1], v7.d[1], x1
; cnt v18.16b, v7.16b
; addv b20, v18.16b
; umov w0, v20.b[0]
; fmov d6, x0
; mov v6.d[1], v6.d[1], x1
; cnt v17.16b, v6.16b
; addv b19, v17.16b
; umov w0, v19.b[0]
; movz w1, #0
; ret

@@ -821,15 +821,15 @@ block0(v0: i128, v1: i8):
}

; block0:
; lsl x8, x0, x2
; lsl x10, x1, x2
; orn w12, wzr, w2
; lsr x14, x0, #1
; lsr x0, x14, x12
; orr x3, x10, x0
; lsl x6, x0, x2
; lsl x8, x1, x2
; orn w10, wzr, w2
; lsr x12, x0, #1
; lsr x14, x12, x10
; orr x1, x8, x14
; ands xzr, x2, #64
; csel x0, xzr, x8, ne
; csel x1, x8, x3, ne
; csel x0, xzr, x6, ne
; csel x1, x6, x1, ne
; ret

function %ishl_i128_i128(i128, i128) -> i128 {
@@ -839,15 +839,15 @@ block0(v0: i128, v1: i128):
}

; block0:
; lsl x10, x0, x2
; lsl x12, x1, x2
; orn w14, wzr, w2
; lsr x0, x0, #1
; lsr x3, x0, x14
; orr x4, x12, x3
; lsl x7, x0, x2
; lsl x9, x1, x2
; orn w11, wzr, w2
; lsr x13, x0, #1
; lsr x15, x13, x11
; orr x1, x9, x15
; ands xzr, x2, #64
; csel x0, xzr, x10, ne
; csel x1, x10, x4, ne
; csel x0, xzr, x7, ne
; csel x1, x7, x1, ne
; ret

function %ushr_i128_i8(i128, i8) -> i128 {
@@ -857,15 +857,15 @@ block0(v0: i128, v1: i8):
}

; block0:
; lsr x8, x0, x2
; lsr x10, x1, x2
; orn w12, wzr, w2
; lsl x14, x1, #1
; lsl x0, x14, x12
; orr x3, x8, x0
; lsr x6, x0, x2
; lsr x8, x1, x2
; orn w10, wzr, w2
; lsl x12, x1, #1
; lsl x14, x12, x10
; orr x0, x6, x14
; ands xzr, x2, #64
; csel x0, x10, x3, ne
; csel x1, xzr, x10, ne
; csel x0, x8, x0, ne
; csel x1, xzr, x8, ne
; ret

function %ushr_i128_i128(i128, i128) -> i128 {
@@ -875,15 +875,15 @@ block0(v0: i128, v1: i128):
}

; block0:
; lsr x10, x0, x2
; lsr x12, x1, x2
; orn w14, wzr, w2
; lsl x0, x1, #1
; lsl x3, x0, x14
; orr x4, x10, x3
; lsr x7, x0, x2
; lsr x9, x1, x2
; orn w11, wzr, w2
; lsl x13, x1, #1
; lsl x15, x13, x11
; orr x1, x7, x15
; ands xzr, x2, #64
; csel x0, x12, x4, ne
; csel x1, xzr, x12, ne
; csel x0, x9, x1, ne
; csel x1, xzr, x9, ne
; ret

function %sshr_i128_i8(i128, i8) -> i128 {
@@ -893,16 +893,16 @@ block0(v0: i128, v1: i8):
}

; block0:
; lsr x8, x0, x2
; asr x10, x1, x2
; orn w12, wzr, w2
; lsl x14, x1, #1
; lsl x0, x14, x12
; asr x3, x1, #63
; orr x4, x8, x0
; lsr x6, x0, x2
; asr x8, x1, x2
; orn w10, wzr, w2
; lsl x12, x1, #1
; lsl x14, x12, x10
; asr x1, x1, #63
; orr x3, x6, x14
; ands xzr, x2, #64
; csel x0, x10, x4, ne
; csel x1, x3, x10, ne
; csel x0, x8, x3, ne
; csel x1, x1, x8, ne
; ret

function %sshr_i128_i128(i128, i128) -> i128 {
@@ -912,14 +912,15 @@ block0(v0: i128, v1: i128):
}

; block0:
; lsr x10, x0, x2
; asr x12, x1, x2
; orn w14, wzr, w2
; lsl x0, x1, #1
; lsl x3, x0, x14
; asr x4, x1, #63
; orr x6, x10, x3
; lsr x7, x0, x2
; asr x9, x1, x2
; orn w11, wzr, w2
; lsl x13, x1, #1
; lsl x15, x13, x11
; asr x1, x1, #63
; orr x3, x7, x15
; ands xzr, x2, #64
; csel x0, x12, x6, ne
; csel x1, x4, x12, ne
; csel x0, x9, x3, ne
; csel x1, x1, x9, ne
; ret

@@ -105,8 +105,8 @@ block0(v0: i8):
}

; block0:
; mov x14, x0
; mov x8, x1
; mov x15, x0
; mov x13, x1
; movz x0, #42
; movz x1, #42
; movz x2, #42
@@ -115,7 +115,7 @@ block0(v0: i8):
; movz x5, #42
; movz x6, #42
; movz x7, #42
; strb w14, [x8]
; strb w15, [x13]
; ret

function %f8() {
@@ -376,18 +376,18 @@ block0(v0: i128, v1: i64):
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; block0:
; mov x14, x2
; mov x11, x2
; sub sp, sp, #16
; virtual_sp_offset_adjust 16
; mov x13, x0
; mov x15, x1
; mov x2, x13
; mov x3, x15
; mov x4, x13
; mov x5, x15
; mov x6, x14
; str x13, [sp]
; str x15, [sp, #8]
; mov x10, x0
; mov x12, x1
; mov x2, x10
; mov x3, x12
; mov x4, x10
; mov x5, x12
; mov x6, x11
; str x10, [sp]
; str x12, [sp, #8]
; ldr x7, 8 ; b 12 ; data TestCase(%f14) + 0
; blr x7
; add sp, sp, #16
@@ -419,18 +419,18 @@ block0(v0: i128, v1: i64):
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; block0:
; mov x14, x2
; mov x11, x2
; sub sp, sp, #16
; virtual_sp_offset_adjust 16
; mov x13, x0
; mov x15, x1
; mov x2, x13
; mov x3, x15
; mov x4, x13
; mov x5, x15
; mov x6, x14
; str x13, [sp]
; str x15, [sp, #8]
; mov x10, x0
; mov x12, x1
; mov x2, x10
; mov x3, x12
; mov x4, x10
; mov x5, x12
; mov x6, x11
; str x10, [sp]
; str x12, [sp, #8]
; ldr x7, 8 ; b 12 ; data TestCase(%f15) + 0
; blr x7
; add sp, sp, #16

@@ -45,10 +45,10 @@ block0(v0: i128, v1: i128):

; block0:
; subs xzr, x0, x2
; cset x11, lo
; cset x8, lo
; subs xzr, x1, x3
; cset x14, lt
; csel x0, x11, x14, eq
; cset x11, lt
; csel x0, x8, x11, eq
; ret

function %icmp_ult_i128(i128, i128) -> b1 {
@@ -59,10 +59,10 @@ block0(v0: i128, v1: i128):

; block0:
; subs xzr, x0, x2
; cset x11, lo
; cset x8, lo
; subs xzr, x1, x3
; cset x14, lo
; csel x0, x11, x14, eq
; cset x11, lo
; csel x0, x8, x11, eq
; ret

function %icmp_sle_i128(i128, i128) -> b1 {
@@ -73,10 +73,10 @@ block0(v0: i128, v1: i128):

; block0:
; subs xzr, x0, x2
; cset x11, ls
; cset x8, ls
; subs xzr, x1, x3
; cset x14, le
; csel x0, x11, x14, eq
; cset x11, le
; csel x0, x8, x11, eq
; ret

function %icmp_ule_i128(i128, i128) -> b1 {
@@ -87,10 +87,10 @@ block0(v0: i128, v1: i128):

; block0:
; subs xzr, x0, x2
; cset x11, ls
; cset x8, ls
; subs xzr, x1, x3
; cset x14, ls
; csel x0, x11, x14, eq
; cset x11, ls
; csel x0, x8, x11, eq
; ret

function %icmp_sgt_i128(i128, i128) -> b1 {
@@ -101,10 +101,10 @@ block0(v0: i128, v1: i128):

; block0:
; subs xzr, x0, x2
; cset x11, hi
; cset x8, hi
; subs xzr, x1, x3
; cset x14, gt
; csel x0, x11, x14, eq
; cset x11, gt
; csel x0, x8, x11, eq
; ret

function %icmp_ugt_i128(i128, i128) -> b1 {
@@ -115,10 +115,10 @@ block0(v0: i128, v1: i128):

; block0:
; subs xzr, x0, x2
; cset x11, hi
; cset x8, hi
; subs xzr, x1, x3
; cset x14, hi
; csel x0, x11, x14, eq
; cset x11, hi
; csel x0, x8, x11, eq
; ret

function %icmp_sge_i128(i128, i128) -> b1 {
@@ -129,10 +129,10 @@ block0(v0: i128, v1: i128):

; block0:
; subs xzr, x0, x2
; cset x11, hs
; cset x8, hs
; subs xzr, x1, x3
; cset x14, ge
; csel x0, x11, x14, eq
; cset x11, ge
; csel x0, x8, x11, eq
; ret

function %icmp_uge_i128(i128, i128) -> b1 {
@@ -143,10 +143,10 @@ block0(v0: i128, v1: i128):

; block0:
; subs xzr, x0, x2
; cset x11, hs
; cset x8, hs
; subs xzr, x1, x3
; cset x14, hs
; csel x0, x11, x14, eq
; cset x11, hs
; csel x0, x8, x11, eq
; ret

function %f(i64, i64) -> i64 {
@@ -207,8 +207,8 @@ block1:
}

; block0:
; orr x4, x0, x1
; cbz x4, label1 ; b label2
; orr x3, x0, x1
; cbz x3, label1 ; b label2
; block1:
; b label3
; block2:
@@ -227,8 +227,8 @@ block1:
}

; block0:
; orr x4, x0, x1
; cbnz x4, label1 ; b label2
; orr x3, x0, x1
; cbnz x3, label1 ; b label2
; block1:
; b label3
; block2:
@@ -287,11 +287,11 @@ block1:

; block0:
; subs xzr, x0, x2
; cset x9, lo
; cset x6, lo
; subs xzr, x1, x3
; cset x12, lt
; csel x9, x9, x12, eq
; subs xzr, xzr, x9
; cset x9, lt
; csel x6, x6, x9, eq
; subs xzr, xzr, x6
; b.lt label1 ; b label2
; block1:
; b label3
@@ -311,11 +311,11 @@ block1:

; block0:
; subs xzr, x0, x2
; cset x9, lo
; cset x6, lo
; subs xzr, x1, x3
; cset x12, lo
; csel x9, x9, x12, eq
; subs xzr, xzr, x9
; cset x9, lo
; csel x6, x6, x9, eq
; subs xzr, xzr, x6
; b.lo label1 ; b label2
; block1:
; b label3
@@ -335,12 +335,12 @@ block1:

; block0:
; subs xzr, x0, x2
; cset x9, ls
; cset x6, ls
; subs xzr, x1, x3
; cset x12, le
; csel x9, x9, x12, eq
; movz x12, #1
; subs xzr, x12, x9
; cset x9, le
; csel x6, x6, x9, eq
; movz x9, #1
; subs xzr, x9, x6
; b.le label1 ; b label2
; block1:
; b label3
@@ -360,12 +360,12 @@ block1:

; block0:
; subs xzr, x0, x2
; cset x9, ls
; cset x6, ls
; subs xzr, x1, x3
; cset x12, ls
; csel x9, x9, x12, eq
; movz x12, #1
; subs xzr, x12, x9
; cset x9, ls
; csel x6, x6, x9, eq
; movz x9, #1
; subs xzr, x9, x6
; b.ls label1 ; b label2
; block1:
; b label3
@@ -385,11 +385,11 @@ block1:

; block0:
; subs xzr, x0, x2
; cset x9, hi
; cset x6, hi
; subs xzr, x1, x3
; cset x12, gt
; csel x9, x9, x12, eq
; subs xzr, x9, xzr
; cset x9, gt
; csel x6, x6, x9, eq
; subs xzr, x6, xzr
; b.gt label1 ; b label2
; block1:
; b label3
@@ -409,11 +409,11 @@ block1:

; block0:
; subs xzr, x0, x2
; cset x9, hi
; cset x6, hi
; subs xzr, x1, x3
; cset x12, hi
; csel x9, x9, x12, eq
; subs xzr, x9, xzr
; cset x9, hi
; csel x6, x6, x9, eq
; subs xzr, x6, xzr
; b.hi label1 ; b label2
; block1:
; b label3
@@ -433,12 +433,12 @@ block1:

; block0:
; subs xzr, x0, x2
; cset x9, hs
; cset x6, hs
; subs xzr, x1, x3
; cset x12, ge
; csel x9, x9, x12, eq
; movz x12, #1
; subs xzr, x9, x12
; cset x9, ge
; csel x6, x6, x9, eq
; movz x9, #1
; subs xzr, x6, x9
; b.ge label1 ; b label2
; block1:
; b label3
@@ -458,12 +458,12 @@ block1:

; block0:
; subs xzr, x0, x2
; cset x9, hs
; cset x6, hs
; subs xzr, x1, x3
; cset x12, hs
; csel x9, x9, x12, eq
; movz x12, #1
; subs xzr, x9, x12
; cset x9, hs
; csel x6, x6, x9, eq
; movz x9, #1
; subs xzr, x6, x9
; b.hs label1 ; b label2
; block1:
; b label3
@@ -471,4 +471,3 @@ block1:
; b label3
; block3:
; ret

@@ -11,8 +11,8 @@ block0(v0: i8, v1: i64, v2: i64):
}

; block0:
; uxtb w8, w0
; subs wzr, w8, #42
; uxtb w6, w0
; subs wzr, w6, #42
; csel x0, x1, x2, eq
; ret

@@ -37,9 +37,9 @@ block0(v0: i8, v1: i8, v2: i8):
}

; block0:
; and w7, w1, w0
; bic w9, w2, w0
; orr w0, w7, w9
; and w5, w1, w0
; bic w7, w2, w0
; orr w0, w5, w7
; ret

function %i(b1, i8, i8) -> i8 {
@@ -49,8 +49,8 @@ block0(v0: b1, v1: i8, v2: i8):
}

; block0:
; and w8, w0, #1
; subs wzr, w8, wzr
; and w6, w0, #1
; subs wzr, w6, wzr
; csel x0, x1, x2, ne
; ret

@@ -74,8 +74,8 @@ block0(v0: b1, v1: i128, v2: i128):
}

; block0:
; and w14, w0, #1
; subs wzr, w14, wzr
; and w10, w0, #1
; subs wzr, w10, wzr
; csel x0, x2, x4, ne
; csel x1, x3, x5, ne
; ret

@@ -14,9 +14,9 @@ block0(v0: i8, v1: i8):
}

; block0:
; dup v16.16b, w0
; dup v17.16b, w1
; add v0.16b, v16.16b, v17.16b
; dup v7.16b, w0
; dup v16.16b, w1
; add v0.16b, v7.16b, v16.16b
; ret

function %i16x8_splat_add(i16, i16) -> i16x8 {
@@ -32,9 +32,9 @@ block0(v0: i16, v1: i16):
}

; block0:
; dup v16.8h, w0
; dup v17.8h, w1
; add v0.8h, v16.8h, v17.8h
; dup v7.8h, w0
; dup v16.8h, w1
; add v0.8h, v7.8h, v16.8h
; ret

function %i32x4_splat_mul(i32, i32) -> i32x4 {
@@ -50,9 +50,9 @@ block0(v0: i32, v1: i32):
}

; block0:
; dup v16.4s, w0
; dup v17.4s, w1
; mul v0.4s, v16.4s, v17.4s
; dup v7.4s, w0
; dup v16.4s, w1
; mul v0.4s, v7.4s, v16.4s
; ret

function %i64x2_splat_sub(i64, i64) -> i64x2 {
@@ -68,9 +68,9 @@ block0(v0: i64, v1: i64):
}

; block0:
; dup v16.2d, x0
; dup v17.2d, x1
; sub v0.2d, v16.2d, v17.2d
; dup v7.2d, x0
; dup v16.2d, x1
; sub v0.2d, v7.2d, v16.2d
; ret

function %f32x4_splat_add(f32, f32) -> f32x4 {
@@ -86,9 +86,9 @@ block0(v0: f32, v1: f32):
}

; block0:
; dup v16.4s, v0.s[0]
; dup v17.4s, v1.s[0]
; fadd v0.4s, v16.4s, v17.4s
; dup v7.4s, v0.s[0]
; dup v16.4s, v1.s[0]
; fadd v0.4s, v7.4s, v16.4s
; ret

function %f64x2_splat_sub(f64, f64) -> f64x2 {
@@ -104,9 +104,9 @@ block0(v0: f64, v1: f64):
}

; block0:
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fsub v0.2d, v16.2d, v17.2d
; dup v7.2d, v0.d[0]
; dup v16.2d, v1.d[0]
; fsub v0.2d, v7.2d, v16.2d
; ret

function %f64x2_splat_mul(f64, f64) -> f64x2 {
@@ -122,9 +122,9 @@ block0(v0: f64, v1: f64):
}

; block0:
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fmul v0.2d, v16.2d, v17.2d
; dup v7.2d, v0.d[0]
; dup v16.2d, v1.d[0]
; fmul v0.2d, v7.2d, v16.2d
; ret

function %f64x2_splat_div(f64, f64) -> f64x2 {
@@ -140,9 +140,9 @@ block0(v0: f64, v1: f64):
}

; block0:
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fdiv v0.2d, v16.2d, v17.2d
; dup v7.2d, v0.d[0]
; dup v16.2d, v1.d[0]
; fdiv v0.2d, v7.2d, v16.2d
; ret

function %f64x2_splat_min(f64, f64) -> f64x2 {
@@ -158,9 +158,9 @@ block0(v0: f64, v1: f64):
}

; block0:
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fmin v0.2d, v16.2d, v17.2d
; dup v7.2d, v0.d[0]
; dup v16.2d, v1.d[0]
; fmin v0.2d, v7.2d, v16.2d
; ret

function %f64x2_splat_max(f64, f64) -> f64x2 {
@@ -176,9 +176,9 @@ block0(v0: f64, v1: f64):
}

; block0:
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fmax v0.2d, v16.2d, v17.2d
; dup v7.2d, v0.d[0]
; dup v16.2d, v1.d[0]
; fmax v0.2d, v7.2d, v16.2d
; ret

function %f64x2_splat_min_pseudo(f64, f64) -> f64x2 {
@@ -194,10 +194,10 @@ block0(v0: f64, v1: f64):
}

; block0:
; dup v17.2d, v0.d[0]
; dup v18.2d, v1.d[0]
; fcmgt v0.2d, v17.2d, v18.2d
; bsl v0.16b, v0.16b, v18.16b, v17.16b
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fcmgt v0.2d, v16.2d, v17.2d
; bsl v0.16b, v0.16b, v17.16b, v16.16b
; ret

function %f64x2_splat_max_pseudo(f64, f64) -> f64x2 {
@@ -213,9 +213,9 @@ block0(v0: f64, v1: f64):
}

; block0:
; dup v17.2d, v0.d[0]
; dup v18.2d, v1.d[0]
; fcmgt v0.2d, v18.2d, v17.2d
; bsl v0.16b, v0.16b, v18.16b, v17.16b
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fcmgt v0.2d, v17.2d, v16.2d
; bsl v0.16b, v0.16b, v17.16b, v16.16b
; ret

@@ -109,15 +109,15 @@ block0(v0: i8, v1: i16, v2: i32, v3: i64):
}

; block0:
; uxtb w0, w0
; ucvtf s26, w0
; uxth w0, w1
; ucvtf s27, w0
; ucvtf s25, w2
; ucvtf s28, x3
; fadd s26, s26, s27
; fadd s25, s26, s25
; fadd s0, s25, s28
; uxtb w13, w0
; ucvtf s23, w13
; uxth w13, w1
; ucvtf s24, w13
; ucvtf s22, w2
; ucvtf s25, x3
; fadd s23, s23, s24
; fadd s22, s23, s22
; fadd s0, s22, s25
; ret

function %f11(i32x4) -> f64x2 {

@@ -309,8 +309,8 @@ block0(v0: f32, v1: f32):
}

; block0:
; ushr v6.2s, v1.2s, #31
; sli v0.2s, v0.2s, v6.2s, #31
; ushr v5.2s, v1.2s, #31
; sli v0.2s, v0.2s, v5.2s, #31
; ret

function %f32(f64, f64) -> f64 {
@@ -320,8 +320,8 @@ block0(v0: f64, v1: f64):
}

; block0:
; ushr d6, d1, #63
; sli d0, d0, d6, #63
; ushr d5, d1, #63
; sli d0, d0, d5, #63
; ret

function %f33(f32) -> i32 {
@@ -951,8 +951,8 @@ block0(v0: f32x2, v1: f32x2):
}

; block0:
; ushr v6.2s, v1.2s, #31
; sli v0.2s, v0.2s, v6.2s, #31
; ushr v5.2s, v1.2s, #31
; sli v0.2s, v0.2s, v5.2s, #31
; ret

function %f82(f32x4, f32x4) -> f32x4 {
@@ -962,8 +962,8 @@ block0(v0: f32x4, v1: f32x4):
}

; block0:
; ushr v6.4s, v1.4s, #31
; sli v0.4s, v0.4s, v6.4s, #31
; ushr v5.4s, v1.4s, #31
; sli v0.4s, v0.4s, v5.4s, #31
; ret

function %f83(f64x2, f64x2) -> f64x2 {
@@ -973,7 +973,7 @@ block0(v0: f64x2, v1: f64x2):
}

; block0:
; ushr v6.2d, v1.2d, #63
; sli v0.2d, v0.2d, v6.2d, #63
; ushr v5.2d, v1.2d, #63
; sli v0.2d, v0.2d, v5.2d, #63
; ret

@@ -14,16 +14,16 @@ block0(v0: i64, v1: i32):
}

; block0:
; mov w10, w1
; ldr x11, [x0]
; mov x11, x11
; subs xzr, x10, x11
; mov w9, w1
; ldr x10, [x0]
; mov x10, x10
; subs xzr, x9, x10
; b.ls label1 ; b label2
; block1:
; add x12, x0, x1, UXTW
; subs xzr, x10, x11
; movz x13, #0
; csel x0, x13, x12, hi
; add x11, x0, x1, UXTW
; subs xzr, x9, x10
; movz x12, #0
; csel x0, x12, x11, hi
; csdb
; ret
; block2:
@@ -39,15 +39,16 @@ block0(v0: i64, v1: i32):
}

; block0:
; mov w8, w1
; subs xzr, x8, #65536
; mov w7, w1
; subs xzr, x7, #65536
; b.ls label1 ; b label2
; block1:
; add x10, x0, x1, UXTW
; subs xzr, x8, #65536
; movz x11, #0
; csel x0, x11, x10, hi
; add x9, x0, x1, UXTW
; subs xzr, x7, #65536
; movz x10, #0
; csel x0, x10, x9, hi
; csdb
; ret
; block2:
; udf #0xc11f

@@ -69,28 +69,28 @@ block3(v7: r64, v8: r64):
; block0:
; str x1, [sp, #16]
; str x0, [sp, #8]
; ldr x2, 8 ; b 12 ; data TestCase(%f) + 0
; blr x2
; mov x4, sp
; ldr x11, [sp, #8]
; str x11, [x4]
; and w5, w0, #1
; cbz x5, label1 ; b label3
; ldr x1, 8 ; b 12 ; data TestCase(%f) + 0
; blr x1
; mov x3, sp
; ldr x9, [sp, #8]
; str x9, [x3]
; and w4, w0, #1
; cbz x4, label1 ; b label3
; block1:
; b label2
; block2:
; mov x1, x11
; mov x1, x9
; ldr x0, [sp, #16]
; b label5
; block3:
; b label4
; block4:
; mov x0, x11
; mov x0, x9
; ldr x1, [sp, #16]
; b label5
; block5:
; mov x6, sp
; ldr x2, [x6]
; mov x5, sp
; ldr x2, [x5]
; add sp, sp, #32
; ldp fp, lr, [sp], #16
; ret

@@ -13,28 +13,28 @@ block0(v0: i128, v1: i128):
}

; block0:
; orr x10, xzr, #128
; sub x12, x10, x2
; lsr x14, x0, x2
; lsr x3, x1, x2
; orn w4, wzr, w2
; lsl x5, x1, #1
; lsl x6, x5, x4
; orr x8, x14, x6
; orr x7, xzr, #128
; sub x9, x7, x2
; lsr x11, x0, x2
; lsr x13, x1, x2
; orn w15, wzr, w2
; lsl x3, x1, #1
; lsl x3, x3, x15
; orr x5, x11, x3
; ands xzr, x2, #64
; csel x11, x3, x8, ne
; csel x13, xzr, x3, ne
; lsl x15, x0, x12
; lsl x1, x1, x12
; orn w3, wzr, w12
; lsr x5, x0, #1
; lsr x7, x5, x3
; orr x9, x1, x7
; ands xzr, x12, #64
; csel x12, xzr, x15, ne
; csel x14, x15, x9, ne
; orr x1, x13, x14
; orr x0, x11, x12
; csel x8, x13, x5, ne
; csel x10, xzr, x13, ne
; lsl x12, x0, x9
; lsl x14, x1, x9
; orn w1, wzr, w9
; lsr x2, x0, #1
; lsr x4, x2, x1
; orr x6, x14, x4
; ands xzr, x9, #64
; csel x9, xzr, x12, ne
; csel x11, x12, x6, ne
; orr x1, x10, x11
; orr x0, x8, x9
; ret

function %f0(i64, i64) -> i64 {
@@ -64,13 +64,13 @@ block0(v0: i16, v1: i16):
}

; block0:
; uxth w5, w0
; and w7, w1, #15
; sub w9, w7, #16
; sub w11, wzr, w9
; lsr w13, w5, w7
; lsl w15, w5, w11
; orr w0, w15, w13
; uxth w4, w0
; and w6, w1, #15
; sub w8, w6, #16
; sub w10, wzr, w8
; lsr w12, w4, w6
; lsl w14, w4, w10
; orr w0, w14, w12
; ret

function %f3(i8, i8) -> i8 {
@@ -80,13 +80,13 @@ block0(v0: i8, v1: i8):
}

; block0:
; uxtb w5, w0
; and w7, w1, #7
; sub w9, w7, #8
; sub w11, wzr, w9
; lsr w13, w5, w7
; lsl w15, w5, w11
; orr w0, w15, w13
; uxtb w4, w0
; and w6, w1, #7
; sub w8, w6, #8
; sub w10, wzr, w8
; lsr w12, w4, w6
; lsl w14, w4, w10
; orr w0, w14, w12
; ret

function %i128_rotl(i128, i128) -> i128 {
@@ -96,28 +96,28 @@ block0(v0: i128, v1: i128):
}

; block0:
; orr x10, xzr, #128
; sub x12, x10, x2
; lsl x14, x0, x2
; lsl x3, x1, x2
; orn w4, wzr, w2
; lsr x5, x0, #1
; lsr x6, x5, x4
; orr x8, x3, x6
; orr x7, xzr, #128
; sub x9, x7, x2
; lsl x11, x0, x2
; lsl x13, x1, x2
; orn w15, wzr, w2
; lsr x3, x0, #1
; lsr x3, x3, x15
; orr x5, x13, x3
; ands xzr, x2, #64
; csel x8, xzr, x11, ne
; csel x10, x11, x5, ne
; lsr x12, x0, x9
; lsr x14, x1, x9
; orn w0, wzr, w9
; lsl x2, x1, #1
; lsl x4, x2, x0
; orr x6, x12, x4
; ands xzr, x9, #64
; csel x9, x14, x6, ne
; csel x11, xzr, x14, ne
; csel x13, x14, x8, ne
; lsr x15, x0, x12
; lsr x2, x1, x12
; orn w3, wzr, w12
; lsl x5, x1, #1
; lsl x7, x5, x3
; orr x9, x15, x7
; ands xzr, x12, #64
; csel x12, x2, x9, ne
; csel x14, xzr, x2, ne
; orr x0, x11, x12
; orr x1, x13, x14
; orr x0, x8, x9
; orr x1, x10, x11
; ret

function %f4(i64, i64) -> i64 {
@@ -127,8 +127,8 @@ block0(v0: i64, v1: i64):
}

; block0:
; sub x5, xzr, x1
; ror x0, x0, x5
; sub x4, xzr, x1
; ror x0, x0, x4
; ret

function %f5(i32, i32) -> i32 {
@@ -138,8 +138,8 @@ block0(v0: i32, v1: i32):
}

; block0:
; sub w5, wzr, w1
; ror w0, w0, w5
; sub w4, wzr, w1
; ror w0, w0, w4
; ret

function %f6(i16, i16) -> i16 {
@@ -149,14 +149,14 @@ block0(v0: i16, v1: i16):
}

; block0:
; sub w5, wzr, w1
; uxth w7, w0
; and w9, w5, #15
; sub w11, w9, #16
; sub w13, wzr, w11
; lsr w15, w7, w9
; lsl w1, w7, w13
; orr w0, w1, w15
; sub w4, wzr, w1
; uxth w6, w0
; and w8, w4, #15
; sub w10, w8, #16
; sub w12, wzr, w10
; lsr w14, w6, w8
; lsl w0, w6, w12
; orr w0, w0, w14
; ret

function %f7(i8, i8) -> i8 {
@@ -166,14 +166,14 @@ block0(v0: i8, v1: i8):
}

; block0:
; sub w5, wzr, w1
; uxtb w7, w0
; and w9, w5, #7
; sub w11, w9, #8
; sub w13, wzr, w11
; lsr w15, w7, w9
; lsl w1, w7, w13
; orr w0, w1, w15
; sub w4, wzr, w1
; uxtb w6, w0
; and w8, w4, #7
; sub w10, w8, #8
; sub w12, wzr, w10
; lsr w14, w6, w8
; lsl w0, w6, w12
; orr w0, w0, w14
; ret

function %f8(i64, i64) -> i64 {
@@ -203,9 +203,9 @@ block0(v0: i16, v1: i16):
}

; block0:
; uxth w5, w0
; and w7, w1, #15
; lsr w0, w5, w7
; uxth w4, w0
; and w6, w1, #15
; lsr w0, w4, w6
; ret

function %f11(i8, i8) -> i8 {
@@ -215,9 +215,9 @@ block0(v0: i8, v1: i8):
}

; block0:
; uxtb w5, w0
; and w7, w1, #7
; lsr w0, w5, w7
; uxtb w4, w0
; and w6, w1, #7
; lsr w0, w4, w6
; ret

function %f12(i64, i64) -> i64 {
@@ -247,8 +247,8 @@ block0(v0: i16, v1: i16):
}

; block0:
; and w5, w1, #15
; lsl w0, w0, w5
; and w4, w1, #15
; lsl w0, w0, w4
; ret

function %f15(i8, i8) -> i8 {
@@ -258,8 +258,8 @@ block0(v0: i8, v1: i8):
}

; block0:
; and w5, w1, #7
; lsl w0, w0, w5
; and w4, w1, #7
; lsl w0, w0, w4
; ret

function %f16(i64, i64) -> i64 {
@@ -289,9 +289,9 @@ block0(v0: i16, v1: i16):
}

; block0:
; sxth w5, w0
; and w7, w1, #15
; asr w0, w5, w7
; sxth w4, w0
; and w6, w1, #15
; asr w0, w4, w6
; ret

function %f19(i8, i8) -> i8 {
@@ -301,9 +301,9 @@ block0(v0: i8, v1: i8):
}

; block0:
; sxtb w5, w0
; and w7, w1, #7
; asr w0, w5, w7
; sxtb w4, w0
; and w6, w1, #7
; asr w0, w4, w6
; ret

function %f20(i64) -> i64 {

@@ -69,13 +69,13 @@ block0(v0: i64x2, v1: i64x2):
}

; block0:
; movz x6, #1
; dup v6.2d, x6
; orr v17.16b, v0.16b, v1.16b
; and v19.16b, v17.16b, v6.16b
; ushr v21.2d, v0.2d, #1
; ushr v23.2d, v1.2d, #1
; add v25.2d, v21.2d, v23.2d
; add v0.2d, v19.2d, v25.2d
; movz x5, #1
; dup v5.2d, x5
; orr v16.16b, v0.16b, v1.16b
; and v18.16b, v16.16b, v5.16b
; ushr v20.2d, v0.2d, #1
; ushr v22.2d, v1.2d, #1
; add v24.2d, v20.2d, v22.2d
; add v0.2d, v18.2d, v24.2d
; ret

@@ -191,11 +191,11 @@ block0(v0: i8x16, v1: i32):
}

; block0:
; movz x5, #3
; and w7, w5, #7
; sub x9, xzr, x7
; dup v19.16b, w9
; sshl v0.16b, v0.16b, v19.16b
; movz x4, #3
; and w6, w4, #7
; sub x8, xzr, x6
; dup v18.16b, w8
; sshl v0.16b, v0.16b, v18.16b
; ret

function %sshr_i64x2(i64x2, i32) -> i64x2 {
@@ -205,9 +205,9 @@ block0(v0: i64x2, v1: i32):
}

; block0:
; and w5, w0, #63
; sub x7, xzr, x5
; dup v17.2d, x7
; sshl v0.2d, v0.2d, v17.2d
; and w4, w0, #63
; sub x6, xzr, x4
; dup v16.2d, x6
; sshl v0.2d, v0.2d, v16.2d
; ret

@@ -86,9 +86,9 @@ block0(v0: i64, v1: i64):
}

; block0:
; ldrb w8, [x0]
; ldrb w7, [x0]
; ld1r { v0.16b }, [x1]
; dup v1.16b, w8
; dup v1.16b, w7
; ret

function %f8(i64, i64) -> i8x16, i8x16 {
@@ -100,9 +100,9 @@ block0(v0: i64, v1: i64):
}

; block0:
; ldrb w8, [x0]
; dup v0.16b, w8
; dup v1.16b, w8
; ldrb w7, [x0]
; dup v0.16b, w7
; dup v1.16b, w7
; ret

function %f9() -> i32x2 {

@@ -442,8 +442,8 @@ block0(v0: i128):
; mov fp, sp
; sub sp, sp, #16
; block0:
; mov x5, sp
; stp x0, x1, [x5]
; mov x4, sp
; stp x0, x1, [x4]
; add sp, sp, #16
; ldp fp, lr, [sp], #16
; ret
@@ -461,8 +461,8 @@ block0(v0: i128):
; mov fp, sp
; sub sp, sp, #32
; block0:
; add x5, sp, #32
; stp x0, x1, [x5]
; add x4, sp, #32
; stp x0, x1, [x4]
; add sp, sp, #32
; ldp fp, lr, [sp], #16
; ret
@@ -482,8 +482,8 @@ block0(v0: i128):
; movk w16, w16, #1, LSL #16
; sub sp, sp, x16, UXTX
; block0:
; mov x5, sp
; stp x0, x1, [x5]
; mov x4, sp
; stp x0, x1, [x4]
; movz w16, #34480
; movk w16, w16, #1, LSL #16
; add sp, sp, x16, UXTX