s390x: Support both big- and little-endian vector lane order (#4682)

This implements the s390x back-end portion of the solution for
https://github.com/bytecodealliance/wasmtime/issues/4566

We now support both big- and little-endian vector lane order
in code generation.  The order used for a function is determined
by its ABI: a function using a Wasmtime ABI gets little-endian
lane order, and any other function gets big-endian lane order.
(This ensures that all raw_bitcast instructions generated by
both Wasmtime and other Cranelift frontends can always be
implemented as a no-op.)

Lane order affects the implementation of a number of operations
(the sketch after this list shows the resulting lane-index mapping):
- Vector immediates
- Vector memory load / store (in big- and little-endian variants)
- Operations explicitly using lane numbers
  (insertlane, extractlane, shuffle, swizzle)
- Operations implicitly using lane numbers
  (iadd_pairwise, narrow/widen, promote/demote, fcvt_low, vhigh_bits)
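
For the operations above, the two orders differ only in how a CLIF
lane index maps to a hardware element index.  A minimal sketch of that
mapping (a hypothetical helper, inferred from the precise-output tests
below):

    /// Map a CLIF lane index to an s390x vector element index.
    /// Big-endian lane order is the identity mapping; little-endian
    /// lane order mirrors the lanes.
    fn hw_element(le_lane_order: bool, lane_count: u8, lane: u8) -> u8 {
        if le_lane_order {
            lane_count - 1 - lane
        } else {
            lane
        }
    }

For example, lane 0 of an i64x2 maps to element 1 under little-endian
lane order (the "vleg ..., 1" in the tests below) and to element 0
under big-endian lane order.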

In addition, when calling a function using a different lane order,
we need to lane-swap all vector values passed or returned in registers.
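
The swap is a full reversal of the lanes that keeps the bytes within
each lane intact.  A byte-level sketch of the permutation (again a
hypothetical helper, not back-end code):

    /// Source position of byte i in a lane-swapped 16-byte vector:
    /// lanes are reversed, bytes within a lane keep their order.
    fn lane_swapped_source(lane_bytes: usize, i: usize) -> usize {
        let lanes = 16 / lane_bytes;
        let lane = i / lane_bytes;
        (lanes - 1 - lane) * lane_bytes + (i % lane_bytes)
    }

For i64x2 this is exactly the doubleword swap performed by vpdi with
mask 4; the tests below implement the narrower lane sizes by combining
vpdi with element rotates (verllg, verllf, verllh).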

A small number of changes to common code were also needed:

- Ensure we always select a Wasmtime calling convention on s390x
  in crates/cranelift (func_signature).

- Fix vector immediates for filetests/runtests.  In PR #4427,
  I attempted to fix this by byte-swapping the V128 value, but
  with the new scheme, we'd instead need to perform a per-lane
  byte swap.  Since we do not know the actual type in write_to_slice
  and read_from_slice, this isn't easily possible (see the byte-swap
  sketch after this list).

  Revert this part of PR #4427 again, and instead just mark the
  memory buffer as little-endian when emitting the trampoline;
  the back-end will then emit correct code to load the constant.

- Change a runtest in simd-bitselect-to-vselect.clif to no longer
  make little-endian lane order assumptions.

- Remove runtests in simd-swizzle.clif that make little-endian
  lane order assumptions by relying on implicit type conversion
  when using a non-i16x8 swizzle result type (this feature should
  probably be removed anyway).
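
To make the vector-immediate point above concrete, here is a hedged
illustration (plain Rust, not back-end code) of the two transforms:
reversing all 16 bytes of a V128 also reverses the lane order, while
the per-lane swap that would actually be needed requires the lane
size, which write_to_slice and read_from_slice do not know:

    /// Reverse all 16 bytes: changes byte order *and* lane order.
    fn swap_whole(v: [u8; 16]) -> [u8; 16] {
        let mut r = v;
        r.reverse();
        r
    }

    /// Swap bytes within each lane only: preserves lane order, but
    /// requires the lane size.
    fn swap_per_lane(v: [u8; 16], lane_bytes: usize) -> [u8; 16] {
        let mut r = [0u8; 16];
        for (i, &b) in v.iter().enumerate() {
            let lane = i / lane_bytes;
            let off = i % lane_bytes;
            r[lane * lane_bytes + (lane_bytes - 1 - off)] = b;
        }
        r
    }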

Tested with both wasmtime and cg_clif.
Author: Ulrich Weigand
Date:   2022-08-11 21:10:46 +02:00
Committed by: GitHub
Parent: c1c48b4386
Commit: 67870d1518
29 changed files with 6584 additions and 593 deletions

@@ -0,0 +1,127 @@
test compile precise-output
target s390x
function %caller_be_to_be(i64x2, i32x4, i16x8, i8x16) -> i32x4 {
fn0 = %callee_be(i64x2, i32x4, i16x8, i8x16) -> i32x4
block0(v0: i64x2, v1: i32x4, v2: i16x8, v3: i8x16):
v4 = call fn0(v0, v1, v2, v3)
return v4
}
; stmg %r14, %r15, 112(%r15)
; aghi %r15, -160
; virtual_sp_offset_adjust 160
; block0:
; bras %r1, 12 ; data %callee_be + 0 ; lg %r3, 0(%r1)
; basr %r14, %r3
; lmg %r14, %r15, 272(%r15)
; br %r14
function %caller_be_to_le(i64x2, i32x4, i16x8, i8x16) -> i32x4 {
fn0 = %callee_le(i64x2, i32x4, i16x8, i8x16) -> i32x4 wasmtime_system_v
block0(v0: i64x2, v1: i32x4, v2: i16x8, v3: i8x16):
v4 = call fn0(v0, v1, v2, v3)
return v4
}
; stmg %r14, %r15, 112(%r15)
; aghi %r15, -224
; virtual_sp_offset_adjust 160
; std %f8, 160(%r15)
; std %f9, 168(%r15)
; std %f10, 176(%r15)
; std %f11, 184(%r15)
; std %f12, 192(%r15)
; std %f13, 200(%r15)
; std %f14, 208(%r15)
; std %f15, 216(%r15)
; block0:
; vpdi %v24, %v24, %v24, 4
; vpdi %v20, %v25, %v25, 4
; verllg %v25, %v20, 32
; vpdi %v26, %v26, %v26, 4
; verllg %v28, %v26, 32
; verllf %v26, %v28, 16
; vpdi %v0, %v27, %v27, 4
; verllg %v2, %v0, 32
; verllf %v4, %v2, 16
; verllh %v27, %v4, 8
; bras %r1, 12 ; data %callee_le + 0 ; lg %r3, 0(%r1)
; basr %r14, %r3
; vpdi %v22, %v24, %v24, 4
; verllg %v24, %v22, 32
; ld %f8, 160(%r15)
; ld %f9, 168(%r15)
; ld %f10, 176(%r15)
; ld %f11, 184(%r15)
; ld %f12, 192(%r15)
; ld %f13, 200(%r15)
; ld %f14, 208(%r15)
; ld %f15, 216(%r15)
; lmg %r14, %r15, 336(%r15)
; br %r14
function %caller_le_to_be(i64x2, i32x4, i16x8, i8x16) -> i32x4 wasmtime_system_v {
fn0 = %callee_be(i64x2, i32x4, i16x8, i8x16) -> i32x4
block0(v0: i64x2, v1: i32x4, v2: i16x8, v3: i8x16):
v4 = call fn0(v0, v1, v2, v3)
return v4
}
; stmg %r14, %r15, 112(%r15)
; aghi %r15, -224
; virtual_sp_offset_adjust 160
; std %f8, 160(%r15)
; std %f9, 168(%r15)
; std %f10, 176(%r15)
; std %f11, 184(%r15)
; std %f12, 192(%r15)
; std %f13, 200(%r15)
; std %f14, 208(%r15)
; std %f15, 216(%r15)
; block0:
; vpdi %v24, %v24, %v24, 4
; vpdi %v20, %v25, %v25, 4
; verllg %v25, %v20, 32
; vpdi %v26, %v26, %v26, 4
; verllg %v28, %v26, 32
; verllf %v26, %v28, 16
; vpdi %v0, %v27, %v27, 4
; verllg %v2, %v0, 32
; verllf %v4, %v2, 16
; verllh %v27, %v4, 8
; bras %r1, 12 ; data %callee_be + 0 ; lg %r3, 0(%r1)
; basr %r14, %r3
; vpdi %v22, %v24, %v24, 4
; verllg %v24, %v22, 32
; ld %f8, 160(%r15)
; ld %f9, 168(%r15)
; ld %f10, 176(%r15)
; ld %f11, 184(%r15)
; ld %f12, 192(%r15)
; ld %f13, 200(%r15)
; ld %f14, 208(%r15)
; ld %f15, 216(%r15)
; lmg %r14, %r15, 336(%r15)
; br %r14
function %caller_le_to_le(i64x2, i32x4, i16x8, i8x16) -> i32x4 wasmtime_system_v {
fn0 = %callee_le(i64x2, i32x4, i16x8, i8x16) -> i32x4 wasmtime_system_v
block0(v0: i64x2, v1: i32x4, v2: i16x8, v3: i8x16):
v4 = call fn0(v0, v1, v2, v3)
return v4
}
; stmg %r14, %r15, 112(%r15)
; aghi %r15, -160
; virtual_sp_offset_adjust 160
; block0:
; bras %r1, 12 ; data %callee_le + 0 ; lg %r3, 0(%r1)
; basr %r14, %r3
; lmg %r14, %r15, 272(%r15)
; br %r14

@@ -457,21 +457,6 @@ block0(v0: i8x16, v1: i8x16):
; vpksh %v24, %v17, %v23
; br %r14
function %iadd_pairwise_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = iadd_pairwise.i32x4 v0, v1
return v2
}
; block0:
; vrepib %v5, 32
; vsrlb %v7, %v25, %v5
; vaf %v17, %v25, %v7
; vsrlb %v19, %v24, %v5
; vaf %v21, %v24, %v19
; vpkg %v24, %v17, %v21
; br %r14
function %usub_sat64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = usub_sat.i64x2 v0, v1
@@ -568,7 +553,7 @@ block0(v0: i8x16, v1: i8x16):
; vpksh %v24, %v17, %v23
; br %r14
function %iadd_pairwise_i32x4(i32x4, i32x4) -> i32x4 {
function %iadd_pairwise_i32x4_be(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = iadd_pairwise.i32x4 v0, v1
return v2
@@ -576,14 +561,14 @@ block0(v0: i32x4, v1: i32x4):
; block0:
; vrepib %v5, 32
; vsrlb %v7, %v25, %v5
; vaf %v17, %v25, %v7
; vsrlb %v19, %v24, %v5
; vaf %v21, %v24, %v19
; vsrlb %v7, %v24, %v5
; vaf %v17, %v24, %v7
; vsrlb %v19, %v25, %v5
; vaf %v21, %v25, %v19
; vpkg %v24, %v17, %v21
; br %r14
function %iadd_pairwise_i16x8(i16x8, i16x8) -> i16x8 {
function %iadd_pairwise_i16x8_be(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = iadd_pairwise.i16x8 v0, v1
return v2
@@ -591,14 +576,14 @@ block0(v0: i16x8, v1: i16x8):
; block0:
; vrepib %v5, 16
; vsrlb %v7, %v25, %v5
; vah %v17, %v25, %v7
; vsrlb %v19, %v24, %v5
; vah %v21, %v24, %v19
; vsrlb %v7, %v24, %v5
; vah %v17, %v24, %v7
; vsrlb %v19, %v25, %v5
; vah %v21, %v25, %v19
; vpkf %v24, %v17, %v21
; br %r14
function %iadd_pairwise_i8x16(i8x16, i8x16) -> i8x16 {
function %iadd_pairwise_i8x16_be(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = iadd_pairwise.i8x16 v0, v1
return v2
@@ -606,13 +591,58 @@ block0(v0: i8x16, v1: i8x16):
; block0:
; vrepib %v5, 8
; vsrlb %v7, %v25, %v5
; vab %v17, %v25, %v7
; vsrlb %v19, %v24, %v5
; vab %v21, %v24, %v19
; vsrlb %v7, %v24, %v5
; vab %v17, %v24, %v7
; vsrlb %v19, %v25, %v5
; vab %v21, %v25, %v19
; vpkh %v24, %v17, %v21
; br %r14
function %iadd_pairwise_i32x4_le(i32x4, i32x4) -> i32x4 wasmtime_system_v {
block0(v0: i32x4, v1: i32x4):
v2 = iadd_pairwise.i32x4 v0, v1
return v2
}
; block0:
; vrepib %v5, 32
; vsrlb %v7, %v24, %v5
; vaf %v17, %v24, %v7
; vsrlb %v19, %v25, %v5
; vaf %v21, %v25, %v19
; vpkg %v24, %v21, %v17
; br %r14
function %iadd_pairwise_i16x8_le(i16x8, i16x8) -> i16x8 wasmtime_system_v {
block0(v0: i16x8, v1: i16x8):
v2 = iadd_pairwise.i16x8 v0, v1
return v2
}
; block0:
; vrepib %v5, 16
; vsrlb %v7, %v24, %v5
; vah %v17, %v24, %v7
; vsrlb %v19, %v25, %v5
; vah %v21, %v25, %v19
; vpkf %v24, %v21, %v17
; br %r14
function %iadd_pairwise_i8x16_le(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = iadd_pairwise.i8x16 v0, v1
return v2
}
; block0:
; vrepib %v5, 8
; vsrlb %v7, %v24, %v5
; vab %v17, %v24, %v7
; vsrlb %v19, %v25, %v5
; vab %v21, %v25, %v19
; vpkh %v24, %v21, %v17
; br %r14
function %imul_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = imul.i64x2 v0, v1

@@ -0,0 +1,213 @@
test compile precise-output
target s390x
function %vconst_i64x2_zero() -> i64x2 wasmtime_system_v {
block0:
v1 = vconst.i64x2 [0 0]
return v1
}
; block0:
; vgbm %v24, 0
; br %r14
function %vconst_i64x2_splat1() -> i64x2 wasmtime_system_v {
block0:
v1 = vconst.i64x2 [32767 32767]
return v1
}
; block0:
; vrepig %v24, 32767
; br %r14
function %vconst_i64x2_splat2() -> i64x2 wasmtime_system_v {
block0:
v1 = vconst.i64x2 [-32768 -32768]
return v1
}
; block0:
; vrepig %v24, -32768
; br %r14
function %vconst_i64x2_splat3() -> i64x2 wasmtime_system_v {
block0:
v1 = vconst.i64x2 [32768 32768]
return v1
}
; block0:
; bras %r1, 12 ; data.u64 0x0000000000008000 ; vlrepg %v24, 0(%r1)
; br %r14
function %vconst_i64x2_splat4() -> i64x2 wasmtime_system_v {
block0:
v1 = vconst.i64x2 [-32769 -32769]
return v1
}
; block0:
; bras %r1, 12 ; data.u64 0xffffffffffff7fff ; vlrepg %v24, 0(%r1)
; br %r14
function %vconst_i64x2_mixed() -> i64x2 wasmtime_system_v {
block0:
v1 = vconst.i64x2 [1 2]
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x00000000000000020000000000000001 ; vl %v24, 0(%r1)
; br %r14
function %vconst_i32x4_zero() -> i32x4 wasmtime_system_v {
block0:
v1 = vconst.i32x4 [0 0 0 0]
return v1
}
; block0:
; vgbm %v24, 0
; br %r14
function %vconst_i32x4_splat1() -> i32x4 wasmtime_system_v {
block0:
v1 = vconst.i32x4 [32767 32767 32767 32767]
return v1
}
; block0:
; vrepif %v24, 32767
; br %r14
function %vconst_i32x4_splat2() -> i32x4 wasmtime_system_v {
block0:
v1 = vconst.i32x4 [-32768 -32768 -32768 -32768]
return v1
}
; block0:
; vrepif %v24, -32768
; br %r14
function %vconst_i32x4_splat3() -> i32x4 wasmtime_system_v {
block0:
v1 = vconst.i32x4 [32768 32768 32768 32768]
return v1
}
; block0:
; bras %r1, 8 ; data.u32 0x00008000 ; vlrepf %v24, 0(%r1)
; br %r14
function %vconst_i32x4_splat4() -> i32x4 wasmtime_system_v {
block0:
v1 = vconst.i32x4 [-32769 -32769 -32769 -32769]
return v1
}
; block0:
; bras %r1, 8 ; data.u32 0xffff7fff ; vlrepf %v24, 0(%r1)
; br %r14
function %vconst_i32x4_splat_i64() -> i32x4 wasmtime_system_v {
block0:
v1 = vconst.i32x4 [1 2 1 2]
return v1
}
; block0:
; bras %r1, 12 ; data.u64 0x0000000200000001 ; vlrepg %v24, 0(%r1)
; br %r14
function %vconst_i32x4_mixed() -> i32x4 wasmtime_system_v {
block0:
v1 = vconst.i32x4 [1 2 3 4]
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x00000004000000030000000200000001 ; vl %v24, 0(%r1)
; br %r14
function %vconst_i16x8_zero() -> i16x8 wasmtime_system_v {
block0:
v1 = vconst.i16x8 [0 0 0 0 0 0 0 0]
return v1
}
; block0:
; vgbm %v24, 0
; br %r14
function %vconst_i16x8_splat1() -> i16x8 wasmtime_system_v {
block0:
v1 = vconst.i16x8 [32767 32767 32767 32767 32767 32767 32767 32767]
return v1
}
; block0:
; vrepih %v24, 32767
; br %r14
function %vconst_i16x8_splat2() -> i16x8 wasmtime_system_v {
block0:
v1 = vconst.i16x8 [-32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768]
return v1
}
; block0:
; vrepih %v24, -32768
; br %r14
function %vconst_i16x8_mixed() -> i16x8 wasmtime_system_v {
block0:
v1 = vconst.i16x8 [1 2 3 4 5 6 7 8]
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x00080007000600050004000300020001 ; vl %v24, 0(%r1)
; br %r14
function %vconst_i8x16_zero() -> i8x16 wasmtime_system_v {
block0:
v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
return v1
}
; block0:
; vgbm %v24, 0
; br %r14
function %vconst_i8x16_splat1() -> i8x16 wasmtime_system_v {
block0:
v1 = vconst.i8x16 [127 127 127 127 127 127 127 127 127 127 127 127 127 127 127 127]
return v1
}
; block0:
; vrepib %v24, 127
; br %r14
function %vconst_i8x16_splat2() -> i8x16 wasmtime_system_v {
block0:
v1 = vconst.i8x16 [-128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128]
return v1
}
; block0:
; vrepib %v24, 128
; br %r14
function %vconst_i8x16_mixed() -> i8x16 wasmtime_system_v {
block0:
v1 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x100f0e0d0c0b0a090807060504030201 ; vl %v24, 0(%r1)
; br %r14

@@ -58,7 +58,7 @@ block0:
}
; block0:
; bras %r1, 20 ; data.u128 0x00000000000000020000000000000001 ; vl %v24, 0(%r1)
; bras %r1, 20 ; data.u128 0x00000000000000010000000000000002 ; vl %v24, 0(%r1)
; br %r14
function %vconst_i32x4_zero() -> i32x4 {
@@ -118,7 +118,7 @@ block0:
}
; block0:
; bras %r1, 12 ; data.u64 0x0000000200000001 ; vlrepg %v24, 0(%r1)
; bras %r1, 12 ; data.u64 0x0000000100000002 ; vlrepg %v24, 0(%r1)
; br %r14
function %vconst_i32x4_mixed() -> i32x4 {
@@ -128,7 +128,7 @@ block0:
}
; block0:
; bras %r1, 20 ; data.u128 0x00000004000000030000000200000001 ; vl %v24, 0(%r1)
; bras %r1, 20 ; data.u128 0x00000001000000020000000300000004 ; vl %v24, 0(%r1)
; br %r14
function %vconst_i16x8_zero() -> i16x8 {
@@ -168,7 +168,7 @@ block0:
}
; block0:
; bras %r1, 20 ; data.u128 0x00080007000600050004000300020001 ; vl %v24, 0(%r1)
; bras %r1, 20 ; data.u128 0x00010002000300040005000600070008 ; vl %v24, 0(%r1)
; br %r14
function %vconst_i8x16_zero() -> i8x16 {
@@ -208,6 +208,6 @@ block0:
}
; block0:
; bras %r1, 20 ; data.u128 0x100f0e0d0c0b0a090807060504030201 ; vl %v24, 0(%r1)
; bras %r1, 20 ; data.u128 0x0102030405060708090a0b0c0d0e0f10 ; vl %v24, 0(%r1)
; br %r14

@@ -0,0 +1,222 @@
test compile precise-output
target s390x
function %snarrow_i64x2_i32x4(i64x2, i64x2) -> i32x4 wasmtime_system_v {
block0(v0: i64x2, v1: i64x2):
v2 = snarrow.i64x2 v0, v1
return v2
}
; block0:
; vpksg %v24, %v25, %v24
; br %r14
function %snarrow_i32x4_i16x8(i32x4, i32x4) -> i16x8 wasmtime_system_v {
block0(v0: i32x4, v1: i32x4):
v2 = snarrow.i32x4 v0, v1
return v2
}
; block0:
; vpksf %v24, %v25, %v24
; br %r14
function %snarrow_i16x8_i8x16(i16x8, i16x8) -> i8x16 wasmtime_system_v {
block0(v0: i16x8, v1: i16x8):
v2 = snarrow.i16x8 v0, v1
return v2
}
; block0:
; vpksh %v24, %v25, %v24
; br %r14
function %unarrow_i64x2_i32x4(i64x2, i64x2) -> i32x4 wasmtime_system_v {
block0(v0: i64x2, v1: i64x2):
v2 = unarrow.i64x2 v0, v1
return v2
}
; block0:
; vgbm %v5, 0
; vmxg %v7, %v24, %v5
; vmxg %v17, %v25, %v5
; vpklsg %v24, %v17, %v7
; br %r14
function %unarrow_i32x4_i16x8(i32x4, i32x4) -> i16x8 wasmtime_system_v {
block0(v0: i32x4, v1: i32x4):
v2 = unarrow.i32x4 v0, v1
return v2
}
; block0:
; vgbm %v5, 0
; vmxf %v7, %v24, %v5
; vmxf %v17, %v25, %v5
; vpklsf %v24, %v17, %v7
; br %r14
function %unarrow_i16x8_i8x16(i16x8, i16x8) -> i8x16 wasmtime_system_v {
block0(v0: i16x8, v1: i16x8):
v2 = unarrow.i16x8 v0, v1
return v2
}
; block0:
; vgbm %v5, 0
; vmxh %v7, %v24, %v5
; vmxh %v17, %v25, %v5
; vpklsh %v24, %v17, %v7
; br %r14
function %uunarrow_i64x2_i32x4(i64x2, i64x2) -> i32x4 wasmtime_system_v {
block0(v0: i64x2, v1: i64x2):
v2 = uunarrow.i64x2 v0, v1
return v2
}
; block0:
; vpklsg %v24, %v25, %v24
; br %r14
function %uunarrow_i32x4_i16x8(i32x4, i32x4) -> i16x8 wasmtime_system_v {
block0(v0: i32x4, v1: i32x4):
v2 = uunarrow.i32x4 v0, v1
return v2
}
; block0:
; vpklsf %v24, %v25, %v24
; br %r14
function %uunarrow_i16x8_i8x16(i16x8, i16x8) -> i8x16 wasmtime_system_v {
block0(v0: i16x8, v1: i16x8):
v2 = uunarrow.i16x8 v0, v1
return v2
}
; block0:
; vpklsh %v24, %v25, %v24
; br %r14
function %swiden_low_i32x4_i64x2(i32x4) -> i64x2 wasmtime_system_v {
block0(v0: i32x4):
v1 = swiden_low.i32x4 v0
return v1
}
; block0:
; vuplf %v24, %v24
; br %r14
function %swiden_low_i16x8_i32x4(i16x8) -> i32x4 wasmtime_system_v {
block0(v0: i16x8):
v1 = swiden_low.i16x8 v0
return v1
}
; block0:
; vuplh %v24, %v24
; br %r14
function %swiden_low_i8x16_i16x8(i8x16) -> i16x8 wasmtime_system_v {
block0(v0: i8x16):
v1 = swiden_low.i8x16 v0
return v1
}
; block0:
; vuplb %v24, %v24
; br %r14
function %swiden_high_i32x4_i64x2(i32x4) -> i64x2 wasmtime_system_v {
block0(v0: i32x4):
v1 = swiden_high.i32x4 v0
return v1
}
; block0:
; vuphf %v24, %v24
; br %r14
function %swiden_high_i16x8_i32x4(i16x8) -> i32x4 wasmtime_system_v {
block0(v0: i16x8):
v1 = swiden_high.i16x8 v0
return v1
}
; block0:
; vuphh %v24, %v24
; br %r14
function %swiden_high_i8x16_i16x8(i8x16) -> i16x8 wasmtime_system_v {
block0(v0: i8x16):
v1 = swiden_high.i8x16 v0
return v1
}
; block0:
; vuphb %v24, %v24
; br %r14
function %uwiden_low_i32x4_i64x2(i32x4) -> i64x2 wasmtime_system_v {
block0(v0: i32x4):
v1 = uwiden_low.i32x4 v0
return v1
}
; block0:
; vupllf %v24, %v24
; br %r14
function %uwiden_low_i16x8_i32x4(i16x8) -> i32x4 wasmtime_system_v {
block0(v0: i16x8):
v1 = uwiden_low.i16x8 v0
return v1
}
; block0:
; vupllh %v24, %v24
; br %r14
function %uwiden_low_i8x16_i16x8(i8x16) -> i16x8 wasmtime_system_v {
block0(v0: i8x16):
v1 = uwiden_low.i8x16 v0
return v1
}
; block0:
; vupllb %v24, %v24
; br %r14
function %uwiden_high_i32x4_i64x2(i32x4) -> i64x2 wasmtime_system_v {
block0(v0: i32x4):
v1 = uwiden_high.i32x4 v0
return v1
}
; block0:
; vuplhf %v24, %v24
; br %r14
function %uwiden_high_i16x8_i32x4(i16x8) -> i32x4 wasmtime_system_v {
block0(v0: i16x8):
v1 = uwiden_high.i16x8 v0
return v1
}
; block0:
; vuplhh %v24, %v24
; br %r14
function %uwiden_high_i8x16_i16x8(i8x16) -> i16x8 wasmtime_system_v {
block0(v0: i8x16):
v1 = uwiden_high.i8x16 v0
return v1
}
; block0:
; vuplhb %v24, %v24
; br %r14

@@ -8,7 +8,7 @@ block0(v0: i64x2, v1: i64x2):
}
; block0:
; vpksg %v24, %v25, %v24
; vpksg %v24, %v24, %v25
; br %r14
function %snarrow_i32x4_i16x8(i32x4, i32x4) -> i16x8 {
@@ -18,7 +18,7 @@ block0(v0: i32x4, v1: i32x4):
}
; block0:
; vpksf %v24, %v25, %v24
; vpksf %v24, %v24, %v25
; br %r14
function %snarrow_i16x8_i8x16(i16x8, i16x8) -> i8x16 {
@@ -28,7 +28,7 @@ block0(v0: i16x8, v1: i16x8):
}
; block0:
; vpksh %v24, %v25, %v24
; vpksh %v24, %v24, %v25
; br %r14
function %unarrow_i64x2_i32x4(i64x2, i64x2) -> i32x4 {
@@ -39,8 +39,8 @@ block0(v0: i64x2, v1: i64x2):
; block0:
; vgbm %v5, 0
; vmxg %v7, %v25, %v5
; vmxg %v17, %v24, %v5
; vmxg %v7, %v24, %v5
; vmxg %v17, %v25, %v5
; vpklsg %v24, %v7, %v17
; br %r14
@@ -52,8 +52,8 @@ block0(v0: i32x4, v1: i32x4):
; block0:
; vgbm %v5, 0
; vmxf %v7, %v25, %v5
; vmxf %v17, %v24, %v5
; vmxf %v7, %v24, %v5
; vmxf %v17, %v25, %v5
; vpklsf %v24, %v7, %v17
; br %r14
@@ -65,8 +65,8 @@ block0(v0: i16x8, v1: i16x8):
; block0:
; vgbm %v5, 0
; vmxh %v7, %v25, %v5
; vmxh %v17, %v24, %v5
; vmxh %v7, %v24, %v5
; vmxh %v17, %v25, %v5
; vpklsh %v24, %v7, %v17
; br %r14
@@ -77,7 +77,7 @@ block0(v0: i64x2, v1: i64x2):
}
; block0:
; vpklsg %v24, %v25, %v24
; vpklsg %v24, %v24, %v25
; br %r14
function %uunarrow_i32x4_i16x8(i32x4, i32x4) -> i16x8 {
@@ -87,7 +87,7 @@ block0(v0: i32x4, v1: i32x4):
}
; block0:
; vpklsf %v24, %v25, %v24
; vpklsf %v24, %v24, %v25
; br %r14
function %uunarrow_i16x8_i8x16(i16x8, i16x8) -> i8x16 {
@@ -97,7 +97,7 @@ block0(v0: i16x8, v1: i16x8):
}
; block0:
; vpklsh %v24, %v25, %v24
; vpklsh %v24, %v24, %v25
; br %r14
function %swiden_low_i32x4_i64x2(i32x4) -> i64x2 {
@@ -107,7 +107,7 @@ block0(v0: i32x4):
}
; block0:
; vuplf %v24, %v24
; vuphf %v24, %v24
; br %r14
function %swiden_low_i16x8_i32x4(i16x8) -> i32x4 {
@@ -117,7 +117,7 @@ block0(v0: i16x8):
}
; block0:
; vuplh %v24, %v24
; vuphh %v24, %v24
; br %r14
function %swiden_low_i8x16_i16x8(i8x16) -> i16x8 {
@@ -127,7 +127,7 @@ block0(v0: i8x16):
}
; block0:
; vuplb %v24, %v24
; vuphb %v24, %v24
; br %r14
function %swiden_high_i32x4_i64x2(i32x4) -> i64x2 {
@@ -137,7 +137,7 @@ block0(v0: i32x4):
}
; block0:
; vuphf %v24, %v24
; vuplf %v24, %v24
; br %r14
function %swiden_high_i16x8_i32x4(i16x8) -> i32x4 {
@@ -147,7 +147,7 @@ block0(v0: i16x8):
}
; block0:
; vuphh %v24, %v24
; vuplh %v24, %v24
; br %r14
function %swiden_high_i8x16_i16x8(i8x16) -> i16x8 {
@@ -157,7 +157,7 @@ block0(v0: i8x16):
}
; block0:
; vuphb %v24, %v24
; vuplb %v24, %v24
; br %r14
function %uwiden_low_i32x4_i64x2(i32x4) -> i64x2 {
@@ -167,7 +167,7 @@ block0(v0: i32x4):
}
; block0:
; vupllf %v24, %v24
; vuplhf %v24, %v24
; br %r14
function %uwiden_low_i16x8_i32x4(i16x8) -> i32x4 {
@@ -177,7 +177,7 @@ block0(v0: i16x8):
}
; block0:
; vupllh %v24, %v24
; vuplhh %v24, %v24
; br %r14
function %uwiden_low_i8x16_i16x8(i8x16) -> i16x8 {
@@ -187,7 +187,7 @@ block0(v0: i8x16):
}
; block0:
; vupllb %v24, %v24
; vuplhb %v24, %v24
; br %r14
function %uwiden_high_i32x4_i64x2(i32x4) -> i64x2 {
@@ -197,7 +197,7 @@ block0(v0: i32x4):
}
; block0:
; vuplhf %v24, %v24
; vupllf %v24, %v24
; br %r14
function %uwiden_high_i16x8_i32x4(i16x8) -> i32x4 {
@@ -207,7 +207,7 @@ block0(v0: i16x8):
}
; block0:
; vuplhh %v24, %v24
; vupllh %v24, %v24
; br %r14
function %uwiden_high_i8x16_i16x8(i8x16) -> i16x8 {
@@ -217,6 +217,6 @@ block0(v0: i8x16):
}
; block0:
; vuplhb %v24, %v24
; vupllb %v24, %v24
; br %r14

@@ -21,7 +21,17 @@ block0:
; vgbm %v24, 0
; br %r14
function %vconst_f32x4_mixed() -> f32x4 {
function %vconst_f32x4_mixed_be() -> f32x4 {
block0:
v1 = vconst.f32x4 [0x1.0 0x2.0 0x3.0 0x4.0]
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x3f800000400000004040000040800000 ; vl %v24, 0(%r1)
; br %r14
function %vconst_f32x4_mixed_le() -> f32x4 wasmtime_system_v {
block0:
v1 = vconst.f32x4 [0x1.0 0x2.0 0x3.0 0x4.0]
return v1
@@ -31,7 +41,17 @@ block0:
; bras %r1, 20 ; data.u128 0x4080000040400000400000003f800000 ; vl %v24, 0(%r1)
; br %r14
function %vconst_f64x2_mixed() -> f64x2 {
function %vconst_f64x2_mixed_be() -> f64x2 {
block0:
v1 = vconst.f64x2 [0x1.0 0x2.0]
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x3ff00000000000004000000000000000 ; vl %v24, 0(%r1)
; br %r14
function %vconst_f64x2_mixed_le() -> f64x2 wasmtime_system_v {
block0:
v1 = vconst.f64x2 [0x1.0 0x2.0]
return v1
@@ -261,7 +281,18 @@ block0(v0: f64x2):
; vflcdb %v24, %v24
; br %r14
function %fvpromote_low_f32x4(f32x4) -> f64x2 {
function %fvpromote_low_f32x4_be(f32x4) -> f64x2 {
block0(v0: f32x4):
v1 = fvpromote_low v0
return v1
}
; block0:
; vmrhf %v3, %v24, %v24
; vldeb %v24, %v3
; br %r14
function %fvpromote_low_f32x4_le(f32x4) -> f64x2 wasmtime_system_v {
block0(v0: f32x4):
v1 = fvpromote_low v0
return v1
@@ -272,7 +303,7 @@ block0(v0: f32x4):
; vldeb %v24, %v3
; br %r14
function %fvdemote_f64x2(f64x2) -> f32x4 {
function %fvdemote_f64x2_be(f64x2) -> f32x4 {
block0(v0: f64x2):
v1 = fvdemote v0
return v1
@@ -280,9 +311,22 @@ block0(v0: f64x2):
; block0:
; vledb %v3, %v24, 0, 0
; vgbm %v5, 0
; bras %r1, 20 ; data.u128 0x10101010101010100001020308090a0b ; vl %v7, 0(%r1)
; vperm %v24, %v3, %v5, %v7
; vesrlg %v5, %v3, 32
; vgbm %v7, 0
; vpkg %v24, %v5, %v7
; br %r14
function %fvdemote_f64x2_le(f64x2) -> f32x4 wasmtime_system_v {
block0(v0: f64x2):
v1 = fvdemote v0
return v1
}
; block0:
; vledb %v3, %v24, 0, 0
; vesrlg %v5, %v3, 32
; vgbm %v7, 0
; vpkg %v24, %v7, %v5
; br %r14
function %ceil_f32x4(f32x4) -> f32x4 {
@@ -462,7 +506,18 @@ block0(v0: i64x2):
; br %r14
function %fcvt_low_from_sint_i32x4_f64x2(i32x4) -> f64x2 {
function %fcvt_low_from_sint_i32x4_f64x2_be(i32x4) -> f64x2 {
block0(v0: i32x4):
v1 = fcvt_low_from_sint.f64x2 v0
return v1
}
; block0:
; vuphf %v3, %v24
; vcdgb %v24, %v3, 0, 4
; br %r14
function %fcvt_low_from_sint_i32x4_f64x2_le(i32x4) -> f64x2 wasmtime_system_v {
block0(v0: i32x4):
v1 = fcvt_low_from_sint.f64x2 v0
return v1

@@ -9,7 +9,7 @@ block0(v0: i64x2, v1: i64):
}
; block0:
; vleg %v24, 0(%r2), 1
; vleg %v24, 0(%r2), 0
; br %r14
function %insertlane_i64x2_mem_1(i64x2, i64) -> i64x2 {
@@ -20,7 +20,7 @@ block0(v0: i64x2, v1: i64):
}
; block0:
; vleg %v24, 0(%r2), 0
; vleg %v24, 0(%r2), 1
; br %r14
function %insertlane_i64x2_mem_little_0(i64x2, i64) -> i64x2 {
@@ -31,7 +31,7 @@ block0(v0: i64x2, v1: i64):
}
; block0:
; vlebrg %v24, 0(%r2), 1
; vlebrg %v24, 0(%r2), 0
; br %r14
function %insertlane_i64x2_mem_little_1(i64x2, i64) -> i64x2 {
@@ -42,7 +42,7 @@ block0(v0: i64x2, v1: i64):
}
; block0:
; vlebrg %v24, 0(%r2), 0
; vlebrg %v24, 0(%r2), 1
; br %r14
function %insertlane_i32x4_mem_0(i32x4, i64) -> i32x4 {
@@ -53,7 +53,7 @@ block0(v0: i32x4, v1: i64):
}
; block0:
; vlef %v24, 0(%r2), 3
; vlef %v24, 0(%r2), 0
; br %r14
function %insertlane_i32x4_mem_3(i32x4, i64) -> i32x4 {
@@ -64,7 +64,7 @@ block0(v0: i32x4, v1: i64):
}
; block0:
; vlef %v24, 0(%r2), 0
; vlef %v24, 0(%r2), 3
; br %r14
function %insertlane_i32x4_mem_little_0(i32x4, i64) -> i32x4 {
@@ -75,7 +75,7 @@ block0(v0: i32x4, v1: i64):
}
; block0:
; vlebrf %v24, 0(%r2), 3
; vlebrf %v24, 0(%r2), 0
; br %r14
function %insertlane_i32x4_mem_little_3(i32x4, i64) -> i32x4 {
@@ -86,7 +86,7 @@ block0(v0: i32x4, v1: i64):
}
; block0:
; vlebrf %v24, 0(%r2), 0
; vlebrf %v24, 0(%r2), 3
; br %r14
function %insertlane_i16x8_mem_0(i16x8, i64) -> i16x8 {
@@ -97,7 +97,7 @@ block0(v0: i16x8, v1: i64):
}
; block0:
; vleh %v24, 0(%r2), 7
; vleh %v24, 0(%r2), 0
; br %r14
function %insertlane_i16x8_mem_7(i16x8, i64) -> i16x8 {
@@ -108,7 +108,7 @@ block0(v0: i16x8, v1: i64):
}
; block0:
; vleh %v24, 0(%r2), 0
; vleh %v24, 0(%r2), 7
; br %r14
function %insertlane_i16x8_mem_little_0(i16x8, i64) -> i16x8 {
@@ -119,7 +119,7 @@ block0(v0: i16x8, v1: i64):
}
; block0:
; vlebrh %v24, 0(%r2), 7
; vlebrh %v24, 0(%r2), 0
; br %r14
function %insertlane_i16x8_mem_little_7(i16x8, i64) -> i16x8 {
@@ -130,7 +130,7 @@ block0(v0: i16x8, v1: i64):
}
; block0:
; vlebrh %v24, 0(%r2), 0
; vlebrh %v24, 0(%r2), 7
; br %r14
function %insertlane_i8x16_mem_0(i8x16, i64) -> i8x16 {
@@ -141,7 +141,7 @@ block0(v0: i8x16, v1: i64):
}
; block0:
; vleb %v24, 0(%r2), 15
; vleb %v24, 0(%r2), 0
; br %r14
function %insertlane_i8x16_mem_15(i8x16, i64) -> i8x16 {
@@ -152,7 +152,7 @@ block0(v0: i8x16, v1: i64):
}
; block0:
; vleb %v24, 0(%r2), 0
; vleb %v24, 0(%r2), 15
; br %r14
function %insertlane_i8x16_mem_little_0(i8x16, i64) -> i8x16 {
@@ -163,7 +163,7 @@ block0(v0: i8x16, v1: i64):
}
; block0:
; vleb %v24, 0(%r2), 15
; vleb %v24, 0(%r2), 0
; br %r14
function %insertlane_i8x16_mem_little_15(i8x16, i64) -> i8x16 {
@@ -174,7 +174,7 @@ block0(v0: i8x16, v1: i64):
}
; block0:
; vleb %v24, 0(%r2), 0
; vleb %v24, 0(%r2), 15
; br %r14
function %insertlane_f64x2_mem_0(f64x2, i64) -> f64x2 {
@@ -185,7 +185,7 @@ block0(v0: f64x2, v1: i64):
}
; block0:
; vleg %v24, 0(%r2), 1
; vleg %v24, 0(%r2), 0
; br %r14
function %insertlane_f64x2_mem_1(f64x2, i64) -> f64x2 {
@@ -196,7 +196,7 @@ block0(v0: f64x2, v1: i64):
}
; block0:
; vleg %v24, 0(%r2), 0
; vleg %v24, 0(%r2), 1
; br %r14
function %insertlane_f64x2_mem_little_0(f64x2, i64) -> f64x2 {
@@ -207,7 +207,7 @@ block0(v0: f64x2, v1: i64):
}
; block0:
; vlebrg %v24, 0(%r2), 1
; vlebrg %v24, 0(%r2), 0
; br %r14
function %insertlane_f64x2_mem_little_1(f64x2, i64) -> f64x2 {
@@ -218,7 +218,7 @@ block0(v0: f64x2, v1: i64):
}
; block0:
; vlebrg %v24, 0(%r2), 0
; vlebrg %v24, 0(%r2), 1
; br %r14
function %insertlane_f32x4_mem_0(f32x4, i64) -> f32x4 {
@@ -229,7 +229,7 @@ block0(v0: f32x4, v1: i64):
}
; block0:
; vlef %v24, 0(%r2), 3
; vlef %v24, 0(%r2), 0
; br %r14
function %insertlane_i32x4_mem_3(i32x4, i64) -> i32x4 {
@@ -240,7 +240,7 @@ block0(v0: i32x4, v1: i64):
}
; block0:
; vlef %v24, 0(%r2), 0
; vlef %v24, 0(%r2), 3
; br %r14
function %insertlane_f32x4_mem_little_0(f32x4, i64) -> f32x4 {
@@ -251,7 +251,7 @@ block0(v0: f32x4, v1: i64):
}
; block0:
; vlebrf %v24, 0(%r2), 3
; vlebrf %v24, 0(%r2), 0
; br %r14
function %insertlane_i32x4_mem_little_3(i32x4, i64) -> i32x4 {
@@ -262,7 +262,7 @@ block0(v0: i32x4, v1: i64):
}
; block0:
; vlebrf %v24, 0(%r2), 0
; vlebrf %v24, 0(%r2), 3
; br %r14
function %extractlane_i64x2_mem_0(i64x2, i64) {
@@ -273,7 +273,7 @@ block0(v0: i64x2, v1: i64):
}
; block0:
; vsteg %v24, 0(%r2), 1
; vsteg %v24, 0(%r2), 0
; br %r14
function %extractlane_i64x2_mem_1(i64x2, i64) {
@@ -284,7 +284,7 @@ block0(v0: i64x2, v1: i64):
}
; block0:
; vsteg %v24, 0(%r2), 0
; vsteg %v24, 0(%r2), 1
; br %r14
function %extractlane_i64x2_mem_little_0(i64x2, i64) {
@@ -295,7 +295,7 @@ block0(v0: i64x2, v1: i64):
}
; block0:
; vstebrg %v24, 0(%r2), 1
; vstebrg %v24, 0(%r2), 0
; br %r14
function %extractlane_i64x2_mem_little_1(i64x2, i64) {
@@ -306,7 +306,7 @@ block0(v0: i64x2, v1: i64):
}
; block0:
; vstebrg %v24, 0(%r2), 0
; vstebrg %v24, 0(%r2), 1
; br %r14
function %extractlane_i32x4_mem_0(i32x4, i64) {
@@ -317,7 +317,7 @@ block0(v0: i32x4, v1: i64):
}
; block0:
; vstef %v24, 0(%r2), 3
; vstef %v24, 0(%r2), 0
; br %r14
function %extractlane_i32x4_mem_3(i32x4, i64) {
@@ -328,7 +328,7 @@ block0(v0: i32x4, v1: i64):
}
; block0:
; vstef %v24, 0(%r2), 0
; vstef %v24, 0(%r2), 3
; br %r14
function %extractlane_i32x4_mem_little_0(i32x4, i64) {
@@ -339,7 +339,7 @@ block0(v0: i32x4, v1: i64):
}
; block0:
; vstebrf %v24, 0(%r2), 3
; vstebrf %v24, 0(%r2), 0
; br %r14
function %extractlane_i32x4_mem_little_3(i32x4, i64) {
@@ -350,7 +350,7 @@ block0(v0: i32x4, v1: i64):
}
; block0:
; vstebrf %v24, 0(%r2), 0
; vstebrf %v24, 0(%r2), 3
; br %r14
function %extractlane_i16x8_mem_0(i16x8, i64) {
@@ -361,7 +361,7 @@ block0(v0: i16x8, v1: i64):
}
; block0:
; vsteh %v24, 0(%r2), 7
; vsteh %v24, 0(%r2), 0
; br %r14
function %extractlane_i16x8_mem_7(i16x8, i64) {
@@ -372,7 +372,7 @@ block0(v0: i16x8, v1: i64):
}
; block0:
; vsteh %v24, 0(%r2), 0
; vsteh %v24, 0(%r2), 7
; br %r14
function %extractlane_i16x8_mem_little_0(i16x8, i64) {
@@ -383,7 +383,7 @@ block0(v0: i16x8, v1: i64):
}
; block0:
; vstebrh %v24, 0(%r2), 7
; vstebrh %v24, 0(%r2), 0
; br %r14
function %extractlane_i16x8_mem_little_7(i16x8, i64) {
@@ -394,7 +394,7 @@ block0(v0: i16x8, v1: i64):
}
; block0:
; vstebrh %v24, 0(%r2), 0
; vstebrh %v24, 0(%r2), 7
; br %r14
function %extractlane_i8x16_mem_0(i8x16, i64) {
@@ -405,7 +405,7 @@ block0(v0: i8x16, v1: i64):
}
; block0:
; vsteb %v24, 0(%r2), 15
; vsteb %v24, 0(%r2), 0
; br %r14
function %extractlane_i8x16_mem_15(i8x16, i64) {
@@ -416,7 +416,7 @@ block0(v0: i8x16, v1: i64):
}
; block0:
; vsteb %v24, 0(%r2), 0
; vsteb %v24, 0(%r2), 15
; br %r14
function %extractlane_i8x16_mem_little_0(i8x16, i64) {
@@ -427,7 +427,7 @@ block0(v0: i8x16, v1: i64):
}
; block0:
; vsteb %v24, 0(%r2), 15
; vsteb %v24, 0(%r2), 0
; br %r14
function %extractlane_i8x16_mem_little_15(i8x16, i64) {
@@ -438,7 +438,7 @@ block0(v0: i8x16, v1: i64):
}
; block0:
; vsteb %v24, 0(%r2), 0
; vsteb %v24, 0(%r2), 15
; br %r14
function %extractlane_f64x2_mem_0(f64x2, i64) {
@@ -449,7 +449,7 @@ block0(v0: f64x2, v1: i64):
}
; block0:
; vsteg %v24, 0(%r2), 1
; vsteg %v24, 0(%r2), 0
; br %r14
function %extractlane_f64x2_mem_1(f64x2, i64) {
@@ -460,7 +460,7 @@ block0(v0: f64x2, v1: i64):
}
; block0:
; vsteg %v24, 0(%r2), 0
; vsteg %v24, 0(%r2), 1
; br %r14
function %extractlane_f64x2_mem_little_0(f64x2, i64) {
@@ -471,7 +471,7 @@ block0(v0: f64x2, v1: i64):
}
; block0:
; vstebrg %v24, 0(%r2), 1
; vstebrg %v24, 0(%r2), 0
; br %r14
function %extractlane_f64x2_mem_little_1(f64x2, i64) {
@@ -482,7 +482,7 @@ block0(v0: f64x2, v1: i64):
}
; block0:
; vstebrg %v24, 0(%r2), 0
; vstebrg %v24, 0(%r2), 1
; br %r14
function %extractlane_f32x4_mem_0(f32x4, i64) {
@@ -493,7 +493,7 @@ block0(v0: f32x4, v1: i64):
}
; block0:
; vstef %v24, 0(%r2), 3
; vstef %v24, 0(%r2), 0
; br %r14
function %extractlane_f32x4_mem_3(f32x4, i64) {
@@ -504,7 +504,7 @@ block0(v0: f32x4, v1: i64):
}
; block0:
; vstef %v24, 0(%r2), 0
; vstef %v24, 0(%r2), 3
; br %r14
function %extractlane_f32x4_mem_little_0(f32x4, i64) {
@@ -515,7 +515,7 @@ block0(v0: f32x4, v1: i64):
}
; block0:
; vstebrf %v24, 0(%r2), 3
; vstebrf %v24, 0(%r2), 0
; br %r14
function %extractlane_f32x4_mem_little_3(f32x4, i64) {
@@ -526,7 +526,7 @@ block0(v0: f32x4, v1: i64):
}
; block0:
; vstebrf %v24, 0(%r2), 0
; vstebrf %v24, 0(%r2), 3
; br %r14
function %splat_i64x2_mem(i64) -> i64x2 {
@@ -670,7 +670,7 @@ block0(v0: i64):
; block0:
; vgbm %v24, 0
; vleg %v24, 0(%r2), 1
; vleg %v24, 0(%r2), 0
; br %r14
function %scalar_to_vector_i64x2_mem_little(i64) -> i64x2 {
@@ -682,7 +682,7 @@ block0(v0: i64):
; block0:
; vgbm %v24, 0
; vlebrg %v24, 0(%r2), 1
; vlebrg %v24, 0(%r2), 0
; br %r14
function %scalar_to_vector_i32x4_mem(i64) -> i32x4 {
@@ -694,7 +694,7 @@ block0(v0: i64):
; block0:
; vgbm %v24, 0
; vlef %v24, 0(%r2), 3
; vlef %v24, 0(%r2), 0
; br %r14
function %scalar_to_vector_i32x4_mem_little(i64) -> i32x4 {
@@ -706,7 +706,7 @@ block0(v0: i64):
; block0:
; vgbm %v24, 0
; vlebrf %v24, 0(%r2), 3
; vlebrf %v24, 0(%r2), 0
; br %r14
function %scalar_to_vector_i16x8_mem(i64) -> i16x8 {
@@ -718,7 +718,7 @@ block0(v0: i64):
; block0:
; vgbm %v24, 0
; vleh %v24, 0(%r2), 7
; vleh %v24, 0(%r2), 0
; br %r14
function %scalar_to_vector_i16x8_mem_little(i64) -> i16x8 {
@@ -730,7 +730,7 @@ block0(v0: i64):
; block0:
; vgbm %v24, 0
; vlebrh %v24, 0(%r2), 7
; vlebrh %v24, 0(%r2), 0
; br %r14
function %scalar_to_vector_i8x16_mem(i64) -> i8x16 {
@@ -742,7 +742,7 @@ block0(v0: i64):
; block0:
; vgbm %v24, 0
; vleb %v24, 0(%r2), 15
; vleb %v24, 0(%r2), 0
; br %r14
function %scalar_to_vector_i8x16_mem_little(i64) -> i8x16 {
@@ -754,7 +754,7 @@ block0(v0: i64):
; block0:
; vgbm %v24, 0
; vleb %v24, 0(%r2), 15
; vleb %v24, 0(%r2), 0
; br %r14
function %scalar_to_vector_f64x2_mem(i64) -> f64x2 {
@@ -766,7 +766,7 @@ block0(v0: i64):
; block0:
; vgbm %v24, 0
; vleg %v24, 0(%r2), 1
; vleg %v24, 0(%r2), 0
; br %r14
function %scalar_to_vector_f64x2_mem_little(i64) -> f64x2 {
@@ -778,7 +778,7 @@ block0(v0: i64):
; block0:
; vgbm %v24, 0
; vlebrg %v24, 0(%r2), 1
; vlebrg %v24, 0(%r2), 0
; br %r14
function %scalar_to_vector_f32x4_mem(i64) -> f32x4 {
@@ -790,7 +790,7 @@ block0(v0: i64):
; block0:
; vgbm %v24, 0
; vlef %v24, 0(%r2), 3
; vlef %v24, 0(%r2), 0
; br %r14
function %scalar_to_vector_f32x4_mem_little(i64) -> f32x4 {
@@ -802,6 +802,6 @@ block0(v0: i64):
; block0:
; vgbm %v24, 0
; vlebrf %v24, 0(%r2), 3
; vlebrf %v24, 0(%r2), 0
; br %r14

@@ -0,0 +1,807 @@
test compile precise-output
target s390x arch13
function %insertlane_i64x2_mem_0(i64x2, i64) -> i64x2 wasmtime_system_v {
block0(v0: i64x2, v1: i64):
v2 = load.i64 v1
v3 = insertlane.i64x2 v0, v2, 0
return v3
}
; block0:
; vleg %v24, 0(%r2), 1
; br %r14
function %insertlane_i64x2_mem_1(i64x2, i64) -> i64x2 wasmtime_system_v {
block0(v0: i64x2, v1: i64):
v2 = load.i64 v1
v3 = insertlane.i64x2 v0, v2, 1
return v3
}
; block0:
; vleg %v24, 0(%r2), 0
; br %r14
function %insertlane_i64x2_mem_little_0(i64x2, i64) -> i64x2 wasmtime_system_v {
block0(v0: i64x2, v1: i64):
v2 = load.i64 little v1
v3 = insertlane.i64x2 v0, v2, 0
return v3
}
; block0:
; vlebrg %v24, 0(%r2), 1
; br %r14
function %insertlane_i64x2_mem_little_1(i64x2, i64) -> i64x2 wasmtime_system_v {
block0(v0: i64x2, v1: i64):
v2 = load.i64 little v1
v3 = insertlane.i64x2 v0, v2, 1
return v3
}
; block0:
; vlebrg %v24, 0(%r2), 0
; br %r14
function %insertlane_i32x4_mem_0(i32x4, i64) -> i32x4 wasmtime_system_v {
block0(v0: i32x4, v1: i64):
v2 = load.i32 v1
v3 = insertlane.i32x4 v0, v2, 0
return v3
}
; block0:
; vlef %v24, 0(%r2), 3
; br %r14
function %insertlane_i32x4_mem_3(i32x4, i64) -> i32x4 wasmtime_system_v {
block0(v0: i32x4, v1: i64):
v2 = load.i32 v1
v3 = insertlane.i32x4 v0, v2, 3
return v3
}
; block0:
; vlef %v24, 0(%r2), 0
; br %r14
function %insertlane_i32x4_mem_little_0(i32x4, i64) -> i32x4 wasmtime_system_v {
block0(v0: i32x4, v1: i64):
v2 = load.i32 little v1
v3 = insertlane.i32x4 v0, v2, 0
return v3
}
; block0:
; vlebrf %v24, 0(%r2), 3
; br %r14
function %insertlane_i32x4_mem_little_3(i32x4, i64) -> i32x4 wasmtime_system_v {
block0(v0: i32x4, v1: i64):
v2 = load.i32 little v1
v3 = insertlane.i32x4 v0, v2, 3
return v3
}
; block0:
; vlebrf %v24, 0(%r2), 0
; br %r14
function %insertlane_i16x8_mem_0(i16x8, i64) -> i16x8 wasmtime_system_v {
block0(v0: i16x8, v1: i64):
v2 = load.i16 v1
v3 = insertlane.i16x8 v0, v2, 0
return v3
}
; block0:
; vleh %v24, 0(%r2), 7
; br %r14
function %insertlane_i16x8_mem_7(i16x8, i64) -> i16x8 wasmtime_system_v {
block0(v0: i16x8, v1: i64):
v2 = load.i16 v1
v3 = insertlane.i16x8 v0, v2, 7
return v3
}
; block0:
; vleh %v24, 0(%r2), 0
; br %r14
function %insertlane_i16x8_mem_little_0(i16x8, i64) -> i16x8 wasmtime_system_v {
block0(v0: i16x8, v1: i64):
v2 = load.i16 little v1
v3 = insertlane.i16x8 v0, v2, 0
return v3
}
; block0:
; vlebrh %v24, 0(%r2), 7
; br %r14
function %insertlane_i16x8_mem_little_7(i16x8, i64) -> i16x8 wasmtime_system_v {
block0(v0: i16x8, v1: i64):
v2 = load.i16 little v1
v3 = insertlane.i16x8 v0, v2, 7
return v3
}
; block0:
; vlebrh %v24, 0(%r2), 0
; br %r14
function %insertlane_i8x16_mem_0(i8x16, i64) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i64):
v2 = load.i8 v1
v3 = insertlane.i8x16 v0, v2, 0
return v3
}
; block0:
; vleb %v24, 0(%r2), 15
; br %r14
function %insertlane_i8x16_mem_15(i8x16, i64) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i64):
v2 = load.i8 v1
v3 = insertlane.i8x16 v0, v2, 15
return v3
}
; block0:
; vleb %v24, 0(%r2), 0
; br %r14
function %insertlane_i8x16_mem_little_0(i8x16, i64) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i64):
v2 = load.i8 little v1
v3 = insertlane.i8x16 v0, v2, 0
return v3
}
; block0:
; vleb %v24, 0(%r2), 15
; br %r14
function %insertlane_i8x16_mem_little_15(i8x16, i64) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i64):
v2 = load.i8 little v1
v3 = insertlane.i8x16 v0, v2, 15
return v3
}
; block0:
; vleb %v24, 0(%r2), 0
; br %r14
function %insertlane_f64x2_mem_0(f64x2, i64) -> f64x2 wasmtime_system_v {
block0(v0: f64x2, v1: i64):
v2 = load.f64 v1
v3 = insertlane.f64x2 v0, v2, 0
return v3
}
; block0:
; vleg %v24, 0(%r2), 1
; br %r14
function %insertlane_f64x2_mem_1(f64x2, i64) -> f64x2 wasmtime_system_v {
block0(v0: f64x2, v1: i64):
v2 = load.f64 v1
v3 = insertlane.f64x2 v0, v2, 1
return v3
}
; block0:
; vleg %v24, 0(%r2), 0
; br %r14
function %insertlane_f64x2_mem_little_0(f64x2, i64) -> f64x2 wasmtime_system_v {
block0(v0: f64x2, v1: i64):
v2 = load.f64 little v1
v3 = insertlane.f64x2 v0, v2, 0
return v3
}
; block0:
; vlebrg %v24, 0(%r2), 1
; br %r14
function %insertlane_f64x2_mem_little_1(f64x2, i64) -> f64x2 wasmtime_system_v {
block0(v0: f64x2, v1: i64):
v2 = load.f64 little v1
v3 = insertlane.f64x2 v0, v2, 1
return v3
}
; block0:
; vlebrg %v24, 0(%r2), 0
; br %r14
function %insertlane_f32x4_mem_0(f32x4, i64) -> f32x4 wasmtime_system_v {
block0(v0: f32x4, v1: i64):
v2 = load.f32 v1
v3 = insertlane.f32x4 v0, v2, 0
return v3
}
; block0:
; vlef %v24, 0(%r2), 3
; br %r14
function %insertlane_i32x4_mem_3(i32x4, i64) -> i32x4 wasmtime_system_v {
block0(v0: i32x4, v1: i64):
v2 = load.i32 v1
v3 = insertlane.i32x4 v0, v2, 3
return v3
}
; block0:
; vlef %v24, 0(%r2), 0
; br %r14
function %insertlane_f32x4_mem_little_0(f32x4, i64) -> f32x4 wasmtime_system_v {
block0(v0: f32x4, v1: i64):
v2 = load.f32 little v1
v3 = insertlane.f32x4 v0, v2, 0
return v3
}
; block0:
; vlebrf %v24, 0(%r2), 3
; br %r14
function %insertlane_i32x4_mem_little_3(i32x4, i64) -> i32x4 wasmtime_system_v {
block0(v0: i32x4, v1: i64):
v2 = load.i32 little v1
v3 = insertlane.i32x4 v0, v2, 3
return v3
}
; block0:
; vlebrf %v24, 0(%r2), 0
; br %r14
function %extractlane_i64x2_mem_0(i64x2, i64) wasmtime_system_v {
block0(v0: i64x2, v1: i64):
v2 = extractlane.i64x2 v0, 0
store v2, v1
return
}
; block0:
; vsteg %v24, 0(%r2), 1
; br %r14
function %extractlane_i64x2_mem_1(i64x2, i64) wasmtime_system_v {
block0(v0: i64x2, v1: i64):
v2 = extractlane.i64x2 v0, 1
store v2, v1
return
}
; block0:
; vsteg %v24, 0(%r2), 0
; br %r14
function %extractlane_i64x2_mem_little_0(i64x2, i64) wasmtime_system_v {
block0(v0: i64x2, v1: i64):
v2 = extractlane.i64x2 v0, 0
store little v2, v1
return
}
; block0:
; vstebrg %v24, 0(%r2), 1
; br %r14
function %extractlane_i64x2_mem_little_1(i64x2, i64) wasmtime_system_v {
block0(v0: i64x2, v1: i64):
v2 = extractlane.i64x2 v0, 1
store little v2, v1
return
}
; block0:
; vstebrg %v24, 0(%r2), 0
; br %r14
function %extractlane_i32x4_mem_0(i32x4, i64) wasmtime_system_v {
block0(v0: i32x4, v1: i64):
v2 = extractlane.i32x4 v0, 0
store v2, v1
return
}
; block0:
; vstef %v24, 0(%r2), 3
; br %r14
function %extractlane_i32x4_mem_3(i32x4, i64) wasmtime_system_v {
block0(v0: i32x4, v1: i64):
v2 = extractlane.i32x4 v0, 3
store v2, v1
return
}
; block0:
; vstef %v24, 0(%r2), 0
; br %r14
function %extractlane_i32x4_mem_little_0(i32x4, i64) wasmtime_system_v {
block0(v0: i32x4, v1: i64):
v2 = extractlane.i32x4 v0, 0
store little v2, v1
return
}
; block0:
; vstebrf %v24, 0(%r2), 3
; br %r14
function %extractlane_i32x4_mem_little_3(i32x4, i64) wasmtime_system_v {
block0(v0: i32x4, v1: i64):
v2 = extractlane.i32x4 v0, 3
store little v2, v1
return
}
; block0:
; vstebrf %v24, 0(%r2), 0
; br %r14
function %extractlane_i16x8_mem_0(i16x8, i64) wasmtime_system_v {
block0(v0: i16x8, v1: i64):
v2 = extractlane.i16x8 v0, 0
store v2, v1
return
}
; block0:
; vsteh %v24, 0(%r2), 7
; br %r14
function %extractlane_i16x8_mem_7(i16x8, i64) wasmtime_system_v {
block0(v0: i16x8, v1: i64):
v2 = extractlane.i16x8 v0, 7
store v2, v1
return
}
; block0:
; vsteh %v24, 0(%r2), 0
; br %r14
function %extractlane_i16x8_mem_little_0(i16x8, i64) wasmtime_system_v {
block0(v0: i16x8, v1: i64):
v2 = extractlane.i16x8 v0, 0
store little v2, v1
return
}
; block0:
; vstebrh %v24, 0(%r2), 7
; br %r14
function %extractlane_i16x8_mem_little_7(i16x8, i64) wasmtime_system_v {
block0(v0: i16x8, v1: i64):
v2 = extractlane.i16x8 v0, 7
store little v2, v1
return
}
; block0:
; vstebrh %v24, 0(%r2), 0
; br %r14
function %extractlane_i8x16_mem_0(i8x16, i64) wasmtime_system_v {
block0(v0: i8x16, v1: i64):
v2 = extractlane.i8x16 v0, 0
store v2, v1
return
}
; block0:
; vsteb %v24, 0(%r2), 15
; br %r14
function %extractlane_i8x16_mem_15(i8x16, i64) wasmtime_system_v {
block0(v0: i8x16, v1: i64):
v2 = extractlane.i8x16 v0, 15
store v2, v1
return
}
; block0:
; vsteb %v24, 0(%r2), 0
; br %r14
function %extractlane_i8x16_mem_little_0(i8x16, i64) wasmtime_system_v {
block0(v0: i8x16, v1: i64):
v2 = extractlane.i8x16 v0, 0
store little v2, v1
return
}
; block0:
; vsteb %v24, 0(%r2), 15
; br %r14
function %extractlane_i8x16_mem_little_15(i8x16, i64) wasmtime_system_v {
block0(v0: i8x16, v1: i64):
v2 = extractlane.i8x16 v0, 15
store little v2, v1
return
}
; block0:
; vsteb %v24, 0(%r2), 0
; br %r14
function %extractlane_f64x2_mem_0(f64x2, i64) wasmtime_system_v {
block0(v0: f64x2, v1: i64):
v2 = extractlane.f64x2 v0, 0
store v2, v1
return
}
; block0:
; vsteg %v24, 0(%r2), 1
; br %r14
function %extractlane_f64x2_mem_1(f64x2, i64) wasmtime_system_v {
block0(v0: f64x2, v1: i64):
v2 = extractlane.f64x2 v0, 1
store v2, v1
return
}
; block0:
; vsteg %v24, 0(%r2), 0
; br %r14
function %extractlane_f64x2_mem_little_0(f64x2, i64) wasmtime_system_v {
block0(v0: f64x2, v1: i64):
v2 = extractlane.f64x2 v0, 0
store little v2, v1
return
}
; block0:
; vstebrg %v24, 0(%r2), 1
; br %r14
function %extractlane_f64x2_mem_little_1(f64x2, i64) wasmtime_system_v {
block0(v0: f64x2, v1: i64):
v2 = extractlane.f64x2 v0, 1
store little v2, v1
return
}
; block0:
; vstebrg %v24, 0(%r2), 0
; br %r14
function %extractlane_f32x4_mem_0(f32x4, i64) wasmtime_system_v {
block0(v0: f32x4, v1: i64):
v2 = extractlane.f32x4 v0, 0
store v2, v1
return
}
; block0:
; vstef %v24, 0(%r2), 3
; br %r14
function %extractlane_f32x4_mem_3(f32x4, i64) wasmtime_system_v {
block0(v0: f32x4, v1: i64):
v2 = extractlane.f32x4 v0, 3
store v2, v1
return
}
; block0:
; vstef %v24, 0(%r2), 0
; br %r14
function %extractlane_f32x4_mem_little_0(f32x4, i64) wasmtime_system_v {
block0(v0: f32x4, v1: i64):
v2 = extractlane.f32x4 v0, 0
store little v2, v1
return
}
; block0:
; vstebrf %v24, 0(%r2), 3
; br %r14
function %extractlane_f32x4_mem_little_3(f32x4, i64) wasmtime_system_v {
block0(v0: f32x4, v1: i64):
v2 = extractlane.f32x4 v0, 3
store little v2, v1
return
}
; block0:
; vstebrf %v24, 0(%r2), 0
; br %r14
function %splat_i64x2_mem(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.i64 v0
v2 = splat.i64x2 v1
return v2
}
; block0:
; vlrepg %v24, 0(%r2)
; br %r14
function %splat_i64x2_mem_little(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.i64 little v0
v2 = splat.i64x2 v1
return v2
}
; block0:
; vlbrrepg %v24, 0(%r2)
; br %r14
function %splat_i32x4_mem(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.i32 v0
v2 = splat.i32x4 v1
return v2
}
; block0:
; vlrepf %v24, 0(%r2)
; br %r14
function %splat_i32x4_mem_little(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.i32 little v0
v2 = splat.i32x4 v1
return v2
}
; block0:
; vlbrrepf %v24, 0(%r2)
; br %r14
function %splat_i16x8_mem(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = load.i16 v0
v2 = splat.i16x8 v1
return v2
}
; block0:
; vlreph %v24, 0(%r2)
; br %r14
function %splat_i16x8_mem_little(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = load.i16 little v0
v2 = splat.i16x8 v1
return v2
}
; block0:
; vlbrreph %v24, 0(%r2)
; br %r14
function %splat_i8x16_mem(i64) -> i8x16 wasmtime_system_v {
block0(v0: i64):
v1 = load.i8 v0
v2 = splat.i8x16 v1
return v2
}
; block0:
; vlrepb %v24, 0(%r2)
; br %r14
function %splat_i8x16_mem_little(i64) -> i8x16 wasmtime_system_v {
block0(v0: i64):
v1 = load.i8 little v0
v2 = splat.i8x16 v1
return v2
}
; block0:
; vlrepb %v24, 0(%r2)
; br %r14
function %splat_f64x2_mem(i64) -> f64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.f64 v0
v2 = splat.f64x2 v1
return v2
}
; block0:
; vlrepg %v24, 0(%r2)
; br %r14
function %splat_f64x2_mem_little(i64) -> f64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.f64 little v0
v2 = splat.f64x2 v1
return v2
}
; block0:
; vlbrrepg %v24, 0(%r2)
; br %r14
function %splat_f32x4_mem(i64) -> f32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.f32 v0
v2 = splat.f32x4 v1
return v2
}
; block0:
; vlrepf %v24, 0(%r2)
; br %r14
function %splat_f32x4_mem_little(i64) -> f32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.f32 little v0
v2 = splat.f32x4 v1
return v2
}
; block0:
; vlbrrepf %v24, 0(%r2)
; br %r14
function %scalar_to_vector_i64x2_mem(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.i64 v0
v2 = scalar_to_vector.i64x2 v1
return v2
}
; block0:
; vgbm %v24, 0
; vleg %v24, 0(%r2), 1
; br %r14
function %scalar_to_vector_i64x2_mem_little(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.i64 little v0
v2 = scalar_to_vector.i64x2 v1
return v2
}
; block0:
; vgbm %v24, 0
; vlebrg %v24, 0(%r2), 1
; br %r14
function %scalar_to_vector_i32x4_mem(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.i32 v0
v2 = scalar_to_vector.i32x4 v1
return v2
}
; block0:
; vgbm %v24, 0
; vlef %v24, 0(%r2), 3
; br %r14
function %scalar_to_vector_i32x4_mem_little(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.i32 little v0
v2 = scalar_to_vector.i32x4 v1
return v2
}
; block0:
; vgbm %v24, 0
; vlebrf %v24, 0(%r2), 3
; br %r14
function %scalar_to_vector_i16x8_mem(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = load.i16 v0
v2 = scalar_to_vector.i16x8 v1
return v2
}
; block0:
; vgbm %v24, 0
; vleh %v24, 0(%r2), 7
; br %r14
function %scalar_to_vector_i16x8_mem_little(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = load.i16 little v0
v2 = scalar_to_vector.i16x8 v1
return v2
}
; block0:
; vgbm %v24, 0
; vlebrh %v24, 0(%r2), 7
; br %r14
function %scalar_to_vector_i8x16_mem(i64) -> i8x16 wasmtime_system_v {
block0(v0: i64):
v1 = load.i8 v0
v2 = scalar_to_vector.i8x16 v1
return v2
}
; block0:
; vgbm %v24, 0
; vleb %v24, 0(%r2), 15
; br %r14
function %scalar_to_vector_i8x16_mem_little(i64) -> i8x16 wasmtime_system_v {
block0(v0: i64):
v1 = load.i8 little v0
v2 = scalar_to_vector.i8x16 v1
return v2
}
; block0:
; vgbm %v24, 0
; vleb %v24, 0(%r2), 15
; br %r14
function %scalar_to_vector_f64x2_mem(i64) -> f64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.f64 v0
v2 = scalar_to_vector.f64x2 v1
return v2
}
; block0:
; vgbm %v24, 0
; vleg %v24, 0(%r2), 1
; br %r14
function %scalar_to_vector_f64x2_mem_little(i64) -> f64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.f64 little v0
v2 = scalar_to_vector.f64x2 v1
return v2
}
; block0:
; vgbm %v24, 0
; vlebrg %v24, 0(%r2), 1
; br %r14
function %scalar_to_vector_f32x4_mem(i64) -> f32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.f32 v0
v2 = scalar_to_vector.f32x4 v1
return v2
}
; block0:
; vgbm %v24, 0
; vlef %v24, 0(%r2), 3
; br %r14
function %scalar_to_vector_f32x4_mem_little(i64) -> f32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.f32 little v0
v2 = scalar_to_vector.f32x4 v1
return v2
}
; block0:
; vgbm %v24, 0
; vlebrf %v24, 0(%r2), 3
; br %r14

(File diff suppressed because it is too large.)

(File diff suppressed because it is too large.)

@@ -625,7 +625,55 @@ block0(v0: f64x2, v1: f64x2):
; lochio %r2, 1
; br %r14
function %vhigh_bits(i64x2) -> i64 {
function %vhigh_bits_be(i64x2) -> i64 {
block0(v0: i64x2):
v1 = vhigh_bits.i64 v0
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x80808080808080808080808080804000 ; vl %v3, 0(%r1)
; vbperm %v5, %v24, %v3
; lgdr %r2, %f5
; br %r14
function %vhigh_bits_be(i32x4) -> i64 {
block0(v0: i32x4):
v1 = vhigh_bits.i64 v0
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x80808080808080808080808060402000 ; vl %v3, 0(%r1)
; vbperm %v5, %v24, %v3
; lgdr %r2, %f5
; br %r14
function %vhigh_bits_be(i16x8) -> i64 {
block0(v0: i16x8):
v1 = vhigh_bits.i64 v0
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x80808080808080807060504030201000 ; vl %v3, 0(%r1)
; vbperm %v5, %v24, %v3
; lgdr %r2, %f5
; br %r14
function %vhigh_bits_be(i8x16) -> i64 {
block0(v0: i8x16):
v1 = vhigh_bits.i64 v0
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x78706860585048403830282018100800 ; vl %v3, 0(%r1)
; vbperm %v5, %v24, %v3
; lgdr %r2, %f5
; br %r14
function %vhigh_bits_le(i64x2) -> i64 wasmtime_system_v {
block0(v0: i64x2):
v1 = vhigh_bits.i64 v0
return v1
@@ -637,7 +685,7 @@ block0(v0: i64x2):
; lgdr %r2, %f5
; br %r14
function %vhigh_bits(i32x4) -> i64 {
function %vhigh_bits_le(i32x4) -> i64 wasmtime_system_v {
block0(v0: i32x4):
v1 = vhigh_bits.i64 v0
return v1
@@ -649,7 +697,7 @@ block0(v0: i32x4):
; lgdr %r2, %f5
; br %r14
function %vhigh_bits(i16x8) -> i64 {
function %vhigh_bits_le(i16x8) -> i64 wasmtime_system_v {
block0(v0: i16x8):
v1 = vhigh_bits.i64 v0
return v1
@@ -661,7 +709,7 @@ block0(v0: i16x8):
; lgdr %r2, %f5
; br %r14
function %vhigh_bits(i8x16) -> i64 {
function %vhigh_bits_le(i8x16) -> i64 wasmtime_system_v {
block0(v0: i8x16):
v1 = vhigh_bits.i64 v0
return v1

@@ -0,0 +1,493 @@
test compile precise-output
target s390x
function %swizzle(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = swizzle.i8x16 v0, v1
return v2
}
; block0:
; vgbm %v5, 0
; vrepib %v7, 239
; vno %v17, %v25, %v25
; vmxlb %v19, %v7, %v17
; vperm %v24, %v5, %v24, %v19
; br %r14
function %shuffle_0(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
return v2
}
; block0:
; vrepib %v5, 15
; vperm %v24, %v24, %v25, %v5
; br %r14
function %shuffle_1(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [3 0 31 26 4 6 12 11 23 13 24 4 2 15 17 5]
return v2
}
; block0:
; bras %r1, 20 ; data.u128 0x0a1e000d0b1702180403090b15100f0c ; vl %v5, 0(%r1)
; vperm %v24, %v24, %v25, %v5
; br %r14
function %shuffle_2(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47]
return v2
}
; block0:
; vgbm %v5, 1
; bras %r1, 20 ; data.u128 0x8080808080808080808080808080800f ; vl %v7, 0(%r1)
; vperm %v17, %v24, %v25, %v7
; vn %v24, %v5, %v17
; br %r14
function %shuffle_vmrhg_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15]
return v2
}
; block0:
; vmrhg %v24, %v24, %v25
; br %r14
function %shuffle_vmrhf_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 8 9 10 11 28 29 30 31 12 13 14 15]
return v2
}
; block0:
; vmrhf %v24, %v24, %v25
; br %r14
function %shuffle_vmrhh_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 8 9 26 27 10 11 28 29 12 13 30 31 14 15]
return v2
}
; block0:
; vmrhh %v24, %v24, %v25
; br %r14
function %shuffle_vmrhb_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 8 25 9 26 10 27 11 28 12 29 13 30 14 31 15]
return v2
}
; block0:
; vmrhb %v24, %v24, %v25
; br %r14
function %shuffle_vmrhg_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31]
return v2
}
; block0:
; vmrhg %v24, %v25, %v24
; br %r14
function %shuffle_vmrhf_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 24 25 26 27 12 13 14 15 28 29 30 31]
return v2
}
; block0:
; vmrhf %v24, %v25, %v24
; br %r14
function %shuffle_vmrhh_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 24 25 10 11 26 27 12 13 28 29 14 15 30 31]
return v2
}
; block0:
; vmrhh %v24, %v25, %v24
; br %r14
function %shuffle_vmrhb_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 24 9 25 10 26 11 27 12 28 13 29 14 30 15 31]
return v2
}
; block0:
; vmrhb %v24, %v25, %v24
; br %r14
function %shuffle_vmrhg_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 12 13 14 15 8 9 10 11 12 13 14 15]
return v2
}
; block0:
; vmrhg %v24, %v24, %v24
; br %r14
function %shuffle_vmrhf_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 8 9 10 11 12 13 14 15 12 13 14 15]
return v2
}
; block0:
; vmrhf %v24, %v24, %v24
; br %r14
function %shuffle_vmrhh_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 8 9 10 11 10 11 12 13 12 13 14 15 14 15]
return v2
}
; block0:
; vmrhh %v24, %v24, %v24
; br %r14
function %shuffle_vmrhb_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15]
return v2
}
; block0:
; vmrhb %v24, %v24, %v24
; br %r14
function %shuffle_vmrhg_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 28 29 30 31 24 25 26 27 28 29 30 31]
return v2
}
; block0:
; vmrhg %v24, %v25, %v25
; br %r14
function %shuffle_vmrhf_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 24 25 26 27 28 29 30 31 28 29 30 31]
return v2
}
; block0:
; vmrhf %v24, %v25, %v25
; br %r14
function %shuffle_vmrhh_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 24 25 26 27 26 27 28 29 28 29 30 31 30 31]
return v2
}
; block0:
; vmrhh %v24, %v25, %v25
; br %r14
function %shuffle_vmrhb_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31]
return v2
}
; block0:
; vmrhb %v24, %v25, %v25
; br %r14
function %shuffle_vmrlg_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 20 21 22 23 0 1 2 3 4 5 6 7]
return v2
}
; block0:
; vmrlg %v24, %v24, %v25
; br %r14
function %shuffle_vmrlf_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 0 1 2 3 20 21 22 23 4 5 6 7]
return v2
}
; block0:
; vmrlf %v24, %v24, %v25
; br %r14
function %shuffle_vmrlh_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 0 1 18 19 2 3 20 21 4 5 22 23 6 7]
return v2
}
; block0:
; vmrlh %v24, %v24, %v25
; br %r14
function %shuffle_vmrlb_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 0 17 1 18 2 19 3 20 4 21 5 22 6 23 7]
return v2
}
; block0:
; vmrlb %v24, %v24, %v25
; br %r14
function %shuffle_vmrlg_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23]
return v2
}
; block0:
; vmrlg %v24, %v25, %v24
; br %r14
function %shuffle_vmrlf_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23]
return v2
}
; block0:
; vmrlf %v24, %v25, %v24
; br %r14
function %shuffle_vmrlh_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 16 17 2 3 18 19 4 5 20 21 6 7 22 23]
return v2
}
; block0:
; vmrlh %v24, %v25, %v24
; br %r14
function %shuffle_vmrlb_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23]
return v2
}
; block0:
; vmrlb %v24, %v25, %v24
; br %r14
function %shuffle_vmrlg_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
return v2
}
; block0:
; vmrlg %v24, %v24, %v24
; br %r14
function %shuffle_vmrlf_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 0 1 2 3 4 5 6 7 4 5 6 7]
return v2
}
; block0:
; vmrlf %v24, %v24, %v24
; br %r14
function %shuffle_vmrlh_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 0 1 2 3 2 3 4 5 4 5 6 7 6 7]
return v2
}
; block0:
; vmrlh %v24, %v24, %v24
; br %r14
function %shuffle_vmrlb_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7]
return v2
}
; block0:
; vmrlb %v24, %v24, %v24
; br %r14
function %shuffle_vmrlg_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 20 21 22 23 16 17 18 19 20 21 22 23]
return v2
}
; block0:
; vmrlg %v24, %v25, %v25
; br %r14
function %shuffle_vmrlf_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 16 17 18 19 20 21 22 23 20 21 22 23]
return v2
}
; block0:
; vmrlf %v24, %v25, %v25
; br %r14
function %shuffle_vmrlh_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 16 17 18 19 18 19 20 21 20 21 22 23 22 23]
return v2
}
; block0:
; vmrlh %v24, %v25, %v25
; br %r14
function %shuffle_vmrlb_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 23]
return v2
}
; block0:
; vmrlb %v24, %v25, %v25
; br %r14
;; Special patterns that can be implemented via PACK (vpkg / vpkf / vpkh).
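;; PACK keeps the rightmost half of every element of both operands, e.g.
;; vpkg a, b = (lo32(a.dw0), lo32(a.dw1), lo32(b.dw0), lo32(b.dw1)), and
;; likewise for word/halfword elements.  In LE lane order the low word of
;; each doubleword occupies LE bytes 0-3 and 8-11, so the
;; %shuffle_vpkg_xy mask below, which takes exactly those bytes of each
;; operand (y in the low result positions, x in the high ones), reduces
;; to vpkg %v24, %v24, %v25.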
function %shuffle_vpkg_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 24 25 26 27 0 1 2 3 8 9 10 11]
return v2
}
; block0:
; vpkg %v24, %v24, %v25
; br %r14
function %shuffle_vpkf_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 20 21 24 25 28 29 0 1 4 5 8 9 12 13]
return v2
}
; block0:
; vpkf %v24, %v24, %v25
; br %r14
function %shuffle_vpkh_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 18 20 22 24 26 28 30 0 2 4 6 8 10 12 14]
return v2
}
; block0:
; vpkh %v24, %v24, %v25
; br %r14
function %shuffle_vpkg_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 8 9 10 11 16 17 18 19 24 25 26 27]
return v2
}
; block0:
; vpkg %v24, %v25, %v24
; br %r14
function %shuffle_vpkf_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 4 5 8 9 12 13 16 17 20 21 24 25 28 29]
return v2
}
; block0:
; vpkf %v24, %v25, %v24
; br %r14
function %shuffle_vpkh_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30]
return v2
}
; block0:
; vpkh %v24, %v25, %v24
; br %r14
function %shuffle_vpkg_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 8 9 10 11 0 1 2 3 8 9 10 11]
return v2
}
; block0:
; vpkg %v24, %v24, %v24
; br %r14
function %shuffle_vpkf_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 4 5 8 9 12 13 0 1 4 5 8 9 12 13]
return v2
}
; block0:
; vpkf %v24, %v24, %v24
; br %r14
function %shuffle_vpkh_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 2 4 6 8 10 12 14 0 2 4 6 8 10 12 14]
return v2
}
; block0:
; vpkh %v24, %v24, %v24
; br %r14
function %shuffle_vpkg_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 24 25 26 27 16 17 18 19 24 25 26 27]
return v2
}
; block0:
; vpkg %v24, %v25, %v25
; br %r14
function %shuffle_vpkf_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 20 21 24 25 28 29 16 17 20 21 24 25 28 29]
return v2
}
; block0:
; vpkf %v24, %v25, %v25
; br %r14
function %shuffle_vpkh_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 18 20 22 24 26 28 30 16 18 20 22 24 26 28 30]
return v2
}
; block0:
; vpkh %v24, %v25, %v25
; br %r14

View File

@@ -9,10 +9,9 @@ block0(v0: i8x16, v1: i8x16):
; block0:
; vgbm %v5, 0
; vrepib %v7, 239
; vno %v17, %v25, %v25
; vmxlb %v19, %v7, %v17
; vperm %v24, %v5, %v24, %v19
; vrepib %v7, 16
; vmnlb %v17, %v7, %v25
; vperm %v24, %v24, %v5, %v17
; br %r14
function %shuffle_0(i8x16, i8x16) -> i8x16 {
@@ -22,7 +21,7 @@ block0(v0: i8x16, v1: i8x16):
}
; block0:
; vrepib %v5, 15
; vgbm %v5, 0
; vperm %v24, %v24, %v25, %v5
; br %r14
@@ -33,7 +32,7 @@ block0(v0: i8x16, v1: i8x16):
}
; block0:
; bras %r1, 20 ; data.u128 0x0a1e000d0b1702180403090b15100f0c ; vl %v5, 0(%r1)
; bras %r1, 20 ; data.u128 0x03001f1a04060c0b170d1804020f1105 ; vl %v5, 0(%r1)
; vperm %v24, %v24, %v25, %v5
; br %r14
@@ -44,15 +43,15 @@ block0(v0: i8x16, v1: i8x16):
}
; block0:
; vgbm %v5, 1
; bras %r1, 20 ; data.u128 0x8080808080808080808080808080800f ; vl %v7, 0(%r1)
; vgbm %v5, 32768
; bras %r1, 20 ; data.u128 0x00808080808080808080808080808080 ; vl %v7, 0(%r1)
; vperm %v17, %v24, %v25, %v7
; vn %v24, %v5, %v17
; br %r14
function %shuffle_vmrhg_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15]
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23]
return v2
}
@@ -62,7 +61,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrhf_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 8 9 10 11 28 29 30 31 12 13 14 15]
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23]
return v2
}
@@ -72,7 +71,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrhh_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 8 9 26 27 10 11 28 29 12 13 30 31 14 15]
v2 = shuffle.i8x16 v0, v1, [0 1 16 17 2 3 18 19 4 5 20 21 6 7 22 23]
return v2
}
@@ -82,7 +81,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrhb_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 8 25 9 26 10 27 11 28 12 29 13 30 14 31 15]
v2 = shuffle.i8x16 v0, v1, [0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23]
return v2
}
@@ -92,7 +91,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrhg_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31]
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 20 21 22 23 0 1 2 3 4 5 6 7]
return v2
}
@@ -102,7 +101,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrhf_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 24 25 26 27 12 13 14 15 28 29 30 31]
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 0 1 2 3 20 21 22 23 4 5 6 7]
return v2
}
@@ -112,7 +111,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrhh_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 24 25 10 11 26 27 12 13 28 29 14 15 30 31]
v2 = shuffle.i8x16 v0, v1, [16 17 0 1 18 19 2 3 20 21 4 5 22 23 6 7]
return v2
}
@@ -122,7 +121,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrhb_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 24 9 25 10 26 11 27 12 28 13 29 14 30 15 31]
v2 = shuffle.i8x16 v0, v1, [16 0 17 1 18 2 19 3 20 4 21 5 22 6 23 7]
return v2
}
@@ -132,7 +131,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrhg_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 12 13 14 15 8 9 10 11 12 13 14 15]
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
return v2
}
@@ -142,7 +141,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrhf_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 8 9 10 11 12 13 14 15 12 13 14 15]
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 0 1 2 3 4 5 6 7 4 5 6 7]
return v2
}
@@ -152,7 +151,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrhh_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 8 9 10 11 10 11 12 13 12 13 14 15 14 15]
v2 = shuffle.i8x16 v0, v1, [0 1 0 1 2 3 2 3 4 5 4 5 6 7 6 7]
return v2
}
@@ -162,7 +161,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrhb_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15]
v2 = shuffle.i8x16 v0, v1, [0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7]
return v2
}
@@ -172,7 +171,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrhg_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 28 29 30 31 24 25 26 27 28 29 30 31]
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 20 21 22 23 16 17 18 19 20 21 22 23]
return v2
}
@@ -182,7 +181,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrhf_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 24 25 26 27 28 29 30 31 28 29 30 31]
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 16 17 18 19 20 21 22 23 20 21 22 23]
return v2
}
@@ -192,7 +191,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrhh_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 24 25 26 27 26 27 28 29 28 29 30 31 30 31]
v2 = shuffle.i8x16 v0, v1, [16 17 16 17 18 19 18 19 20 21 20 21 22 23 22 23]
return v2
}
@@ -202,7 +201,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrhb_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31]
v2 = shuffle.i8x16 v0, v1, [16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 23]
return v2
}
@@ -212,7 +211,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlg_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 20 21 22 23 0 1 2 3 4 5 6 7]
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31]
return v2
}
@@ -222,7 +221,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlf_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 0 1 2 3 20 21 22 23 4 5 6 7]
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 24 25 26 27 12 13 14 15 28 29 30 31]
return v2
}
@@ -232,7 +231,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlh_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 0 1 18 19 2 3 20 21 4 5 22 23 6 7]
v2 = shuffle.i8x16 v0, v1, [8 9 24 25 10 11 26 27 12 13 28 29 14 15 30 31]
return v2
}
@@ -242,7 +241,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlb_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 0 17 1 18 2 19 3 20 4 21 5 22 6 23 7]
v2 = shuffle.i8x16 v0, v1, [8 24 9 25 10 26 11 27 12 28 13 29 14 30 15 31]
return v2
}
@@ -252,7 +251,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlg_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23]
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15]
return v2
}
@@ -262,7 +261,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlf_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23]
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 8 9 10 11 28 29 30 31 12 13 14 15]
return v2
}
@@ -272,7 +271,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlh_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 16 17 2 3 18 19 4 5 20 21 6 7 22 23]
v2 = shuffle.i8x16 v0, v1, [24 25 8 9 26 27 10 11 28 29 12 13 30 31 14 15]
return v2
}
@@ -282,7 +281,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlb_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23]
v2 = shuffle.i8x16 v0, v1, [24 8 25 9 26 10 27 11 28 12 29 13 30 14 31 15]
return v2
}
@@ -292,7 +291,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlg_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 12 13 14 15 8 9 10 11 12 13 14 15]
return v2
}
@@ -302,7 +301,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlf_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 0 1 2 3 4 5 6 7 4 5 6 7]
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 8 9 10 11 12 13 14 15 12 13 14 15]
return v2
}
@@ -312,7 +311,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlh_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 0 1 2 3 2 3 4 5 4 5 6 7 6 7]
v2 = shuffle.i8x16 v0, v1, [8 9 8 9 10 11 10 11 12 13 12 13 14 15 14 15]
return v2
}
@@ -322,7 +321,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlb_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7]
v2 = shuffle.i8x16 v0, v1, [8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15]
return v2
}
@@ -332,7 +331,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlg_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 20 21 22 23 16 17 18 19 20 21 22 23]
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 28 29 30 31 24 25 26 27 28 29 30 31]
return v2
}
@@ -342,7 +341,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlf_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 16 17 18 19 20 21 22 23 20 21 22 23]
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 24 25 26 27 28 29 30 31 28 29 30 31]
return v2
}
@@ -352,7 +351,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlh_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 16 17 18 19 18 19 20 21 20 21 22 23 22 23]
v2 = shuffle.i8x16 v0, v1, [24 25 24 25 26 27 26 27 28 29 28 29 30 31 30 31]
return v2
}
@@ -362,7 +361,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vmrlb_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 23]
v2 = shuffle.i8x16 v0, v1, [24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31]
return v2
}
@@ -373,7 +372,7 @@ block0(v0: i8x16, v1: i8x16):
;; Special patterns that can be implemented via PACK.
function %shuffle_vpkg_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 24 25 26 27 0 1 2 3 8 9 10 11]
v2 = shuffle.i8x16 v0, v1, [4 5 6 7 12 13 14 15 20 21 22 23 28 29 30 31]
return v2
}
@@ -383,7 +382,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vpkf_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 20 21 24 25 28 29 0 1 4 5 8 9 12 13]
v2 = shuffle.i8x16 v0, v1, [2 3 6 7 10 11 14 15 18 19 22 23 26 27 30 31]
return v2
}
@@ -393,7 +392,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vpkh_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 18 20 22 24 26 28 30 0 2 4 6 8 10 12 14]
v2 = shuffle.i8x16 v0, v1, [1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31]
return v2
}
@@ -403,7 +402,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vpkg_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 8 9 10 11 16 17 18 19 24 25 26 27]
v2 = shuffle.i8x16 v0, v1, [20 21 22 23 28 29 30 31 4 5 6 7 12 13 14 15]
return v2
}
@@ -413,7 +412,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vpkf_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 4 5 8 9 12 13 16 17 20 21 24 25 28 29]
v2 = shuffle.i8x16 v0, v1, [18 19 22 23 26 27 30 31 2 3 6 7 10 11 14 15]
return v2
}
@@ -423,7 +422,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vpkh_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30]
v2 = shuffle.i8x16 v0, v1, [17 19 21 23 25 27 29 31 1 3 5 7 9 11 13 15]
return v2
}
@@ -433,7 +432,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vpkg_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 8 9 10 11 0 1 2 3 8 9 10 11]
v2 = shuffle.i8x16 v0, v1, [4 5 6 7 12 13 14 15 4 5 6 7 12 13 14 15]
return v2
}
@@ -443,7 +442,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vpkf_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 4 5 8 9 12 13 0 1 4 5 8 9 12 13]
v2 = shuffle.i8x16 v0, v1, [2 3 6 7 10 11 14 15 2 3 6 7 10 11 14 15]
return v2
}
@@ -453,7 +452,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vpkh_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 2 4 6 8 10 12 14 0 2 4 6 8 10 12 14]
v2 = shuffle.i8x16 v0, v1, [1 3 5 7 9 11 13 15 1 3 5 7 9 11 13 15]
return v2
}
@@ -463,7 +462,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vpkg_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 24 25 26 27 16 17 18 19 24 25 26 27]
v2 = shuffle.i8x16 v0, v1, [20 21 22 23 28 29 30 31 20 21 22 23 28 29 30 31]
return v2
}
@@ -473,7 +472,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vpkf_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 20 21 24 25 28 29 16 17 20 21 24 25 28 29]
v2 = shuffle.i8x16 v0, v1, [18 19 22 23 26 27 30 31 18 19 22 23 26 27 30 31]
return v2
}
@@ -483,7 +482,7 @@ block0(v0: i8x16, v1: i8x16):
function %shuffle_vpkh_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 18 20 22 24 26 28 30 16 18 20 22 24 26 28 30]
v2 = shuffle.i8x16 v0, v1, [17 19 21 23 25 27 29 31 17 19 21 23 25 27 29 31]
return v2
}
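;; These functions use the default ABI and therefore big-endian lane
;; order, where a shuffle index names a register byte directly.  Each
;; updated mask appears to be the mirror image of its old LE-lane-order
;; form: new entry j is 15-o (first operand) or 47-o (second operand),
;; for o the old entry 15-j, and the selected merge/pack instruction is
;; unchanged in every case.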

View File

@@ -216,7 +216,7 @@ block0(v0: i64):
}
; block0:
; vlebrg %v3, 0(%r2), 0
; ld %f3, 0(%r2)
; vuplhb %v24, %v3
; br %r14
@@ -227,8 +227,9 @@ block0(v0: i64):
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuplhh %v24, %v3
; ld %f3, 0(%r2)
; verllh %v5, %v3, 8
; vuplhh %v24, %v5
; br %r14
function %uload32x2_little(i64) -> i64x2 {
@@ -239,7 +240,8 @@ block0(v0: i64):
; block0:
; vlebrg %v3, 0(%r2), 0
; vuplhf %v24, %v3
; verllg %v5, %v3, 32
; vuplhf %v24, %v5
; br %r14
function %sload8x8_little(i64) -> i16x8 {
@@ -249,7 +251,7 @@ block0(v0: i64):
}
; block0:
; vlebrg %v3, 0(%r2), 0
; ld %f3, 0(%r2)
; vuphb %v24, %v3
; br %r14
@@ -260,8 +262,9 @@ block0(v0: i64):
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuphh %v24, %v3
; ld %f3, 0(%r2)
; verllh %v5, %v3, 8
; vuphh %v24, %v5
; br %r14
function %sload32x2_little(i64) -> i64x2 {
@@ -272,7 +275,8 @@ block0(v0: i64):
; block0:
; vlebrg %v3, 0(%r2), 0
; vuphf %v24, %v3
; verllg %v5, %v3, 32
; vuphf %v24, %v5
; br %r14
function %load_i8x16_little(i64) -> i8x16 {
@@ -282,7 +286,7 @@ block0(v0: i64):
}
; block0:
; vlbrq %v24, 0(%r2)
; vl %v24, 0(%r2)
; br %r14
function %load_i16x8_little(i64) -> i16x8 {
@@ -292,7 +296,7 @@ block0(v0: i64):
}
; block0:
; vlbrq %v24, 0(%r2)
; vlbrh %v24, 0(%r2)
; br %r14
function %load_i32x4_little(i64) -> i32x4 {
@@ -302,7 +306,7 @@ block0(v0: i64):
}
; block0:
; vlbrq %v24, 0(%r2)
; vlbrf %v24, 0(%r2)
; br %r14
function %load_i64x2_little(i64) -> i64x2 {
@@ -312,7 +316,7 @@ block0(v0: i64):
}
; block0:
; vlbrq %v24, 0(%r2)
; vlbrg %v24, 0(%r2)
; br %r14
function %load_i128_little(i64) -> i128 {
@@ -333,7 +337,7 @@ block0(v0: i64):
}
; block0:
; vlbrq %v24, 0(%r2)
; vlbrf %v24, 0(%r2)
; br %r14
function %load_f64x2_little(i64) -> f64x2 {
@@ -343,7 +347,7 @@ block0(v0: i64):
}
; block0:
; vlbrq %v24, 0(%r2)
; vlbrg %v24, 0(%r2)
; br %r14
function %store_i8x16_little(i8x16, i64) {
@@ -353,7 +357,7 @@ block0(v0: i8x16, v1: i64):
}
; block0:
; vstbrq %v24, 0(%r2)
; vst %v24, 0(%r2)
; br %r14
function %store_i16x8_little(i16x8, i64) {
@@ -363,7 +367,7 @@ block0(v0: i16x8, v1: i64):
}
; block0:
; vstbrq %v24, 0(%r2)
; vstbrh %v24, 0(%r2)
; br %r14
function %store_i32x4_little(i32x4, i64) {
@@ -373,7 +377,7 @@ block0(v0: i32x4, v1: i64):
}
; block0:
; vstbrq %v24, 0(%r2)
; vstbrf %v24, 0(%r2)
; br %r14
function %store_i64x2_little(i64x2, i64) {
@@ -383,7 +387,7 @@ block0(v0: i64x2, v1: i64):
}
; block0:
; vstbrq %v24, 0(%r2)
; vstbrg %v24, 0(%r2)
; br %r14
function %store_i128_little(i128, i64) {
@@ -404,7 +408,7 @@ block0(v0: f32x4, v1: i64):
}
; block0:
; vstbrq %v24, 0(%r2)
; vstbrf %v24, 0(%r2)
; br %r14
function %store_f64x2_little(f64x2, i64) {
@@ -414,6 +418,6 @@ block0(v0: f64x2, v1: i64):
}
; block0:
; vstbrq %v24, 0(%r2)
; vstbrg %v24, 0(%r2)
; br %r14
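;; Under the default ABI (big-endian lane order), little-endian vector
;; accesses keep the element order and only swap bytes within each
;; element: hence the per-element vlbr{h,f,g} / vstbr{h,f,g} forms
;; replacing the whole-vector vlbrq / vstbrq, and the verll rotates
;; appearing after the 8-byte extending loads.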

View File

@@ -0,0 +1,379 @@
test compile precise-output
target s390x arch13
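;; Sketch of the selection logic, as visible in the output below:
;; wasmtime_system_v implies little-endian lane order, so lane 0 lives in
;; the rightmost register bytes.  Loading big-endian data must therefore
;; reverse the element order while keeping the bytes within each element,
;; which on arch13 is a single vler{h,f,g}; for i8x16 the elements are
;; single bytes, so the full 16-byte reversal vlbrq does the job.  The
;; 8-byte extending loads combine a doubleword load (vlebrg or plain ld)
;; with verll rotates as needed to put each element's bytes in place
;; before unpacking.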
function %uload8x8_big(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = uload8x8 big v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuplhb %v24, %v3
; br %r14
function %uload16x4_big(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = uload16x4 big v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; verllh %v5, %v3, 8
; vuplhh %v24, %v5
; br %r14
function %uload32x2_big(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = uload32x2 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; verllg %v5, %v3, 32
; vuplhf %v24, %v5
; br %r14
function %sload8x8_big(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = sload8x8 big v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuphb %v24, %v3
; br %r14
function %sload16x4_big(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = sload16x4 big v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; verllh %v5, %v3, 8
; vuphh %v24, %v5
; br %r14
function %sload32x2_big(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = sload32x2 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; verllg %v5, %v3, 32
; vuphf %v24, %v5
; br %r14
function %load_i8x16_big(i64) -> i8x16 wasmtime_system_v {
block0(v0: i64):
v1 = load.i8x16 big v0
return v1
}
; block0:
; vlbrq %v24, 0(%r2)
; br %r14
function %load_i16x8_big(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = load.i16x8 big v0
return v1
}
; block0:
; vlerh %v24, 0(%r2)
; br %r14
function %load_i32x4_big(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.i32x4 big v0
return v1
}
; block0:
; vlerf %v24, 0(%r2)
; br %r14
function %load_i64x2_big(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.i64x2 big v0
return v1
}
; block0:
; vlerg %v24, 0(%r2)
; br %r14
function %load_f32x4_big(i64) -> f32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.f32x4 big v0
return v1
}
; block0:
; vlerf %v24, 0(%r2)
; br %r14
function %load_f64x2_big(i64) -> f64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.f64x2 big v0
return v1
}
; block0:
; vlerg %v24, 0(%r2)
; br %r14
function %store_i8x16_big(i8x16, i64) wasmtime_system_v {
block0(v0: i8x16, v1: i64):
store.i8x16 big v0, v1
return
}
; block0:
; vstbrq %v24, 0(%r2)
; br %r14
function %store_i16x8_big(i16x8, i64) wasmtime_system_v {
block0(v0: i16x8, v1: i64):
store.i16x8 big v0, v1
return
}
; block0:
; vsterh %v24, 0(%r2)
; br %r14
function %store_i32x4_big(i32x4, i64) wasmtime_system_v {
block0(v0: i32x4, v1: i64):
store.i32x4 big v0, v1
return
}
; block0:
; vsterf %v24, 0(%r2)
; br %r14
function %store_i64x2_big(i64x2, i64) wasmtime_system_v {
block0(v0: i64x2, v1: i64):
store.i64x2 big v0, v1
return
}
; block0:
; vsterg %v24, 0(%r2)
; br %r14
function %store_f32x4_big(f32x4, i64) wasmtime_system_v {
block0(v0: f32x4, v1: i64):
store.f32x4 big v0, v1
return
}
; block0:
; vsterf %v24, 0(%r2)
; br %r14
function %store_f64x2_big(f64x2, i64) wasmtime_system_v {
block0(v0: f64x2, v1: i64):
store.f64x2 big v0, v1
return
}
; block0:
; vsterg %v24, 0(%r2)
; br %r14
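;; With little-endian data and little-endian lane order, the register
;; image is simply the byte reversal of the 16 bytes in memory, so every
;; element type below collapses to vlbrq / vstbrq, and the extending
;; loads can use vlebrg with no fix-up rotate.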
function %uload8x8_little(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = uload8x8 little v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuplhb %v24, %v3
; br %r14
function %uload16x4_little(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = uload16x4 little v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuplhh %v24, %v3
; br %r14
function %uload32x2_little(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = uload32x2 little v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuplhf %v24, %v3
; br %r14
function %sload8x8_little(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = sload8x8 little v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuphb %v24, %v3
; br %r14
function %sload16x4_little(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = sload16x4 little v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuphh %v24, %v3
; br %r14
function %sload32x2_little(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = sload32x2 little v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuphf %v24, %v3
; br %r14
function %load_i8x16_little(i64) -> i8x16 wasmtime_system_v {
block0(v0: i64):
v1 = load.i8x16 little v0
return v1
}
; block0:
; vlbrq %v24, 0(%r2)
; br %r14
function %load_i16x8_little(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = load.i16x8 little v0
return v1
}
; block0:
; vlbrq %v24, 0(%r2)
; br %r14
function %load_i32x4_little(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.i32x4 little v0
return v1
}
; block0:
; vlbrq %v24, 0(%r2)
; br %r14
function %load_i64x2_little(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.i64x2 little v0
return v1
}
; block0:
; vlbrq %v24, 0(%r2)
; br %r14
function %load_f32x4_little(i64) -> f32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.f32x4 little v0
return v1
}
; block0:
; vlbrq %v24, 0(%r2)
; br %r14
function %load_f64x2_little(i64) -> f64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.f64x2 little v0
return v1
}
; block0:
; vlbrq %v24, 0(%r2)
; br %r14
function %store_i8x16_little(i8x16, i64) wasmtime_system_v {
block0(v0: i8x16, v1: i64):
store.i8x16 little v0, v1
return
}
; block0:
; vstbrq %v24, 0(%r2)
; br %r14
function %store_i16x8_little(i16x8, i64) wasmtime_system_v {
block0(v0: i16x8, v1: i64):
store.i16x8 little v0, v1
return
}
; block0:
; vstbrq %v24, 0(%r2)
; br %r14
function %store_i32x4_little(i32x4, i64) wasmtime_system_v {
block0(v0: i32x4, v1: i64):
store.i32x4 little v0, v1
return
}
; block0:
; vstbrq %v24, 0(%r2)
; br %r14
function %store_i64x2_little(i64x2, i64) wasmtime_system_v {
block0(v0: i64x2, v1: i64):
store.i64x2 little v0, v1
return
}
; block0:
; vstbrq %v24, 0(%r2)
; br %r14
function %store_f32x4_little(f32x4, i64) wasmtime_system_v {
block0(v0: f32x4, v1: i64):
store.f32x4 little v0, v1
return
}
; block0:
; vstbrq %v24, 0(%r2)
; br %r14
function %store_f64x2_little(f64x2, i64) wasmtime_system_v {
block0(v0: f64x2, v1: i64):
store.f64x2 little v0, v1
return
}
; block0:
; vstbrq %v24, 0(%r2)
; br %r14

View File

@@ -0,0 +1,494 @@
test compile precise-output
target s390x
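;; The same tests on a base s390x target without the arch13
;; byte-/element-reversing vector loads and stores, so the reversal is
;; done in registers instead: vpdi ..., 4 swaps the two doublewords,
;; verllg ..., 32 swaps the words within each doubleword, and
;; verllf ..., 16 the halfwords within each word.  For load.i16x8 big
;; below, vl + vpdi + verllg + verllf thus reverses the halfword order
;; while keeping the bytes inside each halfword intact.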
function %uload8x8_big(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = uload8x8 big v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuplhb %v24, %v5
; br %r14
function %uload16x4_big(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = uload16x4 big v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; verllh %v7, %v5, 8
; vuplhh %v24, %v7
; br %r14
function %uload32x2_big(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = uload32x2 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; verllg %v5, %v3, 32
; vuplhf %v24, %v5
; br %r14
function %sload8x8_big(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = sload8x8 big v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuphb %v24, %v5
; br %r14
function %sload16x4_big(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = sload16x4 big v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; verllh %v7, %v5, 8
; vuphh %v24, %v7
; br %r14
function %sload32x2_big(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = sload32x2 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; verllg %v5, %v3, 32
; vuphf %v24, %v5
; br %r14
function %load_i8x16_big(i64) -> i8x16 wasmtime_system_v {
block0(v0: i64):
v1 = load.i8x16 big v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; br %r14
function %load_i16x8_big(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = load.i16x8 big v0
return v1
}
; block0:
; vl %v3, 0(%r2)
; vpdi %v5, %v3, %v3, 4
; verllg %v7, %v5, 32
; verllf %v24, %v7, 16
; br %r14
function %load_i32x4_big(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.i32x4 big v0
return v1
}
; block0:
; vl %v3, 0(%r2)
; vpdi %v5, %v3, %v3, 4
; verllg %v24, %v5, 32
; br %r14
function %load_i64x2_big(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.i64x2 big v0
return v1
}
; block0:
; vl %v3, 0(%r2)
; vpdi %v24, %v3, %v3, 4
; br %r14
function %load_f32x4_big(i64) -> f32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.f32x4 big v0
return v1
}
; block0:
; vl %v3, 0(%r2)
; vpdi %v5, %v3, %v3, 4
; verllg %v24, %v5, 32
; br %r14
function %load_f64x2_big(i64) -> f64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.f64x2 big v0
return v1
}
; block0:
; vl %v3, 0(%r2)
; vpdi %v24, %v3, %v3, 4
; br %r14
function %store_i8x16_big(i8x16, i64) wasmtime_system_v {
block0(v0: i8x16, v1: i64):
store.i8x16 big v0, v1
return
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; br %r14
function %store_i16x8_big(i16x8, i64) wasmtime_system_v {
block0(v0: i16x8, v1: i64):
store.i16x8 big v0, v1
return
}
; block0:
; vpdi %v4, %v24, %v24, 4
; verllg %v6, %v4, 32
; verllf %v16, %v6, 16
; vst %v16, 0(%r2)
; br %r14
function %store_i32x4_big(i32x4, i64) wasmtime_system_v {
block0(v0: i32x4, v1: i64):
store.i32x4 big v0, v1
return
}
; block0:
; vpdi %v4, %v24, %v24, 4
; verllg %v6, %v4, 32
; vst %v6, 0(%r2)
; br %r14
function %store_i64x2_big(i64x2, i64) wasmtime_system_v {
block0(v0: i64x2, v1: i64):
store.i64x2 big v0, v1
return
}
; block0:
; vpdi %v4, %v24, %v24, 4
; vst %v4, 0(%r2)
; br %r14
function %store_f32x4_big(f32x4, i64) wasmtime_system_v {
block0(v0: f32x4, v1: i64):
store.f32x4 big v0, v1
return
}
; block0:
; vpdi %v4, %v24, %v24, 4
; verllg %v6, %v4, 32
; vst %v6, 0(%r2)
; br %r14
function %store_f64x2_big(f64x2, i64) wasmtime_system_v {
block0(v0: f64x2, v1: i64):
store.f64x2 big v0, v1
return
}
; block0:
; vpdi %v4, %v24, %v24, 4
; vst %v4, 0(%r2)
; br %r14
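;; Little-endian data under little-endian lane order again needs only a
;; whole-vector byte swap; without vlbrq that is two byte-reversed
;; doubleword loads (lrvg) recombined in swapped order by vlvgp, and the
;; mirror sequence (vlgvg + strvg) on the store side.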
function %uload8x8_little(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = uload8x8 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuplhb %v24, %v5
; br %r14
function %uload16x4_little(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = uload16x4 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuplhh %v24, %v5
; br %r14
function %uload32x2_little(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = uload32x2 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuplhf %v24, %v5
; br %r14
function %sload8x8_little(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = sload8x8 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuphb %v24, %v5
; br %r14
function %sload16x4_little(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = sload16x4 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuphh %v24, %v5
; br %r14
function %sload32x2_little(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = sload32x2 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuphf %v24, %v5
; br %r14
function %load_i8x16_little(i64) -> i8x16 wasmtime_system_v {
block0(v0: i64):
v1 = load.i8x16 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; br %r14
function %load_i16x8_little(i64) -> i16x8 wasmtime_system_v {
block0(v0: i64):
v1 = load.i16x8 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; br %r14
function %load_i32x4_little(i64) -> i32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.i32x4 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; br %r14
function %load_i64x2_little(i64) -> i64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.i64x2 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; br %r14
function %load_f32x4_little(i64) -> f32x4 wasmtime_system_v {
block0(v0: i64):
v1 = load.f32x4 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; br %r14
function %load_f64x2_little(i64) -> f64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.f64x2 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; br %r14
function %load_f64x2_sum_little(i64, i64) -> f64x2 wasmtime_system_v {
block0(v0: i64, v1: i64):
v2 = iadd.i64 v0, v1
v3 = load.f64x2 little v2
return v3
}
; block0:
; lrvg %r4, 0(%r3,%r2)
; lrvg %r5, 8(%r3,%r2)
; vlvgp %v24, %r5, %r4
; br %r14
function %load_f64x2_off_little(i64) -> f64x2 wasmtime_system_v {
block0(v0: i64):
v1 = load.f64x2 little v0+128
return v1
}
; block0:
; lrvg %r5, 128(%r2)
; lrvg %r3, 136(%r2)
; vlvgp %v24, %r3, %r5
; br %r14
function %store_i8x16_little(i8x16, i64) wasmtime_system_v {
block0(v0: i8x16, v1: i64):
store.i8x16 little v0, v1
return
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; br %r14
function %store_i16x8_little(i16x8, i64) wasmtime_system_v {
block0(v0: i16x8, v1: i64):
store.i16x8 little v0, v1
return
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; br %r14
function %store_i32x4_little(i32x4, i64) wasmtime_system_v {
block0(v0: i32x4, v1: i64):
store.i32x4 little v0, v1
return
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; br %r14
function %store_i64x2_little(i64x2, i64) wasmtime_system_v {
block0(v0: i64x2, v1: i64):
store.i64x2 little v0, v1
return
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; br %r14
function %store_f32x4_little(f32x4, i64) wasmtime_system_v {
block0(v0: f32x4, v1: i64):
store.f32x4 little v0, v1
return
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; br %r14
function %store_f64x2_little(f64x2, i64) wasmtime_system_v {
block0(v0: f64x2, v1: i64):
store.f64x2 little v0, v1
return
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; br %r14
function %store_f64x2_sum_little(f64x2, i64, i64) wasmtime_system_v {
block0(v0: f64x2, v1: i64, v2: i64):
v3 = iadd.i64 v1, v2
store.f64x2 little v0, v3
return
}
; block0:
; vlgvg %r5, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r5, 0(%r3,%r2)
; strvg %r4, 8(%r3,%r2)
; br %r14
function %store_f64x2_off_little(f64x2, i64) wasmtime_system_v {
block0(v0: f64x2, v1: i64):
store.f64x2 little v0, v1+128
return
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 128(%r2)
; strvg %r4, 136(%r2)
; br %r14

View File

@@ -216,9 +216,8 @@ block0(v0: i64):
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuplhb %v24, %v5
; ld %f3, 0(%r2)
; vuplhb %v24, %v3
; br %r14
function %uload16x4_little(i64) -> i32x4 {
@@ -228,8 +227,8 @@ block0(v0: i64):
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; ld %f3, 0(%r2)
; verllh %v5, %v3, 8
; vuplhh %v24, %v5
; br %r14
@@ -242,7 +241,8 @@ block0(v0: i64):
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuplhf %v24, %v5
; verllg %v7, %v5, 32
; vuplhf %v24, %v7
; br %r14
function %sload8x8_little(i64) -> i16x8 {
@@ -252,9 +252,8 @@ block0(v0: i64):
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuphb %v24, %v5
; ld %f3, 0(%r2)
; vuphb %v24, %v3
; br %r14
function %sload16x4_little(i64) -> i32x4 {
@@ -264,8 +263,8 @@ block0(v0: i64):
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; ld %f3, 0(%r2)
; verllh %v5, %v3, 8
; vuphh %v24, %v5
; br %r14
@@ -278,7 +277,8 @@ block0(v0: i64):
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuphf %v24, %v5
; verllg %v7, %v5, 32
; vuphf %v24, %v7
; br %r14
function %load_i8x16_little(i64) -> i8x16 {
@@ -288,9 +288,7 @@ block0(v0: i64):
}
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; vl %v24, 0(%r2)
; br %r14
function %load_i16x8_little(i64) -> i16x8 {
@@ -302,7 +300,10 @@ block0(v0: i64):
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; vlvgp %v7, %r3, %r5
; vpdi %v17, %v7, %v7, 4
; verllg %v19, %v17, 32
; verllf %v24, %v19, 16
; br %r14
function %load_i32x4_little(i64) -> i32x4 {
@@ -314,7 +315,9 @@ block0(v0: i64):
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; vlvgp %v7, %r3, %r5
; vpdi %v17, %v7, %v7, 4
; verllg %v24, %v17, 32
; br %r14
function %load_i64x2_little(i64) -> i64x2 {
@@ -326,7 +329,8 @@ block0(v0: i64):
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; vlvgp %v7, %r3, %r5
; vpdi %v24, %v7, %v7, 4
; br %r14
function %load_i128_little(i64) -> i128 {
@@ -351,7 +355,9 @@ block0(v0: i64):
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; vlvgp %v7, %r3, %r5
; vpdi %v17, %v7, %v7, 4
; verllg %v24, %v17, 32
; br %r14
function %load_f64x2_little(i64) -> f64x2 {
@@ -363,7 +369,8 @@ block0(v0: i64):
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; vlvgp %v7, %r3, %r5
; vpdi %v24, %v7, %v7, 4
; br %r14
function %load_f64x2_sum_little(i64, i64) -> f64x2 {
@@ -376,7 +383,8 @@ block0(v0: i64, v1: i64):
; block0:
; lrvg %r4, 0(%r3,%r2)
; lrvg %r5, 8(%r3,%r2)
; vlvgp %v24, %r5, %r4
; vlvgp %v17, %r5, %r4
; vpdi %v24, %v17, %v17, 4
; br %r14
function %load_f64x2_off_little(i64) -> f64x2 {
@@ -388,7 +396,8 @@ block0(v0: i64):
; block0:
; lrvg %r5, 128(%r2)
; lrvg %r3, 136(%r2)
; vlvgp %v24, %r3, %r5
; vlvgp %v7, %r3, %r5
; vpdi %v24, %v7, %v7, 4
; br %r14
function %store_i8x16_little(i8x16, i64) {
@@ -398,10 +407,7 @@ block0(v0: i8x16, v1: i64):
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; vst %v24, 0(%r2)
; br %r14
function %store_i16x8_little(i16x8, i64) {
@@ -411,10 +417,13 @@ block0(v0: i16x8, v1: i64):
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; vpdi %v4, %v24, %v24, 4
; verllg %v6, %v4, 32
; verllf %v16, %v6, 16
; vlgvg %r4, %v16, 1
; vlgvg %r3, %v16, 0
; strvg %r4, 0(%r2)
; strvg %r3, 8(%r2)
; br %r14
function %store_i32x4_little(i32x4, i64) {
@@ -424,8 +433,10 @@ block0(v0: i32x4, v1: i64):
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; vpdi %v4, %v24, %v24, 4
; verllg %v6, %v4, 32
; vlgvg %r3, %v6, 1
; lgdr %r4, %f6
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; br %r14
@@ -437,10 +448,11 @@ block0(v0: i64x2, v1: i64):
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; vpdi %v4, %v24, %v24, 4
; vlgvg %r4, %v4, 1
; lgdr %r3, %f4
; strvg %r4, 0(%r2)
; strvg %r3, 8(%r2)
; br %r14
function %store_i128_little(i128, i64) {
@@ -464,8 +476,10 @@ block0(v0: f32x4, v1: i64):
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; vpdi %v4, %v24, %v24, 4
; verllg %v6, %v4, 32
; vlgvg %r3, %v6, 1
; lgdr %r4, %f6
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; br %r14
@@ -477,10 +491,11 @@ block0(v0: f64x2, v1: i64):
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; vpdi %v4, %v24, %v24, 4
; vlgvg %r4, %v4, 1
; lgdr %r3, %f4
; strvg %r4, 0(%r2)
; strvg %r3, 8(%r2)
; br %r14
function %store_f64x2_sum_little(f64x2, i64, i64) {
@@ -491,8 +506,9 @@ block0(v0: f64x2, v1: i64, v2: i64):
}
; block0:
; vlgvg %r5, %v24, 1
; vlgvg %r4, %v24, 0
; vpdi %v6, %v24, %v24, 4
; vlgvg %r5, %v6, 1
; lgdr %r4, %f6
; strvg %r5, 0(%r3,%r2)
; strvg %r4, 8(%r3,%r2)
; br %r14
@@ -504,9 +520,10 @@ block0(v0: f64x2, v1: i64):
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 128(%r2)
; strvg %r4, 136(%r2)
; vpdi %v4, %v24, %v24, 4
; vlgvg %r4, %v4, 1
; lgdr %r3, %f4
; strvg %r4, 128(%r2)
; strvg %r3, 136(%r2)
; br %r14
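;; The non-arch13 counterpart of the change above: with big-endian lane
;; order, little-endian accesses now need the in-register vpdi / verll
;; element fix-ups, while i8x16 (whose byte lanes are already in memory
;; order) reduces to plain vl / vst.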

View File

@@ -20,7 +20,8 @@ block0(v0: i64x2, v1: i64x2, v2: i32x4):
v4 = bitselect v3, v0, v1
return v4
}
; run: %mask_casted([0 0], [0xFFFFFF 0xFFFF4F], [0xFFF1 0 0xF 0]) == [0xFF000E 0xFFFF40]
; N.B. The mask is chosen such that the result is correct with either LE or BE lane order.
; run: %mask_casted([0 0], [0xFFFFFF 0xFFFF4F], [0xFFF1 0xFFF1 0xF 0xF]) == [0xFF000E 0xFFFF40]
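; A quick per-lane check, with bitselect(c, x, y) = (x & c) | (y & ~c):
;   lane 0: 0xFFFFFF & ~0x0000FFF10000FFF1 = 0xFF000E
;   lane 1: 0xFFFF4F & ~0x0000000F0000000F = 0xFFFF40
; Because the two mask words within each doubleword are equal, swapping
; the word order (BE vs. LE lane placement) cannot change the result.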
function %good_const_mask(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):

View File

@@ -12,23 +12,3 @@ block0(v0: i8x16, v1: i8x16):
}
; run: %swizzle_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], [0 9 15 1 6 13 7 11 10 8 100 12 4 2 3 5]) == [1 10 16 2 7 14 8 12 11 9 0 13 5 3 4 6]
function %swizzle_i16x8(i8x16, i8x16) -> i16x8 {
block0(v0: i8x16, v1: i8x16):
v2 = swizzle.i16x8 v0, v1
return v2
}
; run: %swizzle_i16x8([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], [0 9 15 1 6 13 7 11 10 8 100 12 4 2 3 5]) == 0x060403050d00090b0c080e0702100a01
function %swizzle_i32x4(i8x16, i8x16) -> i32x4 {
block0(v0: i8x16, v1: i8x16):
v2 = swizzle.i32x4 v0, v1
return v2
}
; run: %swizzle_i32x4([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], [0 9 15 1 6 13 7 11 10 8 100 12 4 2 3 5]) == 0x060403050d00090b0c080e0702100a01
function %swizzle_i64x2(i8x16, i8x16) -> i64x2 {
block0(v0: i8x16, v1: i8x16):
v2 = swizzle.i64x2 v0, v1
return v2
}
; run: %swizzle_i64x2([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], [0 9 15 1 6 13 7 11 10 8 100 12 4 2 3 5]) == 0x060403050d00090b0c080e0702100a01