[s390x, abi_impl] Add i128 support (#4598)

This adds full i128 support to the s390x target, including new filetests
and enabling the existing i128 runtest on s390x.

The ABI requires that i128 is passed and returned via implicit pointer,
but the front end still generates direct i128 types in calls.  This means
we have to implement ABI support to implicitly convert i128 types to
pointers when passing arguments.

To do so, we add a new variant ABIArg::ImplicitArg.  This acts like
StructArg, except that the value type is the actual target type,
not a pointer type.  The required conversions have to be inserted
in the prologue and at function call sites.

Note that when dereferencing the implicit pointer in the prologue,
we may require a temp register: the pointer may be passed on the
stack so it needs to be loaded first, but the value register may
be in the wrong class for pointer values.  In this case, we use
the "stack limit" register, which should be available at this
point in the prologue.

For return values, we use a mechanism similar to the one used for
supporting multiple return values in the Wasmtime ABI.  The only
difference is that the hidden pointer to the return buffer must
be the *first*, not last, argument in this case.

(This implements the second half of issue #4565.)
This commit is contained in:
Ulrich Weigand
2022-08-04 22:41:26 +02:00
committed by GitHub
parent dc8362ceec
commit b17b1eb25d
46 changed files with 2424 additions and 166 deletions

View File

@@ -1,9 +1,18 @@
test compile precise-output
target s390x
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; IADD
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %iadd_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = iadd.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; vaq %v7, %v0, %v1
; vst %v7, 0(%r2)
; br %r14
function %iadd_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
@@ -323,6 +332,19 @@ block0(v0: i32, v1: i64):
; alr %r2, %r4
; br %r14
function %isub_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = isub.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; vsq %v7, %v0, %v1
; vst %v7, 0(%r2)
; br %r14
function %isub_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = isub.i64 v0, v1
@@ -540,6 +562,22 @@ block0(v0: i8, v1: i64):
; sr %r2, %r4
; br %r14
function %iabs_i128(i128) -> i128 {
block0(v0: i128):
v1 = iabs.i128 v0
return v1
}
; block0:
; vl %v0, 0(%r3)
; vgbm %v5, 0
; vsq %v7, %v5, %v0
; vrepg %v17, %v0, 0
; vchg %v19, %v5, %v17
; vsel %v21, %v7, %v0, %v19
; vst %v21, 0(%r2)
; br %r14
function %iabs_i64(i64) -> i64 {
block0(v0: i64):
v1 = iabs.i64 v0
@@ -593,6 +631,19 @@ block0(v0: i8):
; lpr %r2, %r5
; br %r14
function %ineg_i128(i128) -> i128 {
block0(v0: i128):
v1 = ineg.i128 v0
return v1
}
; block0:
; vl %v0, 0(%r3)
; vgbm %v5, 0
; vsq %v7, %v5, %v0
; vst %v7, 0(%r2)
; br %r14
function %ineg_i64(i64) -> i64 {
block0(v0: i64):
v1 = ineg.i64 v0
@@ -644,6 +695,32 @@ block0(v0: i8):
; lcr %r2, %r2
; br %r14
function %imul_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = imul.i128 v0, v1
return v2
}
; stmg %r13, %r15, 104(%r15)
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; lgdr %r5, %f0
; vlgvg %r3, %v0, 1
; lgdr %r4, %f1
; vlgvg %r1, %v1, 1
; lgr %r13, %r1
; mlgr %r0, %r3
; msgr %r3, %r4
; lgr %r4, %r13
; msgr %r5, %r4
; agr %r3, %r0
; agr %r5, %r3
; vlvgp %v5, %r5, %r1
; vst %v5, 0(%r2)
; lmg %r13, %r15, 104(%r15)
; br %r14
function %imul_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = imul.i64 v0, v1

View File

@@ -36,6 +36,25 @@ target s390x
;; CLZ
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %clz_i128(i128) -> i128 {
block0(v0: i128):
v1 = clz v0
return v1
}
; block0:
; vl %v0, 0(%r3)
; vclzg %v5, %v0
; vgbm %v7, 0
; vpdi %v17, %v7, %v5, 0
; vpdi %v19, %v7, %v5, 1
; vag %v21, %v17, %v19
; vrepig %v23, 64
; vceqg %v25, %v17, %v23
; vsel %v27, %v21, %v17, %v25
; vst %v27, 0(%r2)
; br %r14
function %clz_i64(i64) -> i64 {
block0(v0: i64):
v1 = clz v0
@@ -83,6 +102,30 @@ block0(v0: i8):
; ahik %r2, %r0, -56
; br %r14
function %cls_i128(i128) -> i128 {
block0(v0: i128):
v1 = cls v0
return v1
}
; block0:
; vl %v0, 0(%r3)
; vrepib %v5, 255
; vsrab %v7, %v0, %v5
; vsra %v17, %v7, %v5
; vx %v19, %v0, %v17
; vclzg %v21, %v19
; vgbm %v23, 0
; vpdi %v25, %v23, %v21, 0
; vpdi %v27, %v23, %v21, 1
; vag %v29, %v25, %v27
; vrepig %v31, 64
; vceqg %v1, %v25, %v31
; vsel %v3, %v29, %v25, %v1
; vaq %v5, %v3, %v5
; vst %v5, 0(%r2)
; br %r14
function %cls_i64(i64) -> i64 {
block0(v0: i64):
v1 = cls v0
@@ -138,6 +181,25 @@ block0(v0: i8):
; ahik %r2, %r0, -57
; br %r14
function %ctz_i128(i128) -> i128 {
block0(v0: i128):
v1 = ctz v0
return v1
}
; block0:
; vl %v0, 0(%r3)
; vctzg %v5, %v0
; vgbm %v7, 0
; vpdi %v17, %v7, %v5, 0
; vpdi %v19, %v7, %v5, 1
; vag %v21, %v17, %v19
; vrepig %v23, 64
; vceqg %v25, %v19, %v23
; vsel %v27, %v21, %v19, %v25
; vst %v27, 0(%r2)
; br %r14
function %ctz_i64(i64) -> i64 {
block0(v0: i64):
v1 = ctz v0
@@ -198,6 +260,22 @@ block0(v0: i8):
; srk %r2, %r5, %r0
; br %r14
function %popcnt_i128(i128) -> i128 {
block0(v0: i128):
v1 = popcnt v0
return v1
}
; block0:
; vl %v0, 0(%r3)
; vpopctg %v5, %v0
; vgbm %v7, 0
; vpdi %v17, %v7, %v5, 0
; vpdi %v19, %v7, %v5, 1
; vag %v21, %v17, %v19
; vst %v21, 0(%r2)
; br %r14
function %popcnt_i64(i64) -> i64 {
block0(v0: i64):
v1 = popcnt v0

View File

@@ -4,9 +4,18 @@ target s390x
; FIXME: add immediate operand versions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; BAND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %band_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = band.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; vn %v7, %v0, %v1
; vst %v7, 0(%r2)
; br %r14
function %band_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
@@ -105,6 +114,19 @@ block0(v0: i8, v1: i64):
; nr %r2, %r4
; br %r14
function %bor_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bor.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; vo %v7, %v0, %v1
; vst %v7, 0(%r2)
; br %r14
function %bor_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor.i64 v0, v1
@@ -202,6 +224,19 @@ block0(v0: i8, v1: i64):
; or %r2, %r4
; br %r14
function %bxor_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bxor.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; vx %v7, %v0, %v1
; vst %v7, 0(%r2)
; br %r14
function %bxor_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor.i64 v0, v1
@@ -299,6 +334,19 @@ block0(v0: i8, v1: i64):
; xr %r2, %r4
; br %r14
function %band_not_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = band_not.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; vnc %v7, %v0, %v1
; vst %v7, 0(%r2)
; br %r14
function %band_not_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = band_not.i64 v0, v1
@@ -344,6 +392,19 @@ block0(v0: i8, v1: i8):
; nr %r2, %r3
; br %r14
function %bor_not_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bor_not.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; voc %v7, %v0, %v1
; vst %v7, 0(%r2)
; br %r14
function %bor_not_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor_not.i64 v0, v1
@@ -389,6 +450,19 @@ block0(v0: i8, v1: i8):
; or %r2, %r3
; br %r14
function %bxor_not_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bxor_not.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; vnx %v7, %v0, %v1
; vst %v7, 0(%r2)
; br %r14
function %bxor_not_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor_not.i64 v0, v1
@@ -434,6 +508,18 @@ block0(v0: i8, v1: i8):
; xilf %r2, 4294967295
; br %r14
function %bnot_i128(i128) -> i128 {
block0(v0: i128):
v1 = bnot.i128 v0
return v1
}
; block0:
; vl %v0, 0(%r3)
; vno %v5, %v0, %v0
; vst %v5, 0(%r2)
; br %r14
function %bnot_i64(i64) -> i64 {
block0(v0: i64):
v1 = bnot.i64 v0
@@ -475,6 +561,20 @@ block0(v0: i8):
; xilf %r2, 4294967295
; br %r14
function %bitselect_i128(i128, i128, i128) -> i128 {
block0(v0: i128, v1: i128, v2: i128):
v3 = bitselect.i128 v0, v1, v2
return v3
}
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; vl %v2, 0(%r5)
; vsel %v17, %v1, %v2, %v0
; vst %v17, 0(%r2)
; br %r14
function %bitselect_i64(i64, i64, i64) -> i64 {
block0(v0: i64, v1: i64, v2: i64):
v3 = bitselect.i64 v0, v1, v2

View File

@@ -183,3 +183,38 @@ block0(v0: i64, v1: i32, v2: i32, v3: i32, v4: i16, v5: i16, v6: i16, v7: i8, v8
; lmg %r7, %r15, 72(%r15)
; br %r14
function %incoming_args_i128(i128, i128, i128, i128, i128, i128, i128, i128) -> i128 {
block0(v0: i128, v1: i128, v2: i128, v3: i128, v4: i128, v5: i128, v6: i128, v7: i128):
v8 = iadd v0, v1
v9 = iadd v2, v3
v10 = iadd v4, v5
v11 = iadd v6, v7
v12 = iadd v8, v9
v13 = iadd v10, v11
v14 = iadd v12, v13
return v14
}
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; vl %v2, 0(%r5)
; vl %v3, 0(%r6)
; lg %r3, 160(%r15)
; vl %v4, 0(%r3)
; lg %r3, 168(%r15)
; vl %v5, 0(%r3)
; lg %r5, 176(%r15)
; vl %v6, 0(%r5)
; lg %r4, 184(%r15)
; vl %v7, 0(%r4)
; vaq %v17, %v0, %v1
; vaq %v18, %v2, %v3
; vaq %v19, %v4, %v5
; vaq %v20, %v6, %v7
; vaq %v17, %v17, %v18
; vaq %v18, %v19, %v20
; vaq %v17, %v17, %v18
; vst %v17, 0(%r2)
; br %r14

View File

@@ -0,0 +1,26 @@
test compile precise-output
target s390x
function %iconcat_i64(i64, i64) -> i128 {
block0(v0: i64, v1: i64):
v2 = iconcat.i64 v0, v1
return v2
}
; block0:
; vlvgp %v7, %r4, %r3
; vst %v7, 0(%r2)
; br %r14
function %isplit_i128(i128) -> i64, i64 {
block0(v0: i128):
v1, v2 = isplit.i128 v0
return v1, v2
}
; block0:
; vl %v0, 0(%r2)
; lgdr %r3, %f0
; vlgvg %r2, %v0, 1
; br %r14

View File

@@ -1,9 +1,29 @@
test compile precise-output
target s390x
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; UEXTEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %uextend_i64_i128(i64) -> i128 {
block0(v0: i64):
v1 = uextend.i128 v0
return v1
}
; block0:
; vgbm %v5, 0
; vlvgg %v5, %r3, 1
; vst %v5, 0(%r2)
; br %r14
function %uextend_i32_i128(i32) -> i128 {
block0(v0: i32):
v1 = uextend.i128 v0
return v1
}
; block0:
; vgbm %v5, 0
; vlvgf %v5, %r3, 3
; vst %v5, 0(%r2)
; br %r14
function %uextend_i32_i64(i32) -> i64 {
block0(v0: i32):
@@ -15,6 +35,18 @@ block0(v0: i32):
; llgfr %r2, %r2
; br %r14
function %uextend_i16_i128(i16) -> i128 {
block0(v0: i16):
v1 = uextend.i128 v0
return v1
}
; block0:
; vgbm %v5, 0
; vlvgh %v5, %r3, 7
; vst %v5, 0(%r2)
; br %r14
function %uextend_i16_i64(i16) -> i64 {
block0(v0: i16):
v1 = uextend.i64 v0
@@ -35,6 +67,18 @@ block0(v0: i16):
; llhr %r2, %r2
; br %r14
function %uextend_i8_i128(i8) -> i128 {
block0(v0: i8):
v1 = uextend.i128 v0
return v1
}
; block0:
; vgbm %v5, 0
; vlvgb %v5, %r3, 15
; vst %v5, 0(%r2)
; br %r14
function %uextend_i8_i64(i8) -> i64 {
block0(v0: i8):
v1 = uextend.i64 v0
@@ -65,6 +109,31 @@ block0(v0: i8):
; llcr %r2, %r2
; br %r14
function %sextend_i64_i128(i64) -> i128 {
block0(v0: i64):
v1 = sextend.i128 v0
return v1
}
; block0:
; srag %r4, %r3, 63
; vlvgp %v7, %r4, %r3
; vst %v7, 0(%r2)
; br %r14
function %sextend_i32_i128(i32) -> i128 {
block0(v0: i32):
v1 = sextend.i128 v0
return v1
}
; block0:
; lgfr %r3, %r3
; srag %r5, %r3, 63
; vlvgp %v17, %r5, %r3
; vst %v17, 0(%r2)
; br %r14
function %sextend_i32_i64(i32) -> i64 {
block0(v0: i32):
v1 = sextend.i64 v0
@@ -75,6 +144,19 @@ block0(v0: i32):
; lgfr %r2, %r2
; br %r14
function %sextend_i16_i128(i16) -> i128 {
block0(v0: i16):
v1 = sextend.i128 v0
return v1
}
; block0:
; lghr %r3, %r3
; srag %r5, %r3, 63
; vlvgp %v17, %r5, %r3
; vst %v17, 0(%r2)
; br %r14
function %sextend_i16_i64(i16) -> i64 {
block0(v0: i16):
v1 = sextend.i64 v0
@@ -95,6 +177,19 @@ block0(v0: i16):
; lhr %r2, %r2
; br %r14
function %sextend_i8_i128(i8) -> i128 {
block0(v0: i8):
v1 = sextend.i128 v0
return v1
}
; block0:
; lgbr %r3, %r3
; srag %r5, %r3, 63
; vlvgp %v17, %r5, %r3
; vst %v17, 0(%r2)
; br %r14
function %sextend_i8_i64(i8) -> i64 {
block0(v0: i8):
v1 = sextend.i64 v0
@@ -125,6 +220,50 @@ block0(v0: i8):
; lbr %r2, %r2
; br %r14
function %ireduce_i128_i64(i128) -> i64 {
block0(v0: i128):
v1 = ireduce.i64 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvg %r2, %v0, 1
; br %r14
function %ireduce_i128_i32(i128) -> i32 {
block0(v0: i128):
v1 = ireduce.i32 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvg %r2, %v0, 1
; br %r14
function %ireduce_i128_i16(i128) -> i16 {
block0(v0: i128):
v1 = ireduce.i16 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvg %r2, %v0, 1
; br %r14
function %ireduce_i128_i8(i128) -> i8 {
block0(v0: i128):
v1 = ireduce.i8 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvg %r2, %v0, 1
; br %r14
function %ireduce_i64_i32(i64, i64) -> i32 {
block0(v0: i64, v1: i64):
v2 = ireduce.i32 v1
@@ -185,6 +324,29 @@ block0(v0: i16, v1: i16):
; lgr %r2, %r3
; br %r14
function %bextend_b64_b128(b64) -> b128 {
block0(v0: b64):
v1 = bextend.b128 v0
return v1
}
; block0:
; vlvgp %v5, %r3, %r3
; vst %v5, 0(%r2)
; br %r14
function %bextend_b32_b128(b32) -> b128 {
block0(v0: b32):
v1 = bextend.b128 v0
return v1
}
; block0:
; lgfr %r3, %r3
; vlvgp %v7, %r3, %r3
; vst %v7, 0(%r2)
; br %r14
function %bextend_b32_b64(b32) -> b64 {
block0(v0: b32):
v1 = bextend.b64 v0
@@ -195,6 +357,18 @@ block0(v0: b32):
; lgfr %r2, %r2
; br %r14
function %bextend_b16_b128(b16) -> b128 {
block0(v0: b16):
v1 = bextend.b128 v0
return v1
}
; block0:
; lghr %r3, %r3
; vlvgp %v7, %r3, %r3
; vst %v7, 0(%r2)
; br %r14
function %bextend_b16_b64(b16) -> b64 {
block0(v0: b16):
v1 = bextend.b64 v0
@@ -215,6 +389,18 @@ block0(v0: b16):
; lhr %r2, %r2
; br %r14
function %bextend_b8_b128(b8) -> b128 {
block0(v0: b8):
v1 = bextend.b128 v0
return v1
}
; block0:
; lgbr %r3, %r3
; vlvgp %v7, %r3, %r3
; vst %v7, 0(%r2)
; br %r14
function %bextend_b8_b64(b8) -> b64 {
block0(v0: b8):
v1 = bextend.b64 v0
@@ -245,6 +431,19 @@ block0(v0: b8):
; lbr %r2, %r2
; br %r14
function %bextend_b1_b128(b1) -> b128 {
block0(v0: b1):
v1 = bextend.b128 v0
return v1
}
; block0:
; sllg %r3, %r3, 63
; srag %r5, %r3, 63
; vlvgp %v17, %r5, %r5
; vst %v17, 0(%r2)
; br %r14
function %bextend_b1_b64(b1) -> b64 {
block0(v0: b1):
v1 = bextend.b64 v0
@@ -289,6 +488,61 @@ block0(v0: b1):
; srak %r2, %r5, 31
; br %r14
function %breduce_b128_b64(b128) -> b64 {
block0(v0: b128):
v1 = breduce.b64 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvg %r2, %v0, 1
; br %r14
function %breduce_b128_b32(b128) -> b32 {
block0(v0: b128):
v1 = breduce.b32 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvg %r2, %v0, 1
; br %r14
function %breduce_b128_b16(b128) -> b16 {
block0(v0: b128):
v1 = breduce.b16 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvg %r2, %v0, 1
; br %r14
function %breduce_b128_b8(b128) -> b8 {
block0(v0: b128):
v1 = breduce.b8 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvg %r2, %v0, 1
; br %r14
function %breduce_b128_b1(b128) -> b1 {
block0(v0: b128):
v1 = breduce.b1 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvg %r2, %v0, 1
; br %r14
function %breduce_b64_b32(b64, b64) -> b32 {
block0(v0: b64, v1: b64):
v2 = breduce.b32 v1
@@ -389,6 +643,72 @@ block0(v0: b8, v1: b8):
; lgr %r2, %r3
; br %r14
function %bmask_b128_i128(b128) -> i128 {
block0(v0: b128):
v1 = bmask.i128 v0
return v1
}
; block0:
; vl %v0, 0(%r3)
; vst %v0, 0(%r2)
; br %r14
function %bmask_b128_i64(b128) -> i64 {
block0(v0: b128):
v1 = bmask.i64 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvg %r2, %v0, 1
; br %r14
function %bmask_b128_i32(b128) -> i32 {
block0(v0: b128):
v1 = bmask.i32 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvg %r2, %v0, 1
; br %r14
function %bmask_b128_i16(b128) -> i16 {
block0(v0: b128):
v1 = bmask.i16 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvg %r2, %v0, 1
; br %r14
function %bmask_b128_i8(b128) -> i8 {
block0(v0: b128):
v1 = bmask.i8 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvg %r2, %v0, 1
; br %r14
function %bmask_b64_i128(b64, b64) -> i128 {
block0(v0: b64, v1: b64):
v2 = bmask.i128 v1
return v2
}
; block0:
; vlvgp %v7, %r4, %r4
; vst %v7, 0(%r2)
; br %r14
function %bmask_b64_i64(b64, b64) -> i64 {
block0(v0: b64, v1: b64):
v2 = bmask.i64 v1
@@ -429,6 +749,18 @@ block0(v0: b64, v1: b64):
; lgr %r2, %r3
; br %r14
function %bmask_b32_i128(b32, b32) -> i128 {
block0(v0: b32, v1: b32):
v2 = bmask.i128 v1
return v2
}
; block0:
; lgfr %r5, %r4
; vlvgp %v17, %r5, %r5
; vst %v17, 0(%r2)
; br %r14
function %bmask_b32_i64(b32, b32) -> i64 {
block0(v0: b32, v1: b32):
v2 = bmask.i64 v1
@@ -469,6 +801,18 @@ block0(v0: b32, v1: b32):
; lgr %r2, %r3
; br %r14
function %bmask_b16_i128(b16, b16) -> i128 {
block0(v0: b16, v1: b16):
v2 = bmask.i128 v1
return v2
}
; block0:
; lghr %r5, %r4
; vlvgp %v17, %r5, %r5
; vst %v17, 0(%r2)
; br %r14
function %bmask_b16_i64(b16, b16) -> i64 {
block0(v0: b16, v1: b16):
v2 = bmask.i64 v1
@@ -509,6 +853,18 @@ block0(v0: b16, v1: b16):
; lgr %r2, %r3
; br %r14
function %bmask_b8_i128(b8, b8) -> i128 {
block0(v0: b8, v1: b8):
v2 = bmask.i128 v1
return v2
}
; block0:
; lgbr %r5, %r4
; vlvgp %v17, %r5, %r5
; vst %v17, 0(%r2)
; br %r14
function %bmask_b8_i64(b8, b8) -> i64 {
block0(v0: b8, v1: b8):
v2 = bmask.i64 v1
@@ -549,6 +905,19 @@ block0(v0: b8, v1: b8):
; lgr %r2, %r3
; br %r14
function %bmask_b1_i128(b1, b1) -> i128 {
block0(v0: b1, v1: b1):
v2 = bmask.i128 v1
return v2
}
; block0:
; sllg %r5, %r4, 63
; srag %r3, %r5, 63
; vlvgp %v19, %r3, %r3
; vst %v19, 0(%r2)
; br %r14
function %bmask_b1_i64(b1, b1) -> i64 {
block0(v0: b1, v1: b1):
v2 = bmask.i64 v1
@@ -593,6 +962,80 @@ block0(v0: b1, v1: b1):
; srak %r2, %r3, 31
; br %r14
function %bint_b128_i128(b128) -> i128 {
block0(v0: b128):
v1 = bint.i128 v0
return v1
}
; block0:
; vl %v0, 0(%r3)
; bras %r1, 20 ; data.u128 0x00000000000000000000000000000001 ; vl %v5, 0(%r1)
; vn %v7, %v0, %v5
; vst %v7, 0(%r2)
; br %r14
function %bint_b128_i64(b128) -> i64 {
block0(v0: b128):
v1 = bint.i64 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvb %r2, %v0, 15
; nill %r2, 1
; br %r14
function %bint_b128_i32(b128) -> i32 {
block0(v0: b128):
v1 = bint.i32 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvb %r2, %v0, 15
; nill %r2, 1
; br %r14
function %bint_b128_i16(b128) -> i16 {
block0(v0: b128):
v1 = bint.i16 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvb %r2, %v0, 15
; nill %r2, 1
; br %r14
function %bint_b128_i8(b128) -> i8 {
block0(v0: b128):
v1 = bint.i8 v0
return v1
}
; block0:
; vl %v0, 0(%r2)
; vlgvb %r2, %v0, 15
; nill %r2, 1
; br %r14
function %bint_b64_i128(b64) -> i128 {
block0(v0: b64):
v1 = bint.i128 v0
return v1
}
; block0:
; nill %r3, 1
; vgbm %v16, 0
; vlvgb %v16, %r3, 15
; vst %v16, 0(%r2)
; br %r14
function %bint_b64_i64(b64) -> i64 {
block0(v0: b64):
v1 = bint.i64 v0
@@ -634,6 +1077,19 @@ block0(v0: b64):
; nill %r2, 1
; br %r14
function %bint_b32_i128(b32) -> i128 {
block0(v0: b32):
v1 = bint.i128 v0
return v1
}
; block0:
; nill %r3, 1
; vgbm %v16, 0
; vlvgb %v16, %r3, 15
; vst %v16, 0(%r2)
; br %r14
function %bint_b32_i64(b32) -> i64 {
block0(v0: b32):
v1 = bint.i64 v0
@@ -675,6 +1131,19 @@ block0(v0: b32):
; nill %r2, 1
; br %r14
function %bint_b16_i128(b16) -> i128 {
block0(v0: b16):
v1 = bint.i128 v0
return v1
}
; block0:
; nill %r3, 1
; vgbm %v16, 0
; vlvgb %v16, %r3, 15
; vst %v16, 0(%r2)
; br %r14
function %bint_b16_i64(b16) -> i64 {
block0(v0: b16):
v1 = bint.i64 v0
@@ -716,6 +1185,19 @@ block0(v0: b16):
; nill %r2, 1
; br %r14
function %bint_b8_i128(b8) -> i128 {
block0(v0: b8):
v1 = bint.i128 v0
return v1
}
; block0:
; nill %r3, 1
; vgbm %v16, 0
; vlvgb %v16, %r3, 15
; vst %v16, 0(%r2)
; br %r14
function %bint_b8_i64(b8) -> i64 {
block0(v0: b8):
v1 = bint.i64 v0
@@ -757,6 +1239,19 @@ block0(v0: b8):
; nill %r2, 1
; br %r14
function %bint_b1_i128(b1) -> i128 {
block0(v0: b1):
v1 = bint.i128 v0
return v1
}
; block0:
; nill %r3, 1
; vgbm %v16, 0
; vlvgb %v16, %r3, 15
; vst %v16, 0(%r2)
; br %r14
function %bint_b1_i64(b1) -> i64 {
block0(v0: b1):
v1 = bint.i64 v0

View File

@@ -0,0 +1,143 @@
test compile precise-output
target s390x
function %icmp_eq_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp.i128 eq v0, v1
return v2
}
; block0:
; vl %v0, 0(%r2)
; vl %v1, 0(%r3)
; vceqgs %v5, %v0, %v1
; lhi %r2, 0
; lochie %r2, 1
; br %r14
function %icmp_ne_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp.i128 ne v0, v1
return v2
}
; block0:
; vl %v0, 0(%r2)
; vl %v1, 0(%r3)
; vceqgs %v5, %v0, %v1
; lhi %r2, 0
; lochine %r2, 1
; br %r14
function %icmp_slt_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp.i128 slt v0, v1
return v2
}
; block0:
; vl %v0, 0(%r2)
; vl %v1, 0(%r3)
; vecg %v0, %v1 ; jne 10 ; vchlgs %v5, %v1, %v0
; lhi %r2, 0
; lochil %r2, 1
; br %r14
function %icmp_sgt_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp.i128 sgt v0, v1
return v2
}
; block0:
; vl %v0, 0(%r2)
; vl %v1, 0(%r3)
; vecg %v1, %v0 ; jne 10 ; vchlgs %v5, %v0, %v1
; lhi %r2, 0
; lochil %r2, 1
; br %r14
function %icmp_sle_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp.i128 sle v0, v1
return v2
}
; block0:
; vl %v0, 0(%r2)
; vl %v1, 0(%r3)
; vecg %v1, %v0 ; jne 10 ; vchlgs %v5, %v0, %v1
; lhi %r2, 0
; lochinl %r2, 1
; br %r14
function %icmp_sge_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp.i128 sge v0, v1
return v2
}
; block0:
; vl %v0, 0(%r2)
; vl %v1, 0(%r3)
; vecg %v0, %v1 ; jne 10 ; vchlgs %v5, %v1, %v0
; lhi %r2, 0
; lochinl %r2, 1
; br %r14
function %icmp_ult_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp.i128 ult v0, v1
return v2
}
; block0:
; vl %v0, 0(%r2)
; vl %v1, 0(%r3)
; veclg %v0, %v1 ; jne 10 ; vchlgs %v5, %v1, %v0
; lhi %r2, 0
; lochil %r2, 1
; br %r14
function %icmp_ugt_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp.i128 ugt v0, v1
return v2
}
; block0:
; vl %v0, 0(%r2)
; vl %v1, 0(%r3)
; veclg %v1, %v0 ; jne 10 ; vchlgs %v5, %v0, %v1
; lhi %r2, 0
; lochil %r2, 1
; br %r14
function %icmp_ule_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp.i128 ule v0, v1
return v2
}
; block0:
; vl %v0, 0(%r2)
; vl %v1, 0(%r3)
; veclg %v1, %v0 ; jne 10 ; vchlgs %v5, %v0, %v1
; lhi %r2, 0
; lochinl %r2, 1
; br %r14
function %icmp_uge_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp.i128 uge v0, v1
return v2
}
; block0:
; vl %v0, 0(%r2)
; vl %v1, 0(%r3)
; veclg %v0, %v1 ; jne 10 ; vchlgs %v5, %v1, %v0
; lhi %r2, 0
; lochinl %r2, 1
; br %r14

View File

@@ -1,9 +1,75 @@
test compile precise-output
target s390x
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ROTR
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %rotr_i128_vr(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = rotr.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; vrepb %v7, %v1, 15
; vlcb %v17, %v7
; vslb %v19, %v0, %v17
; vsl %v21, %v19, %v17
; vsrlb %v23, %v0, %v7
; vsrl %v25, %v23, %v7
; vo %v27, %v21, %v25
; vst %v27, 0(%r2)
; br %r14
function %rotr_i128_reg(i128, i64) -> i128 {
block0(v0: i128, v1: i64):
v2 = rotr.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vlvgb %v7, %r4, 0
; vrepb %v17, %v7, 0
; vlcb %v19, %v17
; vslb %v21, %v0, %v19
; vsl %v23, %v21, %v19
; vsrlb %v25, %v0, %v17
; vsrl %v27, %v25, %v17
; vo %v29, %v23, %v27
; vst %v29, 0(%r2)
; br %r14
function %rotr_i128_imm(i128) -> i128 {
block0(v0: i128):
v1 = iconst.i32 17
v2 = rotr.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vrepib %v5, 17
; vlcb %v7, %v5
; vslb %v17, %v0, %v7
; vsl %v19, %v17, %v7
; vsrlb %v21, %v0, %v5
; vsrl %v23, %v21, %v5
; vo %v25, %v19, %v23
; vst %v25, 0(%r2)
; br %r14
function %rotr_i64_vr(i64, i128) -> i64 {
block0(v0: i64, v1: i128):
v2 = rotr.i64 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; vlgvg %r3, %v1, 1
; lcr %r5, %r3
; rllg %r2, %r2, 0(%r5)
; br %r14
function %rotr_i64_reg(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
@@ -12,7 +78,7 @@ block0(v0: i64, v1: i64):
}
; block0:
; lcgr %r3, %r3
; lcr %r3, %r3
; rllg %r2, %r2, 0(%r3)
; br %r14
@@ -27,6 +93,19 @@ block0(v0: i64):
; rllg %r2, %r2, 47
; br %r14
function %rotr_i32_vr(i32, i128) -> i32 {
block0(v0: i32, v1: i128):
v2 = rotr.i32 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; vlgvg %r3, %v1, 1
; lcr %r5, %r3
; rll %r2, %r2, 0(%r5)
; br %r14
function %rotr_i32_reg(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = rotr.i32 v0, v1
@@ -49,6 +128,24 @@ block0(v0: i32):
; rll %r2, %r2, 15
; br %r14
function %rotr_i16_vr(i16, i128) -> i16 {
block0(v0: i16, v1: i128):
v2 = rotr.i16 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; llhr %r3, %r2
; vlgvg %r5, %v1, 1
; lcr %r4, %r5
; nill %r5, 15
; nill %r4, 15
; sllk %r4, %r3, 0(%r4)
; srlk %r5, %r3, 0(%r5)
; ork %r2, %r4, %r5
; br %r14
function %rotr_i16_reg(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = rotr.i16 v0, v1
@@ -79,6 +176,24 @@ block0(v0: i16):
; ork %r2, %r3, %r5
; br %r14
function %rotr_i8_vr(i8, i128) -> i8 {
block0(v0: i8, v1: i128):
v2 = rotr.i8 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; llcr %r3, %r2
; vlgvg %r5, %v1, 1
; lcr %r4, %r5
; nill %r5, 7
; nill %r4, 7
; sllk %r4, %r3, 0(%r4)
; srlk %r5, %r3, 0(%r5)
; ork %r2, %r4, %r5
; br %r14
function %rotr_i8_reg(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = rotr.i8 v0, v1
@@ -109,6 +224,75 @@ block0(v0: i8):
; ork %r2, %r3, %r5
; br %r14
function %rotl_i128_vr(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = rotl.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; vrepb %v7, %v1, 15
; vlcb %v17, %v7
; vslb %v19, %v0, %v7
; vsl %v21, %v19, %v7
; vsrlb %v23, %v0, %v17
; vsrl %v25, %v23, %v17
; vo %v27, %v21, %v25
; vst %v27, 0(%r2)
; br %r14
function %rotl_i128_reg(i128, i64) -> i128 {
block0(v0: i128, v1: i64):
v2 = rotl.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vlvgb %v7, %r4, 0
; vrepb %v17, %v7, 0
; vlcb %v19, %v17
; vslb %v21, %v0, %v17
; vsl %v23, %v21, %v17
; vsrlb %v25, %v0, %v19
; vsrl %v27, %v25, %v19
; vo %v29, %v23, %v27
; vst %v29, 0(%r2)
; br %r14
function %rotl_i128_imm(i128) -> i128 {
block0(v0: i128):
v1 = iconst.i32 17
v2 = rotl.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vrepib %v5, 17
; vlcb %v7, %v5
; vslb %v17, %v0, %v5
; vsl %v19, %v17, %v5
; vsrlb %v21, %v0, %v7
; vsrl %v23, %v21, %v7
; vo %v25, %v19, %v23
; vst %v25, 0(%r2)
; br %r14
function %rotl_i64_vr(i64, i128) -> i64 {
block0(v0: i64, v1: i128):
v2 = rotl.i64 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; vlgvg %r3, %v1, 1
; rllg %r2, %r2, 0(%r3)
; br %r14
function %rotl_i64_reg(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = rotl.i64 v0, v1
@@ -130,6 +314,18 @@ block0(v0: i64):
; rllg %r2, %r2, 17
; br %r14
function %rotl_i32_vr(i32, i128) -> i32 {
block0(v0: i32, v1: i128):
v2 = rotl.i32 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; vlgvg %r3, %v1, 1
; rll %r2, %r2, 0(%r3)
; br %r14
function %rotl_i32_reg(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = rotl.i32 v0, v1
@@ -151,6 +347,24 @@ block0(v0: i32):
; rll %r2, %r2, 17
; br %r14
function %rotl_i16_vr(i16, i128) -> i16 {
block0(v0: i16, v1: i128):
v2 = rotl.i16 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; llhr %r3, %r2
; vlgvg %r5, %v1, 1
; lcr %r4, %r5
; nill %r5, 15
; nill %r4, 15
; sllk %r5, %r3, 0(%r5)
; srlk %r2, %r3, 0(%r4)
; ork %r2, %r5, %r2
; br %r14
function %rotl_i16_reg(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = rotl.i16 v0, v1
@@ -181,6 +395,24 @@ block0(v0: i16):
; ork %r2, %r3, %r5
; br %r14
function %rotl_i8_vr(i8, i128) -> i8 {
block0(v0: i8, v1: i128):
v2 = rotl.i8 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; llcr %r3, %r2
; vlgvg %r5, %v1, 1
; lcr %r4, %r5
; nill %r5, 7
; nill %r4, 7
; sllk %r5, %r3, 0(%r5)
; srlk %r2, %r3, 0(%r4)
; ork %r2, %r5, %r2
; br %r14
function %rotl_i8_reg(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = rotl.i8 v0, v1
@@ -211,6 +443,63 @@ block0(v0: i8):
; ork %r2, %r3, %r5
; br %r14
function %ushr_i128_vr(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = ushr.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; vrepb %v7, %v1, 15
; vsrlb %v17, %v0, %v7
; vsrl %v19, %v17, %v7
; vst %v19, 0(%r2)
; br %r14
function %ushr_i128_reg(i128, i64) -> i128 {
block0(v0: i128, v1: i64):
v2 = ushr.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vlvgb %v7, %r4, 0
; vrepb %v17, %v7, 0
; vsrlb %v19, %v0, %v17
; vsrl %v21, %v19, %v17
; vst %v21, 0(%r2)
; br %r14
function %ushr_i128_imm(i128) -> i128 {
block0(v0: i128):
v1 = iconst.i32 17
v2 = ushr.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vrepib %v5, 17
; vsrlb %v7, %v0, %v5
; vsrl %v17, %v7, %v5
; vst %v17, 0(%r2)
; br %r14
function %ushr_i64_vr(i64, i128) -> i64 {
block0(v0: i64, v1: i128):
v2 = ushr.i64 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; vlgvg %r3, %v1, 1
; srlg %r2, %r2, 0(%r3)
; br %r14
function %ushr_i64_reg(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = ushr.i64 v0, v1
@@ -232,6 +521,19 @@ block0(v0: i64):
; srlg %r2, %r2, 17
; br %r14
function %ushr_i32_vr(i32, i128) -> i32 {
block0(v0: i32, v1: i128):
v2 = ushr.i32 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; vlgvg %r3, %v1, 1
; nill %r3, 31
; srlk %r2, %r2, 0(%r3)
; br %r14
function %ushr_i32_reg(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = ushr.i32 v0, v1
@@ -254,6 +556,20 @@ block0(v0: i32):
; srlk %r2, %r2, 17
; br %r14
function %ushr_i16_vr(i16, i128) -> i16 {
block0(v0: i16, v1: i128):
v2 = ushr.i16 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; llhr %r3, %r2
; vlgvg %r5, %v1, 1
; nill %r5, 15
; srlk %r2, %r3, 0(%r5)
; br %r14
function %ushr_i16_reg(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = ushr.i16 v0, v1
@@ -278,6 +594,20 @@ block0(v0: i16):
; srlk %r2, %r5, 10
; br %r14
function %ushr_i8_vr(i8, i128) -> i8 {
block0(v0: i8, v1: i128):
v2 = ushr.i8 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; llcr %r3, %r2
; vlgvg %r5, %v1, 1
; nill %r5, 7
; srlk %r2, %r3, 0(%r5)
; br %r14
function %ushr_i8_reg(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = ushr.i8 v0, v1
@@ -302,6 +632,63 @@ block0(v0: i8):
; srlk %r2, %r5, 3
; br %r14
function %ishl_i128_vr(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = ishl.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; vrepb %v7, %v1, 15
; vslb %v17, %v0, %v7
; vsl %v19, %v17, %v7
; vst %v19, 0(%r2)
; br %r14
function %ishl_i128_reg(i128, i64) -> i128 {
block0(v0: i128, v1: i64):
v2 = ishl.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vlvgb %v7, %r4, 0
; vrepb %v17, %v7, 0
; vslb %v19, %v0, %v17
; vsl %v21, %v19, %v17
; vst %v21, 0(%r2)
; br %r14
function %ishl_i128_imm(i128) -> i128 {
block0(v0: i128):
v1 = iconst.i32 17
v2 = ishl.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vrepib %v5, 17
; vslb %v7, %v0, %v5
; vsl %v17, %v7, %v5
; vst %v17, 0(%r2)
; br %r14
function %ishl_i64_vr(i64, i128) -> i64 {
block0(v0: i64, v1: i128):
v2 = ishl.i64 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; vlgvg %r3, %v1, 1
; sllg %r2, %r2, 0(%r3)
; br %r14
function %ishl_i64_reg(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = ishl.i64 v0, v1
@@ -323,6 +710,19 @@ block0(v0: i64):
; sllg %r2, %r2, 17
; br %r14
function %ishl_i32_vr(i32, i128) -> i32 {
block0(v0: i32, v1: i128):
v2 = ishl.i32 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; vlgvg %r3, %v1, 1
; nill %r3, 31
; sllk %r2, %r2, 0(%r3)
; br %r14
function %ishl_i32_reg(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = ishl.i32 v0, v1
@@ -345,6 +745,19 @@ block0(v0: i32):
; sllk %r2, %r2, 17
; br %r14
function %ishl_i16_vr(i16, i128) -> i16 {
block0(v0: i16, v1: i128):
v2 = ishl.i16 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; vlgvg %r3, %v1, 1
; nill %r3, 15
; sllk %r2, %r2, 0(%r3)
; br %r14
function %ishl_i16_reg(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = ishl.i16 v0, v1
@@ -367,6 +780,19 @@ block0(v0: i16):
; sllk %r2, %r2, 10
; br %r14
function %ishl_i8_vr(i8, i128) -> i8 {
block0(v0: i8, v1: i128):
v2 = ishl.i8 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; vlgvg %r3, %v1, 1
; nill %r3, 7
; sllk %r2, %r2, 0(%r3)
; br %r14
function %ishl_i8_reg(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = ishl.i8 v0, v1
@@ -389,6 +815,63 @@ block0(v0: i8):
; sllk %r2, %r2, 3
; br %r14
function %sshr_i128_vr(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = sshr.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vl %v1, 0(%r4)
; vrepb %v7, %v1, 15
; vsrab %v17, %v0, %v7
; vsra %v19, %v17, %v7
; vst %v19, 0(%r2)
; br %r14
function %sshr_i128_reg(i128, i64) -> i128 {
block0(v0: i128, v1: i64):
v2 = sshr.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vlvgb %v7, %r4, 0
; vrepb %v17, %v7, 0
; vsrab %v19, %v0, %v17
; vsra %v21, %v19, %v17
; vst %v21, 0(%r2)
; br %r14
function %sshr_i128_imm(i128) -> i128 {
block0(v0: i128):
v1 = iconst.i32 17
v2 = sshr.i128 v0, v1
return v2
}
; block0:
; vl %v0, 0(%r3)
; vrepib %v5, 17
; vsrab %v7, %v0, %v5
; vsra %v17, %v7, %v5
; vst %v17, 0(%r2)
; br %r14
function %sshr_i64_vr(i64, i128) -> i64 {
block0(v0: i64, v1: i128):
v2 = sshr.i64 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; vlgvg %r3, %v1, 1
; srag %r2, %r2, 0(%r3)
; br %r14
function %sshr_i64_reg(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = sshr.i64 v0, v1
@@ -410,6 +893,19 @@ block0(v0: i64):
; srag %r2, %r2, 17
; br %r14
function %sshr_i32_vr(i32, i128) -> i32 {
block0(v0: i32, v1: i128):
v2 = sshr.i32 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; vlgvg %r3, %v1, 1
; nill %r3, 31
; srak %r2, %r2, 0(%r3)
; br %r14
function %sshr_i32_reg(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = sshr.i32 v0, v1
@@ -432,6 +928,20 @@ block0(v0: i32):
; srak %r2, %r2, 17
; br %r14
function %sshr_i16_vr(i16, i128) -> i16 {
block0(v0: i16, v1: i128):
v2 = sshr.i16 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; lhr %r3, %r2
; vlgvg %r5, %v1, 1
; nill %r5, 15
; srak %r2, %r3, 0(%r5)
; br %r14
function %sshr_i16_reg(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = sshr.i16 v0, v1
@@ -456,6 +966,20 @@ block0(v0: i16):
; srak %r2, %r5, 10
; br %r14
function %sshr_i8_vr(i8, i128) -> i8 {
block0(v0: i8, v1: i128):
v2 = sshr.i8 v0, v1
return v2
}
; block0:
; vl %v1, 0(%r3)
; lbr %r3, %r2
; vlgvg %r5, %v1, 1
; nill %r5, 7
; srak %r2, %r3, 0(%r5)
; br %r14
function %sshr_i8_reg(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = sshr.i8 v0, v1

View File

@@ -107,6 +107,17 @@ block0(v0: i64):
; vl %v24, 0(%r2)
; br %r14
function %load_i128_big(i64) -> i128 {
block0(v0: i64):
v1 = load.i128 big v0
return v1
}
; block0:
; vl %v5, 0(%r3)
; vst %v5, 0(%r2)
; br %r14
function %load_f32x4_big(i64) -> f32x4 {
block0(v0: i64):
v1 = load.f32x4 big v0
@@ -167,6 +178,17 @@ block0(v0: i64x2, v1: i64):
; vst %v24, 0(%r2)
; br %r14
function %store_i128_big(i128, i64) {
block0(v0: i128, v1: i64):
store.i128 big v0, v1
return
}
; block0:
; vl %v0, 0(%r2)
; vst %v0, 0(%r3)
; br %r14
function %store_f32x4_big(f32x4, i64) {
block0(v0: f32x4, v1: i64):
store.f32x4 big v0, v1
@@ -293,6 +315,17 @@ block0(v0: i64):
; vlbrq %v24, 0(%r2)
; br %r14
function %load_i128_little(i64) -> i128 {
block0(v0: i64):
v1 = load.i128 little v0
return v1
}
; block0:
; vlbrq %v5, 0(%r3)
; vst %v5, 0(%r2)
; br %r14
function %load_f32x4_little(i64) -> f32x4 {
block0(v0: i64):
v1 = load.f32x4 little v0
@@ -353,6 +386,17 @@ block0(v0: i64x2, v1: i64):
; vstbrq %v24, 0(%r2)
; br %r14
function %store_i128_little(i128, i64) {
block0(v0: i128, v1: i64):
store.i128 little v0, v1
return
}
; block0:
; vl %v0, 0(%r2)
; vstbrq %v0, 0(%r3)
; br %r14
function %store_f32x4_little(f32x4, i64) {
block0(v0: f32x4, v1: i64):
store.f32x4 little v0, v1

View File

@@ -107,6 +107,17 @@ block0(v0: i64):
; vl %v24, 0(%r2)
; br %r14
function %load_i128_big(i64) -> i128 {
block0(v0: i64):
v1 = load.i128 big v0
return v1
}
; block0:
; vl %v5, 0(%r3)
; vst %v5, 0(%r2)
; br %r14
function %load_f32x4_big(i64) -> f32x4 {
block0(v0: i64):
v1 = load.f32x4 big v0
@@ -167,6 +178,17 @@ block0(v0: i64x2, v1: i64):
; vst %v24, 0(%r2)
; br %r14
function %store_i128_big(i128, i64) {
block0(v0: i128, v1: i64):
store.i128 big v0, v1
return
}
; block0:
; vl %v0, 0(%r2)
; vst %v0, 0(%r3)
; br %r14
function %store_f32x4_big(f32x4, i64) {
block0(v0: f32x4, v1: i64):
store.f32x4 big v0, v1
@@ -307,6 +329,19 @@ block0(v0: i64):
; vlvgp %v24, %r3, %r5
; br %r14
function %load_i128_little(i64) -> i128 {
block0(v0: i64):
v1 = load.i128 little v0
return v1
}
; block0:
; lrvg %r4, 0(%r3)
; lrvg %r5, 8(%r3)
; vlvgp %v17, %r5, %r4
; vst %v17, 0(%r2)
; br %r14
function %load_f32x4_little(i64) -> f32x4 {
block0(v0: i64):
v1 = load.f32x4 little v0
@@ -408,6 +443,20 @@ block0(v0: i64x2, v1: i64):
; strvg %r4, 8(%r2)
; br %r14
function %store_i128_little(i128, i64) {
block0(v0: i128, v1: i64):
store.i128 little v0, v1
return
}
; block0:
; vl %v0, 0(%r2)
; vlgvg %r2, %v0, 1
; lgdr %r4, %f0
; strvg %r2, 0(%r3)
; strvg %r4, 8(%r3)
; br %r14
function %store_f32x4_little(f32x4, i64) {
block0(v0: f32x4, v1: i64):
store.f32x4 little v0, v1

View File

@@ -2,6 +2,7 @@ test interpret
test run
set enable_llvm_abi_extensions=true
target aarch64
target s390x
target x86_64
function %add_i128(i128, i128) -> i128 {

View File

@@ -1,5 +1,6 @@
test run
target aarch64
target s390x
function %band_not_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):

View File

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
function %bextend_b1_b128(b1) -> b128 {
block0(v0: b1):

View File

@@ -2,6 +2,7 @@ test interpret
test run
set enable_llvm_abi_extensions=true
target aarch64
target s390x
target x86_64
function %bint_b1_i128_true() -> i128 {

View File

@@ -1,6 +1,7 @@
test run
set enable_llvm_abi_extensions=true
target aarch64
target s390x
target x86_64
function %ctz_i128(i128) -> i128 {

View File

@@ -1,6 +1,7 @@
test run
set enable_llvm_abi_extensions=true
target aarch64
target s390x
target x86_64
function %bnot_i128(i128) -> i128 {

View File

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
function %bmask_b128_i128(b128) -> i128 {
block0(v0: b128):

View File

@@ -1,5 +1,6 @@
test run
target aarch64
target s390x
function %bor_not_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):

View File

@@ -1,6 +1,7 @@
test run
set enable_llvm_abi_extensions=true
target aarch64
target s390x
target x86_64

View File

@@ -0,0 +1,61 @@
test run
target aarch64
; TODO: Merge this with the main i128-bricmp file when s390x supports overflows.
; See: https://github.com/bytecodealliance/wasmtime/issues/3060
function %i128_bricmp_of(i128, i128) -> b1 {
block0(v0: i128,v1: i128):
br_icmp.i128 of v0, v1, block2
jump block1
block1:
v2 = bconst.b1 false
return v2
block2:
v3 = bconst.b1 true
return v3
}
; run: %i128_bricmp_of(0, 0) == false
; run: %i128_bricmp_of(0, 1) == false
; run: %i128_bricmp_of(0, -1) == false
; run: %i128_bricmp_of(-1, -1) == false
; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0) == false
; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == false
; run: %i128_bricmp_of(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false
; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 1) == true
; run: %i128_bricmp_of(1, 0x80000000_00000000_00000000_00000000) == true
; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true
; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == false
; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false
function %i128_bricmp_nof(i128, i128) -> b1 {
block0(v0: i128,v1: i128):
br_icmp.i128 nof v0, v1, block2
jump block1
block1:
v2 = bconst.b1 false
return v2
block2:
v3 = bconst.b1 true
return v3
}
; run: %i128_bricmp_nof(0, 0) == true
; run: %i128_bricmp_nof(0, 1) == true
; run: %i128_bricmp_nof(0, -1) == true
; run: %i128_bricmp_nof(-1, -1) == true
; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0) == true
; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == true
; run: %i128_bricmp_nof(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true
; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 1) == false
; run: %i128_bricmp_nof(1, 0x80000000_00000000_00000000_00000000) == false
; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false
; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == true
; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true

View File

@@ -1,5 +1,6 @@
test run
target aarch64
target s390x
function %i128_bricmp_eq(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
@@ -245,45 +246,3 @@ block2:
v3 = bconst.b1 true
return v3
}
; run: %i128_bricmp_of(0, 0) == false
; run: %i128_bricmp_of(0, 1) == false
; run: %i128_bricmp_of(0, -1) == false
; run: %i128_bricmp_of(-1, -1) == false
; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0) == false
; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == false
; run: %i128_bricmp_of(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false
; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 1) == true
; run: %i128_bricmp_of(1, 0x80000000_00000000_00000000_00000000) == true
; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true
; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == false
; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false
function %i128_bricmp_nof(i128, i128) -> b1 {
block0(v0: i128,v1: i128):
br_icmp.i128 nof v0, v1, block2
jump block1
block1:
v2 = bconst.b1 false
return v2
block2:
v3 = bconst.b1 true
return v3
}
; run: %i128_bricmp_nof(0, 0) == true
; run: %i128_bricmp_nof(0, 1) == true
; run: %i128_bricmp_nof(0, -1) == true
; run: %i128_bricmp_nof(-1, -1) == true
; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0) == true
; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == true
; run: %i128_bricmp_nof(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true
; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 1) == false
; run: %i128_bricmp_nof(1, 0x80000000_00000000_00000000_00000000) == false
; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false
; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == true
; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true

View File

@@ -1,5 +1,6 @@
test run
target aarch64
target s390x
function %bxor_not_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):

View File

@@ -1,5 +1,6 @@
test run
target aarch64
target s390x
function %cls_i128(i128) -> i128 {
block0(v0: i128):

View File

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
target x86_64
function %iconcat_isplit(i64, i64) -> i64, i64 {

View File

@@ -2,7 +2,7 @@ test interpret
test run
set enable_llvm_abi_extensions=true
target aarch64
; target s390x TODO: Not yet implemented on s390x
target s390x
target x86_64
function %i128_const_0() -> i128 {

View File

@@ -2,6 +2,7 @@ test interpret
test run
set enable_llvm_abi_extensions=true
target aarch64
target s390x
target x86_64
function %i128_uextend_i64(i64) -> i128 {

View File

@@ -2,6 +2,7 @@ test interpret
test run
set enable_llvm_abi_extensions=true
target aarch64
target s390x
target x86_64
function %icmp_eq_i128(i128, i128) -> b1 {

View File

@@ -2,6 +2,7 @@ test interpret
test run
set enable_llvm_abi_extensions=true
target aarch64
target s390x
target x86_64
function %ireduce_128_64(i128) -> i64 {

View File

@@ -2,6 +2,7 @@ test run
set enable_llvm_abi_extensions=true
target x86_64
target aarch64
target s390x
function %i128_stack_store_load(i128) -> b1 {
ss0 = explicit_slot 16

View File

@@ -1,6 +1,7 @@
test run
set enable_llvm_abi_extensions=true
target aarch64
target s390x
target x86_64
function %rotl(i128, i8) -> i128 {

View File

@@ -1,6 +1,7 @@
test run
set enable_llvm_abi_extensions=true
target aarch64
target s390x
target x86_64
function %i128_select(b1, i128, i128) -> i128 {

View File

@@ -1,5 +1,6 @@
test run
target aarch64
target s390x
; TODO: Merge this with the main i128-shifts file when x86_64 passes these.

View File

@@ -1,6 +1,7 @@
test run
set enable_llvm_abi_extensions=true
target aarch64
target s390x
target x86_64