s390x: Implement full SIMD support (#4427)

This adds full support for all Cranelift SIMD instructions
to the s390x target.  Everything is matched fully via ISLE.

In addition to adding support for many new instructions and
the lower.isle code to match all SIMD IR patterns, this patch
also adds ABI support for vector types.
In particular, we now need to handle the fact that
vector registers 8 .. 15 are partially callee-saved,
i.e. the high parts of those registers (which correspond
to the old floating-point registers) are callee-saved,
but the low parts are not.  This is the exact same situation
that we already have on AArch64, and so this patch uses the
same solution (the is_included_in_clobbers callback).
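
To illustrate the clobber handling, here is a minimal,
self-contained sketch of the idea.  The enum, struct, and free
function below are illustrative stand-ins rather than the actual
Cranelift definitions; in the real backend this logic lives in the
is_included_in_clobbers callback on the call instruction, using the
caller_callconv/callee_callconv fields this patch adds to CallInfo:

  #[derive(Clone, Copy, PartialEq, Eq)]
  enum CallConv {
      SystemV,
      WasmtimeSystemV,
  }

  // Per-call data; mirrors the caller_callconv/callee_callconv
  // fields added to CallInfo in this patch.
  struct CallInfo {
      caller_callconv: CallConv,
      callee_callconv: CallConv,
  }

  // If caller and callee share an ABI, anything the callee may
  // clobber the caller may clobber as well, so the call need not
  // contribute its (over-approximated) vector clobbers -- in
  // particular v8-v15, whose floating-point halves are really
  // callee-saved.
  fn is_included_in_clobbers(call: &CallInfo) -> bool {
      call.caller_callconv != call.callee_callconv
  }

  fn main() {
      let same_abi = CallInfo {
          caller_callconv: CallConv::SystemV,
          callee_callconv: CallConv::SystemV,
      };
      let cross_abi = CallInfo {
          caller_callconv: CallConv::WasmtimeSystemV,
          callee_callconv: CallConv::SystemV,
      };
      assert!(!is_included_in_clobbers(&same_abi));
      assert!(is_included_in_clobbers(&cross_abi));
  }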

The bulk of the changes are platform-specific, but there are
a few exceptions:

- Added ISLE extractors for the Immediate and Constant types,
  to enable matching the vconst and swizzle instructions.

- Added a missing accessor for call_conv to ABISig.

- Fixed endian conversion for vector types in data_value.rs
  to enable their use in runtests on big-endian platforms
  (see the byte-order sketch after this list).

- Enabled (nearly) all SIMD runtests on s390x.  [ Two test cases
  remain disabled due to vector shift count semantics, see below. ]

- Enabled all Wasmtime SIMD tests on s390x.
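
As a concrete illustration of the data_value.rs change, here is a
standalone sketch of the byte-order round trip, assuming (as the fix
does) that the V128 payload is kept in little-endian byte order; the
helper names here are made up for illustration only:

  // Convert a little-endian V128 payload into native byte order,
  // as done when writing a vector value into runtest memory.
  fn v128_to_native(le: [u8; 16]) -> [u8; 16] {
      u128::from_le_bytes(le).to_ne_bytes()
  }

  // Inverse conversion when reading a native-order 16-byte slice
  // back into the little-endian V128 representation.
  fn v128_from_native(ne: [u8; 16]) -> [u8; 16] {
      u128::from_ne_bytes(ne).to_le_bytes()
  }

  fn main() {
      let v: [u8; 16] = *b"0123456789abcdef";
      // On little-endian hosts both helpers are the identity; on
      // big-endian hosts such as s390x they reverse the 16 bytes.
      // Either way the round trip is lossless.
      assert_eq!(v128_from_native(v128_to_native(v)), v);
  }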

There are three minor issues, called out via FIXMEs below,
which should be addressed in the future, but should not be
blockers to getting this patch merged.  I've opened the
following issues to track them:

- Vector shift count semantics
  https://github.com/bytecodealliance/wasmtime/issues/4424

- is_included_in_clobbers vs. link register
  https://github.com/bytecodealliance/wasmtime/issues/4425

- gen_constant callback
  https://github.com/bytecodealliance/wasmtime/issues/4426

All tests, including all newly enabled SIMD tests, pass
on both z14 and z15 architectures.
Author:    Ulrich Weigand
Date:      2022-07-18 23:00:48 +02:00
Committed: GitHub
parent e5678e8f8d
commit 638dc4e0b3
75 changed files with 17839 additions and 1744 deletions

View File

@@ -171,9 +171,9 @@ fn write_testsuite_tests(
fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
match strategy {
"Cranelift" => match (testsuite, testname) {
// No simd support yet for s390x.
("simd", _) if platform_is_s390x() => return true,
_ if platform_is_s390x() && testname.starts_with("simd") => return true,
// FIXME: These tests fail under qemu due to a qemu bug.
(_, "simd_f32x4_pmin_pmax") if platform_is_s390x() => return true,
(_, "simd_f64x2_pmin_pmax") if platform_is_s390x() => return true,
_ => {}
},
_ => panic!("unrecognized strategy"),

View File

@@ -89,7 +89,7 @@ impl DataValue {
DataValue::I128(i) => dst[..16].copy_from_slice(&i.to_ne_bytes()[..]),
DataValue::F32(f) => dst[..4].copy_from_slice(&f.bits().to_ne_bytes()[..]),
DataValue::F64(f) => dst[..8].copy_from_slice(&f.bits().to_ne_bytes()[..]),
DataValue::V128(v) => dst[..16].copy_from_slice(&v[..]),
DataValue::V128(v) => dst[..16].copy_from_slice(&u128::from_le_bytes(*v).to_ne_bytes()),
_ => unimplemented!(),
};
}
@@ -120,7 +120,7 @@ impl DataValue {
DataValue::B(src[..size].iter().any(|&i| i != 0))
}
_ if ty.is_vector() && ty.bytes() == 16 => {
DataValue::V128(src[..16].try_into().unwrap())
DataValue::V128(u128::from_ne_bytes(src[..16].try_into().unwrap()).to_le_bytes())
}
_ => unimplemented!(),
}

View File

@@ -97,6 +97,10 @@ fn in_flt_reg(ty: Type) -> bool {
}
}
fn in_vec_reg(ty: Type) -> bool {
ty.is_vector() && ty.bits() == 128
}
fn get_intreg_for_arg(idx: usize) -> Option<Reg> {
match idx {
0 => Some(regs::gpr(2)),
@@ -118,6 +122,20 @@ fn get_fltreg_for_arg(idx: usize) -> Option<Reg> {
}
}
fn get_vecreg_for_arg(idx: usize) -> Option<Reg> {
match idx {
0 => Some(regs::vr(24)),
1 => Some(regs::vr(25)),
2 => Some(regs::vr(26)),
3 => Some(regs::vr(27)),
4 => Some(regs::vr(28)),
5 => Some(regs::vr(29)),
6 => Some(regs::vr(30)),
7 => Some(regs::vr(31)),
_ => None,
}
}
fn get_intreg_for_ret(idx: usize) -> Option<Reg> {
match idx {
0 => Some(regs::gpr(2)),
@@ -140,6 +158,21 @@ fn get_fltreg_for_ret(idx: usize) -> Option<Reg> {
}
}
fn get_vecreg_for_ret(idx: usize) -> Option<Reg> {
match idx {
0 => Some(regs::vr(24)),
// ABI extension to support multi-value returns:
1 => Some(regs::vr(25)),
2 => Some(regs::vr(26)),
3 => Some(regs::vr(27)),
4 => Some(regs::vr(28)),
5 => Some(regs::vr(29)),
6 => Some(regs::vr(30)),
7 => Some(regs::vr(31)),
_ => None,
}
}
/// This is the limit for the size of argument and return-value areas on the
/// stack. We place a reasonable limit here to avoid integer overflow issues
/// with 32-bit arithmetic: for now, 128 MB.
@@ -182,6 +215,7 @@ impl ABIMachineSpec for S390xMachineDeps {
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
let mut next_gpr = 0;
let mut next_fpr = 0;
let mut next_vr = 0;
let mut next_stack: u64 = 0;
let mut ret = vec![];
@@ -206,8 +240,8 @@ impl ABIMachineSpec for S390xMachineDeps {
let intreg = in_int_reg(param.value_type);
let fltreg = in_flt_reg(param.value_type);
debug_assert!(intreg || fltreg);
debug_assert!(!(intreg && fltreg));
let vecreg = in_vec_reg(param.value_type);
debug_assert!(intreg as i32 + fltreg as i32 + vecreg as i32 == 1);
let (next_reg, candidate) = if intreg {
let candidate = match args_or_rets {
@@ -215,12 +249,18 @@ impl ABIMachineSpec for S390xMachineDeps {
ArgsOrRets::Rets => get_intreg_for_ret(next_gpr),
};
(&mut next_gpr, candidate)
} else {
} else if fltreg {
let candidate = match args_or_rets {
ArgsOrRets::Args => get_fltreg_for_arg(next_fpr),
ArgsOrRets::Rets => get_fltreg_for_ret(next_fpr),
};
(&mut next_fpr, candidate)
} else {
let candidate = match args_or_rets {
ArgsOrRets::Args => get_vecreg_for_arg(next_vr),
ArgsOrRets::Rets => get_vecreg_for_ret(next_vr),
};
(&mut next_vr, candidate)
};
// In the Wasmtime ABI only the first return value can be in a register.
@@ -252,7 +292,8 @@ impl ABIMachineSpec for S390xMachineDeps {
// Align the stack slot.
debug_assert!(slot_size.is_power_of_two());
next_stack = align_to(next_stack, slot_size);
let slot_align = std::cmp::min(slot_size, 8);
next_stack = align_to(next_stack, slot_align);
// If the type is actually of smaller size (and the argument
// was not extended), it is passed right-aligned.
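
The slot-alignment change above caps the alignment of stack-passed
arguments at 8 bytes, so a 16-byte vector argument no longer forces
16-byte alignment of its stack slot (presumably because the s390x
ABI guarantees only 8-byte alignment for stack parameters).  A
minimal sketch of the arithmetic, with align_to restated locally:

  // Round `x` up to a multiple of `align` (a power of two).
  fn align_to(x: u64, align: u64) -> u64 {
      (x + align - 1) & !(align - 1)
  }

  fn main() {
      let slot_size: u64 = 16; // a 16-byte vector argument
      let next_stack: u64 = 8; // current stack offset

      // Old behaviour: align to the full slot size.
      assert_eq!(align_to(next_stack, slot_size), 16);

      // New behaviour: alignment is capped at 8 bytes.
      let slot_align = std::cmp::min(slot_size, 8);
      assert_eq!(align_to(next_stack, slot_align), 8);
  }
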
@@ -477,6 +518,13 @@ impl ABIMachineSpec for S390xMachineDeps {
RegClass::Float => clobbered_fpr.push(reg),
}
}
// We need to save the link register in non-leaf functions.
// FIXME: This should be included in the clobber list to begin with,
// but isn't because we have excluded call instructions via the
// is_included_in_clobbers callback.
if outgoing_args_size > 0 {
clobbered_gpr.push(Writable::from_reg(RealReg::from(gpr_preg(14))));
}
let mut first_clobbered_gpr = 16;
for reg in clobbered_gpr {
@@ -534,13 +582,15 @@ impl ABIMachineSpec for S390xMachineDeps {
// Save FPRs.
for (i, reg) in clobbered_fpr.iter().enumerate() {
insts.push(Inst::FpuStore64 {
insts.push(Inst::VecStoreLane {
size: 64,
rd: reg.to_reg().into(),
mem: MemArg::reg_plus_off(
stack_reg(),
(i * 8) as i64 + outgoing_args_size as i64 + fixed_frame_storage_size as i64,
MemFlags::trusted(),
),
lane_imm: 0,
});
if flags.unwind_info() {
insts.push(Inst::Unwind {
@@ -566,7 +616,14 @@ impl ABIMachineSpec for S390xMachineDeps {
let mut insts = SmallVec::new();
// Collect clobbered registers.
let (clobbered_gpr, clobbered_fpr) = get_regs_saved_in_prologue(call_conv, clobbers);
let (mut clobbered_gpr, clobbered_fpr) = get_regs_saved_in_prologue(call_conv, clobbers);
// We need to restore the link register in non-leaf functions.
// FIXME: This should be included in the clobber list to begin with,
// but isn't because we have excluded call instructions via the
// is_included_in_clobbers callback.
if outgoing_args_size > 0 {
clobbered_gpr.push(Writable::from_reg(RealReg::from(gpr_preg(14))));
}
let mut first_clobbered_gpr = 16;
for reg in clobbered_gpr {
let enc = reg.to_reg().hw_enc();
@@ -578,13 +635,15 @@ impl ABIMachineSpec for S390xMachineDeps {
// Restore FPRs.
for (i, reg) in clobbered_fpr.iter().enumerate() {
insts.push(Inst::FpuLoad64 {
insts.push(Inst::VecLoadLaneUndef {
size: 64,
rd: Writable::from_reg(reg.to_reg().into()),
mem: MemArg::reg_plus_off(
stack_reg(),
(i * 8) as i64 + outgoing_args_size as i64 + fixed_frame_storage_size as i64,
MemFlags::trusted(),
),
lane_imm: 0,
});
}
@@ -639,7 +698,7 @@ impl ABIMachineSpec for S390xMachineDeps {
// We allocate in terms of 8-byte slots.
match rc {
RegClass::Int => 1,
RegClass::Float => 1,
RegClass::Float => 2,
}
}
@@ -739,6 +798,21 @@ const fn clobbers() -> PRegSet {
.with(gpr_preg(3))
.with(gpr_preg(4))
.with(gpr_preg(5))
// v0 - v7 inclusive and v16 - v31 inclusive are
// caller-saves. The upper 64 bits of v8 - v15 inclusive are
// also caller-saves. However, because we cannot currently
// represent partial registers to regalloc2, we indicate here
// that every vector register is caller-save. Because this
// function is used at *callsites*, approximating in this
// direction (save more than necessary) is conservative and
// thus safe.
//
// Note that we exclude clobbers from a call instruction when
// a call instruction's callee has the same ABI as the caller
// (the current function body); this is safe (anything
// clobbered by callee can be clobbered by caller as well) and
// avoids unnecessary saves of v8-v15 in the prologue even
// though we include them as defs here.
.with(vr_preg(0))
.with(vr_preg(1))
.with(vr_preg(2))
@@ -747,6 +821,14 @@ const fn clobbers() -> PRegSet {
.with(vr_preg(5))
.with(vr_preg(6))
.with(vr_preg(7))
.with(vr_preg(8))
.with(vr_preg(9))
.with(vr_preg(10))
.with(vr_preg(11))
.with(vr_preg(12))
.with(vr_preg(13))
.with(vr_preg(14))
.with(vr_preg(15))
.with(vr_preg(16))
.with(vr_preg(17))
.with(vr_preg(18))

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -43,15 +43,28 @@ impl LowerBackend for S390xBackend {
| Opcode::Bconst
| Opcode::F32const
| Opcode::F64const
| Opcode::Vconst
| Opcode::Null
| Opcode::Iadd
| Opcode::IaddIfcout
| Opcode::Isub
| Opcode::UaddSat
| Opcode::SaddSat
| Opcode::UsubSat
| Opcode::SsubSat
| Opcode::IaddPairwise
| Opcode::Imin
| Opcode::Umin
| Opcode::Imax
| Opcode::Umax
| Opcode::AvgRound
| Opcode::Iabs
| Opcode::Ineg
| Opcode::Imul
| Opcode::Umulhi
| Opcode::Smulhi
| Opcode::WideningPairwiseDotProductS
| Opcode::SqmulRoundSat
| Opcode::Udiv
| Opcode::Urem
| Opcode::Sdiv
@@ -64,6 +77,13 @@ impl LowerBackend for S390xBackend {
| Opcode::Ireduce
| Opcode::Uextend
| Opcode::Sextend
| Opcode::Snarrow
| Opcode::Unarrow
| Opcode::Uunarrow
| Opcode::SwidenLow
| Opcode::SwidenHigh
| Opcode::UwidenLow
| Opcode::UwidenHigh
| Opcode::Bnot
| Opcode::Band
| Opcode::Bor
@@ -72,6 +92,7 @@ impl LowerBackend for S390xBackend {
| Opcode::BorNot
| Opcode::BxorNot
| Opcode::Bitselect
| Opcode::Vselect
| Opcode::Breduce
| Opcode::Bextend
| Opcode::Bmask
@@ -86,11 +107,15 @@ impl LowerBackend for S390xBackend {
| Opcode::Fdiv
| Opcode::Fmin
| Opcode::Fmax
| Opcode::FminPseudo
| Opcode::FmaxPseudo
| Opcode::Sqrt
| Opcode::Fneg
| Opcode::Fabs
| Opcode::Fpromote
| Opcode::Fdemote
| Opcode::FvpromoteLow
| Opcode::Fvdemote
| Opcode::Ceil
| Opcode::Floor
| Opcode::Trunc
@@ -99,11 +124,20 @@ impl LowerBackend for S390xBackend {
| Opcode::Fcopysign
| Opcode::FcvtFromUint
| Opcode::FcvtFromSint
| Opcode::FcvtLowFromSint
| Opcode::FcvtToUint
| Opcode::FcvtToSint
| Opcode::FcvtToUintSat
| Opcode::FcvtToSintSat
| Opcode::Splat
| Opcode::Swizzle
| Opcode::Shuffle
| Opcode::Insertlane
| Opcode::Extractlane
| Opcode::ScalarToVector
| Opcode::VhighBits
| Opcode::Bitcast
| Opcode::RawBitcast
| Opcode::Load
| Opcode::Uload8
| Opcode::Sload8
@@ -111,6 +145,12 @@ impl LowerBackend for S390xBackend {
| Opcode::Sload16
| Opcode::Uload32
| Opcode::Sload32
| Opcode::Uload8x8
| Opcode::Sload8x8
| Opcode::Uload16x4
| Opcode::Sload16x4
| Opcode::Uload32x2
| Opcode::Sload32x2
| Opcode::Store
| Opcode::Istore8
| Opcode::Istore16
@@ -122,6 +162,8 @@ impl LowerBackend for S390xBackend {
| Opcode::Fence
| Opcode::Icmp
| Opcode::Fcmp
| Opcode::VanyTrue
| Opcode::VallTrue
| Opcode::IsNull
| Opcode::IsInvalid
| Opcode::Select
@@ -147,57 +189,15 @@ impl LowerBackend for S390xBackend {
)
}
Opcode::UaddSat
| Opcode::SaddSat
| Opcode::UsubSat
| Opcode::SsubSat
| Opcode::Bitrev
| Opcode::FcvtLowFromSint
Opcode::Bitrev
| Opcode::ConstAddr
| Opcode::TlsValue
| Opcode::GetPinnedReg
| Opcode::SetPinnedReg
| Opcode::Isplit
| Opcode::Iconcat
| Opcode::RawBitcast
| Opcode::Splat
| Opcode::Swizzle
| Opcode::Insertlane
| Opcode::Extractlane
| Opcode::Imin
| Opcode::Umin
| Opcode::Imax
| Opcode::Umax
| Opcode::AvgRound
| Opcode::FminPseudo
| Opcode::FmaxPseudo
| Opcode::Uload8x8
| Opcode::Sload8x8
| Opcode::Uload16x4
| Opcode::Sload16x4
| Opcode::Uload32x2
| Opcode::Sload32x2
| Opcode::Vconst
| Opcode::Shuffle
| Opcode::Vsplit
| Opcode::Vconcat
| Opcode::Vselect
| Opcode::VanyTrue
| Opcode::VallTrue
| Opcode::VhighBits
| Opcode::ScalarToVector
| Opcode::Snarrow
| Opcode::Unarrow
| Opcode::Uunarrow
| Opcode::SwidenLow
| Opcode::SwidenHigh
| Opcode::UwidenLow
| Opcode::UwidenHigh
| Opcode::WideningPairwiseDotProductS
| Opcode::SqmulRoundSat
| Opcode::FvpromoteLow
| Opcode::Fvdemote
| Opcode::IaddPairwise
| Opcode::DynamicStackLoad
| Opcode::DynamicStackStore
| Opcode::DynamicStackAddr

View File

@@ -6,7 +6,7 @@ pub mod generated_code;
// Types that the generated ISLE code uses via `use super::*`.
use crate::isa::s390x::abi::S390xMachineDeps;
use crate::isa::s390x::inst::{
stack_reg, writable_gpr, zero_reg, CallIndInfo, CallInfo, Cond, Inst as MInst, MemArg,
stack_reg, writable_gpr, zero_reg, CallIndInfo, CallInfo, Cond, Inst as MInst, MemArg, UImm12,
UImm16Shifted, UImm32Shifted,
};
use crate::isa::s390x::settings::Flags as IsaFlags;
@@ -91,6 +91,8 @@ where
defs,
clobbers,
opcode: *opcode,
caller_callconv: self.lower_ctx.abi().call_conv(),
callee_callconv: abi.call_conv(),
})
}
@@ -102,6 +104,8 @@ where
defs,
clobbers,
opcode: *opcode,
caller_callconv: self.lower_ctx.abi().call_conv(),
callee_callconv: abi.call_conv(),
})
}
@@ -195,6 +199,46 @@ where
}
}
#[inline]
fn u64_pair_split(&mut self, n: u128) -> (u64, u64) {
((n >> 64) as u64, n as u64)
}
#[inline]
fn u64_pair_concat(&mut self, hi: u64, lo: u64) -> u128 {
(hi as u128) << 64 | (lo as u128)
}
#[inline]
fn u32_pair_split(&mut self, n: u64) -> (u32, u32) {
((n >> 32) as u32, n as u32)
}
#[inline]
fn u32_pair_concat(&mut self, hi: u32, lo: u32) -> u64 {
(hi as u64) << 32 | (lo as u64)
}
#[inline]
fn u16_pair_split(&mut self, n: u32) -> (u16, u16) {
((n >> 16) as u16, n as u16)
}
#[inline]
fn u16_pair_concat(&mut self, hi: u16, lo: u16) -> u32 {
(hi as u32) << 16 | (lo as u32)
}
#[inline]
fn u8_pair_split(&mut self, n: u16) -> (u8, u8) {
((n >> 8) as u8, n as u8)
}
#[inline]
fn u8_pair_concat(&mut self, hi: u8, lo: u8) -> u16 {
(hi as u16) << 8 | (lo as u16)
}
#[inline]
fn u8_as_u16(&mut self, n: u8) -> u16 {
n as u16
@@ -248,6 +292,15 @@ where
}
}
#[inline]
fn i16_from_u32(&mut self, n: u32) -> Option<i16> {
if let Ok(imm) = i16::try_from(n as i32) {
Some(imm)
} else {
None
}
}
#[inline]
fn uimm32shifted_from_u64(&mut self, n: u64) -> Option<UImm32Shifted> {
UImm32Shifted::maybe_from_u64(n)
@@ -258,11 +311,49 @@ where
UImm16Shifted::maybe_from_u64(n)
}
#[inline]
fn be_lane_idx(&mut self, ty: Type, idx: u8) -> u8 {
ty.lane_count() as u8 - 1 - idx
}
#[inline]
fn lane_byte_mask(&mut self, ty: Type, idx: u8) -> u16 {
let lane_bytes = (ty.lane_bits() / 8) as u8;
let lane_mask = (1u16 << lane_bytes) - 1;
lane_mask << (16 - ((idx + 1) * lane_bytes))
}
#[inline]
fn shuffle_mask_from_u128(&mut self, idx: u128) -> (u128, u16) {
let bytes = idx.to_be_bytes();
let and_mask = bytes.iter().fold(0, |acc, &x| (acc << 1) | (x < 32) as u16);
let bytes = bytes.map(|x| {
if x < 16 {
15 - x
} else if x < 32 {
47 - x
} else {
128
}
});
let permute_mask = u128::from_be_bytes(bytes);
(permute_mask, and_mask)
}
#[inline]
fn u64_from_value(&mut self, val: Value) -> Option<u64> {
let inst = self.lower_ctx.dfg().value_def(val).inst()?;
let constant = self.lower_ctx.get_constant(inst)?;
Some(constant)
let ty = self.lower_ctx.output_ty(inst, 0);
Some(zero_extend_to_u64(constant, self.ty_bits(ty).unwrap()))
}
#[inline]
fn u64_from_inverted_value(&mut self, val: Value) -> Option<u64> {
let inst = self.lower_ctx.dfg().value_def(val).inst()?;
let constant = self.lower_ctx.get_constant(inst)?;
let ty = self.lower_ctx.output_ty(inst, 0);
Some(zero_extend_to_u64(!constant, self.ty_bits(ty).unwrap()))
}
#[inline]
@@ -349,22 +440,22 @@ where
#[inline]
fn uimm16shifted_from_inverted_value(&mut self, val: Value) -> Option<UImm16Shifted> {
let constant = self.u64_from_value(val)?;
let imm = UImm16Shifted::maybe_from_u64(!constant)?;
let constant = self.u64_from_inverted_value(val)?;
let imm = UImm16Shifted::maybe_from_u64(constant)?;
Some(imm.negate_bits())
}
#[inline]
fn uimm32shifted_from_inverted_value(&mut self, val: Value) -> Option<UImm32Shifted> {
let constant = self.u64_from_value(val)?;
let imm = UImm32Shifted::maybe_from_u64(!constant)?;
let constant = self.u64_from_inverted_value(val)?;
let imm = UImm32Shifted::maybe_from_u64(constant)?;
Some(imm.negate_bits())
}
#[inline]
fn mask_amt_imm(&mut self, ty: Type, amt: i64) -> u8 {
let mask = self.ty_bits(ty).unwrap() - 1;
(amt as u8) & mask
let mask = ty.lane_bits() - 1;
(amt as u8) & (mask as u8)
}
#[inline]
@@ -498,13 +589,18 @@ where
}
#[inline]
fn memarg_reg_plus_reg(&mut self, x: Reg, y: Reg, flags: MemFlags) -> MemArg {
MemArg::reg_plus_reg(x, y, flags)
fn memarg_reg_plus_reg(&mut self, x: Reg, y: Reg, bias: u8, flags: MemFlags) -> MemArg {
MemArg::BXD12 {
base: x,
index: y,
disp: UImm12::maybe_from_u64(bias as u64).unwrap(),
flags,
}
}
#[inline]
fn memarg_reg_plus_off(&mut self, reg: Reg, off: i64, flags: MemFlags) -> MemArg {
MemArg::reg_plus_off(reg, off, flags)
fn memarg_reg_plus_off(&mut self, reg: Reg, off: i64, bias: u8, flags: MemFlags) -> MemArg {
MemArg::reg_plus_off(reg, off + (bias as i64), flags)
}
#[inline]
@@ -586,6 +682,17 @@ where
}
}
/// Zero-extend the low `from_bits` bits of `value` to a full u64.
#[inline]
fn zero_extend_to_u64(value: u64, from_bits: u8) -> u64 {
assert!(from_bits <= 64);
if from_bits >= 64 {
value
} else {
value & ((1u64 << from_bits) - 1)
}
}
/// Sign-extend the low `from_bits` bits of `value` to a full u64.
#[inline]
fn sign_extend_to_u64(value: u64, from_bits: u8) -> u64 {

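The lane helpers in this file encode the fact that the hardware
numbers vector elements from the most-significant (leftmost) byte,
while Cranelift lane indices count from the little-endian end.  Here
is a standalone restatement of be_lane_idx, lane_byte_mask, and
zero_extend_to_u64 with a few worked checks (signatures simplified
to plain integers; not the actual Cranelift code):

  // Cranelift lane index -> element index counted from the
  // most-significant end, given the lane count of the type.
  fn be_lane_idx(lane_count: u8, idx: u8) -> u8 {
      lane_count - 1 - idx
  }

  // 16-bit byte mask (most-significant bit = byte 0 of the vector)
  // covering lane `idx`, counted from the most-significant end.
  fn lane_byte_mask(lane_bytes: u8, idx: u8) -> u16 {
      let lane_mask = (1u16 << lane_bytes) - 1;
      lane_mask << (16 - ((idx + 1) * lane_bytes))
  }

  // Zero-extend the low `from_bits` bits of `value`, as used when
  // matching small integer constants against their type width.
  fn zero_extend_to_u64(value: u64, from_bits: u8) -> u64 {
      if from_bits >= 64 {
          value
      } else {
          value & ((1u64 << from_bits) - 1)
      }
  }

  fn main() {
      // For i32x4, Cranelift lane 1 is hardware element 2.
      assert_eq!(be_lane_idx(4, 1), 2);
      // Hardware element 0 of an i32x4 occupies the top four bytes.
      assert_eq!(lane_byte_mask(4, 0), 0xf000);
      // An iconst.i32 of -1 is matched as 0xffff_ffff rather than
      // as a sign-extended 64-bit value.
      assert_eq!(zero_extend_to_u64(u64::MAX, 32), 0xffff_ffff);
  }
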
View File

@@ -696,6 +696,11 @@ impl ABISig {
let ret_arg = self.stack_ret_arg?;
Some(self.args[ret_arg].clone())
}
/// Get calling convention used.
pub fn call_conv(&self) -> isa::CallConv {
self.call_conv
}
}
/// ABI object for a function body.

View File

@@ -7,7 +7,8 @@ use std::cell::Cell;
pub use super::MachLabel;
pub use crate::ir::{
ArgumentExtension, DynamicStackSlot, ExternalName, FuncRef, GlobalValue, SigRef, StackSlot,
ArgumentExtension, Constant, DynamicStackSlot, ExternalName, FuncRef, GlobalValue, Immediate,
SigRef, StackSlot,
};
pub use crate::isa::unwind::UnwindInst;
pub use crate::machinst::{ABIArg, ABIArgSlot, ABISig, RealReg, Reg, RelocDistance, Writable};
@@ -547,6 +548,18 @@ macro_rules! isle_prelude_methods {
}
}
#[inline]
fn u128_from_immediate(&mut self, imm: Immediate) -> Option<u128> {
let bytes = self.lower_ctx.get_immediate_data(imm).as_slice();
Some(u128::from_le_bytes(bytes.try_into().ok()?))
}
#[inline]
fn u128_from_constant(&mut self, constant: Constant) -> Option<u128> {
let bytes = self.lower_ctx.get_constant_data(constant).as_slice();
Some(u128::from_le_bytes(bytes.try_into().ok()?))
}
fn nonzero_u64_from_imm64(&mut self, val: Imm64) -> Option<u64> {
match val.bits() {
0 => None,

View File

@@ -12,8 +12,8 @@ use crate::inst_predicates::{has_lowering_side_effect, is_constant_64bit};
use crate::ir::{
types::{FFLAGS, IFLAGS},
ArgumentPurpose, Block, Constant, ConstantData, DataFlowGraph, ExternalName, Function,
GlobalValue, GlobalValueData, Inst, InstructionData, MemFlags, Opcode, Signature, SourceLoc,
Type, Value, ValueDef, ValueLabelAssignments, ValueLabelStart,
GlobalValue, GlobalValueData, Immediate, Inst, InstructionData, MemFlags, Opcode, Signature,
SourceLoc, Type, Value, ValueDef, ValueLabelAssignments, ValueLabelStart,
};
use crate::machinst::{
non_writable_value_regs, writable_value_regs, ABICallee, BlockIndex, BlockLoweringOrder,
@@ -167,6 +167,8 @@ pub trait LowerCtx {
/// for the input produced by the sunk instruction), otherwise the
/// side-effect will occur twice.
fn sink_inst(&mut self, ir_inst: Inst);
/// Retrieve immediate data given a handle.
fn get_immediate_data(&self, imm: Immediate) -> &ConstantData;
/// Retrieve constant data given a handle.
fn get_constant_data(&self, constant_handle: Constant) -> &ConstantData;
/// Indicate that a constant should be emitted.
@@ -1448,6 +1450,10 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
self.inst_sunk.insert(ir_inst);
}
fn get_immediate_data(&self, imm: Immediate) -> &ConstantData {
self.f.dfg.immediates.get(imm).unwrap()
}
fn get_constant_data(&self, constant_handle: Constant) -> &ConstantData {
self.f.dfg.constants.get(constant_handle)
}

View File

@@ -661,6 +661,17 @@
(decl reloc_distance_near () RelocDistance)
(extern extractor reloc_distance_near reloc_distance_near)
;; Accessor for `Immediate` as u128.
(decl u128_from_immediate (u128) Immediate)
(extern extractor u128_from_immediate u128_from_immediate)
;; Accessor for `Constant` as u128.
(decl u128_from_constant (u128) Constant)
(extern extractor u128_from_constant u128_from_constant)
;;;; Helpers for tail recursion loops ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; A range of integers to loop through.

View File

@@ -43,3 +43,18 @@ block0(v0: i32, v1: i8, v2: i8):
; locre %r2, %r3
; br %r14
function %i(i32, i8x16, i8x16) -> i8x16 {
block0(v0: i32, v1: i8x16, v2: i8x16):
v3 = iconst.i32 42
v4 = icmp.i32 eq v0, v3
v5 = select.i8x16 v4, v1, v2
return v5
}
; block0:
; vlr %v20, %v24
; clfi %r2, 42
; vlr %v24, %v25
; jne 10 ; vlr %v24, %v20
; br %r14

View File

@@ -168,6 +168,46 @@ block0(v0: f64, v1: f64):
; wfmaxdb %f0, %f0, %f2, 1
; br %r14
function %fmin_pseudo_f32(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fmin_pseudo v0, v1
return v2
}
; block0:
; wfminsb %f0, %f0, %f2, 3
; br %r14
function %fmin_pseudo_f64(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fmin_pseudo v0, v1
return v2
}
; block0:
; wfmindb %f0, %f0, %f2, 3
; br %r14
function %fmax_pseudo_f32(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fmax_pseudo v0, v1
return v2
}
; block0:
; wfmaxsb %f0, %f0, %f2, 3
; br %r14
function %fmax_pseudo_f64(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fmax_pseudo v0, v1
return v2
}
; block0:
; wfmaxdb %f0, %f0, %f2, 3
; br %r14
function %sqrt_f32(f32) -> f32 {
block0(v0: f32):
v1 = sqrt v0

View File

@@ -8,7 +8,7 @@ block0(v0: i64):
}
; block0:
; vlebrg %f0, 0(%r2), 0
; vlebrg %v0, 0(%r2), 0
; br %r14
function %load_f32_little(i64) -> f32 {
@@ -18,7 +18,7 @@ block0(v0: i64):
}
; block0:
; vlebrf %f0, 0(%r2), 0
; vlebrf %v0, 0(%r2), 0
; br %r14
function %store_f64_little(f64, i64) {
@@ -28,7 +28,7 @@ block0(v0: f64, v1: i64):
}
; block0:
; vstebrg %f0, 0(%r2), 0
; vstebrg %v0, 0(%r2), 0
; br %r14
function %store_f32_little(f32, i64) {
@@ -38,6 +38,6 @@ block0(v0: f32, v1: i64):
}
; block0:
; vstebrf %f0, 0(%r2), 0
; vstebrf %v0, 0(%r2), 0
; br %r14

View File

@@ -0,0 +1,824 @@
test compile precise-output
target s390x
function %iadd_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = iadd.i64x2 v0, v1
return v2
}
; block0:
; vag %v24, %v24, %v25
; br %r14
function %iadd_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = iadd.i32x4 v0, v1
return v2
}
; block0:
; vaf %v24, %v24, %v25
; br %r14
function %iadd_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = iadd.i16x8 v0, v1
return v2
}
; block0:
; vah %v24, %v24, %v25
; br %r14
function %iadd_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = iadd.i8x16 v0, v1
return v2
}
; block0:
; vab %v24, %v24, %v25
; br %r14
function %isub_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = isub.i64x2 v0, v1
return v2
}
; block0:
; vsg %v24, %v24, %v25
; br %r14
function %isub_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = isub.i32x4 v0, v1
return v2
}
; block0:
; vsf %v24, %v24, %v25
; br %r14
function %isub_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = isub.i16x8 v0, v1
return v2
}
; block0:
; vsh %v24, %v24, %v25
; br %r14
function %isub_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = isub.i8x16 v0, v1
return v2
}
; block0:
; vsb %v24, %v24, %v25
; br %r14
function %iabs_i64x2(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iabs.i64x2 v0
return v1
}
; block0:
; vlpg %v24, %v24
; br %r14
function %iabs_i32x4(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iabs.i32x4 v0
return v1
}
; block0:
; vlpf %v24, %v24
; br %r14
function %iabs_i16x8(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iabs.i16x8 v0
return v1
}
; block0:
; vlph %v24, %v24
; br %r14
function %iabs_i8x16(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iabs.i8x16 v0
return v1
}
; block0:
; vlpb %v24, %v24
; br %r14
function %ineg_i64x2(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = ineg.i64x2 v0
return v1
}
; block0:
; vlcg %v24, %v24
; br %r14
function %ineg_i32x4(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = ineg.i32x4 v0
return v1
}
; block0:
; vlcf %v24, %v24
; br %r14
function %ineg_i16x8(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = ineg.i16x8 v0
return v1
}
; block0:
; vlch %v24, %v24
; br %r14
function %ineg_i8x16(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = ineg.i8x16 v0
return v1
}
; block0:
; vlcb %v24, %v24
; br %r14
function %umax_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = umax.i64x2 v0, v1
return v2
}
; block0:
; vmxlg %v24, %v24, %v25
; br %r14
function %umax_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = umax.i32x4 v0, v1
return v2
}
; block0:
; vmxlf %v24, %v24, %v25
; br %r14
function %umax_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = umax.i16x8 v0, v1
return v2
}
; block0:
; vmxlh %v24, %v24, %v25
; br %r14
function %umax_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = umax.i8x16 v0, v1
return v2
}
; block0:
; vmxlb %v24, %v24, %v25
; br %r14
function %umin_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = umin.i64x2 v0, v1
return v2
}
; block0:
; vmnlg %v24, %v24, %v25
; br %r14
function %umin_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = umin.i32x4 v0, v1
return v2
}
; block0:
; vmnlf %v24, %v24, %v25
; br %r14
function %umin_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = umin.i16x8 v0, v1
return v2
}
; block0:
; vmnlh %v24, %v24, %v25
; br %r14
function %umin_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = umin.i8x16 v0, v1
return v2
}
; block0:
; vmnlb %v24, %v24, %v25
; br %r14
function %imax_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = imax.i64x2 v0, v1
return v2
}
; block0:
; vmxg %v24, %v24, %v25
; br %r14
function %imax_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = imax.i32x4 v0, v1
return v2
}
; block0:
; vmxf %v24, %v24, %v25
; br %r14
function %imax_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = imax.i16x8 v0, v1
return v2
}
; block0:
; vmxh %v24, %v24, %v25
; br %r14
function %imax_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = imax.i8x16 v0, v1
return v2
}
; block0:
; vmxb %v24, %v24, %v25
; br %r14
function %imin_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = imin.i64x2 v0, v1
return v2
}
; block0:
; vmng %v24, %v24, %v25
; br %r14
function %imin_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = imin.i32x4 v0, v1
return v2
}
; block0:
; vmnf %v24, %v24, %v25
; br %r14
function %imin_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = imin.i16x8 v0, v1
return v2
}
; block0:
; vmnh %v24, %v24, %v25
; br %r14
function %imin_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = imin.i8x16 v0, v1
return v2
}
; block0:
; vmnb %v24, %v24, %v25
; br %r14
function %avg_round_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = avg_round.i64x2 v0, v1
return v2
}
; block0:
; vavglg %v24, %v24, %v25
; br %r14
function %avg_round_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = avg_round.i32x4 v0, v1
return v2
}
; block0:
; vavglf %v24, %v24, %v25
; br %r14
function %avg_round_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = avg_round.i16x8 v0, v1
return v2
}
; block0:
; vavglh %v24, %v24, %v25
; br %r14
function %avg_round_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = avg_round.i8x16 v0, v1
return v2
}
; block0:
; vavglb %v24, %v24, %v25
; br %r14
function %uadd_sat64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = uadd_sat.i64x2 v0, v1
return v2
}
; block0:
; vag %v5, %v24, %v25
; vchlg %v7, %v24, %v5
; vo %v24, %v5, %v7
; br %r14
function %uadd_sat32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = uadd_sat.i32x4 v0, v1
return v2
}
; block0:
; vaf %v5, %v24, %v25
; vchlf %v7, %v24, %v5
; vo %v24, %v5, %v7
; br %r14
function %uadd_sat16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = uadd_sat.i16x8 v0, v1
return v2
}
; block0:
; vah %v5, %v24, %v25
; vchlh %v7, %v24, %v5
; vo %v24, %v5, %v7
; br %r14
function %uadd_sat8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = uadd_sat.i8x16 v0, v1
return v2
}
; block0:
; vab %v5, %v24, %v25
; vchlb %v7, %v24, %v5
; vo %v24, %v5, %v7
; br %r14
function %sadd_sat32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = sadd_sat.i32x4 v0, v1
return v2
}
; block0:
; vuphf %v5, %v24
; vuphf %v7, %v25
; vag %v17, %v5, %v7
; vuplf %v19, %v24
; vuplf %v21, %v25
; vag %v23, %v19, %v21
; vpksg %v24, %v17, %v23
; br %r14
function %sadd_sat16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = sadd_sat.i16x8 v0, v1
return v2
}
; block0:
; vuphh %v5, %v24
; vuphh %v7, %v25
; vaf %v17, %v5, %v7
; vuplh %v19, %v24
; vuplh %v21, %v25
; vaf %v23, %v19, %v21
; vpksf %v24, %v17, %v23
; br %r14
function %sadd_sat8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = sadd_sat.i8x16 v0, v1
return v2
}
; block0:
; vuphb %v5, %v24
; vuphb %v7, %v25
; vah %v17, %v5, %v7
; vuplb %v19, %v24
; vuplb %v21, %v25
; vah %v23, %v19, %v21
; vpksh %v24, %v17, %v23
; br %r14
function %iadd_pairwise_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = iadd_pairwise.i32x4 v0, v1
return v2
}
; block0:
; vrepib %v5, 32
; vsrlb %v7, %v25, %v5
; vaf %v17, %v25, %v7
; vsrlb %v19, %v24, %v5
; vaf %v21, %v24, %v19
; vpkg %v24, %v17, %v21
; br %r14
function %usub_sat64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = usub_sat.i64x2 v0, v1
return v2
}
; block0:
; vsg %v5, %v24, %v25
; vchlg %v7, %v24, %v25
; vn %v24, %v5, %v7
; br %r14
function %usub_sat32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = usub_sat.i32x4 v0, v1
return v2
}
; block0:
; vsf %v5, %v24, %v25
; vchlf %v7, %v24, %v25
; vn %v24, %v5, %v7
; br %r14
function %usub_sat16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = usub_sat.i16x8 v0, v1
return v2
}
; block0:
; vsh %v5, %v24, %v25
; vchlh %v7, %v24, %v25
; vn %v24, %v5, %v7
; br %r14
function %usub_sat8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = usub_sat.i8x16 v0, v1
return v2
}
; block0:
; vsb %v5, %v24, %v25
; vchlb %v7, %v24, %v25
; vn %v24, %v5, %v7
; br %r14
function %ssub_sat32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = ssub_sat.i32x4 v0, v1
return v2
}
; block0:
; vuphf %v5, %v24
; vuphf %v7, %v25
; vsg %v17, %v5, %v7
; vuplf %v19, %v24
; vuplf %v21, %v25
; vsg %v23, %v19, %v21
; vpksg %v24, %v17, %v23
; br %r14
function %ssub_sat16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = ssub_sat.i16x8 v0, v1
return v2
}
; block0:
; vuphh %v5, %v24
; vuphh %v7, %v25
; vsf %v17, %v5, %v7
; vuplh %v19, %v24
; vuplh %v21, %v25
; vsf %v23, %v19, %v21
; vpksf %v24, %v17, %v23
; br %r14
function %ssub_sat8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = ssub_sat.i8x16 v0, v1
return v2
}
; block0:
; vuphb %v5, %v24
; vuphb %v7, %v25
; vsh %v17, %v5, %v7
; vuplb %v19, %v24
; vuplb %v21, %v25
; vsh %v23, %v19, %v21
; vpksh %v24, %v17, %v23
; br %r14
function %iadd_pairwise_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = iadd_pairwise.i32x4 v0, v1
return v2
}
; block0:
; vrepib %v5, 32
; vsrlb %v7, %v25, %v5
; vaf %v17, %v25, %v7
; vsrlb %v19, %v24, %v5
; vaf %v21, %v24, %v19
; vpkg %v24, %v17, %v21
; br %r14
function %iadd_pairwise_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = iadd_pairwise.i16x8 v0, v1
return v2
}
; block0:
; vrepib %v5, 16
; vsrlb %v7, %v25, %v5
; vah %v17, %v25, %v7
; vsrlb %v19, %v24, %v5
; vah %v21, %v24, %v19
; vpkf %v24, %v17, %v21
; br %r14
function %iadd_pairwise_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = iadd_pairwise.i8x16 v0, v1
return v2
}
; block0:
; vrepib %v5, 8
; vsrlb %v7, %v25, %v5
; vab %v17, %v25, %v7
; vsrlb %v19, %v24, %v5
; vab %v21, %v24, %v19
; vpkh %v24, %v17, %v21
; br %r14
function %imul_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = imul.i64x2 v0, v1
return v2
}
; block0:
; vlgvg %r3, %v24, 0
; vlgvg %r5, %v25, 0
; msgr %r3, %r5
; vlgvg %r5, %v24, 1
; vlgvg %r4, %v25, 1
; msgr %r5, %r4
; vlvgp %v24, %r3, %r5
; br %r14
function %imul_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = imul.i32x4 v0, v1
return v2
}
; block0:
; vmlf %v24, %v24, %v25
; br %r14
function %imul_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = imul.i16x8 v0, v1
return v2
}
; block0:
; vmlhw %v24, %v24, %v25
; br %r14
function %imul_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = imul.i8x16 v0, v1
return v2
}
; block0:
; vmlb %v24, %v24, %v25
; br %r14
function %umulhi_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = umulhi.i64x2 v0, v1
return v2
}
; block0:
; vlgvg %r3, %v24, 0
; vlgvg %r1, %v25, 0
; mlgr %r0, %r3
; lgr %r2, %r0
; vlgvg %r3, %v24, 1
; vlgvg %r1, %v25, 1
; mlgr %r0, %r3
; vlvgp %v24, %r2, %r0
; br %r14
function %umulhi_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = umulhi.i32x4 v0, v1
return v2
}
; block0:
; vmlhf %v24, %v24, %v25
; br %r14
function %umulhi_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = umulhi.i16x8 v0, v1
return v2
}
; block0:
; vmlhh %v24, %v24, %v25
; br %r14
function %umulhi_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = umulhi.i8x16 v0, v1
return v2
}
; block0:
; vmlhb %v24, %v24, %v25
; br %r14
function %smulhi_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = smulhi.i64x2 v0, v1
return v2
}
; block0:
; vlgvg %r3, %v24, 0
; vlgvg %r5, %v25, 0
; mgrk %r0, %r3, %r5
; lgr %r3, %r0
; vlgvg %r2, %v24, 1
; vlgvg %r4, %v25, 1
; mgrk %r0, %r2, %r4
; lgr %r4, %r3
; vlvgp %v24, %r4, %r0
; br %r14
function %smulhi_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = smulhi.i32x4 v0, v1
return v2
}
; block0:
; vmhf %v24, %v24, %v25
; br %r14
function %smulhi_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = smulhi.i16x8 v0, v1
return v2
}
; block0:
; vmhh %v24, %v24, %v25
; br %r14
function %smulhi_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = smulhi.i8x16 v0, v1
return v2
}
; block0:
; vmhb %v24, %v24, %v25
; br %r14
function %widening_pairwise_dot_product_s_i16x8(i16x8, i16x8) -> i32x4 {
block0(v0: i16x8, v1: i16x8):
v2 = widening_pairwise_dot_product_s v0, v1
return v2
}
; block0:
; vmeh %v5, %v24, %v25
; vmoh %v7, %v24, %v25
; vaf %v24, %v5, %v7
; br %r14
function %sqmul_round_sat(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = sqmul_round_sat.i16x8 v0, v1
return v2
}
; block0:
; vuphh %v5, %v24
; vuphh %v7, %v25
; vmlf %v17, %v5, %v7
; vgmf %v19, 17, 17
; vaf %v21, %v17, %v19
; vesraf %v23, %v21, 15
; vuplh %v26, %v24
; vuplh %v27, %v25
; vmlf %v29, %v26, %v27
; vgmf %v31, 17, 17
; vaf %v1, %v29, %v31
; vesraf %v3, %v1, 15
; vpksf %v24, %v23, %v3
; br %r14
function %sqmul_round_sat(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = sqmul_round_sat.i32x4 v0, v1
return v2
}
; block0:
; vuphf %v5, %v24
; vuphf %v7, %v25
; lgdr %r3, %f5
; lgdr %r5, %f7
; msgr %r3, %r5
; vlgvg %r5, %v5, 1
; vlgvg %r4, %v7, 1
; msgr %r5, %r4
; vlvgp %v29, %r3, %r5
; vgmg %v31, 33, 33
; vag %v1, %v29, %v31
; vesrag %v3, %v1, 31
; vuplf %v5, %v24
; vuplf %v7, %v25
; lgdr %r3, %f5
; lgdr %r5, %f7
; msgr %r3, %r5
; vlgvg %r5, %v5, 1
; vlgvg %r4, %v7, 1
; msgr %r5, %r4
; vlvgp %v29, %r3, %r5
; vgmg %v31, 33, 33
; vag %v1, %v29, %v31
; vesrag %v4, %v1, 31
; vpksg %v24, %v3, %v4
; br %r14

View File

@@ -0,0 +1,43 @@
test compile precise-output
target s390x
function %popcnt_i64x2(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = popcnt.i64x2 v0
return v1
}
; block0:
; vpopctg %v24, %v24
; br %r14
function %popcnt_i32x4(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = popcnt.i32x4 v0
return v1
}
; block0:
; vpopctf %v24, %v24
; br %r14
function %popcnt_i16x8(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = popcnt.i16x8 v0
return v1
}
; block0:
; vpopcth %v24, %v24
; br %r14
function %popcnt_i8x16(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = popcnt.i8x16 v0
return v1
}
; block0:
; vpopctb %v24, %v24
; br %r14

View File

@@ -0,0 +1,364 @@
test compile precise-output
target s390x
function %band_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = band.i64x2 v0, v1
return v2
}
; block0:
; vn %v24, %v24, %v25
; br %r14
function %band_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = band.i32x4 v0, v1
return v2
}
; block0:
; vn %v24, %v24, %v25
; br %r14
function %band_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = band.i16x8 v0, v1
return v2
}
; block0:
; vn %v24, %v24, %v25
; br %r14
function %band_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = band.i8x16 v0, v1
return v2
}
; block0:
; vn %v24, %v24, %v25
; br %r14
function %bor_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = bor.i64x2 v0, v1
return v2
}
; block0:
; vo %v24, %v24, %v25
; br %r14
function %bor_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = bor.i32x4 v0, v1
return v2
}
; block0:
; vo %v24, %v24, %v25
; br %r14
function %bor_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = bor.i16x8 v0, v1
return v2
}
; block0:
; vo %v24, %v24, %v25
; br %r14
function %bor_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = bor.i8x16 v0, v1
return v2
}
; block0:
; vo %v24, %v24, %v25
; br %r14
function %bxor_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = bxor.i64x2 v0, v1
return v2
}
; block0:
; vx %v24, %v24, %v25
; br %r14
function %bxor_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = bxor.i32x4 v0, v1
return v2
}
; block0:
; vx %v24, %v24, %v25
; br %r14
function %bxor_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = bxor.i16x8 v0, v1
return v2
}
; block0:
; vx %v24, %v24, %v25
; br %r14
function %bxor_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = bxor.i8x16 v0, v1
return v2
}
; block0:
; vx %v24, %v24, %v25
; br %r14
function %band_not_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = band_not.i64x2 v0, v1
return v2
}
; block0:
; vnc %v24, %v24, %v25
; br %r14
function %band_not_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = band_not.i32x4 v0, v1
return v2
}
; block0:
; vnc %v24, %v24, %v25
; br %r14
function %band_not_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = band_not.i16x8 v0, v1
return v2
}
; block0:
; vnc %v24, %v24, %v25
; br %r14
function %band_not_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = band_not.i8x16 v0, v1
return v2
}
; block0:
; vnc %v24, %v24, %v25
; br %r14
function %bor_not_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = bor_not.i64x2 v0, v1
return v2
}
; block0:
; voc %v24, %v24, %v25
; br %r14
function %bor_not_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = bor_not.i32x4 v0, v1
return v2
}
; block0:
; voc %v24, %v24, %v25
; br %r14
function %bor_not_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = bor_not.i16x8 v0, v1
return v2
}
; block0:
; voc %v24, %v24, %v25
; br %r14
function %bor_not_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = bor_not.i8x16 v0, v1
return v2
}
; block0:
; voc %v24, %v24, %v25
; br %r14
function %bxor_not_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = bxor_not.i64x2 v0, v1
return v2
}
; block0:
; vnx %v24, %v24, %v25
; br %r14
function %bxor_not_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = bxor_not.i32x4 v0, v1
return v2
}
; block0:
; vnx %v24, %v24, %v25
; br %r14
function %bxor_not_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = bxor_not.i16x8 v0, v1
return v2
}
; block0:
; vnx %v24, %v24, %v25
; br %r14
function %bxor_not_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = bxor_not.i8x16 v0, v1
return v2
}
; block0:
; vnx %v24, %v24, %v25
; br %r14
function %bnot_i64x2(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = bnot.i64x2 v0
return v1
}
; block0:
; vno %v24, %v24, %v24
; br %r14
function %bnot_i32x4(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = bnot.i32x4 v0
return v1
}
; block0:
; vno %v24, %v24, %v24
; br %r14
function %bnot_i16x8(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = bnot.i16x8 v0
return v1
}
; block0:
; vno %v24, %v24, %v24
; br %r14
function %bnot_i8x16(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = bnot.i8x16 v0
return v1
}
; block0:
; vno %v24, %v24, %v24
; br %r14
function %bitselect_i64x2(i64x2, i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2, v2: i64x2):
v3 = bitselect.i64x2 v0, v1, v2
return v3
}
; block0:
; vsel %v24, %v25, %v26, %v24
; br %r14
function %bitselect_i32x4(i32x4, i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4, v2: i32x4):
v3 = bitselect.i32x4 v0, v1, v2
return v3
}
; block0:
; vsel %v24, %v25, %v26, %v24
; br %r14
function %bitselect_i16x8(i16x8, i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8, v2: i16x8):
v3 = bitselect.i16x8 v0, v1, v2
return v3
}
; block0:
; vsel %v24, %v25, %v26, %v24
; br %r14
function %bitselect_i8x16(i8x16, i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16, v2: i8x16):
v3 = bitselect.i8x16 v0, v1, v2
return v3
}
; block0:
; vsel %v24, %v25, %v26, %v24
; br %r14
function %vselect_i64x2(b64x2, i64x2, i64x2) -> i64x2 {
block0(v0: b64x2, v1: i64x2, v2: i64x2):
v3 = vselect.i64x2 v0, v1, v2
return v3
}
; block0:
; vsel %v24, %v25, %v26, %v24
; br %r14
function %vselect_i32x4(b32x4, i32x4, i32x4) -> i32x4 {
block0(v0: b32x4, v1: i32x4, v2: i32x4):
v3 = vselect.i32x4 v0, v1, v2
return v3
}
; block0:
; vsel %v24, %v25, %v26, %v24
; br %r14
function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 {
block0(v0: b16x8, v1: i16x8, v2: i16x8):
v3 = vselect.i16x8 v0, v1, v2
return v3
}
; block0:
; vsel %v24, %v25, %v26, %v24
; br %r14
function %vselect_i8x16(b8x16, i8x16, i8x16) -> i8x16 {
block0(v0: b8x16, v1: i8x16, v2: i8x16):
v3 = vselect.i8x16 v0, v1, v2
return v3
}
; block0:
; vsel %v24, %v25, %v26, %v24
; br %r14

View File

@@ -0,0 +1,213 @@
test compile precise-output
target s390x
function %vconst_i64x2_zero() -> i64x2 {
block0:
v1 = vconst.i64x2 [0 0]
return v1
}
; block0:
; vgbm %v24, 0
; br %r14
function %vconst_i64x2_splat1() -> i64x2 {
block0:
v1 = vconst.i64x2 [32767 32767]
return v1
}
; block0:
; vrepig %v24, 32767
; br %r14
function %vconst_i64x2_splat2() -> i64x2 {
block0:
v1 = vconst.i64x2 [-32768 -32768]
return v1
}
; block0:
; vrepig %v24, -32768
; br %r14
function %vconst_i64x2_splat3() -> i64x2 {
block0:
v1 = vconst.i64x2 [32768 32768]
return v1
}
; block0:
; bras %r1, 12 ; data.u64 0x0000000000008000 ; vlrepg %v24, 0(%r1)
; br %r14
function %vconst_i64x2_splat4() -> i64x2 {
block0:
v1 = vconst.i64x2 [-32769 -32769]
return v1
}
; block0:
; bras %r1, 12 ; data.u64 0xffffffffffff7fff ; vlrepg %v24, 0(%r1)
; br %r14
function %vconst_i64x2_mixed() -> i64x2 {
block0:
v1 = vconst.i64x2 [1 2]
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x00000000000000020000000000000001 ; vl %v24, 0(%r1)
; br %r14
function %vconst_i32x4_zero() -> i32x4 {
block0:
v1 = vconst.i32x4 [0 0 0 0]
return v1
}
; block0:
; vgbm %v24, 0
; br %r14
function %vconst_i32x4_splat1() -> i32x4 {
block0:
v1 = vconst.i32x4 [32767 32767 32767 32767]
return v1
}
; block0:
; vrepif %v24, 32767
; br %r14
function %vconst_i32x4_splat2() -> i32x4 {
block0:
v1 = vconst.i32x4 [-32768 -32768 -32768 -32768]
return v1
}
; block0:
; vrepif %v24, -32768
; br %r14
function %vconst_i32x4_splat3() -> i32x4 {
block0:
v1 = vconst.i32x4 [32768 32768 32768 32768]
return v1
}
; block0:
; bras %r1, 8 ; data.u32 0x00008000 ; vlrepf %v24, 0(%r1)
; br %r14
function %vconst_i32x4_splat4() -> i32x4 {
block0:
v1 = vconst.i32x4 [-32769 -32769 -32769 -32769]
return v1
}
; block0:
; bras %r1, 8 ; data.u32 0xffff7fff ; vlrepf %v24, 0(%r1)
; br %r14
function %vconst_i32x4_splat_i64() -> i32x4 {
block0:
v1 = vconst.i32x4 [1 2 1 2]
return v1
}
; block0:
; bras %r1, 12 ; data.u64 0x0000000200000001 ; vlrepg %v24, 0(%r1)
; br %r14
function %vconst_i32x4_mixed() -> i32x4 {
block0:
v1 = vconst.i32x4 [1 2 3 4]
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x00000004000000030000000200000001 ; vl %v24, 0(%r1)
; br %r14
function %vconst_i16x8_zero() -> i16x8 {
block0:
v1 = vconst.i16x8 [0 0 0 0 0 0 0 0]
return v1
}
; block0:
; vgbm %v24, 0
; br %r14
function %vconst_i16x8_splat1() -> i16x8 {
block0:
v1 = vconst.i16x8 [32767 32767 32767 32767 32767 32767 32767 32767]
return v1
}
; block0:
; vrepih %v24, 32767
; br %r14
function %vconst_i16x8_splat2() -> i16x8 {
block0:
v1 = vconst.i16x8 [-32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768]
return v1
}
; block0:
; vrepih %v24, -32768
; br %r14
function %vconst_i16x8_mixed() -> i16x8 {
block0:
v1 = vconst.i16x8 [1 2 3 4 5 6 7 8]
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x00080007000600050004000300020001 ; vl %v24, 0(%r1)
; br %r14
function %vconst_i8x16_zero() -> i8x16 {
block0:
v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
return v1
}
; block0:
; vgbm %v24, 0
; br %r14
function %vconst_i8x16_splat1() -> i8x16 {
block0:
v1 = vconst.i8x16 [127 127 127 127 127 127 127 127 127 127 127 127 127 127 127 127]
return v1
}
; block0:
; vrepib %v24, 127
; br %r14
function %vconst_i8x16_splat2() -> i8x16 {
block0:
v1 = vconst.i8x16 [-128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128]
return v1
}
; block0:
; vrepib %v24, 128
; br %r14
function %vconst_i8x16_mixed() -> i8x16 {
block0:
v1 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x100f0e0d0c0b0a090807060504030201 ; vl %v24, 0(%r1)
; br %r14

View File

@@ -0,0 +1,222 @@
test compile precise-output
target s390x
function %snarrow_i64x2_i32x4(i64x2, i64x2) -> i32x4 {
block0(v0: i64x2, v1: i64x2):
v2 = snarrow.i64x2 v0, v1
return v2
}
; block0:
; vpksg %v24, %v25, %v24
; br %r14
function %snarrow_i32x4_i16x8(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
v2 = snarrow.i32x4 v0, v1
return v2
}
; block0:
; vpksf %v24, %v25, %v24
; br %r14
function %snarrow_i16x8_i8x16(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
v2 = snarrow.i16x8 v0, v1
return v2
}
; block0:
; vpksh %v24, %v25, %v24
; br %r14
function %unarrow_i64x2_i32x4(i64x2, i64x2) -> i32x4 {
block0(v0: i64x2, v1: i64x2):
v2 = unarrow.i64x2 v0, v1
return v2
}
; block0:
; vgbm %v5, 0
; vmxg %v7, %v25, %v5
; vmxg %v17, %v24, %v5
; vpklsg %v24, %v7, %v17
; br %r14
function %unarrow_i32x4_i16x8(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
v2 = unarrow.i32x4 v0, v1
return v2
}
; block0:
; vgbm %v5, 0
; vmxf %v7, %v25, %v5
; vmxf %v17, %v24, %v5
; vpklsf %v24, %v7, %v17
; br %r14
function %unarrow_i16x8_i8x16(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
v2 = unarrow.i16x8 v0, v1
return v2
}
; block0:
; vgbm %v5, 0
; vmxh %v7, %v25, %v5
; vmxh %v17, %v24, %v5
; vpklsh %v24, %v7, %v17
; br %r14
function %uunarrow_i64x2_i32x4(i64x2, i64x2) -> i32x4 {
block0(v0: i64x2, v1: i64x2):
v2 = uunarrow.i64x2 v0, v1
return v2
}
; block0:
; vpklsg %v24, %v25, %v24
; br %r14
function %uunarrow_i32x4_i16x8(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
v2 = uunarrow.i32x4 v0, v1
return v2
}
; block0:
; vpklsf %v24, %v25, %v24
; br %r14
function %uunarrow_i16x8_i8x16(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
v2 = uunarrow.i16x8 v0, v1
return v2
}
; block0:
; vpklsh %v24, %v25, %v24
; br %r14
function %swiden_low_i32x4_i64x2(i32x4) -> i64x2 {
block0(v0: i32x4):
v1 = swiden_low.i32x4 v0
return v1
}
; block0:
; vuplf %v24, %v24
; br %r14
function %swiden_low_i16x8_i32x4(i16x8) -> i32x4 {
block0(v0: i16x8):
v1 = swiden_low.i16x8 v0
return v1
}
; block0:
; vuplh %v24, %v24
; br %r14
function %swiden_low_i8x16_i16x8(i8x16) -> i16x8 {
block0(v0: i8x16):
v1 = swiden_low.i8x16 v0
return v1
}
; block0:
; vuplb %v24, %v24
; br %r14
function %swiden_high_i32x4_i64x2(i32x4) -> i64x2 {
block0(v0: i32x4):
v1 = swiden_high.i32x4 v0
return v1
}
; block0:
; vuphf %v24, %v24
; br %r14
function %swiden_high_i16x8_i32x4(i16x8) -> i32x4 {
block0(v0: i16x8):
v1 = swiden_high.i16x8 v0
return v1
}
; block0:
; vuphh %v24, %v24
; br %r14
function %swiden_high_i8x16_i16x8(i8x16) -> i16x8 {
block0(v0: i8x16):
v1 = swiden_high.i8x16 v0
return v1
}
; block0:
; vuphb %v24, %v24
; br %r14
function %uwiden_low_i32x4_i64x2(i32x4) -> i64x2 {
block0(v0: i32x4):
v1 = uwiden_low.i32x4 v0
return v1
}
; block0:
; vupllf %v24, %v24
; br %r14
function %uwiden_low_i16x8_i32x4(i16x8) -> i32x4 {
block0(v0: i16x8):
v1 = uwiden_low.i16x8 v0
return v1
}
; block0:
; vupllh %v24, %v24
; br %r14
function %uwiden_low_i8x16_i16x8(i8x16) -> i16x8 {
block0(v0: i8x16):
v1 = uwiden_low.i8x16 v0
return v1
}
; block0:
; vupllb %v24, %v24
; br %r14
function %uwiden_high_i32x4_i64x2(i32x4) -> i64x2 {
block0(v0: i32x4):
v1 = uwiden_high.i32x4 v0
return v1
}
; block0:
; vuplhf %v24, %v24
; br %r14
function %uwiden_high_i16x8_i32x4(i16x8) -> i32x4 {
block0(v0: i16x8):
v1 = uwiden_high.i16x8 v0
return v1
}
; block0:
; vuplhh %v24, %v24
; br %r14
function %uwiden_high_i8x16_i16x8(i8x16) -> i16x8 {
block0(v0: i8x16):
v1 = uwiden_high.i8x16 v0
return v1
}
; block0:
; vuplhb %v24, %v24
; br %r14

View File

@@ -0,0 +1,309 @@
test compile precise-output
target s390x
function %fcmp_eq_f64x2(f64x2, f64x2) -> b64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp.f64x2 eq v0, v1
return v2
}
; block0:
; vfcedb %v24, %v24, %v25
; br %r14
function %fcmp_ne_f64x2(f64x2, f64x2) -> b64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp.f64x2 ne v0, v1
return v2
}
; block0:
; vfcedb %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %fcmp_gt_f64x2(f64x2, f64x2) -> b64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp.f64x2 gt v0, v1
return v2
}
; block0:
; vfchdb %v24, %v24, %v25
; br %r14
function %fcmp_lt_f64x2(f64x2, f64x2) -> b64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp.f64x2 lt v0, v1
return v2
}
; block0:
; vfchdb %v24, %v25, %v24
; br %r14
function %fcmp_ge_f64x2(f64x2, f64x2) -> b64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp.f64x2 ge v0, v1
return v2
}
; block0:
; vfchedb %v24, %v24, %v25
; br %r14
function %fcmp_le_f64x2(f64x2, f64x2) -> b64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp.f64x2 le v0, v1
return v2
}
; block0:
; vfchedb %v24, %v25, %v24
; br %r14
function %fcmp_ueq_f64x2(f64x2, f64x2) -> b64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp.f64x2 ueq v0, v1
return v2
}
; block0:
; vfchdb %v5, %v24, %v25
; vfchdb %v7, %v25, %v24
; vno %v24, %v5, %v7
; br %r14
function %fcmp_one_f64x2(f64x2, f64x2) -> b64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp.f64x2 one v0, v1
return v2
}
; block0:
; vfchdb %v5, %v24, %v25
; vfchdb %v7, %v25, %v24
; vo %v24, %v5, %v7
; br %r14
function %fcmp_ugt_f64x2(f64x2, f64x2) -> b64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp.f64x2 ugt v0, v1
return v2
}
; block0:
; vfchedb %v5, %v25, %v24
; vno %v24, %v5, %v5
; br %r14
function %fcmp_ult_f64x2(f64x2, f64x2) -> b64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp.f64x2 ult v0, v1
return v2
}
; block0:
; vfchedb %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %fcmp_uge_f64x2(f64x2, f64x2) -> b64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp.f64x2 uge v0, v1
return v2
}
; block0:
; vfchdb %v5, %v25, %v24
; vno %v24, %v5, %v5
; br %r14
function %fcmp_ule_f64x2(f64x2, f64x2) -> b64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp.f64x2 ule v0, v1
return v2
}
; block0:
; vfchdb %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %fcmp_ord_f64x2(f64x2, f64x2) -> b64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp.f64x2 ord v0, v1
return v2
}
; block0:
; vfchedb %v5, %v24, %v25
; vfchedb %v7, %v25, %v24
; vo %v24, %v5, %v7
; br %r14
function %fcmp_uno_f64x2(f64x2, f64x2) -> b64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp.f64x2 uno v0, v1
return v2
}
; block0:
; vfchedb %v5, %v24, %v25
; vfchedb %v7, %v25, %v24
; vno %v24, %v5, %v7
; br %r14
function %fcmp_eq_f32x4(f32x4, f32x4) -> b32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcmp.f32x4 eq v0, v1
return v2
}
; block0:
; vfcesb %v24, %v24, %v25
; br %r14
function %fcmp_ne_f32x4(f32x4, f32x4) -> b32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcmp.f32x4 ne v0, v1
return v2
}
; block0:
; vfcesb %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %fcmp_gt_f32x4(f32x4, f32x4) -> b32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcmp.f32x4 gt v0, v1
return v2
}
; block0:
; vfchsb %v24, %v24, %v25
; br %r14
function %fcmp_lt_f32x4(f32x4, f32x4) -> b32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcmp.f32x4 lt v0, v1
return v2
}
; block0:
; vfchsb %v24, %v25, %v24
; br %r14
function %fcmp_ge_f32x4(f32x4, f32x4) -> b32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcmp.f32x4 ge v0, v1
return v2
}
; block0:
; vfchesb %v24, %v24, %v25
; br %r14
function %fcmp_le_f32x4(f32x4, f32x4) -> b32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcmp.f32x4 le v0, v1
return v2
}
; block0:
; vfchesb %v24, %v25, %v24
; br %r14
function %fcmp_ueq_f32x4(f32x4, f32x4) -> b32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcmp.f32x4 ueq v0, v1
return v2
}
; block0:
; vfchsb %v5, %v24, %v25
; vfchsb %v7, %v25, %v24
; vno %v24, %v5, %v7
; br %r14
function %fcmp_one_f32x4(f32x4, f32x4) -> b32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcmp.f32x4 one v0, v1
return v2
}
; block0:
; vfchsb %v5, %v24, %v25
; vfchsb %v7, %v25, %v24
; vo %v24, %v5, %v7
; br %r14
function %fcmp_ugt_f32x4(f32x4, f32x4) -> b32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcmp.f32x4 ugt v0, v1
return v2
}
; block0:
; vfchesb %v5, %v25, %v24
; vno %v24, %v5, %v5
; br %r14
function %fcmp_ult_f32x4(f32x4, f32x4) -> b32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcmp.f32x4 ult v0, v1
return v2
}
; block0:
; vfchesb %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %fcmp_uge_f32x4(f32x4, f32x4) -> b32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcmp.f32x4 uge v0, v1
return v2
}
; block0:
; vfchsb %v5, %v25, %v24
; vno %v24, %v5, %v5
; br %r14
function %fcmp_ule_f32x4(f32x4, f32x4) -> b32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcmp.f32x4 ule v0, v1
return v2
}
; block0:
; vfchsb %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %fcmp_ord_f32x4(f32x4, f32x4) -> b32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcmp.f32x4 ord v0, v1
return v2
}
; block0:
; vfchesb %v5, %v24, %v25
; vfchesb %v7, %v25, %v24
; vo %v24, %v5, %v7
; br %r14
function %fcmp_uno_f32x4(f32x4, f32x4) -> b32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcmp.f32x4 uno v0, v1
return v2
}
; block0:
; vfchesb %v5, %v24, %v25
; vfchesb %v7, %v25, %v24
; vno %v24, %v5, %v7
; br %r14

View File

@@ -0,0 +1,90 @@
test compile precise-output
target s390x arch13
function %fcvt_from_uint_i32x4_f32x4(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_uint.f32x4 v0
return v1
}
; block0:
; vcelfb %v24, %v24, 0, 4
; br %r14
function %fcvt_from_sint_i32x4_f32x4(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_sint.f32x4 v0
return v1
}
; block0:
; vcefb %v24, %v24, 0, 4
; br %r14
function %fcvt_from_uint_i64x2_f64x2(i64x2) -> f64x2 {
block0(v0: i64x2):
v1 = fcvt_from_uint.f64x2 v0
return v1
}
; block0:
; vcdlgb %v24, %v24, 0, 4
; br %r14
function %fcvt_from_sint_i64x2_f64x2(i64x2) -> f64x2 {
block0(v0: i64x2):
v1 = fcvt_from_sint.f64x2 v0
return v1
}
; block0:
; vcdgb %v24, %v24, 0, 4
; br %r14
function %fcvt_to_uint_sat_f32x4_i32x4(f32x4) -> i32x4 {
block0(v0: f32x4):
v1 = fcvt_to_uint_sat.i32x4 v0
return v1
}
; block0:
; vclfeb %v24, %v24, 0, 5
; br %r14
function %fcvt_to_sint_sat_f32x4_i32x4(f32x4) -> i32x4 {
block0(v0: f32x4):
v1 = fcvt_to_sint_sat.i32x4 v0
return v1
}
; block0:
; vcfeb %v3, %v24, 0, 5
; vgbm %v5, 0
; vfcesb %v7, %v24, %v24
; vsel %v24, %v3, %v5, %v7
; br %r14
function %fcvt_to_uint_sat_f64x2_i64x2(f64x2) -> i64x2 {
block0(v0: f64x2):
v1 = fcvt_to_uint_sat.i64x2 v0
return v1
}
; block0:
; vclgdb %v24, %v24, 0, 5
; br %r14
function %fcvt_to_sint_sat_f64x2_i64x2(f64x2) -> i64x2 {
block0(v0: f64x2):
v1 = fcvt_to_sint_sat.i64x2 v0
return v1
}
; block0:
; vcgdb %v3, %v24, 0, 5
; vgbm %v5, 0
; vfcedb %v7, %v24, %v24
; vsel %v24, %v3, %v5, %v7
; br %r14

View File

@@ -0,0 +1,533 @@
test compile precise-output
target s390x
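;; All-zero constants use vgbm; other vector constants are loaded from an inline
;; literal pool (bras over a data.u128 block, then vl).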
function %vconst_f32x4_zero() -> f32x4 {
block0:
v1 = vconst.f32x4 [0x0.0 0x0.0 0x0.0 0x0.0]
return v1
}
; block0:
; vgbm %v24, 0
; br %r14
function %vconst_f64x2_zero() -> f64x2 {
block0:
v1 = vconst.f64x2 [0x0.0 0x0.0]
return v1
}
; block0:
; vgbm %v24, 0
; br %r14
function %vconst_f32x4_mixed() -> f32x4 {
block0:
v1 = vconst.f32x4 [0x1.0 0x2.0 0x3.0 0x4.0]
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x4080000040400000400000003f800000 ; vl %v24, 0(%r1)
; br %r14
function %vconst_f64x2_mixed() -> f64x2 {
block0:
v1 = vconst.f64x2 [0x1.0 0x2.0]
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x40000000000000003ff0000000000000 ; vl %v24, 0(%r1)
; br %r14
function %fadd_f32x4(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fadd v0, v1
return v2
}
; block0:
; vfasb %v24, %v24, %v25
; br %r14
function %fadd_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fadd v0, v1
return v2
}
; block0:
; vfadb %v24, %v24, %v25
; br %r14
function %fsub_f32x4(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fsub v0, v1
return v2
}
; block0:
; vfssb %v24, %v24, %v25
; br %r14
function %fsub_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fsub v0, v1
return v2
}
; block0:
; vfsdb %v24, %v24, %v25
; br %r14
function %fmul_f32x4(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fmul v0, v1
return v2
}
; block0:
; vfmsb %v24, %v24, %v25
; br %r14
function %fmul_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fmul v0, v1
return v2
}
; block0:
; vfmdb %v24, %v24, %v25
; br %r14
function %fdiv_f32x4(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fdiv v0, v1
return v2
}
; block0:
; vfdsb %v24, %v24, %v25
; br %r14
function %fdiv_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fdiv v0, v1
return v2
}
; block0:
; vfddb %v24, %v24, %v25
; br %r14
function %fmin_f32x4(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fmin v0, v1
return v2
}
; block0:
; vfminsb %v24, %v24, %v25, 1
; br %r14
function %fmin_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fmin v0, v1
return v2
}
; block0:
; vfmindb %v24, %v24, %v25, 1
; br %r14
function %fmax_f32x4(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fmax v0, v1
return v2
}
; block0:
; vfmaxsb %v24, %v24, %v25, 1
; br %r14
function %fmax_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fmax v0, v1
return v2
}
; block0:
; vfmaxdb %v24, %v24, %v25, 1
; br %r14
function %fmin_pseudo_f32x4(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fmin_pseudo v0, v1
return v2
}
; block0:
; vfminsb %v24, %v24, %v25, 3
; br %r14
function %fmin_pseudo_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fmin_pseudo v0, v1
return v2
}
; block0:
; vfmindb %v24, %v24, %v25, 3
; br %r14
function %fmax_pseudo_f32x4(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fmax_pseudo v0, v1
return v2
}
; block0:
; vfmaxsb %v24, %v24, %v25, 3
; br %r14
function %fmax_pseudo_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fmax_pseudo v0, v1
return v2
}
; block0:
; vfmaxdb %v24, %v24, %v25, 3
; br %r14
function %sqrt_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = sqrt v0
return v1
}
; block0:
; vfsqsb %v24, %v24
; br %r14
function %sqrt_f64x2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = sqrt v0
return v1
}
; block0:
; vfsqdb %v24, %v24
; br %r14
function %fabs_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = fabs v0
return v1
}
; block0:
; vflpsb %v24, %v24
; br %r14
function %fabs_f64x2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = fabs v0
return v1
}
; block0:
; vflpdb %v24, %v24
; br %r14
function %fneg_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = fneg v0
return v1
}
; block0:
; vflcsb %v24, %v24
; br %r14
function %fneg_f64x2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = fneg v0
return v1
}
; block0:
; vflcdb %v24, %v24
; br %r14
function %fvpromote_low_f32x4(f32x4) -> f64x2 {
block0(v0: f32x4):
v1 = fvpromote_low v0
return v1
}
; block0:
; vmrlf %v3, %v24, %v24
; vldeb %v24, %v3
; br %r14
function %fvdemote_f64x2(f64x2) -> f32x4 {
block0(v0: f64x2):
v1 = fvdemote v0
return v1
}
; block0:
; vledb %v3, %v24, 0, 0
; vgbm %v5, 0
; bras %r1, 20 ; data.u128 0x10101010101010100001020308090a0b ; vl %v7, 0(%r1)
; vperm %v24, %v3, %v5, %v7
; br %r14
function %ceil_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = ceil v0
return v1
}
; block0:
; vfisb %v24, %v24, 0, 6
; br %r14
function %ceil_f64x2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = ceil v0
return v1
}
; block0:
; vfidb %v24, %v24, 0, 6
; br %r14
function %floor_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = floor v0
return v1
}
; block0:
; vfisb %v24, %v24, 0, 7
; br %r14
function %floor_f64x2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = floor v0
return v1
}
; block0:
; vfidb %v24, %v24, 0, 7
; br %r14
function %trunc_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = trunc v0
return v1
}
; block0:
; vfisb %v24, %v24, 0, 5
; br %r14
function %trunc_f64x2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = trunc v0
return v1
}
; block0:
; vfidb %v24, %v24, 0, 5
; br %r14
function %nearest_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = nearest v0
return v1
}
; block0:
; vfisb %v24, %v24, 0, 4
; br %r14
function %nearest_f64x2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = nearest v0
return v1
}
; block0:
; vfidb %v24, %v24, 0, 4
; br %r14
function %fma_f32x4(f32x4, f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4, v2: f32x4):
v3 = fma v0, v1, v2
return v3
}
; block0:
; vfmasb %v24, %v24, %v25, %v26
; br %r14
function %fma_f64x2(f64x2, f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2, v2: f64x2):
v3 = fma v0, v1, v2
return v3
}
; block0:
; vfmadb %v24, %v24, %v25, %v26
; br %r14
function %fcopysign_f32x4(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcopysign v0, v1
return v2
}
; block0:
; vgmf %v5, 1, 31
; vsel %v24, %v24, %v25, %v5
; br %r14
function %fcopysign_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcopysign v0, v1
return v2
}
; block0:
; vgmg %v5, 1, 63
; vsel %v24, %v24, %v25, %v5
; br %r14
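;; Without arch13, 32-bit element conversions are expanded through f64: the vector
;; is split into doubleword halves, converted, and the halves recombined via
;; vperm or vpk*.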
function %fcvt_from_uint_i32x4_f32x4(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_uint.f32x4 v0
return v1
}
; block0:
; vuplhf %v3, %v24
; vcdlgb %v5, %v3, 0, 3
; vledb %v7, %v5, 0, 4
; vupllf %v17, %v24
; vcdlgb %v19, %v17, 0, 3
; vledb %v21, %v19, 0, 4
; bras %r1, 20 ; data.u128 0x0001020308090a0b1011121318191a1b ; vl %v23, 0(%r1)
; vperm %v24, %v7, %v21, %v23
; br %r14
function %fcvt_from_sint_i32x4_f32x4(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_sint.f32x4 v0
return v1
}
; block0:
; vuphf %v3, %v24
; vcdgb %v5, %v3, 0, 3
; vledb %v7, %v5, 0, 4
; vuplf %v17, %v24
; vcdgb %v19, %v17, 0, 3
; vledb %v21, %v19, 0, 4
; bras %r1, 20 ; data.u128 0x0001020308090a0b1011121318191a1b ; vl %v23, 0(%r1)
; vperm %v24, %v7, %v21, %v23
; br %r14
function %fcvt_from_uint_i64x2_f64x2(i64x2) -> f64x2 {
block0(v0: i64x2):
v1 = fcvt_from_uint.f64x2 v0
return v1
}
; block0:
; vcdlgb %v24, %v24, 0, 4
; br %r14
function %fcvt_from_sint_i64x2_f64x2(i64x2) -> f64x2 {
block0(v0: i64x2):
v1 = fcvt_from_sint.f64x2 v0
return v1
}
; block0:
; vcdgb %v24, %v24, 0, 4
; br %r14
function %fcvt_low_from_sint_i32x4_f64x2(i32x4) -> f64x2 {
block0(v0: i32x4):
v1 = fcvt_low_from_sint.f64x2 v0
return v1
}
; block0:
; vuplf %v3, %v24
; vcdgb %v24, %v3, 0, 4
; br %r14
function %fcvt_to_uint_sat_f32x4_i32x4(f32x4) -> i32x4 {
block0(v0: f32x4):
v1 = fcvt_to_uint_sat.i32x4 v0
return v1
}
; block0:
; vmrhf %v3, %v24, %v24
; vldeb %v5, %v3
; vclgdb %v7, %v5, 0, 5
; vmrlf %v17, %v24, %v24
; vldeb %v19, %v17
; vclgdb %v21, %v19, 0, 5
; vpklsg %v24, %v7, %v21
; br %r14
function %fcvt_to_sint_sat_f32x4_i32x4(f32x4) -> i32x4 {
block0(v0: f32x4):
v1 = fcvt_to_sint_sat.i32x4 v0
return v1
}
; block0:
; vmrhf %v3, %v24, %v24
; vldeb %v5, %v3
; vcgdb %v7, %v5, 0, 5
; vmrlf %v17, %v24, %v24
; vldeb %v19, %v17
; vcgdb %v21, %v19, 0, 5
; vpksg %v23, %v7, %v21
; vgbm %v25, 0
; vfcesb %v27, %v24, %v24
; vsel %v24, %v23, %v25, %v27
; br %r14
function %fcvt_to_uint_sat_f64x2_i64x2(f64x2) -> i64x2 {
block0(v0: f64x2):
v1 = fcvt_to_uint_sat.i64x2 v0
return v1
}
; block0:
; vclgdb %v24, %v24, 0, 5
; br %r14
function %fcvt_to_sint_sat_f64x2_i64x2(f64x2) -> i64x2 {
block0(v0: f64x2):
v1 = fcvt_to_sint_sat.i64x2 v0
return v1
}
; block0:
; vcgdb %v3, %v24, 0, 5
; vgbm %v5, 0
; vfcedb %v7, %v24, %v24
; vsel %v24, %v3, %v5, %v7
; br %r14

View File

@@ -0,0 +1,423 @@
test compile precise-output
target s390x
function %icmp_eq_i64x2(i64x2, i64x2) -> b64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp.i64x2 eq v0, v1
return v2
}
; block0:
; vceqg %v24, %v24, %v25
; br %r14
function %icmp_ne_i64x2(i64x2, i64x2) -> b64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp.i64x2 ne v0, v1
return v2
}
; block0:
; vceqg %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %icmp_sgt_i64x2(i64x2, i64x2) -> b64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp.i64x2 sgt v0, v1
return v2
}
; block0:
; vchg %v24, %v24, %v25
; br %r14
function %icmp_slt_i64x2(i64x2, i64x2) -> b64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp.i64x2 slt v0, v1
return v2
}
; block0:
; vchg %v24, %v25, %v24
; br %r14
function %icmp_sge_i64x2(i64x2, i64x2) -> b64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp.i64x2 sge v0, v1
return v2
}
; block0:
; vchg %v5, %v25, %v24
; vno %v24, %v5, %v5
; br %r14
function %icmp_sle_i64x2(i64x2, i64x2) -> b64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp.i64x2 sle v0, v1
return v2
}
; block0:
; vchg %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %icmp_ugt_i64x2(i64x2, i64x2) -> b64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp.i64x2 ugt v0, v1
return v2
}
; block0:
; vchlg %v24, %v24, %v25
; br %r14
function %icmp_ult_i64x2(i64x2, i64x2) -> b64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp.i64x2 ult v0, v1
return v2
}
; block0:
; vchlg %v24, %v25, %v24
; br %r14
function %icmp_uge_i64x2(i64x2, i64x2) -> b64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp.i64x2 uge v0, v1
return v2
}
; block0:
; vchlg %v5, %v25, %v24
; vno %v24, %v5, %v5
; br %r14
function %icmp_ule_i64x2(i64x2, i64x2) -> b64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp.i64x2 ule v0, v1
return v2
}
; block0:
; vchlg %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %icmp_eq_i32x4(i32x4, i32x4) -> b32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = icmp.i32x4 eq v0, v1
return v2
}
; block0:
; vceqf %v24, %v24, %v25
; br %r14
function %icmp_ne_i32x4(i32x4, i32x4) -> b32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = icmp.i32x4 ne v0, v1
return v2
}
; block0:
; vceqf %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %icmp_sgt_i32x4(i32x4, i32x4) -> b32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = icmp.i32x4 sgt v0, v1
return v2
}
; block0:
; vchf %v24, %v24, %v25
; br %r14
function %icmp_slt_i32x4(i32x4, i32x4) -> b32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = icmp.i32x4 slt v0, v1
return v2
}
; block0:
; vchf %v24, %v25, %v24
; br %r14
function %icmp_sge_i32x4(i32x4, i32x4) -> b32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = icmp.i32x4 sge v0, v1
return v2
}
; block0:
; vchf %v5, %v25, %v24
; vno %v24, %v5, %v5
; br %r14
function %icmp_sle_i32x4(i32x4, i32x4) -> b32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = icmp.i32x4 sle v0, v1
return v2
}
; block0:
; vchf %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = icmp.i32x4 ugt v0, v1
return v2
}
; block0:
; vchlf %v24, %v24, %v25
; br %r14
function %icmp_ult_i32x4(i32x4, i32x4) -> b32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = icmp.i32x4 ult v0, v1
return v2
}
; block0:
; vchlf %v24, %v25, %v24
; br %r14
function %icmp_uge_i32x4(i32x4, i32x4) -> b32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = icmp.i32x4 uge v0, v1
return v2
}
; block0:
; vchlf %v5, %v25, %v24
; vno %v24, %v5, %v5
; br %r14
function %icmp_ule_i32x4(i32x4, i32x4) -> b32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = icmp.i32x4 ule v0, v1
return v2
}
; block0:
; vchlf %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %icmp_eq_i16x8(i16x8, i16x8) -> b16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = icmp.i16x8 eq v0, v1
return v2
}
; block0:
; vceqh %v24, %v24, %v25
; br %r14
function %icmp_ne_i16x8(i16x8, i16x8) -> b16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = icmp.i16x8 ne v0, v1
return v2
}
; block0:
; vceqh %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %icmp_sgt_i16x8(i16x8, i16x8) -> b16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = icmp.i16x8 sgt v0, v1
return v2
}
; block0:
; vchh %v24, %v24, %v25
; br %r14
function %icmp_slt_i16x8(i16x8, i16x8) -> b16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = icmp.i16x8 slt v0, v1
return v2
}
; block0:
; vchh %v24, %v25, %v24
; br %r14
function %icmp_sge_i16x8(i16x8, i16x8) -> b16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = icmp.i16x8 sge v0, v1
return v2
}
; block0:
; vchh %v5, %v25, %v24
; vno %v24, %v5, %v5
; br %r14
function %icmp_sle_i16x8(i16x8, i16x8) -> b16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = icmp.i16x8 sle v0, v1
return v2
}
; block0:
; vchh %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %icmp_ugt_i16x8(i16x8, i16x8) -> b16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = icmp.i16x8 ugt v0, v1
return v2
}
; block0:
; vchlh %v24, %v24, %v25
; br %r14
function %icmp_ult_i16x8(i16x8, i16x8) -> b16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = icmp.i16x8 ult v0, v1
return v2
}
; block0:
; vchlh %v24, %v25, %v24
; br %r14
function %icmp_uge_i16x8(i16x8, i16x8) -> b16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = icmp.i16x8 uge v0, v1
return v2
}
; block0:
; vchlh %v5, %v25, %v24
; vno %v24, %v5, %v5
; br %r14
function %icmp_ule_i16x8(i16x8, i16x8) -> b16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = icmp.i16x8 ule v0, v1
return v2
}
; block0:
; vchlh %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %icmp_eq_i8x16(i8x16, i8x16) -> b8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = icmp.i8x16 eq v0, v1
return v2
}
; block0:
; vceqb %v24, %v24, %v25
; br %r14
function %icmp_ne_i8x16(i8x16, i8x16) -> b8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = icmp.i8x16 ne v0, v1
return v2
}
; block0:
; vceqb %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %icmp_sgt_i8x16(i8x16, i8x16) -> b8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = icmp.i8x16 sgt v0, v1
return v2
}
; block0:
; vchb %v24, %v24, %v25
; br %r14
function %icmp_slt_i8x16(i8x16, i8x16) -> b8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = icmp.i8x16 slt v0, v1
return v2
}
; block0:
; vchb %v24, %v25, %v24
; br %r14
function %icmp_sge_i8x16(i8x16, i8x16) -> b8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = icmp.i8x16 sge v0, v1
return v2
}
; block0:
; vchb %v5, %v25, %v24
; vno %v24, %v5, %v5
; br %r14
function %icmp_sle_i8x16(i8x16, i8x16) -> b8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = icmp.i8x16 sle v0, v1
return v2
}
; block0:
; vchb %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14
function %icmp_ugt_i8x16(i8x16, i8x16) -> b8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = icmp.i8x16 ugt v0, v1
return v2
}
; block0:
; vchlb %v24, %v24, %v25
; br %r14
function %icmp_ult_i8x16(i8x16, i8x16) -> b8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = icmp.i8x16 ult v0, v1
return v2
}
; block0:
; vchlb %v24, %v25, %v24
; br %r14
function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = icmp.i8x16 uge v0, v1
return v2
}
; block0:
; vchlb %v5, %v25, %v24
; vno %v24, %v5, %v5
; br %r14
function %icmp_ule_i8x16(i8x16, i8x16) -> b8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = icmp.i8x16 ule v0, v1
return v2
}
; block0:
; vchlb %v5, %v24, %v25
; vno %v24, %v5, %v5
; br %r14

View File

@@ -0,0 +1,807 @@
test compile precise-output
target s390x arch13
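;; Lane indices are mirrored in the emitted element numbers: CLIF lanes count from
;; the little end, while the element instructions number elements from the big end
;; (e.g. i64x2 lane 0 is element 1).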
function %insertlane_i64x2_mem_0(i64x2, i64) -> i64x2 {
block0(v0: i64x2, v1: i64):
v2 = load.i64 v1
v3 = insertlane.i64x2 v0, v2, 0
return v3
}
; block0:
; vleg %v24, 0(%r2), 1
; br %r14
function %insertlane_i64x2_mem_1(i64x2, i64) -> i64x2 {
block0(v0: i64x2, v1: i64):
v2 = load.i64 v1
v3 = insertlane.i64x2 v0, v2, 1
return v3
}
; block0:
; vleg %v24, 0(%r2), 0
; br %r14
function %insertlane_i64x2_mem_little_0(i64x2, i64) -> i64x2 {
block0(v0: i64x2, v1: i64):
v2 = load.i64 little v1
v3 = insertlane.i64x2 v0, v2, 0
return v3
}
; block0:
; vlebrg %v24, 0(%r2), 1
; br %r14
function %insertlane_i64x2_mem_little_1(i64x2, i64) -> i64x2 {
block0(v0: i64x2, v1: i64):
v2 = load.i64 little v1
v3 = insertlane.i64x2 v0, v2, 1
return v3
}
; block0:
; vlebrg %v24, 0(%r2), 0
; br %r14
function %insertlane_i32x4_mem_0(i32x4, i64) -> i32x4 {
block0(v0: i32x4, v1: i64):
v2 = load.i32 v1
v3 = insertlane.i32x4 v0, v2, 0
return v3
}
; block0:
; vlef %v24, 0(%r2), 3
; br %r14
function %insertlane_i32x4_mem_3(i32x4, i64) -> i32x4 {
block0(v0: i32x4, v1: i64):
v2 = load.i32 v1
v3 = insertlane.i32x4 v0, v2, 3
return v3
}
; block0:
; vlef %v24, 0(%r2), 0
; br %r14
function %insertlane_i32x4_mem_little_0(i32x4, i64) -> i32x4 {
block0(v0: i32x4, v1: i64):
v2 = load.i32 little v1
v3 = insertlane.i32x4 v0, v2, 0
return v3
}
; block0:
; vlebrf %v24, 0(%r2), 3
; br %r14
function %insertlane_i32x4_mem_little_3(i32x4, i64) -> i32x4 {
block0(v0: i32x4, v1: i64):
v2 = load.i32 little v1
v3 = insertlane.i32x4 v0, v2, 3
return v3
}
; block0:
; vlebrf %v24, 0(%r2), 0
; br %r14
function %insertlane_i16x8_mem_0(i16x8, i64) -> i16x8 {
block0(v0: i16x8, v1: i64):
v2 = load.i16 v1
v3 = insertlane.i16x8 v0, v2, 0
return v3
}
; block0:
; vleh %v24, 0(%r2), 7
; br %r14
function %insertlane_i16x8_mem_7(i16x8, i64) -> i16x8 {
block0(v0: i16x8, v1: i64):
v2 = load.i16 v1
v3 = insertlane.i16x8 v0, v2, 7
return v3
}
; block0:
; vleh %v24, 0(%r2), 0
; br %r14
function %insertlane_i16x8_mem_little_0(i16x8, i64) -> i16x8 {
block0(v0: i16x8, v1: i64):
v2 = load.i16 little v1
v3 = insertlane.i16x8 v0, v2, 0
return v3
}
; block0:
; vlebrh %v24, 0(%r2), 7
; br %r14
function %insertlane_i16x8_mem_little_7(i16x8, i64) -> i16x8 {
block0(v0: i16x8, v1: i64):
v2 = load.i16 little v1
v3 = insertlane.i16x8 v0, v2, 7
return v3
}
; block0:
; vlebrh %v24, 0(%r2), 0
; br %r14
function %insertlane_i8x16_mem_0(i8x16, i64) -> i8x16 {
block0(v0: i8x16, v1: i64):
v2 = load.i8 v1
v3 = insertlane.i8x16 v0, v2, 0
return v3
}
; block0:
; vleb %v24, 0(%r2), 15
; br %r14
function %insertlane_i8x16_mem_15(i8x16, i64) -> i8x16 {
block0(v0: i8x16, v1: i64):
v2 = load.i8 v1
v3 = insertlane.i8x16 v0, v2, 15
return v3
}
; block0:
; vleb %v24, 0(%r2), 0
; br %r14
function %insertlane_i8x16_mem_little_0(i8x16, i64) -> i8x16 {
block0(v0: i8x16, v1: i64):
v2 = load.i8 little v1
v3 = insertlane.i8x16 v0, v2, 0
return v3
}
; block0:
; vleb %v24, 0(%r2), 15
; br %r14
function %insertlane_i8x16_mem_little_15(i8x16, i64) -> i8x16 {
block0(v0: i8x16, v1: i64):
v2 = load.i8 little v1
v3 = insertlane.i8x16 v0, v2, 15
return v3
}
; block0:
; vleb %v24, 0(%r2), 0
; br %r14
function %insertlane_f64x2_mem_0(f64x2, i64) -> f64x2 {
block0(v0: f64x2, v1: i64):
v2 = load.f64 v1
v3 = insertlane.f64x2 v0, v2, 0
return v3
}
; block0:
; vleg %v24, 0(%r2), 1
; br %r14
function %insertlane_f64x2_mem_1(f64x2, i64) -> f64x2 {
block0(v0: f64x2, v1: i64):
v2 = load.f64 v1
v3 = insertlane.f64x2 v0, v2, 1
return v3
}
; block0:
; vleg %v24, 0(%r2), 0
; br %r14
function %insertlane_f64x2_mem_little_0(f64x2, i64) -> f64x2 {
block0(v0: f64x2, v1: i64):
v2 = load.f64 little v1
v3 = insertlane.f64x2 v0, v2, 0
return v3
}
; block0:
; vlebrg %v24, 0(%r2), 1
; br %r14
function %insertlane_f64x2_mem_little_1(f64x2, i64) -> f64x2 {
block0(v0: f64x2, v1: i64):
v2 = load.f64 little v1
v3 = insertlane.f64x2 v0, v2, 1
return v3
}
; block0:
; vlebrg %v24, 0(%r2), 0
; br %r14
function %insertlane_f32x4_mem_0(f32x4, i64) -> f32x4 {
block0(v0: f32x4, v1: i64):
v2 = load.f32 v1
v3 = insertlane.f32x4 v0, v2, 0
return v3
}
; block0:
; vlef %v24, 0(%r2), 3
; br %r14
function %insertlane_i32x4_mem_3(i32x4, i64) -> i32x4 {
block0(v0: i32x4, v1: i64):
v2 = load.i32 v1
v3 = insertlane.i32x4 v0, v2, 3
return v3
}
; block0:
; vlef %v24, 0(%r2), 0
; br %r14
function %insertlane_f32x4_mem_little_0(f32x4, i64) -> f32x4 {
block0(v0: f32x4, v1: i64):
v2 = load.f32 little v1
v3 = insertlane.f32x4 v0, v2, 0
return v3
}
; block0:
; vlebrf %v24, 0(%r2), 3
; br %r14
function %insertlane_i32x4_mem_little_3(i32x4, i64) -> i32x4 {
block0(v0: i32x4, v1: i64):
v2 = load.i32 little v1
v3 = insertlane.i32x4 v0, v2, 3
return v3
}
; block0:
; vlebrf %v24, 0(%r2), 0
; br %r14
function %extractlane_i64x2_mem_0(i64x2, i64) {
block0(v0: i64x2, v1: i64):
v2 = extractlane.i64x2 v0, 0
store v2, v1
return
}
; block0:
; vsteg %v24, 0(%r2), 1
; br %r14
function %extractlane_i64x2_mem_1(i64x2, i64) {
block0(v0: i64x2, v1: i64):
v2 = extractlane.i64x2 v0, 1
store v2, v1
return
}
; block0:
; vsteg %v24, 0(%r2), 0
; br %r14
function %extractlane_i64x2_mem_little_0(i64x2, i64) {
block0(v0: i64x2, v1: i64):
v2 = extractlane.i64x2 v0, 0
store little v2, v1
return
}
; block0:
; vstebrg %v24, 0(%r2), 1
; br %r14
function %extractlane_i64x2_mem_little_1(i64x2, i64) {
block0(v0: i64x2, v1: i64):
v2 = extractlane.i64x2 v0, 1
store little v2, v1
return
}
; block0:
; vstebrg %v24, 0(%r2), 0
; br %r14
function %extractlane_i32x4_mem_0(i32x4, i64) {
block0(v0: i32x4, v1: i64):
v2 = extractlane.i32x4 v0, 0
store v2, v1
return
}
; block0:
; vstef %v24, 0(%r2), 3
; br %r14
function %extractlane_i32x4_mem_3(i32x4, i64) {
block0(v0: i32x4, v1: i64):
v2 = extractlane.i32x4 v0, 3
store v2, v1
return
}
; block0:
; vstef %v24, 0(%r2), 0
; br %r14
function %extractlane_i32x4_mem_little_0(i32x4, i64) {
block0(v0: i32x4, v1: i64):
v2 = extractlane.i32x4 v0, 0
store little v2, v1
return
}
; block0:
; vstebrf %v24, 0(%r2), 3
; br %r14
function %extractlane_i32x4_mem_little_3(i32x4, i64) {
block0(v0: i32x4, v1: i64):
v2 = extractlane.i32x4 v0, 3
store little v2, v1
return
}
; block0:
; vstebrf %v24, 0(%r2), 0
; br %r14
function %extractlane_i16x8_mem_0(i16x8, i64) {
block0(v0: i16x8, v1: i64):
v2 = extractlane.i16x8 v0, 0
store v2, v1
return
}
; block0:
; vsteh %v24, 0(%r2), 7
; br %r14
function %extractlane_i16x8_mem_7(i16x8, i64) {
block0(v0: i16x8, v1: i64):
v2 = extractlane.i16x8 v0, 7
store v2, v1
return
}
; block0:
; vsteh %v24, 0(%r2), 0
; br %r14
function %extractlane_i16x8_mem_little_0(i16x8, i64) {
block0(v0: i16x8, v1: i64):
v2 = extractlane.i16x8 v0, 0
store little v2, v1
return
}
; block0:
; vstebrh %v24, 0(%r2), 7
; br %r14
function %extractlane_i16x8_mem_little_7(i16x8, i64) {
block0(v0: i16x8, v1: i64):
v2 = extractlane.i16x8 v0, 7
store little v2, v1
return
}
; block0:
; vstebrh %v24, 0(%r2), 0
; br %r14
function %extractlane_i8x16_mem_0(i8x16, i64) {
block0(v0: i8x16, v1: i64):
v2 = extractlane.i8x16 v0, 0
store v2, v1
return
}
; block0:
; vsteb %v24, 0(%r2), 15
; br %r14
function %extractlane_i8x16_mem_15(i8x16, i64) {
block0(v0: i8x16, v1: i64):
v2 = extractlane.i8x16 v0, 15
store v2, v1
return
}
; block0:
; vsteb %v24, 0(%r2), 0
; br %r14
function %extractlane_i8x16_mem_little_0(i8x16, i64) {
block0(v0: i8x16, v1: i64):
v2 = extractlane.i8x16 v0, 0
store little v2, v1
return
}
; block0:
; vsteb %v24, 0(%r2), 15
; br %r14
function %extractlane_i8x16_mem_little_15(i8x16, i64) {
block0(v0: i8x16, v1: i64):
v2 = extractlane.i8x16 v0, 15
store little v2, v1
return
}
; block0:
; vsteb %v24, 0(%r2), 0
; br %r14
function %extractlane_f64x2_mem_0(f64x2, i64) {
block0(v0: f64x2, v1: i64):
v2 = extractlane.f64x2 v0, 0
store v2, v1
return
}
; block0:
; vsteg %v24, 0(%r2), 1
; br %r14
function %extractlane_f64x2_mem_1(f64x2, i64) {
block0(v0: f64x2, v1: i64):
v2 = extractlane.f64x2 v0, 1
store v2, v1
return
}
; block0:
; vsteg %v24, 0(%r2), 0
; br %r14
function %extractlane_f64x2_mem_little_0(f64x2, i64) {
block0(v0: f64x2, v1: i64):
v2 = extractlane.f64x2 v0, 0
store little v2, v1
return
}
; block0:
; vstebrg %v24, 0(%r2), 1
; br %r14
function %extractlane_f64x2_mem_little_1(f64x2, i64) {
block0(v0: f64x2, v1: i64):
v2 = extractlane.f64x2 v0, 1
store little v2, v1
return
}
; block0:
; vstebrg %v24, 0(%r2), 0
; br %r14
function %extractlane_f32x4_mem_0(f32x4, i64) {
block0(v0: f32x4, v1: i64):
v2 = extractlane.f32x4 v0, 0
store v2, v1
return
}
; block0:
; vstef %v24, 0(%r2), 3
; br %r14
function %extractlane_f32x4_mem_3(f32x4, i64) {
block0(v0: f32x4, v1: i64):
v2 = extractlane.f32x4 v0, 3
store v2, v1
return
}
; block0:
; vstef %v24, 0(%r2), 0
; br %r14
function %extractlane_f32x4_mem_little_0(f32x4, i64) {
block0(v0: f32x4, v1: i64):
v2 = extractlane.f32x4 v0, 0
store little v2, v1
return
}
; block0:
; vstebrf %v24, 0(%r2), 3
; br %r14
function %extractlane_f32x4_mem_little_3(f32x4, i64) {
block0(v0: f32x4, v1: i64):
v2 = extractlane.f32x4 v0, 3
store little v2, v1
return
}
; block0:
; vstebrf %v24, 0(%r2), 0
; br %r14
function %splat_i64x2_mem(i64) -> i64x2 {
block0(v0: i64):
v1 = load.i64 v0
v2 = splat.i64x2 v1
return v2
}
; block0:
; vlrepg %v24, 0(%r2)
; br %r14
function %splat_i64x2_mem_little(i64) -> i64x2 {
block0(v0: i64):
v1 = load.i64 little v0
v2 = splat.i64x2 v1
return v2
}
; block0:
; vlbrrepg %v24, 0(%r2)
; br %r14
function %splat_i32x4_mem(i64) -> i32x4 {
block0(v0: i64):
v1 = load.i32 v0
v2 = splat.i32x4 v1
return v2
}
; block0:
; vlrepf %v24, 0(%r2)
; br %r14
function %splat_i32x4_mem_little(i64) -> i32x4 {
block0(v0: i64):
v1 = load.i32 little v0
v2 = splat.i32x4 v1
return v2
}
; block0:
; vlbrrepf %v24, 0(%r2)
; br %r14
function %splat_i16x8_mem(i64) -> i16x8 {
block0(v0: i64):
v1 = load.i16 v0
v2 = splat.i16x8 v1
return v2
}
; block0:
; vlreph %v24, 0(%r2)
; br %r14
function %splat_i16x8_mem_little(i64) -> i16x8 {
block0(v0: i64):
v1 = load.i16 little v0
v2 = splat.i16x8 v1
return v2
}
; block0:
; vlbrreph %v24, 0(%r2)
; br %r14
function %splat_i8x16_mem(i64) -> i8x16 {
block0(v0: i64):
v1 = load.i8 v0
v2 = splat.i8x16 v1
return v2
}
; block0:
; vlrepb %v24, 0(%r2)
; br %r14
function %splat_i8x16_mem_little(i64) -> i8x16 {
block0(v0: i64):
v1 = load.i8 little v0
v2 = splat.i8x16 v1
return v2
}
; block0:
; vlrepb %v24, 0(%r2)
; br %r14
function %splat_f64x2_mem(i64) -> f64x2 {
block0(v0: i64):
v1 = load.f64 v0
v2 = splat.f64x2 v1
return v2
}
; block0:
; vlrepg %v24, 0(%r2)
; br %r14
function %splat_f64x2_mem_little(i64) -> f64x2 {
block0(v0: i64):
v1 = load.f64 little v0
v2 = splat.f64x2 v1
return v2
}
; block0:
; vlbrrepg %v24, 0(%r2)
; br %r14
function %splat_f32x4_mem(i64) -> f32x4 {
block0(v0: i64):
v1 = load.f32 v0
v2 = splat.f32x4 v1
return v2
}
; block0:
; vlrepf %v24, 0(%r2)
; br %r14
function %splat_f32x4_mem_little(i64) -> f32x4 {
block0(v0: i64):
v1 = load.f32 little v0
v2 = splat.f32x4 v1
return v2
}
; block0:
; vlbrrepf %v24, 0(%r2)
; br %r14
function %scalar_to_vector_i64x2_mem(i64) -> i64x2 {
block0(v0: i64):
v1 = load.i64 v0
v2 = scalar_to_vector.i64x2 v1
return v2
}
; block0:
; vgbm %v24, 0
; vleg %v24, 0(%r2), 1
; br %r14
function %scalar_to_vector_i64x2_mem_little(i64) -> i64x2 {
block0(v0: i64):
v1 = load.i64 little v0
v2 = scalar_to_vector.i64x2 v1
return v2
}
; block0:
; vgbm %v24, 0
; vlebrg %v24, 0(%r2), 1
; br %r14
function %scalar_to_vector_i32x4_mem(i64) -> i32x4 {
block0(v0: i64):
v1 = load.i32 v0
v2 = scalar_to_vector.i32x4 v1
return v2
}
; block0:
; vgbm %v24, 0
; vlef %v24, 0(%r2), 3
; br %r14
function %scalar_to_vector_i32x4_mem_little(i64) -> i32x4 {
block0(v0: i64):
v1 = load.i32 little v0
v2 = scalar_to_vector.i32x4 v1
return v2
}
; block0:
; vgbm %v24, 0
; vlebrf %v24, 0(%r2), 3
; br %r14
function %scalar_to_vector_i16x8_mem(i64) -> i16x8 {
block0(v0: i64):
v1 = load.i16 v0
v2 = scalar_to_vector.i16x8 v1
return v2
}
; block0:
; vgbm %v24, 0
; vleh %v24, 0(%r2), 7
; br %r14
function %scalar_to_vector_i16x8_mem_little(i64) -> i16x8 {
block0(v0: i64):
v1 = load.i16 little v0
v2 = scalar_to_vector.i16x8 v1
return v2
}
; block0:
; vgbm %v24, 0
; vlebrh %v24, 0(%r2), 7
; br %r14
function %scalar_to_vector_i8x16_mem(i64) -> i8x16 {
block0(v0: i64):
v1 = load.i8 v0
v2 = scalar_to_vector.i8x16 v1
return v2
}
; block0:
; vgbm %v24, 0
; vleb %v24, 0(%r2), 15
; br %r14
function %scalar_to_vector_i8x16_mem_little(i64) -> i8x16 {
block0(v0: i64):
v1 = load.i8 little v0
v2 = scalar_to_vector.i8x16 v1
return v2
}
; block0:
; vgbm %v24, 0
; vleb %v24, 0(%r2), 15
; br %r14
function %scalar_to_vector_f64x2_mem(i64) -> f64x2 {
block0(v0: i64):
v1 = load.f64 v0
v2 = scalar_to_vector.f64x2 v1
return v2
}
; block0:
; vgbm %v24, 0
; vleg %v24, 0(%r2), 1
; br %r14
function %scalar_to_vector_f64x2_mem_little(i64) -> f64x2 {
block0(v0: i64):
v1 = load.f64 little v0
v2 = scalar_to_vector.f64x2 v1
return v2
}
; block0:
; vgbm %v24, 0
; vlebrg %v24, 0(%r2), 1
; br %r14
function %scalar_to_vector_f32x4_mem(i64) -> f32x4 {
block0(v0: i64):
v1 = load.f32 v0
v2 = scalar_to_vector.f32x4 v1
return v2
}
; block0:
; vgbm %v24, 0
; vlef %v24, 0(%r2), 3
; br %r14
function %scalar_to_vector_f32x4_mem_little(i64) -> f32x4 {
block0(v0: i64):
v1 = load.f32 little v0
v2 = scalar_to_vector.f32x4 v1
return v2
}
; block0:
; vgbm %v24, 0
; vlebrf %v24, 0(%r2), 3
; br %r14

File diff suppressed because it is too large

View File

@@ -0,0 +1,675 @@
test compile precise-output
target s390x
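;; vany_true / vall_true compare against zero using the CC-setting vceq*s forms
;; and materialize the result with lhi + lochi on the condition code; when fed by
;; an icmp or fcmp, the CC-setting compare is used directly.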
function %vany_true_i64x2(i64x2) -> b1 {
block0(v0: i64x2):
v1 = vany_true v0
return v1
}
; block0:
; vgbm %v3, 0
; vceqgs %v5, %v24, %v3
; lhi %r2, 0
; lochine %r2, 1
; br %r14
function %vany_true_i32x4(i32x4) -> b1 {
block0(v0: i32x4):
v1 = vany_true v0
return v1
}
; block0:
; vgbm %v3, 0
; vceqfs %v5, %v24, %v3
; lhi %r2, 0
; lochine %r2, 1
; br %r14
function %vany_true_i16x8(i16x8) -> b1 {
block0(v0: i16x8):
v1 = vany_true v0
return v1
}
; block0:
; vgbm %v3, 0
; vceqhs %v5, %v24, %v3
; lhi %r2, 0
; lochine %r2, 1
; br %r14
function %vany_true_i8x16(i8x16) -> b1 {
block0(v0: i8x16):
v1 = vany_true v0
return v1
}
; block0:
; vgbm %v3, 0
; vceqbs %v5, %v24, %v3
; lhi %r2, 0
; lochine %r2, 1
; br %r14
function %vall_true_i64x2(i64x2) -> b1 {
block0(v0: i64x2):
v1 = vall_true v0
return v1
}
; block0:
; vgbm %v3, 0
; vceqgs %v5, %v24, %v3
; lhi %r2, 0
; lochio %r2, 1
; br %r14
function %vall_true_i32x4(i32x4) -> b1 {
block0(v0: i32x4):
v1 = vall_true v0
return v1
}
; block0:
; vgbm %v3, 0
; vceqfs %v5, %v24, %v3
; lhi %r2, 0
; lochio %r2, 1
; br %r14
function %vall_true_i16x8(i16x8) -> b1 {
block0(v0: i16x8):
v1 = vall_true v0
return v1
}
; block0:
; vgbm %v3, 0
; vceqhs %v5, %v24, %v3
; lhi %r2, 0
; lochio %r2, 1
; br %r14
function %vall_true_i8x16(i8x16) -> b1 {
block0(v0: i8x16):
v1 = vall_true v0
return v1
}
; block0:
; vgbm %v3, 0
; vceqbs %v5, %v24, %v3
; lhi %r2, 0
; lochio %r2, 1
; br %r14
function %vany_true_icmp_eq_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp eq v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vceqgs %v5, %v24, %v25
; lhi %r2, 0
; lochino %r2, 1
; br %r14
function %vany_true_icmp_ne_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp ne v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vceqgs %v5, %v24, %v25
; lhi %r2, 0
; lochine %r2, 1
; br %r14
function %vany_true_icmp_sgt_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp sgt v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vchgs %v5, %v24, %v25
; lhi %r2, 0
; lochino %r2, 1
; br %r14
function %vany_true_icmp_sle_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp sle v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vchgs %v5, %v24, %v25
; lhi %r2, 0
; lochine %r2, 1
; br %r14
function %vany_true_icmp_slt_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp slt v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vchgs %v5, %v25, %v24
; lhi %r2, 0
; lochino %r2, 1
; br %r14
function %vany_true_icmp_sge_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp sge v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vchgs %v5, %v25, %v24
; lhi %r2, 0
; lochine %r2, 1
; br %r14
function %vany_true_icmp_ugt_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp ugt v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vchlgs %v5, %v24, %v25
; lhi %r2, 0
; lochino %r2, 1
; br %r14
function %vany_true_icmp_ule_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp ule v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vchlgs %v5, %v24, %v25
; lhi %r2, 0
; lochine %r2, 1
; br %r14
function %vany_true_icmp_ult_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp ult v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vchlgs %v5, %v25, %v24
; lhi %r2, 0
; lochino %r2, 1
; br %r14
function %vany_true_icmp_uge_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp uge v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vchlgs %v5, %v25, %v24
; lhi %r2, 0
; lochine %r2, 1
; br %r14
function %vany_true_fcmp_eq_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp eq v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vfcedbs %v5, %v24, %v25
; lhi %r2, 0
; lochino %r2, 1
; br %r14
function %vany_true_fcmp_ne_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp ne v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vfcedbs %v5, %v24, %v25
; lhi %r2, 0
; lochine %r2, 1
; br %r14
function %vany_true_fcmp_gt_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp gt v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vfchdbs %v5, %v24, %v25
; lhi %r2, 0
; lochino %r2, 1
; br %r14
function %vany_true_fcmp_ule_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp ule v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vfchdbs %v5, %v24, %v25
; lhi %r2, 0
; lochine %r2, 1
; br %r14
function %vany_true_fcmp_ge_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp ge v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vfchedbs %v5, %v24, %v25
; lhi %r2, 0
; lochino %r2, 1
; br %r14
function %vany_true_fcmp_ult_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp ult v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vfchedbs %v5, %v24, %v25
; lhi %r2, 0
; lochine %r2, 1
; br %r14
function %vany_true_fcmp_lt_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp lt v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vfchdbs %v5, %v25, %v24
; lhi %r2, 0
; lochino %r2, 1
; br %r14
function %vany_true_fcmp_uge_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp uge v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vfchdbs %v5, %v25, %v24
; lhi %r2, 0
; lochine %r2, 1
; br %r14
function %vany_true_fcmp_le_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp le v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vfchedbs %v5, %v25, %v24
; lhi %r2, 0
; lochino %r2, 1
; br %r14
function %vany_true_fcmp_ugt_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp ugt v0, v1
v3 = vany_true v2
return v3
}
; block0:
; vfchedbs %v5, %v25, %v24
; lhi %r2, 0
; lochine %r2, 1
; br %r14
function %vall_true_icmp_eq_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp eq v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vceqgs %v5, %v24, %v25
; lhi %r2, 0
; lochie %r2, 1
; br %r14
function %vall_true_icmp_ne_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp ne v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vceqgs %v5, %v24, %v25
; lhi %r2, 0
; lochio %r2, 1
; br %r14
function %vall_true_icmp_sgt_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp sgt v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vchgs %v5, %v24, %v25
; lhi %r2, 0
; lochie %r2, 1
; br %r14
function %vall_true_icmp_sle_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp sle v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vchgs %v5, %v24, %v25
; lhi %r2, 0
; lochio %r2, 1
; br %r14
function %vall_true_icmp_slt_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp slt v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vchgs %v5, %v25, %v24
; lhi %r2, 0
; lochie %r2, 1
; br %r14
function %vall_true_icmp_sge_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp sge v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vchgs %v5, %v25, %v24
; lhi %r2, 0
; lochio %r2, 1
; br %r14
function %vall_true_icmp_ugt_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp ugt v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vchlgs %v5, %v24, %v25
; lhi %r2, 0
; lochie %r2, 1
; br %r14
function %vall_true_icmp_ule_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp ule v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vchlgs %v5, %v24, %v25
; lhi %r2, 0
; lochio %r2, 1
; br %r14
function %vall_true_icmp_ult_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp ult v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vchlgs %v5, %v25, %v24
; lhi %r2, 0
; lochie %r2, 1
; br %r14
function %vall_true_icmp_uge_i64x2(i64x2, i64x2) -> b1 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp uge v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vchlgs %v5, %v25, %v24
; lhi %r2, 0
; lochio %r2, 1
; br %r14
function %vall_true_fcmp_eq_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp eq v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vfcedbs %v5, %v24, %v25
; lhi %r2, 0
; lochie %r2, 1
; br %r14
function %vall_true_fcmp_ne_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp ne v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vfcedbs %v5, %v24, %v25
; lhi %r2, 0
; lochio %r2, 1
; br %r14
function %vall_true_fcmp_gt_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp gt v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vfchdbs %v5, %v24, %v25
; lhi %r2, 0
; lochie %r2, 1
; br %r14
function %vall_true_fcmp_ule_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp ule v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vfchdbs %v5, %v24, %v25
; lhi %r2, 0
; lochio %r2, 1
; br %r14
function %vall_true_fcmp_ge_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp ge v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vfchedbs %v5, %v24, %v25
; lhi %r2, 0
; lochie %r2, 1
; br %r14
function %vall_true_fcmp_ult_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp ult v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vfchedbs %v5, %v24, %v25
; lhi %r2, 0
; lochio %r2, 1
; br %r14
function %vall_true_fcmp_lt_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp lt v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vfchdbs %v5, %v25, %v24
; lhi %r2, 0
; lochie %r2, 1
; br %r14
function %vall_true_fcmp_uge_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp uge v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vfchdbs %v5, %v25, %v24
; lhi %r2, 0
; lochio %r2, 1
; br %r14
function %vall_true_fcmp_le_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp le v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vfchedbs %v5, %v25, %v24
; lhi %r2, 0
; lochie %r2, 1
; br %r14
function %vall_true_fcmp_ugt_f64x2(f64x2, f64x2) -> b1 {
block0(v0: f64x2, v1: f64x2):
v2 = fcmp ugt v0, v1
v3 = vall_true v2
return v3
}
; block0:
; vfchedbs %v5, %v25, %v24
; lhi %r2, 0
; lochio %r2, 1
; br %r14
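;; vhigh_bits gathers the lane top bits with vbperm, using a bit-index mask from
;; the literal pool, and moves the result to a GPR via lgdr.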
function %vhigh_bits(i64x2) -> i64 {
block0(v0: i64x2):
v1 = vhigh_bits.i64 v0
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x80808080808080808080808080800040 ; vl %v3, 0(%r1)
; vbperm %v5, %v24, %v3
; lgdr %r2, %f5
; br %r14
function %vhigh_bits(i32x4) -> i64 {
block0(v0: i32x4):
v1 = vhigh_bits.i64 v0
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x80808080808080808080808000204060 ; vl %v3, 0(%r1)
; vbperm %v5, %v24, %v3
; lgdr %r2, %f5
; br %r14
function %vhigh_bits(i16x8) -> i64 {
block0(v0: i16x8):
v1 = vhigh_bits.i64 v0
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x80808080808080800010203040506070 ; vl %v3, 0(%r1)
; vbperm %v5, %v24, %v3
; lgdr %r2, %f5
; br %r14
function %vhigh_bits(i8x16) -> i64 {
block0(v0: i8x16):
v1 = vhigh_bits.i64 v0
return v1
}
; block0:
; bras %r1, 20 ; data.u128 0x00081018202830384048505860687078 ; vl %v3, 0(%r1)
; vbperm %v5, %v24, %v3
; lgdr %r2, %f5
; br %r14

View File

@@ -0,0 +1,493 @@
test compile precise-output
target s390x
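;; swizzle complements the index vector and clamps it (unsigned max with 239) so
;; that out-of-range indices select bytes from the zero operand of the vperm.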
function %swizzle(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = swizzle.i8x16 v0, v1
return v2
}
; block0:
; vgbm %v5, 0
; vrepib %v7, 239
; vno %v17, %v25, %v25
; vmxlb %v19, %v7, %v17
; vperm %v24, %v5, %v24, %v19
; br %r14
function %shuffle_0(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
return v2
}
; block0:
; vrepib %v5, 15
; vperm %v24, %v24, %v25, %v5
; br %r14
function %shuffle_1(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [3 0 31 26 4 6 12 11 23 13 24 4 2 15 17 5]
return v2
}
; block0:
; bras %r1, 20 ; data.u128 0x0a1e000d0b1702180403090b15100f0c ; vl %v5, 0(%r1)
; vperm %v24, %v24, %v25, %v5
; br %r14
function %shuffle_2(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47]
return v2
}
; block0:
; vgbm %v5, 1
; bras %r1, 20 ; data.u128 0x8080808080808080808080808080800f ; vl %v7, 0(%r1)
; vperm %v17, %v24, %v25, %v7
; vn %v24, %v5, %v17
; br %r14
function %shuffle_vmrhg_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15]
return v2
}
; block0:
; vmrhg %v24, %v24, %v25
; br %r14
function %shuffle_vmrhf_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 8 9 10 11 28 29 30 31 12 13 14 15]
return v2
}
; block0:
; vmrhf %v24, %v24, %v25
; br %r14
function %shuffle_vmrhh_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 8 9 26 27 10 11 28 29 12 13 30 31 14 15]
return v2
}
; block0:
; vmrhh %v24, %v24, %v25
; br %r14
function %shuffle_vmrhb_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 8 25 9 26 10 27 11 28 12 29 13 30 14 31 15]
return v2
}
; block0:
; vmrhb %v24, %v24, %v25
; br %r14
function %shuffle_vmrhg_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31]
return v2
}
; block0:
; vmrhg %v24, %v25, %v24
; br %r14
function %shuffle_vmrhf_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 24 25 26 27 12 13 14 15 28 29 30 31]
return v2
}
; block0:
; vmrhf %v24, %v25, %v24
; br %r14
function %shuffle_vmrhh_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 24 25 10 11 26 27 12 13 28 29 14 15 30 31]
return v2
}
; block0:
; vmrhh %v24, %v25, %v24
; br %r14
function %shuffle_vmrhb_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 24 9 25 10 26 11 27 12 28 13 29 14 30 15 31]
return v2
}
; block0:
; vmrhb %v24, %v25, %v24
; br %r14
function %shuffle_vmrhg_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 12 13 14 15 8 9 10 11 12 13 14 15]
return v2
}
; block0:
; vmrhg %v24, %v24, %v24
; br %r14
function %shuffle_vmrhf_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 10 11 8 9 10 11 12 13 14 15 12 13 14 15]
return v2
}
; block0:
; vmrhf %v24, %v24, %v24
; br %r14
function %shuffle_vmrhh_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 9 8 9 10 11 10 11 12 13 12 13 14 15 14 15]
return v2
}
; block0:
; vmrhh %v24, %v24, %v24
; br %r14
function %shuffle_vmrhb_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15]
return v2
}
; block0:
; vmrhb %v24, %v24, %v24
; br %r14
function %shuffle_vmrhg_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 28 29 30 31 24 25 26 27 28 29 30 31]
return v2
}
; block0:
; vmrhg %v24, %v25, %v25
; br %r14
function %shuffle_vmrhf_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 26 27 24 25 26 27 28 29 30 31 28 29 30 31]
return v2
}
; block0:
; vmrhf %v24, %v25, %v25
; br %r14
function %shuffle_vmrhh_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 25 24 25 26 27 26 27 28 29 28 29 30 31 30 31]
return v2
}
; block0:
; vmrhh %v24, %v25, %v25
; br %r14
function %shuffle_vmrhb_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31]
return v2
}
; block0:
; vmrhb %v24, %v25, %v25
; br %r14
function %shuffle_vmrlg_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 20 21 22 23 0 1 2 3 4 5 6 7]
return v2
}
; block0:
; vmrlg %v24, %v24, %v25
; br %r14
function %shuffle_vmrlf_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 0 1 2 3 20 21 22 23 4 5 6 7]
return v2
}
; block0:
; vmrlf %v24, %v24, %v25
; br %r14
function %shuffle_vmrlh_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 0 1 18 19 2 3 20 21 4 5 22 23 6 7]
return v2
}
; block0:
; vmrlh %v24, %v24, %v25
; br %r14
function %shuffle_vmrlb_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 0 17 1 18 2 19 3 20 4 21 5 22 6 23 7]
return v2
}
; block0:
; vmrlb %v24, %v24, %v25
; br %r14
function %shuffle_vmrlg_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23]
return v2
}
; block0:
; vmrlg %v24, %v25, %v24
; br %r14
function %shuffle_vmrlf_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23]
return v2
}
; block0:
; vmrlf %v24, %v25, %v24
; br %r14
function %shuffle_vmrlh_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 16 17 2 3 18 19 4 5 20 21 6 7 22 23]
return v2
}
; block0:
; vmrlh %v24, %v25, %v24
; br %r14
function %shuffle_vmrlb_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23]
return v2
}
; block0:
; vmrlb %v24, %v25, %v24
; br %r14
function %shuffle_vmrlg_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
return v2
}
; block0:
; vmrlg %v24, %v24, %v24
; br %r14
function %shuffle_vmrlf_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 0 1 2 3 4 5 6 7 4 5 6 7]
return v2
}
; block0:
; vmrlf %v24, %v24, %v24
; br %r14
function %shuffle_vmrlh_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 0 1 2 3 2 3 4 5 4 5 6 7 6 7]
return v2
}
; block0:
; vmrlh %v24, %v24, %v24
; br %r14
function %shuffle_vmrlb_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7]
return v2
}
; block0:
; vmrlb %v24, %v24, %v24
; br %r14
function %shuffle_vmrlg_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 20 21 22 23 16 17 18 19 20 21 22 23]
return v2
}
; block0:
; vmrlg %v24, %v25, %v25
; br %r14
function %shuffle_vmrlf_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 16 17 18 19 20 21 22 23 20 21 22 23]
return v2
}
; block0:
; vmrlf %v24, %v25, %v25
; br %r14
function %shuffle_vmrlh_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 16 17 18 19 18 19 20 21 20 21 22 23 22 23]
return v2
}
; block0:
; vmrlh %v24, %v25, %v25
; br %r14
function %shuffle_vmrlb_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 23]
return v2
}
; block0:
; vmrlb %v24, %v25, %v25
; br %r14
;; Special patterns that can be implemented via PACK.
function %shuffle_vpkg_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 24 25 26 27 0 1 2 3 8 9 10 11]
return v2
}
; block0:
; vpkg %v24, %v24, %v25
; br %r14
function %shuffle_vpkf_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 20 21 24 25 28 29 0 1 4 5 8 9 12 13]
return v2
}
; block0:
; vpkf %v24, %v24, %v25
; br %r14
function %shuffle_vpkh_xy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 18 20 22 24 26 28 30 0 2 4 6 8 10 12 14]
return v2
}
; block0:
; vpkh %v24, %v24, %v25
; br %r14
function %shuffle_vpkg_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 8 9 10 11 16 17 18 19 24 25 26 27]
return v2
}
; block0:
; vpkg %v24, %v25, %v24
; br %r14
function %shuffle_vpkf_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 4 5 8 9 12 13 16 17 20 21 24 25 28 29]
return v2
}
; block0:
; vpkf %v24, %v25, %v24
; br %r14
function %shuffle_vpkh_yx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30]
return v2
}
; block0:
; vpkh %v24, %v25, %v24
; br %r14
function %shuffle_vpkg_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 2 3 8 9 10 11 0 1 2 3 8 9 10 11]
return v2
}
; block0:
; vpkg %v24, %v24, %v24
; br %r14
function %shuffle_vpkf_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 1 4 5 8 9 12 13 0 1 4 5 8 9 12 13]
return v2
}
; block0:
; vpkf %v24, %v24, %v24
; br %r14
function %shuffle_vpkh_xx(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [0 2 4 6 8 10 12 14 0 2 4 6 8 10 12 14]
return v2
}
; block0:
; vpkh %v24, %v24, %v24
; br %r14
function %shuffle_vpkg_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 18 19 24 25 26 27 16 17 18 19 24 25 26 27]
return v2
}
; block0:
; vpkg %v24, %v25, %v25
; br %r14
function %shuffle_vpkf_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 17 20 21 24 25 28 29 16 17 20 21 24 25 28 29]
return v2
}
; block0:
; vpkf %v24, %v25, %v25
; br %r14
function %shuffle_vpkh_yy(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle.i8x16 v0, v1, [16 18 20 22 24 26 28 30 16 18 20 22 24 26 28 30]
return v2
}
; block0:
; vpkh %v24, %v25, %v25
; br %r14

View File

@@ -0,0 +1,427 @@
test compile precise-output
target s390x
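;; There is no vector rotate-right instruction: rotr negates the rotate amount
;; (lcr) or folds the immediate and emits a rotate left (verll*).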
function %rotr_i64x2_reg(i64x2, i64) -> i64x2 {
block0(v0: i64x2, v1: i64):
v2 = rotr.i64x2 v0, v1
return v2
}
; block0:
; lcr %r3, %r2
; verllg %v24, %v24, 0(%r3)
; br %r14
function %rotr_i64x2_imm(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i32 17
v2 = rotr.i64x2 v0, v1
return v2
}
; block0:
; verllg %v24, %v24, 47
; br %r14
function %rotr_i32x4_reg(i32x4, i32) -> i32x4 {
block0(v0: i32x4, v1: i32):
v2 = rotr.i32x4 v0, v1
return v2
}
; block0:
; lcr %r3, %r2
; verllf %v24, %v24, 0(%r3)
; br %r14
function %rotr_i32x4_imm(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 17
v2 = rotr.i32x4 v0, v1
return v2
}
; block0:
; verllf %v24, %v24, 15
; br %r14
function %rotr_i16x8_reg(i16x8, i16) -> i16x8 {
block0(v0: i16x8, v1: i16):
v2 = rotr.i16x8 v0, v1
return v2
}
; block0:
; lcr %r3, %r2
; verllh %v24, %v24, 0(%r3)
; br %r14
function %rotr_i16x8_imm(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i32 10
v2 = rotr.i16x8 v0, v1
return v2
}
; block0:
; verllh %v24, %v24, 6
; br %r14
function %rotr_i8x16_reg(i8x16, i8) -> i8x16 {
block0(v0: i8x16, v1: i8):
v2 = rotr.i8x16 v0, v1
return v2
}
; block0:
; lcr %r3, %r2
; verllb %v24, %v24, 0(%r3)
; br %r14
function %rotr_i8x16_imm(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i32 3
v2 = rotr.i8x16 v0, v1
return v2
}
; block0:
; verllb %v24, %v24, 5
; br %r14
function %rotl_i64x2_reg(i64x2, i64) -> i64x2 {
block0(v0: i64x2, v1: i64):
v2 = rotl.i64x2 v0, v1
return v2
}
; block0:
; verllg %v24, %v24, 0(%r2)
; br %r14
function %rotl_i64x2_imm(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i32 17
v2 = rotl.i64x2 v0, v1
return v2
}
; block0:
; verllg %v24, %v24, 17
; br %r14
function %rotl_i32x4_reg(i32x4, i32) -> i32x4 {
block0(v0: i32x4, v1: i32):
v2 = rotl.i32x4 v0, v1
return v2
}
; block0:
; verllf %v24, %v24, 0(%r2)
; br %r14
function %rotl_i32x4_imm(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 17
v2 = rotl.i32x4 v0, v1
return v2
}
; block0:
; verllf %v24, %v24, 17
; br %r14
function %rotl_i16x8_reg(i16x8, i16) -> i16x8 {
block0(v0: i16x8, v1: i16):
v2 = rotl.i16x8 v0, v1
return v2
}
; block0:
; verllh %v24, %v24, 0(%r2)
; br %r14
function %rotl_i16x8_imm(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i32 10
v2 = rotl.i16x8 v0, v1
return v2
}
; block0:
; verllh %v24, %v24, 10
; br %r14
function %rotl_i8x16_reg(i8x16, i8) -> i8x16 {
block0(v0: i8x16, v1: i8):
v2 = rotl.i8x16 v0, v1
return v2
}
; block0:
; verllb %v24, %v24, 0(%r2)
; br %r14
function %rotl_i8x16_imm(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i32 3
v2 = rotl.i8x16 v0, v1
return v2
}
; block0:
; verllb %v24, %v24, 3
; br %r14
function %ushr_i64x2_reg(i64x2, i64) -> i64x2 {
block0(v0: i64x2, v1: i64):
v2 = ushr.i64x2 v0, v1
return v2
}
; block0:
; vesrlg %v24, %v24, 0(%r2)
; br %r14
function %ushr_i64x2_imm(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i32 17
v2 = ushr.i64x2 v0, v1
return v2
}
; block0:
; vesrlg %v24, %v24, 17
; br %r14
function %ushr_i32x4_reg(i32x4, i32) -> i32x4 {
block0(v0: i32x4, v1: i32):
v2 = ushr.i32x4 v0, v1
return v2
}
; block0:
; vesrlf %v24, %v24, 0(%r2)
; br %r14
function %ushr_i32x4_imm(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 17
v2 = ushr.i32x4 v0, v1
return v2
}
; block0:
; vesrlf %v24, %v24, 17
; br %r14
function %ushr_i16x8_reg(i16x8, i16) -> i16x8 {
block0(v0: i16x8, v1: i16):
v2 = ushr.i16x8 v0, v1
return v2
}
; block0:
; vesrlh %v24, %v24, 0(%r2)
; br %r14
function %ushr_i16x8_imm(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i32 10
v2 = ushr.i16x8 v0, v1
return v2
}
; block0:
; vesrlh %v24, %v24, 10
; br %r14
function %ushr_i8x16_reg(i8x16, i8) -> i8x16 {
block0(v0: i8x16, v1: i8):
v2 = ushr.i8x16 v0, v1
return v2
}
; block0:
; vesrlb %v24, %v24, 0(%r2)
; br %r14
function %ushr_i8x16_imm(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i32 3
v2 = ushr.i8x16 v0, v1
return v2
}
; block0:
; vesrlb %v24, %v24, 3
; br %r14
function %ishl_i64x2_reg(i64x2, i64) -> i64x2 {
block0(v0: i64x2, v1: i64):
v2 = ishl.i64x2 v0, v1
return v2
}
; block0:
; veslg %v24, %v24, 0(%r2)
; br %r14
function %ishl_i64x2_imm(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i32 17
v2 = ishl.i64x2 v0, v1
return v2
}
; block0:
; veslg %v24, %v24, 17
; br %r14
function %ishl_i32x4_reg(i32x4, i32) -> i32x4 {
block0(v0: i32x4, v1: i32):
v2 = ishl.i32x4 v0, v1
return v2
}
; block0:
; veslf %v24, %v24, 0(%r2)
; br %r14
function %ishl_i32x4_imm(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 17
v2 = ishl.i32x4 v0, v1
return v2
}
; block0:
; veslf %v24, %v24, 17
; br %r14
function %ishl_i16x8_reg(i16x8, i16) -> i16x8 {
block0(v0: i16x8, v1: i16):
v2 = ishl.i16x8 v0, v1
return v2
}
; block0:
; veslh %v24, %v24, 0(%r2)
; br %r14
function %ishl_i16x8_imm(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i32 10
v2 = ishl.i16x8 v0, v1
return v2
}
; block0:
; veslh %v24, %v24, 10
; br %r14
function %ishl_i8x16_reg(i8x16, i8) -> i8x16 {
block0(v0: i8x16, v1: i8):
v2 = ishl.i8x16 v0, v1
return v2
}
; block0:
; veslb %v24, %v24, 0(%r2)
; br %r14
function %ishl_i8x16_imm(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i32 3
v2 = ishl.i8x16 v0, v1
return v2
}
; block0:
; veslb %v24, %v24, 3
; br %r14
function %sshr_i64x2_reg(i64x2, i64) -> i64x2 {
block0(v0: i64x2, v1: i64):
v2 = sshr.i64x2 v0, v1
return v2
}
; block0:
; vesrag %v24, %v24, 0(%r2)
; br %r14
function %sshr_i64x2_imm(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i32 17
v2 = sshr.i64x2 v0, v1
return v2
}
; block0:
; vesrag %v24, %v24, 17
; br %r14
function %sshr_i32x4_reg(i32x4, i32) -> i32x4 {
block0(v0: i32x4, v1: i32):
v2 = sshr.i32x4 v0, v1
return v2
}
; block0:
; vesraf %v24, %v24, 0(%r2)
; br %r14
function %sshr_i32x4_imm(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 17
v2 = sshr.i32x4 v0, v1
return v2
}
; block0:
; vesraf %v24, %v24, 17
; br %r14
function %sshr_i16x8_reg(i16x8, i16) -> i16x8 {
block0(v0: i16x8, v1: i16):
v2 = sshr.i16x8 v0, v1
return v2
}
; block0:
; vesrah %v24, %v24, 0(%r2)
; br %r14
function %sshr_i16x8_imm(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i32 10
v2 = sshr.i16x8 v0, v1
return v2
}
; block0:
; vesrah %v24, %v24, 10
; br %r14
function %sshr_i8x16_reg(i8x16, i8) -> i8x16 {
block0(v0: i8x16, v1: i8):
v2 = sshr.i8x16 v0, v1
return v2
}
; block0:
; vesrab %v24, %v24, 0(%r2)
; br %r14
function %sshr_i8x16_imm(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i32 3
v2 = sshr.i8x16 v0, v1
return v2
}
; block0:
; vesrab %v24, %v24, 3
; br %r14

@@ -0,0 +1,375 @@
test compile precise-output
target s390x arch13
function %uload8x8_big(i64) -> i16x8 {
block0(v0: i64):
v1 = uload8x8 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; vuplhb %v24, %v3
; br %r14
function %uload16x4_big(i64) -> i32x4 {
block0(v0: i64):
v1 = uload16x4 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; vuplhh %v24, %v3
; br %r14
function %uload32x2_big(i64) -> i64x2 {
block0(v0: i64):
v1 = uload32x2 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; vuplhf %v24, %v3
; br %r14
function %sload8x8_big(i64) -> i16x8 {
block0(v0: i64):
v1 = sload8x8 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; vuphb %v24, %v3
; br %r14
function %sload16x4_big(i64) -> i32x4 {
block0(v0: i64):
v1 = sload16x4 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; vuphh %v24, %v3
; br %r14
function %sload32x2_big(i64) -> i64x2 {
block0(v0: i64):
v1 = sload32x2 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; vuphf %v24, %v3
; br %r14
function %load_i8x16_big(i64) -> i8x16 {
block0(v0: i64):
v1 = load.i8x16 big v0
return v1
}
; block0:
; vl %v24, 0(%r2)
; br %r14
function %load_i16x8_big(i64) -> i16x8 {
block0(v0: i64):
v1 = load.i16x8 big v0
return v1
}
; block0:
; vl %v24, 0(%r2)
; br %r14
function %load_i32x4_big(i64) -> i32x4 {
block0(v0: i64):
v1 = load.i32x4 big v0
return v1
}
; block0:
; vl %v24, 0(%r2)
; br %r14
function %load_i64x2_big(i64) -> i64x2 {
block0(v0: i64):
v1 = load.i64x2 big v0
return v1
}
; block0:
; vl %v24, 0(%r2)
; br %r14
function %load_f32x4_big(i64) -> f32x4 {
block0(v0: i64):
v1 = load.f32x4 big v0
return v1
}
; block0:
; vl %v24, 0(%r2)
; br %r14
function %load_f64x2_big(i64) -> f64x2 {
block0(v0: i64):
v1 = load.f64x2 big v0
return v1
}
; block0:
; vl %v24, 0(%r2)
; br %r14
function %store_i8x16_big(i8x16, i64) {
block0(v0: i8x16, v1: i64):
store.i8x16 big v0, v1
return
}
; block0:
; vst %v24, 0(%r2)
; br %r14
function %store_i16x8_big(i16x8, i64) {
block0(v0: i16x8, v1: i64):
store.i16x8 big v0, v1
return
}
; block0:
; vst %v24, 0(%r2)
; br %r14
function %store_i32x4_big(i32x4, i64) {
block0(v0: i32x4, v1: i64):
store.i32x4 big v0, v1
return
}
; block0:
; vst %v24, 0(%r2)
; br %r14
function %store_i64x2_big(i64x2, i64) {
block0(v0: i64x2, v1: i64):
store.i64x2 big v0, v1
return
}
; block0:
; vst %v24, 0(%r2)
; br %r14
function %store_f32x4_big(f32x4, i64) {
block0(v0: f32x4, v1: i64):
store.f32x4 big v0, v1
return
}
; block0:
; vst %v24, 0(%r2)
; br %r14
function %store_f64x2_big(f64x2, i64) {
block0(v0: f64x2, v1: i64):
store.f64x2 big v0, v1
return
}
; block0:
; vst %v24, 0(%r2)
; br %r14
function %uload8x8_little(i64) -> i16x8 {
block0(v0: i64):
v1 = uload8x8 little v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuplhb %v24, %v3
; br %r14
function %uload16x4_little(i64) -> i32x4 {
block0(v0: i64):
v1 = uload16x4 little v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuplhh %v24, %v3
; br %r14
function %uload32x2_little(i64) -> i64x2 {
block0(v0: i64):
v1 = uload32x2 little v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuplhf %v24, %v3
; br %r14
function %sload8x8_little(i64) -> i16x8 {
block0(v0: i64):
v1 = sload8x8 little v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuphb %v24, %v3
; br %r14
function %sload16x4_little(i64) -> i32x4 {
block0(v0: i64):
v1 = sload16x4 little v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuphh %v24, %v3
; br %r14
function %sload32x2_little(i64) -> i64x2 {
block0(v0: i64):
v1 = sload32x2 little v0
return v1
}
; block0:
; vlebrg %v3, 0(%r2), 0
; vuphf %v24, %v3
; br %r14
function %load_i8x16_little(i64) -> i8x16 {
block0(v0: i64):
v1 = load.i8x16 little v0
return v1
}
; block0:
; vlbrq %v24, 0(%r2)
; br %r14
function %load_i16x8_little(i64) -> i16x8 {
block0(v0: i64):
v1 = load.i16x8 little v0
return v1
}
; block0:
; vlbrq %v24, 0(%r2)
; br %r14
function %load_i32x4_little(i64) -> i32x4 {
block0(v0: i64):
v1 = load.i32x4 little v0
return v1
}
; block0:
; vlbrq %v24, 0(%r2)
; br %r14
function %load_i64x2_little(i64) -> i64x2 {
block0(v0: i64):
v1 = load.i64x2 little v0
return v1
}
; block0:
; vlbrq %v24, 0(%r2)
; br %r14
function %load_f32x4_little(i64) -> f32x4 {
block0(v0: i64):
v1 = load.f32x4 little v0
return v1
}
; block0:
; vlbrq %v24, 0(%r2)
; br %r14
function %load_f64x2_little(i64) -> f64x2 {
block0(v0: i64):
v1 = load.f64x2 little v0
return v1
}
; block0:
; vlbrq %v24, 0(%r2)
; br %r14
function %store_i8x16_little(i8x16, i64) {
block0(v0: i8x16, v1: i64):
store.i8x16 little v0, v1
return
}
; block0:
; vstbrq %v24, 0(%r2)
; br %r14
function %store_i16x8_little(i16x8, i64) {
block0(v0: i16x8, v1: i64):
store.i16x8 little v0, v1
return
}
; block0:
; vstbrq %v24, 0(%r2)
; br %r14
function %store_i32x4_little(i32x4, i64) {
block0(v0: i32x4, v1: i64):
store.i32x4 little v0, v1
return
}
; block0:
; vstbrq %v24, 0(%r2)
; br %r14
function %store_i64x2_little(i64x2, i64) {
block0(v0: i64x2, v1: i64):
store.i64x2 little v0, v1
return
}
; block0:
; vstbrq %v24, 0(%r2)
; br %r14
function %store_f32x4_little(f32x4, i64) {
block0(v0: f32x4, v1: i64):
store.f32x4 little v0, v1
return
}
; block0:
; vstbrq %v24, 0(%r2)
; br %r14
function %store_f64x2_little(f64x2, i64) {
block0(v0: f64x2, v1: i64):
store.f64x2 little v0, v1
return
}
; block0:
; vstbrq %v24, 0(%r2)
; br %r14

@@ -0,0 +1,463 @@
test compile precise-output
target s390x
function %uload8x8_big(i64) -> i16x8 {
block0(v0: i64):
v1 = uload8x8 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; vuplhb %v24, %v3
; br %r14
function %uload16x4_big(i64) -> i32x4 {
block0(v0: i64):
v1 = uload16x4 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; vuplhh %v24, %v3
; br %r14
function %uload32x2_big(i64) -> i64x2 {
block0(v0: i64):
v1 = uload32x2 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; vuplhf %v24, %v3
; br %r14
function %sload8x8_big(i64) -> i16x8 {
block0(v0: i64):
v1 = sload8x8 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; vuphb %v24, %v3
; br %r14
function %sload16x4_big(i64) -> i32x4 {
block0(v0: i64):
v1 = sload16x4 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; vuphh %v24, %v3
; br %r14
function %sload32x2_big(i64) -> i64x2 {
block0(v0: i64):
v1 = sload32x2 big v0
return v1
}
; block0:
; ld %f3, 0(%r2)
; vuphf %v24, %v3
; br %r14
function %load_i8x16_big(i64) -> i8x16 {
block0(v0: i64):
v1 = load.i8x16 big v0
return v1
}
; block0:
; vl %v24, 0(%r2)
; br %r14
function %load_i16x8_big(i64) -> i16x8 {
block0(v0: i64):
v1 = load.i16x8 big v0
return v1
}
; block0:
; vl %v24, 0(%r2)
; br %r14
function %load_i32x4_big(i64) -> i32x4 {
block0(v0: i64):
v1 = load.i32x4 big v0
return v1
}
; block0:
; vl %v24, 0(%r2)
; br %r14
function %load_i64x2_big(i64) -> i64x2 {
block0(v0: i64):
v1 = load.i64x2 big v0
return v1
}
; block0:
; vl %v24, 0(%r2)
; br %r14
function %load_f32x4_big(i64) -> f32x4 {
block0(v0: i64):
v1 = load.f32x4 big v0
return v1
}
; block0:
; vl %v24, 0(%r2)
; br %r14
function %load_f64x2_big(i64) -> f64x2 {
block0(v0: i64):
v1 = load.f64x2 big v0
return v1
}
; block0:
; vl %v24, 0(%r2)
; br %r14
function %store_i8x16_big(i8x16, i64) {
block0(v0: i8x16, v1: i64):
store.i8x16 big v0, v1
return
}
; block0:
; vst %v24, 0(%r2)
; br %r14
function %store_i16x8_big(i16x8, i64) {
block0(v0: i16x8, v1: i64):
store.i16x8 big v0, v1
return
}
; block0:
; vst %v24, 0(%r2)
; br %r14
function %store_i32x4_big(i32x4, i64) {
block0(v0: i32x4, v1: i64):
store.i32x4 big v0, v1
return
}
; block0:
; vst %v24, 0(%r2)
; br %r14
function %store_i64x2_big(i64x2, i64) {
block0(v0: i64x2, v1: i64):
store.i64x2 big v0, v1
return
}
; block0:
; vst %v24, 0(%r2)
; br %r14
function %store_f32x4_big(f32x4, i64) {
block0(v0: f32x4, v1: i64):
store.f32x4 big v0, v1
return
}
; block0:
; vst %v24, 0(%r2)
; br %r14
function %store_f64x2_big(f64x2, i64) {
block0(v0: f64x2, v1: i64):
store.f64x2 big v0, v1
return
}
; block0:
; vst %v24, 0(%r2)
; br %r14
function %uload8x8_little(i64) -> i16x8 {
block0(v0: i64):
v1 = uload8x8 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuplhb %v24, %v5
; br %r14
function %uload16x4_little(i64) -> i32x4 {
block0(v0: i64):
v1 = uload16x4 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuplhh %v24, %v5
; br %r14
function %uload32x2_little(i64) -> i64x2 {
block0(v0: i64):
v1 = uload32x2 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuplhf %v24, %v5
; br %r14
function %sload8x8_little(i64) -> i16x8 {
block0(v0: i64):
v1 = sload8x8 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuphb %v24, %v5
; br %r14
function %sload16x4_little(i64) -> i32x4 {
block0(v0: i64):
v1 = sload16x4 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuphh %v24, %v5
; br %r14
function %sload32x2_little(i64) -> i64x2 {
block0(v0: i64):
v1 = sload32x2 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; ldgr %f5, %r5
; vuphf %v24, %v5
; br %r14
function %load_i8x16_little(i64) -> i8x16 {
block0(v0: i64):
v1 = load.i8x16 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; br %r14
function %load_i16x8_little(i64) -> i16x8 {
block0(v0: i64):
v1 = load.i16x8 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; br %r14
function %load_i32x4_little(i64) -> i32x4 {
block0(v0: i64):
v1 = load.i32x4 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; br %r14
function %load_i64x2_little(i64) -> i64x2 {
block0(v0: i64):
v1 = load.i64x2 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; br %r14
function %load_f32x4_little(i64) -> f32x4 {
block0(v0: i64):
v1 = load.f32x4 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; br %r14
function %load_f64x2_little(i64) -> f64x2 {
block0(v0: i64):
v1 = load.f64x2 little v0
return v1
}
; block0:
; lrvg %r5, 0(%r2)
; lrvg %r3, 8(%r2)
; vlvgp %v24, %r3, %r5
; br %r14
function %load_f64x2_sum_little(i64, i64) -> f64x2 {
block0(v0: i64, v1: i64):
v2 = iadd.i64 v0, v1
v3 = load.f64x2 little v2
return v3
}
; block0:
; lrvg %r4, 0(%r3,%r2)
; lrvg %r5, 8(%r3,%r2)
; vlvgp %v24, %r5, %r4
; br %r14
function %load_f64x2_off_little(i64) -> f64x2 {
block0(v0: i64):
v1 = load.f64x2 little v0+128
return v1
}
; block0:
; lrvg %r5, 128(%r2)
; lrvg %r3, 136(%r2)
; vlvgp %v24, %r3, %r5
; br %r14
function %store_i8x16_little(i8x16, i64) {
block0(v0: i8x16, v1: i64):
store.i8x16 little v0, v1
return
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; br %r14
function %store_i16x8_little(i16x8, i64) {
block0(v0: i16x8, v1: i64):
store.i16x8 little v0, v1
return
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; br %r14
function %store_i32x4_little(i32x4, i64) {
block0(v0: i32x4, v1: i64):
store.i32x4 little v0, v1
return
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; br %r14
function %store_i64x2_little(i64x2, i64) {
block0(v0: i64x2, v1: i64):
store.i64x2 little v0, v1
return
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; br %r14
function %store_f32x4_little(f32x4, i64) {
block0(v0: f32x4, v1: i64):
store.f32x4 little v0, v1
return
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; br %r14
function %store_f64x2_little(f64x2, i64) {
block0(v0: f64x2, v1: i64):
store.f64x2 little v0, v1
return
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 0(%r2)
; strvg %r4, 8(%r2)
; br %r14
function %store_f64x2_sum_little(f64x2, i64, i64) {
block0(v0: f64x2, v1: i64, v2: i64):
v3 = iadd.i64 v1, v2
store.f64x2 little v0, v3
return
}
; block0:
; vlgvg %r5, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r5, 0(%r3,%r2)
; strvg %r4, 8(%r3,%r2)
; br %r14
function %store_f64x2_off_little(f64x2, i64) {
block0(v0: f64x2, v1: i64):
store.f64x2 little v0, v1+128
return
}
; block0:
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v24, 0
; strvg %r3, 128(%r2)
; strvg %r4, 136(%r2)
; br %r14

@@ -2,6 +2,7 @@ test interpret
test run
target x86_64
target aarch64
; target s390x FIXME: This currently fails under qemu due to a qemu bug
function %fmax_p_f32(f32, f32) -> f32 {
block0(v0: f32, v1: f32):

@@ -1,7 +1,7 @@
test run
; target s390x TODO: Not yet implemented on s390x
set enable_simd
target aarch64
; target s390x FIXME: This currently fails under qemu due to a qemu bug
target x86_64 skylake
function %fmin_pseudo_f32x4(f32x4, f32x4) -> f32x4 {

@@ -2,6 +2,7 @@ test interpret
test run
target x86_64
target aarch64
; target s390x FIXME: This currently fails under qemu due to a qemu bug
function %fmin_p_f32(f32, f32) -> f32 {
block0(v0: f32, v1: f32):

@@ -1,7 +1,8 @@
test run
target aarch64
target s390x
; TODO: Merge this with the main shifts file when x86_64 & s390x passes these.
; TODO: Merge this with the main shifts file when x86_64 passes these.
function %ishl_i16_i64(i16, i64) -> i16 {
block0(v0: i16, v1: i64):

@@ -3,6 +3,7 @@
; simd-arithmetic-nondeterministic*.clif as well.
test run
target aarch64
target s390x
function %fmax_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):

@@ -1,6 +1,6 @@
test run
target aarch64
; target s390x TODO: Not yet implemented on s390x
target s390x
set enable_simd
target x86_64 skylake

@@ -1,6 +1,6 @@
test run
target aarch64
; target s390x TODO: Not yet implemented on s390x
target s390x
set opt_level=speed_and_size
set enable_simd
target x86_64 skylake

@@ -1,6 +1,7 @@
test run
set enable_simd
target aarch64
target s390x
target x86_64 has_sse3 has_ssse3 has_sse41
function %bitselect_i32x4(i32x4, i32x4, i32x4) -> i32x4 {

@@ -1,6 +1,7 @@
test run
set enable_simd
target aarch64
; target s390x FIXME: s390x implements modulo semantics for shift counts
target x86_64 skylake
; TODO: once available, replace all lane extraction with `icmp + all_ones`

@@ -1,6 +1,6 @@
test run
target aarch64
; target s390x TODO: Not yet implemented on s390x
; target s390x FIXME: s390x implements modulo semantics for shift counts
set enable_simd
target x86_64 skylake

@@ -1,6 +1,6 @@
test run
target aarch64
; target s390x TODO: Not yet implemented on s390x
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,6 @@
test run
target aarch64
; target s390x TODO: Not yet implemented on s390x
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
function %iaddp_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41
@@ -30,4 +31,4 @@ block0(v0: i64x2, v1: i64):
v2 = insertlane v0, v1, 0
return v2
}
; run: %insertlane_0([1 1], 5000000000) == [5000000000 1]

@@ -1,6 +1,6 @@
test run
target aarch64
; target s390x TODO: Not yet implemented on s390x
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,6 @@
test run
target aarch64
; target s390x TODO: Not yet implemented on s390x
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test run
target aarch64
target x86_64
target s390x
function %imin_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
;; x86_64 hasn't implemented this for `i32x4`
function %sqmulrs_i32x4(i32x4, i32x4) -> i32x4 {

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
; x86_64 panics: `Did not match fcvt input!
; thread 'worker #0' panicked at 'register allocation: Analysis(EntryLiveinValues([v2V]))', cranelift/codegen/src/machinst/compile.rs:96:10`

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
target x86_64
function %vall_true_b8x16(b8x16) -> b1 {

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
target x86_64
function %vany_true_b8x16(b8x16) -> b1 {

@@ -1,5 +1,5 @@
test run
; target s390x TODO: Not yet implemented on s390x
target s390x
target aarch64
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,6 @@
test interpret
test run
; target s390x TODO: Not yet implemented on s390x
target s390x
target aarch64
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

@@ -1,5 +1,6 @@
test run
target aarch64
target s390x
; raw_bitcast is needed to get around issue with "bint" on aarch64