Merge pull request #2310 from akirilov-arm/vector_constants
Cranelift AArch64: Improve code generation for vector constants
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
//! Instruction predicates/properties, shared by various analyses.
|
||||
|
||||
use crate::ir::{DataFlowGraph, Function, Inst, InstructionData, Opcode};
|
||||
use crate::machinst::ty_bits;
|
||||
use cranelift_entity::EntityRef;
|
||||
|
||||
/// Preserve instructions with used result values.
|
||||
@@ -59,7 +60,21 @@ pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option<u64> {
|
||||
&InstructionData::UnaryImm { imm, .. } => Some(imm.bits() as u64),
|
||||
&InstructionData::UnaryIeee32 { imm, .. } => Some(imm.bits() as u64),
|
||||
&InstructionData::UnaryIeee64 { imm, .. } => Some(imm.bits()),
|
||||
&InstructionData::UnaryBool { imm, .. } => Some(if imm { 1 } else { 0 }),
|
||||
&InstructionData::UnaryBool { imm, .. } => {
|
||||
let imm = if imm {
|
||||
let bits = ty_bits(func.dfg.value_type(func.dfg.inst_results(inst)[0]));
|
||||
|
||||
if bits < 64 {
|
||||
(1u64 << bits) - 1
|
||||
} else {
|
||||
u64::MAX
|
||||
}
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
Some(imm)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -609,10 +609,27 @@ pub enum VectorSize {
|
||||
}
|
||||
|
||||
impl VectorSize {
|
||||
/// Get the vector operand size with the given scalar size as lane size.
|
||||
pub fn from_lane_size(size: ScalarSize, is_128bit: bool) -> VectorSize {
|
||||
match (size, is_128bit) {
|
||||
(ScalarSize::Size8, false) => VectorSize::Size8x8,
|
||||
(ScalarSize::Size8, true) => VectorSize::Size8x16,
|
||||
(ScalarSize::Size16, false) => VectorSize::Size16x4,
|
||||
(ScalarSize::Size16, true) => VectorSize::Size16x8,
|
||||
(ScalarSize::Size32, false) => VectorSize::Size32x2,
|
||||
(ScalarSize::Size32, true) => VectorSize::Size32x4,
|
||||
(ScalarSize::Size64, true) => VectorSize::Size64x2,
|
||||
_ => panic!("Unexpected scalar FP operand size: {:?}", size),
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert from a type into a vector operand size.
|
||||
pub fn from_ty(ty: Type) -> VectorSize {
|
||||
match ty {
|
||||
B8X16 => VectorSize::Size8x16,
|
||||
B16X8 => VectorSize::Size16x8,
|
||||
B32X4 => VectorSize::Size32x4,
|
||||
B64X2 => VectorSize::Size64x2,
|
||||
F32X2 => VectorSize::Size32x2,
|
||||
F32X4 => VectorSize::Size32x4,
|
||||
F64X2 => VectorSize::Size64x2,
|
||||
|
||||
@@ -437,6 +437,21 @@ fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
|
||||
| machreg_to_gpr(rt)
|
||||
}
|
||||
|
||||
fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
|
||||
let abc = (imm >> 5) as u32;
|
||||
let defgh = (imm & 0b11111) as u32;
|
||||
|
||||
debug_assert_eq!(cmode & 0b1111, cmode);
|
||||
debug_assert_eq!(q_op & 0b11, q_op);
|
||||
|
||||
0b0_0_0_0111100000_000_0000_01_00000_00000
|
||||
| (q_op << 29)
|
||||
| (abc << 16)
|
||||
| (cmode << 12)
|
||||
| (defgh << 5)
|
||||
| machreg_to_vec(rd.to_reg())
|
||||
}
|
||||
|
||||
/// State carried between emissions of a sequence of instructions.
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub struct EmitState {
|
||||
@@ -1588,19 +1603,6 @@ impl MachInstEmit for Inst {
|
||||
};
|
||||
sink.put4(enc_inttofpu(top16, rd, rn));
|
||||
}
|
||||
&Inst::LoadFpuConst32 { rd, const_data } => {
|
||||
let inst = Inst::FpuLoad32 {
|
||||
rd,
|
||||
mem: AMode::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None,
|
||||
};
|
||||
inst.emit(sink, emit_info, state);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(8),
|
||||
};
|
||||
inst.emit(sink, emit_info, state);
|
||||
sink.put4(const_data.to_bits());
|
||||
}
|
||||
&Inst::LoadFpuConst64 { rd, const_data } => {
|
||||
let inst = Inst::FpuLoad64 {
|
||||
rd,
|
||||
@@ -1612,7 +1614,7 @@ impl MachInstEmit for Inst {
|
||||
dest: BranchTarget::ResolvedOffset(12),
|
||||
};
|
||||
inst.emit(sink, emit_info, state);
|
||||
sink.put8(const_data.to_bits());
|
||||
sink.put8(const_data);
|
||||
}
|
||||
&Inst::LoadFpuConst128 { rd, const_data } => {
|
||||
let inst = Inst::FpuLoad128 {
|
||||
@@ -1751,6 +1753,53 @@ impl MachInstEmit for Inst {
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
);
|
||||
}
|
||||
&Inst::VecDupImm {
|
||||
rd,
|
||||
imm,
|
||||
invert,
|
||||
size,
|
||||
} => {
|
||||
let (imm, shift, shift_ones) = imm.value();
|
||||
let (op, cmode) = match size.lane_size() {
|
||||
ScalarSize::Size8 => {
|
||||
assert!(!invert);
|
||||
assert_eq!(shift, 0);
|
||||
|
||||
(0, 0b1110)
|
||||
}
|
||||
ScalarSize::Size16 => {
|
||||
let s = shift & 8;
|
||||
|
||||
assert!(!shift_ones);
|
||||
assert_eq!(s, shift);
|
||||
|
||||
(invert as u32, 0b1000 | (s >> 2))
|
||||
}
|
||||
ScalarSize::Size32 => {
|
||||
if shift_ones {
|
||||
assert!(shift == 8 || shift == 16);
|
||||
|
||||
(invert as u32, 0b1100 | (shift >> 4))
|
||||
} else {
|
||||
let s = shift & 24;
|
||||
|
||||
assert_eq!(s, shift);
|
||||
|
||||
(invert as u32, 0b0000 | (s >> 2))
|
||||
}
|
||||
}
|
||||
ScalarSize::Size64 => {
|
||||
assert!(!invert);
|
||||
assert_eq!(shift, 0);
|
||||
|
||||
(1, 0b1110)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let q_op = op | ((size.is_128bits() as u32) << 1);
|
||||
|
||||
sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
|
||||
}
|
||||
&Inst::VecExtend {
|
||||
t,
|
||||
rd,
|
||||
@@ -1803,8 +1852,8 @@ impl MachInstEmit for Inst {
|
||||
&Inst::VecMovElement {
|
||||
rd,
|
||||
rn,
|
||||
idx1,
|
||||
idx2,
|
||||
dest_idx,
|
||||
src_idx,
|
||||
size,
|
||||
} => {
|
||||
let (imm5, shift) = match size.lane_size() {
|
||||
@@ -1815,10 +1864,10 @@ impl MachInstEmit for Inst {
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let mask = 0b11111 >> shift;
|
||||
debug_assert_eq!(idx1 & mask, idx1);
|
||||
debug_assert_eq!(idx2 & mask, idx2);
|
||||
let imm4 = (idx2 as u32) << (shift - 1);
|
||||
let imm5 = imm5 | ((idx1 as u32) << shift);
|
||||
debug_assert_eq!(dest_idx & mask, dest_idx);
|
||||
debug_assert_eq!(src_idx & mask, src_idx);
|
||||
let imm4 = (src_idx as u32) << (shift - 1);
|
||||
let imm5 = imm5 | ((dest_idx as u32) << shift);
|
||||
sink.put4(
|
||||
0b011_01110000_00000_0_0000_1_00000_00000
|
||||
| (imm5 << 16)
|
||||
|
||||
@@ -2034,6 +2034,26 @@ fn test_aarch64_binemit() {
|
||||
"5205084E",
|
||||
"dup v18.2d, v10.d[0]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecDupImm {
|
||||
rd: writable_vreg(31),
|
||||
imm: ASIMDMovModImm::maybe_from_u64(255, ScalarSize::Size8).unwrap(),
|
||||
invert: false,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"FFE7074F",
|
||||
"movi v31.16b, #255",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecDupImm {
|
||||
rd: writable_vreg(0),
|
||||
imm: ASIMDMovModImm::zero(),
|
||||
invert: true,
|
||||
size: VectorSize::Size16x4,
|
||||
},
|
||||
"0084002F",
|
||||
"mvni v0.4h, #0",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecExtend {
|
||||
t: VecExtendOp::Sxtl8,
|
||||
@@ -2099,8 +2119,8 @@ fn test_aarch64_binemit() {
|
||||
Inst::VecMovElement {
|
||||
rd: writable_vreg(0),
|
||||
rn: vreg(31),
|
||||
idx1: 7,
|
||||
idx2: 7,
|
||||
dest_idx: 7,
|
||||
src_idx: 7,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"E0771E6E",
|
||||
@@ -2111,8 +2131,8 @@ fn test_aarch64_binemit() {
|
||||
Inst::VecMovElement {
|
||||
rd: writable_vreg(31),
|
||||
rn: vreg(16),
|
||||
idx1: 1,
|
||||
idx2: 0,
|
||||
dest_idx: 1,
|
||||
src_idx: 0,
|
||||
size: VectorSize::Size32x2,
|
||||
},
|
||||
"1F060C6E",
|
||||
@@ -4781,19 +4801,10 @@ fn test_aarch64_binemit() {
|
||||
"str q16, [x8, x9, LSL #4]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::LoadFpuConst32 {
|
||||
rd: writable_vreg(16),
|
||||
const_data: 1.0,
|
||||
},
|
||||
"5000001C020000140000803F",
|
||||
"ldr s16, pc+8 ; b 8 ; data.f32 1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::LoadFpuConst64 {
|
||||
rd: writable_vreg(16),
|
||||
const_data: 1.0,
|
||||
const_data: 1.0_f64.to_bits(),
|
||||
},
|
||||
"5000005C03000014000000000000F03F",
|
||||
"ldr d16, pc+8 ; b 12 ; data.f64 1",
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
#[allow(dead_code)]
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::Type;
|
||||
use crate::isa::aarch64::inst::OperandSize;
|
||||
use crate::isa::aarch64::inst::{OperandSize, ScalarSize};
|
||||
|
||||
use regalloc::{PrettyPrint, RealRegUniverse};
|
||||
|
||||
@@ -667,6 +667,40 @@ impl MoveWideConst {
|
||||
}
|
||||
}
|
||||
|
||||
/// Advanced SIMD modified immediate as used by MOVI/MVNI.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct ASIMDMovModImm {
|
||||
imm: u8,
|
||||
shift: u8,
|
||||
shift_ones: bool,
|
||||
}
|
||||
|
||||
impl ASIMDMovModImm {
|
||||
pub fn maybe_from_u64(value: u64, size: ScalarSize) -> Option<ASIMDMovModImm> {
|
||||
match size {
|
||||
ScalarSize::Size8 => Some(ASIMDMovModImm {
|
||||
imm: value as u8,
|
||||
shift: 0,
|
||||
shift_ones: false,
|
||||
}),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a zero immediate of this format.
|
||||
pub fn zero() -> Self {
|
||||
ASIMDMovModImm {
|
||||
imm: 0,
|
||||
shift: 0,
|
||||
shift_ones: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn value(&self) -> (u8, u32, bool) {
|
||||
(self.imm, self.shift as u32, self.shift_ones)
|
||||
}
|
||||
}
|
||||
|
||||
impl PrettyPrint for NZCV {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
let fmt = |c: char, v| if v { c.to_ascii_uppercase() } else { c };
|
||||
@@ -746,6 +780,17 @@ impl PrettyPrint for MoveWideConst {
|
||||
}
|
||||
}
|
||||
|
||||
impl PrettyPrint for ASIMDMovModImm {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
if self.shift == 0 {
|
||||
format!("#{}", self.imm)
|
||||
} else {
|
||||
let shift_type = if self.shift_ones { "MSL" } else { "LSL" };
|
||||
format!("#{}, {} #{}", self.imm, shift_type, self.shift)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
@@ -18,6 +18,7 @@ use regalloc::{RegUsageCollector, RegUsageMapper};
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use core::convert::TryFrom;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::string::{String, ToString};
|
||||
|
||||
@@ -826,14 +827,9 @@ pub enum Inst {
|
||||
srcloc: Option<SourceLoc>,
|
||||
},
|
||||
|
||||
LoadFpuConst32 {
|
||||
rd: Writable<Reg>,
|
||||
const_data: f32,
|
||||
},
|
||||
|
||||
LoadFpuConst64 {
|
||||
rd: Writable<Reg>,
|
||||
const_data: f64,
|
||||
const_data: u64,
|
||||
},
|
||||
|
||||
LoadFpuConst128 {
|
||||
@@ -922,6 +918,14 @@ pub enum Inst {
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Duplicate immediate to vector.
|
||||
VecDupImm {
|
||||
rd: Writable<Reg>,
|
||||
imm: ASIMDMovModImm,
|
||||
invert: bool,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Vector extend.
|
||||
VecExtend {
|
||||
t: VecExtendOp,
|
||||
@@ -934,8 +938,8 @@ pub enum Inst {
|
||||
VecMovElement {
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
idx1: u8,
|
||||
idx2: u8,
|
||||
dest_idx: u8,
|
||||
src_idx: u8,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
@@ -1297,29 +1301,146 @@ impl Inst {
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an instruction that loads a 32-bit floating-point constant.
|
||||
pub fn load_fp_constant32(rd: Writable<Reg>, value: f32) -> Inst {
|
||||
// TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent bits.
|
||||
Inst::LoadFpuConst32 {
|
||||
rd,
|
||||
const_data: value,
|
||||
/// Create instructions that load a 32-bit floating-point constant.
|
||||
pub fn load_fp_constant32<F: FnMut(RegClass, Type) -> Writable<Reg>>(
|
||||
rd: Writable<Reg>,
|
||||
value: u32,
|
||||
mut alloc_tmp: F,
|
||||
) -> SmallVec<[Inst; 4]> {
|
||||
if value == 0 {
|
||||
smallvec![Inst::VecDupImm {
|
||||
rd,
|
||||
imm: ASIMDMovModImm::zero(),
|
||||
invert: false,
|
||||
size: VectorSize::Size8x8
|
||||
}]
|
||||
} else {
|
||||
// TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent bits.
|
||||
let tmp = alloc_tmp(RegClass::I64, I32);
|
||||
let mut insts = Inst::load_constant(tmp, value as u64);
|
||||
|
||||
insts.push(Inst::MovToFpu {
|
||||
rd,
|
||||
rn: tmp.to_reg(),
|
||||
});
|
||||
|
||||
insts
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an instruction that loads a 64-bit floating-point constant.
|
||||
pub fn load_fp_constant64(rd: Writable<Reg>, value: f64) -> Inst {
|
||||
// TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent bits.
|
||||
Inst::LoadFpuConst64 {
|
||||
rd,
|
||||
const_data: value,
|
||||
/// Create instructions that load a 64-bit floating-point constant.
|
||||
pub fn load_fp_constant64<F: FnMut(RegClass, Type) -> Writable<Reg>>(
|
||||
rd: Writable<Reg>,
|
||||
const_data: u64,
|
||||
mut alloc_tmp: F,
|
||||
) -> SmallVec<[Inst; 4]> {
|
||||
if let Ok(const_data) = u32::try_from(const_data) {
|
||||
Inst::load_fp_constant32(rd, const_data, alloc_tmp)
|
||||
// TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent bits.
|
||||
// Also, treat it as half of a 128-bit vector and consider replicated patterns. Scalar MOVI
|
||||
// might also be an option.
|
||||
} else if const_data & (u32::MAX as u64) == 0 {
|
||||
let tmp = alloc_tmp(RegClass::I64, I64);
|
||||
let mut insts = Inst::load_constant(tmp, const_data);
|
||||
|
||||
insts.push(Inst::MovToFpu {
|
||||
rd,
|
||||
rn: tmp.to_reg(),
|
||||
});
|
||||
|
||||
insts
|
||||
} else {
|
||||
smallvec![Inst::LoadFpuConst64 { rd, const_data }]
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an instruction that loads a 128-bit vector constant.
|
||||
pub fn load_fp_constant128(rd: Writable<Reg>, value: u128) -> Inst {
|
||||
Inst::LoadFpuConst128 {
|
||||
rd,
|
||||
const_data: value,
|
||||
/// Create instructions that load a 128-bit vector constant.
|
||||
pub fn load_fp_constant128<F: FnMut(RegClass, Type) -> Writable<Reg>>(
|
||||
rd: Writable<Reg>,
|
||||
const_data: u128,
|
||||
alloc_tmp: F,
|
||||
) -> SmallVec<[Inst; 5]> {
|
||||
if let Ok(const_data) = u64::try_from(const_data) {
|
||||
SmallVec::from(&Inst::load_fp_constant64(rd, const_data, alloc_tmp)[..])
|
||||
} else if let Some((pattern, size)) =
|
||||
Inst::get_replicated_vector_pattern(const_data, ScalarSize::Size64)
|
||||
{
|
||||
Inst::load_replicated_vector_pattern(
|
||||
rd,
|
||||
pattern,
|
||||
VectorSize::from_lane_size(size, true),
|
||||
alloc_tmp,
|
||||
)
|
||||
} else {
|
||||
smallvec![Inst::LoadFpuConst128 { rd, const_data }]
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine whether a 128-bit constant represents a vector consisting of elements with
|
||||
/// the same value.
|
||||
pub fn get_replicated_vector_pattern(
|
||||
value: u128,
|
||||
size: ScalarSize,
|
||||
) -> Option<(u64, ScalarSize)> {
|
||||
let (mask, shift, next_size) = match size {
|
||||
ScalarSize::Size8 => (u8::MAX as u128, 8, ScalarSize::Size128),
|
||||
ScalarSize::Size16 => (u16::MAX as u128, 16, ScalarSize::Size8),
|
||||
ScalarSize::Size32 => (u32::MAX as u128, 32, ScalarSize::Size16),
|
||||
ScalarSize::Size64 => (u64::MAX as u128, 64, ScalarSize::Size32),
|
||||
_ => return None,
|
||||
};
|
||||
let mut r = None;
|
||||
let v = value & mask;
|
||||
|
||||
if (value >> shift) & mask == v {
|
||||
r = Inst::get_replicated_vector_pattern(v, next_size);
|
||||
|
||||
if r.is_none() {
|
||||
r = Some((v as u64, size));
|
||||
}
|
||||
}
|
||||
|
||||
r
|
||||
}
|
||||
|
||||
/// Create instructions that load a 128-bit vector constant consisting of elements with
|
||||
/// the same value.
|
||||
pub fn load_replicated_vector_pattern<F: FnMut(RegClass, Type) -> Writable<Reg>>(
|
||||
rd: Writable<Reg>,
|
||||
pattern: u64,
|
||||
size: VectorSize,
|
||||
mut alloc_tmp: F,
|
||||
) -> SmallVec<[Inst; 5]> {
|
||||
let lane_size = size.lane_size();
|
||||
|
||||
if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) {
|
||||
smallvec![Inst::VecDupImm {
|
||||
rd,
|
||||
imm,
|
||||
invert: false,
|
||||
size
|
||||
}]
|
||||
} else if let Some(imm) = ASIMDMovModImm::maybe_from_u64(!pattern, lane_size) {
|
||||
debug_assert_ne!(lane_size, ScalarSize::Size8);
|
||||
debug_assert_ne!(lane_size, ScalarSize::Size64);
|
||||
|
||||
smallvec![Inst::VecDupImm {
|
||||
rd,
|
||||
imm,
|
||||
invert: true,
|
||||
size
|
||||
}]
|
||||
} else {
|
||||
let tmp = alloc_tmp(RegClass::I64, I64);
|
||||
let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern)[..]);
|
||||
|
||||
insts.push(Inst::VecDup {
|
||||
rd,
|
||||
rn: tmp.to_reg(),
|
||||
size,
|
||||
});
|
||||
|
||||
insts
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1704,9 +1825,7 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_use(rd);
|
||||
memarg_regs(mem, collector);
|
||||
}
|
||||
&Inst::LoadFpuConst32 { rd, .. }
|
||||
| &Inst::LoadFpuConst64 { rd, .. }
|
||||
| &Inst::LoadFpuConst128 { rd, .. } => {
|
||||
&Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => {
|
||||
collector.add_def(rd);
|
||||
}
|
||||
&Inst::FpuToInt { rd, rn, .. } => {
|
||||
@@ -1746,6 +1865,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_def(rd);
|
||||
collector.add_use(rn);
|
||||
}
|
||||
&Inst::VecDupImm { rd, .. } => {
|
||||
collector.add_def(rd);
|
||||
}
|
||||
&Inst::VecExtend { rd, rn, .. } => {
|
||||
collector.add_def(rd);
|
||||
collector.add_use(rn);
|
||||
@@ -2344,9 +2466,6 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
map_use(mapper, rd);
|
||||
map_mem(mapper, mem);
|
||||
}
|
||||
&mut Inst::LoadFpuConst32 { ref mut rd, .. } => {
|
||||
map_def(mapper, rd);
|
||||
}
|
||||
&mut Inst::LoadFpuConst64 { ref mut rd, .. } => {
|
||||
map_def(mapper, rd);
|
||||
}
|
||||
@@ -2441,6 +2560,9 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
map_def(mapper, rd);
|
||||
map_use(mapper, rn);
|
||||
}
|
||||
&mut Inst::VecDupImm { ref mut rd, .. } => {
|
||||
map_def(mapper, rd);
|
||||
}
|
||||
&mut Inst::VecExtend {
|
||||
ref mut rd,
|
||||
ref mut rn,
|
||||
@@ -2631,19 +2753,12 @@ impl MachInst for Inst {
|
||||
to_reg: Writable<Reg>,
|
||||
value: u64,
|
||||
ty: Type,
|
||||
_alloc_tmp: F,
|
||||
alloc_tmp: F,
|
||||
) -> SmallVec<[Inst; 4]> {
|
||||
if ty == F64 {
|
||||
let mut ret = SmallVec::new();
|
||||
ret.push(Inst::load_fp_constant64(to_reg, f64::from_bits(value)));
|
||||
ret
|
||||
Inst::load_fp_constant64(to_reg, value, alloc_tmp)
|
||||
} else if ty == F32 {
|
||||
let mut ret = SmallVec::new();
|
||||
ret.push(Inst::load_fp_constant32(
|
||||
to_reg,
|
||||
f32::from_bits(value as u32),
|
||||
));
|
||||
ret
|
||||
Inst::load_fp_constant32(to_reg, value as u32, alloc_tmp)
|
||||
} else {
|
||||
// Must be an integer type.
|
||||
debug_assert!(
|
||||
@@ -3216,13 +3331,9 @@ impl Inst {
|
||||
let mem = mem.show_rru(mb_rru);
|
||||
format!("{}str {}, {}", mem_str, rd, mem)
|
||||
}
|
||||
&Inst::LoadFpuConst32 { rd, const_data } => {
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32);
|
||||
format!("ldr {}, pc+8 ; b 8 ; data.f32 {}", rd, const_data)
|
||||
}
|
||||
&Inst::LoadFpuConst64 { rd, const_data } => {
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
|
||||
format!("ldr {}, pc+8 ; b 12 ; data.f64 {}", rd, const_data)
|
||||
format!("ldr {}, pc+8 ; b 12 ; data.f64 {}", rd, f64::from_bits(const_data))
|
||||
}
|
||||
&Inst::LoadFpuConst128 { rd, const_data } => {
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size128);
|
||||
@@ -3330,6 +3441,17 @@ impl Inst {
|
||||
let rn = show_vreg_element(rn, mb_rru, 0, size);
|
||||
format!("dup {}, {}", rd, rn)
|
||||
}
|
||||
&Inst::VecDupImm { rd, imm, invert, size } => {
|
||||
let imm = imm.show_rru(mb_rru);
|
||||
let op = if invert {
|
||||
"mvni"
|
||||
} else {
|
||||
"movi"
|
||||
};
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
|
||||
|
||||
format!("{} {}, {}", op, rd, imm)
|
||||
}
|
||||
&Inst::VecExtend { t, rd, rn, high_half } => {
|
||||
let (op, dest, src) = match (t, high_half) {
|
||||
(VecExtendOp::Sxtl8, false) => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8),
|
||||
@@ -3352,12 +3474,12 @@ impl Inst {
|
||||
&Inst::VecMovElement {
|
||||
rd,
|
||||
rn,
|
||||
idx1,
|
||||
idx2,
|
||||
dest_idx,
|
||||
src_idx,
|
||||
size,
|
||||
} => {
|
||||
let rd = show_vreg_element(rd.to_reg(), mb_rru, idx1, size);
|
||||
let rn = show_vreg_element(rn, mb_rru, idx2, size);
|
||||
let rd = show_vreg_element(rd.to_reg(), mb_rru, dest_idx, size);
|
||||
let rn = show_vreg_element(rn, mb_rru, src_idx, size);
|
||||
format!("mov {}, {}", rd, rn)
|
||||
}
|
||||
&Inst::VecMiscNarrow { op, rd, rn, size, high_half } => {
|
||||
|
||||
@@ -813,7 +813,11 @@ pub(crate) fn lower_constant_f32<C: LowerCtx<I = Inst>>(
|
||||
rd: Writable<Reg>,
|
||||
value: f32,
|
||||
) {
|
||||
ctx.emit(Inst::load_fp_constant32(rd, value));
|
||||
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
|
||||
|
||||
for inst in Inst::load_fp_constant32(rd, value.to_bits(), alloc_tmp) {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
|
||||
@@ -821,7 +825,11 @@ pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
|
||||
rd: Writable<Reg>,
|
||||
value: f64,
|
||||
) {
|
||||
ctx.emit(Inst::load_fp_constant64(rd, value));
|
||||
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
|
||||
|
||||
for inst in Inst::load_fp_constant64(rd, value.to_bits(), alloc_tmp) {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
|
||||
@@ -829,7 +837,38 @@ pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
|
||||
rd: Writable<Reg>,
|
||||
value: u128,
|
||||
) {
|
||||
ctx.emit(Inst::load_fp_constant128(rd, value));
|
||||
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
|
||||
|
||||
for inst in Inst::load_fp_constant128(rd, value, alloc_tmp) {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn lower_splat_const<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
rd: Writable<Reg>,
|
||||
value: u64,
|
||||
size: VectorSize,
|
||||
) {
|
||||
let (value, narrow_size) = match size.lane_size() {
|
||||
ScalarSize::Size8 => (value as u8 as u64, ScalarSize::Size128),
|
||||
ScalarSize::Size16 => (value as u16 as u64, ScalarSize::Size8),
|
||||
ScalarSize::Size32 => (value as u32 as u64, ScalarSize::Size16),
|
||||
ScalarSize::Size64 => (value, ScalarSize::Size32),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let (value, size) = match Inst::get_replicated_vector_pattern(value as u128, narrow_size) {
|
||||
Some((value, lane_size)) => (
|
||||
value,
|
||||
VectorSize::from_lane_size(lane_size, size.is_128bits()),
|
||||
),
|
||||
None => (value, size),
|
||||
};
|
||||
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
|
||||
|
||||
for inst in Inst::load_replicated_vector_pattern(rd, value, size, alloc_tmp) {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
|
||||
|
||||
@@ -2013,24 +2013,47 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx.emit(Inst::VecMovElement {
|
||||
rd,
|
||||
rn,
|
||||
idx1: idx,
|
||||
idx2: 0,
|
||||
dest_idx: idx,
|
||||
src_idx: 0,
|
||||
size,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Splat => {
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let input_ty = ctx.input_ty(insn, 0);
|
||||
let size = VectorSize::from_ty(ty.unwrap());
|
||||
let inst = if ty_has_int_representation(input_ty) {
|
||||
Inst::VecDup { rd, rn, size }
|
||||
|
||||
if let Some((_, insn)) = maybe_input_insn_multi(
|
||||
ctx,
|
||||
inputs[0],
|
||||
&[
|
||||
Opcode::Bconst,
|
||||
Opcode::F32const,
|
||||
Opcode::F64const,
|
||||
Opcode::Iconst,
|
||||
],
|
||||
) {
|
||||
lower_splat_const(ctx, rd, ctx.get_constant(insn).unwrap(), size);
|
||||
} else if let Some(insn) =
|
||||
maybe_input_insn_via_conv(ctx, inputs[0], Opcode::Iconst, Opcode::Ireduce)
|
||||
{
|
||||
lower_splat_const(ctx, rd, ctx.get_constant(insn).unwrap(), size);
|
||||
} else if let Some(insn) =
|
||||
maybe_input_insn_via_conv(ctx, inputs[0], Opcode::Bconst, Opcode::Breduce)
|
||||
{
|
||||
lower_splat_const(ctx, rd, ctx.get_constant(insn).unwrap(), size);
|
||||
} else {
|
||||
Inst::VecDupFromFpu { rd, rn, size }
|
||||
};
|
||||
ctx.emit(inst);
|
||||
let input_ty = ctx.input_ty(insn, 0);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let inst = if ty_has_int_representation(input_ty) {
|
||||
Inst::VecDup { rd, rn, size }
|
||||
} else {
|
||||
Inst::VecDupFromFpu { rd, rn, size }
|
||||
};
|
||||
|
||||
ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::VanyTrue | Opcode::VallTrue => {
|
||||
@@ -2820,15 +2843,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);
|
||||
|
||||
if in_bits == 32 {
|
||||
ctx.emit(Inst::LoadFpuConst32 {
|
||||
rd: rtmp1,
|
||||
const_data: max as f32,
|
||||
});
|
||||
lower_constant_f32(ctx, rtmp1, max as f32);
|
||||
} else {
|
||||
ctx.emit(Inst::LoadFpuConst64 {
|
||||
rd: rtmp1,
|
||||
const_data: max,
|
||||
});
|
||||
lower_constant_f64(ctx, rtmp1, max);
|
||||
}
|
||||
ctx.emit(Inst::FpuRRR {
|
||||
fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64),
|
||||
@@ -2837,15 +2854,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rm: rtmp1.to_reg(),
|
||||
});
|
||||
if in_bits == 32 {
|
||||
ctx.emit(Inst::LoadFpuConst32 {
|
||||
rd: rtmp1,
|
||||
const_data: min as f32,
|
||||
});
|
||||
lower_constant_f32(ctx, rtmp1, min as f32);
|
||||
} else {
|
||||
ctx.emit(Inst::LoadFpuConst64 {
|
||||
rd: rtmp1,
|
||||
const_data: min,
|
||||
});
|
||||
lower_constant_f64(ctx, rtmp1, min);
|
||||
}
|
||||
ctx.emit(Inst::FpuRRR {
|
||||
fpu_op: choose_32_64(in_ty, FPUOp2::Max32, FPUOp2::Max64),
|
||||
@@ -2855,15 +2866,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
});
|
||||
if out_signed {
|
||||
if in_bits == 32 {
|
||||
ctx.emit(Inst::LoadFpuConst32 {
|
||||
rd: rtmp1,
|
||||
const_data: 0.0,
|
||||
});
|
||||
lower_constant_f32(ctx, rtmp1, 0.0);
|
||||
} else {
|
||||
ctx.emit(Inst::LoadFpuConst64 {
|
||||
rd: rtmp1,
|
||||
const_data: 0.0,
|
||||
});
|
||||
lower_constant_f64(ctx, rtmp1, 0.0);
|
||||
}
|
||||
}
|
||||
if in_bits == 32 {
|
||||
|
||||
@@ -9,7 +9,7 @@ block0:
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: movz x0, #1
|
||||
; nextln: movz x0, #255
|
||||
; nextln: mov sp, fp
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
@@ -60,10 +60,12 @@ block0(v0: f32):
|
||||
v1 = fcvt_to_uint.i8 v0
|
||||
; check: fcmp s0, s0
|
||||
; check: b.vc 8 ; udf
|
||||
; check: ldr s1, pc+8 ; b 8 ; data.f32 -1
|
||||
; check: movz x0, #49024, LSL #16
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp s0, s1
|
||||
; check: b.gt 8 ; udf
|
||||
; check: ldr s1, pc+8 ; b 8 ; data.f32 256
|
||||
; check: movz x0, #17280, LSL #16
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp s0, s1
|
||||
; check: b.mi 8 ; udf
|
||||
; check: fcvtzu w0, s0
|
||||
@@ -80,10 +82,12 @@ block0(v0: f64):
|
||||
v1 = fcvt_to_uint.i8 v0
|
||||
; check: fcmp d0, d0
|
||||
; check: b.vc 8 ; udf
|
||||
; check: ldr d1, pc+8 ; b 12 ; data.f64 -1
|
||||
; check: movz x0, #49136, LSL #48
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp d0, d1
|
||||
; check: b.gt 8 ; udf
|
||||
; check: ldr d1, pc+8 ; b 12 ; data.f64 256
|
||||
; check: movz x0, #16496, LSL #48
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp d0, d1
|
||||
; check: b.mi 8 ; udf
|
||||
; check: fcvtzu w0, d0
|
||||
@@ -100,10 +104,12 @@ block0(v0: f32):
|
||||
v1 = fcvt_to_uint.i16 v0
|
||||
; check: fcmp s0, s0
|
||||
; check: b.vc 8 ; udf
|
||||
; check: ldr s1, pc+8 ; b 8 ; data.f32 -1
|
||||
; check: movz x0, #49024, LSL #16
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp s0, s1
|
||||
; check: b.gt 8 ; udf
|
||||
; check: ldr s1, pc+8 ; b 8 ; data.f32 65536
|
||||
; check: movz x0, #18304, LSL #16
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp s0, s1
|
||||
; check: b.mi 8 ; udf
|
||||
; check: fcvtzu w0, s0
|
||||
@@ -120,10 +126,12 @@ block0(v0: f64):
|
||||
v1 = fcvt_to_uint.i16 v0
|
||||
; check: fcmp d0, d0
|
||||
; check: b.vc 8 ; udf
|
||||
; check: ldr d1, pc+8 ; b 12 ; data.f64 -1
|
||||
; check: movz x0, #49136, LSL #48
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp d0, d1
|
||||
; check: b.gt 8 ; udf
|
||||
; check: ldr d1, pc+8 ; b 12 ; data.f64 65536
|
||||
; check: movz x0, #16624, LSL #48
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp d0, d1
|
||||
; check: b.mi 8 ; udf
|
||||
; check: fcvtzu w0, d0
|
||||
|
||||
@@ -427,10 +427,12 @@ block0(v0: f32):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: b.vc 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 -1
|
||||
; nextln: movz x0, #49024, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.gt 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 4294967300
|
||||
; nextln: movz x0, #20352, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzu w0, s0
|
||||
@@ -448,10 +450,12 @@ block0(v0: f32):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: b.vc 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 -2147483600
|
||||
; nextln: movz x0, #52992, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.ge 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 2147483600
|
||||
; nextln: movz x0, #20224, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzs w0, s0
|
||||
@@ -469,10 +473,12 @@ block0(v0: f32):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: b.vc 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 -1
|
||||
; nextln: movz x0, #49024, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.gt 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 18446744000000000000
|
||||
; nextln: movz x0, #24448, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzu x0, s0
|
||||
@@ -490,10 +496,12 @@ block0(v0: f32):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: b.vc 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 -9223372000000000000
|
||||
; nextln: movz x0, #57088, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.ge 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 9223372000000000000
|
||||
; nextln: movz x0, #24320, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzs x0, s0
|
||||
@@ -511,10 +519,12 @@ block0(v0: f64):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: fcmp d0, d0
|
||||
; nextln: b.vc 8 ; udf
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 -1
|
||||
; nextln: movz x0, #49136, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.gt 8 ; udf
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 4294967296
|
||||
; nextln: movz x0, #16880, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzu w0, d0
|
||||
@@ -535,7 +545,8 @@ block0(v0: f64):
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 -2147483649
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.gt 8 ; udf
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 2147483648
|
||||
; nextln: movz x0, #16864, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzs w0, d0
|
||||
@@ -553,10 +564,12 @@ block0(v0: f64):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: fcmp d0, d0
|
||||
; nextln: b.vc 8 ; udf
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 -1
|
||||
; nextln: movz x0, #49136, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.gt 8 ; udf
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 18446744073709552000
|
||||
; nextln: movz x0, #17392, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzu x0, d0
|
||||
@@ -574,10 +587,12 @@ block0(v0: f64):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: fcmp d0, d0
|
||||
; nextln: b.vc 8 ; udf
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 -9223372036854776000
|
||||
; nextln: movz x0, #50144, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.ge 8 ; udf
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 9223372036854776000
|
||||
; nextln: movz x0, #17376, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzs x0, d0
|
||||
@@ -697,9 +712,10 @@ block0(v0: f32):
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 4294967300
|
||||
; nextln: movz x0, #20352, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fmin s2, s0, s1
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 0
|
||||
; nextln: movi v1.8b, #0
|
||||
; nextln: fmax s2, s2, s1
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: fcsel s0, s1, s2, ne
|
||||
@@ -716,11 +732,13 @@ block0(v0: f32):
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 2147483600
|
||||
; nextln: movz x0, #20224, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fmin s1, s0, s1
|
||||
; nextln: ldr s2, pc+8 ; b 8 ; data.f32 -2147483600
|
||||
; nextln: movz x0, #52992, LSL #16
|
||||
; nextln: fmov d2, x0
|
||||
; nextln: fmax s1, s1, s2
|
||||
; nextln: ldr s2, pc+8 ; b 8 ; data.f32 0
|
||||
; nextln: movi v2.8b, #0
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: fcsel s0, s2, s1, ne
|
||||
; nextln: fcvtzs w0, s0
|
||||
@@ -736,9 +754,10 @@ block0(v0: f32):
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 18446744000000000000
|
||||
; nextln: movz x0, #24448, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fmin s2, s0, s1
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 0
|
||||
; nextln: movi v1.8b, #0
|
||||
; nextln: fmax s2, s2, s1
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: fcsel s0, s1, s2, ne
|
||||
@@ -755,11 +774,13 @@ block0(v0: f32):
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 9223372000000000000
|
||||
; nextln: movz x0, #24320, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fmin s1, s0, s1
|
||||
; nextln: ldr s2, pc+8 ; b 8 ; data.f32 -9223372000000000000
|
||||
; nextln: movz x0, #57088, LSL #16
|
||||
; nextln: fmov d2, x0
|
||||
; nextln: fmax s1, s1, s2
|
||||
; nextln: ldr s2, pc+8 ; b 8 ; data.f32 0
|
||||
; nextln: movi v2.8b, #0
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: fcsel s0, s2, s1, ne
|
||||
; nextln: fcvtzs x0, s0
|
||||
@@ -777,7 +798,7 @@ block0(v0: f64):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 4294967295
|
||||
; nextln: fmin d2, d0, d1
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 0
|
||||
; nextln: movi v1.8b, #0
|
||||
; nextln: fmax d2, d2, d1
|
||||
; nextln: fcmp d0, d0
|
||||
; nextln: fcsel d0, d1, d2, ne
|
||||
@@ -796,9 +817,10 @@ block0(v0: f64):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 2147483647
|
||||
; nextln: fmin d1, d0, d1
|
||||
; nextln: ldr d2, pc+8 ; b 12 ; data.f64 -2147483648
|
||||
; nextln: movz x0, #49632, LSL #48
|
||||
; nextln: fmov d2, x0
|
||||
; nextln: fmax d1, d1, d2
|
||||
; nextln: ldr d2, pc+8 ; b 12 ; data.f64 0
|
||||
; nextln: movi v2.8b, #0
|
||||
; nextln: fcmp d0, d0
|
||||
; nextln: fcsel d0, d2, d1, ne
|
||||
; nextln: fcvtzs w0, d0
|
||||
@@ -814,9 +836,10 @@ block0(v0: f64):
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 18446744073709552000
|
||||
; nextln: movz x0, #17392, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fmin d2, d0, d1
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 0
|
||||
; nextln: movi v1.8b, #0
|
||||
; nextln: fmax d2, d2, d1
|
||||
; nextln: fcmp d0, d0
|
||||
; nextln: fcsel d0, d1, d2, ne
|
||||
@@ -833,11 +856,13 @@ block0(v0: f64):
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 9223372036854776000
|
||||
; nextln: movz x0, #17376, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fmin d1, d0, d1
|
||||
; nextln: ldr d2, pc+8 ; b 12 ; data.f64 -9223372036854776000
|
||||
; nextln: movz x0, #50144, LSL #48
|
||||
; nextln: fmov d2, x0
|
||||
; nextln: fmax d1, d1, d2
|
||||
; nextln: ldr d2, pc+8 ; b 12 ; data.f64 0
|
||||
; nextln: movi v2.8b, #0
|
||||
; nextln: fcmp d0, d0
|
||||
; nextln: fcsel d0, d2, d1, ne
|
||||
; nextln: fcvtzs x0, d0
|
||||
|
||||
49
cranelift/filetests/filetests/isa/aarch64/simd.clif
Normal file
49
cranelift/filetests/filetests/isa/aarch64/simd.clif
Normal file
@@ -0,0 +1,49 @@
|
||||
test compile
|
||||
target aarch64
|
||||
|
||||
function %f1() -> i64x2 {
|
||||
block0:
|
||||
v0 = iconst.i64 281474976710657
|
||||
v1 = splat.i64x2 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: movz x0, #1
|
||||
; nextln: movk x0, #1, LSL #48
|
||||
; nextln: dup v0.2d, x0
|
||||
; nextln: mov sp, fp
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %f2() -> i16x8 {
|
||||
block0:
|
||||
v0 = iconst.i32 42679
|
||||
v1 = ireduce.i16 v0
|
||||
v2 = splat.i16x8 v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: movz x0, #42679
|
||||
; nextln: dup v0.8h, w0
|
||||
; nextln: mov sp, fp
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %f3() -> b8x16 {
|
||||
block0:
|
||||
v0 = bconst.b32 true
|
||||
v1 = breduce.b8 v0
|
||||
v2 = splat.b8x16 v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: movi v0.16b, #255
|
||||
; nextln: mov sp, fp
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
Reference in New Issue
Block a user