Merge pull request #2413 from akirilov-arm/fmov_vector
Cranelift AArch64: Further vector constant improvements
@@ -1312,6 +1312,13 @@ impl MachInstEmit for Inst {
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::FpuExtend { rd, rn, size } => {
                sink.put4(enc_fpurr(
                    0b000_11110_00_1_000000_10000 | (size.ftype() << 13),
                    rd,
                    rn,
                ));
            }
            &Inst::FpuRR { fpu_op, rd, rn } => {
                let top22 = match fpu_op {
                    FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000,
@@ -1746,6 +1753,17 @@ impl MachInstEmit for Inst {
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecDupFPImm { rd, imm, size } => {
                let imm = imm.enc_bits();
                let op = match size.lane_size() {
                    ScalarSize::Size32 => 0,
                    ScalarSize::Size64 => 1,
                    _ => unimplemented!(),
                };
                let q_op = op | ((size.is_128bits() as u32) << 1);

                sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm));
            }
            &Inst::VecDupImm {
                rd,
                imm,

@@ -2072,6 +2072,24 @@ fn test_aarch64_binemit() {
        "5205084E",
        "dup v18.2d, v10.d[0]",
    ));
    insns.push((
        Inst::VecDupFPImm {
            rd: writable_vreg(31),
            imm: ASIMDFPModImm::maybe_from_u64(1_f32.to_bits() as u64, ScalarSize::Size32).unwrap(),
            size: VectorSize::Size32x2,
        },
        "1FF6030F",
        "fmov v31.2s, #1",
    ));
    insns.push((
        Inst::VecDupFPImm {
            rd: writable_vreg(0),
            imm: ASIMDFPModImm::maybe_from_u64(2_f64.to_bits(), ScalarSize::Size64).unwrap(),
            size: VectorSize::Size64x2,
        },
        "00F4006F",
        "fmov v0.2d, #2",
    ));
    insns.push((
        Inst::VecDupImm {
            rd: writable_vreg(31),
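As a cross-check of the expected bytes above (an editor's sketch, not part of the commit; `asimd_mod_imm` is a hypothetical helper, not the diff's `enc_asimd_mod_imm`), the first test's "1FF6030F" can be rebuilt from the architectural ASIMD modified-immediate layout, where FMOV (vector, immediate) uses cmode 0b1111:

```rust
// Field layout of the ASIMD modified-immediate class, per the Arm ARM:
//   31  30  29  28..19        18..16  15..12  11  10  9..5    4..0
//   0   Q   op  0b0111100000  abc     cmode   0   1   defgh   Rd
fn asimd_mod_imm(q: u32, op: u32, cmode: u32, imm8: u8, rd: u32) -> u32 {
    let abc = (imm8 as u32) >> 5; // top 3 bits of the immediate
    let defgh = (imm8 as u32) & 0b11111; // low 5 bits
    (q << 30)
        | (op << 29)
        | (0b0111100000 << 19)
        | (abc << 16)
        | (cmode << 12)
        | (0b01 << 10)
        | (defgh << 5)
        | rd
}

fn main() {
    // `fmov v31.2s, #1.0`: Q=0 (64-bit vector), op=0, imm8 for 1.0 is 0b0111_0000.
    let word = asimd_mod_imm(0, 0, 0b1111, 0b0111_0000, 31);
    assert_eq!(word, 0x0F03_F61F); // little-endian bytes: 1F F6 03 0F
}
```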
@@ -2082,16 +2100,96 @@ fn test_aarch64_binemit() {
        "FFE7074F",
        "movi v31.16b, #255",
    ));
    insns.push((
        Inst::VecDupImm {
            rd: writable_vreg(30),
            imm: ASIMDMovModImm::maybe_from_u64(0, ScalarSize::Size16).unwrap(),
            invert: false,
            size: VectorSize::Size16x8,
        },
        "1E84004F",
        "movi v30.8h, #0",
    ));
    insns.push((
        Inst::VecDupImm {
            rd: writable_vreg(0),
-            imm: ASIMDMovModImm::zero(),
+            imm: ASIMDMovModImm::zero(ScalarSize::Size16),
            invert: true,
            size: VectorSize::Size16x4,
        },
        "0084002F",
        "mvni v0.4h, #0",
    ));
    insns.push((
        Inst::VecDupImm {
            rd: writable_vreg(0),
            imm: ASIMDMovModImm::maybe_from_u64(256, ScalarSize::Size16).unwrap(),
            invert: false,
            size: VectorSize::Size16x8,
        },
        "20A4004F",
        "movi v0.8h, #1, LSL #8",
    ));
    insns.push((
        Inst::VecDupImm {
            rd: writable_vreg(8),
            imm: ASIMDMovModImm::maybe_from_u64(2228223, ScalarSize::Size32).unwrap(),
            invert: false,
            size: VectorSize::Size32x4,
        },
        "28D4014F",
        "movi v8.4s, #33, MSL #16",
    ));
    insns.push((
        Inst::VecDupImm {
            rd: writable_vreg(16),
            imm: ASIMDMovModImm::maybe_from_u64(35071, ScalarSize::Size32).unwrap(),
            invert: true,
            size: VectorSize::Size32x2,
        },
        "10C5042F",
        "mvni v16.2s, #136, MSL #8",
    ));
    insns.push((
        Inst::VecDupImm {
            rd: writable_vreg(1),
            imm: ASIMDMovModImm::maybe_from_u64(0, ScalarSize::Size32).unwrap(),
            invert: false,
            size: VectorSize::Size32x2,
        },
        "0104000F",
        "movi v1.2s, #0",
    ));
    insns.push((
        Inst::VecDupImm {
            rd: writable_vreg(24),
            imm: ASIMDMovModImm::maybe_from_u64(1107296256, ScalarSize::Size32).unwrap(),
            invert: false,
            size: VectorSize::Size32x4,
        },
        "5864024F",
        "movi v24.4s, #66, LSL #24",
    ));
    insns.push((
        Inst::VecDupImm {
            rd: writable_vreg(8),
            imm: ASIMDMovModImm::zero(ScalarSize::Size64),
            invert: false,
            size: VectorSize::Size64x2,
        },
        "08E4006F",
        "movi v8.2d, #0",
    ));
    insns.push((
        Inst::VecDupImm {
            rd: writable_vreg(7),
            imm: ASIMDMovModImm::maybe_from_u64(18374687574904995840, ScalarSize::Size64).unwrap(),
            invert: false,
            size: VectorSize::Size64x2,
        },
        "87E6046F",
        "movi v7.2d, #18374687574904995840",
    ));
    insns.push((
        Inst::VecExtend {
            t: VecExtendOp::Sxtl8,
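A quick sanity check on the shifted forms above (an editor's note, not part of the commit): LSL is a plain left shift, while MSL ("modified shift left") shifts and fills the vacated low bits with ones. The constants passed to `maybe_from_u64` match the printed immediates:

```rust
fn main() {
    // "movi v0.8h, #1, LSL #8": plain shift, no ones-filling.
    assert_eq!(1u32 << 8, 256);
    // "movi v8.4s, #33, MSL #16": shift left, fill with ones.
    assert_eq!((33u32 << 16) | 0xFFFF, 2228223);
    // "mvni v16.2s, #136, MSL #8": same expansion; MVNI then inverts per lane.
    assert_eq!((136u32 << 8) | 0xFF, 35071);
    // "movi v24.4s, #66, LSL #24": the at-most-one-non-zero-byte case.
    assert_eq!(66u32 << 24, 1107296256);
}
```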
@@ -4376,6 +4474,16 @@ fn test_aarch64_binemit() {
        "mov d23, v11.d[0]",
    ));

    insns.push((
        Inst::FpuExtend {
            rd: writable_vreg(31),
            rn: vreg(0),
            size: ScalarSize::Size32,
        },
        "1F40201E",
        "fmov s31, s0",
    ));

    insns.push((
        Inst::FpuRR {
            fpu_op: FPUOp1::Abs32,

@@ -668,39 +668,208 @@ impl MoveWideConst {
}

/// Advanced SIMD modified immediate as used by MOVI/MVNI.
-#[derive(Clone, Copy, Debug)]
+#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ASIMDMovModImm {
    imm: u8,
    shift: u8,
    is_64bit: bool,
    shift_ones: bool,
}

impl ASIMDMovModImm {
    /// Construct an ASIMDMovModImm from an arbitrary 64-bit constant, if possible.
    /// Note that the bits in `value` outside of the range specified by `size` are
    /// ignored; for example, in the case of `ScalarSize::Size8` all bits above the
    /// lowest 8 are ignored.
    pub fn maybe_from_u64(value: u64, size: ScalarSize) -> Option<ASIMDMovModImm> {
        match size {
            ScalarSize::Size8 => Some(ASIMDMovModImm {
                imm: value as u8,
                shift: 0,
                is_64bit: false,
                shift_ones: false,
            }),
            ScalarSize::Size16 => {
                let value = value as u16;

                if value >> 8 == 0 {
                    Some(ASIMDMovModImm {
                        imm: value as u8,
                        shift: 0,
                        is_64bit: false,
                        shift_ones: false,
                    })
                } else if value as u8 == 0 {
                    Some(ASIMDMovModImm {
                        imm: (value >> 8) as u8,
                        shift: 8,
                        is_64bit: false,
                        shift_ones: false,
                    })
                } else {
                    None
                }
            }
            ScalarSize::Size32 => {
                let value = value as u32;

                // Value is of the form 0x00MMFFFF.
                if value & 0xFF00FFFF == 0x0000FFFF {
                    let imm = (value >> 16) as u8;

                    Some(ASIMDMovModImm {
                        imm,
                        shift: 16,
                        is_64bit: false,
                        shift_ones: true,
                    })
                // Value is of the form 0x0000MMFF.
                } else if value & 0xFFFF00FF == 0x000000FF {
                    let imm = (value >> 8) as u8;

                    Some(ASIMDMovModImm {
                        imm,
                        shift: 8,
                        is_64bit: false,
                        shift_ones: true,
                    })
                } else {
                    // Of the 4 bytes, at most one is non-zero.
                    for shift in (0..32).step_by(8) {
                        if value & (0xFF << shift) == value {
                            return Some(ASIMDMovModImm {
                                imm: (value >> shift) as u8,
                                shift,
                                is_64bit: false,
                                shift_ones: false,
                            });
                        }
                    }

                    None
                }
            }
            ScalarSize::Size64 => {
                let mut imm = 0u8;

                // Check if all bytes are either 0 or 0xFF.
                for i in 0..8 {
                    let b = (value >> (i * 8)) as u8;

                    if b == 0 || b == 0xFF {
                        imm |= (b & 1) << i;
                    } else {
                        return None;
                    }
                }

                Some(ASIMDMovModImm {
                    imm,
                    shift: 0,
                    is_64bit: true,
                    shift_ones: false,
                })
            }
            _ => None,
        }
    }

    /// Create a zero immediate of this format.
-    pub fn zero() -> Self {
+    pub fn zero(size: ScalarSize) -> Self {
        ASIMDMovModImm {
            imm: 0,
            shift: 0,
            is_64bit: size == ScalarSize::Size64,
            shift_ones: false,
        }
    }

    /// Returns the value that this immediate represents.
    pub fn value(&self) -> (u8, u32, bool) {
        (self.imm, self.shift as u32, self.shift_ones)
    }
}

/// Advanced SIMD modified immediate as used by the vector variant of FMOV.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ASIMDFPModImm {
    imm: u8,
    is_64bit: bool,
}

impl ASIMDFPModImm {
    /// Construct an ASIMDFPModImm from an arbitrary 64-bit constant, if possible.
    pub fn maybe_from_u64(value: u64, size: ScalarSize) -> Option<ASIMDFPModImm> {
        // In all cases immediates are encoded as an 8-bit number 0b_abcdefgh;
        // let `D` be the inverse of the digit `d`.
        match size {
            ScalarSize::Size32 => {
                // In this case the representable immediates are 32-bit numbers of the form
                // 0b_aBbb_bbbc_defg_h000 shifted to the left by 16.
                let value = value as u32;
                let b0_5 = (value >> 19) & 0b111111;
                let b6 = (value >> 19) & (1 << 6);
                let b7 = (value >> 24) & (1 << 7);
                let imm = (b0_5 | b6 | b7) as u8;

                if value == Self::value32(imm) {
                    Some(ASIMDFPModImm {
                        imm,
                        is_64bit: false,
                    })
                } else {
                    None
                }
            }
            ScalarSize::Size64 => {
                // In this case the representable immediates are 64-bit numbers of the form
                // 0b_aBbb_bbbb_bbcd_efgh shifted to the left by 48.
                let b0_5 = (value >> 48) & 0b111111;
                let b6 = (value >> 48) & (1 << 6);
                let b7 = (value >> 56) & (1 << 7);
                let imm = (b0_5 | b6 | b7) as u8;

                if value == Self::value64(imm) {
                    Some(ASIMDFPModImm {
                        imm,
                        is_64bit: true,
                    })
                } else {
                    None
                }
            }
            _ => None,
        }
    }

    /// Returns bits ready for encoding.
    pub fn enc_bits(&self) -> u8 {
        self.imm
    }

    /// Returns the 32-bit value that corresponds to an 8-bit encoding.
    fn value32(imm: u8) -> u32 {
        let imm = imm as u32;
        let b0_5 = imm & 0b111111;
        let b6 = (imm >> 6) & 1;
        let b6_inv = b6 ^ 1;
        let b7 = (imm >> 7) & 1;

        b0_5 << 19 | (b6 * 0b11111) << 25 | b6_inv << 30 | b7 << 31
    }

    /// Returns the 64-bit value that corresponds to an 8-bit encoding.
    fn value64(imm: u8) -> u64 {
        let imm = imm as u64;
        let b0_5 = imm & 0b111111;
        let b6 = (imm >> 6) & 1;
        let b6_inv = b6 ^ 1;
        let b7 = (imm >> 7) & 1;

        b0_5 << 48 | (b6 * 0b11111111) << 54 | b6_inv << 62 | b7 << 63
    }
}

impl PrettyPrint for NZCV {
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        let fmt = |c: char, v| if v { c.to_ascii_uppercase() } else { c };
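The `Size64` arm above is the least obvious part: it compresses a 64-bit value whose bytes are each 0x00 or 0xFF into one bit per byte, and the pretty-printer in the next hunk reverses that. A free-standing sketch (`compress`/`expand` are hypothetical names for illustration, not the diff's API), checked against the `movi v7.2d` binemit test constant:

```rust
// Byte i of the value collapses to bit i of the 8-bit immediate.
fn compress(value: u64) -> Option<u8> {
    let mut imm = 0u8;
    for i in 0..8 {
        let b = (value >> (i * 8)) as u8;
        if b == 0 || b == 0xFF {
            imm |= (b & 1) << i;
        } else {
            return None;
        }
    }
    Some(imm)
}

// Expansion mirrors the pretty-printer: a set bit becomes a byte of all ones.
fn expand(imm: u8) -> u64 {
    let mut value = 0u64;
    for i in 0..8 {
        let b = ((imm as i8) >> i) & 1; // 0 or 1
        value |= (-b as u8 as u64) << (i * 8); // -1i8 as u8 is 0xFF
    }
    value
}

fn main() {
    // 18374687574904995840 == 0xFF0000FF00FF0000: bytes 2, 4 and 7 are set.
    assert_eq!(compress(0xFF0000FF00FF0000), Some(0b1001_0100));
    assert_eq!(expand(0b1001_0100), 18374687574904995840);
}
```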
@@ -782,7 +951,20 @@ impl PrettyPrint for MoveWideConst {

impl PrettyPrint for ASIMDMovModImm {
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
-        if self.shift == 0 {
+        if self.is_64bit {
            debug_assert_eq!(self.shift, 0);

            let enc_imm = self.imm as i8;
            let mut imm = 0u64;

            for i in 0..8 {
                let b = (enc_imm >> i) & 1;

                imm |= (-b as u8 as u64) << (i * 8);
            }

            format!("#{}", imm)
        } else if self.shift == 0 {
            format!("#{}", self.imm)
        } else {
            let shift_type = if self.shift_ones { "MSL" } else { "LSL" };
@@ -791,6 +973,16 @@ impl PrettyPrint for ASIMDMovModImm {
        }
    }
}

impl PrettyPrint for ASIMDFPModImm {
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        if self.is_64bit {
            format!("#{}", f64::from_bits(Self::value64(self.imm)))
        } else {
            format!("#{}", f32::from_bits(Self::value32(self.imm)))
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
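For the FP immediate, `value32` (reproduced verbatim from this diff) makes the expansion concrete; the asserts below use well-known encodings (imm8 = 0x70 for 1.0, as exercised by the `fmov v31.2s, #1` binemit test above):

```rust
// From the diff: the 8-bit FMOV immediate 0b_abcdefgh expands to the 32-bit
// float pattern 0b_aBbb_bbbc_defg_h000 << 16, where B is the inverse of b.
fn value32(imm: u8) -> u32 {
    let imm = imm as u32;
    let b0_5 = imm & 0b111111; // bits c..h
    let b6 = (imm >> 6) & 1; // bit b
    let b6_inv = b6 ^ 1; // bit B
    let b7 = (imm >> 7) & 1; // sign bit a

    b0_5 << 19 | (b6 * 0b11111) << 25 | b6_inv << 30 | b7 << 31
}

fn main() {
    assert_eq!(f32::from_bits(value32(0b0111_0000)), 1.0);
    // Clearing bit b flips the exponent range, giving 2.0 for imm8 = 0.
    assert_eq!(f32::from_bits(value32(0b0000_0000)), 2.0);
}
```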
@@ -1022,4 +1214,44 @@ mod test {
            unreachable!();
        }
    }

    #[test]
    fn asimd_fp_mod_imm_test() {
        assert_eq!(None, ASIMDFPModImm::maybe_from_u64(0, ScalarSize::Size32));
        assert_eq!(
            None,
            ASIMDFPModImm::maybe_from_u64(0.013671875_f32.to_bits() as u64, ScalarSize::Size32)
        );
        assert_eq!(None, ASIMDFPModImm::maybe_from_u64(0, ScalarSize::Size64));
        assert_eq!(
            None,
            ASIMDFPModImm::maybe_from_u64(10000_f64.to_bits(), ScalarSize::Size64)
        );
    }

    #[test]
    fn asimd_mov_mod_imm_test() {
        assert_eq!(
            None,
            ASIMDMovModImm::maybe_from_u64(513, ScalarSize::Size16)
        );
        assert_eq!(
            None,
            ASIMDMovModImm::maybe_from_u64(4278190335, ScalarSize::Size32)
        );
        assert_eq!(
            None,
            ASIMDMovModImm::maybe_from_u64(8388608, ScalarSize::Size64)
        );

        assert_eq!(
            Some(ASIMDMovModImm {
                imm: 66,
                shift: 16,
                is_64bit: false,
                shift_ones: true,
            }),
            ASIMDMovModImm::maybe_from_u64(4390911, ScalarSize::Size32)
        );
    }
}

@@ -755,6 +755,13 @@ pub enum Inst {
        size: VectorSize,
    },

    /// Zero-extend a SIMD & FP scalar to the full width of a vector register.
    FpuExtend {
        rd: Writable<Reg>,
        rn: Reg,
        size: ScalarSize,
    },

    /// 1-op FPU instruction.
    FpuRR {
        fpu_op: FPUOp1,

@@ -928,6 +935,13 @@ pub enum Inst {
        size: VectorSize,
    },

    /// Duplicate FP immediate to vector.
    VecDupFPImm {
        rd: Writable<Reg>,
        imm: ASIMDFPModImm,
        size: VectorSize,
    },

    /// Duplicate immediate to vector.
    VecDupImm {
        rd: Writable<Reg>,
@@ -1295,12 +1309,15 @@ impl Inst {
        value: u32,
        mut alloc_tmp: F,
    ) -> SmallVec<[Inst; 4]> {
        // Note that we must make sure that all bits outside the lowest 32 are set to 0
        // because this function is also used to load wider constants (that have zeros
        // in their most significant bits).
        if value == 0 {
            smallvec![Inst::VecDupImm {
                rd,
-                imm: ASIMDMovModImm::zero(),
+                imm: ASIMDMovModImm::zero(ScalarSize::Size32),
                invert: false,
-                size: VectorSize::Size8x8
+                size: VectorSize::Size32x2
            }]
        } else {
            // TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent

@@ -1324,6 +1341,9 @@ impl Inst {
        const_data: u64,
        mut alloc_tmp: F,
    ) -> SmallVec<[Inst; 4]> {
        // Note that we must make sure that all bits outside the lowest 64 are set to 0
        // because this function is also used to load wider constants (that have zeros
        // in their most significant bits).
        if let Ok(const_data) = u32::try_from(const_data) {
            Inst::load_fp_constant32(rd, const_data, alloc_tmp)
            // TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent

@@ -1394,7 +1414,7 @@ impl Inst {
        r
    }

-    /// Create instructions that load a 128-bit vector constant consisting of elements with
+    /// Create instructions that load a vector constant consisting of elements with
    /// the same value.
    pub fn load_replicated_vector_pattern<F: FnMut(RegClass, Type) -> Writable<Reg>>(
        rd: Writable<Reg>,
@@ -1403,6 +1423,15 @@ impl Inst {
        mut alloc_tmp: F,
    ) -> SmallVec<[Inst; 5]> {
        let lane_size = size.lane_size();
        let widen_32_bit_pattern = |pattern, lane_size| {
            if lane_size == ScalarSize::Size32 {
                let pattern = pattern as u32 as u64;

                ASIMDMovModImm::maybe_from_u64(pattern | (pattern << 32), ScalarSize::Size64)
            } else {
                None
            }
        };

        if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) {
            smallvec![Inst::VecDupImm {

@@ -1421,6 +1450,27 @@ impl Inst {
                invert: true,
                size
            }]
        } else if let Some(imm) = widen_32_bit_pattern(pattern, lane_size) {
            let mut insts = smallvec![Inst::VecDupImm {
                rd,
                imm,
                invert: false,
                size: VectorSize::Size64x2,
            }];

            // TODO: Implement support for 64-bit scalar MOVI; we zero-extend the
            // lower 64 bits instead.
            if !size.is_128bits() {
                insts.push(Inst::FpuExtend {
                    rd,
                    rn: rd.to_reg(),
                    size: ScalarSize::Size64,
                });
            }

            insts
        } else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(pattern, lane_size) {
            smallvec![Inst::VecDupFPImm { rd, imm, size }]
        } else {
            let tmp = alloc_tmp(RegClass::I64, I64);
            let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern)[..]);
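`widen_32_bit_pattern` is the interesting fallback: a 32-bit splat pattern with no 32-bit MOVI/MVNI encoding may still be encodable once replicated into a 64-bit lane. A standalone sketch (editor's illustration), using the constant from the %f9 filetest further down:

```rust
fn main() {
    // 0xFF0000FF has no MOVI/MVNI encoding at 32 bits (two non-zero bytes,
    // and neither MSL form matches)...
    let pattern: u64 = 0xFF0000FFu32 as u64;
    let widened = pattern | (pattern << 32);

    // ...but replicated into both halves of a 64-bit lane, every byte is
    // 0x00 or 0xFF, which the 64-bit MOVI form encodes in one instruction.
    assert_eq!(widened, 0xFF0000FF_FF0000FF);
    assert!(widened.to_le_bytes().iter().all(|&b| b == 0 || b == 0xFF));
    // This is exactly `movi v0.2d, #18374687579166474495` in the filetest.
    assert_eq!(widened, 18374687579166474495);
}
```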
@@ -1721,6 +1771,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::FpuExtend { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::FpuRR { rd, rn, .. } => {
            collector.add_def(rd);
            collector.add_use(rn);

@@ -1870,6 +1924,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            collector.add_def(rd);
            collector.add_use(rn);
        }
        &Inst::VecDupFPImm { rd, .. } => {
            collector.add_def(rd);
        }
        &Inst::VecDupImm { rd, .. } => {
            collector.add_def(rd);
        }
@@ -2299,6 +2356,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::FpuExtend {
            ref mut rd,
            ref mut rn,
            ..
        } => {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::FpuRR {
            ref mut rd,
            ref mut rn,

@@ -2582,6 +2647,9 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
        &mut Inst::VecDupFPImm { ref mut rd, .. } => {
            map_def(mapper, rd);
        }
        &mut Inst::VecDupImm { ref mut rd, .. } => {
            map_def(mapper, rd);
        }
@@ -3229,6 +3297,12 @@ impl Inst {
                let rn = show_vreg_element(rn, mb_rru, idx, size);
                format!("mov {}, {}", rd, rn)
            }
            &Inst::FpuExtend { rd, rn, size } => {
                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
                let rn = show_vreg_scalar(rn, mb_rru, size);

                format!("fmov {}, {}", rd, rn)
            }
            &Inst::FpuRR { fpu_op, rd, rn } => {
                let (op, sizesrc, sizedest) = match fpu_op {
                    FPUOp1::Abs32 => ("fabs", ScalarSize::Size32, ScalarSize::Size32),

@@ -3465,6 +3539,12 @@ impl Inst {
                let rn = show_vreg_element(rn, mb_rru, 0, size);
                format!("dup {}, {}", rd, rn)
            }
            &Inst::VecDupFPImm { rd, imm, size } => {
                let imm = imm.show_rru(mb_rru);
                let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);

                format!("fmov {}, {}", rd, imm)
            }
            &Inst::VecDupImm {
                rd,
                imm,

@@ -853,7 +853,7 @@ pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
    // is potentially expensive.
    ctx.emit(Inst::VecDupImm {
        rd,
-        imm: ASIMDMovModImm::zero(),
+        imm: ASIMDMovModImm::zero(ScalarSize::Size8),
        invert: false,
        size: VectorSize::Size8x16,
    });

@@ -2075,8 +2075,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            // derivation of these sequences. Alternative sequences are discussed in
            // https://github.com/bytecodealliance/wasmtime/issues/2296, although they are not
            // used here.
-            // Also .. FIXME: when https://github.com/bytecodealliance/wasmtime/pull/2310 is
-            // merged, use `lower_splat_constant` instead to generate the constants.
            let tmp_r0 = ctx.alloc_tmp(RegClass::I64, I64);
            let tmp_v0 = ctx.alloc_tmp(RegClass::V128, I8X16);
            let tmp_v1 = ctx.alloc_tmp(RegClass::V128, I8X16);

@@ -2100,12 +2098,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                size: VectorSize::Size8x16,
                imm: 7,
            });
-            lower_constant_u64(ctx, tmp_r0, 0x8040201008040201u64);
-            ctx.emit(Inst::VecDup {
-                rd: tmp_v0,
-                rn: tmp_r0.to_reg(),
-                size: VectorSize::Size64x2,
-            });
+            lower_splat_const(ctx, tmp_v0, 0x8040201008040201u64, VectorSize::Size64x2);
            ctx.emit(Inst::VecRRR {
                alu_op: VecALUOp::And,
                rd: tmp_v1,

@@ -715,7 +715,7 @@ block0(v0: f32):
; nextln: movz x0, #20352, LSL #16
; nextln: fmov d1, x0
; nextln: fmin s2, s0, s1
-; nextln: movi v1.8b, #0
+; nextln: movi v1.2s, #0
; nextln: fmax s2, s2, s1
; nextln: fcmp s0, s0
; nextln: fcsel s0, s1, s2, ne
@@ -738,7 +738,7 @@ block0(v0: f32):
; nextln: movz x0, #52992, LSL #16
; nextln: fmov d2, x0
; nextln: fmax s1, s1, s2
-; nextln: movi v2.8b, #0
+; nextln: movi v2.2s, #0
; nextln: fcmp s0, s0
; nextln: fcsel s0, s2, s1, ne
; nextln: fcvtzs w0, s0
@@ -757,7 +757,7 @@ block0(v0: f32):
; nextln: movz x0, #24448, LSL #16
; nextln: fmov d1, x0
; nextln: fmin s2, s0, s1
-; nextln: movi v1.8b, #0
+; nextln: movi v1.2s, #0
; nextln: fmax s2, s2, s1
; nextln: fcmp s0, s0
; nextln: fcsel s0, s1, s2, ne
@@ -780,7 +780,7 @@ block0(v0: f32):
; nextln: movz x0, #57088, LSL #16
; nextln: fmov d2, x0
; nextln: fmax s1, s1, s2
-; nextln: movi v2.8b, #0
+; nextln: movi v2.2s, #0
; nextln: fcmp s0, s0
; nextln: fcsel s0, s2, s1, ne
; nextln: fcvtzs x0, s0
@@ -798,7 +798,7 @@ block0(v0: f64):
; nextln: mov fp, sp
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 4294967295
; nextln: fmin d2, d0, d1
-; nextln: movi v1.8b, #0
+; nextln: movi v1.2s, #0
; nextln: fmax d2, d2, d1
; nextln: fcmp d0, d0
; nextln: fcsel d0, d1, d2, ne
@@ -820,7 +820,7 @@ block0(v0: f64):
; nextln: movz x0, #49632, LSL #48
; nextln: fmov d2, x0
; nextln: fmax d1, d1, d2
-; nextln: movi v2.8b, #0
+; nextln: movi v2.2s, #0
; nextln: fcmp d0, d0
; nextln: fcsel d0, d2, d1, ne
; nextln: fcvtzs w0, d0
@@ -839,7 +839,7 @@ block0(v0: f64):
; nextln: movz x0, #17392, LSL #48
; nextln: fmov d1, x0
; nextln: fmin d2, d0, d1
-; nextln: movi v1.8b, #0
+; nextln: movi v1.2s, #0
; nextln: fmax d2, d2, d1
; nextln: fcmp d0, d0
; nextln: fcsel d0, d1, d2, ne
@@ -862,7 +862,7 @@ block0(v0: f64):
; nextln: movz x0, #50144, LSL #48
; nextln: fmov d2, x0
; nextln: fmax d1, d1, d2
-; nextln: movi v2.8b, #0
+; nextln: movi v2.2s, #0
; nextln: fcmp d0, d0
; nextln: fcsel d0, d2, d1, ne
; nextln: fcvtzs x0, d0

@@ -127,3 +127,46 @@ block0(v0: i64, v1: i64):
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f9() -> i32x2 {
block0:
    v0 = iconst.i32 4278190335
    v1 = splat.i32x2 v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movi v0.2d, #18374687579166474495
; nextln: fmov d0, d0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f10() -> i32x4 {
block0:
    v0 = iconst.i32 4293918720
    v1 = splat.i32x4 v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mvni v0.4s, #15, MSL #16
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f11() -> f32x4 {
block0:
    v0 = f32const 0x1.5
    v1 = splat.f32x4 v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmov v0.4s, #1.3125
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
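As a closing worked check (editor's note, not part of the commit), the %f10 expectation above can be verified by hand: MSL #16 shifts the 8-bit immediate left by 16 and fills the vacated bits with ones, and MVNI inverts the result per lane.

```rust
fn main() {
    let msl = (15u32 << 16) | 0xFFFF; // 0x000F_FFFF
    let mvni = !msl; // 0xFFF0_0000
    assert_eq!(mvni, 4293918720); // the i32 constant splatted in %f10
}
```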