From f59b274d22016f008ed0ccb98c24c7551e0e349b Mon Sep 17 00:00:00 2001 From: Anton Kirilov Date: Thu, 29 Oct 2020 13:29:03 +0000 Subject: [PATCH] Cranelift AArch64: Further vector constant improvements Introduce support for MOVI/MVNI with 16-, 32-, and 64-bit elements, and the vector variant of FMOV. Copyright (c) 2020, Arm Limited. --- .../codegen/src/isa/aarch64/inst/emit.rs | 18 ++ .../src/isa/aarch64/inst/emit_tests.rs | 110 +++++++- .../codegen/src/isa/aarch64/inst/imms.rs | 238 +++++++++++++++++- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 86 ++++++- cranelift/codegen/src/isa/aarch64/lower.rs | 2 +- .../codegen/src/isa/aarch64/lower_inst.rs | 9 +- .../filetests/isa/aarch64/floating-point.clif | 16 +- .../filetests/filetests/isa/aarch64/simd.clif | 43 ++++ 8 files changed, 498 insertions(+), 24 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 0654711353..432bbc19dd 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1312,6 +1312,13 @@ impl MachInstEmit for Inst { | machreg_to_vec(rd.to_reg()), ); } + &Inst::FpuExtend { rd, rn, size } => { + sink.put4(enc_fpurr( + 0b000_11110_00_1_000000_10000 | (size.ftype() << 13), + rd, + rn, + )); + } &Inst::FpuRR { fpu_op, rd, rn } => { let top22 = match fpu_op { FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000, @@ -1746,6 +1753,17 @@ impl MachInstEmit for Inst { | machreg_to_vec(rd.to_reg()), ); } + &Inst::VecDupFPImm { rd, imm, size } => { + let imm = imm.enc_bits(); + let op = match size.lane_size() { + ScalarSize::Size32 => 0, + ScalarSize::Size64 => 1, + _ => unimplemented!(), + }; + let q_op = op | ((size.is_128bits() as u32) << 1); + + sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm)); + } &Inst::VecDupImm { rd, imm, diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index cd0fbf9020..f01fbf43f0 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -2072,6 +2072,24 @@ fn test_aarch64_binemit() { "5205084E", "dup v18.2d, v10.d[0]", )); + insns.push(( + Inst::VecDupFPImm { + rd: writable_vreg(31), + imm: ASIMDFPModImm::maybe_from_u64(1_f32.to_bits() as u64, ScalarSize::Size32).unwrap(), + size: VectorSize::Size32x2, + }, + "1FF6030F", + "fmov v31.2s, #1", + )); + insns.push(( + Inst::VecDupFPImm { + rd: writable_vreg(0), + imm: ASIMDFPModImm::maybe_from_u64(2_f64.to_bits(), ScalarSize::Size64).unwrap(), + size: VectorSize::Size64x2, + }, + "00F4006F", + "fmov v0.2d, #2", + )); insns.push(( Inst::VecDupImm { rd: writable_vreg(31), @@ -2082,16 +2100,96 @@ fn test_aarch64_binemit() { "FFE7074F", "movi v31.16b, #255", )); + insns.push(( + Inst::VecDupImm { + rd: writable_vreg(30), + imm: ASIMDMovModImm::maybe_from_u64(0, ScalarSize::Size16).unwrap(), + invert: false, + size: VectorSize::Size16x8, + }, + "1E84004F", + "movi v30.8h, #0", + )); insns.push(( Inst::VecDupImm { rd: writable_vreg(0), - imm: ASIMDMovModImm::zero(), + imm: ASIMDMovModImm::zero(ScalarSize::Size16), invert: true, size: VectorSize::Size16x4, }, "0084002F", "mvni v0.4h, #0", )); + insns.push(( + Inst::VecDupImm { + rd: writable_vreg(0), + imm: ASIMDMovModImm::maybe_from_u64(256, ScalarSize::Size16).unwrap(), + invert: false, + size: VectorSize::Size16x8, + }, + "20A4004F", + "movi v0.8h, #1, LSL #8", + )); + insns.push(( + Inst::VecDupImm { + rd: writable_vreg(8), + imm: 
ASIMDMovModImm::maybe_from_u64(2228223, ScalarSize::Size32).unwrap(), + invert: false, + size: VectorSize::Size32x4, + }, + "28D4014F", + "movi v8.4s, #33, MSL #16", + )); + insns.push(( + Inst::VecDupImm { + rd: writable_vreg(16), + imm: ASIMDMovModImm::maybe_from_u64(35071, ScalarSize::Size32).unwrap(), + invert: true, + size: VectorSize::Size32x2, + }, + "10C5042F", + "mvni v16.2s, #136, MSL #8", + )); + insns.push(( + Inst::VecDupImm { + rd: writable_vreg(1), + imm: ASIMDMovModImm::maybe_from_u64(0, ScalarSize::Size32).unwrap(), + invert: false, + size: VectorSize::Size32x2, + }, + "0104000F", + "movi v1.2s, #0", + )); + insns.push(( + Inst::VecDupImm { + rd: writable_vreg(24), + imm: ASIMDMovModImm::maybe_from_u64(1107296256, ScalarSize::Size32).unwrap(), + invert: false, + size: VectorSize::Size32x4, + }, + "5864024F", + "movi v24.4s, #66, LSL #24", + )); + insns.push(( + Inst::VecDupImm { + rd: writable_vreg(8), + imm: ASIMDMovModImm::zero(ScalarSize::Size64), + invert: false, + size: VectorSize::Size64x2, + }, + "08E4006F", + "movi v8.2d, #0", + )); + insns.push(( + Inst::VecDupImm { + rd: writable_vreg(7), + imm: ASIMDMovModImm::maybe_from_u64(18374687574904995840, ScalarSize::Size64).unwrap(), + invert: false, + size: VectorSize::Size64x2, + }, + "87E6046F", + "movi v7.2d, #18374687574904995840", + )); insns.push(( Inst::VecExtend { t: VecExtendOp::Sxtl8, @@ -4376,6 +4474,16 @@ fn test_aarch64_binemit() { "mov d23, v11.d[0]", )); + insns.push(( + Inst::FpuExtend { + rd: writable_vreg(31), + rn: vreg(0), + size: ScalarSize::Size32, + }, + "1F40201E", + "fmov s31, s0", + )); + insns.push(( Inst::FpuRR { fpu_op: FPUOp1::Abs32, diff --git a/cranelift/codegen/src/isa/aarch64/inst/imms.rs b/cranelift/codegen/src/isa/aarch64/inst/imms.rs index b6da0402bc..34c2946db0 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/imms.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/imms.rs @@ -668,39 +668,208 @@ impl MoveWideConst { } /// Advanced SIMD modified immediate as used by MOVI/MVNI. -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq)] pub struct ASIMDMovModImm { imm: u8, shift: u8, + is_64bit: bool, shift_ones: bool, } impl ASIMDMovModImm { + /// Construct an ASIMDMovModImm from an arbitrary 64-bit constant, if possible. + /// Note that the bits in `value` outside of the range specified by `size` are + /// ignored; for example, in the case of `ScalarSize::Size8` all bits above the + /// lowest 8 are ignored. pub fn maybe_from_u64(value: u64, size: ScalarSize) -> Option { match size { ScalarSize::Size8 => Some(ASIMDMovModImm { imm: value as u8, shift: 0, + is_64bit: false, shift_ones: false, }), + ScalarSize::Size16 => { + let value = value as u16; + + if value >> 8 == 0 { + Some(ASIMDMovModImm { + imm: value as u8, + shift: 0, + is_64bit: false, + shift_ones: false, + }) + } else if value as u8 == 0 { + Some(ASIMDMovModImm { + imm: (value >> 8) as u8, + shift: 8, + is_64bit: false, + shift_ones: false, + }) + } else { + None + } + } + ScalarSize::Size32 => { + let value = value as u32; + + // Value is of the form 0x00MMFFFF. + if value & 0xFF00FFFF == 0x0000FFFF { + let imm = (value >> 16) as u8; + + Some(ASIMDMovModImm { + imm, + shift: 16, + is_64bit: false, + shift_ones: true, + }) + // Value is of the form 0x0000MMFF. + } else if value & 0xFFFF00FF == 0x000000FF { + let imm = (value >> 8) as u8; + + Some(ASIMDMovModImm { + imm, + shift: 8, + is_64bit: false, + shift_ones: true, + }) + } else { + // Of the 4 bytes, at most one is non-zero. 
+ for shift in (0..32).step_by(8) { + if value & (0xFF << shift) == value { + return Some(ASIMDMovModImm { + imm: (value >> shift) as u8, + shift, + is_64bit: false, + shift_ones: false, + }); + } + } + + None + } + } + ScalarSize::Size64 => { + let mut imm = 0u8; + + // Check if all bytes are either 0 or 0xFF. + for i in 0..8 { + let b = (value >> (i * 8)) as u8; + + if b == 0 || b == 0xFF { + imm |= (b & 1) << i; + } else { + return None; + } + } + + Some(ASIMDMovModImm { + imm, + shift: 0, + is_64bit: true, + shift_ones: false, + }) + } _ => None, } } /// Create a zero immediate of this format. - pub fn zero() -> Self { + pub fn zero(size: ScalarSize) -> Self { ASIMDMovModImm { imm: 0, shift: 0, + is_64bit: size == ScalarSize::Size64, shift_ones: false, } } + /// Returns the value that this immediate represents. pub fn value(&self) -> (u8, u32, bool) { (self.imm, self.shift as u32, self.shift_ones) } } +/// Advanced SIMD modified immediate as used by the vector variant of FMOV. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct ASIMDFPModImm { + imm: u8, + is_64bit: bool, +} + +impl ASIMDFPModImm { + /// Construct an ASIMDFPModImm from an arbitrary 64-bit constant, if possible. + pub fn maybe_from_u64(value: u64, size: ScalarSize) -> Option { + // In all cases immediates are encoded as an 8-bit number 0b_abcdefgh; + // let `D` be the inverse of the digit `d`. + match size { + ScalarSize::Size32 => { + // In this case the representable immediates are 32-bit numbers of the form + // 0b_aBbb_bbbc_defg_h000 shifted to the left by 16. + let value = value as u32; + let b0_5 = (value >> 19) & 0b111111; + let b6 = (value >> 19) & (1 << 6); + let b7 = (value >> 24) & (1 << 7); + let imm = (b0_5 | b6 | b7) as u8; + + if value == Self::value32(imm) { + Some(ASIMDFPModImm { + imm, + is_64bit: false, + }) + } else { + None + } + } + ScalarSize::Size64 => { + // In this case the representable immediates are 64-bit numbers of the form + // 0b_aBbb_bbbb_bbcd_efgh shifted to the left by 48. + let b0_5 = (value >> 48) & 0b111111; + let b6 = (value >> 48) & (1 << 6); + let b7 = (value >> 56) & (1 << 7); + let imm = (b0_5 | b6 | b7) as u8; + + if value == Self::value64(imm) { + Some(ASIMDFPModImm { + imm, + is_64bit: true, + }) + } else { + None + } + } + _ => None, + } + } + + /// Returns bits ready for encoding. + pub fn enc_bits(&self) -> u8 { + self.imm + } + + /// Returns the 32-bit value that corresponds to an 8-bit encoding. + fn value32(imm: u8) -> u32 { + let imm = imm as u32; + let b0_5 = imm & 0b111111; + let b6 = (imm >> 6) & 1; + let b6_inv = b6 ^ 1; + let b7 = (imm >> 7) & 1; + + b0_5 << 19 | (b6 * 0b11111) << 25 | b6_inv << 30 | b7 << 31 + } + + /// Returns the 64-bit value that corresponds to an 8-bit encoding. 
+ fn value64(imm: u8) -> u64 { + let imm = imm as u64; + let b0_5 = imm & 0b111111; + let b6 = (imm >> 6) & 1; + let b6_inv = b6 ^ 1; + let b7 = (imm >> 7) & 1; + + b0_5 << 48 | (b6 * 0b11111111) << 54 | b6_inv << 62 | b7 << 63 + } +} + impl PrettyPrint for NZCV { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { let fmt = |c: char, v| if v { c.to_ascii_uppercase() } else { c }; @@ -782,7 +951,20 @@ impl PrettyPrint for MoveWideConst { impl PrettyPrint for ASIMDMovModImm { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { - if self.shift == 0 { + if self.is_64bit { + debug_assert_eq!(self.shift, 0); + + let enc_imm = self.imm as i8; + let mut imm = 0u64; + + for i in 0..8 { + let b = (enc_imm >> i) & 1; + + imm |= (-b as u8 as u64) << (i * 8); + } + + format!("#{}", imm) + } else if self.shift == 0 { format!("#{}", self.imm) } else { let shift_type = if self.shift_ones { "MSL" } else { "LSL" }; @@ -791,6 +973,16 @@ impl PrettyPrint for ASIMDMovModImm { } } +impl PrettyPrint for ASIMDFPModImm { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + if self.is_64bit { + format!("#{}", f64::from_bits(Self::value64(self.imm))) + } else { + format!("#{}", f32::from_bits(Self::value32(self.imm))) + } + } +} + #[cfg(test)] mod test { use super::*; @@ -1022,4 +1214,44 @@ mod test { unreachable!(); } } + + #[test] + fn asimd_fp_mod_imm_test() { + assert_eq!(None, ASIMDFPModImm::maybe_from_u64(0, ScalarSize::Size32)); + assert_eq!( + None, + ASIMDFPModImm::maybe_from_u64(0.013671875_f32.to_bits() as u64, ScalarSize::Size32) + ); + assert_eq!(None, ASIMDFPModImm::maybe_from_u64(0, ScalarSize::Size64)); + assert_eq!( + None, + ASIMDFPModImm::maybe_from_u64(10000_f64.to_bits(), ScalarSize::Size64) + ); + } + + #[test] + fn asimd_mov_mod_imm_test() { + assert_eq!( + None, + ASIMDMovModImm::maybe_from_u64(513, ScalarSize::Size16) + ); + assert_eq!( + None, + ASIMDMovModImm::maybe_from_u64(4278190335, ScalarSize::Size32) + ); + assert_eq!( + None, + ASIMDMovModImm::maybe_from_u64(8388608, ScalarSize::Size64) + ); + + assert_eq!( + Some(ASIMDMovModImm { + imm: 66, + shift: 16, + is_64bit: false, + shift_ones: true, + }), + ASIMDMovModImm::maybe_from_u64(4390911, ScalarSize::Size32) + ); + } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 676dff88e4..a8aa47c2a7 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -755,6 +755,13 @@ pub enum Inst { size: VectorSize, }, + /// Zero-extend a SIMD & FP scalar to the full width of a vector register. + FpuExtend { + rd: Writable, + rn: Reg, + size: ScalarSize, + }, + /// 1-op FPU instruction. FpuRR { fpu_op: FPUOp1, @@ -928,6 +935,13 @@ pub enum Inst { size: VectorSize, }, + /// Duplicate FP immediate to vector. + VecDupFPImm { + rd: Writable, + imm: ASIMDFPModImm, + size: VectorSize, + }, + /// Duplicate immediate to vector. VecDupImm { rd: Writable, @@ -1295,12 +1309,15 @@ impl Inst { value: u32, mut alloc_tmp: F, ) -> SmallVec<[Inst; 4]> { + // Note that we must make sure that all bits outside the lowest 32 are set to 0 + // because this function is also used to load wider constants (that have zeros + // in their most significant bits). 
if value == 0 { smallvec![Inst::VecDupImm { rd, - imm: ASIMDMovModImm::zero(), + imm: ASIMDMovModImm::zero(ScalarSize::Size32), invert: false, - size: VectorSize::Size8x8 + size: VectorSize::Size32x2 }] } else { // TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent @@ -1324,6 +1341,9 @@ impl Inst { const_data: u64, mut alloc_tmp: F, ) -> SmallVec<[Inst; 4]> { + // Note that we must make sure that all bits outside the lowest 64 are set to 0 + // because this function is also used to load wider constants (that have zeros + // in their most significant bits). if let Ok(const_data) = u32::try_from(const_data) { Inst::load_fp_constant32(rd, const_data, alloc_tmp) // TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent @@ -1394,7 +1414,7 @@ impl Inst { r } - /// Create instructions that load a 128-bit vector constant consisting of elements with + /// Create instructions that load a vector constant consisting of elements with /// the same value. pub fn load_replicated_vector_pattern Writable>( rd: Writable, @@ -1403,6 +1423,15 @@ impl Inst { mut alloc_tmp: F, ) -> SmallVec<[Inst; 5]> { let lane_size = size.lane_size(); + let widen_32_bit_pattern = |pattern, lane_size| { + if lane_size == ScalarSize::Size32 { + let pattern = pattern as u32 as u64; + + ASIMDMovModImm::maybe_from_u64(pattern | (pattern << 32), ScalarSize::Size64) + } else { + None + } + }; if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) { smallvec![Inst::VecDupImm { @@ -1421,6 +1450,27 @@ impl Inst { invert: true, size }] + } else if let Some(imm) = widen_32_bit_pattern(pattern, lane_size) { + let mut insts = smallvec![Inst::VecDupImm { + rd, + imm, + invert: false, + size: VectorSize::Size64x2, + }]; + + // TODO: Implement support for 64-bit scalar MOVI; we zero-extend the + // lower 64 bits instead. + if !size.is_128bits() { + insts.push(Inst::FpuExtend { + rd, + rn: rd.to_reg(), + size: ScalarSize::Size64, + }); + } + + insts + } else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(pattern, lane_size) { + smallvec![Inst::VecDupFPImm { rd, imm, size }] } else { let tmp = alloc_tmp(RegClass::I64, I64); let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern)[..]); @@ -1721,6 +1771,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_def(rd); collector.add_use(rn); } + &Inst::FpuExtend { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } &Inst::FpuRR { rd, rn, .. } => { collector.add_def(rd); collector.add_use(rn); @@ -1870,6 +1924,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_def(rd); collector.add_use(rn); } + &Inst::VecDupFPImm { rd, .. } => { + collector.add_def(rd); + } &Inst::VecDupImm { rd, .. } => { collector.add_def(rd); } @@ -2299,6 +2356,14 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { map_def(mapper, rd); map_use(mapper, rn); } + &mut Inst::FpuExtend { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } &mut Inst::FpuRR { ref mut rd, ref mut rn, @@ -2582,6 +2647,9 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { map_def(mapper, rd); map_use(mapper, rn); } + &mut Inst::VecDupFPImm { ref mut rd, .. } => { + map_def(mapper, rd); + } &mut Inst::VecDupImm { ref mut rd, .. 
} => { map_def(mapper, rd); } @@ -3229,6 +3297,12 @@ impl Inst { let rn = show_vreg_element(rn, mb_rru, idx, size); format!("mov {}, {}", rd, rn) } + &Inst::FpuExtend { rd, rn, size } => { + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); + let rn = show_vreg_scalar(rn, mb_rru, size); + + format!("fmov {}, {}", rd, rn) + } &Inst::FpuRR { fpu_op, rd, rn } => { let (op, sizesrc, sizedest) = match fpu_op { FPUOp1::Abs32 => ("fabs", ScalarSize::Size32, ScalarSize::Size32), @@ -3465,6 +3539,12 @@ impl Inst { let rn = show_vreg_element(rn, mb_rru, 0, size); format!("dup {}, {}", rd, rn) } + &Inst::VecDupFPImm { rd, imm, size } => { + let imm = imm.show_rru(mb_rru); + let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + + format!("fmov {}, {}", rd, imm) + } &Inst::VecDupImm { rd, imm, diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 086b9a3a20..75b4cbe727 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -853,7 +853,7 @@ pub(crate) fn lower_constant_f128>( // is potentially expensive. ctx.emit(Inst::VecDupImm { rd, - imm: ASIMDMovModImm::zero(), + imm: ASIMDMovModImm::zero(ScalarSize::Size8), invert: false, size: VectorSize::Size8x16, }); diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 4d3d4bfb1d..89bcd517f4 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -2075,8 +2075,6 @@ pub(crate) fn lower_insn_to_regs>( // derivation of these sequences. Alternative sequences are discussed in // https://github.com/bytecodealliance/wasmtime/issues/2296, although they are not // used here. - // Also .. FIXME: when https://github.com/bytecodealliance/wasmtime/pull/2310 is - // merged, use `lower_splat_constant` instead to generate the constants. 
let tmp_r0 = ctx.alloc_tmp(RegClass::I64, I64); let tmp_v0 = ctx.alloc_tmp(RegClass::V128, I8X16); let tmp_v1 = ctx.alloc_tmp(RegClass::V128, I8X16); @@ -2100,12 +2098,7 @@ pub(crate) fn lower_insn_to_regs>( size: VectorSize::Size8x16, imm: 7, }); - lower_constant_u64(ctx, tmp_r0, 0x8040201008040201u64); - ctx.emit(Inst::VecDup { - rd: tmp_v0, - rn: tmp_r0.to_reg(), - size: VectorSize::Size64x2, - }); + lower_splat_const(ctx, tmp_v0, 0x8040201008040201u64, VectorSize::Size64x2); ctx.emit(Inst::VecRRR { alu_op: VecALUOp::And, rd: tmp_v1, diff --git a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif index 25f53ff4b1..a3fa2c48c6 100644 --- a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif +++ b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif @@ -715,7 +715,7 @@ block0(v0: f32): ; nextln: movz x0, #20352, LSL #16 ; nextln: fmov d1, x0 ; nextln: fmin s2, s0, s1 -; nextln: movi v1.8b, #0 +; nextln: movi v1.2s, #0 ; nextln: fmax s2, s2, s1 ; nextln: fcmp s0, s0 ; nextln: fcsel s0, s1, s2, ne @@ -738,7 +738,7 @@ block0(v0: f32): ; nextln: movz x0, #52992, LSL #16 ; nextln: fmov d2, x0 ; nextln: fmax s1, s1, s2 -; nextln: movi v2.8b, #0 +; nextln: movi v2.2s, #0 ; nextln: fcmp s0, s0 ; nextln: fcsel s0, s2, s1, ne ; nextln: fcvtzs w0, s0 @@ -757,7 +757,7 @@ block0(v0: f32): ; nextln: movz x0, #24448, LSL #16 ; nextln: fmov d1, x0 ; nextln: fmin s2, s0, s1 -; nextln: movi v1.8b, #0 +; nextln: movi v1.2s, #0 ; nextln: fmax s2, s2, s1 ; nextln: fcmp s0, s0 ; nextln: fcsel s0, s1, s2, ne @@ -780,7 +780,7 @@ block0(v0: f32): ; nextln: movz x0, #57088, LSL #16 ; nextln: fmov d2, x0 ; nextln: fmax s1, s1, s2 -; nextln: movi v2.8b, #0 +; nextln: movi v2.2s, #0 ; nextln: fcmp s0, s0 ; nextln: fcsel s0, s2, s1, ne ; nextln: fcvtzs x0, s0 @@ -798,7 +798,7 @@ block0(v0: f64): ; nextln: mov fp, sp ; nextln: ldr d1, pc+8 ; b 12 ; data.f64 4294967295 ; nextln: fmin d2, d0, d1 -; nextln: movi v1.8b, #0 +; nextln: movi v1.2s, #0 ; nextln: fmax d2, d2, d1 ; nextln: fcmp d0, d0 ; nextln: fcsel d0, d1, d2, ne @@ -820,7 +820,7 @@ block0(v0: f64): ; nextln: movz x0, #49632, LSL #48 ; nextln: fmov d2, x0 ; nextln: fmax d1, d1, d2 -; nextln: movi v2.8b, #0 +; nextln: movi v2.2s, #0 ; nextln: fcmp d0, d0 ; nextln: fcsel d0, d2, d1, ne ; nextln: fcvtzs w0, d0 @@ -839,7 +839,7 @@ block0(v0: f64): ; nextln: movz x0, #17392, LSL #48 ; nextln: fmov d1, x0 ; nextln: fmin d2, d0, d1 -; nextln: movi v1.8b, #0 +; nextln: movi v1.2s, #0 ; nextln: fmax d2, d2, d1 ; nextln: fcmp d0, d0 ; nextln: fcsel d0, d1, d2, ne @@ -862,7 +862,7 @@ block0(v0: f64): ; nextln: movz x0, #50144, LSL #48 ; nextln: fmov d2, x0 ; nextln: fmax d1, d1, d2 -; nextln: movi v2.8b, #0 +; nextln: movi v2.2s, #0 ; nextln: fcmp d0, d0 ; nextln: fcsel d0, d2, d1, ne ; nextln: fcvtzs x0, d0 diff --git a/cranelift/filetests/filetests/isa/aarch64/simd.clif b/cranelift/filetests/filetests/isa/aarch64/simd.clif index ac6fb7d6ef..ea9ad30d53 100644 --- a/cranelift/filetests/filetests/isa/aarch64/simd.clif +++ b/cranelift/filetests/filetests/isa/aarch64/simd.clif @@ -127,3 +127,46 @@ block0(v0: i64, v1: i64): ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret + +function %f9() -> i32x2 { +block0: + v0 = iconst.i32 4278190335 + v1 = splat.i32x2 v0 + return v1 +} + +; check: stp fp, lr, [sp, #-16]! 
+; nextln: mov fp, sp +; nextln: movi v0.2d, #18374687579166474495 +; nextln: fmov d0, d0 +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + +function %f10() -> i32x4 { +block0: + v0 = iconst.i32 4293918720 + v1 = splat.i32x4 v0 + return v1 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: mvni v0.4s, #15, MSL #16 +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + +function %f11() -> f32x4 { +block0: + v0 = f32const 0x1.5 + v1 = splat.f32x4 v0 + return v1 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: fmov v0.4s, #1.3125 +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret
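
Note on the immediate forms exercised above (a standalone sketch, not part of the patch): FMOV (vector, immediate) can only materialize floating-point values whose entire bit pattern is recoverable from an 8-bit field 0b_abcdefgh, which is what ASIMDFPModImm models via value32/value64, while MOVI with 64-bit elements expands each of the 8 immediate bits into an all-zeros or all-ones byte, which is what ASIMDMovModImm models for ScalarSize::Size64. The Rust sketch below mirrors that logic outside of Cranelift; the helper names (expand_fp32, expand_fp64, encode_fp32, expand_movi64, encode_movi64) are invented for this illustration and do not exist in the code base.

/// Expand the 8-bit FMOV immediate to the f32 bit pattern it denotes:
/// sign = a, exponent = NOT(b):bbbbb:cd, fraction = efgh followed by 19 zeros.
fn expand_fp32(imm8: u8) -> u32 {
    let imm = imm8 as u32;
    let cdefgh = imm & 0b11_1111;
    let b = (imm >> 6) & 1;
    let a = (imm >> 7) & 1;
    (a << 31) | ((b ^ 1) << 30) | ((b * 0b1_1111) << 25) | (cdefgh << 19)
}

/// Same expansion for f64: sign = a, exponent = NOT(b):bbbbbbbb:cd,
/// fraction = efgh followed by 48 zeros.
fn expand_fp64(imm8: u8) -> u64 {
    let imm = imm8 as u64;
    let cdefgh = imm & 0b11_1111;
    let b = (imm >> 6) & 1;
    let a = (imm >> 7) & 1;
    (a << 63) | ((b ^ 1) << 62) | ((b * 0b1111_1111) << 54) | (cdefgh << 48)
}

/// Brute-force representability check for f32 (256 candidates); the round-trip
/// that ASIMDFPModImm::maybe_from_u64 performs directly on the extracted bits.
fn encode_fp32(bits: u32) -> Option<u8> {
    (0..=255u8).find(|&imm8| expand_fp32(imm8) == bits)
}

/// Expand the 8-bit MOVI (64-bit element) immediate: bit i selects byte i of
/// the result, 0xFF if set and 0x00 if clear.
fn expand_movi64(imm8: u8) -> u64 {
    (0..8).fold(0u64, |acc, i| {
        acc | ((((imm8 >> i) & 1) as u64 * 0xFF) << (i * 8))
    })
}

/// Inverse direction: representable only if every byte is 0x00 or 0xFF,
/// mirroring ASIMDMovModImm::maybe_from_u64 for ScalarSize::Size64.
fn encode_movi64(value: u64) -> Option<u8> {
    let mut imm8 = 0u8;
    for i in 0..8 {
        match (value >> (i * 8)) as u8 {
            0x00 => {}
            0xFF => imm8 |= 1 << i,
            _ => return None,
        }
    }
    Some(imm8)
}

fn main() {
    // "fmov v31.2s, #1" above: 1.0f32 encodes as imm8 = 0b0111_0000.
    assert_eq!(encode_fp32(1.0f32.to_bits()), Some(0b0111_0000));
    // "fmov v0.2d, #2" above: imm8 = 0 expands to 2.0f64.
    assert_eq!(f64::from_bits(expand_fp64(0)), 2.0);
    // 0.0 and 0.013671875 are rejected, matching asimd_fp_mod_imm_test.
    assert_eq!(encode_fp32(0.0f32.to_bits()), None);
    assert_eq!(encode_fp32(0.013671875f32.to_bits()), None);
    // "movi v7.2d, #18374687574904995840" above: 0xFF0000FF00FF0000
    // round-trips through imm8 = 0b1001_0100.
    assert_eq!(encode_movi64(0xFF0000FF00FF0000), Some(0b1001_0100));
    assert_eq!(expand_movi64(0b1001_0100), 0xFF0000FF00FF0000);
    // 8388608 contains a byte other than 0x00/0xFF and is rejected,
    // matching asimd_mov_mod_imm_test.
    assert_eq!(encode_movi64(8388608), None);
}

The shifted (LSL #8/16/24) and shifted-ones (MSL #8/16) 16- and 32-bit MOVI/MVNI forms follow the same pattern of an 8-bit payload plus a placement rule, which is why ASIMDMovModImm::maybe_from_u64 only has to test a handful of byte positions and masks per lane size.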