diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index f035789c1f..8d182a92de 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -466,14 +466,6 @@ (mem PairAMode) (flags MemFlags)) - (LoadFpuConst64 - (rd WritableReg) - (const_data u64)) - - (LoadFpuConst128 - (rd WritableReg) - (const_data u128)) - ;; Conversion: FP -> integer. (FpuToInt (op FpuToIntOp) @@ -1135,6 +1127,11 @@ (off i64) (ty Type)) + ;; A reference to a constant which is placed outside of the function's + ;; body, typically at the end. + (Const + (addr VCodeConstant)) + ;; Offset from the "nominal stack pointer", which is where the real SP is ;; just after stack and spill slots are allocated in the function prologue. ;; At emission time, this is converted to `SPOffset` with a fixup added to @@ -1194,6 +1191,16 @@ (rule (lane_size (dynamic_lane 32 _)) (ScalarSize.Size32)) (rule (lane_size (dynamic_lane 64 _)) (ScalarSize.Size64)) +;; Helper for extracting the size of a lane from the input `VectorSize` +(decl pure vector_lane_size (VectorSize) ScalarSize) +(rule (vector_lane_size (VectorSize.Size8x16)) (ScalarSize.Size8)) +(rule (vector_lane_size (VectorSize.Size8x8)) (ScalarSize.Size8)) +(rule (vector_lane_size (VectorSize.Size16x8)) (ScalarSize.Size16)) +(rule (vector_lane_size (VectorSize.Size16x4)) (ScalarSize.Size16)) +(rule (vector_lane_size (VectorSize.Size32x4)) (ScalarSize.Size32)) +(rule (vector_lane_size (VectorSize.Size32x2)) (ScalarSize.Size32)) +(rule (vector_lane_size (VectorSize.Size64x2)) (ScalarSize.Size64)) + (type Cond extern (enum (Eq) @@ -1908,6 +1915,13 @@ (_ Unit (emit (MInst.VecDupFromFpu dst src size lane)))) dst)) +;; Helper for emitting `MInst.VecDupImm` instructions. +(decl vec_dup_imm (ASIMDMovModImm bool VectorSize) Reg) +(rule (vec_dup_imm imm invert size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecDupImm dst imm invert size)))) + dst)) + ;; Helper for emitting `MInst.AluRRImm12` instructions. (decl alu_rr_imm12 (ALUOp Type Reg Imm12) Reg) (rule (alu_rr_imm12 op ty src imm) @@ -2158,6 +2172,13 @@ (_ Unit (emit (MInst.MovToFpu dst x size)))) dst)) +;; Helper for emitting `MInst.FpuMoveFPImm` instructions. +(decl fpu_move_fp_imm (ASIMDFPModImm ScalarSize) Reg) +(rule (fpu_move_fp_imm imm size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.FpuMoveFPImm dst imm size)))) + dst)) + ;; Helper for emitting `MInst.MovToVec` instructions. (decl mov_to_vec (Reg Reg u8 VectorSize) Reg) (rule (mov_to_vec src1 src2 lane size) @@ -2986,24 +3007,122 @@ (amode ty addr offset))) ;; Lower a constant f32. -(decl constant_f32 (u64) Reg) -;; TODO: Port lower_constant_f32() to ISLE. -(extern constructor constant_f32 constant_f32) +;; +;; Note that we must make sure that all bits outside the lowest 32 are set to 0 +;; because this function is also used to load wider constants (that have zeros +;; in their most significant bits). +(decl constant_f32 (u32) Reg) +(rule 2 (constant_f32 0) + (vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32)) + $false + (VectorSize.Size32x2))) +(rule 1 (constant_f32 n) + (if-let imm (asimd_fp_mod_imm_from_u64 n (ScalarSize.Size32))) + (fpu_move_fp_imm imm (ScalarSize.Size32))) +(rule (constant_f32 n) + (mov_to_fpu (imm $I32 (ImmExtend.Zero) n) (ScalarSize.Size32))) ;; Lower a constant f64. 
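Review note on the three `constant_f32` rules above (the `constant_f64` rules continue below): the tiers are (1) zero via a `movi` of an `asimd_mov_mod_imm_zero` immediate, (2) an 8-bit `fmov` floating-point immediate when `asimd_fp_mod_imm_from_u64` succeeds, and (3) materializing the bits in a GPR and moving them over with `fmov`. As a rough standalone sketch of tier (2): the encodable values are +/-n/16 * 2^e with 16 <= n <= 31 and -3 <= e <= 4. `is_fmov_imm_f32` below is an illustrative name, not the actual `ASIMDFPModImm::maybe_from_u64` implementation:

    /// Returns true if `bits` (an f32 bit pattern) looks encodable as an AArch64
    /// `fmov` 8-bit immediate: the low 19 fraction bits must be zero, exponent
    /// bits 29..=25 must all be equal, and bit 30 must be their complement.
    fn is_fmov_imm_f32(bits: u32) -> bool {
        let low_frac_zero = (bits & 0x0007_ffff) == 0;
        let b = (bits >> 25) & 0x1f; // the five replicated exponent bits
        let replicated = b == 0 || b == 0x1f;
        let complemented = ((bits >> 30) & 1) != (b & 1);
        low_frac_zero && replicated && complemented
    }

    fn main() {
        assert!(is_fmov_imm_f32(1.0f32.to_bits()));     // encodable: the fmov rule fires
        assert!(is_fmov_imm_f32((-0.5f32).to_bits()));  // encodable
        assert!(!is_fmov_imm_f32(0.1f32.to_bits()));    // falls through to the GPR rule
        assert!(!is_fmov_imm_f32(0.0f32.to_bits()));    // zero is handled by the movi rule
    }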
+;; +;; Note that we must make sure that all bits outside the lowest 64 are set to 0 +;; because this function is also used to load wider constants (that have zeros +;; in their most significant bits). +;; TODO: Treat as half of a 128 bit vector and consider replicated patterns. +;; Scalar MOVI might also be an option. (decl constant_f64 (u64) Reg) -;; TODO: Port lower_constant_f64() to ISLE. -(extern constructor constant_f64 constant_f64) +(rule 4 (constant_f64 0) + (vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32)) + $false + (VectorSize.Size32x2))) +(rule 3 (constant_f64 n) + (if-let imm (asimd_fp_mod_imm_from_u64 n (ScalarSize.Size64))) + (fpu_move_fp_imm imm (ScalarSize.Size64))) +(rule 2 (constant_f64 (u64_as_u32 n)) + (constant_f32 n)) +(rule 1 (constant_f64 (u64_low32_bits_unset n)) + (mov_to_fpu (imm $I64 (ImmExtend.Zero) n) (ScalarSize.Size64))) +(rule (constant_f64 n) + (fpu_load64 (AMode.Const (emit_u64_le_const n)) (mem_flags_trusted))) + +;; Tests whether the low 32 bits in the input are all zero. +(decl u64_low32_bits_unset (u64) u64) +(extern extractor u64_low32_bits_unset u64_low32_bits_unset) ;; Lower a constant f128. (decl constant_f128 (u128) Reg) -;; TODO: Port lower_constant_f128() to ISLE. -(extern constructor constant_f128 constant_f128) +(rule 3 (constant_f128 0) + (vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size8)) + $false + (VectorSize.Size8x16))) + +;; If the upper 64-bits are all zero then defer to `constant_f64`. +(rule 2 (constant_f128 (u128_as_u64 n)) (constant_f64 n)) + +;; If the low half of the u128 equals the high half then delegate to the splat +;; logic as a splat of a 64-bit value. +(rule 1 (constant_f128 (u128_replicated_u64 n)) + (splat_const n (VectorSize.Size64x2))) + +;; Base case is to load the constant from memory. +(rule (constant_f128 n) + (fpu_load128 (AMode.Const (emit_u128_le_const n)) (mem_flags_trusted))) ;; Lower a vector splat with a constant parameter. +;; +;; The 64-bit input here only uses the low bits for the lane size in +;; `VectorSize` and all other bits are ignored. (decl splat_const (u64 VectorSize) Reg) -;; TODO: Port lower_splat_const() to ISLE. -(extern constructor splat_const splat_const) + +;; If the splat'd constant can itself be reduced in size then attempt to do so +;; as it will make it easier to create the immediates in the instructions below. +(rule 5 (splat_const (u64_replicated_u32 n) (VectorSize.Size64x2)) + (splat_const n (VectorSize.Size32x4))) +(rule 5 (splat_const (u32_replicated_u16 n) (VectorSize.Size32x4)) + (splat_const n (VectorSize.Size16x8))) +(rule 5 (splat_const (u32_replicated_u16 n) (VectorSize.Size32x2)) + (splat_const n (VectorSize.Size16x4))) +(rule 5 (splat_const (u16_replicated_u8 n) (VectorSize.Size16x8)) + (splat_const n (VectorSize.Size8x16))) +(rule 5 (splat_const (u16_replicated_u8 n) (VectorSize.Size16x4)) + (splat_const n (VectorSize.Size8x8))) + +;; Special cases for `vec_dup_imm` instructions where the input is either +;; negated or not. 
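Review note on the two `vec_dup_imm` rules that follow: the `invert` flag selects `mvni` rather than `movi`, so a lane value whose bitwise NOT is encodable as an `ASIMDMovModImm` still splats in a single instruction. A small illustration using only the simplest 32-bit form (an 8-bit value shifted into one byte of the lane); `movi_shifted_imm8` is an illustrative stand-in for the richer `ASIMDMovModImm::maybe_from_u64`:

    /// Return (imm8, shift) if `lane` is an 8-bit value shifted by 0, 8, 16 or
    /// 24 bits, i.e. one of the MOVI encodings for a 32-bit lane.
    fn movi_shifted_imm8(lane: u32) -> Option<(u8, u32)> {
        for shift in [0u32, 8, 16, 24] {
            if (lane & !(0xffu32 << shift)) == 0 {
                return Some(((lane >> shift) as u8, shift));
            }
        }
        None
    }

    fn main() {
        // 0xffff_ff00 is not directly encodable in this form...
        assert_eq!(movi_shifted_imm8(0xffff_ff00), None);
        // ...but its bitwise NOT is, so the inverted rule can splat it with a
        // single `mvni` instead of a GPR load plus `dup`.
        assert_eq!(movi_shifted_imm8(!0xffff_ff00u32), Some((0xff, 0)));
    }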
+(rule 4 (splat_const n size) + (if-let imm (asimd_mov_mod_imm_from_u64 n (vector_lane_size size))) + (vec_dup_imm imm $false size)) +(rule 3 (splat_const n size) + (if-let imm (asimd_mov_mod_imm_from_u64 (u64_not n) (vector_lane_size size))) + (vec_dup_imm imm $true size)) + +;; Special case a 32-bit splat where an immediate can be created by +;; concatenating the 32-bit constant into a 64-bit value +(rule 2 (splat_const n (VectorSize.Size32x4)) + (if-let imm (asimd_mov_mod_imm_from_u64 (u64_or n (u64_shl n 32)) (ScalarSize.Size64))) + (vec_dup_imm imm $false (VectorSize.Size64x2))) +(rule 2 (splat_const n (VectorSize.Size32x2)) + (if-let imm (asimd_mov_mod_imm_from_u64 (u64_or n (u64_shl n 32)) (ScalarSize.Size64))) + (fpu_extend (vec_dup_imm imm $false (VectorSize.Size64x2)) (ScalarSize.Size64))) + +(rule 1 (splat_const n size) + (if-let imm (asimd_fp_mod_imm_from_u64 n (vector_lane_size size))) + (vec_dup_fp_imm imm size)) + +;; The base case for splat is to use `vec_dup` with the immediate loaded into a +;; register. +(rule (splat_const n size) + (vec_dup (imm $I64 (ImmExtend.Zero) n) size)) + +;; Each of these extractors tests whether the upper half of the input equals the +;; lower half of the input +(decl u128_replicated_u64 (u64) u128) +(extern extractor u128_replicated_u64 u128_replicated_u64) +(decl u64_replicated_u32 (u64) u64) +(extern extractor u64_replicated_u32 u64_replicated_u32) +(decl u32_replicated_u16 (u64) u64) +(extern extractor u32_replicated_u16 u32_replicated_u16) +(decl u16_replicated_u8 (u64) u64) +(extern extractor u16_replicated_u8 u16_replicated_u8) ;; Lower a FloatCC to a Cond. (decl fp_cond_code (FloatCC) Cond) @@ -3814,3 +3933,36 @@ ;; Helper for emitting the `trn2` instruction (decl vec_trn2 (Reg Reg VectorSize) Reg) (rule (vec_trn2 rn rm size) (vec_rrr (VecALUOp.Trn2) rn rm size)) + +;; Helper for creating a zero value `ASIMDMovModImm` immediate. +(decl asimd_mov_mod_imm_zero (ScalarSize) ASIMDMovModImm) +(extern constructor asimd_mov_mod_imm_zero asimd_mov_mod_imm_zero) + +;; Helper for fallibly creating an `ASIMDMovModImm` immediate from its parts. +(decl pure partial asimd_mov_mod_imm_from_u64 (u64 ScalarSize) ASIMDMovModImm) +(extern constructor asimd_mov_mod_imm_from_u64 asimd_mov_mod_imm_from_u64) + +;; Helper for fallibly creating an `ASIMDFPModImm` immediate from its parts. 
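Review note on the `u*_replicated_u*` extractors declared just above (their Rust implementations appear in `lower/isle.rs` later in this patch): the priority-5 `splat_const` rules repeatedly narrow the splatted value while its two halves are equal, which maximizes the chance that one of the immediate forms applies. A standalone sketch of that narrowing; `narrow_splat` is an illustrative name, not a helper in the patch:

    /// Narrow a splatted value while its upper half equals its lower half,
    /// mirroring u128_replicated_u64 / u64_replicated_u32 / u32_replicated_u16 /
    /// u16_replicated_u8, and report the final (value, lane_bits) pair.
    fn narrow_splat(value: u64, mut lane_bits: u32) -> (u64, u32) {
        let mut value = value & (u64::MAX >> (64 - lane_bits));
        while lane_bits > 8 {
            let half = lane_bits / 2;
            let low = value & (u64::MAX >> (64 - half));
            if (value >> half) != low {
                break;
            }
            value = low;
            lane_bits = half;
        }
        (value, lane_bits)
    }

    fn main() {
        // A 64-bit splat of 0x1234_1234_1234_1234 is really a 16-bit splat of
        // 0x1234 (it cannot narrow to 8 bits because 0x12 != 0x34).
        assert_eq!(narrow_splat(0x1234_1234_1234_1234, 64), (0x1234, 16));
        // 0x0000_00ff per 32-bit lane stays 32-bit: its 16-bit halves differ.
        assert_eq!(narrow_splat(0x0000_00ff, 32), (0xff, 32));
    }

(The `asimd_fp_mod_imm_from_u64` declaration for the helper comment just above continues below.)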
+(decl pure partial asimd_fp_mod_imm_from_u64 (u64 ScalarSize) ASIMDFPModImm) +(extern constructor asimd_fp_mod_imm_from_u64 asimd_fp_mod_imm_from_u64) + +;; Helper for creating a `VecDupFPImm` instruction +(decl vec_dup_fp_imm (ASIMDFPModImm VectorSize) Reg) +(rule (vec_dup_fp_imm imm size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecDupFPImm dst imm size)))) + dst)) + +;; Helper for creating a `FpuLoad64` instruction +(decl fpu_load64 (AMode MemFlags) Reg) +(rule (fpu_load64 amode flags) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.FpuLoad64 dst amode flags)))) + dst)) + +;; Helper for creating a `FpuLoad128` instruction +(decl fpu_load128 (AMode MemFlags) Reg) +(rule (fpu_load128 amode flags) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.FpuLoad128 dst amode flags)))) + dst)) diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 69eb7e5251..1c29591ba2 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -124,6 +124,9 @@ pub enum MemLabel { /// offset from this instruction. This form must be used at emission time; /// see `memlabel_finalize()` for how other forms are lowered to this one. PCRel(i32), + /// An address that refers to a label within a `MachBuffer`, for example a + /// constant that lives in the pool at the end of the function. + Mach(MachLabel), } impl AMode { @@ -194,6 +197,7 @@ impl AMode { | &AMode::FPOffset { .. } | &AMode::SPOffset { .. } | &AMode::NominalSPOffset { .. } + | &AMode::Const { .. } | AMode::Label { .. } => self.clone(), } } @@ -382,7 +386,8 @@ impl PrettyPrint for ExtendOp { impl PrettyPrint for MemLabel { fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { match self { - &MemLabel::PCRel(off) => format!("pc+{}", off), + MemLabel::PCRel(off) => format!("pc+{}", off), + MemLabel::Mach(off) => format!("label({})", off.get()), } } } @@ -465,6 +470,8 @@ impl PrettyPrint for AMode { let simm9 = simm9.pretty_print(8, allocs); format!("[sp], {}", simm9) } + AMode::Const { addr } => format!("[const({})]", addr.as_u32()), + // Eliminated by `mem_finalize()`. &AMode::SPOffset { .. } | &AMode::FPOffset { .. } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 2332ff19ba..4f0288dc2b 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -2,7 +2,7 @@ use regalloc2::Allocation; -use crate::binemit::{CodeOffset, Reloc, StackMap}; +use crate::binemit::{Reloc, StackMap}; use crate::ir::{types::*, RelSourceLoc}; use crate::ir::{LibCall, MemFlags, TrapCode}; use crate::isa::aarch64::inst::*; @@ -10,20 +10,12 @@ use crate::machinst::{ty_bits, Reg, RegClass, Writable}; use crate::trace; use core::convert::TryFrom; -/// Memory label/reference finalization: convert a MemLabel to a PC-relative -/// offset, possibly emitting relocation(s) as necessary. -pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 { - match label { - &MemLabel::PCRel(rel) => rel, - } -} - /// Memory addressing mode finalization: convert "special" modes (e.g., /// generic arbitrary stack offset) into real addressing modes, possibly by /// emitting some helper instructions that come immediately before the use /// of this amode. 
pub fn mem_finalize( - insn_off: CodeOffset, + sink: Option<&mut MachBuffer>, mem: &AMode, state: &EmitState, ) -> (SmallVec<[Inst; 4]>, AMode) { @@ -74,14 +66,14 @@ pub fn mem_finalize( } } - &AMode::Label { ref label } => { - let off = memlabel_finalize(insn_off, label); - ( - smallvec![], - AMode::Label { - label: MemLabel::PCRel(off), - }, - ) + AMode::Const { addr } => { + let sink = match sink { + Some(sink) => sink, + None => return (smallvec![], mem.clone()), + }; + let label = sink.get_label_for_constant(*addr); + let label = MemLabel::Mach(label); + (smallvec![], AMode::Label { label }) } _ => (smallvec![], mem.clone()), @@ -959,7 +951,7 @@ impl MachInstEmit for Inst { | &Inst::FpuLoad128 { rd, ref mem, flags } => { let rd = allocs.next_writable(rd); let mem = mem.with_allocs(&mut allocs); - let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state); + let (mem_insts, mem) = mem_finalize(Some(sink), &mem, state); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); @@ -1039,7 +1031,19 @@ impl MachInstEmit for Inst { &AMode::Label { ref label } => { let offset = match label { // cast i32 to u32 (two's-complement) - &MemLabel::PCRel(off) => off as u32, + MemLabel::PCRel(off) => *off as u32, + // Emit a relocation into the `MachBuffer` + // for the label that's being loaded from and + // encode an address of 0 in its place which will + // get filled in by relocation resolution later on. + MemLabel::Mach(label) => { + sink.use_label_at_offset( + sink.cur_offset(), + *label, + LabelUse::Ldr19, + ); + 0 + } } / 4; assert!(offset < (1 << 19)); match self { @@ -1076,6 +1080,7 @@ impl MachInstEmit for Inst { &AMode::SPOffset { .. } | &AMode::FPOffset { .. } | &AMode::NominalSPOffset { .. } + | &AMode::Const { .. } | &AMode::RegOffset { .. } => { panic!("Should not see {:?} here!", mem) } @@ -1091,7 +1096,7 @@ impl MachInstEmit for Inst { | &Inst::FpuStore128 { rd, ref mem, flags } => { let rd = allocs.next(rd); let mem = mem.with_allocs(&mut allocs); - let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state); + let (mem_insts, mem) = mem_finalize(Some(sink), &mem, state); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); @@ -1172,6 +1177,7 @@ impl MachInstEmit for Inst { &AMode::SPOffset { .. } | &AMode::FPOffset { .. } | &AMode::NominalSPOffset { .. } + | &AMode::Const { .. } | &AMode::RegOffset { .. 
} => { panic!("Should not see {:?} here!", mem) } @@ -2319,41 +2325,6 @@ impl MachInstEmit for Inst { }; sink.put4(enc_inttofpu(top16, rd, rn)); } - &Inst::LoadFpuConst64 { rd, const_data } => { - let rd = allocs.next_writable(rd); - let inst = Inst::FpuLoad64 { - rd, - mem: AMode::Label { - label: MemLabel::PCRel(8), - }, - flags: MemFlags::trusted(), - }; - inst.emit(&[], sink, emit_info, state); - let inst = Inst::Jump { - dest: BranchTarget::ResolvedOffset(12), - }; - inst.emit(&[], sink, emit_info, state); - sink.put8(const_data); - } - &Inst::LoadFpuConst128 { rd, const_data } => { - let rd = allocs.next_writable(rd); - let inst = Inst::FpuLoad128 { - rd, - mem: AMode::Label { - label: MemLabel::PCRel(8), - }, - flags: MemFlags::trusted(), - }; - inst.emit(&[], sink, emit_info, state); - let inst = Inst::Jump { - dest: BranchTarget::ResolvedOffset(20), - }; - inst.emit(&[], sink, emit_info, state); - - for i in const_data.to_le_bytes().iter() { - sink.put1(*i); - } - } &Inst::FpuCSel32 { rd, rn, rm, cond } => { let rd = allocs.next_writable(rd); let rn = allocs.next(rn); @@ -3350,7 +3321,7 @@ impl MachInstEmit for Inst { &Inst::LoadAddr { rd, ref mem } => { let rd = allocs.next_writable(rd); let mem = mem.with_allocs(&mut allocs); - let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state); + let (mem_insts, mem) = mem_finalize(Some(sink), &mem, state); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index e06221836b..c0ba8ce1b1 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -6891,24 +6891,6 @@ fn test_aarch64_binemit() { "stp q18, q22, [sp], #304", )); - insns.push(( - Inst::LoadFpuConst64 { - rd: writable_vreg(16), - const_data: 1.0_f64.to_bits(), - }, - "5000005C03000014000000000000F03F", - "ldr d16, pc+8 ; b 12 ; data.f64 1", - )); - - insns.push(( - Inst::LoadFpuConst128 { - rd: writable_vreg(5), - const_data: 0x0f0e0d0c0b0a09080706050403020100, - }, - "4500009C05000014000102030405060708090A0B0C0D0E0F", - "ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100", - )); - insns.push(( Inst::FpuCSel32 { rd: writable_vreg(1), diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 6e8e602de2..22487b264e 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -10,7 +10,6 @@ use crate::{settings, CodegenError, CodegenResult}; use crate::machinst::{PrettyPrint, Reg, RegClass, Writable}; use alloc::vec::Vec; -use core::convert::TryFrom; use regalloc2::{PRegSet, VReg}; use smallvec::{smallvec, SmallVec}; use std::string::{String, ToString}; @@ -250,215 +249,6 @@ impl Inst { } } - /// Create instructions that load a 32-bit floating-point constant. - pub fn load_fp_constant32 Writable>( - rd: Writable, - const_data: u32, - mut alloc_tmp: F, - ) -> SmallVec<[Inst; 4]> { - // Note that we must make sure that all bits outside the lowest 32 are set to 0 - // because this function is also used to load wider constants (that have zeros - // in their most significant bits). 
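Review note on the removals above (the deleted `load_fp_constant*` helpers continue below): the old scheme placed each constant inline in the instruction stream, as an `ldr` from `pc+8`, a branch over the data, and the literal bytes, i.e. 16 bytes of code per f64 use and 24 per f128. With `AMode::Const` the load is a single `ldr` against a `MachLabel` for a `VCodeConstant` placed after the function body, and the 19-bit PC-relative field is filled in by normal label resolution (`LabelUse::Ldr19` above). A sketch of how such a fixup patches the instruction word; `patch_ldr_literal` is illustrative, not the actual `LabelUse::patch` code:

    /// Patch the 19-bit word offset of an `ldr` (literal): the field occupies
    /// bits 5..=23 of the instruction word.
    fn patch_ldr_literal(insn: u32, delta_bytes: i64) -> u32 {
        assert_eq!(delta_bytes % 4, 0, "ldr literal offsets are word multiples");
        let imm19 = delta_bytes / 4;
        assert!(imm19 >= -(1 << 18) && imm19 < (1 << 18), "out of +/-1 MiB range");
        (insn & !(0x7ffff << 5)) | (((imm19 as u32) & 0x7ffff) << 5)
    }

    fn main() {
        // `ldr q3, #0` is emitted for MemLabel::Mach with a zero offset field...
        let unpatched = 0x9c000003;
        // ...and later patched to point 0x20 bytes ahead: `ldr q3, #0x20`.
        assert_eq!(patch_ldr_literal(unpatched, 0x20), 0x9c000103);
    }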
- if const_data == 0 { - smallvec![Inst::VecDupImm { - rd, - imm: ASIMDMovModImm::zero(ScalarSize::Size32), - invert: false, - size: VectorSize::Size32x2, - }] - } else if let Some(imm) = - ASIMDFPModImm::maybe_from_u64(const_data.into(), ScalarSize::Size32) - { - smallvec![Inst::FpuMoveFPImm { - rd, - imm, - size: ScalarSize::Size32, - }] - } else { - let tmp = alloc_tmp(I32); - let mut insts = Inst::load_constant(tmp, const_data as u64, &mut alloc_tmp); - - insts.push(Inst::MovToFpu { - rd, - rn: tmp.to_reg(), - size: ScalarSize::Size32, - }); - - insts - } - } - - /// Create instructions that load a 64-bit floating-point constant. - pub fn load_fp_constant64 Writable>( - rd: Writable, - const_data: u64, - mut alloc_tmp: F, - ) -> SmallVec<[Inst; 4]> { - // Note that we must make sure that all bits outside the lowest 64 are set to 0 - // because this function is also used to load wider constants (that have zeros - // in their most significant bits). - // TODO: Treat as half of a 128 bit vector and consider replicated patterns. - // Scalar MOVI might also be an option. - if const_data == 0 { - smallvec![Inst::VecDupImm { - rd, - imm: ASIMDMovModImm::zero(ScalarSize::Size32), - invert: false, - size: VectorSize::Size32x2, - }] - } else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(const_data, ScalarSize::Size64) { - smallvec![Inst::FpuMoveFPImm { - rd, - imm, - size: ScalarSize::Size64, - }] - } else if let Ok(const_data) = u32::try_from(const_data) { - Inst::load_fp_constant32(rd, const_data, alloc_tmp) - } else if const_data & (u32::MAX as u64) == 0 { - let tmp = alloc_tmp(I64); - let mut insts = Inst::load_constant(tmp, const_data, &mut alloc_tmp); - - insts.push(Inst::MovToFpu { - rd, - rn: tmp.to_reg(), - size: ScalarSize::Size64, - }); - - insts - } else { - smallvec![Inst::LoadFpuConst64 { rd, const_data }] - } - } - - /// Create instructions that load a 128-bit vector constant. - pub fn load_fp_constant128 Writable>( - rd: Writable, - const_data: u128, - alloc_tmp: F, - ) -> SmallVec<[Inst; 5]> { - if let Ok(const_data) = u64::try_from(const_data) { - SmallVec::from(&Inst::load_fp_constant64(rd, const_data, alloc_tmp)[..]) - } else if let Some((pattern, size)) = - Inst::get_replicated_vector_pattern(const_data, ScalarSize::Size64) - { - Inst::load_replicated_vector_pattern( - rd, - pattern, - VectorSize::from_lane_size(size, true), - alloc_tmp, - ) - } else { - smallvec![Inst::LoadFpuConst128 { rd, const_data }] - } - } - - /// Determine whether a 128-bit constant represents a vector consisting of elements with - /// the same value. - pub fn get_replicated_vector_pattern( - value: u128, - size: ScalarSize, - ) -> Option<(u64, ScalarSize)> { - let (mask, shift, next_size) = match size { - ScalarSize::Size8 => (u8::MAX as u128, 8, ScalarSize::Size128), - ScalarSize::Size16 => (u16::MAX as u128, 16, ScalarSize::Size8), - ScalarSize::Size32 => (u32::MAX as u128, 32, ScalarSize::Size16), - ScalarSize::Size64 => (u64::MAX as u128, 64, ScalarSize::Size32), - _ => return None, - }; - let mut r = None; - let v = value & mask; - - if (value >> shift) & mask == v { - r = Inst::get_replicated_vector_pattern(v, next_size); - - if r.is_none() { - r = Some((v as u64, size)); - } - } - - r - } - - /// Create instructions that load a vector constant consisting of elements with - /// the same value. 
- pub fn load_replicated_vector_pattern Writable>( - rd: Writable, - pattern: u64, - size: VectorSize, - mut alloc_tmp: F, - ) -> SmallVec<[Inst; 5]> { - let lane_size = size.lane_size(); - let widen_32_bit_pattern = |pattern, lane_size| { - if lane_size == ScalarSize::Size32 { - let pattern = pattern as u32 as u64; - - ASIMDMovModImm::maybe_from_u64(pattern | (pattern << 32), ScalarSize::Size64) - } else { - None - } - }; - - if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) { - smallvec![Inst::VecDupImm { - rd, - imm, - invert: false, - size - }] - } else if let Some(imm) = ASIMDMovModImm::maybe_from_u64(!pattern, lane_size) { - debug_assert_ne!(lane_size, ScalarSize::Size8); - debug_assert_ne!(lane_size, ScalarSize::Size64); - - smallvec![Inst::VecDupImm { - rd, - imm, - invert: true, - size - }] - } else if let Some(imm) = widen_32_bit_pattern(pattern, lane_size) { - let mut insts = smallvec![]; - - // TODO: Implement support for 64-bit scalar MOVI; we zero-extend the - // lower 64 bits instead. - if !size.is_128bits() { - let tmp = alloc_tmp(types::I64X2); - insts.push(Inst::VecDupImm { - rd: tmp, - imm, - invert: false, - size: VectorSize::Size64x2, - }); - insts.push(Inst::FpuExtend { - rd, - rn: tmp.to_reg(), - size: ScalarSize::Size64, - }); - } else { - insts.push(Inst::VecDupImm { - rd, - imm, - invert: false, - size: VectorSize::Size64x2, - }); - } - - insts - } else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(pattern, lane_size) { - smallvec![Inst::VecDupFPImm { rd, imm, size }] - } else { - let tmp = alloc_tmp(I64); - let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern, &mut alloc_tmp)[..]); - - insts.push(Inst::VecDup { - rd, - rn: tmp.to_reg(), - size, - }); - - insts - } - } - /// Generic constructor for a load (zero-extending where appropriate). pub fn gen_load(into_reg: Writable, mem: AMode, ty: Type, flags: MemFlags) -> Inst { match ty { @@ -585,6 +375,7 @@ fn memarg_operands VReg>(memarg: &AMode, collector: &mut OperandC &AMode::RegOffset { rn, .. } => { collector.reg_use(rn); } + &AMode::Const { .. } => {} } } @@ -928,9 +719,6 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_use(rt2); pairmemarg_operands(mem, collector); } - &Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => { - collector.reg_def(rd); - } &Inst::FpuToInt { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); @@ -1318,7 +1106,7 @@ impl MachInst for Inst { // Pretty-printing of instructions. fn mem_finalize_for_show(mem: &AMode, state: &EmitState) -> (String, AMode) { - let (mem_insts, mem) = mem_finalize(0, mem, state); + let (mem_insts, mem) = mem_finalize(None, mem, state); let mut mem_str = mem_insts .into_iter() .map(|inst| { @@ -2007,18 +1795,6 @@ impl Inst { format!("stp {}, {}, {}", rt, rt2, mem) } - &Inst::LoadFpuConst64 { rd, const_data } => { - let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); - format!( - "ldr {}, pc+8 ; b 12 ; data.f64 {}", - rd, - f64::from_bits(const_data) - ) - } - &Inst::LoadFpuConst128 { rd, const_data } => { - let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size128, allocs); - format!("ldr {}, pc+8 ; b 20 ; data.f128 0x{:032x}", rd, const_data) - } &Inst::FpuToInt { op, rd, rn } => { let (op, sizesrc, sizedest) = match op { FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32), @@ -2820,7 +2596,7 @@ impl Inst { // of the existing legalization framework). 
let rd = allocs.next_writable(rd); let mem = mem.with_allocs(allocs); - let (mem_insts, mem) = mem_finalize(0, &mem, state); + let (mem_insts, mem) = mem_finalize(None, &mem, state); let mut ret = String::new(); for inst in mem_insts.into_iter() { ret.push_str( diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index f3a3db7039..2b0d678f14 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -26,7 +26,7 @@ ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (f32const (u64_from_ieee32 n))) +(rule (lower (f32const (u32_from_ieee32 n))) (constant_f32 n)) ;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1954,7 +1954,7 @@ (rule -2 (lower (has_type ty (splat x @ (value_type (ty_scalar_float _))))) (vec_dup_from_fpu x (vector_size ty) 0)) -(rule (lower (has_type ty (splat (f32const (u64_from_ieee32 n))))) +(rule (lower (has_type ty (splat (f32const (u32_from_ieee32 n))))) (splat_const n (vector_size ty))) (rule (lower (has_type ty (splat (f64const (u64_from_ieee64 n))))) diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index d219451cfb..fcc38d0d59 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -570,67 +570,6 @@ pub(crate) fn lower_constant_u64(ctx: &mut Lower, rd: Writable, value } } -pub(crate) fn lower_constant_f32(ctx: &mut Lower, rd: Writable, value: f32) { - let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap(); - - for inst in Inst::load_fp_constant32(rd, value.to_bits(), alloc_tmp) { - ctx.emit(inst); - } -} - -pub(crate) fn lower_constant_f64(ctx: &mut Lower, rd: Writable, value: f64) { - let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap(); - - for inst in Inst::load_fp_constant64(rd, value.to_bits(), alloc_tmp) { - ctx.emit(inst); - } -} - -pub(crate) fn lower_constant_f128(ctx: &mut Lower, rd: Writable, value: u128) { - if value == 0 { - // Fast-track a common case. The general case, viz, calling `Inst::load_fp_constant128`, - // is potentially expensive. 
- ctx.emit(Inst::VecDupImm { - rd, - imm: ASIMDMovModImm::zero(ScalarSize::Size8), - invert: false, - size: VectorSize::Size8x16, - }); - } else { - let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap(); - for inst in Inst::load_fp_constant128(rd, value, alloc_tmp) { - ctx.emit(inst); - } - } -} - -pub(crate) fn lower_splat_const( - ctx: &mut Lower, - rd: Writable, - value: u64, - size: VectorSize, -) { - let (value, narrow_size) = match size.lane_size() { - ScalarSize::Size8 => (value as u8 as u64, ScalarSize::Size128), - ScalarSize::Size16 => (value as u16 as u64, ScalarSize::Size8), - ScalarSize::Size32 => (value as u32 as u64, ScalarSize::Size16), - ScalarSize::Size64 => (value, ScalarSize::Size32), - _ => unreachable!(), - }; - let (value, size) = match Inst::get_replicated_vector_pattern(value as u128, narrow_size) { - Some((value, lane_size)) => ( - value, - VectorSize::from_lane_size(lane_size, size.is_128bits()), - ), - None => (value, size), - }; - let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap(); - - for inst in Inst::load_replicated_vector_pattern(rd, value, size, alloc_tmp) { - ctx.emit(inst); - } -} - pub(crate) fn lower_condcode(cc: IntCC) -> Cond { match cc { IntCC::Equal => Cond::Eq, diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index 707243d2a3..d4545565d8 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -7,17 +7,16 @@ use smallvec::SmallVec; // Types that the generated ISLE code uses via `use super::*`. use super::{ - fp_reg, lower_condcode, lower_constant_f128, lower_constant_f32, lower_constant_f64, - lower_fp_condcode, stack_reg, writable_link_reg, writable_zero_reg, zero_reg, AMode, - ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, - FPUOpRI, FPUOpRIMod, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo, - MachLabel, MemLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, - PairAMode, Reg, SImm9, ScalarSize, ShiftOpAndAmt, UImm12Scaled, UImm5, VecMisc2, VectorSize, - NZCV, + fp_reg, lower_condcode, lower_fp_condcode, stack_reg, writable_link_reg, writable_zero_reg, + zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, + CondBrKind, ExtendOp, FPUOpRI, FPUOpRIMod, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, + IntCC, JTSequenceInfo, MachLabel, MemLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, + OperandSize, PairAMode, Reg, SImm9, ScalarSize, ShiftOpAndAmt, UImm12Scaled, UImm5, VecMisc2, + VectorSize, NZCV, }; use crate::ir::condcodes; use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm}; -use crate::isa::aarch64::lower::{lower_address, lower_pair_address, lower_splat_const}; +use crate::isa::aarch64::lower::{lower_address, lower_pair_address}; use crate::isa::aarch64::AArch64Backend; use crate::machinst::valueregs; use crate::machinst::{isle::*, InputSourceInst}; @@ -524,38 +523,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { lower_pair_address(self.lower_ctx, addr, offset as i32) } - fn constant_f32(&mut self, value: u64) -> Reg { - let rd = self.temp_writable_reg(I8X16); - - lower_constant_f32(self.lower_ctx, rd, f32::from_bits(value as u32)); - - rd.to_reg() - } - - fn constant_f64(&mut self, value: u64) -> Reg { - let rd = self.temp_writable_reg(I8X16); - - lower_constant_f64(self.lower_ctx, rd, f64::from_bits(value)); - - rd.to_reg() - } - - 
fn constant_f128(&mut self, value: u128) -> Reg { - let rd = self.temp_writable_reg(I8X16); - - lower_constant_f128(self.lower_ctx, rd, value); - - rd.to_reg() - } - - fn splat_const(&mut self, value: u64, size: &VectorSize) -> Reg { - let rd = self.temp_writable_reg(I8X16); - - lower_splat_const(self.lower_ctx, rd, value, *size); - - rd.to_reg() - } - fn fp_cond_code(&mut self, cc: &condcodes::FloatCC) -> Cond { lower_fp_condcode(*cc) } @@ -612,8 +579,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { } fn min_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg { - let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap(); - if in_bits == 32 { // From float32. let min = match (signed, out_bits) { @@ -630,7 +595,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { ), }; - lower_constant_f32(self.lower_ctx, tmp, min); + generated_code::constructor_constant_f32(self, min.to_bits()) } else if in_bits == 64 { // From float64. let min = match (signed, out_bits) { @@ -647,7 +612,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { ), }; - lower_constant_f64(self.lower_ctx, tmp, min); + generated_code::constructor_constant_f64(self, min.to_bits()) } else { unimplemented!( "unexpected input size for min_fp_value: {} (signed: {}, output size: {})", @@ -656,13 +621,9 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { out_bits ); } - - tmp.to_reg() } fn max_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg { - let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap(); - if in_bits == 32 { // From float32. let max = match (signed, out_bits) { @@ -682,7 +643,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { ), }; - lower_constant_f32(self.lower_ctx, tmp, max); + generated_code::constructor_constant_f32(self, max.to_bits()) } else if in_bits == 64 { // From float64. 
let max = match (signed, out_bits) { @@ -702,7 +663,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { ), }; - lower_constant_f64(self.lower_ctx, tmp, max); + generated_code::constructor_constant_f64(self, max.to_bits()) } else { unimplemented!( "unexpected input size for max_fp_value: {} (signed: {}, output size: {})", @@ -711,8 +672,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { out_bits ); } - - tmp.to_reg() } fn fpu_op_ri_ushr(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI { @@ -785,4 +744,66 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { None } } + + fn asimd_mov_mod_imm_zero(&mut self, size: &ScalarSize) -> ASIMDMovModImm { + ASIMDMovModImm::zero(*size) + } + + fn asimd_mov_mod_imm_from_u64( + &mut self, + val: u64, + size: &ScalarSize, + ) -> Option { + ASIMDMovModImm::maybe_from_u64(val, *size) + } + + fn asimd_fp_mod_imm_from_u64(&mut self, val: u64, size: &ScalarSize) -> Option { + ASIMDFPModImm::maybe_from_u64(val, *size) + } + + fn u64_low32_bits_unset(&mut self, val: u64) -> Option { + if val & 0xffffffff == 0 { + Some(val) + } else { + None + } + } + + fn u128_replicated_u64(&mut self, val: u128) -> Option { + let low64 = val as u64 as u128; + if (low64 | (low64 << 64)) == val { + Some(low64 as u64) + } else { + None + } + } + + fn u64_replicated_u32(&mut self, val: u64) -> Option { + let low32 = val as u32 as u64; + if (low32 | (low32 << 32)) == val { + Some(low32) + } else { + None + } + } + + fn u32_replicated_u16(&mut self, val: u64) -> Option { + let val = val as u32; + let low16 = val as u16 as u32; + if (low16 | (low16 << 16)) == val { + Some(low16.into()) + } else { + None + } + } + + fn u16_replicated_u8(&mut self, val: u64) -> Option { + let val = val as u16; + let low8 = val as u8 as u16; + if (low8 | (low8 << 8)) == val { + Some(low8.into()) + } else { + None + } + } } diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 2691129614..ec74f4555a 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -11,7 +11,7 @@ ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (f32const (u64_from_ieee32 n))) +(rule (lower (f32const (u32_from_ieee32 n))) (imm $F32 n)) ;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle index ffc5f7350f..4330b33e01 100644 --- a/cranelift/codegen/src/isa/s390x/inst.isle +++ b/cranelift/codegen/src/isa/s390x/inst.isle @@ -896,7 +896,7 @@ (CallInd (link WritableReg) (info BoxCallIndInfo)) - + ;; A pseudo-instruction that captures register arguments in vregs. 
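Review note, ahead of the s390x and prelude hunks that follow: the backend-local `u64_as_u32` constructor is renamed to `u64_truncate_to_u32` so that the name `u64_as_u32` can be reused in the shared prelude as a fallible extractor (it matches only when the value fits in 32 bits), which `constant_f64` uses to defer to `constant_f32`. The two behave differently; a minimal sketch using the same signatures the patch introduces:

    use std::convert::TryFrom;

    /// s390x's old helper, now `u64_truncate_to_u32`: an infallible truncation.
    fn u64_truncate_to_u32(n: u64) -> u32 {
        n as u32
    }

    /// The new prelude extractor `u64_as_u32`: matches only values that fit in u32.
    fn u64_as_u32(n: u64) -> Option<u32> {
        u32::try_from(n).ok()
    }

    fn main() {
        assert_eq!(u64_truncate_to_u32(0x1_0000_00ff), 0xff); // silently drops high bits
        assert_eq!(u64_as_u32(0x1_0000_00ff), None);          // refuses to match instead
        assert_eq!(u64_as_u32(0xff), Some(0xff));
    }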
(Args (args VecArgPair)) @@ -1555,8 +1555,8 @@ (decl u8_as_u16 (u8) u16) (extern constructor u8_as_u16 u8_as_u16) -(decl u64_as_u32 (u64) u32) -(extern constructor u64_as_u32 u64_as_u32) +(decl u64_truncate_to_u32 (u64) u32) +(extern constructor u64_truncate_to_u32 u64_truncate_to_u32) (decl u64_as_i16 (u64) i16) (extern constructor u64_as_i16 u64_as_i16) @@ -3000,7 +3000,7 @@ ;; 32-bit result type, any value (rule 5 (imm (gpr32_ty ty) n) (let ((dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.Mov32Imm dst (u64_as_u32 n))))) + (_ Unit (emit (MInst.Mov32Imm dst (u64_truncate_to_u32 n))))) dst)) ;; 64-bit result type, value fits in i16 @@ -3051,7 +3051,7 @@ ;; TODO: use LZER to load 0.0 (rule 8 (imm $F32 n) (let ((dst WritableReg (temp_writable_reg $F32)) - (_ Unit (emit (MInst.LoadFpuConst32 dst (u64_as_u32 n))))) + (_ Unit (emit (MInst.LoadFpuConst32 dst (u64_truncate_to_u32 n))))) dst)) ;; 64-bit floating-point type, any value. Loaded from literal pool. diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index 02563f4d5b..965074fa69 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -18,7 +18,7 @@ ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (f32const (u64_from_ieee32 x))) +(rule (lower (f32const (u32_from_ieee32 x))) (imm $F32 x)) diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index a6dfe21e51..7baf0f5cf5 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -436,7 +436,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { } #[inline] - fn u64_as_u32(&mut self, n: u64) -> u32 { + fn u64_truncate_to_u32(&mut self, n: u64) -> u32 { n as u32 } diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index a19b6717fa..5b4773f6cb 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -24,7 +24,7 @@ ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (f32const (u64_from_ieee32 x))) +(rule (lower (f32const (u32_from_ieee32 x))) (imm $F32 x)) ;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index aa3714f141..9d78044b30 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -80,6 +80,11 @@ macro_rules! isle_common_prelude_methods { x ^ y } + #[inline] + fn u64_shl(&mut self, x: u64, y: u64) -> u64 { + x << y + } + #[inline] fn imm64_shl(&mut self, ty: Type, x: Imm64, y: Imm64) -> Imm64 { // Mask off any excess shift bits. @@ -502,8 +507,8 @@ macro_rules! isle_common_prelude_methods { } } - fn u64_from_ieee32(&mut self, val: Ieee32) -> u64 { - val.bits().into() + fn u32_from_ieee32(&mut self, val: Ieee32) -> u32 { + val.bits() } fn u64_from_ieee64(&mut self, val: Ieee64) -> u64 { @@ -748,5 +753,13 @@ macro_rules! 
isle_common_prelude_methods { fn pack_block_array_2(&mut self, a: BlockCall, b: BlockCall) -> BlockArray2 { [a, b] } + + fn u128_as_u64(&mut self, val: u128) -> Option { + u64::try_from(val).ok() + } + + fn u64_as_u32(&mut self, val: u64) -> Option { + u32::try_from(val).ok() + } }; } diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 637ebd5e68..4815cf6357 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -88,10 +88,17 @@ (decl pure u32_as_u64 (u32) u64) (extern constructor u32_as_u64 u32_as_u64) +(convert u32 u64 u32_as_u64) (decl pure i64_as_u64 (i64) u64) (extern constructor i64_as_u64 i64_as_u64) +(decl u128_as_u64 (u64) u128) +(extern extractor u128_as_u64 u128_as_u64) + +(decl u64_as_u32 (u32) u64) +(extern extractor u64_as_u32 u64_as_u32) + ;;;; Primitive Arithmetic ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl pure u8_and (u8 u8) u8) @@ -129,6 +136,9 @@ (decl pure u64_xor (u64 u64) u64) (extern constructor u64_xor u64_xor) +(decl pure u64_shl (u64 u64) u64) +(extern constructor u64_shl u64_shl) + (decl pure imm64_shl (Type Imm64 Imm64) Imm64) (extern constructor imm64_shl imm64_shl) @@ -388,8 +398,8 @@ (extern constructor imm64_masked imm64_masked) ;; Extract a `u64` from an `Ieee32`. -(decl u64_from_ieee32 (u64) Ieee32) -(extern extractor infallible u64_from_ieee32 u64_from_ieee32) +(decl u32_from_ieee32 (u32) Ieee32) +(extern extractor infallible u32_from_ieee32 u32_from_ieee32) ;; Extract a `u64` from an `Ieee64`. (decl u64_from_ieee64 (u64) Ieee64) diff --git a/cranelift/filetests/filetests/isa/aarch64/constants.clif b/cranelift/filetests/filetests/isa/aarch64/constants.clif index 53795f2ec1..01d6f5c172 100644 --- a/cranelift/filetests/filetests/isa/aarch64/constants.clif +++ b/cranelift/filetests/filetests/isa/aarch64/constants.clif @@ -356,14 +356,14 @@ block0: ; VCode: ; block0: -; movz x1, #16457, LSL #48 -; fmov d0, x1 +; movz x0, #16457, LSL #48 +; fmov d0, x0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; mov x1, #0x4049000000000000 -; fmov d0, x1 +; mov x0, #0x4049000000000000 +; fmov d0, x0 ; ret function %f() -> f32 { @@ -374,14 +374,14 @@ block0: ; VCode: ; block0: -; movz x1, #16968, LSL #16 -; fmov s0, w1 +; movz w0, #16968, LSL #16 +; fmov s0, w0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; mov x1, #0x42480000 -; fmov s0, w1 +; mov w0, #0x42480000 +; fmov s0, w0 ; ret function %f() -> f64 { diff --git a/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif b/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif index 2bd77df458..1074c034af 100644 --- a/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif +++ b/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif @@ -87,9 +87,9 @@ block0(v0: f32): ; fmov s4, #-1 ; fcmp s0, s4 ; b.gt 8 ; udf -; movz x9, #17280, LSL #16 -; fmov s17, w9 -; fcmp s0, s17 +; movz w8, #17280, LSL #16 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt 8 ; udf ; fcvtzu w0, s0 ; ret @@ -103,9 +103,9 @@ block0(v0: f32): ; fcmp s0, s4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x43800000 -; fmov s17, w9 -; fcmp s0, s17 +; mov w8, #0x43800000 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu w0, s0 @@ -124,9 +124,9 @@ block0(v0: f64): ; fmov d4, #-1 ; fcmp d0, d4 ; b.gt 8 ; udf -; movz x9, #16496, LSL #48 -; fmov d17, x9 -; fcmp d0, d17 +; movz x8, #16496, LSL #48 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt 8 ; udf ; fcvtzu w0, d0 ; ret @@ -140,9 +140,9 @@ 
block0(v0: f64): ; fcmp d0, d4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x4070000000000000 -; fmov d17, x9 -; fcmp d0, d17 +; mov x8, #0x4070000000000000 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu w0, d0 @@ -161,9 +161,9 @@ block0(v0: f32): ; fmov s4, #-1 ; fcmp s0, s4 ; b.gt 8 ; udf -; movz x9, #18304, LSL #16 -; fmov s17, w9 -; fcmp s0, s17 +; movz w8, #18304, LSL #16 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt 8 ; udf ; fcvtzu w0, s0 ; ret @@ -177,9 +177,9 @@ block0(v0: f32): ; fcmp s0, s4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x47800000 -; fmov s17, w9 -; fcmp s0, s17 +; mov w8, #0x47800000 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu w0, s0 @@ -198,9 +198,9 @@ block0(v0: f64): ; fmov d4, #-1 ; fcmp d0, d4 ; b.gt 8 ; udf -; movz x9, #16624, LSL #48 -; fmov d17, x9 -; fcmp d0, d17 +; movz x8, #16624, LSL #48 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt 8 ; udf ; fcvtzu w0, d0 ; ret @@ -214,9 +214,9 @@ block0(v0: f64): ; fcmp d0, d4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x40f0000000000000 -; fmov d17, x9 -; fcmp d0, d17 +; mov x8, #0x40f0000000000000 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu w0, d0 diff --git a/cranelift/filetests/filetests/isa/aarch64/fcvt.clif b/cranelift/filetests/filetests/isa/aarch64/fcvt.clif index 06ba98d8b5..c17f495cc6 100644 --- a/cranelift/filetests/filetests/isa/aarch64/fcvt.clif +++ b/cranelift/filetests/filetests/isa/aarch64/fcvt.clif @@ -241,9 +241,9 @@ block0(v0: f32): ; fmov s4, #-1 ; fcmp s0, s4 ; b.gt 8 ; udf -; movz x9, #20352, LSL #16 -; fmov s17, w9 -; fcmp s0, s17 +; movz w8, #20352, LSL #16 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt 8 ; udf ; fcvtzu w0, s0 ; ret @@ -257,9 +257,9 @@ block0(v0: f32): ; fcmp s0, s4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x4f800000 -; fmov s17, w9 -; fcmp s0, s17 +; mov w8, #0x4f800000 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu w0, s0 @@ -278,9 +278,9 @@ block0(v0: f32): ; fmov s4, #-1 ; fcmp s0, s4 ; b.gt 8 ; udf -; movz x9, #24448, LSL #16 -; fmov s17, w9 -; fcmp s0, s17 +; movz w8, #24448, LSL #16 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt 8 ; udf ; fcvtzu x0, s0 ; ret @@ -294,9 +294,9 @@ block0(v0: f32): ; fcmp s0, s4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x5f800000 -; fmov s17, w9 -; fcmp s0, s17 +; mov w8, #0x5f800000 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu x0, s0 @@ -315,9 +315,9 @@ block0(v0: f64): ; fmov d4, #-1 ; fcmp d0, d4 ; b.gt 8 ; udf -; movz x9, #16880, LSL #48 -; fmov d17, x9 -; fcmp d0, d17 +; movz x8, #16880, LSL #48 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt 8 ; udf ; fcvtzu w0, d0 ; ret @@ -331,9 +331,9 @@ block0(v0: f64): ; fcmp d0, d4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x41f0000000000000 -; fmov d17, x9 -; fcmp d0, d17 +; mov x8, #0x41f0000000000000 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu w0, d0 @@ -352,9 +352,9 @@ block0(v0: f64): ; fmov d4, #-1 ; fcmp d0, d4 ; b.gt 8 ; udf -; movz x9, #17392, LSL #48 -; fmov d17, x9 -; fcmp d0, d17 +; movz x8, #17392, LSL #48 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt 8 ; udf ; fcvtzu x0, d0 ; ret @@ -368,9 +368,9 @@ block0(v0: f64): ; fcmp 
d0, d4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x43f0000000000000 -; fmov d17, x9 -; fcmp d0, d17 +; mov x8, #0x43f0000000000000 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu x0, d0 @@ -450,13 +450,13 @@ block0(v0: f32): ; block0: ; fcmp s0, s0 ; b.vc 8 ; udf -; movz x5, #52992, LSL #16 -; fmov s5, w5 -; fcmp s0, s5 +; movz w4, #52992, LSL #16 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge 8 ; udf -; movz x11, #20224, LSL #16 -; fmov s19, w11 -; fcmp s0, s19 +; movz w10, #20224, LSL #16 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt 8 ; udf ; fcvtzs w0, s0 ; ret @@ -466,14 +466,14 @@ block0(v0: f32): ; fcmp s0, s0 ; b.vc #0xc ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint -; mov x5, #0xcf000000 -; fmov s5, w5 -; fcmp s0, s5 +; mov w4, #-0x31000000 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge #0x20 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x11, #0x4f000000 -; fmov s19, w11 -; fcmp s0, s19 +; mov w10, #0x4f000000 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt #0x34 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzs w0, s0 @@ -489,13 +489,13 @@ block0(v0: f32): ; block0: ; fcmp s0, s0 ; b.vc 8 ; udf -; movz x5, #57088, LSL #16 -; fmov s5, w5 -; fcmp s0, s5 +; movz w4, #57088, LSL #16 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge 8 ; udf -; movz x11, #24320, LSL #16 -; fmov s19, w11 -; fcmp s0, s19 +; movz w10, #24320, LSL #16 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt 8 ; udf ; fcvtzs x0, s0 ; ret @@ -505,14 +505,14 @@ block0(v0: f32): ; fcmp s0, s0 ; b.vc #0xc ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint -; mov x5, #0xdf000000 -; fmov s5, w5 -; fcmp s0, s5 +; mov w4, #-0x21000000 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge #0x20 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x11, #0x5f000000 -; fmov s19, w11 -; fcmp s0, s19 +; mov w10, #0x5f000000 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt #0x34 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzs x0, s0 @@ -528,12 +528,12 @@ block0(v0: f64): ; block0: ; fcmp d0, d0 ; b.vc 8 ; udf -; ldr d4, pc+8 ; b 12 ; data.f64 -2147483649 +; ldr d4, [const(0)] ; fcmp d0, d4 ; b.gt 8 ; udf -; movz x9, #16864, LSL #48 -; fmov d17, x9 -; fcmp d0, d17 +; movz x8, #16864, LSL #48 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt 8 ; udf ; fcvtzs w0, d0 ; ret @@ -543,20 +543,19 @@ block0(v0: f64): ; fcmp d0, d0 ; b.vc #0xc ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint -; ldr d4, #0x14 -; b #0x1c -; .byte 0x00, 0x00, 0x20, 0x00 -; .byte 0x00, 0x00, 0xe0, 0xc1 +; ldr d4, #0x38 ; fcmp d0, d4 -; b.gt #0x28 +; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x41e0000000000000 -; fmov d17, x9 -; fcmp d0, d17 -; b.lt #0x3c +; mov x8, #0x41e0000000000000 +; fmov d18, x8 +; fcmp d0, d18 +; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzs w0, d0 ; ret +; .byte 0x00, 0x00, 0x20, 0x00 +; .byte 0x00, 0x00, 0xe0, 0xc1 function %f24(f64) -> i64 { block0(v0: f64): @@ -568,13 +567,13 @@ block0(v0: f64): ; block0: ; fcmp d0, d0 ; b.vc 8 ; udf -; movz x5, #50144, LSL #48 -; fmov d5, x5 -; fcmp d0, d5 +; movz x4, #50144, LSL #48 +; fmov d6, x4 +; fcmp d0, d6 ; b.ge 8 ; udf -; movz x11, #17376, LSL #48 -; fmov d19, x11 -; fcmp d0, d19 +; movz x10, #17376, LSL #48 +; fmov d20, x10 +; fcmp d0, d20 ; b.lt 8 ; udf ; fcvtzs x0, d0 ; ret @@ -584,14 +583,14 @@ block0(v0: f64): ; fcmp d0, d0 ; b.vc #0xc ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint -; mov x5, #-0x3c20000000000000 -; fmov d5, x5 -; fcmp d0, d5 +; mov x4, #-0x3c20000000000000 +; fmov d6, x4 +; fcmp d0, d6 ; b.ge #0x20 ; 
.byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x11, #0x43e0000000000000 -; fmov d19, x11 -; fcmp d0, d19 +; mov x10, #0x43e0000000000000 +; fmov d20, x10 +; fcmp d0, d20 ; b.lt #0x34 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzs x0, d0 diff --git a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif index 3ec6179544..186b06ad90 100644 --- a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif +++ b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif @@ -531,9 +531,9 @@ block0(v0: f32): ; fmov s4, #-1 ; fcmp s0, s4 ; b.gt 8 ; udf -; movz x9, #20352, LSL #16 -; fmov s17, w9 -; fcmp s0, s17 +; movz w8, #20352, LSL #16 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt 8 ; udf ; fcvtzu w0, s0 ; ret @@ -547,9 +547,9 @@ block0(v0: f32): ; fcmp s0, s4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x4f800000 -; fmov s17, w9 -; fcmp s0, s17 +; mov w8, #0x4f800000 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu w0, s0 @@ -565,13 +565,13 @@ block0(v0: f32): ; block0: ; fcmp s0, s0 ; b.vc 8 ; udf -; movz x5, #52992, LSL #16 -; fmov s5, w5 -; fcmp s0, s5 +; movz w4, #52992, LSL #16 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge 8 ; udf -; movz x11, #20224, LSL #16 -; fmov s19, w11 -; fcmp s0, s19 +; movz w10, #20224, LSL #16 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt 8 ; udf ; fcvtzs w0, s0 ; ret @@ -581,14 +581,14 @@ block0(v0: f32): ; fcmp s0, s0 ; b.vc #0xc ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint -; mov x5, #0xcf000000 -; fmov s5, w5 -; fcmp s0, s5 +; mov w4, #-0x31000000 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge #0x20 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x11, #0x4f000000 -; fmov s19, w11 -; fcmp s0, s19 +; mov w10, #0x4f000000 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt #0x34 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzs w0, s0 @@ -607,9 +607,9 @@ block0(v0: f32): ; fmov s4, #-1 ; fcmp s0, s4 ; b.gt 8 ; udf -; movz x9, #24448, LSL #16 -; fmov s17, w9 -; fcmp s0, s17 +; movz w8, #24448, LSL #16 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt 8 ; udf ; fcvtzu x0, s0 ; ret @@ -623,9 +623,9 @@ block0(v0: f32): ; fcmp s0, s4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x5f800000 -; fmov s17, w9 -; fcmp s0, s17 +; mov w8, #0x5f800000 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu x0, s0 @@ -641,13 +641,13 @@ block0(v0: f32): ; block0: ; fcmp s0, s0 ; b.vc 8 ; udf -; movz x5, #57088, LSL #16 -; fmov s5, w5 -; fcmp s0, s5 +; movz w4, #57088, LSL #16 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge 8 ; udf -; movz x11, #24320, LSL #16 -; fmov s19, w11 -; fcmp s0, s19 +; movz w10, #24320, LSL #16 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt 8 ; udf ; fcvtzs x0, s0 ; ret @@ -657,14 +657,14 @@ block0(v0: f32): ; fcmp s0, s0 ; b.vc #0xc ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint -; mov x5, #0xdf000000 -; fmov s5, w5 -; fcmp s0, s5 +; mov w4, #-0x21000000 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge #0x20 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x11, #0x5f000000 -; fmov s19, w11 -; fcmp s0, s19 +; mov w10, #0x5f000000 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt #0x34 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzs x0, s0 @@ -683,9 +683,9 @@ block0(v0: f64): ; fmov d4, #-1 ; fcmp d0, d4 ; b.gt 8 ; udf -; movz x9, #16880, LSL #48 -; fmov d17, x9 -; fcmp d0, d17 +; movz x8, #16880, LSL #48 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt 8 ; udf ; fcvtzu w0, d0 ; ret @@ 
-699,9 +699,9 @@ block0(v0: f64):
 ; fcmp d0, d4
 ; b.gt #0x1c
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
-; mov x9, #0x41f0000000000000
-; fmov d17, x9
-; fcmp d0, d17
+; mov x8, #0x41f0000000000000
+; fmov d18, x8
+; fcmp d0, d18
 ; b.lt #0x30
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
 ; fcvtzu w0, d0
@@ -717,12 +717,12 @@ block0(v0: f64):
 ; block0:
 ; fcmp d0, d0
 ; b.vc 8 ; udf
-; ldr d4, pc+8 ; b 12 ; data.f64 -2147483649
+; ldr d4, [const(0)]
 ; fcmp d0, d4
 ; b.gt 8 ; udf
-; movz x9, #16864, LSL #48
-; fmov d17, x9
-; fcmp d0, d17
+; movz x8, #16864, LSL #48
+; fmov d18, x8
+; fcmp d0, d18
 ; b.lt 8 ; udf
 ; fcvtzs w0, d0
 ; ret
@@ -732,20 +732,19 @@ block0(v0: f64):
 ; fcmp d0, d0
 ; b.vc #0xc
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint
-; ldr d4, #0x14
-; b #0x1c
-; .byte 0x00, 0x00, 0x20, 0x00
-; .byte 0x00, 0x00, 0xe0, 0xc1
+; ldr d4, #0x38
 ; fcmp d0, d4
-; b.gt #0x28
+; b.gt #0x1c
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
-; mov x9, #0x41e0000000000000
-; fmov d17, x9
-; fcmp d0, d17
-; b.lt #0x3c
+; mov x8, #0x41e0000000000000
+; fmov d18, x8
+; fcmp d0, d18
+; b.lt #0x30
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
 ; fcvtzs w0, d0
 ; ret
+; .byte 0x00, 0x00, 0x20, 0x00
+; .byte 0x00, 0x00, 0xe0, 0xc1
 
 function %f39(f64) -> i64 {
 block0(v0: f64):
@@ -760,9 +759,9 @@ block0(v0: f64):
 ; fmov d4, #-1
 ; fcmp d0, d4
 ; b.gt 8 ; udf
-; movz x9, #17392, LSL #48
-; fmov d17, x9
-; fcmp d0, d17
+; movz x8, #17392, LSL #48
+; fmov d18, x8
+; fcmp d0, d18
 ; b.lt 8 ; udf
 ; fcvtzu x0, d0
 ; ret
@@ -776,9 +775,9 @@ block0(v0: f64):
 ; fcmp d0, d4
 ; b.gt #0x1c
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
-; mov x9, #0x43f0000000000000
-; fmov d17, x9
-; fcmp d0, d17
+; mov x8, #0x43f0000000000000
+; fmov d18, x8
+; fcmp d0, d18
 ; b.lt #0x30
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
 ; fcvtzu x0, d0
@@ -794,13 +793,13 @@ block0(v0: f64):
 ; block0:
 ; fcmp d0, d0
 ; b.vc 8 ; udf
-; movz x5, #50144, LSL #48
-; fmov d5, x5
-; fcmp d0, d5
+; movz x4, #50144, LSL #48
+; fmov d6, x4
+; fcmp d0, d6
 ; b.ge 8 ; udf
-; movz x11, #17376, LSL #48
-; fmov d19, x11
-; fcmp d0, d19
+; movz x10, #17376, LSL #48
+; fmov d20, x10
+; fcmp d0, d20
 ; b.lt 8 ; udf
 ; fcvtzs x0, d0
 ; ret
@@ -810,14 +809,14 @@ block0(v0: f64):
 ; fcmp d0, d0
 ; b.vc #0xc
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint
-; mov x5, #-0x3c20000000000000
-; fmov d5, x5
-; fcmp d0, d5
+; mov x4, #-0x3c20000000000000
+; fmov d6, x4
+; fcmp d0, d6
 ; b.ge #0x20
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
-; mov x11, #0x43e0000000000000
-; fmov d19, x11
-; fcmp d0, d19
+; mov x10, #0x43e0000000000000
+; fmov d20, x10
+; fcmp d0, d20
 ; b.lt #0x34
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
 ; fcvtzs x0, d0
diff --git a/cranelift/filetests/filetests/isa/aarch64/shuffle.clif b/cranelift/filetests/filetests/isa/aarch64/shuffle.clif
index c52959b886..c9020f0cc1 100644
--- a/cranelift/filetests/filetests/isa/aarch64/shuffle.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/shuffle.clif
@@ -12,7 +12,7 @@ block0(v0: i8x16, v1: i8x16):
 ; block0:
 ; mov v30.16b, v0.16b
 ; mov v31.16b, v1.16b
-; ldr q3, pc+8 ; b 20 ; data.f128 0x05110f0204180d170b0c06041a1f0003
+; ldr q3, [const(0)]
 ; tbl v0.16b, { v30.16b, v31.16b }, v3.16b
 ; ret
 ;
@@ -20,14 +20,16 @@ block0(v0: i8x16, v1: i8x16):
 ; block0: ; offset 0x0
 ; mov v30.16b, v0.16b
 ; mov v31.16b, v1.16b
-; ldr q3, #0x10
-; b #0x20
+; ldr q3, #0x20
+; tbl v0.16b, {v30.16b, v31.16b}, v3.16b
+; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
 ; adc w3, w0, wzr
 ; add w4, w16, w12, lsl #1
 ; orr z23.b, p3/m, z23.b, z8.b
 ; mov z2.b, p1/z, #0x78
-; tbl v0.16b, {v30.16b, v31.16b}, v3.16b
-; ret
 
 function %aarch64_uzp1_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
@@ -541,7 +543,7 @@ block0(v0: i8x16, v1: i8x16):
 ; block0:
 ; mov v30.16b, v0.16b
 ; mov v31.16b, v1.16b
-; ldr q3, pc+8 ; b 20 ; data.f128 0x1f1e1d1c1b1a19181716151413121110
+; ldr q3, [const(0)]
 ; tbl v0.16b, { v30.16b, v31.16b }, v3.16b
 ; ret
 ;
@@ -549,14 +551,16 @@ block0(v0: i8x16, v1: i8x16):
 ; block0: ; offset 0x0
 ; mov v30.16b, v0.16b
 ; mov v31.16b, v1.16b
-; ldr q3, #0x10
-; b #0x20
-; sbfiz w16, w8, #0xe, #5
-; b #0xfffffffffc585464
-; madd w24, w8, w26, w6
-; fmadd s28, s8, s30, s7
+; ldr q3, #0x20
 ; tbl v0.16b, {v30.16b, v31.16b}, v3.16b
 ; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; sbfiz w16, w8, #0xe, #5
+; b #0xfffffffffc585474
+; madd w24, w8, w26, w6
+; fmadd s28, s8, s30, s7
 
 function %aarch64_dup_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-arithmetic.clif b/cranelift/filetests/filetests/isa/aarch64/simd-arithmetic.clif
index 3487163f41..5a05e8925b 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-arithmetic.clif
@@ -106,10 +106,10 @@ block0(v0: i64x2, v1: i64x2):
 ; VCode:
 ; block0:
-; movz x4, #1
-; dup v4.2d, x4
+; movz x3, #1
+; dup v5.2d, x3
 ; orr v7.16b, v0.16b, v1.16b
-; and v17.16b, v7.16b, v4.16b
+; and v17.16b, v7.16b, v5.16b
 ; ushr v19.2d, v0.2d, #1
 ; ushr v21.2d, v1.2d, #1
 ; add v23.2d, v19.2d, v21.2d
@@ -118,10 +118,10 @@ block0(v0: i64x2, v1: i64x2):
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; mov x4, #1
-; dup v4.2d, x4
+; mov x3, #1
+; dup v5.2d, x3
 ; orr v7.16b, v0.16b, v1.16b
-; and v17.16b, v7.16b, v4.16b
+; and v17.16b, v7.16b, v5.16b
 ; ushr v19.2d, v0.2d, #1
 ; ushr v21.2d, v1.2d, #1
 ; add v23.2d, v19.2d, v21.2d
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
index b4449a9670..9b7b851128 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
@@ -228,7 +228,7 @@ block0(v0: i32):
 ; VCode:
 ; block0:
-; ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100
+; ldr q5, [const(0)]
 ; and w3, w0, #7
 ; dup v6.16b, w3
 ; sshl v0.16b, v5.16b, v6.16b
@@ -236,16 +236,18 @@ block0(v0: i32):
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; ldr q5, #8
-; b #0x18
-; .byte 0x00, 0x01, 0x02, 0x03
-; .byte 0x04, 0x05, 0x06, 0x07
-; add w8, w8, w10, lsl #2
-; .byte 0x0c, 0x0d, 0x0e, 0x0f
+; ldr q5, #0x20
 ; and w3, w0, #7
 ; dup v6.16b, w3
 ; sshl v0.16b, v5.16b, v6.16b
 ; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x01, 0x02, 0x03
+; .byte 0x04, 0x05, 0x06, 0x07
+; add w8, w8, w10, lsl #2
+; .byte 0x0c, 0x0d, 0x0e, 0x0f
 
 function %ushr_i8x16_imm() -> i8x16 {
 block0:
@@ -257,7 +259,7 @@ block0:
 ; VCode:
 ; block0:
-; ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100
+; ldr q5, [const(0)]
 ; movz w1, #1
 ; and w3, w1, #7
 ; sub x5, xzr, x3
@@ -267,18 +269,18 @@ block0:
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; ldr q5, #8
-; b #0x18
-; .byte 0x00, 0x01, 0x02, 0x03
-; .byte 0x04, 0x05, 0x06, 0x07
-; add w8, w8, w10, lsl #2
-; .byte 0x0c, 0x0d, 0x0e, 0x0f
+; ldr q5, #0x20
 ; mov w1, #1
 ; and w3, w1, #7
 ; neg x5, x3
 ; dup v7.16b, w5
 ; ushl v0.16b, v5.16b, v7.16b
 ; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x01, 0x02, 0x03
+; .byte 0x04, 0x05, 0x06, 0x07
+; add w8, w8, w10, lsl #2
+; .byte 0x0c, 0x0d, 0x0e, 0x0f
 
 function %sshr_i8x16(i32) -> i8x16 {
 block0(v0: i32):
@@ -289,7 +291,7 @@ block0(v0: i32):
 ; VCode:
 ; block0:
-; ldr q6, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100
+; ldr q6, [const(0)]
 ; and w3, w0, #7
 ; sub x5, xzr, x3
 ; dup v7.16b, w5
@@ -298,17 +300,18 @@ block0(v0: i32):
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; ldr q6, #8
-; b #0x18
-; .byte 0x00, 0x01, 0x02, 0x03
-; .byte 0x04, 0x05, 0x06, 0x07
-; add w8, w8, w10, lsl #2
-; .byte 0x0c, 0x0d, 0x0e, 0x0f
+; ldr q6, #0x20
 ; and w3, w0, #7
 ; neg x5, x3
 ; dup v7.16b, w5
 ; sshl v0.16b, v6.16b, v7.16b
 ; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x01, 0x02, 0x03
+; .byte 0x04, 0x05, 0x06, 0x07
+; add w8, w8, w10, lsl #2
+; .byte 0x0c, 0x0d, 0x0e, 0x0f
 
 function %sshr_i8x16_imm(i8x16, i32) -> i8x16 {
 block0(v0: i8x16, v1: i32):
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif
index af539f84ba..3b19f5d0e1 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif
@@ -15,25 +15,26 @@ block0:
 ; VCode:
 ; block0:
 ; movi v30.16b, #0
-; movz x4, #1
-; fmov s31, w4
-; ldr q3, pc+8 ; b 20 ; data.f128 0x11000000000000000000000000000000
+; movz w3, #1
+; fmov s31, w3
+; ldr q3, [const(0)]
 ; tbl v0.16b, { v30.16b, v31.16b }, v3.16b
 ; ret
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
 ; movi v30.16b, #0
-; mov x4, #1
-; fmov s31, w4
-; ldr q3, #0x14
-; b #0x24
+; mov w3, #1
+; fmov s31, w3
+; ldr q3, #0x20
+; tbl v0.16b, {v30.16b, v31.16b}, v3.16b
+; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
 ; .byte 0x00, 0x00, 0x00, 0x00
 ; .byte 0x00, 0x00, 0x00, 0x00
 ; .byte 0x00, 0x00, 0x00, 0x00
 ; add w0, w0, #0
-; tbl v0.16b, {v30.16b, v31.16b}, v3.16b
-; ret
 
 function %shuffle_same_ssa_value() -> i8x16 {
 block0:
@@ -44,26 +45,27 @@ block0:
 ; VCode:
 ; block0:
-; movz x3, #1
-; fmov s31, w3
-; ldr q2, pc+8 ; b 20 ; data.f128 0x13000000000000000000000000000000
+; movz w2, #1
+; fmov s31, w2
+; ldr q2, [const(0)]
 ; mov v30.16b, v31.16b
 ; tbl v0.16b, { v30.16b, v31.16b }, v2.16b
 ; ret
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; mov x3, #1
-; fmov s31, w3
-; ldr q2, #0x10
-; b #0x20
+; mov w2, #1
+; fmov s31, w2
+; ldr q2, #0x20
+; mov v30.16b, v31.16b
+; tbl v0.16b, {v30.16b, v31.16b}, v2.16b
+; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
 ; .byte 0x00, 0x00, 0x00, 0x00
 ; .byte 0x00, 0x00, 0x00, 0x00
 ; .byte 0x00, 0x00, 0x00, 0x00
 ; sbfx w0, w0, #0, #1
-; mov v30.16b, v31.16b
-; tbl v0.16b, {v30.16b, v31.16b}, v2.16b
-; ret
 
 function %swizzle() -> i8x16 {
 block0:
@@ -75,27 +77,25 @@ block0:
 ; VCode:
 ; block0:
-; ldr q2, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100
-; ldr q3, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100
+; ldr q2, [const(1)]
+; ldr q3, [const(0)]
 ; tbl v0.16b, { v2.16b }, v3.16b
 ; ret
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; ldr q2, #8
-; b #0x18
-; .byte 0x00, 0x01, 0x02, 0x03
-; .byte 0x04, 0x05, 0x06, 0x07
-; add w8, w8, w10, lsl #2
-; .byte 0x0c, 0x0d, 0x0e, 0x0f
-; ldr q3, #0x20
-; b #0x30
-; .byte 0x00, 0x01, 0x02, 0x03
-; .byte 0x04, 0x05, 0x06, 0x07
-; add w8, w8, w10, lsl #2
-; .byte 0x0c, 0x0d, 0x0e, 0x0f
+; ldr q2, #0x20
+; ldr q3, #0x10
 ; tbl v0.16b, {v2.16b}, v3.16b
 ; ret
+; .byte 0x00, 0x01, 0x02, 0x03
+; .byte 0x04, 0x05, 0x06, 0x07
+; add w8, w8, w10, lsl #2
+; .byte 0x0c, 0x0d, 0x0e, 0x0f
+; .byte 0x00, 0x01, 0x02, 0x03
+; .byte 0x04, 0x05, 0x06, 0x07
+; add w8, w8, w10, lsl #2
+; .byte 0x0c, 0x0d, 0x0e, 0x0f
 
 function %splat_i8(i8) -> i8x16 {
 block0(v0: i8):
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd.clif b/cranelift/filetests/filetests/isa/aarch64/simd.clif
index 4933878ac3..47e1f23f85 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd.clif
@@ -33,14 +33,14 @@ block0:
 ; VCode:
 ; block0:
-; movz x1, #42679
-; dup v0.8h, w1
+; movz x0, #42679
+; dup v0.8h, w0
 ; ret
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; mov x1, #0xa6b7
-; dup v0.8h, w1
+; mov x0, #0xa6b7
+; dup v0.8h, w0
 ; ret
 
 function %f4(i32, i8x16, i8x16) -> i8x16 {
@@ -156,14 +156,14 @@ block0:
 ; VCode:
 ; block0:
-; movi v1.2d, #18374687579166474495
-; fmov d0, d1
+; movi v0.2d, #18374687579166474495
+; fmov d0, d0
 ; ret
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; movi v1.2d, #0xff0000ffff0000ff
-; fmov d0, d1
+; movi v0.2d, #0xff0000ffff0000ff
+; fmov d0, d0
 ; ret
 
 function %f10() -> i32x4 {
diff --git a/cranelift/filetests/filetests/isa/aarch64/vhigh_bits.clif b/cranelift/filetests/filetests/isa/aarch64/vhigh_bits.clif
index e0034049af..6a36c90191 100644
--- a/cranelift/filetests/filetests/isa/aarch64/vhigh_bits.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/vhigh_bits.clif
@@ -10,12 +10,12 @@ block0(v0: i8x16):
 ; VCode:
 ; block0:
 ; sshr v2.16b, v0.16b, #7
-; movz x5, #513
-; movk x5, x5, #2052, LSL #16
-; movk x5, x5, #8208, LSL #32
-; movk x5, x5, #32832, LSL #48
-; dup v16.2d, x5
-; and v22.16b, v2.16b, v16.16b
+; movz x7, #513
+; movk x7, x7, #2052, LSL #16
+; movk x7, x7, #8208, LSL #32
+; movk x7, x7, #32832, LSL #48
+; dup v20.2d, x7
+; and v22.16b, v2.16b, v20.16b
 ; ext v24.16b, v22.16b, v22.16b, #8
 ; zip1 v26.16b, v22.16b, v24.16b
 ; addv h28, v26.8h
@@ -25,12 +25,12 @@ block0(v0: i8x16):
 ; Disassembled:
 ; block0: ; offset 0x0
 ; sshr v2.16b, v0.16b, #7
-; mov x5, #0x201
-; movk x5, #0x804, lsl #16
-; movk x5, #0x2010, lsl #32
-; movk x5, #0x8040, lsl #48
-; dup v16.2d, x5
-; and v22.16b, v2.16b, v16.16b
+; mov x7, #0x201
+; movk x7, #0x804, lsl #16
+; movk x7, #0x2010, lsl #32
+; movk x7, #0x8040, lsl #48
+; dup v20.2d, x7
+; and v22.16b, v2.16b, v20.16b
 ; ext v24.16b, v22.16b, v22.16b, #8
 ; zip1 v26.16b, v22.16b, v24.16b
 ; addv h28, v26.8h
@@ -46,12 +46,12 @@ block0(v0: i8x16):
 ; VCode:
 ; block0:
 ; sshr v2.16b, v0.16b, #7
-; movz x5, #513
-; movk x5, x5, #2052, LSL #16
-; movk x5, x5, #8208, LSL #32
-; movk x5, x5, #32832, LSL #48
-; dup v16.2d, x5
-; and v22.16b, v2.16b, v16.16b
+; movz x7, #513
+; movk x7, x7, #2052, LSL #16
+; movk x7, x7, #8208, LSL #32
+; movk x7, x7, #32832, LSL #48
+; dup v20.2d, x7
+; and v22.16b, v2.16b, v20.16b
 ; ext v24.16b, v22.16b, v22.16b, #8
 ; zip1 v26.16b, v22.16b, v24.16b
 ; addv h28, v26.8h
@@ -61,12 +61,12 @@ block0(v0: i8x16):
 ; Disassembled:
 ; block0: ; offset 0x0
 ; sshr v2.16b, v0.16b, #7
-; mov x5, #0x201
-; movk x5, #0x804, lsl #16
-; movk x5, #0x2010, lsl #32
-; movk x5, #0x8040, lsl #48
-; dup v16.2d, x5
-; and v22.16b, v2.16b, v16.16b
+; mov x7, #0x201
+; movk x7, #0x804, lsl #16
+; movk x7, #0x2010, lsl #32
+; movk x7, #0x8040, lsl #48
+; dup v20.2d, x7
+; and v22.16b, v2.16b, v20.16b
 ; ext v24.16b, v22.16b, v22.16b, #8
 ; zip1 v26.16b, v22.16b, v24.16b
 ; addv h28, v26.8h
@@ -82,7 +82,7 @@ block0(v0: i16x8):
 ; VCode:
 ; block0:
 ; sshr v2.8h, v0.8h, #15
-; ldr q4, pc+8 ; b 20 ; data.f128 0x00800040002000100008000400020001
+; ldr q4, [const(0)]
 ; and v6.16b, v2.16b, v4.16b
 ; addv h16, v6.8h
 ; umov w0, v16.h[0]
@@ -91,16 +91,17 @@ block0(v0: i16x8):
 ; Disassembled:
 ; block0: ; offset 0x0
 ; sshr v2.8h, v0.8h, #0xf
-; ldr q4, #0xc
-; b #0x1c
-; .byte 0x01, 0x00, 0x02, 0x00
-; .byte 0x04, 0x00, 0x08, 0x00
-; .byte 0x10, 0x00, 0x20, 0x00
-; .byte 0x40, 0x00, 0x80, 0x00
+; ldr q4, #0x20
 ; and v6.16b, v2.16b, v4.16b
 ; addv h16, v6.8h
 ; umov w0, v16.h[0]
 ; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x01, 0x00, 0x02, 0x00
+; .byte 0x04, 0x00, 0x08, 0x00
+; .byte 0x10, 0x00, 0x20, 0x00
+; .byte 0x40, 0x00, 0x80, 0x00
 
 function %f4(i32x4) -> i8 {
 block0(v0: i32x4):
@@ -111,7 +112,7 @@ block0(v0: i32x4):
 ; VCode:
 ; block0:
 ; sshr v2.4s, v0.4s, #31
-; ldr q4, pc+8 ; b 20 ; data.f128 0x00000008000000040000000200000001
+; ldr q4, [const(0)]
 ; and v6.16b, v2.16b, v4.16b
 ; addv s16, v6.4s
 ; mov w0, v16.s[0]
@@ -120,16 +121,17 @@ block0(v0: i32x4):
 ; Disassembled:
 ; block0: ; offset 0x0
 ; sshr v2.4s, v0.4s, #0x1f
-; ldr q4, #0xc
-; b #0x1c
-; .byte 0x01, 0x00, 0x00, 0x00
-; .byte 0x02, 0x00, 0x00, 0x00
-; .byte 0x04, 0x00, 0x00, 0x00
-; .byte 0x08, 0x00, 0x00, 0x00
+; ldr q4, #0x20
 ; and v6.16b, v2.16b, v4.16b
 ; addv s16, v6.4s
 ; mov w0, v16.s[0]
 ; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x01, 0x00, 0x00, 0x00
+; .byte 0x02, 0x00, 0x00, 0x00
+; .byte 0x04, 0x00, 0x00, 0x00
+; .byte 0x08, 0x00, 0x00, 0x00
 
 function %f5(i64x2) -> i8 {
 block0(v0: i64x2):