aarch64: Use VCodeConstant for f64/v128 constants (#5997)

* aarch64: Translate float and splat lowering to ISLE

I was looking into `constant_f128` and its fallback lowering into memory,
and to get familiar with the code I figured it'd be good to port some
Rust logic to ISLE. This commit ports the `constant_{f128,f64,f32}`
helpers from Rust to ISLE, as well as the `splat_const` helper, which
ended up being closely related.

The test changes mostly reflect regalloc churn, but one notable
difference is that lowering an `f32` constant now creates a 32-bit
immediate rather than a 64-bit immediate (in a GP register, before it
is moved into an FP register). This is semantically identical, but the
generated code differs slightly in a few minor cases.
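
As a standalone illustration of why the narrower immediate is
semantics-preserving (plain Rust, not backend code; 1.5 is just an
arbitrary example value): only the low 32 bits of the materialized
value ever reach the FP register, so a u32 immediate and its
zero-extension to u64 produce the same f32.

fn main() {
    let bits32 = 1.5f32.to_bits(); // 0x3fc00000
    let bits64 = u64::from(bits32); // what the old lowering put in a 64-bit GP register
    // Either way only the low 32 bits end up in the FP register, so the
    // resulting f32 is the same.
    assert_eq!(f32::from_bits(bits64 as u32), 1.5f32);
    assert_eq!(f32::from_bits(bits32), 1.5f32);
}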

* aarch64: Load f64/v128 constants from a pool

This commit removes the `LoadFpuConst64` and `LoadFpuConst128`
pseudo-instructions from the AArch64 backend, which loaded a nearby
inline constant and then jumped over it. Constants now go through the
`VCodeConstant` infrastructure and are placed in a pool at the end of
the function, similar to how the x64 backend works. Minor support was
also added for a new addressing mode that performs a
`MachLabel`-relative load.
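
For reference, a minimal standalone sketch of the reachability
constraint behind that label-relative load (plain Rust;
`ldr_literal_in_range` is just an illustrative name, and the roughly
+/-1 MiB figure is the architectural limit of AArch64's LDR (literal)
form rather than something introduced by this commit). The `MachBuffer`
label fixup fills in the actual offset once the pool's final position
is known.

fn ldr_literal_in_range(byte_offset: i64) -> bool {
    // LDR (literal) encodes a signed 19-bit word offset, i.e. a multiple
    // of 4 bytes within roughly +/-1 MiB of the load instruction.
    let words = byte_offset / 4;
    byte_offset % 4 == 0 && words >= -(1 << 18) && words < (1 << 18)
}

fn main() {
    assert!(ldr_literal_in_range(8)); // the old inline `pc+8` form
    assert!(ldr_literal_in_range((1 << 20) - 4)); // farthest reachable pool entry
    assert!(!ldr_literal_in_range(1 << 20)); // one word too far
}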

Author: Alex Crichton
Date: 2023-03-13 14:33:52 -05:00
Committed by: GitHub
Parent: 6ecdc2482e
Commit: 03b5dbb3e0
25 changed files with 622 additions and 744 deletions

View File

@@ -466,14 +466,6 @@
(mem PairAMode)
(flags MemFlags))
(LoadFpuConst64
(rd WritableReg)
(const_data u64))
(LoadFpuConst128
(rd WritableReg)
(const_data u128))
;; Conversion: FP -> integer.
(FpuToInt
(op FpuToIntOp)
@@ -1135,6 +1127,11 @@
(off i64)
(ty Type))
;; A reference to a constant which is placed outside of the function's
;; body, typically at the end.
(Const
(addr VCodeConstant))
;; Offset from the "nominal stack pointer", which is where the real SP is
;; just after stack and spill slots are allocated in the function prologue.
;; At emission time, this is converted to `SPOffset` with a fixup added to
@@ -1194,6 +1191,16 @@
(rule (lane_size (dynamic_lane 32 _)) (ScalarSize.Size32))
(rule (lane_size (dynamic_lane 64 _)) (ScalarSize.Size64))
;; Helper for extracting the size of a lane from the input `VectorSize`
(decl pure vector_lane_size (VectorSize) ScalarSize)
(rule (vector_lane_size (VectorSize.Size8x16)) (ScalarSize.Size8))
(rule (vector_lane_size (VectorSize.Size8x8)) (ScalarSize.Size8))
(rule (vector_lane_size (VectorSize.Size16x8)) (ScalarSize.Size16))
(rule (vector_lane_size (VectorSize.Size16x4)) (ScalarSize.Size16))
(rule (vector_lane_size (VectorSize.Size32x4)) (ScalarSize.Size32))
(rule (vector_lane_size (VectorSize.Size32x2)) (ScalarSize.Size32))
(rule (vector_lane_size (VectorSize.Size64x2)) (ScalarSize.Size64))
(type Cond extern
(enum
(Eq)
@@ -1908,6 +1915,13 @@
(_ Unit (emit (MInst.VecDupFromFpu dst src size lane))))
dst))
;; Helper for emitting `MInst.VecDupImm` instructions.
(decl vec_dup_imm (ASIMDMovModImm bool VectorSize) Reg)
(rule (vec_dup_imm imm invert size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.VecDupImm dst imm invert size))))
dst))
;; Helper for emitting `MInst.AluRRImm12` instructions.
(decl alu_rr_imm12 (ALUOp Type Reg Imm12) Reg)
(rule (alu_rr_imm12 op ty src imm)
@@ -2158,6 +2172,13 @@
(_ Unit (emit (MInst.MovToFpu dst x size))))
dst))
;; Helper for emitting `MInst.FpuMoveFPImm` instructions.
(decl fpu_move_fp_imm (ASIMDFPModImm ScalarSize) Reg)
(rule (fpu_move_fp_imm imm size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.FpuMoveFPImm dst imm size))))
dst))
;; Helper for emitting `MInst.MovToVec` instructions.
(decl mov_to_vec (Reg Reg u8 VectorSize) Reg)
(rule (mov_to_vec src1 src2 lane size)
@@ -2986,24 +3007,122 @@
(amode ty addr offset)))
;; Lower a constant f32.
(decl constant_f32 (u64) Reg)
;; TODO: Port lower_constant_f32() to ISLE.
(extern constructor constant_f32 constant_f32)
;;
;; Note that we must make sure that all bits outside the lowest 32 are set to 0
;; because this function is also used to load wider constants (that have zeros
;; in their most significant bits).
(decl constant_f32 (u32) Reg)
(rule 2 (constant_f32 0)
(vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32))
$false
(VectorSize.Size32x2)))
(rule 1 (constant_f32 n)
(if-let imm (asimd_fp_mod_imm_from_u64 n (ScalarSize.Size32)))
(fpu_move_fp_imm imm (ScalarSize.Size32)))
(rule (constant_f32 n)
(mov_to_fpu (imm $I32 (ImmExtend.Zero) n) (ScalarSize.Size32)))
;; Lower a constant f64.
;;
;; Note that we must make sure that all bits outside the lowest 64 are set to 0
;; because this function is also used to load wider constants (that have zeros
;; in their most significant bits).
;; TODO: Treat as half of a 128 bit vector and consider replicated patterns.
;; Scalar MOVI might also be an option.
(decl constant_f64 (u64) Reg)
;; TODO: Port lower_constant_f64() to ISLE.
(extern constructor constant_f64 constant_f64)
(rule 4 (constant_f64 0)
(vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32))
$false
(VectorSize.Size32x2)))
(rule 3 (constant_f64 n)
(if-let imm (asimd_fp_mod_imm_from_u64 n (ScalarSize.Size64)))
(fpu_move_fp_imm imm (ScalarSize.Size64)))
(rule 2 (constant_f64 (u64_as_u32 n))
(constant_f32 n))
(rule 1 (constant_f64 (u64_low32_bits_unset n))
(mov_to_fpu (imm $I64 (ImmExtend.Zero) n) (ScalarSize.Size64)))
(rule (constant_f64 n)
(fpu_load64 (AMode.Const (emit_u64_le_const n)) (mem_flags_trusted)))
;; Tests whether the low 32 bits in the input are all zero.
(decl u64_low32_bits_unset (u64) u64)
(extern extractor u64_low32_bits_unset u64_low32_bits_unset)
;; Lower a constant f128.
(decl constant_f128 (u128) Reg)
;; TODO: Port lower_constant_f128() to ISLE.
(extern constructor constant_f128 constant_f128)
(rule 3 (constant_f128 0)
(vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size8))
$false
(VectorSize.Size8x16)))
;; If the upper 64-bits are all zero then defer to `constant_f64`.
(rule 2 (constant_f128 (u128_as_u64 n)) (constant_f64 n))
;; If the low half of the u128 equals the high half then delegate to the splat
;; logic as a splat of a 64-bit value.
(rule 1 (constant_f128 (u128_replicated_u64 n))
(splat_const n (VectorSize.Size64x2)))
;; Base case is to load the constant from memory.
(rule (constant_f128 n)
(fpu_load128 (AMode.Const (emit_u128_le_const n)) (mem_flags_trusted)))
;; Lower a vector splat with a constant parameter.
;;
;; The 64-bit input here only uses the low bits for the lane size in
;; `VectorSize` and all other bits are ignored.
(decl splat_const (u64 VectorSize) Reg)
;; TODO: Port lower_splat_const() to ISLE.
(extern constructor splat_const splat_const)
;; If the splat'd constant can itself be reduced in size then attempt to do so
;; as it will make it easier to create the immediates in the instructions below.
(rule 5 (splat_const (u64_replicated_u32 n) (VectorSize.Size64x2))
(splat_const n (VectorSize.Size32x4)))
(rule 5 (splat_const (u32_replicated_u16 n) (VectorSize.Size32x4))
(splat_const n (VectorSize.Size16x8)))
(rule 5 (splat_const (u32_replicated_u16 n) (VectorSize.Size32x2))
(splat_const n (VectorSize.Size16x4)))
(rule 5 (splat_const (u16_replicated_u8 n) (VectorSize.Size16x8))
(splat_const n (VectorSize.Size8x16)))
(rule 5 (splat_const (u16_replicated_u8 n) (VectorSize.Size16x4))
(splat_const n (VectorSize.Size8x8)))
;; Special cases for `vec_dup_imm` instructions where the input is either
;; negated or not.
(rule 4 (splat_const n size)
(if-let imm (asimd_mov_mod_imm_from_u64 n (vector_lane_size size)))
(vec_dup_imm imm $false size))
(rule 3 (splat_const n size)
(if-let imm (asimd_mov_mod_imm_from_u64 (u64_not n) (vector_lane_size size)))
(vec_dup_imm imm $true size))
;; Special case a 32-bit splat where an immediate can be created by
;; concatenating the 32-bit constant into a 64-bit value
(rule 2 (splat_const n (VectorSize.Size32x4))
(if-let imm (asimd_mov_mod_imm_from_u64 (u64_or n (u64_shl n 32)) (ScalarSize.Size64)))
(vec_dup_imm imm $false (VectorSize.Size64x2)))
(rule 2 (splat_const n (VectorSize.Size32x2))
(if-let imm (asimd_mov_mod_imm_from_u64 (u64_or n (u64_shl n 32)) (ScalarSize.Size64)))
(fpu_extend (vec_dup_imm imm $false (VectorSize.Size64x2)) (ScalarSize.Size64)))
(rule 1 (splat_const n size)
(if-let imm (asimd_fp_mod_imm_from_u64 n (vector_lane_size size)))
(vec_dup_fp_imm imm size))
;; The base case for splat is to use `vec_dup` with the immediate loaded into a
;; register.
(rule (splat_const n size)
(vec_dup (imm $I64 (ImmExtend.Zero) n) size))
;; Each of these extractors tests whether the upper half of the input equals the
;; lower half of the input
(decl u128_replicated_u64 (u64) u128)
(extern extractor u128_replicated_u64 u128_replicated_u64)
(decl u64_replicated_u32 (u64) u64)
(extern extractor u64_replicated_u32 u64_replicated_u32)
(decl u32_replicated_u16 (u64) u64)
(extern extractor u32_replicated_u16 u32_replicated_u16)
(decl u16_replicated_u8 (u64) u64)
(extern extractor u16_replicated_u8 u16_replicated_u8)
;; Lower a FloatCC to a Cond.
(decl fp_cond_code (FloatCC) Cond)
@@ -3814,3 +3933,36 @@
;; Helper for emitting the `trn2` instruction
(decl vec_trn2 (Reg Reg VectorSize) Reg)
(rule (vec_trn2 rn rm size) (vec_rrr (VecALUOp.Trn2) rn rm size))
;; Helper for creating a zero value `ASIMDMovModImm` immediate.
(decl asimd_mov_mod_imm_zero (ScalarSize) ASIMDMovModImm)
(extern constructor asimd_mov_mod_imm_zero asimd_mov_mod_imm_zero)
;; Helper for fallibly creating an `ASIMDMovModImm` immediate from its parts.
(decl pure partial asimd_mov_mod_imm_from_u64 (u64 ScalarSize) ASIMDMovModImm)
(extern constructor asimd_mov_mod_imm_from_u64 asimd_mov_mod_imm_from_u64)
;; Helper for fallibly creating an `ASIMDFPModImm` immediate from its parts.
(decl pure partial asimd_fp_mod_imm_from_u64 (u64 ScalarSize) ASIMDFPModImm)
(extern constructor asimd_fp_mod_imm_from_u64 asimd_fp_mod_imm_from_u64)
;; Helper for creating a `VecDupFPImm` instruction
(decl vec_dup_fp_imm (ASIMDFPModImm VectorSize) Reg)
(rule (vec_dup_fp_imm imm size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.VecDupFPImm dst imm size))))
dst))
;; Helper for creating a `FpuLoad64` instruction
(decl fpu_load64 (AMode MemFlags) Reg)
(rule (fpu_load64 amode flags)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.FpuLoad64 dst amode flags))))
dst))
;; Helper for creating a `FpuLoad128` instruction
(decl fpu_load128 (AMode MemFlags) Reg)
(rule (fpu_load128 amode flags)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.FpuLoad128 dst amode flags))))
dst))

View File

@@ -124,6 +124,9 @@ pub enum MemLabel {
/// offset from this instruction. This form must be used at emission time;
/// see `memlabel_finalize()` for how other forms are lowered to this one.
PCRel(i32),
/// An address that refers to a label within a `MachBuffer`, for example a
/// constant that lives in the pool at the end of the function.
Mach(MachLabel),
}
impl AMode {
@@ -194,6 +197,7 @@ impl AMode {
| &AMode::FPOffset { .. }
| &AMode::SPOffset { .. }
| &AMode::NominalSPOffset { .. }
| &AMode::Const { .. }
| AMode::Label { .. } => self.clone(),
}
}
@@ -382,7 +386,8 @@ impl PrettyPrint for ExtendOp {
impl PrettyPrint for MemLabel {
fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String {
match self {
&MemLabel::PCRel(off) => format!("pc+{}", off),
MemLabel::PCRel(off) => format!("pc+{}", off),
MemLabel::Mach(off) => format!("label({})", off.get()),
}
}
}
@@ -465,6 +470,8 @@ impl PrettyPrint for AMode {
let simm9 = simm9.pretty_print(8, allocs);
format!("[sp], {}", simm9)
}
AMode::Const { addr } => format!("[const({})]", addr.as_u32()),
// Eliminated by `mem_finalize()`.
&AMode::SPOffset { .. }
| &AMode::FPOffset { .. }

View File

@@ -2,7 +2,7 @@
use regalloc2::Allocation;
use crate::binemit::{CodeOffset, Reloc, StackMap};
use crate::binemit::{Reloc, StackMap};
use crate::ir::{types::*, RelSourceLoc};
use crate::ir::{LibCall, MemFlags, TrapCode};
use crate::isa::aarch64::inst::*;
@@ -10,20 +10,12 @@ use crate::machinst::{ty_bits, Reg, RegClass, Writable};
use crate::trace;
use core::convert::TryFrom;
/// Memory label/reference finalization: convert a MemLabel to a PC-relative
/// offset, possibly emitting relocation(s) as necessary.
pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
match label {
&MemLabel::PCRel(rel) => rel,
}
}
/// Memory addressing mode finalization: convert "special" modes (e.g.,
/// generic arbitrary stack offset) into real addressing modes, possibly by
/// emitting some helper instructions that come immediately before the use
/// of this amode.
pub fn mem_finalize(
insn_off: CodeOffset,
sink: Option<&mut MachBuffer<Inst>>,
mem: &AMode,
state: &EmitState,
) -> (SmallVec<[Inst; 4]>, AMode) {
@@ -74,14 +66,14 @@ pub fn mem_finalize(
}
}
&AMode::Label { ref label } => {
let off = memlabel_finalize(insn_off, label);
(
smallvec![],
AMode::Label {
label: MemLabel::PCRel(off),
},
)
AMode::Const { addr } => {
let sink = match sink {
Some(sink) => sink,
None => return (smallvec![], mem.clone()),
};
let label = sink.get_label_for_constant(*addr);
let label = MemLabel::Mach(label);
(smallvec![], AMode::Label { label })
}
_ => (smallvec![], mem.clone()),
@@ -959,7 +951,7 @@ impl MachInstEmit for Inst {
| &Inst::FpuLoad128 { rd, ref mem, flags } => {
let rd = allocs.next_writable(rd);
let mem = mem.with_allocs(&mut allocs);
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state);
let (mem_insts, mem) = mem_finalize(Some(sink), &mem, state);
for inst in mem_insts.into_iter() {
inst.emit(&[], sink, emit_info, state);
@@ -1039,7 +1031,19 @@ impl MachInstEmit for Inst {
&AMode::Label { ref label } => {
let offset = match label {
// cast i32 to u32 (two's-complement)
&MemLabel::PCRel(off) => off as u32,
MemLabel::PCRel(off) => *off as u32,
// Emit a relocation into the `MachBuffer`
// for the label that's being loaded from and
// encode an address of 0 in its place which will
// get filled in by relocation resolution later on.
MemLabel::Mach(label) => {
sink.use_label_at_offset(
sink.cur_offset(),
*label,
LabelUse::Ldr19,
);
0
}
} / 4;
assert!(offset < (1 << 19));
match self {
@@ -1076,6 +1080,7 @@ impl MachInstEmit for Inst {
&AMode::SPOffset { .. }
| &AMode::FPOffset { .. }
| &AMode::NominalSPOffset { .. }
| &AMode::Const { .. }
| &AMode::RegOffset { .. } => {
panic!("Should not see {:?} here!", mem)
}
@@ -1091,7 +1096,7 @@ impl MachInstEmit for Inst {
| &Inst::FpuStore128 { rd, ref mem, flags } => {
let rd = allocs.next(rd);
let mem = mem.with_allocs(&mut allocs);
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state);
let (mem_insts, mem) = mem_finalize(Some(sink), &mem, state);
for inst in mem_insts.into_iter() {
inst.emit(&[], sink, emit_info, state);
@@ -1172,6 +1177,7 @@ impl MachInstEmit for Inst {
&AMode::SPOffset { .. }
| &AMode::FPOffset { .. }
| &AMode::NominalSPOffset { .. }
| &AMode::Const { .. }
| &AMode::RegOffset { .. } => {
panic!("Should not see {:?} here!", mem)
}
@@ -2319,41 +2325,6 @@ impl MachInstEmit for Inst {
};
sink.put4(enc_inttofpu(top16, rd, rn));
}
&Inst::LoadFpuConst64 { rd, const_data } => {
let rd = allocs.next_writable(rd);
let inst = Inst::FpuLoad64 {
rd,
mem: AMode::Label {
label: MemLabel::PCRel(8),
},
flags: MemFlags::trusted(),
};
inst.emit(&[], sink, emit_info, state);
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(12),
};
inst.emit(&[], sink, emit_info, state);
sink.put8(const_data);
}
&Inst::LoadFpuConst128 { rd, const_data } => {
let rd = allocs.next_writable(rd);
let inst = Inst::FpuLoad128 {
rd,
mem: AMode::Label {
label: MemLabel::PCRel(8),
},
flags: MemFlags::trusted(),
};
inst.emit(&[], sink, emit_info, state);
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(20),
};
inst.emit(&[], sink, emit_info, state);
for i in const_data.to_le_bytes().iter() {
sink.put1(*i);
}
}
&Inst::FpuCSel32 { rd, rn, rm, cond } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
@@ -3350,7 +3321,7 @@ impl MachInstEmit for Inst {
&Inst::LoadAddr { rd, ref mem } => {
let rd = allocs.next_writable(rd);
let mem = mem.with_allocs(&mut allocs);
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state);
let (mem_insts, mem) = mem_finalize(Some(sink), &mem, state);
for inst in mem_insts.into_iter() {
inst.emit(&[], sink, emit_info, state);
}

View File

@@ -6891,24 +6891,6 @@ fn test_aarch64_binemit() {
"stp q18, q22, [sp], #304",
));
insns.push((
Inst::LoadFpuConst64 {
rd: writable_vreg(16),
const_data: 1.0_f64.to_bits(),
},
"5000005C03000014000000000000F03F",
"ldr d16, pc+8 ; b 12 ; data.f64 1",
));
insns.push((
Inst::LoadFpuConst128 {
rd: writable_vreg(5),
const_data: 0x0f0e0d0c0b0a09080706050403020100,
},
"4500009C05000014000102030405060708090A0B0C0D0E0F",
"ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100",
));
insns.push((
Inst::FpuCSel32 {
rd: writable_vreg(1),

View File

@@ -10,7 +10,6 @@ use crate::{settings, CodegenError, CodegenResult};
use crate::machinst::{PrettyPrint, Reg, RegClass, Writable};
use alloc::vec::Vec;
use core::convert::TryFrom;
use regalloc2::{PRegSet, VReg};
use smallvec::{smallvec, SmallVec};
use std::string::{String, ToString};
@@ -250,215 +249,6 @@ impl Inst {
}
}
/// Create instructions that load a 32-bit floating-point constant.
pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
const_data: u32,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
// Note that we must make sure that all bits outside the lowest 32 are set to 0
// because this function is also used to load wider constants (that have zeros
// in their most significant bits).
if const_data == 0 {
smallvec![Inst::VecDupImm {
rd,
imm: ASIMDMovModImm::zero(ScalarSize::Size32),
invert: false,
size: VectorSize::Size32x2,
}]
} else if let Some(imm) =
ASIMDFPModImm::maybe_from_u64(const_data.into(), ScalarSize::Size32)
{
smallvec![Inst::FpuMoveFPImm {
rd,
imm,
size: ScalarSize::Size32,
}]
} else {
let tmp = alloc_tmp(I32);
let mut insts = Inst::load_constant(tmp, const_data as u64, &mut alloc_tmp);
insts.push(Inst::MovToFpu {
rd,
rn: tmp.to_reg(),
size: ScalarSize::Size32,
});
insts
}
}
/// Create instructions that load a 64-bit floating-point constant.
pub fn load_fp_constant64<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
const_data: u64,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
// Note that we must make sure that all bits outside the lowest 64 are set to 0
// because this function is also used to load wider constants (that have zeros
// in their most significant bits).
// TODO: Treat as half of a 128 bit vector and consider replicated patterns.
// Scalar MOVI might also be an option.
if const_data == 0 {
smallvec![Inst::VecDupImm {
rd,
imm: ASIMDMovModImm::zero(ScalarSize::Size32),
invert: false,
size: VectorSize::Size32x2,
}]
} else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(const_data, ScalarSize::Size64) {
smallvec![Inst::FpuMoveFPImm {
rd,
imm,
size: ScalarSize::Size64,
}]
} else if let Ok(const_data) = u32::try_from(const_data) {
Inst::load_fp_constant32(rd, const_data, alloc_tmp)
} else if const_data & (u32::MAX as u64) == 0 {
let tmp = alloc_tmp(I64);
let mut insts = Inst::load_constant(tmp, const_data, &mut alloc_tmp);
insts.push(Inst::MovToFpu {
rd,
rn: tmp.to_reg(),
size: ScalarSize::Size64,
});
insts
} else {
smallvec![Inst::LoadFpuConst64 { rd, const_data }]
}
}
/// Create instructions that load a 128-bit vector constant.
pub fn load_fp_constant128<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
const_data: u128,
alloc_tmp: F,
) -> SmallVec<[Inst; 5]> {
if let Ok(const_data) = u64::try_from(const_data) {
SmallVec::from(&Inst::load_fp_constant64(rd, const_data, alloc_tmp)[..])
} else if let Some((pattern, size)) =
Inst::get_replicated_vector_pattern(const_data, ScalarSize::Size64)
{
Inst::load_replicated_vector_pattern(
rd,
pattern,
VectorSize::from_lane_size(size, true),
alloc_tmp,
)
} else {
smallvec![Inst::LoadFpuConst128 { rd, const_data }]
}
}
/// Determine whether a 128-bit constant represents a vector consisting of elements with
/// the same value.
pub fn get_replicated_vector_pattern(
value: u128,
size: ScalarSize,
) -> Option<(u64, ScalarSize)> {
let (mask, shift, next_size) = match size {
ScalarSize::Size8 => (u8::MAX as u128, 8, ScalarSize::Size128),
ScalarSize::Size16 => (u16::MAX as u128, 16, ScalarSize::Size8),
ScalarSize::Size32 => (u32::MAX as u128, 32, ScalarSize::Size16),
ScalarSize::Size64 => (u64::MAX as u128, 64, ScalarSize::Size32),
_ => return None,
};
let mut r = None;
let v = value & mask;
if (value >> shift) & mask == v {
r = Inst::get_replicated_vector_pattern(v, next_size);
if r.is_none() {
r = Some((v as u64, size));
}
}
r
}
/// Create instructions that load a vector constant consisting of elements with
/// the same value.
pub fn load_replicated_vector_pattern<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
pattern: u64,
size: VectorSize,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 5]> {
let lane_size = size.lane_size();
let widen_32_bit_pattern = |pattern, lane_size| {
if lane_size == ScalarSize::Size32 {
let pattern = pattern as u32 as u64;
ASIMDMovModImm::maybe_from_u64(pattern | (pattern << 32), ScalarSize::Size64)
} else {
None
}
};
if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) {
smallvec![Inst::VecDupImm {
rd,
imm,
invert: false,
size
}]
} else if let Some(imm) = ASIMDMovModImm::maybe_from_u64(!pattern, lane_size) {
debug_assert_ne!(lane_size, ScalarSize::Size8);
debug_assert_ne!(lane_size, ScalarSize::Size64);
smallvec![Inst::VecDupImm {
rd,
imm,
invert: true,
size
}]
} else if let Some(imm) = widen_32_bit_pattern(pattern, lane_size) {
let mut insts = smallvec![];
// TODO: Implement support for 64-bit scalar MOVI; we zero-extend the
// lower 64 bits instead.
if !size.is_128bits() {
let tmp = alloc_tmp(types::I64X2);
insts.push(Inst::VecDupImm {
rd: tmp,
imm,
invert: false,
size: VectorSize::Size64x2,
});
insts.push(Inst::FpuExtend {
rd,
rn: tmp.to_reg(),
size: ScalarSize::Size64,
});
} else {
insts.push(Inst::VecDupImm {
rd,
imm,
invert: false,
size: VectorSize::Size64x2,
});
}
insts
} else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(pattern, lane_size) {
smallvec![Inst::VecDupFPImm { rd, imm, size }]
} else {
let tmp = alloc_tmp(I64);
let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern, &mut alloc_tmp)[..]);
insts.push(Inst::VecDup {
rd,
rn: tmp.to_reg(),
size,
});
insts
}
}
/// Generic constructor for a load (zero-extending where appropriate).
pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
match ty {
@@ -585,6 +375,7 @@ fn memarg_operands<F: Fn(VReg) -> VReg>(memarg: &AMode, collector: &mut OperandC
&AMode::RegOffset { rn, .. } => {
collector.reg_use(rn);
}
&AMode::Const { .. } => {}
}
}
@@ -928,9 +719,6 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_use(rt2);
pairmemarg_operands(mem, collector);
}
&Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => {
collector.reg_def(rd);
}
&Inst::FpuToInt { rd, rn, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
@@ -1318,7 +1106,7 @@ impl MachInst for Inst {
// Pretty-printing of instructions.
fn mem_finalize_for_show(mem: &AMode, state: &EmitState) -> (String, AMode) {
let (mem_insts, mem) = mem_finalize(0, mem, state);
let (mem_insts, mem) = mem_finalize(None, mem, state);
let mut mem_str = mem_insts
.into_iter()
.map(|inst| {
@@ -2007,18 +1795,6 @@ impl Inst {
format!("stp {}, {}, {}", rt, rt2, mem)
}
&Inst::LoadFpuConst64 { rd, const_data } => {
let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs);
format!(
"ldr {}, pc+8 ; b 12 ; data.f64 {}",
rd,
f64::from_bits(const_data)
)
}
&Inst::LoadFpuConst128 { rd, const_data } => {
let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size128, allocs);
format!("ldr {}, pc+8 ; b 20 ; data.f128 0x{:032x}", rd, const_data)
}
&Inst::FpuToInt { op, rd, rn } => {
let (op, sizesrc, sizedest) = match op {
FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32),
@@ -2820,7 +2596,7 @@ impl Inst {
// of the existing legalization framework).
let rd = allocs.next_writable(rd);
let mem = mem.with_allocs(allocs);
let (mem_insts, mem) = mem_finalize(0, &mem, state);
let (mem_insts, mem) = mem_finalize(None, &mem, state);
let mut ret = String::new();
for inst in mem_insts.into_iter() {
ret.push_str(

View File

@@ -26,7 +26,7 @@
;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (f32const (u64_from_ieee32 n)))
(rule (lower (f32const (u32_from_ieee32 n)))
(constant_f32 n))
;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1954,7 +1954,7 @@
(rule -2 (lower (has_type ty (splat x @ (value_type (ty_scalar_float _)))))
(vec_dup_from_fpu x (vector_size ty) 0))
(rule (lower (has_type ty (splat (f32const (u64_from_ieee32 n)))))
(rule (lower (has_type ty (splat (f32const (u32_from_ieee32 n)))))
(splat_const n (vector_size ty)))
(rule (lower (has_type ty (splat (f64const (u64_from_ieee64 n)))))

View File

@@ -570,67 +570,6 @@ pub(crate) fn lower_constant_u64(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value
}
}
pub(crate) fn lower_constant_f32(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value: f32) {
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_fp_constant32(rd, value.to_bits(), alloc_tmp) {
ctx.emit(inst);
}
}
pub(crate) fn lower_constant_f64(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value: f64) {
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_fp_constant64(rd, value.to_bits(), alloc_tmp) {
ctx.emit(inst);
}
}
pub(crate) fn lower_constant_f128(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value: u128) {
if value == 0 {
// Fast-track a common case. The general case, viz, calling `Inst::load_fp_constant128`,
// is potentially expensive.
ctx.emit(Inst::VecDupImm {
rd,
imm: ASIMDMovModImm::zero(ScalarSize::Size8),
invert: false,
size: VectorSize::Size8x16,
});
} else {
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_fp_constant128(rd, value, alloc_tmp) {
ctx.emit(inst);
}
}
}
pub(crate) fn lower_splat_const(
ctx: &mut Lower<Inst>,
rd: Writable<Reg>,
value: u64,
size: VectorSize,
) {
let (value, narrow_size) = match size.lane_size() {
ScalarSize::Size8 => (value as u8 as u64, ScalarSize::Size128),
ScalarSize::Size16 => (value as u16 as u64, ScalarSize::Size8),
ScalarSize::Size32 => (value as u32 as u64, ScalarSize::Size16),
ScalarSize::Size64 => (value, ScalarSize::Size32),
_ => unreachable!(),
};
let (value, size) = match Inst::get_replicated_vector_pattern(value as u128, narrow_size) {
Some((value, lane_size)) => (
value,
VectorSize::from_lane_size(lane_size, size.is_128bits()),
),
None => (value, size),
};
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_replicated_vector_pattern(rd, value, size, alloc_tmp) {
ctx.emit(inst);
}
}
pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
match cc {
IntCC::Equal => Cond::Eq,

View File

@@ -7,17 +7,16 @@ use smallvec::SmallVec;
// Types that the generated ISLE code uses via `use super::*`.
use super::{
fp_reg, lower_condcode, lower_constant_f128, lower_constant_f32, lower_constant_f64,
lower_fp_condcode, stack_reg, writable_link_reg, writable_zero_reg, zero_reg, AMode,
ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp,
FPUOpRI, FPUOpRIMod, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo,
MachLabel, MemLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize,
PairAMode, Reg, SImm9, ScalarSize, ShiftOpAndAmt, UImm12Scaled, UImm5, VecMisc2, VectorSize,
NZCV,
fp_reg, lower_condcode, lower_fp_condcode, stack_reg, writable_link_reg, writable_zero_reg,
zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond,
CondBrKind, ExtendOp, FPUOpRI, FPUOpRIMod, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst,
IntCC, JTSequenceInfo, MachLabel, MemLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode,
OperandSize, PairAMode, Reg, SImm9, ScalarSize, ShiftOpAndAmt, UImm12Scaled, UImm5, VecMisc2,
VectorSize, NZCV,
};
use crate::ir::condcodes;
use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm};
use crate::isa::aarch64::lower::{lower_address, lower_pair_address, lower_splat_const};
use crate::isa::aarch64::lower::{lower_address, lower_pair_address};
use crate::isa::aarch64::AArch64Backend;
use crate::machinst::valueregs;
use crate::machinst::{isle::*, InputSourceInst};
@@ -524,38 +523,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
lower_pair_address(self.lower_ctx, addr, offset as i32)
}
fn constant_f32(&mut self, value: u64) -> Reg {
let rd = self.temp_writable_reg(I8X16);
lower_constant_f32(self.lower_ctx, rd, f32::from_bits(value as u32));
rd.to_reg()
}
fn constant_f64(&mut self, value: u64) -> Reg {
let rd = self.temp_writable_reg(I8X16);
lower_constant_f64(self.lower_ctx, rd, f64::from_bits(value));
rd.to_reg()
}
fn constant_f128(&mut self, value: u128) -> Reg {
let rd = self.temp_writable_reg(I8X16);
lower_constant_f128(self.lower_ctx, rd, value);
rd.to_reg()
}
fn splat_const(&mut self, value: u64, size: &VectorSize) -> Reg {
let rd = self.temp_writable_reg(I8X16);
lower_splat_const(self.lower_ctx, rd, value, *size);
rd.to_reg()
}
fn fp_cond_code(&mut self, cc: &condcodes::FloatCC) -> Cond {
lower_fp_condcode(*cc)
}
@@ -612,8 +579,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
}
fn min_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
if in_bits == 32 {
// From float32.
let min = match (signed, out_bits) {
@@ -630,7 +595,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
),
};
lower_constant_f32(self.lower_ctx, tmp, min);
generated_code::constructor_constant_f32(self, min.to_bits())
} else if in_bits == 64 {
// From float64.
let min = match (signed, out_bits) {
@@ -647,7 +612,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
),
};
lower_constant_f64(self.lower_ctx, tmp, min);
generated_code::constructor_constant_f64(self, min.to_bits())
} else {
unimplemented!(
"unexpected input size for min_fp_value: {} (signed: {}, output size: {})",
@@ -656,13 +621,9 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
out_bits
);
}
tmp.to_reg()
}
fn max_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
if in_bits == 32 {
// From float32.
let max = match (signed, out_bits) {
@@ -682,7 +643,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
),
};
lower_constant_f32(self.lower_ctx, tmp, max);
generated_code::constructor_constant_f32(self, max.to_bits())
} else if in_bits == 64 {
// From float64.
let max = match (signed, out_bits) {
@@ -702,7 +663,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
),
};
lower_constant_f64(self.lower_ctx, tmp, max);
generated_code::constructor_constant_f64(self, max.to_bits())
} else {
unimplemented!(
"unexpected input size for max_fp_value: {} (signed: {}, output size: {})",
@@ -711,8 +672,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
out_bits
);
}
tmp.to_reg()
}
fn fpu_op_ri_ushr(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI {
@@ -785,4 +744,66 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
None
}
}
fn asimd_mov_mod_imm_zero(&mut self, size: &ScalarSize) -> ASIMDMovModImm {
ASIMDMovModImm::zero(*size)
}
fn asimd_mov_mod_imm_from_u64(
&mut self,
val: u64,
size: &ScalarSize,
) -> Option<ASIMDMovModImm> {
ASIMDMovModImm::maybe_from_u64(val, *size)
}
fn asimd_fp_mod_imm_from_u64(&mut self, val: u64, size: &ScalarSize) -> Option<ASIMDFPModImm> {
ASIMDFPModImm::maybe_from_u64(val, *size)
}
fn u64_low32_bits_unset(&mut self, val: u64) -> Option<u64> {
if val & 0xffffffff == 0 {
Some(val)
} else {
None
}
}
fn u128_replicated_u64(&mut self, val: u128) -> Option<u64> {
let low64 = val as u64 as u128;
if (low64 | (low64 << 64)) == val {
Some(low64 as u64)
} else {
None
}
}
fn u64_replicated_u32(&mut self, val: u64) -> Option<u64> {
let low32 = val as u32 as u64;
if (low32 | (low32 << 32)) == val {
Some(low32)
} else {
None
}
}
fn u32_replicated_u16(&mut self, val: u64) -> Option<u64> {
let val = val as u32;
let low16 = val as u16 as u32;
if (low16 | (low16 << 16)) == val {
Some(low16.into())
} else {
None
}
}
fn u16_replicated_u8(&mut self, val: u64) -> Option<u64> {
let val = val as u16;
let low8 = val as u8 as u16;
if (low8 | (low8 << 8)) == val {
Some(low8.into())
} else {
None
}
}
}
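
As a worked example of the replication checks above (plain Rust, not
compiler code: `replicated_u64` and `replicated_u32` are standalone
stand-ins for the `u128_replicated_u64` and `u64_replicated_u32`
helpers, and the 1.0-splat constant is an arbitrary example), a v128
constant whose four f32 lanes are all 1.0 narrows from a u128 down to a
single 32-bit lane before any instructions are chosen.

fn replicated_u64(val: u128) -> Option<u64> {
    let low = val as u64;
    if ((low as u128) | ((low as u128) << 64)) == val {
        Some(low)
    } else {
        None
    }
}

fn replicated_u32(val: u64) -> Option<u32> {
    let low = val as u32;
    if ((low as u64) | ((low as u64) << 32)) == val {
        Some(low)
    } else {
        None
    }
}

fn main() {
    // A v128 constant whose four f32 lanes are all 1.0 (0x3f800000).
    let c: u128 = 0x3f800000_3f800000_3f800000_3f800000;
    let half = replicated_u64(c).expect("high and low halves match");
    let lane = replicated_u32(half).expect("both 32-bit halves match");
    // The splat therefore narrows to a single 32-bit lane, which the
    // FP-immediate splat rule can encode as FMOV (vector, immediate) 1.0.
    assert_eq!(lane, 1.0f32.to_bits());
}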

View File

@@ -11,7 +11,7 @@
;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (f32const (u64_from_ieee32 n)))
(rule (lower (f32const (u32_from_ieee32 n)))
(imm $F32 n))
;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@@ -896,7 +896,7 @@
(CallInd
(link WritableReg)
(info BoxCallIndInfo))
;; A pseudo-instruction that captures register arguments in vregs.
(Args
(args VecArgPair))
@@ -1555,8 +1555,8 @@
(decl u8_as_u16 (u8) u16)
(extern constructor u8_as_u16 u8_as_u16)
(decl u64_as_u32 (u64) u32)
(extern constructor u64_as_u32 u64_as_u32)
(decl u64_truncate_to_u32 (u64) u32)
(extern constructor u64_truncate_to_u32 u64_truncate_to_u32)
(decl u64_as_i16 (u64) i16)
(extern constructor u64_as_i16 u64_as_i16)
@@ -3000,7 +3000,7 @@
;; 32-bit result type, any value
(rule 5 (imm (gpr32_ty ty) n)
(let ((dst WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.Mov32Imm dst (u64_as_u32 n)))))
(_ Unit (emit (MInst.Mov32Imm dst (u64_truncate_to_u32 n)))))
dst))
;; 64-bit result type, value fits in i16
@@ -3051,7 +3051,7 @@
;; TODO: use LZER to load 0.0
(rule 8 (imm $F32 n)
(let ((dst WritableReg (temp_writable_reg $F32))
(_ Unit (emit (MInst.LoadFpuConst32 dst (u64_as_u32 n)))))
(_ Unit (emit (MInst.LoadFpuConst32 dst (u64_truncate_to_u32 n)))))
dst))
;; 64-bit floating-point type, any value. Loaded from literal pool.

View File

@@ -18,7 +18,7 @@
;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (f32const (u64_from_ieee32 x)))
(rule (lower (f32const (u32_from_ieee32 x)))
(imm $F32 x))

View File

@@ -436,7 +436,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> {
}
#[inline]
fn u64_as_u32(&mut self, n: u64) -> u32 {
fn u64_truncate_to_u32(&mut self, n: u64) -> u32 {
n as u32
}

View File

@@ -24,7 +24,7 @@
;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (f32const (u64_from_ieee32 x)))
(rule (lower (f32const (u32_from_ieee32 x)))
(imm $F32 x))
;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@@ -80,6 +80,11 @@ macro_rules! isle_common_prelude_methods {
x ^ y
}
#[inline]
fn u64_shl(&mut self, x: u64, y: u64) -> u64 {
x << y
}
#[inline]
fn imm64_shl(&mut self, ty: Type, x: Imm64, y: Imm64) -> Imm64 {
// Mask off any excess shift bits.
@@ -502,8 +507,8 @@ macro_rules! isle_common_prelude_methods {
}
}
fn u64_from_ieee32(&mut self, val: Ieee32) -> u64 {
val.bits().into()
fn u32_from_ieee32(&mut self, val: Ieee32) -> u32 {
val.bits()
}
fn u64_from_ieee64(&mut self, val: Ieee64) -> u64 {
@@ -748,5 +753,13 @@ macro_rules! isle_common_prelude_methods {
fn pack_block_array_2(&mut self, a: BlockCall, b: BlockCall) -> BlockArray2 {
[a, b]
}
fn u128_as_u64(&mut self, val: u128) -> Option<u64> {
u64::try_from(val).ok()
}
fn u64_as_u32(&mut self, val: u64) -> Option<u32> {
u32::try_from(val).ok()
}
};
}

View File

@@ -88,10 +88,17 @@
(decl pure u32_as_u64 (u32) u64)
(extern constructor u32_as_u64 u32_as_u64)
(convert u32 u64 u32_as_u64)
(decl pure i64_as_u64 (i64) u64)
(extern constructor i64_as_u64 i64_as_u64)
(decl u128_as_u64 (u64) u128)
(extern extractor u128_as_u64 u128_as_u64)
(decl u64_as_u32 (u32) u64)
(extern extractor u64_as_u32 u64_as_u32)
;;;; Primitive Arithmetic ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl pure u8_and (u8 u8) u8)
@@ -129,6 +136,9 @@
(decl pure u64_xor (u64 u64) u64)
(extern constructor u64_xor u64_xor)
(decl pure u64_shl (u64 u64) u64)
(extern constructor u64_shl u64_shl)
(decl pure imm64_shl (Type Imm64 Imm64) Imm64)
(extern constructor imm64_shl imm64_shl)
@@ -388,8 +398,8 @@
(extern constructor imm64_masked imm64_masked)
;; Extract a `u64` from an `Ieee32`.
(decl u64_from_ieee32 (u64) Ieee32)
(extern extractor infallible u64_from_ieee32 u64_from_ieee32)
(decl u32_from_ieee32 (u32) Ieee32)
(extern extractor infallible u32_from_ieee32 u32_from_ieee32)
;; Extract a `u64` from an `Ieee64`.
(decl u64_from_ieee64 (u64) Ieee64)