aarch64: Use VCodeConstant for f64/v128 constants (#5997)
* aarch64: Translate float and splat lowering to ISLE
I was looking into `constant_f128` and its fallback lowering through memory,
and to get familiar with the code I figured it'd be good to port some
Rust logic to ISLE. This commit ports the `constant_{f128,f64,f32}`
helpers from Rust to ISLE, as well as the `splat_const` helper, which
ended up being closely related.
Tests reflect a number of regalloc changes that happened, but the one major
difference is that the lowering of an `f32` constant now creates a 32-bit
immediate instead of a 64-bit immediate (in a GP register before it's moved
into an FP register). This has no semantic effect, but the generated code
differs slightly in a few minor cases.
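
For intuition, an `f32`'s bit pattern is exactly 32 bits wide, so nothing is
lost by materializing it as a 32-bit immediate. A hedged, standalone
illustration (not code from this commit):

    fn main() {
        // An f32 constant is a 32-bit pattern; widening to 64 bits and
        // truncating back round-trips exactly, so a 32-bit GP immediate
        // is enough before the move into the FP register.
        let bits: u32 = 1.25f32.to_bits();
        assert_eq!(u64::from(bits) as u32, bits);
        assert_eq!(f32::from_bits(bits), 1.25);
    }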
* aarch64: Load f64/v128 constants from a pool
This commit removes the `LoadFpuConst64` and `LoadFpuConst128`
pseudo-instructions from the AArch64 backend, which internally loaded a
nearby constant and then jumped over it. Constants now go through the
`VCodeConstant` infrastructure and are placed at the end of the function,
similar to how the x64 backend works. Minor support was also added for a new
addressing mode that performs a `MachLabel`-relative load.
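
As a rough sketch of the new flow (hedged: the calls match the emission code
in the diff below, but `finalize_const_amode` is a made-up name and the
surrounding plumbing is elided):

    // At emission time an `AMode::Const` is rewritten into a label-relative
    // amode; the LDR-literal that consumes it then records a 19-bit label
    // fixup (`LabelUse::Ldr19`) for the `MachBuffer` to resolve later.
    fn finalize_const_amode(sink: &mut MachBuffer<Inst>, addr: VCodeConstant) -> AMode {
        // Ask the buffer for (or create) the label of this pooled constant.
        let label = sink.get_label_for_constant(addr);
        AMode::Label { label: MemLabel::Mach(label) }
    }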
@@ -466,14 +466,6 @@
       (mem PairAMode)
       (flags MemFlags))
 
-    (LoadFpuConst64
-      (rd WritableReg)
-      (const_data u64))
-
-    (LoadFpuConst128
-      (rd WritableReg)
-      (const_data u128))
-
     ;; Conversion: FP -> integer.
     (FpuToInt
       (op FpuToIntOp)
@@ -1135,6 +1127,11 @@
       (off i64)
       (ty Type))
 
+    ;; A reference to a constant which is placed outside of the function's
+    ;; body, typically at the end.
+    (Const
+      (addr VCodeConstant))
+
     ;; Offset from the "nominal stack pointer", which is where the real SP is
     ;; just after stack and spill slots are allocated in the function prologue.
     ;; At emission time, this is converted to `SPOffset` with a fixup added to
@@ -1194,6 +1191,16 @@
 (rule (lane_size (dynamic_lane 32 _)) (ScalarSize.Size32))
 (rule (lane_size (dynamic_lane 64 _)) (ScalarSize.Size64))
 
+;; Helper for extracting the size of a lane from the input `VectorSize`
+(decl pure vector_lane_size (VectorSize) ScalarSize)
+(rule (vector_lane_size (VectorSize.Size8x16)) (ScalarSize.Size8))
+(rule (vector_lane_size (VectorSize.Size8x8)) (ScalarSize.Size8))
+(rule (vector_lane_size (VectorSize.Size16x8)) (ScalarSize.Size16))
+(rule (vector_lane_size (VectorSize.Size16x4)) (ScalarSize.Size16))
+(rule (vector_lane_size (VectorSize.Size32x4)) (ScalarSize.Size32))
+(rule (vector_lane_size (VectorSize.Size32x2)) (ScalarSize.Size32))
+(rule (vector_lane_size (VectorSize.Size64x2)) (ScalarSize.Size64))
+
 (type Cond extern
   (enum
     (Eq)
@@ -1908,6 +1915,13 @@
         (_ Unit (emit (MInst.VecDupFromFpu dst src size lane))))
     dst))
 
+;; Helper for emitting `MInst.VecDupImm` instructions.
+(decl vec_dup_imm (ASIMDMovModImm bool VectorSize) Reg)
+(rule (vec_dup_imm imm invert size)
+  (let ((dst WritableReg (temp_writable_reg $I8X16))
+        (_ Unit (emit (MInst.VecDupImm dst imm invert size))))
+    dst))
+
 ;; Helper for emitting `MInst.AluRRImm12` instructions.
 (decl alu_rr_imm12 (ALUOp Type Reg Imm12) Reg)
 (rule (alu_rr_imm12 op ty src imm)
@@ -2158,6 +2172,13 @@
         (_ Unit (emit (MInst.MovToFpu dst x size))))
     dst))
 
+;; Helper for emitting `MInst.FpuMoveFPImm` instructions.
+(decl fpu_move_fp_imm (ASIMDFPModImm ScalarSize) Reg)
+(rule (fpu_move_fp_imm imm size)
+  (let ((dst WritableReg (temp_writable_reg $I8X16))
+        (_ Unit (emit (MInst.FpuMoveFPImm dst imm size))))
+    dst))
+
 ;; Helper for emitting `MInst.MovToVec` instructions.
 (decl mov_to_vec (Reg Reg u8 VectorSize) Reg)
 (rule (mov_to_vec src1 src2 lane size)
@@ -2986,24 +3007,122 @@
     (amode ty addr offset)))
 
 ;; Lower a constant f32.
-(decl constant_f32 (u64) Reg)
-;; TODO: Port lower_constant_f32() to ISLE.
-(extern constructor constant_f32 constant_f32)
+;;
+;; Note that we must make sure that all bits outside the lowest 32 are set to 0
+;; because this function is also used to load wider constants (that have zeros
+;; in their most significant bits).
+(decl constant_f32 (u32) Reg)
+(rule 2 (constant_f32 0)
+  (vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32))
+               $false
+               (VectorSize.Size32x2)))
+(rule 1 (constant_f32 n)
+  (if-let imm (asimd_fp_mod_imm_from_u64 n (ScalarSize.Size32)))
+  (fpu_move_fp_imm imm (ScalarSize.Size32)))
+(rule (constant_f32 n)
+  (mov_to_fpu (imm $I32 (ImmExtend.Zero) n) (ScalarSize.Size32)))
 
 ;; Lower a constant f64.
+;;
+;; Note that we must make sure that all bits outside the lowest 64 are set to 0
+;; because this function is also used to load wider constants (that have zeros
+;; in their most significant bits).
+;; TODO: Treat as half of a 128 bit vector and consider replicated patterns.
+;; Scalar MOVI might also be an option.
 (decl constant_f64 (u64) Reg)
-;; TODO: Port lower_constant_f64() to ISLE.
-(extern constructor constant_f64 constant_f64)
+(rule 4 (constant_f64 0)
+  (vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32))
+               $false
+               (VectorSize.Size32x2)))
+(rule 3 (constant_f64 n)
+  (if-let imm (asimd_fp_mod_imm_from_u64 n (ScalarSize.Size64)))
+  (fpu_move_fp_imm imm (ScalarSize.Size64)))
+(rule 2 (constant_f64 (u64_as_u32 n))
+  (constant_f32 n))
+(rule 1 (constant_f64 (u64_low32_bits_unset n))
+  (mov_to_fpu (imm $I64 (ImmExtend.Zero) n) (ScalarSize.Size64)))
+(rule (constant_f64 n)
+  (fpu_load64 (AMode.Const (emit_u64_le_const n)) (mem_flags_trusted)))
+
+;; Tests whether the low 32 bits in the input are all zero.
+(decl u64_low32_bits_unset (u64) u64)
+(extern extractor u64_low32_bits_unset u64_low32_bits_unset)
 
 ;; Lower a constant f128.
 (decl constant_f128 (u128) Reg)
-;; TODO: Port lower_constant_f128() to ISLE.
-(extern constructor constant_f128 constant_f128)
+(rule 3 (constant_f128 0)
+  (vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size8))
+               $false
+               (VectorSize.Size8x16)))
+
+;; If the upper 64-bits are all zero then defer to `constant_f64`.
+(rule 2 (constant_f128 (u128_as_u64 n)) (constant_f64 n))
+
+;; If the low half of the u128 equals the high half then delegate to the splat
+;; logic as a splat of a 64-bit value.
+(rule 1 (constant_f128 (u128_replicated_u64 n))
+  (splat_const n (VectorSize.Size64x2)))
+
+;; Base case is to load the constant from memory.
+(rule (constant_f128 n)
+  (fpu_load128 (AMode.Const (emit_u128_le_const n)) (mem_flags_trusted)))
 
 ;; Lower a vector splat with a constant parameter.
+;;
+;; The 64-bit input here only uses the low bits for the lane size in
+;; `VectorSize` and all other bits are ignored.
 (decl splat_const (u64 VectorSize) Reg)
-;; TODO: Port lower_splat_const() to ISLE.
-(extern constructor splat_const splat_const)
+
+;; If the splat'd constant can itself be reduced in size then attempt to do so
+;; as it will make it easier to create the immediates in the instructions below.
+(rule 5 (splat_const (u64_replicated_u32 n) (VectorSize.Size64x2))
+  (splat_const n (VectorSize.Size32x4)))
+(rule 5 (splat_const (u32_replicated_u16 n) (VectorSize.Size32x4))
+  (splat_const n (VectorSize.Size16x8)))
+(rule 5 (splat_const (u32_replicated_u16 n) (VectorSize.Size32x2))
+  (splat_const n (VectorSize.Size16x4)))
+(rule 5 (splat_const (u16_replicated_u8 n) (VectorSize.Size16x8))
+  (splat_const n (VectorSize.Size8x16)))
+(rule 5 (splat_const (u16_replicated_u8 n) (VectorSize.Size16x4))
+  (splat_const n (VectorSize.Size8x8)))
+
+;; Special cases for `vec_dup_imm` instructions where the input is either
+;; negated or not.
+(rule 4 (splat_const n size)
+  (if-let imm (asimd_mov_mod_imm_from_u64 n (vector_lane_size size)))
+  (vec_dup_imm imm $false size))
+(rule 3 (splat_const n size)
+  (if-let imm (asimd_mov_mod_imm_from_u64 (u64_not n) (vector_lane_size size)))
+  (vec_dup_imm imm $true size))
+
+;; Special case a 32-bit splat where an immediate can be created by
+;; concatenating the 32-bit constant into a 64-bit value
+(rule 2 (splat_const n (VectorSize.Size32x4))
+  (if-let imm (asimd_mov_mod_imm_from_u64 (u64_or n (u64_shl n 32)) (ScalarSize.Size64)))
+  (vec_dup_imm imm $false (VectorSize.Size64x2)))
+(rule 2 (splat_const n (VectorSize.Size32x2))
+  (if-let imm (asimd_mov_mod_imm_from_u64 (u64_or n (u64_shl n 32)) (ScalarSize.Size64)))
+  (fpu_extend (vec_dup_imm imm $false (VectorSize.Size64x2)) (ScalarSize.Size64)))
+
+(rule 1 (splat_const n size)
+  (if-let imm (asimd_fp_mod_imm_from_u64 n (vector_lane_size size)))
+  (vec_dup_fp_imm imm size))
+
+;; The base case for splat is to use `vec_dup` with the immediate loaded into a
+;; register.
+(rule (splat_const n size)
+  (vec_dup (imm $I64 (ImmExtend.Zero) n) size))
+
+;; Each of these extractors tests whether the upper half of the input equals the
+;; lower half of the input
+(decl u128_replicated_u64 (u64) u128)
+(extern extractor u128_replicated_u64 u128_replicated_u64)
+(decl u64_replicated_u32 (u64) u64)
+(extern extractor u64_replicated_u32 u64_replicated_u32)
+(decl u32_replicated_u16 (u64) u64)
+(extern extractor u32_replicated_u16 u32_replicated_u16)
+(decl u16_replicated_u8 (u64) u64)
+(extern extractor u16_replicated_u8 u16_replicated_u8)
 
 ;; Lower a FloatCC to a Cond.
 (decl fp_cond_code (FloatCC) Cond)
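
The `constant_f64` rules above form a priority cascade. A hedged Rust
paraphrase of the same decision order, with the FMOV-encodability test
stubbed out (the real check is `ASIMDFPModImm::maybe_from_u64`):

    // Stub standing in for `ASIMDFPModImm::maybe_from_u64`; the real check
    // accepts only the small set of FMOV-encodable FP immediates.
    fn fmov_encodable(_bits: u64) -> bool {
        false
    }

    fn classify_f64(bits: u64) -> &'static str {
        if bits == 0 {
            "rule 4: vec_dup_imm zero (MOVI)"
        } else if fmov_encodable(bits) {
            "rule 3: fpu_move_fp_imm (FMOV immediate)"
        } else if u32::try_from(bits).is_ok() {
            "rule 2: delegate to constant_f32"
        } else if bits & 0xffff_ffff == 0 {
            "rule 1: 64-bit GP immediate + mov_to_fpu"
        } else {
            "rule 0: fpu_load64 from the constant pool"
        }
    }

    fn main() {
        assert_eq!(classify_f64(0x4070_0000_0000_0000), "rule 1: 64-bit GP immediate + mov_to_fpu");
        assert_eq!(classify_f64(0x4009_21fb_5444_2d18), "rule 0: fpu_load64 from the constant pool");
    }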
@@ -3814,3 +3933,36 @@
 ;; Helper for emitting the `trn2` instruction
 (decl vec_trn2 (Reg Reg VectorSize) Reg)
 (rule (vec_trn2 rn rm size) (vec_rrr (VecALUOp.Trn2) rn rm size))
+
+;; Helper for creating a zero value `ASIMDMovModImm` immediate.
+(decl asimd_mov_mod_imm_zero (ScalarSize) ASIMDMovModImm)
+(extern constructor asimd_mov_mod_imm_zero asimd_mov_mod_imm_zero)
+
+;; Helper for fallibly creating an `ASIMDMovModImm` immediate from its parts.
+(decl pure partial asimd_mov_mod_imm_from_u64 (u64 ScalarSize) ASIMDMovModImm)
+(extern constructor asimd_mov_mod_imm_from_u64 asimd_mov_mod_imm_from_u64)
+
+;; Helper for fallibly creating an `ASIMDFPModImm` immediate from its parts.
+(decl pure partial asimd_fp_mod_imm_from_u64 (u64 ScalarSize) ASIMDFPModImm)
+(extern constructor asimd_fp_mod_imm_from_u64 asimd_fp_mod_imm_from_u64)
+
+;; Helper for creating a `VecDupFPImm` instruction
+(decl vec_dup_fp_imm (ASIMDFPModImm VectorSize) Reg)
+(rule (vec_dup_fp_imm imm size)
+  (let ((dst WritableReg (temp_writable_reg $I8X16))
+        (_ Unit (emit (MInst.VecDupFPImm dst imm size))))
+    dst))
+
+;; Helper for creating a `FpuLoad64` instruction
+(decl fpu_load64 (AMode MemFlags) Reg)
+(rule (fpu_load64 amode flags)
+  (let ((dst WritableReg (temp_writable_reg $I8X16))
+        (_ Unit (emit (MInst.FpuLoad64 dst amode flags))))
+    dst))
+
+;; Helper for creating a `FpuLoad128` instruction
+(decl fpu_load128 (AMode MemFlags) Reg)
+(rule (fpu_load128 amode flags)
+  (let ((dst WritableReg (temp_writable_reg $I8X16))
+        (_ Unit (emit (MInst.FpuLoad128 dst amode flags))))
+    dst))
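
The priority-5 `splat_const` rules above repeatedly narrow the lane size
while both halves of the value agree, which makes the later immediate-forming
rules more likely to fire. A hedged standalone sketch of that reduction:

    // Narrow a splat value while its two halves match, down to 8-bit lanes.
    fn narrow_splat(mut val: u64, mut lane_bits: u32) -> (u64, u32) {
        while lane_bits > 8 {
            let half = lane_bits / 2;
            let mask = (1u64 << half) - 1;
            let low = val & mask;
            if (val >> half) & mask != low {
                break;
            }
            val = low;
            lane_bits = half;
        }
        (val, lane_bits)
    }

    fn main() {
        // A fully replicated byte pattern narrows all the way to one byte.
        assert_eq!(narrow_splat(0x4242_4242_4242_4242, 64), (0x42, 8));
        // A pattern whose halves differ stays at its original width.
        assert_eq!(narrow_splat(0x0123_4567_89ab_cdef, 64), (0x0123_4567_89ab_cdef, 64));
    }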
@@ -124,6 +124,9 @@ pub enum MemLabel {
    /// offset from this instruction. This form must be used at emission time;
    /// see `memlabel_finalize()` for how other forms are lowered to this one.
    PCRel(i32),
+    /// An address that refers to a label within a `MachBuffer`, for example a
+    /// constant that lives in the pool at the end of the function.
+    Mach(MachLabel),
 }
 
 impl AMode {
@@ -194,6 +197,7 @@ impl AMode {
             | &AMode::FPOffset { .. }
             | &AMode::SPOffset { .. }
             | &AMode::NominalSPOffset { .. }
+            | &AMode::Const { .. }
             | &AMode::Label { .. } => self.clone(),
         }
     }
@@ -382,7 +386,8 @@ impl PrettyPrint for ExtendOp {
 impl PrettyPrint for MemLabel {
     fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String {
         match self {
-            &MemLabel::PCRel(off) => format!("pc+{}", off),
+            MemLabel::PCRel(off) => format!("pc+{}", off),
+            MemLabel::Mach(off) => format!("label({})", off.get()),
         }
     }
 }
@@ -465,6 +470,8 @@ impl PrettyPrint for AMode {
                 let simm9 = simm9.pretty_print(8, allocs);
                 format!("[sp], {}", simm9)
             }
+            AMode::Const { addr } => format!("[const({})]", addr.as_u32()),
+
             // Eliminated by `mem_finalize()`.
             &AMode::SPOffset { .. }
             | &AMode::FPOffset { .. }
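For reference, the pretty-printing added above renders the two new forms as
`label(N)` for a `MemLabel::Mach` and `[const(N)]` for an `AMode::Const`,
where `N` is the label or constant index.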
@@ -2,7 +2,7 @@
 
 use regalloc2::Allocation;
 
-use crate::binemit::{CodeOffset, Reloc, StackMap};
+use crate::binemit::{Reloc, StackMap};
 use crate::ir::{types::*, RelSourceLoc};
 use crate::ir::{LibCall, MemFlags, TrapCode};
 use crate::isa::aarch64::inst::*;
@@ -10,20 +10,12 @@ use crate::machinst::{ty_bits, Reg, RegClass, Writable};
 use crate::trace;
 use core::convert::TryFrom;
 
-/// Memory label/reference finalization: convert a MemLabel to a PC-relative
-/// offset, possibly emitting relocation(s) as necessary.
-pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
-    match label {
-        &MemLabel::PCRel(rel) => rel,
-    }
-}
-
 /// Memory addressing mode finalization: convert "special" modes (e.g.,
 /// generic arbitrary stack offset) into real addressing modes, possibly by
 /// emitting some helper instructions that come immediately before the use
 /// of this amode.
 pub fn mem_finalize(
-    insn_off: CodeOffset,
+    sink: Option<&mut MachBuffer<Inst>>,
     mem: &AMode,
     state: &EmitState,
 ) -> (SmallVec<[Inst; 4]>, AMode) {
@@ -74,14 +66,14 @@ pub fn mem_finalize(
             }
         }
 
-        &AMode::Label { ref label } => {
-            let off = memlabel_finalize(insn_off, label);
-            (
-                smallvec![],
-                AMode::Label {
-                    label: MemLabel::PCRel(off),
-                },
-            )
+        AMode::Const { addr } => {
+            let sink = match sink {
+                Some(sink) => sink,
+                None => return (smallvec![], mem.clone()),
+            };
+            let label = sink.get_label_for_constant(*addr);
+            let label = MemLabel::Mach(label);
+            (smallvec![], AMode::Label { label })
         }
 
         _ => (smallvec![], mem.clone()),
@@ -959,7 +951,7 @@ impl MachInstEmit for Inst {
             | &Inst::FpuLoad128 { rd, ref mem, flags } => {
                 let rd = allocs.next_writable(rd);
                 let mem = mem.with_allocs(&mut allocs);
-                let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state);
+                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, state);
 
                 for inst in mem_insts.into_iter() {
                     inst.emit(&[], sink, emit_info, state);
@@ -1039,7 +1031,19 @@ impl MachInstEmit for Inst {
                     &AMode::Label { ref label } => {
                         let offset = match label {
                             // cast i32 to u32 (two's-complement)
-                            &MemLabel::PCRel(off) => off as u32,
+                            MemLabel::PCRel(off) => *off as u32,
+                            // Emit a relocation into the `MachBuffer`
+                            // for the label that's being loaded from and
+                            // encode an address of 0 in its place which will
+                            // get filled in by relocation resolution later on.
+                            MemLabel::Mach(label) => {
+                                sink.use_label_at_offset(
+                                    sink.cur_offset(),
+                                    *label,
+                                    LabelUse::Ldr19,
+                                );
+                                0
+                            }
                         } / 4;
                         assert!(offset < (1 << 19));
                         match self {
@@ -1076,6 +1080,7 @@ impl MachInstEmit for Inst {
                     &AMode::SPOffset { .. }
                     | &AMode::FPOffset { .. }
                     | &AMode::NominalSPOffset { .. }
+                    | &AMode::Const { .. }
                     | &AMode::RegOffset { .. } => {
                         panic!("Should not see {:?} here!", mem)
                     }
@@ -1091,7 +1096,7 @@ impl MachInstEmit for Inst {
             | &Inst::FpuStore128 { rd, ref mem, flags } => {
                 let rd = allocs.next(rd);
                 let mem = mem.with_allocs(&mut allocs);
-                let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state);
+                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, state);
 
                 for inst in mem_insts.into_iter() {
                     inst.emit(&[], sink, emit_info, state);
@@ -1172,6 +1177,7 @@ impl MachInstEmit for Inst {
                     &AMode::SPOffset { .. }
                     | &AMode::FPOffset { .. }
                     | &AMode::NominalSPOffset { .. }
+                    | &AMode::Const { .. }
                     | &AMode::RegOffset { .. } => {
                         panic!("Should not see {:?} here!", mem)
                     }
@@ -2319,41 +2325,6 @@ impl MachInstEmit for Inst {
                 };
                 sink.put4(enc_inttofpu(top16, rd, rn));
             }
-            &Inst::LoadFpuConst64 { rd, const_data } => {
-                let rd = allocs.next_writable(rd);
-                let inst = Inst::FpuLoad64 {
-                    rd,
-                    mem: AMode::Label {
-                        label: MemLabel::PCRel(8),
-                    },
-                    flags: MemFlags::trusted(),
-                };
-                inst.emit(&[], sink, emit_info, state);
-                let inst = Inst::Jump {
-                    dest: BranchTarget::ResolvedOffset(12),
-                };
-                inst.emit(&[], sink, emit_info, state);
-                sink.put8(const_data);
-            }
-            &Inst::LoadFpuConst128 { rd, const_data } => {
-                let rd = allocs.next_writable(rd);
-                let inst = Inst::FpuLoad128 {
-                    rd,
-                    mem: AMode::Label {
-                        label: MemLabel::PCRel(8),
-                    },
-                    flags: MemFlags::trusted(),
-                };
-                inst.emit(&[], sink, emit_info, state);
-                let inst = Inst::Jump {
-                    dest: BranchTarget::ResolvedOffset(20),
-                };
-                inst.emit(&[], sink, emit_info, state);
-
-                for i in const_data.to_le_bytes().iter() {
-                    sink.put1(*i);
-                }
-            }
             &Inst::FpuCSel32 { rd, rn, rm, cond } => {
                 let rd = allocs.next_writable(rd);
                 let rn = allocs.next(rn);
@@ -3350,7 +3321,7 @@ impl MachInstEmit for Inst {
             &Inst::LoadAddr { rd, ref mem } => {
                 let rd = allocs.next_writable(rd);
                 let mem = mem.with_allocs(&mut allocs);
-                let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state);
+                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, state);
                 for inst in mem_insts.into_iter() {
                     inst.emit(&[], sink, emit_info, state);
                 }
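
The `assert!(offset < (1 << 19))` above guards the 19-bit word-scaled
immediate of `LDR (literal)`; since the encoded offset is signed, the pool
must sit within roughly 1 MiB of the load. A hedged back-of-the-envelope
check:

    fn main() {
        // Positive half of a signed 19-bit word offset, scaled to bytes.
        let max_words: i64 = (1 << 18) - 1;
        let max_bytes = max_words * 4;
        assert_eq!(max_bytes, (1 << 20) - 4); // just under 1 MiB forward
    }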
@@ -6891,24 +6891,6 @@ fn test_aarch64_binemit() {
         "stp q18, q22, [sp], #304",
     ));
 
-    insns.push((
-        Inst::LoadFpuConst64 {
-            rd: writable_vreg(16),
-            const_data: 1.0_f64.to_bits(),
-        },
-        "5000005C03000014000000000000F03F",
-        "ldr d16, pc+8 ; b 12 ; data.f64 1",
-    ));
-
-    insns.push((
-        Inst::LoadFpuConst128 {
-            rd: writable_vreg(5),
-            const_data: 0x0f0e0d0c0b0a09080706050403020100,
-        },
-        "4500009C05000014000102030405060708090A0B0C0D0E0F",
-        "ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100",
-    ));
-
     insns.push((
         Inst::FpuCSel32 {
             rd: writable_vreg(1),
@@ -10,7 +10,6 @@ use crate::{settings, CodegenError, CodegenResult};
 use crate::machinst::{PrettyPrint, Reg, RegClass, Writable};
 
 use alloc::vec::Vec;
-use core::convert::TryFrom;
 use regalloc2::{PRegSet, VReg};
 use smallvec::{smallvec, SmallVec};
 use std::string::{String, ToString};
@@ -250,215 +249,6 @@ impl Inst {
         }
     }
 
-    /// Create instructions that load a 32-bit floating-point constant.
-    pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
-        rd: Writable<Reg>,
-        const_data: u32,
-        mut alloc_tmp: F,
-    ) -> SmallVec<[Inst; 4]> {
-        // Note that we must make sure that all bits outside the lowest 32 are set to 0
-        // because this function is also used to load wider constants (that have zeros
-        // in their most significant bits).
-        if const_data == 0 {
-            smallvec![Inst::VecDupImm {
-                rd,
-                imm: ASIMDMovModImm::zero(ScalarSize::Size32),
-                invert: false,
-                size: VectorSize::Size32x2,
-            }]
-        } else if let Some(imm) =
-            ASIMDFPModImm::maybe_from_u64(const_data.into(), ScalarSize::Size32)
-        {
-            smallvec![Inst::FpuMoveFPImm {
-                rd,
-                imm,
-                size: ScalarSize::Size32,
-            }]
-        } else {
-            let tmp = alloc_tmp(I32);
-            let mut insts = Inst::load_constant(tmp, const_data as u64, &mut alloc_tmp);
-
-            insts.push(Inst::MovToFpu {
-                rd,
-                rn: tmp.to_reg(),
-                size: ScalarSize::Size32,
-            });
-
-            insts
-        }
-    }
-
-    /// Create instructions that load a 64-bit floating-point constant.
-    pub fn load_fp_constant64<F: FnMut(Type) -> Writable<Reg>>(
-        rd: Writable<Reg>,
-        const_data: u64,
-        mut alloc_tmp: F,
-    ) -> SmallVec<[Inst; 4]> {
-        // Note that we must make sure that all bits outside the lowest 64 are set to 0
-        // because this function is also used to load wider constants (that have zeros
-        // in their most significant bits).
-        // TODO: Treat as half of a 128 bit vector and consider replicated patterns.
-        // Scalar MOVI might also be an option.
-        if const_data == 0 {
-            smallvec![Inst::VecDupImm {
-                rd,
-                imm: ASIMDMovModImm::zero(ScalarSize::Size32),
-                invert: false,
-                size: VectorSize::Size32x2,
-            }]
-        } else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(const_data, ScalarSize::Size64) {
-            smallvec![Inst::FpuMoveFPImm {
-                rd,
-                imm,
-                size: ScalarSize::Size64,
-            }]
-        } else if let Ok(const_data) = u32::try_from(const_data) {
-            Inst::load_fp_constant32(rd, const_data, alloc_tmp)
-        } else if const_data & (u32::MAX as u64) == 0 {
-            let tmp = alloc_tmp(I64);
-            let mut insts = Inst::load_constant(tmp, const_data, &mut alloc_tmp);
-
-            insts.push(Inst::MovToFpu {
-                rd,
-                rn: tmp.to_reg(),
-                size: ScalarSize::Size64,
-            });
-
-            insts
-        } else {
-            smallvec![Inst::LoadFpuConst64 { rd, const_data }]
-        }
-    }
-
-    /// Create instructions that load a 128-bit vector constant.
-    pub fn load_fp_constant128<F: FnMut(Type) -> Writable<Reg>>(
-        rd: Writable<Reg>,
-        const_data: u128,
-        alloc_tmp: F,
-    ) -> SmallVec<[Inst; 5]> {
-        if let Ok(const_data) = u64::try_from(const_data) {
-            SmallVec::from(&Inst::load_fp_constant64(rd, const_data, alloc_tmp)[..])
-        } else if let Some((pattern, size)) =
-            Inst::get_replicated_vector_pattern(const_data, ScalarSize::Size64)
-        {
-            Inst::load_replicated_vector_pattern(
-                rd,
-                pattern,
-                VectorSize::from_lane_size(size, true),
-                alloc_tmp,
-            )
-        } else {
-            smallvec![Inst::LoadFpuConst128 { rd, const_data }]
-        }
-    }
-
-    /// Determine whether a 128-bit constant represents a vector consisting of elements with
-    /// the same value.
-    pub fn get_replicated_vector_pattern(
-        value: u128,
-        size: ScalarSize,
-    ) -> Option<(u64, ScalarSize)> {
-        let (mask, shift, next_size) = match size {
-            ScalarSize::Size8 => (u8::MAX as u128, 8, ScalarSize::Size128),
-            ScalarSize::Size16 => (u16::MAX as u128, 16, ScalarSize::Size8),
-            ScalarSize::Size32 => (u32::MAX as u128, 32, ScalarSize::Size16),
-            ScalarSize::Size64 => (u64::MAX as u128, 64, ScalarSize::Size32),
-            _ => return None,
-        };
-        let mut r = None;
-        let v = value & mask;
-
-        if (value >> shift) & mask == v {
-            r = Inst::get_replicated_vector_pattern(v, next_size);
-
-            if r.is_none() {
-                r = Some((v as u64, size));
-            }
-        }
-
-        r
-    }
-
-    /// Create instructions that load a vector constant consisting of elements with
-    /// the same value.
-    pub fn load_replicated_vector_pattern<F: FnMut(Type) -> Writable<Reg>>(
-        rd: Writable<Reg>,
-        pattern: u64,
-        size: VectorSize,
-        mut alloc_tmp: F,
-    ) -> SmallVec<[Inst; 5]> {
-        let lane_size = size.lane_size();
-        let widen_32_bit_pattern = |pattern, lane_size| {
-            if lane_size == ScalarSize::Size32 {
-                let pattern = pattern as u32 as u64;
-
-                ASIMDMovModImm::maybe_from_u64(pattern | (pattern << 32), ScalarSize::Size64)
-            } else {
-                None
-            }
-        };
-
-        if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) {
-            smallvec![Inst::VecDupImm {
-                rd,
-                imm,
-                invert: false,
-                size
-            }]
-        } else if let Some(imm) = ASIMDMovModImm::maybe_from_u64(!pattern, lane_size) {
-            debug_assert_ne!(lane_size, ScalarSize::Size8);
-            debug_assert_ne!(lane_size, ScalarSize::Size64);
-
-            smallvec![Inst::VecDupImm {
-                rd,
-                imm,
-                invert: true,
-                size
-            }]
-        } else if let Some(imm) = widen_32_bit_pattern(pattern, lane_size) {
-            let mut insts = smallvec![];
-
-            // TODO: Implement support for 64-bit scalar MOVI; we zero-extend the
-            // lower 64 bits instead.
-            if !size.is_128bits() {
-                let tmp = alloc_tmp(types::I64X2);
-                insts.push(Inst::VecDupImm {
-                    rd: tmp,
-                    imm,
-                    invert: false,
-                    size: VectorSize::Size64x2,
-                });
-                insts.push(Inst::FpuExtend {
-                    rd,
-                    rn: tmp.to_reg(),
-                    size: ScalarSize::Size64,
-                });
-            } else {
-                insts.push(Inst::VecDupImm {
-                    rd,
-                    imm,
-                    invert: false,
-                    size: VectorSize::Size64x2,
-                });
-            }
-
-            insts
-        } else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(pattern, lane_size) {
-            smallvec![Inst::VecDupFPImm { rd, imm, size }]
-        } else {
-            let tmp = alloc_tmp(I64);
-            let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern, &mut alloc_tmp)[..]);
-
-            insts.push(Inst::VecDup {
-                rd,
-                rn: tmp.to_reg(),
-                size,
-            });
-
-            insts
-        }
-    }
-
     /// Generic constructor for a load (zero-extending where appropriate).
     pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
         match ty {
@@ -585,6 +375,7 @@ fn memarg_operands<F: Fn(VReg) -> VReg>(memarg: &AMode, collector: &mut OperandCollector<'_, F>) {
         &AMode::RegOffset { rn, .. } => {
             collector.reg_use(rn);
         }
+        &AMode::Const { .. } => {}
     }
 }
@@ -928,9 +719,6 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) {
             collector.reg_use(rt2);
             pairmemarg_operands(mem, collector);
         }
-        &Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => {
-            collector.reg_def(rd);
-        }
         &Inst::FpuToInt { rd, rn, .. } => {
             collector.reg_def(rd);
             collector.reg_use(rn);
@@ -1318,7 +1106,7 @@ impl MachInst for Inst {
 // Pretty-printing of instructions.
 
 fn mem_finalize_for_show(mem: &AMode, state: &EmitState) -> (String, AMode) {
-    let (mem_insts, mem) = mem_finalize(0, mem, state);
+    let (mem_insts, mem) = mem_finalize(None, mem, state);
     let mut mem_str = mem_insts
         .into_iter()
         .map(|inst| {
@@ -2007,18 +1795,6 @@ impl Inst {
 
                 format!("stp {}, {}, {}", rt, rt2, mem)
             }
-            &Inst::LoadFpuConst64 { rd, const_data } => {
-                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs);
-                format!(
-                    "ldr {}, pc+8 ; b 12 ; data.f64 {}",
-                    rd,
-                    f64::from_bits(const_data)
-                )
-            }
-            &Inst::LoadFpuConst128 { rd, const_data } => {
-                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size128, allocs);
-                format!("ldr {}, pc+8 ; b 20 ; data.f128 0x{:032x}", rd, const_data)
-            }
             &Inst::FpuToInt { op, rd, rn } => {
                 let (op, sizesrc, sizedest) = match op {
                     FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32),
@@ -2820,7 +2596,7 @@ impl Inst {
                 // of the existing legalization framework).
                 let rd = allocs.next_writable(rd);
                 let mem = mem.with_allocs(allocs);
-                let (mem_insts, mem) = mem_finalize(0, &mem, state);
+                let (mem_insts, mem) = mem_finalize(None, &mem, state);
                 let mut ret = String::new();
                 for inst in mem_insts.into_iter() {
                     ret.push_str(
@@ -26,7 +26,7 @@
 
 ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (f32const (u64_from_ieee32 n)))
+(rule (lower (f32const (u32_from_ieee32 n)))
       (constant_f32 n))
 
 ;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1954,7 +1954,7 @@
 (rule -2 (lower (has_type ty (splat x @ (value_type (ty_scalar_float _)))))
       (vec_dup_from_fpu x (vector_size ty) 0))
 
-(rule (lower (has_type ty (splat (f32const (u64_from_ieee32 n)))))
+(rule (lower (has_type ty (splat (f32const (u32_from_ieee32 n)))))
       (splat_const n (vector_size ty)))
 
 (rule (lower (has_type ty (splat (f64const (u64_from_ieee64 n)))))
@@ -570,67 +570,6 @@ pub(crate) fn lower_constant_u64(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value: u64) {
     }
 }
 
-pub(crate) fn lower_constant_f32(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value: f32) {
-    let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
-
-    for inst in Inst::load_fp_constant32(rd, value.to_bits(), alloc_tmp) {
-        ctx.emit(inst);
-    }
-}
-
-pub(crate) fn lower_constant_f64(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value: f64) {
-    let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
-
-    for inst in Inst::load_fp_constant64(rd, value.to_bits(), alloc_tmp) {
-        ctx.emit(inst);
-    }
-}
-
-pub(crate) fn lower_constant_f128(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value: u128) {
-    if value == 0 {
-        // Fast-track a common case. The general case, viz, calling `Inst::load_fp_constant128`,
-        // is potentially expensive.
-        ctx.emit(Inst::VecDupImm {
-            rd,
-            imm: ASIMDMovModImm::zero(ScalarSize::Size8),
-            invert: false,
-            size: VectorSize::Size8x16,
-        });
-    } else {
-        let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
-        for inst in Inst::load_fp_constant128(rd, value, alloc_tmp) {
-            ctx.emit(inst);
-        }
-    }
-}
-
-pub(crate) fn lower_splat_const(
-    ctx: &mut Lower<Inst>,
-    rd: Writable<Reg>,
-    value: u64,
-    size: VectorSize,
-) {
-    let (value, narrow_size) = match size.lane_size() {
-        ScalarSize::Size8 => (value as u8 as u64, ScalarSize::Size128),
-        ScalarSize::Size16 => (value as u16 as u64, ScalarSize::Size8),
-        ScalarSize::Size32 => (value as u32 as u64, ScalarSize::Size16),
-        ScalarSize::Size64 => (value, ScalarSize::Size32),
-        _ => unreachable!(),
-    };
-    let (value, size) = match Inst::get_replicated_vector_pattern(value as u128, narrow_size) {
-        Some((value, lane_size)) => (
-            value,
-            VectorSize::from_lane_size(lane_size, size.is_128bits()),
-        ),
-        None => (value, size),
-    };
-    let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
-
-    for inst in Inst::load_replicated_vector_pattern(rd, value, size, alloc_tmp) {
-        ctx.emit(inst);
-    }
-}
-
 pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
     match cc {
         IntCC::Equal => Cond::Eq,
@@ -7,17 +7,16 @@ use smallvec::SmallVec;
 
 // Types that the generated ISLE code uses via `use super::*`.
 use super::{
-    fp_reg, lower_condcode, lower_constant_f128, lower_constant_f32, lower_constant_f64,
-    lower_fp_condcode, stack_reg, writable_link_reg, writable_zero_reg, zero_reg, AMode,
-    ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp,
-    FPUOpRI, FPUOpRIMod, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo,
-    MachLabel, MemLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize,
-    PairAMode, Reg, SImm9, ScalarSize, ShiftOpAndAmt, UImm12Scaled, UImm5, VecMisc2, VectorSize,
-    NZCV,
+    fp_reg, lower_condcode, lower_fp_condcode, stack_reg, writable_link_reg, writable_zero_reg,
+    zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond,
+    CondBrKind, ExtendOp, FPUOpRI, FPUOpRIMod, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst,
+    IntCC, JTSequenceInfo, MachLabel, MemLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode,
+    OperandSize, PairAMode, Reg, SImm9, ScalarSize, ShiftOpAndAmt, UImm12Scaled, UImm5, VecMisc2,
+    VectorSize, NZCV,
 };
 use crate::ir::condcodes;
 use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm};
-use crate::isa::aarch64::lower::{lower_address, lower_pair_address, lower_splat_const};
+use crate::isa::aarch64::lower::{lower_address, lower_pair_address};
 use crate::isa::aarch64::AArch64Backend;
 use crate::machinst::valueregs;
 use crate::machinst::{isle::*, InputSourceInst};
@@ -524,38 +523,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
         lower_pair_address(self.lower_ctx, addr, offset as i32)
     }
 
-    fn constant_f32(&mut self, value: u64) -> Reg {
-        let rd = self.temp_writable_reg(I8X16);
-
-        lower_constant_f32(self.lower_ctx, rd, f32::from_bits(value as u32));
-
-        rd.to_reg()
-    }
-
-    fn constant_f64(&mut self, value: u64) -> Reg {
-        let rd = self.temp_writable_reg(I8X16);
-
-        lower_constant_f64(self.lower_ctx, rd, f64::from_bits(value));
-
-        rd.to_reg()
-    }
-
-    fn constant_f128(&mut self, value: u128) -> Reg {
-        let rd = self.temp_writable_reg(I8X16);
-
-        lower_constant_f128(self.lower_ctx, rd, value);
-
-        rd.to_reg()
-    }
-
-    fn splat_const(&mut self, value: u64, size: &VectorSize) -> Reg {
-        let rd = self.temp_writable_reg(I8X16);
-
-        lower_splat_const(self.lower_ctx, rd, value, *size);
-
-        rd.to_reg()
-    }
-
     fn fp_cond_code(&mut self, cc: &condcodes::FloatCC) -> Cond {
         lower_fp_condcode(*cc)
     }
@@ -612,8 +579,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
     }
 
     fn min_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
-        let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
-
         if in_bits == 32 {
             // From float32.
             let min = match (signed, out_bits) {
@@ -630,7 +595,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
                 ),
             };
 
-            lower_constant_f32(self.lower_ctx, tmp, min);
+            generated_code::constructor_constant_f32(self, min.to_bits())
         } else if in_bits == 64 {
             // From float64.
             let min = match (signed, out_bits) {
@@ -647,7 +612,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
                 ),
             };
 
-            lower_constant_f64(self.lower_ctx, tmp, min);
+            generated_code::constructor_constant_f64(self, min.to_bits())
         } else {
             unimplemented!(
                 "unexpected input size for min_fp_value: {} (signed: {}, output size: {})",
@@ -656,13 +621,9 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
                 out_bits
             );
         }
-
-        tmp.to_reg()
     }
 
     fn max_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
-        let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
-
         if in_bits == 32 {
             // From float32.
             let max = match (signed, out_bits) {
@@ -682,7 +643,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
                 ),
             };
 
-            lower_constant_f32(self.lower_ctx, tmp, max);
+            generated_code::constructor_constant_f32(self, max.to_bits())
         } else if in_bits == 64 {
             // From float64.
             let max = match (signed, out_bits) {
@@ -702,7 +663,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
                 ),
             };
 
-            lower_constant_f64(self.lower_ctx, tmp, max);
+            generated_code::constructor_constant_f64(self, max.to_bits())
         } else {
             unimplemented!(
                 "unexpected input size for max_fp_value: {} (signed: {}, output size: {})",
@@ -711,8 +672,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
                 out_bits
             );
         }
-
-        tmp.to_reg()
     }
 
     fn fpu_op_ri_ushr(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI {
@@ -785,4 +744,66 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
             None
         }
     }
+
+    fn asimd_mov_mod_imm_zero(&mut self, size: &ScalarSize) -> ASIMDMovModImm {
+        ASIMDMovModImm::zero(*size)
+    }
+
+    fn asimd_mov_mod_imm_from_u64(
+        &mut self,
+        val: u64,
+        size: &ScalarSize,
+    ) -> Option<ASIMDMovModImm> {
+        ASIMDMovModImm::maybe_from_u64(val, *size)
+    }
+
+    fn asimd_fp_mod_imm_from_u64(&mut self, val: u64, size: &ScalarSize) -> Option<ASIMDFPModImm> {
+        ASIMDFPModImm::maybe_from_u64(val, *size)
+    }
+
+    fn u64_low32_bits_unset(&mut self, val: u64) -> Option<u64> {
+        if val & 0xffffffff == 0 {
+            Some(val)
+        } else {
+            None
+        }
+    }
+
+    fn u128_replicated_u64(&mut self, val: u128) -> Option<u64> {
+        let low64 = val as u64 as u128;
+        if (low64 | (low64 << 64)) == val {
+            Some(low64 as u64)
+        } else {
+            None
+        }
+    }
+
+    fn u64_replicated_u32(&mut self, val: u64) -> Option<u64> {
+        let low32 = val as u32 as u64;
+        if (low32 | (low32 << 32)) == val {
+            Some(low32)
+        } else {
+            None
+        }
+    }
+
+    fn u32_replicated_u16(&mut self, val: u64) -> Option<u64> {
+        let val = val as u32;
+        let low16 = val as u16 as u32;
+        if (low16 | (low16 << 16)) == val {
+            Some(low16.into())
+        } else {
+            None
+        }
+    }
+
+    fn u16_replicated_u8(&mut self, val: u64) -> Option<u64> {
+        let val = val as u16;
+        let low8 = val as u8 as u16;
+        if (low8 | (low8 << 8)) == val {
+            Some(low8.into())
+        } else {
+            None
+        }
+    }
 }
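
The replication helpers added above all follow the same shape; a test-style
sketch of one of them, rewritten as a free function so it stands alone:

    // Mirror of `u64_replicated_u32` above, as a free function for illustration.
    fn u64_replicated_u32(val: u64) -> Option<u64> {
        let low32 = val as u32 as u64;
        if (low32 | (low32 << 32)) == val {
            Some(low32)
        } else {
            None
        }
    }

    fn main() {
        // A value whose halves match narrows to its low 32 bits...
        assert_eq!(u64_replicated_u32(0xdead_beef_dead_beef), Some(0xdead_beef));
        // ...while one whose halves differ does not.
        assert_eq!(u64_replicated_u32(0x0000_0001_dead_beef), None);
    }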
@@ -11,7 +11,7 @@
 
 ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (f32const (u64_from_ieee32 n)))
+(rule (lower (f32const (u32_from_ieee32 n)))
      (imm $F32 n))
 
 ;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -896,7 +896,7 @@
     (CallInd
       (link WritableReg)
       (info BoxCallIndInfo))
 
 
     ;; A pseudo-instruction that captures register arguments in vregs.
     (Args
       (args VecArgPair))
@@ -1555,8 +1555,8 @@
 (decl u8_as_u16 (u8) u16)
 (extern constructor u8_as_u16 u8_as_u16)
 
-(decl u64_as_u32 (u64) u32)
-(extern constructor u64_as_u32 u64_as_u32)
+(decl u64_truncate_to_u32 (u64) u32)
+(extern constructor u64_truncate_to_u32 u64_truncate_to_u32)
 
 (decl u64_as_i16 (u64) i16)
 (extern constructor u64_as_i16 u64_as_i16)
@@ -3000,7 +3000,7 @@
 ;; 32-bit result type, any value
 (rule 5 (imm (gpr32_ty ty) n)
   (let ((dst WritableReg (temp_writable_reg ty))
-        (_ Unit (emit (MInst.Mov32Imm dst (u64_as_u32 n)))))
+        (_ Unit (emit (MInst.Mov32Imm dst (u64_truncate_to_u32 n)))))
     dst))
 
 ;; 64-bit result type, value fits in i16
@@ -3051,7 +3051,7 @@
 ;; TODO: use LZER to load 0.0
 (rule 8 (imm $F32 n)
   (let ((dst WritableReg (temp_writable_reg $F32))
-        (_ Unit (emit (MInst.LoadFpuConst32 dst (u64_as_u32 n)))))
+        (_ Unit (emit (MInst.LoadFpuConst32 dst (u64_truncate_to_u32 n)))))
    dst))
 
 ;; 64-bit floating-point type, any value. Loaded from literal pool.
@@ -18,7 +18,7 @@
 
 ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (f32const (u64_from_ieee32 x)))
+(rule (lower (f32const (u32_from_ieee32 x)))
      (imm $F32 x))
 
 
@@ -436,7 +436,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> {
     }
 
     #[inline]
-    fn u64_as_u32(&mut self, n: u64) -> u32 {
+    fn u64_truncate_to_u32(&mut self, n: u64) -> u32 {
         n as u32
     }
 
@@ -24,7 +24,7 @@
 
 ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (f32const (u64_from_ieee32 x)))
+(rule (lower (f32const (u32_from_ieee32 x)))
      (imm $F32 x))
 
 ;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -80,6 +80,11 @@ macro_rules! isle_common_prelude_methods {
             x ^ y
         }
 
+        #[inline]
+        fn u64_shl(&mut self, x: u64, y: u64) -> u64 {
+            x << y
+        }
+
         #[inline]
         fn imm64_shl(&mut self, ty: Type, x: Imm64, y: Imm64) -> Imm64 {
             // Mask off any excess shift bits.
@@ -502,8 +507,8 @@ macro_rules! isle_common_prelude_methods {
             }
         }
 
-        fn u64_from_ieee32(&mut self, val: Ieee32) -> u64 {
-            val.bits().into()
+        fn u32_from_ieee32(&mut self, val: Ieee32) -> u32 {
+            val.bits()
         }
 
         fn u64_from_ieee64(&mut self, val: Ieee64) -> u64 {
@@ -748,5 +753,13 @@ macro_rules! isle_common_prelude_methods {
         fn pack_block_array_2(&mut self, a: BlockCall, b: BlockCall) -> BlockArray2 {
            [a, b]
         }
+
+        fn u128_as_u64(&mut self, val: u128) -> Option<u64> {
+            u64::try_from(val).ok()
+        }
+
+        fn u64_as_u32(&mut self, val: u64) -> Option<u32> {
+            u32::try_from(val).ok()
+        }
     };
 }
@@ -88,10 +88,17 @@
 
 (decl pure u32_as_u64 (u32) u64)
 (extern constructor u32_as_u64 u32_as_u64)
 (convert u32 u64 u32_as_u64)
 
 (decl pure i64_as_u64 (i64) u64)
 (extern constructor i64_as_u64 i64_as_u64)
 
+(decl u128_as_u64 (u64) u128)
+(extern extractor u128_as_u64 u128_as_u64)
+
+(decl u64_as_u32 (u32) u64)
+(extern extractor u64_as_u32 u64_as_u32)
+
 ;;;; Primitive Arithmetic ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (decl pure u8_and (u8 u8) u8)
@@ -129,6 +136,9 @@
 (decl pure u64_xor (u64 u64) u64)
 (extern constructor u64_xor u64_xor)
 
+(decl pure u64_shl (u64 u64) u64)
+(extern constructor u64_shl u64_shl)
+
 (decl pure imm64_shl (Type Imm64 Imm64) Imm64)
 (extern constructor imm64_shl imm64_shl)
 
@@ -388,8 +398,8 @@
 (extern constructor imm64_masked imm64_masked)
 
 ;; Extract a `u64` from an `Ieee32`.
-(decl u64_from_ieee32 (u64) Ieee32)
-(extern extractor infallible u64_from_ieee32 u64_from_ieee32)
+(decl u32_from_ieee32 (u32) Ieee32)
+(extern extractor infallible u32_from_ieee32 u32_from_ieee32)
 
 ;; Extract a `u64` from an `Ieee64`.
 (decl u64_from_ieee64 (u64) Ieee64)