Implements f64x2.convert_low_i32x4_u for x64
This commit is contained in:
@@ -3557,6 +3557,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
|
||||
Opcode::ConstAddr
|
||||
| Opcode::FcvtLowFromSint
|
||||
| Opcode::FcvtLowFromUint
|
||||
| Opcode::Fvdemote
|
||||
| Opcode::FvpromoteLow
|
||||
| Opcode::Vconcat
|
||||
|
||||
@@ -2867,6 +2867,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
| Opcode::UwidenHigh
|
||||
| Opcode::WideningPairwiseDotProductS
|
||||
| Opcode::SqmulRoundSat
|
||||
| Opcode::FcvtLowFromUint
|
||||
| Opcode::FvpromoteLow
|
||||
| Opcode::Fvdemote => {
|
||||
// TODO
|
||||
|
||||
@@ -635,6 +635,7 @@ pub enum SseOpcode {
|
||||
Subsd,
|
||||
Ucomiss,
|
||||
Ucomisd,
|
||||
Unpcklps,
|
||||
Xorps,
|
||||
Xorpd,
|
||||
}
|
||||
@@ -675,6 +676,7 @@ impl SseOpcode {
|
||||
| SseOpcode::Subps
|
||||
| SseOpcode::Subss
|
||||
| SseOpcode::Ucomiss
|
||||
| SseOpcode::Unpcklps
|
||||
| SseOpcode::Xorps => SSE,
|
||||
|
||||
SseOpcode::Addpd
|
||||
@@ -993,6 +995,7 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Subsd => "subsd",
|
||||
SseOpcode::Ucomiss => "ucomiss",
|
||||
SseOpcode::Ucomisd => "ucomisd",
|
||||
SseOpcode::Unpcklps => "unpcklps",
|
||||
SseOpcode::Xorps => "xorps",
|
||||
SseOpcode::Xorpd => "xorpd",
|
||||
};
|
||||
|
||||
@@ -1529,6 +1529,7 @@ pub(crate) fn emit(
|
||||
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
|
||||
SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
|
||||
SseOpcode::Subsd => (LegacyPrefixes::_F2, 0x0F5C, 2),
|
||||
SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2),
|
||||
SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2),
|
||||
SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2),
|
||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||
|
||||
@@ -3717,6 +3717,12 @@ fn test_x64_emit() {
|
||||
"punpcklbw %xmm1, %xmm8",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Unpcklps, RegMem::reg(xmm11), w_xmm2),
|
||||
"410F14D3",
|
||||
"unpcklps %xmm11, %xmm2",
|
||||
));
|
||||
|
||||
// ========================================================
|
||||
// XMM_RM_R: Integer Conversion
|
||||
insns.push((
|
||||
|
||||
@@ -4154,6 +4154,58 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
dst,
|
||||
));
|
||||
}
|
||||
Opcode::FcvtLowFromUint => {
|
||||
// Algorithm uses unpcklps to help create a float that is equivalent to
|
||||
// 0x1.0p52 + double(src). 0x1.0p52 is unique because at this exponent
|
||||
// every value of the mantissa represents a corresponding uint32 number.
|
||||
// When we subtract 0x1.0p52 we are left with double(src).
|
||||
let src = put_input_in_reg(ctx, inputs[0]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let uint_mask = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||
|
||||
ctx.emit(Inst::gen_move(dst, src, types::I32X4));
|
||||
|
||||
static UINT_MASK: [u8; 16] = [
|
||||
0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00,
|
||||
];
|
||||
|
||||
let uint_mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK));
|
||||
|
||||
ctx.emit(Inst::xmm_load_const(
|
||||
uint_mask_const,
|
||||
uint_mask,
|
||||
types::I32X4,
|
||||
));
|
||||
|
||||
// Creates 0x1.0p52 + double(src)
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Unpcklps,
|
||||
RegMem::from(uint_mask),
|
||||
dst,
|
||||
));
|
||||
|
||||
static UINT_MASK_HIGH: [u8; 16] = [
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x30, 0x43,
|
||||
];
|
||||
|
||||
let uint_mask_high_const =
|
||||
ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH));
|
||||
let uint_mask_high = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_load_const(
|
||||
uint_mask_high_const,
|
||||
uint_mask_high,
|
||||
types::I32X4,
|
||||
));
|
||||
|
||||
// Computes (0x1.0p52 + double(src)) - 0x1.0p52, which leaves double(src)
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Subpd,
|
||||
RegMem::from(uint_mask_high),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
Opcode::FcvtFromUint => {
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let ty = ty.unwrap();
|
||||
|
||||
@@ -4,6 +4,8 @@ use self::inst::EmitInfo;
|
||||
|
||||
use super::TargetIsa;
|
||||
use crate::ir::{condcodes::IntCC, Function};
|
||||
#[cfg(feature = "unwind")]
|
||||
use crate::isa::unwind::systemv;
|
||||
use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings};
|
||||
use crate::isa::Builder as IsaBuilder;
|
||||
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
|
||||
@@ -11,12 +13,10 @@ use crate::result::CodegenResult;
|
||||
use crate::settings::{self as shared_settings, Flags};
|
||||
use alloc::{boxed::Box, vec::Vec};
|
||||
use core::hash::{Hash, Hasher};
|
||||
|
||||
use regalloc::{PrettyPrint, RealRegUniverse, Reg};
|
||||
use target_lexicon::Triple;
|
||||
|
||||
#[cfg(feature = "unwind")]
|
||||
use crate::isa::unwind::systemv;
|
||||
|
||||
mod abi;
|
||||
pub mod encoding;
|
||||
mod inst;
|
||||
|
||||
Reference in New Issue
Block a user