Implements f64x2.convert_low_i32x4_u for x64

This commit is contained in:
Johnnie Birch
2021-06-05 23:15:50 -07:00
parent 16379db174
commit 2d676d838f
11 changed files with 94 additions and 6 deletions

View File

@@ -189,7 +189,6 @@ fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
} }
match (testsuite, testname) { match (testsuite, testname) {
("simd", "simd_conversions") => return true, // unknown operator or unexpected token: tests/spec_testsuite/proposals/simd/simd_conversions.wast:724:6
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true, ("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
("simd", "simd_i16x8_extmul_i8x16") => return true, ("simd", "simd_i16x8_extmul_i8x16") => return true,
("simd", "simd_i16x8_q15mulr_sat_s") => return true, ("simd", "simd_i16x8_q15mulr_sat_s") => return true,

View File

@@ -4457,6 +4457,27 @@ pub(crate) fn define(
.operands_out(vec![a]), .operands_out(vec![a]),
); );
ig.push(
Inst::new(
"fcvt_low_from_uint",
r#"
Converts packed unsigned 32-bit integers to packed double precision floating point.
Considering only the low half of the register, each lane in `x` is interpreted as an
unsigned 32-bit integer that is then converted to a double precision float. Each
converted value occupies twice the number of bits of its source lane. No rounding should
be needed for the resulting float.
The result type will have half the number of vector lanes as the input.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);
let WideInt = &TypeVar::new( let WideInt = &TypeVar::new(
"WideInt", "WideInt",
"An integer type with lanes from `i16` upwards", "An integer type with lanes from `i16` upwards",

View File

@@ -3557,6 +3557,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::ConstAddr Opcode::ConstAddr
| Opcode::FcvtLowFromSint | Opcode::FcvtLowFromSint
| Opcode::FcvtLowFromUint
| Opcode::Fvdemote | Opcode::Fvdemote
| Opcode::FvpromoteLow | Opcode::FvpromoteLow
| Opcode::Vconcat | Opcode::Vconcat

View File

@@ -2867,6 +2867,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::UwidenHigh | Opcode::UwidenHigh
| Opcode::WideningPairwiseDotProductS | Opcode::WideningPairwiseDotProductS
| Opcode::SqmulRoundSat | Opcode::SqmulRoundSat
| Opcode::FcvtLowFromUint
| Opcode::FvpromoteLow | Opcode::FvpromoteLow
| Opcode::Fvdemote => { | Opcode::Fvdemote => {
// TODO // TODO

View File

@@ -635,6 +635,7 @@ pub enum SseOpcode {
Subsd, Subsd,
Ucomiss, Ucomiss,
Ucomisd, Ucomisd,
Unpcklps,
Xorps, Xorps,
Xorpd, Xorpd,
} }
@@ -675,6 +676,7 @@ impl SseOpcode {
| SseOpcode::Subps | SseOpcode::Subps
| SseOpcode::Subss | SseOpcode::Subss
| SseOpcode::Ucomiss | SseOpcode::Ucomiss
| SseOpcode::Unpcklps
| SseOpcode::Xorps => SSE, | SseOpcode::Xorps => SSE,
SseOpcode::Addpd SseOpcode::Addpd
@@ -993,6 +995,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Subsd => "subsd", SseOpcode::Subsd => "subsd",
SseOpcode::Ucomiss => "ucomiss", SseOpcode::Ucomiss => "ucomiss",
SseOpcode::Ucomisd => "ucomisd", SseOpcode::Ucomisd => "ucomisd",
SseOpcode::Unpcklps => "unpcklps",
SseOpcode::Xorps => "xorps", SseOpcode::Xorps => "xorps",
SseOpcode::Xorpd => "xorpd", SseOpcode::Xorpd => "xorpd",
}; };

View File

@@ -1529,6 +1529,7 @@ pub(crate) fn emit(
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2), SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2), SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
SseOpcode::Subsd => (LegacyPrefixes::_F2, 0x0F5C, 2), SseOpcode::Subsd => (LegacyPrefixes::_F2, 0x0F5C, 2),
SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2),
SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2), SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2),
SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2), SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2),
_ => unimplemented!("Opcode {:?} not implemented", op), _ => unimplemented!("Opcode {:?} not implemented", op),

View File

@@ -3717,6 +3717,12 @@ fn test_x64_emit() {
"punpcklbw %xmm1, %xmm8", "punpcklbw %xmm1, %xmm8",
)); ));
insns.push((
Inst::xmm_rm_r(SseOpcode::Unpcklps, RegMem::reg(xmm11), w_xmm2),
"410F14D3",
"unpcklps %xmm11, %xmm2",
));
// ======================================================== // ========================================================
// XMM_RM_R: Integer Conversion // XMM_RM_R: Integer Conversion
insns.push(( insns.push((

View File

@@ -4154,6 +4154,58 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
dst, dst,
)); ));
} }
Opcode::FcvtLowFromUint => {
// Converts the two low unsigned 32-bit lanes of the input into two f64 lanes.
//
// The algorithm uses unpcklps to build, in each 64-bit lane, a bit pattern that is
// equivalent to the double 0x1.0p52 + double(src). 0x1.0p52 is special because at
// this exponent the least significant mantissa bit has a weight of exactly 1, so
// every uint32 value placed in the low 32 bits of the mantissa is represented
// exactly. When we subtract 0x1.0p52 we are left with double(src).
let src = put_input_in_reg(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let uint_mask = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
// Start from a copy of the input in dst; the instructions emitted below combine
// dst with their other operand and leave the result in dst.
ctx.emit(Inst::gen_move(dst, src, types::I32X4));
// Read as little-endian 32-bit lanes: lanes 0 and 1 hold 0x43300000, which is the
// upper half of the f64 bit pattern of 0x1.0p52 (0x4330_0000_0000_0000); lanes 2
// and 3 are zero.
static UINT_MASK: [u8; 16] = [
0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00,
];
let uint_mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK));
ctx.emit(Inst::xmm_load_const(
uint_mask_const,
uint_mask,
types::I32X4,
));
// Creates 0x1.0p52 + double(src): unpcklps interleaves the low two 32-bit lanes of
// dst and uint_mask, giving [src0, 0x43300000, src1, 0x43300000]. Each 64-bit lane
// therefore has the 0x1.0p52 exponent bits on top and src in the low mantissa bits.
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Unpcklps,
RegMem::from(uint_mask),
dst,
));
// Read as little-endian 64-bit lanes: both lanes hold 0x4330_0000_0000_0000, i.e.
// the double 0x1.0p52 that is subtracted back out below.
static UINT_MASK_HIGH: [u8; 16] = [
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x30, 0x43,
];
let uint_mask_high_const =
ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH));
let uint_mask_high = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(
uint_mask_high_const,
uint_mask_high,
types::I32X4,
));
// 0x1.0p52 + double(src) - 0x1.0p52 == double(src). Both operands and the result
// are exactly representable, so the packed-double subtraction does not round.
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Subpd,
RegMem::from(uint_mask_high),
dst,
));
}
Opcode::FcvtFromUint => { Opcode::FcvtFromUint => {
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let ty = ty.unwrap(); let ty = ty.unwrap();

View File

@@ -4,6 +4,8 @@ use self::inst::EmitInfo;
use super::TargetIsa; use super::TargetIsa;
use crate::ir::{condcodes::IntCC, Function}; use crate::ir::{condcodes::IntCC, Function};
#[cfg(feature = "unwind")]
use crate::isa::unwind::systemv;
use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings}; use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings};
use crate::isa::Builder as IsaBuilder; use crate::isa::Builder as IsaBuilder;
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode}; use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
@@ -11,12 +13,10 @@ use crate::result::CodegenResult;
use crate::settings::{self as shared_settings, Flags}; use crate::settings::{self as shared_settings, Flags};
use alloc::{boxed::Box, vec::Vec}; use alloc::{boxed::Box, vec::Vec};
use core::hash::{Hash, Hasher}; use core::hash::{Hash, Hasher};
use regalloc::{PrettyPrint, RealRegUniverse, Reg}; use regalloc::{PrettyPrint, RealRegUniverse, Reg};
use target_lexicon::Triple; use target_lexicon::Triple;
#[cfg(feature = "unwind")]
use crate::isa::unwind::systemv;
mod abi; mod abi;
pub mod encoding; pub mod encoding;
mod inst; mod inst;

View File

@@ -565,6 +565,7 @@ where
Opcode::FcvtFromUint => unimplemented!("FcvtFromUint"), Opcode::FcvtFromUint => unimplemented!("FcvtFromUint"),
Opcode::FcvtFromSint => unimplemented!("FcvtFromSint"), Opcode::FcvtFromSint => unimplemented!("FcvtFromSint"),
Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"), Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
Opcode::FcvtLowFromUint => unimplemented!("FcvtLowFromUint"),
Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"), Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"),
Opcode::Fvdemote => unimplemented!("Fvdemote"), Opcode::Fvdemote => unimplemented!("Fvdemote"),
Opcode::Isplit => unimplemented!("Isplit"), Opcode::Isplit => unimplemented!("Isplit"),

View File

@@ -1778,6 +1778,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, I32X4, builder); let a = pop1_with_bitcast(state, I32X4, builder);
state.push1(builder.ins().fcvt_low_from_sint(F64X2, a)); state.push1(builder.ins().fcvt_low_from_sint(F64X2, a));
} }
Operator::F64x2ConvertLowI32x4U => {
let a = pop1_with_bitcast(state, I32X4, builder);
state.push1(builder.ins().fcvt_low_from_uint(F64X2, a));
}
Operator::F64x2PromoteLowF32x4 => { Operator::F64x2PromoteLowF32x4 => {
let a = pop1_with_bitcast(state, F32X4, builder); let a = pop1_with_bitcast(state, F32X4, builder);
state.push1(builder.ins().fvpromote_low(a)); state.push1(builder.ins().fvpromote_low(a));
@@ -1921,8 +1925,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I16x8ExtAddPairwiseI8x16S | Operator::I16x8ExtAddPairwiseI8x16S
| Operator::I16x8ExtAddPairwiseI8x16U | Operator::I16x8ExtAddPairwiseI8x16U
| Operator::I32x4ExtAddPairwiseI16x8S | Operator::I32x4ExtAddPairwiseI16x8S
| Operator::I32x4ExtAddPairwiseI16x8U | Operator::I32x4ExtAddPairwiseI16x8U => {
| Operator::F64x2ConvertLowI32x4U => {
return Err(wasm_unsupported!("proposed simd operator {:?}", op)); return Err(wasm_unsupported!("proposed simd operator {:?}", op));
} }
Operator::ReturnCall { .. } | Operator::ReturnCallIndirect { .. } => { Operator::ReturnCall { .. } | Operator::ReturnCallIndirect { .. } => {