Implements conversion of the low signed integer lanes to floats for x64 SIMD

This commit is contained in:
Johnnie Birch
2021-03-25 12:33:13 -07:00
parent c43d00fa34
commit 31d3db1ec2
8 changed files with 45 additions and 4 deletions

View File

@@ -4325,6 +4325,26 @@ pub(crate) fn define(
.operands_out(vec![a]),
);
// Register the unary `fcvt_low_from_sint` instruction, taking `x` and producing `a`.
ig.push({
    // The documentation text is kept in a local so the builder chain below reads on its own.
    let doc = r#"
Converts packed signed doubleword integers to packed double precision floating point.
Considering only the low half of the register, each lane in `x` is interpreted as a
signed doubleword integer that is then converted to a double precision float. This
instruction differs from fcvt_from_sint in that it converts half the number of lanes
which are converted to occupy twice the number of bits. No rounding should be needed
for the resulting float.
The result type will have half the number of vector lanes as the input.
"#;
    Inst::new("fcvt_low_from_sint", doc, &formats.unary)
        .operands_in(vec![x])
        .operands_out(vec![a])
});
let WideInt = &TypeVar::new(
"WideInt",
"An integer type with lanes from `i16` upwards",

View File

@@ -3013,6 +3013,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::TlsValue => unimplemented!("tls_value"),
Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
}
Ok(())

View File

@@ -480,6 +480,7 @@ pub enum SseOpcode {
Cmpss,
Cmpsd,
Cvtdq2ps,
Cvtdq2pd,
Cvtsd2ss,
Cvtsd2si,
Cvtsi2ss,
@@ -673,6 +674,7 @@ impl SseOpcode {
| SseOpcode::Cmpsd
| SseOpcode::Comisd
| SseOpcode::Cvtdq2ps
| SseOpcode::Cvtdq2pd
| SseOpcode::Cvtsd2ss
| SseOpcode::Cvtsd2si
| SseOpcode::Cvtsi2sd
@@ -828,6 +830,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Comiss => "comiss",
SseOpcode::Comisd => "comisd",
SseOpcode::Cvtdq2ps => "cvtdq2ps",
SseOpcode::Cvtdq2pd => "cvtdq2pd",
SseOpcode::Cvtsd2ss => "cvtsd2ss",
SseOpcode::Cvtsd2si => "cvtsd2si",
SseOpcode::Cvtsi2ss => "cvtsi2ss",

View File

@@ -1768,6 +1768,7 @@ pub(crate) fn emit(
let rex = RexFlags::clear_w();
let (prefix, opcode, num_opcodes) = match op {
SseOpcode::Cvtdq2pd => (LegacyPrefixes::_F3, 0x0FE6, 2),
SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2),
SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2),
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2),

View File

@@ -3859,6 +3859,12 @@ fn test_x64_emit() {
"pabsd %xmm10, %xmm11",
));
// CVTDQ2PD in its register-to-register form: xmm2 is the source, xmm8 the destination.
insns.push({
    let inst = Inst::xmm_unary_rm_r(SseOpcode::Cvtdq2pd, RegMem::reg(xmm2), w_xmm8);
    let expected_bytes = "F3440FE6C2";
    let expected_text = "cvtdq2pd %xmm2, %xmm8";
    (inst, expected_bytes, expected_text)
});
// Xmm to int conversions, and conversely.
insns.push((

View File

@@ -3915,7 +3915,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::xmm_rm_r(opcode, RegMem::from(dst), dst));
}
}
Opcode::FcvtLowFromSint => {
    // Lowered to a single `cvtdq2pd` from the input register into the output register.
    let src = RegMem::reg(put_input_in_reg(ctx, inputs[0]));
    let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
    // `src` is already a `RegMem`; it is passed directly rather than through the
    // identity conversion `RegMem::from(src)`, which did nothing.
    ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtdq2pd, src, dst));
}
Opcode::FcvtFromUint => {
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let ty = ty.unwrap();

View File

@@ -563,6 +563,7 @@ where
Opcode::FcvtToSintSat => unimplemented!("FcvtToSintSat"),
Opcode::FcvtFromUint => unimplemented!("FcvtFromUint"),
Opcode::FcvtFromSint => unimplemented!("FcvtFromSint"),
Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
Opcode::Isplit => unimplemented!("Isplit"),
Opcode::Iconcat => unimplemented!("Iconcat"),
Opcode::AtomicRmw => unimplemented!("AtomicRmw"),

View File

@@ -1775,6 +1775,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, I32X4, builder);
state.push1(builder.ins().fcvt_from_uint(F32X4, a))
}
Operator::F64x2ConvertLowI32x4S => {
    // Pop the operand viewed as I32X4, convert with `fcvt_low_from_sint` to F64X2,
    // and push the result.
    let lanes = pop1_with_bitcast(state, I32X4, builder);
    let converted = builder.ins().fcvt_low_from_sint(F64X2, lanes);
    state.push1(converted);
}
Operator::I32x4TruncSatF32x4S => {
let a = pop1_with_bitcast(state, F32X4, builder);
state.push1(builder.ins().fcvt_to_sint_sat(I32X4, a))
@@ -1851,12 +1855,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let arg = pop1_with_bitcast(state, type_of(op), builder);
state.push1(builder.ins().nearest(arg));
}
Operator::I32x4DotI16x8S => {
    // Pop both operands viewed as I16X8 and push their signed widening pairwise
    // dot product.
    let (lhs, rhs) = pop2_with_bitcast(state, I16X8, builder);
    let dot = builder.ins().widening_pairwise_dot_product_s(lhs, rhs);
    state.push1(dot);
}
Operator::I64x2ExtendLowI32x4S
| Operator::I64x2ExtendHighI32x4S
| Operator::I64x2ExtendLowI32x4U
@@ -1880,7 +1882,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I32x4ExtAddPairwiseI16x8U
| Operator::F32x4DemoteF64x2Zero
| Operator::F64x2PromoteLowF32x4
| Operator::F64x2ConvertLowI32x4S
| Operator::F64x2ConvertLowI32x4U
| Operator::I32x4TruncSatF64x2SZero
| Operator::I32x4TruncSatF64x2UZero