Implements conversion of low-half signed integers to floats (`fcvt_low_from_sint`) for x64 SIMD
This commit is contained in:
@@ -4325,6 +4325,26 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"fcvt_low_from_sint",
|
||||||
|
r#"
|
||||||
|
Converts packed signed doubleword integers to packed double precision floating point.
|
||||||
|
|
||||||
|
Considering only the low half of the register, each lane in `x` is interpreted as a
|
||||||
|
signed doubleword integer that is then converted to a double precision float. This
|
||||||
|
instruction differs from fcvt_from_sint in that it converts half the number of lanes
|
||||||
|
which are converted to occupy twice the number of bits. No rounding should be needed
|
||||||
|
for the resulting float.
|
||||||
|
|
||||||
|
The result type will have half the number of vector lanes as the input.
|
||||||
|
"#,
|
||||||
|
&formats.unary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
let WideInt = &TypeVar::new(
|
let WideInt = &TypeVar::new(
|
||||||
"WideInt",
|
"WideInt",
|
||||||
"An integer type with lanes from `i16` upwards",
|
"An integer type with lanes from `i16` upwards",
|
||||||
|
|||||||
@@ -3013,6 +3013,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Opcode::TlsValue => unimplemented!("tls_value"),
|
Opcode::TlsValue => unimplemented!("tls_value"),
|
||||||
|
Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -480,6 +480,7 @@ pub enum SseOpcode {
|
|||||||
Cmpss,
|
Cmpss,
|
||||||
Cmpsd,
|
Cmpsd,
|
||||||
Cvtdq2ps,
|
Cvtdq2ps,
|
||||||
|
Cvtdq2pd,
|
||||||
Cvtsd2ss,
|
Cvtsd2ss,
|
||||||
Cvtsd2si,
|
Cvtsd2si,
|
||||||
Cvtsi2ss,
|
Cvtsi2ss,
|
||||||
@@ -673,6 +674,7 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Cmpsd
|
| SseOpcode::Cmpsd
|
||||||
| SseOpcode::Comisd
|
| SseOpcode::Comisd
|
||||||
| SseOpcode::Cvtdq2ps
|
| SseOpcode::Cvtdq2ps
|
||||||
|
| SseOpcode::Cvtdq2pd
|
||||||
| SseOpcode::Cvtsd2ss
|
| SseOpcode::Cvtsd2ss
|
||||||
| SseOpcode::Cvtsd2si
|
| SseOpcode::Cvtsd2si
|
||||||
| SseOpcode::Cvtsi2sd
|
| SseOpcode::Cvtsi2sd
|
||||||
@@ -828,6 +830,7 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Comiss => "comiss",
|
SseOpcode::Comiss => "comiss",
|
||||||
SseOpcode::Comisd => "comisd",
|
SseOpcode::Comisd => "comisd",
|
||||||
SseOpcode::Cvtdq2ps => "cvtdq2ps",
|
SseOpcode::Cvtdq2ps => "cvtdq2ps",
|
||||||
|
SseOpcode::Cvtdq2pd => "cvtdq2pd",
|
||||||
SseOpcode::Cvtsd2ss => "cvtsd2ss",
|
SseOpcode::Cvtsd2ss => "cvtsd2ss",
|
||||||
SseOpcode::Cvtsd2si => "cvtsd2si",
|
SseOpcode::Cvtsd2si => "cvtsd2si",
|
||||||
SseOpcode::Cvtsi2ss => "cvtsi2ss",
|
SseOpcode::Cvtsi2ss => "cvtsi2ss",
|
||||||
|
|||||||
@@ -1768,6 +1768,7 @@ pub(crate) fn emit(
|
|||||||
let rex = RexFlags::clear_w();
|
let rex = RexFlags::clear_w();
|
||||||
|
|
||||||
let (prefix, opcode, num_opcodes) = match op {
|
let (prefix, opcode, num_opcodes) = match op {
|
||||||
|
SseOpcode::Cvtdq2pd => (LegacyPrefixes::_F3, 0x0FE6, 2),
|
||||||
SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2),
|
SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2),
|
||||||
SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2),
|
SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2),
|
||||||
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2),
|
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2),
|
||||||
|
|||||||
@@ -3859,6 +3859,12 @@ fn test_x64_emit() {
|
|||||||
"pabsd %xmm10, %xmm11",
|
"pabsd %xmm10, %xmm11",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_unary_rm_r(SseOpcode::Cvtdq2pd, RegMem::reg(xmm2), w_xmm8),
|
||||||
|
"F3440FE6C2",
|
||||||
|
"cvtdq2pd %xmm2, %xmm8",
|
||||||
|
));
|
||||||
|
|
||||||
// Xmm to int conversions, and conversely.
|
// Xmm to int conversions, and conversely.
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
|
|||||||
@@ -3915,7 +3915,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
ctx.emit(Inst::xmm_rm_r(opcode, RegMem::from(dst), dst));
|
ctx.emit(Inst::xmm_rm_r(opcode, RegMem::from(dst), dst));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Opcode::FcvtLowFromSint => {
|
||||||
|
let src = RegMem::reg(put_input_in_reg(ctx, inputs[0]));
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::xmm_unary_rm_r(
|
||||||
|
SseOpcode::Cvtdq2pd,
|
||||||
|
RegMem::from(src),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
Opcode::FcvtFromUint => {
|
Opcode::FcvtFromUint => {
|
||||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
|
|||||||
@@ -563,6 +563,7 @@ where
|
|||||||
Opcode::FcvtToSintSat => unimplemented!("FcvtToSintSat"),
|
Opcode::FcvtToSintSat => unimplemented!("FcvtToSintSat"),
|
||||||
Opcode::FcvtFromUint => unimplemented!("FcvtFromUint"),
|
Opcode::FcvtFromUint => unimplemented!("FcvtFromUint"),
|
||||||
Opcode::FcvtFromSint => unimplemented!("FcvtFromSint"),
|
Opcode::FcvtFromSint => unimplemented!("FcvtFromSint"),
|
||||||
|
Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
|
||||||
Opcode::Isplit => unimplemented!("Isplit"),
|
Opcode::Isplit => unimplemented!("Isplit"),
|
||||||
Opcode::Iconcat => unimplemented!("Iconcat"),
|
Opcode::Iconcat => unimplemented!("Iconcat"),
|
||||||
Opcode::AtomicRmw => unimplemented!("AtomicRmw"),
|
Opcode::AtomicRmw => unimplemented!("AtomicRmw"),
|
||||||
|
|||||||
@@ -1775,6 +1775,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
let a = pop1_with_bitcast(state, I32X4, builder);
|
let a = pop1_with_bitcast(state, I32X4, builder);
|
||||||
state.push1(builder.ins().fcvt_from_uint(F32X4, a))
|
state.push1(builder.ins().fcvt_from_uint(F32X4, a))
|
||||||
}
|
}
|
||||||
|
Operator::F64x2ConvertLowI32x4S => {
|
||||||
|
let a = pop1_with_bitcast(state, I32X4, builder);
|
||||||
|
state.push1(builder.ins().fcvt_low_from_sint(F64X2, a));
|
||||||
|
}
|
||||||
Operator::I32x4TruncSatF32x4S => {
|
Operator::I32x4TruncSatF32x4S => {
|
||||||
let a = pop1_with_bitcast(state, F32X4, builder);
|
let a = pop1_with_bitcast(state, F32X4, builder);
|
||||||
state.push1(builder.ins().fcvt_to_sint_sat(I32X4, a))
|
state.push1(builder.ins().fcvt_to_sint_sat(I32X4, a))
|
||||||
@@ -1851,12 +1855,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
let arg = pop1_with_bitcast(state, type_of(op), builder);
|
let arg = pop1_with_bitcast(state, type_of(op), builder);
|
||||||
state.push1(builder.ins().nearest(arg));
|
state.push1(builder.ins().nearest(arg));
|
||||||
}
|
}
|
||||||
|
|
||||||
Operator::I32x4DotI16x8S => {
|
Operator::I32x4DotI16x8S => {
|
||||||
let (a, b) = pop2_with_bitcast(state, I16X8, builder);
|
let (a, b) = pop2_with_bitcast(state, I16X8, builder);
|
||||||
state.push1(builder.ins().widening_pairwise_dot_product_s(a, b));
|
state.push1(builder.ins().widening_pairwise_dot_product_s(a, b));
|
||||||
}
|
}
|
||||||
|
|
||||||
Operator::I64x2ExtendLowI32x4S
|
Operator::I64x2ExtendLowI32x4S
|
||||||
| Operator::I64x2ExtendHighI32x4S
|
| Operator::I64x2ExtendHighI32x4S
|
||||||
| Operator::I64x2ExtendLowI32x4U
|
| Operator::I64x2ExtendLowI32x4U
|
||||||
@@ -1880,7 +1882,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
| Operator::I32x4ExtAddPairwiseI16x8U
|
| Operator::I32x4ExtAddPairwiseI16x8U
|
||||||
| Operator::F32x4DemoteF64x2Zero
|
| Operator::F32x4DemoteF64x2Zero
|
||||||
| Operator::F64x2PromoteLowF32x4
|
| Operator::F64x2PromoteLowF32x4
|
||||||
| Operator::F64x2ConvertLowI32x4S
|
|
||||||
| Operator::F64x2ConvertLowI32x4U
|
| Operator::F64x2ConvertLowI32x4U
|
||||||
| Operator::I32x4TruncSatF64x2SZero
|
| Operator::I32x4TruncSatF64x2SZero
|
||||||
| Operator::I32x4TruncSatF64x2UZero
|
| Operator::I32x4TruncSatF64x2UZero
|
||||||
|
|||||||
Reference in New Issue
Block a user