Adds support for signed packed integer conversion to float

f32x4.convert_i32x4_s
This commit is contained in:
Johnnie Birch
2020-10-13 18:45:27 -07:00
parent 2dad74f9d0
commit f27c0f3434
4 changed files with 45 additions and 20 deletions

View File

@@ -364,6 +364,7 @@ pub enum SseOpcode {
Cmppd, Cmppd,
Cmpss, Cmpss,
Cmpsd, Cmpsd,
Cvtdq2ps,
Cvtsd2ss, Cvtsd2ss,
Cvtsd2si, Cvtsd2si,
Cvtsi2ss, Cvtsi2ss,
@@ -529,6 +530,7 @@ impl SseOpcode {
| SseOpcode::Cmppd | SseOpcode::Cmppd
| SseOpcode::Cmpsd | SseOpcode::Cmpsd
| SseOpcode::Comisd | SseOpcode::Comisd
| SseOpcode::Cvtdq2ps
| SseOpcode::Cvtsd2ss | SseOpcode::Cvtsd2ss
| SseOpcode::Cvtsd2si | SseOpcode::Cvtsd2si
| SseOpcode::Cvtsi2sd | SseOpcode::Cvtsi2sd
@@ -653,6 +655,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Cmpsd => "cmpsd", SseOpcode::Cmpsd => "cmpsd",
SseOpcode::Comiss => "comiss", SseOpcode::Comiss => "comiss",
SseOpcode::Comisd => "comisd", SseOpcode::Comisd => "comisd",
SseOpcode::Cvtdq2ps => "cvtdq2ps",
SseOpcode::Cvtsd2ss => "cvtsd2ss", SseOpcode::Cvtsd2ss => "cvtsd2ss",
SseOpcode::Cvtsd2si => "cvtsd2si", SseOpcode::Cvtsd2si => "cvtsd2si",
SseOpcode::Cvtsi2ss => "cvtsi2ss", SseOpcode::Cvtsi2ss => "cvtsi2ss",

View File

@@ -1739,6 +1739,7 @@ pub(crate) fn emit(
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2), SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2), SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2), SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
SseOpcode::Cvtdq2ps => (LegacyPrefixes::None, 0x0F5B, 2),
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2), SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2), SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2),
SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2), SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2),

View File

@@ -3290,6 +3290,14 @@ fn test_x64_emit() {
"pshufb %xmm11, %xmm2", "pshufb %xmm11, %xmm2",
)); ));
// ========================================================
// XMM_RM_R: Integer Conversion
insns.push((
Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::reg(xmm1), w_xmm8),
"440F5BC1",
"cvtdq2ps %xmm1, %xmm8",
));
// XMM_Mov_R_M: float stores // XMM_Mov_R_M: float stores
insns.push(( insns.push((
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None), Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None),

View File

@@ -2141,28 +2141,41 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} }
Opcode::FcvtFromSint => { Opcode::FcvtFromSint => {
let (ext_spec, src_size) = match ctx.input_ty(insn, 0) {
types::I8 | types::I16 => (Some(ExtSpec::SignExtendTo32), OperandSize::Size32),
types::I32 => (None, OperandSize::Size32),
types::I64 => (None, OperandSize::Size64),
_ => unreachable!(),
};
let src = match ext_spec {
Some(ext_spec) => RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec)),
None => input_to_reg_mem(ctx, inputs[0]),
};
let output_ty = ty.unwrap(); let output_ty = ty.unwrap();
let opcode = if output_ty == types::F32 { if !output_ty.is_vector() {
SseOpcode::Cvtsi2ss let (ext_spec, src_size) = match ctx.input_ty(insn, 0) {
} else { types::I8 | types::I16 => (Some(ExtSpec::SignExtendTo32), OperandSize::Size32),
assert_eq!(output_ty, types::F64); types::I32 => (None, OperandSize::Size32),
SseOpcode::Cvtsi2sd types::I64 => (None, OperandSize::Size64),
}; _ => unreachable!(),
};
let dst = get_output_reg(ctx, outputs[0]); let src = match ext_spec {
ctx.emit(Inst::gpr_to_xmm(opcode, src, src_size, dst)); Some(ext_spec) => RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec)),
None => input_to_reg_mem(ctx, inputs[0]),
};
let opcode = if output_ty == types::F32 {
SseOpcode::Cvtsi2ss
} else {
assert_eq!(output_ty, types::F64);
SseOpcode::Cvtsi2sd
};
let dst = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::gpr_to_xmm(opcode, src, src_size, dst));
} else {
let ty = ty.unwrap();
let src = put_input_in_reg(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]);
let opcode = match ctx.input_ty(insn, 0) {
types::I32X4 => SseOpcode::Cvtdq2ps,
_ => {
unimplemented!("unable to use type {} for op {}", ctx.input_ty(insn, 0), op)
}
};
ctx.emit(Inst::gen_move(dst, src, ty));
ctx.emit(Inst::xmm_rm_r(opcode, RegMem::from(dst), dst));
}
} }
Opcode::FcvtFromUint => { Opcode::FcvtFromUint => {