Add support for packed float to signed int conversion
Implements i32x4.trunc_sat_f32x4_s
This commit is contained in:
@@ -371,6 +371,7 @@ pub enum SseOpcode {
|
|||||||
Cvtsi2sd,
|
Cvtsi2sd,
|
||||||
Cvtss2si,
|
Cvtss2si,
|
||||||
Cvtss2sd,
|
Cvtss2sd,
|
||||||
|
Cvttps2dq,
|
||||||
Cvttss2si,
|
Cvttss2si,
|
||||||
Cvttsd2si,
|
Cvttsd2si,
|
||||||
Divps,
|
Divps,
|
||||||
@@ -535,6 +536,7 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Cvtsd2si
|
| SseOpcode::Cvtsd2si
|
||||||
| SseOpcode::Cvtsi2sd
|
| SseOpcode::Cvtsi2sd
|
||||||
| SseOpcode::Cvtss2sd
|
| SseOpcode::Cvtss2sd
|
||||||
|
| SseOpcode::Cvttps2dq
|
||||||
| SseOpcode::Cvttsd2si
|
| SseOpcode::Cvttsd2si
|
||||||
| SseOpcode::Divpd
|
| SseOpcode::Divpd
|
||||||
| SseOpcode::Divsd
|
| SseOpcode::Divsd
|
||||||
@@ -662,6 +664,7 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Cvtsi2sd => "cvtsi2sd",
|
SseOpcode::Cvtsi2sd => "cvtsi2sd",
|
||||||
SseOpcode::Cvtss2si => "cvtss2si",
|
SseOpcode::Cvtss2si => "cvtss2si",
|
||||||
SseOpcode::Cvtss2sd => "cvtss2sd",
|
SseOpcode::Cvtss2sd => "cvtss2sd",
|
||||||
|
SseOpcode::Cvttps2dq => "cvttps2dq",
|
||||||
SseOpcode::Cvttss2si => "cvttss2si",
|
SseOpcode::Cvttss2si => "cvttss2si",
|
||||||
SseOpcode::Cvttsd2si => "cvttsd2si",
|
SseOpcode::Cvttsd2si => "cvttsd2si",
|
||||||
SseOpcode::Divps => "divps",
|
SseOpcode::Divps => "divps",
|
||||||
|
|||||||
@@ -1740,6 +1740,7 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
|
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
|
||||||
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
|
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
|
||||||
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
|
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
|
||||||
|
SseOpcode::Cvttps2dq => (LegacyPrefixes::_F3, 0x0F5B, 2),
|
||||||
SseOpcode::Cvtdq2ps => (LegacyPrefixes::None, 0x0F5B, 2),
|
SseOpcode::Cvtdq2ps => (LegacyPrefixes::None, 0x0F5B, 2),
|
||||||
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
|
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
|
||||||
SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2),
|
SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2),
|
||||||
|
|||||||
@@ -3300,6 +3300,12 @@ fn test_x64_emit() {
|
|||||||
"cvtdq2ps %xmm1, %xmm8",
|
"cvtdq2ps %xmm1, %xmm8",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Cvttps2dq, RegMem::reg(xmm9), w_xmm8),
|
||||||
|
"F3450F5BC1",
|
||||||
|
"cvttps2dq %xmm9, %xmm8",
|
||||||
|
));
|
||||||
|
|
||||||
// XMM_Mov_R_M: float stores
|
// XMM_Mov_R_M: float stores
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None),
|
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None),
|
||||||
|
|||||||
@@ -2335,6 +2335,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let dst = get_output_reg(ctx, outputs[0]);
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
|
|
||||||
let input_ty = ctx.input_ty(insn, 0);
|
let input_ty = ctx.input_ty(insn, 0);
|
||||||
|
if !input_ty.is_vector() {
|
||||||
let src_size = if input_ty == types::F32 {
|
let src_size = if input_ty == types::F32 {
|
||||||
OperandSize::Size32
|
OperandSize::Size32
|
||||||
} else {
|
} else {
|
||||||
@@ -2369,6 +2370,72 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
src_size, dst_size, is_sat, src_copy, dst, tmp_gpr, tmp_xmm, srcloc,
|
src_size, dst_size, is_sat, src_copy, dst, tmp_gpr, tmp_xmm, srcloc,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
if op == Opcode::FcvtToSintSat {
|
||||||
|
// Sets destination to zero if float is NaN
|
||||||
|
let tmp = ctx.alloc_tmp(RegClass::V128, types::I32X4);
|
||||||
|
ctx.emit(Inst::xmm_unary_rm_r(
|
||||||
|
SseOpcode::Movapd,
|
||||||
|
RegMem::reg(src),
|
||||||
|
tmp,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::gen_move(dst, src, input_ty));
|
||||||
|
let cond = FcmpImm::from(FloatCC::Equal);
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(
|
||||||
|
SseOpcode::Cmpps,
|
||||||
|
RegMem::reg(tmp.to_reg()),
|
||||||
|
tmp,
|
||||||
|
cond.encode(),
|
||||||
|
false,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Andps,
|
||||||
|
RegMem::reg(tmp.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
|
||||||
|
// Sets top bit of tmp if float is positive
|
||||||
|
// Setting up to set top bit on negative float values
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pxor,
|
||||||
|
RegMem::reg(dst.to_reg()),
|
||||||
|
tmp,
|
||||||
|
));
|
||||||
|
|
||||||
|
// Convert the packed float to packed doubleword.
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Cvttps2dq,
|
||||||
|
RegMem::reg(dst.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
|
||||||
|
// Set top bit only if < 0
|
||||||
|
// Saturate lane with sign (top) bit.
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pand,
|
||||||
|
RegMem::reg(dst.to_reg()),
|
||||||
|
tmp,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrad, RegMemImm::imm(31), tmp));
|
||||||
|
|
||||||
|
// On overflow 0x80000000 is returned to a lane.
|
||||||
|
// Below sets positive overflow lanes to 0x7FFFFFFF
|
||||||
|
// Keeps negative overflow lanes as is.
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pxor,
|
||||||
|
RegMem::reg(tmp.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
} else if op == Opcode::FcvtToUintSat {
|
||||||
|
unimplemented!("f32x4.convert_i32x4_u");
|
||||||
|
} else {
|
||||||
|
// Since this branch is also guarded by a check for vector types
|
||||||
|
// neither Opcode::FcvtToUint nor Opcode::FcvtToSint can reach here
|
||||||
|
// due to vector variants not existing. The first two branches will
|
||||||
|
// cover all reachable cases.
|
||||||
|
unreachable!();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Bitcast => {
|
Opcode::Bitcast => {
|
||||||
|
|||||||
Reference in New Issue
Block a user