Add support for packed float to signed int conversion

Implements i32x4.trunc_sat_f32x4_s
This commit is contained in:
Johnnie Birch
2020-10-25 11:35:51 -07:00
parent 97392eae3d
commit 8bbe6a25a9
4 changed files with 105 additions and 28 deletions

View File

@@ -371,6 +371,7 @@ pub enum SseOpcode {
Cvtsi2sd, Cvtsi2sd,
Cvtss2si, Cvtss2si,
Cvtss2sd, Cvtss2sd,
Cvttps2dq,
Cvttss2si, Cvttss2si,
Cvttsd2si, Cvttsd2si,
Divps, Divps,
@@ -535,6 +536,7 @@ impl SseOpcode {
| SseOpcode::Cvtsd2si | SseOpcode::Cvtsd2si
| SseOpcode::Cvtsi2sd | SseOpcode::Cvtsi2sd
| SseOpcode::Cvtss2sd | SseOpcode::Cvtss2sd
| SseOpcode::Cvttps2dq
| SseOpcode::Cvttsd2si | SseOpcode::Cvttsd2si
| SseOpcode::Divpd | SseOpcode::Divpd
| SseOpcode::Divsd | SseOpcode::Divsd
@@ -662,6 +664,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Cvtsi2sd => "cvtsi2sd", SseOpcode::Cvtsi2sd => "cvtsi2sd",
SseOpcode::Cvtss2si => "cvtss2si", SseOpcode::Cvtss2si => "cvtss2si",
SseOpcode::Cvtss2sd => "cvtss2sd", SseOpcode::Cvtss2sd => "cvtss2sd",
SseOpcode::Cvttps2dq => "cvttps2dq",
SseOpcode::Cvttss2si => "cvttss2si", SseOpcode::Cvttss2si => "cvttss2si",
SseOpcode::Cvttsd2si => "cvttsd2si", SseOpcode::Cvttsd2si => "cvttsd2si",
SseOpcode::Divps => "divps", SseOpcode::Divps => "divps",

View File

@@ -1740,6 +1740,7 @@ pub(crate) fn emit(
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2), SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2), SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2), SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
SseOpcode::Cvttps2dq => (LegacyPrefixes::_F3, 0x0F5B, 2),
SseOpcode::Cvtdq2ps => (LegacyPrefixes::None, 0x0F5B, 2), SseOpcode::Cvtdq2ps => (LegacyPrefixes::None, 0x0F5B, 2),
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2), SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2), SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2),

View File

@@ -3300,6 +3300,12 @@ fn test_x64_emit() {
"cvtdq2ps %xmm1, %xmm8", "cvtdq2ps %xmm1, %xmm8",
)); ));
insns.push((
Inst::xmm_rm_r(SseOpcode::Cvttps2dq, RegMem::reg(xmm9), w_xmm8),
"F3450F5BC1",
"cvttps2dq %xmm9, %xmm8",
));
// XMM_Mov_R_M: float stores // XMM_Mov_R_M: float stores
insns.push(( insns.push((
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None), Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None),

View File

@@ -2335,39 +2335,106 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let dst = get_output_reg(ctx, outputs[0]); let dst = get_output_reg(ctx, outputs[0]);
let input_ty = ctx.input_ty(insn, 0); let input_ty = ctx.input_ty(insn, 0);
let src_size = if input_ty == types::F32 { if !input_ty.is_vector() {
OperandSize::Size32 let src_size = if input_ty == types::F32 {
OperandSize::Size32
} else {
assert_eq!(input_ty, types::F64);
OperandSize::Size64
};
let output_ty = ty.unwrap();
let dst_size = if output_ty == types::I32 {
OperandSize::Size32
} else {
assert_eq!(output_ty, types::I64);
OperandSize::Size64
};
let to_signed = op == Opcode::FcvtToSint || op == Opcode::FcvtToSintSat;
let is_sat = op == Opcode::FcvtToUintSat || op == Opcode::FcvtToSintSat;
let src_copy = ctx.alloc_tmp(RegClass::V128, input_ty);
ctx.emit(Inst::gen_move(src_copy, src, input_ty));
let tmp_xmm = ctx.alloc_tmp(RegClass::V128, input_ty);
let tmp_gpr = ctx.alloc_tmp(RegClass::I64, output_ty);
let srcloc = ctx.srcloc(insn);
if to_signed {
ctx.emit(Inst::cvt_float_to_sint_seq(
src_size, dst_size, is_sat, src_copy, dst, tmp_gpr, tmp_xmm, srcloc,
));
} else {
ctx.emit(Inst::cvt_float_to_uint_seq(
src_size, dst_size, is_sat, src_copy, dst, tmp_gpr, tmp_xmm, srcloc,
));
}
} else { } else {
assert_eq!(input_ty, types::F64); if op == Opcode::FcvtToSintSat {
OperandSize::Size64 // Sets destination to zero if float is NaN
}; let tmp = ctx.alloc_tmp(RegClass::V128, types::I32X4);
ctx.emit(Inst::xmm_unary_rm_r(
SseOpcode::Movapd,
RegMem::reg(src),
tmp,
));
ctx.emit(Inst::gen_move(dst, src, input_ty));
let cond = FcmpImm::from(FloatCC::Equal);
ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Cmpps,
RegMem::reg(tmp.to_reg()),
tmp,
cond.encode(),
false,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Andps,
RegMem::reg(tmp.to_reg()),
dst,
));
let output_ty = ty.unwrap(); // Sets top bit of tmp if float is positive
let dst_size = if output_ty == types::I32 { // Setting up to set top bit on negative float values
OperandSize::Size32 ctx.emit(Inst::xmm_rm_r(
} else { SseOpcode::Pxor,
assert_eq!(output_ty, types::I64); RegMem::reg(dst.to_reg()),
OperandSize::Size64 tmp,
}; ));
let to_signed = op == Opcode::FcvtToSint || op == Opcode::FcvtToSintSat; // Convert the packed float to packed doubleword.
let is_sat = op == Opcode::FcvtToUintSat || op == Opcode::FcvtToSintSat; ctx.emit(Inst::xmm_rm_r(
SseOpcode::Cvttps2dq,
RegMem::reg(dst.to_reg()),
dst,
));
let src_copy = ctx.alloc_tmp(RegClass::V128, input_ty); // Set top bit only if < 0
ctx.emit(Inst::gen_move(src_copy, src, input_ty)); // Saturate lane with sign (top) bit.
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pand,
RegMem::reg(dst.to_reg()),
tmp,
));
ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrad, RegMemImm::imm(31), tmp));
let tmp_xmm = ctx.alloc_tmp(RegClass::V128, input_ty); // On overflow 0x80000000 is returned to a lane.
let tmp_gpr = ctx.alloc_tmp(RegClass::I64, output_ty); // Below sets positive overflow lanes to 0x7FFFFFFF
// Keeps negative overflow lanes as is.
let srcloc = ctx.srcloc(insn); ctx.emit(Inst::xmm_rm_r(
if to_signed { SseOpcode::Pxor,
ctx.emit(Inst::cvt_float_to_sint_seq( RegMem::reg(tmp.to_reg()),
src_size, dst_size, is_sat, src_copy, dst, tmp_gpr, tmp_xmm, srcloc, dst,
)); ));
} else { } else if op == Opcode::FcvtToUintSat {
ctx.emit(Inst::cvt_float_to_uint_seq( unimplemented!("f32x4.convert_i32x4_u");
src_size, dst_size, is_sat, src_copy, dst, tmp_gpr, tmp_xmm, srcloc, } else {
)); // Since this branch is also guarded by a check for vector types
// neither Opcode::FcvtToUint nor Opcode::FcvtToSint can reach here
// due to vector variants not existing. The first two branches will
// cover all reachable cases.
unreachable!();
}
} }
} }