diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 0810bdd900..48bd822d4d 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -371,6 +371,7 @@ pub enum SseOpcode { Cvtsi2sd, Cvtss2si, Cvtss2sd, + Cvttps2dq, Cvttss2si, Cvttsd2si, Divps, @@ -535,6 +536,7 @@ impl SseOpcode { | SseOpcode::Cvtsd2si | SseOpcode::Cvtsi2sd | SseOpcode::Cvtss2sd + | SseOpcode::Cvttps2dq | SseOpcode::Cvttsd2si | SseOpcode::Divpd | SseOpcode::Divsd @@ -662,6 +664,7 @@ impl fmt::Debug for SseOpcode { SseOpcode::Cvtsi2sd => "cvtsi2sd", SseOpcode::Cvtss2si => "cvtss2si", SseOpcode::Cvtss2sd => "cvtss2sd", + SseOpcode::Cvttps2dq => "cvttps2dq", SseOpcode::Cvttss2si => "cvttss2si", SseOpcode::Cvttsd2si => "cvttsd2si", SseOpcode::Divps => "divps", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 6e16811774..2ad4c4d723 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1740,6 +1740,7 @@ pub(crate) fn emit( SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2), SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2), SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2), + SseOpcode::Cvttps2dq => (LegacyPrefixes::_F3, 0x0F5B, 2), SseOpcode::Cvtdq2ps => (LegacyPrefixes::None, 0x0F5B, 2), SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2), SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2), diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 77cb038b1c..41f4f40a49 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -3300,6 +3300,12 @@ fn test_x64_emit() { "cvtdq2ps %xmm1, %xmm8", )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Cvttps2dq, RegMem::reg(xmm9), w_xmm8), + "F3450F5BC1", + "cvttps2dq %xmm9, %xmm8", + )); + // XMM_Mov_R_M: float stores 
insns.push(( Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None), diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 237213993c..10205ae668 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -2335,39 +2335,106 @@ fn lower_insn_to_regs>( let dst = get_output_reg(ctx, outputs[0]); let input_ty = ctx.input_ty(insn, 0); - let src_size = if input_ty == types::F32 { - OperandSize::Size32 + if !input_ty.is_vector() { + let src_size = if input_ty == types::F32 { + OperandSize::Size32 + } else { + assert_eq!(input_ty, types::F64); + OperandSize::Size64 + }; + + let output_ty = ty.unwrap(); + let dst_size = if output_ty == types::I32 { + OperandSize::Size32 + } else { + assert_eq!(output_ty, types::I64); + OperandSize::Size64 + }; + + let to_signed = op == Opcode::FcvtToSint || op == Opcode::FcvtToSintSat; + let is_sat = op == Opcode::FcvtToUintSat || op == Opcode::FcvtToSintSat; + + let src_copy = ctx.alloc_tmp(RegClass::V128, input_ty); + ctx.emit(Inst::gen_move(src_copy, src, input_ty)); + + let tmp_xmm = ctx.alloc_tmp(RegClass::V128, input_ty); + let tmp_gpr = ctx.alloc_tmp(RegClass::I64, output_ty); + + let srcloc = ctx.srcloc(insn); + if to_signed { + ctx.emit(Inst::cvt_float_to_sint_seq( + src_size, dst_size, is_sat, src_copy, dst, tmp_gpr, tmp_xmm, srcloc, + )); + } else { + ctx.emit(Inst::cvt_float_to_uint_seq( + src_size, dst_size, is_sat, src_copy, dst, tmp_gpr, tmp_xmm, srcloc, + )); + } } else { - assert_eq!(input_ty, types::F64); - OperandSize::Size64 - }; + if op == Opcode::FcvtToSintSat { + // Sets destination to zero if float is NaN + let tmp = ctx.alloc_tmp(RegClass::V128, types::I32X4); + ctx.emit(Inst::xmm_unary_rm_r( + SseOpcode::Movapd, + RegMem::reg(src), + tmp, + )); + ctx.emit(Inst::gen_move(dst, src, input_ty)); + let cond = FcmpImm::from(FloatCC::Equal); + ctx.emit(Inst::xmm_rm_r_imm( + SseOpcode::Cmpps, + 
RegMem::reg(tmp.to_reg()), + tmp, + cond.encode(), + false, + )); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Andps, + RegMem::reg(tmp.to_reg()), + dst, + )); - let output_ty = ty.unwrap(); - let dst_size = if output_ty == types::I32 { - OperandSize::Size32 - } else { - assert_eq!(output_ty, types::I64); - OperandSize::Size64 - }; + // Sets top bit of tmp if float is positive + // Setting up to set top bit on negative float values + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pxor, + RegMem::reg(dst.to_reg()), + tmp, + )); - let to_signed = op == Opcode::FcvtToSint || op == Opcode::FcvtToSintSat; - let is_sat = op == Opcode::FcvtToUintSat || op == Opcode::FcvtToSintSat; + // Convert the packed float to packed doubleword. + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Cvttps2dq, + RegMem::reg(dst.to_reg()), + dst, + )); - let src_copy = ctx.alloc_tmp(RegClass::V128, input_ty); - ctx.emit(Inst::gen_move(src_copy, src, input_ty)); + // Set top bit only if < 0 + // Saturate lane with sign (top) bit. + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pand, + RegMem::reg(dst.to_reg()), + tmp, + )); + ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrad, RegMemImm::imm(31), tmp)); - let tmp_xmm = ctx.alloc_tmp(RegClass::V128, input_ty); - let tmp_gpr = ctx.alloc_tmp(RegClass::I64, output_ty); - - let srcloc = ctx.srcloc(insn); - if to_signed { - ctx.emit(Inst::cvt_float_to_sint_seq( - src_size, dst_size, is_sat, src_copy, dst, tmp_gpr, tmp_xmm, srcloc, - )); - } else { - ctx.emit(Inst::cvt_float_to_uint_seq( - src_size, dst_size, is_sat, src_copy, dst, tmp_gpr, tmp_xmm, srcloc, - )); + // On overflow 0x80000000 is returned to a lane. + // Below sets positive overflow lanes to 0x7FFFFFFF + // Keeps negative overflow lanes as is. 
+ ctx.emit(Inst::xmm_rm_r( SseOpcode::Pxor, RegMem::reg(tmp.to_reg()), dst, )); } else if op == Opcode::FcvtToUintSat { unimplemented!("f32x4.convert_i32x4_u"); } else { // Since this branch is also guarded by a check for vector types, // neither Opcode::FcvtToUint nor Opcode::FcvtToSint can reach here // due to vector variants not existing. The first two branches will // cover all reachable cases. unreachable!(); } } }