diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 3a2dd0de09..0810bdd900 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -364,6 +364,7 @@ pub enum SseOpcode { Cmppd, Cmpss, Cmpsd, + Cvtdq2ps, Cvtsd2ss, Cvtsd2si, Cvtsi2ss, @@ -529,6 +530,7 @@ impl SseOpcode { | SseOpcode::Cmppd | SseOpcode::Cmpsd | SseOpcode::Comisd + | SseOpcode::Cvtdq2ps | SseOpcode::Cvtsd2ss | SseOpcode::Cvtsd2si | SseOpcode::Cvtsi2sd @@ -653,6 +655,7 @@ impl fmt::Debug for SseOpcode { SseOpcode::Cmpsd => "cmpsd", SseOpcode::Comiss => "comiss", SseOpcode::Comisd => "comisd", + SseOpcode::Cvtdq2ps => "cvtdq2ps", SseOpcode::Cvtsd2ss => "cvtsd2ss", SseOpcode::Cvtsd2si => "cvtsd2si", SseOpcode::Cvtsi2ss => "cvtsi2ss", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 688e620d83..3fa029b7c1 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1739,6 +1739,7 @@ pub(crate) fn emit( SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2), SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2), SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2), + SseOpcode::Cvtdq2ps => (LegacyPrefixes::None, 0x0F5B, 2), SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2), SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2), SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2), diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 62992be2bd..59ce1817cb 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -3290,6 +3290,14 @@ fn test_x64_emit() { "pshufb %xmm11, %xmm2", )); + // ======================================================== + // XMM_RM_R: Integer Conversion + insns.push(( + Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::reg(xmm1), w_xmm8), + "440F5BC1", + "cvtdq2ps %xmm1, %xmm8", + )); + // XMM_Mov_R_M: float stores insns.push(( Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None), diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 108072b97c..8a00b18b63 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -2141,28 +2141,41 @@ fn lower_insn_to_regs>( } Opcode::FcvtFromSint => { - let (ext_spec, src_size) = match ctx.input_ty(insn, 0) { - types::I8 | types::I16 => (Some(ExtSpec::SignExtendTo32), OperandSize::Size32), - types::I32 => (None, OperandSize::Size32), - types::I64 => (None, OperandSize::Size64), - _ => unreachable!(), - }; - - let src = match ext_spec { - Some(ext_spec) => RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec)), - None => input_to_reg_mem(ctx, inputs[0]), - }; - let output_ty = ty.unwrap(); - let opcode = if output_ty == types::F32 { - SseOpcode::Cvtsi2ss - } else { - assert_eq!(output_ty, types::F64); - SseOpcode::Cvtsi2sd - }; + if !output_ty.is_vector() { + let (ext_spec, src_size) = match ctx.input_ty(insn, 0) { + types::I8 | types::I16 => (Some(ExtSpec::SignExtendTo32), OperandSize::Size32), + types::I32 => (None, OperandSize::Size32), + types::I64 => (None, OperandSize::Size64), + _ => unreachable!(), + }; - let dst = get_output_reg(ctx, outputs[0]); - ctx.emit(Inst::gpr_to_xmm(opcode, src, src_size, dst)); + let src = match ext_spec { + Some(ext_spec) => RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec)), + None => input_to_reg_mem(ctx, inputs[0]), + }; + + let opcode = if output_ty == types::F32 { + SseOpcode::Cvtsi2ss + } else { + assert_eq!(output_ty, types::F64); + SseOpcode::Cvtsi2sd + }; + let dst = get_output_reg(ctx, outputs[0]); + ctx.emit(Inst::gpr_to_xmm(opcode, src, src_size, dst)); + } else { + let ty = ty.unwrap(); + let src = put_input_in_reg(ctx, inputs[0]); + let dst = get_output_reg(ctx, outputs[0]); + let opcode = match ctx.input_ty(insn, 0) { + types::I32X4 => SseOpcode::Cvtdq2ps, + _ => { + unimplemented!("unable to use type {} for op {}", ctx.input_ty(insn, 0), op) + } + }; + ctx.emit(Inst::gen_move(dst, src, ty)); + ctx.emit(Inst::xmm_rm_r(opcode, RegMem::from(dst), dst)); + } } Opcode::FcvtFromUint => {