diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 5697d2dbb5..2387af0ec6 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -332,7 +332,7 @@ pub(crate) enum InstructionSet { /// Some scalar SSE operations requiring 2 operands r/m and r. /// TODO: Below only includes scalar operations. To be seen if packed will be added here. -#[derive(Clone, PartialEq)] +#[derive(Clone, Copy, PartialEq)] pub enum SseOpcode { Addss, Addsd, @@ -797,3 +797,26 @@ impl BranchTarget { } } } + +/// An operand's size in bits. +#[derive(Clone, Copy, PartialEq)] +pub enum OperandSize { + Size32, + Size64, +} + +impl OperandSize { + pub(crate) fn to_bytes(&self) -> u8 { + match self { + Self::Size32 => 4, + Self::Size64 => 8, + } + } + + pub(crate) fn to_bits(&self) -> u8 { + match self { + Self::Size32 => 32, + Self::Size64 => 64, + } + } +} diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 2ff58a8844..09b0957622 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -4,7 +4,10 @@ use regalloc::Reg; use std::convert::TryFrom; use crate::binemit::Reloc; -use crate::isa::x64::inst::*; +use crate::{ + ir::immediates::{Ieee32, Ieee64}, + isa::x64::inst::*, +}; fn low8_will_sign_extend_to_64(x: u32) -> bool { let xs = (x as i32) as i64; @@ -1573,25 +1576,50 @@ pub(crate) fn emit( emit_std_reg_mem(sink, prefix, opcode, 2, *src, dst, RexFlags::clear_w()); } - Inst::XmmToGpr { op, src, dst } => { - let (rex, prefix, opcode) = match op { - SseOpcode::Movd => (RexFlags::clear_w(), LegacyPrefix::_66, 0x0F7E), - SseOpcode::Movq => (RexFlags::set_w(), LegacyPrefix::_66, 0x0F7E), + Inst::XmmToGpr { + op, + src, + dst, + dst_size, + } => { + let (prefix, opcode, dst_first) = match op { + SseOpcode::Movd => (LegacyPrefix::_66, 0x0F7E, false), + SseOpcode::Movq => (LegacyPrefix::_66, 0x0F7E, false), + SseOpcode::Cvttss2si => (LegacyPrefix::_F3, 0x0F2C, true), + SseOpcode::Cvttsd2si => (LegacyPrefix::_F2, 0x0F2C, true), _ => panic!("unexpected opcode {:?}", op), }; - emit_std_reg_reg(sink, prefix, opcode, 2, *src, dst.to_reg(), rex); + let rex = match dst_size { + OperandSize::Size32 => RexFlags::clear_w(), + OperandSize::Size64 => RexFlags::set_w(), + }; + + let (src, dst) = if dst_first { + (dst.to_reg(), *src) + } else { + (*src, dst.to_reg()) + }; + + emit_std_reg_reg(sink, prefix, opcode, 2, src, dst, rex); } Inst::GprToXmm { op, src: src_e, dst: reg_g, + src_size, } => { - let (rex, prefix, opcode) = match op { - SseOpcode::Movd => (RexFlags::clear_w(), LegacyPrefix::_66, 0x0F6E), - SseOpcode::Movq => (RexFlags::set_w(), LegacyPrefix::_66, 0x0F6E), + let (prefix, opcode) = match op { + SseOpcode::Movd => (LegacyPrefix::_66, 0x0F6E), + SseOpcode::Movq => (LegacyPrefix::_66, 0x0F6E), + SseOpcode::Cvtsi2ss => (LegacyPrefix::_F3, 0x0F2A), + SseOpcode::Cvtsi2sd => (LegacyPrefix::_F2, 0x0F2A), _ => panic!("unexpected opcode {:?}", op), }; + let rex = match *src_size { + OperandSize::Size32 => RexFlags::clear_w(), + OperandSize::Size64 => RexFlags::set_w(), + }; match src_e { RegMem::Reg { reg: reg_e } => { emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex); @@ -1622,6 +1650,405 @@ pub(crate) fn emit( } } + Inst::CvtUint64ToFloatSeq { + to_f64, + src, + dst, + tmp_gpr1, + tmp_gpr2, + } => { + // Emit the following sequence: + // + // cmp 0, %src + // jl handle_negative + // + // ;; handle 
positive, which can't overflow + // cvtsi2sd/cvtsi2ss %src, %dst + // j done + // + // handle_negative: + // mov %src, %tmp_gpr1 + // shr $1, %tmp_gpr1 + // mov %src, %tmp_gpr2 + // and $1, %tmp_gpr2 + // or %tmp_gpr1, %tmp_gpr2 + // ctsi2sd/cvtsi2ss %tmp_gpr2, %dst + // addsd/addss %dst, %dst + // + // done: + + // A small helper to generate a signed conversion instruction, that helps deduplicating + // code below. + let emit_signed_cvt = |sink: &mut MachBuffer, + flags: &settings::Flags, + state: &mut EmitState, + src: Reg, + dst: Writable, + to_f64: bool| { + // Handle an unsigned int, which is the "easy" case: a signed conversion will do the + // right thing. + let op = if to_f64 { + SseOpcode::Cvtsi2sd + } else { + SseOpcode::Cvtsi2ss + }; + let inst = Inst::gpr_to_xmm(op, RegMem::reg(src), OperandSize::Size64, dst); + inst.emit(sink, flags, state); + }; + + let handle_negative = sink.get_label(); + let done = sink.get_label(); + + // If x seen as a signed int is not negative, a signed-conversion will do the right + // thing. + // TODO use tst src, src here. + let inst = Inst::cmp_rmi_r(8, RegMemImm::imm(0), *src); + inst.emit(sink, flags, state); + + one_way_jmp(sink, CC::L, handle_negative); + + // Handle an unsigned int, which is the "easy" case: a signed conversion will do the + // right thing. + emit_signed_cvt(sink, flags, state, *src, *dst, *to_f64); + + let inst = Inst::jmp_known(BranchTarget::Label(done)); + inst.emit(sink, flags, state); + + sink.bind_label(handle_negative); + + // Divide x by two to get it in range for the signed conversion, keep the LSB, and + // scale it back up on the FP side. + if tmp_gpr1.to_reg() != *src { + let inst = Inst::gen_move(*tmp_gpr1, *src, I64); + inst.emit(sink, flags, state); + } + + // tmp_gpr1 := src >> 1 + let inst = Inst::shift_r( + /*is_64*/ true, + ShiftKind::ShiftRightLogical, + Some(1), + *tmp_gpr1, + ); + inst.emit(sink, flags, state); + + if tmp_gpr2.to_reg() != *src { + let inst = Inst::gen_move(*tmp_gpr2, *src, I64); + inst.emit(sink, flags, state); + } + + let inst = Inst::alu_rmi_r( + true, /* 64bits */ + AluRmiROpcode::And, + RegMemImm::imm(1), + *tmp_gpr2, + ); + inst.emit(sink, flags, state); + + let inst = Inst::alu_rmi_r( + true, /* 64bits */ + AluRmiROpcode::Or, + RegMemImm::reg(tmp_gpr1.to_reg()), + *tmp_gpr2, + ); + inst.emit(sink, flags, state); + + emit_signed_cvt(sink, flags, state, tmp_gpr2.to_reg(), *dst, *to_f64); + + let add_op = if *to_f64 { + SseOpcode::Addsd + } else { + SseOpcode::Addss + }; + let inst = Inst::xmm_rm_r(add_op, RegMem::reg(dst.to_reg()), *dst); + inst.emit(sink, flags, state); + + sink.bind_label(done); + } + + Inst::CvtFloatToSintSeq { + src_size, + dst_size, + src, + dst, + tmp_gpr, + tmp_xmm, + srcloc, + } => { + // Emits the following sequence: + // + // cvttss2si/cvttsd2si %src, %dst + // cmp $INT_MIN, %dst ;; 2 instructions (movaps + reg cmp) for 64-bits ints + // jnz done + // + // ;; check for NaN + // cmpss/cmpsd %src, %src + // jnp check_if_correct + // ud2 trap BadConversionToInteger + // + // ;; check if INT_MIN was the correct result, against a magic constant: + // check_if_correct: + // movaps/mov $magic, %tmp_gpr + // movq/movd %tmp_gpr, %tmp_xmm + // cmpss/cmpsd %tmp_xmm, %src + // jnb/jnbe $check_positive + // ud2 trap IntegerOverflow + // + // ;; if positive, it was a real overflow + // check_positive: + // mov 0, %tmp_gpr + // movd/movq %tmp_gpr, %tmp_xmm + // cmpss/cmpsd %src, %tmp_xmm + // jnb done + // ud2 trap IntegerOverflow + // + // done: + + let src = 
src.to_reg(); + + let (cast_op, cmp_op, trunc_op) = match src_size { + OperandSize::Size64 => (SseOpcode::Movq, SseOpcode::Ucomisd, SseOpcode::Cvttsd2si), + OperandSize::Size32 => (SseOpcode::Movd, SseOpcode::Ucomiss, SseOpcode::Cvttss2si), + }; + + let done = sink.get_label(); + + let inst = Inst::xmm_to_gpr(trunc_op, src, *dst, *dst_size); + inst.emit(sink, flags, state); + + // Generate constant INT_MIN, and compare against it. + if *dst_size == OperandSize::Size64 { + let inst = Inst::imm_r(true, 0x8000000000000000, *tmp_gpr); + inst.emit(sink, flags, state); + + let inst = Inst::cmp_rmi_r(8, RegMemImm::reg(tmp_gpr.to_reg()), dst.to_reg()); + inst.emit(sink, flags, state); + } else { + // Emit a simple comparison. + let inst = Inst::cmp_rmi_r(4, RegMemImm::imm(0x80000000), dst.to_reg()); + inst.emit(sink, flags, state); + } + + one_way_jmp(sink, CC::NZ, done); // == (int) + + // Check for NaN. + + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), src); + inst.emit(sink, flags, state); + + let check_if_correct = sink.get_label(); + one_way_jmp(sink, CC::NP, check_if_correct); // jump over trap if not a NaN + + let inst = Inst::trap(*srcloc, TrapCode::BadConversionToInteger); + inst.emit(sink, flags, state); + + // Check if INT_MIN was the correct result: determine the smallest floating point + // number that would convert to INT_MIN, put it in a temporary register, and compare + // against the src register. + // If the src register is less (or in some cases, less-or-equal) than the threshold, + // trap! + + sink.bind_label(check_if_correct); + + let mut no_overflow_cc = CC::NB; // >= + let output_bits = dst_size.to_bits(); + match *src_size { + OperandSize::Size32 => { + let cst = Ieee32::pow2(output_bits - 1).neg().bits(); + let inst = Inst::imm32_r_unchecked(cst as u64, *tmp_gpr); + inst.emit(sink, flags, state); + } + OperandSize::Size64 => { + // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so + // there are values less than -2^(N-1) that convert correctly to INT_MIN. + let cst = if output_bits < 64 { + no_overflow_cc = CC::NBE; // > + Ieee64::fcvt_to_sint_negative_overflow(output_bits) + } else { + Ieee64::pow2(output_bits - 1).neg() + }; + let inst = Inst::imm_r(true, cst.bits(), *tmp_gpr); + inst.emit(sink, flags, state); + } + } + + let inst = + Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm); + inst.emit(sink, flags, state); + + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm.to_reg()), src); + inst.emit(sink, flags, state); + + let check_positive = sink.get_label(); + one_way_jmp(sink, no_overflow_cc, check_positive); // jump over trap if src >= or > threshold + + let inst = Inst::trap(*srcloc, TrapCode::IntegerOverflow); + inst.emit(sink, flags, state); + + // If positive, it was a real overflow. 
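+            // At this point the source is not below the negative-overflow threshold, yet
+            // the truncation still produced INT_MIN: either the source is a negative value
+            // that genuinely converts to INT_MIN, or it is a positive value so large that
+            // cvtt returned the integer-indefinite value. Compare the source against 0.0
+            // to tell the two cases apart, and trap on the positive-overflow case.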
+ + sink.bind_label(check_positive); + + // TODO use xorpd + let inst = Inst::imm_r(false, 0, *tmp_gpr); + inst.emit(sink, flags, state); + + let inst = + Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm); + inst.emit(sink, flags, state); + + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg()); + inst.emit(sink, flags, state); + + one_way_jmp(sink, CC::NB, done); // jump over trap if 0 >= src + + let inst = Inst::trap(*srcloc, TrapCode::IntegerOverflow); + inst.emit(sink, flags, state); + + sink.bind_label(done); + } + + Inst::CvtFloatToUintSeq { + src_size, + dst_size, + src, + dst, + tmp_gpr, + tmp_xmm, + srcloc, + } => { + // Emits the following sequence: + // + // movaps/mov 2**(int_width - 1), %tmp_gpr + // movq/movd %tmp_gpr, %tmp_xmm + // cmpss/cmpsd %tmp_xmm, %src + // jnb is_large + // + // ;; check for NaN inputs + // jnp next + // ud2 trap BadConversionToInteger + // + // next: + // cvttss2si/cvttsd2si %src, %dst + // cmp 0, %dst + // jnl done + // ud2 trap IntegerOverflow + // + // is_large: + // subss/subsd %tmp_xmm, %src ; <-- we clobber %src here + // cvttss2si/cvttss2sd %tmp_x, %dst + // cmp 0, %dst + // jnl next_is_large + // ud2 trap IntegerOverflow + // + // next_is_large: + // add 2**(int_width -1), %dst ;; 2 instructions for 64-bits integers + // + // done: + + assert!(tmp_xmm != src, "tmp_xmm clobbers src!"); + + let (sub_op, cast_op, cmp_op, trunc_op) = if *src_size == OperandSize::Size64 { + ( + SseOpcode::Subsd, + SseOpcode::Movq, + SseOpcode::Ucomisd, + SseOpcode::Cvttsd2si, + ) + } else { + ( + SseOpcode::Subss, + SseOpcode::Movd, + SseOpcode::Ucomiss, + SseOpcode::Cvttss2si, + ) + }; + + let done = sink.get_label(); + + if *src_size == OperandSize::Size64 { + let cst = Ieee64::pow2(dst_size.to_bits() - 1).bits(); + let inst = Inst::imm_r(true, cst, *tmp_gpr); + inst.emit(sink, flags, state); + } else { + let cst = Ieee32::pow2(dst_size.to_bits() - 1).bits() as u64; + let inst = Inst::imm32_r_unchecked(cst, *tmp_gpr); + inst.emit(sink, flags, state); + } + + let inst = + Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm); + inst.emit(sink, flags, state); + + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm.to_reg()), src.to_reg()); + inst.emit(sink, flags, state); + + let handle_large = sink.get_label(); + one_way_jmp(sink, CC::NB, handle_large); // jump to handle_large if src >= large_threshold + + let next = sink.get_label(); + one_way_jmp(sink, CC::NP, next); // jump over trap if not NaN + + let inst = Inst::trap(*srcloc, TrapCode::BadConversionToInteger); + inst.emit(sink, flags, state); + + sink.bind_label(next); + + // Actual truncation for small inputs: if the result is not positive, then we had an + // overflow. + + let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), *dst, *dst_size); + inst.emit(sink, flags, state); + + let inst = Inst::cmp_rmi_r(dst_size.to_bytes(), RegMemImm::imm(0), dst.to_reg()); + inst.emit(sink, flags, state); + + one_way_jmp(sink, CC::NL, done); // if dst >= 0, jump to done + + let inst = Inst::trap(*srcloc, TrapCode::IntegerOverflow); + inst.emit(sink, flags, state); + + // Now handle large inputs. 
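+            // The input is at least 2^(dst_bits - 1), so it does not fit in the signed
+            // range that the cvtt instructions operate on. Subtract 2^(dst_bits - 1)
+            // (already materialized in tmp_xmm), truncate the difference with the signed
+            // conversion, then add 2^(dst_bits - 1) back on the integer side. If the signed
+            // truncation comes back negative, the input was 2^dst_bits or more (an unsigned
+            // overflow), and we trap.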
+ + sink.bind_label(handle_large); + + let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm.to_reg()), *src); + inst.emit(sink, flags, state); + + let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), *dst, *dst_size); + inst.emit(sink, flags, state); + + let inst = Inst::cmp_rmi_r(dst_size.to_bytes(), RegMemImm::imm(0), dst.to_reg()); + inst.emit(sink, flags, state); + + let next_is_large = sink.get_label(); + one_way_jmp(sink, CC::NL, next_is_large); // if dst >= 0, jump to next_is_large + + let inst = Inst::trap(*srcloc, TrapCode::IntegerOverflow); + inst.emit(sink, flags, state); + + sink.bind_label(next_is_large); + + if *dst_size == OperandSize::Size64 { + let inst = Inst::imm_r(true, 1 << 63, *tmp_gpr); + inst.emit(sink, flags, state); + + let inst = Inst::alu_rmi_r( + true, + AluRmiROpcode::Add, + RegMemImm::reg(tmp_gpr.to_reg()), + *dst, + ); + inst.emit(sink, flags, state); + } else { + let inst = + Inst::alu_rmi_r(false, AluRmiROpcode::Add, RegMemImm::imm(1 << 31), *dst); + inst.emit(sink, flags, state); + } + + sink.bind_label(done); + } + Inst::LoadExtName { dst, name, diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index fc7f4869f5..a53cb72801 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -3072,35 +3072,96 @@ fn test_x64_emit() { // Xmm to int conversions, and conversely. insns.push(( - Inst::xmm_to_gpr(SseOpcode::Movd, xmm0, w_rsi), + Inst::xmm_to_gpr(SseOpcode::Movd, xmm0, w_rsi, OperandSize::Size32), "660F7EC6", "movd %xmm0, %esi", )); insns.push(( - Inst::xmm_to_gpr(SseOpcode::Movq, xmm2, w_rdi), + Inst::xmm_to_gpr(SseOpcode::Movq, xmm2, w_rdi, OperandSize::Size64), "66480F7ED7", "movq %xmm2, %rdi", )); insns.push(( - Inst::gpr_to_xmm(SseOpcode::Movd, RegMem::reg(rax), w_xmm15), + Inst::xmm_to_gpr(SseOpcode::Cvttss2si, xmm0, w_rsi, OperandSize::Size32), + "F30F2CF0", + "cvttss2si %xmm0, %esi", + )); + insns.push(( + Inst::xmm_to_gpr(SseOpcode::Cvttss2si, xmm0, w_rdi, OperandSize::Size64), + "F3480F2CF8", + "cvttss2si %xmm0, %rdi", + )); + insns.push(( + Inst::xmm_to_gpr(SseOpcode::Cvttsd2si, xmm0, w_rax, OperandSize::Size32), + "F20F2CC0", + "cvttsd2si %xmm0, %eax", + )); + insns.push(( + Inst::xmm_to_gpr(SseOpcode::Cvttsd2si, xmm0, w_r15, OperandSize::Size64), + "F24C0F2CF8", + "cvttsd2si %xmm0, %r15", + )); + + insns.push(( + Inst::gpr_to_xmm( + SseOpcode::Movd, + RegMem::reg(rax), + OperandSize::Size32, + w_xmm15, + ), "66440F6EF8", "movd %eax, %xmm15", )); insns.push(( - Inst::gpr_to_xmm(SseOpcode::Movd, RegMem::mem(Amode::imm_reg(2, r10)), w_xmm9), + Inst::gpr_to_xmm( + SseOpcode::Movd, + RegMem::mem(Amode::imm_reg(2, r10)), + OperandSize::Size32, + w_xmm9, + ), "66450F6E4A02", "movd 2(%r10), %xmm9", )); insns.push(( - Inst::gpr_to_xmm(SseOpcode::Movd, RegMem::reg(rsi), w_xmm1), + Inst::gpr_to_xmm( + SseOpcode::Movd, + RegMem::reg(rsi), + OperandSize::Size32, + w_xmm1, + ), "660F6ECE", "movd %esi, %xmm1", )); insns.push(( - Inst::gpr_to_xmm(SseOpcode::Movq, RegMem::reg(rdi), w_xmm15), + Inst::gpr_to_xmm( + SseOpcode::Movq, + RegMem::reg(rdi), + OperandSize::Size64, + w_xmm15, + ), "664C0F6EFF", "movq %rdi, %xmm15", )); + insns.push(( + Inst::gpr_to_xmm( + SseOpcode::Cvtsi2ss, + RegMem::reg(rdi), + OperandSize::Size32, + w_xmm15, + ), + "F3440F2AFF", + "cvtsi2ss %edi, %xmm15", + )); + insns.push(( + Inst::gpr_to_xmm( + SseOpcode::Cvtsi2sd, + RegMem::reg(rsi), + OperandSize::Size64, + w_xmm1, + ), + "F2480F2ACE", + "cvtsi2sd %rsi, 
%xmm1", + )); // ======================================================== // Misc instructions. diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 7b1e0dedfc..044fb0bbf5 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -220,18 +220,60 @@ pub enum Inst { srcloc: Option, }, - /// XMM (scalar) unary op (from xmm to integer reg): movd, movq + /// XMM (scalar) unary op (from xmm to integer reg): movd, movq, cvtts{s,d}2si XmmToGpr { op: SseOpcode, src: Reg, dst: Writable, + dst_size: OperandSize, }, - /// XMM (scalar) unary op (from integer to float reg): movd, movq + /// XMM (scalar) unary op (from integer to float reg): movd, movq, cvtsi2s{s,d} GprToXmm { op: SseOpcode, src: RegMem, dst: Writable, + src_size: OperandSize, + }, + + /// Converts an unsigned int64 to a float64. + CvtUint64ToFloatSeq { + /// Is the target a 64-bits or 32-bits register? + to_f64: bool, + src: Reg, + dst: Writable, + tmp_gpr1: Writable, + tmp_gpr2: Writable, + }, + + /// Converts a scalar xmm to a signed int32/int64. + CvtFloatToSintSeq { + dst_size: OperandSize, + src_size: OperandSize, + /// A copy of the source register, fed by lowering. It is marked as modified during + /// register allocation to make sure that the temporary xmm register differs from the src + /// register, since both registers are live at the same time in the generated code + /// sequence. + src: Writable, + dst: Writable, + tmp_gpr: Writable, + tmp_xmm: Writable, + srcloc: SourceLoc, + }, + + /// Converts a scalar xmm to an unsigned int32/int64. + CvtFloatToUintSeq { + src_size: OperandSize, + dst_size: OperandSize, + /// A copy of the source register, fed by lowering, reused as a temporary. It is marked as + /// modified during register allocation to make sure that the temporary xmm register + /// differs from the src register, since both registers are live at the same time in the + /// generated code sequence. + src: Writable, + dst: Writable, + tmp_gpr: Writable, + tmp_xmm: Writable, + srcloc: SourceLoc, }, /// XMM (scalar) conditional move. 
@@ -475,24 +517,111 @@ impl Inst { } } - pub(crate) fn xmm_to_gpr(op: SseOpcode, src: Reg, dst: Writable) -> Inst { + pub(crate) fn xmm_to_gpr( + op: SseOpcode, + src: Reg, + dst: Writable, + dst_size: OperandSize, + ) -> Inst { debug_assert!(src.get_class() == RegClass::V128); debug_assert!(dst.to_reg().get_class() == RegClass::I64); - Inst::XmmToGpr { op, src, dst } + Inst::XmmToGpr { + op, + src, + dst, + dst_size, + } } - pub(crate) fn gpr_to_xmm(op: SseOpcode, src: RegMem, dst: Writable) -> Inst { + pub(crate) fn gpr_to_xmm( + op: SseOpcode, + src: RegMem, + src_size: OperandSize, + dst: Writable, + ) -> Inst { src.assert_regclass_is(RegClass::I64); debug_assert!(dst.to_reg().get_class() == RegClass::V128); - Inst::GprToXmm { op, src, dst } + Inst::GprToXmm { + op, + src, + dst, + src_size, + } } pub(crate) fn xmm_cmp_rm_r(op: SseOpcode, src: RegMem, dst: Reg) -> Inst { - //TODO:: Add assert_reg_type helper + src.assert_regclass_is(RegClass::V128); debug_assert!(dst.get_class() == RegClass::V128); Inst::XMM_Cmp_RM_R { op, src, dst } } + pub(crate) fn cvt_u64_to_float_seq( + to_f64: bool, + src: Reg, + tmp_gpr1: Writable, + tmp_gpr2: Writable, + dst: Writable, + ) -> Inst { + debug_assert!(src.get_class() == RegClass::I64); + debug_assert!(tmp_gpr1.to_reg().get_class() == RegClass::I64); + debug_assert!(tmp_gpr2.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().get_class() == RegClass::V128); + Inst::CvtUint64ToFloatSeq { + src, + dst, + tmp_gpr1, + tmp_gpr2, + to_f64, + } + } + + pub(crate) fn cvt_float_to_sint_seq( + src_size: OperandSize, + dst_size: OperandSize, + src: Writable, + dst: Writable, + tmp_xmm: Writable, + tmp_gpr: Writable, + srcloc: SourceLoc, + ) -> Inst { + debug_assert!(src.to_reg().get_class() == RegClass::V128); + debug_assert!(tmp_xmm.to_reg().get_class() == RegClass::V128); + debug_assert!(tmp_gpr.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + Inst::CvtFloatToSintSeq { + src, + dst, + src_size, + dst_size, + tmp_gpr, + tmp_xmm, + srcloc, + } + } + + pub(crate) fn cvt_float_to_uint_seq( + src_size: OperandSize, + dst_size: OperandSize, + src: Writable, + dst: Writable, + tmp_gpr: Writable, + tmp_xmm: Writable, + srcloc: SourceLoc, + ) -> Inst { + debug_assert!(src.to_reg().get_class() == RegClass::V128); + debug_assert!(tmp_xmm.to_reg().get_class() == RegClass::V128); + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + Inst::CvtFloatToUintSeq { + src, + dst, + src_size, + dst_size, + tmp_gpr, + tmp_xmm, + srcloc, + } + } + pub(crate) fn movzx_rm_r( ext_mode: ExtMode, src: RegMem, @@ -844,11 +973,15 @@ impl ShowWithRRU for Inst { show_ireg_sized(dst.to_reg(), mb_rru, 8), ), - Inst::XmmToGpr { op, src, dst } => { - let dst_size = match op { - SseOpcode::Movd => 4, - SseOpcode::Movq => 8, - _ => panic!("unexpected sse opcode"), + Inst::XmmToGpr { + op, + src, + dst, + dst_size, + } => { + let dst_size = match dst_size { + OperandSize::Size32 => 4, + OperandSize::Size64 => 8, }; format!( "{} {}, {}", @@ -858,19 +991,17 @@ impl ShowWithRRU for Inst { ) } - Inst::GprToXmm { op, src, dst } => { - let src_size = match op { - SseOpcode::Movd => 4, - SseOpcode::Movq => 8, - _ => panic!("unexpected sse opcode"), - }; - format!( - "{} {}, {}", - ljustify(op.to_string()), - src.show_rru_sized(mb_rru, src_size), - dst.show_rru(mb_rru) - ) - } + Inst::GprToXmm { + op, + src, + src_size, + dst, + } => format!( + "{} {}, {}", + ljustify(op.to_string()), + src.show_rru_sized(mb_rru, 
src_size.to_bytes()), + dst.show_rru(mb_rru) + ), Inst::XMM_Cmp_RM_R { op, src, dst } => format!( "{} {}, {}", @@ -878,6 +1009,69 @@ impl ShowWithRRU for Inst { src.show_rru_sized(mb_rru, 8), show_ireg_sized(*dst, mb_rru, 8), ), + + Inst::CvtUint64ToFloatSeq { + src, dst, to_f64, .. + } => format!( + "{} {}, {}", + ljustify(format!( + "u64_to_{}_seq", + if *to_f64 { "f64" } else { "f32" } + )), + show_ireg_sized(*src, mb_rru, 8), + dst.show_rru(mb_rru), + ), + + Inst::CvtFloatToSintSeq { + src, + dst, + src_size, + dst_size, + .. + } => format!( + "{} {}, {}", + ljustify(format!( + "cvt_float{}_to_sint{}_seq", + if *src_size == OperandSize::Size64 { + "64" + } else { + "32" + }, + if *dst_size == OperandSize::Size64 { + "64" + } else { + "32" + } + )), + show_ireg_sized(src.to_reg(), mb_rru, 8), + show_ireg_sized(dst.to_reg(), mb_rru, dst_size.to_bytes()), + ), + + Inst::CvtFloatToUintSeq { + src, + dst, + src_size, + dst_size, + .. + } => format!( + "{} {}, {}", + ljustify(format!( + "cvt_float{}_to_uint{}_seq", + if *src_size == OperandSize::Size64 { + "64" + } else { + "32" + }, + if *dst_size == OperandSize::Size64 { + "64" + } else { + "32" + } + )), + show_ireg_sized(src.to_reg(), mb_rru, 8), + show_ireg_sized(dst.to_reg(), mb_rru, dst_size.to_bytes()), + ), + Inst::Imm_R { dst_is_64, simm64, @@ -1151,6 +1345,42 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { src.get_regs_as_uses(collector); collector.add_def(*dst); } + Inst::CvtUint64ToFloatSeq { + src, + dst, + tmp_gpr1, + tmp_gpr2, + .. + } => { + collector.add_use(*src); + collector.add_def(*dst); + collector.add_def(*tmp_gpr1); + collector.add_def(*tmp_gpr2); + } + Inst::CvtFloatToSintSeq { + src, + dst, + tmp_xmm, + tmp_gpr, + .. + } => { + collector.add_mod(*src); + collector.add_def(*dst); + collector.add_def(*tmp_xmm); + collector.add_def(*tmp_gpr); + } + Inst::CvtFloatToUintSeq { + src, + dst, + tmp_gpr, + tmp_xmm, + .. + } => { + collector.add_mod(*src); + collector.add_def(*dst); + collector.add_def(*tmp_gpr); + collector.add_def(*tmp_xmm); + } Inst::MovZX_RM_R { src, dst, .. } => { src.get_regs_as_uses(collector); collector.add_def(*dst); @@ -1385,6 +1615,42 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { src.map_uses(mapper); map_def(mapper, dst); } + Inst::CvtUint64ToFloatSeq { + ref mut src, + ref mut dst, + ref mut tmp_gpr1, + ref mut tmp_gpr2, + .. + } => { + map_use(mapper, src); + map_def(mapper, dst); + map_def(mapper, tmp_gpr1); + map_def(mapper, tmp_gpr2); + } + Inst::CvtFloatToSintSeq { + ref mut src, + ref mut dst, + ref mut tmp_xmm, + ref mut tmp_gpr, + .. + } => { + map_mod(mapper, src); + map_def(mapper, dst); + map_def(mapper, tmp_xmm); + map_def(mapper, tmp_gpr); + } + Inst::CvtFloatToUintSeq { + ref mut src, + ref mut dst, + ref mut tmp_gpr, + ref mut tmp_xmm, + .. 
+ } => { + map_mod(mapper, src); + map_def(mapper, dst); + map_def(mapper, tmp_gpr); + map_def(mapper, tmp_xmm); + } Inst::MovZX_RM_R { ref mut src, ref mut dst, @@ -1571,7 +1837,7 @@ impl MachInst for Inst { RegClass::V128 => match ty { F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None), F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None), - _ => panic!("unexpected V128 type in gen_move"), + _ => panic!("unexpected type {:?} in gen_move of regclass V128", ty), }, _ => panic!("gen_move(x64): unhandled regclass"), } @@ -1624,6 +1890,7 @@ impl MachInst for Inst { ret.push(Inst::gpr_to_xmm( SseOpcode::Movd, RegMem::reg(tmp.to_reg()), + OperandSize::Size32, to_reg, )); } @@ -1635,6 +1902,7 @@ impl MachInst for Inst { ret.push(Inst::gpr_to_xmm( SseOpcode::Movq, RegMem::reg(tmp.to_reg()), + OperandSize::Size64, to_reg, )); } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index b7272c9a78..4e8d1e5906 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -953,6 +953,110 @@ fn lower_insn_to_regs>( ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtsd2ss, src, dst)); } + Opcode::FcvtFromSint => { + let (ext_spec, src_size) = match ctx.input_ty(insn, 0) { + I8 | I16 => (Some(ExtSpec::SignExtendTo32), OperandSize::Size32), + I32 => (None, OperandSize::Size32), + I64 => (None, OperandSize::Size64), + _ => unreachable!(), + }; + + let src = match ext_spec { + Some(ext_spec) => RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec)), + None => input_to_reg_mem(ctx, inputs[0]), + }; + + let output_ty = ty.unwrap(); + let opcode = if output_ty == F32 { + SseOpcode::Cvtsi2ss + } else { + assert_eq!(output_ty, F64); + SseOpcode::Cvtsi2sd + }; + + let dst = output_to_reg(ctx, outputs[0]); + ctx.emit(Inst::gpr_to_xmm(opcode, src, src_size, dst)); + } + + Opcode::FcvtFromUint => { + let dst = output_to_reg(ctx, outputs[0]); + let ty = ty.unwrap(); + + let input_ty = ctx.input_ty(insn, 0); + match input_ty { + I8 | I16 | I32 => { + // Conversion from an unsigned int smaller than 64-bit is easy: zero-extend + + // do a signed conversion (which won't overflow). 
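+                    // For example, the u32 value 0xffff_ffff zero-extends to the i64 value
+                    // 4294967295; any value zero-extended from 32 bits or fewer stays well
+                    // below i64::MAX, so the signed cvtsi2ss/cvtsi2sd produces the correct
+                    // unsigned result.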
+ let opcode = if ty == F32 { + SseOpcode::Cvtsi2ss + } else { + assert_eq!(ty, F64); + SseOpcode::Cvtsi2sd + }; + + let src = + RegMem::reg(extend_input_to_reg(ctx, inputs[0], ExtSpec::ZeroExtendTo64)); + ctx.emit(Inst::gpr_to_xmm(opcode, src, OperandSize::Size64, dst)); + } + + I64 => { + let src = input_to_reg(ctx, inputs[0]); + let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I64); + let tmp_gpr2 = ctx.alloc_tmp(RegClass::I64, I64); + ctx.emit(Inst::cvt_u64_to_float_seq( + ty == F64, + src, + tmp_gpr1, + tmp_gpr2, + dst, + )); + } + + _ => panic!("unexpected input type for FcvtFromUint: {:?}", input_ty), + }; + } + + Opcode::FcvtToUint | Opcode::FcvtToSint => { + let src = input_to_reg(ctx, inputs[0]); + let dst = output_to_reg(ctx, outputs[0]); + + let input_ty = ctx.input_ty(insn, 0); + let src_size = if input_ty == F32 { + OperandSize::Size32 + } else { + assert_eq!(input_ty, F64); + OperandSize::Size64 + }; + + let output_ty = ty.unwrap(); + let dst_size = if output_ty == I32 { + OperandSize::Size32 + } else { + assert_eq!(output_ty, I64); + OperandSize::Size64 + }; + + let to_signed = op == Opcode::FcvtToSint; + + let src_copy = ctx.alloc_tmp(RegClass::V128, input_ty); + ctx.emit(Inst::gen_move(src_copy, src, input_ty)); + + let srcloc = ctx.srcloc(insn); + if to_signed { + let tmp_xmm = ctx.alloc_tmp(RegClass::V128, input_ty); + let tmp_gpr = ctx.alloc_tmp(RegClass::I64, output_ty); + ctx.emit(Inst::cvt_float_to_sint_seq( + src_size, dst_size, src_copy, dst, tmp_xmm, tmp_gpr, srcloc, + )); + } else { + let tmp_xmm = ctx.alloc_tmp(RegClass::V128, input_ty); + let tmp_gpr = ctx.alloc_tmp(RegClass::I64, output_ty); + ctx.emit(Inst::cvt_float_to_uint_seq( + src_size, dst_size, src_copy, dst, tmp_gpr, tmp_xmm, srcloc, + )); + } + } + Opcode::Bitcast => { let input_ty = ctx.input_ty(insn, 0); let output_ty = ctx.output_ty(insn, 0); @@ -960,22 +1064,42 @@ fn lower_insn_to_regs>( (F32, I32) => { let src = input_to_reg(ctx, inputs[0]); let dst = output_to_reg(ctx, outputs[0]); - ctx.emit(Inst::xmm_to_gpr(SseOpcode::Movd, src, dst)); + ctx.emit(Inst::xmm_to_gpr( + SseOpcode::Movd, + src, + dst, + OperandSize::Size32, + )); } (I32, F32) => { let src = input_to_reg_mem(ctx, inputs[0]); let dst = output_to_reg(ctx, outputs[0]); - ctx.emit(Inst::gpr_to_xmm(SseOpcode::Movd, src, dst)); + ctx.emit(Inst::gpr_to_xmm( + SseOpcode::Movd, + src, + OperandSize::Size32, + dst, + )); } (F64, I64) => { let src = input_to_reg(ctx, inputs[0]); let dst = output_to_reg(ctx, outputs[0]); - ctx.emit(Inst::xmm_to_gpr(SseOpcode::Movq, src, dst)); + ctx.emit(Inst::xmm_to_gpr( + SseOpcode::Movq, + src, + dst, + OperandSize::Size64, + )); } (I64, F64) => { let src = input_to_reg_mem(ctx, inputs[0]); let dst = output_to_reg(ctx, outputs[0]); - ctx.emit(Inst::gpr_to_xmm(SseOpcode::Movq, src, dst)); + ctx.emit(Inst::gpr_to_xmm( + SseOpcode::Movq, + src, + OperandSize::Size64, + dst, + )); } _ => unreachable!("invalid bitcast from {:?} to {:?}", input_ty, output_ty), } @@ -1000,6 +1124,7 @@ fn lower_insn_to_regs>( ctx.emit(Inst::gpr_to_xmm( SseOpcode::Movd, RegMem::reg(tmp_gpr1.to_reg()), + OperandSize::Size32, tmp_xmm1, )); ctx.emit(Inst::xmm_mov(