diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 1c5c6f9a1c..814f846645 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -2065,7 +2065,12 @@ impl MachInst for Inst { Inst::mov(to_reg, from_reg) } - fn gen_constant(to_reg: Writable, value: u64, ty: Type) -> SmallVec<[Inst; 4]> { + fn gen_constant Writable>( + to_reg: Writable, + value: u64, + ty: Type, + _alloc_tmp: F, + ) -> SmallVec<[Inst; 4]> { if ty == F64 { let mut ret = SmallVec::new(); ret.push(Inst::load_fp_constant64(to_reg, f64::from_bits(value))); diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index d60fdfe144..2f440f8415 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -219,7 +219,11 @@ pub(crate) fn put_input_in_reg>( }; // Generate constants fresh at each use to minimize long-range register pressure. 
let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty); - for inst in Inst::gen_constant(to_reg, masked, ty).into_iter() { + for inst in Inst::gen_constant(to_reg, masked, ty, |reg_class, ty| { + ctx.alloc_tmp(reg_class, ty) + }) + .into_iter() + { ctx.emit(inst); } to_reg.to_reg() diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index dadaa6e359..25eb6e77d3 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -957,25 +957,39 @@ fn adjust_stack>(ctx: &mut C, amount: u64, is_sub: bool) { } fn load_stack(mem: impl Into, into_reg: Writable, ty: Type) -> Inst { - let ext_mode = match ty { - types::B1 | types::B8 | types::I8 => Some(ExtMode::BQ), - types::B16 | types::I16 => Some(ExtMode::WQ), - types::B32 | types::I32 => Some(ExtMode::LQ), - types::B64 | types::I64 => None, - types::F32 => todo!("f32 load_stack"), - types::F64 => todo!("f64 load_stack"), - _ => unimplemented!("load_stack({})", ty), + let (is_int, ext_mode) = match ty { + types::B1 | types::B8 | types::I8 => (true, Some(ExtMode::BQ)), + types::B16 | types::I16 => (true, Some(ExtMode::WQ)), + types::B32 | types::I32 => (true, Some(ExtMode::LQ)), + types::B64 | types::I64 => (true, None), + types::F32 | types::F64 => (false, None), + _ => panic!("load_stack({})", ty), }; let mem = mem.into(); - match ext_mode { - Some(ext_mode) => Inst::movsx_rm_r( - ext_mode, + + if is_int { + match ext_mode { + Some(ext_mode) => Inst::movsx_rm_r( + ext_mode, + RegMem::mem(mem), + into_reg, + /* infallible load */ None, + ), + None => Inst::mov64_m_r(mem, into_reg, None /* infallible */), + } + } else { + let sse_op = match ty { + types::F32 => SseOpcode::Movss, + types::F64 => SseOpcode::Movsd, + _ => unreachable!(), + }; + Inst::xmm_mov( + sse_op, RegMem::mem(mem), into_reg, - /* infallible load */ None, - ), - None => Inst::mov64_m_r(mem, into_reg, None /* infallible */), + None, /* infallible */ + ) } } @@ -993,7 +1007,12 @@ fn 
store_stack(mem: impl Into, from_reg: Reg, ty: Type) -> Inst if is_int { Inst::mov_r_m(size, from_reg, mem, /* infallible store */ None) } else { - unimplemented!("f32/f64 store_stack"); + let sse_op = match size { + 4 => SseOpcode::Movss, + 8 => SseOpcode::Movsd, + _ => unreachable!(), + }; + Inst::xmm_mov_r_m(sse_op, from_reg, mem, /* infallible store */ None) } } diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 4090f2d33a..e624670a44 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -194,6 +194,13 @@ impl RegMemImm { Self::Imm { simm32 } } + /// Asserts that in register mode, the reg class is the one that's expected. + pub(crate) fn assert_regclass_is(&self, expected_reg_class: RegClass) { + if let Self::Reg { reg } = self { + debug_assert_eq!(reg.get_class(), expected_reg_class); + } + } + /// Add the regs mentioned by `self` to `collector`. pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { match self { @@ -234,6 +241,12 @@ impl RegMem { pub(crate) fn mem(addr: impl Into) -> Self { Self::Mem { addr: addr.into() } } + /// Asserts that in register mode, the reg class is the one that's expected. + pub(crate) fn assert_regclass_is(&self, expected_reg_class: RegClass) { + if let Self::Reg { reg } = self { + debug_assert_eq!(reg.get_class(), expected_reg_class); + } + } /// Add the regs mentioned by `self` to `collector`. pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { match self { @@ -346,6 +359,7 @@ pub enum SseOpcode { Minsd, Movaps, Movd, + Movq, Movss, Movsd, Mulss, @@ -399,6 +413,7 @@ impl SseOpcode { | SseOpcode::Maxsd | SseOpcode::Minsd | SseOpcode::Movd + | SseOpcode::Movq | SseOpcode::Movsd | SseOpcode::Mulsd | SseOpcode::Sqrtsd @@ -411,7 +426,7 @@ impl SseOpcode { } } - /// Returns the src operand size for an instruction + /// Returns the src operand size for an instruction. 
pub(crate) fn src_size(&self) -> u8 { match self { SseOpcode::Movd => 4, @@ -445,6 +460,7 @@ impl fmt::Debug for SseOpcode { SseOpcode::Minsd => "minsd", SseOpcode::Movaps => "movaps", SseOpcode::Movd => "movd", + SseOpcode::Movq => "movq", SseOpcode::Movss => "movss", SseOpcode::Movsd => "movsd", SseOpcode::Mulss => "mulss", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 432bbc9a0d..d932682a28 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1183,6 +1183,28 @@ pub(crate) fn emit( } } + Inst::XmmCmove { + is_64, + cc, + src, + dst, + } => { + let next = sink.get_label(); + + // Jump if cc is *not* set. + one_way_jmp(sink, cc.invert(), next); + + let op = if *is_64 { + SseOpcode::Movsd + } else { + SseOpcode::Movss + }; + let inst = Inst::xmm_unary_rm_r(op, src.clone(), *dst); + inst.emit(sink, flags, state); + + sink.bind_label(next); + } + Inst::Push64 { src } => { match src { RegMemImm::Reg { reg } => { @@ -1462,18 +1484,22 @@ pub(crate) fn emit( sink.bind_label(else_label); } - Inst::XMM_Mov_RM_R { + Inst::XmmUnaryRmR { op, src: src_e, dst: reg_g, srcloc, } => { let rex = RexFlags::clear_w(); + let (prefix, opcode) = match op { SseOpcode::Movaps => (LegacyPrefix::None, 0x0F28), - SseOpcode::Movd => (LegacyPrefix::_66, 0x0F6E), SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10), SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10), + SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51), + SseOpcode::Sqrtsd => (LegacyPrefix::_F2, 0x0F51), + SseOpcode::Cvtss2sd => (LegacyPrefix::_F3, 0x0F5A), + SseOpcode::Cvtsd2ss => (LegacyPrefix::_F2, 0x0F5A), _ => unimplemented!("Opcode {:?} not implemented", op), }; @@ -1489,7 +1515,7 @@ pub(crate) fn emit( } emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex); } - } + }; } Inst::XMM_RM_R { @@ -1500,13 +1526,20 @@ pub(crate) fn emit( let rex = RexFlags::clear_w(); let (prefix, opcode) = match op { 
SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58), + SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58), SseOpcode::Andps => (LegacyPrefix::None, 0x0F54), SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55), - SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E), SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59), + SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59), SseOpcode::Orps => (LegacyPrefix::None, 0x0F56), SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C), - SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51), + SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C), + SseOpcode::Minss => (LegacyPrefix::_F3, 0x0F5D), + SseOpcode::Minsd => (LegacyPrefix::_F2, 0x0F5D), + SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E), + SseOpcode::Divsd => (LegacyPrefix::_F2, 0x0F5E), + SseOpcode::Maxss => (LegacyPrefix::_F3, 0x0F5F), + SseOpcode::Maxsd => (LegacyPrefix::_F2, 0x0F5F), _ => unimplemented!("Opcode {:?} not implemented", op), }; @@ -1521,25 +1554,53 @@ pub(crate) fn emit( } } - Inst::XMM_Mov_R_M { + Inst::Xmm_Mov_R_M { op, src, dst, srcloc, } => { - let rex = RexFlags::clear_w(); let (prefix, opcode) = match op { - SseOpcode::Movd => (LegacyPrefix::_66, 0x0F7E), SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F11), - _ => unimplemented!("Emit xmm mov r m"), + SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F11), + _ => unimplemented!("Opcode {:?} not implemented", op), }; - let dst = &dst.finalize(state); if let Some(srcloc) = *srcloc { // Register the offset at which the actual load instruction starts. 
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); } - emit_std_reg_mem(sink, prefix, opcode, 2, *src, dst, rex); + emit_std_reg_mem(sink, prefix, opcode, 2, *src, dst, RexFlags::clear_w()); + } + + Inst::XmmToGpr { op, src, dst } => { + let (rex, prefix, opcode) = match op { + SseOpcode::Movd => (RexFlags::clear_w(), LegacyPrefix::_66, 0x0F7E), + SseOpcode::Movq => (RexFlags::set_w(), LegacyPrefix::_66, 0x0F7E), + _ => panic!("unexpected opcode {:?}", op), + }; + emit_std_reg_reg(sink, prefix, opcode, 2, *src, dst.to_reg(), rex); + } + + Inst::GprToXmm { + op, + src: src_e, + dst: reg_g, + } => { + let (rex, prefix, opcode) = match op { + SseOpcode::Movd => (RexFlags::clear_w(), LegacyPrefix::_66, 0x0F6E), + SseOpcode::Movq => (RexFlags::set_w(), LegacyPrefix::_66, 0x0F6E), + _ => panic!("unexpected opcode {:?}", op), + }; + match src_e { + RegMem::Reg { reg: reg_e } => { + emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex); + } + RegMem::Mem { addr } => { + let addr = &addr.finalize(state); + emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex); + } + } } Inst::LoadExtName { diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index ee4674729b..655aad43bc 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -2877,28 +2877,18 @@ fn test_x64_emit() { )); // ======================================================== - // XMM_RM_R + // XMM_RM_R: float binary ops insns.push(( Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm1), w_xmm0), "F30F58C1", "addss %xmm1, %xmm0", )); - insns.push(( - Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm0), w_xmm1), - "F30F5CC8", - "subss %xmm0, %xmm1", - )); insns.push(( Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13), "F3450F58EB", "addss %xmm11, %xmm13", )); - insns.push(( - Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1), - "F3410F5CCC", - "subss 
%xmm12, %xmm1", - )); insns.push(( Inst::xmm_rm_r( SseOpcode::Addss, @@ -2908,6 +2898,22 @@ fn test_x64_emit() { "F3410F5844927B", "addss 123(%r10,%rdx,4), %xmm0", )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Addsd, RegMem::reg(xmm15), w_xmm4), + "F2410F58E7", + "addsd %xmm15, %xmm4", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm0), w_xmm1), + "F30F5CC8", + "subss %xmm0, %xmm1", + )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1), + "F3410F5CCC", + "subss %xmm12, %xmm1", + )); insns.push(( Inst::xmm_rm_r( SseOpcode::Subss, @@ -2917,86 +2923,157 @@ fn test_x64_emit() { "F3450F5C94C241010000", "subss 321(%r10,%rax,8), %xmm10", )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Subsd, RegMem::reg(xmm5), w_xmm14), + "F2440F5CF5", + "subsd %xmm5, %xmm14", + )); + insns.push(( Inst::xmm_rm_r(SseOpcode::Mulss, RegMem::reg(xmm5), w_xmm4), "F30F59E5", "mulss %xmm5, %xmm4", )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Mulsd, RegMem::reg(xmm5), w_xmm4), + "F20F59E5", + "mulsd %xmm5, %xmm4", + )); + insns.push(( Inst::xmm_rm_r(SseOpcode::Divss, RegMem::reg(xmm8), w_xmm7), "F3410F5EF8", "divss %xmm8, %xmm7", )); insns.push(( - Inst::xmm_rm_r(SseOpcode::Sqrtss, RegMem::reg(xmm7), w_xmm8), - "F3440F51C7", - "sqrtss %xmm7, %xmm8", + Inst::xmm_rm_r(SseOpcode::Divsd, RegMem::reg(xmm5), w_xmm4), + "F20F5EE5", + "divsd %xmm5, %xmm4", )); + insns.push(( Inst::xmm_rm_r(SseOpcode::Andps, RegMem::reg(xmm3), w_xmm12), "440F54E3", "andps %xmm3, %xmm12", )); + insns.push(( Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(xmm4), w_xmm11), "440F55DC", "andnps %xmm4, %xmm11", )); - insns.push(( - Inst::xmm_mov_rm_r(SseOpcode::Movaps, RegMem::reg(xmm5), w_xmm14, None), - "440F28F5", - "movaps %xmm5, %xmm14", - )); - insns.push(( - Inst::xmm_mov_rm_r(SseOpcode::Movd, RegMem::reg(rax), w_xmm15, None), - "66440F6EF8", - "movd %eax, %xmm15", - )); + insns.push(( Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm1), w_xmm15), "440F56F9", "orps 
%xmm1, %xmm15", )); - - insns.push(( - Inst::xmm_mov_r_m(SseOpcode::Movd, xmm0, Amode::imm_reg(321, rbx), None), - "660F7E8341010000", - "movd %xmm0, 321(%rbx)", - )); - - insns.push(( - Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None), - "F3450F11BC2480000000", - "movss %xmm15, 128(%r12)", - )); - - insns.push(( - Inst::xmm_mov_rm_r( - SseOpcode::Movd, - RegMem::mem(Amode::imm_reg(2, r10)), - w_xmm9, - None, - ), - "66450F6E4A02", - "movd 2(%r10), %xmm9", - )); - insns.push(( Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm5), w_xmm4), "0F56E5", "orps %xmm5, %xmm4", )); + + // XMM_Mov_R_M: float stores insns.push(( - Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(xmm13), w_xmm2, None), + Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None), + "F3450F11BC2480000000", + "movss %xmm15, 128(%r12)", + )); + insns.push(( + Inst::xmm_mov_r_m(SseOpcode::Movsd, xmm1, Amode::imm_reg(0, rsi), None), + "F20F110E", + "movsd %xmm1, 0(%rsi)", + )); + + // XmmUnary: moves and unary float ops + insns.push(( + Inst::xmm_unary_rm_r(SseOpcode::Movss, RegMem::reg(xmm13), w_xmm2), "F3410F10D5", "movss %xmm13, %xmm2", )); + insns.push(( - Inst::xmm_mov_rm_r(SseOpcode::Movsd, RegMem::reg(xmm14), w_xmm3, None), + Inst::xmm_unary_rm_r(SseOpcode::Movsd, RegMem::reg(xmm0), w_xmm1), + "F20F10C8", + "movsd %xmm0, %xmm1", + )); + insns.push(( + Inst::xmm_unary_rm_r( + SseOpcode::Movsd, + RegMem::mem(Amode::imm_reg(0, rsi)), + w_xmm2, + ), + "F20F1016", + "movsd 0(%rsi), %xmm2", + )); + insns.push(( + Inst::xmm_unary_rm_r(SseOpcode::Movsd, RegMem::reg(xmm14), w_xmm3), "F2410F10DE", "movsd %xmm14, %xmm3", )); + insns.push(( + Inst::xmm_unary_rm_r(SseOpcode::Movaps, RegMem::reg(xmm5), w_xmm14), + "440F28F5", + "movaps %xmm5, %xmm14", + )); + + insns.push(( + Inst::xmm_unary_rm_r(SseOpcode::Sqrtss, RegMem::reg(xmm7), w_xmm8), + "F3440F51C7", + "sqrtss %xmm7, %xmm8", + )); + insns.push(( + Inst::xmm_unary_rm_r(SseOpcode::Sqrtsd, RegMem::reg(xmm1), 
w_xmm2), + "F20F51D1", + "sqrtsd %xmm1, %xmm2", + )); + + insns.push(( + Inst::xmm_unary_rm_r(SseOpcode::Cvtss2sd, RegMem::reg(xmm0), w_xmm1), + "F30F5AC8", + "cvtss2sd %xmm0, %xmm1", + )); + insns.push(( + Inst::xmm_unary_rm_r(SseOpcode::Cvtsd2ss, RegMem::reg(xmm1), w_xmm0), + "F20F5AC1", + "cvtsd2ss %xmm1, %xmm0", + )); + + // Xmm to int conversions, and conversely. + + insns.push(( + Inst::xmm_to_gpr(SseOpcode::Movd, xmm0, w_rsi), + "660F7EC6", + "movd %xmm0, %esi", + )); + insns.push(( + Inst::xmm_to_gpr(SseOpcode::Movq, xmm2, w_rdi), + "66480F7ED7", + "movq %xmm2, %rdi", + )); + insns.push(( + Inst::gpr_to_xmm(SseOpcode::Movd, RegMem::reg(rax), w_xmm15), + "66440F6EF8", + "movd %eax, %xmm15", + )); + insns.push(( + Inst::gpr_to_xmm(SseOpcode::Movd, RegMem::mem(Amode::imm_reg(2, r10)), w_xmm9), + "66450F6E4A02", + "movd 2(%r10), %xmm9", + )); + insns.push(( + Inst::gpr_to_xmm(SseOpcode::Movd, RegMem::reg(rsi), w_xmm1), + "660F6ECE", + "movd %esi, %xmm1", + )); + insns.push(( + Inst::gpr_to_xmm(SseOpcode::Movq, RegMem::reg(rdi), w_xmm15), + "664C0F6EFF", + "movq %rdi, %xmm15", + )); + // ======================================================== // Misc instructions. diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index d0c9549892..5461f645f2 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -190,18 +190,20 @@ pub enum Inst { // ===================================== // Floating-point operations. - /// Float arithmetic/bit-twiddling: (add sub and or xor mul adc? sbb?) (32 64) (reg addr) reg + /// XMM (scalar or vector) binary op: (add sub and or xor mul adc? sbb?) 
(32 64) (reg addr) reg XMM_RM_R { op: SseOpcode, src: RegMem, dst: Writable, }, - /// mov between XMM registers (32 64) (reg addr) reg XMM_Mov_RM_R differs from XMM_RM_R in - /// that the dst register of XMM_MOV_RM_R is not used in the computation of the instruction - /// dst value and so does not have to be a previously valid value. This is characteristic of - /// mov instructions. - XMM_Mov_RM_R { + /// XMM (scalar or vector) unary op: mov between XMM registers (32 64) (reg addr) reg, sqrt, + /// etc. + /// + /// This differs from XMM_RM_R in that the dst register of XmmUnaryRmR is not used in the + /// computation of the instruction dst value and so does not have to be a previously valid + /// value. This is characteristic of mov instructions. + XmmUnaryRmR { op: SseOpcode, src: RegMem, dst: Writable, @@ -209,8 +211,8 @@ pub enum Inst { srcloc: Option, }, - /// mov reg addr (good for all memory stores from xmm registers) - XMM_Mov_R_M { + /// XMM (scalar or vector) unary op (from xmm to reg/mem): stores, movd, movq + Xmm_Mov_R_M { op: SseOpcode, src: Reg, dst: SyntheticAmode, @@ -218,6 +220,30 @@ pub enum Inst { srcloc: Option, }, + /// XMM (scalar) unary op (from xmm to integer reg): movd, movq + XmmToGpr { + op: SseOpcode, + src: Reg, + dst: Writable, + }, + + /// XMM (scalar) unary op (from integer to float reg): movd, movq + GprToXmm { + op: SseOpcode, + src: RegMem, + dst: Writable, + }, + + /// XMM (scalar) conditional move. + /// Overwrites the destination register if cc is set. + XmmCmove { + /// Whether the cmove is moving either 32 or 64 bits. + is_64: bool, + cc: CC, + src: RegMem, + dst: Writable, + }, + // ===================================== // Control flow instructions. /// Direct call: call simm32. 
@@ -318,6 +344,7 @@ impl Inst { src: RegMemImm, dst: Writable, ) -> Self { + src.assert_regclass_is(RegClass::I64); debug_assert!(dst.to_reg().get_class() == RegClass::I64); Self::Alu_RMI_R { is_64, @@ -333,12 +360,14 @@ impl Inst { src: RegMem, dst: Writable, ) -> Self { + src.assert_regclass_is(RegClass::I64); debug_assert!(dst.to_reg().get_class() == RegClass::I64); debug_assert!(size == 8 || size == 4 || size == 2); Self::UnaryRmR { size, op, src, dst } } pub(crate) fn div(size: u8, signed: bool, divisor: RegMem, loc: SourceLoc) -> Inst { + divisor.assert_regclass_is(RegClass::I64); debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); Inst::Div { size, @@ -349,6 +378,7 @@ impl Inst { } pub(crate) fn mul_hi(size: u8, signed: bool, rhs: RegMem) -> Inst { + rhs.assert_regclass_is(RegClass::I64); debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); Inst::MulHi { size, signed, rhs } } @@ -374,20 +404,30 @@ impl Inst { } } + pub(crate) fn imm32_r_unchecked(simm64: u64, dst: Writable) -> Inst { + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + Inst::Imm_R { + dst_is_64: false, + simm64, + dst, + } + } + pub(crate) fn mov_r_r(is_64: bool, src: Reg, dst: Writable) -> Inst { debug_assert!(src.get_class() == RegClass::I64); debug_assert!(dst.to_reg().get_class() == RegClass::I64); Inst::Mov_R_R { is_64, src, dst } } - pub(crate) fn xmm_mov_rm_r( + pub(crate) fn xmm_mov( op: SseOpcode, src: RegMem, dst: Writable, srcloc: Option, ) -> Inst { + src.assert_regclass_is(RegClass::V128); debug_assert!(dst.to_reg().get_class() == RegClass::V128); - Inst::XMM_Mov_RM_R { + Inst::XmmUnaryRmR { op, src, dst, @@ -395,7 +435,20 @@ impl Inst { } } + /// Convenient helper for unary float operations. 
+ pub(crate) fn xmm_unary_rm_r(op: SseOpcode, src: RegMem, dst: Writable) -> Inst { + src.assert_regclass_is(RegClass::V128); + debug_assert!(dst.to_reg().get_class() == RegClass::V128); + Inst::XmmUnaryRmR { + op, + src, + dst, + srcloc: None, + } + } + pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable) -> Self { + src.assert_regclass_is(RegClass::V128); debug_assert!(dst.to_reg().get_class() == RegClass::V128); Inst::XMM_RM_R { op, src, dst } } @@ -407,7 +460,7 @@ impl Inst { srcloc: Option, ) -> Inst { debug_assert!(src.get_class() == RegClass::V128); - Inst::XMM_Mov_R_M { + Inst::Xmm_Mov_R_M { op, src, dst: dst.into(), @@ -415,12 +468,25 @@ impl Inst { } } + pub(crate) fn xmm_to_gpr(op: SseOpcode, src: Reg, dst: Writable) -> Inst { + debug_assert!(src.get_class() == RegClass::V128); + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + Inst::XmmToGpr { op, src, dst } + } + + pub(crate) fn gpr_to_xmm(op: SseOpcode, src: RegMem, dst: Writable) -> Inst { + src.assert_regclass_is(RegClass::I64); + debug_assert!(dst.to_reg().get_class() == RegClass::V128); + Inst::GprToXmm { op, src, dst } + } + pub(crate) fn movzx_rm_r( ext_mode: ExtMode, src: RegMem, dst: Writable, srcloc: Option, ) -> Inst { + src.assert_regclass_is(RegClass::I64); debug_assert!(dst.to_reg().get_class() == RegClass::I64); Inst::MovZX_RM_R { ext_mode, @@ -436,6 +502,7 @@ impl Inst { dst: Writable, srcloc: Option, ) -> Inst { + src.assert_regclass_is(RegClass::I64); debug_assert!(dst.to_reg().get_class() == RegClass::I64); Inst::MovSX_RM_R { ext_mode, @@ -460,6 +527,7 @@ impl Inst { /// A convenience function to be able to use a RegMem as the source of a move. 
pub(crate) fn mov64_rm_r(src: RegMem, dst: Writable, srcloc: Option) -> Inst { + src.assert_regclass_is(RegClass::I64); match src { RegMem::Reg { reg } => Self::mov_r_r(true, reg, dst), RegMem::Mem { addr } => Self::mov64_m_r(addr, dst, srcloc), @@ -517,6 +585,7 @@ impl Inst { src: RegMemImm, dst: Reg, ) -> Inst { + src.assert_regclass_is(RegClass::I64); debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); debug_assert!(dst.get_class() == RegClass::I64); Inst::Cmp_RMI_R { size, src, dst } @@ -539,11 +608,24 @@ impl Inst { Inst::Cmove { size, cc, src, dst } } + pub(crate) fn xmm_cmove(is_64: bool, cc: CC, src: RegMem, dst: Writable) -> Inst { + src.assert_regclass_is(RegClass::V128); + debug_assert!(dst.to_reg().get_class() == RegClass::V128); + Inst::XmmCmove { + is_64, + cc, + src, + dst, + } + } + pub(crate) fn push64(src: RegMemImm) -> Inst { + src.assert_regclass_is(RegClass::I64); Inst::Push64 { src } } pub(crate) fn pop64(dst: Writable) -> Inst { + debug_assert!(dst.to_reg().get_class() == RegClass::I64); Inst::Pop64 { dst } } @@ -570,6 +652,7 @@ impl Inst { loc: SourceLoc, opcode: Opcode, ) -> Inst { + dest.assert_regclass_is(RegClass::I64); Inst::CallUnknown { dest, uses, @@ -600,6 +683,7 @@ impl Inst { } pub(crate) fn jmp_unknown(target: RegMem) -> Inst { + target.assert_regclass_is(RegClass::I64); Inst::JmpUnknown { target } } @@ -689,6 +773,7 @@ impl ShowWithRRU for Inst { }), divisor.show_rru_sized(mb_rru, *size) ), + Inst::MulHi { size, signed, rhs, .. } => format!( @@ -700,6 +785,7 @@ impl ShowWithRRU for Inst { }), rhs.show_rru_sized(mb_rru, *size) ), + Inst::CheckedDivOrRemSeq { kind, size, @@ -715,6 +801,7 @@ impl ShowWithRRU for Inst { }, show_ireg_sized(*divisor, mb_rru, *size), ), + Inst::SignExtendRaxRdx { size } => match size { 2 => "cwd", 4 => "cdq", @@ -722,24 +809,56 @@ impl ShowWithRRU for Inst { _ => unreachable!(), } .into(), - Inst::XMM_Mov_RM_R { op, src, dst, .. } => format!( + + Inst::XmmUnaryRmR { op, src, dst, .. 
} => format!( "{} {}, {}", ljustify(op.to_string()), src.show_rru_sized(mb_rru, op.src_size()), show_ireg_sized(dst.to_reg(), mb_rru, 8), ), - Inst::XMM_Mov_R_M { op, src, dst, .. } => format!( + + Inst::Xmm_Mov_R_M { op, src, dst, .. } => format!( "{} {}, {}", ljustify(op.to_string()), show_ireg_sized(*src, mb_rru, 8), - dst.show_rru(mb_rru) + dst.show_rru(mb_rru), ), + Inst::XMM_RM_R { op, src, dst } => format!( "{} {}, {}", ljustify(op.to_string()), src.show_rru_sized(mb_rru, 8), show_ireg_sized(dst.to_reg(), mb_rru, 8), ), + + Inst::XmmToGpr { op, src, dst } => { + let dst_size = match op { + SseOpcode::Movd => 4, + SseOpcode::Movq => 8, + _ => panic!("unexpected sse opcode"), + }; + format!( + "{} {}, {}", + ljustify(op.to_string()), + src.show_rru(mb_rru), + show_ireg_sized(dst.to_reg(), mb_rru, dst_size), + ) + } + + Inst::GprToXmm { op, src, dst } => { + let src_size = match op { + SseOpcode::Movd => 4, + SseOpcode::Movq => 8, + _ => panic!("unexpected sse opcode"), + }; + format!( + "{} {}, {}", + ljustify(op.to_string()), + src.show_rru_sized(mb_rru, src_size), + dst.show_rru(mb_rru) + ) + } + Inst::Imm_R { dst_is_64, simm64, @@ -761,12 +880,14 @@ impl ShowWithRRU for Inst { ) } } + Inst::Mov_R_R { is_64, src, dst } => format!( "{} {}, {}", ljustify2("mov".to_string(), suffixLQ(*is_64)), show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)), show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)) ), + Inst::MovZX_RM_R { ext_mode, src, dst, .. } => { @@ -786,18 +907,21 @@ impl ShowWithRRU for Inst { ) } } + Inst::Mov64_M_R { src, dst, .. } => format!( "{} {}, {}", ljustify("movq".to_string()), src.show_rru(mb_rru), dst.show_rru(mb_rru) ), + Inst::LoadEffectiveAddress { addr, dst } => format!( "{} {}, {}", ljustify("lea".to_string()), addr.show_rru(mb_rru), dst.show_rru(mb_rru) ), + Inst::MovSX_RM_R { ext_mode, src, dst, .. 
} => format!( @@ -806,12 +930,14 @@ impl ShowWithRRU for Inst { src.show_rru_sized(mb_rru, ext_mode.src_size()), show_ireg_sized(dst.to_reg(), mb_rru, ext_mode.dst_size()) ), + Inst::Mov_R_M { size, src, dst, .. } => format!( "{} {}, {}", ljustify2("mov".to_string(), suffixBWLQ(*size)), show_ireg_sized(*src, mb_rru, *size), dst.show_rru(mb_rru) ), + Inst::Shift_R { is_64, kind, @@ -831,40 +957,67 @@ impl ShowWithRRU for Inst { show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)) ), }, + Inst::Cmp_RMI_R { size, src, dst } => format!( "{} {}, {}", ljustify2("cmp".to_string(), suffixBWLQ(*size)), src.show_rru_sized(mb_rru, *size), show_ireg_sized(*dst, mb_rru, *size) ), + Inst::Setcc { cc, dst } => format!( "{} {}", ljustify2("set".to_string(), cc.to_string()), show_ireg_sized(dst.to_reg(), mb_rru, 1) ), + Inst::Cmove { size, cc, src, dst } => format!( "{} {}, {}", ljustify(format!("cmov{}{}", cc.to_string(), suffixBWLQ(*size))), src.show_rru_sized(mb_rru, *size), show_ireg_sized(dst.to_reg(), mb_rru, *size) ), + + Inst::XmmCmove { + is_64, + cc, + src, + dst, + } => { + let size = if *is_64 { 8 } else { 4 }; + format!( + "j{} $next; mov{} {}, {}; $next: ", + cc.invert().to_string(), + if *is_64 { "sd" } else { "ss" }, + src.show_rru_sized(mb_rru, size), + show_ireg_sized(dst.to_reg(), mb_rru, size) + ) + } + Inst::Push64 { src } => { format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru)) } + Inst::Pop64 { dst } => { format!("{} {}", ljustify("popq".to_string()), dst.show_rru(mb_rru)) } + Inst::CallKnown { dest, .. } => format!("{} {:?}", ljustify("call".to_string()), dest), + Inst::CallUnknown { dest, .. 
} => format!( "{} *{}", ljustify("call".to_string()), dest.show_rru(mb_rru) ), + Inst::Ret => "ret".to_string(), + Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(), + Inst::JmpKnown { dst } => { format!("{} {}", ljustify("jmp".to_string()), dst.show_rru(mb_rru)) } + Inst::JmpCond { cc, taken, @@ -875,18 +1028,21 @@ impl ShowWithRRU for Inst { taken.show_rru(mb_rru), not_taken.show_rru(mb_rru) ), + Inst::JmpTableSeq { idx, .. } => { format!("{} {}", ljustify("br_table".into()), idx.show_rru(mb_rru)) } - // + Inst::JmpUnknown { target } => format!( "{} *{}", ljustify("jmp".to_string()), target.show_rru(mb_rru) ), + Inst::TrapIf { cc, trap_code, .. } => { format!("j{} ; ud2 {} ;", cc.invert().to_string(), trap_code) } + Inst::LoadExtName { dst, name, offset, .. } => format!( @@ -896,8 +1052,11 @@ impl ShowWithRRU for Inst { offset, show_ireg_sized(dst.to_reg(), mb_rru, 8), ), + Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset), + Inst::Hlt => "hlt".into(), + Inst::Ud2 { trap_info } => format!("ud2 {}", trap_info.1), } } @@ -946,7 +1105,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_use(regs::rax()); collector.add_mod(Writable::from_reg(regs::rdx())); } - Inst::UnaryRmR { src, dst, .. } | Inst::XMM_Mov_RM_R { src, dst, .. } => { + Inst::UnaryRmR { src, dst, .. } | Inst::XmmUnaryRmR { src, dst, .. } => { src.get_regs_as_uses(collector); collector.add_def(*dst); } @@ -954,17 +1113,21 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { src.get_regs_as_uses(collector); collector.add_mod(*dst); } - Inst::XMM_Mov_R_M { src, dst, .. } => { + Inst::Xmm_Mov_R_M { src, dst, .. } => { collector.add_use(*src); dst.get_regs_as_uses(collector); } Inst::Imm_R { dst, .. } => { collector.add_def(*dst); } - Inst::Mov_R_R { src, dst, .. } => { + Inst::Mov_R_R { src, dst, .. } | Inst::XmmToGpr { src, dst, .. 
} => { collector.add_use(*src); collector.add_def(*dst); } + Inst::GprToXmm { src, dst, .. } => { + src.get_regs_as_uses(collector); + collector.add_def(*dst); + } Inst::MovZX_RM_R { src, dst, .. } => { src.get_regs_as_uses(collector); collector.add_def(*dst); @@ -994,7 +1157,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { Inst::Setcc { dst, .. } => { collector.add_def(*dst); } - Inst::Cmove { src, dst, .. } => { + Inst::Cmove { src, dst, .. } | Inst::XmmCmove { src, dst, .. } => { src.get_regs_as_uses(collector); collector.add_mod(*dst); } @@ -1140,7 +1303,7 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { } } Inst::SignExtendRaxRdx { .. } => {} - Inst::XMM_Mov_RM_R { + Inst::XmmUnaryRmR { ref mut src, ref mut dst, .. @@ -1161,7 +1324,7 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { src.map_uses(mapper); map_mod(mapper, dst); } - Inst::XMM_Mov_R_M { + Inst::Xmm_Mov_R_M { ref mut src, ref mut dst, .. @@ -1174,10 +1337,23 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { ref mut src, ref mut dst, .. + } + | Inst::XmmToGpr { + ref mut src, + ref mut dst, + .. } => { map_use(mapper, src); map_def(mapper, dst); } + Inst::GprToXmm { + ref mut src, + ref mut dst, + .. + } => { + src.map_uses(mapper); + map_def(mapper, dst); + } Inst::MovZX_RM_R { ref mut src, ref mut dst, @@ -1222,6 +1398,11 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { ref mut src, ref mut dst, .. + } + | Inst::XmmCmove { + ref mut src, + ref mut dst, + .. } => { src.map_uses(mapper); map_mod(mapper, dst) @@ -1309,7 +1490,7 @@ impl MachInst for Inst { Self::Mov_R_R { is_64, src, dst, .. } if *is_64 => Some((*dst, *src)), - Self::XMM_Mov_RM_R { op, src, dst, .. } + Self::XmmUnaryRmR { op, src, dst, .. 
} if *op == SseOpcode::Movss || *op == SseOpcode::Movsd || *op == SseOpcode::Movaps => @@ -1357,8 +1538,8 @@ impl MachInst for Inst { match rc_dst { RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg), RegClass::V128 => match ty { - F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None), - F64 => Inst::xmm_mov_rm_r(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None), + F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None), + F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None), _ => panic!("unexpected V128 type in gen_move"), }, _ => panic!("gen_move(x64): unhandled regclass"), @@ -1393,11 +1574,43 @@ impl MachInst for Inst { Inst::jmp_known(BranchTarget::Label(label)) } - fn gen_constant(to_reg: Writable, value: u64, ty: Type) -> SmallVec<[Self; 4]> { + fn gen_constant Writable>( + to_reg: Writable, + value: u64, + ty: Type, + mut alloc_tmp: F, + ) -> SmallVec<[Self; 4]> { let mut ret = SmallVec::new(); - debug_assert!(ty.is_int(), "float constants NYI"); - let is_64 = ty == I64 && value > 0x7fffffff; - ret.push(Inst::imm_r(is_64, value, to_reg)); + if ty.is_int() { + let is_64 = ty == I64 && value > 0x7fffffff; + ret.push(Inst::imm_r(is_64, value, to_reg)); + } else { + match ty { + F32 => { + let tmp = alloc_tmp(RegClass::I64, I32); + ret.push(Inst::imm32_r_unchecked(value, tmp)); + + ret.push(Inst::gpr_to_xmm( + SseOpcode::Movd, + RegMem::reg(tmp.to_reg()), + to_reg, + )); + } + + F64 => { + let tmp = alloc_tmp(RegClass::I64, I64); + ret.push(Inst::imm_r(true, value, tmp)); + + ret.push(Inst::gpr_to_xmm( + SseOpcode::Movq, + RegMem::reg(tmp.to_reg()), + to_reg, + )); + } + + _ => panic!("unexpected type {:?} in gen_constant", ty), + } + } ret } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 4e4074e6f5..44c6edd3c1 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -792,29 +792,111 @@ fn 
lower_insn_to_regs>( ctx.emit(Inst::Ud2 { trap_info }) } + Opcode::F64const => { + // TODO use xorpd for 0 + let value = ctx.get_constant(insn).unwrap(); + let dst = output_to_reg(ctx, outputs[0]); + for inst in Inst::gen_constant(dst, value, F64, |reg_class, ty| { + ctx.alloc_tmp(reg_class, ty) + }) { + ctx.emit(inst); + } + } + + Opcode::F32const => { + // TODO use xorps for 0. + let value = ctx.get_constant(insn).unwrap(); + let dst = output_to_reg(ctx, outputs[0]); + for inst in Inst::gen_constant(dst, value, F32, |reg_class, ty| { + ctx.alloc_tmp(reg_class, ty) + }) { + ctx.emit(inst); + } + } + Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv => { - let lhs = input_to_reg(ctx, inputs[0]); + let lhs = input_to_reg_mem(ctx, inputs[0]); let rhs = input_to_reg(ctx, inputs[1]); let dst = output_to_reg(ctx, outputs[0]); + + // Note: min and max can't be handled here, because of the way Cranelift defines them: + // if any operand is a NaN, they must return the NaN operand, while the x86 machine + // instruction will return the other operand. + let (f32_op, f64_op) = match op { + Opcode::Fadd => (SseOpcode::Addss, SseOpcode::Addsd), + Opcode::Fsub => (SseOpcode::Subss, SseOpcode::Subsd), + Opcode::Fmul => (SseOpcode::Mulss, SseOpcode::Mulsd), + Opcode::Fdiv => (SseOpcode::Divss, SseOpcode::Divsd), + _ => unreachable!(), + }; + let is_64 = flt_ty_is_64(ty.unwrap()); - if !is_64 { - let sse_op = match op { - Opcode::Fadd => SseOpcode::Addss, - Opcode::Fsub => SseOpcode::Subss, - Opcode::Fmul => SseOpcode::Mulss, - Opcode::Fdiv => SseOpcode::Divss, - // TODO Fmax, Fmin. 
- _ => unimplemented!(), - }; - ctx.emit(Inst::xmm_mov_rm_r( - SseOpcode::Movss, - RegMem::reg(lhs), - dst, - None, - )); - ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst)); + + let mov_op = if is_64 { + SseOpcode::Movsd } else { - unimplemented!("unimplemented lowering for opcode {:?}", op); + SseOpcode::Movss + }; + ctx.emit(Inst::xmm_mov(mov_op, lhs, dst, None)); + + let sse_op = if is_64 { f64_op } else { f32_op }; + ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst)); + } + + Opcode::Sqrt => { + let src = input_to_reg_mem(ctx, inputs[0]); + let dst = output_to_reg(ctx, outputs[0]); + + let (f32_op, f64_op) = match op { + Opcode::Sqrt => (SseOpcode::Sqrtss, SseOpcode::Sqrtsd), + _ => unreachable!(), + }; + + let sse_op = if flt_ty_is_64(ty.unwrap()) { + f64_op + } else { + f32_op + }; + ctx.emit(Inst::xmm_unary_rm_r(sse_op, src, dst)); + } + + Opcode::Fpromote => { + let src = input_to_reg_mem(ctx, inputs[0]); + let dst = output_to_reg(ctx, outputs[0]); + ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtss2sd, src, dst)); + } + + Opcode::Fdemote => { + let src = input_to_reg_mem(ctx, inputs[0]); + let dst = output_to_reg(ctx, outputs[0]); + ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtsd2ss, src, dst)); + } + + Opcode::Bitcast => { + let input_ty = ctx.input_ty(insn, 0); + let output_ty = ctx.output_ty(insn, 0); + match (input_ty, output_ty) { + (F32, I32) => { + let src = input_to_reg(ctx, inputs[0]); + let dst = output_to_reg(ctx, outputs[0]); + ctx.emit(Inst::xmm_to_gpr(SseOpcode::Movd, src, dst)); + } + (I32, F32) => { + let src = input_to_reg_mem(ctx, inputs[0]); + let dst = output_to_reg(ctx, outputs[0]); + ctx.emit(Inst::gpr_to_xmm(SseOpcode::Movd, src, dst)); + } + (F64, I64) => { + let src = input_to_reg(ctx, inputs[0]); + let dst = output_to_reg(ctx, outputs[0]); + ctx.emit(Inst::xmm_to_gpr(SseOpcode::Movq, src, dst)); + } + (I64, F64) => { + let src = input_to_reg_mem(ctx, inputs[0]); + let dst = output_to_reg(ctx, outputs[0]); + 
ctx.emit(Inst::gpr_to_xmm(SseOpcode::Movq, src, dst)); + } + _ => unreachable!("invalid bitcast from {:?} to {:?}", input_ty, output_ty), } } @@ -834,20 +916,19 @@ fn lower_insn_to_regs>( let tmp_xmm1 = ctx.alloc_tmp(RegClass::V128, F32); let tmp_xmm2 = ctx.alloc_tmp(RegClass::V128, F32); ctx.emit(Inst::imm_r(true, 0x8000_0000, tmp_gpr1)); - ctx.emit(Inst::xmm_mov_rm_r( + ctx.emit(Inst::gpr_to_xmm( SseOpcode::Movd, RegMem::reg(tmp_gpr1.to_reg()), tmp_xmm1, - None, )); - ctx.emit(Inst::xmm_mov_rm_r( + ctx.emit(Inst::xmm_mov( SseOpcode::Movaps, RegMem::reg(tmp_xmm1.to_reg()), dst, None, )); ctx.emit(Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(lhs), dst)); - ctx.emit(Inst::xmm_mov_rm_r( + ctx.emit(Inst::xmm_mov( SseOpcode::Movss, RegMem::reg(rhs), tmp_xmm2, @@ -982,8 +1063,9 @@ fn lower_insn_to_regs>( } (_, true) => { ctx.emit(match elem_ty { - F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::mem(addr), dst, srcloc), - _ => unimplemented!("FP load not 32-bit"), + F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(addr), dst, srcloc), + F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(addr), dst, srcloc), + _ => unreachable!("unexpected type for load: {:?}", elem_ty), }); } } @@ -1025,7 +1107,7 @@ fn lower_insn_to_regs>( | Opcode::Istore32Complex => { assert!( inputs.len() == 3, - "can't handle more than two inputs in complex load" + "can't handle more than two inputs in complex store" ); let base = input_to_reg(ctx, inputs[1]); let index = input_to_reg(ctx, inputs[2]); @@ -1043,7 +1125,8 @@ fn lower_insn_to_regs>( if is_float { ctx.emit(match elem_ty { F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr, srcloc), - _ => unimplemented!("FP store not 32-bit"), + F64 => Inst::xmm_mov_r_m(SseOpcode::Movsd, src, addr, srcloc), + _ => panic!("unexpected type for store {:?}", elem_ty), }); } else { ctx.emit(Inst::mov_r_m(elem_ty.bytes() as u8, src, addr, srcloc)); @@ -1119,18 +1202,23 @@ fn lower_insn_to_regs>( let dst = output_to_reg(ctx, outputs[0]); let ty = 
ctx.output_ty(insn, 0); - assert!(is_int_ty(ty), "float cmov NYI"); - let size = ty.bytes() as u8; - if size == 1 { - // Sign-extend operands to 32, then do a cmove of size 4. - let lhs_se = ctx.alloc_tmp(RegClass::I64, I32); - ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se, None)); - ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst, None)); - ctx.emit(Inst::cmove(4, cc, RegMem::reg(lhs_se.to_reg()), dst)); + if ty.is_int() { + let size = ty.bytes() as u8; + if size == 1 { + // Sign-extend operands to 32, then do a cmove of size 4. + let lhs_se = ctx.alloc_tmp(RegClass::I64, I32); + ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se, None)); + ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst, None)); + ctx.emit(Inst::cmove(4, cc, RegMem::reg(lhs_se.to_reg()), dst)); + } else { + ctx.emit(Inst::gen_move(dst, rhs, ty)); + ctx.emit(Inst::cmove(size, cc, lhs, dst)); + } } else { + debug_assert!(ty == F32 || ty == F64); ctx.emit(Inst::gen_move(dst, rhs, ty)); - ctx.emit(Inst::cmove(size, cc, lhs, dst)); + ctx.emit(Inst::xmm_cmove(ty == F64, cc, lhs, dst)); } } diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index 608e8b4896..6761372a50 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -516,7 +516,11 @@ impl<'func, I: VCodeInst> Lower<'func, I> { // Now, finally, deal with the moves whose sources are constants. 
for (ty, dst_reg, const_u64) in &const_bundles { - for inst in I::gen_constant(*dst_reg, *const_u64, *ty).into_iter() { + for inst in I::gen_constant(*dst_reg, *const_u64, *ty, |reg_class, ty| { + self.alloc_tmp(reg_class, ty) + }) + .into_iter() + { self.emit(inst); } } diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 9a0ae1e820..0383ff12f6 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -154,7 +154,12 @@ pub trait MachInst: Clone + Debug { fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self; /// Generate a constant into a reg. - fn gen_constant(to_reg: Writable<Reg>, value: u64, ty: Type) -> SmallVec<[Self; 4]>; + fn gen_constant<F: FnMut(RegClass, Type) -> Writable<Reg>>( + to_reg: Writable<Reg>, + value: u64, + ty: Type, + alloc_tmp: F, + ) -> SmallVec<[Self; 4]>; /// Generate a zero-length no-op. fn gen_zero_len_nop() -> Self;