diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index fe65145c50..6c35b6dcb7 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -214,6 +214,8 @@ pub(crate) enum InstructionSet { pub enum SseOpcode { Addss, Addsd, + Andps, + Andnps, Comiss, Comisd, Cmpss, @@ -233,10 +235,13 @@ pub enum SseOpcode { Maxsd, Minss, Minsd, + Movaps, + Movd, Movss, Movsd, Mulss, Mulsd, + Orps, Rcpss, Roundss, Roundsd, @@ -255,14 +260,18 @@ impl SseOpcode { use InstructionSet::*; match self { SseOpcode::Addss + | SseOpcode::Andps + | SseOpcode::Andnps | SseOpcode::Cvtsi2ss | SseOpcode::Cvtss2si | SseOpcode::Cvttss2si | SseOpcode::Divss | SseOpcode::Maxss + | SseOpcode::Movaps | SseOpcode::Minss | SseOpcode::Movss | SseOpcode::Mulss + | SseOpcode::Orps | SseOpcode::Rcpss | SseOpcode::Rsqrtss | SseOpcode::Subss @@ -280,6 +289,7 @@ impl SseOpcode { | SseOpcode::Divsd | SseOpcode::Maxsd | SseOpcode::Minsd + | SseOpcode::Movd | SseOpcode::Movsd | SseOpcode::Mulsd | SseOpcode::Sqrtsd @@ -291,6 +301,14 @@ impl SseOpcode { SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41, } } + + /// Returns src register operand size for an instruction + pub(crate) fn src_size(&self) -> u8 { + match self { + SseOpcode::Movd => 4, + _ => 8, + } + } } impl fmt::Debug for SseOpcode { @@ -298,6 +316,8 @@ impl fmt::Debug for SseOpcode { let name = match self { SseOpcode::Addss => "addss", SseOpcode::Addsd => "addsd", + SseOpcode::Andps => "andps", + SseOpcode::Andnps => "andnps", SseOpcode::Comiss => "comiss", SseOpcode::Comisd => "comisd", SseOpcode::Cvtsd2ss => "cvtsd2ss", @@ -314,10 +334,13 @@ impl fmt::Debug for SseOpcode { SseOpcode::Maxsd => "maxsd", SseOpcode::Minss => "minss", SseOpcode::Minsd => "minsd", + SseOpcode::Movaps => "movaps", + SseOpcode::Movd => "movd", SseOpcode::Movss => "movss", SseOpcode::Movsd => "movsd", SseOpcode::Mulss => "mulss", SseOpcode::Mulsd => "mulsd", + 
SseOpcode::Orps => "orps", SseOpcode::Rcpss => "rcpss", SseOpcode::Roundss => "roundss", SseOpcode::Roundsd => "roundsd", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index efd6386de2..1c9c039693 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1029,12 +1029,14 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { let opcode = match op { SseOpcode::Movss => 0x0F10, SseOpcode::Movsd => 0x0F10, + SseOpcode::Movd => 0x0F6E, _ => unimplemented!("XMM_R_R opcode"), }; let prefix = match op { SseOpcode::Movss => LegacyPrefix::_F3, SseOpcode::Movsd => LegacyPrefix::_F2, + SseOpcode::Movd => LegacyPrefix::_66, _ => unimplemented!("XMM_R_R opcode"), }; @@ -1049,45 +1051,56 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { ); } - Inst::XMM_RM_R { + Inst::XMM_MOV_RM_R { op, - src: srcE, + src: src_e, dst: reg_g, } => { let rex = RexFlags::clear_w(); - - let opcode = match op { - SseOpcode::Addss => 0x0F58, - SseOpcode::Subss => 0x0F5C, - SseOpcode::Mulss => 0x0F59, - SseOpcode::Divss => 0x0F5E, - SseOpcode::Sqrtss => 0x0F51, - _ => unimplemented!("XMM_RM_R opcode"), + let (prefix, opcode) = match op { + SseOpcode::Movaps => (LegacyPrefix::None, 0x0F28), + SseOpcode::Movd => (LegacyPrefix::_66, 0x0F6E), + SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10), + SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10), + _ => unimplemented!("Opcode {:?} not implemented", op), }; - match srcE { - RegMem::Reg { reg: regE } => { - emit_std_reg_reg( - sink, - LegacyPrefix::_F3, - opcode, - 2, - reg_g.to_reg(), - *regE, - rex, - ); + match src_e { + RegMem::Reg { reg: reg_e } => { + emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex); } RegMem::Mem { addr } => { - emit_std_reg_mem( - sink, - LegacyPrefix::_F3, - opcode, - 2, - reg_g.to_reg(), - addr, - rex, - ); + emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex); + } + } + } + + 
Inst::XMM_RM_R { + op, + src: src_e, + dst: reg_g, + } => { + let rex = RexFlags::clear_w(); + let (prefix, opcode) = match op { + SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58), + SseOpcode::Andps => (LegacyPrefix::None, 0x0F54), + SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55), + SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E), + SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59), + SseOpcode::Orps => (LegacyPrefix::None, 0x0F56), + SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C), + SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51), + _ => unimplemented!("Opcode {:?} not implemented", op), + }; + + match src_e { + RegMem::Reg { reg: reg_e } => { + emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex); + } + + RegMem::Mem { addr } => { + emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex); } } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 670621c3de..783bfbea42 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -79,11 +79,11 @@ fn test_x64_emit() { let w_xmm8 = Writable::<Reg>::from_reg(xmm8); let _w_xmm9 = Writable::<Reg>::from_reg(xmm9); let w_xmm10 = Writable::<Reg>::from_reg(xmm10); - let _w_xmm11 = Writable::<Reg>::from_reg(xmm11); - let _w_xmm12 = Writable::<Reg>::from_reg(xmm12); + let w_xmm11 = Writable::<Reg>::from_reg(xmm11); + let w_xmm12 = Writable::<Reg>::from_reg(xmm12); let w_xmm13 = Writable::<Reg>::from_reg(xmm13); - let _w_xmm14 = Writable::<Reg>::from_reg(xmm14); - let _w_xmm15 = Writable::<Reg>::from_reg(xmm15); + let w_xmm14 = Writable::<Reg>::from_reg(xmm14); + let w_xmm15 = Writable::<Reg>::from_reg(xmm15); let mut insns = Vec::<(Inst, &str, &str)>::new(); @@ -2362,12 +2362,51 @@ fn test_x64_emit() { "F3410F5EF8", "divss %xmm8, %xmm7", )); - insns.push(( Inst::xmm_rm_r(SseOpcode::Sqrtss, RegMem::reg(xmm7), w_xmm8), "F3440F51C7", "sqrtss %xmm7, %xmm8", )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Andps, RegMem::reg(xmm3), 
w_xmm12), + "440F54E3", + "andps %xmm3, %xmm12", + )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(xmm4), w_xmm11), + "440F55DC", + "andnps %xmm4, %xmm11", + )); + insns.push(( + Inst::xmm_mov_rm_r(SseOpcode::Movaps, RegMem::reg(xmm5), w_xmm14), + "440F28F5", + "movaps %xmm5, %xmm14", + )); + insns.push(( + Inst::xmm_mov_rm_r(SseOpcode::Movd, RegMem::reg(rax), w_xmm15), + "66440F6EF8", + "movd %eax, %xmm15", + )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm1), w_xmm15), + "440F56F9", + "orps %xmm1, %xmm15", + )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm5), w_xmm4), + "0F56E5", + "orps %xmm5, %xmm4", + )); + insns.push(( + Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(xmm13), w_xmm2), + "F3410F10D5", + "movss %xmm13, %xmm2", + )); + insns.push(( + Inst::xmm_mov_rm_r(SseOpcode::Movsd, RegMem::reg(xmm14), w_xmm3), + "F2410F10DE", + "movsd %xmm14, %xmm3", + )); // ======================================================== // XMM_R_R diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 0852f30857..0ffc56a83b 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -151,6 +151,18 @@ pub(crate) enum Inst { /// jmpq (reg mem) JmpUnknown { target: RegMem }, + /// mov between XMM registers (32 64) (reg addr) reg + /// XMM_MOV_RM_R differs from XMM_RM_R in that the dst + /// register of XMM_MOV_RM_R is not used in the computation + /// of the instruction dst value and so does not have to + /// be a previously valid value. This is characteristic of + /// mov instructions. + XMM_MOV_RM_R { + op: SseOpcode, + src: RegMem, + dst: Writable<Reg>, + }, + /// (add sub and or xor mul adc? sbb?) 
(32 64) (reg addr imm) reg XMM_RM_R { op: SseOpcode, @@ -220,9 +232,14 @@ impl Inst { Inst::XMM_R_R { op, src, dst } } + pub(crate) fn xmm_mov_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst { + debug_assert!(dst.to_reg().get_class() == RegClass::V128); + Inst::XMM_MOV_RM_R { op, src, dst } + } + pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Self { debug_assert!(dst.to_reg().get_class() == RegClass::V128); - Self::XMM_RM_R { op, src, dst } + Inst::XMM_RM_R { op, src, dst } } pub(crate) fn movzx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst { @@ -371,6 +388,12 @@ impl ShowWithRRU for Inst { src.show_rru_sized(mb_rru, sizeLQ(*is_64)), show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)), ), + Inst::XMM_MOV_RM_R { op, src, dst } => format!( + "{} {}, {}", + ljustify(op.to_string()), + src.show_rru_sized(mb_rru, op.src_size()), + show_ireg_sized(dst.to_reg(), mb_rru, 8), + ), Inst::XMM_RM_R { op, src, dst } => format!( "{} {}, {}", ljustify(op.to_string()), @@ -532,7 +555,11 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { src.get_regs_as_uses(collector); collector.add_mod(*dst); } - Inst::XMM_RM_R { op: _, src, dst } => { + Inst::XMM_MOV_RM_R { src, dst, .. } => { + src.get_regs_as_uses(collector); + collector.add_def(*dst); + } + Inst::XMM_RM_R { src, dst, .. 
} => { src.get_regs_as_uses(collector); collector.add_mod(*dst); } @@ -699,6 +726,14 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { src.map_uses(mapper); map_mod(mapper, dst); } + Inst::XMM_MOV_RM_R { + op: _, + ref mut src, + ref mut dst, + } => { + src.map_uses(mapper); + map_def(mapper, dst); + } Inst::XMM_RM_R { op: _, ref mut src, @@ -817,7 +852,9 @@ impl MachInst for Inst { match self { Self::Mov_R_R { is_64, src, dst } if *is_64 => Some((*dst, *src)), Self::XMM_R_R { op, src, dst } - if *op == SseOpcode::Movss || *op == SseOpcode::Movsd => + if *op == SseOpcode::Movss + || *op == SseOpcode::Movsd + || *op == SseOpcode::Movaps => { Some((*dst, *src)) } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 65c0684077..4edb232c31 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -7,6 +7,7 @@ use log::trace; use regalloc::{Reg, RegClass, Writable}; use crate::ir::types; +use crate::ir::types::*; use crate::ir::Inst as IRInst; use crate::ir::{condcodes::IntCC, InstructionData, Opcode, Type}; @@ -214,7 +215,52 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) { unimplemented!("unimplemented lowering for opcode {:?}", op); } } - + Opcode::Fcopysign => { + let dst = output_to_reg(ctx, inst, 0); + let lhs = input_to_reg(ctx, inst, 0); + let rhs = input_to_reg(ctx, inst, 1); + if !flt_ty_is_64(ty.unwrap()) { + // movabs 0x8000_0000, tmp_gpr1 + // movd tmp_gpr1, tmp_xmm1 + // movaps tmp_xmm1, dst + // andnps src_1, dst + // movss src_2, tmp_xmm2 + // andps tmp_xmm1, tmp_xmm2 + // orps tmp_xmm2, dst + let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32); + let tmp_xmm1 = ctx.alloc_tmp(RegClass::V128, F32); + let tmp_xmm2 = ctx.alloc_tmp(RegClass::V128, F32); + ctx.emit(Inst::imm_r(true, 0x8000_0000, tmp_gpr1)); + ctx.emit(Inst::xmm_mov_rm_r( + SseOpcode::Movd, + RegMem::reg(tmp_gpr1.to_reg()), + tmp_xmm1, + )); + ctx.emit(Inst::xmm_mov_rm_r( + SseOpcode::Movaps, + 
RegMem::reg(tmp_xmm1.to_reg()), + dst, + )); + ctx.emit(Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(lhs), dst)); + ctx.emit(Inst::xmm_mov_rm_r( + SseOpcode::Movss, + RegMem::reg(rhs), + tmp_xmm2, + )); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Andps, + RegMem::reg(tmp_xmm1.to_reg()), + tmp_xmm2, + )); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Orps, + RegMem::reg(tmp_xmm2.to_reg()), + dst, + )); + } else { + unimplemented!("{:?} for non 32-bit destination is not supported", op); + } + } Opcode::IaddImm | Opcode::ImulImm | Opcode::UdivImm