From f2dd1535d51b111621471b287ff920cbeaa8009c Mon Sep 17 00:00:00 2001 From: Johnnie Birch <45402135+jlb6740@users.noreply.github.com> Date: Wed, 10 Jun 2020 22:09:47 -0700 Subject: [PATCH] Add x64 lowering of Clif flt store instruction for new backend Adds support for the clif flt store instruction. --- cranelift/codegen/src/isa/x64/inst/emit.rs | 12 +++- .../codegen/src/isa/x64/inst/emit_tests.rs | 13 +++++ cranelift/codegen/src/isa/x64/inst/mod.rs | 58 +++++++++++++++---- cranelift/codegen/src/isa/x64/lower.rs | 5 +- 4 files changed, 74 insertions(+), 14 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 2325aca3a4..d2666728c3 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1027,7 +1027,7 @@ pub(crate) fn emit( } } - Inst::XMM_MOV_RM_R { + Inst::XMM_Mov_RM_R { op, src: src_e, dst: reg_g, @@ -1082,7 +1082,17 @@ pub(crate) fn emit( } } } + Inst::XMM_Mov_R_M { op, src, dst } => { + let rex = RexFlags::clear_w(); + let (prefix, opcode) = match op { + SseOpcode::Movd => (LegacyPrefix::_66, 0x0F7E), + SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F11), + _ => unimplemented!("Emit xmm mov r m"), + }; + let dst = &dst.finalize(state); + emit_std_reg_mem(sink, prefix, opcode, 2, *src, dst, rex); + } Inst::Hlt => { sink.put1(0xcc); } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 77b0b79351..fc77fde871 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -2688,6 +2688,19 @@ fn test_x64_emit() { "440F56F9", "orps %xmm1, %xmm15", )); + + insns.push(( + Inst::xmm_mov_r_m(SseOpcode::Movd, xmm0, Amode::imm_reg(321, rbx)), + "660F7E8341010000", + "movd %xmm0, 321(%rbx)", + )); + + insns.push(( + Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12)), + "F3450F11BC2480000000", + "movss %xmm15, 
128(%r12)", + )); + insns.push(( Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm5), w_xmm4), "0F56E5", diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 726575fd28..bf778a1e40 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -135,16 +135,23 @@ pub enum Inst { dst: Writable, }, - /// mov between XMM registers (32 64) (reg addr) reg - /// XMM_MOV_RM_R differs from XMM_RM_R in that the dst register of XMM_MOV_RM_R is not used in - /// the computation of the instruction dst value and so does not have to be a previously valid - /// value. This is characteristic of mov instructions. - XMM_MOV_RM_R { + /// mov between XMM registers (32 64) (reg addr) reg XMM_Mov_RM_R differs from XMM_RM_R in + /// that the dst register of XMM_Mov_RM_R is not used in the computation of the instruction + /// dst value and so does not have to be a previously valid value. This is characteristic of + /// mov instructions. + XMM_Mov_RM_R { op: SseOpcode, src: RegMem, dst: Writable, }, + /// mov reg addr (good for all memory stores from xmm registers) + XMM_Mov_R_M { + op: SseOpcode, + src: Reg, + dst: SyntheticAmode, + }, + // ===================================== // Control flow instructions. /// Direct call: call simm32. 
@@ -249,7 +256,7 @@ impl Inst { pub(crate) fn xmm_mov_rm_r(op: SseOpcode, src: RegMem, dst: Writable) -> Inst { debug_assert!(dst.to_reg().get_class() == RegClass::V128); - Inst::XMM_MOV_RM_R { op, src, dst } + Inst::XMM_Mov_RM_R { op, src, dst } } pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable) -> Self { @@ -257,6 +264,15 @@ impl Inst { Inst::XMM_RM_R { op, src, dst } } + pub(crate) fn xmm_mov_r_m(op: SseOpcode, src: Reg, dst: impl Into) -> Inst { + debug_assert!(src.get_class() == RegClass::V128); + Inst::XMM_Mov_R_M { + op, + src, + dst: dst.into(), + } + } + pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable) -> Inst { debug_assert!(dst.to_reg().get_class() == RegClass::I64); Inst::MovZX_RM_R { ext_mode, src, dst } @@ -453,12 +469,18 @@ impl ShowWithRRU for Inst { src.show_rru_sized(mb_rru, sizeLQ(*is_64)), show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)), ), - Inst::XMM_MOV_RM_R { op, src, dst } => format!( + Inst::XMM_Mov_RM_R { op, src, dst } => format!( "{} {}, {}", ljustify(op.to_string()), src.show_rru_sized(mb_rru, op.src_size()), show_ireg_sized(dst.to_reg(), mb_rru, 8), ), + Inst::XMM_Mov_R_M { op, src, dst } => format!( + "{} {}, {}", + ljustify(op.to_string()), + show_ireg_sized(*src, mb_rru, 8), + dst.show_rru(mb_rru) + ), Inst::XMM_RM_R { op, src, dst } => format!( "{} {}, {}", ljustify(op.to_string()), @@ -626,7 +648,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { src.get_regs_as_uses(collector); collector.add_mod(*dst); } - Inst::XMM_MOV_RM_R { src, dst, .. } => { + Inst::XMM_Mov_RM_R { src, dst, .. } => { src.get_regs_as_uses(collector); collector.add_def(*dst); } @@ -634,6 +656,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { src.get_regs_as_uses(collector); collector.add_mod(*dst); } + Inst::XMM_Mov_R_M { src, dst, .. } => { + collector.add_use(*src); + dst.get_regs_as_uses(collector); + } Inst::Imm_R { dst, .. 
} => { collector.add_def(*dst); } @@ -791,22 +817,30 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { src.map_uses(mapper); map_mod(mapper, dst); } - Inst::XMM_MOV_RM_R { - op: _, + Inst::XMM_Mov_RM_R { ref mut src, ref mut dst, + .. } => { src.map_uses(mapper); map_def(mapper, dst); } Inst::XMM_RM_R { - op: _, ref mut src, ref mut dst, + .. } => { src.map_uses(mapper); map_mod(mapper, dst); } + Inst::XMM_Mov_R_M { + ref mut src, + ref mut dst, + .. + } => { + map_use(mapper, src); + dst.map_uses(mapper); + } Inst::Imm_R { dst_is_64: _, simm64: _, @@ -931,7 +965,7 @@ impl MachInst for Inst { // %reg. match self { Self::Mov_R_R { is_64, src, dst } if *is_64 => Some((*dst, *src)), - Self::XMM_MOV_RM_R { op, src, dst } + Self::XMM_Mov_RM_R { op, src, dst } if *op == SseOpcode::Movss || *op == SseOpcode::Movsd || *op == SseOpcode::Movaps => diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 1f63f53a6e..081617f805 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -567,7 +567,10 @@ fn lower_insn_to_regs>(ctx: &mut C, insn: IRInst) -> Codeg let src = input_to_reg(ctx, inputs[0]); if is_float { - unimplemented!("FPU stores"); + ctx.emit(match elem_ty { + F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr), + _ => unimplemented!("FP store not 32-bit"), + }); } else { ctx.emit(Inst::mov_r_m(elem_ty.bytes() as u8, src, addr)); }