diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs
index d31b2e8b93..da6be3eb4a 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1,6 +1,6 @@
 use crate::binemit::Reloc;
 use crate::ir::immediates::{Ieee32, Ieee64};
-use crate::ir::TrapCode;
+use crate::ir::{types, TrapCode};
 use crate::isa::x64::inst::args::*;
 use crate::isa::x64::inst::*;
 use crate::machinst::{MachBuffer, MachInstEmit, MachLabel};
@@ -1787,6 +1787,40 @@ pub(crate) fn emit(
             sink.put1(*imm)
         }
 
+        Inst::XmmLoadConstSeq { val, dst, ty } => {
+            // This sequence is *one* instruction in the vcode, and is expanded only here at
+            // emission time, because we cannot allow the regalloc to insert spills/reloads in
+            // the middle; we depend on hardcoded PC-rel addressing below. TODO Eventually this
+            // "constant inline" code should be replaced by constant pool integration.
+
+            // Load the inline constant.
+            let opcode = match *ty {
+                types::F32X4 => SseOpcode::Movups,
+                types::F64X2 => SseOpcode::Movupd,
+                types::I8X16 => SseOpcode::Movupd, // TODO replace with MOVDQU
+                _ => unimplemented!("cannot yet load constants for type: {}", ty),
+            };
+            let constant_start_label = sink.get_label();
+            let load_offset = RegMem::mem(Amode::rip_relative(BranchTarget::Label(
+                constant_start_label,
+            )));
+            let load = Inst::xmm_unary_rm_r(opcode, load_offset, *dst);
+            load.emit(sink, flags, state);
+
+            // Jump over the constant.
+            let constant_end_label = sink.get_label();
+            let continue_at_offset = BranchTarget::Label(constant_end_label);
+            let jump = Inst::jmp_known(continue_at_offset);
+            jump.emit(sink, flags, state);
+
+            // Emit the constant.
+            sink.bind_label(constant_start_label);
+            for i in val.iter() {
+                sink.put1(*i);
+            }
+            sink.bind_label(constant_end_label);
+        }
+
         Inst::Xmm_Mov_R_M {
             op,
             src,
diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs
index f27b448fa3..cc7967a64f 100644
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -229,6 +229,13 @@ pub enum Inst {
         srcloc: Option<SourceLoc>,
     },
 
+    /// XMM (vector) unary op (to move a constant value into an xmm register): movups
+    XmmLoadConstSeq {
+        val: Vec<u8>,
+        dst: Writable<Reg>,
+        ty: Type,
+    },
+
     /// XMM (scalar) unary op (from xmm to integer reg): movd, movq, cvtts{s,d}2si
     XmmToGpr {
         op: SseOpcode,
@@ -537,6 +544,13 @@ impl Inst {
         }
     }
 
+    pub(crate) fn xmm_load_const_seq(val: Vec<u8>, dst: Writable<Reg>, ty: Type) -> Inst {
+        debug_assert!(val.len() == 16);
+        debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+        debug_assert!(ty.is_vector() && ty.bits() == 128);
+        Inst::XmmLoadConstSeq { val, dst, ty }
+    }
+
     /// Convenient helper for unary float operations.
     pub(crate) fn xmm_unary_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst {
         src.assert_regclass_is(RegClass::V128);
@@ -1091,6 +1105,10 @@ impl ShowWithRRU for Inst {
                 dst.show_rru(mb_rru),
             ),
 
+            Inst::XmmLoadConstSeq { val, dst, .. } => {
+                format!("load_const ${:?}, {}", val, dst.show_rru(mb_rru),)
+            }
+
             Inst::XmmToGpr {
                 op,
                 src,
@@ -1474,6 +1492,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
                 collector.add_mod(*dst);
             }
         }
+        Inst::XmmLoadConstSeq { dst, .. } => collector.add_def(*dst),
         Inst::XmmMinMaxSeq { lhs, rhs_dst, .. } => {
             collector.add_use(*lhs);
             collector.add_mod(*rhs_dst);
@@ -1765,6 +1784,9 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
             src.map_uses(mapper);
             map_mod(mapper, dst);
         }
+        Inst::XmmLoadConstSeq { ref mut dst, .. } => {
+            map_def(mapper, dst);
+        }
         Inst::XmmMinMaxSeq {
             ref mut lhs,
             ref mut rhs_dst,
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index dd59ba07e1..127b717389 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -1869,6 +1869,20 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             )));
         }
 
+        Opcode::Vconst => {
+            let val = if let &InstructionData::UnaryConst {
+                constant_handle, ..
+            } = ctx.data(insn)
+            {
+                ctx.get_constant_data(constant_handle).clone().into_vec()
+            } else {
+                unreachable!("vconst should always have unary_const format")
+            };
+            let dst = output_to_reg(ctx, outputs[0]);
+            let ty = ty.unwrap();
+            ctx.emit(Inst::xmm_load_const_seq(val, dst, ty));
+        }
+
         Opcode::RawBitcast => {
             // A raw_bitcast is just a mechanism for correcting the type of V128 values (see
             // https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR
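
Note (illustration, not part of the diff): the XmmLoadConstSeq arm in emit.rs expands into a RIP-relative load, a jump over the constant, and the 16 constant bytes placed inline in the instruction stream. Below is a minimal, self-contained Rust sketch of that pattern, assuming a plain Vec<u8> in place of MachBuffer and fake instruction encodings; the function name and byte layout are hypothetical stand-ins, not the real emitter.

fn emit_inline_const_pattern(buf: &mut Vec<u8>, constant: &[u8; 16]) {
    // 1. RIP-relative load (stands in for `movups dst, [rip + disp32]`).
    //    Reserve the bytes now and patch the 32-bit displacement once the
    //    constant's position is known, which is what binding
    //    `constant_start_label` accomplishes in the real emitter.
    let load_start = buf.len();
    buf.extend_from_slice(&[0u8; 8]); // fake opcode/ModRM bytes + disp32
    let load_end = buf.len();

    // 2. Jump over the constant so its bytes are never executed (stands in
    //    for `jmp_known(constant_end_label)`): a rel8 jump skipping 16 bytes.
    buf.extend_from_slice(&[0xEB, 16]);

    // 3. The constant itself, placed inline in the instruction stream.
    let const_start = buf.len();
    buf.extend_from_slice(constant);

    // Patch the displacement: RIP-relative addressing is measured from the
    // end of the load instruction, which is why the regalloc must not insert
    // spills/reloads inside this sequence; doing so would change the distance.
    let disp = (const_start - load_end) as i32;
    buf[load_start + 4..load_end].copy_from_slice(&disp.to_le_bytes());
}

As the TODO in the diff notes, this per-use inline expansion is a stopgap: constant pool integration would share duplicated constants and avoid the jump over the inline bytes.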