machinst x64: add loading of inline 128-bit constants

Eventually the `load + jmp + constant` pattern should be replaced with just `load` once constant pools are more tightly integrated.
2020-07-29 14:14:38 -07:00
parent eda5c6d370
commit 999fa00d6a
3 changed files with 71 additions and 1 deletions
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1,6 +1,6 @@
 use crate::binemit::Reloc;
 use crate::ir::immediates::{Ieee32, Ieee64};
-use crate::ir::TrapCode;
+use crate::ir::{types, TrapCode};
 use crate::isa::x64::inst::args::*;
 use crate::isa::x64::inst::*;
 use crate::machinst::{MachBuffer, MachInstEmit, MachLabel};
@@ -1787,6 +1787,40 @@ pub(crate) fn emit(
            sink.put1(*imm)
        }

+        Inst::XmmLoadConstSeq { val, dst, ty } => {
+            // This sequence is *one* instruction in the vcode and is expanded only here, at
+            // emission time: the regalloc must not insert spills/reloads in the middle because
+            // the load below addresses the constant PC-relatively. TODO: eventually replace this
+            // inline constant with constant pool integration.
+
+            // Load the inline constant through a RIP-relative address to its start label.
+            let opcode = match *ty {
+                types::F32X4 => SseOpcode::Movups,
+                types::F64X2 => SseOpcode::Movupd,
+                types::I8X16 => SseOpcode::Movupd, // TODO replace with MOVDQU
+                _ => unimplemented!("cannot yet load constants for type: {}", ty),
+            };
+            let constant_start_label = sink.get_label();
+            let load_offset = RegMem::mem(Amode::rip_relative(BranchTarget::Label(
+                constant_start_label,
+            )));
+            let load = Inst::xmm_unary_rm_r(opcode, load_offset, *dst);
+            load.emit(sink, flags, state);
+
+            // Jump over the constant bytes so execution resumes right after them.
+            let constant_end_label = sink.get_label();
+            let continue_at_offset = BranchTarget::Label(constant_end_label);
+            let jump = Inst::jmp_known(continue_at_offset);
+            jump.emit(sink, flags, state);
+
+            // Emit the constant bytes, bracketed by the start and end labels.
+            sink.bind_label(constant_start_label);
+            for i in val.iter() {
+                sink.put1(*i);
+            }
+            sink.bind_label(constant_end_label);
+        }
+
        Inst::Xmm_Mov_R_M {
            op,
            src,
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -229,6 +229,13 @@ pub enum Inst {
        srcloc: Option<SourceLoc>,
    },

+    /// XMM (vector) load of an inline 128-bit constant into an xmm register: movup{s,d} + jmp + data
+    XmmLoadConstSeq {
+        val: Vec<u8>, // raw bytes of the constant, emitted inline after the jump
+        dst: Writable<Reg>, // register that receives the loaded constant
+        ty: Type, // vector type of the constant; selects the load opcode at emission
+    },
+
    /// XMM (scalar) unary op (from xmm to integer reg): movd, movq, cvtts{s,d}2si
    XmmToGpr {
        op: SseOpcode,
@@ -537,6 +544,13 @@ impl Inst {
        }
    }

+    pub(crate) fn xmm_load_const_seq(val: Vec<u8>, dst: Writable<Reg>, ty: Type) -> Inst {
+        debug_assert!(val.len() == 16); // a 128-bit constant is exactly 16 bytes
+        debug_assert!(dst.to_reg().get_class() == RegClass::V128); // dst must be in the V128 class
+        debug_assert!(ty.is_vector() && ty.bits() == 128); // only 128-bit vector types
+        Inst::XmmLoadConstSeq { val, dst, ty }
+    }
+
    /// Convenient helper for unary float operations.
    pub(crate) fn xmm_unary_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::V128);
@@ -1091,6 +1105,10 @@ impl ShowWithRRU for Inst {
                dst.show_rru(mb_rru),
            ),

+            Inst::XmmLoadConstSeq { val, dst, .. } => {
+                format!("load_const ${:?}, {}", val, dst.show_rru(mb_rru),) // bytes, then dst
+            }
+
            Inst::XmmToGpr {
                op,
                src,
@@ -1474,6 +1492,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
                collector.add_mod(*dst);
            }
        }
+        Inst::XmmLoadConstSeq { dst, .. } => collector.add_def(*dst), // dst is only written
        Inst::XmmMinMaxSeq { lhs, rhs_dst, .. } => {
            collector.add_use(*lhs);
            collector.add_mod(*rhs_dst);
@@ -1765,6 +1784,9 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            src.map_uses(mapper);
            map_mod(mapper, dst);
        }
+        Inst::XmmLoadConstSeq { ref mut dst, .. } => {
+            map_def(mapper, dst); // `dst` is the only register operand, and it is a def
+        }
        Inst::XmmMinMaxSeq {
            ref mut lhs,
            ref mut rhs_dst,
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -1869,6 +1869,20 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            ));
        }

+        Opcode::Vconst => {
+            let val = if let &InstructionData::UnaryConst {
+                constant_handle, ..
+            } = ctx.data(insn)
+            {
+                ctx.get_constant_data(constant_handle).clone().into_vec() // owned copy of the bytes
+            } else {
+                unreachable!("vconst should always have unary_const format")
+            };
+            let dst = output_to_reg(ctx, outputs[0]);
+            let ty = ty.unwrap(); // NOTE(review): assumes `ty` is always Some for vconst -- confirm
+            ctx.emit(Inst::xmm_load_const_seq(val, dst, ty)); // one vcode inst, expanded at emission
+        }
+
        Opcode::RawBitcast => {
            // A raw_bitcast is just a mechanism for correcting the type of V128 values (see
            // https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR