machinst x64: allow use of vector-length types

2020-07-24 12:31:28 -07:00
parent dc6220b87c
commit 77cc2f69c1
5 changed files with 54 additions and 19 deletions
--- a/cranelift/codegen/src/isa/x64/abi.rs
+++ b/cranelift/codegen/src/isa/x64/abi.rs
@@ -93,6 +93,7 @@ fn in_int_reg(ty: types::Type) -> bool {
 fn in_vec_reg(ty: types::Type) -> bool {
    match ty {
        types::F32 | types::F64 => true,
+        _ if ty.is_vector() => true,
        _ => false,
    }
 }
@@ -365,7 +366,7 @@ impl ABIBody for X64ABIBody {
                    1 | 8 => Some(ExtMode::BQ),
                    16 => Some(ExtMode::WQ),
                    32 => Some(ExtMode::LQ),
-                    64 => None,
+                    64 | 128 => None,
                    _ => unreachable!(),
                };

--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -226,7 +226,7 @@ impl ShowWithRRU for RegMemImm {
 }

 /// An operand which is either an integer Register or a value in Memory.  This can denote an 8, 16,
-/// 32 or 64 bit value.
+/// 32, 64, or 128 bit value; a 128-bit value in a register uses a V128-class (XMM) register.
 #[derive(Clone)]
 pub enum RegMem {
    Reg { reg: Reg },
@@ -330,8 +330,7 @@ pub(crate) enum InstructionSet {
    SSE41,
 }

-/// Some scalar SSE operations requiring 2 operands r/m and r.
-/// TODO: Below only includes scalar operations. To be seen if packed will be added here.
+/// Some SSE operations requiring 2 operands r/m and r.
 #[derive(Clone, Copy, PartialEq)]
 pub enum SseOpcode {
    Addss,
@@ -365,6 +364,10 @@ pub enum SseOpcode {
    Movq,
    Movss,
    Movsd,
+    Movups,
+    Movupd,
+    Mulps,
+    Mulpd,
    Mulss,
    Mulsd,
    Orps,
@@ -396,9 +399,11 @@ impl SseOpcode {
            | SseOpcode::Cvttss2si
            | SseOpcode::Divss
            | SseOpcode::Maxss
-            | SseOpcode::Movaps
            | SseOpcode::Minss
+            | SseOpcode::Movaps
            | SseOpcode::Movss
+            | SseOpcode::Movups
+            | SseOpcode::Mulps
            | SseOpcode::Mulss
            | SseOpcode::Orps
            | SseOpcode::Rcpss
@@ -425,6 +430,8 @@ impl SseOpcode {
            | SseOpcode::Movd
            | SseOpcode::Movq
            | SseOpcode::Movsd
+            | SseOpcode::Movupd
+            | SseOpcode::Mulpd
            | SseOpcode::Mulsd
            | SseOpcode::Orpd
            | SseOpcode::Sqrtsd
@@ -478,6 +485,10 @@ impl fmt::Debug for SseOpcode {
            SseOpcode::Movq => "movq",
            SseOpcode::Movss => "movss",
            SseOpcode::Movsd => "movsd",
+            SseOpcode::Movups => "movups",
+            SseOpcode::Movupd => "movupd",
+            SseOpcode::Mulps => "mulps",
+            SseOpcode::Mulpd => "mulpd",
            SseOpcode::Mulss => "mulss",
            SseOpcode::Mulsd => "mulsd",
            SseOpcode::Orpd => "orpd",
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1552,6 +1552,10 @@ pub(crate) fn emit(
                SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F28),
                SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10),
                SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10),
+                SseOpcode::Movups => (LegacyPrefix::None, 0x0F10),
+                SseOpcode::Movupd => (LegacyPrefix::_66, 0x0F10),
+                SseOpcode::Sqrtps => (LegacyPrefix::None, 0x0F51),
+                SseOpcode::Sqrtpd => (LegacyPrefix::_66, 0x0F51),
                SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51),
                SseOpcode::Sqrtsd => (LegacyPrefix::_F2, 0x0F51),
                SseOpcode::Cvtss2sd => (LegacyPrefix::_F3, 0x0F5A),
@@ -1710,6 +1714,8 @@ pub(crate) fn emit(
            let (prefix, opcode) = match op {
                SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F11),
                SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F11),
+                SseOpcode::Movaps => (LegacyPrefix::None, 0x0F29),
+                SseOpcode::Movups => (LegacyPrefix::None, 0x0F11),
                _ => unimplemented!("Opcode {:?} not implemented", op),
            };
            let dst = &dst.finalize(state);
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -1921,6 +1921,10 @@ impl MachInst for Inst {
            RegClass::V128 => match ty {
                F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None),
                F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None),
+                _ if ty.is_vector() && ty.bits() == 128 => {
+                    // TODO Specialize this move for different types: MOVUPD, MOVDQU, etc.
+                    Inst::xmm_mov(SseOpcode::Movups, RegMem::reg(src_reg), dst_reg, None)
+                }
                _ => panic!("unexpected type {:?} in gen_move of regclass V128", ty),
            },
            _ => panic!("gen_move(x64): unhandled regclass"),
@@ -1942,7 +1946,8 @@ impl MachInst for Inst {
    fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
        match ty {
            I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 | R32 | R64 => Ok(RegClass::I64),
-            F32 | F64 | I128 | B128 => Ok(RegClass::V128),
+            F32 | F64 => Ok(RegClass::V128),
+            _ if ty.bits() == 128 => Ok(RegClass::V128),
            IFLAGS | FFLAGS => Ok(RegClass::I64),
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {}",
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -1475,8 +1475,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                _ => false,
            };

-            let is_float = is_float_ty(elem_ty);
-
            let addr = match op {
                Opcode::Load
                | Opcode::Uload8
@@ -1513,7 +1511,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            let srcloc = Some(ctx.srcloc(insn));

            let dst = output_to_reg(ctx, outputs[0]);
-            match (sign_extend, is_float) {
+            let is_xmm = elem_ty.is_float() || elem_ty.is_vector();
+            match (sign_extend, is_xmm) {
                (true, false) => {
                    // The load is sign-extended only when the output size is lower than 64 bits,
                    // so ext-mode is defined in this case.
@@ -1542,6 +1541,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                    ctx.emit(match elem_ty {
                        F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(addr), dst, srcloc),
                        F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(addr), dst, srcloc),
+                        _ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
+                            Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(addr), dst, srcloc)
+                        } // TODO Specialize for different types: MOVUPD, MOVDQU
                        _ => unreachable!("unexpected type for load: {:?}", elem_ty),
                    });
                }
@@ -1565,7 +1567,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0),
                _ => unreachable!(),
            };
-            let is_float = is_float_ty(elem_ty);

            let addr = match op {
                Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
@@ -1599,15 +1600,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(

            let srcloc = Some(ctx.srcloc(insn));

-            if is_float {
-                ctx.emit(match elem_ty {
-                    F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr, srcloc),
-                    F64 => Inst::xmm_mov_r_m(SseOpcode::Movsd, src, addr, srcloc),
-                    _ => panic!("unexpected type for store {:?}", elem_ty),
-                });
-            } else {
-                ctx.emit(Inst::mov_r_m(elem_ty.bytes() as u8, src, addr, srcloc));
-            }
+            ctx.emit(match elem_ty {
+                F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr, srcloc),
+                F64 => Inst::xmm_mov_r_m(SseOpcode::Movsd, src, addr, srcloc),
+                _ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
+                    // TODO Specialize for different types: MOVUPD, MOVDQU, etc.
+                    Inst::xmm_mov_r_m(SseOpcode::Movups, src, addr, srcloc)
+                }
+                _ => Inst::mov_r_m(elem_ty.bytes() as u8, src, addr, srcloc),
+            });
        }

        Opcode::FuncAddr => {
@@ -1815,6 +1816,17 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            ));
        }

+        Opcode::RawBitcast => {
+            // A raw_bitcast is just a mechanism for correcting the type of V128 values (see
+            // https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR
+            // instruction should emit no machine code but a move is necessary to give the register
+            // allocator a definition for the output virtual register.
+            let src = input_to_reg(ctx, inputs[0]);
+            let dst = output_to_reg(ctx, outputs[0]);
+            let ty = ty.unwrap();
+            ctx.emit(Inst::gen_move(dst, src, ty));
+        }
+
        Opcode::IaddImm
        | Opcode::ImulImm
        | Opcode::UdivImm