x64: Fix codegen for the select instruction with v128 (#4317)

This commit fixes a bug in the previous codegen for the `select` instruction when the operands of the `select` were of the `v128` type. Previously the `XmmCmove` instruction only stored an `OperandSize` of 32 or 64 for a 32- or 64-bit move, but this was also used for these 128-bit types, which meant that the wrong move instruction was generated for them. The fix applied here is to store the whole `Type` being moved so the 128-bit variant can be selected as well.
2022-06-27 13:02:40 -05:00
parent 23ae9016af
commit 8bb07523e2
5 changed files with 49 additions and 28 deletions
--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -168,7 +168,7 @@
              (dst WritableGpr))

       ;; XMM conditional move; overwrites the destination register.
-       (XmmCmove (size OperandSize)
+       (XmmCmove (ty Type)
                 (cc CC)
                 (consequent XmmMem)
                 (alternative Xmm)
@@ -1896,10 +1896,9 @@

 (decl cmove_xmm (Type CC XmmMem Xmm) ConsumesFlags)
 (rule (cmove_xmm ty cc consequent alternative)
-      (let ((dst WritableXmm (temp_writable_xmm))
-            (size OperandSize (operand_size_of_type_32_64 ty)))
+      (let ((dst WritableXmm (temp_writable_xmm)))
        (ConsumesFlags.ConsumesFlagsReturnsReg
-         (MInst.XmmCmove size cc consequent alternative dst)
+         (MInst.XmmCmove ty cc consequent alternative dst)
         dst)))

 ;; Helper for creating `cmove` instructions directly from values. This allows us
@@ -1952,9 +1951,8 @@
 (rule (cmove_or_xmm ty cc1 cc2 consequent alternative)
      (let ((dst WritableXmm (temp_writable_xmm))
            (tmp WritableXmm (temp_writable_xmm))
-            (size OperandSize (operand_size_of_type_32_64 ty))
-            (cmove1 MInst (MInst.XmmCmove size cc1 consequent alternative tmp))
-            (cmove2 MInst (MInst.XmmCmove size cc2 consequent tmp dst)))
+            (cmove1 MInst (MInst.XmmCmove ty cc1 consequent alternative tmp))
+            (cmove2 MInst (MInst.XmmCmove ty cc2 consequent tmp dst)))
        (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
         cmove1
         cmove2
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1112,7 +1112,7 @@ pub(crate) fn emit(
        }

        Inst::XmmCmove {
-            size,
+            ty,
            cc,
            consequent,
            alternative,
@@ -1130,10 +1130,15 @@ pub(crate) fn emit(
            // Jump if cc is *not* set.
            one_way_jmp(sink, cc.invert(), next);

-            let op = if *size == OperandSize::Size64 {
-                SseOpcode::Movsd
-            } else {
-                SseOpcode::Movss
+            let op = match *ty {
+                types::F64 => SseOpcode::Movsd,
+                types::F32 => SseOpcode::Movss, // 32-bit scalar move; `Movsd` would move 64 bits
+                types::F32X4 => SseOpcode::Movaps,
+                types::F64X2 => SseOpcode::Movapd,
+                ty => {
+                    debug_assert!(ty.is_vector() && ty.bytes() == 16);
+                    SseOpcode::Movdqa
+                }
             };
            let inst = Inst::xmm_unary_rm_r(op, consequent, Writable::from_reg(dst));
            inst.emit(&[], sink, info, state);
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -617,14 +617,14 @@ impl Inst {
        }
    }

-    pub(crate) fn xmm_cmove(size: OperandSize, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
-        debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
+    pub(crate) fn xmm_cmove(ty: Type, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
+        debug_assert!(ty == types::F32 || ty == types::F64 || ty.is_vector());
        src.assert_regclass_is(RegClass::Float);
        debug_assert!(dst.to_reg().class() == RegClass::Float);
        let src = XmmMem::new(src).unwrap();
        let dst = WritableXmm::from_writable_reg(dst).unwrap();
        Inst::XmmCmove {
-            size,
+            ty,
            cc,
            consequent: src,
            alternative: dst.to_reg(),
@@ -1507,23 +1507,26 @@ impl PrettyPrint for Inst {
            }

            Inst::XmmCmove {
-                size,
+                ty,
                cc,
                consequent,
                alternative,
                dst,
                ..
            } => {
-                let alternative = pretty_print_reg(alternative.to_reg(), size.to_bytes(), allocs);
-                let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
-                let consequent = consequent.pretty_print(size.to_bytes(), allocs);
+                let size = u8::try_from(ty.bytes()).unwrap();
+                let alternative = pretty_print_reg(alternative.to_reg(), size, allocs);
+                let dst = pretty_print_reg(dst.to_reg().to_reg(), size, allocs);
+                let consequent = consequent.pretty_print(size, allocs);
                format!(
                    "mov {}, {}; j{} $next; mov{} {}, {}; $next: ",
                    cc.invert().to_string(),
-                    if *size == OperandSize::Size64 {
-                        "sd"
-                    } else {
-                        "ss"
+                    match *ty {
+                        types::F64 => "sd",
+                        types::F32 => "ss",
+                        types::F32X4 => "aps",
+                        types::F64X2 => "apd",
+                        _ => "dqa",
                    },
                    consequent,
                    dst,
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -2271,11 +2271,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                debug_assert!(ty == types::F32 || ty == types::F64);
                emit_moves(ctx, dst, rhs, ty);
                ctx.emit(Inst::xmm_cmove(
-                    if ty == types::F64 {
-                        OperandSize::Size64
-                    } else {
-                        OperandSize::Size32
-                    },
+                    ty,
                    cc,
                    RegMem::reg(lhs.only_reg().unwrap()),
                    dst.only_reg().unwrap(),