x64: Add AVX support for some more float-related instructions (#6092)

* x64: Add AVX encodings of `vcvt{ss2sd,sd2ss}`

Additionally, update the instruction helpers to take an `XmmMem` argument
so that loads can be sunk into the instruction (see the sketch after this list).

* x64: Add AVX encoding of `sqrts{s,d}`

* x64: Add AVX support for `rounds{s,d}`
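
As a rough illustration of the load-sinking point above: the `XmmMem` enum and `vcvtss2sd` helper below are simplified stand-ins for illustration only, not Cranelift's actual types or helpers. A helper that only accepts a register forces a separate load instruction, while a register-or-memory argument lets the lowering fold the load into the conversion itself.

```rust
// Illustrative stand-ins only; Cranelift's real `Xmm`/`XmmMem` newtypes and
// instruction helpers are more involved than this.
#[derive(Clone, Copy)]
enum XmmMem {
    Reg(u8),  // an XMM register number
    Mem(u64), // an address the instruction can load from directly
}

// Accepting `XmmMem` rather than a register lets a load feeding the
// conversion be "sunk" into the instruction's memory operand, e.g.
// `vcvtss2sd xmm0, xmm0, [0x1000]` instead of a separate load + convert.
fn vcvtss2sd(dst: u8, src: XmmMem) -> String {
    match src {
        XmmMem::Reg(r) => format!("vcvtss2sd xmm{dst}, xmm{dst}, xmm{r}"),
        XmmMem::Mem(addr) => format!("vcvtss2sd xmm{dst}, xmm{dst}, [{addr:#x}]"),
    }
}

fn main() {
    assert_eq!(vcvtss2sd(0, XmmMem::Reg(1)), "vcvtss2sd xmm0, xmm0, xmm1");
    assert_eq!(vcvtss2sd(0, XmmMem::Mem(0x1000)), "vcvtss2sd xmm0, xmm0, [0x1000]");
}
```
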
Author: Alex Crichton
Date: 2023-03-29 13:09:49 -05:00 (committed by GitHub)
Parent: afb417920d
Commit: 0b0ac3ff73
8 changed files with 543 additions and 11 deletions
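
One detail worth calling out before the diff: the scalar AVX forms added here (`vcvtss2sd`, `vcvtsd2ss`, `vsqrtss`, `vsqrtsd`, `vroundss`, `vroundsd`) take an extra source operand through the VEX `vvvv` field, which supplies the upper lanes of the destination; the SSE forms have no such operand, so the emitter below reuses the destination register for it (see the comment in the `XmmUnaryRmRVex` arm). The sketch that follows is a hand-rolled two-byte VEX encoder for illustration, not Cranelift's `VexInstruction` builder, showing where `vvvv` lands for `vcvtss2sd xmm0, xmm0, xmm1`:

```rust
// Illustrative two-byte VEX (0xC5) prefix encoder; field layout is
// bit 7 = !R, bits 6:3 = !vvvv, bit 2 = L, bits 1:0 = pp.
fn vex2(reg: u8, vvvv: u8, pp: u8) -> [u8; 2] {
    let r_bar = (!(reg >> 3) & 1) << 7; // inverted bit 3 of ModRM.reg
    let vvvv_bar = (!vvvv & 0xf) << 3;  // inverted extra source register
    let l = 0 << 2;                     // 128-bit vector length
    [0xc5, r_bar | vvvv_bar | l | pp]
}

fn main() {
    let (dst, src) = (0u8, 1u8); // xmm0, xmm1
    let pp = 0b10;               // F3 legacy prefix, as used by `vcvtss2sd`
    // Pass the destination as `vvvv` so its upper bits are merged back in,
    // mirroring what the emitter in this commit does.
    let mut bytes = vex2(dst, dst, pp).to_vec();
    bytes.push(0x5a);                    // vcvtss2sd opcode
    bytes.push(0xc0 | (dst << 3) | src); // ModRM: mod=11, reg=dst, rm=src
    assert_eq!(bytes, [0xc5, 0xfa, 0x5a, 0xc1]); // vcvtss2sd xmm0, xmm0, xmm1
}
```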

@@ -2405,17 +2405,36 @@ pub(crate) fn emit(
AvxOpcode::Vbroadcastss => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x18),
AvxOpcode::Vmovddup => (LegacyPrefixes::_F2, OpcodeMap::_0F, 0x12),
AvxOpcode::Vcvtss2sd => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x5A),
AvxOpcode::Vcvtsd2ss => (LegacyPrefixes::_F2, OpcodeMap::_0F, 0x5A),
AvxOpcode::Vsqrtss => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x51),
AvxOpcode::Vsqrtsd => (LegacyPrefixes::_F2, OpcodeMap::_0F, 0x51),
_ => panic!("unexpected rmr_imm_vex opcode {op:?}"),
};
VexInstruction::new()
let vex = VexInstruction::new()
.length(VexVectorLength::V128)
.prefix(prefix)
.map(map)
.opcode(opcode)
.reg(dst.to_real_reg().unwrap().hw_enc())
.rm(src)
.encode(sink);
.rm(src);
// These opcodes take a second operand through `vvvv` which copies
// the upper bits into the destination register. That's not
// reflected in the CLIF instruction, however, since the SSE version
// doesn't have this functionality. Instead just copy whatever
// happens to already be in the destination, which at least is what
// LLVM seems to do.
let vex = match op {
AvxOpcode::Vcvtss2sd
| AvxOpcode::Vcvtsd2ss
| AvxOpcode::Vsqrtss
| AvxOpcode::Vsqrtsd => vex.vvvv(dst.to_real_reg().unwrap().hw_enc()),
_ => vex,
};
vex.encode(sink);
}
Inst::XmmUnaryRmRImmVex { op, src, dst, imm } => {
@@ -2433,18 +2452,29 @@ pub(crate) fn emit(
AvxOpcode::Vpshuflw => (LegacyPrefixes::_F2, OpcodeMap::_0F, 0x70),
AvxOpcode::Vpshufhw => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x70),
AvxOpcode::Vpshufd => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x70),
AvxOpcode::Vroundss => (LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x0A),
AvxOpcode::Vroundsd => (LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x0B),
_ => panic!("unexpected rmr_imm_vex opcode {op:?}"),
};
VexInstruction::new()
let vex = VexInstruction::new()
.length(VexVectorLength::V128)
.prefix(prefix)
.map(map)
.opcode(opcode)
.reg(dst.to_real_reg().unwrap().hw_enc())
.rm(src)
.imm(*imm)
.encode(sink);
.imm(*imm);
// See comments in similar block above in `XmmUnaryRmRVex` for what
// this is doing.
let vex = match op {
AvxOpcode::Vroundss | AvxOpcode::Vroundsd => {
vex.vvvv(dst.to_real_reg().unwrap().hw_enc())
}
_ => vex,
};
vex.encode(sink);
}
Inst::XmmMovRMVex { op, src, dst } => {