x64: Improve codegen for splats (#6025)

This commit goes through the lowerings for the CLIF `splat` instruction and improves the support for each operator. Many of these lowerings are mirrored from v8/SpiderMonkey and there are a number of improvements: * AVX2 `v{p,}broadcast*` instructions are added and used when available. * Float-based splats are much simpler and always a single instruction. * Integer-based splats don't insert into an uninit xmm value and instead start out with a `movd` to move into an `xmm` register. This theoretically breaks dependencies with prior instructions since `movd` creates a fresh new value in the destination register. * Loads are now sunk into all of the instructions. A new extractor, `sinkable_load_exact`, was added to sink the i8/i16 loads.
2023-03-15 16:33:56 -05:00
parent a10c50afe9
commit d76f7ee52e
12 changed files with 1216 additions and 82 deletions
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -928,6 +928,7 @@ pub(crate) enum InstructionSet {
    BMI2,
    FMA,
    AVX,
+    AVX2,
    AVX512BITALG,
    AVX512DQ,
    AVX512F,
@@ -1126,6 +1127,7 @@ pub enum SseOpcode {
    Pshuflw,
    Pshufhw,
    Pblendw,
+    Movddup,
 }

 impl SseOpcode {
@@ -1280,7 +1282,8 @@ impl SseOpcode {
            | SseOpcode::Pmulhrsw
            | SseOpcode::Pshufb
            | SseOpcode::Phaddw
-            | SseOpcode::Phaddd => SSSE3,
+            | SseOpcode::Phaddd
+            | SseOpcode::Movddup => SSSE3,

            SseOpcode::Blendvpd
            | SseOpcode::Blendvps
@@ -1524,6 +1527,7 @@ impl fmt::Debug for SseOpcode {
            SseOpcode::Pshuflw => "pshuflw",
            SseOpcode::Pshufhw => "pshufhw",
            SseOpcode::Pblendw => "pblendw",
+            SseOpcode::Movddup => "movddup",
        };
        write!(fmt, "{}", name)
    }
@@ -1709,9 +1713,15 @@ impl AvxOpcode {
            | AvxOpcode::Vpextrw
            | AvxOpcode::Vpextrd
            | AvxOpcode::Vpextrq
-            | AvxOpcode::Vpblendw => {
+            | AvxOpcode::Vpblendw
+            | AvxOpcode::Vmovddup
+            | AvxOpcode::Vbroadcastss => {
                smallvec![InstructionSet::AVX]
            }
+
+            AvxOpcode::Vpbroadcastb | AvxOpcode::Vpbroadcastw | AvxOpcode::Vpbroadcastd => {
+                smallvec![InstructionSet::AVX2]
+            }
        }
    }
 }