x64: Improve codegen for splats (#6025)

This commit goes through the lowerings for the CLIF `splat` instruction
and improves the support for each operator. Many of these lowerings are
mirrored from v8/SpiderMonkey and there are a number of improvements:

* AVX2 `v{p,}broadcast*` instructions are added and used when available.
* Float-based splats are much simpler and are always a single instruction.
* Integer-based splats don't insert into an uninit xmm value and instead
  start out with a `movd` to move into an `xmm` register. This
  theoretically breaks dependencies with prior instructions since `movd`
  creates a fresh new value in the destination register.
* Loads are now sunk into all of the instructions. A new extractor,
  `sinkable_load_exact`, was added to sink the i8/i16 loads.
This commit is contained in:
Alex Crichton
2023-03-15 16:33:56 -05:00
committed by GitHub
parent a10c50afe9
commit d76f7ee52e
12 changed files with 1216 additions and 82 deletions

View File

@@ -928,6 +928,7 @@ pub(crate) enum InstructionSet {
BMI2,
FMA,
AVX,
AVX2,
AVX512BITALG,
AVX512DQ,
AVX512F,
@@ -1126,6 +1127,7 @@ pub enum SseOpcode {
Pshuflw,
Pshufhw,
Pblendw,
Movddup,
}
impl SseOpcode {
@@ -1280,7 +1282,8 @@ impl SseOpcode {
| SseOpcode::Pmulhrsw
| SseOpcode::Pshufb
| SseOpcode::Phaddw
| SseOpcode::Phaddd => SSSE3,
| SseOpcode::Phaddd
| SseOpcode::Movddup => SSSE3,
SseOpcode::Blendvpd
| SseOpcode::Blendvps
@@ -1524,6 +1527,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Pshuflw => "pshuflw",
SseOpcode::Pshufhw => "pshufhw",
SseOpcode::Pblendw => "pblendw",
SseOpcode::Movddup => "movddup",
};
write!(fmt, "{}", name)
}
@@ -1709,9 +1713,15 @@ impl AvxOpcode {
| AvxOpcode::Vpextrw
| AvxOpcode::Vpextrd
| AvxOpcode::Vpextrq
| AvxOpcode::Vpblendw => {
| AvxOpcode::Vpblendw
| AvxOpcode::Vmovddup
| AvxOpcode::Vbroadcastss => {
smallvec![InstructionSet::AVX]
}
AvxOpcode::Vpbroadcastb | AvxOpcode::Vpbroadcastw | AvxOpcode::Vpbroadcastd => {
smallvec![InstructionSet::AVX2]
}
}
}
}