x64: Improve codegen for splats (#6025)
This commit goes through the lowerings for the CLIF `splat` instruction
and improves the support for each operator. Many of these lowerings are
mirrored from v8/SpiderMonkey and there are a number of improvements:
* AVX2 `v{p,}broadcast*` instructions are added and used when available.
* Float-based splats are much simpler and are always a single instruction.
* Integer-based splats don't insert into an uninit xmm value and instead
start out with a `movd` to move into an `xmm` register. This
theoretically breaks dependencies with prior instructions since `movd`
creates a fresh new value in the destination register.
* Loads are now sunk into all of the instructions. A new extractor,
`sinkable_load_exact`, was added to sink the i8/i16 loads.
This commit is contained in:
@@ -928,6 +928,7 @@ pub(crate) enum InstructionSet {
|
||||
BMI2,
|
||||
FMA,
|
||||
AVX,
|
||||
AVX2,
|
||||
AVX512BITALG,
|
||||
AVX512DQ,
|
||||
AVX512F,
|
||||
@@ -1126,6 +1127,7 @@ pub enum SseOpcode {
|
||||
Pshuflw,
|
||||
Pshufhw,
|
||||
Pblendw,
|
||||
Movddup,
|
||||
}
|
||||
|
||||
impl SseOpcode {
|
||||
@@ -1280,7 +1282,8 @@ impl SseOpcode {
|
||||
| SseOpcode::Pmulhrsw
|
||||
| SseOpcode::Pshufb
|
||||
| SseOpcode::Phaddw
|
||||
| SseOpcode::Phaddd => SSSE3,
|
||||
| SseOpcode::Phaddd
|
||||
| SseOpcode::Movddup => SSSE3,
|
||||
|
||||
SseOpcode::Blendvpd
|
||||
| SseOpcode::Blendvps
|
||||
@@ -1524,6 +1527,7 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Pshuflw => "pshuflw",
|
||||
SseOpcode::Pshufhw => "pshufhw",
|
||||
SseOpcode::Pblendw => "pblendw",
|
||||
SseOpcode::Movddup => "movddup",
|
||||
};
|
||||
write!(fmt, "{}", name)
|
||||
}
|
||||
@@ -1709,9 +1713,15 @@ impl AvxOpcode {
|
||||
| AvxOpcode::Vpextrw
|
||||
| AvxOpcode::Vpextrd
|
||||
| AvxOpcode::Vpextrq
|
||||
| AvxOpcode::Vpblendw => {
|
||||
| AvxOpcode::Vpblendw
|
||||
| AvxOpcode::Vmovddup
|
||||
| AvxOpcode::Vbroadcastss => {
|
||||
smallvec![InstructionSet::AVX]
|
||||
}
|
||||
|
||||
AvxOpcode::Vpbroadcastb | AvxOpcode::Vpbroadcastw | AvxOpcode::Vpbroadcastd => {
|
||||
smallvec![InstructionSet::AVX2]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -122,6 +122,7 @@ pub(crate) fn emit(
|
||||
InstructionSet::BMI2 => info.isa_flags.has_bmi2(),
|
||||
InstructionSet::FMA => info.isa_flags.has_fma(),
|
||||
InstructionSet::AVX => info.isa_flags.has_avx(),
|
||||
InstructionSet::AVX2 => info.isa_flags.has_avx2(),
|
||||
InstructionSet::AVX512BITALG => info.isa_flags.has_avx512bitalg(),
|
||||
InstructionSet::AVX512DQ => info.isa_flags.has_avx512dq(),
|
||||
InstructionSet::AVX512F => info.isa_flags.has_avx512f(),
|
||||
@@ -1826,6 +1827,7 @@ pub(crate) fn emit(
|
||||
SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51, 2),
|
||||
SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51, 2),
|
||||
SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51, 2),
|
||||
SseOpcode::Movddup => (LegacyPrefixes::_F2, 0x0F12, 2),
|
||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||
};
|
||||
|
||||
@@ -2450,6 +2452,13 @@ pub(crate) fn emit(
|
||||
RegisterOrAmode::Amode(_) => (LegacyPrefixes::_F2, OpcodeMap::_0F, 0x10),
|
||||
_ => unreachable!(),
|
||||
},
|
||||
|
||||
AvxOpcode::Vpbroadcastb => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x78),
|
||||
AvxOpcode::Vpbroadcastw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x79),
|
||||
AvxOpcode::Vpbroadcastd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x58),
|
||||
AvxOpcode::Vbroadcastss => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x18),
|
||||
AvxOpcode::Vmovddup => (LegacyPrefixes::_F2, OpcodeMap::_0F, 0x12),
|
||||
|
||||
_ => panic!("unexpected rmr_imm_vex opcode {op:?}"),
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user