x64: Add non-SSE4.1 lowerings of pmov{s,z}x* (#6279)

* x64: Add non-SSE4.1 lowerings of `pmov{s,z}x*`

This commit adds lowerings for a suite of sign/zero extension
instructions, where the new lowerings don't require SSE4.1. As before,
these lowerings are based on LLVM's output.

This commit also deletes special cases for `i16x8.extmul_{low,high}_*`
since the output of the special case is the same as the default lowering
of all the component instructions used within as well.

* Remove SSE4.1 specialization of `uwiden_high`

LLVM prefers the `punpckh*`-based lowerings and, at least according to
`llvm-mca`, these are slightly better cycle-wise too.
This commit is contained in:
Alex Crichton
2023-04-26 21:15:34 -05:00
committed by GitHub
parent 57dabd3b0b
commit edae6c0217
13 changed files with 209 additions and 159 deletions

View File

@@ -1116,6 +1116,7 @@ pub enum SseOpcode {
Ucomiss,
Ucomisd,
Unpcklps,
Unpckhps,
Xorps,
Xorpd,
Phaddw,
@@ -1168,6 +1169,7 @@ impl SseOpcode {
| SseOpcode::Subss
| SseOpcode::Ucomiss
| SseOpcode::Unpcklps
| SseOpcode::Unpckhps
| SseOpcode::Xorps => SSE,
SseOpcode::Addpd
@@ -1516,6 +1518,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Ucomiss => "ucomiss",
SseOpcode::Ucomisd => "ucomisd",
SseOpcode::Unpcklps => "unpcklps",
SseOpcode::Unpckhps => "unpckhps",
SseOpcode::Xorps => "xorps",
SseOpcode::Xorpd => "xorpd",
SseOpcode::Phaddw => "phaddw",
@@ -1611,6 +1614,7 @@ impl AvxOpcode {
| AvxOpcode::Vpunpckhwd
| AvxOpcode::Vpunpcklwd
| AvxOpcode::Vunpcklps
| AvxOpcode::Vunpckhps
| AvxOpcode::Vaddps
| AvxOpcode::Vaddpd
| AvxOpcode::Vsubps

View File

@@ -2060,6 +2060,7 @@ pub(crate) fn emit(
SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
SseOpcode::Subsd => (LegacyPrefixes::_F2, 0x0F5C, 2),
SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2),
SseOpcode::Unpckhps => (LegacyPrefixes::None, 0x0F15, 2),
SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2),
SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2),
SseOpcode::Phaddw => (LegacyPrefixes::_66, 0x0F3801, 3),
@@ -2206,6 +2207,7 @@ pub(crate) fn emit(
AvxOpcode::Vpunpckhwd => (LP::_66, OM::_0F, 0x69),
AvxOpcode::Vpunpcklwd => (LP::_66, OM::_0F, 0x61),
AvxOpcode::Vunpcklps => (LP::None, OM::_0F, 0x14),
AvxOpcode::Vunpckhps => (LP::None, OM::_0F, 0x15),
AvxOpcode::Vaddps => (LP::None, OM::_0F, 0x58),
AvxOpcode::Vaddpd => (LP::_66, OM::_0F, 0x58),
AvxOpcode::Vsubps => (LP::None, OM::_0F, 0x5C),