x64: implement vselect with variable blend instructions

This change implements `vselect` using SSE4.1's `BLENDVPS`, `BLENDVPD`,
and `PBLENDVB`. `vselect` is a lane-selecting instruction that is used
by
[simple_preopt.rs](fa1faf5d22/cranelift/codegen/src/simple_preopt.rs (L947-L999))
to lower `bitselect` to a single x86 instruction when the condition mask
is known to be boolean (all 1s or 0s, e.g., from a conversion). This is
better than `bitselect` in general, which lowers to 4-5 instructions.
The old backend had the `vselect` lowering; this simply introduces it to
the new backend.
This commit is contained in:
Andrew Brown
2021-05-13 20:04:40 -07:00
parent 0742bb4699
commit 7ef3ae2903
7 changed files with 93 additions and 2 deletions

View File

@@ -478,6 +478,7 @@ pub enum SseOpcode {
Andnps,
Andnpd,
Blendvpd,
Blendvps,
Comiss,
Comisd,
Cmpps,
@@ -547,6 +548,7 @@ pub enum SseOpcode {
Pandn,
Pavgb,
Pavgw,
Pblendvb,
Pcmpeqb,
Pcmpeqw,
Pcmpeqd,
@@ -769,8 +771,10 @@ impl SseOpcode {
| SseOpcode::Pshufb => SSSE3,
SseOpcode::Blendvpd
| SseOpcode::Blendvps
| SseOpcode::Insertps
| SseOpcode::Packusdw
| SseOpcode::Pblendvb
| SseOpcode::Pcmpeqq
| SseOpcode::Pextrb
| SseOpcode::Pextrd
@@ -828,6 +832,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Andnps => "andnps",
SseOpcode::Andnpd => "andnpd",
SseOpcode::Blendvpd => "blendvpd",
SseOpcode::Blendvps => "blendvps",
SseOpcode::Cmpps => "cmpps",
SseOpcode::Cmppd => "cmppd",
SseOpcode::Cmpss => "cmpss",
@@ -897,6 +902,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Pandn => "pandn",
SseOpcode::Pavgb => "pavgb",
SseOpcode::Pavgw => "pavgw",
SseOpcode::Pblendvb => "pblendvb",
SseOpcode::Pcmpeqb => "pcmpeqb",
SseOpcode::Pcmpeqw => "pcmpeqw",
SseOpcode::Pcmpeqd => "pcmpeqd",