x64: implement vselect with variable blend instructions
This change implements `vselect` using SSE4.1's `BLENDVPS`, `BLENDVPD`,
and `PBLENDVB`. `vselect` is a lane-selecting instruction that is used
by
[simple_preopt.rs](fa1faf5d22/cranelift/codegen/src/simple_preopt.rs#L947-L999)
to lower `bitselect` to a single x86 instruction when the condition mask
is known to be boolean (all 1s or all 0s, e.g., from a conversion). This is
better than the general `bitselect` lowering, which takes 4-5 instructions.
The old backend had the `vselect` lowering; this simply introduces it to
the new backend.
This commit is contained in:
@@ -478,6 +478,7 @@ pub enum SseOpcode {
|
||||
Andnps,
|
||||
Andnpd,
|
||||
Blendvpd,
|
||||
Blendvps,
|
||||
Comiss,
|
||||
Comisd,
|
||||
Cmpps,
|
||||
@@ -547,6 +548,7 @@ pub enum SseOpcode {
|
||||
Pandn,
|
||||
Pavgb,
|
||||
Pavgw,
|
||||
Pblendvb,
|
||||
Pcmpeqb,
|
||||
Pcmpeqw,
|
||||
Pcmpeqd,
|
||||
@@ -769,8 +771,10 @@ impl SseOpcode {
|
||||
| SseOpcode::Pshufb => SSSE3,
|
||||
|
||||
SseOpcode::Blendvpd
|
||||
| SseOpcode::Blendvps
|
||||
| SseOpcode::Insertps
|
||||
| SseOpcode::Packusdw
|
||||
| SseOpcode::Pblendvb
|
||||
| SseOpcode::Pcmpeqq
|
||||
| SseOpcode::Pextrb
|
||||
| SseOpcode::Pextrd
|
||||
@@ -828,6 +832,7 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Andnps => "andnps",
|
||||
SseOpcode::Andnpd => "andnpd",
|
||||
SseOpcode::Blendvpd => "blendvpd",
|
||||
SseOpcode::Blendvps => "blendvps",
|
||||
SseOpcode::Cmpps => "cmpps",
|
||||
SseOpcode::Cmppd => "cmppd",
|
||||
SseOpcode::Cmpss => "cmpss",
|
||||
@@ -897,6 +902,7 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Pandn => "pandn",
|
||||
SseOpcode::Pavgb => "pavgb",
|
||||
SseOpcode::Pavgw => "pavgw",
|
||||
SseOpcode::Pblendvb => "pblendvb",
|
||||
SseOpcode::Pcmpeqb => "pcmpeqb",
|
||||
SseOpcode::Pcmpeqw => "pcmpeqw",
|
||||
SseOpcode::Pcmpeqd => "pcmpeqd",
|
||||
|
||||
@@ -1441,6 +1441,7 @@ pub(crate) fn emit(
|
||||
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
|
||||
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
|
||||
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
|
||||
SseOpcode::Blendvps => (LegacyPrefixes::_66, 0x0F3814, 3),
|
||||
SseOpcode::Blendvpd => (LegacyPrefixes::_66, 0x0F3815, 3),
|
||||
SseOpcode::Cvttps2dq => (LegacyPrefixes::_F3, 0x0F5B, 2),
|
||||
SseOpcode::Cvtdq2ps => (LegacyPrefixes::None, 0x0F5B, 2),
|
||||
@@ -1480,6 +1481,7 @@ pub(crate) fn emit(
|
||||
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
|
||||
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
|
||||
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
|
||||
SseOpcode::Pblendvb => (LegacyPrefixes::_66, 0x0F3810, 3),
|
||||
SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),
|
||||
SseOpcode::Pcmpeqw => (LegacyPrefixes::_66, 0x0F75, 2),
|
||||
SseOpcode::Pcmpeqd => (LegacyPrefixes::_66, 0x0F76, 2),
|
||||
|
||||
@@ -3432,6 +3432,18 @@ fn test_x64_emit() {
|
||||
"blendvpd %xmm15, %xmm4",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Blendvps, RegMem::reg(xmm2), w_xmm3),
|
||||
"660F3814DA",
|
||||
"blendvps %xmm2, %xmm3",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pblendvb, RegMem::reg(xmm12), w_xmm13),
|
||||
"66450F3810EC",
|
||||
"pblendvb %xmm12, %xmm13",
|
||||
));
|
||||
|
||||
// ========================================================
|
||||
// XMM_RM_R: Integer Packed
|
||||
|
||||
|
||||
@@ -1927,13 +1927,20 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::XmmRmR { src, dst, .. } => {
|
||||
Inst::XmmRmR { src, dst, op, .. } => {
|
||||
if inst.produces_const() {
|
||||
// No need to account for src, since src == dst.
|
||||
collector.add_def(*dst);
|
||||
} else {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_mod(*dst);
|
||||
// Some instructions have an implicit use of XMM0.
|
||||
if *op == SseOpcode::Blendvpd
|
||||
|| *op == SseOpcode::Blendvps
|
||||
|| *op == SseOpcode::Pblendvb
|
||||
{
|
||||
collector.add_use(regs::xmm0());
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::XmmRmREvex {
|
||||
|
||||
Reference in New Issue
Block a user