x64: implement vselect with variable blend instructions
This change implements `vselect` using SSE4.1's `BLENDVPS`, `BLENDVPD`,
and `PBLENDVB`. `vselect` is a lane-selecting instruction that is used
by
[simple_preopt.rs](fa1faf5d22/cranelift/codegen/src/simple_preopt.rs#L947-L999)
to lower `bitselect` to a single x86 instruction when the condition mask
is known to be boolean (every lane all 1s or all 0s, e.g., from a conversion).
This is better than the general `bitselect` lowering, which takes 4-5 instructions.
The old backend had the `vselect` lowering; this simply introduces it to
the new backend.
This commit is contained in:
@@ -1927,13 +1927,20 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::XmmRmR { src, dst, .. } => {
|
||||
Inst::XmmRmR { src, dst, op, .. } => {
|
||||
if inst.produces_const() {
|
||||
// No need to account for src, since src == dst.
|
||||
collector.add_def(*dst);
|
||||
} else {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_mod(*dst);
|
||||
// Some instructions have an implicit use of XMM0.
|
||||
if *op == SseOpcode::Blendvpd
|
||||
|| *op == SseOpcode::Blendvps
|
||||
|| *op == SseOpcode::Pblendvb
|
||||
{
|
||||
collector.add_use(regs::xmm0());
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::XmmRmREvex {
|
||||
|
||||
Reference in New Issue
Block a user