x64: Lower shuffle and swizzle in ISLE (#4772)

Lower `shuffle` and `swizzle` in ISLE.

This PR surfaced a bug with the lowering of `shuffle` when avx512vl and avx512vbmi are enabled: we use `vpermi2b` as the implementation, but panic if the immediate shuffle mask contains any out-of-bounds values. The behavior when the avx512 extensions are not present is that out-of-bounds values are turned into `0` in the result.

I've resolved this by detecting when the shuffle immediate has out-of-bounds indices in the avx512-enabled lowering, and generating an additional mask to zero out the lanes where those indices occur. This brings the avx512 case into line with the semantics of the `shuffle` op, as documented in `cranelift/codegen/meta/src/shared/instructions.rs` (lines 1495-1498 at commit 94bcbe8446).
This commit is contained in:
Trevor Elliott
2022-08-24 14:49:51 -07:00
committed by GitHub
parent b4c25ef63e
commit b8b6f2781e
12 changed files with 295 additions and 190 deletions

View File

@@ -34,6 +34,7 @@ use crate::{
VCodeConstantData,
},
};
use alloc::vec::Vec;
use regalloc2::PReg;
use smallvec::SmallVec;
use std::boxed::Box;
@@ -200,6 +201,15 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
}
}
/// Yields `Some(())` when the ISA flags report `use_avx512vbmi_simd`, and `None` otherwise.
///
/// The `Type` argument is ignored.
#[inline]
fn avx512vbmi_enabled(&mut self, _: Type) -> Option<()> {
    self.isa_flags.use_avx512vbmi_simd().then(|| ())
}
#[inline]
fn use_lzcnt(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_lzcnt() {
@@ -839,6 +849,73 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
Writable::from_reg(Gpr::new(regs::pinned_reg()).unwrap())
}
/// Builds a byte-shuffle control constant from a shuffle immediate whose indices span
/// `0..=31` but are all resolved against a single 16-lane vector.
///
/// Indices `16..=31` are folded onto lanes `0..=15` by subtracting 16, the same rebasing
/// that `shuffle_16_31_mask` applies. Any value still above 15 after folding (that is, an
/// original index above 31) is replaced with `0b1000_0000`.
///
/// NOTE(review): presumably consumed by a `pshufb`-style instruction when both shuffle
/// operands are the same register, where a set high bit in the control byte zeroes the
/// destination lane. Confirm against the ISLE rule that calls this.
#[inline]
fn shuffle_0_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
    let mask = mask
        .iter()
        // Fold the upper half onto the lower half. The offset must be 16, not 15:
        // subtracting 15 maps index 16 to lane 1 (instead of lane 0) and pushes index 31
        // to 16, which the next step would then wrongly zero.
        .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
        // Whatever is still out of range gets the high bit set.
        .map(|b| if b > 15 { 0b10000000 } else { b })
        .collect();
    self.lower_ctx
        .use_constant(VCodeConstantData::Generated(mask))
}
/// Builds a constant from the shuffle immediate in which indices `0..=15` are kept as-is
/// and every other index is replaced with `0b1000_0000`.
///
/// NOTE(review): presumably the high bit makes a `pshufb`-style shuffle zero that lane,
/// so only lanes sourced from the first operand survive. Confirm against the caller.
#[inline]
fn shuffle_0_15_mask(&mut self, mask: &VecMask) -> VCodeConstant {
    let control = mask
        .iter()
        .map(|&lane| match lane {
            0..=15 => lane,
            _ => 0b10000000,
        })
        .collect();
    let data = VCodeConstantData::Generated(control);
    self.lower_ctx.use_constant(data)
}
/// Builds a constant from the shuffle immediate in which indices `16..=31` are rebased to
/// `0..=15` and every other index is replaced with `0b1000_0000`.
///
/// Indices below 16 wrap around to large values during the subtraction and therefore fall
/// into the "replace" case, as do indices above 31.
#[inline]
fn shuffle_16_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
    let control = mask
        .iter()
        .map(|&lane| {
            let rebased = lane.wrapping_sub(16);
            if rebased <= 15 {
                rebased
            } else {
                0b10000000
            }
        })
        .collect();
    let data = VCodeConstantData::Generated(control);
    self.lower_ctx.use_constant(data)
}
/// Produces a permutation constant plus a companion byte mask, but only when the shuffle
/// immediate contains at least one index above 31.
///
/// Returns `None` if every index is `<= 31`. Otherwise returns a pair of constants:
/// the first is the unmodified immediate (via `perm_from_mask`), the second holds `0x00`
/// in each position whose index is above 31 and `0xff` everywhere else.
#[inline]
fn perm_from_mask_with_zeros(
    &mut self,
    mask: &VecMask,
) -> Option<(VCodeConstant, VCodeConstant)> {
    let all_in_bounds = mask.iter().all(|&idx| idx <= 31);
    if all_in_bounds {
        return None;
    }

    let keep_bytes = mask
        .iter()
        .map(|&idx| if idx <= 31 { 0xff } else { 0x00 })
        .collect();

    // Register the permutation constant before the zeroing mask, as the two
    // registrations happen in that order.
    let perm = self.perm_from_mask(mask);
    let zeros = self
        .lower_ctx
        .use_constant(VCodeConstantData::Generated(keep_bytes));
    Some((perm, zeros))
}
/// Registers the shuffle immediate, byte for byte and unmodified, as a generated constant
/// and returns its handle.
#[inline]
fn perm_from_mask(&mut self, mask: &VecMask) -> VCodeConstant {
    let bytes = mask.iter().copied().collect();
    let data = VCodeConstantData::Generated(bytes);
    self.lower_ctx.use_constant(data)
}
/// Returns the handle of a well-known constant made of sixteen `0x70` bytes.
///
/// NOTE(review): presumably this is added with unsigned saturation to swizzle indices so
/// that any index above 15 ends up with its high bit set. Confirm against the ISLE rule
/// that uses it.
#[inline]
fn swizzle_zero_mask(&mut self) -> VCodeConstant {
    static ZERO_MASK_VALUE: [u8; 16] = [0x70; 16];
    let data = VCodeConstantData::WellKnown(&ZERO_MASK_VALUE);
    self.lower_ctx.use_constant(data)
}
fn emit_div_or_rem(
&mut self,
kind: &DivOrRemKind,