x64: Lower shuffle and swizzle in ISLE (#4772)
Lower `shuffle` and `swizzle` in ISLE.
This PR surfaced a bug in the lowering of `shuffle` when avx512vl and avx512vbmi are enabled: we use `vpermi2b` as the implementation, but panic if the immediate shuffle mask contains any out-of-bounds indices. When the avx512 extensions are not present, out-of-bounds indices instead produce `0` in the corresponding lanes of the result.
I've resolved this by detecting when the shuffle immediate has out-of-bounds indices in the avx512-enabled lowering, and generating an additional mask to zero out the lanes where those indices occur. This brings the avx512 case into line with the semantics of the `shuffle` op: 94bcbe8446/cranelift/codegen/meta/src/shared/instructions.rs (L1495-L1498)
This commit is contained in:
@@ -7,6 +7,7 @@ use std::cell::Cell;
|
||||
use target_lexicon::Triple;
|
||||
|
||||
pub use super::MachLabel;
|
||||
pub use crate::data_value::DataValue;
|
||||
pub use crate::ir::{
|
||||
ArgumentExtension, Constant, DynamicStackSlot, ExternalName, FuncRef, GlobalValue, Immediate,
|
||||
SigRef, StackSlot,
|
||||
@@ -24,6 +25,7 @@ pub type ValueArray2 = [Value; 2];
|
||||
/// Fixed-size array of three `Value`s.
pub type ValueArray3 = [Value; 3];
|
||||
/// A `Reg` wrapped in `Writable`.
pub type WritableReg = Writable<Reg>;
|
||||
/// Growable list of `Reg`s.
pub type VecReg = Vec<Reg>;
|
||||
/// Owned byte vector used as a mask; `vec_mask_from_immediate` builds one
/// from the 16 bytes of an immediate.
pub type VecMask = Vec<u8>;
|
||||
/// `crate::machinst::ValueRegs` specialized to `Reg`.
pub type ValueRegs = crate::machinst::ValueRegs<Reg>;
|
||||
/// `crate::machinst::ValueRegs` specialized to `WritableReg`.
pub type WritableValueRegs = crate::machinst::ValueRegs<WritableReg>;
|
||||
/// `SmallVec` of `ValueRegs` backed by a two-element array type.
pub type InstOutput = SmallVec<[ValueRegs; 2]>;
|
||||
@@ -683,6 +685,16 @@ macro_rules! isle_prelude_methods {
|
||||
Some(u128::from_le_bytes(bytes.try_into().ok()?))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn vec_mask_from_immediate(&mut self, imm: Immediate) -> Option<VecMask> {
|
||||
let data = self.lower_ctx.get_immediate_data(imm);
|
||||
if data.len() == 16 {
|
||||
Some(Vec::from(data.as_slice()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u64_from_constant(&mut self, constant: Constant) -> Option<u64> {
|
||||
let bytes = self.lower_ctx.get_constant_data(constant).as_slice();
|
||||
|
||||
Reference in New Issue
Block a user