x64: Lower shuffle and swizzle in ISLE (#4772)
Lower `shuffle` and `swizzle` in ISLE.
This PR surfaced a bug with the lowering of `shuffle` when avx512vl and avx512vbmi are enabled: we use `vpermi2b` as the implementation, but panic if the immediate shuffle mask contains any out-of-bounds values. The behavior when the avx512 extensions are not present is that out-of-bounds values are turned into `0` in the result.
I've resolved this by detecting when the shuffle immediate has out-of-bounds indices in the avx512-enabled lowering, and generating an additional mask to zero out the lanes where those indices occur. This brings the avx512 case into line with the semantics of the `shuffle` op: 94bcbe8446/cranelift/codegen/meta/src/shared/instructions.rs (L1495-L1498)
This commit is contained in:
@@ -5,7 +5,6 @@
|
||||
// TODO: separate the IR-query core of `Lower` from the lowering logic built on
|
||||
// top of it, e.g. the side-effect/coloring analysis and the scan support.
|
||||
|
||||
use crate::data_value::DataValue;
|
||||
use crate::entity::SecondaryMap;
|
||||
use crate::fx::{FxHashMap, FxHashSet};
|
||||
use crate::inst_predicates::{has_lowering_side_effect, is_constant_64bit};
|
||||
@@ -23,7 +22,6 @@ use crate::machinst::{
|
||||
};
|
||||
use crate::{trace, CodegenResult};
|
||||
use alloc::vec::Vec;
|
||||
use core::convert::TryInto;
|
||||
use regalloc2::VReg;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::fmt::Debug;
|
||||
@@ -1381,35 +1379,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
self.vcode.constants().insert(constant)
|
||||
}
|
||||
|
||||
/// Retrieve the value immediate from an instruction. This will perform necessary lookups on the
|
||||
/// `DataFlowGraph` to retrieve even large immediates.
|
||||
pub fn get_immediate(&self, ir_inst: Inst) -> Option<DataValue> {
|
||||
let inst_data = self.data(ir_inst);
|
||||
match inst_data {
|
||||
InstructionData::Shuffle { imm, .. } => {
|
||||
let mask = self.f.dfg.immediates.get(imm.clone()).unwrap().as_slice();
|
||||
let value = match mask.len() {
|
||||
16 => DataValue::V128(mask.try_into().expect("a 16-byte vector mask")),
|
||||
8 => DataValue::V64(mask.try_into().expect("an 8-byte vector mask")),
|
||||
length => panic!("unexpected Shuffle mask length {}", length),
|
||||
};
|
||||
Some(value)
|
||||
}
|
||||
InstructionData::UnaryConst {
|
||||
constant_handle, ..
|
||||
} => {
|
||||
let buffer = self.f.dfg.constants.get(constant_handle.clone()).as_slice();
|
||||
let value = match buffer.len() {
|
||||
16 => DataValue::V128(buffer.try_into().expect("a 16-byte data buffer")),
|
||||
8 => DataValue::V64(buffer.try_into().expect("an 8-byte data buffer")),
|
||||
length => panic!("unexpected UnaryConst buffer length {}", length),
|
||||
};
|
||||
Some(value)
|
||||
}
|
||||
_ => inst_data.imm_value(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Cause the value in `reg` to be in a virtual reg, by copying it into a new virtual reg
|
||||
/// if `reg` is a real reg. `ty` describes the type of the value in `reg`.
|
||||
pub fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg {
|
||||
|
||||
Reference in New Issue
Block a user