Add x86 implementation of shuffle
This commit is contained in:
@@ -5,7 +5,7 @@ use crate::ir;
|
||||
use crate::ir::builder::ReplaceBuilder;
|
||||
use crate::ir::extfunc::ExtFuncData;
|
||||
use crate::ir::instructions::{BranchInfo, CallInfo, InstructionData};
|
||||
use crate::ir::{types, ConstantPool};
|
||||
use crate::ir::{types, ConstantPool, Immediate};
|
||||
use crate::ir::{
|
||||
Ebb, FuncRef, Inst, SigRef, Signature, Type, Value, ValueLabelAssignments, ValueList,
|
||||
ValueListPool,
|
||||
@@ -19,6 +19,7 @@ use core::mem;
|
||||
use core::ops::{Index, IndexMut};
|
||||
use core::u16;
|
||||
use std::collections::HashMap;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// A data flow graph defines all instructions and extended basic blocks in a function as well as
|
||||
/// the data flow dependencies between them. The DFG also tracks values which can be either
|
||||
@@ -70,6 +71,9 @@ pub struct DataFlowGraph {
|
||||
|
||||
/// Constants used within the function
|
||||
pub constants: ConstantPool,
|
||||
|
||||
/// Stores large immediates that otherwise will not fit on InstructionData
|
||||
pub immediates: PrimaryMap<Immediate, Vec<u8>>,
|
||||
}
|
||||
|
||||
impl DataFlowGraph {
|
||||
@@ -85,6 +89,7 @@ impl DataFlowGraph {
|
||||
ext_funcs: PrimaryMap::new(),
|
||||
values_labels: None,
|
||||
constants: ConstantPool::new(),
|
||||
immediates: PrimaryMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,7 +103,8 @@ impl DataFlowGraph {
|
||||
self.signatures.clear();
|
||||
self.ext_funcs.clear();
|
||||
self.values_labels = None;
|
||||
self.constants.clear()
|
||||
self.constants.clear();
|
||||
self.immediates.clear();
|
||||
}
|
||||
|
||||
/// Get the total number of instructions created in this function, whether they are currently
|
||||
|
||||
@@ -181,6 +181,29 @@ impl Constant {
|
||||
}
|
||||
}
|
||||
|
||||
/// An opaque reference to an immediate.
|
||||
///
|
||||
/// Some immediates (e.g. SIMD shuffle masks) are too large to store in the
|
||||
/// [`InstructionData`](super::instructions::InstructionData) struct and therefore must be
|
||||
/// tracked separately in [`DataFlowGraph::immediates`](super::dfg::DataFlowGraph). `Immediate`
|
||||
/// provides a way to reference values stored there.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct Immediate(u32);
|
||||
entity_impl!(Immediate, "imm");
|
||||
|
||||
impl Immediate {
|
||||
/// Create an immediate reference from its number.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<Self> {
|
||||
if n < u32::MAX {
|
||||
Some(Immediate(n))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An opaque reference to a [jump table](https://en.wikipedia.org/wiki/Branch_table).
|
||||
///
|
||||
/// `JumpTable`s are used for indirect branching and are specialized for dense,
|
||||
|
||||
@@ -31,7 +31,8 @@ pub use crate::ir::builder::{InsertBuilder, InstBuilder, InstBuilderBase, InstIn
|
||||
pub use crate::ir::constant::{ConstantData, ConstantOffset, ConstantPool};
|
||||
pub use crate::ir::dfg::{DataFlowGraph, ValueDef};
|
||||
pub use crate::ir::entities::{
|
||||
Constant, Ebb, FuncRef, GlobalValue, Heap, Inst, JumpTable, SigRef, StackSlot, Table, Value,
|
||||
Constant, Ebb, FuncRef, GlobalValue, Heap, Immediate, Inst, JumpTable, SigRef, StackSlot,
|
||||
Table, Value,
|
||||
};
|
||||
pub use crate::ir::extfunc::{
|
||||
AbiParam, ArgumentExtension, ArgumentPurpose, ExtFuncData, Signature,
|
||||
|
||||
@@ -899,6 +899,80 @@ fn expand_fcvt_to_uint_sat(
|
||||
cfg.recompute_ebb(pos.func, done);
|
||||
}
|
||||
|
||||
/// Convert shuffle instructions.
|
||||
fn convert_shuffle(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
_cfg: &mut ControlFlowGraph,
|
||||
_isa: &dyn TargetIsa,
|
||||
) {
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
if let ir::InstructionData::Shuffle { args, mask, .. } = pos.func.dfg[inst] {
|
||||
// A mask-building helper: in 128-bit SIMD, 0-15 indicate which lane to read from and a 1
|
||||
// in the most significant position zeroes the lane.
|
||||
let zero_unknown_lane_index = |b: u8| if b > 15 { 0b10000000 } else { b };
|
||||
|
||||
// We only have to worry about aliasing here because copies will be introduced later (in
|
||||
// regalloc).
|
||||
let a = pos.func.dfg.resolve_aliases(args[0]);
|
||||
let b = pos.func.dfg.resolve_aliases(args[1]);
|
||||
let mask = pos
|
||||
.func
|
||||
.dfg
|
||||
.immediates
|
||||
.get(mask)
|
||||
.expect("The shuffle immediate should have been recorded before this point")
|
||||
.clone();
|
||||
if a == b {
|
||||
// PSHUFB the first argument (since it is the same as the second).
|
||||
let constructed_mask = mask
|
||||
.iter()
|
||||
// If the mask is greater than 15 it still may be referring to a lane in b.
|
||||
.map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
|
||||
.map(zero_unknown_lane_index)
|
||||
.collect();
|
||||
let handle = pos.func.dfg.constants.insert(constructed_mask);
|
||||
// Move the built mask into another XMM register.
|
||||
let a_type = pos.func.dfg.value_type(a);
|
||||
let mask_value = pos.ins().vconst(a_type, handle);
|
||||
// Shuffle the single incoming argument.
|
||||
pos.func.dfg.replace(inst).x86_pshufb(a, mask_value);
|
||||
} else {
|
||||
// PSHUFB the first argument, placing zeroes for unused lanes.
|
||||
let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect();
|
||||
let handle = pos.func.dfg.constants.insert(constructed_mask);
|
||||
// Move the built mask into another XMM register.
|
||||
let a_type = pos.func.dfg.value_type(a);
|
||||
let mask_value = pos.ins().vconst(a_type, handle);
|
||||
// Shuffle the first argument.
|
||||
let shuffled_first_arg = pos.ins().x86_pshufb(a, mask_value);
|
||||
|
||||
// PSHUFB the second argument, placing zeroes for unused lanes.
|
||||
let constructed_mask = mask
|
||||
.iter()
|
||||
.map(|b| b.wrapping_sub(16))
|
||||
.map(zero_unknown_lane_index)
|
||||
.collect();
|
||||
let handle = pos.func.dfg.constants.insert(constructed_mask);
|
||||
// Move the built mask into another XMM register.
|
||||
let b_type = pos.func.dfg.value_type(b);
|
||||
let mask_value = pos.ins().vconst(b_type, handle);
|
||||
// Shuffle the second argument.
|
||||
let shuffled_second_arg = pos.ins().x86_pshufb(b, mask_value);
|
||||
|
||||
// OR the vectors together to form the final shuffled value.
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.bor(shuffled_first_arg, shuffled_second_arg);
|
||||
|
||||
// TODO when AVX512 is enabled we should replace this sequence with a single VPERMB
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// Because floats already exist in XMM registers, we can keep them there when executing a CLIF
|
||||
/// extractlane instruction
|
||||
fn convert_extractlane(
|
||||
|
||||
@@ -706,7 +706,6 @@ impl<'a> Verifier<'a> {
|
||||
// Exhaustive list so we can't forget to add new formats
|
||||
Unary { .. }
|
||||
| UnaryImm { .. }
|
||||
| UnaryImm128 { .. }
|
||||
| UnaryIeee32 { .. }
|
||||
| UnaryIeee64 { .. }
|
||||
| UnaryBool { .. }
|
||||
@@ -715,6 +714,8 @@ impl<'a> Verifier<'a> {
|
||||
| Ternary { .. }
|
||||
| InsertLane { .. }
|
||||
| ExtractLane { .. }
|
||||
| UnaryConst { .. }
|
||||
| Shuffle { .. }
|
||||
| IntCompare { .. }
|
||||
| IntCompareImm { .. }
|
||||
| IntCond { .. }
|
||||
|
||||
@@ -488,11 +488,6 @@ pub fn write_operands(
|
||||
match dfg[inst] {
|
||||
Unary { arg, .. } => write!(w, " {}", arg),
|
||||
UnaryImm { imm, .. } => write!(w, " {}", imm),
|
||||
UnaryImm128 { imm, .. } => {
|
||||
let data = dfg.constants.get(imm);
|
||||
let uimm128 = Uimm128::from(&data[..]);
|
||||
write!(w, " {}", uimm128)
|
||||
}
|
||||
UnaryIeee32 { imm, .. } => write!(w, " {}", imm),
|
||||
UnaryIeee64 { imm, .. } => write!(w, " {}", imm),
|
||||
UnaryBool { imm, .. } => write!(w, " {}", imm),
|
||||
@@ -510,6 +505,20 @@ pub fn write_operands(
|
||||
NullAry { .. } => write!(w, " "),
|
||||
InsertLane { lane, args, .. } => write!(w, " {}, {}, {}", args[0], lane, args[1]),
|
||||
ExtractLane { lane, arg, .. } => write!(w, " {}, {}", arg, lane),
|
||||
UnaryConst {
|
||||
constant_handle, ..
|
||||
} => {
|
||||
let data = dfg.constants.get(constant_handle);
|
||||
let uimm128 = Uimm128::from(&data[..]);
|
||||
write!(w, " {}", uimm128)
|
||||
}
|
||||
Shuffle { mask, args, .. } => {
|
||||
let data = dfg.immediates.get(mask).expect(
|
||||
"Expected the shuffle mask to already be inserted into the immediates table",
|
||||
);
|
||||
let uimm128 = Uimm128::from(&data[..]);
|
||||
write!(w, " {}, {}, {}", args[0], args[1], uimm128)
|
||||
}
|
||||
IntCompare { cond, args, .. } => write!(w, " {} {}, {}", cond, args[0], args[1]),
|
||||
IntCompareImm { cond, arg, imm, .. } => write!(w, " {} {}, {}", cond, arg, imm),
|
||||
IntCond { cond, arg, .. } => write!(w, " {} {}", cond, arg),
|
||||
|
||||
Reference in New Issue
Block a user