Add x86 implementation of shuffle
This commit is contained in:
@@ -1785,7 +1785,7 @@ pub(crate) fn define(
|
|||||||
let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128;
|
let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128;
|
||||||
|
|
||||||
// PSHUFB, 8-bit shuffle using two XMM registers.
|
// PSHUFB, 8-bit shuffle using two XMM registers.
|
||||||
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
|
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||||
let instruction = x86_pshufb.bind_vector_from_lane(ty, sse_vector_size);
|
let instruction = x86_pshufb.bind_vector_from_lane(ty, sse_vector_size);
|
||||||
let template = rec_fa.nonrex().opcodes(vec![0x66, 0x0f, 0x38, 00]);
|
let template = rec_fa.nonrex().opcodes(vec![0x66, 0x0f, 0x38, 00]);
|
||||||
e.enc32_isap(instruction.clone(), template.clone(), use_ssse3_simd);
|
e.enc32_isap(instruction.clone(), template.clone(), use_ssse3_simd);
|
||||||
@@ -1804,7 +1804,7 @@ pub(crate) fn define(
|
|||||||
|
|
||||||
// SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
|
// SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
|
||||||
// to the Intel manual: "When the destination operand is an XMM register, the source operand is
|
// to the Intel manual: "When the destination operand is an XMM register, the source operand is
|
||||||
// written to the low doubleword of the register and the regiser is zero-extended to 128 bits."
|
// written to the low doubleword of the register and the register is zero-extended to 128 bits."
|
||||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||||
let instruction = scalar_to_vector.bind_vector_from_lane(ty, sse_vector_size);
|
let instruction = scalar_to_vector.bind_vector_from_lane(ty, sse_vector_size);
|
||||||
if ty.is_float() {
|
if ty.is_float() {
|
||||||
@@ -1929,6 +1929,13 @@ pub(crate) fn define(
|
|||||||
e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
|
e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD bor using ORPS
|
||||||
|
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||||
|
let instruction = bor.bind_vector_from_lane(ty, sse_vector_size);
|
||||||
|
let template = rec_fa.nonrex().opcodes(vec![0x0f, 0x56]);
|
||||||
|
e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
|
||||||
|
}
|
||||||
|
|
||||||
// Reference type instructions
|
// Reference type instructions
|
||||||
|
|
||||||
// Null references implemented as iconst 0.
|
// Null references implemented as iconst 0.
|
||||||
|
|||||||
@@ -45,6 +45,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
let selectif = insts.by_name("selectif");
|
let selectif = insts.by_name("selectif");
|
||||||
let smulhi = insts.by_name("smulhi");
|
let smulhi = insts.by_name("smulhi");
|
||||||
let splat = insts.by_name("splat");
|
let splat = insts.by_name("splat");
|
||||||
|
let shuffle = insts.by_name("shuffle");
|
||||||
let srem = insts.by_name("srem");
|
let srem = insts.by_name("srem");
|
||||||
let udiv = insts.by_name("udiv");
|
let udiv = insts.by_name("udiv");
|
||||||
let umulhi = insts.by_name("umulhi");
|
let umulhi = insts.by_name("umulhi");
|
||||||
@@ -380,6 +381,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
narrow.custom_legalize(shuffle, "convert_shuffle");
|
||||||
narrow.custom_legalize(extractlane, "convert_extractlane");
|
narrow.custom_legalize(extractlane, "convert_extractlane");
|
||||||
narrow.custom_legalize(insertlane, "convert_insertlane");
|
narrow.custom_legalize(insertlane, "convert_insertlane");
|
||||||
|
|
||||||
|
|||||||
@@ -396,11 +396,11 @@ pub(crate) fn define<'shared>(
|
|||||||
let f_trap = formats.by_name("Trap");
|
let f_trap = formats.by_name("Trap");
|
||||||
let f_unary = formats.by_name("Unary");
|
let f_unary = formats.by_name("Unary");
|
||||||
let f_unary_bool = formats.by_name("UnaryBool");
|
let f_unary_bool = formats.by_name("UnaryBool");
|
||||||
|
let f_unary_const = formats.by_name("UnaryConst");
|
||||||
let f_unary_global_value = formats.by_name("UnaryGlobalValue");
|
let f_unary_global_value = formats.by_name("UnaryGlobalValue");
|
||||||
let f_unary_ieee32 = formats.by_name("UnaryIeee32");
|
let f_unary_ieee32 = formats.by_name("UnaryIeee32");
|
||||||
let f_unary_ieee64 = formats.by_name("UnaryIeee64");
|
let f_unary_ieee64 = formats.by_name("UnaryIeee64");
|
||||||
let f_unary_imm = formats.by_name("UnaryImm");
|
let f_unary_imm = formats.by_name("UnaryImm");
|
||||||
let f_unary_imm128 = formats.by_name("UnaryImm128");
|
|
||||||
|
|
||||||
// Predicates shorthands.
|
// Predicates shorthands.
|
||||||
let use_sse41 = settings.predicate_by_name("use_sse41");
|
let use_sse41 = settings.predicate_by_name("use_sse41");
|
||||||
@@ -2437,14 +2437,14 @@ pub(crate) fn define<'shared>(
|
|||||||
);
|
);
|
||||||
|
|
||||||
recipes.add_template_recipe(
|
recipes.add_template_recipe(
|
||||||
EncodingRecipeBuilder::new("vconst", f_unary_imm128, 5)
|
EncodingRecipeBuilder::new("vconst", f_unary_const, 5)
|
||||||
.operands_out(vec![fpr])
|
.operands_out(vec![fpr])
|
||||||
.clobbers_flags(false)
|
.clobbers_flags(false)
|
||||||
.emit(
|
.emit(
|
||||||
r#"
|
r#"
|
||||||
{{PUT_OP}}(bits, rex2(0, out_reg0), sink);
|
{{PUT_OP}}(bits, rex2(0, out_reg0), sink);
|
||||||
modrm_riprel(out_reg0, sink);
|
modrm_riprel(out_reg0, sink);
|
||||||
const_disp4(imm, func, sink);
|
const_disp4(constant_handle, func, sink);
|
||||||
"#,
|
"#,
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -6,10 +6,10 @@ pub(crate) fn define(imm: &Immediates, entities: &EntityRefs) -> FormatRegistry
|
|||||||
|
|
||||||
registry.insert(Builder::new("Unary").value());
|
registry.insert(Builder::new("Unary").value());
|
||||||
registry.insert(Builder::new("UnaryImm").imm(&imm.imm64));
|
registry.insert(Builder::new("UnaryImm").imm(&imm.imm64));
|
||||||
registry.insert(Builder::new("UnaryImm128").imm(&imm.uimm128));
|
|
||||||
registry.insert(Builder::new("UnaryIeee32").imm(&imm.ieee32));
|
registry.insert(Builder::new("UnaryIeee32").imm(&imm.ieee32));
|
||||||
registry.insert(Builder::new("UnaryIeee64").imm(&imm.ieee64));
|
registry.insert(Builder::new("UnaryIeee64").imm(&imm.ieee64));
|
||||||
registry.insert(Builder::new("UnaryBool").imm(&imm.boolean));
|
registry.insert(Builder::new("UnaryBool").imm(&imm.boolean));
|
||||||
|
registry.insert(Builder::new("UnaryConst").imm(&imm.pool_constant));
|
||||||
registry.insert(Builder::new("UnaryGlobalValue").imm(&entities.global_value));
|
registry.insert(Builder::new("UnaryGlobalValue").imm(&entities.global_value));
|
||||||
|
|
||||||
registry.insert(Builder::new("Binary").value().value());
|
registry.insert(Builder::new("Binary").value().value());
|
||||||
@@ -43,6 +43,12 @@ pub(crate) fn define(imm: &Immediates, entities: &EntityRefs) -> FormatRegistry
|
|||||||
.value()
|
.value()
|
||||||
.imm_with_name("lane", &imm.uimm8),
|
.imm_with_name("lane", &imm.uimm8),
|
||||||
);
|
);
|
||||||
|
registry.insert(
|
||||||
|
Builder::new("Shuffle")
|
||||||
|
.value()
|
||||||
|
.value()
|
||||||
|
.imm_with_name("mask", &imm.uimm128),
|
||||||
|
);
|
||||||
|
|
||||||
registry.insert(Builder::new("IntCompare").imm(&imm.intcc).value().value());
|
registry.insert(Builder::new("IntCompare").imm(&imm.intcc).value().value());
|
||||||
registry.insert(
|
registry.insert(
|
||||||
|
|||||||
@@ -23,6 +23,12 @@ pub(crate) struct Immediates {
|
|||||||
/// const.
|
/// const.
|
||||||
pub uimm128: OperandKind,
|
pub uimm128: OperandKind,
|
||||||
|
|
||||||
|
/// A constant stored in the constant pool.
|
||||||
|
///
|
||||||
|
/// This operand is used to pass constants to instructions like vconst while storing the
|
||||||
|
/// actual bytes in the constant pool.
|
||||||
|
pub pool_constant: OperandKind,
|
||||||
|
|
||||||
/// A 32-bit immediate signed offset.
|
/// A 32-bit immediate signed offset.
|
||||||
///
|
///
|
||||||
/// This is used to represent an immediate address offset in load/store instructions.
|
/// This is used to represent an immediate address offset in load/store instructions.
|
||||||
@@ -84,6 +90,12 @@ impl Immediates {
|
|||||||
|
|
||||||
uimm128: Builder::new_imm("uimm128")
|
uimm128: Builder::new_imm("uimm128")
|
||||||
.doc("A 128-bit immediate unsigned integer.")
|
.doc("A 128-bit immediate unsigned integer.")
|
||||||
|
.rust_type("ir::Immediate")
|
||||||
|
.build(),
|
||||||
|
|
||||||
|
pool_constant: Builder::new_imm("poolConstant")
|
||||||
|
.doc("A constant stored in the constant pool.")
|
||||||
|
.default_member("constant_handle")
|
||||||
.rust_type("ir::Constant")
|
.rust_type("ir::Constant")
|
||||||
.build(),
|
.build(),
|
||||||
|
|
||||||
|
|||||||
@@ -1090,7 +1090,7 @@ pub(crate) fn define(
|
|||||||
|
|
||||||
let N = &operand_doc(
|
let N = &operand_doc(
|
||||||
"N",
|
"N",
|
||||||
&imm.uimm128,
|
&imm.pool_constant,
|
||||||
"The 16 immediate bytes of a 128-bit vector",
|
"The 16 immediate bytes of a 128-bit vector",
|
||||||
);
|
);
|
||||||
let a = &operand_doc("a", TxN, "A constant vector value");
|
let a = &operand_doc("a", TxN, "A constant vector value");
|
||||||
@@ -1108,6 +1108,41 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let mask = &operand_doc(
|
||||||
|
"mask",
|
||||||
|
&imm.uimm128,
|
||||||
|
"The 16 immediate bytes used for selecting the elements to shuffle",
|
||||||
|
);
|
||||||
|
let Tx16 = &TypeVar::new(
|
||||||
|
"Tx16",
|
||||||
|
"A SIMD vector with exactly 16 lanes of 8-bit values; eventually this may support other \
|
||||||
|
lane counts and widths",
|
||||||
|
TypeSetBuilder::new()
|
||||||
|
.ints(8..8)
|
||||||
|
.bools(8..8)
|
||||||
|
.simd_lanes(16..16)
|
||||||
|
.includes_scalars(false)
|
||||||
|
.build(),
|
||||||
|
);
|
||||||
|
let a = &operand_doc("a", Tx16, "A vector value");
|
||||||
|
let b = &operand_doc("b", Tx16, "A vector value");
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"shuffle",
|
||||||
|
r#"
|
||||||
|
SIMD vector shuffle.
|
||||||
|
|
||||||
|
Shuffle two vectors using the given immediate bytes. For each of the 16 bytes of the
|
||||||
|
immediate, a value i of 0-15 selects the i-th element of the first vector and a value i of
|
||||||
|
16-31 selects the (i-16)th element of the second vector. Immediate values outside of the
|
||||||
|
0-31 range place a 0 in the resulting vector lane.
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.operands_in(vec![a, b, mask])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
let a = &operand_doc("a", Ref, "A constant reference null value");
|
let a = &operand_doc("a", Ref, "A constant reference null value");
|
||||||
|
|
||||||
ig.push(
|
ig.push(
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use crate::ir;
|
|||||||
use crate::ir::builder::ReplaceBuilder;
|
use crate::ir::builder::ReplaceBuilder;
|
||||||
use crate::ir::extfunc::ExtFuncData;
|
use crate::ir::extfunc::ExtFuncData;
|
||||||
use crate::ir::instructions::{BranchInfo, CallInfo, InstructionData};
|
use crate::ir::instructions::{BranchInfo, CallInfo, InstructionData};
|
||||||
use crate::ir::{types, ConstantPool};
|
use crate::ir::{types, ConstantPool, Immediate};
|
||||||
use crate::ir::{
|
use crate::ir::{
|
||||||
Ebb, FuncRef, Inst, SigRef, Signature, Type, Value, ValueLabelAssignments, ValueList,
|
Ebb, FuncRef, Inst, SigRef, Signature, Type, Value, ValueLabelAssignments, ValueList,
|
||||||
ValueListPool,
|
ValueListPool,
|
||||||
@@ -19,6 +19,7 @@ use core::mem;
|
|||||||
use core::ops::{Index, IndexMut};
|
use core::ops::{Index, IndexMut};
|
||||||
use core::u16;
|
use core::u16;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
use std::vec::Vec;
|
||||||
|
|
||||||
/// A data flow graph defines all instructions and extended basic blocks in a function as well as
|
/// A data flow graph defines all instructions and extended basic blocks in a function as well as
|
||||||
/// the data flow dependencies between them. The DFG also tracks values which can be either
|
/// the data flow dependencies between them. The DFG also tracks values which can be either
|
||||||
@@ -70,6 +71,9 @@ pub struct DataFlowGraph {
|
|||||||
|
|
||||||
/// Constants used within the function
|
/// Constants used within the function
|
||||||
pub constants: ConstantPool,
|
pub constants: ConstantPool,
|
||||||
|
|
||||||
|
/// Stores large immediates that would otherwise not fit in `InstructionData`.
|
||||||
|
pub immediates: PrimaryMap<Immediate, Vec<u8>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DataFlowGraph {
|
impl DataFlowGraph {
|
||||||
@@ -85,6 +89,7 @@ impl DataFlowGraph {
|
|||||||
ext_funcs: PrimaryMap::new(),
|
ext_funcs: PrimaryMap::new(),
|
||||||
values_labels: None,
|
values_labels: None,
|
||||||
constants: ConstantPool::new(),
|
constants: ConstantPool::new(),
|
||||||
|
immediates: PrimaryMap::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -98,7 +103,8 @@ impl DataFlowGraph {
|
|||||||
self.signatures.clear();
|
self.signatures.clear();
|
||||||
self.ext_funcs.clear();
|
self.ext_funcs.clear();
|
||||||
self.values_labels = None;
|
self.values_labels = None;
|
||||||
self.constants.clear()
|
self.constants.clear();
|
||||||
|
self.immediates.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the total number of instructions created in this function, whether they are currently
|
/// Get the total number of instructions created in this function, whether they are currently
|
||||||
|
|||||||
@@ -181,6 +181,29 @@ impl Constant {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An opaque reference to an immediate.
|
||||||
|
///
|
||||||
|
/// Some immediates (e.g. SIMD shuffle masks) are too large to store in the
|
||||||
|
/// [`InstructionData`](super::instructions::InstructionData) struct and therefore must be
|
||||||
|
/// tracked separately in [`DataFlowGraph::immediates`](super::dfg::DataFlowGraph). `Immediate`
|
||||||
|
/// provides a way to reference values stored there.
|
||||||
|
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||||
|
pub struct Immediate(u32);
|
||||||
|
entity_impl!(Immediate, "imm");
|
||||||
|
|
||||||
|
impl Immediate {
|
||||||
|
/// Create an immediate reference from its number.
|
||||||
|
///
|
||||||
|
/// This method is for use by the parser.
|
||||||
|
pub fn with_number(n: u32) -> Option<Self> {
|
||||||
|
if n < u32::MAX {
|
||||||
|
Some(Immediate(n))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// An opaque reference to a [jump table](https://en.wikipedia.org/wiki/Branch_table).
|
/// An opaque reference to a [jump table](https://en.wikipedia.org/wiki/Branch_table).
|
||||||
///
|
///
|
||||||
/// `JumpTable`s are used for indirect branching and are specialized for dense,
|
/// `JumpTable`s are used for indirect branching and are specialized for dense,
|
||||||
|
|||||||
@@ -31,7 +31,8 @@ pub use crate::ir::builder::{InsertBuilder, InstBuilder, InstBuilderBase, InstIn
|
|||||||
pub use crate::ir::constant::{ConstantData, ConstantOffset, ConstantPool};
|
pub use crate::ir::constant::{ConstantData, ConstantOffset, ConstantPool};
|
||||||
pub use crate::ir::dfg::{DataFlowGraph, ValueDef};
|
pub use crate::ir::dfg::{DataFlowGraph, ValueDef};
|
||||||
pub use crate::ir::entities::{
|
pub use crate::ir::entities::{
|
||||||
Constant, Ebb, FuncRef, GlobalValue, Heap, Inst, JumpTable, SigRef, StackSlot, Table, Value,
|
Constant, Ebb, FuncRef, GlobalValue, Heap, Immediate, Inst, JumpTable, SigRef, StackSlot,
|
||||||
|
Table, Value,
|
||||||
};
|
};
|
||||||
pub use crate::ir::extfunc::{
|
pub use crate::ir::extfunc::{
|
||||||
AbiParam, ArgumentExtension, ArgumentPurpose, ExtFuncData, Signature,
|
AbiParam, ArgumentExtension, ArgumentPurpose, ExtFuncData, Signature,
|
||||||
|
|||||||
@@ -899,6 +899,80 @@ fn expand_fcvt_to_uint_sat(
|
|||||||
cfg.recompute_ebb(pos.func, done);
|
cfg.recompute_ebb(pos.func, done);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Convert shuffle instructions.
|
||||||
|
fn convert_shuffle(
|
||||||
|
inst: ir::Inst,
|
||||||
|
func: &mut ir::Function,
|
||||||
|
_cfg: &mut ControlFlowGraph,
|
||||||
|
_isa: &dyn TargetIsa,
|
||||||
|
) {
|
||||||
|
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||||
|
pos.use_srcloc(inst);
|
||||||
|
|
||||||
|
if let ir::InstructionData::Shuffle { args, mask, .. } = pos.func.dfg[inst] {
|
||||||
|
// A mask-building helper: in 128-bit SIMD, 0-15 indicate which lane to read from and a 1
|
||||||
|
// in the most significant position zeroes the lane.
|
||||||
|
let zero_unknown_lane_index = |b: u8| if b > 15 { 0b10000000 } else { b };
|
||||||
|
|
||||||
|
// We only have to worry about aliasing here because copies will be introduced later (in
|
||||||
|
// regalloc).
|
||||||
|
let a = pos.func.dfg.resolve_aliases(args[0]);
|
||||||
|
let b = pos.func.dfg.resolve_aliases(args[1]);
|
||||||
|
let mask = pos
|
||||||
|
.func
|
||||||
|
.dfg
|
||||||
|
.immediates
|
||||||
|
.get(mask)
|
||||||
|
.expect("The shuffle immediate should have been recorded before this point")
|
||||||
|
.clone();
|
||||||
|
if a == b {
|
||||||
|
// PSHUFB the first argument (since it is the same as the second).
|
||||||
|
let constructed_mask = mask
|
||||||
|
.iter()
|
||||||
|
// A mask byte greater than 15 may still refer to a lane in b, which here is the same value as a.
|
||||||
|
.map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
|
||||||
|
.map(zero_unknown_lane_index)
|
||||||
|
.collect();
|
||||||
|
let handle = pos.func.dfg.constants.insert(constructed_mask);
|
||||||
|
// Move the built mask into another XMM register.
|
||||||
|
let a_type = pos.func.dfg.value_type(a);
|
||||||
|
let mask_value = pos.ins().vconst(a_type, handle);
|
||||||
|
// Shuffle the single incoming argument.
|
||||||
|
pos.func.dfg.replace(inst).x86_pshufb(a, mask_value);
|
||||||
|
} else {
|
||||||
|
// PSHUFB the first argument, placing zeroes for unused lanes.
|
||||||
|
let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect();
|
||||||
|
let handle = pos.func.dfg.constants.insert(constructed_mask);
|
||||||
|
// Move the built mask into another XMM register.
|
||||||
|
let a_type = pos.func.dfg.value_type(a);
|
||||||
|
let mask_value = pos.ins().vconst(a_type, handle);
|
||||||
|
// Shuffle the first argument.
|
||||||
|
let shuffled_first_arg = pos.ins().x86_pshufb(a, mask_value);
|
||||||
|
|
||||||
|
// PSHUFB the second argument, placing zeroes for unused lanes.
|
||||||
|
let constructed_mask = mask
|
||||||
|
.iter()
|
||||||
|
.map(|b| b.wrapping_sub(16))
|
||||||
|
.map(zero_unknown_lane_index)
|
||||||
|
.collect();
|
||||||
|
let handle = pos.func.dfg.constants.insert(constructed_mask);
|
||||||
|
// Move the built mask into another XMM register.
|
||||||
|
let b_type = pos.func.dfg.value_type(b);
|
||||||
|
let mask_value = pos.ins().vconst(b_type, handle);
|
||||||
|
// Shuffle the second argument.
|
||||||
|
let shuffled_second_arg = pos.ins().x86_pshufb(b, mask_value);
|
||||||
|
|
||||||
|
// OR the vectors together to form the final shuffled value.
|
||||||
|
pos.func
|
||||||
|
.dfg
|
||||||
|
.replace(inst)
|
||||||
|
.bor(shuffled_first_arg, shuffled_second_arg);
|
||||||
|
|
||||||
|
// TODO when AVX512 is enabled we should replace this sequence with a single VPERMB
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Because floats already exist in XMM registers, we can keep them there when executing a CLIF
|
/// Because floats already exist in XMM registers, we can keep them there when executing a CLIF
|
||||||
/// extractlane instruction
|
/// extractlane instruction
|
||||||
fn convert_extractlane(
|
fn convert_extractlane(
|
||||||
|
|||||||
@@ -706,7 +706,6 @@ impl<'a> Verifier<'a> {
|
|||||||
// Exhaustive list so we can't forget to add new formats
|
// Exhaustive list so we can't forget to add new formats
|
||||||
Unary { .. }
|
Unary { .. }
|
||||||
| UnaryImm { .. }
|
| UnaryImm { .. }
|
||||||
| UnaryImm128 { .. }
|
|
||||||
| UnaryIeee32 { .. }
|
| UnaryIeee32 { .. }
|
||||||
| UnaryIeee64 { .. }
|
| UnaryIeee64 { .. }
|
||||||
| UnaryBool { .. }
|
| UnaryBool { .. }
|
||||||
@@ -715,6 +714,8 @@ impl<'a> Verifier<'a> {
|
|||||||
| Ternary { .. }
|
| Ternary { .. }
|
||||||
| InsertLane { .. }
|
| InsertLane { .. }
|
||||||
| ExtractLane { .. }
|
| ExtractLane { .. }
|
||||||
|
| UnaryConst { .. }
|
||||||
|
| Shuffle { .. }
|
||||||
| IntCompare { .. }
|
| IntCompare { .. }
|
||||||
| IntCompareImm { .. }
|
| IntCompareImm { .. }
|
||||||
| IntCond { .. }
|
| IntCond { .. }
|
||||||
|
|||||||
@@ -488,11 +488,6 @@ pub fn write_operands(
|
|||||||
match dfg[inst] {
|
match dfg[inst] {
|
||||||
Unary { arg, .. } => write!(w, " {}", arg),
|
Unary { arg, .. } => write!(w, " {}", arg),
|
||||||
UnaryImm { imm, .. } => write!(w, " {}", imm),
|
UnaryImm { imm, .. } => write!(w, " {}", imm),
|
||||||
UnaryImm128 { imm, .. } => {
|
|
||||||
let data = dfg.constants.get(imm);
|
|
||||||
let uimm128 = Uimm128::from(&data[..]);
|
|
||||||
write!(w, " {}", uimm128)
|
|
||||||
}
|
|
||||||
UnaryIeee32 { imm, .. } => write!(w, " {}", imm),
|
UnaryIeee32 { imm, .. } => write!(w, " {}", imm),
|
||||||
UnaryIeee64 { imm, .. } => write!(w, " {}", imm),
|
UnaryIeee64 { imm, .. } => write!(w, " {}", imm),
|
||||||
UnaryBool { imm, .. } => write!(w, " {}", imm),
|
UnaryBool { imm, .. } => write!(w, " {}", imm),
|
||||||
@@ -510,6 +505,20 @@ pub fn write_operands(
|
|||||||
NullAry { .. } => write!(w, " "),
|
NullAry { .. } => write!(w, " "),
|
||||||
InsertLane { lane, args, .. } => write!(w, " {}, {}, {}", args[0], lane, args[1]),
|
InsertLane { lane, args, .. } => write!(w, " {}, {}, {}", args[0], lane, args[1]),
|
||||||
ExtractLane { lane, arg, .. } => write!(w, " {}, {}", arg, lane),
|
ExtractLane { lane, arg, .. } => write!(w, " {}, {}", arg, lane),
|
||||||
|
UnaryConst {
|
||||||
|
constant_handle, ..
|
||||||
|
} => {
|
||||||
|
let data = dfg.constants.get(constant_handle);
|
||||||
|
let uimm128 = Uimm128::from(&data[..]);
|
||||||
|
write!(w, " {}", uimm128)
|
||||||
|
}
|
||||||
|
Shuffle { mask, args, .. } => {
|
||||||
|
let data = dfg.immediates.get(mask).expect(
|
||||||
|
"Expected the shuffle mask to already be inserted into the immediates table",
|
||||||
|
);
|
||||||
|
let uimm128 = Uimm128::from(&data[..]);
|
||||||
|
write!(w, " {}, {}, {}", args[0], args[1], uimm128)
|
||||||
|
}
|
||||||
IntCompare { cond, args, .. } => write!(w, " {} {}, {}", cond, args[0], args[1]),
|
IntCompare { cond, args, .. } => write!(w, " {} {}, {}", cond, args[0], args[1]),
|
||||||
IntCompareImm { cond, arg, imm, .. } => write!(w, " {} {}, {}", cond, arg, imm),
|
IntCompareImm { cond, arg, imm, .. } => write!(w, " {} {}, {}", cond, arg, imm),
|
||||||
IntCond { cond, arg, .. } => write!(w, " {} {}", cond, arg),
|
IntCond { cond, arg, .. } => write!(w, " {} {}", cond, arg),
|
||||||
|
|||||||
31
cranelift/filetests/filetests/isa/x86/shuffle-legalize.clif
Normal file
31
cranelift/filetests/filetests/isa/x86/shuffle-legalize.clif
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
test legalizer
|
||||||
|
set enable_simd
|
||||||
|
target x86_64 skylake
|
||||||
|
|
||||||
|
function %test_shuffle_different_ssa_values() -> i8x16 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.i8x16 0x00
|
||||||
|
v1 = vconst.i8x16 0x01
|
||||||
|
v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: v1 = vconst.i8x16 0x01
|
||||||
|
; nextln: v3 = vconst.i8x16 0x80000000000000000000000000000000
|
||||||
|
; nextln: v4 = x86_pshufb v0, v3
|
||||||
|
; nextln: v5 = vconst.i8x16 0x01808080808080808080808080808080
|
||||||
|
; nextln: v6 = x86_pshufb v1, v5
|
||||||
|
; nextln: v2 = bor v4, v6
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
function %test_shuffle_same_ssa_value() -> i8x16 {
|
||||||
|
ebb0:
|
||||||
|
v1 = vconst.i8x16 0x01
|
||||||
|
v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: v1 = vconst.i8x16 0x01
|
||||||
|
; nextln: v3 = vconst.i8x16 0x03000000000000000000000000000000
|
||||||
|
; nextln: v2 = x86_pshufb v1, v3
|
||||||
44
cranelift/filetests/filetests/isa/x86/shuffle-run.clif
Normal file
44
cranelift/filetests/filetests/isa/x86/shuffle-run.clif
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
test run
|
||||||
|
set enable_simd
|
||||||
|
|
||||||
|
function %test_shuffle_different_ssa_values() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.i8x16 0x00
|
||||||
|
v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42]
|
||||||
|
v2 = shuffle v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31] ; use the first lane of v0 throughout except use the last lane of v1
|
||||||
|
v3 = extractlane.i8x16 v2, 15
|
||||||
|
v4 = iconst.i8 42
|
||||||
|
v5 = icmp eq v3, v4
|
||||||
|
return v5
|
||||||
|
}
|
||||||
|
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %test_shuffle_same_ssa_value() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.i8x16 0x01000000_00000000_00000000_00000000 ; note where lane 15 is when written with hexadecimal syntax
|
||||||
|
v1 = shuffle v0, v0, 0x0f0f0f0f_0f0f0f0f_0f0f0f0f_0f0f0f0f ; use the last lane of v0 to fill all lanes
|
||||||
|
v2 = extractlane.i8x16 v1, 4
|
||||||
|
v3 = iconst.i8 0x01
|
||||||
|
v4 = icmp eq v2, v3
|
||||||
|
return v4
|
||||||
|
}
|
||||||
|
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %compare_shuffle() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
v1 = vconst.i32x4 [0 1 2 3]
|
||||||
|
v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
|
||||||
|
; keep each lane in place from the first vector
|
||||||
|
v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
|
||||||
|
v4 = raw_bitcast.i32x4 v3
|
||||||
|
v5 = extractlane.i32x4 v4, 3
|
||||||
|
v6 = icmp_imm eq v5, 3
|
||||||
|
v7 = extractlane.i32x4 v4, 0
|
||||||
|
v8 = icmp_imm eq v7, 0
|
||||||
|
v9 = band v6, v8
|
||||||
|
return v9
|
||||||
|
}
|
||||||
|
|
||||||
|
; run
|
||||||
@@ -1,6 +1,5 @@
|
|||||||
test rodata
|
test rodata
|
||||||
set enable_simd=true
|
set enable_simd=true
|
||||||
set probestack_enabled=false
|
|
||||||
target x86_64 haswell
|
target x86_64 haswell
|
||||||
|
|
||||||
function %test_vconst_i32() -> i32x4 {
|
function %test_vconst_i32() -> i32x4 {
|
||||||
|
|||||||
@@ -2243,23 +2243,6 @@ impl<'a> Parser<'a> {
|
|||||||
opcode,
|
opcode,
|
||||||
imm: self.match_imm64("expected immediate integer operand")?,
|
imm: self.match_imm64("expected immediate integer operand")?,
|
||||||
},
|
},
|
||||||
InstructionFormat::UnaryImm128 => match explicit_control_type {
|
|
||||||
None => {
|
|
||||||
return err!(
|
|
||||||
self.loc,
|
|
||||||
"Expected {:?} to have a controlling type variable, e.g. inst.i32x4",
|
|
||||||
opcode
|
|
||||||
)
|
|
||||||
}
|
|
||||||
Some(ty) => {
|
|
||||||
let uimm128 = self.match_uimm128_or_literals(ty)?;
|
|
||||||
let constant_handle = ctx.function.dfg.constants.insert(uimm128.0.to_vec());
|
|
||||||
InstructionData::UnaryImm128 {
|
|
||||||
opcode,
|
|
||||||
imm: constant_handle,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
InstructionFormat::UnaryIeee32 => InstructionData::UnaryIeee32 {
|
InstructionFormat::UnaryIeee32 => InstructionData::UnaryIeee32 {
|
||||||
opcode,
|
opcode,
|
||||||
imm: self.match_ieee32("expected immediate 32-bit float operand")?,
|
imm: self.match_ieee32("expected immediate 32-bit float operand")?,
|
||||||
@@ -2442,6 +2425,36 @@ impl<'a> Parser<'a> {
|
|||||||
let lane = self.match_uimm8("expected lane number")?;
|
let lane = self.match_uimm8("expected lane number")?;
|
||||||
InstructionData::ExtractLane { opcode, lane, arg }
|
InstructionData::ExtractLane { opcode, lane, arg }
|
||||||
}
|
}
|
||||||
|
InstructionFormat::UnaryConst => match explicit_control_type {
|
||||||
|
None => {
|
||||||
|
return err!(
|
||||||
|
self.loc,
|
||||||
|
"Expected {:?} to have a controlling type variable, e.g. inst.i32x4",
|
||||||
|
opcode
|
||||||
|
)
|
||||||
|
}
|
||||||
|
Some(controlling_type) => {
|
||||||
|
let uimm128 = self.match_uimm128_or_literals(controlling_type)?;
|
||||||
|
let constant_handle = ctx.function.dfg.constants.insert(uimm128.to_vec());
|
||||||
|
InstructionData::UnaryConst {
|
||||||
|
opcode,
|
||||||
|
constant_handle,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
InstructionFormat::Shuffle => {
|
||||||
|
let a = self.match_value("expected SSA value first operand")?;
|
||||||
|
self.match_token(Token::Comma, "expected ',' between operands")?;
|
||||||
|
let b = self.match_value("expected SSA value second operand")?;
|
||||||
|
self.match_token(Token::Comma, "expected ',' between operands")?;
|
||||||
|
let uimm128 = self.match_uimm128_or_literals(I8X16)?;
|
||||||
|
let mask = ctx.function.dfg.immediates.push(uimm128.to_vec());
|
||||||
|
InstructionData::Shuffle {
|
||||||
|
opcode,
|
||||||
|
mask,
|
||||||
|
args: [a, b],
|
||||||
|
}
|
||||||
|
}
|
||||||
InstructionFormat::IntCompare => {
|
InstructionFormat::IntCompare => {
|
||||||
let cond = self.match_enum("expected intcc condition code")?;
|
let cond = self.match_enum("expected intcc condition code")?;
|
||||||
let lhs = self.match_value("expected SSA value first operand")?;
|
let lhs = self.match_value("expected SSA value first operand")?;
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
use cranelift_codegen::ir::immediates::Uimm128;
|
|
||||||
use cranelift_codegen::ir::{Ebb, Function, Inst, InstructionData, Signature};
|
use cranelift_codegen::ir::{Ebb, Function, Inst, InstructionData, Signature};
|
||||||
use serde_derive::{Deserialize, Serialize};
|
use serde_derive::{Deserialize, Serialize};
|
||||||
|
|
||||||
@@ -59,6 +58,11 @@ pub enum SerInstData {
|
|||||||
arg: String,
|
arg: String,
|
||||||
lane: String,
|
lane: String,
|
||||||
},
|
},
|
||||||
|
Shuffle {
|
||||||
|
opcode: String,
|
||||||
|
args: [String; 2],
|
||||||
|
mask: String,
|
||||||
|
},
|
||||||
IntCompare {
|
IntCompare {
|
||||||
opcode: String,
|
opcode: String,
|
||||||
args: [String; 2],
|
args: [String; 2],
|
||||||
@@ -262,14 +266,6 @@ pub fn get_inst_data(inst_index: Inst, func: &Function) -> SerInstData {
|
|||||||
opcode: opcode.to_string(),
|
opcode: opcode.to_string(),
|
||||||
imm: imm.to_string(),
|
imm: imm.to_string(),
|
||||||
},
|
},
|
||||||
InstructionData::UnaryImm128 { opcode, imm } => {
|
|
||||||
let data = func.dfg.constants.get(imm);
|
|
||||||
let uimm128 = Uimm128::from(&data[..]);
|
|
||||||
SerInstData::UnaryImm {
|
|
||||||
opcode: opcode.to_string(),
|
|
||||||
imm: uimm128.to_string(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
InstructionData::UnaryIeee32 { opcode, imm } => SerInstData::UnaryIeee32 {
|
InstructionData::UnaryIeee32 { opcode, imm } => SerInstData::UnaryIeee32 {
|
||||||
opcode: opcode.to_string(),
|
opcode: opcode.to_string(),
|
||||||
imm: imm.to_string(),
|
imm: imm.to_string(),
|
||||||
@@ -340,6 +336,28 @@ pub fn get_inst_data(inst_index: Inst, func: &Function) -> SerInstData {
|
|||||||
arg: arg.to_string(),
|
arg: arg.to_string(),
|
||||||
lane: lane.to_string(),
|
lane: lane.to_string(),
|
||||||
},
|
},
|
||||||
|
InstructionData::UnaryConst {
|
||||||
|
opcode,
|
||||||
|
constant_handle,
|
||||||
|
} => {
|
||||||
|
let constant = func.dfg.constants.get(constant_handle);
|
||||||
|
SerInstData::UnaryImm {
|
||||||
|
opcode: opcode.to_string(),
|
||||||
|
imm: format!("{:?}", constant),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
InstructionData::Shuffle { opcode, args, mask } => {
|
||||||
|
let mask = func
|
||||||
|
.dfg
|
||||||
|
.immediates
|
||||||
|
.get(mask)
|
||||||
|
.expect("Expected shuffle mask to already be inserted in immediate mapping");
|
||||||
|
SerInstData::Shuffle {
|
||||||
|
opcode: opcode.to_string(),
|
||||||
|
args: [args[0].to_string(), args[1].to_string()],
|
||||||
|
mask: format!("{:?}", mask),
|
||||||
|
}
|
||||||
|
}
|
||||||
InstructionData::IntCompare { opcode, args, cond } => {
|
InstructionData::IntCompare { opcode, args, cond } => {
|
||||||
let hold_args = [args[0].to_string(), args[1].to_string()];
|
let hold_args = [args[0].to_string(), args[1].to_string()];
|
||||||
SerInstData::IntCompare {
|
SerInstData::IntCompare {
|
||||||
|
|||||||
@@ -974,9 +974,20 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
builder,
|
builder,
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
Operator::V8x16Shuffle { lanes, .. } => {
|
||||||
|
let (vector_a, vector_b) = state.pop2();
|
||||||
|
let a = optionally_bitcast_vector(vector_a, I8X16, builder);
|
||||||
|
let b = optionally_bitcast_vector(vector_b, I8X16, builder);
|
||||||
|
let mask = builder.func.dfg.immediates.push(lanes.to_vec());
|
||||||
|
let shuffled = builder.ins().shuffle(a, b, mask);
|
||||||
|
state.push1(shuffled)
|
||||||
|
// At this point the original types of a and b are lost; users of this value (i.e. this
|
||||||
|
// WASM-to-CLIF translator) may need to raw_bitcast for type-correctness. This is due
|
||||||
|
// to WASM using the less specific v128 type for certain operations and more specific
|
||||||
|
// types (e.g. i8x16) for others.
|
||||||
|
}
|
||||||
Operator::V128Load { .. }
|
Operator::V128Load { .. }
|
||||||
| Operator::V128Store { .. }
|
| Operator::V128Store { .. }
|
||||||
| Operator::V8x16Shuffle { .. }
|
|
||||||
| Operator::I8x16Eq
|
| Operator::I8x16Eq
|
||||||
| Operator::I8x16Ne
|
| Operator::I8x16Ne
|
||||||
| Operator::I8x16LtS
|
| Operator::I8x16LtS
|
||||||
|
|||||||
Reference in New Issue
Block a user