Add x86 encoding for SIMD icmp eq
Also adds a predicate for matching the `eq` IntCC condition code (TODO: replace this with a more general condition-code predicate).
This commit is contained in:
@@ -628,6 +628,9 @@ pub enum FormatPredicateKind {
|
|||||||
|
|
||||||
/// Is the referenced data object colocated?
|
/// Is the referenced data object colocated?
|
||||||
IsColocatedData,
|
IsColocatedData,
|
||||||
|
|
||||||
|
/// Does the operation have a specific condition code?
|
||||||
|
HasConditionCode(&'static str),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Hash, PartialEq, Eq)]
|
#[derive(Clone, Hash, PartialEq, Eq)]
|
||||||
@@ -714,6 +717,10 @@ impl FormatPredicateNode {
|
|||||||
FormatPredicateKind::IsColocatedData => {
|
FormatPredicateKind::IsColocatedData => {
|
||||||
format!("predicates::is_colocated_data({}, func)", self.member_name)
|
format!("predicates::is_colocated_data({}, func)", self.member_name)
|
||||||
}
|
}
|
||||||
|
FormatPredicateKind::HasConditionCode(code) => format!(
|
||||||
|
"predicates::match_condition_code_to_str({}, \"{}\")",
|
||||||
|
self.member_name, code
|
||||||
|
),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -997,6 +1004,18 @@ impl InstructionPredicate {
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn new_has_condition_code(
|
||||||
|
format: &InstructionFormat,
|
||||||
|
condition_code: &'static str,
|
||||||
|
field_name: &'static str,
|
||||||
|
) -> InstructionPredicateNode {
|
||||||
|
InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new(
|
||||||
|
format,
|
||||||
|
field_name,
|
||||||
|
FormatPredicateKind::HasConditionCode(condition_code),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
pub fn and(mut self, new_node: InstructionPredicateNode) -> Self {
|
pub fn and(mut self, new_node: InstructionPredicateNode) -> Self {
|
||||||
let node = self.node;
|
let node = self.node;
|
||||||
let mut and_nodes = match node {
|
let mut and_nodes = match node {
|
||||||
|
|||||||
@@ -573,6 +573,7 @@ pub(crate) fn define(
|
|||||||
let rec_gvaddr4 = r.template("gvaddr4");
|
let rec_gvaddr4 = r.template("gvaddr4");
|
||||||
let rec_gvaddr8 = r.template("gvaddr8");
|
let rec_gvaddr8 = r.template("gvaddr8");
|
||||||
let rec_icscc = r.template("icscc");
|
let rec_icscc = r.template("icscc");
|
||||||
|
let rec_icscc_fpr = r.template("icscc_fpr");
|
||||||
let rec_icscc_ib = r.template("icscc_ib");
|
let rec_icscc_ib = r.template("icscc_ib");
|
||||||
let rec_icscc_id = r.template("icscc_id");
|
let rec_icscc_id = r.template("icscc_id");
|
||||||
let rec_indirect_jmp = r.template("indirect_jmp");
|
let rec_indirect_jmp = r.template("indirect_jmp");
|
||||||
@@ -2058,6 +2059,31 @@ pub(crate) fn define(
|
|||||||
e.enc_32_64(iadd, rec_fa.opcodes(opcodes.to_vec()));
|
e.enc_32_64(iadd, rec_fa.opcodes(opcodes.to_vec()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD icmp using PCMPEQ*
|
||||||
|
let mut pcmpeq_mapping: HashMap<u64, (Vec<u8>, Option<SettingPredicateNumber>)> =
|
||||||
|
HashMap::new();
|
||||||
|
pcmpeq_mapping.insert(8, (vec![0x66, 0x0f, 0x74], None)); // PCMPEQB from SSE2
|
||||||
|
pcmpeq_mapping.insert(16, (vec![0x66, 0x0f, 0x75], None)); // PCMPEQW from SSE2
|
||||||
|
pcmpeq_mapping.insert(32, (vec![0x66, 0x0f, 0x76], None)); // PCMPEQD from SSE2
|
||||||
|
pcmpeq_mapping.insert(64, (vec![0x66, 0x0f, 0x38, 0x29], Some(use_sse41_simd))); // PCMPEQQ from SSE4.1
|
||||||
|
for ty in ValueType::all_lane_types().filter(|t| t.is_int() && allowed_simd_type(t)) {
|
||||||
|
if let Some((opcodes, isa_predicate)) = pcmpeq_mapping.get(&ty.lane_bits()) {
|
||||||
|
let instruction = icmp.bind_vector_from_lane(ty, sse_vector_size);
|
||||||
|
let f_int_compare = formats.get(formats.by_name("IntCompare"));
|
||||||
|
let has_eq_condition_code =
|
||||||
|
InstructionPredicate::new_has_condition_code(f_int_compare, "eq", "cond");
|
||||||
|
let template = rec_icscc_fpr.nonrex().opcodes(opcodes.clone());
|
||||||
|
e.enc_32_64_func(instruction, template, |builder| {
|
||||||
|
let builder = builder.inst_predicate(has_eq_condition_code);
|
||||||
|
if let Some(p) = isa_predicate {
|
||||||
|
builder.isa_predicate(*p)
|
||||||
|
} else {
|
||||||
|
builder
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Reference type instructions
|
// Reference type instructions
|
||||||
|
|
||||||
// Null references implemented as iconst 0.
|
// Null references implemented as iconst 0.
|
||||||
|
|||||||
@@ -329,6 +329,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
def!(y = splat_any8x16(x)),
|
def!(y = splat_any8x16(x)),
|
||||||
vec![
|
vec![
|
||||||
def!(a = scalar_to_vector(x)), // move into the lowest 8 bits of an XMM register
|
def!(a = scalar_to_vector(x)), // move into the lowest 8 bits of an XMM register
|
||||||
|
// TODO replace the following two instructions with `vconst(0)` when this is possible; see https://github.com/CraneStation/cranelift/issues/1052
|
||||||
def!(b = f64const(ieee64_zero)), // zero out a different XMM register; the shuffle mask for moving the lowest byte to all other byte lanes is 0x0
|
def!(b = f64const(ieee64_zero)), // zero out a different XMM register; the shuffle mask for moving the lowest byte to all other byte lanes is 0x0
|
||||||
def!(c = bitcast_f64_to_any8x16(b)), // no instruction emitted; informs the SSA that the 0 in b can be used as a vector of this type
|
def!(c = bitcast_f64_to_any8x16(b)), // no instruction emitted; informs the SSA that the 0 in b can be used as a vector of this type
|
||||||
def!(y = x86_pshufb(a, c)), // PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b)
|
def!(y = x86_pshufb(a, c)), // PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b)
|
||||||
|
|||||||
@@ -2940,6 +2940,19 @@ pub(crate) fn define<'shared>(
|
|||||||
),
|
),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
recipes.add_template_recipe(
|
||||||
|
EncodingRecipeBuilder::new("icscc_fpr", f_int_compare, 1)
|
||||||
|
.operands_in(vec![fpr, fpr])
|
||||||
|
.operands_out(vec![0])
|
||||||
|
.emit(
|
||||||
|
r#"
|
||||||
|
// Comparison instruction.
|
||||||
|
{{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
|
||||||
|
modrm_rr(in_reg1, in_reg0, sink);
|
||||||
|
"#,
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
{
|
{
|
||||||
let format = formats.get(f_int_compare_imm);
|
let format = formats.get(f_int_compare_imm);
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,8 @@
|
|||||||
//! dead code warning.
|
//! dead code warning.
|
||||||
|
|
||||||
use crate::ir;
|
use crate::ir;
|
||||||
|
use crate::ir::condcodes::IntCC;
|
||||||
|
use std::string::ToString;
|
||||||
|
|
||||||
/// Check that an integer value is zero.
|
/// Check that an integer value is zero.
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
@@ -83,6 +85,14 @@ pub fn has_length_of(value_list: &ir::ValueList, num: usize, func: &ir::Function
|
|||||||
value_list.len(&func.dfg.value_lists) == num
|
value_list.len(&func.dfg.value_lists) == num
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn match_condition_code_to_str(
|
||||||
|
condition_code: IntCC,
|
||||||
|
stringified_condition_code: &str,
|
||||||
|
) -> bool {
|
||||||
|
condition_code.to_string().eq(stringified_condition_code)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@@ -136,4 +146,10 @@ mod tests {
|
|||||||
assert!(!is_all_ones_128_bit(&[0; 16]));
|
assert!(!is_all_ones_128_bit(&[0; 16]));
|
||||||
assert!(is_all_ones_128_bit(&[0xff; 16]));
|
assert!(is_all_ones_128_bit(&[0xff; 16]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn condition_code() {
    // `IntCC::Equal` must match its own name and must not match a different code's name.
    let matches_eq = match_condition_code_to_str(IntCC::Equal, "eq");
    let matches_ne = match_condition_code_to_str(IntCC::Equal, "ne");
    assert!(matches_eq);
    assert!(!matches_ne);
}
|
||||||
}
|
}
|
||||||
|
|||||||
35
cranelift/filetests/filetests/isa/x86/icmp-compile.clif
Normal file
35
cranelift/filetests/filetests/isa/x86/icmp-compile.clif
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
test binemit
|
||||||
|
set enable_simd
|
||||||
|
target x86_64 skylake
|
||||||
|
|
||||||
|
function %icmp_i8x16() {
|
||||||
|
ebb0:
|
||||||
|
[-, %xmm3] v0 = vconst.i8x16 0x00 ; bin: 66 0f ef db
|
||||||
|
[-, %xmm4] v1 = vconst.i8x16 0xffffffffffffffffffffffffffffffff ; bin: 66 0f 74 e4
|
||||||
|
[-, %xmm3] v2 = icmp eq v0, v1 ; bin: 66 0f 74 dc
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
function %icmp_i16x8() {
|
||||||
|
ebb0:
|
||||||
|
[-, %xmm0] v0 = vconst.i16x8 0x00
|
||||||
|
[-, %xmm7] v1 = vconst.i16x8 0xffffffffffffffffffffffffffffffff
|
||||||
|
[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 75 c7
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
function %icmp_i32x4() {
|
||||||
|
ebb0:
|
||||||
|
[-, %xmm0] v0 = vconst.i32x4 0x00
|
||||||
|
[-, %xmm4] v1 = vconst.i32x4 0xffffffffffffffffffffffffffffffff
|
||||||
|
[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 76 c4
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
function %icmp_i64x2() {
|
||||||
|
ebb0:
|
||||||
|
[-, %xmm0] v0 = vconst.i64x2 0x00
|
||||||
|
[-, %xmm1] v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
|
||||||
|
[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 38 29 c1
|
||||||
|
return
|
||||||
|
}
|
||||||
24
cranelift/filetests/filetests/isa/x86/icmp-run.clif
Normal file
24
cranelift/filetests/filetests/isa/x86/icmp-run.clif
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
test run
|
||||||
|
set enable_simd
|
||||||
|
|
||||||
|
function %run_icmp_i8x16() -> b8 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.i8x16 0x00
|
||||||
|
v1 = vconst.i8x16 0x00
|
||||||
|
v2 = icmp eq v0, v1
|
||||||
|
v3 = extractlane v2, 0
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %run_icmp_i64x2() -> b64 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
|
||||||
|
v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
|
||||||
|
v2 = icmp eq v0, v1
|
||||||
|
v3 = extractlane v2, 1
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; run
|
||||||
Reference in New Issue
Block a user