Add x86 encoding for SIMD icmp eq

Also adds a predicate for matching the `eq` IntCC code (TODO: this should be replaced by something more general).
This commit is contained in:
Andrew Brown
2019-09-03 15:15:55 -07:00
parent 702155b19b
commit a3db30d97e
7 changed files with 134 additions and 0 deletions

View File

@@ -628,6 +628,9 @@ pub enum FormatPredicateKind {
/// Is the referenced data object colocated?
IsColocatedData,
/// Does the operation have a specific condition code?
HasConditionCode(&'static str),
}
#[derive(Clone, Hash, PartialEq, Eq)]
@@ -714,6 +717,10 @@ impl FormatPredicateNode {
FormatPredicateKind::IsColocatedData => {
format!("predicates::is_colocated_data({}, func)", self.member_name)
}
FormatPredicateKind::HasConditionCode(code) => format!(
"predicates::match_condition_code_to_str({}, \"{}\")",
self.member_name, code
),
}
}
}
@@ -997,6 +1004,18 @@ impl InstructionPredicate {
))
}
/// Builds a format predicate requiring that the `field_name` member of `format` carries the
/// condition code spelled `condition_code` (e.g. `"eq"`).
///
/// The condition code is passed as its textual form; the resulting node wraps a
/// `FormatPredicateKind::HasConditionCode` for the given format and field.
pub fn new_has_condition_code(
    format: &InstructionFormat,
    condition_code: &'static str,
    field_name: &'static str,
) -> InstructionPredicateNode {
    let kind = FormatPredicateKind::HasConditionCode(condition_code);
    let format_predicate = FormatPredicateNode::new(format, field_name, kind);
    InstructionPredicateNode::FormatPredicate(format_predicate)
}
pub fn and(mut self, new_node: InstructionPredicateNode) -> Self {
let node = self.node;
let mut and_nodes = match node {

View File

@@ -573,6 +573,7 @@ pub(crate) fn define(
let rec_gvaddr4 = r.template("gvaddr4");
let rec_gvaddr8 = r.template("gvaddr8");
let rec_icscc = r.template("icscc");
let rec_icscc_fpr = r.template("icscc_fpr");
let rec_icscc_ib = r.template("icscc_ib");
let rec_icscc_id = r.template("icscc_id");
let rec_indirect_jmp = r.template("indirect_jmp");
@@ -2058,6 +2059,31 @@ pub(crate) fn define(
e.enc_32_64(iadd, rec_fa.opcodes(opcodes.to_vec()));
}
// SIMD icmp using PCMPEQ*
// The table below is keyed by lane width in bits; each entry holds the opcode bytes to emit and an
// optional ISA predicate that must hold for the encoding to be available.
let mut pcmpeq_mapping: HashMap<u64, (Vec<u8>, Option<SettingPredicateNumber>)> =
HashMap::new();
pcmpeq_mapping.insert(8, (vec![0x66, 0x0f, 0x74], None)); // PCMPEQB from SSE2
pcmpeq_mapping.insert(16, (vec![0x66, 0x0f, 0x75], None)); // PCMPEQW from SSE2
pcmpeq_mapping.insert(32, (vec![0x66, 0x0f, 0x76], None)); // PCMPEQD from SSE2
pcmpeq_mapping.insert(64, (vec![0x66, 0x0f, 0x38, 0x29], Some(use_sse41_simd))); // PCMPEQQ from SSE4.1
// Visit every integer lane type accepted by `allowed_simd_type`; lane widths without a table entry
// are skipped by the `if let` below.
for ty in ValueType::all_lane_types().filter(|t| t.is_int() && allowed_simd_type(t)) {
if let Some((opcodes, isa_predicate)) = pcmpeq_mapping.get(&ty.lane_bits()) {
// Bind `icmp` to the vector type built from this lane type and `sse_vector_size`.
let instruction = icmp.bind_vector_from_lane(ty, sse_vector_size);
let f_int_compare = formats.get(formats.by_name("IntCompare"));
// Restrict the encoding to the `eq` condition code (read from the format's `cond` field);
// these encodings do not cover any other condition code.
let has_eq_condition_code =
InstructionPredicate::new_has_condition_code(f_int_compare, "eq", "cond");
// The `nonrex()` form of the `icscc_fpr` template is used with this lane width's opcode bytes.
let template = rec_icscc_fpr.nonrex().opcodes(opcodes.clone());
e.enc_32_64_func(instruction, template, |builder| {
// Always require the `eq` predicate; add the ISA predicate only when the table supplies one
// (currently only the 64-bit entry does).
let builder = builder.inst_predicate(has_eq_condition_code);
if let Some(p) = isa_predicate {
builder.isa_predicate(*p)
} else {
builder
}
});
}
}
// Reference type instructions
// Null references implemented as iconst 0.

View File

@@ -329,6 +329,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
def!(y = splat_any8x16(x)),
vec![
def!(a = scalar_to_vector(x)), // move into the lowest 8 bits of an XMM register
// TODO replace the following two instructions with `vconst(0)` when this is possible; see https://github.com/CraneStation/cranelift/issues/1052
def!(b = f64const(ieee64_zero)), // zero out a different XMM register; the shuffle mask for moving the lowest byte to all other byte lanes is 0x0
def!(c = bitcast_f64_to_any8x16(b)), // no instruction emitted; informs the SSA that the 0 in b can be used as a vector of this type
def!(y = x86_pshufb(a, c)), // PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b)

View File

@@ -2940,6 +2940,19 @@ pub(crate) fn define<'shared>(
),
);
// Template recipe "icscc_fpr": an `f_int_compare`-format instruction whose two inputs are both in
// `fpr` registers. The emitted code writes the opcode (with `rex2(in_reg1, in_reg0)`) followed by
// a register-register ModR/M byte for the same register pair.
// NOTE(review): `operands_out(vec![0])` presumably ties the result to input operand 0, and the
// trailing `1` passed to `EncodingRecipeBuilder::new` is presumably the recipe's base size in
// bytes -- confirm against the builder's definition.
recipes.add_template_recipe(
EncodingRecipeBuilder::new("icscc_fpr", f_int_compare, 1)
.operands_in(vec![fpr, fpr])
.operands_out(vec![0])
.emit(
r#"
// Comparison instruction.
{{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
modrm_rr(in_reg1, in_reg0, sink);
"#,
),
);
{
let format = formats.get(f_int_compare_imm);

View File

@@ -10,6 +10,8 @@
//! dead code warning.
use crate::ir;
use crate::ir::condcodes::IntCC;
use std::string::ToString;
/// Check that an integer value is zero.
#[allow(dead_code)]
@@ -83,6 +85,14 @@ pub fn has_length_of(value_list: &ir::ValueList, num: usize, func: &ir::Function
value_list.len(&func.dfg.value_lists) == num
}
/// Check that a condition code's textual form is exactly the given string.
///
/// `condition_code` is rendered with `to_string()` and compared against
/// `stringified_condition_code`; the comparison is an exact, case-sensitive string match.
#[allow(dead_code)]
pub fn match_condition_code_to_str(
    condition_code: IntCC,
    stringified_condition_code: &str,
) -> bool {
    let rendered = condition_code.to_string();
    rendered.as_str() == stringified_condition_code
}
#[cfg(test)]
mod tests {
use super::*;
@@ -136,4 +146,10 @@ mod tests {
assert!(!is_all_ones_128_bit(&[0; 16]));
assert!(is_all_ones_128_bit(&[0xff; 16]));
}
#[test]
fn condition_code() {
    // `IntCC::Equal` must match its own textual form and nothing else.
    let equal_matches = |text: &str| match_condition_code_to_str(IntCC::Equal, text);
    assert!(equal_matches("eq"));
    assert!(!equal_matches("ne"));
}
}

View File

@@ -0,0 +1,35 @@
test binemit
set enable_simd
target x86_64 skylake
; Encoding check for `icmp eq` on i8x16 with %xmm3 and %xmm4; the expected opcode bytes
; 66 0f 74 are those registered for PCMPEQB.
function %icmp_i8x16() {
ebb0:
[-, %xmm3] v0 = vconst.i8x16 0x00 ; bin: 66 0f ef db
[-, %xmm4] v1 = vconst.i8x16 0xffffffffffffffffffffffffffffffff ; bin: 66 0f 74 e4
[-, %xmm3] v2 = icmp eq v0, v1 ; bin: 66 0f 74 dc
return
}
; Encoding check for `icmp eq` on i16x8 with %xmm0 and %xmm7; the expected opcode bytes
; 66 0f 75 are those registered for PCMPEQW. Only the comparison's bytes are checked here.
function %icmp_i16x8() {
ebb0:
[-, %xmm0] v0 = vconst.i16x8 0x00
[-, %xmm7] v1 = vconst.i16x8 0xffffffffffffffffffffffffffffffff
[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 75 c7
return
}
; Encoding check for `icmp eq` on i32x4 with %xmm0 and %xmm4; the expected opcode bytes
; 66 0f 76 are those registered for PCMPEQD. Only the comparison's bytes are checked here.
function %icmp_i32x4() {
ebb0:
[-, %xmm0] v0 = vconst.i32x4 0x00
[-, %xmm4] v1 = vconst.i32x4 0xffffffffffffffffffffffffffffffff
[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 76 c4
return
}
; Encoding check for `icmp eq` on i64x2 with %xmm0 and %xmm1; the expected opcode bytes
; 66 0f 38 29 are those registered for PCMPEQQ, which is gated on the SSE4.1 setting.
function %icmp_i64x2() {
ebb0:
[-, %xmm0] v0 = vconst.i64x2 0x00
[-, %xmm1] v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 38 29 c1
return
}

View File

@@ -0,0 +1,24 @@
test run
set enable_simd
function %run_icmp_i8x16() -> b8 {
ebb0:
; Both operands are the same i8x16 constant (0x00), so every lane compares equal.
v0 = vconst.i8x16 0x00
v1 = vconst.i8x16 0x00
v2 = icmp eq v0, v1
; Lane 0 of the comparison result is the value handed back to the caller.
v3 = extractlane v2, 0
return v3
}
; run
function %run_icmp_i64x2() -> b64 {
ebb0:
; Both operands are the same all-ones i64x2 constant, so every lane compares equal.
v0 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
v2 = icmp eq v0, v1
; Lane 1 (the upper 64-bit lane) of the comparison result is the value handed back.
v3 = extractlane v2, 1
return v3
}
; run