Add x86 encoding for SIMD icmp eq

Also adds a predicate for matching the `eq` IntCC code (TODO: this should be replaced by something more general).
This commit is contained in:
Andrew Brown
2019-09-03 15:15:55 -07:00
parent 702155b19b
commit a3db30d97e
7 changed files with 134 additions and 0 deletions

View File

@@ -573,6 +573,7 @@ pub(crate) fn define(
let rec_gvaddr4 = r.template("gvaddr4");
let rec_gvaddr8 = r.template("gvaddr8");
let rec_icscc = r.template("icscc");
let rec_icscc_fpr = r.template("icscc_fpr");
let rec_icscc_ib = r.template("icscc_ib");
let rec_icscc_id = r.template("icscc_id");
let rec_indirect_jmp = r.template("indirect_jmp");
@@ -2058,6 +2059,31 @@ pub(crate) fn define(
e.enc_32_64(iadd, rec_fa.opcodes(opcodes.to_vec()));
}
// SIMD icmp using PCMPEQ*
// Table keyed by lane width in bits; each entry holds the opcode bytes of the
// matching PCMPEQ* instruction and, optionally, an ISA predicate that must
// hold for the encoding to be usable.
let mut pcmpeq_mapping: HashMap<u64, (Vec<u8>, Option<SettingPredicateNumber>)> =
HashMap::new();
pcmpeq_mapping.insert(8, (vec![0x66, 0x0f, 0x74], None)); // PCMPEQB from SSE2
pcmpeq_mapping.insert(16, (vec![0x66, 0x0f, 0x75], None)); // PCMPEQW from SSE2
pcmpeq_mapping.insert(32, (vec![0x66, 0x0f, 0x76], None)); // PCMPEQD from SSE2
// The 64-bit variant is the only entry gated on an ISA predicate (`use_sse41_simd`).
pcmpeq_mapping.insert(64, (vec![0x66, 0x0f, 0x38, 0x29], Some(use_sse41_simd))); // PCMPEQQ from SSE4.1
// Visit every integer lane type accepted by `allowed_simd_type`; lane widths
// with no entry in `pcmpeq_mapping` are skipped and get no encoding here.
for ty in ValueType::all_lane_types().filter(|t| t.is_int() && allowed_simd_type(t)) {
if let Some((opcodes, isa_predicate)) = pcmpeq_mapping.get(&ty.lane_bits()) {
// Bind `icmp` to the vector type built from lane type `ty` and `sse_vector_size`.
let instruction = icmp.bind_vector_from_lane(ty, sse_vector_size);
let f_int_compare = formats.get(formats.by_name("IntCompare"));
// PCMPEQ* only tests equality, so the encoding is guarded by a predicate on
// the "cond" field being "eq".
// NOTE(review): per the commit message this predicate is a stop-gap to be
// replaced by something more general.
let has_eq_condition_code =
InstructionPredicate::new_has_condition_code(f_int_compare, "eq", "cond");
// NOTE(review): `nonrex()` presumably selects the encoding variant without a
// REX prefix -- confirm against the template API.
let template = rec_icscc_fpr.nonrex().opcodes(opcodes.clone());
e.enc_32_64_func(instruction, template, |builder| {
// The instruction predicate is always attached; the ISA predicate is added
// only when the table entry supplies one.
let builder = builder.inst_predicate(has_eq_condition_code);
if let Some(p) = isa_predicate {
builder.isa_predicate(*p)
} else {
builder
}
});
}
}
// Reference type instructions
// Null references implemented as iconst 0.

View File

@@ -329,6 +329,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
def!(y = splat_any8x16(x)),
vec![
def!(a = scalar_to_vector(x)), // move into the lowest 8 bits of an XMM register
// TODO replace the following two instructions with `vconst(0)` when this is possible; see https://github.com/CraneStation/cranelift/issues/1052
def!(b = f64const(ieee64_zero)), // zero out a different XMM register; the shuffle mask for moving the lowest byte to all other byte lanes is 0x0
def!(c = bitcast_f64_to_any8x16(b)), // no instruction emitted; informs the SSA that the 0 in b can be used as a vector of this type
def!(y = x86_pshufb(a, c)), // PSHUFB takes two XMM operands, one of which is a shuffle mask (here `c`, the zero value from `b` retyped as a vector)

View File

@@ -2940,6 +2940,19 @@ pub(crate) fn define<'shared>(
),
);
// Template recipe "icscc_fpr": an integer-compare recipe (it uses the
// `f_int_compare` format) whose two inputs are both constrained to `fpr`.
// Unlike a compare that materializes a flag, the emitted code here is just the
// opcode followed by a register-register ModR/M byte.
recipes.add_template_recipe(
// NOTE(review): the trailing `1` is presumably the recipe's base size in
// bytes (the single ModR/M byte) -- confirm against `EncodingRecipeBuilder::new`.
EncodingRecipeBuilder::new("icscc_fpr", f_int_compare, 1)
.operands_in(vec![fpr, fpr])
// NOTE(review): `vec![0]` presumably ties the result to input operand 0
// (the instruction overwrites its first operand) -- confirm.
.operands_out(vec![0])
// The emit template passes `in_reg1` before `in_reg0` to both `rex2` and
// `modrm_rr`.
.emit(
r#"
// Comparison instruction.
{{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
modrm_rr(in_reg1, in_reg0, sink);
"#,
),
);
{
let format = formats.get(f_int_compare_imm);