Add x86 encoding for SIMD icmp eq
Also adds a predicate for matching the `eq` IntCC condition code (TODO: this should be replaced by something more general).
This commit is contained in:
@@ -573,6 +573,7 @@ pub(crate) fn define(
|
||||
let rec_gvaddr4 = r.template("gvaddr4");
|
||||
let rec_gvaddr8 = r.template("gvaddr8");
|
||||
let rec_icscc = r.template("icscc");
|
||||
let rec_icscc_fpr = r.template("icscc_fpr");
|
||||
let rec_icscc_ib = r.template("icscc_ib");
|
||||
let rec_icscc_id = r.template("icscc_id");
|
||||
let rec_indirect_jmp = r.template("indirect_jmp");
|
||||
@@ -2058,6 +2059,31 @@ pub(crate) fn define(
|
||||
e.enc_32_64(iadd, rec_fa.opcodes(opcodes.to_vec()));
|
||||
}
|
||||
|
||||
// SIMD icmp using PCMPEQ*
|
||||
let mut pcmpeq_mapping: HashMap<u64, (Vec<u8>, Option<SettingPredicateNumber>)> =
|
||||
HashMap::new();
|
||||
pcmpeq_mapping.insert(8, (vec![0x66, 0x0f, 0x74], None)); // PCMPEQB from SSE2
|
||||
pcmpeq_mapping.insert(16, (vec![0x66, 0x0f, 0x75], None)); // PCMPEQW from SSE2
|
||||
pcmpeq_mapping.insert(32, (vec![0x66, 0x0f, 0x76], None)); // PCMPEQD from SSE2
|
||||
pcmpeq_mapping.insert(64, (vec![0x66, 0x0f, 0x38, 0x29], Some(use_sse41_simd))); // PCMPEQQ from SSE4.1
|
||||
for ty in ValueType::all_lane_types().filter(|t| t.is_int() && allowed_simd_type(t)) {
|
||||
if let Some((opcodes, isa_predicate)) = pcmpeq_mapping.get(&ty.lane_bits()) {
|
||||
let instruction = icmp.bind_vector_from_lane(ty, sse_vector_size);
|
||||
let f_int_compare = formats.get(formats.by_name("IntCompare"));
|
||||
let has_eq_condition_code =
|
||||
InstructionPredicate::new_has_condition_code(f_int_compare, "eq", "cond");
|
||||
let template = rec_icscc_fpr.nonrex().opcodes(opcodes.clone());
|
||||
e.enc_32_64_func(instruction, template, |builder| {
|
||||
let builder = builder.inst_predicate(has_eq_condition_code);
|
||||
if let Some(p) = isa_predicate {
|
||||
builder.isa_predicate(*p)
|
||||
} else {
|
||||
builder
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Reference type instructions
|
||||
|
||||
// Null references implemented as iconst 0.
|
||||
|
||||
@@ -329,6 +329,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
||||
def!(y = splat_any8x16(x)),
|
||||
vec![
|
||||
def!(a = scalar_to_vector(x)), // move into the lowest 8 bits of an XMM register
|
||||
// TODO replace the following two instructions with `vconst(0)` when this is possible; see https://github.com/CraneStation/cranelift/issues/1052
|
||||
def!(b = f64const(ieee64_zero)), // zero out a different XMM register; the shuffle mask for moving the lowest byte to all other byte lanes is 0x0
|
||||
def!(c = bitcast_f64_to_any8x16(b)), // no instruction emitted; informs the SSA that the 0 in b can be used as a vector of this type
|
||||
def!(y = x86_pshufb(a, c)), // PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b)
|
||||
|
||||
@@ -2940,6 +2940,19 @@ pub(crate) fn define<'shared>(
|
||||
),
|
||||
);
|
||||
|
||||
recipes.add_template_recipe(
|
||||
EncodingRecipeBuilder::new("icscc_fpr", f_int_compare, 1)
|
||||
.operands_in(vec![fpr, fpr])
|
||||
.operands_out(vec![0])
|
||||
.emit(
|
||||
r#"
|
||||
// Comparison instruction.
|
||||
{{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
|
||||
modrm_rr(in_reg1, in_reg0, sink);
|
||||
"#,
|
||||
),
|
||||
);
|
||||
|
||||
{
|
||||
let format = formats.get(f_int_compare_imm);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user