Infer REX prefix for SIMD store and vconst instructions

This commit is contained in:
Andrew Brown
2020-02-12 15:04:10 -08:00
parent 9b3ac10ebc
commit 936120dcf9
4 changed files with 60 additions and 7 deletions

View File

@@ -1795,14 +1795,14 @@ fn define_simd(
let is_zero_128bit =
InstructionPredicate::new_is_all_zeroes(&*formats.unary_const, "constant_handle");
let template = rec_vconst_optimized.nonrex().opcodes(&PXOR);
let template = rec_vconst_optimized.opcodes(&PXOR).infer_rex();
e.enc_32_64_func(instruction.clone(), template, |builder| {
builder.inst_predicate(is_zero_128bit)
});
let is_ones_128bit =
InstructionPredicate::new_is_all_ones(&*formats.unary_const, "constant_handle");
let template = rec_vconst_optimized.nonrex().opcodes(&PCMPEQB);
let template = rec_vconst_optimized.opcodes(&PCMPEQB).infer_rex();
e.enc_32_64_func(instruction, template, |builder| {
builder.inst_predicate(is_ones_128bit)
});
@@ -1816,7 +1816,7 @@ fn define_simd(
// in memory) but some performance measurements are needed.
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let instruction = vconst.bind(vector(ty, sse_vector_size));
let template = rec_vconst.nonrex().opcodes(&MOVUPS_LOAD);
let template = rec_vconst.opcodes(&MOVUPS_LOAD).infer_rex();
e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
}
@@ -1826,7 +1826,10 @@ fn define_simd(
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
// Store
let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any);
e.enc_32_64(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE));
e.enc_32_64(
bound_store.clone(),
rec_fst.opcodes(&MOVUPS_STORE).infer_rex(),
);
e.enc_32_64(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE));
e.enc_32_64(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE));

View File

@@ -46,6 +46,16 @@ impl<'builder> RecipeGroup<'builder> {
self.templates.push(template.clone());
template
}
/// Registers a template for `recipe` whose size computation is delegated to `infer_function`.
///
/// The template is built from `recipe` and this group's register definitions, then configured
/// through `inferred_rex_compute_size` with `infer_function` (call sites pass the name of a
/// sizing function such as `"size_with_inferred_rex_for_outreg0"`). One shared handle is stored
/// in `self.templates` and another is returned to the caller.
fn add_template_inferred(
    &mut self,
    recipe: EncodingRecipeBuilder,
    infer_function: &'static str,
) -> Rc<Template<'builder>> {
    let inferred = Template::new(recipe, self.regs).inferred_rex_compute_size(infer_function);
    let shared = Rc::new(inferred);
    // Keep one reference in the group; hand the other back to the caller.
    self.templates.push(Rc::clone(&shared));
    shared
}
fn add_template(&mut self, template: Template<'builder>) -> Rc<Template<'builder>> {
let template = Rc::new(template);
self.templates.push(template.clone());
@@ -1481,7 +1491,7 @@ pub(crate) fn define<'shared>(
);
// XX /r register-indirect store of FPR with no offset.
recipes.add_template_recipe(
recipes.add_template_inferred(
EncodingRecipeBuilder::new("fst", &formats.store, 1)
.operands_in(vec![fpr, gpr])
.inst_predicate(has_no_offset)
@@ -1504,6 +1514,7 @@ pub(crate) fn define<'shared>(
}
"#,
),
"size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1",
);
let has_small_offset =
@@ -2515,7 +2526,7 @@ pub(crate) fn define<'shared>(
),
);
recipes.add_template_recipe(
recipes.add_template_inferred(
EncodingRecipeBuilder::new("vconst", &formats.unary_const, 5)
.operands_out(vec![fpr])
.clobbers_flags(false)
@@ -2526,9 +2537,10 @@ pub(crate) fn define<'shared>(
const_disp4(constant_handle, func, sink);
"#,
),
"size_with_inferred_rex_for_outreg0",
);
recipes.add_template_recipe(
recipes.add_template_inferred(
EncodingRecipeBuilder::new("vconst_optimized", &formats.unary_const, 1)
.operands_out(vec![fpr])
.clobbers_flags(false)
@@ -2538,6 +2550,7 @@ pub(crate) fn define<'shared>(
modrm_rr(out_reg0, out_reg0, sink);
"#,
),
"size_with_inferred_rex_for_outreg0",
);
recipes.add_template_recipe(

View File

@@ -123,6 +123,22 @@ fn size_plus_maybe_sib_or_offset_for_inreg_1(
sizing.base_size + if needs_sib_or_offset { 1 } else { 0 }
}
/// Computes the encoding size for a recipe with two input registers (inreg0, inreg1).
///
/// The result is the size reported by `size_plus_maybe_sib_or_offset_for_inreg_1` (which
/// accounts for a SIB byte or offset required by inreg1) plus one byte when a dynamic REX
/// prefix is needed. A REX prefix is counted when the encoding bits have REX.W set or when
/// either input register satisfies `is_extended_reg`.
fn size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1(
    sizing: &RecipeSizing,
    enc: Encoding,
    inst: Inst,
    divert: &RegDiversions,
    func: &Function,
) -> u8 {
    let rex_w_set = EncodingBits::from(enc.bits()).rex_w() != 0;
    // Short-circuits: the register checks only run when REX.W alone does not force a prefix.
    let rex_len = u8::from(
        rex_w_set
            || test_input(0, inst, divert, func, is_extended_reg)
            || test_input(1, inst, divert, func, is_extended_reg),
    );
    let len_without_rex =
        size_plus_maybe_sib_or_offset_for_inreg_1(sizing, enc, inst, divert, func);
    len_without_rex + rex_len
}
/// Infers whether a dynamic REX prefix will be emitted, for use with one input reg.
///
/// A REX prefix is known to be emitted if either:
@@ -199,6 +215,19 @@ fn size_with_inferred_rex_for_inreg0_outreg0(
sizing.base_size + if needs_rex { 1 } else { 0 }
}
/// Computes the encoding size for a recipe whose REX prefix depends on a single output register.
///
/// Returns `sizing.base_size`, plus one byte when a dynamic REX prefix is needed: either the
/// encoding bits have REX.W set, or the first result register (outreg0) satisfies
/// `is_extended_reg`.
fn size_with_inferred_rex_for_outreg0(
    sizing: &RecipeSizing,
    enc: Encoding,
    inst: Inst,
    divert: &RegDiversions,
    func: &Function,
) -> u8 {
    let rex_w_set = EncodingBits::from(enc.bits()).rex_w() != 0;
    // Only inspect the output register when REX.W has not already forced a prefix.
    let rex_len = u8::from(rex_w_set || test_result(0, inst, divert, func, is_extended_reg));
    sizing.base_size + rex_len
}
/// Infers whether a dynamic REX prefix will be emitted, for use with CMOV.
///
/// CMOV uses 3 inputs, with the REX inferred from reg1 and reg2.

View File

@@ -1679,3 +1679,11 @@ block0:
[-, %r10] v0 = bconst.b64 true ; bin: 41 ba 00000001
return
}
; Binary-emission expectations for a 128-bit `vconst` and a `store` of that vector when the
; registers involved are %xmm9 and %r10. Both expected byte sequences start with a 0x4_ byte
; ahead of the 0f opcode escape -- presumably the inferred REX prefix this test is meant to
; cover (TODO confirm against the x86 encoding tables).
function %V128() {
block0:
[-,%r10] v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4
[-, %xmm9] v4 = vconst.i32x4 [0 1 2 3] ; bin: 44 0f 10 0d 00000005 PCRelRodata4(23)
store v4, v3 ; bin: heap_oob 45 0f 11 0a
return
}