Infer REX prefix for SIMD store and vconst instructions
This commit is contained in:
@@ -1795,14 +1795,14 @@ fn define_simd(
|
||||
|
||||
let is_zero_128bit =
|
||||
InstructionPredicate::new_is_all_zeroes(&*formats.unary_const, "constant_handle");
|
||||
let template = rec_vconst_optimized.nonrex().opcodes(&PXOR);
|
||||
let template = rec_vconst_optimized.opcodes(&PXOR).infer_rex();
|
||||
e.enc_32_64_func(instruction.clone(), template, |builder| {
|
||||
builder.inst_predicate(is_zero_128bit)
|
||||
});
|
||||
|
||||
let is_ones_128bit =
|
||||
InstructionPredicate::new_is_all_ones(&*formats.unary_const, "constant_handle");
|
||||
let template = rec_vconst_optimized.nonrex().opcodes(&PCMPEQB);
|
||||
let template = rec_vconst_optimized.opcodes(&PCMPEQB).infer_rex();
|
||||
e.enc_32_64_func(instruction, template, |builder| {
|
||||
builder.inst_predicate(is_ones_128bit)
|
||||
});
|
||||
@@ -1816,7 +1816,7 @@ fn define_simd(
|
||||
// in memory) but some performance measurements are needed.
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
let instruction = vconst.bind(vector(ty, sse_vector_size));
|
||||
let template = rec_vconst.nonrex().opcodes(&MOVUPS_LOAD);
|
||||
let template = rec_vconst.opcodes(&MOVUPS_LOAD).infer_rex();
|
||||
e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
|
||||
}
|
||||
|
||||
@@ -1826,7 +1826,10 @@ fn define_simd(
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
// Store
|
||||
let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any);
|
||||
e.enc_32_64(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE));
|
||||
e.enc_32_64(
|
||||
bound_store.clone(),
|
||||
rec_fst.opcodes(&MOVUPS_STORE).infer_rex(),
|
||||
);
|
||||
e.enc_32_64(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE));
|
||||
e.enc_32_64(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE));
|
||||
|
||||
|
||||
@@ -46,6 +46,16 @@ impl<'builder> RecipeGroup<'builder> {
|
||||
self.templates.push(template.clone());
|
||||
template
|
||||
}
|
||||
fn add_template_inferred(
|
||||
&mut self,
|
||||
recipe: EncodingRecipeBuilder,
|
||||
infer_function: &'static str,
|
||||
) -> Rc<Template<'builder>> {
|
||||
let template =
|
||||
Rc::new(Template::new(recipe, self.regs).inferred_rex_compute_size(infer_function));
|
||||
self.templates.push(template.clone());
|
||||
template
|
||||
}
|
||||
fn add_template(&mut self, template: Template<'builder>) -> Rc<Template<'builder>> {
|
||||
let template = Rc::new(template);
|
||||
self.templates.push(template.clone());
|
||||
@@ -1481,7 +1491,7 @@ pub(crate) fn define<'shared>(
|
||||
);
|
||||
|
||||
// XX /r register-indirect store of FPR with no offset.
|
||||
recipes.add_template_recipe(
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("fst", &formats.store, 1)
|
||||
.operands_in(vec![fpr, gpr])
|
||||
.inst_predicate(has_no_offset)
|
||||
@@ -1504,6 +1514,7 @@ pub(crate) fn define<'shared>(
|
||||
}
|
||||
"#,
|
||||
),
|
||||
"size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1",
|
||||
);
|
||||
|
||||
let has_small_offset =
|
||||
@@ -2515,7 +2526,7 @@ pub(crate) fn define<'shared>(
|
||||
),
|
||||
);
|
||||
|
||||
recipes.add_template_recipe(
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("vconst", &formats.unary_const, 5)
|
||||
.operands_out(vec![fpr])
|
||||
.clobbers_flags(false)
|
||||
@@ -2526,9 +2537,10 @@ pub(crate) fn define<'shared>(
|
||||
const_disp4(constant_handle, func, sink);
|
||||
"#,
|
||||
),
|
||||
"size_with_inferred_rex_for_outreg0",
|
||||
);
|
||||
|
||||
recipes.add_template_recipe(
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("vconst_optimized", &formats.unary_const, 1)
|
||||
.operands_out(vec![fpr])
|
||||
.clobbers_flags(false)
|
||||
@@ -2538,6 +2550,7 @@ pub(crate) fn define<'shared>(
|
||||
modrm_rr(out_reg0, out_reg0, sink);
|
||||
"#,
|
||||
),
|
||||
"size_with_inferred_rex_for_outreg0",
|
||||
);
|
||||
|
||||
recipes.add_template_recipe(
|
||||
|
||||
@@ -123,6 +123,22 @@ fn size_plus_maybe_sib_or_offset_for_inreg_1(
|
||||
sizing.base_size + if needs_sib_or_offset { 1 } else { 0 }
|
||||
}
|
||||
|
||||
/// Calculates the size while inferring if the first and second input registers (inreg0, inreg1)
|
||||
/// require a dynamic REX prefix and if the second input register (inreg1) requires a SIB or offset.
|
||||
fn size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1(
|
||||
sizing: &RecipeSizing,
|
||||
enc: Encoding,
|
||||
inst: Inst,
|
||||
divert: &RegDiversions,
|
||||
func: &Function,
|
||||
) -> u8 {
|
||||
let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0)
|
||||
|| test_input(0, inst, divert, func, is_extended_reg)
|
||||
|| test_input(1, inst, divert, func, is_extended_reg);
|
||||
size_plus_maybe_sib_or_offset_for_inreg_1(sizing, enc, inst, divert, func)
|
||||
+ if needs_rex { 1 } else { 0 }
|
||||
}
|
||||
|
||||
/// Infers whether a dynamic REX prefix will be emitted, for use with one input reg.
|
||||
///
|
||||
/// A REX prefix is known to be emitted if either:
|
||||
@@ -199,6 +215,19 @@ fn size_with_inferred_rex_for_inreg0_outreg0(
|
||||
sizing.base_size + if needs_rex { 1 } else { 0 }
|
||||
}
|
||||
|
||||
/// Infers whether a dynamic REX prefix will be emitted, based on a single output register.
|
||||
fn size_with_inferred_rex_for_outreg0(
|
||||
sizing: &RecipeSizing,
|
||||
enc: Encoding,
|
||||
inst: Inst,
|
||||
divert: &RegDiversions,
|
||||
func: &Function,
|
||||
) -> u8 {
|
||||
let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0)
|
||||
|| test_result(0, inst, divert, func, is_extended_reg);
|
||||
sizing.base_size + if needs_rex { 1 } else { 0 }
|
||||
}
|
||||
|
||||
/// Infers whether a dynamic REX prefix will be emitted, for use with CMOV.
|
||||
///
|
||||
/// CMOV uses 3 inputs, with the REX inferred from reg1 and reg2.
|
||||
|
||||
@@ -1679,3 +1679,11 @@ block0:
|
||||
[-, %r10] v0 = bconst.b64 true ; bin: 41 ba 00000001
|
||||
return
|
||||
}
|
||||
|
||||
function %V128() {
|
||||
block0:
|
||||
[-,%r10] v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4
|
||||
[-, %xmm9] v4 = vconst.i32x4 [0 1 2 3] ; bin: 44 0f 10 0d 00000005 PCRelRodata4(23)
|
||||
store v4, v3 ; bin: heap_oob 45 0f 11 0a
|
||||
return
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user