Infer REX prefixes for SIMD load and store with displacement
This commit is contained in:
@@ -1866,8 +1866,8 @@ fn define_simd(
|
|||||||
// Store
|
// Store
|
||||||
let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any);
|
let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any);
|
||||||
e.enc_both_inferred(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE));
|
e.enc_both_inferred(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE));
|
||||||
e.enc_both(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE));
|
e.enc_both_inferred(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE));
|
||||||
e.enc_both(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE));
|
e.enc_both_inferred(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE));
|
||||||
|
|
||||||
// Store complex
|
// Store complex
|
||||||
let bound_store_complex = store_complex.bind(vector(ty, sse_vector_size));
|
let bound_store_complex = store_complex.bind(vector(ty, sse_vector_size));
|
||||||
@@ -1887,8 +1887,8 @@ fn define_simd(
|
|||||||
// Load
|
// Load
|
||||||
let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any);
|
let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any);
|
||||||
e.enc_both_inferred(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD));
|
e.enc_both_inferred(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD));
|
||||||
e.enc_both(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD));
|
e.enc_both_inferred(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD));
|
||||||
e.enc_both(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD));
|
e.enc_both_inferred(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD));
|
||||||
|
|
||||||
// Load complex
|
// Load complex
|
||||||
let bound_load_complex = load_complex.bind(vector(ty, sse_vector_size));
|
let bound_load_complex = load_complex.bind(vector(ty, sse_vector_size));
|
||||||
|
|||||||
@@ -1604,7 +1604,7 @@ pub(crate) fn define<'shared>(
|
|||||||
);
|
);
|
||||||
|
|
||||||
// XX /r register-indirect store with 8-bit offset of FPR.
|
// XX /r register-indirect store with 8-bit offset of FPR.
|
||||||
recipes.add_template_recipe(
|
recipes.add_template_inferred(
|
||||||
EncodingRecipeBuilder::new("fstDisp8", &formats.store, 2)
|
EncodingRecipeBuilder::new("fstDisp8", &formats.store, 2)
|
||||||
.operands_in(vec![fpr, gpr])
|
.operands_in(vec![fpr, gpr])
|
||||||
.inst_predicate(has_small_offset)
|
.inst_predicate(has_small_offset)
|
||||||
@@ -1626,6 +1626,7 @@ pub(crate) fn define<'shared>(
|
|||||||
sink.put1(offset as u8);
|
sink.put1(offset as u8);
|
||||||
"#,
|
"#,
|
||||||
),
|
),
|
||||||
|
"size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1",
|
||||||
);
|
);
|
||||||
|
|
||||||
// XX /r register-indirect store with 32-bit offset.
|
// XX /r register-indirect store with 32-bit offset.
|
||||||
@@ -1682,7 +1683,7 @@ pub(crate) fn define<'shared>(
|
|||||||
);
|
);
|
||||||
|
|
||||||
// XX /r register-indirect store with 32-bit offset of FPR.
|
// XX /r register-indirect store with 32-bit offset of FPR.
|
||||||
recipes.add_template_recipe(
|
recipes.add_template_inferred(
|
||||||
EncodingRecipeBuilder::new("fstDisp32", &formats.store, 5)
|
EncodingRecipeBuilder::new("fstDisp32", &formats.store, 5)
|
||||||
.operands_in(vec![fpr, gpr])
|
.operands_in(vec![fpr, gpr])
|
||||||
.clobbers_flags(false)
|
.clobbers_flags(false)
|
||||||
@@ -1703,6 +1704,7 @@ pub(crate) fn define<'shared>(
|
|||||||
sink.put4(offset as u32);
|
sink.put4(offset as u32);
|
||||||
"#,
|
"#,
|
||||||
),
|
),
|
||||||
|
"size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -139,6 +139,21 @@ fn size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1(
|
|||||||
+ if needs_rex { 1 } else { 0 }
|
+ if needs_rex { 1 } else { 0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Calculates the size while inferring if the first and second input registers (inreg0, inreg1)
|
||||||
|
/// require a dynamic REX prefix and if the second input register (inreg1) requires a SIB.
|
||||||
|
fn size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1(
|
||||||
|
sizing: &RecipeSizing,
|
||||||
|
enc: Encoding,
|
||||||
|
inst: Inst,
|
||||||
|
divert: &RegDiversions,
|
||||||
|
func: &Function,
|
||||||
|
) -> u8 {
|
||||||
|
let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0)
|
||||||
|
|| test_input(0, inst, divert, func, is_extended_reg)
|
||||||
|
|| test_input(1, inst, divert, func, is_extended_reg);
|
||||||
|
size_plus_maybe_sib_for_inreg_1(sizing, enc, inst, divert, func) + if needs_rex { 1 } else { 0 }
|
||||||
|
}
|
||||||
|
|
||||||
/// Calculates the size while inferring if the first input register (inreg0) and first output
|
/// Calculates the size while inferring if the first input register (inreg0) and first output
|
||||||
/// register (outreg0) require a dynamic REX and if the first input register (inreg0) requires a
|
/// register (outreg0) require a dynamic REX and if the first input register (inreg0) requires a
|
||||||
/// SIB or offset.
|
/// SIB or offset.
|
||||||
|
|||||||
@@ -7,10 +7,6 @@ block0(v0: i64 [%rax]):
|
|||||||
[-, %xmm0] v10 = load.i32x4 v0 ; bin: heap_oob 0f 10 00
|
[-, %xmm0] v10 = load.i32x4 v0 ; bin: heap_oob 0f 10 00
|
||||||
[-] store v10, v0 ; bin: heap_oob 0f 11 00
|
[-] store v10, v0 ; bin: heap_oob 0f 11 00
|
||||||
|
|
||||||
; use displacement
|
|
||||||
[-, %xmm0] v11 = load.f32x4 v0+42 ; bin: heap_oob 40 0f 10 40 2a
|
|
||||||
[-] store v11, v0+42 ; bin: heap_oob 40 0f 11 40 2a
|
|
||||||
|
|
||||||
; use REX prefix
|
; use REX prefix
|
||||||
[-, %xmm8] v12 = load.i8x16 v0 ; bin: heap_oob 44 0f 10 00
|
[-, %xmm8] v12 = load.i8x16 v0 ; bin: heap_oob 44 0f 10 00
|
||||||
[-] store v12, v0 ; bin: heap_oob 44 0f 11 00
|
[-] store v12, v0 ; bin: heap_oob 44 0f 11 00
|
||||||
@@ -18,6 +14,27 @@ block0(v0: i64 [%rax]):
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Checks the binary encodings of SIMD loads and stores that carry an address displacement, both
; with and without a REX prefix (the REX prefix is expected whenever %xmm8 is the vector register).
function %load_store_with_displacement(i64) {
block0(v0: i64 [%rax]):
    ; use 8-bit displacement
[-, %xmm0] v1 = load.f32x4 v0+42 ; bin: heap_oob 0f 10 40 2a
[-] store v1, v0+42 ; bin: heap_oob 0f 11 40 2a

    ; use 8-bit displacement with REX prefix
    ; (REX.R = 0x44 selects %xmm8; ModR/M 0x40 = disp8 addressing off %rax; 0x2a = 42)
[-, %xmm8] v2 = load.i8x16 v0+42 ; bin: heap_oob 44 0f 10 40 2a
[-] store v2, v0+42 ; bin: heap_oob 44 0f 11 40 2a

    ; use 32-bit displacement
[-, %xmm0] v3 = load.f32x4 v0+256 ; bin: heap_oob 0f 10 80 00000100
[-] store v3, v0+256 ; bin: heap_oob 0f 11 80 00000100

    ; use 32-bit displacement with REX prefix
[-, %xmm8] v4 = load.f32x4 v0+256 ; bin: heap_oob 44 0f 10 80 00000100
[-] store v4, v0+256 ; bin: heap_oob 44 0f 11 80 00000100

    return
}
|
||||||
|
|
||||||
function %load_store_complex(i64, i64) {
|
function %load_store_complex(i64, i64) {
|
||||||
block0(v0: i64 [%rax], v1: i64 [%rbx]):
|
block0(v0: i64 [%rax], v1: i64 [%rbx]):
|
||||||
; %xmm1 corresponds to ModR/M 0x04; the 0b100 in the R/M slot indicates a SIB byte follows
|
; %xmm1 corresponds to ModR/M 0x04; the 0b100 in the R/M slot indicates a SIB byte follows
|
||||||
|
|||||||
Reference in New Issue
Block a user