Infer REX prefix for SIMD operations; fixes #1127
- Convert recipes to have the necessary size calculators
- Add a missing binemit function, `put_dynrexmp3`
- Modify the meta-encodings of x86 SIMD instructions to use `infer_rex()`, mostly through the `enc_both_inferred()` helper
- Fix up tests that previously always emitted a REX prefix
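For context: an x86-64 instruction needs the one-byte REX prefix only when it references an extended register (xmm8-xmm15, r8-r15) or needs the W bit; `infer_rex()` lets the encoder pay that byte per instruction instead of unconditionally. A minimal sketch of the decision, using hypothetical helper names rather than code from this patch:

/// Illustrative only: each ModR/M register field is 3 bits wide, so
/// hardware registers 8-15 are reachable solely through the REX.R/REX.B
/// extension bits; the prefix is required exactly when one appears.
fn needs_rex_prefix(reg_field: u8, rm_field: u8) -> bool {
    reg_field >= 8 || rm_field >= 8
}

fn main() {
    // xmm3, xmm5 fit in three bits: no prefix, e.g. `fadd` encodes as `0f 58 dd`.
    assert!(!needs_rex_prefix(3, 5));
    // xmm10 needs REX.B: one extra byte, e.g. `fadd` becomes `41 0f 58 da`.
    assert!(needs_rex_prefix(3, 10));
}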
@@ -312,6 +312,23 @@ impl PerCpuModeEncodings {
         self.enc_x86_64_instp(inst, template, instp);
     }
 
+    /// Add two encodings for `inst`:
+    /// - X86_32, dynamically infer the REX prefix.
+    /// - X86_64, dynamically infer the REX prefix.
+    fn enc_both_inferred(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
+        self.enc32(inst.clone(), template.infer_rex());
+        self.enc64(inst, template.infer_rex());
+    }
+    fn enc_both_inferred_maybe_isap(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        template: Template,
+        isap: Option<SettingPredicateNumber>,
+    ) {
+        self.enc32_maybe_isap(inst.clone(), template.infer_rex(), isap);
+        self.enc64_maybe_isap(inst, template.infer_rex(), isap);
+    }
+
     /// Add two encodings for `inst`:
     /// - X86_32
     /// - X86_64 with the REX prefix.
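With these helpers in place, the call sites below collapse a pair of `enc32`/`enc64` (or `enc_32_64`) registrations into a single line, e.g. `e.enc_both_inferred(iadd, rec_fa.opcodes(*opcodes));`, and the REX prefix is decided per encoded instruction rather than baked into the recipe.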
@@ -340,12 +357,6 @@ impl PerCpuModeEncodings {
         }
     }
 
-    /// Add the same encoding/template pairing to both X86_32 and X86_64
-    fn enc_32_64(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
-        self.enc32(inst.clone(), template.clone());
-        self.enc64(inst, template);
-    }
-
     /// Add the same encoding/recipe pairing to both X86_32 and X86_64
     fn enc_32_64_rec(
         &mut self,
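With every template-taking `enc_32_64` call site migrating to `enc_both` or `enc_both_inferred` in the hunks below, that helper is dropped; the recipe-taking `enc_32_64_rec` variant stays.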
@@ -1674,17 +1685,15 @@ fn define_simd(
     // PSHUFB, 8-bit shuffle using two XMM registers.
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         let instruction = x86_pshufb.bind(vector(ty, sse_vector_size));
-        let template = rec_fa.nonrex().opcodes(&PSHUFB);
-        e.enc32_isap(instruction.clone(), template.clone(), use_ssse3_simd);
-        e.enc64_isap(instruction, template, use_ssse3_simd);
+        let template = rec_fa.opcodes(&PSHUFB);
+        e.enc_both_inferred_maybe_isap(instruction.clone(), template.clone(), Some(use_ssse3_simd));
     }
 
     // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate.
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
         let instruction = x86_pshufd.bind(vector(ty, sse_vector_size));
-        let template = rec_r_ib_unsigned_fpr.nonrex().opcodes(&PSHUFD);
-        e.enc32(instruction.clone(), template.clone());
-        e.enc64(instruction, template);
+        let template = rec_r_ib_unsigned_fpr.opcodes(&PSHUFD);
+        e.enc_both_inferred(instruction, template);
     }
 
     // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
@@ -1693,12 +1702,12 @@ fn define_simd(
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         let instruction = scalar_to_vector.bind(vector(ty, sse_vector_size));
         if ty.is_float() {
+            // No need to move floats--they already live in XMM registers.
             e.enc_32_64_rec(instruction, rec_null_fpr, 0);
         } else {
             let template = rec_frurm.opcodes(&MOVD_LOAD_XMM);
             if ty.lane_bits() < 64 {
-                e.enc32(instruction.clone(), template.clone());
-                e.enc_x86_64(instruction, template);
+                e.enc_both_inferred(instruction, template);
             } else {
                 // No 32-bit encodings for 64-bit widths.
                 assert_eq!(ty.lane_bits(), 64);
@@ -1719,7 +1728,7 @@ fn define_simd(
         let instruction = x86_pinsr.bind(vector(ty, sse_vector_size));
         let template = rec_r_ib_unsigned_r.opcodes(opcode);
         if ty.lane_bits() < 64 {
-            e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap);
+            e.enc_both_inferred_maybe_isap(instruction, template, isap);
         } else {
             // It turns out the 64-bit widths have REX/W encodings and only are available on
             // x86_64.
@@ -1730,22 +1739,22 @@ fn define_simd(
     // For legalizing insertlane with floats, INSERTPS from SSE4.1.
     {
         let instruction = x86_insertps.bind(vector(F32, sse_vector_size));
-        let template = rec_fa_ib.nonrex().opcodes(&INSERTPS);
-        e.enc_32_64_maybe_isap(instruction, template, Some(use_sse41_simd));
+        let template = rec_fa_ib.opcodes(&INSERTPS);
+        e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
     }
 
     // For legalizing insertlane with floats, MOVSD from SSE2.
     {
         let instruction = x86_movsd.bind(vector(F64, sse_vector_size));
-        let template = rec_fa.nonrex().opcodes(&MOVSD_LOAD);
-        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE2
+        let template = rec_fa.opcodes(&MOVSD_LOAD);
+        e.enc_both_inferred(instruction, template); // from SSE2
     }
 
     // For legalizing insertlane with floats, MOVLHPS from SSE.
     {
         let instruction = x86_movlhps.bind(vector(F64, sse_vector_size));
-        let template = rec_fa.nonrex().opcodes(&MOVLHPS);
-        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
+        let template = rec_fa.opcodes(&MOVLHPS);
+        e.enc_both_inferred(instruction, template); // from SSE
     }
 
     // SIMD extractlane
@@ -1760,7 +1769,7 @@ fn define_simd(
         let instruction = x86_pextr.bind(vector(ty, sse_vector_size));
         let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
         if ty.lane_bits() < 64 {
-            e.enc_32_64_maybe_isap(instruction, template.nonrex(), Some(use_sse41_simd));
+            e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
         } else {
             // It turns out the 64-bit widths have REX/W encodings and only are available on
             // x86_64.
@@ -1838,85 +1847,81 @@ fn define_simd(
     // in memory) but some performance measurements are needed.
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         let instruction = vconst.bind(vector(ty, sse_vector_size));
-        let template = rec_vconst.opcodes(&MOVUPS_LOAD).infer_rex();
-        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
+        let template = rec_vconst.opcodes(&MOVUPS_LOAD);
+        e.enc_both_inferred(instruction, template); // from SSE
     }
 
-    // SIMD register movement: store, load, spill, fill, regmove. All of these use encodings of
+    // SIMD register movement: store, load, spill, fill, regmove, etc. All of these use encodings of
     // MOVUPS and MOVAPS from SSE (TODO ideally all of these would either use MOVAPS when we have
     // alignment or type-specific encodings, see https://github.com/bytecodealliance/wasmtime/issues/1124).
+    // Also, it would be ideal to infer REX prefixes for all of these instructions but for the
+    // time being only instructions with common recipes have `infer_rex()` support.
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         // Store
         let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any);
-        e.enc_32_64(
-            bound_store.clone(),
-            rec_fst.opcodes(&MOVUPS_STORE).infer_rex(),
-        );
-        e.enc_32_64(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE));
-        e.enc_32_64(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE));
+        e.enc_both_inferred(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE));
+        e.enc_both(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE));
+        e.enc_both(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE));
 
         // Store complex
         let bound_store_complex = store_complex.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(
+        e.enc_both(
             bound_store_complex.clone(),
             rec_fstWithIndex.opcodes(&MOVUPS_STORE),
         );
-        e.enc_32_64(
+        e.enc_both(
             bound_store_complex.clone(),
             rec_fstWithIndexDisp8.opcodes(&MOVUPS_STORE),
         );
-        e.enc_32_64(
+        e.enc_both(
             bound_store_complex,
             rec_fstWithIndexDisp32.opcodes(&MOVUPS_STORE),
         );
 
         // Load
         let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any);
-        e.enc_32_64(
-            bound_load.clone(),
-            rec_fld.opcodes(&MOVUPS_LOAD).infer_rex(),
-        );
-        e.enc_32_64(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD));
-        e.enc_32_64(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD));
+        e.enc_both_inferred(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD));
+        e.enc_both(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD));
+        e.enc_both(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD));
 
         // Load complex
         let bound_load_complex = load_complex.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(
+        e.enc_both(
            bound_load_complex.clone(),
             rec_fldWithIndex.opcodes(&MOVUPS_LOAD),
         );
-        e.enc_32_64(
+        e.enc_both(
             bound_load_complex.clone(),
             rec_fldWithIndexDisp8.opcodes(&MOVUPS_LOAD),
         );
-        e.enc_32_64(
+        e.enc_both(
             bound_load_complex,
             rec_fldWithIndexDisp32.opcodes(&MOVUPS_LOAD),
         );
 
         // Spill
         let bound_spill = spill.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE));
+        e.enc_both(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE));
         let bound_regspill = regspill.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE));
+        e.enc_both(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE));
 
         // Fill
         let bound_fill = fill.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD));
+        e.enc_both(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD));
         let bound_regfill = regfill.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD));
+        e.enc_both(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD));
         let bound_fill_nop = fill_nop.bind(vector(ty, sse_vector_size));
         e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0);
 
         // Regmove
         let bound_regmove = regmove.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD));
+        e.enc_both(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD));
 
         // Copy
         let bound_copy = copy.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD));
+        e.enc_both(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD));
         let bound_copy_to_ssa = copy_to_ssa.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bound_copy_to_ssa, rec_furm_reg_to_ssa.opcodes(&MOVAPS_LOAD));
+        e.enc_both(bound_copy_to_ssa, rec_furm_reg_to_ssa.opcodes(&MOVAPS_LOAD));
         let bound_copy_nop = copy_nop.bind(vector(ty, sse_vector_size));
         e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0);
     }
@@ -1924,23 +1929,23 @@ fn define_simd(
     // SIMD integer addition
     for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
         let iadd = iadd.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(iadd, rec_fa.opcodes(*opcodes));
+        e.enc_both_inferred(iadd, rec_fa.opcodes(*opcodes));
     }
 
     // SIMD integer saturating addition
-    e.enc_32_64(
+    e.enc_both_inferred(
         sadd_sat.bind(vector(I8, sse_vector_size)),
         rec_fa.opcodes(&PADDSB),
     );
-    e.enc_32_64(
+    e.enc_both_inferred(
         sadd_sat.bind(vector(I16, sse_vector_size)),
         rec_fa.opcodes(&PADDSW),
     );
-    e.enc_32_64(
+    e.enc_both_inferred(
         uadd_sat.bind(vector(I8, sse_vector_size)),
         rec_fa.opcodes(&PADDUSB),
     );
-    e.enc_32_64(
+    e.enc_both_inferred(
         uadd_sat.bind(vector(I16, sse_vector_size)),
         rec_fa.opcodes(&PADDUSW),
     );
@@ -1949,23 +1954,23 @@ fn define_simd(
     let isub = shared.by_name("isub");
     for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] {
         let isub = isub.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(isub, rec_fa.opcodes(*opcodes));
+        e.enc_both_inferred(isub, rec_fa.opcodes(*opcodes));
     }
 
     // SIMD integer saturating subtraction
-    e.enc_32_64(
+    e.enc_both_inferred(
         ssub_sat.bind(vector(I8, sse_vector_size)),
         rec_fa.opcodes(&PSUBSB),
     );
-    e.enc_32_64(
+    e.enc_both_inferred(
         ssub_sat.bind(vector(I16, sse_vector_size)),
         rec_fa.opcodes(&PSUBSW),
     );
-    e.enc_32_64(
+    e.enc_both_inferred(
         usub_sat.bind(vector(I8, sse_vector_size)),
         rec_fa.opcodes(&PSUBUSB),
     );
-    e.enc_32_64(
+    e.enc_both_inferred(
         usub_sat.bind(vector(I16, sse_vector_size)),
         rec_fa.opcodes(&PSUBUSW),
     );
@@ -1977,7 +1982,7 @@ fn define_simd(
         (I32, &PMULLD[..], Some(use_sse41_simd)),
     ] {
         let imul = imul.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
+        e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
     }
 
     // SIMD integer multiplication for I64x2 using a AVX512.
@@ -1993,7 +1998,7 @@ fn define_simd(
     // SIMD integer average with rounding.
     for (ty, opcodes) in &[(I8, &PAVGB[..]), (I16, &PAVGW[..])] {
         let avgr = avg_round.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(avgr, rec_fa.opcodes(opcodes));
+        e.enc_both_inferred(avgr, rec_fa.opcodes(opcodes));
     }
 
     // SIMD logical operations
@@ -2002,23 +2007,23 @@ fn define_simd(
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         // and
         let band = band.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(band, rec_fa.opcodes(&PAND));
+        e.enc_both_inferred(band, rec_fa.opcodes(&PAND));
 
         // and not (note flipped recipe operands to match band_not order)
         let band_not = band_not.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(band_not, rec_fax.opcodes(&PANDN));
+        e.enc_both_inferred(band_not, rec_fax.opcodes(&PANDN));
 
         // or
         let bor = bor.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bor, rec_fa.opcodes(&POR));
+        e.enc_both_inferred(bor, rec_fa.opcodes(&POR));
 
         // xor
         let bxor = bxor.bind(vector(ty, sse_vector_size));
-        e.enc_32_64(bxor, rec_fa.opcodes(&PXOR));
+        e.enc_both_inferred(bxor, rec_fa.opcodes(&PXOR));
 
         // ptest
         let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size));
-        e.enc_32_64_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd));
+        e.enc_both_inferred_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd));
     }
 
     // SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement
@@ -2026,7 +2031,7 @@ fn define_simd(
     // I128x1 but restrictions on the type builder prevent this; the general idea here is that
     // the upper bits are all zeroed and do not form parts of any separate lane. See
     // https://github.com/bytecodealliance/wasmtime/issues/1140.
-    e.enc_both(
+    e.enc_both_inferred(
         bitcast.bind(vector(I64, sse_vector_size)).bind(I32),
         rec_frurm.opcodes(&MOVD_LOAD_XMM),
     );
@@ -2038,31 +2043,31 @@ fn define_simd(
     // SIMD shift left
     for (ty, opcodes) in &[(I16, &PSLLW), (I32, &PSLLD), (I64, &PSLLQ)] {
         let x86_psll = x86_psll.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(x86_psll, rec_fa.opcodes(*opcodes));
+        e.enc_both_inferred(x86_psll, rec_fa.opcodes(*opcodes));
     }
 
     // SIMD shift right (logical)
     for (ty, opcodes) in &[(I16, &PSRLW), (I32, &PSRLD), (I64, &PSRLQ)] {
         let x86_psrl = x86_psrl.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(x86_psrl, rec_fa.opcodes(*opcodes));
+        e.enc_both_inferred(x86_psrl, rec_fa.opcodes(*opcodes));
     }
 
     // SIMD shift right (arithmetic)
     for (ty, opcodes) in &[(I16, &PSRAW), (I32, &PSRAD)] {
         let x86_psra = x86_psra.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(x86_psra, rec_fa.opcodes(*opcodes));
+        e.enc_both_inferred(x86_psra, rec_fa.opcodes(*opcodes));
     }
 
     // SIMD immediate shift
     for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM), (I64, &PS_Q_IMM)] {
         let ishl_imm = ishl_imm.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6));
+        e.enc_both_inferred(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6));
 
         let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));
+        e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));
 
         let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
+        e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
     }
 
     // SIMD integer comparisons
@@ -2081,8 +2086,8 @@ fn define_simd(
             let instruction = icmp
                 .bind(Immediate::IntCC(*cc))
                 .bind(vector(*ty, sse_vector_size));
-            let template = rec_icscc_fpr.nonrex().opcodes(opcodes);
-            e.enc_32_64_maybe_isap(instruction, template, *isa_predicate);
+            let template = rec_icscc_fpr.opcodes(opcodes);
+            e.enc_both_inferred_maybe_isap(instruction, template, *isa_predicate);
         }
     }
 
@@ -2102,15 +2107,15 @@ fn define_simd(
         (I32, x86_pminu, &PMINUD[..], Some(use_sse41_simd)),
     ] {
         let inst = inst.bind(vector(*ty, sse_vector_size));
-        e.enc_32_64_maybe_isap(inst, rec_fa.opcodes(opcodes), *isa_predicate);
+        e.enc_both_inferred_maybe_isap(inst, rec_fa.opcodes(opcodes), *isa_predicate);
     }
 
     // SIMD float comparisons
-    e.enc_both(
+    e.enc_both_inferred(
         fcmp.bind(vector(F32, sse_vector_size)),
         rec_pfcmp.opcodes(&CMPPS),
     );
-    e.enc_both(
+    e.enc_both_inferred(
         fcmp.bind(vector(F64, sse_vector_size)),
         rec_pfcmp.opcodes(&CMPPD),
     );
@@ -2131,11 +2136,11 @@ fn define_simd(
         (F64, fmax, &MAXPD[..]),
     ] {
         let inst = inst.bind(vector(*ty, sse_vector_size));
-        e.enc_both(inst, rec_fa.opcodes(opcodes));
+        e.enc_both_inferred(inst, rec_fa.opcodes(opcodes));
     }
     for (ty, inst, opcodes) in &[(F32, sqrt, &SQRTPS[..]), (F64, sqrt, &SQRTPD[..])] {
         let inst = inst.bind(vector(*ty, sse_vector_size));
-        e.enc_both(inst, rec_furm.opcodes(opcodes));
+        e.enc_both_inferred(inst, rec_furm.opcodes(opcodes));
     }
 }
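The recipe changes that follow replace `add_template`/`add_template_recipe` with `add_template_inferred`, which ties each recipe to a named size calculator (e.g. `size_with_inferred_rex_for_inreg0_inreg1`) so that the computed instruction size matches what binemit will actually produce. A hedged sketch of what such a calculator has to compute; the signature here is illustrative, not the real one:

/// Illustrative only: with an inferred REX prefix, the encoded length grows
/// by exactly one byte when either input register needs extension bits.
fn size_with_inferred_rex_for_inreg0_inreg1_sketch(base_size: u8, in_reg0: u8, in_reg1: u8) -> u8 {
    base_size + (in_reg0 >= 8 || in_reg1 >= 8) as u8
}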
@@ -547,8 +547,7 @@ pub(crate) fn define<'shared>(
     );
 
     // XX /r
-    recipes.add_template(
-        Template::new(
+    recipes.add_template_inferred(
         EncodingRecipeBuilder::new("rr", &formats.binary, 1)
             .operands_in(vec![gpr, gpr])
             .operands_out(vec![0])
@@ -558,14 +557,11 @@ pub(crate) fn define<'shared>(
                     modrm_rr(in_reg0, in_reg1, sink);
                 "#,
             ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
+        "size_with_inferred_rex_for_inreg0_inreg1",
     );
 
     // XX /r with operands swapped. (RM form).
-    recipes.add_template(
-        Template::new(
+    recipes.add_template_inferred(
         EncodingRecipeBuilder::new("rrx", &formats.binary, 1)
             .operands_in(vec![gpr, gpr])
             .operands_out(vec![0])
@@ -575,13 +571,11 @@ pub(crate) fn define<'shared>(
                     modrm_rr(in_reg1, in_reg0, sink);
                 "#,
             ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
+        "size_with_inferred_rex_for_inreg0_inreg1",
     );
 
     // XX /r with FPR ins and outs. A form.
-    recipes.add_template_recipe(
+    recipes.add_template_inferred(
         EncodingRecipeBuilder::new("fa", &formats.binary, 1)
             .operands_in(vec![fpr, fpr])
             .operands_out(vec![0])
@@ -591,10 +585,11 @@ pub(crate) fn define<'shared>(
                     modrm_rr(in_reg1, in_reg0, sink);
                 "#,
             ),
+        "size_with_inferred_rex_for_inreg0_inreg1",
     );
 
     // XX /r with FPR ins and outs. A form with input operands swapped.
-    recipes.add_template_recipe(
+    recipes.add_template_inferred(
         EncodingRecipeBuilder::new("fax", &formats.binary, 1)
             .operands_in(vec![fpr, fpr])
             .operands_out(vec![1])
@@ -604,11 +599,13 @@ pub(crate) fn define<'shared>(
                     modrm_rr(in_reg0, in_reg1, sink);
                 "#,
             ),
+        // The operand order does not matter for calculating whether a REX prefix is needed.
+        "size_with_inferred_rex_for_inreg0_inreg1",
     );
 
     // XX /r with FPR ins and outs. A form with a byte immediate.
     {
-        recipes.add_template_recipe(
+        recipes.add_template_inferred(
             EncodingRecipeBuilder::new("fa_ib", &formats.insert_lane, 2)
                 .operands_in(vec![fpr, fpr])
                 .operands_out(vec![0])
@@ -626,6 +623,7 @@ pub(crate) fn define<'shared>(
                         sink.put1(imm as u8);
                     "#,
                 ),
+            "size_with_inferred_rex_for_inreg0_inreg1",
         );
     }
 
@@ -740,7 +738,7 @@ pub(crate) fn define<'shared>(
     );
 
     // XX /r, RM form, FPR -> FPR.
-    recipes.add_template_recipe(
+    recipes.add_template_inferred(
         EncodingRecipeBuilder::new("furm", &formats.unary, 1)
             .operands_in(vec![fpr])
             .operands_out(vec![fpr])
@@ -751,6 +749,7 @@ pub(crate) fn define<'shared>(
                     modrm_rr(in_reg0, out_reg0, sink);
                 "#,
             ),
+        "size_with_inferred_rex_for_inreg0_outreg0",
     );
 
     // Same as furm, but with the source register specified directly.
@@ -768,8 +767,7 @@ pub(crate) fn define<'shared>(
     );
 
     // XX /r, RM form, GPR -> FPR.
-    recipes.add_template(
-        Template::new(
+    recipes.add_template_inferred(
         EncodingRecipeBuilder::new("frurm", &formats.unary, 1)
             .operands_in(vec![gpr])
             .operands_out(vec![fpr])
@@ -780,9 +778,7 @@ pub(crate) fn define<'shared>(
                     modrm_rr(in_reg0, out_reg0, sink);
                 "#,
             ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"),
+        "size_with_inferred_rex_for_inreg0_outreg0",
     );
 
     // XX /r, RM form, FPR -> GPR.
@@ -909,8 +905,7 @@ pub(crate) fn define<'shared>(
 
     // XX /n ib with 8-bit immediate sign-extended.
     {
-        recipes.add_template(
-            Template::new(
+        recipes.add_template_inferred(
             EncodingRecipeBuilder::new("r_ib", &formats.binary_imm, 2)
                 .operands_in(vec![gpr])
                 .operands_out(vec![0])
@@ -928,12 +923,10 @@ pub(crate) fn define<'shared>(
                         sink.put1(imm as u8);
                     "#,
                 ),
-                regs,
-            )
-            .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
+            "size_with_inferred_rex_for_inreg0",
         );
 
-        recipes.add_template_recipe(
+        recipes.add_template_inferred(
             EncodingRecipeBuilder::new("f_ib", &formats.binary_imm, 2)
                 .operands_in(vec![fpr])
                 .operands_out(vec![0])
@@ -951,6 +944,7 @@ pub(crate) fn define<'shared>(
                         sink.put1(imm as u8);
                     "#,
                 ),
+            "size_with_inferred_rex_for_inreg0",
         );
 
         // XX /n id with 32-bit immediate sign-extended.
@@ -981,7 +975,7 @@ pub(crate) fn define<'shared>(
 
     // XX /r ib with 8-bit unsigned immediate (e.g. for pshufd)
     {
-        recipes.add_template_recipe(
+        recipes.add_template_inferred(
             EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.extract_lane, 2)
                 .operands_in(vec![fpr])
                 .operands_out(vec![fpr])
@@ -999,12 +993,13 @@ pub(crate) fn define<'shared>(
                         sink.put1(imm as u8);
                     "#,
                 ),
+            "size_with_inferred_rex_for_inreg0_outreg0",
         );
     }
 
     // XX /r ib with 8-bit unsigned immediate (e.g. for extractlane)
     {
-        recipes.add_template_recipe(
+        recipes.add_template_inferred(
             EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.extract_lane, 2)
                 .operands_in(vec![fpr])
                 .operands_out(vec![gpr])
@@ -1018,13 +1013,13 @@ pub(crate) fn define<'shared>(
                         let imm:i64 = lane.into();
                         sink.put1(imm as u8);
                     "#,
-                ),
+                ), "size_with_inferred_rex_for_inreg0_outreg0"
         );
     }
 
     // XX /r ib with 8-bit unsigned immediate (e.g. for insertlane)
     {
-        recipes.add_template_recipe(
+        recipes.add_template_inferred(
             EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.insert_lane, 2)
                 .operands_in(vec![fpr, gpr])
                 .operands_out(vec![0])
@@ -1042,6 +1037,7 @@ pub(crate) fn define<'shared>(
                         sink.put1(imm as u8);
                     "#,
                 ),
+            "size_with_inferred_rex_for_inreg0_inreg1",
         );
     }
 
@@ -2825,7 +2821,7 @@ pub(crate) fn define<'shared>(
     );
 
     // XX /r, RM form. Compare two FPR registers and set flags.
-    recipes.add_template_recipe(
+    recipes.add_template_inferred(
         EncodingRecipeBuilder::new("fcmp", &formats.binary, 1)
             .operands_in(vec![fpr, fpr])
             .operands_out(vec![reg_rflags])
@@ -2835,6 +2831,7 @@ pub(crate) fn define<'shared>(
                     modrm_rr(in_reg1, in_reg0, sink);
                 "#,
             ),
+        "size_with_inferred_rex_for_inreg0_inreg1",
     );
 
     {
@@ -3089,7 +3086,7 @@ pub(crate) fn define<'shared>(
         .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
     );
 
-    recipes.add_template_recipe(
+    recipes.add_template_inferred(
         EncodingRecipeBuilder::new("icscc_fpr", &formats.int_compare, 1)
             .operands_in(vec![fpr, fpr])
             .operands_out(vec![0])
@@ -3100,6 +3097,7 @@ pub(crate) fn define<'shared>(
                     modrm_rr(in_reg1, in_reg0, sink);
                 "#,
             ),
+        "size_with_inferred_rex_for_inreg0_inreg1",
     );
 
     {
@@ -3219,7 +3217,7 @@ pub(crate) fn define<'shared>(
             .iter()
             .map(|name| Literal::enumerator_for(floatcc, name))
             .collect();
-        recipes.add_template_recipe(
+        recipes.add_template_inferred(
             EncodingRecipeBuilder::new("pfcmp", &formats.float_compare, 2)
                 .operands_in(vec![fpr, fpr])
                 .operands_out(vec![0])
@@ -3248,6 +3246,7 @@ pub(crate) fn define<'shared>(
                         sink.put1(imm);
                     "#,
                 ),
+            "size_with_inferred_rex_for_inreg0_inreg1",
         );
     }
 
@@ -197,7 +197,7 @@ fn put_dynrexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
     sink.put1(bits as u8);
 }
 
-// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix.
+/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix.
 fn put_mp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
     debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*");
     debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp3 encoding");
@@ -208,7 +208,7 @@ fn put_mp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
     sink.put1(bits as u8);
 }
 
-// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX
+/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX
 fn put_rexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
     debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for RexMp3*");
     let enc = EncodingBits::from(bits);
@@ -219,6 +219,23 @@ fn put_rexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
     sink.put1(bits as u8);
 }
 
+/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and an inferred REX prefix.
+fn put_dynrexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(
+        bits & 0x0800,
+        0x0800,
+        "Invalid encoding bits for DynRexMp3*"
+    );
+    let enc = EncodingBits::from(bits);
+    sink.put1(PREFIX[(enc.pp() - 1) as usize]);
+    if needs_rex(bits, rex) {
+        rex_prefix(bits, rex, sink);
+    }
+    sink.put1(0x0f);
+    sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]);
+    sink.put1(bits as u8);
+}
+
 /// Defines the EVEX context for the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte). Table 2-36 in
 /// section 2.6.10 (Intel Software Development Manual, volume 2A) describes how these bits can be
 /// used together for certain classes of instructions; i.e., special care should be taken to ensure
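To see `put_dynrexmp3` at work, take the updated `x86_pshufb` test further down, which expects `66 41 0f 38 00 c4`: `66` is the mandatory prefix picked out of `PREFIX` by `enc.pp()`, `41` is a REX byte with only the B bit set (needed because one operand lives in xmm12), `0f 38` is the three-byte escape selected from `OP3_BYTE2` by `enc.mm()`, `00` is the PSHUFB opcode byte, and `c4` is the ModR/M byte. When `needs_rex` is false, the `41` is simply omitted, which is the whole point of the DynRex recipes.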
@@ -176,25 +176,49 @@ block0:
 
 function %float_arithmetic_f32x4(f32x4, f32x4) {
 block0(v0: f32x4 [%xmm3], v1: f32x4 [%xmm5]):
-    [-, %xmm3] v2 = fadd v0, v1 ; bin: 40 0f 58 dd
-    [-, %xmm3] v3 = fsub v0, v1 ; bin: 40 0f 5c dd
-    [-, %xmm3] v4 = fmul v0, v1 ; bin: 40 0f 59 dd
-    [-, %xmm3] v5 = fdiv v0, v1 ; bin: 40 0f 5e dd
-    [-, %xmm3] v6 = fmin v0, v1 ; bin: 40 0f 5d dd
-    [-, %xmm3] v7 = fmax v0, v1 ; bin: 40 0f 5f dd
-    [-, %xmm3] v8 = sqrt v0 ; bin: 40 0f 51 db
+    [-, %xmm3] v2 = fadd v0, v1 ; bin: 0f 58 dd
+    [-, %xmm3] v3 = fsub v0, v1 ; bin: 0f 5c dd
+    [-, %xmm3] v4 = fmul v0, v1 ; bin: 0f 59 dd
+    [-, %xmm3] v5 = fdiv v0, v1 ; bin: 0f 5e dd
+    [-, %xmm3] v6 = fmin v0, v1 ; bin: 0f 5d dd
+    [-, %xmm3] v7 = fmax v0, v1 ; bin: 0f 5f dd
+    [-, %xmm3] v8 = sqrt v0 ; bin: 0f 51 db
+    return
+}
+
+function %float_arithmetic_f32x4_rex(f32x4, f32x4) {
+block0(v0: f32x4 [%xmm3], v1: f32x4 [%xmm10]):
+    [-, %xmm3] v2 = fadd v0, v1 ; bin: 41 0f 58 da
+    [-, %xmm3] v3 = fsub v0, v1 ; bin: 41 0f 5c da
+    [-, %xmm3] v4 = fmul v0, v1 ; bin: 41 0f 59 da
+    [-, %xmm3] v5 = fdiv v0, v1 ; bin: 41 0f 5e da
+    [-, %xmm3] v6 = fmin v0, v1 ; bin: 41 0f 5d da
+    [-, %xmm3] v7 = fmax v0, v1 ; bin: 41 0f 5f da
+    [-, %xmm3] v8 = sqrt v1 ; bin: 41 0f 51 da
     return
 }
 
 function %float_arithmetic_f64x2(f64x2, f64x2) {
 block0(v0: f64x2 [%xmm3], v1: f64x2 [%xmm5]):
-    [-, %xmm3] v2 = fadd v0, v1 ; bin: 66 40 0f 58 dd
-    [-, %xmm3] v3 = fsub v0, v1 ; bin: 66 40 0f 5c dd
-    [-, %xmm3] v4 = fmul v0, v1 ; bin: 66 40 0f 59 dd
-    [-, %xmm3] v5 = fdiv v0, v1 ; bin: 66 40 0f 5e dd
-    [-, %xmm3] v6 = fmin v0, v1 ; bin: 66 40 0f 5d dd
-    [-, %xmm3] v7 = fmax v0, v1 ; bin: 66 40 0f 5f dd
-    [-, %xmm3] v8 = sqrt v0 ; bin: 66 40 0f 51 db
+    [-, %xmm3] v2 = fadd v0, v1 ; bin: 66 0f 58 dd
+    [-, %xmm3] v3 = fsub v0, v1 ; bin: 66 0f 5c dd
+    [-, %xmm3] v4 = fmul v0, v1 ; bin: 66 0f 59 dd
+    [-, %xmm3] v5 = fdiv v0, v1 ; bin: 66 0f 5e dd
+    [-, %xmm3] v6 = fmin v0, v1 ; bin: 66 0f 5d dd
+    [-, %xmm3] v7 = fmax v0, v1 ; bin: 66 0f 5f dd
+    [-, %xmm3] v8 = sqrt v0 ; bin: 66 0f 51 db
+    return
+}
+
+function %float_arithmetic_f64x2_rex(f64x2, f64x2) {
+block0(v0: f64x2 [%xmm11], v1: f64x2 [%xmm13]):
+    [-, %xmm11] v2 = fadd v0, v1 ; bin: 66 45 0f 58 dd
+    [-, %xmm11] v3 = fsub v0, v1 ; bin: 66 45 0f 5c dd
+    [-, %xmm11] v4 = fmul v0, v1 ; bin: 66 45 0f 59 dd
+    [-, %xmm11] v5 = fdiv v0, v1 ; bin: 66 45 0f 5e dd
+    [-, %xmm11] v6 = fmin v0, v1 ; bin: 66 45 0f 5d dd
+    [-, %xmm11] v7 = fmax v0, v1 ; bin: 66 45 0f 5f dd
+    [-, %xmm11] v8 = sqrt v0 ; bin: 66 45 0f 51 db
     return
 }
 
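The pattern in these updated tests: operands in xmm0-xmm7 now encode with no REX byte at all (`0f 58 dd` for `fadd` on xmm3/xmm5, previously `40 0f 58 dd`), while the new `_rex` variants pin an operand in xmm8-xmm15 and expect exactly one extension byte, `41` when only the ModR/M r/m field needs extending and `45` when both fields do. The `66` mandatory prefix of the f64x2 forms is unaffected by REX inference.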
@@ -87,26 +87,52 @@ block0(v0: i32x4 [%xmm2], v1: i32x4 [%xmm4]):
 
 function %fcmp_f32x4(f32x4, f32x4) {
 block0(v0: f32x4 [%xmm2], v1: f32x4 [%xmm4]):
-    [-, %xmm2] v2 = fcmp eq v0, v1 ; bin: 40 0f c2 d4 00
-    [-, %xmm2] v3 = fcmp lt v0, v1 ; bin: 40 0f c2 d4 01
-    [-, %xmm2] v4 = fcmp le v0, v1 ; bin: 40 0f c2 d4 02
-    [-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 40 0f c2 d4 03
-    [-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 40 0f c2 d4 04
-    [-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 40 0f c2 d4 05
-    [-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 40 0f c2 d4 06
-    [-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 40 0f c2 d4 07
+    [-, %xmm2] v2 = fcmp eq v0, v1 ; bin: 0f c2 d4 00
+    [-, %xmm2] v3 = fcmp lt v0, v1 ; bin: 0f c2 d4 01
+    [-, %xmm2] v4 = fcmp le v0, v1 ; bin: 0f c2 d4 02
+    [-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 0f c2 d4 03
+    [-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 0f c2 d4 04
+    [-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 0f c2 d4 05
+    [-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 0f c2 d4 06
+    [-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 0f c2 d4 07
+    return
+}
+
+function %fcmp_f32x4_rex(f32x4, f32x4) {
+block0(v0: f32x4 [%xmm8], v1: f32x4 [%xmm8]):
+    [-, %xmm8] v2 = fcmp eq v0, v1 ; bin: 45 0f c2 c0 00
+    [-, %xmm8] v3 = fcmp lt v0, v1 ; bin: 45 0f c2 c0 01
+    [-, %xmm8] v4 = fcmp le v0, v1 ; bin: 45 0f c2 c0 02
+    [-, %xmm8] v5 = fcmp uno v0, v1 ; bin: 45 0f c2 c0 03
+    [-, %xmm8] v6 = fcmp ne v0, v1 ; bin: 45 0f c2 c0 04
+    [-, %xmm8] v7 = fcmp uge v0, v1 ; bin: 45 0f c2 c0 05
+    [-, %xmm8] v8 = fcmp ugt v0, v1 ; bin: 45 0f c2 c0 06
+    [-, %xmm8] v9 = fcmp ord v0, v1 ; bin: 45 0f c2 c0 07
     return
 }
 
 function %fcmp_f64x2(f64x2, f64x2) {
 block0(v0: f64x2 [%xmm2], v1: f64x2 [%xmm0]):
-    [-, %xmm2] v2 = fcmp eq v0, v1 ; bin: 66 40 0f c2 d0 00
-    [-, %xmm2] v3 = fcmp lt v0, v1 ; bin: 66 40 0f c2 d0 01
-    [-, %xmm2] v4 = fcmp le v0, v1 ; bin: 66 40 0f c2 d0 02
-    [-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 66 40 0f c2 d0 03
-    [-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 66 40 0f c2 d0 04
-    [-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 66 40 0f c2 d0 05
-    [-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 66 40 0f c2 d0 06
-    [-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 66 40 0f c2 d0 07
+    [-, %xmm2] v2 = fcmp eq v0, v1 ; bin: 66 0f c2 d0 00
+    [-, %xmm2] v3 = fcmp lt v0, v1 ; bin: 66 0f c2 d0 01
+    [-, %xmm2] v4 = fcmp le v0, v1 ; bin: 66 0f c2 d0 02
+    [-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 66 0f c2 d0 03
+    [-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 66 0f c2 d0 04
+    [-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 66 0f c2 d0 05
+    [-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 66 0f c2 d0 06
+    [-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 66 0f c2 d0 07
+    return
+}
+
+function %fcmp_f64x2_rex(f64x2, f64x2) {
+block0(v0: f64x2 [%xmm9], v1: f64x2 [%xmm11]):
+    [-, %xmm9] v2 = fcmp eq v0, v1 ; bin: 66 45 0f c2 cb 00
+    [-, %xmm9] v3 = fcmp lt v0, v1 ; bin: 66 45 0f c2 cb 01
+    [-, %xmm9] v4 = fcmp le v0, v1 ; bin: 66 45 0f c2 cb 02
+    [-, %xmm9] v5 = fcmp uno v0, v1 ; bin: 66 45 0f c2 cb 03
+    [-, %xmm9] v6 = fcmp ne v0, v1 ; bin: 66 45 0f c2 cb 04
+    [-, %xmm9] v7 = fcmp uge v0, v1 ; bin: 66 45 0f c2 cb 05
+    [-, %xmm9] v8 = fcmp ugt v0, v1 ; bin: 66 45 0f c2 cb 06
+    [-, %xmm9] v9 = fcmp ord v0, v1 ; bin: 66 45 0f c2 cb 07
     return
 }
 
@@ -81,7 +81,7 @@ block0:
 function %pshufd() {
 block0:
     [-, %rax] v0 = iconst.i32 42
-    [-, %xmm0] v1 = scalar_to_vector.i32x4 v0 ; bin: 66 40 0f 6e c0
+    [-, %xmm0] v1 = scalar_to_vector.i32x4 v0 ; bin: 66 0f 6e c0
     [-, %xmm0] v2 = x86_pshufd v1, 0 ; bin: 66 0f 70 c0 00
     return
 }
@@ -89,9 +89,9 @@ block0:
 function %pshufb() {
 block0:
     [-, %rax] v0 = iconst.i8 42
-    [-, %xmm0] v1 = scalar_to_vector.i8x16 v0 ; bin: 66 40 0f 6e c0
+    [-, %xmm0] v1 = scalar_to_vector.i8x16 v0 ; bin: 66 0f 6e c0
     [-, %rbx] v2 = iconst.i8 43
-    [-, %xmm4] v3 = scalar_to_vector.i8x16 v2 ; bin: 66 40 0f 6e e3
-    [-, %xmm0] v4 = x86_pshufb v1, v3 ; bin: 66 0f 38 00 c4
+    [-, %xmm12] v3 = scalar_to_vector.i8x16 v2 ; bin: 66 44 0f 6e e3
+    [-, %xmm0] v4 = x86_pshufb v1, v3 ; bin: 66 41 0f 38 00 c4
     return
 }
 
@@ -8,8 +8,8 @@ block0(v0: i64 [%rax]):
     [-] store v10, v0 ; bin: heap_oob 0f 11 00
 
     ; use displacement
-    [-, %xmm0] v11 = load.f32x4 v0+42 ; bin: heap_oob 0f 10 40 2a
-    [-] store v11, v0+42 ; bin: heap_oob 0f 11 40 2a
+    [-, %xmm0] v11 = load.f32x4 v0+42 ; bin: heap_oob 40 0f 10 40 2a
+    [-] store v11, v0+42 ; bin: heap_oob 40 0f 11 40 2a
 
     ; use REX prefix
     [-, %xmm8] v12 = load.i8x16 v0 ; bin: heap_oob 44 0f 10 00
@@ -22,16 +22,17 @@ function %load_store_complex(i64, i64) {
 block0(v0: i64 [%rax], v1: i64 [%rbx]):
     ; %xmm1 corresponds to ModR/M 0x04; the 0b100 in the R/M slot indicates a SIB byte follows
     ; %rax and %rbx form the SIB 0x18
-    [-, %xmm1] v10 = load_complex.f64x2 v0+v1 ; bin: heap_oob 0f 10 0c 18
+    [-, %xmm1] v10 = load_complex.f64x2 v0+v1 ; bin: heap_oob 40 0f 10 0c 18
     ; enabling bit 6 of the ModR/M byte indicates a disp8 follows
-    [-] store_complex v10, v0+v1+5 ; bin: heap_oob 0f 11 4c 18 05
+    [-] store_complex v10, v0+v1+5 ; bin: heap_oob 40 0f 11 4c 18 05
 
     return
 }
 
 function %copy_to_ssa() {
 block0:
-    [-, %xmm1] v0 = copy_to_ssa.i64x2 %xmm3 ; bin: 0f 28 cb
+    [-, %xmm1] v0 = copy_to_ssa.i64x2 %xmm3 ; bin: 40 0f 28 cb
+    [-, %xmm2] v1 = copy_to_ssa.i64x2 %xmm15 ; bin: 41 0f 28 d7
 
     return
 }
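Note the lone `40` bytes that appear above: recipes like `fldDisp8`, `fstDisp8`, and `furm_reg_to_ssa` do not yet have inferred-REX support, so their call sites moved to `enc_both`, which registers the REX-emitting form on x86-64; that form emits a REX prefix even when no extension bit is set, hence the empty `0x40`. This is the trade-off called out by the new comment in the encodings file ("only instructions with common recipes have `infer_rex()` support").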