Fix 16-bit x86_pextr encoding
The x86 ISA has (at least) two encodings for PEXTRW:
1. In the SSE2 opcode (66 0f c5), the XMM operand uses r/m and the GPR operand uses reg.
2. In the SSE4.1 opcode (66 0f 3a 15), the XMM operand uses reg and the GPR operand uses r/m.
This changes the 16-bit x86_pextr encoding from encoding 1 to encoding 2 to match the other PEXTR* implementations (all of which use style 2).
This commit is contained in:
@@ -1798,23 +1798,22 @@ pub(crate) fn define(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// SIMD extractlane
|
// SIMD extractlane
|
||||||
let mut x86_pextr_mapping: HashMap<u64, (&'static [u8], Option<SettingPredicateNumber>)> =
|
let mut x86_pextr_mapping: HashMap<u64, &'static [u8]> = HashMap::new();
|
||||||
HashMap::new();
|
x86_pextr_mapping.insert(8, &PEXTRB);
|
||||||
x86_pextr_mapping.insert(8, (&PEXTRB, Some(use_sse41_simd)));
|
x86_pextr_mapping.insert(16, &PEXTRW);
|
||||||
x86_pextr_mapping.insert(16, (&PEXTRW_SSE2, None));
|
x86_pextr_mapping.insert(32, &PEXTR);
|
||||||
x86_pextr_mapping.insert(32, (&PEXTR, Some(use_sse41_simd)));
|
x86_pextr_mapping.insert(64, &PEXTR);
|
||||||
x86_pextr_mapping.insert(64, (&PEXTR, Some(use_sse41_simd)));
|
|
||||||
|
|
||||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||||
if let Some((opcode, isap)) = x86_pextr_mapping.get(&ty.lane_bits()) {
|
if let Some(opcode) = x86_pextr_mapping.get(&ty.lane_bits()) {
|
||||||
let instruction = x86_pextr.bind_vector_from_lane(ty, sse_vector_size);
|
let instruction = x86_pextr.bind_vector_from_lane(ty, sse_vector_size);
|
||||||
let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
|
let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
|
||||||
if ty.lane_bits() < 64 {
|
if ty.lane_bits() < 64 {
|
||||||
e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap.clone());
|
e.enc_32_64_maybe_isap(instruction, template.nonrex(), Some(use_sse41_simd));
|
||||||
} else {
|
} else {
|
||||||
// It turns out the 64-bit widths have REX/W encodings and only are available on
|
// It turns out the 64-bit widths have REX/W encodings and only are available on
|
||||||
// x86_64.
|
// x86_64.
|
||||||
e.enc64_maybe_isap(instruction, template.rex().w(), isap.clone());
|
e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -269,8 +269,8 @@ pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];
|
|||||||
/// Extract byte (SSE4.1).
|
/// Extract byte (SSE4.1).
|
||||||
pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14];
|
pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14];
|
||||||
|
|
||||||
/// Extract word (SSE2). There is a 4-byte SSE4.1 variant that can also move to m/16.
|
/// Extract word (SSE4.1). Unlike the 3-byte SSE2 variant, this encoding can also move to m/16.
|
||||||
pub static PEXTRW_SSE2: [u8; 3] = [0x66, 0x0f, 0xc5];
|
pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15];
|
||||||
|
|
||||||
/// Insert doubleword or quadword, depending on REX.W (SSE4.1).
|
/// Insert doubleword or quadword, depending on REX.W (SSE4.1).
|
||||||
pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22];
|
pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22];
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ function %test_extractlane_i16() {
|
|||||||
ebb0:
|
ebb0:
|
||||||
[-, %rax] v0 = iconst.i16 4
|
[-, %rax] v0 = iconst.i16 4
|
||||||
[-, %xmm1] v1 = splat.i16x8 v0
|
[-, %xmm1] v1 = splat.i16x8 v0
|
||||||
[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f c5 c8 04
|
[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f 3a 15 c8 04
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user