Fix 16-bit x86_pextr encoding
The x86 ISA has (at least) two encodings for PEXTRW: (1) in the SSE2 opcode (66 0f c5), the XMM operand uses r/m and the GPR operand uses reg; (2) in the SSE4.1 opcode (66 0f 3a 15), the XMM operand uses reg and the GPR operand uses r/m. This commit changes the 16-bit x86_pextr encoding from style 1 to style 2 to match the other PEXTR* implementations (all of which use style 2).
This commit is contained in:
@@ -1798,23 +1798,22 @@ pub(crate) fn define(
|
||||
}
|
||||
|
||||
// SIMD extractlane
|
||||
let mut x86_pextr_mapping: HashMap<u64, (&'static [u8], Option<SettingPredicateNumber>)> =
|
||||
HashMap::new();
|
||||
x86_pextr_mapping.insert(8, (&PEXTRB, Some(use_sse41_simd)));
|
||||
x86_pextr_mapping.insert(16, (&PEXTRW_SSE2, None));
|
||||
x86_pextr_mapping.insert(32, (&PEXTR, Some(use_sse41_simd)));
|
||||
x86_pextr_mapping.insert(64, (&PEXTR, Some(use_sse41_simd)));
|
||||
let mut x86_pextr_mapping: HashMap<u64, &'static [u8]> = HashMap::new();
|
||||
x86_pextr_mapping.insert(8, &PEXTRB);
|
||||
x86_pextr_mapping.insert(16, &PEXTRW);
|
||||
x86_pextr_mapping.insert(32, &PEXTR);
|
||||
x86_pextr_mapping.insert(64, &PEXTR);
|
||||
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
if let Some((opcode, isap)) = x86_pextr_mapping.get(&ty.lane_bits()) {
|
||||
if let Some(opcode) = x86_pextr_mapping.get(&ty.lane_bits()) {
|
||||
let instruction = x86_pextr.bind_vector_from_lane(ty, sse_vector_size);
|
||||
let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
|
||||
if ty.lane_bits() < 64 {
|
||||
e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap.clone());
|
||||
e.enc_32_64_maybe_isap(instruction, template.nonrex(), Some(use_sse41_simd));
|
||||
} else {
|
||||
// It turns out the 64-bit widths have REX/W encodings and are only available on
|
||||
// x86_64.
|
||||
e.enc64_maybe_isap(instruction, template.rex().w(), isap.clone());
|
||||
e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -269,8 +269,8 @@ pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];
|
||||
/// Extract byte (SSE4.1).
|
||||
pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14];
|
||||
|
||||
/// Extract word (SSE2). There is a 4-byte SSE4.1 variant that can also move to m/16.
|
||||
pub static PEXTRW_SSE2: [u8; 3] = [0x66, 0x0f, 0xc5];
|
||||
/// Extract word (SSE4.1). Unlike the 3-byte SSE2 variant, this encoding can also move to m/16.
|
||||
pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15];
|
||||
|
||||
/// Insert doubleword or quadword, depending on REX.W (SSE4.1).
|
||||
pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22];
|
||||
|
||||
@@ -17,7 +17,7 @@ function %test_extractlane_i16() {
|
||||
ebb0:
|
||||
[-, %rax] v0 = iconst.i16 4
|
||||
[-, %xmm1] v1 = splat.i16x8 v0
|
||||
[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f c5 c8 04
|
||||
[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f 3a 15 c8 04
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user