Fix 16-bit x86_pextr encoding

The x86 ISA has (at least) two encodings for PEXTRW:
 1. in the SSE2 opcode (66 0f c5) the XMM operand uses r/m and the GPR operand uses reg
 2. in the SSE4.1 opcode (66 0f 3a 15) the XMM operand uses reg and the GPR operand uses r/m

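This changes the 16-bit x86_pextr encoding from style 1 to style 2 to match the other PEXTR* implementations (all of which use style 2). Because style 2 is the SSE4.1 opcode, the 16-bit encoding is now predicated on use_sse41_simd like the other lane widths.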
This commit is contained in:
Andrew Brown
2019-09-18 14:37:31 -07:00
parent c932f9b2b5
commit 168ad7fda3
3 changed files with 11 additions and 12 deletions

View File

@@ -1798,23 +1798,22 @@ pub(crate) fn define(
}
// SIMD extractlane
let mut x86_pextr_mapping: HashMap<u64, (&'static [u8], Option<SettingPredicateNumber>)> =
HashMap::new();
x86_pextr_mapping.insert(8, (&PEXTRB, Some(use_sse41_simd)));
x86_pextr_mapping.insert(16, (&PEXTRW_SSE2, None));
x86_pextr_mapping.insert(32, (&PEXTR, Some(use_sse41_simd)));
x86_pextr_mapping.insert(64, (&PEXTR, Some(use_sse41_simd)));
let mut x86_pextr_mapping: HashMap<u64, &'static [u8]> = HashMap::new();
x86_pextr_mapping.insert(8, &PEXTRB);
x86_pextr_mapping.insert(16, &PEXTRW);
x86_pextr_mapping.insert(32, &PEXTR);
x86_pextr_mapping.insert(64, &PEXTR);
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
if let Some((opcode, isap)) = x86_pextr_mapping.get(&ty.lane_bits()) {
if let Some(opcode) = x86_pextr_mapping.get(&ty.lane_bits()) {
let instruction = x86_pextr.bind_vector_from_lane(ty, sse_vector_size);
let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
if ty.lane_bits() < 64 {
e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap.clone());
e.enc_32_64_maybe_isap(instruction, template.nonrex(), Some(use_sse41_simd));
} else {
// It turns out the 64-bit widths have REX/W encodings and only are available on
// x86_64.
e.enc64_maybe_isap(instruction, template.rex().w(), isap.clone());
e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd));
}
}
}

View File

@@ -269,8 +269,8 @@ pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];
/// Extract byte (SSE4.1).
pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14];
/// Extract word (SSE2). There is a 4-byte SSE4.1 variant that can also move to m/16.
pub static PEXTRW_SSE2: [u8; 3] = [0x66, 0x0f, 0xc5];
/// Extract word (SSE4.1); this 4-byte form can also move to m/16. There is a 3-byte SSE2 variant
/// that can only move to a register.
pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15];
/// Insert doubleword or quadword, depending on REX.W (SSE4.1).
pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22];

View File

@@ -17,7 +17,7 @@ function %test_extractlane_i16() {
ebb0:
[-, %rax] v0 = iconst.i16 4
[-, %xmm1] v1 = splat.i16x8 v0
[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f c5 c8 04
[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f 3a 15 c8 04
return
}