Add x86 unpack instructions

Andrew Brown
2020-03-24 17:20:56 -07:00
parent 18c31403e8
commit f5fc09f64a
6 changed files with 109 additions and 0 deletions

@@ -1631,6 +1631,8 @@ fn define_simd(
let x86_psra = x86.by_name("x86_psra");
let x86_psrl = x86.by_name("x86_psrl");
let x86_ptest = x86.by_name("x86_ptest");
let x86_punpckh = x86.by_name("x86_punpckh");
let x86_punpckl = x86.by_name("x86_punpckl");
// Shorthands for recipes.
let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
@@ -1783,6 +1785,26 @@ fn define_simd(
}
}
// SIMD packing/unpacking
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let (high, low) = match ty.lane_bits() {
8 => (&PUNPCKHBW, &PUNPCKLBW),
16 => (&PUNPCKHWD, &PUNPCKLWD),
32 => (&PUNPCKHDQ, &PUNPCKLDQ),
64 => (&PUNPCKHQDQ, &PUNPCKLQDQ),
_ => panic!("invalid size for SIMD packing/unpacking"),
};
e.enc_both_inferred(
x86_punpckh.bind(vector(ty, sse_vector_size)),
rec_fa.opcodes(high),
);
e.enc_both_inferred(
x86_punpckl.bind(vector(ty, sse_vector_size)),
rec_fa.opcodes(low),
);
}
// SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
for to_type in

@@ -376,6 +376,40 @@ pub(crate) fn define(
.operands_out(vec![a]),
);
let x = &Operand::new("x", TxN);
let y = &Operand::new("y", TxN);
let a = &Operand::new("a", TxN);
ig.push(
Inst::new(
"x86_punpckh",
r#"
Unpack the high-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
would result in ``a = [y3, x3, y2, x2]`` (using the Intel manual's right-to-left lane
ordering).
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"x86_punpckl",
r#"
Unpack the low-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
would result in ``a = [y1, x1, y0, x0]`` (using the Intel manual's right-to-left lane
ordering).
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
let x = &Operand::new("x", FxN);
let y = &Operand::new("y", FxN);
let a = &Operand::new("a", FxN);
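For clarity, here is a minimal scalar model of the interleaving described in the doc comments above (plain Rust, not part of this commit; the helper names punpckl4/punpckh4 are hypothetical), using the same notional 4-lane vectors:

// Hypothetical helpers, not Cranelift code: model x86_punpckl/x86_punpckh on
// 4-lane vectors. Index 0 is the lowest-order lane, so the arrays below read
// left-to-right from lane 0, the reverse of the Intel manual's notation.
fn punpckl4(x: [u8; 4], y: [u8; 4]) -> [u8; 4] {
    // Low-order unpack: a = [y1, x1, y0, x0] in right-to-left notation.
    [x[0], y[0], x[1], y[1]]
}

fn punpckh4(x: [u8; 4], y: [u8; 4]) -> [u8; 4] {
    // High-order unpack: a = [y3, x3, y2, x2] in right-to-left notation.
    [x[2], y[2], x[3], y[3]]
}

fn main() {
    let x = [0u8, 1, 2, 3]; // lanes x0..x3
    let y = [10u8, 11, 12, 13]; // lanes y0..y3
    assert_eq!(punpckl4(x, y), [0, 10, 1, 11]);
    assert_eq!(punpckh4(x, y), [2, 12, 3, 13]);
}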

@@ -537,6 +537,30 @@ pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9];
/// 0s (SSE4.1).
pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17];
/// Unpack and interleave high-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKHBW: [u8; 3] = [0x66, 0x0f, 0x68];
/// Unpack and interleave high-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKHWD: [u8; 3] = [0x66, 0x0f, 0x69];
/// Unpack and interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKHDQ: [u8; 3] = [0x66, 0x0f, 0x6A];
/// Unpack and interleave high-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKHQDQ: [u8; 3] = [0x66, 0x0f, 0x6D];
/// Unpack and interleave low-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKLBW: [u8; 3] = [0x66, 0x0f, 0x60];
/// Unpack and interleave low-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKLWD: [u8; 3] = [0x66, 0x0f, 0x61];
/// Unpack and interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKLDQ: [u8; 3] = [0x66, 0x0f, 0x62];
/// Unpack and interleave low-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKLQDQ: [u8; 3] = [0x66, 0x0f, 0x6C];
/// Push r{16,32,64}.
pub static PUSH_REG: [u8; 1] = [0x50];
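As a sanity check on these tables, a minimal sketch of the byte layout (illustrative only, not how the fa recipe emits code; the modrm_rr helper is hypothetical): the arrays above hold the prefix, escape, and opcode bytes, and the register-register form appends a ModRM byte with mod=0b11.

// Illustrative only: assemble punpckhbw xmm0, xmm1 by hand from the opcode
// bytes above plus a ModRM byte (mod=0b11, reg=xmm0, rm=xmm1).
fn modrm_rr(reg: u8, rm: u8) -> u8 {
    0b1100_0000 | ((reg & 0b111) << 3) | (rm & 0b111)
}

fn main() {
    const PUNPCKHBW: [u8; 3] = [0x66, 0x0f, 0x68];
    let mut bytes = PUNPCKHBW.to_vec();
    bytes.push(modrm_rr(0 /* xmm0 */, 1 /* xmm1 */));
    assert_eq!(bytes, vec![0x66, 0x0f, 0x68, 0xc1]);
}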