Add x86 unpack instructions

Andrew Brown
2020-03-24 17:20:56 -07:00
parent 18c31403e8
commit f5fc09f64a
6 changed files with 109 additions and 0 deletions


@@ -1631,6 +1631,8 @@ fn define_simd(
    let x86_psra = x86.by_name("x86_psra");
    let x86_psrl = x86.by_name("x86_psrl");
    let x86_ptest = x86.by_name("x86_ptest");
    let x86_punpckh = x86.by_name("x86_punpckh");
    let x86_punpckl = x86.by_name("x86_punpckl");

    // Shorthands for recipes.
    let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
@@ -1783,6 +1785,26 @@ fn define_simd(
        }
    }

    // SIMD packing/unpacking
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let (high, low) = match ty.lane_bits() {
            8 => (&PUNPCKHBW, &PUNPCKLBW),
            16 => (&PUNPCKHWD, &PUNPCKLWD),
            32 => (&PUNPCKHDQ, &PUNPCKLDQ),
            64 => (&PUNPCKHQDQ, &PUNPCKLQDQ),
            _ => panic!("invalid size for SIMD packing/unpacking"),
        };
        e.enc_both_inferred(
            x86_punpckh.bind(vector(ty, sse_vector_size)),
            rec_fa.opcodes(high),
        );
        e.enc_both_inferred(
            x86_punpckl.bind(vector(ty, sse_vector_size)),
            rec_fa.opcodes(low),
        );
    }

    // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
    for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
        for to_type in


@@ -376,6 +376,40 @@ pub(crate) fn define(
        .operands_out(vec![a]),
    );

    let x = &Operand::new("x", TxN);
    let y = &Operand::new("y", TxN);
    let a = &Operand::new("a", TxN);
    ig.push(
        Inst::new(
            "x86_punpckh",
            r#"
        Unpack the high-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
        i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
        would result in ``a = [y3, x3, y2, x2]`` (using the Intel manual's right-to-left lane
        ordering).
        "#,
            &formats.binary,
        )
        .operands_in(vec![x, y])
        .operands_out(vec![a]),
    );

    ig.push(
        Inst::new(
            "x86_punpckl",
            r#"
        Unpack the low-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
        i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
        would result in ``a = [y1, x1, y0, x0]`` (using the Intel manual's right-to-left lane
        ordering).
        "#,
            &formats.binary,
        )
        .operands_in(vec![x, y])
        .operands_out(vec![a]),
    );

    let x = &Operand::new("x", FxN);
    let y = &Operand::new("y", FxN);
    let a = &Operand::new("a", FxN);


@@ -537,6 +537,30 @@ pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9];
/// 0s (SSE4.1).
pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17];

/// Unpack and interleave high-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKHBW: [u8; 3] = [0x66, 0x0f, 0x68];

/// Unpack and interleave high-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKHWD: [u8; 3] = [0x66, 0x0f, 0x69];

/// Unpack and interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKHDQ: [u8; 3] = [0x66, 0x0f, 0x6A];

/// Unpack and interleave high-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKHQDQ: [u8; 3] = [0x66, 0x0f, 0x6D];

/// Unpack and interleave low-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKLBW: [u8; 3] = [0x66, 0x0f, 0x60];

/// Unpack and interleave low-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKLWD: [u8; 3] = [0x66, 0x0f, 0x61];

/// Unpack and interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKLDQ: [u8; 3] = [0x66, 0x0f, 0x62];

/// Unpack and interleave low-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKLQDQ: [u8; 3] = [0x66, 0x0f, 0x6C];

/// Push r{16,32,64}.
pub static PUSH_REG: [u8; 1] = [0x50];


@@ -2375,6 +2375,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
        | Opcode::X86Pmaxu
        | Opcode::X86Pmins
        | Opcode::X86Pminu
        | Opcode::X86Punpckh
        | Opcode::X86Punpckl
        | Opcode::X86ElfTlsGetAddr
        | Opcode::X86MachoTlsGetAddr => {
            panic!("x86-specific opcode in supposedly arch-neutral IR!");


@@ -95,3 +95,17 @@ block0:
[-, %xmm0] v4 = x86_pshufb v1, v3 ; bin: 66 41 0f 38 00 c4
    return
}

;; pack/unpack
function %unpack_high_i8x16(i8x16, i8x16) {
block0(v0: i8x16 [%xmm0], v1: i8x16 [%xmm12]):
[-, %xmm0] v2 = x86_punpckh v0, v1 ; bin: 66 41 0f 68 c4
    return
}

function %unpack_low_i32x4(i32x4, i32x4) {
block0(v0: i32x4 [%xmm7], v1: i32x4 [%xmm6]):
[-, %xmm7] v2 = x86_punpckl v0, v1 ; bin: 66 0f 62 fe
    return
}
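
As a rough sketch of where the `bin:` bytes above come from (this is not Cranelift's actual recipe/binemit code): the PUNPCK* opcode arrays defined earlier are emitted with an optional REX prefix inserted between the 0x66 legacy prefix and the 0x0f escape byte, followed by a register-direct ModRM byte. The helper and parameter names below are hypothetical; `reg` is the destination XMM register number and `rm` the source.

// Hypothetical helper, for illustration only: assemble the register-direct
// form of a PUNPCK* instruction from its three opcode bytes.
fn encode_punpck(opcode: &[u8; 3], reg: u8, rm: u8) -> Vec<u8> {
    let mut bytes = vec![opcode[0]]; // 0x66 mandatory prefix comes first
    if reg >= 8 || rm >= 8 {
        // REX prefix (REX.R extends `reg`, REX.B extends `rm`) sits between
        // the legacy prefix and the 0x0f escape byte.
        bytes.push(0x40 | (((reg >= 8) as u8) << 2) | ((rm >= 8) as u8));
    }
    bytes.extend_from_slice(&opcode[1..]); // 0x0f escape + opcode byte
    bytes.push(0xc0 | ((reg & 7) << 3) | (rm & 7)); // register-direct ModRM
    bytes
}

fn main() {
    // x86_punpckh.i8x16 with xmm0, xmm12 -> 66 41 0f 68 c4 (first test above)
    assert_eq!(encode_punpck(&[0x66, 0x0f, 0x68], 0, 12), vec![0x66, 0x41, 0x0f, 0x68, 0xc4]);
    // x86_punpckl.i32x4 with xmm7, xmm6 -> 66 0f 62 fe (second test above)
    assert_eq!(encode_punpck(&[0x66, 0x0f, 0x62], 7, 6), vec![0x66, 0x0f, 0x62, 0xfe]);
}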


@@ -192,3 +192,16 @@ block0:
    return v5
}
; run

function %unpack_low() -> b1 {
block0:
    v0 = vconst.i32x4 [0 1 2 3]
    v1 = vconst.i32x4 [4 5 6 7]
    v2 = x86_punpckl v0, v1
    v3 = vconst.i32x4 [0 4 1 5]
    v4 = icmp eq v2, v3
    v5 = vall_true v4
    return v5
}
; run