Add x86 unpack instructions
This commit is contained in:
@@ -1631,6 +1631,8 @@ fn define_simd(
|
|||||||
let x86_psra = x86.by_name("x86_psra");
|
let x86_psra = x86.by_name("x86_psra");
|
||||||
let x86_psrl = x86.by_name("x86_psrl");
|
let x86_psrl = x86.by_name("x86_psrl");
|
||||||
let x86_ptest = x86.by_name("x86_ptest");
|
let x86_ptest = x86.by_name("x86_ptest");
|
||||||
|
let x86_punpckh = x86.by_name("x86_punpckh");
|
||||||
|
let x86_punpckl = x86.by_name("x86_punpckl");
|
||||||
|
|
||||||
// Shorthands for recipes.
|
// Shorthands for recipes.
|
||||||
let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
|
let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
|
||||||
@@ -1783,6 +1785,26 @@ fn define_simd(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD packing/unpacking
|
||||||
|
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||||
|
let (high, low) = match ty.lane_bits() {
|
||||||
|
8 => (&PUNPCKHBW, &PUNPCKLBW),
|
||||||
|
16 => (&PUNPCKHWD, &PUNPCKLWD),
|
||||||
|
32 => (&PUNPCKHDQ, &PUNPCKLDQ),
|
||||||
|
64 => (&PUNPCKHQDQ, &PUNPCKLQDQ),
|
||||||
|
_ => panic!("invalid size for SIMD packing/unpacking"),
|
||||||
|
};
|
||||||
|
|
||||||
|
e.enc_both_inferred(
|
||||||
|
x86_punpckh.bind(vector(ty, sse_vector_size)),
|
||||||
|
rec_fa.opcodes(high),
|
||||||
|
);
|
||||||
|
e.enc_both_inferred(
|
||||||
|
x86_punpckl.bind(vector(ty, sse_vector_size)),
|
||||||
|
rec_fa.opcodes(low),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
|
// SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
|
||||||
for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
|
for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||||
for to_type in
|
for to_type in
|
||||||
|
|||||||
@@ -376,6 +376,40 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let x = &Operand::new("x", TxN);
|
||||||
|
let y = &Operand::new("y", TxN);
|
||||||
|
let a = &Operand::new("a", TxN);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"x86_punpckh",
|
||||||
|
r#"
|
||||||
|
Unpack the high-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
|
||||||
|
i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
|
||||||
|
would result in ``a = [y3, x3, y2, x2]`` (using the Intel manual's right-to-left lane
|
||||||
|
ordering).
|
||||||
|
"#,
|
||||||
|
&formats.binary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x, y])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"x86_punpckl",
|
||||||
|
r#"
|
||||||
|
Unpack the low-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
|
||||||
|
i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
|
||||||
|
would result in ``a = [y1, x1, y0, x0]`` (using the Intel manual's right-to-left lane
|
||||||
|
ordering).
|
||||||
|
"#,
|
||||||
|
&formats.binary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x, y])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
let x = &Operand::new("x", FxN);
|
let x = &Operand::new("x", FxN);
|
||||||
let y = &Operand::new("y", FxN);
|
let y = &Operand::new("y", FxN);
|
||||||
let a = &Operand::new("a", FxN);
|
let a = &Operand::new("a", FxN);
|
||||||
|
|||||||
@@ -537,6 +537,30 @@ pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9];
|
|||||||
/// 0s (SSE4.1).
|
/// 0s (SSE4.1).
|
||||||
pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17];
|
pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17];
|
||||||
|
|
||||||
|
/// Unpack and interleave high-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||||
|
pub static PUNPCKHBW: [u8; 3] = [0x66, 0x0f, 0x68];
|
||||||
|
|
||||||
|
/// Unpack and interleave high-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||||
|
pub static PUNPCKHWD: [u8; 3] = [0x66, 0x0f, 0x69];
|
||||||
|
|
||||||
|
/// Unpack and interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||||
|
pub static PUNPCKHDQ: [u8; 3] = [0x66, 0x0f, 0x6a];
|
||||||
|
|
||||||
|
/// Unpack and interleave high-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||||
|
pub static PUNPCKHQDQ: [u8; 3] = [0x66, 0x0f, 0x6d];
|
||||||
|
|
||||||
|
/// Unpack and interleave low-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||||
|
pub static PUNPCKLBW: [u8; 3] = [0x66, 0x0f, 0x60];
|
||||||
|
|
||||||
|
/// Unpack and interleave low-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||||
|
pub static PUNPCKLWD: [u8; 3] = [0x66, 0x0f, 0x61];
|
||||||
|
|
||||||
|
/// Unpack and interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||||
|
pub static PUNPCKLDQ: [u8; 3] = [0x66, 0x0f, 0x62];
|
||||||
|
|
||||||
|
/// Unpack and interleave low-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||||
|
pub static PUNPCKLQDQ: [u8; 3] = [0x66, 0x0f, 0x6c];
|
||||||
|
|
||||||
/// Push r{16,32,64}.
|
/// Push r{16,32,64}.
|
||||||
pub static PUSH_REG: [u8; 1] = [0x50];
|
pub static PUSH_REG: [u8; 1] = [0x50];
|
||||||
|
|
||||||
|
|||||||
@@ -2375,6 +2375,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
|
|||||||
| Opcode::X86Pmaxu
|
| Opcode::X86Pmaxu
|
||||||
| Opcode::X86Pmins
|
| Opcode::X86Pmins
|
||||||
| Opcode::X86Pminu
|
| Opcode::X86Pminu
|
||||||
|
| Opcode::X86Punpckh
|
||||||
|
| Opcode::X86Punpckl
|
||||||
| Opcode::X86ElfTlsGetAddr
|
| Opcode::X86ElfTlsGetAddr
|
||||||
| Opcode::X86MachoTlsGetAddr => {
|
| Opcode::X86MachoTlsGetAddr => {
|
||||||
panic!("x86-specific opcode in supposedly arch-neutral IR!");
|
panic!("x86-specific opcode in supposedly arch-neutral IR!");
|
||||||
|
|||||||
@@ -95,3 +95,17 @@ block0:
|
|||||||
[-, %xmm0] v4 = x86_pshufb v1, v3 ; bin: 66 41 0f 38 00 c4
|
[-, %xmm0] v4 = x86_pshufb v1, v3 ; bin: 66 41 0f 38 00 c4
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;; pack/unpack
|
||||||
|
|
||||||
|
; Encoding of x86_punpckh on i8x16 (PUNPCKHBW, 66 0f 68); placing v1 in %xmm12 forces the 0x41 REX prefix.
function %unpack_high_i8x16(i8x16, i8x16) {
|
||||||
|
block0(v0: i8x16 [%xmm0], v1: i8x16 [%xmm12]):
|
||||||
|
[-, %xmm0] v2 = x86_punpckh v0, v1 ; bin: 66 41 0f 68 c4
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
; Encoding of x86_punpckl on i32x4 (PUNPCKLDQ, 66 0f 62); both operands sit in %xmm0-%xmm7, so no REX prefix appears.
function %unpack_low_i32x4(i32x4, i32x4) {
|
||||||
|
block0(v0: i32x4 [%xmm7], v1: i32x4 [%xmm6]):
|
||||||
|
[-, %xmm7] v2 = x86_punpckl v0, v1 ; bin: 66 0f 62 fe
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|||||||
@@ -192,3 +192,16 @@ block0:
|
|||||||
return v5
|
return v5
|
||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|
||||||
|
; Verifies x86_punpckl on i32x4 by interleaving the two low-order lanes of v0 and v1 and comparing with [0 4 1 5].
function %unpack_low() -> b1 {
|
||||||
|
block0:
|
||||||
|
v0 = vconst.i32x4 [0 1 2 3]
|
||||||
|
v1 = vconst.i32x4 [4 5 6 7]
|
||||||
|
v2 = x86_punpckl v0, v1
|
||||||
|
|
||||||
|
v3 = vconst.i32x4 [0 4 1 5] ; expected lanes are v0[0], v1[0], v0[1], v1[1]
|
||||||
|
v4 = icmp eq v2, v3
|
||||||
|
v5 = vall_true v4
|
||||||
|
return v5
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|||||||
Reference in New Issue
Block a user