Add x86_palignr instructions

This instruction is necessary for implementing `[s|u]widen_high`.
Andrew Brown
2020-07-07 15:56:02 -07:00
parent 0e5e8a62c8
commit fafef7db77
5 changed files with 36 additions and 4 deletions
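
For context on the commit message: PALIGNR concatenates the destination and source vectors and extracts a byte-shifted 16-byte window, so shifting a vector against itself by 8 bytes moves its upper half into the low lanes — the step a `[s|u]widen_high` lowering needs before an existing widen-low extension can finish the job. Below is a minimal scalar sketch of the 128-bit semantics; it is an illustration written for this summary, not code from the commit.

/// Scalar sketch of 128-bit PALIGNR (illustrative only, not Cranelift code):
/// concatenate `dst` (high 16 bytes) and `src` (low 16 bytes), shift the
/// 32-byte value right by `shift` bytes, and keep the low 16 bytes.
fn palignr(dst: [u8; 16], src: [u8; 16], shift: usize) -> [u8; 16] {
    let mut concat = [0u8; 32];
    concat[..16].copy_from_slice(&src); // source fills the low bytes
    concat[16..].copy_from_slice(&dst); // destination fills the high bytes
    let mut out = [0u8; 16];
    for i in 0..16 {
        // Bytes shifted in past the top of the concatenation are zero,
        // matching the hardware behavior for large immediates.
        out[i] = concat.get(i + shift).copied().unwrap_or(0);
    }
    out
}

fn main() {
    let x: [u8; 16] = core::array::from_fn(|i| i as u8);
    // Shifting the concatenation of x with itself by 8 bytes moves the
    // high half of x into the low lanes: 8, 9, ..., 15, then 0, 1, ...
    assert_eq!(palignr(x, x, 8)[..8], [8, 9, 10, 11, 12, 13, 14, 15]);
}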

@@ -1697,6 +1697,7 @@ fn define_simd(
     let x86_pminu = x86.by_name("x86_pminu");
     let x86_pmullq = x86.by_name("x86_pmullq");
     let x86_pmuludq = x86.by_name("x86_pmuludq");
+    let x86_palignr = x86.by_name("x86_palignr");
     let x86_pshufb = x86.by_name("x86_pshufb");
     let x86_pshufd = x86.by_name("x86_pshufd");
     let x86_psll = x86.by_name("x86_psll");
@@ -1901,6 +1902,8 @@ fn define_simd(
             rec_fa.opcodes(low),
         );
     }
+
+    // SIMD narrow/widen
     for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] {
         let snarrow = snarrow.bind(vector(*ty, sse_vector_size));
         e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes));
@@ -1912,6 +1915,13 @@ fn define_simd(
         let unarrow = unarrow.bind(vector(*ty, sse_vector_size));
         e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap);
     }
 
+    for ty in &[I8, I16, I32, I64] {
+        e.enc_both_inferred_maybe_isap(
+            x86_palignr.bind(vector(*ty, sse_vector_size)),
+            rec_fa_ib.opcodes(&PALIGNR[..]),
+            Some(use_ssse3_simd),
+        );
+    }
     // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
     for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
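
The `Some(use_ssse3_simd)` predicate guards these encodings because PALIGNR is an SSSE3 instruction (see the opcode listing below); this is also why the binemit test at the end of the commit enables `has_ssse3=true`.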

@@ -664,6 +664,21 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    let c = &Operand::new("c", uimm8)
+        .with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details");
+    ig.push(
+        Inst::new(
+            "x86_palignr",
+            r#"
+        Concatenate destination and source operands, extracting a byte-aligned result shifted to
+        the right by `c`.
+        "#,
+            &formats.ternary_imm8,
+        )
+        .operands_in(vec![x, y, c])
+        .operands_out(vec![a]),
+    );
+
     let i64_t = &TypeVar::new(
         "i64_t",
         "A scalar 64bit integer",

@@ -354,6 +354,10 @@ pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc];
 /// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE).
 pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd];
 
+/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is
+/// shifted to the right by the constant number of bytes in imm8 (SSSE3).
+pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f];
+
 /// Bitwise AND of xmm2/m128 and xmm1 (SSE2).
 pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb];

@@ -2133,6 +2133,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::X86Insertps
         | Opcode::X86Movsd
         | Opcode::X86Movlhps
+        | Opcode::X86Palignr
         | Opcode::X86Psll
         | Opcode::X86Psrl
         | Opcode::X86Psra

@@ -1,5 +1,6 @@
 test binemit
-target x86_64
+set enable_simd
+target x86_64 has_ssse3=true
 
 ; Ensure raw_bitcast emits no instructions.
 function %raw_bitcast_i16x8_to_b32x4() {
@@ -10,8 +11,9 @@ block0:
     return
 }
 
-function %fcvt_32(i32x4) {
-block0(v0: i32x4 [%xmm6]):
-[-, %xmm2] v1 = fcvt_from_sint.f32x4 v0 ; bin: 40 0f 5b d6
+function %conversions_i32x4(i32x4, i32x4) {
+block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm4]):
+[-, %xmm2] v2 = fcvt_from_sint.f32x4 v0 ; bin: 40 0f 5b d6
+[-, %xmm6] v3 = x86_palignr v0, v1, 3 ; bin: 66 0f 3a 0f f4 03
     return
 }
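
The expected bytes on the new test line decompose exactly as the `PALIGNR` entry in the opcode table predicts: four opcode bytes, a ModRM byte selecting the registers, then the shift immediate. A quick sanity check — the `modrm_rr` helper here is hypothetical, written for illustration rather than taken from Cranelift:

// Hypothetical helper (not Cranelift's API): a register-to-register ModRM
// byte has mod = 0b11, reg = destination register, rm = source register.
fn modrm_rr(reg: u8, rm: u8) -> u8 {
    0b1100_0000 | ((reg & 7) << 3) | (rm & 7)
}

fn main() {
    let mut enc = vec![0x66, 0x0f, 0x3a, 0x0f]; // PALIGNR opcode bytes
    enc.push(modrm_rr(6, 4)); // dst %xmm6, src %xmm4 -> 0xf4
    enc.push(3); // imm8: shift right by three bytes
    assert_eq!(enc, [0x66, 0x0f, 0x3a, 0x0f, 0xf4, 0x03]); // matches "bin: 66 0f 3a 0f f4 03"
}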