x64: Add support for the pblendw instruction (#6023)

This commit adds another case for `shuffle` lowering to the x64 backend
for the `{,v}pblendw` instruction. This instruction selects each 16-bit
lane from one of its two inputs according to an 8-bit immediate mask, where
each bit selects which input supplies the corresponding lane.
This commit is contained in:
Alex Crichton
2023-03-15 12:20:43 -05:00
committed by GitHub
parent fcddb9ca81
commit 6ed90f86c8
8 changed files with 132 additions and 14 deletions

View File

@@ -918,6 +918,7 @@
Punpcklqdq
Pshuflw
Pshufhw
Pblendw
))
(type CmpOpcode extern
@@ -1290,6 +1291,7 @@
Vpextrw
Vpextrd
Vpextrq
Vpblendw
))
(type Avx512Opcode extern
@@ -2967,6 +2969,14 @@
(if-let $true (has_avx))
(xmm_rmr_blend_vex (AvxOpcode.Vpblendvb) src1 src2 mask))
;; Helper for creating `pblendw` instructions.
;;
;; `src1` and `src2` are the two vector inputs and `imm` is the 8-bit immediate
;; forwarded unchanged to the instruction. Rule 0 emits the SSE-encoded
;; `pblendw`; rule 1, which is guarded by `has_avx`, emits the VEX-encoded
;; `vpblendw` instead.
(decl x64_pblendw (Xmm XmmMem u8) Xmm)
(rule 0 (x64_pblendw src1 src2 imm)
(xmm_rm_r_imm (SseOpcode.Pblendw) src1 src2 imm (OperandSize.Size32)))
(rule 1 (x64_pblendw src1 src2 imm)
(if-let $true (has_avx))
(xmm_rmr_imm_vex (AvxOpcode.Vpblendw) src1 src2 imm))
;; Helper for creating a `movsd` instruction which creates a new vector
;; register where the upper 64-bits are from the first operand and the low
;; 64-bits are from the second operand.

View File

@@ -1125,6 +1125,7 @@ pub enum SseOpcode {
Punpcklqdq,
Pshuflw,
Pshufhw,
Pblendw,
}
impl SseOpcode {
@@ -1318,7 +1319,8 @@ impl SseOpcode {
| SseOpcode::Roundps
| SseOpcode::Roundpd
| SseOpcode::Roundss
| SseOpcode::Roundsd => SSE41,
| SseOpcode::Roundsd
| SseOpcode::Pblendw => SSE41,
SseOpcode::Pcmpgtq => SSE42,
}
@@ -1521,6 +1523,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Punpckhqdq => "punpckhqdq",
SseOpcode::Pshuflw => "pshuflw",
SseOpcode::Pshufhw => "pshufhw",
SseOpcode::Pblendw => "pblendw",
};
write!(fmt, "{}", name)
}
@@ -1705,7 +1708,8 @@ impl AvxOpcode {
| AvxOpcode::Vpextrb
| AvxOpcode::Vpextrw
| AvxOpcode::Vpextrd
| AvxOpcode::Vpextrq => {
| AvxOpcode::Vpextrq
| AvxOpcode::Vpblendw => {
smallvec![InstructionSet::AVX]
}
}

View File

@@ -2263,6 +2263,7 @@ pub(crate) fn emit(
AvxOpcode::Vpalignr => (false, LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x0F),
AvxOpcode::Vinsertps => (false, LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x21),
AvxOpcode::Vshufps => (false, LegacyPrefixes::None, OpcodeMap::_0F, 0xC6),
AvxOpcode::Vpblendw => (false, LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x0E),
_ => panic!("unexpected rmr_imm_vex opcode {op:?}"),
};
@@ -2719,6 +2720,7 @@ pub(crate) fn emit(
SseOpcode::Pinsrw => (LegacyPrefixes::_66, 0x0FC4, 2),
SseOpcode::Pinsrd => (LegacyPrefixes::_66, 0x0F3A22, 3),
SseOpcode::Shufps => (LegacyPrefixes::None, 0x0FC6, 2),
SseOpcode::Pblendw => (LegacyPrefixes::_66, 0x0F3A0E, 3),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
let rex = RexFlags::from(*size);

View File

@@ -3704,6 +3704,15 @@
;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Special case for `pblendw` which takes an 8-bit immediate where each bit
;; indicates which lane of the two operands is chosen for the output. A bit of
;; 0 chooses the corresponding 16-bit lane from `a` and a bit of 1 chooses the
;; corresponding 16-bit lane from `b`.
(rule 14 (lower (shuffle a b (pblendw_imm n)))
(x64_pblendw a b n))
;; Extractor over a shuffle `Immediate` that matches only when every 16-bit
;; output lane N is taken from lane N of one of the two inputs, yielding the
;; 8-bit blend mask to pass to `x64_pblendw`.
(decl pblendw_imm (u8) Immediate)
(extern extractor pblendw_imm pblendw_imm)
;; When the shuffle looks like "concatenate `a` and `b` and shift right by n*8
;; bytes", that's a `palignr` instruction. Note that the order of operands are
;; swapped in the instruction here. The `palignr` instruction uses the second

View File

@@ -980,6 +980,41 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
None
}
}
/// Attempts to express a shuffle immediate as the 8-bit mask of a `pblendw`
/// instruction.
///
/// Returns `None` when the immediate does not describe a shuffle of 16-bit
/// lanes, or when some output lane is not taken from the same-numbered lane
/// of one of the two inputs. Otherwise bit `N` of the returned mask is 0 when
/// output lane `N` comes from the first input and 1 when it comes from the
/// second.
fn pblendw_imm(&mut self, imm: Immediate) -> Option<u8> {
    // The shuffle must be expressible in terms of whole 16-bit lanes.
    let (s0, s1, s2, s3, s4, s5, s6, s7) = self.shuffle16_from_imm(imm)?;
    let selectors = [s0, s1, s2, s3, s4, s5, s6, s7];

    // Walk the lane selectors together with their output lane index and
    // accumulate the mask. Selectors 0..8 name lanes of the first input and
    // 8..16 name lanes of the second, so `selector % 8` must equal the output
    // lane for the blend to be representable.
    selectors
        .iter()
        .zip(0u8..)
        .try_fold(0u8, |mask, (&selector, lane)| {
            if selector % 8 != lane {
                return None;
            }
            Some(if selector < 8 {
                mask
            } else {
                mask | (1 << lane)
            })
        })
}
}
impl IsleContext<'_, '_, MInst, X64Backend> {