x64: Add support for phadd{w,d} instructions (#5896)
This commit adds support for the bare lowering of the `iadd_pairwise` instruction with `i16x8` and `i32x4` types on the x64 backend. These lowerings are achieved with the `phaddw` and `phaddd` instructions, respectively. AVX encodings of these instructions (`vphaddw` and `vphaddd`) are added as well. The motivation for these new lowerings comes from the relaxed-simd proposal, which will use them in the deterministic lowering of some instructions on the x64 backend.
This commit is contained in:
@@ -860,7 +860,10 @@
|
||||
Ucomisd
|
||||
Unpcklps
|
||||
Xorps
|
||||
Xorpd))
|
||||
Xorpd
|
||||
Phaddw
|
||||
Phaddd
|
||||
))
|
||||
|
||||
(type CmpOpcode extern
|
||||
(enum Cmp
|
||||
@@ -1356,6 +1359,8 @@
|
||||
Vcvtps2pd
|
||||
Vcvttpd2dq
|
||||
Vcvttps2dq
|
||||
Vphaddw
|
||||
Vphaddd
|
||||
))
|
||||
|
||||
(type Avx512Opcode extern
|
||||
@@ -2482,6 +2487,22 @@
|
||||
(if-let $true (has_avx))
|
||||
(xmm_rmir_vex (AvxOpcode.Vpaddsw) src1 src2))
|
||||
|
||||
;; Helper for creating `phaddw` instructions.
|
||||
(decl x64_phaddw (Xmm XmmMem) Xmm)
|
||||
(rule 0 (x64_phaddw src1 src2)
|
||||
(xmm_rm_r (SseOpcode.Phaddw) src1 src2))
|
||||
(rule 1 (x64_phaddw src1 src2)
|
||||
(if-let $true (has_avx))
|
||||
(xmm_rmir_vex (AvxOpcode.Vphaddw) src1 src2))
|
||||
|
||||
;; Helper for creating `phaddd` instructions.
|
||||
(decl x64_phaddd (Xmm XmmMem) Xmm)
|
||||
(rule 0 (x64_phaddd src1 src2)
|
||||
(xmm_rm_r (SseOpcode.Phaddd) src1 src2))
|
||||
(rule 1 (x64_phaddd src1 src2)
|
||||
(if-let $true (has_avx))
|
||||
(xmm_rmir_vex (AvxOpcode.Vphaddd) src1 src2))
|
||||
|
||||
;; Helper for creating `paddusb` instructions.
|
||||
(decl x64_paddusb (Xmm XmmMem) Xmm)
|
||||
(rule 0 (x64_paddusb src1 src2)
|
||||
|
||||
@@ -1115,6 +1115,8 @@ pub enum SseOpcode {
|
||||
Unpcklps,
|
||||
Xorps,
|
||||
Xorpd,
|
||||
Phaddw,
|
||||
Phaddd,
|
||||
}
|
||||
|
||||
impl SseOpcode {
|
||||
@@ -1261,7 +1263,9 @@ impl SseOpcode {
|
||||
| SseOpcode::Pabsd
|
||||
| SseOpcode::Palignr
|
||||
| SseOpcode::Pmulhrsw
|
||||
| SseOpcode::Pshufb => SSSE3,
|
||||
| SseOpcode::Pshufb
|
||||
| SseOpcode::Phaddw
|
||||
| SseOpcode::Phaddd => SSSE3,
|
||||
|
||||
SseOpcode::Blendvpd
|
||||
| SseOpcode::Blendvps
|
||||
@@ -1495,6 +1499,8 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Unpcklps => "unpcklps",
|
||||
SseOpcode::Xorps => "xorps",
|
||||
SseOpcode::Xorpd => "xorpd",
|
||||
SseOpcode::Phaddw => "phaddw",
|
||||
SseOpcode::Phaddd => "phaddd",
|
||||
};
|
||||
write!(fmt, "{}", name)
|
||||
}
|
||||
@@ -1661,7 +1667,9 @@ impl AvxOpcode {
|
||||
| AvxOpcode::Vcvtpd2ps
|
||||
| AvxOpcode::Vcvtps2pd
|
||||
| AvxOpcode::Vcvttpd2dq
|
||||
| AvxOpcode::Vcvttps2dq => {
|
||||
| AvxOpcode::Vcvttps2dq
|
||||
| AvxOpcode::Vphaddw
|
||||
| AvxOpcode::Vphaddd => {
|
||||
smallvec![InstructionSet::AVX]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1954,6 +1954,8 @@ pub(crate) fn emit(
|
||||
SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2),
|
||||
SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2),
|
||||
SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2),
|
||||
SseOpcode::Phaddw => (LegacyPrefixes::_66, 0x0F3801, 3),
|
||||
SseOpcode::Phaddd => (LegacyPrefixes::_66, 0x0F3802, 3),
|
||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||
};
|
||||
|
||||
@@ -2167,6 +2169,8 @@ pub(crate) fn emit(
|
||||
AvxOpcode::Vminsd => (LP::_F2, OM::_0F, 0x5D),
|
||||
AvxOpcode::Vmaxss => (LP::_F3, OM::_0F, 0x5F),
|
||||
AvxOpcode::Vmaxsd => (LP::_F2, OM::_0F, 0x5F),
|
||||
AvxOpcode::Vphaddw => (LP::_66, OM::_0F38, 0x01),
|
||||
AvxOpcode::Vphaddd => (LP::_66, OM::_0F38, 0x02),
|
||||
_ => panic!("unexpected rmir vex opcode {op:?}"),
|
||||
};
|
||||
VexInstruction::new()
|
||||
|
||||
@@ -3173,8 +3173,14 @@
|
||||
|
||||
;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type $I16X8 (iadd_pairwise x y)))
|
||||
(x64_phaddw x y))
|
||||
|
||||
(rule (lower (has_type $I32X4 (iadd_pairwise x y)))
|
||||
(x64_phaddd x y))
|
||||
|
||||
;; special case for the `i16x8.extadd_pairwise_i8x16_s` wasm instruction
|
||||
(rule (lower
|
||||
(rule 1 (lower
|
||||
(has_type $I16X8 (iadd_pairwise
|
||||
(swiden_low val @ (value_type $I8X16))
|
||||
(swiden_high val))))
|
||||
@@ -3182,7 +3188,7 @@
|
||||
(x64_pmaddubsw mul_const val)))
|
||||
|
||||
;; special case for the `i32x4.extadd_pairwise_i16x8_s` wasm instruction
|
||||
(rule (lower
|
||||
(rule 1 (lower
|
||||
(has_type $I32X4 (iadd_pairwise
|
||||
(swiden_low val @ (value_type $I16X8))
|
||||
(swiden_high val))))
|
||||
@@ -3190,7 +3196,7 @@
|
||||
(x64_pmaddwd val mul_const)))
|
||||
|
||||
;; special case for the `i16x8.extadd_pairwise_i8x16_u` wasm instruction
|
||||
(rule (lower
|
||||
(rule 1 (lower
|
||||
(has_type $I16X8 (iadd_pairwise
|
||||
(uwiden_low val @ (value_type $I8X16))
|
||||
(uwiden_high val))))
|
||||
@@ -3198,7 +3204,7 @@
|
||||
(x64_pmaddubsw val mul_const)))
|
||||
|
||||
;; special case for the `i32x4.extadd_pairwise_i16x8_u` wasm instruction
|
||||
(rule (lower
|
||||
(rule 1 (lower
|
||||
(has_type $I32X4 (iadd_pairwise
|
||||
(uwiden_low val @ (value_type $I16X8))
|
||||
(uwiden_high val))))
|
||||
@@ -3212,7 +3218,7 @@
|
||||
(x64_paddd dst addd_const)))
|
||||
|
||||
;; special case for the `i32x4.dot_i16x8_s` wasm instruction
|
||||
(rule (lower
|
||||
(rule 1 (lower
|
||||
(has_type $I32X4 (iadd_pairwise
|
||||
(imul (swiden_low x) (swiden_low y))
|
||||
(imul (swiden_high x) (swiden_high y)))))
|
||||
|
||||
Reference in New Issue
Block a user