x64: Add support for phadd{w,d} instructions (#5896)

This commit adds support for the bare lowering of the `iadd_pairwise`
instruction with `i16x8` and `i32x4` types on the x64 backend. These
lowerings are achieved with the `phaddw` and `phaddd` instructions,
respectively. AVX encodings of these instructions (`vphaddw` and
`vphaddd`) are added as well.

The motivation for these new lowerings comes from the relaxed-simd
proposal which will use them in the deterministic lowering of some
instructions on the x64 backend.
This commit is contained in:
Alex Crichton
2023-02-28 17:35:53 -06:00
committed by GitHub
parent 32cfd60877
commit e0ef0b7c72
6 changed files with 155 additions and 8 deletions

View File

@@ -860,7 +860,10 @@
Ucomisd
Unpcklps
Xorps
Xorpd))
Xorpd
Phaddw
Phaddd
))
(type CmpOpcode extern
(enum Cmp
@@ -1356,6 +1359,8 @@
Vcvtps2pd
Vcvttpd2dq
Vcvttps2dq
Vphaddw
Vphaddd
))
(type Avx512Opcode extern
@@ -2482,6 +2487,22 @@
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vpaddsw) src1 src2))
;; Helper for creating `phaddw` instructions.
;;
;; Takes an `Xmm` first operand and an `XmmMem` second operand and yields an
;; `Xmm` result. Two encodings are provided, selected by rule priority.
(decl x64_phaddw (Xmm XmmMem) Xmm)
;; Priority 0: build the instruction via `xmm_rm_r` with the SSE opcode
;; `SseOpcode.Phaddw`.
;; NOTE(review): this rule carries no ISA-feature guard of its own -- confirm
;; that the required SSE extension is guaranteed wherever this helper is used.
(rule 0 (x64_phaddw src1 src2)
(xmm_rm_r (SseOpcode.Phaddw) src1 src2))
;; Priority 1: only matches when `has_avx` yields `$true`; builds the
;; instruction via `xmm_rmir_vex` with `AvxOpcode.Vphaddw` instead.
;; (Presumably takes precedence over the priority-0 rule when both match --
;; confirm against ISLE's rule-priority semantics.)
(rule 1 (x64_phaddw src1 src2)
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vphaddw) src1 src2))
;; Helper for creating `phaddd` instructions.
;;
;; Takes an `Xmm` first operand and an `XmmMem` second operand and yields an
;; `Xmm` result. Two encodings are provided, selected by rule priority.
(decl x64_phaddd (Xmm XmmMem) Xmm)
;; Priority 0: build the instruction via `xmm_rm_r` with the SSE opcode
;; `SseOpcode.Phaddd`.
;; NOTE(review): this rule carries no ISA-feature guard of its own -- confirm
;; that the required SSE extension is guaranteed wherever this helper is used.
(rule 0 (x64_phaddd src1 src2)
(xmm_rm_r (SseOpcode.Phaddd) src1 src2))
;; Priority 1: only matches when `has_avx` yields `$true`; builds the
;; instruction via `xmm_rmir_vex` with `AvxOpcode.Vphaddd` instead.
;; (Presumably takes precedence over the priority-0 rule when both match --
;; confirm against ISLE's rule-priority semantics.)
(rule 1 (x64_phaddd src1 src2)
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vphaddd) src1 src2))
;; Helper for creating `paddusb` instructions.
(decl x64_paddusb (Xmm XmmMem) Xmm)
(rule 0 (x64_paddusb src1 src2)

View File

@@ -1115,6 +1115,8 @@ pub enum SseOpcode {
Unpcklps,
Xorps,
Xorpd,
Phaddw,
Phaddd,
}
impl SseOpcode {
@@ -1261,7 +1263,9 @@ impl SseOpcode {
| SseOpcode::Pabsd
| SseOpcode::Palignr
| SseOpcode::Pmulhrsw
| SseOpcode::Pshufb => SSSE3,
| SseOpcode::Pshufb
| SseOpcode::Phaddw
| SseOpcode::Phaddd => SSSE3,
SseOpcode::Blendvpd
| SseOpcode::Blendvps
@@ -1495,6 +1499,8 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Unpcklps => "unpcklps",
SseOpcode::Xorps => "xorps",
SseOpcode::Xorpd => "xorpd",
SseOpcode::Phaddw => "phaddw",
SseOpcode::Phaddd => "phaddd",
};
write!(fmt, "{}", name)
}
@@ -1661,7 +1667,9 @@ impl AvxOpcode {
| AvxOpcode::Vcvtpd2ps
| AvxOpcode::Vcvtps2pd
| AvxOpcode::Vcvttpd2dq
| AvxOpcode::Vcvttps2dq => {
| AvxOpcode::Vcvttps2dq
| AvxOpcode::Vphaddw
| AvxOpcode::Vphaddd => {
smallvec![InstructionSet::AVX]
}
}

View File

@@ -1954,6 +1954,8 @@ pub(crate) fn emit(
SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2),
SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2),
SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2),
SseOpcode::Phaddw => (LegacyPrefixes::_66, 0x0F3801, 3),
SseOpcode::Phaddd => (LegacyPrefixes::_66, 0x0F3802, 3),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
@@ -2167,6 +2169,8 @@ pub(crate) fn emit(
AvxOpcode::Vminsd => (LP::_F2, OM::_0F, 0x5D),
AvxOpcode::Vmaxss => (LP::_F3, OM::_0F, 0x5F),
AvxOpcode::Vmaxsd => (LP::_F2, OM::_0F, 0x5F),
AvxOpcode::Vphaddw => (LP::_66, OM::_0F38, 0x01),
AvxOpcode::Vphaddd => (LP::_66, OM::_0F38, 0x02),
_ => panic!("unexpected rmir vex opcode {op:?}"),
};
VexInstruction::new()

View File

@@ -3173,8 +3173,14 @@
;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bare lowering of `iadd_pairwise` producing `$I16X8`: pass both operands
;; straight to the `x64_phaddw` helper. No explicit priority is given on this
;; rule.
(rule (lower (has_type $I16X8 (iadd_pairwise x y)))
(x64_phaddw x y))
;; Bare lowering of `iadd_pairwise` producing `$I32X4`: pass both operands
;; straight to the `x64_phaddd` helper. No explicit priority is given on this
;; rule.
(rule (lower (has_type $I32X4 (iadd_pairwise x y)))
(x64_phaddd x y))
;; special case for the `i16x8.extadd_pairwise_i8x16_s` wasm instruction
(rule (lower
(rule 1 (lower
(has_type $I16X8 (iadd_pairwise
(swiden_low val @ (value_type $I8X16))
(swiden_high val))))
@@ -3182,7 +3188,7 @@
(x64_pmaddubsw mul_const val)))
;; special case for the `i32x4.extadd_pairwise_i16x8_s` wasm instruction
(rule (lower
(rule 1 (lower
(has_type $I32X4 (iadd_pairwise
(swiden_low val @ (value_type $I16X8))
(swiden_high val))))
@@ -3190,7 +3196,7 @@
(x64_pmaddwd val mul_const)))
;; special case for the `i16x8.extadd_pairwise_i8x16_u` wasm instruction
(rule (lower
(rule 1 (lower
(has_type $I16X8 (iadd_pairwise
(uwiden_low val @ (value_type $I8X16))
(uwiden_high val))))
@@ -3198,7 +3204,7 @@
(x64_pmaddubsw val mul_const)))
;; special case for the `i32x4.extadd_pairwise_i16x8_u` wasm instruction
(rule (lower
(rule 1 (lower
(has_type $I32X4 (iadd_pairwise
(uwiden_low val @ (value_type $I16X8))
(uwiden_high val))))
@@ -3212,7 +3218,7 @@
(x64_paddd dst addd_const)))
;; special case for the `i32x4.dot_i16x8_s` wasm instruction
(rule (lower
(rule 1 (lower
(has_type $I32X4 (iadd_pairwise
(imul (swiden_low x) (swiden_low y))
(imul (swiden_high x) (swiden_high y)))))