x64: Add support for phadd{w,d} instructions (#5896)

This commit adds support for the bare lowering of the `iadd_pairwise` instruction with `i16x8` and `i32x4` types on the x64 backend. These lowerings are achieved with the `phaddw` and `phaddd` instructions, respectively. AVX encodings of these instructions (`vphaddw` and `vphaddd`) are added as well. The motivation for these new lowerings comes from the relaxed-simd proposal, which will use them in the deterministic lowering of some instructions on the x64 backend.
2023-02-28 17:35:53 -06:00
parent 32cfd60877
commit e0ef0b7c72
6 changed files with 155 additions and 8 deletions
--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -860,7 +860,10 @@
            Ucomisd
            Unpcklps
            Xorps
-            Xorpd))
+            Xorpd
+            Phaddw
+            Phaddd
+          ))

 (type CmpOpcode extern
      (enum Cmp
@@ -1356,6 +1359,8 @@
            Vcvtps2pd
            Vcvttpd2dq
            Vcvttps2dq
+            Vphaddw
+            Vphaddd
          ))

 (type Avx512Opcode extern
@@ -2482,6 +2487,22 @@
      (if-let $true (has_avx))
      (xmm_rmir_vex (AvxOpcode.Vpaddsw) src1 src2))

+;; Helper for creating `phaddw` instructions: rule 0 builds `SseOpcode.Phaddw` via `xmm_rm_r`; rule 1, guarded by `has_avx`, builds `AvxOpcode.Vphaddw` via `xmm_rmir_vex`.
+(decl x64_phaddw (Xmm XmmMem) Xmm)
+(rule 0 (x64_phaddw src1 src2)
+      (xmm_rm_r (SseOpcode.Phaddw) src1 src2))
+(rule 1 (x64_phaddw src1 src2)
+      (if-let $true (has_avx))
+      (xmm_rmir_vex (AvxOpcode.Vphaddw) src1 src2))
+
+;; Helper for creating `phaddd` instructions: rule 0 builds `SseOpcode.Phaddd` via `xmm_rm_r`; rule 1, guarded by `has_avx`, builds `AvxOpcode.Vphaddd` via `xmm_rmir_vex`.
+(decl x64_phaddd (Xmm XmmMem) Xmm)
+(rule 0 (x64_phaddd src1 src2)
+      (xmm_rm_r (SseOpcode.Phaddd) src1 src2))
+(rule 1 (x64_phaddd src1 src2)
+      (if-let $true (has_avx))
+      (xmm_rmir_vex (AvxOpcode.Vphaddd) src1 src2))
+
 ;; Helper for creating `paddusb` instructions.
 (decl x64_paddusb (Xmm XmmMem) Xmm)
 (rule 0 (x64_paddusb src1 src2)
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -1115,6 +1115,8 @@ pub enum SseOpcode {
    Unpcklps,
    Xorps,
    Xorpd,
+    Phaddw,
+    Phaddd,
 }

 impl SseOpcode {
@@ -1261,7 +1263,9 @@ impl SseOpcode {
            | SseOpcode::Pabsd
            | SseOpcode::Palignr
            | SseOpcode::Pmulhrsw
-            | SseOpcode::Pshufb => SSSE3,
+            | SseOpcode::Pshufb
+            | SseOpcode::Phaddw
+            | SseOpcode::Phaddd => SSSE3,

            SseOpcode::Blendvpd
            | SseOpcode::Blendvps
@@ -1495,6 +1499,8 @@ impl fmt::Debug for SseOpcode {
            SseOpcode::Unpcklps => "unpcklps",
            SseOpcode::Xorps => "xorps",
            SseOpcode::Xorpd => "xorpd",
+            SseOpcode::Phaddw => "phaddw",
+            SseOpcode::Phaddd => "phaddd",
        };
        write!(fmt, "{}", name)
    }
@@ -1661,7 +1667,9 @@ impl AvxOpcode {
            | AvxOpcode::Vcvtpd2ps
            | AvxOpcode::Vcvtps2pd
            | AvxOpcode::Vcvttpd2dq
-            | AvxOpcode::Vcvttps2dq => {
+            | AvxOpcode::Vcvttps2dq
+            | AvxOpcode::Vphaddw
+            | AvxOpcode::Vphaddd => {
                smallvec![InstructionSet::AVX]
            }
        }
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1954,6 +1954,8 @@ pub(crate) fn emit(
                SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2),
                SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2),
                SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2),
+                SseOpcode::Phaddw => (LegacyPrefixes::_66, 0x0F3801, 3),
+                SseOpcode::Phaddd => (LegacyPrefixes::_66, 0x0F3802, 3),
                _ => unimplemented!("Opcode {:?} not implemented", op),
            };

@@ -2167,6 +2169,8 @@ pub(crate) fn emit(
                AvxOpcode::Vminsd => (LP::_F2, OM::_0F, 0x5D),
                AvxOpcode::Vmaxss => (LP::_F3, OM::_0F, 0x5F),
                AvxOpcode::Vmaxsd => (LP::_F2, OM::_0F, 0x5F),
+                AvxOpcode::Vphaddw => (LP::_66, OM::_0F38, 0x01),
+                AvxOpcode::Vphaddd => (LP::_66, OM::_0F38, 0x02),
                _ => panic!("unexpected rmir vex opcode {op:?}"),
            };
            VexInstruction::new()
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -3173,8 +3173,14 @@

 ;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

+(rule (lower (has_type $I16X8 (iadd_pairwise x y)))
+      (x64_phaddw x y))
+
+(rule (lower (has_type $I32X4 (iadd_pairwise x y)))
+      (x64_phaddd x y))
+
 ;; special case for the `i16x8.extadd_pairwise_i8x16_s` wasm instruction
-(rule (lower
+(rule 1 (lower
        (has_type $I16X8 (iadd_pairwise
                           (swiden_low val @ (value_type $I8X16))
                           (swiden_high val))))
@@ -3182,7 +3188,7 @@
        (x64_pmaddubsw mul_const val)))

 ;; special case for the `i32x4.extadd_pairwise_i16x8_s` wasm instruction
-(rule (lower
+(rule 1 (lower
        (has_type $I32X4 (iadd_pairwise
                           (swiden_low val @ (value_type $I16X8))
                           (swiden_high val))))
@@ -3190,7 +3196,7 @@
        (x64_pmaddwd val mul_const)))

 ;; special case for the `i16x8.extadd_pairwise_i8x16_u` wasm instruction
-(rule (lower
+(rule 1 (lower
        (has_type $I16X8 (iadd_pairwise
                           (uwiden_low val @ (value_type $I8X16))
                           (uwiden_high val))))
@@ -3198,7 +3204,7 @@
        (x64_pmaddubsw val mul_const)))

 ;; special case for the `i32x4.extadd_pairwise_i16x8_u` wasm instruction
-(rule (lower
+(rule 1 (lower
        (has_type $I32X4 (iadd_pairwise
                           (uwiden_low val @ (value_type $I16X8))
                           (uwiden_high val))))
@@ -3212,7 +3218,7 @@
        (x64_paddd dst addd_const)))

 ;; special case for the `i32x4.dot_i16x8_s` wasm instruction
-(rule (lower
+(rule 1 (lower
        (has_type $I32X4 (iadd_pairwise
                           (imul (swiden_low x) (swiden_low y))
                           (imul (swiden_high x) (swiden_high y)))))