diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index d25cc7efc2..993b4984d7 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -860,7 +860,10 @@ Ucomisd Unpcklps Xorps - Xorpd)) + Xorpd + Phaddw + Phaddd + )) (type CmpOpcode extern (enum Cmp @@ -1356,6 +1359,8 @@ Vcvtps2pd Vcvttpd2dq Vcvttps2dq + Vphaddw + Vphaddd )) (type Avx512Opcode extern @@ -2482,6 +2487,22 @@ (if-let $true (has_avx)) (xmm_rmir_vex (AvxOpcode.Vpaddsw) src1 src2)) +;; Helper for creating `phaddw` instructions. +(decl x64_phaddw (Xmm XmmMem) Xmm) +(rule 0 (x64_phaddw src1 src2) + (xmm_rm_r (SseOpcode.Phaddw) src1 src2)) +(rule 1 (x64_phaddw src1 src2) + (if-let $true (has_avx)) + (xmm_rmir_vex (AvxOpcode.Vphaddw) src1 src2)) + +;; Helper for creating `phaddd` instructions. +(decl x64_phaddd (Xmm XmmMem) Xmm) +(rule 0 (x64_phaddd src1 src2) + (xmm_rm_r (SseOpcode.Phaddd) src1 src2)) +(rule 1 (x64_phaddd src1 src2) + (if-let $true (has_avx)) + (xmm_rmir_vex (AvxOpcode.Vphaddd) src1 src2)) + ;; Helper for creating `paddusb` instructions. (decl x64_paddusb (Xmm XmmMem) Xmm) (rule 0 (x64_paddusb src1 src2) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 772727133b..ca6e40ce55 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -1115,6 +1115,8 @@ pub enum SseOpcode { Unpcklps, Xorps, Xorpd, + Phaddw, + Phaddd, } impl SseOpcode { @@ -1261,7 +1263,9 @@ impl SseOpcode { | SseOpcode::Pabsd | SseOpcode::Palignr | SseOpcode::Pmulhrsw - | SseOpcode::Pshufb => SSSE3, + | SseOpcode::Pshufb + | SseOpcode::Phaddw + | SseOpcode::Phaddd => SSSE3, SseOpcode::Blendvpd | SseOpcode::Blendvps @@ -1495,6 +1499,8 @@ impl fmt::Debug for SseOpcode { SseOpcode::Unpcklps => "unpcklps", SseOpcode::Xorps => "xorps", SseOpcode::Xorpd => "xorpd", + SseOpcode::Phaddw => "phaddw", + SseOpcode::Phaddd => "phaddd", }; write!(fmt, "{}", name) } @@ -1661,7 +1667,9 @@ impl AvxOpcode { | AvxOpcode::Vcvtpd2ps | AvxOpcode::Vcvtps2pd | AvxOpcode::Vcvttpd2dq - | AvxOpcode::Vcvttps2dq => { + | AvxOpcode::Vcvttps2dq + | AvxOpcode::Vphaddw + | AvxOpcode::Vphaddd => { smallvec![InstructionSet::AVX] } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index e43746c423..b92e1ecd9b 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1954,6 +1954,8 @@ pub(crate) fn emit( SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2), SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2), SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2), + SseOpcode::Phaddw => (LegacyPrefixes::_66, 0x0F3801, 3), + SseOpcode::Phaddd => (LegacyPrefixes::_66, 0x0F3802, 3), _ => unimplemented!("Opcode {:?} not implemented", op), }; @@ -2167,6 +2169,8 @@ pub(crate) fn emit( AvxOpcode::Vminsd => (LP::_F2, OM::_0F, 0x5D), AvxOpcode::Vmaxss => (LP::_F3, OM::_0F, 0x5F), AvxOpcode::Vmaxsd => (LP::_F2, OM::_0F, 0x5F), + AvxOpcode::Vphaddw => (LP::_66, OM::_0F38, 0x01), + AvxOpcode::Vphaddd => (LP::_66, OM::_0F38, 0x02), _ => panic!("unexpected rmir vex opcode {op:?}"), }; VexInstruction::new() diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 40b40f9b9f..38c1f2de79 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -3173,8 +3173,14 @@ ;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type $I16X8 (iadd_pairwise x y))) + (x64_phaddw x y)) + +(rule (lower (has_type $I32X4 (iadd_pairwise x y))) + (x64_phaddd x y)) + ;; special case for the `i16x8.extadd_pairwise_i8x16_s` wasm instruction -(rule (lower +(rule 1 (lower (has_type $I16X8 (iadd_pairwise (swiden_low val @ (value_type $I8X16)) (swiden_high val)))) @@ -3182,7 +3188,7 @@ (x64_pmaddubsw mul_const val))) ;; special case for the `i32x4.extadd_pairwise_i16x8_s` wasm instruction -(rule (lower +(rule 1 (lower (has_type $I32X4 (iadd_pairwise (swiden_low val @ (value_type $I16X8)) (swiden_high val)))) @@ -3190,7 +3196,7 @@ (x64_pmaddwd val mul_const))) ;; special case for the `i16x8.extadd_pairwise_i8x16_u` wasm instruction -(rule (lower +(rule 1 (lower (has_type $I16X8 (iadd_pairwise (uwiden_low val @ (value_type $I8X16)) (uwiden_high val)))) @@ -3198,7 +3204,7 @@ (x64_pmaddubsw val mul_const))) ;; special case for the `i32x4.extadd_pairwise_i16x8_u` wasm instruction -(rule (lower +(rule 1 (lower (has_type $I32X4 (iadd_pairwise (uwiden_low val @ (value_type $I16X8)) (uwiden_high val)))) @@ -3212,7 +3218,7 @@ (x64_paddd dst addd_const))) ;; special case for the `i32x4.dot_i16x8_s` wasm instruction -(rule (lower +(rule 1 (lower (has_type $I32X4 (iadd_pairwise (imul (swiden_low x) (swiden_low y)) (imul (swiden_high x) (swiden_high y))))) diff --git a/cranelift/filetests/filetests/isa/x64/iadd-pairwise-avx.clif b/cranelift/filetests/filetests/isa/x64/iadd-pairwise-avx.clif new file mode 100644 index 0000000000..4ad4003e8b --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/iadd-pairwise-avx.clif @@ -0,0 +1,54 @@ +test compile precise-output +set enable_simd +target x86_64 has_avx + +function %iadd_pairwise_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = iadd_pairwise v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vphaddw %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vphaddw %xmm1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %iadd_pairwise_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = iadd_pairwise v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vphaddd %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vphaddd %xmm1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/isa/x64/iadd-pairwise.clif b/cranelift/filetests/filetests/isa/x64/iadd-pairwise.clif new file mode 100644 index 0000000000..2e1d1c85d0 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/iadd-pairwise.clif @@ -0,0 +1,54 @@ +test compile precise-output +set enable_simd +target x86_64 + +function %iadd_pairwise_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = iadd_pairwise v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; phaddw %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; phaddw %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %iadd_pairwise_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = iadd_pairwise v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; phaddd %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; phaddd %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq +