x64: Sink constant loads into xmm instructions (#5880)

A number of places in the x64 backend use 128-bit constants for various
wasm SIMD-related instructions, but most of them currently go through the
`x64_xmm_load_const` helper to load the constant into a register first.
Almost all xmm instructions, however, accept a memory operand, which means
these loads can be folded into the instructions themselves to help reduce
register pressure. This commit adds an automatic conversion from a
`VCodeConstant` to an `XmmMem` value and removes the explicit loads in
favor of forwarding the `XmmMem` value directly to the underlying
instruction. Note that some uses of `x64_xmm_load_const` remain since they
appear in contexts where load sinking won't work (e.g. the constant is the
first operand, not the second, of a non-commutative instruction).
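
As a rough sketch of the mechanism (the types below are simplified
stand-ins for the Cranelift types of the same name, and the lowering
helpers are hypothetical), the change amounts to letting a pooled constant
be used directly wherever an `XmmMem` operand is accepted instead of first
materializing it into a scratch register:

```rust
// Rough sketch only: `VCodeConstant` and `XmmMem` are simplified stand-ins
// for the Cranelift types of the same name; the lowering helpers are
// hypothetical and just print the assembly they would emit.

/// Handle to a 128-bit constant placed in the function's constant pool.
#[derive(Clone, Copy)]
struct VCodeConstant(u32);

/// An operand accepted by most xmm instructions: either a register or a
/// memory location (which covers RIP-relative constant-pool references).
#[allow(dead_code)]
enum XmmMem {
    Reg(u8),
    Const(VCodeConstant),
}

/// The automatic conversion added by this change: a constant can be handed
/// to anything expecting an `XmmMem` operand without an explicit load.
impl From<VCodeConstant> for XmmMem {
    fn from(c: VCodeConstant) -> Self {
        XmmMem::Const(c)
    }
}

/// Before: materialize the constant with a separate `movdqu` into a scratch
/// register, then use the register operand (extra instruction, extra
/// register pressure).
fn lower_pshufb_before(dst: u8, mask: VCodeConstant) -> Vec<String> {
    vec![
        format!("movdqu const({}), %xmm7", mask.0),
        format!("pshufb %xmm7, %xmm{dst}"),
    ]
}

/// After: forward the constant as the instruction's memory operand, sinking
/// the load into the `pshufb` itself.
fn lower_pshufb_after(dst: u8, mask: VCodeConstant) -> Vec<String> {
    let operand = match XmmMem::from(mask) {
        XmmMem::Reg(r) => format!("%xmm{r}"),
        XmmMem::Const(c) => format!("const({})", c.0),
    };
    vec![format!("pshufb {operand}, %xmm{dst}")]
}

fn main() {
    let mask = VCodeConstant(0);
    println!("{:?}", lower_pshufb_before(0, mask)); // two instructions, one scratch reg
    println!("{:?}", lower_pshufb_after(0, mask)); // one instruction, no scratch reg
}
```

This mirrors the filetest updates below, where standalone `movdqu const(N)`
loads disappear and `pshufb`/`paddusb` take `const(N)` (RIP-relative in the
disassembly) operands directly.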
Alex Crichton
2023-02-27 16:02:42 -06:00
committed by GitHub
parent 9b86a0b9b1
commit f2dce812c3
11 changed files with 147 additions and 182 deletions

@@ -17,12 +17,10 @@ block0:
; movq %rsp, %rbp
; block0:
; movdqu const(3), %xmm0
-; movdqu const(2), %xmm4
-; movdqu const(0), %xmm2
-; pshufb %xmm0, %xmm2, %xmm0
-; movdqu const(1), %xmm6
-; pshufb %xmm4, %xmm6, %xmm4
-; por %xmm0, %xmm4, %xmm0
+; movdqu const(2), %xmm2
+; pshufb %xmm0, const(0), %xmm0
+; pshufb %xmm2, const(1), %xmm2
+; por %xmm0, %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@@ -32,13 +30,11 @@ block0:
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
-; movdqu 0x64(%rip), %xmm0
-; movdqu 0x4c(%rip), %xmm4
-; movdqu 0x24(%rip), %xmm2
-; pshufb %xmm2, %xmm0
-; movdqu 0x27(%rip), %xmm6
-; pshufb %xmm6, %xmm4
-; por %xmm4, %xmm0
+; movdqu 0x54(%rip), %xmm0
+; movdqu 0x3c(%rip), %xmm2
+; pshufb 0x13(%rip), %xmm0
+; pshufb 0x1a(%rip), %xmm2
+; por %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
@@ -50,10 +46,6 @@ block0:
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
-; addb %al, (%rax)
-; addb %al, (%rax)
-; addb %al, (%rax)
-; addb %al, (%rax)
; addb $0x80, -0x7f7f7f80(%rax)
; addb $0x80, -0x7f7f7f80(%rax)
; addb $0, 0x101(%rax)
@@ -84,8 +76,7 @@ block0:
; movq %rsp, %rbp
; block0:
; movdqu const(1), %xmm0
-; movdqu const(0), %xmm1
-; pshufb %xmm0, %xmm1, %xmm0
+; pshufb %xmm0, const(0), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@@ -96,8 +87,7 @@ block0:
; movq %rsp, %rbp
; block1: ; offset 0x4
; movdqu 0x24(%rip), %xmm0
-; movdqu 0xc(%rip), %xmm1
-; pshufb %xmm1, %xmm0
+; pshufb 0xb(%rip), %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
@@ -109,6 +99,8 @@ block0:
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
+; addb %al, (%rax)
+; addb %al, (%rax)
; addb %al, (%rcx, %rax)
; addb %al, (%rax)
; addb %al, (%rax)
@@ -131,10 +123,9 @@ block0:
; movq %rsp, %rbp
; block0:
; movdqu const(1), %xmm0
-; movdqu const(1), %xmm2
-; movdqu const(0), %xmm3
-; paddusb %xmm2, %xmm3, %xmm2
-; pshufb %xmm0, %xmm2, %xmm0
+; movdqu const(1), %xmm1
+; paddusb %xmm1, const(0), %xmm1
+; pshufb %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@@ -145,16 +136,17 @@ block0:
; movq %rsp, %rbp
; block1: ; offset 0x4
; movdqu 0x34(%rip), %xmm0
-; movdqu 0x2c(%rip), %xmm2
-; movdqu 0x14(%rip), %xmm3
-; paddusb %xmm3, %xmm2
-; pshufb %xmm2, %xmm0
+; movdqu 0x2c(%rip), %xmm1
+; paddusb 0x14(%rip), %xmm1
+; pshufb %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
+; addb %al, (%rax)
+; addb %al, (%rax)
; jo 0xa2
; jo 0xa4
; jo 0xa6