x64: Sink constant loads into xmm instructions (#5880)
A number of places in the x64 backend use 128-bit constants for various wasm SIMD-related instructions, and most of them currently rely on the `x64_xmm_load_const` helper to load the constant into a register. Almost all xmm instructions, however, accept a memory operand, which means these loads can be folded into the consuming instruction to help reduce register pressure.

An automatic conversion from a `VCodeConstant` to an `XmmMem` value was added, and the explicit loads were then removed in favor of forwarding the `XmmMem` value directly to the underlying instruction. Note that some instances of `x64_xmm_load_const` remain, since they're used in contexts where load sinking won't work (e.g. the constant is the first operand, not the second, of a non-commutative instruction).
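To illustrate the shape of the change, here is a minimal Rust sketch. The types, fields, and the `From` impl below are simplified stand-ins (the real `VCodeConstant` and `XmmMem` live in cranelift-codegen's x64 backend, which wires the conversion up through its lowering rules rather than a literal `From` impl), but they show how accepting `impl Into<XmmMem>` lets a constant handle flow into an instruction as a memory operand instead of forcing a separate load:

    // Hypothetical stand-ins for the backend's operand types.
    #[derive(Clone, Copy)]
    struct VCodeConstant(u32); // handle into the 128-bit constant pool

    #[derive(Clone, Copy)]
    enum XmmMem {
        Reg(u8),              // an xmm register number
        Const(VCodeConstant), // a RIP-relative reference to a pooled constant
    }

    // The automatic conversion described above: a constant handle can be
    // used directly wherever an `XmmMem` operand is expected.
    impl From<VCodeConstant> for XmmMem {
        fn from(c: VCodeConstant) -> XmmMem {
            XmmMem::Const(c)
        }
    }

    // A lowering helper that accepts either a register or a constant. When
    // given a constant, the load is "sunk" into the instruction itself as
    // a memory operand rather than emitted as a separate `movdqu`.
    fn pshufb(dst: u8, src: impl Into<XmmMem>) -> String {
        match src.into() {
            XmmMem::Reg(r) => format!("pshufb %xmm{r}, %xmm{dst}"),
            XmmMem::Const(c) => format!("pshufb const({}), %xmm{}", c.0, dst),
        }
    }

    fn main() {
        // Register form, as before: "pshufb %xmm1, %xmm0".
        println!("{}", pshufb(0, XmmMem::Reg(1)));
        // Sunk-constant form: "pshufb const(0), %xmm0".
        println!("{}", pshufb(0, VCodeConstant(0)));
    }

In the test expectations below, this shows up as `pshufb %xmm0, const(0), %xmm0` in the VCode and as a RIP-relative form like `pshufb 0xb(%rip), %xmm0` in the final disassembly, replacing a separate `movdqu` plus a register-register `pshufb`.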
@@ -17,12 +17,10 @@ block0:
; movq %rsp, %rbp
; block0:
; movdqu const(3), %xmm0
; movdqu const(2), %xmm4
; movdqu const(0), %xmm2
; pshufb %xmm0, %xmm2, %xmm0
; movdqu const(1), %xmm6
; pshufb %xmm4, %xmm6, %xmm4
; por %xmm0, %xmm4, %xmm0
; movdqu const(2), %xmm2
; pshufb %xmm0, const(0), %xmm0
; pshufb %xmm2, const(1), %xmm2
; por %xmm0, %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@@ -32,13 +30,11 @@ block0:
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movdqu 0x64(%rip), %xmm0
; movdqu 0x4c(%rip), %xmm4
; movdqu 0x24(%rip), %xmm2
; pshufb %xmm2, %xmm0
; movdqu 0x27(%rip), %xmm6
; pshufb %xmm6, %xmm4
; por %xmm4, %xmm0
; movdqu 0x54(%rip), %xmm0
; movdqu 0x3c(%rip), %xmm2
; pshufb 0x13(%rip), %xmm0
; pshufb 0x1a(%rip), %xmm2
; por %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
@@ -50,10 +46,6 @@ block0:
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb $0x80, -0x7f7f7f80(%rax)
; addb $0x80, -0x7f7f7f80(%rax)
; addb $0, 0x101(%rax)
@@ -84,8 +76,7 @@ block0:
; movq %rsp, %rbp
; block0:
; movdqu const(1), %xmm0
; movdqu const(0), %xmm1
; pshufb %xmm0, %xmm1, %xmm0
; pshufb %xmm0, const(0), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@@ -96,8 +87,7 @@ block0:
; movq %rsp, %rbp
; block1: ; offset 0x4
; movdqu 0x24(%rip), %xmm0
; movdqu 0xc(%rip), %xmm1
; pshufb %xmm1, %xmm0
; pshufb 0xb(%rip), %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
@@ -109,6 +99,8 @@ block0:
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rcx, %rax)
; addb %al, (%rax)
; addb %al, (%rax)
@@ -131,10 +123,9 @@ block0:
; movq %rsp, %rbp
; block0:
; movdqu const(1), %xmm0
; movdqu const(1), %xmm2
; movdqu const(0), %xmm3
; paddusb %xmm2, %xmm3, %xmm2
; pshufb %xmm0, %xmm2, %xmm0
; movdqu const(1), %xmm1
; paddusb %xmm1, const(0), %xmm1
; pshufb %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@@ -145,16 +136,17 @@ block0:
; movq %rsp, %rbp
; block1: ; offset 0x4
; movdqu 0x34(%rip), %xmm0
; movdqu 0x2c(%rip), %xmm2
; movdqu 0x14(%rip), %xmm3
; paddusb %xmm3, %xmm2
; pshufb %xmm2, %xmm0
; movdqu 0x2c(%rip), %xmm1
; paddusb 0x14(%rip), %xmm1
; pshufb %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; jo 0xa2
; jo 0xa4
; jo 0xa6