x64: Refactor and add extractlane special case for uextend/sextend (#6022)
* x64: Refactor sextend/uextend rules
Move much of the meaty logic from these lowering rules into the
`extend_to_gpr` helper so that other callers of `extend_to_gpr` can also
elide instructions. This additionally simplifies the `sextend` and
`uextend` lowerings, which now rely on optimizations happening within the
`extend_to_gpr` helper.
* x64: Skip `uextend` for `pextr{b,w}` instructions
These instructions are documented as automatically zeroing the upper
bits of the destination register, so `uextend` operations on their
results can be skipped. This slightly improves codegen for the wasm
`i{8x16,16x8}.extract_lane_u` instructions, for example.
* Modernize an extractor pattern
* Trim some superfluous match clauses
Additionally rejigger priorities to be "mostly default" now.
* Refactor 32-to-64 predicate to a helper
Also adjust the pattern matched in the `extend_to_gpr` helper.
* Slightly refactor pextr{b,w} case
* Review comments
This commit is contained in:
@@ -27,3 +27,81 @@ block0(v0: i32, v1: i32):
|
||||
; popq %rbp
|
||||
; retq
|
||||
|
||||
;; Zero-extends lane 1 of an i8x16 vector to i16. The expected output that
;; follows contains a single `pextrb` and no separate zero-extension
;; instruction.
function %extractlane_i8x16_i16(i8x16) -> i16 {
|
||||
block0(v0: i8x16):
|
||||
v1 = extractlane v0, 1
|
||||
v2 = uextend.i16 v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; pextrb $1, %xmm0, %rax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block1: ; offset 0x4
|
||||
; pextrb $1, %xmm0, %eax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; retq
|
||||
|
||||
;; Zero-extends lane 1 of an i8x16 vector to i32. The expected output that
;; follows contains a single `pextrb` and no separate zero-extension
;; instruction.
function %extractlane_i8x16_i32(i8x16) -> i32 {
|
||||
block0(v0: i8x16):
|
||||
v1 = extractlane v0, 1
|
||||
v2 = uextend.i32 v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; pextrb $1, %xmm0, %rax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block1: ; offset 0x4
|
||||
; pextrb $1, %xmm0, %eax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; retq
|
||||
|
||||
;; Zero-extends lane 1 of an i16x8 vector to i32. The expected output that
;; follows contains a single `pextrw` and no separate zero-extension
;; instruction.
function %extractlane_i16x8_i32(i16x8) -> i32 {
|
||||
block0(v0: i16x8):
|
||||
v1 = extractlane v0, 1
|
||||
v2 = uextend.i32 v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; pextrw $1, %xmm0, %rax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block1: ; offset 0x4
|
||||
; pextrw $1, %xmm0, %eax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; retq
|
||||
|
||||
|
||||
Reference in New Issue
Block a user