x64: Optimize store-of-extract-lane-0 (#5924)
* x64: Optimize store-of-extract-lane-0

  The `movss` and `movsd` instructions can be used to store the 0th lane of a `t32x4` or a `t64x2` vector (that is, `i32x4`/`f32x4` or `i64x2`/`f64x2`) into memory, which allows a `store` and an `extractlane` instruction to be fused into a single store.

* Fix merge conflict with `main`
This commit is contained in:
@@ -151,3 +151,107 @@ block0(v0: f64x2):
|
||||
; popq %rbp
|
||||
; retq
|
||||
|
||||
;; Lane 0 of an `i32x4` vector is extracted and then written with `store`,
;; using `v1` as the address operand. The `extractlane` result has no other use.
;; The expected output that follows shows one `movss %xmm0, 0(%rdi)` store
;; between the frame setup and teardown.
function %extract_i32x4_lane0_to_memory(i32x4, i64) {
|
||||
block0(v0: i32x4, v1: i64):
|
||||
v2 = extractlane v0, 0
|
||||
store v2, v1
|
||||
return
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; movss %xmm0, 0(%rdi)
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block1: ; offset 0x4
|
||||
; movss %xmm0, (%rdi) ; trap: heap_oob
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; retq
|
||||
|
||||
;; Lane 0 of an `f32x4` vector is extracted and then written with `store`,
;; using `v1` as the address operand. The `extractlane` result has no other use.
;; The expected output that follows shows one `movss %xmm0, 0(%rdi)` store
;; between the frame setup and teardown.
function %extract_f32x4_lane0_to_memory(f32x4, i64) {
|
||||
block0(v0: f32x4, v1: i64):
|
||||
v2 = extractlane v0, 0
|
||||
store v2, v1
|
||||
return
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; movss %xmm0, 0(%rdi)
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block1: ; offset 0x4
|
||||
; movss %xmm0, (%rdi) ; trap: heap_oob
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; retq
|
||||
|
||||
;; Lane 0 of an `i64x2` vector is extracted and then written with `store`,
;; using `v1` as the address operand. The `extractlane` result has no other use.
;; The expected output that follows shows one `movsd %xmm0, 0(%rdi)` store
;; between the frame setup and teardown.
function %extract_i64x2_lane0_to_memory(i64x2, i64) {
|
||||
block0(v0: i64x2, v1: i64):
|
||||
v2 = extractlane v0, 0
|
||||
store v2, v1
|
||||
return
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; movsd %xmm0, 0(%rdi)
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block1: ; offset 0x4
|
||||
; movsd %xmm0, (%rdi) ; trap: heap_oob
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; retq
|
||||
|
||||
;; Lane 0 of an `f64x2` vector is extracted and then written with `store`,
;; using `v1` as the address operand. The `extractlane` result has no other use.
;; The expected output that follows shows one `movsd %xmm0, 0(%rdi)` store
;; between the frame setup and teardown.
function %extract_f64x2_lane0_to_memory(f64x2, i64) {
|
||||
block0(v0: f64x2, v1: i64):
|
||||
v2 = extractlane v0, 0
|
||||
store v2, v1
|
||||
return
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; movsd %xmm0, 0(%rdi)
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block1: ; offset 0x4
|
||||
; movsd %xmm0, (%rdi) ; trap: heap_oob
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; retq
|
||||
|
||||
|
||||
Reference in New Issue
Block a user