Files
wasmtime/cranelift/filetests/filetests/isa/x64/extractlane.clif
Alex Crichton 0ec7b872fa x64: Optimize store-of-extract-lane-0 (#5924)
* x64: Optimize store-of-extract-lane-0

The `movss` and `movsd` instructions can be used to store the 0th lane
of a `t32x4` or a `t64x2` vector into memory, enabling fusing a `store`
and an `extractlane` instruction.

* Fix merge conflict with `main`
2023-03-10 01:06:38 +00:00

258 lines
4.1 KiB
Plaintext

test compile precise-output
target x86_64
;; Returns lane 1 of an `i8x16` vector as a scalar `i8`.
;; The expectation that follows pins the lowering to a single `pextrb $1`.
function %f1(i8x16) -> i8 {
block0(v0: i8x16):
v1 = extractlane v0, 1
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; pextrb $1, %xmm0, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; pextrb $1, %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; retq
;; Returns lane 1 of an `i16x8` vector as a scalar `i16`.
;; The expectation that follows pins the lowering to a single `pextrw $1`.
function %f2(i16x8) -> i16 {
block0(v0: i16x8):
v1 = extractlane v0, 1
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; pextrw $1, %xmm0, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; pextrw $1, %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; retq
;; Returns lane 1 of an `i32x4` vector as a scalar `i32`.
;; The expectation that follows pins the lowering to a single `pextrd $1`.
function %f3(i32x4) -> i32 {
block0(v0: i32x4):
v1 = extractlane v0, 1
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; pextrd $1, %xmm0, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; pextrd $1, %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; retq
;; Returns lane 1 of an `i64x2` vector as a scalar `i64`.
;; The expectation that follows pins the lowering to a single `pextrq $1`.
function %f4(i64x2) -> i64 {
block0(v0: i64x2):
v1 = extractlane v0, 1
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; pextrq $1, %xmm0, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; pextrq $1, %xmm0, %rax
; movq %rbp, %rsp
; popq %rbp
; retq
;; Returns lane 1 of an `f32x4` vector as a scalar `f32`.
;; Unlike the integer cases, the expected lowering stays in `%xmm0`:
;; a `pshufd $1` rather than a `pextr*` into a general-purpose register.
function %f5(f32x4) -> f32 {
block0(v0: f32x4):
v1 = extractlane v0, 1
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; pshufd $1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; pshufd $1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
;; Returns lane 1 of an `f64x2` vector as a scalar `f64`.
;; The expected lowering is `pshufd $238` on `%xmm0`; the immediate
;; (0xee) selects source dwords 2 and 3 for the low 64 bits of the result.
function %f6(f64x2) -> f64 {
block0(v0: f64x2):
v1 = extractlane v0, 1
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; pshufd $238, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; pshufd $0xee, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
;; Extracts lane 0 of an `i32x4` vector (`v0`) and stores it to the address
;; in `v1`; nothing is returned.
;; The expectation that follows pins the extract+store pair to a single
;; `movss %xmm0, 0(%rdi)`.
function %extract_i32x4_lane0_to_memory(i32x4, i64) {
block0(v0: i32x4, v1: i64):
v2 = extractlane v0, 0
store v2, v1
return
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movss %xmm0, 0(%rdi)
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movss %xmm0, (%rdi) ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq
;; Extracts lane 0 of an `f32x4` vector (`v0`) and stores it to the address
;; in `v1`; nothing is returned.
;; The expectation that follows pins the extract+store pair to a single
;; `movss %xmm0, 0(%rdi)`.
function %extract_f32x4_lane0_to_memory(f32x4, i64) {
block0(v0: f32x4, v1: i64):
v2 = extractlane v0, 0
store v2, v1
return
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movss %xmm0, 0(%rdi)
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movss %xmm0, (%rdi) ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq
;; Extracts lane 0 of an `i64x2` vector (`v0`) and stores it to the address
;; in `v1`; nothing is returned.
;; The expectation that follows pins the extract+store pair to a single
;; `movsd %xmm0, 0(%rdi)`.
function %extract_i64x2_lane0_to_memory(i64x2, i64) {
block0(v0: i64x2, v1: i64):
v2 = extractlane v0, 0
store v2, v1
return
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movsd %xmm0, 0(%rdi)
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movsd %xmm0, (%rdi) ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq
;; Extracts lane 0 of an `f64x2` vector (`v0`) and stores it to the address
;; in `v1`; nothing is returned.
;; The expectation that follows pins the extract+store pair to a single
;; `movsd %xmm0, 0(%rdi)`.
function %extract_f64x2_lane0_to_memory(f64x2, i64) {
block0(v0: f64x2, v1: i64):
v2 = extractlane v0, 0
store v2, v1
return
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movsd %xmm0, 0(%rdi)
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movsd %xmm0, (%rdi) ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq