x64: Add AVX support for some more float-related instructions (#6092)
* x64: Add AVX encodings of `vcvt{ss2sd,sd2ss}`
Additionally update the instruction helpers to take an `XmmMem` argument
to allow load sinking into the instruction.
* x64: Add AVX encoding of `sqrts{s,d}`
* x64: Add AVX support for `rounds{s,d}`
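The `XmmMem` helper change noted under the first bullet is what makes the load sinking visible in the `%fpromote_load`/`%fdemote_load` expectations below (e.g. `vcvtss2sd (%rdx), %xmm0, %xmm0`): a helper that accepts a register-or-memory operand can fold a preceding load into the conversion instead of emitting a separate `movss`/`movsd`. The following is a minimal, self-contained Rust sketch of that idea; the `Xmm`, `Amode`, and `vcvtss2sd` definitions here are simplified stand-ins for illustration, not the actual `cranelift-codegen` types or helpers.

    // Illustrative sketch only: hypothetical stand-ins for the idea behind an
    // `XmmMem` operand; not the real cranelift-codegen definitions.

    /// A hypothetical XMM register, identified by number.
    #[derive(Clone, Copy)]
    struct Xmm(u8);

    /// A hypothetical base-plus-offset address.
    #[derive(Clone, Copy)]
    struct Amode {
        base: &'static str, // e.g. "rdx"
        offset: i32,
    }

    /// Either an XMM register or a memory address. A helper taking this
    /// operand can have a load "sunk" into it rather than requiring the
    /// value to be loaded into a register first.
    #[derive(Clone, Copy)]
    enum XmmMem {
        Reg(Xmm),
        Mem(Amode),
    }

    /// Sketch of a three-operand AVX helper, printed in AT&T order as
    /// `vcvtss2sd src2, src1, dst`, where src2 may come straight from memory.
    fn vcvtss2sd(dst: Xmm, src1: Xmm, src2: XmmMem) -> String {
        match src2 {
            XmmMem::Reg(r) => {
                format!("vcvtss2sd %xmm{}, %xmm{}, %xmm{}", r.0, src1.0, dst.0)
            }
            XmmMem::Mem(a) => {
                format!("vcvtss2sd {}(%{}), %xmm{}, %xmm{}", a.offset, a.base, src1.0, dst.0)
            }
        }
    }

    fn main() {
        // Register form, analogous to the plain %fpromote test below.
        assert_eq!(
            vcvtss2sd(Xmm(0), Xmm(0), XmmMem::Reg(Xmm(0))),
            "vcvtss2sd %xmm0, %xmm0, %xmm0"
        );
        // Load-sinking form, analogous to the %fpromote_load test below.
        assert_eq!(
            vcvtss2sd(Xmm(0), Xmm(0), XmmMem::Mem(Amode { base: "rdx", offset: 0 })),
            "vcvtss2sd 0(%rdx), %xmm0, %xmm0"
        );
    }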
cranelift/filetests/filetests/isa/x64/ceil-avx.clif (new file, 104 lines)
@@ -0,0 +1,104 @@
test compile precise-output
set enable_simd
target x86_64 has_avx

function %f1(f32) -> f32 {
block0(v0: f32):
  v1 = ceil v0
  return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vroundss $2, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vroundss $2, %xmm0, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %f2(f64) -> f64 {
block0(v0: f64):
  v1 = ceil v0
  return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vroundsd $2, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vroundsd $2, %xmm0, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %f4(f32x4) -> f32x4 {
block0(v0: f32x4):
  v1 = ceil v0
  return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vroundps $2, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vroundps $2, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %f4(f64x2) -> f64x2 {
block0(v0: f64x2):
  v1 = ceil v0
  return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vroundpd $2, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vroundpd $2, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

cranelift/filetests/filetests/isa/x64/fpromote-demote-avx.clif (new file, 130 lines)
@@ -0,0 +1,130 @@
test compile precise-output
set enable_simd
target x86_64 has_avx

function %fpromote(f32) -> f64 {
block0(v0: f32):
  v1 = fpromote.f64 v0
  return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcvtss2sd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcvtss2sd %xmm0, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %fpromote_load(i64, f32) -> f64 {
  ss0 = explicit_slot 16

block0(v1: i64, v2: f32):
  v3 = stack_addr.i64 ss0
  store.f32 v2, v3
  v4 = load.f32 v3
  v5 = fpromote.f64 v4
  return v5
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; subq %rsp, $16, %rsp
; block0:
; lea rsp(0 + virtual offset), %rdx
; vmovss %xmm0, 0(%rdx)
; vcvtss2sd 0(%rdx), %xmm0
; addq %rsp, $16, %rsp
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; subq $0x10, %rsp
; block1: ; offset 0x8
; leaq (%rsp), %rdx
; vmovss %xmm0, (%rdx) ; trap: heap_oob
; vcvtss2sd (%rdx), %xmm0, %xmm0 ; trap: heap_oob
; addq $0x10, %rsp
; movq %rbp, %rsp
; popq %rbp
; retq

function %fdemote(f64) -> f32 {
block0(v0: f64):
  v1 = fdemote.f32 v0
  return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcvtsd2ss %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcvtsd2ss %xmm0, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %fdemote_load(i64, f64) -> f32 {
  ss0 = explicit_slot 16

block0(v1: i64, v2: f64):
  v3 = stack_addr.i64 ss0
  store.f64 v2, v3
  v4 = load.f64 v3
  v5 = fdemote.f32 v4
  return v5
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; subq %rsp, $16, %rsp
; block0:
; lea rsp(0 + virtual offset), %rdx
; vmovsd %xmm0, 0(%rdx)
; vcvtsd2ss 0(%rdx), %xmm0
; addq %rsp, $16, %rsp
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; subq $0x10, %rsp
; block1: ; offset 0x8
; leaq (%rsp), %rdx
; vmovsd %xmm0, (%rdx) ; trap: heap_oob
; vcvtsd2ss (%rdx), %xmm0, %xmm0 ; trap: heap_oob
; addq $0x10, %rsp
; movq %rbp, %rsp
; popq %rbp
; retq

cranelift/filetests/filetests/isa/x64/fpromote-demote.clif (new file, 130 lines)
@@ -0,0 +1,130 @@
test compile precise-output
set enable_simd
target x86_64

function %fpromote(f32) -> f64 {
block0(v0: f32):
  v1 = fpromote.f64 v0
  return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvtss2sd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; cvtss2sd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %fpromote_load(i64, f32) -> f64 {
  ss0 = explicit_slot 16

block0(v1: i64, v2: f32):
  v3 = stack_addr.i64 ss0
  store.f32 v2, v3
  v4 = load.f32 v3
  v5 = fpromote.f64 v4
  return v5
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; subq %rsp, $16, %rsp
; block0:
; lea rsp(0 + virtual offset), %rdx
; movss %xmm0, 0(%rdx)
; cvtss2sd 0(%rdx), %xmm0
; addq %rsp, $16, %rsp
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; subq $0x10, %rsp
; block1: ; offset 0x8
; leaq (%rsp), %rdx
; movss %xmm0, (%rdx) ; trap: heap_oob
; cvtss2sd (%rdx), %xmm0 ; trap: heap_oob
; addq $0x10, %rsp
; movq %rbp, %rsp
; popq %rbp
; retq

function %fdemote(f64) -> f32 {
block0(v0: f64):
  v1 = fdemote.f32 v0
  return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvtsd2ss %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; cvtsd2ss %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %fdemote_load(i64, f64) -> f32 {
  ss0 = explicit_slot 16

block0(v1: i64, v2: f64):
  v3 = stack_addr.i64 ss0
  store.f64 v2, v3
  v4 = load.f64 v3
  v5 = fdemote.f32 v4
  return v5
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; subq %rsp, $16, %rsp
; block0:
; lea rsp(0 + virtual offset), %rdx
; movsd %xmm0, 0(%rdx)
; cvtsd2ss 0(%rdx), %xmm0
; addq %rsp, $16, %rsp
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; subq $0x10, %rsp
; block1: ; offset 0x8
; leaq (%rsp), %rdx
; movsd %xmm0, (%rdx) ; trap: heap_oob
; cvtsd2ss (%rdx), %xmm0 ; trap: heap_oob
; addq $0x10, %rsp
; movq %rbp, %rsp
; popq %rbp
; retq

cranelift/filetests/filetests/isa/x64/fsqrt-avx.clif (new file, 54 lines)
@@ -0,0 +1,54 @@
test compile precise-output
set enable_simd
target x86_64 has_avx

function %sqrt_f32(f32) -> f32 {
block0(v0: f32):
  v1 = sqrt v0
  return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vsqrtss %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vsqrtss %xmm0, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %sqrt_f64(f64) -> f64 {
block0(v0: f64):
  v1 = sqrt v0
  return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vsqrtsd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vsqrtsd %xmm0, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

cranelift/filetests/filetests/isa/x64/fsqrt.clif (new file, 54 lines)
@@ -0,0 +1,54 @@
test compile precise-output
set enable_simd
target x86_64

function %sqrt_f32(f32) -> f32 {
block0(v0: f32):
  v1 = sqrt v0
  return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; sqrtss %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; sqrtss %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %sqrt_f64(f64) -> f64 {
block0(v0: f64):
  v1 = sqrt v0
  return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; sqrtsd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; sqrtsd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq