x64: Add AVX support for some more float-related instructions (#6092)
* x64: Add AVX encodings of `vcvt{ss2sd,sd2ss}`

  Additionally, update the instruction helpers to take an `XmmMem` argument
  to allow load sinking into the instruction (see the sketch after this
  list).
* x64: Add AVX encoding of `sqrts{s,d}`
* x64: Add AVX support for `rounds{s,d}`
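
For readers skimming the diff, the `XmmMem` change in the first bullet is what enables load sinking: an operand that may be either a register or a memory address lets lowering fold a preceding load into the consuming instruction instead of materializing it through a separate `movss`/`movsd`. Below is a minimal sketch of the idea in Rust, using simplified stand-ins for the backend's `RegMem`/`XmmMem` types; the names and shapes here are illustrative, not Cranelift's real definitions.

```rust
// Hypothetical, simplified operand type: a register number or an address.
enum XmmMem {
    Reg(u8),     // an xmm register number
    Mem(String), // an addressing mode, e.g. "(%rdx)"
}

// With an `XmmMem` operand the lowering can emit the conversion directly
// from memory; with a plain `Xmm` operand it would first need a separate
// `vmovss` load into a temporary register.
fn emit_vcvtss2sd(src: &XmmMem, dst: u8) -> String {
    match src {
        XmmMem::Reg(r) => format!("vcvtss2sd %xmm{r}, %xmm{dst}, %xmm{dst}"),
        XmmMem::Mem(addr) => format!("vcvtss2sd {addr}, %xmm{dst}, %xmm{dst}"),
    }
}

fn main() {
    // Mirrors the `fpromote_load` filetest below, where the `load.f32` is
    // sunk into `vcvtss2sd (%rdx), %xmm0, %xmm0`.
    assert_eq!(
        emit_vcvtss2sd(&XmmMem::Mem("(%rdx)".to_string()), 0),
        "vcvtss2sd (%rdx), %xmm0, %xmm0"
    );
}
```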
@@ -1299,6 +1299,12 @@
         Vpmovmskb
         Vcvtsi2ss
         Vcvtsi2sd
+        Vcvtss2sd
+        Vcvtsd2ss
+        Vsqrtss
+        Vsqrtsd
+        Vroundss
+        Vroundsd
 ))

 (type Avx512Opcode extern
@@ -3348,11 +3354,17 @@
 (decl x64_roundss (XmmMem RoundImm) Xmm)
 (rule (x64_roundss src1 round)
       (xmm_unary_rm_r_imm (SseOpcode.Roundss) src1 (encode_round_imm round)))
+(rule 1 (x64_roundss src1 round)
+      (if-let $true (use_avx_simd))
+      (xmm_unary_rm_r_imm_vex (AvxOpcode.Vroundss) src1 (encode_round_imm round)))

 ;; Helper for creating `roundsd` instructions.
 (decl x64_roundsd (XmmMem RoundImm) Xmm)
 (rule (x64_roundsd src1 round)
       (xmm_unary_rm_r_imm (SseOpcode.Roundsd) src1 (encode_round_imm round)))
+(rule 1 (x64_roundsd src1 round)
+      (if-let $true (use_avx_simd))
+      (xmm_unary_rm_r_imm_vex (AvxOpcode.Vroundsd) src1 (encode_round_imm round)))

 ;; Helper for creating `roundps` instructions.
 (decl x64_roundps (XmmMem RoundImm) Xmm)
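
Both the SSE and the new AVX rules funnel the rounding mode through `encode_round_imm`. As a reminder of what that immediate looks like, here is a sketch of the 8-bit immediate consumed by `roundss`/`roundsd` and their VEX forms; the bit layout follows the SSE4.1 instruction definition, and the Rust names below are illustrative, not Cranelift's `RoundImm` type.

```rust
/// Rounding modes selectable in bits 1:0 of the roundss/roundsd immediate.
#[derive(Clone, Copy)]
enum RoundMode {
    Nearest = 0b00, // round to nearest, ties to even
    Down = 0b01,    // toward -infinity: `floor`
    Up = 0b10,      // toward +infinity: `ceil`
    Zero = 0b11,    // toward zero: `trunc`
}

fn encode_round_imm(mode: RoundMode) -> u8 {
    // Bit 2 clear = use the immediate's mode rather than MXCSR.RC;
    // bit 3 clear = do not suppress precision exceptions.
    mode as u8
}

fn main() {
    // Matches the `vroundss $2` / `vroundsd $2` in the ceil filetests below.
    assert_eq!(encode_round_imm(RoundMode::Up), 2);
}
```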
@@ -3985,10 +3997,16 @@
 ;; Helper for creating `sqrtss` instructions.
 (decl x64_sqrtss (XmmMem) Xmm)
 (rule (x64_sqrtss x) (xmm_unary_rm_r_unaligned (SseOpcode.Sqrtss) x))
+(rule 1 (x64_sqrtss x)
+      (if-let $true (use_avx_simd))
+      (xmm_unary_rm_r_vex (AvxOpcode.Vsqrtss) x))

 ;; Helper for creating `sqrtsd` instructions.
 (decl x64_sqrtsd (XmmMem) Xmm)
 (rule (x64_sqrtsd x) (xmm_unary_rm_r_unaligned (SseOpcode.Sqrtsd) x))
+(rule 1 (x64_sqrtsd x)
+      (if-let $true (use_avx_simd))
+      (xmm_unary_rm_r_vex (AvxOpcode.Vsqrtsd) x))

 ;; Helper for creating `sqrtps` instructions.
 (decl x64_sqrtps (XmmMem) Xmm)
@@ -4005,12 +4023,18 @@
       (xmm_unary_rm_r_vex (AvxOpcode.Vsqrtpd) x))

 ;; Helper for creating `cvtss2sd` instructions.
-(decl x64_cvtss2sd (Xmm) Xmm)
-(rule (x64_cvtss2sd x) (xmm_unary_rm_r (SseOpcode.Cvtss2sd) x))
+(decl x64_cvtss2sd (XmmMem) Xmm)
+(rule (x64_cvtss2sd x) (xmm_unary_rm_r_unaligned (SseOpcode.Cvtss2sd) x))
+(rule 1 (x64_cvtss2sd x)
+      (if-let $true (use_avx_simd))
+      (xmm_unary_rm_r_vex (AvxOpcode.Vcvtss2sd) x))

 ;; Helper for creating `cvtsd2ss` instructions.
-(decl x64_cvtsd2ss (Xmm) Xmm)
-(rule (x64_cvtsd2ss x) (xmm_unary_rm_r (SseOpcode.Cvtsd2ss) x))
+(decl x64_cvtsd2ss (XmmMem) Xmm)
+(rule (x64_cvtsd2ss x) (xmm_unary_rm_r_unaligned (SseOpcode.Cvtsd2ss) x))
+(rule 1 (x64_cvtsd2ss x)
+      (if-let $true (use_avx_simd))
+      (xmm_unary_rm_r_vex (AvxOpcode.Vcvtsd2ss) x))

 ;; Helper for creating `cvtdq2ps` instructions.
 (decl x64_cvtdq2ps (XmmMem) Xmm)
@@ -1722,7 +1722,13 @@ impl AvxOpcode {
             | AvxOpcode::Vmovmskpd
             | AvxOpcode::Vpmovmskb
             | AvxOpcode::Vcvtsi2ss
-            | AvxOpcode::Vcvtsi2sd => {
+            | AvxOpcode::Vcvtsi2sd
+            | AvxOpcode::Vcvtss2sd
+            | AvxOpcode::Vcvtsd2ss
+            | AvxOpcode::Vsqrtss
+            | AvxOpcode::Vsqrtsd
+            | AvxOpcode::Vroundss
+            | AvxOpcode::Vroundsd => {
                 smallvec![InstructionSet::AVX]
             }
@@ -2405,17 +2405,36 @@ pub(crate) fn emit(
                 AvxOpcode::Vbroadcastss => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x18),
                 AvxOpcode::Vmovddup => (LegacyPrefixes::_F2, OpcodeMap::_0F, 0x12),
+                AvxOpcode::Vcvtss2sd => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x5A),
+                AvxOpcode::Vcvtsd2ss => (LegacyPrefixes::_F2, OpcodeMap::_0F, 0x5A),
+                AvxOpcode::Vsqrtss => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x51),
+                AvxOpcode::Vsqrtsd => (LegacyPrefixes::_F2, OpcodeMap::_0F, 0x51),
                 _ => panic!("unexpected rmr_imm_vex opcode {op:?}"),
             };

-            VexInstruction::new()
+            let vex = VexInstruction::new()
                 .length(VexVectorLength::V128)
                 .prefix(prefix)
                 .map(map)
                 .opcode(opcode)
                 .reg(dst.to_real_reg().unwrap().hw_enc())
-                .rm(src)
-                .encode(sink);
+                .rm(src);
+
+            // These opcodes take a second operand through `vvvv` which copies
+            // the upper bits into the destination register. That's not
+            // reflected in the CLIF instruction, however, since the SSE version
+            // doesn't have this functionality. Instead just copy whatever
+            // happens to already be in the destination, which at least is what
+            // LLVM seems to do.
+            let vex = match op {
+                AvxOpcode::Vcvtss2sd
+                | AvxOpcode::Vcvtsd2ss
+                | AvxOpcode::Vsqrtss
+                | AvxOpcode::Vsqrtsd => vex.vvvv(dst.to_real_reg().unwrap().hw_enc()),
+                _ => vex,
+            };
+            vex.encode(sink);
         }

         Inst::XmmUnaryRmRImmVex { op, src, dst, imm } => {
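
The `vvvv` handling above is easier to see at the byte level. Below is a sketch of the two-byte VEX prefix (the form usable when the opcode map is `0F`), with the field layout per the Intel SDM; the helper is illustrative and is not Cranelift's `VexInstruction` API.

```rust
/// Build the two-byte VEX prefix `C5 xx`. All register-specifier fields,
/// including `vvvv`, are stored inverted (one's complement).
fn vex2(r: bool, vvvv: u8, l: bool, pp: u8) -> [u8; 2] {
    let byte = (((!r) as u8) << 7)   // R: inverted REX.R-equivalent
        | ((!vvvv & 0xF) << 3)       // vvvv: inverted second-source register
        | ((l as u8) << 2)           // L: 0 = 128-bit vector length
        | (pp & 0b11);               // pp: 0=none, 1=66, 2=F3, 3=F2
    [0xC5, byte]
}

fn main() {
    // vsqrtss %xmm0, %xmm0, %xmm0: prefix F3 (pp = 2), and vvvv names xmm0,
    // the destination itself, matching the "copy whatever is already in the
    // destination" choice in the emit code above.
    assert_eq!(vex2(false, 0, false, 2), [0xC5, 0xFA]);
    // The full instruction is C5 FA 51 C0: prefix, opcode 0x51, ModRM 0xC0.
}
```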
@@ -2433,18 +2452,29 @@ pub(crate) fn emit(
                 AvxOpcode::Vpshuflw => (LegacyPrefixes::_F2, OpcodeMap::_0F, 0x70),
                 AvxOpcode::Vpshufhw => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x70),
                 AvxOpcode::Vpshufd => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x70),
+                AvxOpcode::Vroundss => (LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x0A),
+                AvxOpcode::Vroundsd => (LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x0B),
                 _ => panic!("unexpected rmr_imm_vex opcode {op:?}"),
             };

-            VexInstruction::new()
+            let vex = VexInstruction::new()
                 .length(VexVectorLength::V128)
                 .prefix(prefix)
                 .map(map)
                 .opcode(opcode)
                 .reg(dst.to_real_reg().unwrap().hw_enc())
                 .rm(src)
-                .imm(*imm)
-                .encode(sink);
+                .imm(*imm);
+
+            // See comments in similar block above in `XmmUnaryRmRVex` for what
+            // this is doing.
+            let vex = match op {
+                AvxOpcode::Vroundss | AvxOpcode::Vroundsd => {
+                    vex.vvvv(dst.to_real_reg().unwrap().hw_enc())
+                }
+                _ => vex,
+            };
+            vex.encode(sink);
         }

         Inst::XmmMovRMVex { op, src, dst } => {
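
`vroundss`/`vroundsd` live in opcode map `0F3A`, which the two-byte `C5` prefix cannot express, so they take the three-byte `C4` form and carry a trailing immediate. Again a byte-level sketch under the same caveats (field layout per the Intel SDM; not Cranelift's encoder):

```rust
/// Build the three-byte VEX prefix `C4 xx xx`. `mmmmm` selects the opcode
/// map: 1 = 0F, 2 = 0F38, 3 = 0F3A.
fn vex3(r: bool, x: bool, b: bool, mmmmm: u8, w: bool, vvvv: u8, l: bool, pp: u8) -> [u8; 3] {
    // R/X/B and vvvv are stored inverted, as in the two-byte form.
    let b1 = (((!r) as u8) << 7) | (((!x) as u8) << 6) | (((!b) as u8) << 5) | (mmmmm & 0x1F);
    let b2 = ((w as u8) << 7) | ((!vvvv & 0xF) << 3) | ((l as u8) << 2) | (pp & 0b11);
    [0xC4, b1, b2]
}

fn main() {
    // vroundss $2, %xmm0, %xmm0, %xmm0 encodes as C4 E3 79 0A C0 02:
    // VEX.128.66.0F3A prefix, opcode 0x0A, ModRM 0xC0, immediate 2,
    // with vvvv again pointing at the destination register.
    assert_eq!(vex3(false, false, false, 3, false, 0, false, 1), [0xC4, 0xE3, 0x79]);
}
```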
cranelift/filetests/filetests/isa/x64/ceil-avx.clif (new file, 104 lines)
@@ -0,0 +1,104 @@
test compile precise-output
set enable_simd
target x86_64 has_avx

function %f1(f32) -> f32 {
block0(v0: f32):
    v1 = ceil v0
    return v1
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
; block0:
;   vroundss $2, %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
; block1: ; offset 0x4
;   vroundss $2, %xmm0, %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   retq

function %f2(f64) -> f64 {
block0(v0: f64):
    v1 = ceil v0
    return v1
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
; block0:
;   vroundsd $2, %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
; block1: ; offset 0x4
;   vroundsd $2, %xmm0, %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   retq

function %f4(f32x4) -> f32x4 {
block0(v0: f32x4):
    v1 = ceil v0
    return v1
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
; block0:
;   vroundps $2, %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
; block1: ; offset 0x4
;   vroundps $2, %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   retq

function %f4(f64x2) -> f64x2 {
block0(v0: f64x2):
    v1 = ceil v0
    return v1
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
; block0:
;   vroundpd $2, %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
; block1: ; offset 0x4
;   vroundpd $2, %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   retq
cranelift/filetests/filetests/isa/x64/fpromote-demote-avx.clif (new file, 130 lines)
@@ -0,0 +1,130 @@
test compile precise-output
set enable_simd
target x86_64 has_avx

function %fpromote(f32) -> f64 {
block0(v0: f32):
    v1 = fpromote.f64 v0
    return v1
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
; block0:
;   vcvtss2sd %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
; block1: ; offset 0x4
;   vcvtss2sd %xmm0, %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   retq

function %fpromote_load(i64, f32) -> f64 {
    ss0 = explicit_slot 16

block0(v1: i64, v2: f32):
    v3 = stack_addr.i64 ss0
    store.f32 v2, v3
    v4 = load.f32 v3
    v5 = fpromote.f64 v4
    return v5
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
;   subq %rsp, $16, %rsp
; block0:
;   lea rsp(0 + virtual offset), %rdx
;   vmovss %xmm0, 0(%rdx)
;   vcvtss2sd 0(%rdx), %xmm0
;   addq %rsp, $16, %rsp
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
;   subq $0x10, %rsp
; block1: ; offset 0x8
;   leaq (%rsp), %rdx
;   vmovss %xmm0, (%rdx) ; trap: heap_oob
;   vcvtss2sd (%rdx), %xmm0, %xmm0 ; trap: heap_oob
;   addq $0x10, %rsp
;   movq %rbp, %rsp
;   popq %rbp
;   retq

function %fdemote(f64) -> f32 {
block0(v0: f64):
    v1 = fdemote.f32 v0
    return v1
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
; block0:
;   vcvtsd2ss %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
; block1: ; offset 0x4
;   vcvtsd2ss %xmm0, %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   retq

function %fdemote_load(i64, f64) -> f32 {
    ss0 = explicit_slot 16

block0(v1: i64, v2: f64):
    v3 = stack_addr.i64 ss0
    store.f64 v2, v3
    v4 = load.f64 v3
    v5 = fdemote.f32 v4
    return v5
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
;   subq %rsp, $16, %rsp
; block0:
;   lea rsp(0 + virtual offset), %rdx
;   vmovsd %xmm0, 0(%rdx)
;   vcvtsd2ss 0(%rdx), %xmm0
;   addq %rsp, $16, %rsp
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
;   subq $0x10, %rsp
; block1: ; offset 0x8
;   leaq (%rsp), %rdx
;   vmovsd %xmm0, (%rdx) ; trap: heap_oob
;   vcvtsd2ss (%rdx), %xmm0, %xmm0 ; trap: heap_oob
;   addq $0x10, %rsp
;   movq %rbp, %rsp
;   popq %rbp
;   retq
cranelift/filetests/filetests/isa/x64/fpromote-demote.clif (new file, 130 lines)
@@ -0,0 +1,130 @@
test compile precise-output
set enable_simd
target x86_64

function %fpromote(f32) -> f64 {
block0(v0: f32):
    v1 = fpromote.f64 v0
    return v1
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
; block0:
;   cvtss2sd %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
; block1: ; offset 0x4
;   cvtss2sd %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   retq

function %fpromote_load(i64, f32) -> f64 {
    ss0 = explicit_slot 16

block0(v1: i64, v2: f32):
    v3 = stack_addr.i64 ss0
    store.f32 v2, v3
    v4 = load.f32 v3
    v5 = fpromote.f64 v4
    return v5
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
;   subq %rsp, $16, %rsp
; block0:
;   lea rsp(0 + virtual offset), %rdx
;   movss %xmm0, 0(%rdx)
;   cvtss2sd 0(%rdx), %xmm0
;   addq %rsp, $16, %rsp
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
;   subq $0x10, %rsp
; block1: ; offset 0x8
;   leaq (%rsp), %rdx
;   movss %xmm0, (%rdx) ; trap: heap_oob
;   cvtss2sd (%rdx), %xmm0 ; trap: heap_oob
;   addq $0x10, %rsp
;   movq %rbp, %rsp
;   popq %rbp
;   retq

function %fdemote(f64) -> f32 {
block0(v0: f64):
    v1 = fdemote.f32 v0
    return v1
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
; block0:
;   cvtsd2ss %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
; block1: ; offset 0x4
;   cvtsd2ss %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   retq

function %fdemote_load(i64, f64) -> f32 {
    ss0 = explicit_slot 16

block0(v1: i64, v2: f64):
    v3 = stack_addr.i64 ss0
    store.f64 v2, v3
    v4 = load.f64 v3
    v5 = fdemote.f32 v4
    return v5
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
;   subq %rsp, $16, %rsp
; block0:
;   lea rsp(0 + virtual offset), %rdx
;   movsd %xmm0, 0(%rdx)
;   cvtsd2ss 0(%rdx), %xmm0
;   addq %rsp, $16, %rsp
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
;   subq $0x10, %rsp
; block1: ; offset 0x8
;   leaq (%rsp), %rdx
;   movsd %xmm0, (%rdx) ; trap: heap_oob
;   cvtsd2ss (%rdx), %xmm0 ; trap: heap_oob
;   addq $0x10, %rsp
;   movq %rbp, %rsp
;   popq %rbp
;   retq
cranelift/filetests/filetests/isa/x64/fsqrt-avx.clif (new file, 54 lines)
@@ -0,0 +1,54 @@
test compile precise-output
set enable_simd
target x86_64 has_avx

function %sqrt_f32(f32) -> f32 {
block0(v0: f32):
    v1 = sqrt v0
    return v1
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
; block0:
;   vsqrtss %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
; block1: ; offset 0x4
;   vsqrtss %xmm0, %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   retq

function %sqrt_f64(f64) -> f64 {
block0(v0: f64):
    v1 = sqrt v0
    return v1
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
; block0:
;   vsqrtsd %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
; block1: ; offset 0x4
;   vsqrtsd %xmm0, %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   retq
cranelift/filetests/filetests/isa/x64/fsqrt.clif (new file, 54 lines)
@@ -0,0 +1,54 @@
test compile precise-output
set enable_simd
target x86_64

function %sqrt_f32(f32) -> f32 {
block0(v0: f32):
    v1 = sqrt v0
    return v1
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
; block0:
;   sqrtss %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
; block1: ; offset 0x4
;   sqrtss %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   retq

function %sqrt_f64(f64) -> f64 {
block0(v0: f64):
    v1 = sqrt v0
    return v1
}

; VCode:
;   pushq %rbp
;   movq %rsp, %rbp
; block0:
;   sqrtsd %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret
;
; Disassembled:
; block0: ; offset 0x0
;   pushq %rbp
;   movq %rsp, %rbp
; block1: ; offset 0x4
;   sqrtsd %xmm0, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   retq