These Intel-specific instructions represent the semantics of the minss / maxss Intel instructions, which behave more like a C ternary operator than the WebAssembly fmin and fmax instructions. They will be used as building blocks for implementing the WebAssembly semantics.
447 lines
20 KiB
Plaintext
447 lines
20 KiB
Plaintext
; Binary emission of 64-bit floating point code.
|
|
test binemit
|
|
set is_64bit
|
|
set is_compressed
|
|
isa intel has_sse2
|
|
|
|
; The binary encodings can be verified with the command:
|
|
;
|
|
; sed -ne 's/^ *; asm: *//p' filetests/isa/intel/binary64-float.cton | llvm-mc -show-encoding -triple=x86_64
|
|
;
|
|
|
|
; %F32: encoding checks for single-precision (f32) instructions.
;
; Each tested instruction is preceded by a comment giving the equivalent
; AT&T-syntax assembly and carries a trailing comment listing its encoding
; bytes. The `[-,%reg]` / `[-,ssN]` annotation names the register or stack
; slot that holds the result, matching the destination shown in the assembly.
function %F32() {
|
|
ss0 = incoming_arg 8, offset 0
|
|
ss1 = incoming_arg 1024, offset -1024
|
|
ss2 = incoming_arg 1024, offset -2048
|
|
ss3 = incoming_arg 8, offset -2056
|
|
|
|
ebb0:
|
|
; Integer inputs: v0/v1 are the i32 sources for conversions and bitcasts,
; v2/v3 are i64 values that also serve as load/store base addresses below.
[-,%r11] v0 = iconst.i32 1
|
|
[-,%rsi] v1 = iconst.i32 2
|
|
[-,%rax] v2 = iconst.i64 11
|
|
[-,%r14] v3 = iconst.i64 12
|
|
|
|
; v10 (in %xmm5) and v11 (in %xmm10) are the f32 operands used by most of the
; tests that follow; %xmm10 needs a REX prefix byte while %xmm5 does not.
; asm: cvtsi2ssl %r11d, %xmm5
|
|
[-,%xmm5] v10 = fcvt_from_sint.f32 v0 ; bin: f3 41 0f 2a eb
|
|
; asm: cvtsi2ssl %esi, %xmm10
|
|
[-,%xmm10] v11 = fcvt_from_sint.f32 v1 ; bin: f3 44 0f 2a d6
|
|
|
|
; asm: cvtsi2ssq %rax, %xmm5
|
|
[-,%xmm5] v12 = fcvt_from_sint.f32 v2 ; bin: f3 48 0f 2a e8
|
|
; asm: cvtsi2ssq %r14, %xmm10
|
|
[-,%xmm10] v13 = fcvt_from_sint.f32 v3 ; bin: f3 4d 0f 2a d6
|
|
|
|
; asm: cvtss2sd %xmm10, %xmm5
|
|
[-,%xmm5] v14 = fpromote.f64 v11 ; bin: f3 41 0f 5a ea
|
|
; asm: cvtss2sd %xmm5, %xmm10
|
|
[-,%xmm10] v15 = fpromote.f64 v10 ; bin: f3 44 0f 5a d5
|
|
|
|
; asm: movd %r11d, %xmm5
|
|
[-,%xmm5] v16 = bitcast.f32 v0 ; bin: 66 41 0f 6e eb
|
|
; asm: movd %esi, %xmm10
|
|
[-,%xmm10] v17 = bitcast.f32 v1 ; bin: 66 44 0f 6e d6
|
|
|
|
; asm: movd %xmm5, %ecx
|
|
[-,%rcx] v18 = bitcast.i32 v10 ; bin: 66 0f 7e e9
|
|
; asm: movd %xmm10, %esi
|
|
[-,%rsi] v19 = bitcast.i32 v11 ; bin: 66 44 0f 7e d6
|
|
|
|
; Binary arithmetic.
|
|
|
|
; asm: addss %xmm10, %xmm5
|
|
[-,%xmm5] v20 = fadd v10, v11 ; bin: f3 41 0f 58 ea
|
|
; asm: addss %xmm5, %xmm10
|
|
[-,%xmm10] v21 = fadd v11, v10 ; bin: f3 44 0f 58 d5
|
|
|
|
; asm: subss %xmm10, %xmm5
|
|
[-,%xmm5] v22 = fsub v10, v11 ; bin: f3 41 0f 5c ea
|
|
; asm: subss %xmm5, %xmm10
|
|
[-,%xmm10] v23 = fsub v11, v10 ; bin: f3 44 0f 5c d5
|
|
|
|
; asm: mulss %xmm10, %xmm5
|
|
[-,%xmm5] v24 = fmul v10, v11 ; bin: f3 41 0f 59 ea
|
|
; asm: mulss %xmm5, %xmm10
|
|
[-,%xmm10] v25 = fmul v11, v10 ; bin: f3 44 0f 59 d5
|
|
|
|
; asm: divss %xmm10, %xmm5
|
|
[-,%xmm5] v26 = fdiv v10, v11 ; bin: f3 41 0f 5e ea
|
|
; asm: divss %xmm5, %xmm10
|
|
[-,%xmm10] v27 = fdiv v11, v10 ; bin: f3 44 0f 5e d5
|
|
|
|
; Bitwise ops.
|
|
; We use the *ps SSE instructions for everything because they are smaller.
|
|
|
|
; asm: andps %xmm10, %xmm5
|
|
[-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea
|
|
; asm: andps %xmm5, %xmm10
|
|
[-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5
|
|
|
|
; asm: andnps %xmm10, %xmm5
|
|
[-,%xmm5] v32 = band_not v10, v11 ; bin: 41 0f 55 ea
|
|
; asm: andnps %xmm5, %xmm10
|
|
[-,%xmm10] v33 = band_not v11, v10 ; bin: 44 0f 55 d5
|
|
|
|
; asm: orps %xmm10, %xmm5
|
|
[-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea
|
|
; asm: orps %xmm5, %xmm10
|
|
[-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5
|
|
|
|
; asm: xorps %xmm10, %xmm5
|
|
[-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea
|
|
; asm: xorps %xmm5, %xmm10
|
|
[-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5
|
|
|
|
; asm: movaps %xmm10, %xmm5
|
|
[-,%xmm5] v38 = copy v11 ; bin: 41 0f 28 ea
|
|
; asm: movaps %xmm5, %xmm10
|
|
[-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5
|
|
|
|
; Convert float to int.
|
|
|
|
; asm: cvttss2si %xmm5, %ecx
|
|
[-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd
|
|
; asm: cvttss2si %xmm10, %esi
|
|
[-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 41 0f 2c f2
|
|
|
|
; asm: cvttss2si %xmm5, %rcx
|
|
[-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f3 48 0f 2c cd
|
|
; asm: cvttss2si %xmm10, %rsi
|
|
[-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f3 49 0f 2c f2
|
|
|
|
; Min/max.
|
|
|
|
; asm: minss %xmm10, %xmm5
|
|
[-,%xmm5] v44 = x86_fmin v10, v11 ; bin: f3 41 0f 5d ea
|
|
; asm: minss %xmm5, %xmm10
|
|
[-,%xmm10] v45 = x86_fmin v11, v10 ; bin: f3 44 0f 5d d5
|
|
; asm: maxss %xmm10, %xmm5
|
|
[-,%xmm5] v46 = x86_fmax v10, v11 ; bin: f3 41 0f 5f ea
|
|
; asm: maxss %xmm5, %xmm10
|
|
[-,%xmm10] v47 = x86_fmax v11, v10 ; bin: f3 44 0f 5f d5
|
|
|
|
; Unary arithmetic.
|
|
|
|
; asm: sqrtss %xmm5, %xmm10
|
|
[-,%xmm10] v50 = sqrt v10 ; bin: f3 44 0f 51 d5
|
|
; asm: sqrtss %xmm10, %xmm5
|
|
[-,%xmm5] v51 = sqrt v11 ; bin: f3 41 0f 51 ea
|
|
|
|
; The rounding tests below differ only in the trailing immediate byte
; (0 = nearest, 1 = floor, 2 = ceil, 3 = trunc). The third case of each group
; targets %xmm2, so neither register needs a REX prefix byte.
; asm: roundss $0, %xmm5, %xmm10
|
|
[-,%xmm10] v52 = nearest v10 ; bin: 66 44 0f 3a 0a d5 00
|
|
; asm: roundss $0, %xmm10, %xmm5
|
|
[-,%xmm5] v53 = nearest v11 ; bin: 66 41 0f 3a 0a ea 00
|
|
; asm: roundss $0, %xmm5, %xmm2
|
|
[-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0a d5 00
|
|
|
|
; asm: roundss $1, %xmm5, %xmm10
|
|
[-,%xmm10] v55 = floor v10 ; bin: 66 44 0f 3a 0a d5 01
|
|
; asm: roundss $1, %xmm10, %xmm5
|
|
[-,%xmm5] v56 = floor v11 ; bin: 66 41 0f 3a 0a ea 01
|
|
; asm: roundss $1, %xmm5, %xmm2
|
|
[-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0a d5 01
|
|
|
|
; asm: roundss $2, %xmm5, %xmm10
|
|
[-,%xmm10] v58 = ceil v10 ; bin: 66 44 0f 3a 0a d5 02
|
|
; asm: roundss $2, %xmm10, %xmm5
|
|
[-,%xmm5] v59 = ceil v11 ; bin: 66 41 0f 3a 0a ea 02
|
|
; asm: roundss $2, %xmm5, %xmm2
|
|
[-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0a d5 02
|
|
|
|
; asm: roundss $3, %xmm5, %xmm10
|
|
[-,%xmm10] v61 = trunc v10 ; bin: 66 44 0f 3a 0a d5 03
|
|
; asm: roundss $3, %xmm10, %xmm5
|
|
[-,%xmm5] v62 = trunc v11 ; bin: 66 41 0f 3a 0a ea 03
|
|
; asm: roundss $3, %xmm5, %xmm2
|
|
[-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0a d5 03
|
|
|
|
; Load/Store
|
|
|
|
; Three addressing forms are covered for each direction: no displacement,
; an 8-bit displacement (+/-50) and a 32-bit displacement (+/-10000).
; asm: movd (%r14), %xmm5
|
|
[-,%xmm5] v100 = load.f32 v3 ; bin: 66 41 0f 6e 2e
|
|
; asm: movd (%rax), %xmm10
|
|
[-,%xmm10] v101 = load.f32 v2 ; bin: 66 44 0f 6e 10
|
|
; asm: movd 50(%r14), %xmm5
|
|
[-,%xmm5] v110 = load.f32 v3+50 ; bin: 66 41 0f 6e 6e 32
|
|
; asm: movd -50(%rax), %xmm10
|
|
[-,%xmm10] v111 = load.f32 v2-50 ; bin: 66 44 0f 6e 50 ce
|
|
; asm: movd 10000(%r14), %xmm5
|
|
[-,%xmm5] v120 = load.f32 v3+10000 ; bin: 66 41 0f 6e ae 00002710
|
|
; asm: movd -10000(%rax), %xmm10
|
|
[-,%xmm10] v121 = load.f32 v2-10000 ; bin: 66 44 0f 6e 90 ffffd8f0
|
|
|
|
; asm: movd %xmm5, (%r14)
|
|
[-] store.f32 v100, v3 ; bin: 66 41 0f 7e 2e
|
|
; asm: movd %xmm10, (%rax)
|
|
[-] store.f32 v101, v2 ; bin: 66 44 0f 7e 10
|
|
; asm: movd %xmm5, 50(%r14)
|
|
[-] store.f32 v100, v3+50 ; bin: 66 41 0f 7e 6e 32
|
|
; asm: movd %xmm10, -50(%rax)
|
|
[-] store.f32 v101, v2-50 ; bin: 66 44 0f 7e 50 ce
|
|
; asm: movd %xmm5, 10000(%r14)
|
|
[-] store.f32 v100, v3+10000 ; bin: 66 41 0f 7e ae 00002710
|
|
; asm: movd %xmm10, -10000(%rax)
|
|
[-] store.f32 v101, v2-10000 ; bin: 66 44 0f 7e 90 ffffd8f0
|
|
|
|
; Spill / Fill.
|
|
|
|
; asm: movd %xmm5, 1032(%rsp)
|
|
[-,ss1] v200 = spill v100 ; bin: 66 0f 7e ac 24 00000408
|
|
; asm: movd %xmm10, 1032(%rsp)
|
|
[-,ss1] v201 = spill v101 ; bin: 66 44 0f 7e 94 24 00000408
|
|
|
|
; asm: movd 1032(%rsp), %xmm5
|
|
[-,%xmm5] v210 = fill v200 ; bin: 66 0f 6e ac 24 00000408
|
|
; asm: movd 1032(%rsp), %xmm10
|
|
[-,%xmm10] v211 = fill v201 ; bin: 66 44 0f 6e 94 24 00000408
|
|
|
|
; Comparisons.
|
|
;
|
|
; Only `supported_floatccs` are tested here. Others are handled by
|
|
; legalization patterns.
|
|
|
|
; Each comparison is emitted as a ucomiss followed by a setcc into the
; low byte of the result register, so every expected encoding holds two
; instructions.
; asm: ucomiss %xmm10, %xmm5
|
|
; asm: setnp %bl
|
|
[-,%rbx] v300 = fcmp ord v10, v11 ; bin: 41 0f 2e ea 0f 9b c3
|
|
; asm: ucomiss %xmm5, %xmm10
|
|
; asm: setp %bl
|
|
[-,%rbx] v301 = fcmp uno v11, v10 ; bin: 44 0f 2e d5 0f 9a c3
|
|
; asm: ucomiss %xmm10, %xmm5
|
|
; asm: setne %dl
|
|
[-,%rdx] v302 = fcmp one v10, v11 ; bin: 41 0f 2e ea 0f 95 c2
|
|
; asm: ucomiss %xmm5, %xmm10
|
|
; asm: sete %dl
|
|
[-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 44 0f 2e d5 0f 94 c2
|
|
; asm: ucomiss %xmm10, %xmm5
|
|
; asm: seta %bl
|
|
[-,%rbx] v304 = fcmp gt v10, v11 ; bin: 41 0f 2e ea 0f 97 c3
|
|
; asm: ucomiss %xmm5, %xmm10
|
|
; asm: setae %bl
|
|
[-,%rbx] v305 = fcmp ge v11, v10 ; bin: 44 0f 2e d5 0f 93 c3
|
|
; asm: ucomiss %xmm10, %xmm5
|
|
; asm: setb %dl
|
|
[-,%rdx] v306 = fcmp ult v10, v11 ; bin: 41 0f 2e ea 0f 92 c2
|
|
; asm: ucomiss %xmm5, %xmm10
|
|
; asm: setbe %dl
|
|
[-,%rdx] v307 = fcmp ule v11, v10 ; bin: 44 0f 2e d5 0f 96 c2
|
|
|
|
return
|
|
}
|
|
|
|
; %F64: encoding checks for double-precision (f64) instructions.
;
; Each tested instruction is preceded by a comment giving the equivalent
; AT&T-syntax assembly and carries a trailing comment listing its encoding
; bytes. The `[-,%reg]` / `[-,ssN]` annotation names the register or stack
; slot that holds the result, matching the destination shown in the assembly.
function %F64() {
|
|
ss0 = incoming_arg 8, offset 0
|
|
ss1 = incoming_arg 1024, offset -1024
|
|
ss2 = incoming_arg 1024, offset -2048
|
|
ss3 = incoming_arg 8, offset -2056
|
|
|
|
ebb0:
|
|
; Integer inputs: v0/v1 are the i32 conversion sources, v2/v3 are i64 values
; used for conversions and bitcasts and as load/store base addresses below.
[-,%r11] v0 = iconst.i32 1
|
|
[-,%rsi] v1 = iconst.i32 2
|
|
[-,%rax] v2 = iconst.i64 11
|
|
[-,%r14] v3 = iconst.i64 12
|
|
|
|
; v10 (in %xmm5) and v11 (in %xmm10) are the f64 operands used by most of the
; tests that follow; %xmm10 needs a REX prefix byte while %xmm5 does not.
; asm: cvtsi2sdl %r11d, %xmm5
|
|
[-,%xmm5] v10 = fcvt_from_sint.f64 v0 ; bin: f2 41 0f 2a eb
|
|
; asm: cvtsi2sdl %esi, %xmm10
|
|
[-,%xmm10] v11 = fcvt_from_sint.f64 v1 ; bin: f2 44 0f 2a d6
|
|
|
|
; asm: cvtsi2sdq %rax, %xmm5
|
|
[-,%xmm5] v12 = fcvt_from_sint.f64 v2 ; bin: f2 48 0f 2a e8
|
|
; asm: cvtsi2sdq %r14, %xmm10
|
|
[-,%xmm10] v13 = fcvt_from_sint.f64 v3 ; bin: f2 4d 0f 2a d6
|
|
|
|
; asm: cvtsd2ss %xmm10, %xmm5
|
|
[-,%xmm5] v14 = fdemote.f32 v11 ; bin: f2 41 0f 5a ea
|
|
; asm: cvtsd2ss %xmm5, %xmm10
|
|
[-,%xmm10] v15 = fdemote.f32 v10 ; bin: f2 44 0f 5a d5
|
|
|
|
; asm: movq %rax, %xmm5
|
|
[-,%xmm5] v16 = bitcast.f64 v2 ; bin: 66 48 0f 6e e8
|
|
; asm: movq %r14, %xmm10
|
|
[-,%xmm10] v17 = bitcast.f64 v3 ; bin: 66 4d 0f 6e d6
|
|
|
|
; asm: movq %xmm5, %rcx
|
|
[-,%rcx] v18 = bitcast.i64 v10 ; bin: 66 48 0f 7e e9
|
|
; asm: movq %xmm10, %rsi
|
|
[-,%rsi] v19 = bitcast.i64 v11 ; bin: 66 4c 0f 7e d6
|
|
|
|
; Binary arithmetic.
|
|
|
|
; asm: addsd %xmm10, %xmm5
|
|
[-,%xmm5] v20 = fadd v10, v11 ; bin: f2 41 0f 58 ea
|
|
; asm: addsd %xmm5, %xmm10
|
|
[-,%xmm10] v21 = fadd v11, v10 ; bin: f2 44 0f 58 d5
|
|
|
|
; asm: subsd %xmm10, %xmm5
|
|
[-,%xmm5] v22 = fsub v10, v11 ; bin: f2 41 0f 5c ea
|
|
; asm: subsd %xmm5, %xmm10
|
|
[-,%xmm10] v23 = fsub v11, v10 ; bin: f2 44 0f 5c d5
|
|
|
|
; asm: mulsd %xmm10, %xmm5
|
|
[-,%xmm5] v24 = fmul v10, v11 ; bin: f2 41 0f 59 ea
|
|
; asm: mulsd %xmm5, %xmm10
|
|
[-,%xmm10] v25 = fmul v11, v10 ; bin: f2 44 0f 59 d5
|
|
|
|
; asm: divsd %xmm10, %xmm5
|
|
[-,%xmm5] v26 = fdiv v10, v11 ; bin: f2 41 0f 5e ea
|
|
; asm: divsd %xmm5, %xmm10
|
|
[-,%xmm10] v27 = fdiv v11, v10 ; bin: f2 44 0f 5e d5
|
|
|
|
; Bitwise ops.
|
|
; We use the *ps SSE instructions for everything because they are smaller.
|
|
|
|
; asm: andps %xmm10, %xmm5
|
|
[-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea
|
|
; asm: andps %xmm5, %xmm10
|
|
[-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5
|
|
|
|
; asm: andnps %xmm10, %xmm5
|
|
[-,%xmm5] v32 = band_not v10, v11 ; bin: 41 0f 55 ea
|
|
; asm: andnps %xmm5, %xmm10
|
|
[-,%xmm10] v33 = band_not v11, v10 ; bin: 44 0f 55 d5
|
|
|
|
; asm: orps %xmm10, %xmm5
|
|
[-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea
|
|
; asm: orps %xmm5, %xmm10
|
|
[-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5
|
|
|
|
; asm: xorps %xmm10, %xmm5
|
|
[-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea
|
|
; asm: xorps %xmm5, %xmm10
|
|
[-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5
|
|
|
|
; asm: movaps %xmm10, %xmm5
|
|
[-,%xmm5] v38 = copy v11 ; bin: 41 0f 28 ea
|
|
; asm: movaps %xmm5, %xmm10
|
|
[-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5
|
|
|
|
; Convert float to int.
|
|
|
|
; asm: cvttsd2si %xmm5, %ecx
|
|
[-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd
|
|
; asm: cvttsd2si %xmm10, %esi
|
|
[-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 41 0f 2c f2
|
|
|
|
; asm: cvttsd2si %xmm5, %rcx
|
|
[-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f2 48 0f 2c cd
|
|
; asm: cvttsd2si %xmm10, %rsi
|
|
[-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f2 49 0f 2c f2
|
|
|
|
; Min/max.
|
|
|
|
; asm: minsd %xmm10, %xmm5
|
|
[-,%xmm5] v44 = x86_fmin v10, v11 ; bin: f2 41 0f 5d ea
|
|
; asm: minsd %xmm5, %xmm10
|
|
[-,%xmm10] v45 = x86_fmin v11, v10 ; bin: f2 44 0f 5d d5
|
|
; asm: maxsd %xmm10, %xmm5
|
|
[-,%xmm5] v46 = x86_fmax v10, v11 ; bin: f2 41 0f 5f ea
|
|
; asm: maxsd %xmm5, %xmm10
|
|
[-,%xmm10] v47 = x86_fmax v11, v10 ; bin: f2 44 0f 5f d5
|
|
|
|
; Unary arithmetic.
|
|
|
|
; asm: sqrtsd %xmm5, %xmm10
|
|
[-,%xmm10] v50 = sqrt v10 ; bin: f2 44 0f 51 d5
|
|
; asm: sqrtsd %xmm10, %xmm5
|
|
[-,%xmm5] v51 = sqrt v11 ; bin: f2 41 0f 51 ea
|
|
|
|
; The rounding tests below differ only in the trailing immediate byte
; (0 = nearest, 1 = floor, 2 = ceil, 3 = trunc). The third case of each group
; targets %xmm2, so neither register needs a REX prefix byte.
; asm: roundsd $0, %xmm5, %xmm10
|
|
[-,%xmm10] v52 = nearest v10 ; bin: 66 44 0f 3a 0b d5 00
|
|
; asm: roundsd $0, %xmm10, %xmm5
|
|
[-,%xmm5] v53 = nearest v11 ; bin: 66 41 0f 3a 0b ea 00
|
|
; asm: roundsd $0, %xmm5, %xmm2
|
|
[-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0b d5 00
|
|
|
|
; asm: roundsd $1, %xmm5, %xmm10
|
|
[-,%xmm10] v55 = floor v10 ; bin: 66 44 0f 3a 0b d5 01
|
|
; asm: roundsd $1, %xmm10, %xmm5
|
|
[-,%xmm5] v56 = floor v11 ; bin: 66 41 0f 3a 0b ea 01
|
|
; asm: roundsd $1, %xmm5, %xmm2
|
|
[-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0b d5 01
|
|
|
|
; asm: roundsd $2, %xmm5, %xmm10
|
|
[-,%xmm10] v58 = ceil v10 ; bin: 66 44 0f 3a 0b d5 02
|
|
; asm: roundsd $2, %xmm10, %xmm5
|
|
[-,%xmm5] v59 = ceil v11 ; bin: 66 41 0f 3a 0b ea 02
|
|
; asm: roundsd $2, %xmm5, %xmm2
|
|
[-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0b d5 02
|
|
|
|
; asm: roundsd $3, %xmm5, %xmm10
|
|
[-,%xmm10] v61 = trunc v10 ; bin: 66 44 0f 3a 0b d5 03
|
|
; asm: roundsd $3, %xmm10, %xmm5
|
|
[-,%xmm5] v62 = trunc v11 ; bin: 66 41 0f 3a 0b ea 03
|
|
; asm: roundsd $3, %xmm5, %xmm2
|
|
[-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0b d5 03
|
|
|
|
; Load/Store
|
|
|
|
; Three addressing forms are covered for each direction: no displacement,
; an 8-bit displacement (+/-50) and a 32-bit displacement (+/-10000).
; Note that the movq load and store forms use different opcodes
; (f3 0f 7e for loads, 66 0f d6 for stores).
; asm: movq (%r14), %xmm5
|
|
[-,%xmm5] v100 = load.f64 v3 ; bin: f3 41 0f 7e 2e
|
|
; asm: movq (%rax), %xmm10
|
|
[-,%xmm10] v101 = load.f64 v2 ; bin: f3 44 0f 7e 10
|
|
; asm: movq 50(%r14), %xmm5
|
|
[-,%xmm5] v110 = load.f64 v3+50 ; bin: f3 41 0f 7e 6e 32
|
|
; asm: movq -50(%rax), %xmm10
|
|
[-,%xmm10] v111 = load.f64 v2-50 ; bin: f3 44 0f 7e 50 ce
|
|
; asm: movq 10000(%r14), %xmm5
|
|
[-,%xmm5] v120 = load.f64 v3+10000 ; bin: f3 41 0f 7e ae 00002710
|
|
; asm: movq -10000(%rax), %xmm10
|
|
[-,%xmm10] v121 = load.f64 v2-10000 ; bin: f3 44 0f 7e 90 ffffd8f0
|
|
|
|
; asm: movq %xmm5, (%r14)
|
|
[-] store.f64 v100, v3 ; bin: 66 41 0f d6 2e
|
|
; asm: movq %xmm10, (%rax)
|
|
[-] store.f64 v101, v2 ; bin: 66 44 0f d6 10
|
|
; asm: movq %xmm5, 50(%r14)
|
|
[-] store.f64 v100, v3+50 ; bin: 66 41 0f d6 6e 32
|
|
; asm: movq %xmm10, -50(%rax)
|
|
[-] store.f64 v101, v2-50 ; bin: 66 44 0f d6 50 ce
|
|
; asm: movq %xmm5, 10000(%r14)
|
|
[-] store.f64 v100, v3+10000 ; bin: 66 41 0f d6 ae 00002710
|
|
; asm: movq %xmm10, -10000(%rax)
|
|
[-] store.f64 v101, v2-10000 ; bin: 66 44 0f d6 90 ffffd8f0
|
|
|
|
; Spill / Fill.
|
|
|
|
; asm: movq %xmm5, 1032(%rsp)
|
|
[-,ss1] v200 = spill v100 ; bin: 66 0f d6 ac 24 00000408
|
|
; asm: movq %xmm10, 1032(%rsp)
|
|
[-,ss1] v201 = spill v101 ; bin: 66 44 0f d6 94 24 00000408
|
|
|
|
; asm: movq 1032(%rsp), %xmm5
|
|
[-,%xmm5] v210 = fill v200 ; bin: f3 0f 7e ac 24 00000408
|
|
; asm: movq 1032(%rsp), %xmm10
|
|
[-,%xmm10] v211 = fill v201 ; bin: f3 44 0f 7e 94 24 00000408
|
|
|
|
; Comparisons.
|
|
;
|
|
; Only `supported_floatccs` are tested here. Others are handled by
|
|
; legalization patterns.
|
|
|
|
; Each comparison is emitted as a ucomisd followed by a setcc into the
; low byte of the result register, so every expected encoding holds two
; instructions.
; asm: ucomisd %xmm10, %xmm5
|
|
; asm: setnp %bl
|
|
[-,%rbx] v300 = fcmp ord v10, v11 ; bin: 66 41 0f 2e ea 0f 9b c3
|
|
; asm: ucomisd %xmm5, %xmm10
|
|
; asm: setp %bl
|
|
[-,%rbx] v301 = fcmp uno v11, v10 ; bin: 66 44 0f 2e d5 0f 9a c3
|
|
; asm: ucomisd %xmm10, %xmm5
|
|
; asm: setne %dl
|
|
[-,%rdx] v302 = fcmp one v10, v11 ; bin: 66 41 0f 2e ea 0f 95 c2
|
|
; asm: ucomisd %xmm5, %xmm10
|
|
; asm: sete %dl
|
|
[-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 66 44 0f 2e d5 0f 94 c2
|
|
; asm: ucomisd %xmm10, %xmm5
|
|
; asm: seta %bl
|
|
[-,%rbx] v304 = fcmp gt v10, v11 ; bin: 66 41 0f 2e ea 0f 97 c3
|
|
; asm: ucomisd %xmm5, %xmm10
|
|
; asm: setae %bl
|
|
[-,%rbx] v305 = fcmp ge v11, v10 ; bin: 66 44 0f 2e d5 0f 93 c3
|
|
; asm: ucomisd %xmm10, %xmm5
|
|
; asm: setb %dl
|
|
[-,%rdx] v306 = fcmp ult v10, v11 ; bin: 66 41 0f 2e ea 0f 92 c2
|
|
; asm: ucomisd %xmm5, %xmm10
|
|
; asm: setbe %dl
|
|
[-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 44 0f 2e d5 0f 96 c2
|
|
|
|
return
|
|
}
|