* First draft of TrapSink implementation. * Add trap sink calls to 'trapif' and 'trapff' recipes. * Add SourceLoc to trap sink calls, and add trap sink calls to all loads and stores. * Add IntegerDivisionByZero trap to div recipe. * Only emit load/store traps if 'notrap' flag is not set on the instruction. * Update filetest machinery to add new trap sink functionality. * Update filetests to include traps in output. * Add a few more trap outputs to filetests. * Add trap output to CLI tool.
543 lines
24 KiB
Plaintext
543 lines
24 KiB
Plaintext
; Binary emission of floating point code (f32 and f64) in 64-bit mode.
|
|
test binemit
|
|
set is_64bit
|
|
set is_compressed
|
|
isa intel haswell
|
|
|
|
; The binary encodings can be verified with the command:
|
|
;
|
|
; sed -ne 's/^ *; asm: *//p' filetests/isa/intel/binary64-float.cton | llvm-mc -show-encoding -triple=x86_64
|
|
;
|
|
|
|
; %F32: binary emission checks for single-precision (f32) instructions.
; Instructions under test carry a trailing `bin:` annotation listing the
; expected encoding bytes; the `asm:` comment above each one gives the same
; instruction in AT&T syntax so the bytes can be cross-checked with llvm-mc.
; Throughout this function v10 lives in %xmm5 and v11 in %xmm10, so each pair
; of tests covers both the REX.B and the REX.R forms of an encoding.
function %F32() {
|
|
ss0 = incoming_arg 8, offset 0
|
|
ss1 = incoming_arg 1024, offset -1024
|
|
ss2 = incoming_arg 1024, offset -2048
|
|
ss3 = incoming_arg 8, offset -2056
|
|
|
|
ebb0:
|
|
[-,%r11] v0 = iconst.i32 1
|
|
[-,%rsi] v1 = iconst.i32 2
|
|
[-,%rax] v2 = iconst.i64 11
|
|
[-,%r14] v3 = iconst.i64 12
|
|
[-,%r13] v4 = iconst.i64 13
|
|
|
|
; asm: cvtsi2ssl %r11d, %xmm5
|
|
[-,%xmm5] v10 = fcvt_from_sint.f32 v0 ; bin: f3 41 0f 2a eb
|
|
; asm: cvtsi2ssl %esi, %xmm10
|
|
[-,%xmm10] v11 = fcvt_from_sint.f32 v1 ; bin: f3 44 0f 2a d6
|
|
|
|
; asm: cvtsi2ssq %rax, %xmm5
|
|
[-,%xmm5] v12 = fcvt_from_sint.f32 v2 ; bin: f3 48 0f 2a e8
|
|
; asm: cvtsi2ssq %r14, %xmm10
|
|
[-,%xmm10] v13 = fcvt_from_sint.f32 v3 ; bin: f3 4d 0f 2a d6
|
|
|
|
; asm: cvtss2sd %xmm10, %xmm5
|
|
[-,%xmm5] v14 = fpromote.f64 v11 ; bin: f3 41 0f 5a ea
|
|
; asm: cvtss2sd %xmm5, %xmm10
|
|
[-,%xmm10] v15 = fpromote.f64 v10 ; bin: f3 44 0f 5a d5
|
|
|
|
; asm: movd %r11d, %xmm5
|
|
[-,%xmm5] v16 = bitcast.f32 v0 ; bin: 66 41 0f 6e eb
|
|
; asm: movd %esi, %xmm10
|
|
[-,%xmm10] v17 = bitcast.f32 v1 ; bin: 66 44 0f 6e d6
|
|
|
|
; asm: movd %xmm5, %ecx
|
|
[-,%rcx] v18 = bitcast.i32 v10 ; bin: 66 0f 7e e9
|
|
; asm: movd %xmm10, %esi
|
|
[-,%rsi] v19 = bitcast.i32 v11 ; bin: 66 44 0f 7e d6
|
|
|
|
; Binary arithmetic.
|
|
|
|
; asm: addss %xmm10, %xmm5
|
|
[-,%xmm5] v20 = fadd v10, v11 ; bin: f3 41 0f 58 ea
|
|
; asm: addss %xmm5, %xmm10
|
|
[-,%xmm10] v21 = fadd v11, v10 ; bin: f3 44 0f 58 d5
|
|
|
|
; asm: subss %xmm10, %xmm5
|
|
[-,%xmm5] v22 = fsub v10, v11 ; bin: f3 41 0f 5c ea
|
|
; asm: subss %xmm5, %xmm10
|
|
[-,%xmm10] v23 = fsub v11, v10 ; bin: f3 44 0f 5c d5
|
|
|
|
; asm: mulss %xmm10, %xmm5
|
|
[-,%xmm5] v24 = fmul v10, v11 ; bin: f3 41 0f 59 ea
|
|
; asm: mulss %xmm5, %xmm10
|
|
[-,%xmm10] v25 = fmul v11, v10 ; bin: f3 44 0f 59 d5
|
|
|
|
; asm: divss %xmm10, %xmm5
|
|
[-,%xmm5] v26 = fdiv v10, v11 ; bin: f3 41 0f 5e ea
|
|
; asm: divss %xmm5, %xmm10
|
|
[-,%xmm10] v27 = fdiv v11, v10 ; bin: f3 44 0f 5e d5
|
|
|
|
; Bitwise ops.
|
|
; We use the *ps SSE instructions for everything because they are smaller.
|
|
|
|
; asm: andps %xmm10, %xmm5
|
|
[-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea
|
|
; asm: andps %xmm5, %xmm10
|
|
[-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5
|
|
|
|
; andnps computes (NOT dst) AND src, so the second (inverted) band_not operand
; is the one that shares the destination register.
; asm: andnps %xmm10, %xmm5
|
|
[-,%xmm5] v32 = band_not v11, v10 ; bin: 41 0f 55 ea
|
|
; asm: andnps %xmm5, %xmm10
|
|
[-,%xmm10] v33 = band_not v10, v11 ; bin: 44 0f 55 d5
|
|
|
|
; asm: orps %xmm10, %xmm5
|
|
[-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea
|
|
; asm: orps %xmm5, %xmm10
|
|
[-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5
|
|
|
|
; asm: xorps %xmm10, %xmm5
|
|
[-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea
|
|
; asm: xorps %xmm5, %xmm10
|
|
[-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5
|
|
|
|
; asm: movaps %xmm10, %xmm5
|
|
[-,%xmm5] v38 = copy v11 ; bin: 41 0f 28 ea
|
|
; asm: movaps %xmm5, %xmm10
|
|
[-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5
|
|
|
|
; Convert float to int.
|
|
|
|
; asm: cvttss2si %xmm5, %ecx
|
|
[-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd
|
|
; asm: cvttss2si %xmm10, %esi
|
|
[-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 41 0f 2c f2
|
|
|
|
; asm: cvttss2si %xmm5, %rcx
|
|
[-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f3 48 0f 2c cd
|
|
; asm: cvttss2si %xmm10, %rsi
|
|
[-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f3 49 0f 2c f2
|
|
|
|
; Min/max.
|
|
|
|
; asm: minss %xmm10, %xmm5
|
|
[-,%xmm5] v44 = x86_fmin v10, v11 ; bin: f3 41 0f 5d ea
|
|
; asm: minss %xmm5, %xmm10
|
|
[-,%xmm10] v45 = x86_fmin v11, v10 ; bin: f3 44 0f 5d d5
|
|
; asm: maxss %xmm10, %xmm5
|
|
[-,%xmm5] v46 = x86_fmax v10, v11 ; bin: f3 41 0f 5f ea
|
|
; asm: maxss %xmm5, %xmm10
|
|
[-,%xmm10] v47 = x86_fmax v11, v10 ; bin: f3 44 0f 5f d5
|
|
|
|
; Unary arithmetic.
|
|
|
|
; asm: sqrtss %xmm5, %xmm10
|
|
[-,%xmm10] v50 = sqrt v10 ; bin: f3 44 0f 51 d5
|
|
; asm: sqrtss %xmm10, %xmm5
|
|
[-,%xmm5] v51 = sqrt v11 ; bin: f3 41 0f 51 ea
|
|
|
|
; asm: roundss $0, %xmm5, %xmm10
|
|
[-,%xmm10] v52 = nearest v10 ; bin: 66 44 0f 3a 0a d5 00
|
|
; asm: roundss $0, %xmm10, %xmm5
|
|
[-,%xmm5] v53 = nearest v11 ; bin: 66 41 0f 3a 0a ea 00
|
|
; asm: roundss $0, %xmm5, %xmm2
|
|
[-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0a d5 00
|
|
|
|
; asm: roundss $1, %xmm5, %xmm10
|
|
[-,%xmm10] v55 = floor v10 ; bin: 66 44 0f 3a 0a d5 01
|
|
; asm: roundss $1, %xmm10, %xmm5
|
|
[-,%xmm5] v56 = floor v11 ; bin: 66 41 0f 3a 0a ea 01
|
|
; asm: roundss $1, %xmm5, %xmm2
|
|
[-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0a d5 01
|
|
|
|
; asm: roundss $2, %xmm5, %xmm10
|
|
[-,%xmm10] v58 = ceil v10 ; bin: 66 44 0f 3a 0a d5 02
|
|
; asm: roundss $2, %xmm10, %xmm5
|
|
[-,%xmm5] v59 = ceil v11 ; bin: 66 41 0f 3a 0a ea 02
|
|
; asm: roundss $2, %xmm5, %xmm2
|
|
[-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0a d5 02
|
|
|
|
; asm: roundss $3, %xmm5, %xmm10
|
|
[-,%xmm10] v61 = trunc v10 ; bin: 66 44 0f 3a 0a d5 03
|
|
; asm: roundss $3, %xmm10, %xmm5
|
|
[-,%xmm5] v62 = trunc v11 ; bin: 66 41 0f 3a 0a ea 03
|
|
; asm: roundss $3, %xmm5, %xmm2
|
|
[-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0a d5 03
|
|
|
|
; Load/Store
; The `heap_oob` token that leads each expected output below is a trap code
; printed ahead of the instruction bytes (none of these accesses is `notrap`).
|
|
|
|
; asm: movss (%r14), %xmm5
|
|
[-,%xmm5] v100 = load.f32 v3 ; bin: heap_oob f3 41 0f 10 2e
|
|
; asm: movss (%rax), %xmm10
|
|
[-,%xmm10] v101 = load.f32 v2 ; bin: heap_oob f3 44 0f 10 10
|
|
; asm: movss 50(%r14), %xmm5
|
|
[-,%xmm5] v110 = load.f32 v3+50 ; bin: heap_oob f3 41 0f 10 6e 32
|
|
; asm: movss -50(%rax), %xmm10
|
|
[-,%xmm10] v111 = load.f32 v2-50 ; bin: heap_oob f3 44 0f 10 50 ce
|
|
; asm: movss 10000(%r14), %xmm5
|
|
[-,%xmm5] v120 = load.f32 v3+10000 ; bin: heap_oob f3 41 0f 10 ae 00002710
|
|
; asm: movss -10000(%rax), %xmm10
|
|
[-,%xmm10] v121 = load.f32 v2-10000 ; bin: heap_oob f3 44 0f 10 90 ffffd8f0
|
|
|
|
; asm: movss %xmm5, (%r14)
|
|
[-] store.f32 v100, v3 ; bin: heap_oob f3 41 0f 11 2e
|
|
; asm: movss %xmm10, (%rax)
|
|
[-] store.f32 v101, v2 ; bin: heap_oob f3 44 0f 11 10
|
|
; asm: movss %xmm5, (%r13)
|
|
[-] store.f32 v100, v4 ; bin: heap_oob f3 41 0f 11 6d 00
|
|
; asm: movss %xmm10, (%r13)
|
|
[-] store.f32 v101, v4 ; bin: heap_oob f3 45 0f 11 55 00
|
|
; asm: movss %xmm5, 50(%r14)
|
|
[-] store.f32 v100, v3+50 ; bin: heap_oob f3 41 0f 11 6e 32
|
|
; asm: movss %xmm10, -50(%rax)
|
|
[-] store.f32 v101, v2-50 ; bin: heap_oob f3 44 0f 11 50 ce
|
|
; asm: movss %xmm5, 10000(%r14)
|
|
[-] store.f32 v100, v3+10000 ; bin: heap_oob f3 41 0f 11 ae 00002710
|
|
; asm: movss %xmm10, -10000(%rax)
|
|
[-] store.f32 v101, v2-10000 ; bin: heap_oob f3 44 0f 11 90 ffffd8f0
|
|
|
|
; Spill / Fill.
|
|
|
|
; asm: movss %xmm5, 1032(%rsp)
|
|
[-,ss1] v200 = spill v100 ; bin: f3 0f 11 ac 24 00000408
|
|
; asm: movss %xmm10, 1032(%rsp)
|
|
[-,ss1] v201 = spill v101 ; bin: f3 44 0f 11 94 24 00000408
|
|
|
|
; asm: movss 1032(%rsp), %xmm5
|
|
[-,%xmm5] v210 = fill v200 ; bin: f3 0f 10 ac 24 00000408
|
|
; asm: movss 1032(%rsp), %xmm10
|
|
[-,%xmm10] v211 = fill v201 ; bin: f3 44 0f 10 94 24 00000408
|
|
|
|
; asm: movss %xmm5, 1032(%rsp)
|
|
regspill v100, %xmm5 -> ss1 ; bin: f3 0f 11 ac 24 00000408
|
|
; asm: movss 1032(%rsp), %xmm5
|
|
regfill v100, ss1 -> %xmm5 ; bin: f3 0f 10 ac 24 00000408
|
|
|
|
; Comparisons.
|
|
;
|
|
; Only `supported_floatccs` are tested here. Others are handled by
|
|
; legalization patterns.
|
|
|
|
; asm: ucomiss %xmm10, %xmm5
|
|
; asm: setnp %bl
|
|
[-,%rbx] v300 = fcmp ord v10, v11 ; bin: 41 0f 2e ea 0f 9b c3
|
|
; asm: ucomiss %xmm5, %xmm10
|
|
; asm: setp %bl
|
|
[-,%rbx] v301 = fcmp uno v11, v10 ; bin: 44 0f 2e d5 0f 9a c3
|
|
; asm: ucomiss %xmm10, %xmm5
|
|
; asm: setne %dl
|
|
[-,%rdx] v302 = fcmp one v10, v11 ; bin: 41 0f 2e ea 0f 95 c2
|
|
; asm: ucomiss %xmm5, %xmm10
|
|
; asm: sete %dl
|
|
[-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 44 0f 2e d5 0f 94 c2
|
|
; asm: ucomiss %xmm10, %xmm5
|
|
; asm: seta %bl
|
|
[-,%rbx] v304 = fcmp gt v10, v11 ; bin: 41 0f 2e ea 0f 97 c3
|
|
; asm: ucomiss %xmm5, %xmm10
|
|
; asm: setae %bl
|
|
[-,%rbx] v305 = fcmp ge v11, v10 ; bin: 44 0f 2e d5 0f 93 c3
|
|
; asm: ucomiss %xmm10, %xmm5
|
|
; asm: setb %dl
|
|
[-,%rdx] v306 = fcmp ult v10, v11 ; bin: 41 0f 2e ea 0f 92 c2
|
|
; asm: ucomiss %xmm5, %xmm10
|
|
; asm: setbe %dl
|
|
[-,%rdx] v307 = fcmp ule v11, v10 ; bin: 44 0f 2e d5 0f 96 c2
|
|
|
|
; asm: ucomiss %xmm10, %xmm5
|
|
[-,%rflags] v310 = ffcmp v10, v11 ; bin: 41 0f 2e ea
|
|
; asm: ucomiss %xmm5, %xmm10
|
|
[-,%rflags] v311 = ffcmp v11, v10 ; bin: 44 0f 2e d5
|
|
; asm: ucomiss %xmm5, %xmm5
|
|
[-,%rflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed
|
|
|
|
return
|
|
}
|
|
|
|
; %F64: binary emission checks for double-precision (f64) instructions.
; Instructions under test carry a trailing `bin:` annotation listing the
; expected encoding bytes; the `asm:` comment above each one gives the same
; instruction in AT&T syntax so the bytes can be cross-checked with llvm-mc.
; Throughout this function v10 lives in %xmm5 and v11 in %xmm10, so each pair
; of tests covers both the REX.B and the REX.R forms of an encoding.
function %F64() {
|
|
ss0 = incoming_arg 8, offset 0
|
|
ss1 = incoming_arg 1024, offset -1024
|
|
ss2 = incoming_arg 1024, offset -2048
|
|
ss3 = incoming_arg 8, offset -2056
|
|
|
|
ebb0:
|
|
[-,%r11] v0 = iconst.i32 1
|
|
[-,%rsi] v1 = iconst.i32 2
|
|
[-,%rax] v2 = iconst.i64 11
|
|
[-,%r14] v3 = iconst.i64 12
|
|
[-,%r13] v4 = iconst.i64 13
|
|
|
|
; asm: cvtsi2sdl %r11d, %xmm5
|
|
[-,%xmm5] v10 = fcvt_from_sint.f64 v0 ; bin: f2 41 0f 2a eb
|
|
; asm: cvtsi2sdl %esi, %xmm10
|
|
[-,%xmm10] v11 = fcvt_from_sint.f64 v1 ; bin: f2 44 0f 2a d6
|
|
|
|
; asm: cvtsi2sdq %rax, %xmm5
|
|
[-,%xmm5] v12 = fcvt_from_sint.f64 v2 ; bin: f2 48 0f 2a e8
|
|
; asm: cvtsi2sdq %r14, %xmm10
|
|
[-,%xmm10] v13 = fcvt_from_sint.f64 v3 ; bin: f2 4d 0f 2a d6
|
|
|
|
; asm: cvtsd2ss %xmm10, %xmm5
|
|
[-,%xmm5] v14 = fdemote.f32 v11 ; bin: f2 41 0f 5a ea
|
|
; asm: cvtsd2ss %xmm5, %xmm10
|
|
[-,%xmm10] v15 = fdemote.f32 v10 ; bin: f2 44 0f 5a d5
|
|
|
|
; asm: movq %rax, %xmm5
|
|
[-,%xmm5] v16 = bitcast.f64 v2 ; bin: 66 48 0f 6e e8
|
|
; asm: movq %r14, %xmm10
|
|
[-,%xmm10] v17 = bitcast.f64 v3 ; bin: 66 4d 0f 6e d6
|
|
|
|
; asm: movq %xmm5, %rcx
|
|
[-,%rcx] v18 = bitcast.i64 v10 ; bin: 66 48 0f 7e e9
|
|
; asm: movq %xmm10, %rsi
|
|
[-,%rsi] v19 = bitcast.i64 v11 ; bin: 66 4c 0f 7e d6
|
|
|
|
; Binary arithmetic.
|
|
|
|
; asm: addsd %xmm10, %xmm5
|
|
[-,%xmm5] v20 = fadd v10, v11 ; bin: f2 41 0f 58 ea
|
|
; asm: addsd %xmm5, %xmm10
|
|
[-,%xmm10] v21 = fadd v11, v10 ; bin: f2 44 0f 58 d5
|
|
|
|
; asm: subsd %xmm10, %xmm5
|
|
[-,%xmm5] v22 = fsub v10, v11 ; bin: f2 41 0f 5c ea
|
|
; asm: subsd %xmm5, %xmm10
|
|
[-,%xmm10] v23 = fsub v11, v10 ; bin: f2 44 0f 5c d5
|
|
|
|
; asm: mulsd %xmm10, %xmm5
|
|
[-,%xmm5] v24 = fmul v10, v11 ; bin: f2 41 0f 59 ea
|
|
; asm: mulsd %xmm5, %xmm10
|
|
[-,%xmm10] v25 = fmul v11, v10 ; bin: f2 44 0f 59 d5
|
|
|
|
; asm: divsd %xmm10, %xmm5
|
|
[-,%xmm5] v26 = fdiv v10, v11 ; bin: f2 41 0f 5e ea
|
|
; asm: divsd %xmm5, %xmm10
|
|
[-,%xmm10] v27 = fdiv v11, v10 ; bin: f2 44 0f 5e d5
|
|
|
|
; Bitwise ops.
|
|
; We use the *ps SSE instructions for everything because they are smaller.
|
|
|
|
; asm: andps %xmm10, %xmm5
|
|
[-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea
|
|
; asm: andps %xmm5, %xmm10
|
|
[-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5
|
|
|
|
; andnps computes (NOT dst) AND src, so the second (inverted) band_not operand
; is the one that shares the destination register.
; asm: andnps %xmm10, %xmm5
|
|
[-,%xmm5] v32 = band_not v11, v10 ; bin: 41 0f 55 ea
|
|
; asm: andnps %xmm5, %xmm10
|
|
[-,%xmm10] v33 = band_not v10, v11 ; bin: 44 0f 55 d5
|
|
|
|
; asm: orps %xmm10, %xmm5
|
|
[-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea
|
|
; asm: orps %xmm5, %xmm10
|
|
[-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5
|
|
|
|
; asm: xorps %xmm10, %xmm5
|
|
[-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea
|
|
; asm: xorps %xmm5, %xmm10
|
|
[-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5
|
|
|
|
; asm: movaps %xmm10, %xmm5
|
|
[-,%xmm5] v38 = copy v11 ; bin: 41 0f 28 ea
|
|
; asm: movaps %xmm5, %xmm10
|
|
[-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5
|
|
|
|
; Convert float to int.
|
|
|
|
; asm: cvttsd2si %xmm5, %ecx
|
|
[-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd
|
|
; asm: cvttsd2si %xmm10, %esi
|
|
[-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 41 0f 2c f2
|
|
|
|
; asm: cvttsd2si %xmm5, %rcx
|
|
[-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f2 48 0f 2c cd
|
|
; asm: cvttsd2si %xmm10, %rsi
|
|
[-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f2 49 0f 2c f2
|
|
|
|
; Min/max.
|
|
|
|
; asm: minsd %xmm10, %xmm5
|
|
[-,%xmm5] v44 = x86_fmin v10, v11 ; bin: f2 41 0f 5d ea
|
|
; asm: minsd %xmm5, %xmm10
|
|
[-,%xmm10] v45 = x86_fmin v11, v10 ; bin: f2 44 0f 5d d5
|
|
; asm: maxsd %xmm10, %xmm5
|
|
[-,%xmm5] v46 = x86_fmax v10, v11 ; bin: f2 41 0f 5f ea
|
|
; asm: maxsd %xmm5, %xmm10
|
|
[-,%xmm10] v47 = x86_fmax v11, v10 ; bin: f2 44 0f 5f d5
|
|
|
|
; Unary arithmetic.
|
|
|
|
; asm: sqrtsd %xmm5, %xmm10
|
|
[-,%xmm10] v50 = sqrt v10 ; bin: f2 44 0f 51 d5
|
|
; asm: sqrtsd %xmm10, %xmm5
|
|
[-,%xmm5] v51 = sqrt v11 ; bin: f2 41 0f 51 ea
|
|
|
|
; asm: roundsd $0, %xmm5, %xmm10
|
|
[-,%xmm10] v52 = nearest v10 ; bin: 66 44 0f 3a 0b d5 00
|
|
; asm: roundsd $0, %xmm10, %xmm5
|
|
[-,%xmm5] v53 = nearest v11 ; bin: 66 41 0f 3a 0b ea 00
|
|
; asm: roundsd $0, %xmm5, %xmm2
|
|
[-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0b d5 00
|
|
|
|
; asm: roundsd $1, %xmm5, %xmm10
|
|
[-,%xmm10] v55 = floor v10 ; bin: 66 44 0f 3a 0b d5 01
|
|
; asm: roundsd $1, %xmm10, %xmm5
|
|
[-,%xmm5] v56 = floor v11 ; bin: 66 41 0f 3a 0b ea 01
|
|
; asm: roundsd $1, %xmm5, %xmm2
|
|
[-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0b d5 01
|
|
|
|
; asm: roundsd $2, %xmm5, %xmm10
|
|
[-,%xmm10] v58 = ceil v10 ; bin: 66 44 0f 3a 0b d5 02
|
|
; asm: roundsd $2, %xmm10, %xmm5
|
|
[-,%xmm5] v59 = ceil v11 ; bin: 66 41 0f 3a 0b ea 02
|
|
; asm: roundsd $2, %xmm5, %xmm2
|
|
[-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0b d5 02
|
|
|
|
; asm: roundsd $3, %xmm5, %xmm10
|
|
[-,%xmm10] v61 = trunc v10 ; bin: 66 44 0f 3a 0b d5 03
|
|
; asm: roundsd $3, %xmm10, %xmm5
|
|
[-,%xmm5] v62 = trunc v11 ; bin: 66 41 0f 3a 0b ea 03
|
|
; asm: roundsd $3, %xmm5, %xmm2
|
|
[-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0b d5 03
|
|
|
|
; Load/Store
; The `heap_oob` token that leads each expected output below is a trap code
; printed ahead of the instruction bytes (none of these accesses is `notrap`).
|
|
|
|
; asm: movsd (%r14), %xmm5
|
|
[-,%xmm5] v100 = load.f64 v3 ; bin: heap_oob f2 41 0f 10 2e
|
|
; asm: movsd (%rax), %xmm10
|
|
[-,%xmm10] v101 = load.f64 v2 ; bin: heap_oob f2 44 0f 10 10
|
|
; asm: movsd 50(%r14), %xmm5
|
|
[-,%xmm5] v110 = load.f64 v3+50 ; bin: heap_oob f2 41 0f 10 6e 32
|
|
; asm: movsd -50(%rax), %xmm10
|
|
[-,%xmm10] v111 = load.f64 v2-50 ; bin: heap_oob f2 44 0f 10 50 ce
|
|
; asm: movsd 10000(%r14), %xmm5
|
|
[-,%xmm5] v120 = load.f64 v3+10000 ; bin: heap_oob f2 41 0f 10 ae 00002710
|
|
; asm: movsd -10000(%rax), %xmm10
|
|
[-,%xmm10] v121 = load.f64 v2-10000 ; bin: heap_oob f2 44 0f 10 90 ffffd8f0
|
|
|
|
; asm: movsd %xmm5, (%r14)
|
|
[-] store.f64 v100, v3 ; bin: heap_oob f2 41 0f 11 2e
|
|
; asm: movsd %xmm10, (%rax)
|
|
[-] store.f64 v101, v2 ; bin: heap_oob f2 44 0f 11 10
|
|
; asm: movsd %xmm5, (%r13)
|
|
[-] store.f64 v100, v4 ; bin: heap_oob f2 41 0f 11 6d 00
|
|
; asm: movsd %xmm10, (%r13)
|
|
[-] store.f64 v101, v4 ; bin: heap_oob f2 45 0f 11 55 00
|
|
; asm: movsd %xmm5, 50(%r14)
|
|
[-] store.f64 v100, v3+50 ; bin: heap_oob f2 41 0f 11 6e 32
|
|
; asm: movsd %xmm10, -50(%rax)
|
|
[-] store.f64 v101, v2-50 ; bin: heap_oob f2 44 0f 11 50 ce
|
|
; asm: movsd %xmm5, 10000(%r14)
|
|
[-] store.f64 v100, v3+10000 ; bin: heap_oob f2 41 0f 11 ae 00002710
|
|
; asm: movsd %xmm10, -10000(%rax)
|
|
[-] store.f64 v101, v2-10000 ; bin: heap_oob f2 44 0f 11 90 ffffd8f0
|
|
|
|
; Spill / Fill.
|
|
|
|
; asm: movsd %xmm5, 1032(%rsp)
|
|
[-,ss1] v200 = spill v100 ; bin: f2 0f 11 ac 24 00000408
|
|
; asm: movsd %xmm10, 1032(%rsp)
|
|
[-,ss1] v201 = spill v101 ; bin: f2 44 0f 11 94 24 00000408
|
|
|
|
; asm: movsd 1032(%rsp), %xmm5
|
|
[-,%xmm5] v210 = fill v200 ; bin: f2 0f 10 ac 24 00000408
|
|
; asm: movsd 1032(%rsp), %xmm10
|
|
[-,%xmm10] v211 = fill v201 ; bin: f2 44 0f 10 94 24 00000408
|
|
|
|
; asm: movsd %xmm5, 1032(%rsp)
|
|
regspill v100, %xmm5 -> ss1 ; bin: f2 0f 11 ac 24 00000408
|
|
; asm: movsd 1032(%rsp), %xmm5
|
|
regfill v100, ss1 -> %xmm5 ; bin: f2 0f 10 ac 24 00000408
|
|
|
|
; Comparisons.
|
|
;
|
|
; Only `supported_floatccs` are tested here. Others are handled by
|
|
; legalization patterns.
|
|
|
|
; asm: ucomisd %xmm10, %xmm5
|
|
; asm: setnp %bl
|
|
[-,%rbx] v300 = fcmp ord v10, v11 ; bin: 66 41 0f 2e ea 0f 9b c3
|
|
; asm: ucomisd %xmm5, %xmm10
|
|
; asm: setp %bl
|
|
[-,%rbx] v301 = fcmp uno v11, v10 ; bin: 66 44 0f 2e d5 0f 9a c3
|
|
; asm: ucomisd %xmm10, %xmm5
|
|
; asm: setne %dl
|
|
[-,%rdx] v302 = fcmp one v10, v11 ; bin: 66 41 0f 2e ea 0f 95 c2
|
|
; asm: ucomisd %xmm5, %xmm10
|
|
; asm: sete %dl
|
|
[-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 66 44 0f 2e d5 0f 94 c2
|
|
; asm: ucomisd %xmm10, %xmm5
|
|
; asm: seta %bl
|
|
[-,%rbx] v304 = fcmp gt v10, v11 ; bin: 66 41 0f 2e ea 0f 97 c3
|
|
; asm: ucomisd %xmm5, %xmm10
|
|
; asm: setae %bl
|
|
[-,%rbx] v305 = fcmp ge v11, v10 ; bin: 66 44 0f 2e d5 0f 93 c3
|
|
; asm: ucomisd %xmm10, %xmm5
|
|
; asm: setb %dl
|
|
[-,%rdx] v306 = fcmp ult v10, v11 ; bin: 66 41 0f 2e ea 0f 92 c2
|
|
; asm: ucomisd %xmm5, %xmm10
|
|
; asm: setbe %dl
|
|
[-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 44 0f 2e d5 0f 96 c2
|
|
|
|
; asm: ucomisd %xmm10, %xmm5
|
|
[-,%rflags] v310 = ffcmp v10, v11 ; bin: 66 41 0f 2e ea
|
|
; asm: ucomisd %xmm5, %xmm10
|
|
[-,%rflags] v311 = ffcmp v11, v10 ; bin: 66 44 0f 2e d5
|
|
; asm: ucomisd %xmm5, %xmm5
|
|
[-,%rflags] v312 = ffcmp v10, v10 ; bin: 66 0f 2e ed
|
|
|
|
return
|
|
}
|
|
|
|
; %cpuflags_float: binary emission checks for instructions that consume the
; CPU flags value produced by a floating point compare (ffcmp). Covers brff,
; trapff and trueff, once for each of the eight tested condition codes.
function %cpuflags_float(f32 [%xmm0]) {
|
|
ebb0(v0: f32 [%xmm0]):
|
|
; asm: ucomiss %xmm0, %xmm0
|
|
[-,%rflags] v1 = ffcmp v0, v0 ; bin: 0f 2e c0
|
|
|
|
jump ebb1
|
|
|
|
ebb1:
|
|
; Every brff below targets ebb1, whose start is the first of these 2-byte
; short jumps, so the expected rel8 displacement steps down by 2 each time
; (fe, fc, fa, ...).
; asm: jnp ebb1
|
|
brff ord v1, ebb1 ; bin: 7b fe
|
|
; asm: jp ebb1
|
|
brff uno v1, ebb1 ; bin: 7a fc
|
|
; asm: jne ebb1
|
|
brff one v1, ebb1 ; bin: 75 fa
|
|
; asm: je ebb1
|
|
brff ueq v1, ebb1 ; bin: 74 f8
|
|
; asm: ja ebb1
|
|
brff gt v1, ebb1 ; bin: 77 f6
|
|
; asm: jae ebb1
|
|
brff ge v1, ebb1 ; bin: 73 f4
|
|
; asm: jb ebb1
|
|
brff ult v1, ebb1 ; bin: 72 f2
|
|
; asm: jbe ebb1
|
|
brff ule v1, ebb1 ; bin: 76 f0
|
|
|
|
; Each trapff is expected to encode as a short jump on the inverted condition
; that skips a 2-byte ud2 (0f 0b). The `user0` trap code appears in the
; expected output between the jump bytes and the ud2 bytes.
; asm: jp .+4; ud2
|
|
trapff ord v1, user0 ; bin: 7a 02 user0 0f 0b
|
|
; asm: jnp .+4; ud2
|
|
trapff uno v1, user0 ; bin: 7b 02 user0 0f 0b
|
|
; asm: je .+4; ud2
|
|
trapff one v1, user0 ; bin: 74 02 user0 0f 0b
|
|
; asm: jne .+4; ud2
|
|
trapff ueq v1, user0 ; bin: 75 02 user0 0f 0b
|
|
; asm: jna .+4; ud2
|
|
trapff gt v1, user0 ; bin: 76 02 user0 0f 0b
|
|
; asm: jnae .+4; ud2
|
|
trapff ge v1, user0 ; bin: 72 02 user0 0f 0b
|
|
; asm: jnb .+4; ud2
|
|
trapff ult v1, user0 ; bin: 73 02 user0 0f 0b
|
|
; asm: jnbe .+4; ud2
|
|
trapff ule v1, user0 ; bin: 77 02 user0 0f 0b
|
|
|
|
; asm: setnp %bl
|
|
[-,%rbx] v10 = trueff ord v1 ; bin: 0f 9b c3
|
|
; asm: setp %bl
|
|
[-,%rbx] v11 = trueff uno v1 ; bin: 0f 9a c3
|
|
; asm: setne %dl
|
|
[-,%rdx] v12 = trueff one v1 ; bin: 0f 95 c2
|
|
; asm: sete %dl
|
|
[-,%rdx] v13 = trueff ueq v1 ; bin: 0f 94 c2
|
|
; asm: seta %r10b
|
|
[-,%r10] v14 = trueff gt v1 ; bin: 41 0f 97 c2
|
|
; asm: setae %r10b
|
|
[-,%r10] v15 = trueff ge v1 ; bin: 41 0f 93 c2
|
|
; asm: setb %r14b
|
|
[-,%r14] v16 = trueff ult v1 ; bin: 41 0f 92 c6
|
|
; asm: setbe %r14b
|
|
[-,%r14] v17 = trueff ule v1 ; bin: 41 0f 96 c6
|
|
|
|
return
|
|
}
|