; Binary emission of 32-bit floating point code.
test binemit
isa intel has_sse2

; The binary encodings can be verified with the command:
;
;   sed -ne 's/^ *; asm: *//p' filetests/isa/intel/binary32-float.cton | llvm-mc -show-encoding -triple=i386
;
; Each instruction below is preceded by the AT&T-syntax assembly it is
; expected to produce and is followed by a `bin:` directive giving the expected
; encoding bytes. Register and stack-slot assignments are fixed by the
; `[-,loc]` annotation in front of every instruction.

; Single-precision (f32) encodings.
function %F32() {
    ; NOTE(review): the two 1024-byte slots presumably exist only to push ss1
    ; far enough from the stack pointer that the spill/fill tests below need a
    ; 32-bit displacement (1032(%esp)) -- confirm against the frame layout code.
    ss0 = incoming_arg 8, offset 0
    ss1 = incoming_arg 1024, offset -1024
    ss2 = incoming_arg 1024, offset -2048
    ss3 = incoming_arg 8, offset -2056

ebb0:
    ; Integer inputs used as conversion sources and as load/store addresses.
    [-,%rcx]            v0 = iconst.i32 1
    [-,%rsi]            v1 = iconst.i32 2

    ; Conversions and copies.

    ; asm: cvtsi2ss %ecx, %xmm5
    [-,%xmm5]           v10 = fcvt_from_sint.f32 v0         ; bin: f3 0f 2a e9
    ; asm: cvtsi2ss %esi, %xmm2
    [-,%xmm2]           v11 = fcvt_from_sint.f32 v1         ; bin: f3 0f 2a d6

    ; asm: cvtss2sd %xmm2, %xmm5
    [-,%xmm5]           v12 = fpromote.f64 v11              ; bin: f3 0f 5a ea
    ; asm: cvtss2sd %xmm5, %xmm2
    [-,%xmm2]           v13 = fpromote.f64 v10              ; bin: f3 0f 5a d5

    ; asm: movd %ecx, %xmm5
    [-,%xmm5]           v14 = bitcast.f32 v0                ; bin: 66 0f 6e e9
    ; asm: movd %esi, %xmm2
    [-,%xmm2]           v15 = bitcast.f32 v1                ; bin: 66 0f 6e d6

    ; asm: movd %xmm5, %ecx
    [-,%rcx]            v16 = bitcast.i32 v10               ; bin: 66 0f 7e e9
    ; asm: movd %xmm2, %esi
    [-,%rsi]            v17 = bitcast.i32 v11               ; bin: 66 0f 7e d6

    ; asm: movaps %xmm2, %xmm5
    [-,%xmm5]           v18 = copy v11                      ; bin: 0f 28 ea
    ; asm: movaps %xmm5, %xmm2
    [-,%xmm2]           v19 = copy v10                      ; bin: 0f 28 d5

    ; Binary arithmetic.

    ; asm: addss %xmm2, %xmm5
    [-,%xmm5]           v20 = fadd v10, v11                 ; bin: f3 0f 58 ea
    ; asm: addss %xmm5, %xmm2
    [-,%xmm2]           v21 = fadd v11, v10                 ; bin: f3 0f 58 d5

    ; asm: subss %xmm2, %xmm5
    [-,%xmm5]           v22 = fsub v10, v11                 ; bin: f3 0f 5c ea
    ; asm: subss %xmm5, %xmm2
    [-,%xmm2]           v23 = fsub v11, v10                 ; bin: f3 0f 5c d5

    ; asm: mulss %xmm2, %xmm5
    [-,%xmm5]           v24 = fmul v10, v11                 ; bin: f3 0f 59 ea
    ; asm: mulss %xmm5, %xmm2
    [-,%xmm2]           v25 = fmul v11, v10                 ; bin: f3 0f 59 d5

    ; asm: divss %xmm2, %xmm5
    [-,%xmm5]           v26 = fdiv v10, v11                 ; bin: f3 0f 5e ea
    ; asm: divss %xmm5, %xmm2
    [-,%xmm2]           v27 = fdiv v11, v10                 ; bin: f3 0f 5e d5

    ; Bitwise ops.
    ; We use the *ps SSE instructions for everything because they are smaller.

    ; asm: andps %xmm2, %xmm5
    [-,%xmm5]           v30 = band v10, v11                 ; bin: 0f 54 ea
    ; asm: andps %xmm5, %xmm2
    [-,%xmm2]           v31 = band v11, v10                 ; bin: 0f 54 d5

    ; asm: andnps %xmm2, %xmm5
    [-,%xmm5]           v32 = band_not v10, v11             ; bin: 0f 55 ea
    ; asm: andnps %xmm5, %xmm2
    [-,%xmm2]           v33 = band_not v11, v10             ; bin: 0f 55 d5

    ; asm: orps %xmm2, %xmm5
    [-,%xmm5]           v34 = bor v10, v11                  ; bin: 0f 56 ea
    ; asm: orps %xmm5, %xmm2
    [-,%xmm2]           v35 = bor v11, v10                  ; bin: 0f 56 d5

    ; asm: xorps %xmm2, %xmm5
    [-,%xmm5]           v36 = bxor v10, v11                 ; bin: 0f 57 ea
    ; asm: xorps %xmm5, %xmm2
    [-,%xmm2]           v37 = bxor v11, v10                 ; bin: 0f 57 d5

    ; Unary arithmetic.

    ; asm: sqrtss %xmm5, %xmm2
    [-,%xmm2]           v50 = sqrt v10                      ; bin: f3 0f 51 d5
    ; asm: sqrtss %xmm2, %xmm5
    [-,%xmm5]           v51 = sqrt v11                      ; bin: f3 0f 51 ea

    ; Rounding. The immediate operand selects the rounding mode:
    ; $0 = nearest, $1 = floor, $2 = ceil, $3 = trunc.
    ; NOTE(review): roundss is an SSE4.1 instruction, but the `isa` line at the
    ; top of this file only sets has_sse2 -- confirm whether has_sse41 must be
    ; enabled for these encodings to be selected.

    ; asm: roundss $0, %xmm5, %xmm4
    [-,%xmm4]           v52 = nearest v10                   ; bin: 66 0f 3a 0a e5 00
    ; asm: roundss $0, %xmm2, %xmm5
    [-,%xmm5]           v53 = nearest v11                   ; bin: 66 0f 3a 0a ea 00
    ; asm: roundss $0, %xmm5, %xmm2
    [-,%xmm2]           v54 = nearest v10                   ; bin: 66 0f 3a 0a d5 00

    ; asm: roundss $1, %xmm5, %xmm4
    [-,%xmm4]           v55 = floor v10                     ; bin: 66 0f 3a 0a e5 01
    ; asm: roundss $1, %xmm2, %xmm5
    [-,%xmm5]           v56 = floor v11                     ; bin: 66 0f 3a 0a ea 01
    ; asm: roundss $1, %xmm5, %xmm2
    [-,%xmm2]           v57 = floor v10                     ; bin: 66 0f 3a 0a d5 01

    ; asm: roundss $2, %xmm5, %xmm4
    [-,%xmm4]           v58 = ceil v10                      ; bin: 66 0f 3a 0a e5 02
    ; asm: roundss $2, %xmm2, %xmm5
    [-,%xmm5]           v59 = ceil v11                      ; bin: 66 0f 3a 0a ea 02
    ; asm: roundss $2, %xmm5, %xmm2
    [-,%xmm2]           v60 = ceil v10                      ; bin: 66 0f 3a 0a d5 02

    ; asm: roundss $3, %xmm5, %xmm4
    [-,%xmm4]           v61 = trunc v10                     ; bin: 66 0f 3a 0a e5 03
    ; asm: roundss $3, %xmm2, %xmm5
    [-,%xmm5]           v62 = trunc v11                     ; bin: 66 0f 3a 0a ea 03
    ; asm: roundss $3, %xmm5, %xmm2
    [-,%xmm2]           v63 = trunc v10                     ; bin: 66 0f 3a 0a d5 03

    ; Load/Store
    ; Each access is tested with no offset, an 8-bit offset (+/-50) and a
    ; 32-bit offset (+/-10000).

    ; asm: movd (%ecx), %xmm5
    [-,%xmm5]           v100 = load.f32 v0                  ; bin: 66 0f 6e 29
    ; asm: movd (%esi), %xmm2
    [-,%xmm2]           v101 = load.f32 v1                  ; bin: 66 0f 6e 16
    ; asm: movd 50(%ecx), %xmm5
    [-,%xmm5]           v110 = load.f32 v0+50               ; bin: 66 0f 6e 69 32
    ; asm: movd -50(%esi), %xmm2
    [-,%xmm2]           v111 = load.f32 v1-50               ; bin: 66 0f 6e 56 ce
    ; asm: movd 10000(%ecx), %xmm5
    [-,%xmm5]           v120 = load.f32 v0+10000            ; bin: 66 0f 6e a9 00002710
    ; asm: movd -10000(%esi), %xmm2
    [-,%xmm2]           v121 = load.f32 v1-10000            ; bin: 66 0f 6e 96 ffffd8f0

    ; asm: movd %xmm5, (%ecx)
    [-]                 store.f32 v100, v0                  ; bin: 66 0f 7e 29
    ; asm: movd %xmm2, (%esi)
    [-]                 store.f32 v101, v1                  ; bin: 66 0f 7e 16
    ; asm: movd %xmm5, 50(%ecx)
    [-]                 store.f32 v100, v0+50               ; bin: 66 0f 7e 69 32
    ; asm: movd %xmm2, -50(%esi)
    [-]                 store.f32 v101, v1-50               ; bin: 66 0f 7e 56 ce
    ; asm: movd %xmm5, 10000(%ecx)
    [-]                 store.f32 v100, v0+10000            ; bin: 66 0f 7e a9 00002710
    ; asm: movd %xmm2, -10000(%esi)
    [-]                 store.f32 v101, v1-10000            ; bin: 66 0f 7e 96 ffffd8f0

    ; Spill / Fill.

    ; asm: movd %xmm5, 1032(%esp)
    [-,ss1]             v200 = spill v100                   ; bin: 66 0f 7e ac 24 00000408
    ; asm: movd %xmm2, 1032(%esp)
    [-,ss1]             v201 = spill v101                   ; bin: 66 0f 7e 94 24 00000408

    ; asm: movd 1032(%esp), %xmm5
    [-,%xmm5]           v210 = fill v200                    ; bin: 66 0f 6e ac 24 00000408
    ; asm: movd 1032(%esp), %xmm2
    [-,%xmm2]           v211 = fill v201                    ; bin: 66 0f 6e 94 24 00000408

    ; Comparisons.
    ;
    ; Only `supported_floatccs` are tested here. Others are handled by
    ; legalization patterns.
    ; Each fcmp is expected to emit a ucomiss followed by a setCC on the
    ; result register, so every `bin:` line covers both instructions.

    ; asm: ucomiss %xmm2, %xmm5
    ; asm: setnp %bl
    [-,%rbx]            v300 = fcmp ord v10, v11            ; bin: 0f 2e ea 0f 9b c3
    ; asm: ucomiss %xmm5, %xmm2
    ; asm: setp %bl
    [-,%rbx]            v301 = fcmp uno v11, v10            ; bin: 0f 2e d5 0f 9a c3
    ; asm: ucomiss %xmm2, %xmm5
    ; asm: setne %dl
    [-,%rdx]            v302 = fcmp one v10, v11            ; bin: 0f 2e ea 0f 95 c2
    ; asm: ucomiss %xmm5, %xmm2
    ; asm: sete %dl
    [-,%rdx]            v303 = fcmp ueq v11, v10            ; bin: 0f 2e d5 0f 94 c2
    ; asm: ucomiss %xmm2, %xmm5
    ; asm: seta %bl
    [-,%rbx]            v304 = fcmp gt v10, v11             ; bin: 0f 2e ea 0f 97 c3
    ; asm: ucomiss %xmm5, %xmm2
    ; asm: setae %bl
    [-,%rbx]            v305 = fcmp ge v11, v10             ; bin: 0f 2e d5 0f 93 c3
    ; asm: ucomiss %xmm2, %xmm5
    ; asm: setb %dl
    [-,%rdx]            v306 = fcmp ult v10, v11            ; bin: 0f 2e ea 0f 92 c2
    ; asm: ucomiss %xmm5, %xmm2
    ; asm: setbe %dl
    [-,%rdx]            v307 = fcmp ule v11, v10            ; bin: 0f 2e d5 0f 96 c2

    return
}

; Double-precision (f64) encodings.
function %F64() {
    ; NOTE(review): the two 1024-byte slots presumably exist only to push ss1
    ; far enough from the stack pointer that the spill/fill tests below need a
    ; 32-bit displacement (1032(%esp)) -- confirm against the frame layout code.
    ss0 = incoming_arg 8, offset 0
    ss1 = incoming_arg 1024, offset -1024
    ss2 = incoming_arg 1024, offset -2048
    ss3 = incoming_arg 8, offset -2056

ebb0:
    ; Integer inputs used as conversion sources and as load/store addresses.
    [-,%rcx]            v0 = iconst.i32 1
    [-,%rsi]            v1 = iconst.i32 2

    ; Conversions and copies.

    ; asm: cvtsi2sd %ecx, %xmm5
    [-,%xmm5]           v10 = fcvt_from_sint.f64 v0         ; bin: f2 0f 2a e9
    ; asm: cvtsi2sd %esi, %xmm2
    [-,%xmm2]           v11 = fcvt_from_sint.f64 v1         ; bin: f2 0f 2a d6

    ; asm: cvtsd2ss %xmm2, %xmm5
    [-,%xmm5]           v12 = fdemote.f32 v11               ; bin: f2 0f 5a ea
    ; asm: cvtsd2ss %xmm5, %xmm2
    [-,%xmm2]           v13 = fdemote.f32 v10               ; bin: f2 0f 5a d5

    ; No i64 <-> f64 bitcasts in 32-bit mode.

    ; asm: movaps %xmm2, %xmm5
    [-,%xmm5]           v18 = copy v11                      ; bin: 0f 28 ea
    ; asm: movaps %xmm5, %xmm2
    [-,%xmm2]           v19 = copy v10                      ; bin: 0f 28 d5

    ; Binary arithmetic.

    ; asm: addsd %xmm2, %xmm5
    [-,%xmm5]           v20 = fadd v10, v11                 ; bin: f2 0f 58 ea
    ; asm: addsd %xmm5, %xmm2
    [-,%xmm2]           v21 = fadd v11, v10                 ; bin: f2 0f 58 d5

    ; asm: subsd %xmm2, %xmm5
    [-,%xmm5]           v22 = fsub v10, v11                 ; bin: f2 0f 5c ea
    ; asm: subsd %xmm5, %xmm2
    [-,%xmm2]           v23 = fsub v11, v10                 ; bin: f2 0f 5c d5

    ; asm: mulsd %xmm2, %xmm5
    [-,%xmm5]           v24 = fmul v10, v11                 ; bin: f2 0f 59 ea
    ; asm: mulsd %xmm5, %xmm2
    [-,%xmm2]           v25 = fmul v11, v10                 ; bin: f2 0f 59 d5

    ; asm: divsd %xmm2, %xmm5
    [-,%xmm5]           v26 = fdiv v10, v11                 ; bin: f2 0f 5e ea
    ; asm: divsd %xmm5, %xmm2
    [-,%xmm2]           v27 = fdiv v11, v10                 ; bin: f2 0f 5e d5

    ; Bitwise ops.
    ; We use the *ps SSE instructions for everything because they are smaller.

    ; asm: andps %xmm2, %xmm5
    [-,%xmm5]           v30 = band v10, v11                 ; bin: 0f 54 ea
    ; asm: andps %xmm5, %xmm2
    [-,%xmm2]           v31 = band v11, v10                 ; bin: 0f 54 d5

    ; asm: andnps %xmm2, %xmm5
    [-,%xmm5]           v32 = band_not v10, v11             ; bin: 0f 55 ea
    ; asm: andnps %xmm5, %xmm2
    [-,%xmm2]           v33 = band_not v11, v10             ; bin: 0f 55 d5

    ; asm: orps %xmm2, %xmm5
    [-,%xmm5]           v34 = bor v10, v11                  ; bin: 0f 56 ea
    ; asm: orps %xmm5, %xmm2
    [-,%xmm2]           v35 = bor v11, v10                  ; bin: 0f 56 d5

    ; asm: xorps %xmm2, %xmm5
    [-,%xmm5]           v36 = bxor v10, v11                 ; bin: 0f 57 ea
    ; asm: xorps %xmm5, %xmm2
    [-,%xmm2]           v37 = bxor v11, v10                 ; bin: 0f 57 d5

    ; Unary arithmetic.

    ; asm: sqrtsd %xmm5, %xmm2
    [-,%xmm2]           v50 = sqrt v10                      ; bin: f2 0f 51 d5
    ; asm: sqrtsd %xmm2, %xmm5
    [-,%xmm5]           v51 = sqrt v11                      ; bin: f2 0f 51 ea

    ; Rounding. The immediate operand selects the rounding mode:
    ; $0 = nearest, $1 = floor, $2 = ceil, $3 = trunc.
    ; NOTE(review): roundsd is an SSE4.1 instruction, but the `isa` line at the
    ; top of this file only sets has_sse2 -- confirm whether has_sse41 must be
    ; enabled for these encodings to be selected.

    ; asm: roundsd $0, %xmm5, %xmm4
    [-,%xmm4]           v52 = nearest v10                   ; bin: 66 0f 3a 0b e5 00
    ; asm: roundsd $0, %xmm2, %xmm5
    [-,%xmm5]           v53 = nearest v11                   ; bin: 66 0f 3a 0b ea 00
    ; asm: roundsd $0, %xmm5, %xmm2
    [-,%xmm2]           v54 = nearest v10                   ; bin: 66 0f 3a 0b d5 00

    ; asm: roundsd $1, %xmm5, %xmm4
    [-,%xmm4]           v55 = floor v10                     ; bin: 66 0f 3a 0b e5 01
    ; asm: roundsd $1, %xmm2, %xmm5
    [-,%xmm5]           v56 = floor v11                     ; bin: 66 0f 3a 0b ea 01
    ; asm: roundsd $1, %xmm5, %xmm2
    [-,%xmm2]           v57 = floor v10                     ; bin: 66 0f 3a 0b d5 01

    ; asm: roundsd $2, %xmm5, %xmm4
    [-,%xmm4]           v58 = ceil v10                      ; bin: 66 0f 3a 0b e5 02
    ; asm: roundsd $2, %xmm2, %xmm5
    [-,%xmm5]           v59 = ceil v11                      ; bin: 66 0f 3a 0b ea 02
    ; asm: roundsd $2, %xmm5, %xmm2
    [-,%xmm2]           v60 = ceil v10                      ; bin: 66 0f 3a 0b d5 02

    ; asm: roundsd $3, %xmm5, %xmm4
    [-,%xmm4]           v61 = trunc v10                     ; bin: 66 0f 3a 0b e5 03
    ; asm: roundsd $3, %xmm2, %xmm5
    [-,%xmm5]           v62 = trunc v11                     ; bin: 66 0f 3a 0b ea 03
    ; asm: roundsd $3, %xmm5, %xmm2
    [-,%xmm2]           v63 = trunc v10                     ; bin: 66 0f 3a 0b d5 03

    ; Load/Store
    ; Each access is tested with no offset, an 8-bit offset (+/-50) and a
    ; 32-bit offset (+/-10000).

    ; asm: movq (%ecx), %xmm5
    [-,%xmm5]           v100 = load.f64 v0                  ; bin: f3 0f 7e 29
    ; asm: movq (%esi), %xmm2
    [-,%xmm2]           v101 = load.f64 v1                  ; bin: f3 0f 7e 16
    ; asm: movq 50(%ecx), %xmm5
    [-,%xmm5]           v110 = load.f64 v0+50               ; bin: f3 0f 7e 69 32
    ; asm: movq -50(%esi), %xmm2
    [-,%xmm2]           v111 = load.f64 v1-50               ; bin: f3 0f 7e 56 ce
    ; asm: movq 10000(%ecx), %xmm5
    [-,%xmm5]           v120 = load.f64 v0+10000            ; bin: f3 0f 7e a9 00002710
    ; asm: movq -10000(%esi), %xmm2
    [-,%xmm2]           v121 = load.f64 v1-10000            ; bin: f3 0f 7e 96 ffffd8f0

    ; asm: movq %xmm5, (%ecx)
    [-]                 store.f64 v100, v0                  ; bin: 66 0f d6 29
    ; asm: movq %xmm2, (%esi)
    [-]                 store.f64 v101, v1                  ; bin: 66 0f d6 16
    ; asm: movq %xmm5, 50(%ecx)
    [-]                 store.f64 v100, v0+50               ; bin: 66 0f d6 69 32
    ; asm: movq %xmm2, -50(%esi)
    [-]                 store.f64 v101, v1-50               ; bin: 66 0f d6 56 ce
    ; asm: movq %xmm5, 10000(%ecx)
    [-]                 store.f64 v100, v0+10000            ; bin: 66 0f d6 a9 00002710
    ; asm: movq %xmm2, -10000(%esi)
    [-]                 store.f64 v101, v1-10000            ; bin: 66 0f d6 96 ffffd8f0

    ; Spill / Fill.

    ; asm: movq %xmm5, 1032(%esp)
    [-,ss1]             v200 = spill v100                   ; bin: 66 0f d6 ac 24 00000408
    ; asm: movq %xmm2, 1032(%esp)
    [-,ss1]             v201 = spill v101                   ; bin: 66 0f d6 94 24 00000408

    ; asm: movq 1032(%esp), %xmm5
    [-,%xmm5]           v210 = fill v200                    ; bin: f3 0f 7e ac 24 00000408
    ; asm: movq 1032(%esp), %xmm2
    [-,%xmm2]           v211 = fill v201                    ; bin: f3 0f 7e 94 24 00000408

    ; Comparisons.
    ;
    ; Only `supported_floatccs` are tested here. Others are handled by
    ; legalization patterns.
    ; Each fcmp is expected to emit a ucomisd followed by a setCC on the
    ; result register, so every `bin:` line covers both instructions.

    ; asm: ucomisd %xmm2, %xmm5
    ; asm: setnp %bl
    [-,%rbx]            v300 = fcmp ord v10, v11            ; bin: 66 0f 2e ea 0f 9b c3
    ; asm: ucomisd %xmm5, %xmm2
    ; asm: setp %bl
    [-,%rbx]            v301 = fcmp uno v11, v10            ; bin: 66 0f 2e d5 0f 9a c3
    ; asm: ucomisd %xmm2, %xmm5
    ; asm: setne %dl
    [-,%rdx]            v302 = fcmp one v10, v11            ; bin: 66 0f 2e ea 0f 95 c2
    ; asm: ucomisd %xmm5, %xmm2
    ; asm: sete %dl
    [-,%rdx]            v303 = fcmp ueq v11, v10            ; bin: 66 0f 2e d5 0f 94 c2
    ; asm: ucomisd %xmm2, %xmm5
    ; asm: seta %bl
    [-,%rbx]            v304 = fcmp gt v10, v11             ; bin: 66 0f 2e ea 0f 97 c3
    ; asm: ucomisd %xmm5, %xmm2
    ; asm: setae %bl
    [-,%rbx]            v305 = fcmp ge v11, v10             ; bin: 66 0f 2e d5 0f 93 c3
    ; asm: ucomisd %xmm2, %xmm5
    ; asm: setb %dl
    [-,%rdx]            v306 = fcmp ult v10, v11            ; bin: 66 0f 2e ea 0f 92 c2
    ; asm: ucomisd %xmm5, %xmm2
    ; asm: setbe %dl
    [-,%rdx]            v307 = fcmp ule v11, v10            ; bin: 66 0f 2e d5 0f 96 c2

    return
}