Use movss/movsd rather than movd/movq for floating-point loads and stores.
While there may be CPUs that have a domain-crossing penalty here, this also helps the generated code look more like the code produced by other compilers.
This commit is contained in:
@@ -147,48 +147,48 @@ ebb0:
|
||||
|
||||
; Load/Store
|
||||
|
||||
; asm: movd (%ecx), %xmm5
|
||||
[-,%xmm5] v100 = load.f32 v0 ; bin: 66 0f 6e 29
|
||||
; asm: movd (%esi), %xmm2
|
||||
[-,%xmm2] v101 = load.f32 v1 ; bin: 66 0f 6e 16
|
||||
; asm: movd 50(%ecx), %xmm5
|
||||
[-,%xmm5] v110 = load.f32 v0+50 ; bin: 66 0f 6e 69 32
|
||||
; asm: movd -50(%esi), %xmm2
|
||||
[-,%xmm2] v111 = load.f32 v1-50 ; bin: 66 0f 6e 56 ce
|
||||
; asm: movd 10000(%ecx), %xmm5
|
||||
[-,%xmm5] v120 = load.f32 v0+10000 ; bin: 66 0f 6e a9 00002710
|
||||
; asm: movd -10000(%esi), %xmm2
|
||||
[-,%xmm2] v121 = load.f32 v1-10000 ; bin: 66 0f 6e 96 ffffd8f0
|
||||
; asm: movss (%ecx), %xmm5
|
||||
[-,%xmm5] v100 = load.f32 v0 ; bin: f3 0f 10 29
|
||||
; asm: movss (%esi), %xmm2
|
||||
[-,%xmm2] v101 = load.f32 v1 ; bin: f3 0f 10 16
|
||||
; asm: movss 50(%ecx), %xmm5
|
||||
[-,%xmm5] v110 = load.f32 v0+50 ; bin: f3 0f 10 69 32
|
||||
; asm: movss -50(%esi), %xmm2
|
||||
[-,%xmm2] v111 = load.f32 v1-50 ; bin: f3 0f 10 56 ce
|
||||
; asm: movss 10000(%ecx), %xmm5
|
||||
[-,%xmm5] v120 = load.f32 v0+10000 ; bin: f3 0f 10 a9 00002710
|
||||
; asm: movss -10000(%esi), %xmm2
|
||||
[-,%xmm2] v121 = load.f32 v1-10000 ; bin: f3 0f 10 96 ffffd8f0
|
||||
|
||||
; asm: movd %xmm5, (%ecx)
|
||||
[-] store.f32 v100, v0 ; bin: 66 0f 7e 29
|
||||
; asm: movd %xmm2, (%esi)
|
||||
[-] store.f32 v101, v1 ; bin: 66 0f 7e 16
|
||||
; asm: movd %xmm5, 50(%ecx)
|
||||
[-] store.f32 v100, v0+50 ; bin: 66 0f 7e 69 32
|
||||
; asm: movd %xmm2, -50(%esi)
|
||||
[-] store.f32 v101, v1-50 ; bin: 66 0f 7e 56 ce
|
||||
; asm: movd %xmm5, 10000(%ecx)
|
||||
[-] store.f32 v100, v0+10000 ; bin: 66 0f 7e a9 00002710
|
||||
; asm: movd %xmm2, -10000(%esi)
|
||||
[-] store.f32 v101, v1-10000 ; bin: 66 0f 7e 96 ffffd8f0
|
||||
; asm: movss %xmm5, (%ecx)
|
||||
[-] store.f32 v100, v0 ; bin: f3 0f 11 29
|
||||
; asm: movss %xmm2, (%esi)
|
||||
[-] store.f32 v101, v1 ; bin: f3 0f 11 16
|
||||
; asm: movss %xmm5, 50(%ecx)
|
||||
[-] store.f32 v100, v0+50 ; bin: f3 0f 11 69 32
|
||||
; asm: movss %xmm2, -50(%esi)
|
||||
[-] store.f32 v101, v1-50 ; bin: f3 0f 11 56 ce
|
||||
; asm: movss %xmm5, 10000(%ecx)
|
||||
[-] store.f32 v100, v0+10000 ; bin: f3 0f 11 a9 00002710
|
||||
; asm: movss %xmm2, -10000(%esi)
|
||||
[-] store.f32 v101, v1-10000 ; bin: f3 0f 11 96 ffffd8f0
|
||||
|
||||
; Spill / Fill.
|
||||
|
||||
; asm: movd %xmm5, 1032(%esp)
|
||||
[-,ss1] v200 = spill v100 ; bin: 66 0f 7e ac 24 00000408
|
||||
; asm: movd %xmm2, 1032(%esp)
|
||||
[-,ss1] v201 = spill v101 ; bin: 66 0f 7e 94 24 00000408
|
||||
; asm: movss %xmm5, 1032(%esp)
|
||||
[-,ss1] v200 = spill v100 ; bin: f3 0f 11 ac 24 00000408
|
||||
; asm: movss %xmm2, 1032(%esp)
|
||||
[-,ss1] v201 = spill v101 ; bin: f3 0f 11 94 24 00000408
|
||||
|
||||
; asm: movd 1032(%esp), %xmm5
|
||||
[-,%xmm5] v210 = fill v200 ; bin: 66 0f 6e ac 24 00000408
|
||||
; asm: movd 1032(%esp), %xmm2
|
||||
[-,%xmm2] v211 = fill v201 ; bin: 66 0f 6e 94 24 00000408
|
||||
; asm: movss 1032(%esp), %xmm5
|
||||
[-,%xmm5] v210 = fill v200 ; bin: f3 0f 10 ac 24 00000408
|
||||
; asm: movss 1032(%esp), %xmm2
|
||||
[-,%xmm2] v211 = fill v201 ; bin: f3 0f 10 94 24 00000408
|
||||
|
||||
; asm: movd %xmm5, 1032(%rsp)
|
||||
regspill v100, %xmm5 -> ss1 ; bin: 66 0f 7e ac 24 00000408
|
||||
; asm: movd 1032(%rsp), %xmm5
|
||||
regfill v100, ss1 -> %xmm5 ; bin: 66 0f 6e ac 24 00000408
|
||||
; asm: movss %xmm5, 1032(%esp)
|
||||
regspill v100, %xmm5 -> ss1 ; bin: f3 0f 11 ac 24 00000408
|
||||
; asm: movss 1032(%esp), %xmm5
|
||||
regfill v100, ss1 -> %xmm5 ; bin: f3 0f 10 ac 24 00000408
|
||||
|
||||
; Comparisons.
|
||||
;
|
||||
@@ -362,48 +362,48 @@ ebb0:
|
||||
|
||||
; Load/Store
|
||||
|
||||
; asm: movq (%ecx), %xmm5
|
||||
[-,%xmm5] v100 = load.f64 v0 ; bin: f3 0f 7e 29
|
||||
; asm: movq (%esi), %xmm2
|
||||
[-,%xmm2] v101 = load.f64 v1 ; bin: f3 0f 7e 16
|
||||
; asm: movq 50(%ecx), %xmm5
|
||||
[-,%xmm5] v110 = load.f64 v0+50 ; bin: f3 0f 7e 69 32
|
||||
; asm: movq -50(%esi), %xmm2
|
||||
[-,%xmm2] v111 = load.f64 v1-50 ; bin: f3 0f 7e 56 ce
|
||||
; asm: movq 10000(%ecx), %xmm5
|
||||
[-,%xmm5] v120 = load.f64 v0+10000 ; bin: f3 0f 7e a9 00002710
|
||||
; asm: movq -10000(%esi), %xmm2
|
||||
[-,%xmm2] v121 = load.f64 v1-10000 ; bin: f3 0f 7e 96 ffffd8f0
|
||||
; asm: movsd (%ecx), %xmm5
|
||||
[-,%xmm5] v100 = load.f64 v0 ; bin: f2 0f 10 29
|
||||
; asm: movsd (%esi), %xmm2
|
||||
[-,%xmm2] v101 = load.f64 v1 ; bin: f2 0f 10 16
|
||||
; asm: movsd 50(%ecx), %xmm5
|
||||
[-,%xmm5] v110 = load.f64 v0+50 ; bin: f2 0f 10 69 32
|
||||
; asm: movsd -50(%esi), %xmm2
|
||||
[-,%xmm2] v111 = load.f64 v1-50 ; bin: f2 0f 10 56 ce
|
||||
; asm: movsd 10000(%ecx), %xmm5
|
||||
[-,%xmm5] v120 = load.f64 v0+10000 ; bin: f2 0f 10 a9 00002710
|
||||
; asm: movsd -10000(%esi), %xmm2
|
||||
[-,%xmm2] v121 = load.f64 v1-10000 ; bin: f2 0f 10 96 ffffd8f0
|
||||
|
||||
; asm: movq %xmm5, (%ecx)
|
||||
[-] store.f64 v100, v0 ; bin: 66 0f d6 29
|
||||
; asm: movq %xmm2, (%esi)
|
||||
[-] store.f64 v101, v1 ; bin: 66 0f d6 16
|
||||
; asm: movq %xmm5, 50(%ecx)
|
||||
[-] store.f64 v100, v0+50 ; bin: 66 0f d6 69 32
|
||||
; asm: movq %xmm2, -50(%esi)
|
||||
[-] store.f64 v101, v1-50 ; bin: 66 0f d6 56 ce
|
||||
; asm: movq %xmm5, 10000(%ecx)
|
||||
[-] store.f64 v100, v0+10000 ; bin: 66 0f d6 a9 00002710
|
||||
; asm: movq %xmm2, -10000(%esi)
|
||||
[-] store.f64 v101, v1-10000 ; bin: 66 0f d6 96 ffffd8f0
|
||||
; asm: movsd %xmm5, (%ecx)
|
||||
[-] store.f64 v100, v0 ; bin: f2 0f 11 29
|
||||
; asm: movsd %xmm2, (%esi)
|
||||
[-] store.f64 v101, v1 ; bin: f2 0f 11 16
|
||||
; asm: movsd %xmm5, 50(%ecx)
|
||||
[-] store.f64 v100, v0+50 ; bin: f2 0f 11 69 32
|
||||
; asm: movsd %xmm2, -50(%esi)
|
||||
[-] store.f64 v101, v1-50 ; bin: f2 0f 11 56 ce
|
||||
; asm: movsd %xmm5, 10000(%ecx)
|
||||
[-] store.f64 v100, v0+10000 ; bin: f2 0f 11 a9 00002710
|
||||
; asm: movsd %xmm2, -10000(%esi)
|
||||
[-] store.f64 v101, v1-10000 ; bin: f2 0f 11 96 ffffd8f0
|
||||
|
||||
; Spill / Fill.
|
||||
|
||||
; asm: movq %xmm5, 1032(%esp)
|
||||
[-,ss1] v200 = spill v100 ; bin: 66 0f d6 ac 24 00000408
|
||||
; asm: movq %xmm2, 1032(%esp)
|
||||
[-,ss1] v201 = spill v101 ; bin: 66 0f d6 94 24 00000408
|
||||
; asm: movsd %xmm5, 1032(%esp)
|
||||
[-,ss1] v200 = spill v100 ; bin: f2 0f 11 ac 24 00000408
|
||||
; asm: movsd %xmm2, 1032(%esp)
|
||||
[-,ss1] v201 = spill v101 ; bin: f2 0f 11 94 24 00000408
|
||||
|
||||
; asm: movq 1032(%esp), %xmm5
|
||||
[-,%xmm5] v210 = fill v200 ; bin: f3 0f 7e ac 24 00000408
|
||||
; asm: movq 1032(%esp), %xmm2
|
||||
[-,%xmm2] v211 = fill v201 ; bin: f3 0f 7e 94 24 00000408
|
||||
; asm: movsd 1032(%esp), %xmm5
|
||||
[-,%xmm5] v210 = fill v200 ; bin: f2 0f 10 ac 24 00000408
|
||||
; asm: movsd 1032(%esp), %xmm2
|
||||
[-,%xmm2] v211 = fill v201 ; bin: f2 0f 10 94 24 00000408
|
||||
|
||||
; asm: movq %xmm5, 1032(%rsp)
|
||||
regspill v100, %xmm5 -> ss1 ; bin: 66 0f d6 ac 24 00000408
|
||||
; asm: movq 1032(%rsp), %xmm5
|
||||
regfill v100, ss1 -> %xmm5 ; bin: f3 0f 7e ac 24 00000408
|
||||
; asm: movsd %xmm5, 1032(%esp)
|
||||
regspill v100, %xmm5 -> ss1 ; bin: f2 0f 11 ac 24 00000408
|
||||
; asm: movsd 1032(%esp), %xmm5
|
||||
regfill v100, ss1 -> %xmm5 ; bin: f2 0f 10 ac 24 00000408
|
||||
|
||||
; Comparisons.
|
||||
;
|
||||
|
||||
Reference in New Issue
Block a user