Avoid extra register movement when lowering the x86 extractlane of a float vector

This commit assumes that floats are already stored in XMM registers on x86. When extracting a lane, Cranelift was moving the float to a general-purpose register and back to an XMM register; this change avoids that by shuffling the float value into the lowest bits of the XMM register. It also assumes that the upper bits can be left as-is (instead of being zeroed out).
This commit is contained in:
Andrew Brown
2019-08-21 16:56:11 -07:00
parent f1363168a9
commit 00bedca274
7 changed files with 154 additions and 43 deletions

View File

@@ -0,0 +1,38 @@
test binemit
set enable_simd
target x86_64 haswell
; for extractlane, floats are legalized differently than integers and booleans; integers and booleans use x86_pextr
; which is manually placed in the IR so that it can be binemit-tested
; Encoding check for x86_pextr on a b8x16 vector: v1 (a splat of `true`, in %xmm0) has lane 10
; extracted into %rax. The expected machine-code bytes are listed in the directive that trails the
; x86_pextr instruction.
function %test_extractlane_b8() {
ebb0:
[-, %rax] v0 = bconst.b8 true
[-, %xmm0] v1 = splat.b8x16 v0
[-, %rax] v2 = x86_pextr v1, 10 ; bin: 66 0f 3a 14 c0 0a
return
}
; Encoding check for x86_pextr on an i16x8 vector: v1 (a splat of the constant 4, in %xmm1) has
; lane 4 extracted into %rax. The expected machine-code bytes are listed in the directive that
; trails the x86_pextr instruction.
function %test_extractlane_i16() {
ebb0:
[-, %rax] v0 = iconst.i16 4
[-, %xmm1] v1 = splat.i16x8 v0
[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f c5 c8 04
return
}
; Encoding check for x86_pextr on an i32x4 vector: v1 (a splat of the constant 42, in %xmm4) has
; lane 2 extracted into %rcx. The expected machine-code bytes are listed in the directive that
; trails the x86_pextr instruction.
function %test_extractlane_i32() {
ebb0:
[-, %rax] v0 = iconst.i32 42
[-, %xmm4] v1 = splat.i32x4 v0
[-, %rcx] v2 = x86_pextr v1, 2 ; bin: 66 0f 3a 16 e1 02
return
}
; Encoding check for x86_pextr on a b64x2 vector: v1 (a splat of `false`, in %xmm2) has lane 1
; extracted into %rbx. The expected machine-code bytes are listed in the directive that trails the
; x86_pextr instruction.
function %test_extractlane_b64() {
ebb0:
[-, %rax] v0 = bconst.b64 false
[-, %xmm2] v1 = splat.b64x2 v0
[-, %rbx] v2 = x86_pextr v1, 1 ; bin: 66 48 0f 3a 16 d3 01
return
}

View File

@@ -0,0 +1,31 @@
test run
set enable_simd
; Extracts lane 10 of a b8x16 constant and returns it. Lane 10 (counting from 0) is the only `true`
; element in the list, so the function yields `true` only if the intended lane is selected.
function %test_extractlane_b8() -> b8 {
ebb0:
v1 = vconst.b8x16 [false false false false false false false false false false true false false
false false false]
v2 = extractlane v1, 10
return v2
}
; run
; Extracts lane 1 of an i16x8 constant and returns whether it equals 2. The constant is written as a
; single 128-bit hex literal; the comparison against 2 implies lanes are counted from the low-order
; end of that literal (lane 0 = 0x0001, lane 1 = 0x0002, ...).
function %test_extractlane_i16() -> b1 {
ebb0:
v0 = vconst.i16x8 0x00080007000600050004000300020001
v1 = extractlane v0, 1
v2 = icmp_imm eq v1, 2
return v2
}
; run
; Extracts lane 3 of an f32x4 constant (the only lane set to 0x42.42) and returns whether it compares
; equal to a scalar f32 constant of the same value. v0 is defined but not used by the comparison;
; v10 re-materializes the same constant instead (see the TODO on that line).
function %test_extractlane_f32() -> b1 {
ebb0:
v0 = f32const 0x42.42
v1 = vconst.f32x4 [0x00.00 0x00.00 0x00.00 0x42.42]
v2 = extractlane v1, 3
v10 = f32const 0x42.42 ; TODO this should not be necessary, v0 should be re-usable
v3 = fcmp eq v2, v10
return v3
}
; run

View File

@@ -1,35 +0,0 @@
test binemit
set enable_simd
target x86_64 haswell
; Encoding check for extractlane on a b8x16 vector: v1 (a splat of `true`, in %xmm0) has lane 10
; extracted into %rax. The expected machine-code bytes are listed in the directive that trails the
; extractlane instruction.
function %test_extractlane_b8() {
ebb0:
[-, %rax] v0 = bconst.b8 true
[-, %xmm0] v1 = splat.b8x16 v0
[-, %rax] v2 = extractlane v1, 10 ; bin: 66 0f 3a 14 c0 0a
return
}
; Encoding check for extractlane on an i16x8 vector: v1 (a splat of the constant 4, in %xmm1) has
; lane 4 extracted into %rax. The expected machine-code bytes are listed in the directive that
; trails the extractlane instruction.
function %test_extractlane_i16() {
ebb0:
[-, %rax] v0 = iconst.i16 4
[-, %xmm1] v1 = splat.i16x8 v0
[-, %rax] v2 = extractlane v1, 4 ; bin: 66 0f c5 c8 04
return
}
; Encoding check for extractlane on an i32x4 vector: v1 (a splat of the constant 42, in %xmm4) has
; lane 2 extracted into %rcx. The expected machine-code bytes are listed in the directive that
; trails the extractlane instruction.
function %test_extractlane_i32() {
ebb0:
[-, %rax] v0 = iconst.i32 42
[-, %xmm4] v1 = splat.i32x4 v0
[-, %rcx] v2 = extractlane v1, 2 ; bin: 66 0f 3a 16 e1 02
return
}
; Encoding check for extractlane on an f64x2 vector: v1 (a splat of 0x0.0, in %xmm2) has lane 1
; extracted into %rbx. The expected machine-code bytes are listed in the directive that trails the
; extractlane instruction.
; NOTE(review): both the f64 constant (v0) and the extracted f64 lane (v2) are annotated with
; non-XMM registers (%rax, %rbx) -- confirm whether that placement is intended for float values.
function %test_extractlane_f64() {
ebb0:
[-, %rax] v0 = f64const 0x0.0
[-, %xmm2] v1 = splat.f64x2 v0
[-, %rbx] v2 = extractlane v1, 1 ; bin: 66 48 0f 3a 16 d3 01
return
}