Optimize vconst for x86 when immediate contains all zeroes or ones
Instead of using MOVUPS to load the bits from memory, which is expensive, this change uses a predicate on the immediate to lower vconst without a memory access:
- when every bit of the 128-bit immediate is zero, use PXOR to zero out an XMM register
- when every bit of the 128-bit immediate is one, use PCMPEQB to set an XMM register to all ones

TODO: the constant data is still left in the constant pool, which may increase code size.
This commit is contained in:
@@ -5,7 +5,7 @@ target x86_64
|
||||
|
||||
function %test_vconst_b8() {
|
||||
ebb0:
|
||||
[-, %xmm2] v0 = vconst.b8x16 0x00 ; bin: 0f 10 15 00000008 PCRelRodata4(15)
|
||||
[-, %xmm3] v1 = vconst.b8x16 0x01 ; bin: 0f 10 1d 00000011 PCRelRodata4(31)
|
||||
[-, %xmm2] v0 = vconst.b8x16 0x01 ; bin: 0f 10 15 00000008 PCRelRodata4(15)
|
||||
[-, %xmm3] v1 = vconst.b8x16 0x02 ; bin: 0f 10 1d 00000011 PCRelRodata4(31)
|
||||
return
|
||||
}
|
||||
|
||||
23
cranelift/filetests/filetests/isa/x86/vconst-opt-run.clif
Normal file
23
cranelift/filetests/filetests/isa/x86/vconst-opt-run.clif
Normal file
@@ -0,0 +1,23 @@
|
||||
test run
|
||||
set enable_simd
|
||||
target x86_64
|
||||
|
||||
; TODO move to vconst-run.clif
|
||||
|
||||
; Materializes an all-zeroes i8x16 constant, reads lane 4, and yields true
; when that lane equals 0. An all-zeroes 128-bit immediate is the case this
; change lowers to PXOR rather than a MOVUPS load from memory.
function %test_vconst_zeroes() -> b1 {
|
||||
ebb0:
|
||||
v0 = vconst.i8x16 0x00
|
||||
v1 = extractlane v0, 4
|
||||
v2 = icmp_imm eq v1, 0
|
||||
return v2
|
||||
}
|
||||
; run
|
||||
|
||||
; Materializes an all-ones i8x16 constant, reads lane 2, and yields true
; when that lane compares equal to 0xff. An all-ones 128-bit immediate is the
; case this change lowers to PCMPEQB rather than a MOVUPS load from memory.
function %test_vconst_ones() -> b1 {
|
||||
ebb0:
|
||||
v0 = vconst.i8x16 0xffffffffffffffffffffffffffffffff
|
||||
v1 = extractlane v0, 2
|
||||
v2 = icmp_imm eq v1, 0xff
|
||||
return v2
|
||||
}
|
||||
; run
|
||||
12
cranelift/filetests/filetests/isa/x86/vconst-opt.clif
Normal file
12
cranelift/filetests/filetests/isa/x86/vconst-opt.clif
Normal file
@@ -0,0 +1,12 @@
|
||||
test binemit
|
||||
set enable_simd
|
||||
target x86_64
|
||||
|
||||
; TODO move to vconst-compile.clif or vconst-binemit.clif
|
||||
|
||||
; Pins the machine code expected for the two special-cased constants:
;   - all zeroes placed in %xmm4 -> 66 0f ef e4 (PXOR xmm4, xmm4)
;   - all ones placed in %xmm7   -> 66 0f 74 ff (PCMPEQB xmm7, xmm7)
; Neither expectation carries a PCRelRodata4 relocation, i.e. the value is
; produced without a constant-pool load.
function %test_vconst_optimizations() {
|
||||
ebb0:
|
||||
[-, %xmm4] v0 = vconst.b8x16 0x00 ; bin: 66 0f ef e4
|
||||
[-, %xmm7] v1 = vconst.b8x16 0xffffffffffffffffffffffffffffffff ; bin: 66 0f 74 ff
|
||||
return
|
||||
}
|
||||
Reference in New Issue
Block a user