Adds support for legalizing CLZ, CTZ and POPCOUNT on baseline x86_64 targets.
Changes:
* Adds a new generic instruction, SELECTIF, that does value selection (a la conditional move) similarly to the existing SELECT, except that it is controlled by a condition-code input and a flags-register input.
* Adds a new Intel x86_64 variant, 'baseline', that supports SSE2 and nothing else.
* Adds new Intel x86_64 instructions BSR and BSF.
* Implements generic CLZ, CTZ and POPCOUNT on x86_64 'baseline' targets using the new BSR, BSF and SELECTIF instructions.
* Implements SELECTIF on x86_64 targets using conditional moves.
* Adds a new test, filetests/isa/intel/baseline_clz_ctz_popcount.cton (for legalization).
* Adds a new test, filetests/isa/intel/baseline_clz_ctz_popcount_encoding.cton (for encoding).
* Allows lib/cretonne/meta/gen_legalizer.py to generate non-snake-case Rust without rustc complaining.

Fixes #238.
This commit is contained in:
committed by
Jakob Stoklund Olesen
parent
e3714ddd10
commit
6f8a54b6a5
@@ -700,6 +700,7 @@ Operations
|
||||
==========
|
||||
|
||||
.. autoinst:: select
|
||||
.. autoinst:: selectif
|
||||
|
||||
Constant materialization
|
||||
------------------------
|
||||
@@ -979,6 +980,10 @@ Instructions that can only be used by the Intel target ISA.
|
||||
.. autoinst:: isa.intel.instructions.cvtt2si
|
||||
.. autoinst:: isa.intel.instructions.fmin
|
||||
.. autoinst:: isa.intel.instructions.fmax
|
||||
.. autoinst:: isa.intel.instructions.bsf
|
||||
.. autoinst:: isa.intel.instructions.bsr
|
||||
.. autoinst:: isa.intel.instructions.push
|
||||
.. autoinst:: isa.intel.instructions.pop
|
||||
|
||||
Instruction groups
|
||||
==================
|
||||
|
||||
104
cranelift/filetests/isa/intel/baseline_clz_ctz_popcount.cton
Normal file
104
cranelift/filetests/isa/intel/baseline_clz_ctz_popcount.cton
Normal file
@@ -0,0 +1,104 @@
|
||||
|
||||
test compile
|
||||
set is_64bit
|
||||
isa intel baseline
|
||||
|
||||
|
||||
; clz/ctz on 64 bit operands
|
||||
|
||||
function %i64_clz(i64) -> i64 {
|
||||
ebb0(v10: i64):
|
||||
v11 = clz v10
|
||||
; check: x86_bsr
|
||||
; check: selectif.i64
|
||||
return v11
|
||||
}
|
||||
|
||||
function %i64_ctz(i64) -> i64 {
|
||||
ebb1(v20: i64):
|
||||
v21 = ctz v20
|
||||
; check: x86_bsf
|
||||
; check: selectif.i64
|
||||
return v21
|
||||
}
|
||||
|
||||
|
||||
; clz/ctz on 32 bit operands
|
||||
|
||||
function %i32_clz(i32) -> i32 {
|
||||
ebb0(v10: i32):
|
||||
v11 = clz v10
|
||||
; check: x86_bsr
|
||||
; check: selectif.i32
|
||||
return v11
|
||||
}
|
||||
|
||||
function %i32_ctz(i32) -> i32 {
|
||||
ebb1(v20: i32):
|
||||
v21 = ctz v20
|
||||
; check: x86_bsf
|
||||
; check: selectif.i32
|
||||
return v21
|
||||
}
|
||||
|
||||
|
||||
; popcount on 64 bit operands
|
||||
|
||||
function %i64_popcount(i64) -> i64 {
|
||||
ebb0(v30: i64):
|
||||
v31 = popcnt v30;
|
||||
; check: iconst.i32
|
||||
; check: ushr
|
||||
; check: iconst.i64
|
||||
; check: band
|
||||
; check: isub
|
||||
; check: iconst.i32
|
||||
; check: ushr
|
||||
; check: band
|
||||
; check: isub
|
||||
; check: iconst.i32
|
||||
; check: ushr
|
||||
; check: band
|
||||
; check: isub
|
||||
; check: iconst.i32
|
||||
; check: ushr
|
||||
; check: iadd
|
||||
; check: iconst.i64
|
||||
; check: band
|
||||
; check: iconst.i64
|
||||
; check: imul
|
||||
; check: iconst.i32
|
||||
; check: ushr
|
||||
return v31;
|
||||
}
|
||||
|
||||
|
||||
; popcount on 32 bit operands
|
||||
|
||||
function %i32_popcount(i32) -> i32 {
|
||||
ebb0(v40: i32):
|
||||
v41 = popcnt v40;
|
||||
; check: iconst.i32
|
||||
; check: ushr
|
||||
; check: iconst.i32
|
||||
; check: band
|
||||
; check: isub
|
||||
; check: iconst.i32
|
||||
; check: ushr
|
||||
; check: band
|
||||
; check: isub
|
||||
; check: iconst.i32
|
||||
; check: ushr
|
||||
; check: band
|
||||
; check: isub
|
||||
; check: iconst.i32
|
||||
; check: ushr
|
||||
; check: iadd
|
||||
; check: iconst.i32
|
||||
; check: band
|
||||
; check: iconst.i32
|
||||
; check: imul
|
||||
; check: iconst.i32
|
||||
; check: ushr
|
||||
return v41;
|
||||
}
|
||||
@@ -0,0 +1,89 @@
|
||||
|
||||
test binemit
|
||||
set is_64bit
|
||||
set is_compressed
|
||||
isa intel baseline
|
||||
|
||||
; The binary encodings can be verified with the command:
|
||||
;
|
||||
; sed -ne 's/^ *; asm: *//p' filetests/isa/intel/baseline_clz_ctz_popcount_encoding.cton | llvm-mc -show-encoding -triple=x86_64
|
||||
;
|
||||
|
||||
function %Foo() {
|
||||
ebb0:
|
||||
; 64-bit wide bsf
|
||||
|
||||
[-,%r11] v10 = iconst.i64 0x1234
|
||||
; asm: bsfq %r11, %rcx
|
||||
[-,%rcx,%eflags] v11, v12 = x86_bsf v10 ; bin: 49 0f bc cb
|
||||
|
||||
[-,%rdx] v14 = iconst.i64 0x5678
|
||||
; asm: bsfq %rdx, %r12
|
||||
[-,%r12,%eflags] v15, v16 = x86_bsf v14 ; bin: 4c 0f bc e2
|
||||
|
||||
; asm: bsfq %rdx, %rdi
|
||||
[-,%rdi,%eflags] v17, v18 = x86_bsf v14 ; bin: 48 0f bc fa
|
||||
|
||||
|
||||
; 32-bit wide bsf
|
||||
|
||||
[-,%r11] v20 = iconst.i32 0x1234
|
||||
; asm: bsfl %r11d, %ecx
|
||||
[-,%rcx,%eflags] v21, v22 = x86_bsf v20 ; bin: 41 0f bc cb
|
||||
|
||||
[-,%rdx] v24 = iconst.i32 0x5678
|
||||
; asm: bsfl %edx, %r12d
|
||||
[-,%r12,%eflags] v25, v26 = x86_bsf v24 ; bin: 44 0f bc e2
|
||||
|
||||
; asm: bsfl %edx, %esi
|
||||
[-,%rsi,%eflags] v27, v28 = x86_bsf v24 ; bin: 0f bc f2
|
||||
|
||||
|
||||
; 64-bit wide bsr
|
||||
|
||||
[-,%r11] v30 = iconst.i64 0x1234
|
||||
; asm: bsrq %r11, %rcx
|
||||
[-,%rcx,%eflags] v31, v32 = x86_bsr v30 ; bin: 49 0f bd cb
|
||||
|
||||
[-,%rdx] v34 = iconst.i64 0x5678
|
||||
; asm: bsrq %rdx, %r12
|
||||
[-,%r12,%eflags] v35, v36 = x86_bsr v34 ; bin: 4c 0f bd e2
|
||||
|
||||
; asm: bsrq %rdx, %rdi
|
||||
[-,%rdi,%eflags] v37, v38 = x86_bsr v34 ; bin: 48 0f bd fa
|
||||
|
||||
|
||||
; 32-bit wide bsr
|
||||
|
||||
[-,%r11] v40 = iconst.i32 0x1234
|
||||
; asm: bsrl %r11d, %ecx
|
||||
[-,%rcx,%eflags] v41, v42 = x86_bsr v40 ; bin: 41 0f bd cb
|
||||
|
||||
[-,%rdx] v44 = iconst.i32 0x5678
|
||||
; asm: bsrl %edx, %r12d
|
||||
[-,%r12,%eflags] v45, v46 = x86_bsr v44 ; bin: 44 0f bd e2
|
||||
|
||||
; asm: bsrl %edx, %esi
|
||||
[-,%rsi,%eflags] v47, v48 = x86_bsr v44 ; bin: 0f bd f2
|
||||
|
||||
|
||||
; 64-bit wide cmov
|
||||
|
||||
; asm: cmoveq %r11, %rdx
|
||||
[-,%rdx] v51 = selectif.i64 eq v48, v30, v34 ; bin: 49 0f 44 d3
|
||||
|
||||
; asm: cmoveq %rdi, %rdx
|
||||
[-,%rdx] v52 = selectif.i64 eq v48, v37, v34 ; bin: 48 0f 44 d7
|
||||
|
||||
|
||||
; 32-bit wide cmov
|
||||
|
||||
; asm: cmovnel %r11d, %edx
|
||||
[-,%rdx] v60 = selectif.i32 ne v48, v40, v44 ; bin: 41 0f 45 d3
|
||||
|
||||
; asm: cmovlel %esi, %edx
|
||||
[-,%rdx] v61 = selectif.i32 sle v48, v27, v44 ; bin: 0f 4e d6
|
||||
|
||||
|
||||
trap user0
|
||||
}
|
||||
@@ -42,7 +42,7 @@ ebb0:
|
||||
; nextln: $v3 = bxor v0, v2
|
||||
; nextln: }
|
||||
|
||||
; Polymorphic istruction controlled by second operand.
|
||||
; Polymorphic instruction controlled by second operand.
|
||||
function %select() {
|
||||
ebb0(v90: i32, v91: i32, v92: b1):
|
||||
v0 = select v92, v90, v91
|
||||
@@ -52,6 +52,16 @@ ebb0(v90: i32, v91: i32, v92: b1):
|
||||
; nextln: $v0 = select $v92, $v90, $v91
|
||||
; nextln: }
|
||||
|
||||
; Polymorphic instruction controlled by third operand.
|
||||
function %selectif() native {
|
||||
ebb0(v95: i32, v96: i32, v97: b1):
|
||||
v98 = selectif.i32 eq v97, v95, v96
|
||||
}
|
||||
; sameln: function %selectif() native {
|
||||
; nextln: ebb0(v0: i32, v1: i32, v2: b1):
|
||||
; nextln: v3 = selectif.i32 eq v2, v0, v1
|
||||
; nextln: }
|
||||
|
||||
; Lane indexes.
|
||||
function %lanes() {
|
||||
ebb0:
|
||||
|
||||
Reference in New Issue
Block a user