Adds support for legalizing CLZ, CTZ and POPCOUNT on baseline x86_64 targets.

Changes:

* Adds a new generic instruction, SELECTIF, that performs value selection
  (a la conditional move) like the existing SELECT, except that it is
  controlled by a condition code and a flags-register input.

* Adds a new Intel x86_64 variant, 'baseline', that supports SSE2 and
  nothing else.

* Adds new Intel x86_64 instructions BSR and BSF.

* Implements generic CLZ, CTZ and POPCOUNT on x86_64 'baseline' targets
  using the new BSR, BSF and SELECTIF instructions.

* Implements SELECTIF on x86_64 targets using conditional-moves.

* Adds a new test, filetests/isa/intel/baseline_clz_ctz_popcount.cton
  (for legalization).

* Adds a new test, filetests/isa/intel/baseline_clz_ctz_popcount_encoding.cton
  (for encoding).

* Allows lib/cretonne/meta/gen_legalizer.py to generate Rust that is not
  snake case without rustc complaining.

Fixes #238.
This commit is contained in:
Julian Seward
2018-01-17 06:23:30 +01:00
committed by Jakob Stoklund Olesen
parent e3714ddd10
commit 6f8a54b6a5
16 changed files with 440 additions and 3 deletions

View File

@@ -700,6 +700,7 @@ Operations
==========
.. autoinst:: select
.. autoinst:: selectif
Constant materialization
------------------------
@@ -979,6 +980,10 @@ Instructions that can only be used by the Intel target ISA.
.. autoinst:: isa.intel.instructions.cvtt2si
.. autoinst:: isa.intel.instructions.fmin
.. autoinst:: isa.intel.instructions.fmax
.. autoinst:: isa.intel.instructions.bsf
.. autoinst:: isa.intel.instructions.bsr
.. autoinst:: isa.intel.instructions.push
.. autoinst:: isa.intel.instructions.pop
Instruction groups
==================

View File

@@ -0,0 +1,104 @@
test compile
set is_64bit
isa intel baseline
; clz/ctz on 64 bit operands
function %i64_clz(i64) -> i64 {
; Compile test for a 64-bit clz on the Intel 'baseline' ISA selected above.
; The directives below expect the output to contain an x86_bsr and, after it,
; a selectif.i64.
ebb0(v10: i64):
v11 = clz v10
; check: x86_bsr
; check: selectif.i64
return v11
}
function %i64_ctz(i64) -> i64 {
; Compile test for a 64-bit ctz on the Intel 'baseline' ISA selected above.
; The directives below expect the output to contain an x86_bsf and, after it,
; a selectif.i64.
ebb1(v20: i64):
v21 = ctz v20
; check: x86_bsf
; check: selectif.i64
return v21
}
; clz/ctz on 32 bit operands
function %i32_clz(i32) -> i32 {
; 32-bit counterpart of the i64 clz test. The directives below expect an
; x86_bsr and, after it, a selectif.i32.
ebb0(v10: i32):
v11 = clz v10
; check: x86_bsr
; check: selectif.i32
return v11
}
function %i32_ctz(i32) -> i32 {
; 32-bit counterpart of the i64 ctz test. The directives below expect an
; x86_bsf and, after it, a selectif.i32.
ebb1(v20: i32):
v21 = ctz v20
; check: x86_bsf
; check: selectif.i32
return v21
}
; popcount on 64 bit operands
function %i64_popcount(i64) -> i64 {
; Compile test for a 64-bit popcnt on the Intel 'baseline' ISA selected above.
; The directives below list the expected expansion in order. It is built only
; from iconst, ushr, band, isub, iadd and imul. Shift amounts are expected as
; iconst.i32, while the mask and multiplier constants are iconst.i64.
ebb0(v30: i64):
v31 = popcnt v30;
; First shift / mask / subtract step (the mask constant is materialized here).
; check: iconst.i32
; check: ushr
; check: iconst.i64
; check: band
; check: isub
; Second shift / mask / subtract step.
; check: iconst.i32
; check: ushr
; check: band
; check: isub
; Third shift / mask / subtract step.
; check: iconst.i32
; check: ushr
; check: band
; check: isub
; Shift and add, then mask with a fresh 64-bit constant.
; check: iconst.i32
; check: ushr
; check: iadd
; check: iconst.i64
; check: band
; Multiply by a 64-bit constant and take the final shift.
; check: iconst.i64
; check: imul
; check: iconst.i32
; check: ushr
return v31;
}
; popcount on 32 bit operands
function %i32_popcount(i32) -> i32 {
; Compile test for a 32-bit popcnt on the Intel 'baseline' ISA selected above.
; Same expected instruction sequence as the 64-bit variant, except that every
; constant, including the masks and the multiplier, is an iconst.i32.
ebb0(v40: i32):
v41 = popcnt v40;
; First shift / mask / subtract step (the mask constant is materialized here).
; check: iconst.i32
; check: ushr
; check: iconst.i32
; check: band
; check: isub
; Second shift / mask / subtract step.
; check: iconst.i32
; check: ushr
; check: band
; check: isub
; Third shift / mask / subtract step.
; check: iconst.i32
; check: ushr
; check: band
; check: isub
; Shift and add, then mask with a fresh constant.
; check: iconst.i32
; check: ushr
; check: iadd
; check: iconst.i32
; check: band
; Multiply by a constant and take the final shift.
; check: iconst.i32
; check: imul
; check: iconst.i32
; check: ushr
return v41;
}

View File

@@ -0,0 +1,89 @@
test binemit
set is_64bit
set is_compressed
isa intel baseline
; The binary encodings can be verified with the command:
;
; sed -ne 's/^ *; asm: *//p' filetests/isa/intel/baseline_clz_ctz_popcount_encoding.cton | llvm-mc -show-encoding -triple=x86_64
;
function %Foo() {
; Binary-encoding test for x86_bsf, x86_bsr and selectif on the Intel
; 'baseline' ISA. Each tested instruction carries the expected machine-code
; bytes in its trailing comment and the matching assembly on the line above.
; Both x86_bsf and x86_bsr define two results here: a value placed in a
; general-purpose register and a second result placed in %eflags.
ebb0:
; 64-bit wide bsf
[-,%r11] v10 = iconst.i64 0x1234
; asm: bsfq %r11, %rcx
[-,%rcx,%eflags] v11, v12 = x86_bsf v10 ; bin: 49 0f bc cb
[-,%rdx] v14 = iconst.i64 0x5678
; asm: bsfq %rdx, %r12
[-,%r12,%eflags] v15, v16 = x86_bsf v14 ; bin: 4c 0f bc e2
; asm: bsfq %rdx, %rdi
[-,%rdi,%eflags] v17, v18 = x86_bsf v14 ; bin: 48 0f bc fa
; 32-bit wide bsf
[-,%r11] v20 = iconst.i32 0x1234
; asm: bsfl %r11d, %ecx
[-,%rcx,%eflags] v21, v22 = x86_bsf v20 ; bin: 41 0f bc cb
[-,%rdx] v24 = iconst.i32 0x5678
; asm: bsfl %edx, %r12d
[-,%r12,%eflags] v25, v26 = x86_bsf v24 ; bin: 44 0f bc e2
; Source and destination are both low registers and the operand is 32 bits
; wide, so the expected bytes below carry no prefix byte before 0f bc.
; asm: bsfl %edx, %esi
[-,%rsi,%eflags] v27, v28 = x86_bsf v24 ; bin: 0f bc f2
; 64-bit wide bsr
[-,%r11] v30 = iconst.i64 0x1234
; asm: bsrq %r11, %rcx
[-,%rcx,%eflags] v31, v32 = x86_bsr v30 ; bin: 49 0f bd cb
[-,%rdx] v34 = iconst.i64 0x5678
; asm: bsrq %rdx, %r12
[-,%r12,%eflags] v35, v36 = x86_bsr v34 ; bin: 4c 0f bd e2
; asm: bsrq %rdx, %rdi
[-,%rdi,%eflags] v37, v38 = x86_bsr v34 ; bin: 48 0f bd fa
; 32-bit wide bsr
[-,%r11] v40 = iconst.i32 0x1234
; asm: bsrl %r11d, %ecx
[-,%rcx,%eflags] v41, v42 = x86_bsr v40 ; bin: 41 0f bd cb
[-,%rdx] v44 = iconst.i32 0x5678
; asm: bsrl %edx, %r12d
[-,%r12,%eflags] v45, v46 = x86_bsr v44 ; bin: 44 0f bd e2
; Same prefix-free form as the last 32-bit bsf case, with opcode bytes 0f bd.
; asm: bsrl %edx, %esi
[-,%rsi,%eflags] v47, v48 = x86_bsr v44 ; bin: 0f bd f2
; The selectif cases below all take v48 (the %eflags result of the last
; x86_bsr) as their flags operand. In every case the final operand (v34 or
; v44) lives in %rdx and the result is also assigned to %rdx, matching the
; two-operand cmov form shown in the assembly.
; 64-bit wide cmov
; asm: cmoveq %r11, %rdx
[-,%rdx] v51 = selectif.i64 eq v48, v30, v34 ; bin: 49 0f 44 d3
; asm: cmoveq %rdi, %rdx
[-,%rdx] v52 = selectif.i64 eq v48, v37, v34 ; bin: 48 0f 44 d7
; 32-bit wide cmov
; asm: cmovnel %r11d, %edx
[-,%rdx] v60 = selectif.i32 ne v48, v40, v44 ; bin: 41 0f 45 d3
; asm: cmovlel %esi, %edx
[-,%rdx] v61 = selectif.i32 sle v48, v27, v44 ; bin: 0f 4e d6
trap user0
}

View File

@@ -42,7 +42,7 @@ ebb0:
; nextln: $v3 = bxor v0, v2
; nextln: }
; Polymorphic istruction controlled by second operand.
; Polymorphic instruction controlled by second operand.
function %select() {
ebb0(v90: i32, v91: i32, v92: b1):
v0 = select v92, v90, v91
@@ -52,6 +52,16 @@ ebb0(v90: i32, v91: i32, v92: b1):
; nextln: $v0 = select $v92, $v90, $v91
; nextln: }
; Polymorphic instruction controlled by third operand.
function %selectif() native {
; Parser round-trip case for selectif with an explicit .i32 type suffix, the
; condition code 'eq', a flags operand and two i32 value operands.
; NOTE(review): v97 is declared as b1 but is used in the flags operand
; position; the encoding test feeds selectif from an %eflags result instead.
; Presumably acceptable for a parse-only test - confirm.
ebb0(v95: i32, v96: i32, v97: b1):
v98 = selectif.i32 eq v97, v95, v96
}
; sameln: function %selectif() native {
; nextln: ebb0(v0: i32, v1: i32, v2: b1):
; nextln: v3 = selectif.i32 eq v2, v0, v1
; nextln: }
; Lane indexes.
function %lanes() {
ebb0: