diff --git a/cranelift/docs/langref.rst b/cranelift/docs/langref.rst index a6fe32ff85..44204ad53f 100644 --- a/cranelift/docs/langref.rst +++ b/cranelift/docs/langref.rst @@ -700,6 +700,7 @@ Operations ========== .. autoinst:: select +.. autoinst:: selectif Constant materialization ------------------------ @@ -979,6 +980,10 @@ Instructions that can only be used by the Intel target ISA. .. autoinst:: isa.intel.instructions.cvtt2si .. autoinst:: isa.intel.instructions.fmin .. autoinst:: isa.intel.instructions.fmax +.. autoinst:: isa.intel.instructions.bsf +.. autoinst:: isa.intel.instructions.bsr +.. autoinst:: isa.intel.instructions.push +.. autoinst:: isa.intel.instructions.pop Instruction groups ================== diff --git a/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount.cton b/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount.cton new file mode 100644 index 0000000000..62a793de60 --- /dev/null +++ b/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount.cton @@ -0,0 +1,104 @@ + +test compile +set is_64bit +isa intel baseline + + +; clz/ctz on 64 bit operands + +function %i64_clz(i64) -> i64 { +ebb0(v10: i64): + v11 = clz v10 + ; check: x86_bsr + ; check: selectif.i64 + return v11 +} + +function %i64_ctz(i64) -> i64 { +ebb1(v20: i64): + v21 = ctz v20 + ; check: x86_bsf + ; check: selectif.i64 + return v21 +} + + +; clz/ctz on 32 bit operands + +function %i32_clz(i32) -> i32 { +ebb0(v10: i32): + v11 = clz v10 + ; check: x86_bsr + ; check: selectif.i32 + return v11 +} + +function %i32_ctz(i32) -> i32 { +ebb1(v20: i32): + v21 = ctz v20 + ; check: x86_bsf + ; check: selectif.i32 + return v21 +} + + +; popcount on 64 bit operands + +function %i64_popcount(i64) -> i64 { +ebb0(v30: i64): + v31 = popcnt v30; + ; check: iconst.i32 + ; check: ushr + ; check: iconst.i64 + ; check: band + ; check: isub + ; check: iconst.i32 + ; check: ushr + ; check: band + ; check: isub + ; check: iconst.i32 + ; check: ushr + ; check: band + ; check: isub + ; check: 
iconst.i32 + ; check: ushr + ; check: iadd + ; check: iconst.i64 + ; check: band + ; check: iconst.i64 + ; check: imul + ; check: iconst.i32 + ; check: ushr + return v31; +} + + +; popcount on 32 bit operands + +function %i32_popcount(i32) -> i32 { +ebb0(v40: i32): + v41 = popcnt v40; + ; check: iconst.i32 + ; check: ushr + ; check: iconst.i32 + ; check: band + ; check: isub + ; check: iconst.i32 + ; check: ushr + ; check: band + ; check: isub + ; check: iconst.i32 + ; check: ushr + ; check: band + ; check: isub + ; check: iconst.i32 + ; check: ushr + ; check: iadd + ; check: iconst.i32 + ; check: band + ; check: iconst.i32 + ; check: imul + ; check: iconst.i32 + ; check: ushr + return v41; +} diff --git a/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount_encoding.cton b/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount_encoding.cton new file mode 100644 index 0000000000..0b7003449d --- /dev/null +++ b/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount_encoding.cton @@ -0,0 +1,89 @@ + +test binemit +set is_64bit +set is_compressed +isa intel baseline + +; The binary encodings can be verified with the command: +; +; sed -ne 's/^ *; asm: *//p' filetests/isa/intel/baseline_clz_ctz_popcount_encoding.cton | llvm-mc -show-encoding -triple=x86_64 +; + +function %Foo() { +ebb0: + ; 64-bit wide bsf + + [-,%r11] v10 = iconst.i64 0x1234 + ; asm: bsfq %r11, %rcx + [-,%rcx,%eflags] v11, v12 = x86_bsf v10 ; bin: 49 0f bc cb + + [-,%rdx] v14 = iconst.i64 0x5678 + ; asm: bsfq %rdx, %r12 + [-,%r12,%eflags] v15, v16 = x86_bsf v14 ; bin: 4c 0f bc e2 + + ; asm: bsfq %rdx, %rdi + [-,%rdi,%eflags] v17, v18 = x86_bsf v14 ; bin: 48 0f bc fa + + + ; 32-bit wide bsf + + [-,%r11] v20 = iconst.i32 0x1234 + ; asm: bsfl %r11d, %ecx + [-,%rcx,%eflags] v21, v22 = x86_bsf v20 ; bin: 41 0f bc cb + + [-,%rdx] v24 = iconst.i32 0x5678 + ; asm: bsfl %edx, %r12d + [-,%r12,%eflags] v25, v26 = x86_bsf v24 ; bin: 44 0f bc e2 + + ; asm: bsfl %edx, %esi + [-,%rsi,%eflags] v27, v28 = x86_bsf 
v24 ; bin: 0f bc f2 + + + ; 64-bit wide bsr + + [-,%r11] v30 = iconst.i64 0x1234 + ; asm: bsrq %r11, %rcx + [-,%rcx,%eflags] v31, v32 = x86_bsr v30 ; bin: 49 0f bd cb + + [-,%rdx] v34 = iconst.i64 0x5678 + ; asm: bsrq %rdx, %r12 + [-,%r12,%eflags] v35, v36 = x86_bsr v34 ; bin: 4c 0f bd e2 + + ; asm: bsrq %rdx, %rdi + [-,%rdi,%eflags] v37, v38 = x86_bsr v34 ; bin: 48 0f bd fa + + + ; 32-bit wide bsr + + [-,%r11] v40 = iconst.i32 0x1234 + ; asm: bsrl %r11d, %ecx + [-,%rcx,%eflags] v41, v42 = x86_bsr v40 ; bin: 41 0f bd cb + + [-,%rdx] v44 = iconst.i32 0x5678 + ; asm: bsrl %edx, %r12d + [-,%r12,%eflags] v45, v46 = x86_bsr v44 ; bin: 44 0f bd e2 + + ; asm: bsrl %edx, %esi + [-,%rsi,%eflags] v47, v48 = x86_bsr v44 ; bin: 0f bd f2 + + + ; 64-bit wide cmov + + ; asm: cmoveq %r11, %rdx + [-,%rdx] v51 = selectif.i64 eq v48, v30, v34 ; bin: 49 0f 44 d3 + + ; asm: cmoveq %rdi, %rdx + [-,%rdx] v52 = selectif.i64 eq v48, v37, v34 ; bin: 48 0f 44 d7 + + + ; 32-bit wide cmov + + ; asm: cmovnel %r11d, %edx + [-,%rdx] v60 = selectif.i32 ne v48, v40, v44 ; bin: 41 0f 45 d3 + + ; asm: cmovlel %esi, %edx + [-,%rdx] v61 = selectif.i32 sle v48, v27, v44 ; bin: 0f 4e d6 + + + trap user0 +} diff --git a/cranelift/filetests/parser/tiny.cton b/cranelift/filetests/parser/tiny.cton index 20c4bbe2b3..5e8c8e85c3 100644 --- a/cranelift/filetests/parser/tiny.cton +++ b/cranelift/filetests/parser/tiny.cton @@ -42,7 +42,7 @@ ebb0: ; nextln: $v3 = bxor v0, v2 ; nextln: } -; Polymorphic istruction controlled by second operand. +; Polymorphic instruction controlled by second operand. function %select() { ebb0(v90: i32, v91: i32, v92: b1): v0 = select v92, v90, v91 @@ -52,6 +52,16 @@ ebb0(v90: i32, v91: i32, v92: b1): ; nextln: $v0 = select $v92, $v90, $v91 ; nextln: } +; Polymorphic instruction controlled by third operand. 
+function %selectif() native { +ebb0(v95: i32, v96: i32, v97: b1): + v98 = selectif.i32 eq v97, v95, v96 +} +; sameln: function %selectif() native { +; nextln: ebb0(v0: i32, v1: i32, v2: b1): +; nextln: v3 = selectif.i32 eq v2, v0, v1 +; nextln: } + ; Lane indexes. function %lanes() { ebb0: diff --git a/lib/cretonne/meta/base/formats.py b/lib/cretonne/meta/base/formats.py index 539d87561b..394e1f4dc4 100644 --- a/lib/cretonne/meta/base/formats.py +++ b/lib/cretonne/meta/base/formats.py @@ -43,6 +43,8 @@ IntCond = InstructionFormat(intcc, VALUE) FloatCompare = InstructionFormat(floatcc, VALUE, VALUE) FloatCond = InstructionFormat(floatcc, VALUE) +IntSelect = InstructionFormat(intcc, VALUE, VALUE, VALUE) + Jump = InstructionFormat(ebb, VARIABLE_ARGS) Branch = InstructionFormat(VALUE, ebb, VARIABLE_ARGS) BranchInt = InstructionFormat(intcc, VALUE, ebb, VARIABLE_ARGS) diff --git a/lib/cretonne/meta/base/instructions.py b/lib/cretonne/meta/base/instructions.py index 6a41725ff9..fad863d976 100644 --- a/lib/cretonne/meta/base/instructions.py +++ b/lib/cretonne/meta/base/instructions.py @@ -485,6 +485,15 @@ select = Instruction( """, ins=(c, x, y), outs=a) +cc = Operand('cc', intcc, doc='Controlling condition code') +flags = Operand('flags', iflags, doc='The machine\'s flag register') + +selectif = Instruction( + 'selectif', r""" + Conditional select, dependent on integer condition codes. 
+ """, + ins=(cc, flags, x, y), outs=a) + x = Operand('x', Any) copy = Instruction( diff --git a/lib/cretonne/meta/gen_legalizer.py b/lib/cretonne/meta/gen_legalizer.py index dea0a53950..54678b4284 100644 --- a/lib/cretonne/meta/gen_legalizer.py +++ b/lib/cretonne/meta/gen_legalizer.py @@ -355,7 +355,7 @@ def gen_xform(xform, fmt, type_sets): def gen_xform_group(xgrp, fmt, type_sets): # type: (XFormGroup, Formatter, UniqueTable) -> None fmt.doc_comment("Legalize the instruction pointed to by `pos`.") - fmt.line('#[allow(unused_variables,unused_assignments)]') + fmt.line('#[allow(unused_variables,unused_assignments,non_snake_case)]') with fmt.indented('pub fn {}('.format(xgrp.name)): fmt.line('inst: ir::Inst,') fmt.line('func: &mut ir::Function,') diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index 4e4ebc229c..0e962a66f1 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -367,6 +367,17 @@ enc_i32_i64(base.ifcmp, r.rcmp, 0x39) enc_both(base.trueif, r.seti_abcd, 0x0f, 0x90) enc_both(base.trueff, r.setf_abcd, 0x0f, 0x90) +# +# Conditional move (a.k.a. integer select) +# +enc_i32_i64(base.selectif, r.cmov, 0x0F, 0x40) + +# +# Bit scan forwards and reverse +# +enc_i32_i64(x86.bsf, r.bsf_and_bsr, 0x0F, 0xBC) +enc_i32_i64(x86.bsr, r.bsf_and_bsr, 0x0F, 0xBD) + # # Convert bool to int. # diff --git a/lib/cretonne/meta/isa/intel/instructions.py b/lib/cretonne/meta/isa/intel/instructions.py index 0852d49c73..277cf62b4a 100644 --- a/lib/cretonne/meta/isa/intel/instructions.py +++ b/lib/cretonne/meta/isa/intel/instructions.py @@ -5,6 +5,7 @@ This module defines additional instructions that are useful only to the Intel target ISA. 
""" +from base.types import iflags from cdsl.operands import Operand from cdsl.typevar import TypeVar from cdsl.instructions import Instruction, InstructionGroup @@ -125,4 +126,26 @@ pop = Instruction( """, outs=x, can_load=True, other_side_effects=True) +y = Operand('y', iWord) +rflags = Operand('rflags', iflags) + +bsr = Instruction( + 'x86_bsr', r""" + Bit Scan Reverse -- returns the bit-index of the most significant 1 + in the word. Result is undefined if the argument is zero. However, it + sets the Z flag depending on the argument, so it is at least easy to + detect and handle that case. + + This is polymorphic in i32 and i64. It is implemented for both i64 and + i32 in 64-bit mode, and only for i32 in 32-bit mode. + """, + ins=x, outs=(y, rflags)) + +bsf = Instruction( + 'x86_bsf', r""" + Bit Scan Forwards -- returns the bit-index of the least significant 1 + in the word. Is otherwise identical to 'bsr', just above. + """, + ins=x, outs=(y, rflags)) + GROUP.close() diff --git a/lib/cretonne/meta/isa/intel/legalize.py b/lib/cretonne/meta/isa/intel/legalize.py index 5c883baf09..70a8e9166b 100644 --- a/lib/cretonne/meta/isa/intel/legalize.py +++ b/lib/cretonne/meta/isa/intel/legalize.py @@ -4,7 +4,7 @@ Custom legalization patterns for Intel. from __future__ import absolute_import from cdsl.ast import Var from cdsl.xform import Rtl, XFormGroup -from base.immediates import imm64, floatcc +from base.immediates import imm64, intcc, floatcc from base.types import i32, i64 from base import legalize as shared from base import instructions as insts @@ -100,3 +100,131 @@ intel_expand.custom_legalize(insts.fcvt_from_uint, 'expand_fcvt_from_uint') # Conversions from float to int can trap. 
intel_expand.custom_legalize(insts.fcvt_to_sint, 'expand_fcvt_to_sint') intel_expand.custom_legalize(insts.fcvt_to_uint, 'expand_fcvt_to_uint') + +# Count leading and trailing zeroes, for baseline x86_64 +c_minus_one = Var('c_minus_one') +c_thirty_one = Var('c_thirty_one') +c_thirty_two = Var('c_thirty_two') +c_sixty_three = Var('c_sixty_three') +c_sixty_four = Var('c_sixty_four') +index1 = Var('index1') +r2flags = Var('r2flags') +index2 = Var('index2') + +intel_expand.legalize( + a << insts.clz.i64(x), + Rtl( + c_minus_one << insts.iconst(imm64(-1)), + c_sixty_three << insts.iconst(imm64(63)), + (index1, r2flags) << x86.bsr(x), + index2 << insts.selectif(intcc.eq, r2flags, c_minus_one, index1), + a << insts.isub(c_sixty_three, index2), + )) + +intel_expand.legalize( + a << insts.clz.i32(x), + Rtl( + c_minus_one << insts.iconst(imm64(-1)), + c_thirty_one << insts.iconst(imm64(31)), + (index1, r2flags) << x86.bsr(x), + index2 << insts.selectif(intcc.eq, r2flags, c_minus_one, index1), + a << insts.isub(c_thirty_one, index2), + )) + +intel_expand.legalize( + a << insts.ctz.i64(x), + Rtl( + c_sixty_four << insts.iconst(imm64(64)), + (index1, r2flags) << x86.bsf(x), + a << insts.selectif(intcc.eq, r2flags, c_sixty_four, index1), + )) + +intel_expand.legalize( + a << insts.ctz.i32(x), + Rtl( + c_thirty_two << insts.iconst(imm64(32)), + (index1, r2flags) << x86.bsf(x), + a << insts.selectif(intcc.eq, r2flags, c_thirty_two, index1), + )) + + +# Population count for baseline x86_64 +qv1 = Var('qv1') +qv3 = Var('qv3') +qv4 = Var('qv4') +qv5 = Var('qv5') +qv6 = Var('qv6') +qv7 = Var('qv7') +qv8 = Var('qv8') +qv9 = Var('qv9') +qv10 = Var('qv10') +qv11 = Var('qv11') +qv12 = Var('qv12') +qv13 = Var('qv13') +qv14 = Var('qv14') +qv15 = Var('qv15') +qv16 = Var('qv16') +qc77 = Var('qc77') +qc0F = Var('qc0F') +qc01 = Var('qc01') +intel_expand.legalize( + qv16 << insts.popcnt.i64(qv1), + Rtl( + qv3 << insts.ushr_imm(qv1, imm64(1)), + qc77 << insts.iconst(imm64(0x7777777777777777)), + 
qv4 << insts.band(qv3, qc77), + qv5 << insts.isub(qv1, qv4), + qv6 << insts.ushr_imm(qv4, imm64(1)), + qv7 << insts.band(qv6, qc77), + qv8 << insts.isub(qv5, qv7), + qv9 << insts.ushr_imm(qv7, imm64(1)), + qv10 << insts.band(qv9, qc77), + qv11 << insts.isub(qv8, qv10), + qv12 << insts.ushr_imm(qv11, imm64(4)), + qv13 << insts.iadd(qv11, qv12), + qc0F << insts.iconst(imm64(0x0F0F0F0F0F0F0F0F)), + qv14 << insts.band(qv13, qc0F), + qc01 << insts.iconst(imm64(0x0101010101010101)), + qv15 << insts.imul(qv14, qc01), + qv16 << insts.ushr_imm(qv15, imm64(56)) + )) + +lv1 = Var('lv1') +lv3 = Var('lv3') +lv4 = Var('lv4') +lv5 = Var('lv5') +lv6 = Var('lv6') +lv7 = Var('lv7') +lv8 = Var('lv8') +lv9 = Var('lv9') +lv10 = Var('lv10') +lv11 = Var('lv11') +lv12 = Var('lv12') +lv13 = Var('lv13') +lv14 = Var('lv14') +lv15 = Var('lv15') +lv16 = Var('lv16') +lc77 = Var('lc77') +lc0F = Var('lc0F') +lc01 = Var('lc01') +intel_expand.legalize( + lv16 << insts.popcnt.i32(lv1), + Rtl( + lv3 << insts.ushr_imm(lv1, imm64(1)), + lc77 << insts.iconst(imm64(0x77777777)), + lv4 << insts.band(lv3, lc77), + lv5 << insts.isub(lv1, lv4), + lv6 << insts.ushr_imm(lv4, imm64(1)), + lv7 << insts.band(lv6, lc77), + lv8 << insts.isub(lv5, lv7), + lv9 << insts.ushr_imm(lv7, imm64(1)), + lv10 << insts.band(lv9, lc77), + lv11 << insts.isub(lv8, lv10), + lv12 << insts.ushr_imm(lv11, imm64(4)), + lv13 << insts.iadd(lv11, lv12), + lc0F << insts.iconst(imm64(0x0F0F0F0F)), + lv14 << insts.band(lv13, lc0F), + lc01 << insts.iconst(imm64(0x01010101)), + lv15 << insts.imul(lv14, lc01), + lv16 << insts.ushr_imm(lv15, imm64(24)) + )) diff --git a/lib/cretonne/meta/isa/intel/recipes.py b/lib/cretonne/meta/isa/intel/recipes.py index 313ca7abd6..622430b9b2 100644 --- a/lib/cretonne/meta/isa/intel/recipes.py +++ b/lib/cretonne/meta/isa/intel/recipes.py @@ -8,6 +8,7 @@ from cdsl.registers import RegClass from base.formats import Unary, UnaryImm, Binary, BinaryImm, MultiAry, NullAry from base.formats import Trap, Call, 
IndirectCall, Store, Load from base.formats import IntCompare, FloatCompare, IntCond, FloatCond +from base.formats import IntSelect from base.formats import Jump, Branch, BranchInt, BranchFloat from base.formats import Ternary, FuncAddr, UnaryGlobalVar from base.formats import RegMove, RegSpill, RegFill, CopySpecial @@ -1021,6 +1022,32 @@ setf_abcd = TailRecipe( modrm_r_bits(out_reg0, bits, sink); ''') +# +# Conditional move (a.k.a. integer select) +# (maybe-REX.W) 0F 4x modrm(r,r) +# 1 byte, modrm(r,r), is after the opcode +# +cmov = TailRecipe( + 'cmov', IntSelect, size=1, ins=(FLAG.eflags, GPR, GPR), outs=2, + requires_prefix=False, + clobbers_flags=False, + emit=''' + PUT_OP(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink); + modrm_rr(in_reg1, in_reg2, sink); + ''') + +# +# Bit scan forwards and reverse +# +bsf_and_bsr = TailRecipe( + 'bsf_and_bsr', Unary, size=1, ins=GPR, outs=(GPR, FLAG.eflags), + requires_prefix=False, + clobbers_flags=True, + emit=''' + PUT_OP(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + ''') + # # Compare and set flags. # diff --git a/lib/cretonne/meta/isa/intel/settings.py b/lib/cretonne/meta/isa/intel/settings.py index a16303a3b5..5817c48c0e 100644 --- a/lib/cretonne/meta/isa/intel/settings.py +++ b/lib/cretonne/meta/isa/intel/settings.py @@ -40,6 +40,7 @@ use_lzcnt = And(has_lzcnt) # Presets corresponding to Intel CPUs. 
+baseline = Preset(has_sse2) nehalem = Preset( has_sse2, has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt) haswell = Preset(nehalem, has_bmi1, has_lzcnt) diff --git a/lib/cretonne/src/ir/instructions.rs b/lib/cretonne/src/ir/instructions.rs index 37ca23150c..9e28616377 100644 --- a/lib/cretonne/src/ir/instructions.rs +++ b/lib/cretonne/src/ir/instructions.rs @@ -157,6 +157,11 @@ pub enum InstructionData { cond: FloatCC, arg: Value, }, + IntSelect { + opcode: Opcode, + cond: IntCC, + args: [Value; 3], + }, Jump { opcode: Opcode, destination: Ebb, diff --git a/lib/cretonne/src/verifier/mod.rs b/lib/cretonne/src/verifier/mod.rs index 1860fecbbf..1b27c41766 100644 --- a/lib/cretonne/src/verifier/mod.rs +++ b/lib/cretonne/src/verifier/mod.rs @@ -358,6 +358,7 @@ impl<'a> Verifier<'a> { IntCond { .. } | FloatCompare { .. } | FloatCond { .. } | + IntSelect { .. } | Load { .. } | Store { .. } | RegMove { .. } | diff --git a/lib/cretonne/src/write.rs b/lib/cretonne/src/write.rs index 8c8c280ca9..2312e5644f 100644 --- a/lib/cretonne/src/write.rs +++ b/lib/cretonne/src/write.rs @@ -303,6 +303,9 @@ pub fn write_operands( IntCond { cond, arg, .. } => write!(w, " {} {}", cond, arg), FloatCompare { cond, args, .. } => write!(w, " {} {}, {}", cond, args[0], args[1]), FloatCond { cond, arg, .. } => write!(w, " {} {}", cond, arg), + IntSelect { cond, args, .. 
} => { + write!(w, " {} {}, {}, {}", cond, args[0], args[1], args[2]) + } Jump { destination, ref args, diff --git a/lib/reader/src/parser.rs b/lib/reader/src/parser.rs index 17f14af9b1..95923d013d 100644 --- a/lib/reader/src/parser.rs +++ b/lib/reader/src/parser.rs @@ -2119,6 +2119,25 @@ impl<'a> Parser<'a> { let arg = self.match_value("expected SSA value")?; InstructionData::FloatCond { opcode, cond, arg } } + InstructionFormat::IntSelect => { + let cond = self.match_enum("expected intcc condition code")?; + let guard = self.match_value("expected SSA value first operand")?; + self.match_token( + Token::Comma, + "expected ',' between operands", + )?; + let v_true = self.match_value("expected SSA value second operand")?; + self.match_token( + Token::Comma, + "expected ',' between operands", + )?; + let v_false = self.match_value("expected SSA value third operand")?; + InstructionData::IntSelect { + opcode, + cond, + args: [guard, v_true, v_false], + } + } InstructionFormat::Call => { let func_ref = self.match_fn("expected function reference").and_then( |num| {