Adds support for legalizing CLZ, CTZ and POPCOUNT on baseline x86_64 targets.
Changes:

* Adds a new generic instruction, SELECTIF, that does value selection (à la conditional move) similarly to the existing SELECT, except that it is controlled by a condition-code input and a flags-register input.
* Adds a new Intel x86_64 variant, 'baseline', that supports SSE2 and nothing else.
* Adds new Intel x86_64 instructions BSR and BSF.
* Implements generic CLZ, CTZ and POPCOUNT on x86_64 'baseline' targets using the new BSR, BSF and SELECTIF instructions.
* Implements SELECTIF on x86_64 targets using conditional moves.
* Adds a new test, filetests/isa/intel/baseline_clz_ctz_popcount.cton (for legalization).
* Adds a new test, filetests/isa/intel/baseline_clz_ctz_popcount_encoding.cton (for encoding).
* Allows lib/cretonne/meta/gen_legalizer.py to generate non-snake-case Rust without rustc complaining.

Fixes #238.
This commit is contained in:
committed by
Jakob Stoklund Olesen
parent
e3714ddd10
commit
6f8a54b6a5
@@ -43,6 +43,8 @@ IntCond = InstructionFormat(intcc, VALUE)
|
||||
FloatCompare = InstructionFormat(floatcc, VALUE, VALUE)
|
||||
FloatCond = InstructionFormat(floatcc, VALUE)
|
||||
|
||||
IntSelect = InstructionFormat(intcc, VALUE, VALUE, VALUE)
|
||||
|
||||
Jump = InstructionFormat(ebb, VARIABLE_ARGS)
|
||||
Branch = InstructionFormat(VALUE, ebb, VARIABLE_ARGS)
|
||||
BranchInt = InstructionFormat(intcc, VALUE, ebb, VARIABLE_ARGS)
|
||||
|
||||
@@ -485,6 +485,15 @@ select = Instruction(
|
||||
""",
|
||||
ins=(c, x, y), outs=a)
|
||||
|
||||
cc = Operand('cc', intcc, doc='Controlling condition code')
|
||||
flags = Operand('flags', iflags, doc='The machine\'s flag register')
|
||||
|
||||
selectif = Instruction(
|
||||
'selectif', r"""
|
||||
Conditional select, dependent on integer condition codes.
|
||||
""",
|
||||
ins=(cc, flags, x, y), outs=a)
|
||||
|
||||
x = Operand('x', Any)
|
||||
|
||||
copy = Instruction(
|
||||
|
||||
@@ -355,7 +355,7 @@ def gen_xform(xform, fmt, type_sets):
|
||||
def gen_xform_group(xgrp, fmt, type_sets):
|
||||
# type: (XFormGroup, Formatter, UniqueTable) -> None
|
||||
fmt.doc_comment("Legalize the instruction pointed to by `pos`.")
|
||||
fmt.line('#[allow(unused_variables,unused_assignments)]')
|
||||
fmt.line('#[allow(unused_variables,unused_assignments,non_snake_case)]')
|
||||
with fmt.indented('pub fn {}('.format(xgrp.name)):
|
||||
fmt.line('inst: ir::Inst,')
|
||||
fmt.line('func: &mut ir::Function,')
|
||||
|
||||
@@ -367,6 +367,17 @@ enc_i32_i64(base.ifcmp, r.rcmp, 0x39)
|
||||
enc_both(base.trueif, r.seti_abcd, 0x0f, 0x90)
|
||||
enc_both(base.trueff, r.setf_abcd, 0x0f, 0x90)
|
||||
|
||||
#
|
||||
# Conditional move (a.k.a. integer select)
|
||||
#
|
||||
enc_i32_i64(base.selectif, r.cmov, 0x0F, 0x40)
|
||||
|
||||
#
|
||||
# Bit scan forwards and reverse
|
||||
#
|
||||
enc_i32_i64(x86.bsf, r.bsf_and_bsr, 0x0F, 0xBC)
|
||||
enc_i32_i64(x86.bsr, r.bsf_and_bsr, 0x0F, 0xBD)
|
||||
|
||||
#
|
||||
# Convert bool to int.
|
||||
#
|
||||
|
||||
@@ -5,6 +5,7 @@ This module defines additional instructions that are useful only to the Intel
|
||||
target ISA.
|
||||
"""
|
||||
|
||||
from base.types import iflags
|
||||
from cdsl.operands import Operand
|
||||
from cdsl.typevar import TypeVar
|
||||
from cdsl.instructions import Instruction, InstructionGroup
|
||||
@@ -125,4 +126,26 @@ pop = Instruction(
|
||||
""",
|
||||
outs=x, can_load=True, other_side_effects=True)
|
||||
|
||||
y = Operand('y', iWord)
|
||||
rflags = Operand('rflags', iflags)
|
||||
|
||||
bsr = Instruction(
|
||||
'x86_bsr', r"""
|
||||
Bit Scan Reverse -- returns the bit-index of the most significant 1
|
||||
in the word. Result is undefined if the argument is zero. However, it
|
||||
sets the Z flag depending on the argument, so it is at least easy to
|
||||
detect and handle that case.
|
||||
|
||||
This is polymorphic in i32 and i64. It is implemented for both i64 and
|
||||
i32 in 64-bit mode, and only for i32 in 32-bit mode.
|
||||
""",
|
||||
ins=x, outs=(y, rflags))
|
||||
|
||||
bsf = Instruction(
|
||||
'x86_bsf', r"""
|
||||
Bit Scan Forwards -- returns the bit-index of the least significant 1
|
||||
in the word. Is otherwise identical to 'bsr', just above.
|
||||
""",
|
||||
ins=x, outs=(y, rflags))
|
||||
|
||||
GROUP.close()
|
||||
|
||||
@@ -4,7 +4,7 @@ Custom legalization patterns for Intel.
|
||||
from __future__ import absolute_import
|
||||
from cdsl.ast import Var
|
||||
from cdsl.xform import Rtl, XFormGroup
|
||||
from base.immediates import imm64, floatcc
|
||||
from base.immediates import imm64, intcc, floatcc
|
||||
from base.types import i32, i64
|
||||
from base import legalize as shared
|
||||
from base import instructions as insts
|
||||
@@ -100,3 +100,131 @@ intel_expand.custom_legalize(insts.fcvt_from_uint, 'expand_fcvt_from_uint')
|
||||
# Conversions from float to int can trap.
|
||||
intel_expand.custom_legalize(insts.fcvt_to_sint, 'expand_fcvt_to_sint')
|
||||
intel_expand.custom_legalize(insts.fcvt_to_uint, 'expand_fcvt_to_uint')
|
||||
|
||||
# Count leading and trailing zeroes, for baseline x86_64
|
||||
c_minus_one = Var('c_minus_one')
|
||||
c_thirty_one = Var('c_thirty_one')
|
||||
c_thirty_two = Var('c_thirty_two')
|
||||
c_sixty_three = Var('c_sixty_three')
|
||||
c_sixty_four = Var('c_sixty_four')
|
||||
index1 = Var('index1')
|
||||
r2flags = Var('r2flags')
|
||||
index2 = Var('index2')
|
||||
|
||||
intel_expand.legalize(
|
||||
a << insts.clz.i64(x),
|
||||
Rtl(
|
||||
c_minus_one << insts.iconst(imm64(-1)),
|
||||
c_sixty_three << insts.iconst(imm64(63)),
|
||||
(index1, r2flags) << x86.bsr(x),
|
||||
index2 << insts.selectif(intcc.eq, r2flags, c_minus_one, index1),
|
||||
a << insts.isub(c_sixty_three, index2),
|
||||
))
|
||||
|
||||
intel_expand.legalize(
|
||||
a << insts.clz.i32(x),
|
||||
Rtl(
|
||||
c_minus_one << insts.iconst(imm64(-1)),
|
||||
c_thirty_one << insts.iconst(imm64(31)),
|
||||
(index1, r2flags) << x86.bsr(x),
|
||||
index2 << insts.selectif(intcc.eq, r2flags, c_minus_one, index1),
|
||||
a << insts.isub(c_thirty_one, index2),
|
||||
))
|
||||
|
||||
intel_expand.legalize(
|
||||
a << insts.ctz.i64(x),
|
||||
Rtl(
|
||||
c_sixty_four << insts.iconst(imm64(64)),
|
||||
(index1, r2flags) << x86.bsf(x),
|
||||
a << insts.selectif(intcc.eq, r2flags, c_sixty_four, index1),
|
||||
))
|
||||
|
||||
intel_expand.legalize(
|
||||
a << insts.ctz.i32(x),
|
||||
Rtl(
|
||||
c_thirty_two << insts.iconst(imm64(32)),
|
||||
(index1, r2flags) << x86.bsf(x),
|
||||
a << insts.selectif(intcc.eq, r2flags, c_thirty_two, index1),
|
||||
))
|
||||
|
||||
|
||||
# Population count for baseline x86_64
|
||||
qv1 = Var('qv1')
|
||||
qv3 = Var('qv3')
|
||||
qv4 = Var('qv4')
|
||||
qv5 = Var('qv5')
|
||||
qv6 = Var('qv6')
|
||||
qv7 = Var('qv7')
|
||||
qv8 = Var('qv8')
|
||||
qv9 = Var('qv9')
|
||||
qv10 = Var('qv10')
|
||||
qv11 = Var('qv11')
|
||||
qv12 = Var('qv12')
|
||||
qv13 = Var('qv13')
|
||||
qv14 = Var('qv14')
|
||||
qv15 = Var('qv15')
|
||||
qv16 = Var('qv16')
|
||||
qc77 = Var('qc77')
|
||||
qc0F = Var('qc0F')
|
||||
qc01 = Var('qc01')
|
||||
intel_expand.legalize(
|
||||
qv16 << insts.popcnt.i64(qv1),
|
||||
Rtl(
|
||||
qv3 << insts.ushr_imm(qv1, imm64(1)),
|
||||
qc77 << insts.iconst(imm64(0x7777777777777777)),
|
||||
qv4 << insts.band(qv3, qc77),
|
||||
qv5 << insts.isub(qv1, qv4),
|
||||
qv6 << insts.ushr_imm(qv4, imm64(1)),
|
||||
qv7 << insts.band(qv6, qc77),
|
||||
qv8 << insts.isub(qv5, qv7),
|
||||
qv9 << insts.ushr_imm(qv7, imm64(1)),
|
||||
qv10 << insts.band(qv9, qc77),
|
||||
qv11 << insts.isub(qv8, qv10),
|
||||
qv12 << insts.ushr_imm(qv11, imm64(4)),
|
||||
qv13 << insts.iadd(qv11, qv12),
|
||||
qc0F << insts.iconst(imm64(0x0F0F0F0F0F0F0F0F)),
|
||||
qv14 << insts.band(qv13, qc0F),
|
||||
qc01 << insts.iconst(imm64(0x0101010101010101)),
|
||||
qv15 << insts.imul(qv14, qc01),
|
||||
qv16 << insts.ushr_imm(qv15, imm64(56))
|
||||
))
|
||||
|
||||
lv1 = Var('lv1')
|
||||
lv3 = Var('lv3')
|
||||
lv4 = Var('lv4')
|
||||
lv5 = Var('lv5')
|
||||
lv6 = Var('lv6')
|
||||
lv7 = Var('lv7')
|
||||
lv8 = Var('lv8')
|
||||
lv9 = Var('lv9')
|
||||
lv10 = Var('lv10')
|
||||
lv11 = Var('lv11')
|
||||
lv12 = Var('lv12')
|
||||
lv13 = Var('lv13')
|
||||
lv14 = Var('lv14')
|
||||
lv15 = Var('lv15')
|
||||
lv16 = Var('lv16')
|
||||
lc77 = Var('lc77')
|
||||
lc0F = Var('lc0F')
|
||||
lc01 = Var('lc01')
|
||||
intel_expand.legalize(
|
||||
lv16 << insts.popcnt.i32(lv1),
|
||||
Rtl(
|
||||
lv3 << insts.ushr_imm(lv1, imm64(1)),
|
||||
lc77 << insts.iconst(imm64(0x77777777)),
|
||||
lv4 << insts.band(lv3, lc77),
|
||||
lv5 << insts.isub(lv1, lv4),
|
||||
lv6 << insts.ushr_imm(lv4, imm64(1)),
|
||||
lv7 << insts.band(lv6, lc77),
|
||||
lv8 << insts.isub(lv5, lv7),
|
||||
lv9 << insts.ushr_imm(lv7, imm64(1)),
|
||||
lv10 << insts.band(lv9, lc77),
|
||||
lv11 << insts.isub(lv8, lv10),
|
||||
lv12 << insts.ushr_imm(lv11, imm64(4)),
|
||||
lv13 << insts.iadd(lv11, lv12),
|
||||
lc0F << insts.iconst(imm64(0x0F0F0F0F)),
|
||||
lv14 << insts.band(lv13, lc0F),
|
||||
lc01 << insts.iconst(imm64(0x01010101)),
|
||||
lv15 << insts.imul(lv14, lc01),
|
||||
lv16 << insts.ushr_imm(lv15, imm64(24))
|
||||
))
|
||||
|
||||
@@ -8,6 +8,7 @@ from cdsl.registers import RegClass
|
||||
from base.formats import Unary, UnaryImm, Binary, BinaryImm, MultiAry, NullAry
|
||||
from base.formats import Trap, Call, IndirectCall, Store, Load
|
||||
from base.formats import IntCompare, FloatCompare, IntCond, FloatCond
|
||||
from base.formats import IntSelect
|
||||
from base.formats import Jump, Branch, BranchInt, BranchFloat
|
||||
from base.formats import Ternary, FuncAddr, UnaryGlobalVar
|
||||
from base.formats import RegMove, RegSpill, RegFill, CopySpecial
|
||||
@@ -1021,6 +1022,32 @@ setf_abcd = TailRecipe(
|
||||
modrm_r_bits(out_reg0, bits, sink);
|
||||
''')
|
||||
|
||||
#
|
||||
# Conditional move (a.k.a. integer select)
|
||||
# (maybe-REX.W) 0F 4x modrm(r,r)
|
||||
# A 1-byte modrm(r,r) follows the opcode.
|
||||
#
|
||||
cmov = TailRecipe(
|
||||
'cmov', IntSelect, size=1, ins=(FLAG.eflags, GPR, GPR), outs=2,
|
||||
requires_prefix=False,
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
PUT_OP(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink);
|
||||
modrm_rr(in_reg1, in_reg2, sink);
|
||||
''')
|
||||
|
||||
#
|
||||
# Bit scan forwards and reverse
|
||||
#
|
||||
bsf_and_bsr = TailRecipe(
|
||||
'bsf_and_bsr', Unary, size=1, ins=GPR, outs=(GPR, FLAG.eflags),
|
||||
requires_prefix=False,
|
||||
clobbers_flags=True,
|
||||
emit='''
|
||||
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
|
||||
modrm_rr(in_reg0, out_reg0, sink);
|
||||
''')
|
||||
|
||||
#
|
||||
# Compare and set flags.
|
||||
#
|
||||
|
||||
@@ -40,6 +40,7 @@ use_lzcnt = And(has_lzcnt)
|
||||
|
||||
# Presets corresponding to Intel CPUs.
|
||||
|
||||
baseline = Preset(has_sse2)
|
||||
nehalem = Preset(
|
||||
has_sse2, has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt)
|
||||
haswell = Preset(nehalem, has_bmi1, has_lzcnt)
|
||||
|
||||
@@ -157,6 +157,11 @@ pub enum InstructionData {
|
||||
cond: FloatCC,
|
||||
arg: Value,
|
||||
},
|
||||
IntSelect {
|
||||
opcode: Opcode,
|
||||
cond: IntCC,
|
||||
args: [Value; 3],
|
||||
},
|
||||
Jump {
|
||||
opcode: Opcode,
|
||||
destination: Ebb,
|
||||
|
||||
@@ -358,6 +358,7 @@ impl<'a> Verifier<'a> {
|
||||
IntCond { .. } |
|
||||
FloatCompare { .. } |
|
||||
FloatCond { .. } |
|
||||
IntSelect { .. } |
|
||||
Load { .. } |
|
||||
Store { .. } |
|
||||
RegMove { .. } |
|
||||
|
||||
@@ -303,6 +303,9 @@ pub fn write_operands(
|
||||
IntCond { cond, arg, .. } => write!(w, " {} {}", cond, arg),
|
||||
FloatCompare { cond, args, .. } => write!(w, " {} {}, {}", cond, args[0], args[1]),
|
||||
FloatCond { cond, arg, .. } => write!(w, " {} {}", cond, arg),
|
||||
IntSelect { cond, args, .. } => {
|
||||
write!(w, " {} {}, {}, {}", cond, args[0], args[1], args[2])
|
||||
}
|
||||
Jump {
|
||||
destination,
|
||||
ref args,
|
||||
|
||||
Reference in New Issue
Block a user