Optimize immediates and compare and branch sequences (#286)
* Add a pre-opt optimization to change constants into immediates. This converts 'iadd' + 'iconst' into 'iadd_imm', and so on. * Optimize away redundant `bint` instructions. Cretonne has a concept of "Testable" values, which can be either boolean or integer. When the an instruction needing a "Testable" value receives the result of a `bint`, converting boolean to integer, eliminate the `bint`, as it's redundant. * Postopt: Optimize using CPU flags. This introduces a post-legalization optimization pass which converts compare+branch sequences to use flags values on CPUs which support it. * Define a form of x86's `urm` that doesn't clobber FLAGS. movzbl/movsbl/etc. don't clobber FLAGS; define a form of the `urm` recipe that represents this. * Implement a DCE pass. This pass deletes instructions with no side effects and no results that are used. * Clarify ambiguity about "32-bit" and "64-bit" in comments. * Add x86 encodings for icmp_imm. * Add a testcase for postopt CPU flags optimization. This covers the basic functionality of transforming compare+branch sequences to use CPU flags. * Pattern-match irsub_imm in preopt.
This commit is contained in:
@@ -378,6 +378,8 @@ X86_64.enc(base.trapff, r.trapff, 0)
|
||||
# Comparisons
|
||||
#
|
||||
enc_i32_i64(base.icmp, r.icscc, 0x39)
|
||||
enc_i32_i64(base.icmp_imm, r.icsccib, 0x83, rrr=7)
|
||||
enc_i32_i64(base.icmp_imm, r.icsccid, 0x81, rrr=7)
|
||||
enc_i32_i64(base.ifcmp, r.rcmp, 0x39)
|
||||
enc_i32_i64(base.ifcmp_imm, r.rcmpib, 0x83, rrr=7)
|
||||
enc_i32_i64(base.ifcmp_imm, r.rcmpid, 0x81, rrr=7)
|
||||
@@ -409,11 +411,13 @@ enc_i32_i64(x86.bsr, r.bsf_and_bsr, 0x0F, 0xBD)
|
||||
#
|
||||
# This assumes that b1 is represented as an 8-bit low register with the value 0
|
||||
# or 1.
|
||||
X86_32.enc(base.bint.i32.b1, *r.urm_abcd(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i64.b1, *r.urm.rex(0x0f, 0xb6)) # zext to i64 implicit.
|
||||
X86_64.enc(base.bint.i64.b1, *r.urm_abcd(0x0f, 0xb6)) # zext to i64 implicit.
|
||||
X86_64.enc(base.bint.i32.b1, *r.urm.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i32.b1, *r.urm_abcd(0x0f, 0xb6))
|
||||
#
|
||||
# Encode movzbq as movzbl, because it's equivalent and shorter.
|
||||
X86_32.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i64.b1, *r.urm_noflags.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i64.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i32.b1, *r.urm_noflags.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
|
||||
|
||||
# Numerical conversions.
|
||||
|
||||
@@ -430,41 +434,41 @@ X86_64.enc(base.ireduce.i32.i64, r.null, 0)
|
||||
# instructions for %al/%ax/%eax to %ax/%eax/%rax.
|
||||
|
||||
# movsbl
|
||||
X86_32.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe))
|
||||
X86_64.enc(base.sextend.i32.i8, *r.urm.rex(0x0f, 0xbe))
|
||||
X86_64.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe))
|
||||
X86_32.enc(base.sextend.i32.i8, *r.urm_noflags(0x0f, 0xbe))
|
||||
X86_64.enc(base.sextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xbe))
|
||||
X86_64.enc(base.sextend.i32.i8, *r.urm_noflags(0x0f, 0xbe))
|
||||
|
||||
# movswl
|
||||
X86_32.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf))
|
||||
X86_64.enc(base.sextend.i32.i16, *r.urm.rex(0x0f, 0xbf))
|
||||
X86_64.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf))
|
||||
X86_32.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
|
||||
X86_64.enc(base.sextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xbf))
|
||||
X86_64.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
|
||||
|
||||
# movsbq
|
||||
X86_64.enc(base.sextend.i64.i8, *r.urm.rex(0x0f, 0xbe, w=1))
|
||||
X86_64.enc(base.sextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xbe, w=1))
|
||||
|
||||
# movswq
|
||||
X86_64.enc(base.sextend.i64.i16, *r.urm.rex(0x0f, 0xbf, w=1))
|
||||
X86_64.enc(base.sextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xbf, w=1))
|
||||
|
||||
# movslq
|
||||
X86_64.enc(base.sextend.i64.i32, *r.urm.rex(0x63, w=1))
|
||||
X86_64.enc(base.sextend.i64.i32, *r.urm_noflags.rex(0x63, w=1))
|
||||
|
||||
# movzbl
|
||||
X86_32.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i32.i8, *r.urm.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6))
|
||||
X86_32.enc(base.uextend.i32.i8, *r.urm_noflags(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i32.i8, *r.urm_noflags(0x0f, 0xb6))
|
||||
|
||||
# movzwl
|
||||
X86_32.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i32.i16, *r.urm.rex(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7))
|
||||
X86_32.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
|
||||
|
||||
# movzbq, encoded as movzbl because it's equivalent and shorter
|
||||
X86_64.enc(base.uextend.i64.i8, *r.urm.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i64.i8, *r.urm(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i64.i8, *r.urm_noflags(0x0f, 0xb6))
|
||||
|
||||
# movzwq, encoded as movzwl because it's equivalent and shorter
|
||||
X86_64.enc(base.uextend.i64.i16, *r.urm.rex(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i64.i16, *r.urm(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i64.i16, *r.urm_noflags(0x0f, 0xb7))
|
||||
|
||||
# A 32-bit register copy clears the high 32 bits.
|
||||
X86_64.enc(base.uextend.i64.i32, *r.umr.rex(0x89))
|
||||
|
||||
@@ -8,7 +8,8 @@ from cdsl.registers import RegClass
|
||||
from base.formats import Unary, UnaryImm, UnaryBool, Binary, BinaryImm
|
||||
from base.formats import MultiAry, NullAry
|
||||
from base.formats import Trap, Call, IndirectCall, Store, Load
|
||||
from base.formats import IntCompare, FloatCompare, IntCond, FloatCond
|
||||
from base.formats import IntCompare, IntCompareImm, FloatCompare
|
||||
from base.formats import IntCond, FloatCond
|
||||
from base.formats import IntSelect, IntCondTrap, FloatCondTrap
|
||||
from base.formats import Jump, Branch, BranchInt, BranchFloat
|
||||
from base.formats import Ternary, FuncAddr, UnaryGlobalVar
|
||||
@@ -364,7 +365,7 @@ rfumr = TailRecipe(
|
||||
''')
|
||||
|
||||
# XX /r, but for a unary operator with separate input/output register.
|
||||
# RM form.
|
||||
# RM form. Clobbers FLAGS.
|
||||
urm = TailRecipe(
|
||||
'urm', Unary, size=1, ins=GPR, outs=GPR,
|
||||
emit='''
|
||||
@@ -372,10 +373,19 @@ urm = TailRecipe(
|
||||
modrm_rr(in_reg0, out_reg0, sink);
|
||||
''')
|
||||
|
||||
# XX /r. Same as urm, but input limited to ABCD.
|
||||
urm_abcd = TailRecipe(
|
||||
'urm_abcd', Unary, size=1, ins=ABCD, outs=GPR,
|
||||
when_prefixed=urm,
|
||||
# XX /r. Same as urm, but doesn't clobber FLAGS.
|
||||
urm_noflags = TailRecipe(
|
||||
'urm_noflags', Unary, size=1, ins=GPR, outs=GPR,
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
|
||||
modrm_rr(in_reg0, out_reg0, sink);
|
||||
''')
|
||||
|
||||
# XX /r. Same as urm_noflags, but input limited to ABCD.
|
||||
urm_noflags_abcd = TailRecipe(
|
||||
'urm_noflags_abcd', Unary, size=1, ins=ABCD, outs=GPR,
|
||||
when_prefixed=urm_noflags,
|
||||
emit='''
|
||||
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
|
||||
modrm_rr(in_reg0, out_reg0, sink);
|
||||
@@ -1360,6 +1370,61 @@ icscc = TailRecipe(
|
||||
modrm_rr(out_reg0, 0, sink);
|
||||
''')
|
||||
|
||||
icsccib = TailRecipe(
|
||||
'icsccib', IntCompareImm, size=2 + 3, ins=GPR, outs=ABCD,
|
||||
instp=IsSignedInt(IntCompareImm.imm, 8),
|
||||
emit='''
|
||||
// Comparison instruction.
|
||||
PUT_OP(bits, rex1(in_reg0), sink);
|
||||
modrm_r_bits(in_reg0, bits, sink);
|
||||
let imm: i64 = imm.into();
|
||||
sink.put1(imm as u8);
|
||||
// `setCC` instruction, no REX.
|
||||
use ir::condcodes::IntCC::*;
|
||||
let setcc = match cond {
|
||||
Equal => 0x94,
|
||||
NotEqual => 0x95,
|
||||
SignedLessThan => 0x9c,
|
||||
SignedGreaterThanOrEqual => 0x9d,
|
||||
SignedGreaterThan => 0x9f,
|
||||
SignedLessThanOrEqual => 0x9e,
|
||||
UnsignedLessThan => 0x92,
|
||||
UnsignedGreaterThanOrEqual => 0x93,
|
||||
UnsignedGreaterThan => 0x97,
|
||||
UnsignedLessThanOrEqual => 0x96,
|
||||
};
|
||||
sink.put1(0x0f);
|
||||
sink.put1(setcc);
|
||||
modrm_rr(out_reg0, 0, sink);
|
||||
''')
|
||||
|
||||
icsccid = TailRecipe(
|
||||
'icsccid', IntCompareImm, size=5 + 3, ins=GPR, outs=ABCD,
|
||||
instp=IsSignedInt(IntCompareImm.imm, 32),
|
||||
emit='''
|
||||
// Comparison instruction.
|
||||
PUT_OP(bits, rex1(in_reg0), sink);
|
||||
modrm_r_bits(in_reg0, bits, sink);
|
||||
let imm: i64 = imm.into();
|
||||
sink.put4(imm as u32);
|
||||
// `setCC` instruction, no REX.
|
||||
use ir::condcodes::IntCC::*;
|
||||
let setcc = match cond {
|
||||
Equal => 0x94,
|
||||
NotEqual => 0x95,
|
||||
SignedLessThan => 0x9c,
|
||||
SignedGreaterThanOrEqual => 0x9d,
|
||||
SignedGreaterThan => 0x9f,
|
||||
SignedLessThanOrEqual => 0x9e,
|
||||
UnsignedLessThan => 0x92,
|
||||
UnsignedGreaterThanOrEqual => 0x93,
|
||||
UnsignedGreaterThan => 0x97,
|
||||
UnsignedLessThanOrEqual => 0x96,
|
||||
};
|
||||
sink.put1(0x0f);
|
||||
sink.put1(setcc);
|
||||
modrm_rr(out_reg0, 0, sink);
|
||||
''')
|
||||
|
||||
# Make a FloatCompare instruction predicate with the supported condition codes.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user