Optimize immediates and compare and branch sequences (#286)

* Add a pre-opt optimization to change constants into immediates.

This converts 'iadd' + 'iconst' into 'iadd_imm', and so on.

* Optimize away redundant `bint` instructions.

Cretonne has a concept of "Testable" values, which can be either boolean
or integer. When an instruction needing a "Testable" value receives
the result of a `bint`, converting boolean to integer, eliminate the
`bint`, as it's redundant.

* Postopt: Optimize using CPU flags.

This introduces a post-legalization optimization pass which converts
compare+branch sequences to use flags values on CPUs which support it.

* Define a form of x86's `urm` that doesn't clobber FLAGS.

movzbl/movsbl/etc. don't clobber FLAGS; define a form of the `urm`
recipe that represents this.

* Implement a DCE pass.

This pass deletes instructions with no side effects and no results that
are used.

* Clarify ambiguity about "32-bit" and "64-bit" in comments.

* Add x86 encodings for icmp_imm.

* Add a testcase for postopt CPU flags optimization.

This covers the basic functionality of transforming compare+branch
sequences to use CPU flags.

* Pattern-match irsub_imm in preopt.
This commit is contained in:
Dan Gohman
2018-03-30 12:30:07 -07:00
committed by GitHub
parent 5377092e5b
commit 6606b88136
22 changed files with 921 additions and 109 deletions

View File

@@ -334,6 +334,14 @@ Test the LICM pass.
The LICM pass is run on each function, and then results are run
through filecheck.
`test dce`
-----------------
Test the DCE pass.
The DCE pass is run on each function, and then results are run
through filecheck.
`test preopt`
-----------------
@@ -342,6 +350,14 @@ Test the preopt pass.
The preopt pass is run on each function, and then results are run
through filecheck.
`test postopt`
-----------------
Test the postopt pass.
The postopt pass is run on each function, and then results are run
through filecheck.
`test compile`
--------------

View File

@@ -0,0 +1,46 @@
test dce
function %simple() -> i32 {
ebb0:
v2 = iconst.i32 2
v3 = iconst.i32 3
return v3
}
; sameln: function %simple
; nextln: ebb0:
; nextln: v3 = iconst.i32 3
; nextln: return v3
; nextln: }
function %some_branching(i32, i32) -> i32 {
ebb0(v0: i32, v1: i32):
v3 = iconst.i32 70
v4 = iconst.i32 71
v5 = iconst.i32 72
v8 = iconst.i32 73
brz v0, ebb1
jump ebb2(v8)
ebb1:
v2 = iadd v0, v3
return v0
ebb2(v9: i32):
v6 = iadd v1, v4
v7 = iadd v6, v9
return v7
}
; sameln: function %some_branching
; nextln: ebb0(v0: i32, v1: i32):
; nextln: v4 = iconst.i32 71
; nextln: v8 = iconst.i32 73
; nextln: brz v0, ebb1
; nextln: jump ebb2(v8)
; nextln:
; nextln: ebb1:
; nextln: return v0
; nextln:
; nextln: ebb2(v9: i32):
; nextln: v6 = iadd.i32 v1, v4
; nextln: v7 = iadd v6, v9
; nextln: return v7

View File

@@ -1,4 +1,4 @@
; binary emission of 32-bit code.
; binary emission of x86-32 code.
test binemit
set is_compressed
isa intel haswell

View File

@@ -1,4 +1,4 @@
; binary emission of 64-bit code.
; binary emission of x86-64 code.
test binemit
set is_64bit
set is_compressed
@@ -458,6 +458,14 @@ ebb0:
; asm: setbe %dl
[-,%rdx] v319 = icmp ule v2, v3 ; bin: 4c 39 d6 0f 96 c2
; asm: cmpq $37, %rcx
; asm: setl %bl
[-,%rbx] v320 = icmp_imm slt v1, 37 ; bin: 48 83 f9 25 0f 9c c3
; asm: cmpq $100000, %rcx
; asm: setl %bl
[-,%rbx] v321 = icmp_imm slt v1, 100000 ; bin: 48 81 f9 000186a0 0f 9c c3
; Bool-to-int conversions.
; asm: movzbq %bl, %rcx
@@ -1031,6 +1039,14 @@ ebb0:
; asm: setbe %dl
[-,%rdx] v319 = icmp ule v2, v3 ; bin: 44 39 d6 0f 96 c2
; asm: cmpl $37, %ecx
; asm: setl %bl
[-,%rbx] v320 = icmp_imm slt v1, 37 ; bin: 83 f9 25 0f 9c c3
; asm: cmpl $100000, %ecx
; asm: setl %bl
[-,%rbx] v321 = icmp_imm slt v1, 100000 ; bin: 81 f9 000186a0 0f 9c c3
; Bool-to-int conversions.
; asm: movzbl %bl, %ecx

View File

@@ -0,0 +1,100 @@
test postopt
isa intel
; Test that compare+branch sequences are folded effectively on x86.
function %br_icmp(i32, i32) -> i32 {
ebb0(v0: i32, v1: i32):
[Op1icscc#39,%rdx] v2 = icmp slt v0, v1
[Op1t8jccd_long#85] brnz v2, ebb1
[Op1ret#c3] return v1
ebb1:
[Op1puid#b8,%rax] v8 = iconst.i32 3
[Op1ret#c3] return v8
}
; sameln: function %br_icmp
; nextln: ebb0(v0: i32, v1: i32):
; nextln: v9 = ifcmp v0, v1
; nextln: v2 = trueif slt v9
; nextln: brif slt v9, ebb1
; nextln: return v1
; nextln:
; nextln: ebb1:
; nextln: v8 = iconst.i32 3
; nextln: return v8
; nextln: }
; Use brz instead of brnz, so the condition is inverted.
function %br_icmp_inverse(i32, i32) -> i32 {
ebb0(v0: i32, v1: i32):
[Op1icscc#39,%rdx] v2 = icmp slt v0, v1
[Op1t8jccd_long#84] brz v2, ebb1
[Op1ret#c3] return v1
ebb1:
[Op1puid#b8,%rax] v8 = iconst.i32 3
[Op1ret#c3] return v8
}
; sameln: function %br_icmp_inverse
; nextln: ebb0(v0: i32, v1: i32):
; nextln: v9 = ifcmp v0, v1
; nextln: v2 = trueif slt v9
; nextln: brif sge v9, ebb1
; nextln: return v1
; nextln:
; nextln: ebb1:
; nextln: v8 = iconst.i32 3
; nextln: return v8
; nextln: }
; Use icmp_imm instead of icmp.
function %br_icmp_imm(i32, i32) -> i32 {
ebb0(v0: i32, v1: i32):
[Op1icsccib#7083] v2 = icmp_imm slt v0, 2
[Op1t8jccd_long#84] brz v2, ebb1
[Op1ret#c3] return v1
ebb1:
[Op1puid#b8,%rax] v8 = iconst.i32 3
[Op1ret#c3] return v8
}
; sameln: function %br_icmp_imm
; nextln: ebb0(v0: i32, v1: i32):
; nextln: v9 = ifcmp_imm v0, 2
; nextln: v2 = trueif slt v9
; nextln: brif sge v9, ebb1
; nextln: return v1
; nextln:
; nextln: ebb1:
; nextln: v8 = iconst.i32 3
; nextln: return v8
; nextln: }
; Use fcmp instead of icmp.
function %br_fcmp(f32, f32) -> f32 {
ebb0(v0: f32, v1: f32):
[Op2fcscc#42e,%rdx] v2 = fcmp gt v0, v1
[Op1t8jccd_long#84] brz v2, ebb1
[Op1ret#c3] return v1
ebb1:
[Op1puid#b8,%rax] v18 = iconst.i32 0x40a8_0000
[Mp2frurm#56e,%xmm0] v8 = bitcast.f32 v18
[Op1ret#c3] return v8
}
; sameln: function %br_fcmp
; nextln: ebb0(v0: f32, v1: f32):
; nextln: v19 = ffcmp v0, v1
; nextln: v2 = trueff gt v19
; nextln: brff ule v19, ebb1
; nextln: return v1
; nextln:
; nextln: ebb1:
; nextln: v18 = iconst.i32 0x40a8_0000
; nextln: v8 = bitcast.f32 v18
; nextln: return v8
; nextln: }

View File

@@ -0,0 +1,80 @@
test preopt
isa intel
function %iadd_imm(i32) -> i32 {
ebb0(v0: i32):
v1 = iconst.i32 2
v2 = iadd v0, v1
return v2
}
; sameln: function %iadd_imm
; nextln: ebb0(v0: i32):
; nextln: v1 = iconst.i32 2
; nextln: v2 = iadd_imm v0, 2
; nextln: return v2
; nextln: }
function %isub_imm(i32) -> i32 {
ebb0(v0: i32):
v1 = iconst.i32 2
v2 = isub v0, v1
return v2
}
; sameln: function %isub_imm
; nextln: ebb0(v0: i32):
; nextln: v1 = iconst.i32 2
; nextln: v2 = iadd_imm v0, -2
; nextln: return v2
; nextln: }
function %icmp_imm(i32) -> i32 {
ebb0(v0: i32):
v1 = iconst.i32 2
v2 = icmp slt v0, v1
v3 = bint.i32 v2
return v3
}
; sameln: function %icmp_imm
; nextln: ebb0(v0: i32):
; nextln: v1 = iconst.i32 2
; nextln: v2 = icmp_imm slt v0, 2
; nextln: v3 = bint.i32 v2
; nextln: return v3
; nextln: }
function %brz_bint(i32) {
ebb0(v0: i32):
v3 = icmp_imm slt v0, 0
v1 = bint.i32 v3
v2 = select v1, v1, v1
trapz v1, user0
brz v1, ebb1
jump ebb2
ebb1:
return
ebb2:
return
}
; sameln: function %brz_bint
; nextln: (v0: i32):
; nextln: v3 = icmp_imm slt v0, 0
; nextln: v1 = bint.i32 v3
; nextln: v2 = select v3, v1, v1
; nextln: trapz v3, user0
; nextln: brz v3, ebb1
; nextln: jump ebb2
function %irsub_imm(i32) -> i32 {
ebb0(v0: i32):
v1 = iconst.i32 2
v2 = isub v1, v0
return v2
}
; sameln: function %irsub_imm
; nextln: ebb0(v0: i32):
; nextln: v1 = iconst.i32 2
; nextln: v2 = irsub_imm v1, 2
; nextln: return v2
; nextln: }

View File

@@ -21,7 +21,7 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
@0011 [RexOp1puid#b8] v9 = iconst.i32 0
@0015 [RexOp1puid#b8] v11 = iconst.i32 0
@0017 [RexOp1icscc#39] v12 = icmp.i32 eq v15, v11
@0017 [RexOp2urm#4b6] v13 = bint.i32 v12
@0017 [RexOp2urm_noflags#4b6] v13 = bint.i32 v12
@001a [RexOp1rr#21] v14 = band v9, v13
@001b [RexOp1tjccb#75] brnz v14, ebb6
@001d [RexOp1jmpb#eb] jump ebb7

View File

@@ -4,65 +4,65 @@ isa intel
; Simple, correct use of CPU flags.
function %simple(i32) -> i32 {
ebb0(v0: i32):
[Op1rcmp#39] v1 = ifcmp v0, v0
[Op2seti_abcd#490] v2 = trueif ugt v1
[Op2urm_abcd#4b6] v3 = bint.i32 v2
[Op1ret#c3] return v3
[Op1rcmp#39] v1 = ifcmp v0, v0
[Op2seti_abcd#490] v2 = trueif ugt v1
[Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2
[Op1ret#c3] return v3
}
; Overlapping flag values of different types.
function %overlap(i32, f32) -> i32 {
ebb0(v0: i32, v1: f32):
[Op1rcmp#39] v2 = ifcmp v0, v0
[Op2fcmp#42e] v3 = ffcmp v1, v1
[Op2setf_abcd#490] v4 = trueff gt v3 ; error: conflicting live CPU flags: v2 and v3
[Op2seti_abcd#490] v5 = trueif ugt v2
[Op1rr#21] v6 = band v4, v5
[Op2urm_abcd#4b6] v7 = bint.i32 v6
[Op1ret#c3] return v7
[Op1rcmp#39] v2 = ifcmp v0, v0
[Op2fcmp#42e] v3 = ffcmp v1, v1
[Op2setf_abcd#490] v4 = trueff gt v3 ; error: conflicting live CPU flags: v2 and v3
[Op2seti_abcd#490] v5 = trueif ugt v2
[Op1rr#21] v6 = band v4, v5
[Op2urm_noflags_abcd#4b6] v7 = bint.i32 v6
[Op1ret#c3] return v7
}
; CPU flags clobbered by arithmetic.
function %clobbered(i32) -> i32 {
ebb0(v0: i32):
[Op1rcmp#39] v1 = ifcmp v0, v0
[Op1rr#01] v2 = iadd v0, v0 ; error: encoding clobbers live CPU flags in v1
[Op2seti_abcd#490] v3 = trueif ugt v1
[Op2urm_abcd#4b6] v4 = bint.i32 v3
[Op1ret#c3] return v4
[Op1rcmp#39] v1 = ifcmp v0, v0
[Op1rr#01] v2 = iadd v0, v0 ; error: encoding clobbers live CPU flags in v1
[Op2seti_abcd#490] v3 = trueif ugt v1
[Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3
[Op1ret#c3] return v4
}
; CPU flags not clobbered by load.
function %live_across_load(i32) -> i32 {
ebb0(v0: i32):
[Op1rcmp#39] v1 = ifcmp v0, v0
[Op1ld#8b] v2 = load.i32 v0
[Op2seti_abcd#490] v3 = trueif ugt v1
[Op2urm_abcd#4b6] v4 = bint.i32 v3
[Op1ret#c3] return v4
[Op1rcmp#39] v1 = ifcmp v0, v0
[Op1ld#8b] v2 = load.i32 v0
[Op2seti_abcd#490] v3 = trueif ugt v1
[Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3
[Op1ret#c3] return v4
}
; Correct use of CPU flags across EBB.
function %live_across_ebb(i32) -> i32 {
ebb0(v0: i32):
[Op1rcmp#39] v1 = ifcmp v0, v0
[Op1jmpb#eb] jump ebb1
ebb1:
[Op2seti_abcd#490] v2 = trueif ugt v1
[Op2urm_abcd#4b6] v3 = bint.i32 v2
[Op1ret#c3] return v3
ebb0(v0: i32):
[Op1rcmp#39] v1 = ifcmp v0, v0
[Op1jmpb#eb] jump ebb1
ebb1:
[Op2seti_abcd#490] v2 = trueif ugt v1
[Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2
[Op1ret#c3] return v3
}
function %live_across_ebb_backwards(i32) -> i32 {
ebb0(v0: i32):
[Op1jmpb#eb] jump ebb2
ebb1:
[Op2seti_abcd#490] v2 = trueif ugt v1
[Op2urm_abcd#4b6] v3 = bint.i32 v2
[Op1ret#c3] return v3
ebb2:
[Op1rcmp#39] v1 = ifcmp v0, v0
[Op1jmpb#eb] jump ebb1
ebb0(v0: i32):
[Op1jmpb#eb] jump ebb2
ebb1:
[Op2seti_abcd#490] v2 = trueif ugt v1
[Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2
[Op1ret#c3] return v3
ebb2:
[Op1rcmp#39] v1 = ifcmp v0, v0
[Op1jmpb#eb] jump ebb1
}
; Flags live into loop.
@@ -73,4 +73,4 @@ function %live_into_loop(i32) -> i32 {
ebb1:
[Op2seti_abcd#490] v2 = trueif ugt v1
[Op1jmpb#eb] jump ebb1
}
}

View File

@@ -378,6 +378,8 @@ X86_64.enc(base.trapff, r.trapff, 0)
# Comparisons
#
enc_i32_i64(base.icmp, r.icscc, 0x39)
enc_i32_i64(base.icmp_imm, r.icsccib, 0x83, rrr=7)
enc_i32_i64(base.icmp_imm, r.icsccid, 0x81, rrr=7)
enc_i32_i64(base.ifcmp, r.rcmp, 0x39)
enc_i32_i64(base.ifcmp_imm, r.rcmpib, 0x83, rrr=7)
enc_i32_i64(base.ifcmp_imm, r.rcmpid, 0x81, rrr=7)
@@ -409,11 +411,13 @@ enc_i32_i64(x86.bsr, r.bsf_and_bsr, 0x0F, 0xBD)
#
# This assumes that b1 is represented as an 8-bit low register with the value 0
# or 1.
X86_32.enc(base.bint.i32.b1, *r.urm_abcd(0x0f, 0xb6))
X86_64.enc(base.bint.i64.b1, *r.urm.rex(0x0f, 0xb6)) # zext to i64 implicit.
X86_64.enc(base.bint.i64.b1, *r.urm_abcd(0x0f, 0xb6)) # zext to i64 implicit.
X86_64.enc(base.bint.i32.b1, *r.urm.rex(0x0f, 0xb6))
X86_64.enc(base.bint.i32.b1, *r.urm_abcd(0x0f, 0xb6))
#
# Encode movzbq as movzbl, because it's equivalent and shorter.
X86_32.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
X86_64.enc(base.bint.i64.b1, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.bint.i64.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
X86_64.enc(base.bint.i32.b1, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
# Numerical conversions.
@@ -430,41 +434,41 @@ X86_64.enc(base.ireduce.i32.i64, r.null, 0)
# instructions for %al/%ax/%eax to %ax/%eax/%rax.
# movsbl
X86_32.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe))
X86_64.enc(base.sextend.i32.i8, *r.urm.rex(0x0f, 0xbe))
X86_64.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe))
X86_32.enc(base.sextend.i32.i8, *r.urm_noflags(0x0f, 0xbe))
X86_64.enc(base.sextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xbe))
X86_64.enc(base.sextend.i32.i8, *r.urm_noflags(0x0f, 0xbe))
# movswl
X86_32.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf))
X86_64.enc(base.sextend.i32.i16, *r.urm.rex(0x0f, 0xbf))
X86_64.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf))
X86_32.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
X86_64.enc(base.sextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xbf))
X86_64.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
# movsbq
X86_64.enc(base.sextend.i64.i8, *r.urm.rex(0x0f, 0xbe, w=1))
X86_64.enc(base.sextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xbe, w=1))
# movswq
X86_64.enc(base.sextend.i64.i16, *r.urm.rex(0x0f, 0xbf, w=1))
X86_64.enc(base.sextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xbf, w=1))
# movslq
X86_64.enc(base.sextend.i64.i32, *r.urm.rex(0x63, w=1))
X86_64.enc(base.sextend.i64.i32, *r.urm_noflags.rex(0x63, w=1))
# movzbl
X86_32.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6))
X86_64.enc(base.uextend.i32.i8, *r.urm.rex(0x0f, 0xb6))
X86_64.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6))
X86_32.enc(base.uextend.i32.i8, *r.urm_noflags(0x0f, 0xb6))
X86_64.enc(base.uextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.uextend.i32.i8, *r.urm_noflags(0x0f, 0xb6))
# movzwl
X86_32.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7))
X86_64.enc(base.uextend.i32.i16, *r.urm.rex(0x0f, 0xb7))
X86_64.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7))
X86_32.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
X86_64.enc(base.uextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xb7))
X86_64.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
# movzbq, encoded as movzbl because it's equivalent and shorter
X86_64.enc(base.uextend.i64.i8, *r.urm.rex(0x0f, 0xb6))
X86_64.enc(base.uextend.i64.i8, *r.urm(0x0f, 0xb6))
X86_64.enc(base.uextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.uextend.i64.i8, *r.urm_noflags(0x0f, 0xb6))
# movzwq, encoded as movzwl because it's equivalent and shorter
X86_64.enc(base.uextend.i64.i16, *r.urm.rex(0x0f, 0xb7))
X86_64.enc(base.uextend.i64.i16, *r.urm(0x0f, 0xb7))
X86_64.enc(base.uextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xb7))
X86_64.enc(base.uextend.i64.i16, *r.urm_noflags(0x0f, 0xb7))
# A 32-bit register copy clears the high 32 bits.
X86_64.enc(base.uextend.i64.i32, *r.umr.rex(0x89))

View File

@@ -8,7 +8,8 @@ from cdsl.registers import RegClass
from base.formats import Unary, UnaryImm, UnaryBool, Binary, BinaryImm
from base.formats import MultiAry, NullAry
from base.formats import Trap, Call, IndirectCall, Store, Load
from base.formats import IntCompare, FloatCompare, IntCond, FloatCond
from base.formats import IntCompare, IntCompareImm, FloatCompare
from base.formats import IntCond, FloatCond
from base.formats import IntSelect, IntCondTrap, FloatCondTrap
from base.formats import Jump, Branch, BranchInt, BranchFloat
from base.formats import Ternary, FuncAddr, UnaryGlobalVar
@@ -364,7 +365,7 @@ rfumr = TailRecipe(
''')
# XX /r, but for a unary operator with separate input/output register.
# RM form.
# RM form. Clobbers FLAGS.
urm = TailRecipe(
'urm', Unary, size=1, ins=GPR, outs=GPR,
emit='''
@@ -372,10 +373,19 @@ urm = TailRecipe(
modrm_rr(in_reg0, out_reg0, sink);
''')
# XX /r. Same as urm, but input limited to ABCD.
urm_abcd = TailRecipe(
'urm_abcd', Unary, size=1, ins=ABCD, outs=GPR,
when_prefixed=urm,
# XX /r. Same as urm, but doesn't clobber FLAGS.
urm_noflags = TailRecipe(
'urm_noflags', Unary, size=1, ins=GPR, outs=GPR,
clobbers_flags=False,
emit='''
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
modrm_rr(in_reg0, out_reg0, sink);
''')
# XX /r. Same as urm_noflags, but input limited to ABCD.
urm_noflags_abcd = TailRecipe(
'urm_noflags_abcd', Unary, size=1, ins=ABCD, outs=GPR,
when_prefixed=urm_noflags,
emit='''
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
modrm_rr(in_reg0, out_reg0, sink);
@@ -1360,6 +1370,61 @@ icscc = TailRecipe(
modrm_rr(out_reg0, 0, sink);
''')
icsccib = TailRecipe(
'icsccib', IntCompareImm, size=2 + 3, ins=GPR, outs=ABCD,
instp=IsSignedInt(IntCompareImm.imm, 8),
emit='''
// Comparison instruction.
PUT_OP(bits, rex1(in_reg0), sink);
modrm_r_bits(in_reg0, bits, sink);
let imm: i64 = imm.into();
sink.put1(imm as u8);
// `setCC` instruction, no REX.
use ir::condcodes::IntCC::*;
let setcc = match cond {
Equal => 0x94,
NotEqual => 0x95,
SignedLessThan => 0x9c,
SignedGreaterThanOrEqual => 0x9d,
SignedGreaterThan => 0x9f,
SignedLessThanOrEqual => 0x9e,
UnsignedLessThan => 0x92,
UnsignedGreaterThanOrEqual => 0x93,
UnsignedGreaterThan => 0x97,
UnsignedLessThanOrEqual => 0x96,
};
sink.put1(0x0f);
sink.put1(setcc);
modrm_rr(out_reg0, 0, sink);
''')
icsccid = TailRecipe(
'icsccid', IntCompareImm, size=5 + 3, ins=GPR, outs=ABCD,
instp=IsSignedInt(IntCompareImm.imm, 32),
emit='''
// Comparison instruction.
PUT_OP(bits, rex1(in_reg0), sink);
modrm_r_bits(in_reg0, bits, sink);
let imm: i64 = imm.into();
sink.put4(imm as u32);
// `setCC` instruction, no REX.
use ir::condcodes::IntCC::*;
let setcc = match cond {
Equal => 0x94,
NotEqual => 0x95,
SignedLessThan => 0x9c,
SignedGreaterThanOrEqual => 0x9d,
SignedGreaterThan => 0x9f,
SignedLessThanOrEqual => 0x9e,
UnsignedLessThan => 0x92,
UnsignedGreaterThanOrEqual => 0x93,
UnsignedGreaterThan => 0x97,
UnsignedLessThanOrEqual => 0x96,
};
sink.put1(0x0f);
sink.put1(setcc);
modrm_rr(out_reg0, 0, sink);
''')
# Make a FloatCompare instruction predicate with the supported condition codes.

View File

@@ -21,9 +21,11 @@ use result::{CtonError, CtonResult};
use settings::{FlagsOrIsa, OptLevel};
use unreachable_code::eliminate_unreachable_code;
use verifier;
use dce::do_dce;
use simple_gvn::do_simple_gvn;
use licm::do_licm;
use preopt::do_preopt;
use postopt::do_postopt;
use timing;
/// Persistent data structures and compilation pipeline.
@@ -92,6 +94,9 @@ impl Context {
self.preopt(isa)?;
}
self.legalize(isa)?;
if isa.flags().opt_level() != OptLevel::Fastest {
self.postopt(isa)?;
}
if isa.flags().opt_level() == OptLevel::Best {
self.compute_domtree();
self.compute_loop_analysis();
@@ -100,6 +105,7 @@ impl Context {
}
self.compute_domtree();
self.eliminate_unreachable_code(isa)?;
self.dce(isa)?;
self.regalloc(isa)?;
self.prologue_epilogue(isa)?;
self.relax_branches(isa)
@@ -153,6 +159,13 @@ impl Context {
}
}
/// Perform dead-code elimination on the function.
pub fn dce<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CtonResult {
do_dce(&mut self.func, &mut self.domtree);
self.verify_if(fisa)?;
Ok(())
}
/// Perform pre-legalization rewrites on the function.
pub fn preopt(&mut self, isa: &TargetIsa) -> CtonResult {
do_preopt(&mut self.func);
@@ -170,6 +183,13 @@ impl Context {
self.verify_if(isa)
}
/// Perform post-legalization rewrites on the function.
pub fn postopt(&mut self, isa: &TargetIsa) -> CtonResult {
do_postopt(&mut self.func, isa);
self.verify_if(isa)?;
Ok(())
}
/// Compute the control flow graph.
pub fn compute_cfg(&mut self) {
self.cfg.compute(&self.func)

68
lib/cretonne/src/dce.rs Normal file
View File

@@ -0,0 +1,68 @@
//! A Dead-Code Elimination (DCE) pass.
//!
//! Dead code here means instructions that have no side effects and have no
//! result values used by other instructions.
use cursor::{Cursor, FuncCursor};
use dominator_tree::DominatorTree;
use entity::EntityRef;
use ir::{Function, Inst, Opcode, DataFlowGraph};
use ir::instructions::InstructionData;
use timing;
use std::vec::Vec;
/// Test whether the given opcode is unsafe to even consider for DCE.
fn trivially_unsafe_for_dce(opcode: Opcode) -> bool {
opcode.is_call() || opcode.is_branch() || opcode.is_terminator() ||
opcode.is_return() || opcode.can_trap() || opcode.other_side_effects() ||
opcode.can_store()
}
/// Preserve instructions with used result values.
fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool {
dfg.inst_results(inst).iter().any(|v| live[v.index()])
}
/// Load instructions without the `notrap` flag are defined to trap when
/// operating on inaccessible memory, so we can't DCE them even if the
/// loaded value is unused.
fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool {
if !opcode.can_load() {
return false;
}
match *data {
InstructionData::StackLoad { .. } => false,
InstructionData::Load { flags, .. } => !flags.notrap(),
_ => true,
}
}
/// Perform DCE on `func`.
pub fn do_dce(func: &mut Function, domtree: &mut DominatorTree) {
let _tt = timing::dce();
debug_assert!(domtree.is_valid());
let mut live = Vec::with_capacity(func.dfg.num_values());
live.resize(func.dfg.num_values(), false);
for &ebb in domtree.cfg_postorder().iter() {
let mut pos = FuncCursor::new(func).at_bottom(ebb);
while let Some(inst) = pos.prev_inst() {
{
let data = &pos.func.dfg[inst];
let opcode = data.opcode();
if trivially_unsafe_for_dce(opcode) ||
is_load_with_defined_trapping(opcode, &data) ||
any_inst_results_used(inst, &live, &pos.func.dfg)
{
for arg in pos.func.dfg.inst_args(inst) {
let v = pos.func.dfg.resolve_aliases(*arg);
live[v.index()] = true;
}
continue;
}
}
pos.remove_inst();
}
}
}

View File

@@ -21,6 +21,11 @@ impl Imm64 {
pub fn new(x: i64) -> Imm64 {
Imm64(x)
}
/// Return self negated.
pub fn wrapping_neg(self) -> Imm64 {
Imm64(self.0.wrapping_neg())
}
}
impl Into<i64> for Imm64 {

View File

@@ -58,6 +58,10 @@ impl TargetIsa for Isa {
&self.shared_flags
}
fn uses_cpu_flags(&self) -> bool {
true
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}

View File

@@ -158,6 +158,11 @@ pub trait TargetIsa: fmt::Display {
/// Get the ISA-independent flags that were used to make this trait object.
fn flags(&self) -> &settings::Flags;
/// Does the CPU implement scalar comparisons using a CPU flags register?
fn uses_cpu_flags(&self) -> bool {
false
}
/// Get a data structure describing the registers in this ISA.
fn register_info(&self) -> RegInfo;

View File

@@ -68,11 +68,13 @@ mod abi;
mod bitset;
mod constant_hash;
mod context;
mod dce;
mod divconst_magic_numbers;
mod iterators;
mod legalizer;
mod licm;
mod partition_slice;
mod postopt;
mod predicates;
mod preopt;
mod ref_slice;

211
lib/cretonne/src/postopt.rs Normal file
View File

@@ -0,0 +1,211 @@
//! A post-legalization rewriting pass.
#![allow(non_snake_case)]
use cursor::{Cursor, EncCursor};
use ir::dfg::ValueDef;
use ir::{Function, InstructionData, Value, InstBuilder, Ebb, Inst};
use ir::condcodes::{CondCode, IntCC, FloatCC};
use ir::instructions::{Opcode, ValueList};
use ir::immediates::Imm64;
use isa::TargetIsa;
use timing;
/// Information collected about a compare+branch sequence.
struct CmpBrInfo {
/// The branch instruction.
br_inst: Inst,
/// The icmp, icmp_imm, or fcmp instruction.
cmp_inst: Inst,
/// The destination of the branch.
destination: Ebb,
/// The arguments of the branch.
args: ValueList,
/// The first argument to the comparison. The second is in the `kind` field.
cmp_arg: Value,
/// If the branch is `brz` rather than `brnz`, we need to invert the condition
/// before the branch.
invert_branch_cond: bool,
/// The kind of comparison, and the second argument.
kind: CmpBrKind,
}
enum CmpBrKind {
Icmp { cond: IntCC, arg: Value },
IcmpImm { cond: IntCC, imm: Imm64 },
Fcmp { cond: FloatCC, arg: Value },
}
/// Optimize comparisons to use flags values, to avoid materializing conditions
/// in integer registers.
///
/// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff
/// sequences.
fn optimize_cpu_flags(
pos: &mut EncCursor,
inst: Inst,
last_flags_clobber: Option<Inst>,
isa: &TargetIsa,
) {
// Look for compare and branch patterns.
// This code could be considerably simplified with non-lexical lifetimes.
let info = match pos.func.dfg[inst] {
InstructionData::Branch {
opcode,
destination,
ref args,
} => {
let first_arg = args.first(&pos.func.dfg.value_lists).unwrap();
let invert_branch_cond = match opcode {
Opcode::Brz => true,
Opcode::Brnz => false,
_ => panic!(),
};
if let ValueDef::Result(cond_inst, _) = pos.func.dfg.value_def(first_arg) {
match pos.func.dfg[cond_inst] {
InstructionData::IntCompare {
cond,
args: cmp_args,
..
} => {
CmpBrInfo {
br_inst: inst,
cmp_inst: cond_inst,
destination,
args: args.clone(),
cmp_arg: cmp_args[0],
invert_branch_cond,
kind: CmpBrKind::Icmp {
cond,
arg: cmp_args[1],
},
}
}
InstructionData::IntCompareImm {
cond,
arg: cmp_arg,
imm: cmp_imm,
..
} => {
CmpBrInfo {
br_inst: inst,
cmp_inst: cond_inst,
destination,
args: args.clone(),
cmp_arg,
invert_branch_cond,
kind: CmpBrKind::IcmpImm { cond, imm: cmp_imm },
}
}
InstructionData::FloatCompare {
cond,
args: cmp_args,
..
} => {
CmpBrInfo {
br_inst: inst,
cmp_inst: cond_inst,
destination,
args: args.clone(),
cmp_arg: cmp_args[0],
invert_branch_cond,
kind: CmpBrKind::Fcmp {
cond,
arg: cmp_args[1],
},
}
}
_ => return,
}
} else {
return;
}
}
// TODO: trapif, trueif, selectif, and their ff counterparts.
_ => return,
};
// If any instructions clobber the flags between the comparison and the branch,
// don't optimize them.
if last_flags_clobber != Some(info.cmp_inst) {
return;
}
// We found a compare+branch pattern. Transform it to use flags.
let args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec();
pos.goto_inst(info.cmp_inst);
match info.kind {
CmpBrKind::Icmp { mut cond, arg } => {
let flags = pos.ins().ifcmp(info.cmp_arg, arg);
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
if info.invert_branch_cond {
cond = cond.inverse();
}
pos.func.dfg.replace(info.br_inst).brif(
cond,
flags,
info.destination,
&args,
);
}
CmpBrKind::IcmpImm { mut cond, imm } => {
let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm);
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
if info.invert_branch_cond {
cond = cond.inverse();
}
pos.func.dfg.replace(info.br_inst).brif(
cond,
flags,
info.destination,
&args,
);
}
CmpBrKind::Fcmp { mut cond, arg } => {
let flags = pos.ins().ffcmp(info.cmp_arg, arg);
pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags);
if info.invert_branch_cond {
cond = cond.inverse();
}
pos.func.dfg.replace(info.br_inst).brff(
cond,
flags,
info.destination,
&args,
);
}
}
pos.func.update_encoding(info.cmp_inst, isa).is_ok();
pos.func.update_encoding(info.br_inst, isa).is_ok();
}
//----------------------------------------------------------------------
//
// The main post-opt pass.
pub fn do_postopt(func: &mut Function, isa: &TargetIsa) {
let _tt = timing::postopt();
let mut pos = EncCursor::new(func, isa);
while let Some(_ebb) = pos.next_ebb() {
let mut last_flags_clobber = None;
while let Some(inst) = pos.next_inst() {
if isa.uses_cpu_flags() {
// Optimize instructions to make use of flags.
optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa);
// Track the most recent seen instruction that clobbers the flags.
if let Some(constraints) =
isa.encoding_info().operand_constraints(
pos.func.encodings[inst],
)
{
if constraints.clobbers_flags {
last_flags_clobber = Some(inst)
}
}
}
}
}
}

View File

@@ -127,28 +127,6 @@ fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
return package_up_divrem_info(arg, argL_ty, imm.into(), isSigned, isRem);
}
// TODO: should we actually bother to do this (that is, manually match
// the case that the second argument is an iconst)? Or should we assume
// that some previous constant propagation pass has pushed all such
// immediates to their use points, creating BinaryImm instructions
// instead? For now we take the conservative approach.
if let InstructionData::Binary { opcode, args } = *idata {
let (isSigned, isRem) = match opcode {
Opcode::Udiv => (false, false),
Opcode::Urem => (false, true),
Opcode::Sdiv => (true, false),
Opcode::Srem => (true, true),
_other => return None,
};
let argR: Value = args[1];
if let Some(simm64) = get_const(argR, dfg) {
let argL: Value = args[0];
// Pull the operation size (type) from the left arg
let argL_ty = dfg.value_type(argL);
return package_up_divrem_info(argL, argL_ty, simm64, isSigned, isRem);
}
}
None
}
@@ -473,25 +451,106 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
}
}
//----------------------------------------------------------------------
//
// General pattern-match helpers.
/// Find out if `value` actually resolves to a constant, and if so what its
/// value is.
fn get_const(value: Value, dfg: &DataFlowGraph) -> Option<i64> {
match dfg.value_def(value) {
ValueDef::Result(definingInst, resultNo) => {
let definingIData: &InstructionData = &dfg[definingInst];
if let InstructionData::UnaryImm { opcode, imm } = *definingIData {
if opcode == Opcode::Iconst && resultNo == 0 {
return Some(imm.into());
/// Apply basic simplifications.
///
/// This folds constants with arithmetic to form `_imm` instructions, and other
/// minor simplifications.
fn simplify(pos: &mut FuncCursor, inst: Inst) {
match pos.func.dfg[inst] {
InstructionData::Binary { opcode, args } => {
if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[1]) {
if let InstructionData::UnaryImm {
opcode: Opcode::Iconst,
mut imm,
} = pos.func.dfg[iconst_inst]
{
let new_opcode = match opcode {
Opcode::Iadd => Opcode::IaddImm,
Opcode::Imul => Opcode::ImulImm,
Opcode::Sdiv => Opcode::SdivImm,
Opcode::Udiv => Opcode::UdivImm,
Opcode::Srem => Opcode::SremImm,
Opcode::Urem => Opcode::UremImm,
Opcode::Band => Opcode::BandImm,
Opcode::Bor => Opcode::BorImm,
Opcode::Bxor => Opcode::BxorImm,
Opcode::Rotl => Opcode::RotlImm,
Opcode::Rotr => Opcode::RotrImm,
Opcode::Ishl => Opcode::IshlImm,
Opcode::Ushr => Opcode::UshrImm,
Opcode::Sshr => Opcode::SshrImm,
Opcode::Isub => {
imm = imm.wrapping_neg();
Opcode::IaddImm
}
_ => return,
};
let ty = pos.func.dfg.ctrl_typevar(inst);
pos.func.dfg.replace(inst).BinaryImm(
new_opcode,
ty,
imm,
args[0],
);
}
} else if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[0]) {
if let InstructionData::UnaryImm {
opcode: Opcode::Iconst,
mut imm,
} = pos.func.dfg[iconst_inst]
{
let new_opcode = match opcode {
Opcode::Isub => Opcode::IrsubImm,
_ => return,
};
let ty = pos.func.dfg.ctrl_typevar(inst);
pos.func.dfg.replace(inst).BinaryImm(
new_opcode,
ty,
imm,
args[0],
);
}
}
None
}
ValueDef::Param(_definingEbb, _paramNo) => None,
InstructionData::IntCompare { opcode, cond, args } => {
debug_assert_eq!(opcode, Opcode::Icmp);
if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[1]) {
if let InstructionData::UnaryImm {
opcode: Opcode::Iconst,
imm,
} = pos.func.dfg[iconst_inst]
{
pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
}
}
}
InstructionData::CondTrap { .. } |
InstructionData::Branch { .. } |
InstructionData::Ternary { opcode: Opcode::Select, .. } => {
// Fold away a redundant `bint`.
let maybe = {
let args = pos.func.dfg.inst_args(inst);
if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
if let InstructionData::Unary {
opcode: Opcode::Bint,
arg: bool_val,
} = pos.func.dfg[def_inst]
{
Some(bool_val)
} else {
None
}
} else {
None
}
};
if let Some(bool_val) = maybe {
let args = pos.func.dfg.inst_args_mut(inst);
args[0] = bool_val;
}
}
_ => {}
}
}
@@ -503,6 +562,8 @@ pub fn do_preopt(func: &mut Function) {
while let Some(_ebb) = pos.next_ebb() {
while let Some(inst) = pos.next_inst() {
// Apply basic simplifications.
simplify(&mut pos, inst);
//-- BEGIN -- division by constants ----------------

View File

@@ -55,7 +55,9 @@ define_passes!{
flowgraph: "Control flow graph",
domtree: "Dominator tree",
loop_analysis: "Loop analysis",
postopt: "Post-legalization rewriting",
preopt: "Pre-legalization rewriting",
dce: "Dead code elimination",
legalize: "Legalization",
gvn: "Global value numbering",
licm: "Loop invariant code motion",

View File

@@ -28,9 +28,11 @@ mod match_directive;
mod test_binemit;
mod test_cat;
mod test_compile;
mod test_dce;
mod test_domtree;
mod test_legalizer;
mod test_licm;
mod test_postopt;
mod test_preopt;
mod test_print_cfg;
mod test_regalloc;
@@ -73,9 +75,11 @@ fn new_subtest(parsed: &TestCommand) -> subtest::Result<Box<subtest::SubTest>> {
"binemit" => test_binemit::subtest(parsed),
"cat" => test_cat::subtest(parsed),
"compile" => test_compile::subtest(parsed),
"dce" => test_dce::subtest(parsed),
"domtree" => test_domtree::subtest(parsed),
"legalizer" => test_legalizer::subtest(parsed),
"licm" => test_licm::subtest(parsed),
"postopt" => test_postopt::subtest(parsed),
"preopt" => test_preopt::subtest(parsed),
"print-cfg" => test_print_cfg::subtest(parsed),
"regalloc" => test_regalloc::subtest(parsed),

View File

@@ -0,0 +1,53 @@
//! Test command for testing the DCE pass.
//!
//! The `dce` test command runs each function through the DCE pass after ensuring
//! that all instructions are legal for the target.
//!
//! The resulting function is sent to `filecheck`.
use cretonne::ir::Function;
use cretonne;
use cretonne::print_errors::pretty_error;
use cton_reader::TestCommand;
use subtest::{SubTest, Context, Result, run_filecheck};
use std::borrow::Cow;
use std::fmt::Write;
struct TestDCE;
pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
assert_eq!(parsed.command, "dce");
if !parsed.options.is_empty() {
Err(format!("No options allowed on {}", parsed))
} else {
Ok(Box::new(TestDCE))
}
}
impl SubTest for TestDCE {
fn name(&self) -> Cow<str> {
Cow::from("dce")
}
fn is_mutating(&self) -> bool {
true
}
fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
// Create a compilation context, and drop in the function.
let mut comp_ctx = cretonne::Context::new();
comp_ctx.func = func.into_owned();
comp_ctx.flowgraph();
comp_ctx.compute_loop_analysis();
comp_ctx.dce(context.flags_or_isa()).map_err(|e| {
pretty_error(&comp_ctx.func, context.isa, Into::into(e))
})?;
let mut text = String::new();
write!(&mut text, "{}", &comp_ctx.func).map_err(
|e| e.to_string(),
)?;
run_filecheck(&text, context)
}
}

View File

@@ -0,0 +1,50 @@
//! Test command for testing the postopt pass.
//!
//! The resulting function is sent to `filecheck`.
use cretonne::ir::Function;
use cretonne;
use cretonne::print_errors::pretty_error;
use cton_reader::TestCommand;
use subtest::{SubTest, Context, Result, run_filecheck};
use std::borrow::Cow;
use std::fmt::Write;
struct TestPostopt;
pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
assert_eq!(parsed.command, "postopt");
if !parsed.options.is_empty() {
Err(format!("No options allowed on {}", parsed))
} else {
Ok(Box::new(TestPostopt))
}
}
impl SubTest for TestPostopt {
fn name(&self) -> Cow<str> {
Cow::from("postopt")
}
fn is_mutating(&self) -> bool {
true
}
fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
// Create a compilation context, and drop in the function.
let mut comp_ctx = cretonne::Context::new();
comp_ctx.func = func.into_owned();
let isa = context.isa.expect("postopt needs an ISA");
comp_ctx.flowgraph();
comp_ctx.postopt(isa).map_err(|e| {
pretty_error(&comp_ctx.func, context.isa, Into::into(e))
})?;
let mut text = String::new();
write!(&mut text, "{}", &comp_ctx.func).map_err(
|e| e.to_string(),
)?;
run_filecheck(&text, context)
}
}