Optimize immediates and compare and branch sequences (#286)
* Add a pre-opt optimization to change constants into immediates. This converts 'iadd' + 'iconst' into 'iadd_imm', and so on. * Optimize away redundant `bint` instructions. Cretonne has a concept of "Testable" values, which can be either boolean or integer. When an instruction needing a "Testable" value receives the result of a `bint`, converting boolean to integer, eliminate the `bint`, as it's redundant. * Postopt: Optimize using CPU flags. This introduces a post-legalization optimization pass which converts compare+branch sequences to use flags values on CPUs which support it. * Define a form of x86's `urm` that doesn't clobber FLAGS. movzbl/movsbl/etc. don't clobber FLAGS; define a form of the `urm` recipe that represents this. * Implement a DCE pass. This pass deletes instructions with no side effects and no results that are used. * Clarify ambiguity about "32-bit" and "64-bit" in comments. * Add x86 encodings for icmp_imm. * Add a testcase for postopt CPU flags optimization. This covers the basic functionality of transforming compare+branch sequences to use CPU flags. * Pattern-match irsub_imm in preopt.
This commit is contained in:
@@ -334,6 +334,14 @@ Test the LICM pass.
|
||||
The LICM pass is run on each function, and then results are run
|
||||
through filecheck.
|
||||
|
||||
`test dce`
|
||||
-----------------
|
||||
|
||||
Test the DCE pass.
|
||||
|
||||
The DCE pass is run on each function, and then results are run
|
||||
through filecheck.
|
||||
|
||||
`test preopt`
|
||||
-----------------
|
||||
|
||||
@@ -342,6 +350,14 @@ Test the preopt pass.
|
||||
The preopt pass is run on each function, and then results are run
|
||||
through filecheck.
|
||||
|
||||
`test postopt`
|
||||
-----------------
|
||||
|
||||
Test the postopt pass.
|
||||
|
||||
The postopt pass is run on each function, and then results are run
|
||||
through filecheck.
|
||||
|
||||
`test compile`
|
||||
--------------
|
||||
|
||||
|
||||
46
cranelift/filetests/dce/basic.cton
Normal file
46
cranelift/filetests/dce/basic.cton
Normal file
@@ -0,0 +1,46 @@
|
||||
test dce
|
||||
|
||||
function %simple() -> i32 {
|
||||
ebb0:
|
||||
v2 = iconst.i32 2
|
||||
v3 = iconst.i32 3
|
||||
return v3
|
||||
}
|
||||
; sameln: function %simple
|
||||
; nextln: ebb0:
|
||||
; nextln: v3 = iconst.i32 3
|
||||
; nextln: return v3
|
||||
; nextln: }
|
||||
|
||||
function %some_branching(i32, i32) -> i32 {
|
||||
ebb0(v0: i32, v1: i32):
|
||||
v3 = iconst.i32 70
|
||||
v4 = iconst.i32 71
|
||||
v5 = iconst.i32 72
|
||||
v8 = iconst.i32 73
|
||||
brz v0, ebb1
|
||||
jump ebb2(v8)
|
||||
|
||||
ebb1:
|
||||
v2 = iadd v0, v3
|
||||
return v0
|
||||
|
||||
ebb2(v9: i32):
|
||||
v6 = iadd v1, v4
|
||||
v7 = iadd v6, v9
|
||||
return v7
|
||||
}
|
||||
; sameln: function %some_branching
|
||||
; nextln: ebb0(v0: i32, v1: i32):
|
||||
; nextln: v4 = iconst.i32 71
|
||||
; nextln: v8 = iconst.i32 73
|
||||
; nextln: brz v0, ebb1
|
||||
; nextln: jump ebb2(v8)
|
||||
; nextln:
|
||||
; nextln: ebb1:
|
||||
; nextln: return v0
|
||||
; nextln:
|
||||
; nextln: ebb2(v9: i32):
|
||||
; nextln: v6 = iadd.i32 v1, v4
|
||||
; nextln: v7 = iadd v6, v9
|
||||
; nextln: return v7
|
||||
@@ -1,4 +1,4 @@
|
||||
; binary emission of 32-bit code.
|
||||
; binary emission of x86-32 code.
|
||||
test binemit
|
||||
set is_compressed
|
||||
isa intel haswell
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; binary emission of 64-bit code.
|
||||
; binary emission of x86-64 code.
|
||||
test binemit
|
||||
set is_64bit
|
||||
set is_compressed
|
||||
@@ -458,6 +458,14 @@ ebb0:
|
||||
; asm: setbe %dl
|
||||
[-,%rdx] v319 = icmp ule v2, v3 ; bin: 4c 39 d6 0f 96 c2
|
||||
|
||||
; asm: cmpq $37, %rcx
|
||||
; asm: setl %bl
|
||||
[-,%rbx] v320 = icmp_imm slt v1, 37 ; bin: 48 83 f9 25 0f 9c c3
|
||||
|
||||
; asm: cmpq $100000, %rcx
|
||||
; asm: setl %bl
|
||||
[-,%rbx] v321 = icmp_imm slt v1, 100000 ; bin: 48 81 f9 000186a0 0f 9c c3
|
||||
|
||||
; Bool-to-int conversions.
|
||||
|
||||
; asm: movzbq %bl, %rcx
|
||||
@@ -1031,6 +1039,14 @@ ebb0:
|
||||
; asm: setbe %dl
|
||||
[-,%rdx] v319 = icmp ule v2, v3 ; bin: 44 39 d6 0f 96 c2
|
||||
|
||||
; asm: cmpl $37, %ecx
|
||||
; asm: setl %bl
|
||||
[-,%rbx] v320 = icmp_imm slt v1, 37 ; bin: 83 f9 25 0f 9c c3
|
||||
|
||||
; asm: cmpl $100000, %ecx
|
||||
; asm: setl %bl
|
||||
[-,%rbx] v321 = icmp_imm slt v1, 100000 ; bin: 81 f9 000186a0 0f 9c c3
|
||||
|
||||
; Bool-to-int conversions.
|
||||
|
||||
; asm: movzbl %bl, %ecx
|
||||
|
||||
100
cranelift/filetests/postopt/basic.cton
Normal file
100
cranelift/filetests/postopt/basic.cton
Normal file
@@ -0,0 +1,100 @@
|
||||
test postopt
|
||||
isa intel
|
||||
|
||||
; Test that compare+branch sequences are folded effectively on x86.
|
||||
|
||||
function %br_icmp(i32, i32) -> i32 {
|
||||
ebb0(v0: i32, v1: i32):
|
||||
[Op1icscc#39,%rdx] v2 = icmp slt v0, v1
|
||||
[Op1t8jccd_long#85] brnz v2, ebb1
|
||||
[Op1ret#c3] return v1
|
||||
|
||||
ebb1:
|
||||
[Op1puid#b8,%rax] v8 = iconst.i32 3
|
||||
[Op1ret#c3] return v8
|
||||
}
|
||||
; sameln: function %br_icmp
|
||||
; nextln: ebb0(v0: i32, v1: i32):
|
||||
; nextln: v9 = ifcmp v0, v1
|
||||
; nextln: v2 = trueif slt v9
|
||||
; nextln: brif slt v9, ebb1
|
||||
; nextln: return v1
|
||||
; nextln:
|
||||
; nextln: ebb1:
|
||||
; nextln: v8 = iconst.i32 3
|
||||
; nextln: return v8
|
||||
; nextln: }
|
||||
|
||||
; Use brz instead of brnz, so the condition is inverted.
|
||||
|
||||
function %br_icmp_inverse(i32, i32) -> i32 {
|
||||
ebb0(v0: i32, v1: i32):
|
||||
[Op1icscc#39,%rdx] v2 = icmp slt v0, v1
|
||||
[Op1t8jccd_long#84] brz v2, ebb1
|
||||
[Op1ret#c3] return v1
|
||||
|
||||
ebb1:
|
||||
[Op1puid#b8,%rax] v8 = iconst.i32 3
|
||||
[Op1ret#c3] return v8
|
||||
}
|
||||
; sameln: function %br_icmp_inverse
|
||||
; nextln: ebb0(v0: i32, v1: i32):
|
||||
; nextln: v9 = ifcmp v0, v1
|
||||
; nextln: v2 = trueif slt v9
|
||||
; nextln: brif sge v9, ebb1
|
||||
; nextln: return v1
|
||||
; nextln:
|
||||
; nextln: ebb1:
|
||||
; nextln: v8 = iconst.i32 3
|
||||
; nextln: return v8
|
||||
; nextln: }
|
||||
|
||||
; Use icmp_imm instead of icmp.
|
||||
|
||||
function %br_icmp_imm(i32, i32) -> i32 {
|
||||
ebb0(v0: i32, v1: i32):
|
||||
[Op1icsccib#7083] v2 = icmp_imm slt v0, 2
|
||||
[Op1t8jccd_long#84] brz v2, ebb1
|
||||
[Op1ret#c3] return v1
|
||||
|
||||
ebb1:
|
||||
[Op1puid#b8,%rax] v8 = iconst.i32 3
|
||||
[Op1ret#c3] return v8
|
||||
}
|
||||
; sameln: function %br_icmp_imm
|
||||
; nextln: ebb0(v0: i32, v1: i32):
|
||||
; nextln: v9 = ifcmp_imm v0, 2
|
||||
; nextln: v2 = trueif slt v9
|
||||
; nextln: brif sge v9, ebb1
|
||||
; nextln: return v1
|
||||
; nextln:
|
||||
; nextln: ebb1:
|
||||
; nextln: v8 = iconst.i32 3
|
||||
; nextln: return v8
|
||||
; nextln: }
|
||||
|
||||
; Use fcmp instead of icmp.
|
||||
|
||||
function %br_fcmp(f32, f32) -> f32 {
|
||||
ebb0(v0: f32, v1: f32):
|
||||
[Op2fcscc#42e,%rdx] v2 = fcmp gt v0, v1
|
||||
[Op1t8jccd_long#84] brz v2, ebb1
|
||||
[Op1ret#c3] return v1
|
||||
|
||||
ebb1:
|
||||
[Op1puid#b8,%rax] v18 = iconst.i32 0x40a8_0000
|
||||
[Mp2frurm#56e,%xmm0] v8 = bitcast.f32 v18
|
||||
[Op1ret#c3] return v8
|
||||
}
|
||||
; sameln: function %br_fcmp
|
||||
; nextln: ebb0(v0: f32, v1: f32):
|
||||
; nextln: v19 = ffcmp v0, v1
|
||||
; nextln: v2 = trueff gt v19
|
||||
; nextln: brff ule v19, ebb1
|
||||
; nextln: return v1
|
||||
; nextln:
|
||||
; nextln: ebb1:
|
||||
; nextln: v18 = iconst.i32 0x40a8_0000
|
||||
; nextln: v8 = bitcast.f32 v18
|
||||
; nextln: return v8
|
||||
; nextln: }
|
||||
80
cranelift/filetests/preopt/simplify.cton
Normal file
80
cranelift/filetests/preopt/simplify.cton
Normal file
@@ -0,0 +1,80 @@
|
||||
test preopt
|
||||
isa intel
|
||||
|
||||
function %iadd_imm(i32) -> i32 {
|
||||
ebb0(v0: i32):
|
||||
v1 = iconst.i32 2
|
||||
v2 = iadd v0, v1
|
||||
return v2
|
||||
}
|
||||
; sameln: function %iadd_imm
|
||||
; nextln: ebb0(v0: i32):
|
||||
; nextln: v1 = iconst.i32 2
|
||||
; nextln: v2 = iadd_imm v0, 2
|
||||
; nextln: return v2
|
||||
; nextln: }
|
||||
|
||||
function %isub_imm(i32) -> i32 {
|
||||
ebb0(v0: i32):
|
||||
v1 = iconst.i32 2
|
||||
v2 = isub v0, v1
|
||||
return v2
|
||||
}
|
||||
; sameln: function %isub_imm
|
||||
; nextln: ebb0(v0: i32):
|
||||
; nextln: v1 = iconst.i32 2
|
||||
; nextln: v2 = iadd_imm v0, -2
|
||||
; nextln: return v2
|
||||
; nextln: }
|
||||
|
||||
function %icmp_imm(i32) -> i32 {
|
||||
ebb0(v0: i32):
|
||||
v1 = iconst.i32 2
|
||||
v2 = icmp slt v0, v1
|
||||
v3 = bint.i32 v2
|
||||
return v3
|
||||
}
|
||||
; sameln: function %icmp_imm
|
||||
; nextln: ebb0(v0: i32):
|
||||
; nextln: v1 = iconst.i32 2
|
||||
; nextln: v2 = icmp_imm slt v0, 2
|
||||
; nextln: v3 = bint.i32 v2
|
||||
; nextln: return v3
|
||||
; nextln: }
|
||||
|
||||
function %brz_bint(i32) {
|
||||
ebb0(v0: i32):
|
||||
v3 = icmp_imm slt v0, 0
|
||||
v1 = bint.i32 v3
|
||||
v2 = select v1, v1, v1
|
||||
trapz v1, user0
|
||||
brz v1, ebb1
|
||||
jump ebb2
|
||||
|
||||
ebb1:
|
||||
return
|
||||
|
||||
ebb2:
|
||||
return
|
||||
}
|
||||
; sameln: function %brz_bint
|
||||
; nextln: (v0: i32):
|
||||
; nextln: v3 = icmp_imm slt v0, 0
|
||||
; nextln: v1 = bint.i32 v3
|
||||
; nextln: v2 = select v3, v1, v1
|
||||
; nextln: trapz v3, user0
|
||||
; nextln: brz v3, ebb1
|
||||
; nextln: jump ebb2
|
||||
|
||||
function %irsub_imm(i32) -> i32 {
|
||||
ebb0(v0: i32):
|
||||
v1 = iconst.i32 2
|
||||
v2 = isub v1, v0
|
||||
return v2
|
||||
}
|
||||
; sameln: function %irsub_imm
|
||||
; nextln: ebb0(v0: i32):
|
||||
; nextln: v1 = iconst.i32 2
|
||||
; nextln: v2 = irsub_imm v1, 2
|
||||
; nextln: return v2
|
||||
; nextln: }
|
||||
@@ -21,7 +21,7 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
|
||||
@0011 [RexOp1puid#b8] v9 = iconst.i32 0
|
||||
@0015 [RexOp1puid#b8] v11 = iconst.i32 0
|
||||
@0017 [RexOp1icscc#39] v12 = icmp.i32 eq v15, v11
|
||||
@0017 [RexOp2urm#4b6] v13 = bint.i32 v12
|
||||
@0017 [RexOp2urm_noflags#4b6] v13 = bint.i32 v12
|
||||
@001a [RexOp1rr#21] v14 = band v9, v13
|
||||
@001b [RexOp1tjccb#75] brnz v14, ebb6
|
||||
@001d [RexOp1jmpb#eb] jump ebb7
|
||||
|
||||
@@ -4,65 +4,65 @@ isa intel
|
||||
; Simple, correct use of CPU flags.
|
||||
function %simple(i32) -> i32 {
|
||||
ebb0(v0: i32):
|
||||
[Op1rcmp#39] v1 = ifcmp v0, v0
|
||||
[Op2seti_abcd#490] v2 = trueif ugt v1
|
||||
[Op2urm_abcd#4b6] v3 = bint.i32 v2
|
||||
[Op1ret#c3] return v3
|
||||
[Op1rcmp#39] v1 = ifcmp v0, v0
|
||||
[Op2seti_abcd#490] v2 = trueif ugt v1
|
||||
[Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2
|
||||
[Op1ret#c3] return v3
|
||||
}
|
||||
|
||||
; Overlapping flag values of different types.
|
||||
function %overlap(i32, f32) -> i32 {
|
||||
ebb0(v0: i32, v1: f32):
|
||||
[Op1rcmp#39] v2 = ifcmp v0, v0
|
||||
[Op2fcmp#42e] v3 = ffcmp v1, v1
|
||||
[Op2setf_abcd#490] v4 = trueff gt v3 ; error: conflicting live CPU flags: v2 and v3
|
||||
[Op2seti_abcd#490] v5 = trueif ugt v2
|
||||
[Op1rr#21] v6 = band v4, v5
|
||||
[Op2urm_abcd#4b6] v7 = bint.i32 v6
|
||||
[Op1ret#c3] return v7
|
||||
[Op1rcmp#39] v2 = ifcmp v0, v0
|
||||
[Op2fcmp#42e] v3 = ffcmp v1, v1
|
||||
[Op2setf_abcd#490] v4 = trueff gt v3 ; error: conflicting live CPU flags: v2 and v3
|
||||
[Op2seti_abcd#490] v5 = trueif ugt v2
|
||||
[Op1rr#21] v6 = band v4, v5
|
||||
[Op2urm_noflags_abcd#4b6] v7 = bint.i32 v6
|
||||
[Op1ret#c3] return v7
|
||||
}
|
||||
|
||||
; CPU flags clobbered by arithmetic.
|
||||
function %clobbered(i32) -> i32 {
|
||||
ebb0(v0: i32):
|
||||
[Op1rcmp#39] v1 = ifcmp v0, v0
|
||||
[Op1rr#01] v2 = iadd v0, v0 ; error: encoding clobbers live CPU flags in v1
|
||||
[Op2seti_abcd#490] v3 = trueif ugt v1
|
||||
[Op2urm_abcd#4b6] v4 = bint.i32 v3
|
||||
[Op1ret#c3] return v4
|
||||
[Op1rcmp#39] v1 = ifcmp v0, v0
|
||||
[Op1rr#01] v2 = iadd v0, v0 ; error: encoding clobbers live CPU flags in v1
|
||||
[Op2seti_abcd#490] v3 = trueif ugt v1
|
||||
[Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3
|
||||
[Op1ret#c3] return v4
|
||||
}
|
||||
|
||||
; CPU flags not clobbered by load.
|
||||
function %live_across_load(i32) -> i32 {
|
||||
ebb0(v0: i32):
|
||||
[Op1rcmp#39] v1 = ifcmp v0, v0
|
||||
[Op1ld#8b] v2 = load.i32 v0
|
||||
[Op2seti_abcd#490] v3 = trueif ugt v1
|
||||
[Op2urm_abcd#4b6] v4 = bint.i32 v3
|
||||
[Op1ret#c3] return v4
|
||||
[Op1rcmp#39] v1 = ifcmp v0, v0
|
||||
[Op1ld#8b] v2 = load.i32 v0
|
||||
[Op2seti_abcd#490] v3 = trueif ugt v1
|
||||
[Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3
|
||||
[Op1ret#c3] return v4
|
||||
}
|
||||
|
||||
; Correct use of CPU flags across EBB.
|
||||
function %live_across_ebb(i32) -> i32 {
|
||||
ebb0(v0: i32):
|
||||
[Op1rcmp#39] v1 = ifcmp v0, v0
|
||||
[Op1jmpb#eb] jump ebb1
|
||||
ebb1:
|
||||
[Op2seti_abcd#490] v2 = trueif ugt v1
|
||||
[Op2urm_abcd#4b6] v3 = bint.i32 v2
|
||||
[Op1ret#c3] return v3
|
||||
ebb0(v0: i32):
|
||||
[Op1rcmp#39] v1 = ifcmp v0, v0
|
||||
[Op1jmpb#eb] jump ebb1
|
||||
ebb1:
|
||||
[Op2seti_abcd#490] v2 = trueif ugt v1
|
||||
[Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2
|
||||
[Op1ret#c3] return v3
|
||||
}
|
||||
|
||||
function %live_across_ebb_backwards(i32) -> i32 {
|
||||
ebb0(v0: i32):
|
||||
[Op1jmpb#eb] jump ebb2
|
||||
ebb1:
|
||||
[Op2seti_abcd#490] v2 = trueif ugt v1
|
||||
[Op2urm_abcd#4b6] v3 = bint.i32 v2
|
||||
[Op1ret#c3] return v3
|
||||
ebb2:
|
||||
[Op1rcmp#39] v1 = ifcmp v0, v0
|
||||
[Op1jmpb#eb] jump ebb1
|
||||
ebb0(v0: i32):
|
||||
[Op1jmpb#eb] jump ebb2
|
||||
ebb1:
|
||||
[Op2seti_abcd#490] v2 = trueif ugt v1
|
||||
[Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2
|
||||
[Op1ret#c3] return v3
|
||||
ebb2:
|
||||
[Op1rcmp#39] v1 = ifcmp v0, v0
|
||||
[Op1jmpb#eb] jump ebb1
|
||||
}
|
||||
|
||||
; Flags live into loop.
|
||||
@@ -73,4 +73,4 @@ function %live_into_loop(i32) -> i32 {
|
||||
ebb1:
|
||||
[Op2seti_abcd#490] v2 = trueif ugt v1
|
||||
[Op1jmpb#eb] jump ebb1
|
||||
}
|
||||
}
|
||||
|
||||
@@ -378,6 +378,8 @@ X86_64.enc(base.trapff, r.trapff, 0)
|
||||
# Comparisons
|
||||
#
|
||||
enc_i32_i64(base.icmp, r.icscc, 0x39)
|
||||
enc_i32_i64(base.icmp_imm, r.icsccib, 0x83, rrr=7)
|
||||
enc_i32_i64(base.icmp_imm, r.icsccid, 0x81, rrr=7)
|
||||
enc_i32_i64(base.ifcmp, r.rcmp, 0x39)
|
||||
enc_i32_i64(base.ifcmp_imm, r.rcmpib, 0x83, rrr=7)
|
||||
enc_i32_i64(base.ifcmp_imm, r.rcmpid, 0x81, rrr=7)
|
||||
@@ -409,11 +411,13 @@ enc_i32_i64(x86.bsr, r.bsf_and_bsr, 0x0F, 0xBD)
|
||||
#
|
||||
# This assumes that b1 is represented as an 8-bit low register with the value 0
|
||||
# or 1.
|
||||
X86_32.enc(base.bint.i32.b1, *r.urm_abcd(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i64.b1, *r.urm.rex(0x0f, 0xb6)) # zext to i64 implicit.
|
||||
X86_64.enc(base.bint.i64.b1, *r.urm_abcd(0x0f, 0xb6)) # zext to i64 implicit.
|
||||
X86_64.enc(base.bint.i32.b1, *r.urm.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i32.b1, *r.urm_abcd(0x0f, 0xb6))
|
||||
#
|
||||
# Encode movzbq as movzbl, because it's equivalent and shorter.
|
||||
X86_32.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i64.b1, *r.urm_noflags.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i64.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i32.b1, *r.urm_noflags.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
|
||||
|
||||
# Numerical conversions.
|
||||
|
||||
@@ -430,41 +434,41 @@ X86_64.enc(base.ireduce.i32.i64, r.null, 0)
|
||||
# instructions for %al/%ax/%eax to %ax/%eax/%rax.
|
||||
|
||||
# movsbl
|
||||
X86_32.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe))
|
||||
X86_64.enc(base.sextend.i32.i8, *r.urm.rex(0x0f, 0xbe))
|
||||
X86_64.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe))
|
||||
X86_32.enc(base.sextend.i32.i8, *r.urm_noflags(0x0f, 0xbe))
|
||||
X86_64.enc(base.sextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xbe))
|
||||
X86_64.enc(base.sextend.i32.i8, *r.urm_noflags(0x0f, 0xbe))
|
||||
|
||||
# movswl
|
||||
X86_32.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf))
|
||||
X86_64.enc(base.sextend.i32.i16, *r.urm.rex(0x0f, 0xbf))
|
||||
X86_64.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf))
|
||||
X86_32.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
|
||||
X86_64.enc(base.sextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xbf))
|
||||
X86_64.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
|
||||
|
||||
# movsbq
|
||||
X86_64.enc(base.sextend.i64.i8, *r.urm.rex(0x0f, 0xbe, w=1))
|
||||
X86_64.enc(base.sextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xbe, w=1))
|
||||
|
||||
# movswq
|
||||
X86_64.enc(base.sextend.i64.i16, *r.urm.rex(0x0f, 0xbf, w=1))
|
||||
X86_64.enc(base.sextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xbf, w=1))
|
||||
|
||||
# movslq
|
||||
X86_64.enc(base.sextend.i64.i32, *r.urm.rex(0x63, w=1))
|
||||
X86_64.enc(base.sextend.i64.i32, *r.urm_noflags.rex(0x63, w=1))
|
||||
|
||||
# movzbl
|
||||
X86_32.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i32.i8, *r.urm.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6))
|
||||
X86_32.enc(base.uextend.i32.i8, *r.urm_noflags(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i32.i8, *r.urm_noflags(0x0f, 0xb6))
|
||||
|
||||
# movzwl
|
||||
X86_32.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i32.i16, *r.urm.rex(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7))
|
||||
X86_32.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
|
||||
|
||||
# movzbq, encoded as movzbl because it's equivalent and shorter
|
||||
X86_64.enc(base.uextend.i64.i8, *r.urm.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i64.i8, *r.urm(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i64.i8, *r.urm_noflags(0x0f, 0xb6))
|
||||
|
||||
# movzwq, encoded as movzwl because it's equivalent and shorter
|
||||
X86_64.enc(base.uextend.i64.i16, *r.urm.rex(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i64.i16, *r.urm(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i64.i16, *r.urm_noflags(0x0f, 0xb7))
|
||||
|
||||
# A 32-bit register copy clears the high 32 bits.
|
||||
X86_64.enc(base.uextend.i64.i32, *r.umr.rex(0x89))
|
||||
|
||||
@@ -8,7 +8,8 @@ from cdsl.registers import RegClass
|
||||
from base.formats import Unary, UnaryImm, UnaryBool, Binary, BinaryImm
|
||||
from base.formats import MultiAry, NullAry
|
||||
from base.formats import Trap, Call, IndirectCall, Store, Load
|
||||
from base.formats import IntCompare, FloatCompare, IntCond, FloatCond
|
||||
from base.formats import IntCompare, IntCompareImm, FloatCompare
|
||||
from base.formats import IntCond, FloatCond
|
||||
from base.formats import IntSelect, IntCondTrap, FloatCondTrap
|
||||
from base.formats import Jump, Branch, BranchInt, BranchFloat
|
||||
from base.formats import Ternary, FuncAddr, UnaryGlobalVar
|
||||
@@ -364,7 +365,7 @@ rfumr = TailRecipe(
|
||||
''')
|
||||
|
||||
# XX /r, but for a unary operator with separate input/output register.
|
||||
# RM form.
|
||||
# RM form. Clobbers FLAGS.
|
||||
urm = TailRecipe(
|
||||
'urm', Unary, size=1, ins=GPR, outs=GPR,
|
||||
emit='''
|
||||
@@ -372,10 +373,19 @@ urm = TailRecipe(
|
||||
modrm_rr(in_reg0, out_reg0, sink);
|
||||
''')
|
||||
|
||||
# XX /r. Same as urm, but input limited to ABCD.
|
||||
urm_abcd = TailRecipe(
|
||||
'urm_abcd', Unary, size=1, ins=ABCD, outs=GPR,
|
||||
when_prefixed=urm,
|
||||
# XX /r. Same as urm, but doesn't clobber FLAGS.
|
||||
urm_noflags = TailRecipe(
|
||||
'urm_noflags', Unary, size=1, ins=GPR, outs=GPR,
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
|
||||
modrm_rr(in_reg0, out_reg0, sink);
|
||||
''')
|
||||
|
||||
# XX /r. Same as urm_noflags, but input limited to ABCD.
|
||||
urm_noflags_abcd = TailRecipe(
|
||||
'urm_noflags_abcd', Unary, size=1, ins=ABCD, outs=GPR,
|
||||
when_prefixed=urm_noflags,
|
||||
emit='''
|
||||
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
|
||||
modrm_rr(in_reg0, out_reg0, sink);
|
||||
@@ -1360,6 +1370,61 @@ icscc = TailRecipe(
|
||||
modrm_rr(out_reg0, 0, sink);
|
||||
''')
|
||||
|
||||
icsccib = TailRecipe(
|
||||
'icsccib', IntCompareImm, size=2 + 3, ins=GPR, outs=ABCD,
|
||||
instp=IsSignedInt(IntCompareImm.imm, 8),
|
||||
emit='''
|
||||
// Comparison instruction.
|
||||
PUT_OP(bits, rex1(in_reg0), sink);
|
||||
modrm_r_bits(in_reg0, bits, sink);
|
||||
let imm: i64 = imm.into();
|
||||
sink.put1(imm as u8);
|
||||
// `setCC` instruction, no REX.
|
||||
use ir::condcodes::IntCC::*;
|
||||
let setcc = match cond {
|
||||
Equal => 0x94,
|
||||
NotEqual => 0x95,
|
||||
SignedLessThan => 0x9c,
|
||||
SignedGreaterThanOrEqual => 0x9d,
|
||||
SignedGreaterThan => 0x9f,
|
||||
SignedLessThanOrEqual => 0x9e,
|
||||
UnsignedLessThan => 0x92,
|
||||
UnsignedGreaterThanOrEqual => 0x93,
|
||||
UnsignedGreaterThan => 0x97,
|
||||
UnsignedLessThanOrEqual => 0x96,
|
||||
};
|
||||
sink.put1(0x0f);
|
||||
sink.put1(setcc);
|
||||
modrm_rr(out_reg0, 0, sink);
|
||||
''')
|
||||
|
||||
icsccid = TailRecipe(
|
||||
'icsccid', IntCompareImm, size=5 + 3, ins=GPR, outs=ABCD,
|
||||
instp=IsSignedInt(IntCompareImm.imm, 32),
|
||||
emit='''
|
||||
// Comparison instruction.
|
||||
PUT_OP(bits, rex1(in_reg0), sink);
|
||||
modrm_r_bits(in_reg0, bits, sink);
|
||||
let imm: i64 = imm.into();
|
||||
sink.put4(imm as u32);
|
||||
// `setCC` instruction, no REX.
|
||||
use ir::condcodes::IntCC::*;
|
||||
let setcc = match cond {
|
||||
Equal => 0x94,
|
||||
NotEqual => 0x95,
|
||||
SignedLessThan => 0x9c,
|
||||
SignedGreaterThanOrEqual => 0x9d,
|
||||
SignedGreaterThan => 0x9f,
|
||||
SignedLessThanOrEqual => 0x9e,
|
||||
UnsignedLessThan => 0x92,
|
||||
UnsignedGreaterThanOrEqual => 0x93,
|
||||
UnsignedGreaterThan => 0x97,
|
||||
UnsignedLessThanOrEqual => 0x96,
|
||||
};
|
||||
sink.put1(0x0f);
|
||||
sink.put1(setcc);
|
||||
modrm_rr(out_reg0, 0, sink);
|
||||
''')
|
||||
|
||||
# Make a FloatCompare instruction predicate with the supported condition codes.
|
||||
|
||||
|
||||
@@ -21,9 +21,11 @@ use result::{CtonError, CtonResult};
|
||||
use settings::{FlagsOrIsa, OptLevel};
|
||||
use unreachable_code::eliminate_unreachable_code;
|
||||
use verifier;
|
||||
use dce::do_dce;
|
||||
use simple_gvn::do_simple_gvn;
|
||||
use licm::do_licm;
|
||||
use preopt::do_preopt;
|
||||
use postopt::do_postopt;
|
||||
use timing;
|
||||
|
||||
/// Persistent data structures and compilation pipeline.
|
||||
@@ -92,6 +94,9 @@ impl Context {
|
||||
self.preopt(isa)?;
|
||||
}
|
||||
self.legalize(isa)?;
|
||||
if isa.flags().opt_level() != OptLevel::Fastest {
|
||||
self.postopt(isa)?;
|
||||
}
|
||||
if isa.flags().opt_level() == OptLevel::Best {
|
||||
self.compute_domtree();
|
||||
self.compute_loop_analysis();
|
||||
@@ -100,6 +105,7 @@ impl Context {
|
||||
}
|
||||
self.compute_domtree();
|
||||
self.eliminate_unreachable_code(isa)?;
|
||||
self.dce(isa)?;
|
||||
self.regalloc(isa)?;
|
||||
self.prologue_epilogue(isa)?;
|
||||
self.relax_branches(isa)
|
||||
@@ -153,6 +159,13 @@ impl Context {
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform dead-code elimination on the function.
|
||||
pub fn dce<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CtonResult {
|
||||
do_dce(&mut self.func, &mut self.domtree);
|
||||
self.verify_if(fisa)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Perform pre-legalization rewrites on the function.
|
||||
pub fn preopt(&mut self, isa: &TargetIsa) -> CtonResult {
|
||||
do_preopt(&mut self.func);
|
||||
@@ -170,6 +183,13 @@ impl Context {
|
||||
self.verify_if(isa)
|
||||
}
|
||||
|
||||
/// Perform post-legalization rewrites on the function.
|
||||
pub fn postopt(&mut self, isa: &TargetIsa) -> CtonResult {
|
||||
do_postopt(&mut self.func, isa);
|
||||
self.verify_if(isa)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Compute the control flow graph.
|
||||
pub fn compute_cfg(&mut self) {
|
||||
self.cfg.compute(&self.func)
|
||||
|
||||
68
lib/cretonne/src/dce.rs
Normal file
68
lib/cretonne/src/dce.rs
Normal file
@@ -0,0 +1,68 @@
|
||||
//! A Dead-Code Elimination (DCE) pass.
|
||||
//!
|
||||
//! Dead code here means instructions that have no side effects and have no
|
||||
//! result values used by other instructions.
|
||||
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use dominator_tree::DominatorTree;
|
||||
use entity::EntityRef;
|
||||
use ir::{Function, Inst, Opcode, DataFlowGraph};
|
||||
use ir::instructions::InstructionData;
|
||||
use timing;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// Test whether the given opcode is unsafe to even consider for DCE.
|
||||
fn trivially_unsafe_for_dce(opcode: Opcode) -> bool {
|
||||
opcode.is_call() || opcode.is_branch() || opcode.is_terminator() ||
|
||||
opcode.is_return() || opcode.can_trap() || opcode.other_side_effects() ||
|
||||
opcode.can_store()
|
||||
}
|
||||
|
||||
/// Preserve instructions with used result values.
|
||||
fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool {
|
||||
dfg.inst_results(inst).iter().any(|v| live[v.index()])
|
||||
}
|
||||
|
||||
/// Load instructions without the `notrap` flag are defined to trap when
|
||||
/// operating on inaccessible memory, so we can't DCE them even if the
|
||||
/// loaded value is unused.
|
||||
fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool {
|
||||
if !opcode.can_load() {
|
||||
return false;
|
||||
}
|
||||
match *data {
|
||||
InstructionData::StackLoad { .. } => false,
|
||||
InstructionData::Load { flags, .. } => !flags.notrap(),
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform DCE on `func`.
|
||||
pub fn do_dce(func: &mut Function, domtree: &mut DominatorTree) {
|
||||
let _tt = timing::dce();
|
||||
debug_assert!(domtree.is_valid());
|
||||
|
||||
let mut live = Vec::with_capacity(func.dfg.num_values());
|
||||
live.resize(func.dfg.num_values(), false);
|
||||
|
||||
for &ebb in domtree.cfg_postorder().iter() {
|
||||
let mut pos = FuncCursor::new(func).at_bottom(ebb);
|
||||
while let Some(inst) = pos.prev_inst() {
|
||||
{
|
||||
let data = &pos.func.dfg[inst];
|
||||
let opcode = data.opcode();
|
||||
if trivially_unsafe_for_dce(opcode) ||
|
||||
is_load_with_defined_trapping(opcode, &data) ||
|
||||
any_inst_results_used(inst, &live, &pos.func.dfg)
|
||||
{
|
||||
for arg in pos.func.dfg.inst_args(inst) {
|
||||
let v = pos.func.dfg.resolve_aliases(*arg);
|
||||
live[v.index()] = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
pos.remove_inst();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -21,6 +21,11 @@ impl Imm64 {
|
||||
pub fn new(x: i64) -> Imm64 {
|
||||
Imm64(x)
|
||||
}
|
||||
|
||||
/// Return self negated.
|
||||
pub fn wrapping_neg(self) -> Imm64 {
|
||||
Imm64(self.0.wrapping_neg())
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<i64> for Imm64 {
|
||||
|
||||
@@ -58,6 +58,10 @@ impl TargetIsa for Isa {
|
||||
&self.shared_flags
|
||||
}
|
||||
|
||||
/// This ISA reports that scalar comparisons go through a CPU flags
/// register, so it always answers `true`.
fn uses_cpu_flags(&self) -> bool {
    true
}
|
||||
|
||||
fn register_info(&self) -> RegInfo {
|
||||
registers::INFO.clone()
|
||||
}
|
||||
|
||||
@@ -158,6 +158,11 @@ pub trait TargetIsa: fmt::Display {
|
||||
/// Get the ISA-independent flags that were used to make this trait object.
|
||||
fn flags(&self) -> &settings::Flags;
|
||||
|
||||
/// Does the CPU implement scalar comparisons using a CPU flags register?
///
/// The default answer is `false`; an ISA that has a flags register
/// overrides this method to return `true`.
fn uses_cpu_flags(&self) -> bool {
    false
}
|
||||
|
||||
/// Get a data structure describing the registers in this ISA.
|
||||
fn register_info(&self) -> RegInfo;
|
||||
|
||||
|
||||
@@ -68,11 +68,13 @@ mod abi;
|
||||
mod bitset;
|
||||
mod constant_hash;
|
||||
mod context;
|
||||
mod dce;
|
||||
mod divconst_magic_numbers;
|
||||
mod iterators;
|
||||
mod legalizer;
|
||||
mod licm;
|
||||
mod partition_slice;
|
||||
mod postopt;
|
||||
mod predicates;
|
||||
mod preopt;
|
||||
mod ref_slice;
|
||||
|
||||
211
lib/cretonne/src/postopt.rs
Normal file
211
lib/cretonne/src/postopt.rs
Normal file
@@ -0,0 +1,211 @@
|
||||
//! A post-legalization rewriting pass.
|
||||
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use cursor::{Cursor, EncCursor};
|
||||
use ir::dfg::ValueDef;
|
||||
use ir::{Function, InstructionData, Value, InstBuilder, Ebb, Inst};
|
||||
use ir::condcodes::{CondCode, IntCC, FloatCC};
|
||||
use ir::instructions::{Opcode, ValueList};
|
||||
use ir::immediates::Imm64;
|
||||
use isa::TargetIsa;
|
||||
use timing;
|
||||
|
||||
/// Information collected about a compare+branch sequence.
///
/// Gathered by `optimize_cpu_flags` while it inspects the branch and the
/// instruction defining its condition, then consumed when the pair is
/// rewritten to use flags.
struct CmpBrInfo {
    /// The branch instruction (`brz` or `brnz`).
    br_inst: Inst,
    /// The icmp, icmp_imm, or fcmp instruction.
    cmp_inst: Inst,
    /// The destination of the branch.
    destination: Ebb,
    /// The arguments of the branch. The first entry is the condition value;
    /// the remaining entries are forwarded to the rewritten branch.
    args: ValueList,
    /// The first argument to the comparison. The second is in the `kind` field.
    cmp_arg: Value,
    /// If the branch is `brz` rather than `brnz`, we need to invert the condition
    /// before the branch.
    invert_branch_cond: bool,
    /// The kind of comparison, and the second argument.
    kind: CmpBrKind,
}
|
||||
|
||||
/// The kind of comparison feeding a branch, together with its condition code
/// and its second operand.
enum CmpBrKind {
    /// Integer comparison against another value (`icmp`).
    Icmp { cond: IntCC, arg: Value },
    /// Integer comparison against an immediate (`icmp_imm`).
    IcmpImm { cond: IntCC, imm: Imm64 },
    /// Floating-point comparison against another value (`fcmp`).
    Fcmp { cond: FloatCC, arg: Value },
}
|
||||
|
||||
/// Optimize comparisons to use flags values, to avoid materializing conditions
|
||||
/// in integer registers.
|
||||
///
|
||||
/// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff
|
||||
/// sequences.
|
||||
fn optimize_cpu_flags(
|
||||
pos: &mut EncCursor,
|
||||
inst: Inst,
|
||||
last_flags_clobber: Option<Inst>,
|
||||
isa: &TargetIsa,
|
||||
) {
|
||||
// Look for compare and branch patterns.
|
||||
// This code could be considerably simplified with non-lexical lifetimes.
|
||||
let info = match pos.func.dfg[inst] {
|
||||
InstructionData::Branch {
|
||||
opcode,
|
||||
destination,
|
||||
ref args,
|
||||
} => {
|
||||
let first_arg = args.first(&pos.func.dfg.value_lists).unwrap();
|
||||
let invert_branch_cond = match opcode {
|
||||
Opcode::Brz => true,
|
||||
Opcode::Brnz => false,
|
||||
_ => panic!(),
|
||||
};
|
||||
if let ValueDef::Result(cond_inst, _) = pos.func.dfg.value_def(first_arg) {
|
||||
match pos.func.dfg[cond_inst] {
|
||||
InstructionData::IntCompare {
|
||||
cond,
|
||||
args: cmp_args,
|
||||
..
|
||||
} => {
|
||||
CmpBrInfo {
|
||||
br_inst: inst,
|
||||
cmp_inst: cond_inst,
|
||||
destination,
|
||||
args: args.clone(),
|
||||
cmp_arg: cmp_args[0],
|
||||
invert_branch_cond,
|
||||
kind: CmpBrKind::Icmp {
|
||||
cond,
|
||||
arg: cmp_args[1],
|
||||
},
|
||||
}
|
||||
}
|
||||
InstructionData::IntCompareImm {
|
||||
cond,
|
||||
arg: cmp_arg,
|
||||
imm: cmp_imm,
|
||||
..
|
||||
} => {
|
||||
CmpBrInfo {
|
||||
br_inst: inst,
|
||||
cmp_inst: cond_inst,
|
||||
destination,
|
||||
args: args.clone(),
|
||||
cmp_arg,
|
||||
invert_branch_cond,
|
||||
kind: CmpBrKind::IcmpImm { cond, imm: cmp_imm },
|
||||
}
|
||||
}
|
||||
InstructionData::FloatCompare {
|
||||
cond,
|
||||
args: cmp_args,
|
||||
..
|
||||
} => {
|
||||
CmpBrInfo {
|
||||
br_inst: inst,
|
||||
cmp_inst: cond_inst,
|
||||
destination,
|
||||
args: args.clone(),
|
||||
cmp_arg: cmp_args[0],
|
||||
invert_branch_cond,
|
||||
kind: CmpBrKind::Fcmp {
|
||||
cond,
|
||||
arg: cmp_args[1],
|
||||
},
|
||||
}
|
||||
}
|
||||
_ => return,
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
// TODO: trapif, trueif, selectif, and their ff counterparts.
|
||||
_ => return,
|
||||
};
|
||||
|
||||
// If any instructions clobber the flags between the comparison and the branch,
|
||||
// don't optimize them.
|
||||
if last_flags_clobber != Some(info.cmp_inst) {
|
||||
return;
|
||||
}
|
||||
|
||||
// We found a compare+branch pattern. Transform it to use flags.
|
||||
let args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec();
|
||||
pos.goto_inst(info.cmp_inst);
|
||||
match info.kind {
|
||||
CmpBrKind::Icmp { mut cond, arg } => {
|
||||
let flags = pos.ins().ifcmp(info.cmp_arg, arg);
|
||||
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
|
||||
if info.invert_branch_cond {
|
||||
cond = cond.inverse();
|
||||
}
|
||||
pos.func.dfg.replace(info.br_inst).brif(
|
||||
cond,
|
||||
flags,
|
||||
info.destination,
|
||||
&args,
|
||||
);
|
||||
}
|
||||
CmpBrKind::IcmpImm { mut cond, imm } => {
|
||||
let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm);
|
||||
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
|
||||
if info.invert_branch_cond {
|
||||
cond = cond.inverse();
|
||||
}
|
||||
pos.func.dfg.replace(info.br_inst).brif(
|
||||
cond,
|
||||
flags,
|
||||
info.destination,
|
||||
&args,
|
||||
);
|
||||
}
|
||||
CmpBrKind::Fcmp { mut cond, arg } => {
|
||||
let flags = pos.ins().ffcmp(info.cmp_arg, arg);
|
||||
pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags);
|
||||
if info.invert_branch_cond {
|
||||
cond = cond.inverse();
|
||||
}
|
||||
pos.func.dfg.replace(info.br_inst).brff(
|
||||
cond,
|
||||
flags,
|
||||
info.destination,
|
||||
&args,
|
||||
);
|
||||
}
|
||||
}
|
||||
pos.func.update_encoding(info.cmp_inst, isa).is_ok();
|
||||
pos.func.update_encoding(info.br_inst, isa).is_ok();
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
//
|
||||
// The main post-opt pass.
|
||||
|
||||
pub fn do_postopt(func: &mut Function, isa: &TargetIsa) {
|
||||
let _tt = timing::postopt();
|
||||
let mut pos = EncCursor::new(func, isa);
|
||||
while let Some(_ebb) = pos.next_ebb() {
|
||||
let mut last_flags_clobber = None;
|
||||
while let Some(inst) = pos.next_inst() {
|
||||
if isa.uses_cpu_flags() {
|
||||
// Optimize instructions to make use of flags.
|
||||
optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa);
|
||||
|
||||
// Track the most recent seen instruction that clobbers the flags.
|
||||
if let Some(constraints) =
|
||||
isa.encoding_info().operand_constraints(
|
||||
pos.func.encodings[inst],
|
||||
)
|
||||
{
|
||||
if constraints.clobbers_flags {
|
||||
last_flags_clobber = Some(inst)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -127,28 +127,6 @@ fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
|
||||
return package_up_divrem_info(arg, argL_ty, imm.into(), isSigned, isRem);
|
||||
}
|
||||
|
||||
// TODO: should we actually bother to do this (that is, manually match
|
||||
// the case that the second argument is an iconst)? Or should we assume
|
||||
// that some previous constant propagation pass has pushed all such
|
||||
// immediates to their use points, creating BinaryImm instructions
|
||||
// instead? For now we take the conservative approach.
|
||||
if let InstructionData::Binary { opcode, args } = *idata {
|
||||
let (isSigned, isRem) = match opcode {
|
||||
Opcode::Udiv => (false, false),
|
||||
Opcode::Urem => (false, true),
|
||||
Opcode::Sdiv => (true, false),
|
||||
Opcode::Srem => (true, true),
|
||||
_other => return None,
|
||||
};
|
||||
let argR: Value = args[1];
|
||||
if let Some(simm64) = get_const(argR, dfg) {
|
||||
let argL: Value = args[0];
|
||||
// Pull the operation size (type) from the left arg
|
||||
let argL_ty = dfg.value_type(argL);
|
||||
return package_up_divrem_info(argL, argL_ty, simm64, isSigned, isRem);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
@@ -473,25 +451,106 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
//
|
||||
// General pattern-match helpers.
|
||||
|
||||
/// Find out if `value` actually resolves to a constant, and if so what its
|
||||
/// value is.
|
||||
fn get_const(value: Value, dfg: &DataFlowGraph) -> Option<i64> {
|
||||
match dfg.value_def(value) {
|
||||
ValueDef::Result(definingInst, resultNo) => {
|
||||
let definingIData: &InstructionData = &dfg[definingInst];
|
||||
if let InstructionData::UnaryImm { opcode, imm } = *definingIData {
|
||||
if opcode == Opcode::Iconst && resultNo == 0 {
|
||||
return Some(imm.into());
|
||||
/// Apply basic simplifications.
|
||||
///
|
||||
/// This folds constants with arithmetic to form `_imm` instructions, and other
|
||||
/// minor simplifications.
|
||||
fn simplify(pos: &mut FuncCursor, inst: Inst) {
|
||||
match pos.func.dfg[inst] {
|
||||
InstructionData::Binary { opcode, args } => {
|
||||
if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[1]) {
|
||||
if let InstructionData::UnaryImm {
|
||||
opcode: Opcode::Iconst,
|
||||
mut imm,
|
||||
} = pos.func.dfg[iconst_inst]
|
||||
{
|
||||
let new_opcode = match opcode {
|
||||
Opcode::Iadd => Opcode::IaddImm,
|
||||
Opcode::Imul => Opcode::ImulImm,
|
||||
Opcode::Sdiv => Opcode::SdivImm,
|
||||
Opcode::Udiv => Opcode::UdivImm,
|
||||
Opcode::Srem => Opcode::SremImm,
|
||||
Opcode::Urem => Opcode::UremImm,
|
||||
Opcode::Band => Opcode::BandImm,
|
||||
Opcode::Bor => Opcode::BorImm,
|
||||
Opcode::Bxor => Opcode::BxorImm,
|
||||
Opcode::Rotl => Opcode::RotlImm,
|
||||
Opcode::Rotr => Opcode::RotrImm,
|
||||
Opcode::Ishl => Opcode::IshlImm,
|
||||
Opcode::Ushr => Opcode::UshrImm,
|
||||
Opcode::Sshr => Opcode::SshrImm,
|
||||
Opcode::Isub => {
|
||||
imm = imm.wrapping_neg();
|
||||
Opcode::IaddImm
|
||||
}
|
||||
_ => return,
|
||||
};
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
pos.func.dfg.replace(inst).BinaryImm(
|
||||
new_opcode,
|
||||
ty,
|
||||
imm,
|
||||
args[0],
|
||||
);
|
||||
}
|
||||
} else if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[0]) {
|
||||
if let InstructionData::UnaryImm {
|
||||
opcode: Opcode::Iconst,
|
||||
mut imm,
|
||||
} = pos.func.dfg[iconst_inst]
|
||||
{
|
||||
let new_opcode = match opcode {
|
||||
Opcode::Isub => Opcode::IrsubImm,
|
||||
_ => return,
|
||||
};
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
pos.func.dfg.replace(inst).BinaryImm(
|
||||
new_opcode,
|
||||
ty,
|
||||
imm,
|
||||
args[0],
|
||||
);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
ValueDef::Param(_definingEbb, _paramNo) => None,
|
||||
InstructionData::IntCompare { opcode, cond, args } => {
|
||||
debug_assert_eq!(opcode, Opcode::Icmp);
|
||||
if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[1]) {
|
||||
if let InstructionData::UnaryImm {
|
||||
opcode: Opcode::Iconst,
|
||||
imm,
|
||||
} = pos.func.dfg[iconst_inst]
|
||||
{
|
||||
pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
|
||||
}
|
||||
}
|
||||
}
|
||||
InstructionData::CondTrap { .. } |
|
||||
InstructionData::Branch { .. } |
|
||||
InstructionData::Ternary { opcode: Opcode::Select, .. } => {
|
||||
// Fold away a redundant `bint`.
|
||||
let maybe = {
|
||||
let args = pos.func.dfg.inst_args(inst);
|
||||
if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
|
||||
if let InstructionData::Unary {
|
||||
opcode: Opcode::Bint,
|
||||
arg: bool_val,
|
||||
} = pos.func.dfg[def_inst]
|
||||
{
|
||||
Some(bool_val)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
};
|
||||
if let Some(bool_val) = maybe {
|
||||
let args = pos.func.dfg.inst_args_mut(inst);
|
||||
args[0] = bool_val;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -503,6 +562,8 @@ pub fn do_preopt(func: &mut Function) {
|
||||
while let Some(_ebb) = pos.next_ebb() {
|
||||
|
||||
while let Some(inst) = pos.next_inst() {
|
||||
// Apply basic simplifications.
|
||||
simplify(&mut pos, inst);
|
||||
|
||||
//-- BEGIN -- division by constants ----------------
|
||||
|
||||
|
||||
@@ -55,7 +55,9 @@ define_passes!{
|
||||
flowgraph: "Control flow graph",
|
||||
domtree: "Dominator tree",
|
||||
loop_analysis: "Loop analysis",
|
||||
postopt: "Post-legalization rewriting",
|
||||
preopt: "Pre-legalization rewriting",
|
||||
dce: "Dead code elimination",
|
||||
legalize: "Legalization",
|
||||
gvn: "Global value numbering",
|
||||
licm: "Loop invariant code motion",
|
||||
|
||||
@@ -28,9 +28,11 @@ mod match_directive;
|
||||
mod test_binemit;
|
||||
mod test_cat;
|
||||
mod test_compile;
|
||||
mod test_dce;
|
||||
mod test_domtree;
|
||||
mod test_legalizer;
|
||||
mod test_licm;
|
||||
mod test_postopt;
|
||||
mod test_preopt;
|
||||
mod test_print_cfg;
|
||||
mod test_regalloc;
|
||||
@@ -73,9 +75,11 @@ fn new_subtest(parsed: &TestCommand) -> subtest::Result<Box<subtest::SubTest>> {
|
||||
"binemit" => test_binemit::subtest(parsed),
|
||||
"cat" => test_cat::subtest(parsed),
|
||||
"compile" => test_compile::subtest(parsed),
|
||||
"dce" => test_dce::subtest(parsed),
|
||||
"domtree" => test_domtree::subtest(parsed),
|
||||
"legalizer" => test_legalizer::subtest(parsed),
|
||||
"licm" => test_licm::subtest(parsed),
|
||||
"postopt" => test_postopt::subtest(parsed),
|
||||
"preopt" => test_preopt::subtest(parsed),
|
||||
"print-cfg" => test_print_cfg::subtest(parsed),
|
||||
"regalloc" => test_regalloc::subtest(parsed),
|
||||
|
||||
53
lib/filetests/src/test_dce.rs
Normal file
53
lib/filetests/src/test_dce.rs
Normal file
@@ -0,0 +1,53 @@
|
||||
//! Test command for testing the DCE pass.
|
||||
//!
|
||||
//! The `dce` test command computes the control flow graph and loop analysis
|
||||
//! for each function and then runs it through the DCE pass.
|
||||
//!
|
||||
//! The resulting function is sent to `filecheck`.
|
||||
|
||||
use cretonne::ir::Function;
|
||||
use cretonne;
|
||||
use cretonne::print_errors::pretty_error;
|
||||
use cton_reader::TestCommand;
|
||||
use subtest::{SubTest, Context, Result, run_filecheck};
|
||||
use std::borrow::Cow;
|
||||
use std::fmt::Write;
|
||||
|
||||
/// Stateless subtest that backs the `test dce` command.
struct TestDCE;
|
||||
|
||||
pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
|
||||
assert_eq!(parsed.command, "dce");
|
||||
if !parsed.options.is_empty() {
|
||||
Err(format!("No options allowed on {}", parsed))
|
||||
} else {
|
||||
Ok(Box::new(TestDCE))
|
||||
}
|
||||
}
|
||||
|
||||
impl SubTest for TestDCE {
|
||||
fn name(&self) -> Cow<str> {
|
||||
Cow::from("dce")
|
||||
}
|
||||
|
||||
fn is_mutating(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
|
||||
// Create a compilation context, and drop in the function.
|
||||
let mut comp_ctx = cretonne::Context::new();
|
||||
comp_ctx.func = func.into_owned();
|
||||
|
||||
comp_ctx.flowgraph();
|
||||
comp_ctx.compute_loop_analysis();
|
||||
comp_ctx.dce(context.flags_or_isa()).map_err(|e| {
|
||||
pretty_error(&comp_ctx.func, context.isa, Into::into(e))
|
||||
})?;
|
||||
|
||||
let mut text = String::new();
|
||||
write!(&mut text, "{}", &comp_ctx.func).map_err(
|
||||
|e| e.to_string(),
|
||||
)?;
|
||||
run_filecheck(&text, context)
|
||||
}
|
||||
}
|
||||
50
lib/filetests/src/test_postopt.rs
Normal file
50
lib/filetests/src/test_postopt.rs
Normal file
@@ -0,0 +1,50 @@
|
||||
//! Test command for testing the postopt pass.
|
||||
//!
|
||||
//! The resulting function is sent to `filecheck`.
|
||||
|
||||
use cretonne::ir::Function;
|
||||
use cretonne;
|
||||
use cretonne::print_errors::pretty_error;
|
||||
use cton_reader::TestCommand;
|
||||
use subtest::{SubTest, Context, Result, run_filecheck};
|
||||
use std::borrow::Cow;
|
||||
use std::fmt::Write;
|
||||
|
||||
/// Stateless subtest that backs the `test postopt` command.
struct TestPostopt;
|
||||
|
||||
pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
|
||||
assert_eq!(parsed.command, "postopt");
|
||||
if !parsed.options.is_empty() {
|
||||
Err(format!("No options allowed on {}", parsed))
|
||||
} else {
|
||||
Ok(Box::new(TestPostopt))
|
||||
}
|
||||
}
|
||||
|
||||
impl SubTest for TestPostopt {
|
||||
fn name(&self) -> Cow<str> {
|
||||
Cow::from("postopt")
|
||||
}
|
||||
|
||||
fn is_mutating(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
|
||||
// Create a compilation context, and drop in the function.
|
||||
let mut comp_ctx = cretonne::Context::new();
|
||||
comp_ctx.func = func.into_owned();
|
||||
let isa = context.isa.expect("postopt needs an ISA");
|
||||
|
||||
comp_ctx.flowgraph();
|
||||
comp_ctx.postopt(isa).map_err(|e| {
|
||||
pretty_error(&comp_ctx.func, context.isa, Into::into(e))
|
||||
})?;
|
||||
|
||||
let mut text = String::new();
|
||||
write!(&mut text, "{}", &comp_ctx.func).map_err(
|
||||
|e| e.to_string(),
|
||||
)?;
|
||||
run_filecheck(&text, context)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user