Adds support for transforming integer div and rem by constants into cheaper equivalents.

Adds support for transforming integer division and remainder by constants
into sequences that do not involve division instructions.

* div/rem by constant powers of two are turned into right shifts, plus some
  fixups for the signed cases.

* div/rem by constant non-powers of two are turned into double length
  multiplies by a magic constant, plus some fixups involving shifts,
  addition and subtraction, that depend on the constant, the word size and
  the signedness involved.

* The following cases are transformed: div and rem, signed or unsigned, 32
  or 64 bit.  The only un-transformed cases are: unsigned div and rem by
  zero, signed div and rem by zero or -1.

* This is all incorporated within a new transformation pass, "preopt", in
  lib/cretonne/src/preopt.rs.

* In preopt.rs, fn do_preopt() is the main driver.  It is designed to be
  extensible to transformations of other kinds of instructions.  Currently
  it merely uses a helper to identify div/rem transformation candidates and
  another helper to perform the transformation.

* In preopt.rs, fn get_div_info() pattern matches to find candidates, both
  cases where the second arg is an immediate, and cases where the second
  arg is an identifier bound to an immediate at its definition point.

* In preopt.rs, fn do_divrem_transformation() does the heavy lifting of the
  transformation proper.  It in turn uses magic{S,U}{32,64} to calculate the
  magic numbers required for the transformations.

* There are many test cases for the transformation proper:
    filetests/preopt/div_by_const_non_power_of_2.cton
    filetests/preopt/div_by_const_power_of_2.cton
    filetests/preopt/rem_by_const_non_power_of_2.cton
    filetests/preopt/rem_by_const_power_of_2.cton
    filetests/preopt/div_by_const_indirect.cton
  preopt.rs also contains a set of tests for magic number generation.

* The main (non-power-of-2) transformation requires instructions that return
  the high word of a double-length multiply.  For this, instructions umulhi
  and smulhi have been added to the core instruction set.  These will map
  directly to single instructions on most non-intel targets.

* intel does not have an instruction exactly like that.  For intel,
  instructions x86_umulx and x86_smulx have been added.  These map to real
  instructions and return both result words.  The intel legaliser will
  rewrite {s,u}mulhi into x86_{s,u}mulx uses that throw away the lower half
  word.  Tests:
    filetests/isa/intel/legalize-mulhi.cton (new file)
    filetests/isa/intel/binary64.cton (added x86_{s,u}mulx encoding tests)
This commit is contained in:
Julian Seward
2018-02-28 12:28:55 +01:00
committed by Dan Gohman
parent e4b30d3284
commit 7054f25abb
20 changed files with 2464 additions and 0 deletions

View File

@@ -336,6 +336,28 @@ ebb0:
; asm: divq %r10
[-,%rax,%rdx] v202, v203 = x86_udivmodx v190, v191, v3 ; bin: 49 f7 f2
; double-length multiply instructions, 64 bit
[-,%rax] v1001 = iconst.i64 1
[-,%r15] v1002 = iconst.i64 2
; asm: mulq %r15
[-,%rax,%rdx] v1003, v1004 = x86_umulx v1001, v1002 ; bin: 49 f7 e7
; asm: imulq %r15
[-,%rax,%rdx] v1005, v1006 = x86_smulx v1001, v1002 ; bin: 49 f7 ef
; double-length multiply instructions, 32 bit
[-,%rax] v1011 = iconst.i32 1
[-,%r15] v1012 = iconst.i32 2
[-,%rcx] v1017 = iconst.i32 3
; asm: mull %r15d
[-,%rax,%rdx] v1013, v1014 = x86_umulx v1011, v1012 ; bin: 41 f7 e7
; asm: imull %r15d
[-,%rax,%rdx] v1015, v1016 = x86_smulx v1011, v1012 ; bin: 41 f7 ef
; asm: mull %ecx
[-,%rax,%rdx] v1018, v1019 = x86_umulx v1011, v1017 ; bin: f7 e1
; asm: imull %ecx
[-,%rax,%rdx] v1020, v1021 = x86_smulx v1011, v1017 ; bin: f7 e9
; Bit-counting instructions.
; asm: popcntq %rsi, %rcx

View File

@@ -0,0 +1,45 @@
test compile
set is_64bit
isa intel baseline
; umulhi/smulhi on 64 bit operands
function %i64_umulhi(i64, i64) -> i64 {
ebb0(v10: i64, v11: i64):
v12 = umulhi v10, v11
; check: %rdi -> %rax
; check: x86_umulx
; check: %rdx -> %rax
return v12
}
function %i64_smulhi(i64, i64) -> i64 {
ebb0(v20: i64, v21: i64):
v22 = smulhi v20, v21
; check: %rdi -> %rax
; check: x86_smulx
; check: %rdx -> %rax
return v22
}
; umulhi/smulhi on 32 bit operands
function %i32_umulhi(i32, i32) -> i32 {
ebb0(v30: i32, v31: i32):
v32 = umulhi v30, v31
; check: %rdi -> %rax
; check: x86_umulx
; check: %rdx -> %rax
return v32
}
function %i32_smulhi(i32, i32) -> i32 {
ebb0(v40: i32, v41: i32):
v42 = smulhi v40, v41
; check: %rdi -> %rax
; check: x86_smulx
; check: %rdx -> %rax
return v42
}

View File

@@ -0,0 +1,60 @@
test preopt
isa intel baseline
; Cases where the denominator is created by an iconst
function %indir_udiv32(i32) -> i32 {
ebb0(v0: i32):
v1 = iconst.i32 7
v2 = udiv v0, v1
; check: iconst.i32 7
; check: iconst.i32 0x2492_4925
; check: umulhi v0, v3
; check: isub v0, v4
; check: ushr_imm v5, 1
; check: iadd v6, v4
; check: ushr_imm v7, 2
; check: copy v8
return v2
}
function %indir_sdiv32(i32) -> i32 {
ebb0(v0: i32):
v1 = iconst.i32 -17
v2 = sdiv v0, v1
; check: iconst.i32 -17
; check: iconst.i32 0xffff_ffff_8787_8787
; check: smulhi v0, v3
; check: sshr_imm v4, 3
; check: ushr_imm v5, 31
; check: iadd v5, v6
; check: copy v7
return v2
}
function %indir_udiv64(i64) -> i64 {
ebb0(v0: i64):
v1 = iconst.i64 1337
v2 = udiv v0, v1
; check: iconst.i64 1337
; check: iconst.i64 0xc411_9d95_2866_a139
; check: umulhi v0, v3
; check: ushr_imm v4, 10
; check: copy v5
return v2
}
function %indir_sdiv64(i64) -> i64 {
ebb0(v0: i64):
v1 = iconst.i64 -90210
v2 = sdiv v0, v1
; check: iconst.i64 0xffff_ffff_fffe_9f9e
; check: iconst.i64 0xd181_4ee8_939c_b8bb
; check: smulhi v0, v3
; check: sshr_imm v4, 14
; check: ushr_imm v5, 63
; check: iadd v5, v6
; check: copy v7
return v2
}

View File

@@ -0,0 +1,267 @@
test preopt
isa intel baseline
; -------- U32 --------
; complex case (mul, sub, shift, add, shift)
function %t_udiv32_p7(i32) -> i32 {
ebb0(v0: i32):
v1 = udiv_imm v0, 7
; check: iconst.i32 0x2492_4925
; check: umulhi v0, v2
; check: isub v0, v3
; check: ushr_imm v4, 1
; check: iadd v5, v3
; check: ushr_imm v6, 2
; check: copy v7
return v1
}
; simple case (mul, shift)
function %t_udiv32_p125(i32) -> i32 {
ebb0(v0: i32):
v1 = udiv_imm v0, 125
; check: iconst.i32 0x1062_4dd3
; check: umulhi v0, v2
; check: ushr_imm v3, 3
; check: copy v4
return v1
}
; simple case w/ shift by zero (mul)
function %t_udiv32_p641(i32) -> i32 {
ebb0(v0: i32):
v1 = udiv_imm v0, 641
; check: iconst.i32 0x0066_3d81
; check: umulhi v0, v2
; check: copy v3
return v1
}
; -------- S32 --------
; simple case w/ shift by zero (mul, add-sign-bit)
function %t_sdiv32_n6(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, -6
; check: iconst.i32 0xffff_ffff_d555_5555
; check: smulhi v0, v2
; check: ushr_imm v3, 31
; check: iadd v3, v4
; check: copy v5
return v1
}
; simple case (mul, shift, add-sign-bit)
function %t_sdiv32_n5(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, -5
; check: iconst.i32 0xffff_ffff_9999_9999
; check: smulhi v0, v2
; check: sshr_imm v3, 1
; check: ushr_imm v4, 31
; check: iadd v4, v5
; check: copy v6
return v1
}
; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit)
function %t_sdiv32_n3(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, -3
; check: iconst.i32 0x5555_5555
; check: smulhi v0, v2
; check: isub v3, v0
; check: sshr_imm v4, 1
; check: ushr_imm v5, 31
; check: iadd v5, v6
; check: copy v7
return v1
}
; simple case w/ shift by zero (mul, add-sign-bit)
function %t_sdiv32_p6(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, 6
; check: iconst.i32 0x2aaa_aaab
; check: smulhi v0, v2
; check: ushr_imm v3, 31
; check: iadd v3, v4
; check: copy v5
return v1
}
; case d > 0 && M < 0 (mul, add, shift, add-sign-bit)
function %t_sdiv32_p7(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, 7
; check: iconst.i32 0xffff_ffff_9249_2493
; check: smulhi v0, v2
; check: iadd v3, v0
; check: sshr_imm v4, 2
; check: ushr_imm v5, 31
; check: iadd v5, v6
; check: copy v7
return v1
}
; simple case (mul, shift, add-sign-bit)
function %t_sdiv32_p625(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, 625
; check: iconst.i32 0x68db_8bad
; check: smulhi v0, v2
; check: sshr_imm v3, 8
; check: ushr_imm v4, 31
; check: iadd v4, v5
; check: copy v6
return v1
}
; -------- U64 --------
; complex case (mul, sub, shift, add, shift)
function %t_udiv64_p7(i64) -> i64 {
ebb0(v0: i64):
v1 = udiv_imm v0, 7
; check: iconst.i64 0x2492_4924_9249_2493
; check: umulhi v0, v2
; check: isub v0, v3
; check: ushr_imm v4, 1
; check: iadd v5, v3
; check: ushr_imm v6, 2
; check: copy v7
return v1
}
; simple case (mul, shift)
function %t_udiv64_p9(i64) -> i64 {
ebb0(v0: i64):
v1 = udiv_imm v0, 9
; check: iconst.i64 0xe38e_38e3_8e38_e38f
; check: umulhi v0, v2
; check: ushr_imm v3, 3
; check: copy v4
return v1
}
; complex case (mul, sub, shift, add, shift)
function %t_udiv64_p125(i64) -> i64 {
ebb0(v0: i64):
v1 = udiv_imm v0, 125
; check: iconst.i64 0x0624_dd2f_1a9f_be77
; check: umulhi v0, v2
; check: isub v0, v3
; check: ushr_imm v4, 1
; check: iadd v5, v3
; check: ushr_imm v6, 6
; check: copy v7
return v1
}
; simple case w/ shift by zero (mul)
function %t_udiv64_p274177(i64) -> i64 {
ebb0(v0: i64):
v1 = udiv_imm v0, 274177
; check: iconst.i64 0x3d30_f19c_d101
; check: umulhi v0, v2
; check: copy v3
return v1
}
; -------- S64 --------
; simple case (mul, shift, add-sign-bit)
function %t_sdiv64_n625(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, -625
; check: iconst.i64 0xcb92_3a29_c779_a6b5
; check: smulhi v0, v2
; check: sshr_imm v3, 7
; check: ushr_imm v4, 63
; check: iadd v4, v5
; check: copy v6
return v1
}
; simple case w/ zero shift (mul, add-sign-bit)
function %t_sdiv64_n6(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, -6
; check: iconst.i64 0xd555_5555_5555_5555
; check: smulhi v0, v2
; check: ushr_imm v3, 63
; check: iadd v3, v4
; check: copy v5
return v1
}
; simple case w/ zero shift (mul, add-sign-bit)
function %t_sdiv64_n5(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, -5
; check: iconst.i64 0x9999_9999_9999_9999
; check: smulhi v0, v2
; check: sshr_imm v3, 1
; check: ushr_imm v4, 63
; check: iadd v4, v5
; check: copy v6
return v1
}
; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit)
function %t_sdiv64_n3(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, -3
; check: iconst.i64 0x5555_5555_5555_5555
; check: smulhi v0, v2
; check: isub v3, v0
; check: sshr_imm v4, 1
; check: ushr_imm v5, 63
; check: iadd v5, v6
; check: copy v7
return v1
}
; simple case w/ zero shift (mul, add-sign-bit)
function %t_sdiv64_p6(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, 6
; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab
; check: smulhi v0, v2
; check: ushr_imm v3, 63
; check: iadd v3, v4
; check: copy v5
return v1
}
; case d > 0 && M < 0 (mul, add, shift, add-sign-bit)
function %t_sdiv64_p15(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, 15
; check: iconst.i64 0x8888_8888_8888_8889
; check: smulhi v0, v2
; check: iadd v3, v0
; check: sshr_imm v4, 3
; check: ushr_imm v5, 63
; check: iadd v5, v6
; check: copy v7
return v1
}
; simple case (mul, shift, add-sign-bit)
function %t_sdiv64_p625(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, 625
; check: iconst.i64 0x346d_c5d6_3886_594b
; check: smulhi v0, v2
; check: sshr_imm v3, 7
; check: ushr_imm v4, 63
; check: iadd v4, v5
; check: copy v6
return v1
}

View File

@@ -0,0 +1,293 @@
test preopt
isa intel baseline
; -------- U32 --------
; ignored
function %t_udiv32_p0(i32) -> i32 {
ebb0(v0: i32):
v1 = udiv_imm v0, 0
; check: udiv_imm v0, 0
return v1
}
; converted to a copy
function %t_udiv32_p1(i32) -> i32 {
ebb0(v0: i32):
v1 = udiv_imm v0, 1
; check: copy v0
return v1
}
; shift
function %t_udiv32_p2(i32) -> i32 {
ebb0(v0: i32):
v1 = udiv_imm v0, 2
; check: ushr_imm v0, 1
return v1
}
; shift
function %t_udiv32_p2p31(i32) -> i32 {
ebb0(v0: i32):
v1 = udiv_imm v0, 0x8000_0000
; check: ushr_imm v0, 31
return v1
}
; -------- U64 --------
; ignored
function %t_udiv64_p0(i64) -> i64 {
ebb0(v0: i64):
v1 = udiv_imm v0, 0
; check: udiv_imm v0, 0
return v1
}
; converted to a copy
function %t_udiv64_p1(i64) -> i64 {
ebb0(v0: i64):
v1 = udiv_imm v0, 1
; check: copy v0
return v1
}
; shift
function %t_udiv64_p2(i64) -> i64 {
ebb0(v0: i64):
v1 = udiv_imm v0, 2
; check: ushr_imm v0, 1
return v1
}
; shift
function %t_udiv64_p2p63(i64) -> i64 {
ebb0(v0: i64):
v1 = udiv_imm v0, 0x8000_0000_0000_0000
; check: ushr_imm v0, 63
return v1
}
; -------- S32 --------
; ignored
function %t_sdiv32_p0(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, 0
; check: sdiv_imm v0, 0
return v1
}
; converted to a copy
function %t_sdiv32_p1(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, 1
; check: copy v0
return v1
}
; ignored
function %t_sdiv32_n1(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, -1
; check: sdiv_imm v0, -1
return v1
}
; shift
function %t_sdiv32_p2(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, 2
; check: ushr_imm v0, 31
; check: iadd v0, v2
; check: sshr_imm v3, 1
; check: copy v4
return v1
}
; shift
function %t_sdiv32_n2(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, -2
; check: ushr_imm v0, 31
; check: iadd v0, v2
; check: sshr_imm v3, 1
; check: irsub_imm v4, 0
return v1
}
; shift
function %t_sdiv32_p4(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, 4
; check: v2 = sshr_imm v0, 1
; check: ushr_imm v2, 30
; check: iadd v0, v3
; check: sshr_imm v4, 2
; check: copy v5
return v1
}
; shift
function %t_sdiv32_n4(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, -4
; check: sshr_imm v0, 1
; check: ushr_imm v2, 30
; check: iadd v0, v3
; check: sshr_imm v4, 2
; check: irsub_imm v5, 0
return v1
}
; shift
function %t_sdiv32_p2p30(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, 0x4000_0000
; check: sshr_imm v0, 29
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: sshr_imm v4, 30
; check: copy v5
return v1
}
; shift
function %t_sdiv32_n2p30(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, -0x4000_0000
; check: sshr_imm v0, 29
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: sshr_imm v4, 30
; check: irsub_imm v5, 0
return v1
}
; there's no positive version of this, since -(-0x8000_0000) isn't
; representable.
function %t_sdiv32_n2p31(i32) -> i32 {
ebb0(v0: i32):
v1 = sdiv_imm v0, -0x8000_0000
; check: sshr_imm v0, 30
; check: ushr_imm v2, 1
; check: iadd v0, v3
; check: sshr_imm v4, 31
; check: irsub_imm v5, 0
return v1
}
; -------- S64 --------
; ignored
function %t_sdiv64_p0(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, 0
; check: sdiv_imm v0, 0
return v1
}
; converted to a copy
function %t_sdiv64_p1(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, 1
; check: copy v0
return v1
}
; ignored
function %t_sdiv64_n1(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, -1
; check: sdiv_imm v0, -1
return v1
}
; shift
function %t_sdiv64_p2(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, 2
; check: ushr_imm v0, 63
; check: iadd v0, v2
; check: sshr_imm v3, 1
; check: copy v4
return v1
}
; shift
function %t_sdiv64_n2(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, -2
; check: ushr_imm v0, 63
; check: iadd v0, v2
; check: sshr_imm v3, 1
; check: irsub_imm v4, 0
return v1
}
; shift
function %t_sdiv64_p4(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, 4
; check: sshr_imm v0, 1
; check: ushr_imm v2, 62
; check: iadd v0, v3
; check: sshr_imm v4, 2
; check: copy v5
return v1
}
; shift
function %t_sdiv64_n4(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, -4
; check: sshr_imm v0, 1
; check: ushr_imm v2, 62
; check: iadd v0, v3
; check: sshr_imm v4, 2
; check: irsub_imm v5, 0
return v1
}
; shift
function %t_sdiv64_p2p62(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, 0x4000_0000_0000_0000
; check: sshr_imm v0, 61
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: sshr_imm v4, 62
; check: copy v5
return v1
}
; shift
function %t_sdiv64_n2p62(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, -0x4000_0000_0000_0000
; check: sshr_imm v0, 61
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: sshr_imm v4, 62
; check: irsub_imm v5, 0
return v1
}
; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't
; representable.
function %t_sdiv64_n2p63(i64) -> i64 {
ebb0(v0: i64):
v1 = sdiv_imm v0, -0x8000_0000_0000_0000
; check: sshr_imm v0, 62
; check: ushr_imm v2, 1
; check: iadd v0, v3
; check: sshr_imm v4, 63
; check: irsub_imm v5, 0
return v1
}

View File

@@ -0,0 +1,286 @@
test preopt
isa intel baseline
; -------- U32 --------
; complex case (mul, sub, shift, add, shift)
function %t_urem32_p7(i32) -> i32 {
ebb0(v0: i32):
v1 = urem_imm v0, 7
; check: iconst.i32 0x2492_4925
; check: umulhi v0, v2
; check: isub v0, v3
; check: ushr_imm v4, 1
; check: iadd v5, v3
; check: ushr_imm v6, 2
; check: imul_imm v7, 7
; check: isub v0, v8
return v1
}
; simple case (mul, shift)
function %t_urem32_p125(i32) -> i32 {
ebb0(v0: i32):
v1 = urem_imm v0, 125
; check: iconst.i32 0x1062_4dd3
; check: umulhi v0, v2
; check: ushr_imm v3, 3
; check: imul_imm v4, 125
; check: isub v0, v5
return v1
}
; simple case w/ shift by zero (mul)
function %t_urem32_p641(i32) -> i32 {
ebb0(v0: i32):
v1 = urem_imm v0, 641
; check: iconst.i32 0x0066_3d81
; check: umulhi v0, v2
; check: imul_imm v3, 641
; check: isub v0, v4
return v1
}
; -------- S32 --------
; simple case w/ shift by zero (mul, add-sign-bit)
function %t_srem32_n6(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, -6
; check: iconst.i32 0xffff_ffff_d555_5555
; check: smulhi v0, v2
; check: ushr_imm v3, 31
; check: iadd v3, v4
; check: imul_imm v5, -6
; check: isub v0, v6
return v1
}
; simple case (mul, shift, add-sign-bit)
function %t_srem32_n5(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, -5
; check: iconst.i32 0xffff_ffff_9999_9999
; check: smulhi v0, v2
; check: sshr_imm v3, 1
; check: ushr_imm v4, 31
; check: iadd v4, v5
; check: imul_imm v6, -5
; check: isub v0, v7
return v1
}
; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit)
function %t_srem32_n3(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, -3
; check: iconst.i32 0x5555_5555
; check: smulhi v0, v2
; check: isub v3, v0
; check: sshr_imm v4, 1
; check: ushr_imm v5, 31
; check: iadd v5, v6
; check: imul_imm v7, -3
; check: isub v0, v8
return v1
}
; simple case w/ shift by zero (mul, add-sign-bit)
function %t_srem32_p6(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, 6
; check: iconst.i32 0x2aaa_aaab
; check: smulhi v0, v2
; check: ushr_imm v3, 31
; check: iadd v3, v4
; check: imul_imm v5, 6
; check: isub v0, v6
return v1
}
; case d > 0 && M < 0 (mul, add, shift, add-sign-bit)
function %t_srem32_p7(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, 7
; check: iconst.i32 0xffff_ffff_9249_2493
; check: smulhi v0, v2
; check: iadd v3, v0
; check: sshr_imm v4, 2
; check: ushr_imm v5, 31
; check: iadd v5, v6
; check: imul_imm v7, 7
; check: isub v0, v8
return v1
}
; simple case (mul, shift, add-sign-bit)
function %t_srem32_p625(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, 625
; check: iconst.i32 0x68db_8bad
; check: smulhi v0, v2
; check: sshr_imm v3, 8
; check: ushr_imm v4, 31
; check: iadd v4, v5
; check: imul_imm v6, 625
; check: isub v0, v7
return v1
}
; -------- U64 --------
; complex case (mul, sub, shift, add, shift)
; NOTE(review): unlike %t_udiv64_p7, the magic-constant iconst check line is omitted here — confirm this is intentional
function %t_urem64_p7(i64) -> i64 {
ebb0(v0: i64):
v1 = urem_imm v0, 7
; check: umulhi v0, v2
; check: isub v0, v3
; check: ushr_imm v4, 1
; check: iadd v5, v3
; check: ushr_imm v6, 2
; check: imul_imm v7, 7
; check: isub v0, v8
return v1
}
; simple case (mul, shift)
function %t_urem64_p9(i64) -> i64 {
ebb0(v0: i64):
v1 = urem_imm v0, 9
; check: iconst.i64 0xe38e_38e3_8e38_e38f
; check: umulhi v0, v2
; check: ushr_imm v3, 3
; check: imul_imm v4, 9
; check: isub v0, v5
return v1
}
; complex case (mul, sub, shift, add, shift)
function %t_urem64_p125(i64) -> i64 {
ebb0(v0: i64):
v1 = urem_imm v0, 125
; check: iconst.i64 0x0624_dd2f_1a9f_be77
; check: umulhi v0, v2
; check: isub v0, v3
; check: ushr_imm v4, 1
; check: iadd v5, v3
; check: ushr_imm v6, 6
; check: imul_imm v7, 125
; check: isub v0, v8
return v1
}
; simple case w/ shift by zero (mul)
function %t_urem64_p274177(i64) -> i64 {
ebb0(v0: i64):
v1 = urem_imm v0, 274177
; check: iconst.i64 0x3d30_f19c_d101
; check: umulhi v0, v2
; check: imul_imm v3, 0x0004_2f01
; check: isub v0, v4
return v1
}
; -------- S64 --------
; simple case (mul, shift, add-sign-bit)
function %t_srem64_n625(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, -625
; check: iconst.i64 0xcb92_3a29_c779_a6b5
; check: smulhi v0, v2
; check: sshr_imm v3, 7
; check: ushr_imm v4, 63
; check: iadd v4, v5
; check: imul_imm v6, -625
; check: isub v0, v7
return v1
}
; simple case w/ zero shift (mul, add-sign-bit)
function %t_srem64_n6(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, -6
; check: iconst.i64 0xd555_5555_5555_5555
; check: smulhi v0, v2
; check: ushr_imm v3, 63
; check: iadd v3, v4
; check: imul_imm v5, -6
; check: isub v0, v6
return v1
}
; simple case w/ zero shift (mul, add-sign-bit)
function %t_srem64_n5(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, -5
; check: iconst.i64 0x9999_9999_9999_9999
; check: smulhi v0, v2
; check: sshr_imm v3, 1
; check: ushr_imm v4, 63
; check: iadd v4, v5
; check: imul_imm v6, -5
; check: isub v0, v7
return v1
}
; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit)
function %t_srem64_n3(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, -3
; check: iconst.i64 0x5555_5555_5555_5555
; check: smulhi v0, v2
; check: isub v3, v0
; check: sshr_imm v4, 1
; check: ushr_imm v5, 63
; check: iadd v5, v6
; check: imul_imm v7, -3
; check: isub v0, v8
return v1
}
; simple case w/ zero shift (mul, add-sign-bit)
function %t_srem64_p6(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, 6
; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab
; check: smulhi v0, v2
; check: ushr_imm v3, 63
; check: iadd v3, v4
; check: imul_imm v5, 6
; check: isub v0, v6
return v1
}
; case d > 0 && M < 0 (mul, add, shift, add-sign-bit)
function %t_srem64_p15(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, 15
; check: iconst.i64 0x8888_8888_8888_8889
; check: smulhi v0, v2
; check: iadd v3, v0
; check: sshr_imm v4, 3
; check: ushr_imm v5, 63
; check: iadd v5, v6
; check: imul_imm v7, 15
; check: isub v0, v8
return v1
}
; simple case (mul, shift, add-sign-bit)
function %t_srem64_p625(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, 625
; check: iconst.i64 0x346d_c5d6_3886_594b
; check: smulhi v0, v2
; check: sshr_imm v3, 7
; check: ushr_imm v4, 63
; check: iadd v4, v5
; check: imul_imm v6, 625
; check: isub v0, v7
return v1
}

View File

@@ -0,0 +1,292 @@
test preopt
isa intel baseline
; -------- U32 --------
; ignored
function %t_urem32_p0(i32) -> i32 {
ebb0(v0: i32):
v1 = urem_imm v0, 0
; check: urem_imm v0, 0
return v1
}
; converted to constant zero
function %t_urem32_p1(i32) -> i32 {
ebb0(v0: i32):
v1 = urem_imm v0, 1
; check: iconst.i32 0
return v1
}
; mask
function %t_urem32_p2(i32) -> i32 {
ebb0(v0: i32):
v1 = urem_imm v0, 2
; check: band_imm v0, 1
return v1
}
; mask
function %t_urem32_p2p31(i32) -> i32 {
ebb0(v0: i32):
v1 = urem_imm v0, 0x8000_0000
; check: band_imm v0, 0x7fff_ffff
return v1
}
; -------- U64 --------
; ignored
function %t_urem64_p0(i64) -> i64 {
ebb0(v0: i64):
v1 = urem_imm v0, 0
; check: urem_imm v0, 0
return v1
}
; converted to constant zero
function %t_urem64_p1(i64) -> i64 {
ebb0(v0: i64):
v1 = urem_imm v0, 1
; check: iconst.i64 0
return v1
}
; mask
function %t_urem64_p2(i64) -> i64 {
ebb0(v0: i64):
v1 = urem_imm v0, 2
; check: band_imm v0, 1
return v1
}
; mask
function %t_urem64_p2p63(i64) -> i64 {
ebb0(v0: i64):
v1 = urem_imm v0, 0x8000_0000_0000_0000
; check: band_imm v0, 0x7fff_ffff_ffff_ffff
return v1
}
; -------- S32 --------
; ignored
function %t_srem32_n1(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, -1
; check: srem_imm v0, -1
return v1
}
; ignored
function %t_srem32_p0(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, 0
; check: srem_imm v0, 0
return v1
}
; converted to constant zero
function %t_srem32_p1(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, 1
; check: iconst.i32 0
return v1
}
; shift
function %t_srem32_p2(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, 2
; check: ushr_imm v0, 31
; check: iadd v0, v2
; check: band_imm v3, -2
; check: isub v0, v4
return v1
}
; shift
function %t_srem32_n2(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, -2
; check: ushr_imm v0, 31
; check: iadd v0, v2
; check: band_imm v3, -2
; check: isub v0, v4
return v1
}
; shift
function %t_srem32_p4(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, 4
; check: sshr_imm v0, 1
; check: ushr_imm v2, 30
; check: iadd v0, v3
; check: band_imm v4, -4
; check: isub v0, v5
return v1
}
; shift
function %t_srem32_n4(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, -4
; check: sshr_imm v0, 1
; check: ushr_imm v2, 30
; check: iadd v0, v3
; check: band_imm v4, -4
; check: isub v0, v5
return v1
}
; shift
function %t_srem32_p2p30(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, 0x4000_0000
; check: sshr_imm v0, 29
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: band_imm v4, 0xffff_ffff_c000_0000
; check: isub v0, v5
return v1
}
; shift
function %t_srem32_n2p30(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, -0x4000_0000
; check: sshr_imm v0, 29
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: band_imm v4, 0xffff_ffff_c000_0000
; check: isub v0, v5
return v1
}
; there's no positive version of this, since -(-0x8000_0000) isn't
; representable.
function %t_srem32_n2p31(i32) -> i32 {
ebb0(v0: i32):
v1 = srem_imm v0, -0x8000_0000
; check: sshr_imm v0, 30
; check: ushr_imm v2, 1
; check: iadd v0, v3
; check: band_imm v4, 0xffff_ffff_8000_0000
; check: isub v0, v5
return v1
}
; -------- S64 --------
; ignored
function %t_srem64_n1(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, -1
; check: srem_imm v0, -1
return v1
}
; ignored
function %t_srem64_p0(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, 0
; check: srem_imm v0, 0
return v1
}
; converted to constant zero
function %t_srem64_p1(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, 1
; check: iconst.i64 0
return v1
}
; shift
function %t_srem64_p2(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, 2
; check: ushr_imm v0, 63
; check: iadd v0, v2
; check: band_imm v3, -2
; check: isub v0, v4
return v1
}
; shift
function %t_srem64_n2(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, -2
; check: ushr_imm v0, 63
; check: iadd v0, v2
; check: band_imm v3, -2
; check: isub v0, v4
return v1
}
; shift
function %t_srem64_p4(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, 4
; check: sshr_imm v0, 1
; check: ushr_imm v2, 62
; check: iadd v0, v3
; check: band_imm v4, -4
; check: isub v0, v5
return v1
}
; shift
function %t_srem64_n4(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, -4
; check: sshr_imm v0, 1
; check: ushr_imm v2, 62
; check: iadd v0, v3
; check: band_imm v4, -4
; check: isub v0, v5
return v1
}
; shift
function %t_srem64_p2p62(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, 0x4000_0000_0000_0000
; check: sshr_imm v0, 61
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: band_imm v4, 0xc000_0000_0000_0000
; check: isub v0, v5
return v1
}
; shift
function %t_srem64_n2p62(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, -0x4000_0000_0000_0000
; check: sshr_imm v0, 61
; check: ushr_imm v2, 2
; check: iadd v0, v3
; check: band_imm v4, 0xc000_0000_0000_0000
; check: isub v0, v5
return v1
}
; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't
; representable.
function %t_srem64_n2p63(i64) -> i64 {
ebb0(v0: i64):
v1 = srem_imm v0, -0x8000_0000_0000_0000
; check: sshr_imm v0, 62
; check: ushr_imm v2, 1
; check: iadd v0, v3
; check: band_imm v4, 0x8000_0000_0000_0000
; check: isub v0, v5
return v1
}

View File

@@ -19,6 +19,7 @@ mod concurrent;
mod domtree;
mod legalizer;
mod licm;
mod preopt;
mod regalloc;
mod runner;
mod runone;
@@ -64,6 +65,7 @@ fn new_subtest(parsed: &TestCommand) -> subtest::Result<Box<subtest::SubTest>> {
"domtree" => domtree::subtest(parsed),
"legalizer" => legalizer::subtest(parsed),
"licm" => licm::subtest(parsed),
"preopt" => preopt::subtest(parsed),
"print-cfg" => print_cfg::subtest(parsed),
"regalloc" => regalloc::subtest(parsed),
"simple-gvn" => simple_gvn::subtest(parsed),

View File

@@ -0,0 +1,50 @@
//! Test command for testing the preopt pass.
//!
//! The resulting function is sent to `filecheck`.
use cretonne::ir::Function;
use cretonne;
use cton_reader::TestCommand;
use filetest::subtest::{SubTest, Context, Result, run_filecheck};
use std::borrow::Cow;
use std::fmt::Write;
use utils::pretty_error;
struct TestPreopt;
/// Build the `preopt` subtest from a parsed `test preopt` command line.
///
/// The `preopt` test command accepts no options; supplying any is an error.
pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
    assert_eq!(parsed.command, "preopt");
    if parsed.options.is_empty() {
        Ok(Box::new(TestPreopt))
    } else {
        Err(format!("No options allowed on {}", parsed))
    }
}
impl SubTest for TestPreopt {
    fn name(&self) -> Cow<str> {
        Cow::from("preopt")
    }

    // The pass rewrites the function, so the runner must hand us a copy.
    fn is_mutating(&self) -> bool {
        true
    }

    /// Run the preopt pass over `func`, then filecheck the transformed IR.
    fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
        let isa = context.isa.expect("preopt needs an ISA");

        // Drop the function into a fresh compilation context.
        let mut ctx = cretonne::Context::new();
        ctx.func = func.into_owned();

        // preopt requires an up-to-date control-flow graph.
        ctx.flowgraph();
        ctx.preopt(isa).map_err(|e| {
            pretty_error(&ctx.func, context.isa, e.into())
        })?;

        // Render the transformed function and hand the text to filecheck.
        let mut output = String::new();
        write!(&mut output, "{}", &ctx.func).map_err(
            |e| e.to_string(),
        )?;
        run_filecheck(&output, context)
    }
}