Legalize some more i8/i16 instructions (#524)

* Legalize some more i8/i16 instructions
This commit is contained in:
bjorn3
2018-09-26 01:10:23 +02:00
committed by Dan Gohman
parent 2c53e2102c
commit 2eec1469a8
9 changed files with 368 additions and 116 deletions

View File

@@ -0,0 +1,36 @@
test compile
target x86_64
; regex: V=v\d+
; Applies every bitwise operation (plain, negated-operand and immediate
; forms) to i8 values and stores each result through the address of ss0.
; The only directive in this function requires that an istore8 appears in
; the output; the individual widened operations are not matched.
function u0:0(i8, i8) fast {
; NOTE(review): fn0 is declared here but no call to it is visible in this
; function -- confirm whether the declaration is still needed.
fn0 = %black_box(i8)
ss0 = explicit_slot 1 ; black box
ebb0(v0: i8, v1: i8):
; v99 is the i64 address of ss0; every result below is stored to it.
v99 = stack_addr.i64 ss0
; check: istore8 $(V), $(V)
; Two-operand forms.
v2 = band v0, v1
store v2, v99
v3 = bor v0, v1
store v3, v99
v4 = bxor v0, v1
store v4, v99
; Single-operand complement.
v5 = bnot v0
store v5, v99
; Forms that complement the second operand.
v6 = band_not v0, v1
store v6, v99
v7 = bor_not v0, v1
store v7, v99
v8 = bxor_not v0, v1
store v8, v99
; Immediate forms.
v9 = band_imm v0, 42
store v9, v99
v10 = bor_imm v0, 42
store v10, v99
v11 = bxor_imm v0, 42
store v11, v99
return
}

View File

@@ -0,0 +1,19 @@
test compile
target x86_64
; regex: V=v\d+
; Integer comparisons on i8 operands. The directives expect the operands to
; be extended to i32 and the comparison to be performed on the extended
; values, keeping the original result value numbers (v2, v4).
function u0:0(i8, i8) -> i8 fast {
ebb0(v0: i8, v1: i8):
; Signed condition against an immediate: v0 is expected to be sign-extended.
v2 = icmp_imm sle v0, 0
; check: $(e1=$V) = sextend.i32 v0
; nextln: v2 = icmp_imm sle $e1, 0
v3 = bint.i8 v2
; Equality between two values: both operands are expected to be zero-extended.
v4 = icmp eq v0, v1
; check: $(e2=$V) = uextend.i32 v0
; nextln: $(e3=$V) = uextend.i32 v1
; nextln: v4 = icmp eq $e2, $e3
v5 = bint.i8 v4
; Both comparison results feed the returned value.
v6 = iadd v3, v5
return v6
}

View File

@@ -0,0 +1,24 @@
test compile
target x86_64
; regex: V=v\d+
; Shifts on i8 operands. For every shift the directives expect:
;   1. the shifted value extended to i32 (zero-extended for ishl/ushr,
;      sign-extended for sshr),
;   2. the shift performed on the extended value with the original shift
;      amount v1,
;   3. the result reduced back to i8 under the original value number.
; The third directive of each group previously lacked the colon after the
; directive name, so it was treated as an ordinary comment and never matched;
; two of them also named v2 instead of the value actually defined (v3, v4).
function u0:0(i8, i8) -> i8 fast {
ebb0(v0: i8, v1: i8):
    v2 = ishl v0, v1
    ; check: $(e1=$V) = uextend.i32 v0
    ; check: $(r1=$V) = ishl $e1, v1
    ; check: v2 = ireduce.i8 $r1
    v3 = ushr v0, v1
    ; check: $(e2=$V) = uextend.i32 v0
    ; check: $(r2=$V) = ushr $e2, v1
    ; check: v3 = ireduce.i8 $r2
    v4 = sshr v0, v1
    ; check: $(e3=$V) = sextend.i32 v0
    ; check: $(r3=$V) = sshr $e3, v1
    ; check: v4 = ireduce.i8 $r3
    ; All three shift results feed the returned value.
    v5 = iadd v2, v3
    v6 = iadd v4, v5
    return v6
}

View File

@@ -0,0 +1,15 @@
test compile
target x86_64
; regex: V=v\d+
; Unsigned remainder on i8 operands. The directives expect both operands to
; be zero-extended to i32, an i32 zero constant to be materialised, and an
; x86_udivmodx taking (extended v0, zero, extended v1) whose second result is
; reduced back to i8 as v2.
function u0:0(i8, i8) -> i8 fast {
ebb0(v0: i8, v1: i8):
v2 = urem v0, v1
; check: $(a=$V) = uextend.i32 v0
; nextln: $(b=$V) = uextend.i32 v1
; nextln: $(c=$V) = iconst.i32 0
; nextln: $(V), $(r=$V) = x86_udivmodx $a, $c, $b
; nextln: v2 = ireduce.i8 $r
return v2
}

View File

@@ -6,68 +6,125 @@ ebb0(v0: i8):
v1 = bitrev.i8 v0 v1 = bitrev.i8 v0
return v1 return v1
} }
; check: v2 = band_imm v0, 170 ; check: v16 = uextend.i32 v0
; check: v3 = ushr_imm v2, 1 ; check: v17 = band_imm v16, 170
; check: v4 = band_imm v0, 85 ; check: v2 = ireduce.i8 v17
; check: v5 = ishl_imm v4, 1 ; check: v18 = uextend.i32 v2
; check: v16 = uextend.i32 v3 ; check: v19 = ushr_imm v18, 1
; check: v17 = uextend.i32 v5 ; check: v3 = ireduce.i8 v19
; check: v18 = bor v16, v17 ; check: v20 = uextend.i32 v0
; check: v6 = ireduce.i8 v18 ; check: v21 = band_imm v20, 85
; check: v7 = band_imm v6, 204 ; check: v4 = ireduce.i8 v21
; check: v8 = ushr_imm v7, 2 ; check: v22 = uextend.i32 v4
; check: v9 = band_imm v6, 51 ; check: v23 = ishl_imm v22, 1
; check: v10 = ushr_imm v9, 2 ; check: v5 = ireduce.i8 v23
; check: v19 = uextend.i32 v8 ; check: v24 = uextend.i32 v3
; check: v20 = uextend.i32 v10 ; check: v25 = uextend.i32 v5
; check: v21 = bor v19, v20 ; check: v26 = bor v24, v25
; check: v11 = ireduce.i8 v21 ; check: v6 = ireduce.i8 v26
; check: v12 = band_imm v11, 240 ; check: v27 = uextend.i32 v6
; check: v13 = ushr_imm v12, 4 ; check: v28 = band_imm v27, 204
; check: v14 = band_imm v11, 15 ; check: v7 = ireduce.i8 v28
; check: v15 = ishl_imm v14, 4 ; check: v29 = uextend.i32 v7
; check: v22 = uextend.i32 v13 ; check: v30 = ushr_imm v29, 2
; check: v23 = uextend.i32 v15 ; check: v8 = ireduce.i8 v30
; check: v24 = bor v22, v23 ; check: v31 = uextend.i32 v6
; check: v1 = ireduce.i8 v24 ; check: v32 = band_imm v31, 51
; check: v9 = ireduce.i8 v32
; check: v33 = uextend.i32 v9
; check: v34 = ushr_imm v33, 2
; check: v10 = ireduce.i8 v34
; check: v35 = uextend.i32 v8
; check: v36 = uextend.i32 v10
; check: v37 = bor v35, v36
; check: v11 = ireduce.i8 v37
; check: v38 = uextend.i32 v11
; check: v39 = band_imm v38, 240
; check: v12 = ireduce.i8 v39
; check: v40 = uextend.i32 v12
; check: v41 = ushr_imm v40, 4
; check: v13 = ireduce.i8 v41
; check: v42 = uextend.i32 v11
; check: v43 = band_imm v42, 15
; check: v14 = ireduce.i8 v43
; check: v44 = uextend.i32 v14
; check: v45 = ishl_imm v44, 4
; check: v15 = ireduce.i8 v45
; check: v46 = uextend.i32 v13
; check: v47 = uextend.i32 v15
; check: v48 = bor v46, v47
; check: v1 = ireduce.i8 v48
; check: return v1
function %reverse_bits_16(i16) -> i16 { function %reverse_bits_16(i16) -> i16 {
ebb0(v0: i16): ebb0(v0: i16):
v1 = bitrev.i16 v0 v1 = bitrev.i16 v0
return v1 return v1
} }
; check: v2 = band_imm v0, 0xaaaa ; check: v21 = uextend.i32 v0
; check: v3 = ushr_imm v2, 1 ; check: v22 = band_imm v21, 0xaaaa
; check: v4 = band_imm v0, 0x5555 ; check: v2 = ireduce.i16 v22
; check: v5 = ishl_imm v4, 1 ; check: v23 = uextend.i32 v2
; check: v21 = uextend.i32 v3 ; check: v24 = ushr_imm v23, 1
; check: v22 = uextend.i32 v5 ; check: v3 = ireduce.i16 v24
; check: v23 = bor v21, v22 ; check: v25 = uextend.i32 v0
; check: v6 = ireduce.i16 v23 ; check: v26 = band_imm v25, 0x5555
; check: v7 = band_imm v6, 0xcccc ; check: v4 = ireduce.i16 v26
; check: v8 = ushr_imm v7, 2 ; check: v27 = uextend.i32 v4
; check: v9 = band_imm v6, 0x3333 ; check: v28 = ishl_imm v27, 1
; check: v10 = ushr_imm v9, 2 ; check: v5 = ireduce.i16 v28
; check: v24 = uextend.i32 v8 ; check: v29 = uextend.i32 v3
; check: v25 = uextend.i32 v10 ; check: v30 = uextend.i32 v5
; check: v26 = bor v24, v25 ; check: v31 = bor v29, v30
; check: v11 = ireduce.i16 v26 ; check: v6 = ireduce.i16 v31
; check: v12 = band_imm v11, 0xf0f0 ; check: v32 = uextend.i32 v6
; check: v13 = ushr_imm v12, 4 ; check: v33 = band_imm v32, 0xcccc
; check: v14 = band_imm v11, 3855 ; check: v7 = ireduce.i16 v33
; check: v15 = ishl_imm v14, 4 ; check: v34 = uextend.i32 v7
; check: v27 = uextend.i32 v13 ; check: v35 = ushr_imm v34, 2
; check: v28 = uextend.i32 v15 ; check: v8 = ireduce.i16 v35
; check: v29 = bor v27, v28 ; check: v36 = uextend.i32 v6
; check: v16 = ireduce.i16 v29 ; check: v37 = band_imm v36, 0x3333
; check: v17 = band_imm v16, 0xff00 ; check: v9 = ireduce.i16 v37
; check: v18 = ushr_imm v17, 8 ; check: v38 = uextend.i32 v9
; check: v19 = band_imm v16, 255 ; check: v39 = ushr_imm v38, 2
; check: v20 = ishl_imm v19, 8 ; check: v10 = ireduce.i16 v39
; check: v30 = uextend.i32 v18 ; check: v40 = uextend.i32 v8
; check: v31 = uextend.i32 v20 ; check: v41 = uextend.i32 v10
; check: v32 = bor v30, v31 ; check: v42 = bor v40, v41
; check: v1 = ireduce.i16 v32 ; check: v11 = ireduce.i16 v42
; check: v43 = uextend.i32 v11
; check: v44 = band_imm v43, 0xf0f0
; check: v12 = ireduce.i16 v44
; check: v45 = uextend.i32 v12
; check: v46 = ushr_imm v45, 4
; check: v13 = ireduce.i16 v46
; check: v47 = uextend.i32 v11
; check: v48 = band_imm v47, 3855
; check: v14 = ireduce.i16 v48
; check: v49 = uextend.i32 v14
; check: v50 = ishl_imm v49, 4
; check: v15 = ireduce.i16 v50
; check: v51 = uextend.i32 v13
; check: v52 = uextend.i32 v15
; check: v53 = bor v51, v52
; check: v16 = ireduce.i16 v53
; check: v54 = uextend.i32 v16
; check: v55 = band_imm v54, 0xff00
; check: v17 = ireduce.i16 v55
; check: v56 = uextend.i32 v17
; check: v57 = ushr_imm v56, 8
; check: v18 = ireduce.i16 v57
; check: v58 = uextend.i32 v16
; check: v59 = band_imm v58, 255
; check: v19 = ireduce.i16 v59
; check: v60 = uextend.i32 v19
; check: v61 = ishl_imm v60, 8
; check: v20 = ireduce.i16 v61
; check: v62 = uextend.i32 v18
; check: v63 = uextend.i32 v20
; check: v64 = bor v62, v63
; check: v1 = ireduce.i16 v64
; check: return v1 ; check: return v1
function %reverse_bits_32(i32) -> i32 { function %reverse_bits_32(i32) -> i32 {

View File

@@ -30,6 +30,13 @@ from .instructions import bitrev
from cdsl.ast import Var from cdsl.ast import Var
from cdsl.xform import Rtl, XFormGroup from cdsl.xform import Rtl, XFormGroup
try:
from typing import TYPE_CHECKING # noqa
if TYPE_CHECKING:
from cdsl.instructions import Instruction # noqa
except ImportError:
TYPE_CHECKING = False
narrow = XFormGroup('narrow', """ narrow = XFormGroup('narrow', """
Legalize instructions by narrowing. Legalize instructions by narrowing.
@@ -89,6 +96,7 @@ expand.custom_legalize(insts.stack_store, 'expand_stack_store')
x = Var('x') x = Var('x')
y = Var('y') y = Var('y')
z = Var('z')
a = Var('a') a = Var('a')
a1 = Var('a1') a1 = Var('a1')
a2 = Var('a2') a2 = Var('a2')
@@ -174,6 +182,92 @@ narrow.legalize(
a << iconcat(al, ah) a << iconcat(al, ah)
)) ))
def widen_one_arg(signed, op):
    # type: (bool, Instruction) -> None
    """
    Register `widen` patterns for a single-operand instruction on i8 and i16.

    The operand is extended to i32, `op` is applied to the extended value and
    the result is reduced back to the original narrow type.

    :param signed: Extend with `sextend` when true, `uextend` otherwise.
    :param op: The instruction to widen.
    """
    # The two variants only differ in how the operand is extended.
    extend = sextend if signed else uextend
    for int_ty in [types.i8, types.i16]:
        widen.legalize(
            a << op.bind(int_ty)(b),
            Rtl(
                x << extend.i32(b),
                z << op.i32(x),
                a << ireduce.bind(int_ty)(z)
            ))
def widen_two_arg(signed, op):
    # type: (bool, Instruction) -> None
    """
    Register `widen` patterns for a two-operand instruction on i8 and i16.

    Both operands are extended to i32 in the same way, `op` is applied to the
    extended values and the result is reduced back to the narrow type.

    :param signed: Extend with `sextend` when true, `uextend` otherwise.
    :param op: The instruction to widen.
    """
    # The two variants only differ in how the operands are extended.
    extend = sextend if signed else uextend
    for int_ty in [types.i8, types.i16]:
        widen.legalize(
            a << op.bind(int_ty)(b, c),
            Rtl(
                x << extend.i32(b),
                y << extend.i32(c),
                z << op.i32(x, y),
                a << ireduce.bind(int_ty)(z)
            ))
def widen_imm(signed, op):
    # type: (bool, Instruction) -> None
    """
    Register `widen` patterns for a value-and-immediate instruction on i8
    and i16.

    Only the first operand is extended to i32; the second operand is handed
    to the widened instruction unchanged. The result is reduced back to the
    narrow type.

    :param signed: Extend with `sextend` when true, `uextend` otherwise.
    :param op: The instruction to widen.
    """
    # The two variants only differ in how the first operand is extended.
    extend = sextend if signed else uextend
    for int_ty in [types.i8, types.i16]:
        widen.legalize(
            a << op.bind(int_ty)(b, c),
            Rtl(
                x << extend.i32(b),
                z << op.i32(x, c),
                a << ireduce.bind(int_ty)(z)
            ))
# Two-operand arithmetic widened with zero-extended operands.
for binop in (iadd, isub, imul, udiv, urem):
    widen_two_arg(False, binop)

# Signed division needs sign-extended operands.
# NOTE(review): no two-operand `srem` is registered here although `srem_imm`
# is registered below -- confirm whether that is handled elsewhere.
widen_two_arg(True, sdiv)

widen_one_arg(False, bnot)

# Immediate arithmetic, zero-extended first operand.
for binop in (iadd_imm, imul_imm, udiv_imm, urem_imm):
    widen_imm(False, binop)

# Immediate arithmetic, sign-extended first operand.
for binop in (sdiv_imm, srem_imm):
    widen_imm(True, binop)

# bit ops
for binop in (band, bor, bxor, band_not, bor_not, bxor_not):
    widen_two_arg(False, binop)

for binop in (band_imm, bor_imm, bxor_imm):
    widen_imm(False, binop)
for int_ty in [types.i8, types.i16]: for int_ty in [types.i8, types.i16]:
widen.legalize( widen.legalize(
a << iconst.bind(int_ty)(b), a << iconst.bind(int_ty)(b),
@@ -210,63 +304,6 @@ widen.legalize(
a << ireduce(b) a << ireduce(b)
)) ))
for binop in [iadd, isub, imul, udiv, band, bor, bxor]:
for int_ty in [types.i8, types.i16]:
widen.legalize(
a << binop.bind(int_ty)(x, y),
Rtl(
b << uextend.i32(x),
c << uextend.i32(y),
d << binop(b, c),
a << ireduce(d)
)
)
for binop in [sdiv]:
for int_ty in [types.i8, types.i16]:
widen.legalize(
a << binop.bind(int_ty)(x, y),
Rtl(
b << sextend.i32(x),
c << sextend.i32(y),
d << binop(b, c),
a << ireduce(d)
)
)
for unop in [bnot]:
for int_ty in [types.i8, types.i16]:
widen.legalize(
a << unop.bind(int_ty)(x),
Rtl(
b << sextend.i32(x),
d << unop(b),
a << ireduce(d)
)
)
for binop in [iadd_imm, imul_imm, udiv_imm]:
for int_ty in [types.i8, types.i16]:
widen.legalize(
a << binop.bind(int_ty)(x, y),
Rtl(
b << uextend.i32(x),
c << binop(b, y),
a << ireduce(c)
)
)
for binop in [sdiv_imm]:
for int_ty in [types.i8, types.i16]:
widen.legalize(
a << binop.bind(int_ty)(x, y),
Rtl(
b << sextend.i32(x),
c << binop(b, y),
a << ireduce(c)
)
)
for int_ty in [types.i8, types.i16]: for int_ty in [types.i8, types.i16]:
widen.legalize( widen.legalize(
br_table.bind(int_ty)(x, y), br_table.bind(int_ty)(x, y),
@@ -285,6 +322,72 @@ for int_ty in [types.i8, types.i16]:
) )
) )
# Widen i8/i16 shifts and integer comparisons.
#
# Shifts extend only the shifted value; the second operand is handed to the
# i32 shift unchanged and the result is reduced back to the narrow type.
# Comparisons extend their value operand(s) and produce the final result
# directly, so nothing is reduced afterwards.
for int_ty in [types.i8, types.i16]:
    # NOTE(review): the shift amount `c` is forwarded untouched. If shift
    # amounts are interpreted modulo the width of the shifted type, an amount
    # of at least the narrow width would behave differently once the value
    # is widened to i32 -- confirm.
    for shift_op, extend_op in [
            (ushr_imm, uextend),
            (ishl_imm, uextend),
            (ishl, uextend),
            (ushr, uextend),
            (sshr, sextend)]:
        widen.legalize(
            a << shift_op.bind(int_ty)(b, c),
            Rtl(
                x << extend_op.i32(b),
                z << shift_op.i32(x, c),
                a << ireduce.bind(int_ty)(z)
            ))

    # Equality and unsigned orderings compare zero-extended operands.
    for w_cc in [
        intcc.eq, intcc.ne,
        intcc.ugt, intcc.ult, intcc.uge, intcc.ule
    ]:
        widen.legalize(
            a << insts.icmp_imm.bind(int_ty)(w_cc, b, c),
            Rtl(
                x << uextend.i32(b),
                a << insts.icmp_imm(w_cc, x, c)
            ))
        widen.legalize(
            a << insts.icmp.bind(int_ty)(w_cc, b, c),
            Rtl(
                x << uextend.i32(b),
                y << uextend.i32(c),
                a << insts.icmp.i32(w_cc, x, y)
            ))

    # Signed orderings compare sign-extended operands.
    for w_cc in [intcc.sgt, intcc.slt, intcc.sge, intcc.sle]:
        widen.legalize(
            a << insts.icmp_imm.bind(int_ty)(w_cc, b, c),
            Rtl(
                x << sextend.i32(b),
                a << insts.icmp_imm(w_cc, x, c)
            ))
        widen.legalize(
            a << insts.icmp.bind(int_ty)(w_cc, b, c),
            Rtl(
                x << sextend.i32(b),
                y << sextend.i32(c),
                a << insts.icmp(w_cc, x, y)
            ))
# Expand integer operations with carry for RISC architectures that don't have # Expand integer operations with carry for RISC architectures that don't have
# the flags. # the flags.
expand.legalize( expand.legalize(

View File

@@ -244,7 +244,7 @@ class FieldPredicate(object):
""" """
# Prepend `field` to the predicate function arguments. # Prepend `field` to the predicate function arguments.
args = (self.field.rust_name(),) + tuple(map(str, self.args)) args = (self.field.rust_name(),) + tuple(map(str, self.args))
return 'predicates::{}({})'.format(self.function, ', '.join(args)) return '::predicates::{}({})'.format(self.function, ', '.join(args))
class IsEqual(FieldPredicate): class IsEqual(FieldPredicate):

View File

@@ -6,7 +6,6 @@ use isa;
use isa::constraints::*; use isa::constraints::*;
use isa::enc_tables::*; use isa::enc_tables::*;
use isa::encoding::RecipeSizing; use isa::encoding::RecipeSizing;
use predicates;
// Include the generated encoding tables: // Include the generated encoding tables:
// - `LEVEL1_RV32` // - `LEVEL1_RV32`

View File

@@ -10,7 +10,6 @@ use isa;
use isa::constraints::*; use isa::constraints::*;
use isa::enc_tables::*; use isa::enc_tables::*;
use isa::encoding::RecipeSizing; use isa::encoding::RecipeSizing;
use predicates;
include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs")); include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs"));
include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs")); include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs"));