Legalize several i8 insts (#380)

* Legalize several i8 insts

* X86: implement regmove.{i8,i16}

* Legalize bnot

* Remove comments

* Nicer type param binding in legalize.py

* Legalize sdiv_imm.i8

* Hopefully fix mypy error

* Add missing trailing newlines

* Fix tests
This commit is contained in:
bjorn3
2018-07-04 15:31:00 +02:00
committed by Dan Gohman
parent dd72b54eef
commit 5db45d26cc
9 changed files with 275 additions and 5 deletions

View File

@@ -0,0 +1,28 @@
test compile
target x86_64
; Compile test for bnot on an i8 value.
; An i8 is loaded through v1, copied between the 1-byte stack slots ss1 and
; ss3, inverted with bnot, written to ss2, and finally stored through v0.
; ss0 is declared but not referenced in the body.
function u0:51(i64, i64) system_v {
ss0 = explicit_slot 0
ss1 = explicit_slot 1
ss2 = explicit_slot 1
ss3 = explicit_slot 1
ebb0(v0: i64, v1: i64):
v2 = stack_addr.i64 ss1
v3 = load.i8 v1
store v3, v2
v4 = stack_addr.i64 ss2
v5 = stack_addr.i64 ss3
jump ebb1
ebb1:
; Copy the byte from ss1 to ss3, then reload it as the bnot operand.
v6 = load.i8 v2
store v6, v5
v7 = load.i8 v5
v8 = bnot v7
; Spill the inverted byte to ss2 and copy it out through v0.
store v8, v4
v9 = load.i8 v4
store v9, v0
return
}

View File

@@ -0,0 +1,17 @@
test compile
target x86_64
; Compile test for br_table with an i8 index.
; The index is loaded as an i8 from stack slot ss0 and used to index jt0,
; whose only entry is ebb1; the instruction following br_table also jumps
; to ebb1. The i64 argument v0 is not used.
function u0:0(i64) system_v {
ss0 = explicit_slot 1
jt0 = jump_table ebb1
ebb0(v0: i64):
v1 = stack_addr.i64 ss0
v2 = load.i8 v1
br_table v2, jt0
jump ebb1
ebb1:
return
}

View File

@@ -0,0 +1,18 @@
test compile
target x86_64
; Compile test for an i8 constant: iconst.i8 42 is stored through v0.
; ss0 is declared but not referenced in the body.
function u0:0(i64) system_v {
ss0 = explicit_slot 0
ebb0(v0: i64):
jump ebb1
ebb1:
; _0 = const 42u8
v1 = iconst.i8 42
store v1, v0
;
; return
return
}

View File

@@ -0,0 +1,11 @@
test compile
target x86_64
; Compile test for imul on two i8 arguments; the product is stored
; through the pointer v0.
function u0:0(i64, i8, i8) system_v {
ebb0(v0: i64, v1: i8, v2: i8):
v11 = imul v1, v2
store v11, v0
return
}

View File

@@ -0,0 +1,15 @@
test compile
target x86_64
; Compile test for imul_imm on an i8 value: a byte loaded from stack slot
; ss0 is multiplied by the immediate 42 and the result is stored through v0.
; The i8 argument v1 and the constant v6 are defined but never used.
function u0:0(i64, i8) system_v {
ss0 = explicit_slot 1
ebb0(v0: i64, v1: i8):
v3 = stack_addr.i64 ss0
v5 = load.i8 v3
v6 = iconst.i8 2
v7 = imul_imm v5, 42
store v7, v0
return
}

View File

@@ -0,0 +1,31 @@
test compile
target x86_64
; Compile test for imul on i8 values that travel through stack slots.
; Both i8 arguments are spilled to ss1/ss2, copied to ss3/ss4, reloaded,
; multiplied, and the product is stored through v0.
; ss0 is declared but not referenced in the body.
function u0:0(i64, i8, i8) system_v {
ss0 = explicit_slot 0
ss1 = explicit_slot 1
ss2 = explicit_slot 1
ss3 = explicit_slot 1
ss4 = explicit_slot 1
ebb0(v0: i64, v1: i8, v2: i8):
v3 = stack_addr.i64 ss1
store v1, v3
v4 = stack_addr.i64 ss2
store v2, v4
v5 = stack_addr.i64 ss3
v6 = stack_addr.i64 ss4
jump ebb1
ebb1:
; Copy ss1 -> ss3 and ss2 -> ss4.
v7 = load.i8 v3
store v7, v5
v8 = load.i8 v4
store v8, v6
; Reload both operands from the copies and multiply them.
v9 = load.i8 v5
v10 = load.i8 v6
v11 = imul v9, v10
store v11, v0
return
}

View File

@@ -0,0 +1,36 @@
test compile
target x86_64
; Compile test for i16 loads/stores and passing an i16 call argument.
; v1 is spilled to ss1 and later used as a pointer to an i16; that i16 is
; copied into the 2-byte slot ss3, v2 is copied via ss2 into ss4, and both
; are reloaded and passed to the colocated function fn0 together with v0.
; ss0 is declared but not referenced in the body.
function u0:0(i64, i64, i64) system_v {
ss0 = explicit_slot 0
ss1 = explicit_slot 8
ss2 = explicit_slot 8
ss3 = explicit_slot 2
ss4 = explicit_slot 8
sig0 = (i64, i16, i64) system_v
fn0 = colocated u0:11 sig0
ebb0(v0: i64, v1: i64, v2: i64):
v3 = stack_addr.i64 ss1
store v1, v3
v4 = stack_addr.i64 ss2
store v2, v4
v5 = stack_addr.i64 ss3
v6 = stack_addr.i64 ss4
jump ebb1
ebb1:
; Reload the pointer from ss1, load the i16 it points to, and keep it in ss3.
v7 = load.i64 v3
v8 = load.i16 v7
store v8, v5
; Copy the i64 from ss2 into ss4.
v9 = load.i64 v4
store v9, v6
; Reload both values and call fn0 with the i16 as its second argument.
v10 = load.i16 v5
v11 = load.i64 v6
call fn0(v0, v10, v11)
jump ebb2
ebb2:
return
}

View File

@@ -10,6 +10,7 @@ from __future__ import absolute_import
from .immediates import intcc, imm64, ieee32, ieee64 from .immediates import intcc, imm64, ieee32, ieee64
from . import instructions as insts from . import instructions as insts
from . import types from . import types
from .instructions import uextend, sextend, ireduce
from .instructions import iadd, iadd_cout, iadd_cin, iadd_carry, iadd_imm from .instructions import iadd, iadd_cout, iadd_cin, iadd_carry, iadd_imm
from .instructions import isub, isub_bin, isub_bout, isub_borrow, irsub_imm from .instructions import isub, isub_bin, isub_bout, isub_borrow, irsub_imm
from .instructions import imul, imul_imm from .instructions import imul, imul_imm
@@ -23,6 +24,8 @@ from .instructions import iconst, bint, select
from .instructions import ishl, ishl_imm, sshr, sshr_imm, ushr, ushr_imm from .instructions import ishl, ishl_imm, sshr, sshr_imm, ushr, ushr_imm
from .instructions import rotl, rotl_imm, rotr, rotr_imm from .instructions import rotl, rotl_imm, rotr, rotr_imm
from .instructions import f32const, f64const from .instructions import f32const, f64const
from .instructions import store, load
from .instructions import br_table
from cdsl.ast import Var from cdsl.ast import Var
from cdsl.xform import Rtl, XFormGroup from cdsl.xform import Rtl, XFormGroup
@@ -41,8 +44,6 @@ widen = XFormGroup('widen', """
The transformations in the 'widen' group work by expressing The transformations in the 'widen' group work by expressing
instructions in terms of larger types. instructions in terms of larger types.
This group is not yet implemented.
""") """)
expand = XFormGroup('expand', """ expand = XFormGroup('expand', """
@@ -99,6 +100,7 @@ c1 = Var('c1')
c2 = Var('c2') c2 = Var('c2')
c_in = Var('c_in') c_in = Var('c_in')
c_int = Var('c_int') c_int = Var('c_int')
d = Var('d')
xl = Var('xl') xl = Var('xl')
xh = Var('xh') xh = Var('xh')
yl = Var('yl') yl = Var('yl')
@@ -106,6 +108,10 @@ yh = Var('yh')
al = Var('al') al = Var('al')
ah = Var('ah') ah = Var('ah')
cc = Var('cc') cc = Var('cc')
ptr = Var('ptr')
flags = Var('flags')
offset = Var('off')
ss = Var('ss')
narrow.legalize( narrow.legalize(
a << iadd(x, y), a << iadd(x, y),
@@ -148,6 +154,108 @@ narrow.legalize(
a << iconcat(al, ah) a << iconcat(al, ah)
)) ))
# Widen i8/i16 constants: materialize the immediate as an i32 iconst and
# bring it back to the requested type with ireduce.
for int_ty in [types.i8, types.i16]:
widen.legalize(
a << iconst.bind(int_ty)(b),
Rtl(
c << iconst.i32(b),
a << ireduce.bind(int_ty)(c)
))
# Widen an i8 store: extend the value to i32 with uextend and emit istore8
# with the same flags, pointer and offset.
widen.legalize(
store.i8(flags, a, ptr, offset),
Rtl(
b << uextend.i32(a),
insts.istore8(flags, b, ptr, offset)
))
# Same pattern for i16, using istore16.
widen.legalize(
store.i16(flags, a, ptr, offset),
Rtl(
b << uextend.i32(a),
insts.istore16(flags, b, ptr, offset)
))
# Widen an i8 load: load into an i32 with uload8 (same flags, pointer and
# offset) and reduce the result back with ireduce.
widen.legalize(
a << load.i8(flags, ptr, offset),
Rtl(
b << insts.uload8.i32(flags, ptr, offset),
a << ireduce(b)
))
# Same pattern for i16, using uload16.
widen.legalize(
a << load.i16(flags, ptr, offset),
Rtl(
b << insts.uload16.i32(flags, ptr, offset),
a << ireduce(b)
))
# Widen i8/i16 binary operations: extend both operands to i32 with uextend,
# apply the same operation on the i32 values, and reduce the result back
# with ireduce. One pattern is registered per (operation, type) pair.
for binop in [iadd, isub, imul, udiv, band, bor, bxor]:
for int_ty in [types.i8, types.i16]:
widen.legalize(
a << binop.bind(int_ty)(x, y),
Rtl(
b << uextend.i32(x),
c << uextend.i32(y),
d << binop(b, c),
a << ireduce(d)
)
)
# Widen i8/i16 signed division: same shape as the other binary operations,
# but the operands are extended with sextend instead of uextend.
# NOTE(review): with x at the narrow type's minimum and y = -1 the i32
# quotient is representable, so any overflow behaviour the narrow sdiv would
# have had is not reproduced by the widened form -- confirm this is intended.
for binop in [sdiv]:
for int_ty in [types.i8, types.i16]:
widen.legalize(
a << binop.bind(int_ty)(x, y),
Rtl(
b << sextend.i32(x),
c << sextend.i32(y),
d << binop(b, c),
a << ireduce(d)
)
)
# Widen i8/i16 unary operations (currently only bnot): extend the operand
# to i32 with sextend, apply the operation, and reduce the result back
# with ireduce.
for unop in [bnot]:
for int_ty in [types.i8, types.i16]:
widen.legalize(
a << unop.bind(int_ty)(x),
Rtl(
b << sextend.i32(x),
d << unop(b),
a << ireduce(d)
)
)
# Widen i8/i16 operations that take an immediate second operand: only the
# value operand x is extended (with uextend); the immediate y is passed to
# the i32 operation unchanged, and the result is reduced back with ireduce.
for binop in [iadd_imm, imul_imm, udiv_imm]:
for int_ty in [types.i8, types.i16]:
widen.legalize(
a << binop.bind(int_ty)(x, y),
Rtl(
b << uextend.i32(x),
c << binop(b, y),
a << ireduce(c)
)
)
# Widen i8/i16 signed division by an immediate: like the other immediate
# forms, but the value operand is extended with sextend; the immediate y is
# passed through unchanged.
for binop in [sdiv_imm]:
for int_ty in [types.i8, types.i16]:
widen.legalize(
a << binop.bind(int_ty)(x, y),
Rtl(
b << sextend.i32(x),
c << binop(b, y),
a << ireduce(c)
)
)
# Widen the index of br_table: an i8/i16 index x is extended to i32 with
# uextend and br_table is re-emitted with the same second operand y.
# There is no result to reduce.
for int_ty in [types.i8, types.i16]:
widen.legalize(
br_table.bind(int_ty)(x, y),
Rtl(
b << uextend.i32(x),
br_table(b, y),
)
)
# Expand integer operations with carry for RISC architectures that don't have # Expand integer operations with carry for RISC architectures that don't have
# the flags. # the flags.
expand.legalize( expand.legalize(

View File

@@ -6,6 +6,7 @@ from cdsl.predicates import IsZero32BitFloat, IsZero64BitFloat
from cdsl.predicates import IsUnsignedInt, Not, And from cdsl.predicates import IsUnsignedInt, Not, And
from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals
from base import instructions as base from base import instructions as base
from base import types
from base.formats import UnaryIeee32, UnaryIeee64, UnaryImm from base.formats import UnaryIeee32, UnaryIeee64, UnaryImm
from base.formats import FuncAddr, Call, LoadComplex, StoreComplex from base.formats import FuncAddr, Call, LoadComplex, StoreComplex
from .defs import X86_64, X86_32 from .defs import X86_64, X86_32
@@ -13,7 +14,7 @@ from . import recipes as r
from . import settings as cfg from . import settings as cfg
from . import instructions as x86 from . import instructions as x86
from .legalize import x86_expand from .legalize import x86_expand
from base.legalize import narrow, expand_flags from base.legalize import narrow, widen, expand_flags
from base.settings import allones_funcaddrs, is_pic from base.settings import allones_funcaddrs, is_pic
from .settings import use_sse41 from .settings import use_sse41
@@ -30,6 +31,8 @@ X86_32.legalize_monomorphic(expand_flags)
X86_32.legalize_type( X86_32.legalize_type(
default=narrow, default=narrow,
b1=expand_flags, b1=expand_flags,
i8=widen,
i16=widen,
i32=x86_expand, i32=x86_expand,
f32=x86_expand, f32=x86_expand,
f64=x86_expand) f64=x86_expand)
@@ -38,6 +41,8 @@ X86_64.legalize_monomorphic(expand_flags)
X86_64.legalize_type( X86_64.legalize_type(
default=narrow, default=narrow,
b1=expand_flags, b1=expand_flags,
i8=widen,
i16=widen,
i32=x86_expand, i32=x86_expand,
i64=x86_expand, i64=x86_expand,
f32=x86_expand, f32=x86_expand,
@@ -172,8 +177,9 @@ enc_both(base.copy.b1, r.umr, 0x89)
# For x86-64, only define REX forms for now, since we can't describe the # For x86-64, only define REX forms for now, since we can't describe the
# special regunit immediate operands with the current constraint language. # special regunit immediate operands with the current constraint language.
X86_32.enc(base.regmove.i32, *r.rmov(0x89)) for ty in [types.i8, types.i16, types.i32]:
X86_64.enc(base.regmove.i32, *r.rmov.rex(0x89)) X86_32.enc(base.regmove.bind(ty), *r.rmov(0x89))
X86_64.enc(base.regmove.bind(ty), *r.rmov.rex(0x89))
X86_64.enc(base.regmove.i64, *r.rmov.rex(0x89, w=1)) X86_64.enc(base.regmove.i64, *r.rmov.rex(0x89, w=1))
enc_both(base.regmove.b1, r.rmov, 0x89) enc_both(base.regmove.b1, r.rmov, 0x89)