Legalize several i8 insts (#380)

* Legalize several i8 insts
* X86: implement regmove.{i8,i16}
* Legalize bnot
* Remove comments
* Nicer type param binding in legalize.py
* Legalize sdiv_imm.i8
* Hopefully fix mypy error
* Add missing trailing newlines
* Fix tests
2018-07-04 15:31:00 +02:00
parent dd72b54eef
commit 5db45d26cc
9 changed files with 275 additions and 5 deletions
--- a/cranelift/filetests/isa/x86/legalize-bnot.cton
+++ b/cranelift/filetests/isa/x86/legalize-bnot.cton
@@ -0,0 +1,28 @@
 test compile
 target x86_64
 ; Compile test for `bnot` on an i8 value: an i8 is loaded through v1, copied
 ; between stack slots, inverted with `bnot` (v8), and finally stored
 ; through v0.
 function u0:51(i64, i64) system_v {
    ss0 = explicit_slot 0
    ss1 = explicit_slot 1
    ss2 = explicit_slot 1
    ss3 = explicit_slot 1
 ebb0(v0: i64, v1: i64):
    v2 = stack_addr.i64 ss1
    v3 = load.i8 v1
    store v3, v2
    v4 = stack_addr.i64 ss2
    v5 = stack_addr.i64 ss3
    jump ebb1
 ebb1:
    v6 = load.i8 v2
    store v6, v5
    v7 = load.i8 v5
    ; The i8 `bnot` is the instruction this test is named after.
    v8 = bnot v7
    store v8, v4
    v9 = load.i8 v4
    store v9, v0
    return
 }
--- a/cranelift/filetests/isa/x86/legalize-br-table.cton
+++ b/cranelift/filetests/isa/x86/legalize-br-table.cton
@@ -0,0 +1,17 @@
 test compile
 target x86_64
 ; Compile test for `br_table` with an i8 index: the index v2 is loaded as an
 ; i8 from stack slot ss0 and used to branch through jump table jt0.
 function u0:0(i64) system_v {
    ss0 = explicit_slot 1
    jt0 = jump_table ebb1
 ebb0(v0: i64):
    v1 = stack_addr.i64 ss0
    v2 = load.i8 v1
    br_table v2, jt0
    jump ebb1
 ebb1:
    return
 }
--- a/cranelift/filetests/isa/x86/legalize-iconst-i8.cton
+++ b/cranelift/filetests/isa/x86/legalize-iconst-i8.cton
@@ -0,0 +1,18 @@
 test compile
 target x86_64
 ; Compile test for `iconst.i8`: the i8 constant 42 is materialized and
 ; stored through the pointer argument v0.
 function u0:0(i64) system_v {
    ss0 = explicit_slot 0
 ebb0(v0: i64):
    jump ebb1
 ebb1:
 ; _0 = const 42u8
    v1 = iconst.i8 42
    store v1, v0
 ; 
 ; return
    return
 }
--- a/cranelift/filetests/isa/x86/legalize-imul-i8.cton
+++ b/cranelift/filetests/isa/x86/legalize-imul-i8.cton
@@ -0,0 +1,11 @@
 test compile
 target x86_64
 ; Compile test for `imul` on i8 operands: the two i8 arguments are
 ; multiplied and the product is stored through v0.
 function u0:0(i64, i8, i8) system_v {
 ebb0(v0: i64, v1: i8, v2: i8):
    v11 = imul v1, v2
    store v11, v0
    return
 }
--- a/cranelift/filetests/isa/x86/legalize-imul-imm-i8.cton
+++ b/cranelift/filetests/isa/x86/legalize-imul-imm-i8.cton
@@ -0,0 +1,15 @@
 test compile
 target x86_64
 ; Compile test for `imul_imm` on an i8 operand: an i8 loaded from stack slot
 ; ss0 is multiplied by the immediate 42 and the result is stored through v0.
 function u0:0(i64, i8) system_v {
    ss0 = explicit_slot 1
 ebb0(v0: i64, v1: i8):
    v3 = stack_addr.i64 ss0
    v5 = load.i8 v3
    ; v6 is not referenced by any later instruction in this function.
    v6 = iconst.i8 2
    v7 = imul_imm v5, 42
    store v7, v0
    return
 }
--- a/cranelift/filetests/isa/x86/legalize-load-store-i8.cton
+++ b/cranelift/filetests/isa/x86/legalize-load-store-i8.cton
@@ -0,0 +1,31 @@
 test compile
 target x86_64
 ; Compile test for i8 `load`/`store`: both i8 arguments are stored to stack
 ; slots, copied to further slots via `load.i8` + `store`, reloaded,
 ; multiplied with `imul`, and the product is stored through v0.
 function u0:0(i64, i8, i8) system_v {
    ss0 = explicit_slot 0
    ss1 = explicit_slot 1
    ss2 = explicit_slot 1
    ss3 = explicit_slot 1
    ss4 = explicit_slot 1
 ebb0(v0: i64, v1: i8, v2: i8):
    v3 = stack_addr.i64 ss1
    store v1, v3
    v4 = stack_addr.i64 ss2
    store v2, v4
    v5 = stack_addr.i64 ss3
    v6 = stack_addr.i64 ss4
    jump ebb1
 ebb1:
    v7 = load.i8 v3
    store v7, v5
    v8 = load.i8 v4
    store v8, v6
    v9 = load.i8 v5
    v10 = load.i8 v6
    v11 = imul v9, v10
    store v11, v0
    return
 }
--- a/cranelift/filetests/isa/x86/legalize-regmove-i8.cton
+++ b/cranelift/filetests/isa/x86/legalize-regmove-i8.cton
@@ -0,0 +1,36 @@
 test compile
 target x86_64
 ; Compile test that passes a narrow (i16) value as a call argument: an i16
 ; is loaded through a pointer, round-tripped through stack slot ss3, and
 ; passed as the second argument of fn0, whose signature is (i64, i16, i64).
 ; NOTE(review): presumably the narrow call argument is what makes the
 ; register allocator emit a narrow `regmove` here -- confirm.
 function u0:0(i64, i64, i64) system_v {
    ss0 = explicit_slot 0
    ss1 = explicit_slot 8
    ss2 = explicit_slot 8
    ss3 = explicit_slot 2
    ss4 = explicit_slot 8
    sig0 = (i64, i16, i64) system_v
    fn0 = colocated u0:11 sig0
 ebb0(v0: i64, v1: i64, v2: i64):
    v3 = stack_addr.i64 ss1
    store v1, v3
    v4 = stack_addr.i64 ss2
    store v2, v4
    v5 = stack_addr.i64 ss3
    v6 = stack_addr.i64 ss4
    jump ebb1
 ebb1:
    v7 = load.i64 v3
    v8 = load.i16 v7
    store v8, v5
    v9 = load.i64 v4
    store v9, v6
    v10 = load.i16 v5
    v11 = load.i64 v6
    call fn0(v0, v10, v11)
    jump ebb2
 ebb2:
    return
 }
--- a/lib/codegen/meta/base/legalize.py
+++ b/lib/codegen/meta/base/legalize.py
@@ -10,6 +10,7 @@ from __future__ import absolute_import
 from .immediates import intcc, imm64, ieee32, ieee64
 from . import instructions as insts
 from . import types
 from .instructions import uextend, sextend, ireduce
 from .instructions import iadd, iadd_cout, iadd_cin, iadd_carry, iadd_imm
 from .instructions import isub, isub_bin, isub_bout, isub_borrow, irsub_imm
 from .instructions import imul, imul_imm
@@ -23,6 +24,8 @@ from .instructions import iconst, bint, select
 from .instructions import ishl, ishl_imm, sshr, sshr_imm, ushr, ushr_imm
 from .instructions import rotl, rotl_imm, rotr, rotr_imm
 from .instructions import f32const, f64const
 from .instructions import store, load
 from .instructions import br_table
 from cdsl.ast import Var
 from cdsl.xform import Rtl, XFormGroup
@@ -41,8 +44,6 @@ widen = XFormGroup('widen', """
        The transformations in the 'widen' group work by expressing
        instructions in terms of larger types.
        This group is not yet implemented.
        """)
 expand = XFormGroup('expand', """
@@ -99,6 +100,7 @@ c1 = Var('c1')
 c2 = Var('c2')
 c_in = Var('c_in')
 c_int = Var('c_int')
 d = Var('d')
 xl = Var('xl')
 xh = Var('xh')
 yl = Var('yl')
@@ -106,6 +108,10 @@ yh = Var('yh')
 al = Var('al')
 ah = Var('ah')
 cc = Var('cc')
 ptr = Var('ptr')
 flags = Var('flags')
 offset = Var('off')
 ss = Var('ss')
 narrow.legalize(
        a << iadd(x, y),
@@ -148,6 +154,108 @@ narrow.legalize(
            a << iconcat(al, ah)
        ))
 # Widen `iconst` for i8 and i16: materialize the immediate `b` as an i32
 # constant, then `ireduce` it back to the original narrow type.
 for int_ty in [types.i8, types.i16]:
    widen.legalize(
        a << iconst.bind(int_ty)(b),
        Rtl(
            c << iconst.i32(b),
            a << ireduce.bind(int_ty)(c)
        ))
 # Widen `store.i8`: zero-extend the stored value to i32 and emit `istore8`
 # with the same flags, pointer and offset.
 widen.legalize(
    store.i8(flags, a, ptr, offset),
    Rtl(
        b << uextend.i32(a),
        insts.istore8(flags, b, ptr, offset)
    ))
 # Widen `store.i16`: same pattern as above, using `istore16`.
 widen.legalize(
    store.i16(flags, a, ptr, offset),
    Rtl(
        b << uextend.i32(a),
        insts.istore16(flags, b, ptr, offset)
    ))
 # Widen `load.i8`: perform a `uload8` producing an i32, then `ireduce` the
 # result to the type of the original destination `a`.
 widen.legalize(
    a << load.i8(flags, ptr, offset),
    Rtl(
        b << insts.uload8.i32(flags, ptr, offset),
        a << ireduce(b)
    ))
 # Widen `load.i16`: same pattern as above, using `uload16`.
 widen.legalize(
    a << load.i16(flags, ptr, offset),
    Rtl(
        b << insts.uload16.i32(flags, ptr, offset),
        a << ireduce(b)
    ))
 # Widen two-operand instructions on i8/i16 using zero-extension: both
 # operands are `uextend`ed to i32, the same instruction is applied to the
 # widened values, and the result is `ireduce`d back into `a`.
 for binop in [iadd, isub, imul, udiv, band, bor, bxor]:
    for int_ty in [types.i8, types.i16]:
        widen.legalize(
            a << binop.bind(int_ty)(x, y),
            Rtl(
                b << uextend.i32(x),
                c << uextend.i32(y),
                d << binop(b, c),
                a << ireduce(d)
            )
        )
 # Widen `sdiv` on i8/i16. Unlike the zero-extended group, the operands are
 # sign-extended (`sextend`) to i32 before the division; the result is then
 # `ireduce`d back into `a`.
 for binop in [sdiv]:
    for int_ty in [types.i8, types.i16]:
        widen.legalize(
            a << binop.bind(int_ty)(x, y),
            Rtl(
                b << sextend.i32(x),
                c << sextend.i32(y),
                d << binop(b, c),
                a << ireduce(d)
            )
        )
 # Widen single-operand `bnot` on i8/i16: extend the operand to i32, apply
 # `bnot`, and `ireduce` the result back into `a`.
 # NOTE(review): this uses `sextend` whereas the other bitwise operations
 # (band/bor/bxor) are widened with `uextend`; since the result is reduced
 # again, the choice presumably does not affect the low bits -- confirm.
 for unop in [bnot]:
    for int_ty in [types.i8, types.i16]:
        widen.legalize(
            a << unop.bind(int_ty)(x),
            Rtl(
                b << sextend.i32(x),
                d << unop(b),
                a << ireduce(d)
            )
        )
 # Widen immediate-operand instructions on i8/i16 using zero-extension: only
 # the value operand `x` is `uextend`ed to i32; the second operand `y` is
 # passed to the widened instruction unchanged, and the result is `ireduce`d
 # back into `a`.
 # NOTE(review): `y` is not adjusted to the narrow type. For `udiv_imm`,
 # verify that an immediate written as a negative number yields the same
 # result at i32 as it would at i8/i16.
 for binop in [iadd_imm, imul_imm, udiv_imm]:
    for int_ty in [types.i8, types.i16]:
        widen.legalize(
            a << binop.bind(int_ty)(x, y),
            Rtl(
                b << uextend.i32(x),
                c << binop(b, y),
                a << ireduce(c)
            )
        )
 # Widen `sdiv_imm` on i8/i16: the value operand `x` is sign-extended to i32,
 # the second operand `y` is passed through unchanged, and the result is
 # `ireduce`d back into `a`.
 for binop in [sdiv_imm]:
    for int_ty in [types.i8, types.i16]:
        widen.legalize(
            a << binop.bind(int_ty)(x, y),
            Rtl(
                b << sextend.i32(x),
                c << binop(b, y),
                a << ireduce(c)
            )
        )
 # Widen `br_table` with an i8/i16 first operand: zero-extend `x` to i32 and
 # re-issue `br_table` with the widened value; the second operand `y` is
 # passed through unchanged. This pattern defines no result value.
 for int_ty in [types.i8, types.i16]:
    widen.legalize(
        br_table.bind(int_ty)(x, y),
        Rtl(
            b << uextend.i32(x),
            br_table(b, y),
        )
    )
 # Expand integer operations with carry for RISC architectures that don't have
 # the flags.
 expand.legalize(
--- a/lib/codegen/meta/isa/x86/encodings.py
+++ b/lib/codegen/meta/isa/x86/encodings.py
@@ -6,6 +6,7 @@ from cdsl.predicates import IsZero32BitFloat, IsZero64BitFloat
 from cdsl.predicates import IsUnsignedInt, Not, And
 from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals
 from base import instructions as base
 from base import types
 from base.formats import UnaryIeee32, UnaryIeee64, UnaryImm
 from base.formats import FuncAddr, Call, LoadComplex, StoreComplex
 from .defs import X86_64, X86_32
@@ -13,7 +14,7 @@ from . import recipes as r
 from . import settings as cfg
 from . import instructions as x86
 from .legalize import x86_expand
-from base.legalize import narrow, expand_flags
+from base.legalize import narrow, widen, expand_flags
 from base.settings import allones_funcaddrs, is_pic
 from .settings import use_sse41
@@ -30,6 +31,8 @@ X86_32.legalize_monomorphic(expand_flags)
 # Per-type legalization groups for the 32-bit ISA: i8 and i16 are routed
 # to the `widen` group, b1 to `expand_flags`, i32/f32/f64 to `x86_expand`,
 # and `narrow` is passed as the default.
 X86_32.legalize_type(
    default=narrow,
    b1=expand_flags,
    i8=widen,
    i16=widen,
    i32=x86_expand,
    f32=x86_expand,
    f64=x86_expand)
@@ -38,6 +41,8 @@ X86_64.legalize_monomorphic(expand_flags)
 X86_64.legalize_type(
    default=narrow,
    b1=expand_flags,
    i8=widen,
    i16=widen,
    i32=x86_expand,
    i64=x86_expand,
    f32=x86_expand,
@@ -172,8 +177,9 @@ enc_both(base.copy.b1, r.umr, 0x89)
 # For x86-64, only define REX forms for now, since we can't describe the
 # special regunit immediate operands with the current constraint language.
-X86_32.enc(base.regmove.i32, *r.rmov(0x89))
+for ty in [types.i8, types.i16, types.i32]:
-X86_64.enc(base.regmove.i32, *r.rmov.rex(0x89))
+    X86_32.enc(base.regmove.bind(ty), *r.rmov(0x89))
    X86_64.enc(base.regmove.bind(ty), *r.rmov.rex(0x89))
 X86_64.enc(base.regmove.i64, *r.rmov.rex(0x89, w=1))
 enc_both(base.regmove.b1, r.rmov, 0x89)