From 2eec1469a83f426d5c4c2c6ec94f2033d23542e5 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Wed, 26 Sep 2018 01:10:23 +0200 Subject: [PATCH] Legalize some more i8/i16 instructions (#524) * Legalize some more i8/i16 instructions --- .../isa/x86/legalize-byte-ops-i8.clif | 36 +++ .../filetests/isa/x86/legalize-icmp-i8.clif | 19 ++ .../filetests/isa/x86/legalize-shlr-i8.clif | 24 ++ .../filetests/isa/x86/legalize-urem-i8.clif | 15 ++ cranelift/filetests/legalizer/bitrev.clif | 169 +++++++++----- lib/codegen/meta-python/base/legalize.py | 217 +++++++++++++----- lib/codegen/meta-python/cdsl/predicates.py | 2 +- lib/codegen/src/isa/riscv/enc_tables.rs | 1 - lib/codegen/src/isa/x86/enc_tables.rs | 1 - 9 files changed, 368 insertions(+), 116 deletions(-) create mode 100644 cranelift/filetests/isa/x86/legalize-byte-ops-i8.clif create mode 100644 cranelift/filetests/isa/x86/legalize-icmp-i8.clif create mode 100644 cranelift/filetests/isa/x86/legalize-shlr-i8.clif create mode 100644 cranelift/filetests/isa/x86/legalize-urem-i8.clif diff --git a/cranelift/filetests/isa/x86/legalize-byte-ops-i8.clif b/cranelift/filetests/isa/x86/legalize-byte-ops-i8.clif new file mode 100644 index 0000000000..b0a318b8d4 --- /dev/null +++ b/cranelift/filetests/isa/x86/legalize-byte-ops-i8.clif @@ -0,0 +1,36 @@ +test compile +target x86_64 + +; regex: V=v\d+ + +function u0:0(i8, i8) fast { +fn0 = %black_box(i8) +ss0 = explicit_slot 1 ; black box + +ebb0(v0: i8, v1: i8): + v99 = stack_addr.i64 ss0 + + ; check: istore8 $(V), $(V) + + v2 = band v0, v1 + store v2, v99 + v3 = bor v0, v1 + store v3, v99 + v4 = bxor v0, v1 + store v4, v99 + v5 = bnot v0 + store v5, v99 + v6 = band_not v0, v1 + store v6, v99 + v7 = bor_not v0, v1 + store v7, v99 + v8 = bxor_not v0, v1 + store v8, v99 + v9 = band_imm v0, 42 + store v9, v99 + v10 = bor_imm v0, 42 + store v10, v99 + v11 = bxor_imm v0, 42 + store v11, v99 + return +} diff --git a/cranelift/filetests/isa/x86/legalize-icmp-i8.clif 
b/cranelift/filetests/isa/x86/legalize-icmp-i8.clif new file mode 100644 index 0000000000..41bd27950f --- /dev/null +++ b/cranelift/filetests/isa/x86/legalize-icmp-i8.clif @@ -0,0 +1,19 @@ +test compile +target x86_64 + +; regex: V=v\d+ + +function u0:0(i8, i8) -> i8 fast { +ebb0(v0: i8, v1: i8): + v2 = icmp_imm sle v0, 0 + ; check: $(e1=$V) = sextend.i32 v0 + ; nextln: v2 = icmp_imm sle $e1, 0 + v3 = bint.i8 v2 + v4 = icmp eq v0, v1 + ; check: $(e2=$V) = uextend.i32 v0 + ; nextln: $(e3=$V) = uextend.i32 v1 + ; nextln: v4 = icmp eq $e2, $e3 + v5 = bint.i8 v4 + v6 = iadd v3, v5 + return v6 +} diff --git a/cranelift/filetests/isa/x86/legalize-shlr-i8.clif b/cranelift/filetests/isa/x86/legalize-shlr-i8.clif new file mode 100644 index 0000000000..dbd0da1204 --- /dev/null +++ b/cranelift/filetests/isa/x86/legalize-shlr-i8.clif @@ -0,0 +1,24 @@ +test compile +target x86_64 + +; regex: V=v\d+ + +function u0:0(i8, i8) -> i8 fast { +ebb0(v0: i8, v1: i8): + v2 = ishl v0, v1 + ; check: $(e1=$V) = uextend.i32 v0 + ; check: $(r1=$V) = ishl $e1, v1 + ; check: v2 = ireduce.i8 $r1 + v3 = ushr v0, v1 + ; check: $(e2=$V) = uextend.i32 v0 + ; check: $(r2=$V) = ushr $e2, v1 + ; check: v3 = ireduce.i8 $r2 + v4 = sshr v0, v1 + ; check: $(e3=$V) = sextend.i32 v0 + ; check: $(r3=$V) = sshr $e3, v1 + ; check: v4 = ireduce.i8 $r3 + + v5 = iadd v2, v3 + v6 = iadd v4, v5 + return v6 +} diff --git a/cranelift/filetests/isa/x86/legalize-urem-i8.clif b/cranelift/filetests/isa/x86/legalize-urem-i8.clif new file mode 100644 index 0000000000..0c66a3f580 --- /dev/null +++ b/cranelift/filetests/isa/x86/legalize-urem-i8.clif @@ -0,0 +1,15 @@ +test compile +target x86_64 + +; regex: V=v\d+ + +function u0:0(i8, i8) -> i8 fast { +ebb0(v0: i8, v1: i8): + v2 = urem v0, v1 + ; check: $(a=$V) = uextend.i32 v0 + ; nextln: $(b=$V) = uextend.i32 v1 + ; nextln: $(c=$V) = iconst.i32 0 + ; nextln: $(V), $(r=$V) = x86_udivmodx $a, $c, $b + ; nextln: v2 = ireduce.i8 $r + return v2 +} diff --git 
a/cranelift/filetests/legalizer/bitrev.clif b/cranelift/filetests/legalizer/bitrev.clif index 0f75c373a9..b4098b87b4 100644 --- a/cranelift/filetests/legalizer/bitrev.clif +++ b/cranelift/filetests/legalizer/bitrev.clif @@ -6,68 +6,125 @@ ebb0(v0: i8): v1 = bitrev.i8 v0 return v1 } -; check: v2 = band_imm v0, 170 -; check: v3 = ushr_imm v2, 1 -; check: v4 = band_imm v0, 85 -; check: v5 = ishl_imm v4, 1 -; check: v16 = uextend.i32 v3 -; check: v17 = uextend.i32 v5 -; check: v18 = bor v16, v17 -; check: v6 = ireduce.i8 v18 -; check: v7 = band_imm v6, 204 -; check: v8 = ushr_imm v7, 2 -; check: v9 = band_imm v6, 51 -; check: v10 = ushr_imm v9, 2 -; check: v19 = uextend.i32 v8 -; check: v20 = uextend.i32 v10 -; check: v21 = bor v19, v20 -; check: v11 = ireduce.i8 v21 -; check: v12 = band_imm v11, 240 -; check: v13 = ushr_imm v12, 4 -; check: v14 = band_imm v11, 15 -; check: v15 = ishl_imm v14, 4 -; check: v22 = uextend.i32 v13 -; check: v23 = uextend.i32 v15 -; check: v24 = bor v22, v23 -; check: v1 = ireduce.i8 v24 +; check: v16 = uextend.i32 v0 +; check: v17 = band_imm v16, 170 +; check: v2 = ireduce.i8 v17 +; check: v18 = uextend.i32 v2 +; check: v19 = ushr_imm v18, 1 +; check: v3 = ireduce.i8 v19 +; check: v20 = uextend.i32 v0 +; check: v21 = band_imm v20, 85 +; check: v4 = ireduce.i8 v21 +; check: v22 = uextend.i32 v4 +; check: v23 = ishl_imm v22, 1 +; check: v5 = ireduce.i8 v23 +; check: v24 = uextend.i32 v3 +; check: v25 = uextend.i32 v5 +; check: v26 = bor v24, v25 +; check: v6 = ireduce.i8 v26 +; check: v27 = uextend.i32 v6 +; check: v28 = band_imm v27, 204 +; check: v7 = ireduce.i8 v28 +; check: v29 = uextend.i32 v7 +; check: v30 = ushr_imm v29, 2 +; check: v8 = ireduce.i8 v30 +; check: v31 = uextend.i32 v6 +; check: v32 = band_imm v31, 51 +; check: v9 = ireduce.i8 v32 +; check: v33 = uextend.i32 v9 +; check: v34 = ushr_imm v33, 2 +; check: v10 = ireduce.i8 v34 +; check: v35 = uextend.i32 v8 +; check: v36 = uextend.i32 v10 +; check: v37 = bor v35, v36 +; 
check: v11 = ireduce.i8 v37 +; check: v38 = uextend.i32 v11 +; check: v39 = band_imm v38, 240 +; check: v12 = ireduce.i8 v39 +; check: v40 = uextend.i32 v12 +; check: v41 = ushr_imm v40, 4 +; check: v13 = ireduce.i8 v41 +; check: v42 = uextend.i32 v11 +; check: v43 = band_imm v42, 15 +; check: v14 = ireduce.i8 v43 +; check: v44 = uextend.i32 v14 +; check: v45 = ishl_imm v44, 4 +; check: v15 = ireduce.i8 v45 +; check: v46 = uextend.i32 v13 +; check: v47 = uextend.i32 v15 +; check: v48 = bor v46, v47 +; check: v1 = ireduce.i8 v48 +; check: return v1 function %reverse_bits_16(i16) -> i16 { ebb0(v0: i16): v1 = bitrev.i16 v0 return v1 } -; check: v2 = band_imm v0, 0xaaaa -; check: v3 = ushr_imm v2, 1 -; check: v4 = band_imm v0, 0x5555 -; check: v5 = ishl_imm v4, 1 -; check: v21 = uextend.i32 v3 -; check: v22 = uextend.i32 v5 -; check: v23 = bor v21, v22 -; check: v6 = ireduce.i16 v23 -; check: v7 = band_imm v6, 0xcccc -; check: v8 = ushr_imm v7, 2 -; check: v9 = band_imm v6, 0x3333 -; check: v10 = ushr_imm v9, 2 -; check: v24 = uextend.i32 v8 -; check: v25 = uextend.i32 v10 -; check: v26 = bor v24, v25 -; check: v11 = ireduce.i16 v26 -; check: v12 = band_imm v11, 0xf0f0 -; check: v13 = ushr_imm v12, 4 -; check: v14 = band_imm v11, 3855 -; check: v15 = ishl_imm v14, 4 -; check: v27 = uextend.i32 v13 -; check: v28 = uextend.i32 v15 -; check: v29 = bor v27, v28 -; check: v16 = ireduce.i16 v29 -; check: v17 = band_imm v16, 0xff00 -; check: v18 = ushr_imm v17, 8 -; check: v19 = band_imm v16, 255 -; check: v20 = ishl_imm v19, 8 -; check: v30 = uextend.i32 v18 -; check: v31 = uextend.i32 v20 -; check: v32 = bor v30, v31 -; check: v1 = ireduce.i16 v32 +; check: v21 = uextend.i32 v0 +; check: v22 = band_imm v21, 0xaaaa +; check: v2 = ireduce.i16 v22 +; check: v23 = uextend.i32 v2 +; check: v24 = ushr_imm v23, 1 +; check: v3 = ireduce.i16 v24 +; check: v25 = uextend.i32 v0 +; check: v26 = band_imm v25, 0x5555 +; check: v4 = ireduce.i16 v26 +; check: v27 = uextend.i32 v4 +; check: 
v28 = ishl_imm v27, 1 +; check: v5 = ireduce.i16 v28 +; check: v29 = uextend.i32 v3 +; check: v30 = uextend.i32 v5 +; check: v31 = bor v29, v30 +; check: v6 = ireduce.i16 v31 +; check: v32 = uextend.i32 v6 +; check: v33 = band_imm v32, 0xcccc +; check: v7 = ireduce.i16 v33 +; check: v34 = uextend.i32 v7 +; check: v35 = ushr_imm v34, 2 +; check: v8 = ireduce.i16 v35 +; check: v36 = uextend.i32 v6 +; check: v37 = band_imm v36, 0x3333 +; check: v9 = ireduce.i16 v37 +; check: v38 = uextend.i32 v9 +; check: v39 = ushr_imm v38, 2 +; check: v10 = ireduce.i16 v39 +; check: v40 = uextend.i32 v8 +; check: v41 = uextend.i32 v10 +; check: v42 = bor v40, v41 +; check: v11 = ireduce.i16 v42 +; check: v43 = uextend.i32 v11 +; check: v44 = band_imm v43, 0xf0f0 +; check: v12 = ireduce.i16 v44 +; check: v45 = uextend.i32 v12 +; check: v46 = ushr_imm v45, 4 +; check: v13 = ireduce.i16 v46 +; check: v47 = uextend.i32 v11 +; check: v48 = band_imm v47, 3855 +; check: v14 = ireduce.i16 v48 +; check: v49 = uextend.i32 v14 +; check: v50 = ishl_imm v49, 4 +; check: v15 = ireduce.i16 v50 +; check: v51 = uextend.i32 v13 +; check: v52 = uextend.i32 v15 +; check: v53 = bor v51, v52 +; check: v16 = ireduce.i16 v53 +; check: v54 = uextend.i32 v16 +; check: v55 = band_imm v54, 0xff00 +; check: v17 = ireduce.i16 v55 +; check: v56 = uextend.i32 v17 +; check: v57 = ushr_imm v56, 8 +; check: v18 = ireduce.i16 v57 +; check: v58 = uextend.i32 v16 +; check: v59 = band_imm v58, 255 +; check: v19 = ireduce.i16 v59 +; check: v60 = uextend.i32 v19 +; check: v61 = ishl_imm v60, 8 +; check: v20 = ireduce.i16 v61 +; check: v62 = uextend.i32 v18 +; check: v63 = uextend.i32 v20 +; check: v64 = bor v62, v63 +; check: v1 = ireduce.i16 v64 ; check: return v1 function %reverse_bits_32(i32) -> i32 { diff --git a/lib/codegen/meta-python/base/legalize.py b/lib/codegen/meta-python/base/legalize.py index 9f6175f138..ec82ed9d3f 100644 --- a/lib/codegen/meta-python/base/legalize.py +++ 
b/lib/codegen/meta-python/base/legalize.py @@ -30,6 +30,13 @@ from .instructions import bitrev from cdsl.ast import Var from cdsl.xform import Rtl, XFormGroup +try: + from typing import TYPE_CHECKING # noqa + if TYPE_CHECKING: + from cdsl.instructions import Instruction # noqa +except ImportError: + TYPE_CHECKING = False + narrow = XFormGroup('narrow', """ Legalize instructions by narrowing. @@ -89,6 +96,7 @@ expand.custom_legalize(insts.stack_store, 'expand_stack_store') x = Var('x') y = Var('y') +z = Var('z') a = Var('a') a1 = Var('a1') a2 = Var('a2') @@ -174,6 +182,92 @@ narrow.legalize( a << iconcat(al, ah) )) + +def widen_one_arg(signed, op): + # type: (bool, Instruction) -> None + for int_ty in [types.i8, types.i16]: + if signed: + widen.legalize( + a << op.bind(int_ty)(b), + Rtl( + x << sextend.i32(b), + z << op.i32(x), + a << ireduce.bind(int_ty)(z) + )) + else: + widen.legalize( + a << op.bind(int_ty)(b), + Rtl( + x << uextend.i32(b), + z << op.i32(x), + a << ireduce.bind(int_ty)(z) + )) + + +def widen_two_arg(signed, op): + # type: (bool, Instruction) -> None + for int_ty in [types.i8, types.i16]: + if signed: + widen.legalize( + a << op.bind(int_ty)(b, c), + Rtl( + x << sextend.i32(b), + y << sextend.i32(c), + z << op.i32(x, y), + a << ireduce.bind(int_ty)(z) + )) + else: + widen.legalize( + a << op.bind(int_ty)(b, c), + Rtl( + x << uextend.i32(b), + y << uextend.i32(c), + z << op.i32(x, y), + a << ireduce.bind(int_ty)(z) + )) + + +def widen_imm(signed, op): + # type: (bool, Instruction) -> None + for int_ty in [types.i8, types.i16]: + if signed: + widen.legalize( + a << op.bind(int_ty)(b, c), + Rtl( + x << sextend.i32(b), + z << op.i32(x, c), + a << ireduce.bind(int_ty)(z) + )) + else: + widen.legalize( + a << op.bind(int_ty)(b, c), + Rtl( + x << uextend.i32(b), + z << op.i32(x, c), + a << ireduce.bind(int_ty)(z) + )) + + +for binop in [iadd, isub, imul, udiv, urem]: + widen_two_arg(False, binop) + +widen_two_arg(True, sdiv) + +widen_one_arg(False, 
bnot) + +for binop in [iadd_imm, imul_imm, udiv_imm, urem_imm]: + widen_imm(False, binop) + +for binop in [sdiv_imm, srem_imm]: + widen_imm(True, binop) + +# bit ops +for binop in [band, bor, bxor, band_not, bor_not, bxor_not]: + widen_two_arg(False, binop) + +for binop in [band_imm, bor_imm, bxor_imm]: + widen_imm(False, binop) + for int_ty in [types.i8, types.i16]: widen.legalize( a << iconst.bind(int_ty)(b), @@ -210,63 +304,6 @@ widen.legalize( a << ireduce(b) )) -for binop in [iadd, isub, imul, udiv, band, bor, bxor]: - for int_ty in [types.i8, types.i16]: - widen.legalize( - a << binop.bind(int_ty)(x, y), - Rtl( - b << uextend.i32(x), - c << uextend.i32(y), - d << binop(b, c), - a << ireduce(d) - ) - ) - -for binop in [sdiv]: - for int_ty in [types.i8, types.i16]: - widen.legalize( - a << binop.bind(int_ty)(x, y), - Rtl( - b << sextend.i32(x), - c << sextend.i32(y), - d << binop(b, c), - a << ireduce(d) - ) - ) - -for unop in [bnot]: - for int_ty in [types.i8, types.i16]: - widen.legalize( - a << unop.bind(int_ty)(x), - Rtl( - b << sextend.i32(x), - d << unop(b), - a << ireduce(d) - ) - ) - -for binop in [iadd_imm, imul_imm, udiv_imm]: - for int_ty in [types.i8, types.i16]: - widen.legalize( - a << binop.bind(int_ty)(x, y), - Rtl( - b << uextend.i32(x), - c << binop(b, y), - a << ireduce(c) - ) - ) - -for binop in [sdiv_imm]: - for int_ty in [types.i8, types.i16]: - widen.legalize( - a << binop.bind(int_ty)(x, y), - Rtl( - b << sextend.i32(x), - c << binop(b, y), - a << ireduce(c) - ) - ) - for int_ty in [types.i8, types.i16]: widen.legalize( br_table.bind(int_ty)(x, y), @@ -285,6 +322,72 @@ for int_ty in [types.i8, types.i16]: ) ) +for int_ty in [types.i8, types.i16]: + for op in [ushr_imm, ishl_imm]: + widen.legalize( + a << op.bind(int_ty)(b, c), + Rtl( + x << uextend.i32(b), + z << op.i32(x, c), + a << ireduce.bind(int_ty)(z) + )) + + widen.legalize( + a << ishl.bind(int_ty)(b, c), + Rtl( + x << uextend.i32(b), + z << ishl.i32(x, c), + a << 
ireduce.bind(int_ty)(z) + )) + + widen.legalize( + a << ushr.bind(int_ty)(b, c), + Rtl( + x << uextend.i32(b), + z << ushr.i32(x, c), + a << ireduce.bind(int_ty)(z) + )) + + widen.legalize( + a << sshr.bind(int_ty)(b, c), + Rtl( + x << sextend.i32(b), + z << sshr.i32(x, c), + a << ireduce.bind(int_ty)(z) + )) + + for w_cc in [ + intcc.eq, intcc.ne, intcc.ugt, intcc.ult, intcc.uge, intcc.ule + ]: + widen.legalize( + a << insts.icmp_imm.bind(int_ty)(w_cc, b, c), + Rtl( + x << uextend.i32(b), + a << insts.icmp_imm(w_cc, x, c) + )) + widen.legalize( + a << insts.icmp.bind(int_ty)(w_cc, b, c), + Rtl( + x << uextend.i32(b), + y << uextend.i32(c), + a << insts.icmp.i32(w_cc, x, y) + )) + for w_cc in [intcc.sgt, intcc.slt, intcc.sge, intcc.sle]: + widen.legalize( + a << insts.icmp_imm.bind(int_ty)(w_cc, b, c), + Rtl( + x << sextend.i32(b), + a << insts.icmp_imm(w_cc, x, c) + )) + widen.legalize( + a << insts.icmp.bind(int_ty)(w_cc, b, c), + Rtl( + x << sextend.i32(b), + y << sextend.i32(c), + a << insts.icmp(w_cc, x, y) + ) + ) + # Expand integer operations with carry for RISC architectures that don't have # the flags. expand.legalize( diff --git a/lib/codegen/meta-python/cdsl/predicates.py b/lib/codegen/meta-python/cdsl/predicates.py index ee7b81f3da..c81173ad5c 100644 --- a/lib/codegen/meta-python/cdsl/predicates.py +++ b/lib/codegen/meta-python/cdsl/predicates.py @@ -244,7 +244,7 @@ class FieldPredicate(object): """ # Prepend `field` to the predicate function arguments. 
args = (self.field.rust_name(),) + tuple(map(str, self.args)) - return 'predicates::{}({})'.format(self.function, ', '.join(args)) + return '::predicates::{}({})'.format(self.function, ', '.join(args)) class IsEqual(FieldPredicate): diff --git a/lib/codegen/src/isa/riscv/enc_tables.rs b/lib/codegen/src/isa/riscv/enc_tables.rs index 46b2458b49..5f7c084a67 100644 --- a/lib/codegen/src/isa/riscv/enc_tables.rs +++ b/lib/codegen/src/isa/riscv/enc_tables.rs @@ -6,7 +6,6 @@ use isa; use isa::constraints::*; use isa::enc_tables::*; use isa::encoding::RecipeSizing; -use predicates; // Include the generated encoding tables: // - `LEVEL1_RV32` diff --git a/lib/codegen/src/isa/x86/enc_tables.rs b/lib/codegen/src/isa/x86/enc_tables.rs index ebaaa526f7..65b7d7c38e 100644 --- a/lib/codegen/src/isa/x86/enc_tables.rs +++ b/lib/codegen/src/isa/x86/enc_tables.rs @@ -10,7 +10,6 @@ use isa; use isa::constraints::*; use isa::enc_tables::*; use isa::encoding::RecipeSizing; -use predicates; include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs")); include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs"));