diff --git a/cranelift/filetests/isa/x86/ireduce-i16-to-i8.clif b/cranelift/filetests/isa/x86/ireduce-i16-to-i8.clif
new file mode 100644
index 0000000000..0f8303dfc4
--- /dev/null
+++ b/cranelift/filetests/isa/x86/ireduce-i16-to-i8.clif
@@ -0,0 +1,8 @@
+test compile
+target x86_64
+
+function u0:0(i16) -> i8 fast {
+ebb0(v0: i16):
+    v1 = ireduce.i8 v0
+    return v1
+}
diff --git a/cranelift/filetests/isa/x86/isub_imm-i8.clif b/cranelift/filetests/isa/x86/isub_imm-i8.clif
new file mode 100644
index 0000000000..8958b1afa4
--- /dev/null
+++ b/cranelift/filetests/isa/x86/isub_imm-i8.clif
@@ -0,0 +1,13 @@
+test compile
+target x86_64
+
+function u0:0(i8) -> i8 fast {
+ebb0(v0: i8):
+    v1 = iconst.i8 0
+    v2 = isub v1, v0
+    ; check: v4 = uextend.i32 v0
+    ; nextln: v6 = iconst.i32 0
+    ; nextln: isub v6, v4
+    ; nextln: ireduce.i8 v5
+    return v2
+}
diff --git a/cranelift/filetests/isa/x86/legalize-clz-ctz-i8.clif b/cranelift/filetests/isa/x86/legalize-clz-ctz-i8.clif
new file mode 100644
index 0000000000..914bcb0e30
--- /dev/null
+++ b/cranelift/filetests/isa/x86/legalize-clz-ctz-i8.clif
@@ -0,0 +1,25 @@
+test compile
+target x86_64
+
+; regex: V=v\d+
+
+function u0:0(i8) -> i8, i8 fast {
+ebb0(v0: i8):
+    v1 = clz v0
+    ; check: v3 = uextend.i32 v0
+    ; nextln: v6 = iconst.i32 -1
+    ; nextln: v7 = iconst.i32 31
+    ; nextln: v8, v9 = x86_bsr v3
+    ; nextln: v10 = selectif.i32 eq v9, v6, v8
+    ; nextln: v4 = isub v7, v10
+    ; nextln: v5 = iadd_imm v4, -24
+    ; nextln: v1 = ireduce.i8 v5
+    v2 = ctz v0
+    ; nextln: v11 = uextend.i32 v0
+    ; nextln: v12 = bor_imm v11, 256
+    ; nextln: v14 = iconst.i32 32
+    ; nextln: v15, v16 = x86_bsf v12
+    ; nextln: v13 = selectif.i32 eq v16, v14, v15
+    ; nextln: v2 = ireduce.i8 v13
+    return v1, v2
+}
diff --git a/cranelift/filetests/isa/x86/legalize-popcnt-i8.clif b/cranelift/filetests/isa/x86/legalize-popcnt-i8.clif
new file mode 100644
index 0000000000..e761a2c7ca
--- /dev/null
+++ b/cranelift/filetests/isa/x86/legalize-popcnt-i8.clif
@@ -0,0 +1,9 @@
+test compile
+target x86_64
+
+function u0:0(i8) -> i8 fast {
+ebb0(v0: i8):
+    v1 = popcnt v0
+    ; not: sextend.i32 v0
+    return v1
+}
diff --git a/cranelift/filetests/isa/x86/uextend-i8-to-i16.clif b/cranelift/filetests/isa/x86/uextend-i8-to-i16.clif
new file mode 100644
index 0000000000..d92da90343
--- /dev/null
+++ b/cranelift/filetests/isa/x86/uextend-i8-to-i16.clif
@@ -0,0 +1,14 @@
+test compile
+target x86_64
+
+function u0:0(i8) -> i16 fast {
+ebb0(v0: i8):
+    v1 = uextend.i16 v0
+    return v1
+}
+
+function u0:1(i8) -> i16 fast {
+ebb0(v0: i8):
+    v1 = sextend.i16 v0
+    return v1
+}
diff --git a/lib/codegen/meta-python/base/legalize.py b/lib/codegen/meta-python/base/legalize.py
index a1c6883356..0625bb3963 100644
--- a/lib/codegen/meta-python/base/legalize.py
+++ b/lib/codegen/meta-python/base/legalize.py
@@ -248,12 +248,12 @@ def widen_imm(signed, op):
         ))
 
 
+# int ops
 for binop in [iadd, isub, imul, udiv, urem]:
     widen_two_arg(False, binop)
 
-widen_two_arg(True, sdiv)
-
-widen_one_arg(False, bnot)
+for binop in [sdiv, srem]:
+    widen_two_arg(True, binop)
 
 for binop in [iadd_imm, imul_imm, udiv_imm, urem_imm]:
     widen_imm(False, binop)
@@ -261,13 +261,50 @@ for binop in [iadd_imm, imul_imm, udiv_imm, urem_imm]:
 for binop in [sdiv_imm, srem_imm]:
     widen_imm(True, binop)
 
+widen_imm(False, irsub_imm)
+
 # bit ops
+widen_one_arg(False, bnot)
+
 for binop in [band, bor, bxor, band_not, bor_not, bxor_not]:
     widen_two_arg(False, binop)
 
 for binop in [band_imm, bor_imm, bxor_imm]:
     widen_imm(False, binop)
 
+widen_one_arg(False, insts.popcnt)
+
+for (int_ty, num) in [(types.i8, 24), (types.i16, 16)]:
+    widen.legalize(
+        a << insts.clz.bind(int_ty)(b),
+        Rtl(
+            c << uextend.i32(b),
+            d << insts.clz.i32(c),
+            e << iadd_imm(d, imm64(-num)),
+            a << ireduce.bind(int_ty)(e)
+        ))
+
+    widen.legalize(
+        a << insts.cls.bind(int_ty)(b),
+        Rtl(
+            c << sextend.i32(b),
+            d << insts.cls.i32(c),
+            e << iadd_imm(d, imm64(-num)),
+            a << ireduce.bind(int_ty)(e)
+        ))
+
+for (int_ty, num) in [(types.i8, 1 << 8), (types.i16, 1 << 16)]:
+    widen.legalize(
+        a << insts.ctz.bind(int_ty)(b),
+        Rtl(
+            c << uextend.i32(b),
+            # When `b` is zero, ctz then returns the type's size in bits.
+            d << bor_imm(c, imm64(num)),
+            e << insts.ctz.i32(d),
+            a << ireduce.bind(int_ty)(e)
+        ))
+
+# iconst
 for int_ty in [types.i8, types.i16]:
     widen.legalize(
         a << iconst.bind(int_ty)(b),
@@ -276,6 +313,21 @@ for int_ty in [types.i8, types.i16]:
             a << ireduce.bind(int_ty)(c)
         ))
 
+widen.legalize(
+    a << uextend.i16.i8(b),
+    Rtl(
+        c << uextend.i32(b),
+        a << ireduce(c)
+    ))
+
+widen.legalize(
+    a << sextend.i16.i8(b),
+    Rtl(
+        c << sextend.i32(b),
+        a << ireduce(c)
+    ))
+
+
 widen.legalize(
     store.i8(flags, a, ptr, offset),
     Rtl(
diff --git a/lib/codegen/meta-python/isa/x86/encodings.py b/lib/codegen/meta-python/isa/x86/encodings.py
index 5db4e63b9a..d07e003c3c 100644
--- a/lib/codegen/meta-python/isa/x86/encodings.py
+++ b/lib/codegen/meta-python/isa/x86/encodings.py
@@ -173,7 +173,8 @@ enc_i32_i64(x86.smulx, r.mulx, 0xf7, rrr=5)
 enc_i32_i64(x86.umulx, r.mulx, 0xf7, rrr=4)
 
 enc_i32_i64(base.copy, r.umr, 0x89)
-enc_both(base.copy.b1, r.umr, 0x89)
+for ty in [types.b1, types.i8, types.i16]:
+    enc_both(base.copy.bind(ty), r.umr, 0x89)
 
 # For x86-64, only define REX forms for now, since we can't describe the
 # special regunit immediate operands with the current constraint language.
@@ -301,11 +302,12 @@ for recipe in [r.st_abcd, r.stDisp8_abcd, r.stDisp32_abcd]:
 enc_i32_i64(base.spill, r.spillSib32, 0x89)
 enc_i32_i64(base.regspill, r.regspill32, 0x89)
 
-# Use a 32-bit write for spilling `b1` to avoid constraining the permitted
-# registers.
+# Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid
+# constraining the permitted registers.
 # See MIN_SPILL_SLOT_SIZE which makes this safe.
-enc_both(base.spill.b1, r.spillSib32, 0x89)
-enc_both(base.regspill.b1, r.regspill32, 0x89)
+for ty in [types.b1, types.i8, types.i16]:
+    enc_both(base.spill.bind(ty), r.spillSib32, 0x89)
+    enc_both(base.regspill.bind(ty), r.regspill32, 0x89)
 
 for recipe in [r.ld, r.ldDisp8, r.ldDisp32]:
     enc_i32_i64_ld_st(base.load, True, recipe, 0x8b)
@@ -319,9 +321,10 @@ for recipe in [r.ld, r.ldDisp8, r.ldDisp32]:
 enc_i32_i64(base.fill, r.fillSib32, 0x8b)
 enc_i32_i64(base.regfill, r.regfill32, 0x8b)
 
-# Load 32 bits from `b1` spill slots. See `spill.b1` above.
-enc_both(base.fill.b1, r.fillSib32, 0x8b)
-enc_both(base.regfill.b1, r.regfill32, 0x8b)
+# Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above.
+for ty in [types.b1, types.i8, types.i16]:
+    enc_both(base.fill.bind(ty), r.fillSib32, 0x8b)
+    enc_both(base.regfill.bind(ty), r.regfill32, 0x8b)
 
 # Push and Pop
 X86_32.enc(x86.push.i32, *r.pushq(0x50))
@@ -578,8 +581,11 @@ X86_64.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
 # Numerical conversions.
 
 # Reducing an integer is a no-op.
+X86_32.enc(base.ireduce.i8.i16, r.null, 0)
 X86_32.enc(base.ireduce.i8.i32, r.null, 0)
 X86_32.enc(base.ireduce.i16.i32, r.null, 0)
+
+X86_64.enc(base.ireduce.i8.i16, r.null, 0)
 X86_64.enc(base.ireduce.i8.i32, r.null, 0)
 X86_64.enc(base.ireduce.i16.i32, r.null, 0)
 X86_64.enc(base.ireduce.i8.i64, r.null, 0)
diff --git a/lib/frontend/src/frontend.rs b/lib/frontend/src/frontend.rs
index 930466da5b..c2befee997 100644
--- a/lib/frontend/src/frontend.rs
+++ b/lib/frontend/src/frontend.rs
@@ -591,7 +591,7 @@ impl<'a> FunctionBuilder<'a> {
             colocated: false,
         });
 
-        self.ins().uextend(types::I32, ch);
+        let ch = self.ins().uextend(types::I32, ch);
         self.ins().call(libc_memset, &[buffer, ch, len]);
     }
 
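Of the newly widened operations, the signed two-operand group (`widen_two_arg(True, binop)` for `sdiv` and `srem`) has no filetest in this patch. The sketch below is a hypothetical test, not part of the change: it assumes the signed rule sign-extends both i8 operands to i32, performs the operation there, and reduces the result, and it checks only the extend/reduce steps, since the 32-bit `srem` itself is lowered further by the x86 backend. Value numbers are omitted because they depend on the legalizer's numbering.

; hypothetical filetest sketch -- not part of the patch above
test compile
target x86_64

function u0:0(i8, i8) -> i8 fast {
ebb0(v0: i8, v1: i8):
    v2 = srem v0, v1
    ; check: sextend.i32 v0
    ; check: sextend.i32 v1
    ; check: ireduce.i8
    return v2
}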