Add encodings for i8 and i16 copy, spill, fill, ireduce.i8.i16 (#534)
* Add encodings for i8 and i16 copy, spill, fill, ireduce.i8.i16
Also adds legalization for srem, irsub_imm, {u,s}extend.i16.i8
Fixes #477 cc #466
* Legalize popcnt, clz and ctz for i8 and i16
* Fix bug in call_memset
This commit is contained in:
8
cranelift/filetests/isa/x86/ireduce-i16-to-i8.clif
Normal file
8
cranelift/filetests/isa/x86/ireduce-i16-to-i8.clif
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
test compile
|
||||||
|
target x86_64
|
||||||
|
|
||||||
|
function u0:0(i16) -> i8 fast {
|
||||||
|
ebb0(v0: i16):
|
||||||
|
v1 = ireduce.i8 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
13
cranelift/filetests/isa/x86/isub_imm-i8.clif
Normal file
13
cranelift/filetests/isa/x86/isub_imm-i8.clif
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
test compile
|
||||||
|
target x86_64
|
||||||
|
|
||||||
|
function u0:0(i8) -> i8 fast {
|
||||||
|
ebb0(v0: i8):
|
||||||
|
v1 = iconst.i8 0
|
||||||
|
v2 = isub v1, v0
|
||||||
|
; check: v4 = uextend.i32 v0
|
||||||
|
; nextln: v6 = iconst.i32 0
|
||||||
|
; nextln: v5 = isub v6, v4
|
||||||
|
; nextln: v2 = ireduce.i8 v5
|
||||||
|
return v2
|
||||||
|
}
|
||||||
25
cranelift/filetests/isa/x86/legalize-clz-ctz-i8.clif
Normal file
25
cranelift/filetests/isa/x86/legalize-clz-ctz-i8.clif
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
test compile
|
||||||
|
target x86_64
|
||||||
|
|
||||||
|
; regex: V=v\d+
|
||||||
|
|
||||||
|
function u0:0(i8) -> i8, i8 fast {
|
||||||
|
ebb0(v0: i8):
|
||||||
|
v1 = clz v0
|
||||||
|
; check: v3 = uextend.i32 v0
|
||||||
|
; nextln: v6 = iconst.i32 -1
|
||||||
|
; nextln: v7 = iconst.i32 31
|
||||||
|
; nextln: v8, v9 = x86_bsr v3
|
||||||
|
; nextln: v10 = selectif.i32 eq v9, v6, v8
|
||||||
|
; nextln: v4 = isub v7, v10
|
||||||
|
; nextln: v5 = iadd_imm v4, -24
|
||||||
|
; nextln: v1 = ireduce.i8 v5
|
||||||
|
v2 = ctz v0
|
||||||
|
; nextln: v11 = uextend.i32 v0
|
||||||
|
; nextln: v12 = bor_imm v11, 256
|
||||||
|
; nextln: v14 = iconst.i32 32
|
||||||
|
; nextln: v15, v16 = x86_bsf v12
|
||||||
|
; nextln: v13 = selectif.i32 eq v16, v14, v15
|
||||||
|
; nextln: v2 = ireduce.i8 v13
|
||||||
|
return v1, v2
|
||||||
|
}
|
||||||
9
cranelift/filetests/isa/x86/legalize-popcnt-i8.clif
Normal file
9
cranelift/filetests/isa/x86/legalize-popcnt-i8.clif
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
test compile
|
||||||
|
target x86_64
|
||||||
|
|
||||||
|
function u0:0(i8) -> i8 fast {
|
||||||
|
ebb0(v0: i8):
|
||||||
|
v1 = popcnt v0
|
||||||
|
; check-not: sextend.i32 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
14
cranelift/filetests/isa/x86/uextend-i8-to-i16.clif
Normal file
14
cranelift/filetests/isa/x86/uextend-i8-to-i16.clif
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
test compile
|
||||||
|
target x86_64
|
||||||
|
|
||||||
|
function u0:0(i8) -> i16 fast {
|
||||||
|
ebb0(v0: i8):
|
||||||
|
v1 = uextend.i16 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
function u0:1(i8) -> i16 fast {
|
||||||
|
ebb0(v0: i8):
|
||||||
|
v1 = sextend.i16 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
@@ -248,12 +248,12 @@ def widen_imm(signed, op):
|
|||||||
))
|
))
|
||||||
|
|
||||||
|
|
||||||
|
# int ops
|
||||||
for binop in [iadd, isub, imul, udiv, urem]:
|
for binop in [iadd, isub, imul, udiv, urem]:
|
||||||
widen_two_arg(False, binop)
|
widen_two_arg(False, binop)
|
||||||
|
|
||||||
widen_two_arg(True, sdiv)
|
for binop in [sdiv, srem]:
|
||||||
|
widen_two_arg(True, binop)
|
||||||
widen_one_arg(False, bnot)
|
|
||||||
|
|
||||||
for binop in [iadd_imm, imul_imm, udiv_imm, urem_imm]:
|
for binop in [iadd_imm, imul_imm, udiv_imm, urem_imm]:
|
||||||
widen_imm(False, binop)
|
widen_imm(False, binop)
|
||||||
@@ -261,13 +261,50 @@ for binop in [iadd_imm, imul_imm, udiv_imm, urem_imm]:
|
|||||||
for binop in [sdiv_imm, srem_imm]:
|
for binop in [sdiv_imm, srem_imm]:
|
||||||
widen_imm(True, binop)
|
widen_imm(True, binop)
|
||||||
|
|
||||||
|
widen_imm(False, irsub_imm)
|
||||||
|
|
||||||
# bit ops
|
# bit ops
|
||||||
|
widen_one_arg(False, bnot)
|
||||||
|
|
||||||
for binop in [band, bor, bxor, band_not, bor_not, bxor_not]:
|
for binop in [band, bor, bxor, band_not, bor_not, bxor_not]:
|
||||||
widen_two_arg(False, binop)
|
widen_two_arg(False, binop)
|
||||||
|
|
||||||
for binop in [band_imm, bor_imm, bxor_imm]:
|
for binop in [band_imm, bor_imm, bxor_imm]:
|
||||||
widen_imm(False, binop)
|
widen_imm(False, binop)
|
||||||
|
|
||||||
|
widen_one_arg(False, insts.popcnt)
|
||||||
|
|
||||||
|
for (int_ty, num) in [(types.i8, 24), (types.i16, 16)]:
|
||||||
|
widen.legalize(
|
||||||
|
a << insts.clz.bind(int_ty)(b),
|
||||||
|
Rtl(
|
||||||
|
c << uextend.i32(b),
|
||||||
|
d << insts.clz.i32(c),
|
||||||
|
e << iadd_imm(d, imm64(-num)),
|
||||||
|
a << ireduce.bind(int_ty)(e)
|
||||||
|
))
|
||||||
|
|
||||||
|
widen.legalize(
|
||||||
|
a << insts.cls.bind(int_ty)(b),
|
||||||
|
Rtl(
|
||||||
|
c << sextend.i32(b),
|
||||||
|
d << insts.cls.i32(c),
|
||||||
|
e << iadd_imm(d, imm64(-num)),
|
||||||
|
a << ireduce.bind(int_ty)(e)
|
||||||
|
))
|
||||||
|
|
||||||
|
for (int_ty, num) in [(types.i8, 1 << 8), (types.i16, 1 << 16)]:
|
||||||
|
widen.legalize(
|
||||||
|
a << insts.ctz.bind(int_ty)(b),
|
||||||
|
Rtl(
|
||||||
|
c << uextend.i32(b),
|
||||||
|
# Set the bit just above the width of `int_ty` so that, when `b` is zero, `ctz` returns the size of `b` in bits.
|
||||||
|
d << bor_imm(c, imm64(num)),
|
||||||
|
e << insts.ctz.i32(d),
|
||||||
|
a << ireduce.bind(int_ty)(e)
|
||||||
|
))
|
||||||
|
|
||||||
|
# iconst
|
||||||
for int_ty in [types.i8, types.i16]:
|
for int_ty in [types.i8, types.i16]:
|
||||||
widen.legalize(
|
widen.legalize(
|
||||||
a << iconst.bind(int_ty)(b),
|
a << iconst.bind(int_ty)(b),
|
||||||
@@ -276,6 +313,21 @@ for int_ty in [types.i8, types.i16]:
|
|||||||
a << ireduce.bind(int_ty)(c)
|
a << ireduce.bind(int_ty)(c)
|
||||||
))
|
))
|
||||||
|
|
||||||
|
widen.legalize(
|
||||||
|
a << uextend.i16.i8(b),
|
||||||
|
Rtl(
|
||||||
|
c << uextend.i32(b),
|
||||||
|
a << ireduce(c)
|
||||||
|
))
|
||||||
|
|
||||||
|
widen.legalize(
|
||||||
|
a << sextend.i16.i8(b),
|
||||||
|
Rtl(
|
||||||
|
c << sextend.i32(b),
|
||||||
|
a << ireduce(c)
|
||||||
|
))
|
||||||
|
|
||||||
|
|
||||||
widen.legalize(
|
widen.legalize(
|
||||||
store.i8(flags, a, ptr, offset),
|
store.i8(flags, a, ptr, offset),
|
||||||
Rtl(
|
Rtl(
|
||||||
|
|||||||
@@ -173,7 +173,8 @@ enc_i32_i64(x86.smulx, r.mulx, 0xf7, rrr=5)
|
|||||||
enc_i32_i64(x86.umulx, r.mulx, 0xf7, rrr=4)
|
enc_i32_i64(x86.umulx, r.mulx, 0xf7, rrr=4)
|
||||||
|
|
||||||
enc_i32_i64(base.copy, r.umr, 0x89)
|
enc_i32_i64(base.copy, r.umr, 0x89)
|
||||||
enc_both(base.copy.b1, r.umr, 0x89)
|
for ty in [types.b1, types.i8, types.i16]:
|
||||||
|
enc_both(base.copy.bind(ty), r.umr, 0x89)
|
||||||
|
|
||||||
# For x86-64, only define REX forms for now, since we can't describe the
|
# For x86-64, only define REX forms for now, since we can't describe the
|
||||||
# special regunit immediate operands with the current constraint language.
|
# special regunit immediate operands with the current constraint language.
|
||||||
@@ -301,11 +302,12 @@ for recipe in [r.st_abcd, r.stDisp8_abcd, r.stDisp32_abcd]:
|
|||||||
enc_i32_i64(base.spill, r.spillSib32, 0x89)
|
enc_i32_i64(base.spill, r.spillSib32, 0x89)
|
||||||
enc_i32_i64(base.regspill, r.regspill32, 0x89)
|
enc_i32_i64(base.regspill, r.regspill32, 0x89)
|
||||||
|
|
||||||
# Use a 32-bit write for spilling `b1` to avoid constraining the permitted
|
# Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid
|
||||||
# registers.
|
# constraining the permitted registers.
|
||||||
# See MIN_SPILL_SLOT_SIZE which makes this safe.
|
# See MIN_SPILL_SLOT_SIZE which makes this safe.
|
||||||
enc_both(base.spill.b1, r.spillSib32, 0x89)
|
for ty in [types.b1, types.i8, types.i16]:
|
||||||
enc_both(base.regspill.b1, r.regspill32, 0x89)
|
enc_both(base.spill.bind(ty), r.spillSib32, 0x89)
|
||||||
|
enc_both(base.regspill.bind(ty), r.regspill32, 0x89)
|
||||||
|
|
||||||
for recipe in [r.ld, r.ldDisp8, r.ldDisp32]:
|
for recipe in [r.ld, r.ldDisp8, r.ldDisp32]:
|
||||||
enc_i32_i64_ld_st(base.load, True, recipe, 0x8b)
|
enc_i32_i64_ld_st(base.load, True, recipe, 0x8b)
|
||||||
@@ -319,9 +321,10 @@ for recipe in [r.ld, r.ldDisp8, r.ldDisp32]:
|
|||||||
enc_i32_i64(base.fill, r.fillSib32, 0x8b)
|
enc_i32_i64(base.fill, r.fillSib32, 0x8b)
|
||||||
enc_i32_i64(base.regfill, r.regfill32, 0x8b)
|
enc_i32_i64(base.regfill, r.regfill32, 0x8b)
|
||||||
|
|
||||||
# Load 32 bits from `b1` spill slots. See `spill.b1` above.
|
# Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above.
|
||||||
enc_both(base.fill.b1, r.fillSib32, 0x8b)
|
for ty in [types.b1, types.i8, types.i16]:
|
||||||
enc_both(base.regfill.b1, r.regfill32, 0x8b)
|
enc_both(base.fill.bind(ty), r.fillSib32, 0x8b)
|
||||||
|
enc_both(base.regfill.bind(ty), r.regfill32, 0x8b)
|
||||||
|
|
||||||
# Push and Pop
|
# Push and Pop
|
||||||
X86_32.enc(x86.push.i32, *r.pushq(0x50))
|
X86_32.enc(x86.push.i32, *r.pushq(0x50))
|
||||||
@@ -578,8 +581,11 @@ X86_64.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
|
|||||||
# Numerical conversions.
|
# Numerical conversions.
|
||||||
|
|
||||||
# Reducing an integer is a no-op.
|
# Reducing an integer is a no-op.
|
||||||
|
X86_32.enc(base.ireduce.i8.i16, r.null, 0)
|
||||||
X86_32.enc(base.ireduce.i8.i32, r.null, 0)
|
X86_32.enc(base.ireduce.i8.i32, r.null, 0)
|
||||||
X86_32.enc(base.ireduce.i16.i32, r.null, 0)
|
X86_32.enc(base.ireduce.i16.i32, r.null, 0)
|
||||||
|
|
||||||
|
X86_64.enc(base.ireduce.i8.i16, r.null, 0)
|
||||||
X86_64.enc(base.ireduce.i8.i32, r.null, 0)
|
X86_64.enc(base.ireduce.i8.i32, r.null, 0)
|
||||||
X86_64.enc(base.ireduce.i16.i32, r.null, 0)
|
X86_64.enc(base.ireduce.i16.i32, r.null, 0)
|
||||||
X86_64.enc(base.ireduce.i8.i64, r.null, 0)
|
X86_64.enc(base.ireduce.i8.i64, r.null, 0)
|
||||||
|
|||||||
@@ -591,7 +591,7 @@ impl<'a> FunctionBuilder<'a> {
|
|||||||
colocated: false,
|
colocated: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
self.ins().uextend(types::I32, ch);
|
let ch = self.ins().uextend(types::I32, ch);
|
||||||
self.ins().call(libc_memset, &[buffer, ch, len]);
|
self.ins().call(libc_memset, &[buffer, ch, len]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user