diff --git a/cranelift/filetests/legalizer/bitrev.clif b/cranelift/filetests/legalizer/bitrev.clif
new file mode 100644
index 0000000000..0f75c373a9
--- /dev/null
+++ b/cranelift/filetests/legalizer/bitrev.clif
@@ -0,0 +1,149 @@
+test legalizer
+target x86_64
+
+function %reverse_bits_8(i8) -> i8 {
+ebb0(v0: i8):
+    v1 = bitrev.i8 v0
+    return v1
+}
+; check: v2 = band_imm v0, 170
+; check: v3 = ushr_imm v2, 1
+; check: v4 = band_imm v0, 85
+; check: v5 = ishl_imm v4, 1
+; check: v16 = uextend.i32 v3
+; check: v17 = uextend.i32 v5
+; check: v18 = bor v16, v17
+; check: v6 = ireduce.i8 v18
+; check: v7 = band_imm v6, 204
+; check: v8 = ushr_imm v7, 2
+; check: v9 = band_imm v6, 51
+; check: v10 = ishl_imm v9, 2
+; check: v19 = uextend.i32 v8
+; check: v20 = uextend.i32 v10
+; check: v21 = bor v19, v20
+; check: v11 = ireduce.i8 v21
+; check: v12 = band_imm v11, 240
+; check: v13 = ushr_imm v12, 4
+; check: v14 = band_imm v11, 15
+; check: v15 = ishl_imm v14, 4
+; check: v22 = uextend.i32 v13
+; check: v23 = uextend.i32 v15
+; check: v24 = bor v22, v23
+; check: v1 = ireduce.i8 v24
+
+function %reverse_bits_16(i16) -> i16 {
+ebb0(v0: i16):
+    v1 = bitrev.i16 v0
+    return v1
+}
+; check: v2 = band_imm v0, 0xaaaa
+; check: v3 = ushr_imm v2, 1
+; check: v4 = band_imm v0, 0x5555
+; check: v5 = ishl_imm v4, 1
+; check: v21 = uextend.i32 v3
+; check: v22 = uextend.i32 v5
+; check: v23 = bor v21, v22
+; check: v6 = ireduce.i16 v23
+; check: v7 = band_imm v6, 0xcccc
+; check: v8 = ushr_imm v7, 2
+; check: v9 = band_imm v6, 0x3333
+; check: v10 = ishl_imm v9, 2
+; check: v24 = uextend.i32 v8
+; check: v25 = uextend.i32 v10
+; check: v26 = bor v24, v25
+; check: v11 = ireduce.i16 v26
+; check: v12 = band_imm v11, 0xf0f0
+; check: v13 = ushr_imm v12, 4
+; check: v14 = band_imm v11, 3855
+; check: v15 = ishl_imm v14, 4
+; check: v27 = uextend.i32 v13
+; check: v28 = uextend.i32 v15
+; check: v29 = bor v27, v28
+; check: v16 = ireduce.i16 v29
+; check: v17 = band_imm v16, 0xff00
+; check: v18 = ushr_imm v17, 8
+; check: v19 = band_imm v16, 255
+; check: v20 = ishl_imm v19, 8
+; check: v30 = uextend.i32 v18
+; check: v31 = uextend.i32 v20
+; check: v32 = bor v30, v31
+; check: v1 = ireduce.i16 v32
+; check: return v1
+
+function %reverse_bits_32(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = bitrev.i32 v0
+    return v1
+}
+; check: v24 = iconst.i32 0xaaaa_aaaa
+; check: v2 = band v0, v24
+; check: v3 = ushr_imm v2, 1
+; check: v4 = band_imm v0, 0x5555_5555
+; check: v5 = ishl_imm v4, 1
+; check: v6 = bor v3, v5
+; check: v25 = iconst.i32 0xcccc_cccc
+; check: v7 = band v6, v25
+; check: v8 = ushr_imm v7, 2
+; check: v9 = band_imm v6, 0x3333_3333
+; check: v10 = ishl_imm v9, 2
+; check: v11 = bor v8, v10
+; check: v26 = iconst.i32 0xf0f0_f0f0
+; check: v12 = band v11, v26
+; check: v13 = ushr_imm v12, 4
+; check: v14 = band_imm v11, 0x0f0f_0f0f
+; check: v15 = ishl_imm v14, 4
+; check: v16 = bor v13, v15
+; check: v27 = iconst.i32 0xff00_ff00
+; check: v17 = band v16, v27
+; check: v18 = ushr_imm v17, 8
+; check: v19 = band_imm v16, 0x00ff_00ff
+; check: v20 = ishl_imm v19, 8
+; check: v21 = bor v18, v20
+; check: v22 = ushr_imm v21, 16
+; check: v23 = ishl_imm v21, 16
+; check: v1 = bor v22, v23
+
+
+function %reverse_bits_64(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = bitrev.i64 v0
+    return v1
+}
+; check: v29 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa
+; check: v2 = band v0, v29
+; check: v3 = ushr_imm v2, 1
+; check: v30 = iconst.i64 0x5555_5555_5555_5555
+; check: v4 = band v0, v30
+; check: v5 = ishl_imm v4, 1
+; check: v6 = bor v3, v5
+; check: v31 = iconst.i64 0xcccc_cccc_cccc_cccc
+; check: v7 = band v6, v31
+; check: v8 = ushr_imm v7, 2
+; check: v32 = iconst.i64 0x3333_3333_3333_3333
+; check: v9 = band v6, v32
+; check: v10 = ishl_imm v9, 2
+; check: v11 = bor v8, v10
+; check: v33 = iconst.i64 0xf0f0_f0f0_f0f0_f0f0
+; check: v12 = band v11, v33
+; check: v13 = ushr_imm v12, 4
+; check: v34 = iconst.i64 0x0f0f_0f0f_0f0f_0f0f
+; check: v14 = band v11, v34
+; check: v15 = ishl_imm v14, 4
+; check: v16 = bor v13, v15
+; check: v35 = iconst.i64 0xff00_ff00_ff00_ff00
+; check: v17 = band v16, v35
+; check: v18 = ushr_imm v17, 8
+; check: v36 = iconst.i64 0x00ff_00ff_00ff_00ff
+; check: v19 = band v16, v36
+; check: v20 = ishl_imm v19, 8
+; check: v21 = bor v18, v20
+; check: v37 = iconst.i64 0xffff_0000_ffff_0000
+; check: v22 = band v21, v37
+; check: v23 = ushr_imm v22, 16
+; check: v38 = iconst.i64 0xffff_0000_ffff
+; check: v24 = band v21, v38
+; check: v25 = ishl_imm v24, 16
+; check: v26 = bor v23, v25
+; check: v27 = ushr_imm v26, 32
+; check: v28 = ishl_imm v26, 32
+; check: v1 = bor v27, v28
diff --git a/lib/codegen/meta-python/base/instructions.py b/lib/codegen/meta-python/base/instructions.py
index 25b8ba414b..9f7caf5c48 100644
--- a/lib/codegen/meta-python/base/instructions.py
+++ b/lib/codegen/meta-python/base/instructions.py
@@ -1409,6 +1409,15 @@ sshr_imm = Instruction(
 x = Operand('x', iB)
 a = Operand('a', iB)
 
+bitrev = Instruction(
+        'bitrev', r"""
+        Reverse the bits of an integer.
+
+        Reverses the bits in ``x``: bit ``i`` of the result is bit
+        ``N - 1 - i`` of ``x``, where ``N`` is the bit width of the type.
+        """,
+        ins=x, outs=a)
+
 clz = Instruction(
         'clz', r"""
         Count leading zero bits.
diff --git a/lib/codegen/meta-python/base/legalize.py b/lib/codegen/meta-python/base/legalize.py
index 3427d8ef44..9f6175f138 100644
--- a/lib/codegen/meta-python/base/legalize.py
+++ b/lib/codegen/meta-python/base/legalize.py
@@ -26,6 +26,7 @@ from .instructions import rotl, rotl_imm, rotr, rotr_imm
 from .instructions import f32const, f64const
 from .instructions import store, load
 from .instructions import br_table
+from .instructions import bitrev
 from cdsl.ast import Var
 from cdsl.xform import Rtl, XFormGroup
 
@@ -91,17 +92,35 @@ y = Var('y')
 a = Var('a')
 a1 = Var('a1')
 a2 = Var('a2')
+a3 = Var('a3')
+a4 = Var('a4')
 b = Var('b')
 b1 = Var('b1')
 b2 = Var('b2')
+b3 = Var('b3')
+b4 = Var('b4')
 b_in = Var('b_in')
 b_int = Var('b_int')
 c = Var('c')
 c1 = Var('c1')
 c2 = Var('c2')
+c3 = Var('c3')
+c4 = Var('c4')
 c_in = Var('c_in')
 c_int = Var('c_int')
 d = Var('d')
+d1 = Var('d1')
+d2 = Var('d2')
+d3 = Var('d3')
+d4 = Var('d4')
+e = Var('e')
+e1 = Var('e1')
+e2 = Var('e2')
+e3 = Var('e3')
+e4 = Var('e4')
+f = Var('f')
+f1 = Var('f1')
+f2 = Var('f2')
 xl = Var('xl')
 xh = Var('xh')
 yl = Var('yl')
@@ -382,6 +401,121 @@ expand.legalize(
         a << bxor(x, y)
     ))
 
+# Expand bitrev
+# Adapted from Stack Overflow.
+# https://stackoverflow.com/questions/746171/most-efficient-algorithm-for-bit-reversal-from-msb-lsb-to-lsb-msb-in-c
+#
+# Each stage swaps adjacent groups of 1, 2, 4, ... bits: the upper group of
+# each pair is masked and shifted right, the lower group is masked and
+# shifted left, and the two results are or'ed together. The last stage of the
+# i32 and i64 expansions swaps the two halves of the value, so the shifts
+# alone discard the unwanted bits and no masks are needed.
+widen.legalize(
+    a << bitrev.i8(x),
+    Rtl(
+        a1 << band_imm(x, imm64(0xaa)),
+        a2 << ushr_imm(a1, imm64(1)),
+        a3 << band_imm(x, imm64(0x55)),
+        a4 << ishl_imm(a3, imm64(1)),
+        b << bor(a2, a4),
+        b1 << band_imm(b, imm64(0xcc)),
+        b2 << ushr_imm(b1, imm64(2)),
+        b3 << band_imm(b, imm64(0x33)),
+        b4 << ishl_imm(b3, imm64(2)),
+        c << bor(b2, b4),
+        c1 << band_imm(c, imm64(0xf0)),
+        c2 << ushr_imm(c1, imm64(4)),
+        c3 << band_imm(c, imm64(0x0f)),
+        c4 << ishl_imm(c3, imm64(4)),
+        a << bor(c2, c4),
+    ))
+
+widen.legalize(
+    a << bitrev.i16(x),
+    Rtl(
+        a1 << band_imm(x, imm64(0xaaaa)),
+        a2 << ushr_imm(a1, imm64(1)),
+        a3 << band_imm(x, imm64(0x5555)),
+        a4 << ishl_imm(a3, imm64(1)),
+        b << bor(a2, a4),
+        b1 << band_imm(b, imm64(0xcccc)),
+        b2 << ushr_imm(b1, imm64(2)),
+        b3 << band_imm(b, imm64(0x3333)),
+        b4 << ishl_imm(b3, imm64(2)),
+        c << bor(b2, b4),
+        c1 << band_imm(c, imm64(0xf0f0)),
+        c2 << ushr_imm(c1, imm64(4)),
+        c3 << band_imm(c, imm64(0x0f0f)),
+        c4 << ishl_imm(c3, imm64(4)),
+        d << bor(c2, c4),
+        d1 << band_imm(d, imm64(0xff00)),
+        d2 << ushr_imm(d1, imm64(8)),
+        d3 << band_imm(d, imm64(0x00ff)),
+        d4 << ishl_imm(d3, imm64(8)),
+        a << bor(d2, d4),
+    ))
+
+expand.legalize(
+    a << bitrev.i32(x),
+    Rtl(
+        a1 << band_imm(x, imm64(0xaaaaaaaa)),
+        a2 << ushr_imm(a1, imm64(1)),
+        a3 << band_imm(x, imm64(0x55555555)),
+        a4 << ishl_imm(a3, imm64(1)),
+        b << bor(a2, a4),
+        b1 << band_imm(b, imm64(0xcccccccc)),
+        b2 << ushr_imm(b1, imm64(2)),
+        b3 << band_imm(b, imm64(0x33333333)),
+        b4 << ishl_imm(b3, imm64(2)),
+        c << bor(b2, b4),
+        c1 << band_imm(c, imm64(0xf0f0f0f0)),
+        c2 << ushr_imm(c1, imm64(4)),
+        c3 << band_imm(c, imm64(0x0f0f0f0f)),
+        c4 << ishl_imm(c3, imm64(4)),
+        d << bor(c2, c4),
+        d1 << band_imm(d, imm64(0xff00ff00)),
+        d2 << ushr_imm(d1, imm64(8)),
+        d3 << band_imm(d, imm64(0x00ff00ff)),
+        d4 << ishl_imm(d3, imm64(8)),
+        e << bor(d2, d4),
+        e1 << ushr_imm(e, imm64(16)),
+        e2 << ishl_imm(e, imm64(16)),
+        a << bor(e1, e2),
+    ))
+
+expand.legalize(
+    a << bitrev.i64(x),
+    Rtl(
+        a1 << band_imm(x, imm64(0xaaaaaaaaaaaaaaaa)),
+        a2 << ushr_imm(a1, imm64(1)),
+        a3 << band_imm(x, imm64(0x5555555555555555)),
+        a4 << ishl_imm(a3, imm64(1)),
+        b << bor(a2, a4),
+        b1 << band_imm(b, imm64(0xcccccccccccccccc)),
+        b2 << ushr_imm(b1, imm64(2)),
+        b3 << band_imm(b, imm64(0x3333333333333333)),
+        b4 << ishl_imm(b3, imm64(2)),
+        c << bor(b2, b4),
+        c1 << band_imm(c, imm64(0xf0f0f0f0f0f0f0f0)),
+        c2 << ushr_imm(c1, imm64(4)),
+        c3 << band_imm(c, imm64(0x0f0f0f0f0f0f0f0f)),
+        c4 << ishl_imm(c3, imm64(4)),
+        d << bor(c2, c4),
+        d1 << band_imm(d, imm64(0xff00ff00ff00ff00)),
+        d2 << ushr_imm(d1, imm64(8)),
+        d3 << band_imm(d, imm64(0x00ff00ff00ff00ff)),
+        d4 << ishl_imm(d3, imm64(8)),
+        e << bor(d2, d4),
+        e1 << band_imm(e, imm64(0xffff0000ffff0000)),
+        e2 << ushr_imm(e1, imm64(16)),
+        e3 << band_imm(e, imm64(0x0000ffff0000ffff)),
+        e4 << ishl_imm(e3, imm64(16)),
+        f << bor(e2, e4),
+        f1 << ushr_imm(f, imm64(32)),
+        f2 << ishl_imm(f, imm64(32)),
+        a << bor(f1, f2),
+    ))
+
 # Floating-point sign manipulations.
 for ty, minus_zero in [
         (types.f32, f32const(ieee32.bits(0x80000000))),
diff --git a/lib/codegen/meta-python/cdsl/ast.py b/lib/codegen/meta-python/cdsl/ast.py
index 09d4ad2514..75716d36a9 100644
--- a/lib/codegen/meta-python/cdsl/ast.py
+++ b/lib/codegen/meta-python/cdsl/ast.py
@@ -523,10 +523,21 @@ class ConstantInt(Literal):
 
     def __str__(self):
         # type: () -> str
         """
         Get the Rust expression form of this constant.
+
+        Values in the unsigned imm64 range that do not fit in the signed
+        range are printed as their two's complement signed counterpart.
         """
-        return str(self.value)
+        # If the value is in the signed imm64 range, print it as-is.
+        if -(2**63) <= self.value < 2**63:
+            return str(self.value)
+        # Otherwise if the value is in the unsigned imm64 range, print its
+        # bitwise counterpart in the signed imm64 range.
+        if 2**63 <= self.value < 2**64:
+            return str(self.value - 2**64)
+        raise AssertionError(
+            "immediate value not in signed or unsigned imm64 range")
 
 
 class ConstantBits(Literal):