diff --git a/cranelift/codegen/src/opts/algebraic.isle b/cranelift/codegen/src/opts/algebraic.isle
index 865c773bd8..64a64a3a1f 100644
--- a/cranelift/codegen/src/opts/algebraic.isle
+++ b/cranelift/codegen/src/opts/algebraic.isle
@@ -160,6 +160,28 @@
 ;; TODO: strength reduction: div to shifts
 ;; TODO: div/rem by constants -> magic multiplications
 
+;; `(x >> k) << k` is the same as masking off the bottom `k` bits (regardless
+;; of whether this is a signed or unsigned shift right).
+;;
+;; Shift amounts are masked to the bit width of the shifted type, so `k` is
+;; reduced modulo that width before the mask is built. Without this, an
+;; out-of-range `k` (e.g. 9 on an `i8`, which really shifts by 1) would yield
+;; a mask that clears too many bits.
+;; NOTE(review): `bits - 1` is only a valid modulus mask for power-of-two
+;; widths -- confirm that holds for every type `fits_in_64` accepts.
+(rule (simplify (ishl (fits_in_64 ty)
+                      (ushr ty x (iconst _ (u64_from_imm64 k)))
+                      (iconst _ (u64_from_imm64 k))))
+      (let ((amt u64 (u64_and k (u64_sub (ty_bits_u64 ty) 1)))
+            (mask u64 (u64_shl 0xFFFFFFFFFFFFFFFF amt)))
+        (band ty x (iconst ty (imm64_masked ty mask)))))
+(rule (simplify (ishl (fits_in_64 ty)
+                      (sshr ty x (iconst _ (u64_from_imm64 k)))
+                      (iconst _ (u64_from_imm64 k))))
+      (let ((amt u64 (u64_and k (u64_sub (ty_bits_u64 ty) 1)))
+            (mask u64 (u64_shl 0xFFFFFFFFFFFFFFFF amt)))
+        (band ty x (iconst ty (imm64_masked ty mask)))))
+
 ;; Rematerialize ALU-op-with-imm and iconsts in each block where they're
 ;; used. This is neutral (add-with-imm) or positive (iconst) for
 ;; register pressure, and these ops are very cheap.
diff --git a/cranelift/filetests/filetests/egraph/algebraic.clif b/cranelift/filetests/filetests/egraph/algebraic.clif
index 409788ce82..33e83936c7 100644
--- a/cranelift/filetests/filetests/egraph/algebraic.clif
+++ b/cranelift/filetests/filetests/egraph/algebraic.clif
@@ -20,3 +20,70 @@ block0:
     ; check: v2 = iconst.i64 0x9876_5432
     ; check: return v2 ; v2 = 0x9876_5432
 }
+
+;; `(x >> k) << k` must be rewritten into a `band` with a constant mask.
+function %unsigned_shift_right_shift_left_i8(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 5
+    v2 = ushr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i8 224
+    ; check: v5 = band v0, v4
+    ; check: return v5
+}
+
+function %unsigned_shift_right_shift_left_i32(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 5
+    v2 = ushr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i32 0xffff_ffe0
+    ; check: v5 = band v0, v4
+    ; check: return v5
+}
+
+function %unsigned_shift_right_shift_left_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 5
+    v2 = ushr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i64 -32
+    ; check: v5 = band v0, v4
+    ; check: return v5
+}
+
+function %signed_shift_right_shift_left_i8(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i8 224
+    ; check: v5 = band v0, v4
+    ; check: return v5
+}
+
+function %signed_shift_right_shift_left_i32(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i32 0xffff_ffe0
+    ; check: v5 = band v0, v4
+    ; check: return v5
+}
+
+function %signed_shift_right_shift_left_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i64 -32
+    ; check: v5 = band v0, v4
+    ; check: return v5
+}
diff --git a/cranelift/filetests/filetests/runtests/shift-right-left.clif b/cranelift/filetests/filetests/runtests/shift-right-left.clif
new file mode 100644
index 0000000000..258ae78d41
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/shift-right-left.clif
@@ -0,0 +1,74 @@
+;; Test that our rewrite of `(x >> k) << k` into masking is correct.
+
+test interpret
+test run
+target aarch64
+target x86_64
+target riscv64
+target s390x
+
+function %unsigned_shift_right_shift_left_i8(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 5
+    v2 = ushr v0, v1
+    v3 = ishl v2, v1
+    return v3
+}
+; run: %unsigned_shift_right_shift_left_i8(-1) == 0xe0
+; run: %unsigned_shift_right_shift_left_i8(0) == 0
+; run: %unsigned_shift_right_shift_left_i8(0xaa) == 0xa0
+
+function %unsigned_shift_right_shift_left_i32(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 5
+    v2 = ushr v0, v1
+    v3 = ishl v2, v1
+    return v3
+}
+; run: %unsigned_shift_right_shift_left_i32(-1) == 0xffffffe0
+; run: %unsigned_shift_right_shift_left_i32(0) == 0
+; run: %unsigned_shift_right_shift_left_i32(0xaaaaaaaa) == 0xaaaaaaa0
+
+function %unsigned_shift_right_shift_left_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 5
+    v2 = ushr v0, v1
+    v3 = ishl v2, v1
+    return v3
+}
+; run: %unsigned_shift_right_shift_left_i64(-1) == 0xffffffffffffffe0
+; run: %unsigned_shift_right_shift_left_i64(0) == 0
+; run: %unsigned_shift_right_shift_left_i64(0xaaaaaaaaaaaaaaaa) == 0xaaaaaaaaaaaaaaa0
+
+function %signed_shift_right_shift_left_i8(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+}
+; run: %signed_shift_right_shift_left_i8(-1) == 0xe0
+; run: %signed_shift_right_shift_left_i8(0) == 0
+; run: %signed_shift_right_shift_left_i8(0xaa) == 0xa0
+
+function %signed_shift_right_shift_left_i32(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+}
+; run: %signed_shift_right_shift_left_i32(-1) == 0xffffffe0
+; run: %signed_shift_right_shift_left_i32(0) == 0
+; run: %signed_shift_right_shift_left_i32(0xaaaaaaaa) == 0xaaaaaaa0
+
+function %signed_shift_right_shift_left_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+}
+; run: %signed_shift_right_shift_left_i64(-1) == 0xffffffffffffffe0
+; run: %signed_shift_right_shift_left_i64(0) == 0
+; run: %signed_shift_right_shift_left_i64(0xaaaaaaaaaaaaaaaa) == 0xaaaaaaaaaaaaaaa0