diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs
index 0e3e293c1f..435955fa21 100644
--- a/cranelift/codegen/src/isle_prelude.rs
+++ b/cranelift/codegen/src/isle_prelude.rs
@@ -129,6 +129,16 @@ macro_rules! isle_common_prelude_methods {
             x == y
         }
 
+        #[inline]
+        fn u64_le(&mut self, x: u64, y: u64) -> bool {
+            x <= y
+        }
+
+        #[inline]
+        fn u64_lt(&mut self, x: u64, y: u64) -> bool {
+            x < y
+        }
+
         #[inline]
         fn u64_is_zero(&mut self, value: u64) -> bool {
             0 == value
diff --git a/cranelift/codegen/src/opts/algebraic.isle b/cranelift/codegen/src/opts/algebraic.isle
index da177603eb..ecd71988ce 100644
--- a/cranelift/codegen/src/opts/algebraic.isle
+++ b/cranelift/codegen/src/opts/algebraic.isle
@@ -187,16 +187,10 @@
 (rule (simplify (imul ty (iconst _ (imm64_power_of_two c)) x))
       (ishl ty x (iconst ty (imm64 c))))
 
-;; x<<32>>32: uextend/sextend 32->64.
-(rule (simplify (ushr $I64 (ishl $I64 (uextend $I64 x @ (value_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32))))
-      (uextend $I64 x))
-
-(rule (simplify (sshr $I64 (ishl $I64 (uextend $I64 x @ (value_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32))))
-      (sextend $I64 x))
-
 ;; TODO: strength reduction: div to shifts
 ;; TODO: div/rem by constants -> magic multiplications
 
+
 ;; `(x >> k) << k` is the same as masking off the bottom `k` bits (regardless if
 ;; this is a signed or unsigned shift right).
 (rule (simplify (ishl (fits_in_64 ty)
@@ -210,6 +204,66 @@
       (let ((mask Imm64 (imm64_shl ty (imm64 0xFFFF_FFFF_FFFF_FFFF) k)))
         (band ty x (iconst ty mask))))
 
+
+;; For unsigned shifts, `(x << k) >> k` is the same as masking out the top
+;; `k` bits. A similar rule is valid for vectors but this `iconst` mask only
+;; works for scalar integers.
+(rule (simplify (ushr (fits_in_64 (ty_int ty))
+                      (ishl ty x (iconst _ k))
+                      (iconst _ k)))
+      (band ty x (iconst ty (imm64_ushr ty (imm64 (ty_mask ty)) k))))
+
+
+;; For signed shifts, `(x << k) >> k` does sign-extension from `n` bits to
+;; `n+k` bits. In the special case where `x` is the result of either `sextend`
+;; or `uextend` from `n` bits to `n+k` bits, we can implement this using
+;; `sextend`.
+(rule (simplify (sshr wide
+                      (ishl wide
+                            (uextend wide x @ (value_type narrow))
+                            (iconst _ shift))
+                      (iconst _ shift)))
+      (if-let (u64_from_imm64 shift_u64) shift)
+      (if-let $true (u64_eq shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow))))
+      (sextend wide x))
+
+;; If `k` is smaller than the difference in bit widths of the two types, then
+;; the intermediate sign bit comes from the extend op, so the final result is
+;; the same as the original extend op.
+(rule (simplify (sshr wide
+                      (ishl wide
+                            x @ (uextend wide (value_type narrow))
+                            (iconst _ shift))
+                      (iconst _ shift)))
+      (if-let (u64_from_imm64 shift_u64) shift)
+      (if-let $true (u64_lt shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow))))
+      x)
+
+;; If the original extend op was `sextend`, then both of the above cases say
+;; the result should also be `sextend`.
+(rule (simplify (sshr wide
+                      (ishl wide
+                            x @ (sextend wide (value_type narrow))
+                            (iconst _ shift))
+                      (iconst _ shift)))
+      (if-let (u64_from_imm64 shift_u64) shift)
+      (if-let $true (u64_le shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow))))
+      x)
+
+
+;; Masking out any of the top bits of the result of `uextend` is a no-op. (This
+;; is like a cheap version of known-bits analysis.)
+(rule (simplify (band wide x @ (uextend _ (value_type narrow)) (iconst _ (u64_from_imm64 mask))))
+      ; Check that `narrow_mask` has a subset of the bits that `mask` does.
+      (if-let $true (let ((narrow_mask u64 (ty_mask narrow))) (u64_eq narrow_mask (u64_and mask narrow_mask))))
+      x)
+
+;; Masking out the sign-extended bits of an `sextend` turns it into a `uextend`.
+(rule (simplify (band wide (sextend _ x @ (value_type narrow)) (iconst _ (u64_from_imm64 mask))))
+      (if-let $true (u64_eq mask (ty_mask narrow)))
+      (uextend wide x))
+
+
 ;; Rematerialize ALU-op-with-imm and iconsts in each block where they're
 ;; used. This is neutral (add-with-imm) or positive (iconst) for
 ;; register pressure, and these ops are very cheap.
diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle
index 701f414a92..89fc8e9813 100644
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -144,6 +144,12 @@
 (decl pure u64_eq (u64 u64) bool)
 (extern constructor u64_eq u64_eq)
 
+(decl pure u64_le (u64 u64) bool)
+(extern constructor u64_le u64_le)
+
+(decl pure u64_lt (u64 u64) bool)
+(extern constructor u64_lt u64_lt)
+
 (decl pure i64_sextend_imm64 (Type Imm64) i64)
 (extern constructor i64_sextend_imm64 i64_sextend_imm64)
 
diff --git a/cranelift/filetests/filetests/egraph/algebraic.clif b/cranelift/filetests/filetests/egraph/algebraic.clif
index 3e8577cdbe..24460845a9 100644
--- a/cranelift/filetests/filetests/egraph/algebraic.clif
+++ b/cranelift/filetests/filetests/egraph/algebraic.clif
@@ -29,7 +29,7 @@ block0(v0: i8):
     return v3
     ; check: v4 = iconst.i8 224
     ; check: v5 = band v0, v4
-    ; return v5
+    ; check: return v5
 }
 
 function %unsigned_shift_right_shift_left_i32(i32) -> i32 {
@@ -51,7 +51,7 @@ block0(v0: i64):
     return v3
     ; check: v4 = iconst.i64 -32
     ; check: v5 = band v0, v4
-    ; return v5
+    ; check: return v5
 }
 
 function %signed_shift_right_shift_left_i8(i8) -> i8 {
@@ -62,7 +62,7 @@ block0(v0: i8):
     return v3
     ; check: v4 = iconst.i8 224
     ; check: v5 = band v0, v4
-    ; return v5
+    ; check: return v5
 }
 
 function %signed_shift_right_shift_left_i32(i32) -> i32 {
@@ -73,7 +73,7 @@ block0(v0: i32):
     return v3
     ; check: v4 = iconst.i32 0xffff_ffe0
     ; check: v5 = band v0, v4
-    ; return v5
+    ; check: return v5
 }
 
 function %signed_shift_right_shift_left_i64(i64) -> i64 {
@@ -84,7 +84,7 @@ block0(v0: i64):
     return v3
     ; check: v4 = iconst.i64 -32
     ; check: v5 = band v0, v4
-    ; return v5
+    ; check: return v5
 }
 
 function %signed_shift_right_shift_left_i8_mask_rhs(i8) -> i8 {
@@ -95,7 +95,133 @@ block0(v0: i8):
     return v3
     ; check: v4 = iconst.i8 224
     ; check: v5 = band v0, v4
-    ; return v5
+    ; check: return v5
+}
+
+function %sextend_shift_32_64_unsigned(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 32
+    v2 = sextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = ushr v3, v1
+    return v4
+    ; check: v7 = uextend.i64 v0
+    ; check: return v7
+}
+
+function %sextend_shift_32_64_signed(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 32
+    v2 = sextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = sshr v3, v1
+    return v4
+    ; check: return v2
+}
+
+function %sextend_undershift_32_64_unsigned(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 31
+    v2 = sextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = ushr v3, v1
+    return v4
+    ; check: v5 = iconst.i64 0x0001_ffff_ffff
+    ; check: v6 = band v2, v5
+    ; check: return v6
+}
+
+function %sextend_undershift_32_64_signed(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 31
+    v2 = sextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = sshr v3, v1
+    return v4
+    ; check: return v2
+}
+
+function %sextend_shift_8_64_unsigned(i8) -> i64 {
+block0(v0: i8):
+    v1 = iconst.i8 56
+    v2 = sextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = ushr v3, v1
+    return v4
+    ; check: v7 = uextend.i64 v0
+    ; check: return v7
+}
+
+function %sextend_shift_8_64_signed(i8) -> i64 {
+block0(v0: i8):
+    v1 = iconst.i8 56
+    v2 = sextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = sshr v3, v1
+    return v4
+    ; check: return v2
+}
+
+function %uextend_shift_32_64_unsigned(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 32
+    v2 = uextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = ushr v3, v1
+    return v4
+    ; check: return v2
+}
+
+function %uextend_shift_32_64_signed(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 32
+    v2 = uextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = sshr v3, v1
+    return v4
+    ; check: v5 = sextend.i64 v0
+    ; check: return v5
+}
+
+function %uextend_undershift_32_64_unsigned(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 31
+    v2 = uextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = ushr v3, v1
+    return v4
+    ; check: return v2
+}
+
+function %uextend_undershift_32_64_signed(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 31
+    v2 = uextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = sshr v3, v1
+    return v4
+    ; check: return v2
+}
+
+function %uextend_shift_8_64_unsigned(i8) -> i64 {
+block0(v0: i8):
+    v1 = iconst.i8 56
+    v2 = uextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = ushr v3, v1
+    return v4
+    ; check: return v2
+}
+
+function %uextend_shift_8_64_signed(i8) -> i64 {
+block0(v0: i8):
+    v1 = iconst.i8 56
+    v2 = uextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = sshr v3, v1
+    return v4
+    ; check: v5 = sextend.i64 v0
+    ; check: return v5
 }
 
 function %or_and_y_with_not_y_i8(i8, i8) -> i8 {