Cranelift: Generalize (x << k) >> k optimization (#5746)
* Generalize unsigned `(x << k) >> k` optimization

  Split the existing rule into three parts:
  - A dual of the rule for `(x >> k) << k` that is only valid for unsigned shifts.
  - Known-bits analysis for `(band (uextend x) k)`.
  - A new rule for converting `sextend` to `uextend` if the sign-extended bits are masked out anyway.

  The first two together cover the existing rule.

* Generalize signed `(x << k) >> k` optimization
* Review comments
* Generalize sign-extending shifts further

  The shifts can be eliminated even if the shift amount isn't exactly equal to the difference in bit-widths between the narrow and wide types.

* Add filetests
This commit is contained in:
@@ -129,6 +129,16 @@ macro_rules! isle_common_prelude_methods {
|
||||
x == y
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u64_le(&mut self, x: u64, y: u64) -> bool {
|
||||
x <= y
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u64_lt(&mut self, x: u64, y: u64) -> bool {
|
||||
x < y
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u64_is_zero(&mut self, value: u64) -> bool {
|
||||
0 == value
|
||||
|
||||
@@ -187,16 +187,10 @@
|
||||
;; A multiply whose first operand is a constant matched by
;; `imm64_power_of_two` is rewritten as a left shift of the other operand by
;; the amount that extractor yields.
(rule (simplify (imul ty
                      (iconst _ (imm64_power_of_two amt))
                      val))
      (ishl ty val (iconst ty (imm64 amt))))
|
||||
|
||||
;; x<<32>>32: uextend/sextend 32->64.
|
||||
(rule (simplify (ushr $I64 (ishl $I64 (uextend $I64 x @ (value_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32))))
|
||||
(uextend $I64 x))
|
||||
|
||||
(rule (simplify (sshr $I64 (ishl $I64 (uextend $I64 x @ (value_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32))))
|
||||
(sextend $I64 x))
|
||||
|
||||
;; TODO: strength reduction: div to shifts
|
||||
;; TODO: div/rem by constants -> magic multiplications
|
||||
|
||||
|
||||
;; `(x >> k) << k` is the same as masking off the bottom `k` bits (regardless if
|
||||
;; this is a signed or unsigned shift right).
|
||||
(rule (simplify (ishl (fits_in_64 ty)
|
||||
@@ -210,6 +204,66 @@
|
||||
(let ((mask Imm64 (imm64_shl ty (imm64 0xFFFF_FFFF_FFFF_FFFF) k)))
|
||||
(band ty x (iconst ty mask))))
|
||||
|
||||
|
||||
;; An unsigned `(x << k) >> k` clears the top `k` bits of `x`, so it can be
;; expressed as a `band` with the type's all-ones mask logically shifted right
;; by `k`. The mask is materialized with `iconst`, which is why this rule is
;; limited to scalar integer types even though the identity also holds for
;; vectors.
(rule (simplify (ushr (fits_in_64 (ty_int ty))
                      (ishl ty val (iconst _ amt))
                      (iconst _ amt)))
      (let ((keep_mask Imm64 (imm64_ushr ty (imm64 (ty_mask ty)) amt)))
        (band ty val (iconst ty keep_mask))))
|
||||
|
||||
|
||||
;; A signed `(x << k) >> k` sign-extends the low `n` bits of an `n+k`-bit
;; value. When `x` is a `uextend` from the `n`-bit type and `k` is exactly the
;; width difference between the two types, the zero-extended bits are shifted
;; out and refilled from the narrow value's sign bit, so the whole expression
;; is just `sextend` of the narrow value.
(rule (simplify (sshr wide_ty
                      (ishl wide_ty
                            (uextend wide_ty inner @ (value_type narrow_ty))
                            (iconst _ amt))
                      (iconst _ amt)))
      (if-let (u64_from_imm64 amt_u64) amt)
      (if-let $true
              (u64_eq (u64_sub (ty_bits_u64 wide_ty) (ty_bits_u64 narrow_ty))
                      amt_u64))
      (sextend wide_ty inner))
|
||||
|
||||
;; When the shift amount is strictly smaller than the width difference, the
;; bit that lands in the sign position after the `ishl` is one of the zero
;; bits introduced by `uextend`. The arithmetic shift back therefore restores
;; exactly the value we started with, and the original `uextend` result can be
;; used directly.
(rule (simplify (sshr wide_ty
                      (ishl wide_ty
                            extended @ (uextend wide_ty (value_type narrow_ty))
                            (iconst _ amt))
                      (iconst _ amt)))
      (if-let (u64_from_imm64 amt_u64) amt)
      (if-let $true
              (u64_lt amt_u64
                      (u64_sub (ty_bits_u64 wide_ty) (ty_bits_u64 narrow_ty))))
      extended)
|
||||
|
||||
;; For a `sextend` operand, the shift pair is redundant whenever the shift
;; amount is at most the width difference: both the "exactly equal" and the
;; "strictly smaller" cases yield the sign-extended value that the operand
;; already holds, so the original `sextend` result is reused.
(rule (simplify (sshr wide_ty
                      (ishl wide_ty
                            extended @ (sextend wide_ty (value_type narrow_ty))
                            (iconst _ amt))
                      (iconst _ amt)))
      (if-let (u64_from_imm64 amt_u64) amt)
      (if-let $true
              (u64_le amt_u64
                      (u64_sub (ty_bits_u64 wide_ty) (ty_bits_u64 narrow_ty))))
      extended)
|
||||
|
||||
|
||||
;; A `band` whose constant keeps every bit of the narrow type cannot change the
;; result of a `uextend`, because all of the higher bits are already zero. The
;; `band` is dropped and the `uextend` value is used as-is. (This acts as a
;; cheap form of known-bits analysis.)
(rule (simplify (band wide_ty
                      extended @ (uextend _ (value_type narrow_ty))
                      (iconst _ (u64_from_imm64 const_mask))))
      ; Require every bit of the narrow type's mask to be set in `const_mask`.
      (if-let $true
              (let ((low_bits u64 (ty_mask narrow_ty)))
                (u64_eq low_bits (u64_and const_mask low_bits))))
      extended)
|
||||
|
||||
;; A `band` with exactly the narrow type's mask clears the bits that `sextend`
;; filled in, which leaves the zero-extended value; emit `uextend` instead.
(rule (simplify (band wide_ty
                      (sextend _ inner @ (value_type narrow_ty))
                      (iconst _ (u64_from_imm64 const_mask))))
      (if-let $true (u64_eq const_mask (ty_mask narrow_ty)))
      (uextend wide_ty inner))
|
||||
|
||||
|
||||
;; Rematerialize ALU-op-with-imm and iconsts in each block where they're
|
||||
;; used. This is neutral (add-with-imm) or positive (iconst) for
|
||||
;; register pressure, and these ops are very cheap.
|
||||
|
||||
@@ -144,6 +144,12 @@
|
||||
(decl pure u64_eq (u64 u64) bool)
|
||||
(extern constructor u64_eq u64_eq)
|
||||
|
||||
;; Pure external constructor: unsigned `x <= y` on two `u64` values.
(decl pure u64_le (u64 u64) bool)
|
||||
(extern constructor u64_le u64_le)
|
||||
|
||||
;; Pure external constructor: unsigned `x < y` on two `u64` values.
(decl pure u64_lt (u64 u64) bool)
|
||||
(extern constructor u64_lt u64_lt)
|
||||
|
||||
(decl pure i64_sextend_imm64 (Type Imm64) i64)
|
||||
(extern constructor i64_sextend_imm64 i64_sextend_imm64)
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ block0(v0: i8):
|
||||
return v3
|
||||
; check: v4 = iconst.i8 224
|
||||
; check: v5 = band v0, v4
|
||||
; return v5
|
||||
; check: return v5
|
||||
}
|
||||
|
||||
function %unsigned_shift_right_shift_left_i32(i32) -> i32 {
|
||||
@@ -51,7 +51,7 @@ block0(v0: i64):
|
||||
return v3
|
||||
; check: v4 = iconst.i64 -32
|
||||
; check: v5 = band v0, v4
|
||||
; return v5
|
||||
; check: return v5
|
||||
}
|
||||
|
||||
function %signed_shift_right_shift_left_i8(i8) -> i8 {
|
||||
@@ -62,7 +62,7 @@ block0(v0: i8):
|
||||
return v3
|
||||
; check: v4 = iconst.i8 224
|
||||
; check: v5 = band v0, v4
|
||||
; return v5
|
||||
; check: return v5
|
||||
}
|
||||
|
||||
function %signed_shift_right_shift_left_i32(i32) -> i32 {
|
||||
@@ -73,7 +73,7 @@ block0(v0: i32):
|
||||
return v3
|
||||
; check: v4 = iconst.i32 0xffff_ffe0
|
||||
; check: v5 = band v0, v4
|
||||
; return v5
|
||||
; check: return v5
|
||||
}
|
||||
|
||||
function %signed_shift_right_shift_left_i64(i64) -> i64 {
|
||||
@@ -84,7 +84,7 @@ block0(v0: i64):
|
||||
return v3
|
||||
; check: v4 = iconst.i64 -32
|
||||
; check: v5 = band v0, v4
|
||||
; return v5
|
||||
; check: return v5
|
||||
}
|
||||
|
||||
function %signed_shift_right_shift_left_i8_mask_rhs(i8) -> i8 {
|
||||
@@ -95,7 +95,133 @@ block0(v0: i8):
|
||||
return v3
|
||||
; check: v4 = iconst.i8 224
|
||||
; check: v5 = band v0, v4
|
||||
; return v5
|
||||
; check: return v5
|
||||
}
|
||||
|
||||
; sextend i32->i64, then shift left and logically right by 32 (the full width
; difference). The sign-extension bits are discarded, so the expected output
; is a plain `uextend.i64` of the input.
function %sextend_shift_32_64_unsigned(i32) -> i64 {
|
||||
block0(v0: i32):
|
||||
v1 = iconst.i8 32
|
||||
v2 = sextend.i64 v0
|
||||
v3 = ishl v2, v1
|
||||
v4 = ushr v3, v1
|
||||
return v4
|
||||
; check: v7 = uextend.i64 v0
|
||||
; check: return v7
|
||||
}
|
||||
|
||||
; sextend i32->i64, then shift left and arithmetically right by 32 (the full
; width difference). The shift pair recreates the same sign extension, so the
; expected output returns the original `sextend` value v2.
function %sextend_shift_32_64_signed(i32) -> i64 {
|
||||
block0(v0: i32):
|
||||
v1 = iconst.i8 32
|
||||
v2 = sextend.i64 v0
|
||||
v3 = ishl v2, v1
|
||||
v4 = sshr v3, v1
|
||||
return v4
|
||||
; check: return v2
|
||||
}
|
||||
|
||||
; sextend i32->i64, then shift left and logically right by 31, one less than
; the width difference. Only the top 31 bits are cleared, so the expected
; output is a `band` of the sign-extended value with 0x0001_ffff_ffff rather
; than a `uextend`.
function %sextend_undershift_32_64_unsigned(i32) -> i64 {
|
||||
block0(v0: i32):
|
||||
v1 = iconst.i8 31
|
||||
v2 = sextend.i64 v0
|
||||
v3 = ishl v2, v1
|
||||
v4 = ushr v3, v1
|
||||
return v4
|
||||
; check: v5 = iconst.i64 0x0001_ffff_ffff
|
||||
; check: v6 = band v2, v5
|
||||
; check: return v6
|
||||
}
|
||||
|
||||
; sextend i32->i64, then shift left and arithmetically right by 31, one less
; than the width difference. The bit moved into the sign position is itself a
; copy of the sign, so the expected output returns the original `sextend`
; value v2.
function %sextend_undershift_32_64_signed(i32) -> i64 {
|
||||
block0(v0: i32):
|
||||
v1 = iconst.i8 31
|
||||
v2 = sextend.i64 v0
|
||||
v3 = ishl v2, v1
|
||||
v4 = sshr v3, v1
|
||||
return v4
|
||||
; check: return v2
|
||||
}
|
||||
|
||||
; sextend i8->i64, then shift left and logically right by 56 (the full width
; difference). Same as the 32->64 unsigned case with a different type pair:
; the expected output is `uextend.i64` of the input.
function %sextend_shift_8_64_unsigned(i8) -> i64 {
|
||||
block0(v0: i8):
|
||||
v1 = iconst.i8 56
|
||||
v2 = sextend.i64 v0
|
||||
v3 = ishl v2, v1
|
||||
v4 = ushr v3, v1
|
||||
return v4
|
||||
; check: v7 = uextend.i64 v0
|
||||
; check: return v7
|
||||
}
|
||||
|
||||
; sextend i8->i64, then shift left and arithmetically right by 56 (the full
; width difference). The expected output returns the original `sextend`
; value v2.
function %sextend_shift_8_64_signed(i8) -> i64 {
|
||||
block0(v0: i8):
|
||||
v1 = iconst.i8 56
|
||||
v2 = sextend.i64 v0
|
||||
v3 = ishl v2, v1
|
||||
v4 = sshr v3, v1
|
||||
return v4
|
||||
; check: return v2
|
||||
}
|
||||
|
||||
; uextend i32->i64, then shift left and logically right by 32. The top 32 bits
; are already zero, so the shift pair is a no-op and the expected output
; returns the original `uextend` value v2.
function %uextend_shift_32_64_unsigned(i32) -> i64 {
|
||||
block0(v0: i32):
|
||||
v1 = iconst.i8 32
|
||||
v2 = uextend.i64 v0
|
||||
v3 = ishl v2, v1
|
||||
v4 = ushr v3, v1
|
||||
return v4
|
||||
; check: return v2
|
||||
}
|
||||
|
||||
; uextend i32->i64, then shift left and arithmetically right by 32 (the full
; width difference). The zero-extended bits are shifted out and refilled from
; the i32 sign bit, so the expected output is `sextend.i64` of the input.
function %uextend_shift_32_64_signed(i32) -> i64 {
|
||||
block0(v0: i32):
|
||||
v1 = iconst.i8 32
|
||||
v2 = uextend.i64 v0
|
||||
v3 = ishl v2, v1
|
||||
v4 = sshr v3, v1
|
||||
return v4
|
||||
; check: v5 = sextend.i64 v0
|
||||
; check: return v5
|
||||
}
|
||||
|
||||
; uextend i32->i64, then shift left and logically right by 31, one less than
; the width difference. The cleared bits were already zero, so the expected
; output returns the original `uextend` value v2.
function %uextend_undershift_32_64_unsigned(i32) -> i64 {
|
||||
block0(v0: i32):
|
||||
v1 = iconst.i8 31
|
||||
v2 = uextend.i64 v0
|
||||
v3 = ishl v2, v1
|
||||
v4 = ushr v3, v1
|
||||
return v4
|
||||
; check: return v2
|
||||
}
|
||||
|
||||
; uextend i32->i64, then shift left and arithmetically right by 31, one less
; than the width difference. The bit moved into the sign position is a zero
; from the `uextend`, so the expected output returns the original `uextend`
; value v2.
function %uextend_undershift_32_64_signed(i32) -> i64 {
|
||||
block0(v0: i32):
|
||||
v1 = iconst.i8 31
|
||||
v2 = uextend.i64 v0
|
||||
v3 = ishl v2, v1
|
||||
v4 = sshr v3, v1
|
||||
return v4
|
||||
; check: return v2
|
||||
}
|
||||
|
||||
; uextend i8->i64, then shift left and logically right by 56 (the full width
; difference). The top 56 bits are already zero, so the expected output
; returns the original `uextend` value v2.
function %uextend_shift_8_64_unsigned(i8) -> i64 {
|
||||
block0(v0: i8):
|
||||
v1 = iconst.i8 56
|
||||
v2 = uextend.i64 v0
|
||||
v3 = ishl v2, v1
|
||||
v4 = ushr v3, v1
|
||||
return v4
|
||||
; check: return v2
|
||||
}
|
||||
|
||||
; uextend i8->i64, then shift left and arithmetically right by 56 (the full
; width difference). The upper bits are refilled from the i8 sign bit, so the
; expected output is `sextend.i64` of the input.
function %uextend_shift_8_64_signed(i8) -> i64 {
|
||||
block0(v0: i8):
|
||||
v1 = iconst.i8 56
|
||||
v2 = uextend.i64 v0
|
||||
v3 = ishl v2, v1
|
||||
v4 = sshr v3, v1
|
||||
return v4
|
||||
; check: v5 = sextend.i64 v0
|
||||
; check: return v5
|
||||
}
|
||||
|
||||
function %or_and_y_with_not_y_i8(i8, i8) -> i8 {
|
||||
|
||||
Reference in New Issue
Block a user