Cranelift: Rewrite (x>>k)<<k into masking off the bottom k bits (#5673)

* Cranelift: Rewrite `(x>>k)<<k` into masking off the bottom `k` bits

* Add a runtest for exercising our rewrite of `(x >> k) << k` into masking
This commit is contained in:
Nick Fitzgerald
2023-01-31 13:11:12 -08:00
committed by GitHub
parent 7f2c8e6344
commit 253e28ca4f
3 changed files with 153 additions and 0 deletions

View File

@@ -160,6 +160,19 @@
;; TODO: strength reduction: div to shifts
;; TODO: div/rem by constants -> magic multiplications
;; `(x >> k) << k` is the same as masking off the bottom `k` bits (regardless of
;; whether this is a signed or unsigned shift right).
;;
;; Shift amounts are taken modulo the bit width of the shifted type, so the
;; constant is reduced the same way before the mask is built. Without this, an
;; amount such as 9 on an `i8` (which really shifts by 1) would produce an
;; all-zero mask instead of `0xfe`, and amounts >= 64 would overflow the 64-bit
;; shift used to compute the mask.
(rule (simplify (ishl (fits_in_64 ty)
                      (ushr ty x (iconst _ (u64_from_imm64 k)))
                      (iconst _ (u64_from_imm64 k))))
      (let ((amt u64 (u64_and k (u64_sub (ty_bits_u64 ty) 1)))
            (mask u64 (u64_shl 0xFFFFFFFFFFFFFFFF amt)))
        (band ty x (iconst ty (imm64_masked ty mask)))))
;; Signed variant of `(x >> k) << k` ==> `x & mask`: the bits that the
;; arithmetic shift right fills in are shifted back out by the `ishl`, so the
;; result is the same mask as in the unsigned case.
;;
;; The shift amount is reduced modulo the type's bit width before building the
;; mask, matching how the shift instructions themselves interpret it; the raw
;; constant would give a wrong mask whenever `k` is >= the bit width.
(rule (simplify (ishl (fits_in_64 ty)
                      (sshr ty x (iconst _ (u64_from_imm64 k)))
                      (iconst _ (u64_from_imm64 k))))
      (let ((amt u64 (u64_and k (u64_sub (ty_bits_u64 ty) 1)))
            (mask u64 (u64_shl 0xFFFFFFFFFFFFFFFF amt)))
        (band ty x (iconst ty (imm64_masked ty mask)))))
;; Rematerialize ALU-op-with-imm and iconsts in each block where they're
;; used. This is neutral (add-with-imm) or positive (iconst) for
;; register pressure, and these ops are very cheap.

View File

@@ -20,3 +20,69 @@ block0:
; check: v2 = iconst.i64 0x9876_5432 ; check: v2 = iconst.i64 0x9876_5432
; check: return v2 ; v2 = 0x9876_5432 ; check: return v2 ; v2 = 0x9876_5432
} }
;; Unsigned `(x >> 5) << 5` on i8 should be rewritten to `x & 224` (0xe0).
function %unsigned_shift_right_shift_left_i8(i8) -> i8 {
block0(v0: i8):
    v1 = iconst.i8 5
    v2 = ushr v0, v1
    v3 = ishl v2, v1
    return v3
    ; check: v4 = iconst.i8 224
    ; check: v5 = band v0, v4
    ; The return needs a `check:` prefix too, otherwise it is a plain comment
    ; and the test never verifies that the masked value is what gets returned.
    ; check: return v5
}
;; Unsigned `(x >> 5) << 5` on i32 should be rewritten to `x & 0xffff_ffe0`.
function %unsigned_shift_right_shift_left_i32(i32) -> i32 {
block0(v0: i32):
    v1 = iconst.i32 5
    v2 = ushr v0, v1
    v3 = ishl v2, v1
    return v3
    ; check: v4 = iconst.i32 0xffff_ffe0
    ; check: v5 = band v0, v4
    ; The return needs a `check:` prefix too, otherwise it is a plain comment
    ; and the test never verifies that the masked value is what gets returned.
    ; check: return v5
}
;; Unsigned `(x >> 5) << 5` on i64 should be rewritten to `x & -32`.
function %unsigned_shift_right_shift_left_i64(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i64 5
    v2 = ushr v0, v1
    v3 = ishl v2, v1
    return v3
    ; check: v4 = iconst.i64 -32
    ; check: v5 = band v0, v4
    ; The return needs a `check:` prefix too, otherwise it is a plain comment
    ; and the test never verifies that the masked value is what gets returned.
    ; check: return v5
}
;; Signed `(x >> 5) << 5` on i8 should be rewritten to `x & 224` (0xe0),
;; the same mask as the unsigned case.
function %signed_shift_right_shift_left_i8(i8) -> i8 {
block0(v0: i8):
    v1 = iconst.i8 5
    v2 = sshr v0, v1
    v3 = ishl v2, v1
    return v3
    ; check: v4 = iconst.i8 224
    ; check: v5 = band v0, v4
    ; The return needs a `check:` prefix too, otherwise it is a plain comment
    ; and the test never verifies that the masked value is what gets returned.
    ; check: return v5
}
;; Signed `(x >> 5) << 5` on i32 should be rewritten to `x & 0xffff_ffe0`,
;; the same mask as the unsigned case.
function %signed_shift_right_shift_left_i32(i32) -> i32 {
block0(v0: i32):
    v1 = iconst.i32 5
    v2 = sshr v0, v1
    v3 = ishl v2, v1
    return v3
    ; check: v4 = iconst.i32 0xffff_ffe0
    ; check: v5 = band v0, v4
    ; The return needs a `check:` prefix too, otherwise it is a plain comment
    ; and the test never verifies that the masked value is what gets returned.
    ; check: return v5
}
;; Signed `(x >> 5) << 5` on i64 should be rewritten to `x & -32`,
;; the same mask as the unsigned case.
function %signed_shift_right_shift_left_i64(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i64 5
    v2 = sshr v0, v1
    v3 = ishl v2, v1
    return v3
    ; check: v4 = iconst.i64 -32
    ; check: v5 = band v0, v4
    ; The return needs a `check:` prefix too, otherwise it is a plain comment
    ; and the test never verifies that the masked value is what gets returned.
    ; check: return v5
}

View File

@@ -0,0 +1,74 @@
;; Test that our rewrite of `(x >> k) << k` into masking is correct.
;;
;; The rewrite is a mid-end optimization, so the compiled (`test run`) variant
;; must be built with optimizations enabled; `opt_level` defaults to `none`,
;; under which the rewrite would not be applied and this file would only be
;; testing the plain shift instructions.
test interpret
test run
set opt_level=speed
target aarch64
target x86_64
target riscv64
target s390x
;; Computes `(v0 >> 5) << 5` on i8 with an unsigned (logical) shift right.
;; The `run` lines below expect the result to equal the input with its low
;; five bits cleared (all-ones, zero, and an alternating bit pattern).
function %unsigned_shift_right_shift_left_i8(i8) -> i8 {
block0(v0: i8):
v1 = iconst.i8 5
v2 = ushr v0, v1
v3 = ishl v2, v1
return v3
}
; run: %unsigned_shift_right_shift_left_i8(-1) == 0xe0
; run: %unsigned_shift_right_shift_left_i8(0) == 0
; run: %unsigned_shift_right_shift_left_i8(0xaa) == 0xa0
;; Computes `(v0 >> 5) << 5` on i32 with an unsigned (logical) shift right.
;; The `run` lines below expect the result to equal the input with its low
;; five bits cleared (all-ones, zero, and an alternating bit pattern).
function %unsigned_shift_right_shift_left_i32(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 5
v2 = ushr v0, v1
v3 = ishl v2, v1
return v3
}
; run: %unsigned_shift_right_shift_left_i32(-1) == 0xffffffe0
; run: %unsigned_shift_right_shift_left_i32(0) == 0
; run: %unsigned_shift_right_shift_left_i32(0xaaaaaaaa) == 0xaaaaaaa0
;; Computes `(v0 >> 5) << 5` on i64 with an unsigned (logical) shift right.
;; The `run` lines below expect the result to equal the input with its low
;; five bits cleared (all-ones, zero, and an alternating bit pattern).
function %unsigned_shift_right_shift_left_i64(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 5
v2 = ushr v0, v1
v3 = ishl v2, v1
return v3
}
; run: %unsigned_shift_right_shift_left_i64(-1) == 0xffffffffffffffe0
; run: %unsigned_shift_right_shift_left_i64(0) == 0
; run: %unsigned_shift_right_shift_left_i64(0xaaaaaaaaaaaaaaaa) == 0xaaaaaaaaaaaaaaa0
;; Computes `(v0 >> 5) << 5` on i8 with a signed (arithmetic) shift right.
;; The `run` lines below expect the same results as the unsigned variant:
;; the input with its low five bits cleared.
function %signed_shift_right_shift_left_i8(i8) -> i8 {
block0(v0: i8):
v1 = iconst.i8 5
v2 = sshr v0, v1
v3 = ishl v2, v1
return v3
}
; run: %signed_shift_right_shift_left_i8(-1) == 0xe0
; run: %signed_shift_right_shift_left_i8(0) == 0
; run: %signed_shift_right_shift_left_i8(0xaa) == 0xa0
;; Computes `(v0 >> 5) << 5` on i32 with a signed (arithmetic) shift right.
;; The `run` lines below expect the same results as the unsigned variant:
;; the input with its low five bits cleared.
function %signed_shift_right_shift_left_i32(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 5
v2 = sshr v0, v1
v3 = ishl v2, v1
return v3
}
; run: %signed_shift_right_shift_left_i32(-1) == 0xffffffe0
; run: %signed_shift_right_shift_left_i32(0) == 0
; run: %signed_shift_right_shift_left_i32(0xaaaaaaaa) == 0xaaaaaaa0
;; Computes `(v0 >> 5) << 5` on i64 with a signed (arithmetic) shift right.
;; The `run` lines below expect the same results as the unsigned variant:
;; the input with its low five bits cleared.
function %signed_shift_right_shift_left_i64(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 5
v2 = sshr v0, v1
v3 = ishl v2, v1
return v3
}
; run: %signed_shift_right_shift_left_i64(-1) == 0xffffffffffffffe0
; run: %signed_shift_right_shift_left_i64(0) == 0
; run: %signed_shift_right_shift_left_i64(0xaaaaaaaaaaaaaaaa) == 0xaaaaaaaaaaaaaaa0