riscv64: Improve ctz/clz/cls codegen (#5854)

* cranelift: Add extra runtests for `clz`/`ctz`

* riscv64: Restrict lowering rules for `ctz`/`clz`

* cranelift: Add `u64` isle helpers

* riscv64: Improve `ctz` codegen

* riscv64: Improve `clz` codegen

* riscv64: Improve `cls` codegen

* riscv64: Improve `clz.i128` codegen

Instead of checking whether we have 64 zeros in the top half, check
whether it *is* 0; that way we avoid loading the `64` constant.

* riscv64: Improve `ctz.i128` codegen

Instead of checking whether we have 64 zeros in the bottom half, check
whether it *is* 0; that way we avoid loading the `64` constant.

* riscv64: Use extended value in `lower_cls`

* riscv64: Use pattern matches on `bseti`
This commit is contained in:
Afonso Bordado
2023-03-21 23:15:14 +00:00
committed by GitHub
parent ff6f17ca52
commit 7a3df7dcc0
14 changed files with 617 additions and 167 deletions

View File

@@ -2,6 +2,7 @@ test interpret
test run
target aarch64
target riscv64
target riscv64 has_zbb
target s390x
; not implemented on `x86_64`

View File

@@ -5,12 +5,14 @@ target s390x
target x86_64
target x86_64 has_lzcnt
target riscv64
target riscv64 has_zbb
function %clz_i8(i8) -> i8 {
block0(v0: i8):
v1 = clz v0
return v1
}
; run: %clz_i8(0) == 8
; run: %clz_i8(1) == 7
; run: %clz_i8(0x40) == 1
; run: %clz_i8(-1) == 0
@@ -20,6 +22,7 @@ block0(v0: i16):
v1 = clz v0
return v1
}
; run: %clz_i16(0) == 16
; run: %clz_i16(1) == 15
; run: %clz_i16(0x4000) == 1
; run: %clz_i16(-1) == 0
@@ -29,6 +32,7 @@ block0(v0: i32):
v1 = clz v0
return v1
}
; run: %clz_i32(0) == 32
; run: %clz_i32(1) == 31
; run: %clz_i32(0x40000000) == 1
; run: %clz_i32(-1) == 0
@@ -38,6 +42,7 @@ block0(v0: i64):
v1 = clz v0
return v1
}
; run: %clz_i64(0) == 64
; run: %clz_i64(1) == 63
; run: %clz_i64(0x4000000000000000) == 1
; run: %clz_i64(-1) == 0

View File

@@ -3,14 +3,17 @@ test run
target aarch64
target s390x
target x86_64
target riscv64
target x86_64 has_bmi1
target riscv64
target riscv64 has_zbb
target riscv64 has_zbb has_zbs
function %ctz_i8(i8) -> i8 {
block0(v0: i8):
v1 = ctz v0
return v1
}
; run: %ctz_i8(0) == 8
; run: %ctz_i8(1) == 0
; run: %ctz_i8(0x40) == 6
; run: %ctz_i8(-1) == 0
@@ -20,6 +23,7 @@ block0(v0: i16):
v1 = ctz v0
return v1
}
; run: %ctz_i16(0) == 16
; run: %ctz_i16(1) == 0
; run: %ctz_i16(0x4000) == 14
; run: %ctz_i16(-1) == 0
@@ -29,6 +33,7 @@ block0(v0: i32):
v1 = ctz v0
return v1
}
; run: %ctz_i32(0) == 32
; run: %ctz_i32(1) == 0
; run: %ctz_i32(0x40000000) == 30
; run: %ctz_i32(-1) == 0
@@ -38,6 +43,7 @@ block0(v0: i64):
v1 = ctz v0
return v1
}
; run: %ctz_i64(0) == 64
; run: %ctz_i64(1) == 0
; run: %ctz_i64(0x4000000000000000) == 62
; run: %ctz_i64(-1) == 0

View File

@@ -4,6 +4,8 @@ target aarch64
target s390x
target x86_64
target riscv64
target riscv64 has_zbb
target riscv64 has_zbb has_zbs
function %ctz_i128(i128) -> i128 {
block0(v0: i128):

View File

@@ -1,6 +1,7 @@
test run
target aarch64
target riscv64
target riscv64
target riscv64 has_zbb
target s390x
function %cls_i128(i128) -> i128 {