riscv64: Add Zba extension instructions (#6087)

* riscv64: Use `add.uw` to zero extend

* riscv64: Implement `add.uw` optimizations

* riscv64: Add `Zba` `iadd+ishl` optimizations

* riscv64: Add `shl+uextend` optimizations based on `Zba`

* riscv64: Fix some issues with `Zba` instructions

* riscv64: Restrict shnadd selection

* riscv64: Fix `extend` priorities

* riscv64: Remove redundant `addw` rule

* riscv64: Specify type for `add` extend rules

* riscv64: Use `u64_from_imm64` extractor instead of `uimm8`

* riscv64: Restrict `uextend` in `shnadd.uw` rules

* riscv64: Use concrete type in `slli.uw` rule

* riscv64: Add extra arithmetic extends tests

Co-authored-by: Jamey Sharp <jsharp@fastly.com>

* riscv64: Make `Adduw` types concrete

* riscv64: Add extra arithmetic extend tests

* riscv64: Add `sextend`+Arithmetic rules

* riscv64: Fix whitespace

* cranelift: Move arithmetic extends tests with i128 to separate file

---------

Co-authored-by: Jamey Sharp <jsharp@fastly.com>
Author: Afonso Bordado
Date: 2023-03-23 20:06:03 +00:00
Committed by: GitHub
Parent: 6f66abd5c7
Commit: 602ff71fe4

14 changed files with 782 additions and 33 deletions
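For context, here is what the Zba instructions targeted by these lowerings compute — a minimal Rust sketch of the semantics given in the RISC-V bit-manipulation specification (the helper names are mine, not Cranelift's):

// Illustrative helpers; the names are not from the Cranelift tree.
fn add_uw(rs1: u64, rs2: u64) -> u64 {
    // add.uw rd, rs1, rs2: rs2 + zext32(rs1)
    rs2.wrapping_add(rs1 as u32 as u64)
}

fn sh_n_add(rs1: u64, rs2: u64, n: u32) -> u64 {
    // sh1add/sh2add/sh3add rd, rs1, rs2: rs2 + (rs1 << n), n in 1..=3
    rs2.wrapping_add(rs1 << n)
}

fn sh_n_add_uw(rs1: u64, rs2: u64, n: u32) -> u64 {
    // sh1add.uw/sh2add.uw/sh3add.uw rd, rs1, rs2: rs2 + (zext32(rs1) << n)
    rs2.wrapping_add((rs1 as u32 as u64) << n)
}

fn slli_uw(rs1: u64, imm: u32) -> u64 {
    // slli.uw rd, rs1, imm: zext32(rs1) << imm
    (rs1 as u32 as u64) << imm
}

fn main() {
    // These mirror run lines from the runtests added below.
    assert_eq!(add_uw(0xFFFF_FFFF, 2), 0x1_0000_0001);      // %add_uext_i32(2, 0xFFFFFFFF)
    assert_eq!(sh_n_add(1, 2, 1), 4);                       // %sh1add(2, 1)
    assert_eq!(sh_n_add_uw(0xFFFF_FFFF, 2, 1), 0x2_0000_0000); // %sh1add_uext(2, 0xFFFFFFFF)
    assert_eq!(slli_uw(0xFFFF_FFFF, 5), 0x1F_FFFF_FFE0);    // %ishl_uextend(0xFFFFFFFF)
}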

@@ -0,0 +1,179 @@
test compile precise-output
set unwind_info=false
target riscv64
function %sext_add_i32(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = iadd.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; VCode:
; block0:
; addw a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addw a0, a0, a1
; ret
function %sext_sub_i32(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = isub.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; VCode:
; block0:
; subw a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; subw a0, a0, a1
; ret
function %sext_ishl_i32(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = ishl.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; VCode:
; block0:
; sllw a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sllw a0, a0, a1
; ret
function %sext_ushr_i32(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = ushr.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; VCode:
; block0:
; srlw a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; srlw a0, a0, a1
; ret
function %sext_sshr_i32(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = sshr.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; VCode:
; block0:
; sraw a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sraw a0, a0, a1
; ret
function %sext_add_const_i32(i32) -> i64 {
block0(v0: i32):
v1 = iconst.i32 -1
v2 = iadd.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; VCode:
; block0:
; addiw a0,a0,-1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addiw a0, a0, -1
; ret
function %sext_ishl_const_i32(i32) -> i64 {
block0(v0: i32):
v1 = iconst.i32 31
v2 = ishl.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; VCode:
; block0:
; slliw a0,a0,31
; ret
;
; Disassembled:
; block0: ; offset 0x0
; slliw a0, a0, 0x1f
; ret
function %sext_ushr_const_i32(i32) -> i64 {
block0(v0: i32):
v1 = iconst.i32 31
v2 = ushr.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; VCode:
; block0:
; srliw a0,a0,31
; ret
;
; Disassembled:
; block0: ; offset 0x0
; srliw a0, a0, 0x1f
; ret
function %sext_sshr_const_i32(i32) -> i64 {
block0(v0: i32):
v1 = iconst.i32 31
v2 = sshr.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; VCode:
; block0:
; sraiw a0,a0,31
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sraiw a0, a0, 0x1f
; ret
function %sext_sshr_i32_i128(i32, i128) -> i64 {
block0(v0: i32, v1: i128):
v2 = sshr.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; VCode:
; block0:
; sraw a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sraw a0, a0, a1
; ret
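These expectations lean on a property of RV64's W-class instructions: `addw`, `subw`, `sllw`, `srlw`, and `sraw` operate on the low 32 bits of their sources and sign-extend the 32-bit result to 64 bits, so a 32-bit CLIF op followed by `sextend.i64` folds into a single W instruction. A minimal Rust sketch of that equivalence, assuming only base RV64I semantics:

// addw rd, rs1, rs2: 32-bit add, then sign-extend the result to 64 bits.
fn addw(rs1: u64, rs2: u64) -> u64 {
    (rs1 as u32).wrapping_add(rs2 as u32) as i32 as i64 as u64
}

fn main() {
    // CLIF `sextend.i64(iadd.i32 x, y)` computes the same value.
    let (x, y) = (0x7FFF_FFFFu32, 1u32);
    let clif = (x.wrapping_add(y) as i32) as i64 as u64;
    assert_eq!(addw(x as u64, y as u64), clif); // both 0xFFFF_FFFF_8000_0000
}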

@@ -0,0 +1,190 @@
test compile precise-output
set unwind_info=false
target riscv64 has_zba
function %add_uw_i32(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = iadd.i64 v0, v2
return v3
}
; VCode:
; block0:
; add.uw a0,a1,a0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x3b, 0x85, 0xa5, 0x08
; ret
function %sh1add(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 1
v3 = ishl v1, v2
v4 = iadd.i64 v0, v3
return v4
}
; VCode:
; block0:
; sh1add a0,a1,a0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x33, 0xa5, 0xa5, 0x20
; ret
function %sh1add_uw(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = iconst.i64 1
v4 = ishl v2, v3
v5 = iadd.i64 v0, v4
return v5
}
; VCode:
; block0:
; sh1add.uw a0,a1,a0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x3b, 0xa5, 0xa5, 0x20
; ret
function %sh2add(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 2
v3 = ishl v1, v2
v4 = iadd.i64 v0, v3
return v4
}
; VCode:
; block0:
; sh2add a0,a1,a0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x33, 0xc5, 0xa5, 0x20
; ret
function %sh2add_uw(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = iconst.i64 2
v4 = ishl v2, v3
v5 = iadd.i64 v0, v4
return v5
}
; VCode:
; block0:
; sh2add.uw a0,a1,a0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x3b, 0xc5, 0xa5, 0x20
; ret
function %sh3add(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl v1, v2
v4 = iadd.i64 v0, v3
return v4
}
; VCode:
; block0:
; sh3add a0,a1,a0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x33, 0xe5, 0xa5, 0x20
; ret
function %sh3add_uw(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = iconst.i64 3
v4 = ishl v2, v3
v5 = iadd.i64 v0, v4
return v5
}
; VCode:
; block0:
; sh3add.uw a0,a1,a0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x3b, 0xe5, 0xa5, 0x20
; ret
;; Same as %sh1add but with the operands reversed
function %sh1add_r(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 1
v3 = ishl v1, v2
v4 = iadd.i64 v3, v0
return v4
}
; VCode:
; block0:
; sh1add a0,a1,a0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x33, 0xa5, 0xa5, 0x20
; ret
;; Same as %sh1add but with an uextended const
function %sh1add_uextend(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i32 1
v3 = uextend.i64 v2
v4 = ishl v1, v3
v5 = iadd.i64 v0, v4
return v5
}
; VCode:
; block0:
; sh1add a0,a1,a0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x33, 0xa5, 0xa5, 0x20
; ret
function %slli_uw(i32) -> i64 {
block0(v0: i32):
v1 = uextend.i64 v0
v2 = iconst.i64 5
v3 = ishl v1, v2
return v3
}
; VCode:
; block0:
; slli.uw a0,a0,5
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x1b, 0x15, 0x55, 0x08
; ret
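Two notes on the expectations above: the `.byte` sequences appear because the disassembler used by the filetest harness does not yet decode Zba encodings, so it prints the raw instruction bytes; and the register order in `sh1add a0,a1,a0` places the shifted operand in rs1 and the addend in rs2. A small sketch of that operand order, reusing the illustrative semantics from the sketch near the top (my helper, not a Cranelift API):

fn sh_n_add(rs1: u64, rs2: u64, n: u32) -> u64 { rs2.wrapping_add(rs1 << n) }

fn main() {
    // %sh1add(v0, v1) returns v0 + (v1 << 1), so lowering picks
    // rs1 = a1 (v1, the shifted value) and rs2 = a0 (v0, the addend).
    let (v0, v1) = (2u64, 7u64);
    assert_eq!(sh_n_add(v1, v0, 1), v0 + (v1 << 1)); // == 16
}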

@@ -716,13 +716,13 @@ block0(v0: i32, v1: i32, v2: i32):
; VCode:
; block0:
; mulw a2,a1,a2
-; addw a0,a2,a0
+; add a0,a2,a0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mulw a2, a1, a2
-; addw a0, a2, a0
+; add a0, a2, a0
; ret
function %msub_i32(i32, i32, i32) -> i32 {

@@ -47,7 +47,7 @@ block5(v5: i32):
; li a2,4
; j label7
; block7:
-; addw a0,a0,a2
+; add a0,a0,a2
; ret
;
; Disassembled:
@@ -84,6 +84,6 @@ block5(v5: i32):
; block5: ; offset 0x64
; addi a2, zero, 4
; block6: ; offset 0x68
-; addw a0, a0, a2
+; add a0, a0, a2
; ret

@@ -0,0 +1,20 @@
test compile precise-output
set unwind_info=false
target riscv64 has_zba=true
function %uextend32_64(i32) -> i64 {
block0(v0: i32):
v1 = uextend.i64 v0
return v1
}
; VCode:
; block0:
; zext.w a0,a0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x3b, 0x05, 0x05, 0x08
; ret
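A note on the expectation above: `zext.w rd, rs` is the standard assembler alias for `add.uw rd, rs, zero`, so with Zba enabled a 32-to-64-bit zero extend needs just one instruction. Its effect, as a one-line sketch:

// zext.w rd, rs == add.uw rd, rs, zero: keep the low 32 bits, clear the rest.
fn zext_w(rs: u64) -> u64 { rs as u32 as u64 }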

@@ -10,12 +10,12 @@ block0(v0: i8, v1: i8):
; VCode:
; block0:
-; addw a0,a0,a1
+; add a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; addw a0, a0, a1
+; add a0, a0, a1
; ret
function %add16(i16, i16) -> i16 {
@@ -26,12 +26,12 @@ block0(v0: i16, v1: i16):
; VCode:
; block0:
-; addw a0,a0,a1
+; add a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; addw a0, a0, a1
+; add a0, a0, a1
; ret
function %add32(i32, i32) -> i32 {
@@ -42,12 +42,12 @@ block0(v0: i32, v1: i32):
; VCode:
; block0:
-; addw a0,a0,a1
+; add a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; addw a0, a0, a1
+; add a0, a0, a1
; ret
function %add32_8(i32, i8) -> i32 {
@@ -61,14 +61,14 @@ block0(v0: i32, v1: i8):
; block0:
; slli a1,a1,56
; srai a3,a1,56
-; addw a0,a0,a3
+; add a0,a0,a3
; ret
;
; Disassembled:
; block0: ; offset 0x0
; slli a1, a1, 0x38
; srai a3, a1, 0x38
-; addw a0, a0, a3
+; add a0, a0, a3
; ret
function %add64_32(i64, i32) -> i64 {
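The `addw` → `add` replacements in this file (and in the hunks above) are sound because each function here produces an i32 or narrower result: only the low 32 bits of the destination register are significant, and a 64-bit `add` agrees with `addw` on exactly those bits. A minimal sketch of that arithmetic fact:

fn main() {
    let (a, b) = (0x1_2345_6789u64, 0xFFFF_FFFF_FFFF_FFFFu64);
    // The low 32 bits of a 64-bit add equal the 32-bit add of the low
    // 32 bits, which is also what addw leaves in the low half of rd.
    assert_eq!(a.wrapping_add(b) as u32, (a as u32).wrapping_add(b as u32));
}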

@@ -0,0 +1,235 @@
test interpret
test run
target aarch64
target s390x
target x86_64
target riscv64
target riscv64 has_zba
;; Various runtests intended to target the instructions encoded by the RISC-V `Zba` extension,
;; although other targets may also benefit from these tests and may implement similar optimizations.
function %add_uext_i32(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = iadd.i64 v0, v2
return v3
}
; run: %add_uext_i32(0, 0) == 0
; run: %add_uext_i32(2, 1) == 3
; run: %add_uext_i32(2, 0xFFFFFFFF) == 0x100000001
function %sh1add(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 1
v3 = ishl v1, v2
v4 = iadd.i64 v0, v3
return v4
}
; run: %sh1add(0, 0) == 0
; run: %sh1add(2, 1) == 4
; run: %sh1add(2, 0xFFFFFFFFFFFFFFFF) == 0
function %sh1add_uext(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = iconst.i64 1
v4 = ishl v2, v3
v5 = iadd.i64 v0, v4
return v5
}
; run: %sh1add_uext(0, 0) == 0
; run: %sh1add_uext(2, 1) == 4
; run: %sh1add_uext(2, 0xFFFFFFFF) == 0x200000000
; run: %sh1add_uext(0x100000000, 0x80000000) == 0x200000000
function %sh2add(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 2
v3 = ishl v1, v2
v4 = iadd.i64 v0, v3
return v4
}
; run: %sh2add(0, 0) == 0
; run: %sh2add(2, 1) == 6
; run: %sh2add(2, 0xFFFFFFFFFFFFFFFF) == 0xFFFFFFFFFFFFFFFE
function %sh2add_uext(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = iconst.i64 2
v4 = ishl v2, v3
v5 = iadd.i64 v0, v4
return v5
}
; run: %sh2add_uext(0, 0) == 0
; run: %sh2add_uext(2, 1) == 6
; run: %sh2add_uext(4, 0xFFFFFFFF) == 0x400000000
; run: %sh2add_uext(0x100000000, 0x80000000) == 0x300000000
function %sh3add(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl v1, v2
v4 = iadd.i64 v0, v3
return v4
}
; run: %sh3add(0, 0) == 0
; run: %sh3add(2, 1) == 10
; run: %sh3add(2, 0xFFFFFFFFFFFFFFFF) == 0xFFFFFFFFFFFFFFFA
function %sh3add_uext(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = iconst.i64 3
v4 = ishl v2, v3
v5 = iadd.i64 v0, v4
return v5
}
; run: %sh3add_uext(0, 0) == 0
; run: %sh3add_uext(2, 1) == 10
; run: %sh3add_uext(8, 0xFFFFFFFF) == 0x800000000
; run: %sh3add_uext(0x100000000, 0x80000000) == 0x500000000
function %ishl_uextend(i32) -> i64 {
block0(v0: i32):
v1 = uextend.i64 v0
v2 = iconst.i64 5
v3 = ishl v1, v2
return v3
}
; run: %ishl_uextend(0) == 0
; run: %ishl_uextend(1) == 0x20
; run: %ishl_uextend(0xFFFFFFFF) == 0x1FFFFFFFE0
;; These tests ensure that we don't merge the `uextend` and `ishl` instructions
;; in a way that doesn't respect the `ishl` semantics of cutting off the high bits.
function %add_uext_ishl_1(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = iconst.i32 1
v3 = ishl v1, v2
v4 = uextend.i64 v3
v5 = iadd.i64 v0, v4
return v5
}
; run: %add_uext_ishl_1(0x0123_4567, 0x8000_0000) == 0x0123_4567
; run: %add_uext_ishl_1(0x0123_4567, 0xC000_0000) == 0x8123_4567
; run: %add_uext_ishl_1(0x0123_4567, 0xE000_0000) == 0xC123_4567
function %add_uext_ishl_2(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = iconst.i32 2
v3 = ishl v1, v2
v4 = uextend.i64 v3
v5 = iadd.i64 v0, v4
return v5
}
; run: %add_uext_ishl_2(0x0123_4567, 0x8000_0000) == 0x0123_4567
; run: %add_uext_ishl_2(0x0123_4567, 0xC000_0000) == 0x0123_4567
; run: %add_uext_ishl_2(0x0123_4567, 0xE000_0000) == 0x8123_4567
function %add_uext_ishl_3(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = iconst.i32 3
v3 = ishl v1, v2
v4 = uextend.i64 v3
v5 = iadd.i64 v0, v4
return v5
}
; run: %add_uext_ishl_3(0x0123_4567, 0x8000_0000) == 0x0123_4567
; run: %add_uext_ishl_3(0x0123_4567, 0xC000_0000) == 0x0123_4567
; run: %add_uext_ishl_3(0x0123_4567, 0xE000_0000) == 0x0123_4567
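Concretely, the hazard the three tests above guard against: folding `uextend.i64(ishl.i32 x, n)` into a single `shNadd.uw` would shift before truncating, keeping bits that the i32 `ishl` must discard. A small Rust sketch of the difference, mirroring `%add_uext_ishl_1`'s first run line:

fn main() {
    let (v0, v1) = (0x0123_4567u64, 0x8000_0000u32);
    // Correct: shift in 32 bits (the top bit falls off), then zero-extend.
    let correct = v0.wrapping_add((v1 << 1) as u64);
    assert_eq!(correct, 0x0123_4567);
    // Wrong fold (sh1add.uw): zero-extend first, then shift in 64 bits.
    let wrong = v0.wrapping_add((v1 as u64) << 1);
    assert_eq!(wrong, 0x1_0123_4567); // keeps the bit that ishl.i32 drops
}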
;; These tests perform the operations in 32 bits and then sign-extend the results to 64 bits
function %sext_add_i32(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = iadd.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; run: %sext_add_i32(1, 0) == 1
; run: %sext_add_i32(0, -1) == -1
function %sext_sub_i32(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = isub.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; run: %sext_sub_i32(1, 0) == 1
; run: %sext_sub_i32(0, 1) == -1
function %sext_ishl_i32(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = ishl.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; run: %sext_ishl_i32(1, 31) == 0xFFFFFFFF80000000
function %sext_ushr_i32(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = ushr.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; run: %sext_ushr_i32(0x8000_0000, 0) == 0xFFFFFFFF80000000
; run: %sext_ushr_i32(0x8000_0000, 32) == 0xFFFFFFFF80000000
function %sext_sshr_i32(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = sshr.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; run: %sext_sshr_i32(0x8000_0000, 0) == 0xFFFFFFFF80000000
; run: %sext_sshr_i32(0x8000_0000, 32) == 0xFFFFFFFF80000000
function %sext_add_const_i32(i32) -> i64 {
block0(v0: i32):
v1 = iconst.i32 -1
v2 = iadd.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; run: %sext_add_const_i32(0) == -1
function %sext_ishl_const_i32(i32) -> i64 {
block0(v0: i32):
v1 = iconst.i32 31
v2 = ishl.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; run: %sext_ishl_const_i32(1) == 0xFFFFFFFF80000000
function %sext_ushr_const_i32(i32) -> i64 {
block0(v0: i32):
v1 = iconst.i32 32
v2 = ushr.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; run: %sext_ushr_const_i32(0x8000_0000) == 0xFFFFFFFF80000000
function %sext_sshr_const_i32(i32) -> i64 {
block0(v0: i32):
v1 = iconst.i32 32
v2 = sshr.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; run: %sext_sshr_const_i32(0x8000_0000) == 0xFFFFFFFF80000000

@@ -4,6 +4,7 @@ target aarch64
target s390x
target x86_64
target riscv64
+target riscv64 has_zba
target riscv64 has_zbb
target riscv64 has_zbkb

@@ -0,0 +1,18 @@
test interpret
test run
set enable_llvm_abi_extensions=true
target aarch64
target s390x
target x86_64
target riscv64
target riscv64 has_zba
function %sext_sshr_i32_i128(i32, i128) -> i64 {
block0(v0: i32, v1: i128):
v2 = sshr.i32 v0, v1
v3 = sextend.i64 v2
return v3
}
; run: %sext_sshr_i32_i128(0x8000_0000, 0) == 0xFFFFFFFF80000000
; run: %sext_sshr_i32_i128(0x8000_0000, 32) == 0xFFFFFFFF80000000
; run: %sext_sshr_i32_i128(0x8000_0000, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFF20) == 0xFFFFFFFF80000000
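These run lines rely on Cranelift's shift semantics: the effective shift amount is the given amount modulo the bit width of the shifted value, regardless of the amount's own type, so shifting an i32 by 32 — or by an i128 whose low bits are 0x20 — is a shift by zero. A minimal sketch of that masking:

// Cranelift's sshr on i32 masks the amount to `amt mod 32`.
fn sshr_i32(x: i32, amt: u128) -> i32 {
    x >> ((amt as u32) % 32)
}

fn main() {
    let x = 0x8000_0000u32 as i32;
    assert_eq!(sshr_i32(x, 32), x);               // 32 % 32 == 0
    assert_eq!(sshr_i32(x, u128::MAX - 0xDF), x); // low bits 0x...FF20 -> 0
}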

@@ -5,6 +5,7 @@ target aarch64
target s390x
target x86_64
target riscv64
+target riscv64 has_zba
target riscv64 has_zbb
target riscv64 has_zbkb