s390x: Implement bitrev (#4617)

Since s390x has no bit-reversal instruction, this is implemented as a
simple open-coded sequence.

Needed by the cg_clif frontend.
This commit is contained in:
Ulrich Weigand
2022-08-05 01:24:55 +02:00
committed by GitHub
parent 42233e8eda
commit f552a53654
5 changed files with 193 additions and 33 deletions

View File

@@ -1246,6 +1246,41 @@
(vec_and ty x (vec_imm ty 1)))
;;;; Rules for `bitrev` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Lower `bitrev` in two phases:
;;   1. Three `bitrev_bits` passes, applied innermost first, with
;;      (size, mask) = (1, 0xaa..), (2, 0xcc..), (4, 0xf0..).  Each pass
;;      exchanges neighbouring groups of `size` bits, so after all three
;;      the bits inside every byte are in reverse order.
;;   2. `bitrev_bytes` then reverses the order of the bytes themselves.
;; The masks are written as 64-bit patterns; the same constants are passed
;; for every `ty`.
(rule (lower (has_type ty (bitrev x)))
(bitrev_bytes ty
(bitrev_bits 4 0xf0f0_f0f0_f0f0_f0f0 ty
(bitrev_bits 2 0xcccc_cccc_cccc_cccc ty
(bitrev_bits 1 0xaaaa_aaaa_aaaa_aaaa ty x)))))
;; Helper: exchange adjacent groups of `size` bits in `x`.
;; Arguments are (size, bitmask, ty, x).  `bitmask` must have the upper
;; `size` bits of every 2*`size`-bit group set (see the callers in the
;; `bitrev` lowering rule).  The result is
;;   ((x << size) & bitmask) | ((x >> size) & ~bitmask)
(decl bitrev_bits (u8 u64 Type Reg) Reg)
;; Scalar case: types that fit in a 64-bit general-purpose register.
(rule (bitrev_bits size bitmask (fits_in_64 ty) x)
;; Materialize the mask as an immediate of type `ty`.
(let ((mask Reg (imm ty bitmask))
;; Shifts are performed at `(ty_ext32 ty)` width (presumably at least
;; 32 bits for the narrow types -- confirm against `ty_ext32`).
(xh Reg (lshl_imm (ty_ext32 ty) x size))
(xl Reg (lshr_imm (ty_ext32 ty) x size))
(xh_masked Reg (and_reg ty xh mask))
;; Any bits from above `ty`'s width that the right shift brings down
;; land in positions where `mask` is set, so the `~mask` AND discards
;; them.
(xl_masked Reg (and_reg ty xl (not_reg ty mask))))
(or_reg ty xh_masked xl_masked)))
;; Vector-register case (types matched by `vr128_ty`): same bit-group
;; exchange as the scalar rule, but done on the whole 128-bit register.
(rule (bitrev_bits size bitmask (vr128_ty ty) x)
;; Replicate the 64-bit mask into both 64-bit lanes.
(let ((mask Reg (vec_imm_splat $I64X2 bitmask))
;; The shift amount is replicated into every byte of a vector register,
;; which is the form the by-bit vector shifts below take as operand.
(size_reg Reg (vec_imm_splat $I8X16 (u8_as_u64 size)))
(xh Reg (vec_lshl_by_bit x size_reg))
(xl Reg (vec_lshr_by_bit x size_reg)))
;; NOTE(review): `vec_select` presumably takes bits from `xh` where
;; `mask` is 1 and from `xl` elsewhere, mirroring the and/and-not/or
;; sequence of the scalar rule -- confirm against its definition.
(vec_select ty xh xl mask)))
;; Helper: reverse the order of the bytes of `x`, interpreted as type `ty`.
;; Used as the final step of the `bitrev` lowering, after the bits within
;; each byte have already been reversed.
(decl bitrev_bytes (Type Reg) Reg)
;; A single byte has no byte order to reverse.
(rule (bitrev_bytes $I8 x) x)
;; 16-bit: byte-swap as a 32-bit value, which leaves the two swapped bytes
;; in the upper half, then shift right by 16 to bring them back down.
(rule (bitrev_bytes $I16 x) (lshr_imm $I32 (bswap_reg $I32 x) 16))
;; 32- and 64-bit: a single byte swap at the natural width.
(rule (bitrev_bytes $I32 x) (bswap_reg $I32 x))
(rule (bitrev_bytes $I64 x) (bswap_reg $I64 x))
;; 128-bit: permute `x` with itself using the byte-index vector
;; 15, 14, ..., 0, i.e. the 16 bytes in reverse order.
(rule (bitrev_bytes $I128 x)
(vec_permute $I128 x x
(vec_imm $I8X16 (imm8x16 15 14 13 12 11 10 9 8
7 6 5 4 3 2 1 0))))
;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The FLOGR hardware instruction always operates on the full 64-bit register.

View File

@@ -104,6 +104,7 @@ impl LowerBackend for S390xBackend {
| Opcode::Bextend
| Opcode::Bmask
| Opcode::Bint
| Opcode::Bitrev
| Opcode::Clz
| Opcode::Cls
| Opcode::Ctz
@@ -198,8 +199,7 @@ impl LowerBackend for S390xBackend {
)
}
Opcode::Bitrev
| Opcode::ConstAddr
Opcode::ConstAddr
| Opcode::TlsValue
| Opcode::GetPinnedReg
| Opcode::SetPinnedReg

View File

@@ -1,40 +1,163 @@
test compile precise-output
target s390x
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; BITREV
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %bitrev_i128(i128) -> i128 {
block0(v0: i128):
v1 = bitrev v0
return v1
}
; FIXME: bitrev not yet implemented
; block0:
; vl %v0, 0(%r3)
; vrepib %v5, 170
; vrepib %v7, 1
; vsl %v17, %v0, %v7
; vsrl %v19, %v0, %v7
; vsel %v21, %v17, %v19, %v5
; vrepib %v23, 204
; vrepib %v25, 2
; vsl %v27, %v21, %v25
; vsrl %v29, %v21, %v25
; vsel %v31, %v27, %v29, %v23
; vrepib %v1, 240
; vrepib %v3, 4
; vsl %v5, %v31, %v3
; vsrl %v7, %v31, %v3
; vsel %v17, %v5, %v7, %v1
; bras %r1, 20 ; data.u128 0x0f0e0d0c0b0a09080706050403020100 ; vl %v19, 0(%r1)
; vperm %v21, %v17, %v17, %v19
; vst %v21, 0(%r2)
; br %r14
;function %bitrev_i64(i64) -> i64 {
;block0(v0: i64):
; v1 = bitrev v0
; return v1
;}
;
;function %bitrev_i32(i32) -> i32 {
;block0(v0: i32):
; v1 = bitrev v0
; return v1
;}
;
;function %bitrev_i16(i16) -> i16 {
;block0(v0: i16):
; v1 = bitrev v0
; return v1
;}
;
;function %bitrev_i8(i8) -> i8 {
;block0(v0: i8):
; v1 = bitrev v0
; return v1
;}
;
function %bitrev_i64(i64) -> i64 {
block0(v0: i64):
v1 = bitrev v0
return v1
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; CLZ
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; block0:
; llihf %r5, 2863311530
; iilf %r5, 2863311530
; sllg %r4, %r2, 1
; srlg %r2, %r2, 1
; ngr %r4, %r5
; xilf %r5, 4294967295
; xihf %r5, 4294967295
; ngrk %r5, %r2, %r5
; ogrk %r2, %r4, %r5
; llihf %r4, 3435973836
; iilf %r4, 3435973836
; sllg %r3, %r2, 2
; srlg %r5, %r2, 2
; ngr %r3, %r4
; xilf %r4, 4294967295
; xihf %r4, 4294967295
; ngrk %r4, %r5, %r4
; ogrk %r5, %r3, %r4
; llihf %r3, 4042322160
; iilf %r3, 4042322160
; sllg %r2, %r5, 4
; srlg %r4, %r5, 4
; ngr %r2, %r3
; xilf %r3, 4294967295
; xihf %r3, 4294967295
; ngrk %r3, %r4, %r3
; ogrk %r4, %r2, %r3
; lrvgr %r2, %r4
; br %r14
function %bitrev_i32(i32) -> i32 {
block0(v0: i32):
v1 = bitrev v0
return v1
}
; block0:
; iilf %r5, 2863311530
; sllk %r3, %r2, 1
; srlk %r2, %r2, 1
; nr %r3, %r5
; xilf %r5, 4294967295
; nrk %r4, %r2, %r5
; ork %r2, %r3, %r4
; iilf %r4, 3435973836
; sllk %r3, %r2, 2
; srlk %r5, %r2, 2
; nrk %r2, %r3, %r4
; xilf %r4, 4294967295
; nrk %r3, %r5, %r4
; ork %r5, %r2, %r3
; iilf %r3, 4042322160
; sllk %r2, %r5, 4
; srlk %r4, %r5, 4
; nrk %r5, %r2, %r3
; xilf %r3, 4294967295
; nrk %r2, %r4, %r3
; ork %r4, %r5, %r2
; lrvr %r2, %r4
; br %r14
function %bitrev_i16(i16) -> i16 {
block0(v0: i16):
v1 = bitrev v0
return v1
}
; block0:
; lhi %r5, -21846
; sllk %r3, %r2, 1
; srlk %r2, %r2, 1
; nr %r3, %r5
; xilf %r5, 4294967295
; nrk %r4, %r2, %r5
; ork %r2, %r3, %r4
; lhi %r4, -13108
; sllk %r3, %r2, 2
; srlk %r5, %r2, 2
; nrk %r2, %r3, %r4
; xilf %r4, 4294967295
; nrk %r3, %r5, %r4
; ork %r5, %r2, %r3
; lhi %r3, -3856
; sllk %r2, %r5, 4
; srlk %r4, %r5, 4
; nrk %r5, %r2, %r3
; xilf %r3, 4294967295
; nrk %r2, %r4, %r3
; ork %r4, %r5, %r2
; lrvr %r2, %r4
; srlk %r2, %r2, 16
; br %r14
function %bitrev_i8(i8) -> i8 {
block0(v0: i8):
v1 = bitrev v0
return v1
}
; block0:
; lhi %r5, -21846
; sllk %r3, %r2, 1
; srlk %r2, %r2, 1
; nr %r3, %r5
; xilf %r5, 4294967295
; nrk %r4, %r2, %r5
; ork %r2, %r3, %r4
; lhi %r4, -13108
; sllk %r3, %r2, 2
; srlk %r5, %r2, 2
; nrk %r2, %r3, %r4
; xilf %r4, 4294967295
; nrk %r3, %r5, %r4
; ork %r5, %r2, %r3
; lhi %r3, -3856
; sllk %r2, %r5, 4
; srlk %r4, %r5, 4
; nrk %r5, %r2, %r3
; xilf %r3, 4294967295
; nrk %r2, %r4, %r3
; ork %r2, %r5, %r2
; br %r14
function %clz_i128(i128) -> i128 {
block0(v0: i128):

View File

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
target x86_64
function %bitrev_i8(i8) -> i8 {

View File

@@ -1,6 +1,7 @@
test run
set enable_llvm_abi_extensions=true
target aarch64
target s390x
target x86_64
function %reverse_bits_zero() -> b1 {