diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle
index c727be087a..85ed948fa5 100644
--- a/cranelift/codegen/src/isa/s390x/lower.isle
+++ b/cranelift/codegen/src/isa/s390x/lower.isle
@@ -1246,6 +1246,41 @@
       (vec_and ty x (vec_imm ty 1)))
 
 
+;;;; Rules for `bitrev` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty (bitrev x))) ;; three bitrev_bits passes (innermost runs first), then bitrev_bytes
+      (bitrev_bytes ty
+        (bitrev_bits 4 0xf0f0_f0f0_f0f0_f0f0 ty ;; pass 3: exchange the two 4-bit halves of every byte
+          (bitrev_bits 2 0xcccc_cccc_cccc_cccc ty ;; pass 2: exchange neighbouring 2-bit pairs
+            (bitrev_bits 1 0xaaaa_aaaa_aaaa_aaaa ty x))))) ;; pass 1: exchange neighbouring single bits
+
+(decl bitrev_bits (u8 u64 Type Reg) Reg) ;; args: shift size, bitmask, type, input register
+(rule (bitrev_bits size bitmask (fits_in_64 ty) x) ;; scalar case: ((x << size) & mask) | ((x >> size) & ~mask)
+      (let ((mask Reg (imm ty bitmask))
+            (xh Reg (lshl_imm (ty_ext32 ty) x size)) ;; both shifts are issued at (ty_ext32 ty); the and/or use ty
+            (xl Reg (lshr_imm (ty_ext32 ty) x size))
+            (xh_masked Reg (and_reg ty xh mask)) ;; keep only the bit positions selected by mask
+            (xl_masked Reg (and_reg ty xl (not_reg ty mask)))) ;; keep only the complementary positions
+        (or_reg ty xh_masked xl_masked)))
+
+(rule (bitrev_bits size bitmask (vr128_ty ty) x) ;; case for types matched by vr128_ty
+      (let ((mask Reg (vec_imm_splat $I64X2 bitmask)) ;; bitmask replicated into both 64-bit lanes
+            (size_reg Reg (vec_imm_splat $I8X16 (u8_as_u64 size))) ;; shift count replicated into all 16 byte lanes
+            (xh Reg (vec_lshl_by_bit x size_reg))
+            (xl Reg (vec_lshr_by_bit x size_reg)))
+        (vec_select ty xh xl mask))) ;; NOTE(review): presumably xh bits where mask is set, xl bits elsewhere, as in the scalar rule -- confirm
+
+(decl bitrev_bytes (Type Reg) Reg) ;; last step of the bitrev lowering; one rule per integer width
+(rule (bitrev_bytes $I8 x) x) ;; a single byte is returned unchanged
+(rule (bitrev_bytes $I16 x) (lshr_imm $I32 (bswap_reg $I32 x) 16)) ;; swapped as $I32, then shifted right by 16; presumably the wanted 16 bits land in the upper half -- confirm
+(rule (bitrev_bytes $I32 x) (bswap_reg $I32 x))
+(rule (bitrev_bytes $I64 x) (bswap_reg $I64 x))
+(rule (bitrev_bytes $I128 x)
+      (vec_permute $I128 x x ;; both permute inputs are x
+        (vec_imm $I8X16 (imm8x16 15 14 13 12 11 10 9 8 ;; byte selectors in descending order, 15 down to 0
+                                 7 6 5 4 3 2 1 0))))
+
+
 ;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; The FLOGR hardware instruction always operates on the full 64-bit register.
diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs index f6c24b0a32..e59fbda137 100644 --- a/cranelift/codegen/src/isa/s390x/lower.rs +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -104,6 +104,7 @@ impl LowerBackend for S390xBackend { | Opcode::Bextend | Opcode::Bmask | Opcode::Bint + | Opcode::Bitrev | Opcode::Clz | Opcode::Cls | Opcode::Ctz @@ -198,8 +199,7 @@ impl LowerBackend for S390xBackend { ) } - Opcode::Bitrev - | Opcode::ConstAddr + Opcode::ConstAddr | Opcode::TlsValue | Opcode::GetPinnedReg | Opcode::SetPinnedReg diff --git a/cranelift/filetests/filetests/isa/s390x/bitops.clif b/cranelift/filetests/filetests/isa/s390x/bitops.clif index 847a239797..2213deb118 100644 --- a/cranelift/filetests/filetests/isa/s390x/bitops.clif +++ b/cranelift/filetests/filetests/isa/s390x/bitops.clif @@ -1,40 +1,163 @@ test compile precise-output target s390x -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; BITREV -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +function %bitrev_i128(i128) -> i128 { +block0(v0: i128): + v1 = bitrev v0 + return v1 +} -; FIXME: bitrev not yet implemented +; block0: +; vl %v0, 0(%r3) +; vrepib %v5, 170 +; vrepib %v7, 1 +; vsl %v17, %v0, %v7 +; vsrl %v19, %v0, %v7 +; vsel %v21, %v17, %v19, %v5 +; vrepib %v23, 204 +; vrepib %v25, 2 +; vsl %v27, %v21, %v25 +; vsrl %v29, %v21, %v25 +; vsel %v31, %v27, %v29, %v23 +; vrepib %v1, 240 +; vrepib %v3, 4 +; vsl %v5, %v31, %v3 +; vsrl %v7, %v31, %v3 +; vsel %v17, %v5, %v7, %v1 +; bras %r1, 20 ; data.u128 0x0f0e0d0c0b0a09080706050403020100 ; vl %v19, 0(%r1) +; vperm %v21, %v17, %v17, %v19 +; vst %v21, 0(%r2) +; br %r14 -;function %bitrev_i64(i64) -> i64 { -;block0(v0: i64): -; v1 = bitrev v0 -; return v1 -;} -; -;function %bitrev_i32(i32) -> i32 { -;block0(v0: i32): -; v1 = bitrev v0 -; return v1 -;} -; -;function %bitrev_i16(i16) -> i16 { -;block0(v0: i16): -; v1 = bitrev v0 -; return v1 -;} -; -;function %bitrev_i8(i8) -> i8 { -;block0(v0: i8): -; v1 = bitrev 
v0 -; return v1 -;} -; +function %bitrev_i64(i64) -> i64 { +block0(v0: i64): + v1 = bitrev v0 + return v1 +} -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; CLZ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; llihf %r5, 2863311530 +; iilf %r5, 2863311530 +; sllg %r4, %r2, 1 +; srlg %r2, %r2, 1 +; ngr %r4, %r5 +; xilf %r5, 4294967295 +; xihf %r5, 4294967295 +; ngrk %r5, %r2, %r5 +; ogrk %r2, %r4, %r5 +; llihf %r4, 3435973836 +; iilf %r4, 3435973836 +; sllg %r3, %r2, 2 +; srlg %r5, %r2, 2 +; ngr %r3, %r4 +; xilf %r4, 4294967295 +; xihf %r4, 4294967295 +; ngrk %r4, %r5, %r4 +; ogrk %r5, %r3, %r4 +; llihf %r3, 4042322160 +; iilf %r3, 4042322160 +; sllg %r2, %r5, 4 +; srlg %r4, %r5, 4 +; ngr %r2, %r3 +; xilf %r3, 4294967295 +; xihf %r3, 4294967295 +; ngrk %r3, %r4, %r3 +; ogrk %r4, %r2, %r3 +; lrvgr %r2, %r4 +; br %r14 + +function %bitrev_i32(i32) -> i32 { +block0(v0: i32): + v1 = bitrev v0 + return v1 +} + +; block0: +; iilf %r5, 2863311530 +; sllk %r3, %r2, 1 +; srlk %r2, %r2, 1 +; nr %r3, %r5 +; xilf %r5, 4294967295 +; nrk %r4, %r2, %r5 +; ork %r2, %r3, %r4 +; iilf %r4, 3435973836 +; sllk %r3, %r2, 2 +; srlk %r5, %r2, 2 +; nrk %r2, %r3, %r4 +; xilf %r4, 4294967295 +; nrk %r3, %r5, %r4 +; ork %r5, %r2, %r3 +; iilf %r3, 4042322160 +; sllk %r2, %r5, 4 +; srlk %r4, %r5, 4 +; nrk %r5, %r2, %r3 +; xilf %r3, 4294967295 +; nrk %r2, %r4, %r3 +; ork %r4, %r5, %r2 +; lrvr %r2, %r4 +; br %r14 + +function %bitrev_i16(i16) -> i16 { +block0(v0: i16): + v1 = bitrev v0 + return v1 +} + +; block0: +; lhi %r5, -21846 +; sllk %r3, %r2, 1 +; srlk %r2, %r2, 1 +; nr %r3, %r5 +; xilf %r5, 4294967295 +; nrk %r4, %r2, %r5 +; ork %r2, %r3, %r4 +; lhi %r4, -13108 +; sllk %r3, %r2, 2 +; srlk %r5, %r2, 2 +; nrk %r2, %r3, %r4 +; xilf %r4, 4294967295 +; nrk %r3, %r5, %r4 +; ork %r5, %r2, %r3 +; lhi %r3, -3856 +; sllk %r2, %r5, 4 +; srlk %r4, %r5, 4 +; nrk %r5, %r2, %r3 +; xilf %r3, 4294967295 +; nrk %r2, %r4, %r3 +; ork %r4, %r5, %r2 +; lrvr %r2, %r4 +; srlk %r2, %r2, 16 +; br %r14 + 
+function %bitrev_i8(i8) -> i8 { +block0(v0: i8): + v1 = bitrev v0 + return v1 +} + +; block0: +; lhi %r5, -21846 +; sllk %r3, %r2, 1 +; srlk %r2, %r2, 1 +; nr %r3, %r5 +; xilf %r5, 4294967295 +; nrk %r4, %r2, %r5 +; ork %r2, %r3, %r4 +; lhi %r4, -13108 +; sllk %r3, %r2, 2 +; srlk %r5, %r2, 2 +; nrk %r2, %r3, %r4 +; xilf %r4, 4294967295 +; nrk %r3, %r5, %r4 +; ork %r5, %r2, %r3 +; lhi %r3, -3856 +; sllk %r2, %r5, 4 +; srlk %r4, %r5, 4 +; nrk %r5, %r2, %r3 +; xilf %r3, 4294967295 +; nrk %r2, %r4, %r3 +; ork %r2, %r5, %r2 +; br %r14 function %clz_i128(i128) -> i128 { block0(v0: i128): diff --git a/cranelift/filetests/filetests/runtests/bitrev.clif b/cranelift/filetests/filetests/runtests/bitrev.clif index 2f3e7974e6..f0aa2194e3 100644 --- a/cranelift/filetests/filetests/runtests/bitrev.clif +++ b/cranelift/filetests/filetests/runtests/bitrev.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target s390x target x86_64 function %bitrev_i8(i8) -> i8 { diff --git a/cranelift/filetests/filetests/runtests/i128-bitrev.clif b/cranelift/filetests/filetests/runtests/i128-bitrev.clif index c685c45ee0..5561646744 100644 --- a/cranelift/filetests/filetests/runtests/i128-bitrev.clif +++ b/cranelift/filetests/filetests/runtests/i128-bitrev.clif @@ -1,6 +1,7 @@ test run set enable_llvm_abi_extensions=true target aarch64 +target s390x target x86_64 function %reverse_bits_zero() -> b1 {