diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index 0685d7e653..1468c60b2a 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -1149,17 +1149,26 @@ ;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; The result of cls is not supposed to count the sign bit itself, just +;; additional copies of it. Therefore, when computing cls in terms of clz, +;; we need to subtract one. Fold this into the offset computation. +(decl cls_offset (Type Reg) Reg) +(rule (cls_offset $I8 x) (add_simm16 $I8 x -57)) +(rule (cls_offset $I16 x) (add_simm16 $I16 x -49)) +(rule (cls_offset $I32 x) (add_simm16 $I32 x -33)) +(rule (cls_offset $I64 x) (add_simm16 $I64 x -1)) + ;; Count leading sign-bit copies. We don't have any instruction for that, ;; so we instead count the leading zeros after inverting the input if negative, ;; i.e. computing -;; cls(x) == clz(x ^ (x >> 63)) +;; cls(x) == clz(x ^ (x >> 63)) - 1 ;; where x is the sign-extended input. (rule (lower (has_type (fits_in_64 ty) (cls x))) (let ((ext_reg Reg (put_in_reg_sext64 x)) (signbit_copies Reg (ashr_imm $I64 ext_reg 63)) (inv_reg Reg (xor_reg $I64 ext_reg signbit_copies)) (clz RegPair (clz_reg 64 inv_reg))) - (clz_offset ty (regpair_hi clz)))) + (cls_offset ty (regpair_hi clz)))) ;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1214,12 +1223,14 @@ ;; of each input byte separately, so we need to accumulate those partial ;; results via a series of log2(type size in bytes) - 1 additions. We ;; accumulate in the high byte, so that a final right shift will zero out -;; any unrelated bits to give a clean result. +;; any unrelated bits to give a clean result. (This does not work with +;; $I16, where we instead accumulate in the low byte and clear high bits +;; via an explicit and operation.) (rule (lower (has_type (and (mie2_disabled) $I16) (popcnt x))) (let ((cnt2 Reg (popcnt_byte x)) - (cnt1 Reg (add_reg $I32 cnt2 (lshl_imm $I32 cnt2 8)))) - (lshr_imm $I32 cnt1 8))) + (cnt1 Reg (add_reg $I32 cnt2 (lshr_imm $I32 cnt2 8)))) + (and_uimm16shifted $I32 cnt1 (uimm16shifted 255 0)))) (rule (lower (has_type (and (mie2_disabled) $I32) (popcnt x))) (let ((cnt4 Reg (popcnt_byte x)) diff --git a/cranelift/filetests/filetests/isa/s390x/bitops.clif b/cranelift/filetests/filetests/isa/s390x/bitops.clif index 444b5242e1..3aa4b12dec 100644 --- a/cranelift/filetests/filetests/isa/s390x/bitops.clif +++ b/cranelift/filetests/filetests/isa/s390x/bitops.clif @@ -93,7 +93,7 @@ block0(v0: i64): ; srag %r5, %r2, 63 ; xgrk %r3, %r2, %r5 ; flogr %r0, %r3 -; lgr %r2, %r0 +; aghik %r2, %r0, -1 ; br %r14 function %cls_i32(i32) -> i32 { @@ -107,7 +107,7 @@ block0(v0: i32): ; srag %r3, %r5, 63 ; xgr %r5, %r3 ; flogr %r0, %r5 -; ahik %r2, %r0, -32 +; ahik %r2, %r0, -33 ; br %r14 function %cls_i16(i16) -> i16 { @@ -121,7 +121,7 @@ block0(v0: i16): ; srag %r3, %r5, 63 ; xgr %r5, %r3 ; flogr %r0, %r5 -; ahik %r2, %r0, -48 +; ahik %r2, %r0, -49 ; br %r14 function %cls_i8(i8) -> i8 { @@ -135,7 +135,7 @@ block0(v0: i8): ; srag %r3, %r5, 63 ; xgr %r5, %r3 ; flogr %r0, %r5 -; ahik %r2, %r0, -56 +; ahik %r2, %r0, -57 ; br %r14 function %ctz_i64(i64) -> i64 { @@ -238,9 +238,9 @@ block0(v0: i16): ; block0: ; popcnt %r5, %r2 -; sllk %r3, %r5, 8 -; ar %r5, %r3 -; srlk %r2, %r5, 8 +; srlk %r3, %r5, 8 +; ark %r2, %r5, %r3 +; nill %r2, 255 ; br %r14 function %popcnt_i8(i8) -> i8 { diff --git a/cranelift/filetests/filetests/runtests/bint.clif b/cranelift/filetests/filetests/runtests/bint.clif index f107d12648..996b635312 100644 --- a/cranelift/filetests/filetests/runtests/bint.clif +++ b/cranelift/filetests/filetests/runtests/bint.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target s390x target x86_64 function %bint_b1_i8_true() -> i8 { diff --git a/cranelift/filetests/filetests/runtests/cls.clif b/cranelift/filetests/filetests/runtests/cls.clif index 00411c33d2..fdd937bd35 100644 --- a/cranelift/filetests/filetests/runtests/cls.clif +++ b/cranelift/filetests/filetests/runtests/cls.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target s390x ; not implemented on `x86_64` function %cls_i8(i8) -> i8 { diff --git a/cranelift/filetests/filetests/runtests/clz.clif b/cranelift/filetests/filetests/runtests/clz.clif index a6f7dde54b..dced407b74 100644 --- a/cranelift/filetests/filetests/runtests/clz.clif +++ b/cranelift/filetests/filetests/runtests/clz.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target s390x target x86_64 target x86_64 has_lzcnt diff --git a/cranelift/filetests/filetests/runtests/conversions.clif b/cranelift/filetests/filetests/runtests/conversions.clif index 42becd8b00..38ba57c7ab 100644 --- a/cranelift/filetests/filetests/runtests/conversions.clif +++ b/cranelift/filetests/filetests/runtests/conversions.clif @@ -1,6 +1,7 @@ test run target x86_64 +target s390x target aarch64 function %fpromote_f32_f64(i64 vmctx, i64, f32) -> f64 { diff --git a/cranelift/filetests/filetests/runtests/ctz.clif b/cranelift/filetests/filetests/runtests/ctz.clif index a7776d1897..5f8f7023da 100644 --- a/cranelift/filetests/filetests/runtests/ctz.clif +++ b/cranelift/filetests/filetests/runtests/ctz.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target s390x target x86_64 target x86_64 has_bmi1 diff --git a/cranelift/filetests/filetests/runtests/iabs.clif b/cranelift/filetests/filetests/runtests/iabs.clif index 8781744ece..f5552c30ec 100644 --- a/cranelift/filetests/filetests/runtests/iabs.clif +++ b/cranelift/filetests/filetests/runtests/iabs.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target s390x ; x86_64 only supports vector iabs function %iabs_i8(i8) -> i8 { @@ -41,4 +42,4 @@ block0(v0: i64): ; run: %iabs_i64(0) == 0 ; run: %iabs_i64(9223372036854775807) == 9223372036854775807 ; run: %iabs_i64(-9223372036854775807) == 9223372036854775807 -; run: %iabs_i64(-9223372036854775808) == -9223372036854775808 \ No newline at end of file +; run: %iabs_i64(-9223372036854775808) == -9223372036854775808 diff --git a/cranelift/filetests/filetests/runtests/icmp-ugt.clif b/cranelift/filetests/filetests/runtests/icmp-ugt.clif index 5e99ec4ff2..76d67e0cba 100644 --- a/cranelift/filetests/filetests/runtests/icmp-ugt.clif +++ b/cranelift/filetests/filetests/runtests/icmp-ugt.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target s390x target x86_64 function %icmp_ugt_i8(i8, i8) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/ireduce.clif b/cranelift/filetests/filetests/runtests/ireduce.clif index 292bc14f2c..b103cbb5b8 100644 --- a/cranelift/filetests/filetests/runtests/ireduce.clif +++ b/cranelift/filetests/filetests/runtests/ireduce.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target s390x target x86_64 function %ireduce_i16_i8(i16) -> i8 { diff --git a/cranelift/filetests/filetests/runtests/load-op-store.clif b/cranelift/filetests/filetests/runtests/load-op-store.clif index fd6a8b8291..ebf692b447 100644 --- a/cranelift/filetests/filetests/runtests/load-op-store.clif +++ b/cranelift/filetests/filetests/runtests/load-op-store.clif @@ -1,5 +1,6 @@ test run target x86_64 +target s390x target aarch64 function %load_op_store_iadd_i64(i64 vmctx, i64, i64) -> i64 { @@ -28,7 +29,7 @@ function %load_op_store_iadd_i32(i64 vmctx, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): v3 = heap_addr.i64 heap0, v1, 4 - v4 = iconst.i64 42 + v4 = iconst.i32 42 store.i32 v4, v3 v5 = load.i32 v3 v6 = iadd.i32 v5, v2 diff --git a/cranelift/filetests/filetests/runtests/popcnt.clif b/cranelift/filetests/filetests/runtests/popcnt.clif index 1afcca329a..560031d4de 100644 --- a/cranelift/filetests/filetests/runtests/popcnt.clif +++ b/cranelift/filetests/filetests/runtests/popcnt.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target s390x target x86_64 target x86_64 has_popcnt diff --git a/cranelift/filetests/filetests/runtests/select.clif b/cranelift/filetests/filetests/runtests/select.clif index 6356b5226c..0aa575af1f 100644 --- a/cranelift/filetests/filetests/runtests/select.clif +++ b/cranelift/filetests/filetests/runtests/select.clif @@ -1,5 +1,6 @@ test interpret test run +target s390x target x86_64 function %select_eq_f32(f32, f32) -> i32 { diff --git a/cranelift/filetests/filetests/runtests/simd-scalartovector-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-scalartovector-aarch64.clif index 5a458f7de6..a49b525260 100644 --- a/cranelift/filetests/filetests/runtests/simd-scalartovector-aarch64.clif +++ b/cranelift/filetests/filetests/runtests/simd-scalartovector-aarch64.clif @@ -1,5 +1,6 @@ test run target aarch64 +target s390x ; i8 and i16 are invalid source sizes for x86_64 function %scalartovector_i8(i8) -> i8x16 { @@ -16,4 +17,4 @@ block0(v0: i16): return v1 } ; run: %scalartovector_i16(1) == [1 0 0 0 0 0 0 0] -; run: %scalartovector_i16(65535) == [65535 0 0 0 0 0 0 0] \ No newline at end of file +; run: %scalartovector_i16(65535) == [65535 0 0 0 0 0 0 0] diff --git a/cranelift/filetests/filetests/runtests/simd-scalartovector.clif b/cranelift/filetests/filetests/runtests/simd-scalartovector.clif index 37d726f8bf..737f70866c 100644 --- a/cranelift/filetests/filetests/runtests/simd-scalartovector.clif +++ b/cranelift/filetests/filetests/runtests/simd-scalartovector.clif @@ -1,5 +1,6 @@ test run target aarch64 +target s390x set enable_simd target x86_64 has_sse3 has_ssse3 has_sse41 diff --git a/cranelift/filetests/filetests/runtests/smulhi-aarch64.clif b/cranelift/filetests/filetests/runtests/smulhi-aarch64.clif index b123f2f0c9..031602552e 100644 --- a/cranelift/filetests/filetests/runtests/smulhi-aarch64.clif +++ b/cranelift/filetests/filetests/runtests/smulhi-aarch64.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target s390x ; x86_64 backend only supports `i16`, `i32`, and `i64` types. function %smulhi_i8(i8, i8) -> i8 { @@ -10,4 +11,4 @@ block0(v0: i8, v1: i8): } ; run: %smulhi_i8(-2, -4) == 0 ; run: %smulhi_i8(2, -4) == -1 -; run: %smulhi_i8(127, 127) == 63 \ No newline at end of file +; run: %smulhi_i8(127, 127) == 63 diff --git a/cranelift/filetests/filetests/runtests/smulhi.clif b/cranelift/filetests/filetests/runtests/smulhi.clif index 306ee70913..979ee2588e 100644 --- a/cranelift/filetests/filetests/runtests/smulhi.clif +++ b/cranelift/filetests/filetests/runtests/smulhi.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target s390x set enable_simd target x86_64 has_sse3 has_ssse3 has_sse41