From a0a97f5e8f514ff099b7b2239d519e567ee9212e Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 13 Feb 2023 09:41:18 -0600 Subject: [PATCH] Add (bnot (bxor x y)) lowerings for s390x/aarch64 (#5763) * Add (bnot (bxor x y)) lowerings for s390x/aarch64 I originally thought that s390x's original lowering in #5709 was sufficient, but as was rightfully pointed out, `(bnot (bxor x y))` is equivalent to `(bxor x (bnot y))`, so the special lowering for one should also apply as a special lowering for the other. For the s390x and aarch64 backends, which already have a fused lowering of the bxor/bnot combination, additionally add a lowering for the bnot/bxor combination. * Add bnot(bxor(..)) tests for s390x 128-bit sizes --- cranelift/codegen/src/isa/aarch64/lower.isle | 5 +++ cranelift/codegen/src/isa/s390x/lower.isle | 9 +++++ .../filetests/isa/aarch64/bitops.clif | 23 ++++++++++++ .../filetests/isa/s390x/bitops-optimized.clif | 11 ++++++ .../filetests/isa/s390x/bitwise.clif | 37 +++++++++++++++++++ 5 files changed, 85 insertions(+) diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index c42bc2c411..fca811ea82 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -1052,6 +1052,11 @@ (rule -2 (lower (has_type (ty_vec128 ty) (bnot x))) (not x (vector_size ty))) +;; Special-cases for fusing a bnot with bxor +(rule 2 (lower (has_type (fits_in_64 ty) (bnot (bxor x y)))) + (alu_rs_imm_logic (ALUOp.EorNot) ty x y)) +(rule 3 (lower (has_type $I128 (bnot (bxor x y)))) (i128_alu_bitop (ALUOp.EorNot) $I64 x y)) + ;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type (fits_in_64 ty) (band x y))) diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index f1768f95d5..ddec17a081 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -956,6 +956,15 @@ 
(rule (lower (has_type (vr128_ty ty) (bnot x))) (vec_not ty x)) +;; With z15 (bnot (bxor ...)) can be a single instruction, similar to the +;; (bxor _ (bnot _)) lowering. +(rule 3 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bnot (bxor x y)))) + (not_xor_reg ty x y)) + +;; Combine a not/xor operation of vector types into one. +(rule 4 (lower (has_type (vr128_ty ty) (bnot (bxor x y)))) + (vec_not_xor ty x y)) + ;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/filetests/filetests/isa/aarch64/bitops.clif b/cranelift/filetests/filetests/isa/aarch64/bitops.clif index 8c616221dc..21eec6c51a 100644 --- a/cranelift/filetests/filetests/isa/aarch64/bitops.clif +++ b/cranelift/filetests/filetests/isa/aarch64/bitops.clif @@ -924,3 +924,26 @@ block0(v0: i128, v1: i128): ; csel x1, x15, x7, ne ; ret +function %bnot_of_bxor(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bxor v0, v1 + v3 = bnot v2 + return v3 +} + +; block0: +; eon w0, w0, w1 +; ret + +function %bnot_of_bxor(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = bxor v0, v1 + v3 = bnot v2 + return v3 +} + +; block0: +; eon x0, x0, x2 +; eon x1, x1, x3 +; ret + diff --git a/cranelift/filetests/filetests/isa/s390x/bitops-optimized.clif b/cranelift/filetests/filetests/isa/s390x/bitops-optimized.clif index 091f1a6bf4..2f2ad86908 100644 --- a/cranelift/filetests/filetests/isa/s390x/bitops-optimized.clif +++ b/cranelift/filetests/filetests/isa/s390x/bitops-optimized.clif @@ -64,3 +64,14 @@ block0(v0: i32, v1: i32): ; block0: ; nxrk %r2, %r3, %r2 ; br %r14 + +function %bnot_of_bxor(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bxor v0, v1 + v3 = bnot v2 + return v3 +} + +; block0: +; nxrk %r2, %r2, %r3 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/bitwise.clif b/cranelift/filetests/filetests/isa/s390x/bitwise.clif index ffa698326e..37f0b5488c 100644 --- a/cranelift/filetests/filetests/isa/s390x/bitwise.clif +++ 
b/cranelift/filetests/filetests/isa/s390x/bitwise.clif @@ -632,3 +632,40 @@ block0(v0: i8, v1: i8, v2: i8): ; or %r2, %r3 ; br %r14 +function %bnot_of_bxor(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bxor v0, v1 + v3 = bnot v2 + return v3 +} + +; block0: +; xr %r2, %r3 +; xilf %r2, 4294967295 +; br %r14 + +function %bnot_of_bxor(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = bxor v0, v1 + v3 = bnot v2 + return v3 +} + +; block0: +; vl %v1, 0(%r3) +; vl %v3, 0(%r4) +; vnx %v6, %v1, %v3 +; vst %v6, 0(%r2) +; br %r14 + +function %bnot_of_bxor(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = bxor v0, v1 + v3 = bnot v2 + return v3 +} + +; block0: +; vnx %v24, %v24, %v25 +; br %r14 +