s390x: Fix bitwise operations (#4146)

The current codegen had a number of logic errors that confused
NAND with AND WITH COMPLEMENT, and NOR with OR WITH COMPLEMENT.

Add support for the missing z15 instructions (NCRK/NCGRK and OCRK/OCGRK) and fix the affected lowering logic.
This commit is contained in:
Ulrich Weigand
2022-05-12 19:05:22 +02:00
committed by GitHub
parent 9538336f82
commit 0243a16679
7 changed files with 151 additions and 75 deletions

View File

@@ -744,14 +744,20 @@
(Xor32)
(Xor64)
;; NAND
(NotAnd32)
(NotAnd64)
;; NOR
(NotOrr32)
(NotOrr64)
;; XNOR
(NotXor32)
(NotXor64)
;; And with complement
(AndNot32)
(AndNot64)
;; NOR
;; Or with complement
(OrrNot32)
(OrrNot64)
;; XNOR
(XorNot32)
(XorNot64)
))
;; A unary operation.
@@ -2776,6 +2782,36 @@
(push_xor_uimm32shifted ib ty dst val (uimm32shifted 0xffffffff 32))))
;; Helpers for generating `not_and` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl aluop_not_and (Type) ALUOp)
(rule (aluop_not_and (gpr32_ty _ty)) (ALUOp.NotAnd32))
(rule (aluop_not_and (gpr64_ty _ty)) (ALUOp.NotAnd64))
(decl not_and_reg (Type Reg Reg) Reg)
(rule (not_and_reg ty x y) (alu_rrr ty (aluop_not_and ty) x y))
;; Helpers for generating `not_or` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl aluop_not_or (Type) ALUOp)
(rule (aluop_not_or (gpr32_ty _ty)) (ALUOp.NotOrr32))
(rule (aluop_not_or (gpr64_ty _ty)) (ALUOp.NotOrr64))
(decl not_or_reg (Type Reg Reg) Reg)
(rule (not_or_reg ty x y) (alu_rrr ty (aluop_not_or ty) x y))
;; Helpers for generating `not_xor` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl aluop_not_xor (Type) ALUOp)
(rule (aluop_not_xor (gpr32_ty _ty)) (ALUOp.NotXor32))
(rule (aluop_not_xor (gpr64_ty _ty)) (ALUOp.NotXor64))
(decl not_xor_reg (Type Reg Reg) Reg)
(rule (not_xor_reg ty x y) (alu_rrr ty (aluop_not_xor ty) x y))
;; Helpers for generating `and_not` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl aluop_and_not (Type) ALUOp)
@@ -2796,16 +2832,6 @@
(rule (or_not_reg ty x y) (alu_rrr ty (aluop_or_not ty) x y))
;; Helpers for generating `xor_not` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl aluop_xor_not (Type) ALUOp)
(rule (aluop_xor_not (gpr32_ty _ty)) (ALUOp.XorNot32))
(rule (aluop_xor_not (gpr64_ty _ty)) (ALUOp.XorNot64))
(decl xor_not_reg (Type Reg Reg) Reg)
(rule (xor_not_reg ty x y) (alu_rrr ty (aluop_xor_not ty) x y))
;; Helpers for generating `abs` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl unaryop_abs (Type) UnaryOp)

View File

@@ -996,12 +996,16 @@ impl MachInstEmit for Inst {
ALUOp::Orr64 => (0xb9e6, true), // OGRK
ALUOp::Xor32 => (0xb9f7, true), // XRK
ALUOp::Xor64 => (0xb9e7, true), // XGRK
ALUOp::AndNot32 => (0xb974, false), // NNRK
ALUOp::AndNot64 => (0xb964, false), // NNGRK
ALUOp::OrrNot32 => (0xb976, false), // NORK
ALUOp::OrrNot64 => (0xb966, false), // NOGRK
ALUOp::XorNot32 => (0xb977, false), // NXRK
ALUOp::XorNot64 => (0xb967, false), // NXGRK
ALUOp::NotAnd32 => (0xb974, false), // NNRK
ALUOp::NotAnd64 => (0xb964, false), // NNGRK
ALUOp::NotOrr32 => (0xb976, false), // NORK
ALUOp::NotOrr64 => (0xb966, false), // NOGRK
ALUOp::NotXor32 => (0xb977, false), // NXRK
ALUOp::NotXor64 => (0xb967, false), // NXGRK
ALUOp::AndNot32 => (0xb9f5, false), // NCRK
ALUOp::AndNot64 => (0xb9e5, false), // NCGRK
ALUOp::OrrNot32 => (0xb975, false), // OCRK
ALUOp::OrrNot64 => (0xb965, false), // OCGRK
_ => unreachable!(),
};
if have_rr && rd.to_reg() == rn {

View File

@@ -178,7 +178,7 @@ fn test_s390x_binemit() {
));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::AndNot32,
alu_op: ALUOp::NotAnd32,
rd: writable_gpr(1),
rn: gpr(2),
rm: gpr(3),
@@ -188,7 +188,7 @@ fn test_s390x_binemit() {
));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::AndNot64,
alu_op: ALUOp::NotAnd64,
rd: writable_gpr(4),
rn: gpr(5),
rm: gpr(6),
@@ -198,7 +198,7 @@ fn test_s390x_binemit() {
));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::OrrNot32,
alu_op: ALUOp::NotOrr32,
rd: writable_gpr(1),
rn: gpr(2),
rm: gpr(3),
@@ -208,7 +208,7 @@ fn test_s390x_binemit() {
));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::OrrNot64,
alu_op: ALUOp::NotOrr64,
rd: writable_gpr(4),
rn: gpr(5),
rm: gpr(6),
@@ -218,7 +218,7 @@ fn test_s390x_binemit() {
));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::XorNot32,
alu_op: ALUOp::NotXor32,
rd: writable_gpr(1),
rn: gpr(2),
rm: gpr(3),
@@ -228,7 +228,7 @@ fn test_s390x_binemit() {
));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::XorNot64,
alu_op: ALUOp::NotXor64,
rd: writable_gpr(4),
rn: gpr(5),
rm: gpr(6),
@@ -236,6 +236,46 @@ fn test_s390x_binemit() {
"B9676045",
"nxgrk %r4, %r5, %r6",
));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::AndNot32,
rd: writable_gpr(1),
rn: gpr(2),
rm: gpr(3),
},
"B9F53012",
"ncrk %r1, %r2, %r3",
));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::AndNot64,
rd: writable_gpr(4),
rn: gpr(5),
rm: gpr(6),
},
"B9E56045",
"ncgrk %r4, %r5, %r6",
));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::OrrNot32,
rd: writable_gpr(1),
rn: gpr(2),
rm: gpr(3),
},
"B9753012",
"ocrk %r1, %r2, %r3",
));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::OrrNot64,
rd: writable_gpr(4),
rn: gpr(5),
rm: gpr(6),
},
"B9656045",
"ocgrk %r4, %r5, %r6",
));
insns.push((
Inst::AluRRSImm16 {

View File

@@ -193,9 +193,11 @@ impl Inst {
// These depend on the opcode
Inst::AluRRR { alu_op, .. } => match alu_op {
ALUOp::NotAnd32 | ALUOp::NotAnd64 => InstructionSet::MIE2,
ALUOp::NotOrr32 | ALUOp::NotOrr64 => InstructionSet::MIE2,
ALUOp::NotXor32 | ALUOp::NotXor64 => InstructionSet::MIE2,
ALUOp::AndNot32 | ALUOp::AndNot64 => InstructionSet::MIE2,
ALUOp::OrrNot32 | ALUOp::OrrNot64 => InstructionSet::MIE2,
ALUOp::XorNot32 | ALUOp::XorNot64 => InstructionSet::MIE2,
_ => InstructionSet::Base,
},
Inst::UnaryRR { op, .. } => match op {
@@ -933,12 +935,16 @@ impl Inst {
ALUOp::Orr64 => ("ogrk", true),
ALUOp::Xor32 => ("xrk", true),
ALUOp::Xor64 => ("xgrk", true),
ALUOp::AndNot32 => ("nnrk", false),
ALUOp::AndNot64 => ("nngrk", false),
ALUOp::OrrNot32 => ("nork", false),
ALUOp::OrrNot64 => ("nogrk", false),
ALUOp::XorNot32 => ("nxrk", false),
ALUOp::XorNot64 => ("nxgrk", false),
ALUOp::NotAnd32 => ("nnrk", false),
ALUOp::NotAnd64 => ("nngrk", false),
ALUOp::NotOrr32 => ("nork", false),
ALUOp::NotOrr64 => ("nogrk", false),
ALUOp::NotXor32 => ("nxrk", false),
ALUOp::NotXor64 => ("nxgrk", false),
ALUOp::AndNot32 => ("ncrk", false),
ALUOp::AndNot64 => ("ncgrk", false),
ALUOp::OrrNot32 => ("ocrk", false),
ALUOp::OrrNot64 => ("ocgrk", false),
_ => unreachable!(),
};
if have_rr && rd.to_reg() == rn {

View File

@@ -628,7 +628,7 @@
;; z15 version using a single instruction (NOR).
(rule (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bnot x)))
(let ((rx Reg x))
(or_not_reg ty rx rx)))
(not_or_reg ty rx rx)))
;; z14 version using XOR with -1.
(rule (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bnot x)))
@@ -708,7 +708,7 @@
;; z14 version using XOR with -1.
(rule (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (band_not x y)))
(not_reg ty (and_reg ty x y)))
(and_reg ty x (not_reg ty y)))
;;;; Rules for `bor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -719,14 +719,14 @@
;; z14 version using XOR with -1.
(rule (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bor_not x y)))
(not_reg ty (or_reg ty x y)))
(or_reg ty x (not_reg ty y)))
;;;; Rules for `bxor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; z15 version using a single instruction.
(rule (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bxor_not x y)))
(xor_not_reg ty x y))
(not_xor_reg ty x y))
;; z14 version using XOR with -1.
(rule (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bxor_not x y)))
@@ -746,7 +746,7 @@
(rule (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bitselect x y z)))
(let ((rx Reg x)
(if_true Reg (and_reg ty y rx))
(if_false Reg (not_reg ty (and_reg ty z rx))))
(if_false Reg (and_reg ty z (not_reg ty rx))))
(or_reg ty if_false if_true)))
@@ -1599,10 +1599,10 @@
;; simply byte-swap the source operand.
(rule (atomic_rmw_body ib (and (mie2_enabled) (ty_32_or_64 ty)) (bigendian)
(AtomicRmwOp.Nand) tmp val src)
(push_alu_reg ib (aluop_and_not ty) tmp val src))
(push_alu_reg ib (aluop_not_and ty) tmp val src))
(rule (atomic_rmw_body ib (and (mie2_enabled) (ty_32_or_64 ty)) (littleendian)
(AtomicRmwOp.Nand) tmp val src)
(push_alu_reg ib (aluop_and_not ty) tmp val (bswap_reg ty src)))
(push_alu_reg ib (aluop_not_and ty) tmp val (bswap_reg ty src)))
(rule (atomic_rmw_body ib (and (mie2_disabled) (ty_32_or_64 ty)) (bigendian)
(AtomicRmwOp.Nand) tmp val src)
(push_not_reg ib ty tmp

View File

@@ -13,7 +13,7 @@ block0(v0: i64, v1: i64):
}
; block0:
; nngrk %r2, %r2, %r3
; ncgrk %r2, %r2, %r3
; br %r14
function %band_not_i32(i32, i32) -> i32 {
@@ -23,7 +23,7 @@ block0(v0: i32, v1: i32):
}
; block0:
; nnrk %r2, %r2, %r3
; ncrk %r2, %r2, %r3
; br %r14
function %band_not_i16(i16, i16) -> i16 {
@@ -33,7 +33,7 @@ block0(v0: i16, v1: i16):
}
; block0:
; nnrk %r2, %r2, %r3
; ncrk %r2, %r2, %r3
; br %r14
function %band_not_i8(i8, i8) -> i8 {
@@ -43,7 +43,7 @@ block0(v0: i8, v1: i8):
}
; block0:
; nnrk %r2, %r2, %r3
; ncrk %r2, %r2, %r3
; br %r14
function %bor_not_i64(i64, i64) -> i64 {
@@ -53,7 +53,7 @@ block0(v0: i64, v1: i64):
}
; block0:
; nogrk %r2, %r2, %r3
; ocgrk %r2, %r2, %r3
; br %r14
function %bor_not_i32(i32, i32) -> i32 {
@@ -63,7 +63,7 @@ block0(v0: i32, v1: i32):
}
; block0:
; nork %r2, %r2, %r3
; ocrk %r2, %r2, %r3
; br %r14
function %bor_not_i16(i16, i16) -> i16 {
@@ -73,7 +73,7 @@ block0(v0: i16, v1: i16):
}
; block0:
; nork %r2, %r2, %r3
; ocrk %r2, %r2, %r3
; br %r14
function %bor_not_i8(i8, i8) -> i8 {
@@ -83,7 +83,7 @@ block0(v0: i8, v1: i8):
}
; block0:
; nork %r2, %r2, %r3
; ocrk %r2, %r2, %r3
; br %r14
function %bxor_not_i64(i64, i64) -> i64 {
@@ -174,7 +174,7 @@ block0(v0: i64, v1: i64, v2: i64):
; block0:
; ngrk %r5, %r3, %r2
; nngrk %r3, %r4, %r2
; ncgrk %r3, %r4, %r2
; ogrk %r2, %r3, %r5
; br %r14
@@ -186,7 +186,7 @@ block0(v0: i32, v1: i32, v2: i32):
; block0:
; nrk %r5, %r3, %r2
; nnrk %r3, %r4, %r2
; ncrk %r3, %r4, %r2
; ork %r2, %r3, %r5
; br %r14
@@ -198,7 +198,7 @@ block0(v0: i16, v1: i16, v2: i16):
; block0:
; nrk %r5, %r3, %r2
; nnrk %r3, %r4, %r2
; ncrk %r3, %r4, %r2
; ork %r2, %r3, %r5
; br %r14
@@ -210,7 +210,7 @@ block0(v0: i8, v1: i8, v2: i8):
; block0:
; nrk %r5, %r3, %r2
; nnrk %r3, %r4, %r2
; ncrk %r3, %r4, %r2
; ork %r2, %r3, %r5
; br %r14

View File

@@ -306,9 +306,9 @@ block0(v0: i64, v1: i64):
}
; block0:
; xilf %r3, 4294967295
; xihf %r3, 4294967295
; ngr %r2, %r3
; xilf %r2, 4294967295
; xihf %r2, 4294967295
; br %r14
function %band_not_i32(i32, i32) -> i32 {
@@ -318,8 +318,8 @@ block0(v0: i32, v1: i32):
}
; block0:
; xilf %r3, 4294967295
; nr %r2, %r3
; xilf %r2, 4294967295
; br %r14
function %band_not_i16(i16, i16) -> i16 {
@@ -329,8 +329,8 @@ block0(v0: i16, v1: i16):
}
; block0:
; xilf %r3, 4294967295
; nr %r2, %r3
; xilf %r2, 4294967295
; br %r14
function %band_not_i8(i8, i8) -> i8 {
@@ -340,8 +340,8 @@ block0(v0: i8, v1: i8):
}
; block0:
; xilf %r3, 4294967295
; nr %r2, %r3
; xilf %r2, 4294967295
; br %r14
function %bor_not_i64(i64, i64) -> i64 {
@@ -351,9 +351,9 @@ block0(v0: i64, v1: i64):
}
; block0:
; xilf %r3, 4294967295
; xihf %r3, 4294967295
; ogr %r2, %r3
; xilf %r2, 4294967295
; xihf %r2, 4294967295
; br %r14
function %bor_not_i32(i32, i32) -> i32 {
@@ -363,8 +363,8 @@ block0(v0: i32, v1: i32):
}
; block0:
; xilf %r3, 4294967295
; or %r2, %r3
; xilf %r2, 4294967295
; br %r14
function %bor_not_i16(i16, i16) -> i16 {
@@ -374,8 +374,8 @@ block0(v0: i16, v1: i16):
}
; block0:
; xilf %r3, 4294967295
; or %r2, %r3
; xilf %r2, 4294967295
; br %r14
function %bor_not_i8(i8, i8) -> i8 {
@@ -385,8 +385,8 @@ block0(v0: i8, v1: i8):
}
; block0:
; xilf %r3, 4294967295
; or %r2, %r3
; xilf %r2, 4294967295
; br %r14
function %bxor_not_i64(i64, i64) -> i64 {
@@ -483,10 +483,10 @@ block0(v0: i64, v1: i64, v2: i64):
; block0:
; ngrk %r5, %r3, %r2
; ngrk %r3, %r4, %r2
; xilf %r3, 4294967295
; xihf %r3, 4294967295
; ogrk %r2, %r3, %r5
; xilf %r2, 4294967295
; xihf %r2, 4294967295
; ngrk %r2, %r4, %r2
; ogr %r2, %r5
; br %r14
function %bitselect_i32(i32, i32, i32) -> i32 {
@@ -497,9 +497,9 @@ block0(v0: i32, v1: i32, v2: i32):
; block0:
; nrk %r5, %r3, %r2
; nrk %r3, %r4, %r2
; xilf %r3, 4294967295
; ork %r2, %r3, %r5
; xilf %r2, 4294967295
; nrk %r2, %r4, %r2
; or %r2, %r5
; br %r14
function %bitselect_i16(i16, i16, i16) -> i16 {
@@ -510,9 +510,9 @@ block0(v0: i16, v1: i16, v2: i16):
; block0:
; nrk %r5, %r3, %r2
; nrk %r3, %r4, %r2
; xilf %r3, 4294967295
; ork %r2, %r3, %r5
; xilf %r2, 4294967295
; nrk %r2, %r4, %r2
; or %r2, %r5
; br %r14
function %bitselect_i8(i8, i8, i8) -> i8 {
@@ -523,8 +523,8 @@ block0(v0: i8, v1: i8, v2: i8):
; block0:
; nrk %r5, %r3, %r2
; nrk %r3, %r4, %r2
; xilf %r3, 4294967295
; ork %r2, %r3, %r5
; xilf %r2, 4294967295
; nrk %r2, %r4, %r2
; or %r2, %r5
; br %r14