From d9e6902b694608dbb46cc143ba1fa41f1cc15f81 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Thu, 30 Sep 2021 18:36:39 +0200 Subject: [PATCH] s390x: Enable most memory64 tests * Support full set of ADD LOGICAL / SUBTRACT LOGICAL instructions * Full implementation of IaddIfcout lowering * Enable most memory64 tests (except simd and threads) --- build.rs | 5 +- cranelift/codegen/src/isa/s390x/abi.rs | 2 +- cranelift/codegen/src/isa/s390x/inst/emit.rs | 150 ++++---- .../codegen/src/isa/s390x/inst/emit_tests.rs | 342 +++++++++++++++++- cranelift/codegen/src/isa/s390x/inst/mod.rs | 32 +- cranelift/codegen/src/isa/s390x/lower.rs | 34 +- .../filetests/isa/s390x/arithmetic.clif | 92 +++++ 7 files changed, 569 insertions(+), 88 deletions(-) diff --git a/build.rs b/build.rs index cc6d3e5047..d5f21e032f 100644 --- a/build.rs +++ b/build.rs @@ -184,8 +184,9 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { "Cranelift" => match (testsuite, testname) { // No simd support yet for s390x. ("simd", _) if platform_is_s390x() => return true, - // No memory64 support yet for s390x. - ("memory64", _) if platform_is_s390x() => return true, + ("memory64", "simd") if platform_is_s390x() => return true, + // No full atomics support yet for s390x. 
+ ("memory64", "threads") if platform_is_s390x() => return true, _ => {} }, _ => panic!("unrecognized strategy"), diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index d0dd517e65..8712d0de28 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -371,7 +371,7 @@ impl ABIMachineSpec for S390xMachineDeps { insts.push(Inst::mov64(into_reg, from_reg)); } insts.push(Inst::AluRUImm32 { - alu_op: ALUOp::Add64, + alu_op: ALUOp::AddLogical64, rd: into_reg, imm, }); diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs index a28b9d14b4..9c60699279 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs @@ -956,24 +956,28 @@ impl MachInstEmit for Inst { match self { &Inst::AluRRR { alu_op, rd, rn, rm } => { let (opcode, have_rr) = match alu_op { - ALUOp::Add32 => (0xb9f8, true), // ARK - ALUOp::Add64 => (0xb9e8, true), // AGRK - ALUOp::Sub32 => (0xb9f9, true), // SRK - ALUOp::Sub64 => (0xb9e9, true), // SGRK - ALUOp::Mul32 => (0xb9fd, true), // MSRKC - ALUOp::Mul64 => (0xb9ed, true), // MSGRKC - ALUOp::And32 => (0xb9f4, true), // NRK - ALUOp::And64 => (0xb9e4, true), // NGRK - ALUOp::Orr32 => (0xb9f6, true), // ORK - ALUOp::Orr64 => (0xb9e6, true), // OGRK - ALUOp::Xor32 => (0xb9f7, true), // XRK - ALUOp::Xor64 => (0xb9e7, true), // XGRK - ALUOp::AndNot32 => (0xb974, false), // NNRK - ALUOp::AndNot64 => (0xb964, false), // NNGRK - ALUOp::OrrNot32 => (0xb976, false), // NORK - ALUOp::OrrNot64 => (0xb966, false), // NOGRK - ALUOp::XorNot32 => (0xb977, false), // NXRK - ALUOp::XorNot64 => (0xb967, false), // NXGRK + ALUOp::Add32 => (0xb9f8, true), // ARK + ALUOp::Add64 => (0xb9e8, true), // AGRK + ALUOp::AddLogical32 => (0xb9fa, true), // ALRK + ALUOp::AddLogical64 => (0xb9ea, true), // ALGRK + ALUOp::Sub32 => (0xb9f9, true), // SRK + ALUOp::Sub64 => (0xb9e9, true), // SGRK + 
ALUOp::SubLogical32 => (0xb9fb, true), // SLRK + ALUOp::SubLogical64 => (0xb9eb, true), // SLGRK + ALUOp::Mul32 => (0xb9fd, true), // MSRKC + ALUOp::Mul64 => (0xb9ed, true), // MSGRKC + ALUOp::And32 => (0xb9f4, true), // NRK + ALUOp::And64 => (0xb9e4, true), // NGRK + ALUOp::Orr32 => (0xb9f6, true), // ORK + ALUOp::Orr64 => (0xb9e6, true), // OGRK + ALUOp::Xor32 => (0xb9f7, true), // XRK + ALUOp::Xor64 => (0xb9e7, true), // XGRK + ALUOp::AndNot32 => (0xb974, false), // NNRK + ALUOp::AndNot64 => (0xb964, false), // NNGRK + ALUOp::OrrNot32 => (0xb976, false), // NORK + ALUOp::OrrNot64 => (0xb966, false), // NOGRK + ALUOp::XorNot32 => (0xb977, false), // NXRK + ALUOp::XorNot64 => (0xb967, false), // NXGRK _ => unreachable!(), }; if have_rr && rd.to_reg() == rn { @@ -1003,21 +1007,27 @@ impl MachInstEmit for Inst { } &Inst::AluRR { alu_op, rd, rm } => { let (opcode, is_rre) = match alu_op { - ALUOp::Add32 => (0x1a, false), // AR - ALUOp::Add64 => (0xb908, true), // AGR - ALUOp::Add64Ext32 => (0xb918, true), // AGFR - ALUOp::Sub32 => (0x1b, false), // SR - ALUOp::Sub64 => (0xb909, true), // SGR - ALUOp::Sub64Ext32 => (0xb919, true), // SGFR - ALUOp::Mul32 => (0xb252, true), // MSR - ALUOp::Mul64 => (0xb90c, true), // MSGR - ALUOp::Mul64Ext32 => (0xb91c, true), // MSGFR - ALUOp::And32 => (0x14, false), // NR - ALUOp::And64 => (0xb980, true), // NGR - ALUOp::Orr32 => (0x16, false), // OR - ALUOp::Orr64 => (0xb981, true), // OGR - ALUOp::Xor32 => (0x17, false), // XR - ALUOp::Xor64 => (0xb982, true), // XGR + ALUOp::Add32 => (0x1a, false), // AR + ALUOp::Add64 => (0xb908, true), // AGR + ALUOp::Add64Ext32 => (0xb918, true), // AGFR + ALUOp::AddLogical32 => (0x1e, false), // ALR + ALUOp::AddLogical64 => (0xb90a, true), // ALGR + ALUOp::AddLogical64Ext32 => (0xb91a, true), // ALGFR + ALUOp::Sub32 => (0x1b, false), // SR + ALUOp::Sub64 => (0xb909, true), // SGR + ALUOp::Sub64Ext32 => (0xb919, true), // SGFR + ALUOp::SubLogical32 => (0x1f, false), // SLR + ALUOp::SubLogical64 
=> (0xb90b, true), // SLGR + ALUOp::SubLogical64Ext32 => (0xb91b, true), // SLGFR + ALUOp::Mul32 => (0xb252, true), // MSR + ALUOp::Mul64 => (0xb90c, true), // MSGR + ALUOp::Mul64Ext32 => (0xb91c, true), // MSGFR + ALUOp::And32 => (0x14, false), // NR + ALUOp::And64 => (0xb980, true), // NGR + ALUOp::Orr32 => (0x16, false), // OR + ALUOp::Orr64 => (0xb981, true), // OGR + ALUOp::Xor32 => (0x17, false), // XR + ALUOp::Xor64 => (0xb982, true), // XGR _ => unreachable!(), }; if is_rre { @@ -1032,27 +1042,33 @@ impl MachInstEmit for Inst { ref mem, } => { let (opcode_rx, opcode_rxy) = match alu_op { - ALUOp::Add32 => (Some(0x5a), Some(0xe35a)), // A(Y) - ALUOp::Add32Ext16 => (Some(0x4a), Some(0xe34a)), // AH(Y) - ALUOp::Add64 => (None, Some(0xe308)), // AG - ALUOp::Add64Ext16 => (None, Some(0xe338)), // AGH - ALUOp::Add64Ext32 => (None, Some(0xe318)), // AGF - ALUOp::Sub32 => (Some(0x5b), Some(0xe35b)), // S(Y) - ALUOp::Sub32Ext16 => (Some(0x4b), Some(0xe37b)), // SH(Y) - ALUOp::Sub64 => (None, Some(0xe309)), // SG - ALUOp::Sub64Ext16 => (None, Some(0xe339)), // SGH - ALUOp::Sub64Ext32 => (None, Some(0xe319)), // SGF - ALUOp::Mul32 => (Some(0x71), Some(0xe351)), // MS(Y) - ALUOp::Mul32Ext16 => (Some(0x4c), Some(0xe37c)), // MH(Y) - ALUOp::Mul64 => (None, Some(0xe30c)), // MSG - ALUOp::Mul64Ext16 => (None, Some(0xe33c)), // MSH - ALUOp::Mul64Ext32 => (None, Some(0xe31c)), // MSGF - ALUOp::And32 => (Some(0x54), Some(0xe354)), // N(Y) - ALUOp::And64 => (None, Some(0xe380)), // NG - ALUOp::Orr32 => (Some(0x56), Some(0xe356)), // O(Y) - ALUOp::Orr64 => (None, Some(0xe381)), // OG - ALUOp::Xor32 => (Some(0x57), Some(0xe357)), // X(Y) - ALUOp::Xor64 => (None, Some(0xe382)), // XG + ALUOp::Add32 => (Some(0x5a), Some(0xe35a)), // A(Y) + ALUOp::Add32Ext16 => (Some(0x4a), Some(0xe34a)), // AH(Y) + ALUOp::Add64 => (None, Some(0xe308)), // AG + ALUOp::Add64Ext16 => (None, Some(0xe338)), // AGH + ALUOp::Add64Ext32 => (None, Some(0xe318)), // AGF + ALUOp::AddLogical32 => (Some(0x5e), 
Some(0xe35e)), // AL(Y) + ALUOp::AddLogical64 => (None, Some(0xe30a)), // ALG + ALUOp::AddLogical64Ext32 => (None, Some(0xe31a)), // ALGF + ALUOp::Sub32 => (Some(0x5b), Some(0xe35b)), // S(Y) + ALUOp::Sub32Ext16 => (Some(0x4b), Some(0xe37b)), // SH(Y) + ALUOp::Sub64 => (None, Some(0xe309)), // SG + ALUOp::Sub64Ext16 => (None, Some(0xe339)), // SGH + ALUOp::Sub64Ext32 => (None, Some(0xe319)), // SGF + ALUOp::SubLogical32 => (Some(0x5f), Some(0xe35f)), // SL(Y) + ALUOp::SubLogical64 => (None, Some(0xe30b)), // SLG + ALUOp::SubLogical64Ext32 => (None, Some(0xe31b)), // SLGF + ALUOp::Mul32 => (Some(0x71), Some(0xe351)), // MS(Y) + ALUOp::Mul32Ext16 => (Some(0x4c), Some(0xe37c)), // MH(Y) + ALUOp::Mul64 => (None, Some(0xe30c)), // MSG + ALUOp::Mul64Ext16 => (None, Some(0xe33c)), // MGH + ALUOp::Mul64Ext32 => (None, Some(0xe31c)), // MSGF + ALUOp::And32 => (Some(0x54), Some(0xe354)), // N(Y) + ALUOp::And64 => (None, Some(0xe380)), // NG + ALUOp::Orr32 => (Some(0x56), Some(0xe356)), // O(Y) + ALUOp::Orr64 => (None, Some(0xe381)), // OG + ALUOp::Xor32 => (Some(0x57), Some(0xe357)), // X(Y) + ALUOp::Xor64 => (None, Some(0xe382)), // XG _ => unreachable!(), }; let rd = rd.to_reg(); @@ -1082,10 +1098,10 @@ impl MachInstEmit for Inst { } &Inst::AluRUImm32 { alu_op, rd, imm } => { let opcode = match alu_op { - ALUOp::Add32 => 0xc2b, // ALFI - ALUOp::Add64 => 0xc2a, // ALGFI - ALUOp::Sub32 => 0xc25, // SLFI - ALUOp::Sub64 => 0xc24, // SLGFI + ALUOp::AddLogical32 => 0xc2b, // ALFI + ALUOp::AddLogical64 => 0xc2a, // ALGFI + ALUOp::SubLogical32 => 0xc25, // SLFI + ALUOp::SubLogical64 => 0xc24, // SLGFI _ => unreachable!(), }; put(sink, &enc_ril_a(opcode, rd.to_reg(), imm)); @@ -1380,14 +1396,16 @@ impl MachInstEmit for Inst { ref mem, } => { let opcode = match alu_op { - ALUOp::Add32 => 0xebf8, // LAA - ALUOp::Add64 => 0xebe8, // LAAG - ALUOp::And32 => 0xebf4, // LAN - ALUOp::And64 => 0xebe4, // LANG - ALUOp::Orr32 => 0xebf6, // LAO - ALUOp::Orr64 => 0xebe6, // LAOG - ALUOp::Xor32 
=> 0xebf7, // LAX - ALUOp::Xor64 => 0xebe7, // LAXG + ALUOp::Add32 => 0xebf8, // LAA + ALUOp::Add64 => 0xebe8, // LAAG + ALUOp::AddLogical32 => 0xebfa, // LAAL + ALUOp::AddLogical64 => 0xebea, // LAALG + ALUOp::And32 => 0xebf4, // LAN + ALUOp::And64 => 0xebe4, // LANG + ALUOp::Orr32 => 0xebf6, // LAO + ALUOp::Orr64 => 0xebe6, // LAOG + ALUOp::Xor32 => 0xebf7, // LAX + ALUOp::Xor64 => 0xebe7, // LAXG _ => unreachable!(), }; diff --git a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs index 242c62adbe..15f32bed74 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs @@ -32,6 +32,26 @@ fn test_s390x_binemit() { "B9E86045", "agrk %r4, %r5, %r6", )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9FA3012", + "alrk %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AddLogical64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9EA6045", + "algrk %r4, %r5, %r6", + )); insns.push(( Inst::AluRRR { alu_op: ALUOp::Sub32, @@ -52,6 +72,26 @@ fn test_s390x_binemit() { "B9E96045", "sgrk %r4, %r5, %r6", )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::SubLogical32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9FB3012", + "slrk %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::SubLogical64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9EB6045", + "slgrk %r4, %r5, %r6", + )); insns.push(( Inst::AluRRR { alu_op: ALUOp::Mul32, @@ -261,6 +301,33 @@ fn test_s390x_binemit() { "B9180045", "agfr %r4, %r5", )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "1E12", + "alr %r1, %r2", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::AddLogical64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B90A0045", + "algr %r4, %r5", + )); + insns.push(( + 
Inst::AluRR { + alu_op: ALUOp::AddLogical64Ext32, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B91A0045", + "algfr %r4, %r5", + )); insns.push(( Inst::AluRR { alu_op: ALUOp::Sub32, @@ -288,6 +355,33 @@ fn test_s390x_binemit() { "B9190045", "sgfr %r4, %r5", )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::SubLogical32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "1F12", + "slr %r1, %r2", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::SubLogical64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B90B0045", + "slgr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::SubLogical64Ext32, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B91B0045", + "slgfr %r4, %r5", + )); insns.push(( Inst::AluRR { alu_op: ALUOp::Mul32, @@ -468,6 +562,62 @@ fn test_s390x_binemit() { "E31020000018", "agf %r1, 0(%r2)", )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "5E102000", + "al %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000005E", + "aly %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::AddLogical64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000000A", + "alg %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::AddLogical64Ext32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000001A", + "algf %r1, 0(%r2)", + )); insns.push(( Inst::AluRX { alu_op: ALUOp::Sub32, @@ -566,6 +716,62 @@ fn test_s390x_binemit() { "E31020000019", "sgf %r1, 0(%r2)", )); + insns.push(( + 
Inst::AluRX { + alu_op: ALUOp::SubLogical32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "5F102000", + "sl %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::SubLogical32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000005F", + "sly %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::SubLogical64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000000B", + "slg %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::SubLogical64Ext32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000001B", + "slgf %r1, 0(%r2)", + )); insns.push(( Inst::AluRX { alu_op: ALUOp::Mul32, @@ -939,7 +1145,7 @@ fn test_s390x_binemit() { insns.push(( Inst::AluRUImm32 { - alu_op: ALUOp::Add32, + alu_op: ALUOp::AddLogical32, rd: writable_gpr(7), imm: 0, }, @@ -948,7 +1154,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::AluRUImm32 { - alu_op: ALUOp::Add32, + alu_op: ALUOp::AddLogical32, rd: writable_gpr(7), imm: 4294967295, }, @@ -957,7 +1163,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::AluRUImm32 { - alu_op: ALUOp::Sub32, + alu_op: ALUOp::SubLogical32, rd: writable_gpr(7), imm: 0, }, @@ -966,7 +1172,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::AluRUImm32 { - alu_op: ALUOp::Sub32, + alu_op: ALUOp::SubLogical32, rd: writable_gpr(7), imm: 4294967295, }, @@ -975,7 +1181,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::AluRUImm32 { - alu_op: ALUOp::Add64, + alu_op: ALUOp::AddLogical64, rd: writable_gpr(7), imm: 0, }, @@ -984,7 +1190,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::AluRUImm32 { - 
alu_op: ALUOp::Add64, + alu_op: ALUOp::AddLogical64, rd: writable_gpr(7), imm: 4294967295, }, @@ -993,7 +1199,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::AluRUImm32 { - alu_op: ALUOp::Sub64, + alu_op: ALUOp::SubLogical64, rd: writable_gpr(7), imm: 0, }, @@ -1002,7 +1208,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::AluRUImm32 { - alu_op: ALUOp::Sub64, + alu_op: ALUOp::SubLogical64, rd: writable_gpr(7), imm: 4294967295, }, @@ -2325,6 +2531,126 @@ fn test_s390x_binemit() { "EB456FFF7FE8", "laag %r4, %r5, 524287(%r6)", )); + insns.push(( + Inst::AtomicRmw { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: zero_reg(), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB45000080FA", + "laal %r4, %r5, -524288", + )); + insns.push(( + Inst::AtomicRmw { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: zero_reg(), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB450FFF7FFA", + "laal %r4, %r5, 524287", + )); + insns.push(( + Inst::AtomicRmw { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: gpr(6), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB45600080FA", + "laal %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::AtomicRmw { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: gpr(6), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB456FFF7FFA", + "laal %r4, %r5, 524287(%r6)", + )); + insns.push(( + Inst::AtomicRmw { + alu_op: ALUOp::AddLogical64, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: zero_reg(), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + 
flags: MemFlags::trusted(), + }, + }, + "EB45000080EA", + "laalg %r4, %r5, -524288", + )); + insns.push(( + Inst::AtomicRmw { + alu_op: ALUOp::AddLogical64, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: zero_reg(), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB450FFF7FEA", + "laalg %r4, %r5, 524287", + )); + insns.push(( + Inst::AtomicRmw { + alu_op: ALUOp::AddLogical64, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: gpr(6), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB45600080EA", + "laalg %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::AtomicRmw { + alu_op: ALUOp::AddLogical64, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: gpr(6), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB456FFF7FEA", + "laalg %r4, %r5, 524287(%r6)", + )); insns.push(( Inst::AtomicRmw { alu_op: ALUOp::And32, diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index 7d05b61855..74406cd8df 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -55,11 +55,17 @@ pub enum ALUOp { Add64, Add64Ext16, Add64Ext32, + AddLogical32, + AddLogical64, + AddLogical64Ext32, Sub32, Sub32Ext16, Sub64, Sub64Ext16, Sub64Ext32, + SubLogical32, + SubLogical64, + SubLogical64Ext32, Mul32, Mul32Ext16, Mul64, @@ -2572,8 +2578,12 @@ impl Inst { let (op, have_rr) = match alu_op { ALUOp::Add32 => ("ark", true), ALUOp::Add64 => ("agrk", true), + ALUOp::AddLogical32 => ("alrk", true), + ALUOp::AddLogical64 => ("algrk", true), ALUOp::Sub32 => ("srk", true), ALUOp::Sub64 => ("sgrk", true), + ALUOp::SubLogical32 => ("slrk", true), + ALUOp::SubLogical64 => ("slgrk", true), ALUOp::Mul32 => ("msrkc", true), ALUOp::Mul64 => ("msgrkc", true), 
ALUOp::And32 => ("nrk", true), @@ -2623,9 +2633,15 @@ impl Inst { ALUOp::Add32 => "ar", ALUOp::Add64 => "agr", ALUOp::Add64Ext32 => "agfr", + ALUOp::AddLogical32 => "alr", + ALUOp::AddLogical64 => "algr", + ALUOp::AddLogical64Ext32 => "algfr", ALUOp::Sub32 => "sr", ALUOp::Sub64 => "sgr", ALUOp::Sub64Ext32 => "sgfr", + ALUOp::SubLogical32 => "slr", + ALUOp::SubLogical64 => "slgr", + ALUOp::SubLogical64Ext32 => "slgfr", ALUOp::Mul32 => "msr", ALUOp::Mul64 => "msgr", ALUOp::Mul64Ext32 => "msgfr", @@ -2652,11 +2668,17 @@ impl Inst { ALUOp::Add64 => (None, Some("ag")), ALUOp::Add64Ext16 => (None, Some("agh")), ALUOp::Add64Ext32 => (None, Some("agf")), + ALUOp::AddLogical32 => (Some("al"), Some("aly")), + ALUOp::AddLogical64 => (None, Some("alg")), + ALUOp::AddLogical64Ext32 => (None, Some("algf")), ALUOp::Sub32 => (Some("s"), Some("sy")), ALUOp::Sub32Ext16 => (Some("sh"), Some("shy")), ALUOp::Sub64 => (None, Some("sg")), ALUOp::Sub64Ext16 => (None, Some("sgh")), ALUOp::Sub64Ext32 => (None, Some("sgf")), + ALUOp::SubLogical32 => (Some("sl"), Some("sly")), + ALUOp::SubLogical64 => (None, Some("slg")), + ALUOp::SubLogical64Ext32 => (None, Some("slgf")), ALUOp::Mul32 => (Some("ms"), Some("msy")), ALUOp::Mul32Ext16 => (Some("mh"), Some("mhy")), ALUOp::Mul64 => (None, Some("msg")), @@ -2715,10 +2737,10 @@ impl Inst { } &Inst::AluRUImm32 { alu_op, rd, imm } => { let op = match alu_op { - ALUOp::Add32 => "alfi", - ALUOp::Add64 => "algfi", - ALUOp::Sub32 => "slfi", - ALUOp::Sub64 => "slgfi", + ALUOp::AddLogical32 => "alfi", + ALUOp::AddLogical64 => "algfi", + ALUOp::SubLogical32 => "slfi", + ALUOp::SubLogical64 => "slgfi", _ => unreachable!(), }; let rd = rd.to_reg().show_rru(mb_rru); @@ -2967,6 +2989,8 @@ impl Inst { let op = match alu_op { ALUOp::Add32 => "laa", ALUOp::Add64 => "laag", + ALUOp::AddLogical32 => "laal", + ALUOp::AddLogical64 => "laalg", ALUOp::And32 => "lan", ALUOp::And64 => "lang", ALUOp::Orr32 => "lao", diff --git a/cranelift/codegen/src/isa/s390x/lower.rs 
b/cranelift/codegen/src/isa/s390x/lower.rs index c100e36031..b36337b294 100644 --- a/cranelift/codegen/src/isa/s390x/lower.rs +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -973,17 +973,37 @@ fn lower_insn_to_regs>( } } Opcode::IaddIfcout => { - // This only supports the operands emitted by dynamic_addr. let ty = ty.unwrap(); assert!(ty == types::I32 || ty == types::I64); - let alu_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64); + // Emit an ADD LOGICAL instruction, which sets the condition code + // to indicate an (unsigned) carry bit. + let alu_op = choose_32_64(ty, ALUOp::AddLogical32, ALUOp::AddLogical64); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let imm = input_matches_uimm32(ctx, inputs[1]).unwrap(); - ctx.emit(Inst::gen_move(rd, rn, ty)); - // Note that this will emit AL(G)FI, which sets the condition - // code to indicate an (unsigned) carry bit. - ctx.emit(Inst::AluRUImm32 { alu_op, rd, imm }); + if let Some(imm) = input_matches_uimm32(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRUImm32 { alu_op, rd, imm }); + } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else if let Some(mem) = input_matches_uext32_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { + alu_op: ALUOp::AddLogical64Ext32, + rd, + mem, + }); + } else if let Some(rm) = input_matches_uext32_reg(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRR { + alu_op: ALUOp::AddLogical64Ext32, + rd, + rm, + }); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); + } } Opcode::UaddSat | Opcode::SaddSat => unimplemented!(), diff --git a/cranelift/filetests/filetests/isa/s390x/arithmetic.clif 
b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif index 479268ec0c..98883f6ae7 100644 --- a/cranelift/filetests/filetests/isa/s390x/arithmetic.clif +++ b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif @@ -202,6 +202,98 @@ block0(v0: i8, v1: i64): ; nextln: ar %r2, %r3 ; nextln: br %r14 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; IADD_IFCOUT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %iadd_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2, v3 = iadd_ifcout.i64 v0, v1 + return v2 +} + +; check: algr %r2, %r3 +; nextln: br %r14 + +function %iadd_i64_ext32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = uextend.i64 v1 + v3, v4 = iadd_ifcout.i64 v0, v2 + return v3 +} + +; check: algfr %r2, %r3 +; nextln: br %r14 + +function %iadd_i64_imm32(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 32768 + v2, v3 = iadd_ifcout.i64 v0, v1 + return v2 +} + +; check: algfi %r2, 32768 +; nextln: br %r14 + +function %iadd_i64_mem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3, v4 = iadd_ifcout.i64 v0, v2 + return v3 +} + +; check: alg %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i64_mem_ext32(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = uload32.i64 v1 + v3, v4 = iadd_ifcout.i64 v0, v2 + return v3 +} + +; check: algf %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2, v3 = iadd_ifcout.i32 v0, v1 + return v2 +} + +; check: alr %r2, %r3 +; nextln: br %r14 + +function %iadd_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 32768 + v2, v3 = iadd_ifcout.i32 v0, v1 + return v2 +} + +; check: alfi %r2, 32768 +; nextln: br %r14 + +function %iadd_i32_mem(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3, v4 = iadd_ifcout.i32 v0, v2 + return v3 +} + +; check: al %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i32_memoff(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3, v4 = iadd_ifcout.i32 v0, v2 + return v3 
+} + +; check: aly %r2, 4096(%r3) +; nextln: br %r14 + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ISUB ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;