s390x: Add support for atomic operations (part 1)

This adds full back-end support for the Fence, AtomicLoad
and AtomicStore operations, and partial support for the
AtomicCas and AtomicRmw operations.
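
For orientation, these CLIF operations behave roughly like ordinary
sequentially consistent atomics. A minimal Rust analogue of what a front
end asks for (illustrative only; the names below are std-library APIs,
not Cranelift ones):

    use std::sync::atomic::{fence, AtomicU64, Ordering};

    // Each line mirrors one of the CLIF operations handled by this patch.
    fn atomics_analogue(cell: &AtomicU64) -> u64 {
        cell.store(1, Ordering::SeqCst);                // AtomicStore
        let v = cell.load(Ordering::SeqCst);            // AtomicLoad
        let old = cell.fetch_add(v, Ordering::SeqCst);  // AtomicRmw (add)
        let _ = cell.compare_exchange(old, 2, Ordering::SeqCst, Ordering::SeqCst); // AtomicCas
        fence(Ordering::SeqCst);                        // Fence
        cell.load(Ordering::SeqCst)
    }
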

The missing pieces include sub-word operations, operations
on little-endian memory requiring byte-swapping, and some
of the subtypes of AtomicRmw -- everything that cannot be
implemented without a compare-and-swap loop.  This will be
done in a follow-up patch.

This patch already suffices to make the test suite green
again after a recent change that requires atomic operations
when accessing the heap.

Ulrich Weigand, 2021-06-15 17:00:29 +02:00
parent a7dad4e38f, commit 46b73431ca
11 changed files with 1584 additions and 7 deletions

@@ -33,6 +33,13 @@ fn ty_is_float(ty: Type) -> bool {
     !ty_is_int(ty)
 }

+fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
+    match ty {
+        types::I8 | types::I16 | types::I32 | types::I64 => true,
+        _ => false,
+    }
+}
+
 fn choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T {
     let bits = ty_bits(ty);
     if bits <= 32 {
@@ -2500,13 +2507,159 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             // N.B.: the Ret itself is generated by the ABI.
         }
-        Opcode::AtomicRmw
-        | Opcode::AtomicCas
-        | Opcode::AtomicLoad
-        | Opcode::AtomicStore
-        | Opcode::Fence => {
-            // TODO
-            panic!("Atomic operations not implemented");
+        Opcode::AtomicRmw => {
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+            let flags = ctx.memflags(insn).unwrap();
+            let endianness = flags.endianness(Endianness::Big);
+            let ty = ty.unwrap();
+            assert!(is_valid_atomic_transaction_ty(ty));
+            if endianness == Endianness::Little {
+                panic!("Little-endian atomic operations not implemented");
+            }
+            if ty_bits(ty) < 32 {
+                panic!("Sub-word atomic operations not implemented");
+            }
+            let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
+            let (alu_op, rn) = match op {
+                AtomicRmwOp::And => (choose_32_64(ty, ALUOp::And32, ALUOp::And64), rn),
+                AtomicRmwOp::Or => (choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64), rn),
+                AtomicRmwOp::Xor => (choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64), rn),
+                AtomicRmwOp::Add => (choose_32_64(ty, ALUOp::Add32, ALUOp::Add64), rn),
+                AtomicRmwOp::Sub => {
+                    let tmp_ty = choose_32_64(ty, types::I32, types::I64);
+                    let tmp = ctx.alloc_tmp(tmp_ty).only_reg().unwrap();
+                    let neg_op = choose_32_64(ty, UnaryOp::Neg32, UnaryOp::Neg64);
+                    ctx.emit(Inst::UnaryRR {
+                        op: neg_op,
+                        rd: tmp,
+                        rn,
+                    });
+                    (choose_32_64(ty, ALUOp::Add32, ALUOp::Add64), tmp.to_reg())
+                }
+                _ => panic!("AtomicRmw operation type {:?} not implemented", op),
+            };
+            let mem = MemArg::reg(addr, flags);
+            ctx.emit(Inst::AtomicRmw {
+                alu_op,
+                rd,
+                rn,
+                mem,
+            });
+        }
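
The AtomicRmw lowering handles Sub by negating the operand and reusing the
add form, since only and/or/xor/add map directly onto the emitted atomic
read-modify-write instruction. The identity it relies on holds for wrapping
integer arithmetic; a small stand-alone sketch (plain Rust, not backend code):

    // x - y == x + (-y) in wrapping (two's-complement) arithmetic, so an
    // atomic "subtract" can be lowered as negate-then-atomic-add.
    fn sub_as_neg_add(x: u32, y: u32) -> u32 {
        x.wrapping_add(y.wrapping_neg())
    }

    // For any x, y: sub_as_neg_add(x, y) == x.wrapping_sub(y)
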
+        Opcode::AtomicCas => {
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+            let rn = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
+            let flags = ctx.memflags(insn).unwrap();
+            let endianness = flags.endianness(Endianness::Big);
+            let ty = ty.unwrap();
+            assert!(is_valid_atomic_transaction_ty(ty));
+            if endianness == Endianness::Little {
+                panic!("Little-endian atomic operations not implemented");
+            }
+            if ty_bits(ty) < 32 {
+                panic!("Sub-word atomic operations not implemented");
+            }
+            let mem = MemArg::reg(addr, flags);
+            ctx.emit(Inst::gen_move(rd, rm, ty));
+            if ty_bits(ty) == 32 {
+                ctx.emit(Inst::AtomicCas32 { rd, rn, mem });
+            } else {
+                ctx.emit(Inst::AtomicCas64 { rd, rn, mem });
+            }
+        }
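
For AtomicCas, the expected value is first moved into rd, and the
compare-and-swap updates rd from memory when the comparison fails, so rd
ends up holding the previous memory contents either way, which is the value
CLIF's atomic_cas returns. A rough analogue of that contract using std
atomics (illustrative only):

    use std::sync::atomic::{AtomicU64, Ordering};

    // Returns the previous memory value in both the success and failure
    // case, mirroring what the lowering leaves in `rd`.
    fn cas_returns_old(cell: &AtomicU64, expected: u64, replacement: u64) -> u64 {
        match cell.compare_exchange(expected, replacement, Ordering::SeqCst, Ordering::SeqCst) {
            Ok(old) | Err(old) => old,
        }
    }
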
Opcode::AtomicLoad => {
let flags = ctx.memflags(insn).unwrap();
let endianness = flags.endianness(Endianness::Big);
let ty = ty.unwrap();
assert!(is_valid_atomic_transaction_ty(ty));
let mem = lower_address(ctx, &inputs[..], 0, flags);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if endianness == Endianness::Big {
ctx.emit(match ty_bits(ty) {
8 => Inst::Load32ZExt8 { rd, mem },
16 => Inst::Load32ZExt16 { rd, mem },
32 => Inst::Load32 { rd, mem },
64 => Inst::Load64 { rd, mem },
_ => panic!("Unsupported size in load"),
});
} else {
ctx.emit(match ty_bits(ty) {
8 => Inst::Load32ZExt8 { rd, mem },
16 => Inst::LoadRev16 { rd, mem },
32 => Inst::LoadRev32 { rd, mem },
64 => Inst::LoadRev64 { rd, mem },
_ => panic!("Unsupported size in load"),
});
}
}
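
Atomic loads need no dedicated instruction here: aligned loads are already
atomic on this target, so the big-endian path is a plain (zero-extending)
load and the little-endian path uses the byte-reversing loads, with the
8-bit case shared because a single byte has no byte order. The value
relationship the LoadRev path provides, in plain Rust (illustrative only):

    // A byte-reversed big-endian load of little-endian data yields the same
    // value as a native little-endian load.
    fn load_le_u32(bytes: [u8; 4]) -> u32 {
        u32::from_be_bytes(bytes).swap_bytes() // == u32::from_le_bytes(bytes)
    }
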
+        Opcode::AtomicStore => {
+            let flags = ctx.memflags(insn).unwrap();
+            let endianness = flags.endianness(Endianness::Big);
+            let ty = ctx.input_ty(insn, 0);
+            assert!(is_valid_atomic_transaction_ty(ty));
+            let mem = lower_address(ctx, &inputs[1..], 0, flags);
+            if ty_bits(ty) <= 16 {
+                if let Some(imm) = input_matches_const(ctx, inputs[0]) {
+                    ctx.emit(match (endianness, ty_bits(ty)) {
+                        (_, 8) => Inst::StoreImm8 {
+                            imm: imm as u8,
+                            mem,
+                        },
+                        (Endianness::Big, 16) => Inst::StoreImm16 {
+                            imm: imm as i16,
+                            mem,
+                        },
+                        (Endianness::Little, 16) => Inst::StoreImm16 {
+                            imm: (imm as i16).swap_bytes(),
+                            mem,
+                        },
+                        _ => panic!("Unsupported size in store"),
+                    });
+                } else {
+                    let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+                    ctx.emit(match (endianness, ty_bits(ty)) {
+                        (_, 8) => Inst::Store8 { rd, mem },
+                        (Endianness::Big, 16) => Inst::Store16 { rd, mem },
+                        (Endianness::Little, 16) => Inst::StoreRev16 { rd, mem },
+                        _ => panic!("Unsupported size in store"),
+                    });
+                }
+            } else if endianness == Endianness::Big {
+                if let Some(imm) = input_matches_simm16(ctx, inputs[0]) {
+                    ctx.emit(match ty_bits(ty) {
+                        32 => Inst::StoreImm32SExt16 { imm, mem },
+                        64 => Inst::StoreImm64SExt16 { imm, mem },
+                        _ => panic!("Unsupported size in store"),
+                    });
+                } else {
+                    let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+                    ctx.emit(match ty_bits(ty) {
+                        32 => Inst::Store32 { rd, mem },
+                        64 => Inst::Store64 { rd, mem },
+                        _ => panic!("Unsupported size in store"),
+                    });
+                }
+            } else {
+                let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+                ctx.emit(match ty_bits(ty) {
+                    32 => Inst::StoreRev32 { rd, mem },
+                    64 => Inst::StoreRev64 { rd, mem },
+                    _ => panic!("Unsupported size in store"),
+                });
+            }
+            ctx.emit(Inst::Fence);
+        }
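
Stores mirror the loads, with two extra wrinkles: small constants can be
stored as immediates, and a 16-bit immediate destined for little-endian
memory is byte-swapped up front so that the ordinary store-immediate writes
the little-endian encoding; every atomic store is then followed by
Inst::Fence. The immediate trick in plain Rust (illustrative only):

    fn main() {
        let imm: i16 = 0x1234;
        // Storing the swapped immediate big-endian produces the
        // little-endian encoding of the original value.
        assert_eq!(imm.swap_bytes().to_be_bytes(), imm.to_le_bytes());
    }
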
+        Opcode::Fence => {
+            ctx.emit(Inst::Fence);
+        }
         Opcode::RawBitcast