[AArch64] Port atomic rmw to ISLE (#4021)

Also fix and extend the current implementation:

- AtomicRMWOp::Clr != AtomicRmwOp::And, as the input needs to be inverted first.
- Inputs to the cmp for the RMWLoop case are sign-extended when needed.
- Lower Xchg to Swp.
- Lower Sub to Add with a negated input.
- Added more runtests.

Copyright (c) 2022, Arm Limited.
2022-04-27 21:13:59 +01:00
parent 8381179503
commit 12b4374cd5
26 changed files with 1632 additions and 1281 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst/args.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs
@@ -583,6 +583,13 @@ impl OperandSize {
        }
    }

+    pub fn bits(&self) -> u8 {
+        match self {
+            OperandSize::Size32 => 32,
+            OperandSize::Size64 => 64,
+        }
+    }
+
    /// Convert from an integer type into the smallest size that fits.
    pub fn from_ty(ty: Type) -> OperandSize {
        debug_assert!(!ty.is_vector());
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -7,6 +7,7 @@ use crate::ir::constant::ConstantData;
 use crate::ir::types::*;
 use crate::ir::{LibCall, MemFlags, TrapCode};
 use crate::isa::aarch64::inst::*;
+use crate::isa::aarch64::lower::is_valid_atomic_transaction_ty;
 use crate::machinst::{ty_bits, Reg, RegClass, Writable};
 use core::convert::TryFrom;

@@ -505,7 +506,7 @@ fn enc_dmb_ish() -> u32 {
    0xD5033BBF
 }

-fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
+fn enc_acq_rel(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
    assert!(machreg_to_gpr(rt.to_reg()) != 31);
    let sz = match ty {
        I64 => 0b11,
@@ -514,6 +515,10 @@ fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u
        I8 => 0b00,
        _ => unreachable!(),
    };
+    let bit15 = match op {
+        AtomicRMWOp::Swp => 0b1,
+        _ => 0b0,
+    };
    let op = match op {
        AtomicRMWOp::Add => 0b000,
        AtomicRMWOp::Clr => 0b001,
@@ -523,10 +528,12 @@ fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u
        AtomicRMWOp::Smin => 0b101,
        AtomicRMWOp::Umax => 0b110,
        AtomicRMWOp::Umin => 0b111,
+        AtomicRMWOp::Swp => 0b000,
    };
    0b00_111_000_111_00000_0_000_00_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rs) << 16)
+        | bit15 << 15
        | (op << 12)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
@@ -1371,15 +1378,18 @@ impl MachInstEmit for Inst {
                sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
            }
            &Inst::AtomicRMW { ty, op, rs, rt, rn } => {
+                assert!(is_valid_atomic_transaction_ty(ty));
                let rs = allocs.next(rs);
                let rt = allocs.next_writable(rt);
                let rn = allocs.next(rn);
-                sink.put4(enc_ldal(ty, op, rs, rt, rn));
+                sink.put4(enc_acq_rel(ty, op, rs, rt, rn));
            }
            &Inst::AtomicRMWLoop { ty, op } => {
+                assert!(is_valid_atomic_transaction_ty(ty));
                /* Emit this:
                     again:
                      ldaxr{,b,h}  x/w27, [x25]
+                      // maybe sign extend
                      op          x28, x27, x26 // op is add,sub,and,orr,eor
                      stlxr{,b,h}  w24, x/w28, [x25]
                      cbnz        x24, again
@@ -1414,10 +1424,31 @@ impl MachInstEmit for Inst {
                }
                sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
                let size = OperandSize::from_ty(ty);
+                let sign_ext = match op {
+                    AtomicRMWLoopOp::Smin | AtomicRMWLoopOp::Smax => match ty {
+                        I16 => Some((ExtendOp::SXTH, 16)),
+                        I8 => Some((ExtendOp::SXTB, 8)),
+                        _ => None,
+                    },
+                    _ => None,
+                };
+
+                // sxt{b|h} the loaded result if necessary.
+                if sign_ext.is_some() {
+                    let (_, from_bits) = sign_ext.unwrap();
+                    Inst::Extend {
+                        rd: x27wr,
+                        rn: x27,
+                        signed: true,
+                        from_bits,
+                        to_bits: size.bits(),
+                    }
+                    .emit(&[], sink, emit_info, state);
+                }

                match op {
-                    AtomicRmwOp::Xchg => {} // do nothing
-                    AtomicRmwOp::Nand => {
+                    AtomicRMWLoopOp::Xchg => {} // do nothing
+                    AtomicRMWLoopOp::Nand => {
                        // and x28, x27, x26
                        // mvn x28, x28

@@ -1439,29 +1470,42 @@ impl MachInstEmit for Inst {
                        }
                        .emit(&[], sink, emit_info, state);
                    }
-                    AtomicRmwOp::Umin
-                    | AtomicRmwOp::Umax
-                    | AtomicRmwOp::Smin
-                    | AtomicRmwOp::Smax => {
-                        // cmp x27, x26
+                    AtomicRMWLoopOp::Umin
+                    | AtomicRMWLoopOp::Umax
+                    | AtomicRMWLoopOp::Smin
+                    | AtomicRMWLoopOp::Smax => {
+                        // cmp x27, x26 {?sxt}
                        // csel.op x28, x27, x26

                        let cond = match op {
-                            AtomicRmwOp::Umin => Cond::Lo,
-                            AtomicRmwOp::Umax => Cond::Hi,
-                            AtomicRmwOp::Smin => Cond::Lt,
-                            AtomicRmwOp::Smax => Cond::Gt,
+                            AtomicRMWLoopOp::Umin => Cond::Lo,
+                            AtomicRMWLoopOp::Umax => Cond::Hi,
+                            AtomicRMWLoopOp::Smin => Cond::Lt,
+                            AtomicRMWLoopOp::Smax => Cond::Gt,
                            _ => unreachable!(),
                        };

-                        Inst::AluRRR {
-                            alu_op: ALUOp::SubS,
-                            size,
-                            rd: writable_zero_reg(),
-                            rn: x27,
-                            rm: x26,
+                        if sign_ext.is_some() {
+                            let (extendop, _) = sign_ext.unwrap();
+                            Inst::AluRRRExtend {
+                                alu_op: ALUOp::SubS,
+                                size,
+                                rd: writable_zero_reg(),
+                                rn: x27,
+                                rm: x26,
+                                extendop,
+                            }
+                            .emit(&[], sink, emit_info, state);
+                        } else {
+                            Inst::AluRRR {
+                                alu_op: ALUOp::SubS,
+                                size,
+                                rd: writable_zero_reg(),
+                                rn: x27,
+                                rm: x26,
+                            }
+                            .emit(&[], sink, emit_info, state);
                        }
-                        .emit(&[], sink, emit_info, state);

                        Inst::CSel {
                            cond,
@@ -1474,17 +1518,17 @@ impl MachInstEmit for Inst {
                    _ => {
                        // add/sub/and/orr/eor x28, x27, x26
                        let alu_op = match op {
-                            AtomicRmwOp::Add => ALUOp::Add,
-                            AtomicRmwOp::Sub => ALUOp::Sub,
-                            AtomicRmwOp::And => ALUOp::And,
-                            AtomicRmwOp::Or => ALUOp::Orr,
-                            AtomicRmwOp::Xor => ALUOp::Eor,
-                            AtomicRmwOp::Nand
-                            | AtomicRmwOp::Umin
-                            | AtomicRmwOp::Umax
-                            | AtomicRmwOp::Smin
-                            | AtomicRmwOp::Smax
-                            | AtomicRmwOp::Xchg => unreachable!(),
+                            AtomicRMWLoopOp::Add => ALUOp::Add,
+                            AtomicRMWLoopOp::Sub => ALUOp::Sub,
+                            AtomicRMWLoopOp::And => ALUOp::And,
+                            AtomicRMWLoopOp::Orr => ALUOp::Orr,
+                            AtomicRMWLoopOp::Eor => ALUOp::Eor,
+                            AtomicRMWLoopOp::Nand
+                            | AtomicRMWLoopOp::Umin
+                            | AtomicRMWLoopOp::Umax
+                            | AtomicRMWLoopOp::Smin
+                            | AtomicRMWLoopOp::Smax
+                            | AtomicRMWLoopOp::Xchg => unreachable!(),
                        };

                        Inst::AluRRR {
@@ -1502,7 +1546,7 @@ impl MachInstEmit for Inst {
                if srcloc != SourceLoc::default() {
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }
-                if op == AtomicRmwOp::Xchg {
+                if op == AtomicRMWLoopOp::Xchg {
                    sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25]
                } else {
                    sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -6205,10 +6205,18 @@ fn test_aarch64_binemit() {
        "frintn d23, d24",
    ));

+    insns.push((
+        Inst::AtomicRMWLoop {
+            ty: I8,
+            op: AtomicRMWLoopOp::Sub,
+        },
+        "3BFF5F087C031A4B3CFF1808B8FFFFB5",
+        "1: ldaxrb w27, [x25]; sub w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b",
+    ));
    insns.push((
        Inst::AtomicRMWLoop {
            ty: I16,
-            op: inst_common::AtomicRmwOp::Xor,
+            op: AtomicRMWLoopOp::Eor,
        },
        "3BFF5F487C031A4A3CFF1848B8FFFFB5",
        "1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b",
@@ -6216,7 +6224,7 @@ fn test_aarch64_binemit() {
    insns.push((
        Inst::AtomicRMWLoop {
            ty: I8,
-            op: inst_common::AtomicRmwOp::Add,
+            op: AtomicRMWLoopOp::Add,
        },
        "3BFF5F087C031A0B3CFF1808B8FFFFB5",
        "1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b",
@@ -6224,7 +6232,7 @@ fn test_aarch64_binemit() {
    insns.push((
        Inst::AtomicRMWLoop {
            ty: I32,
-            op: inst_common::AtomicRmwOp::Or,
+            op: AtomicRMWLoopOp::Orr,
        },
        "3BFF5F887C031A2A3CFF1888B8FFFFB5",
        "1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b",
@@ -6232,7 +6240,7 @@ fn test_aarch64_binemit() {
    insns.push((
        Inst::AtomicRMWLoop {
            ty: I64,
-            op: inst_common::AtomicRmwOp::And,
+            op: AtomicRMWLoopOp::And,
        },
        "3BFF5FC87C031A8A3CFF18C8B8FFFFB5",
        "1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b",
@@ -6240,7 +6248,7 @@ fn test_aarch64_binemit() {
    insns.push((
        Inst::AtomicRMWLoop {
            ty: I8,
-            op: inst_common::AtomicRmwOp::Xchg,
+            op: AtomicRMWLoopOp::Xchg,
        },
        "3BFF5F083AFF1808D8FFFFB5",
        "1: ldaxrb w27, [x25]; stlxrb w24, w26, [x25]; cbnz w24, 1b",
@@ -6248,15 +6256,23 @@ fn test_aarch64_binemit() {
    insns.push((
        Inst::AtomicRMWLoop {
            ty: I16,
-            op: inst_common::AtomicRmwOp::Nand,
+            op: AtomicRMWLoopOp::Nand,
        },
        "3BFF5F487C031A0AFC033C2A3CFF184898FFFFB5",
        "1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b",
    ));
+    insns.push((
+        Inst::AtomicRMWLoop {
+            ty: I16,
+            op: AtomicRMWLoopOp::Smin,
+        },
+        "3BFF5F487B3F00137FA33A6B7CB39A9A3CFF184878FFFFB5",
+        "1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b",
+    ));
    insns.push((
        Inst::AtomicRMWLoop {
            ty: I32,
-            op: inst_common::AtomicRmwOp::Smin,
+            op: AtomicRMWLoopOp::Smin,
        },
        "3BFF5F887F031A6B7CB39A9A3CFF188898FFFFB5",
        "1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b",
@@ -6264,7 +6280,7 @@ fn test_aarch64_binemit() {
    insns.push((
        Inst::AtomicRMWLoop {
            ty: I64,
-            op: inst_common::AtomicRmwOp::Smax,
+            op: AtomicRMWLoopOp::Smax,
        },
        "3BFF5FC87F031AEB7CC39A9A3CFF18C898FFFFB5",
        "1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b",
@@ -6272,7 +6288,15 @@ fn test_aarch64_binemit() {
    insns.push((
        Inst::AtomicRMWLoop {
            ty: I8,
-            op: inst_common::AtomicRmwOp::Umin,
+            op: AtomicRMWLoopOp::Smax,
+        },
+        "3BFF5F087B1F00137F833A6B7CC39A9A3CFF180878FFFFB5",
+        "1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b",
+    ));
+    insns.push((
+        Inst::AtomicRMWLoop {
+            ty: I8,
+            op: AtomicRMWLoopOp::Umin,
        },
        "3BFF5F087F031A6B7C339A9A3CFF180898FFFFB5",
        "1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b",
@@ -6280,7 +6304,7 @@ fn test_aarch64_binemit() {
    insns.push((
        Inst::AtomicRMWLoop {
            ty: I16,
-            op: inst_common::AtomicRmwOp::Umax,
+            op: AtomicRMWLoopOp::Umax,
        },
        "3BFF5F487F031A6B7C839A9A3CFF184898FFFFB5",
        "1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b",
@@ -6638,6 +6662,50 @@ fn test_aarch64_binemit() {
        "7A73F9F8",
        "lduminal x25, x26, [x27]",
    ));
+    insns.push((
+        Inst::AtomicRMW {
+            ty: I8,
+            op: AtomicRMWOp::Swp,
+            rs: xreg(28),
+            rt: writable_xreg(29),
+            rn: xreg(30),
+        },
+        "DD83FC38",
+        "swpalb w28, fp, [lr]",
+    ));
+    insns.push((
+        Inst::AtomicRMW {
+            ty: I16,
+            op: AtomicRMWOp::Swp,
+            rs: xreg(0),
+            rt: writable_xreg(1),
+            rn: xreg(2),
+        },
+        "4180E078",
+        "swpalh w0, w1, [x2]",
+    ));
+    insns.push((
+        Inst::AtomicRMW {
+            ty: I32,
+            op: AtomicRMWOp::Swp,
+            rs: xreg(3),
+            rt: writable_xreg(4),
+            rn: xreg(5),
+        },
+        "A480E3B8",
+        "swpal w3, w4, [x5]",
+    ));
+    insns.push((
+        Inst::AtomicRMW {
+            ty: I64,
+            op: AtomicRMWOp::Swp,
+            rs: xreg(6),
+            rt: writable_xreg(7),
+            rn: xreg(8),
+        },
+        "0781E6F8",
+        "swpal x6, x7, [x8]",
+    ));

    insns.push((
        Inst::AtomicCAS {
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -39,9 +39,9 @@ mod emit_tests;
 // Instructions (top level): definition

 pub use crate::isa::aarch64::lower::isle::generated_code::{
-    ALUOp, ALUOp3, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp,
-    IntToFpuOp, MInst as Inst, MoveWideOp, VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp,
-    VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecRRRLongOp, VecShiftImmOp,
+    ALUOp, ALUOp3, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode,
+    FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUOp, VecExtendOp, VecLanesOp, VecMisc2,
+    VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecRRRLongOp, VecShiftImmOp,
 };

 /// A floating-point unit (FPU) operation with two args, a register and an immediate.
@@ -676,12 +676,14 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
        &Inst::CCmpImm { rn, .. } => {
            collector.reg_use(rn);
        }
-        &Inst::AtomicRMWLoop { .. } => {
+        &Inst::AtomicRMWLoop { op, .. } => {
            collector.reg_use(xreg(25));
            collector.reg_use(xreg(26));
            collector.reg_def(writable_xreg(24));
            collector.reg_def(writable_xreg(27));
-            collector.reg_def(writable_xreg(28));
+            if op != AtomicRMWLoopOp::Xchg {
+                collector.reg_def(writable_xreg(28));
+            }
        }
        &Inst::AtomicRMW { rs, rt, rn, .. } => {
            collector.reg_use(rs);
@@ -1538,6 +1540,7 @@ impl Inst {
                    AtomicRMWOp::Umax => "ldumaxal",
                    AtomicRMWOp::Smin => "ldsminal",
                    AtomicRMWOp::Umin => "lduminal",
+                    AtomicRMWOp::Swp => "swpal",
                };

                let size = OperandSize::from_ty(ty);
@@ -1569,28 +1572,39 @@ impl Inst {
                loop_str.push_str(&format!("ldaxr{} {}, [{}]; ", ty_suffix, r_tmp, r_addr));

                let op_str = match op {
-                    inst_common::AtomicRmwOp::Add => "add",
-                    inst_common::AtomicRmwOp::Sub => "sub",
-                    inst_common::AtomicRmwOp::Xor => "eor",
-                    inst_common::AtomicRmwOp::Or => "orr",
-                    inst_common::AtomicRmwOp::And => "and",
+                    AtomicRMWLoopOp::Add => "add",
+                    AtomicRMWLoopOp::Sub => "sub",
+                    AtomicRMWLoopOp::Eor => "eor",
+                    AtomicRMWLoopOp::Orr => "orr",
+                    AtomicRMWLoopOp::And => "and",
                    _ => "",
                };

                if op_str.is_empty() {
                    match op {
-                        inst_common::AtomicRmwOp::Xchg => r_dst = r_arg2,
-                        inst_common::AtomicRmwOp::Nand => {
+                        AtomicRMWLoopOp::Xchg => r_dst = r_arg2,
+                        AtomicRMWLoopOp::Nand => {
                            loop_str.push_str(&format!("and {}, {}, {}; ", r_dst, r_tmp, r_arg2));
                            loop_str.push_str(&format!("mvn {}, {}; ", r_dst, r_dst));
                        }
                        _ => {
-                            loop_str.push_str(&format!("cmp {}, {}; ", r_tmp, r_arg2));
+                            if (op == AtomicRMWLoopOp::Smin || op == AtomicRMWLoopOp::Smax)
+                                && (ty == I8 || ty == I16)
+                            {
+                                loop_str
+                                    .push_str(&format!("sxt{} {}, {}; ", ty_suffix, r_tmp, r_tmp));
+                                loop_str.push_str(&format!(
+                                    "cmp {}, {}, sxt{}; ",
+                                    r_tmp, r_arg2, ty_suffix
+                                ));
+                            } else {
+                                loop_str.push_str(&format!("cmp {}, {}; ", r_tmp, r_arg2));
+                            }
                            let cond = match op {
-                                inst_common::AtomicRmwOp::Smin => "lt",
-                                inst_common::AtomicRmwOp::Smax => "gt",
-                                inst_common::AtomicRmwOp::Umin => "lo",
-                                inst_common::AtomicRmwOp::Umax => "hi",
+                                AtomicRMWLoopOp::Smin => "lt",
+                                AtomicRMWLoopOp::Smax => "gt",
+                                AtomicRMWLoopOp::Umin => "lo",
+                                AtomicRMWLoopOp::Umax => "hi",
                                _ => unreachable!(),
                            };
                            loop_str.push_str(&format!(