diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index c8fda6aea3..78c0bbd11a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1338,10 +1338,6 @@ impl MachInstEmit for Inst { both the store-data and success-flag operands of stlxr. This causes the instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24 instead for the success-flag. - - In the case where the operation is 'xchg', the second insn is instead - mov x28, x26 - so that we simply write in the destination, the "2nd arg for op". */ // TODO: We should not hardcode registers here, a better idea would be to // pass some scratch registers in the AtomicRMWLoop pseudo-instruction, and use those @@ -1363,19 +1359,17 @@ impl MachInstEmit for Inst { sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); } sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25] + let size = OperandSize::from_ty(ty); match op { - AtomicRmwOp::Xchg => { - // mov x28, x26 - Inst::Mov64 { rd: x28wr, rm: x26 }.emit(sink, emit_info, state); - } + AtomicRmwOp::Xchg => {} // do nothing AtomicRmwOp::Nand => { // and x28, x27, x26 // mvn x28, x28 Inst::AluRRR { alu_op: ALUOp::And, - size: OperandSize::Size64, + size, rd: x28wr, rn: x27, rm: x26, @@ -1384,7 +1378,7 @@ impl MachInstEmit for Inst { Inst::AluRRR { alu_op: ALUOp::OrrNot, - size: OperandSize::Size64, + size, rd: x28wr, rn: xzr, rm: x28, @@ -1408,7 +1402,7 @@ impl MachInstEmit for Inst { Inst::AluRRR { alu_op: ALUOp::SubS, - size: OperandSize::from_ty(ty), + size, rd: writable_zero_reg(), rn: x27, rm: x26, @@ -1441,7 +1435,7 @@ impl MachInstEmit for Inst { Inst::AluRRR { alu_op, - size: OperandSize::Size64, + size, rd: x28wr, rn: x27, rm: x26, @@ -1454,7 +1448,11 @@ impl MachInstEmit for Inst { if srcloc != SourceLoc::default() { sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); } - sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25] + if op == AtomicRmwOp::Xchg { + sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25] + } else { + sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25] + } // cbnz w24, again // Note, we're actually testing x24, and relying on the default zero-high-half diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 5e3902dd2a..2717bf4d83 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -6105,8 +6105,80 @@ fn test_aarch64_binemit() { ty: I16, op: inst_common::AtomicRmwOp::Xor, }, - "3BFF5F487C031ACA3CFF1848B8FFFFB5", - "atomically { 16_bits_at_[x25]) Xor= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }", + "3BFF5F487C031A4A3CFF1848B8FFFFB5", + "1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b", + )); + insns.push(( + Inst::AtomicRMWLoop { + ty: I8, + op: inst_common::AtomicRmwOp::Add, + }, + "3BFF5F087C031A0B3CFF1808B8FFFFB5", + "1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b", + )); + insns.push(( + Inst::AtomicRMWLoop { + ty: I32, + op: inst_common::AtomicRmwOp::Or, + }, + "3BFF5F887C031A2A3CFF1888B8FFFFB5", + "1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b", + )); + insns.push(( + Inst::AtomicRMWLoop { + ty: I64, + op: inst_common::AtomicRmwOp::And, + }, + "3BFF5FC87C031A8A3CFF18C8B8FFFFB5", + "1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz 
w24, 1b", + )); + insns.push(( + Inst::AtomicRMWLoop { + ty: I8, + op: inst_common::AtomicRmwOp::Xchg, + }, + "3BFF5F083AFF1808D8FFFFB5", + "1: ldaxrb w27, [x25]; stlxrb w24, w26, [x25]; cbnz w24, 1b", + )); + insns.push(( + Inst::AtomicRMWLoop { + ty: I16, + op: inst_common::AtomicRmwOp::Nand, + }, + "3BFF5F487C031A0AFC033C2A3CFF184898FFFFB5", + "1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b", + )); + insns.push(( + Inst::AtomicRMWLoop { + ty: I32, + op: inst_common::AtomicRmwOp::Smin, + }, + "3BFF5F887F031A6B7CB39A9A3CFF188898FFFFB5", + "1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b", + )); + insns.push(( + Inst::AtomicRMWLoop { + ty: I64, + op: inst_common::AtomicRmwOp::Smax, + }, + "3BFF5FC87F031AEB7CC39A9A3CFF18C898FFFFB5", + "1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b", + )); + insns.push(( + Inst::AtomicRMWLoop { + ty: I8, + op: inst_common::AtomicRmwOp::Umin, + }, + "3BFF5F087F031A6B7C339A9A3CFF180898FFFFB5", + "1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b", + )); + insns.push(( + Inst::AtomicRMWLoop { + ty: I16, + op: inst_common::AtomicRmwOp::Umax, + }, + "3BFF5F487F031A6B7C839A9A3CFF184898FFFFB5", + "1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b", )); insns.push(( @@ -6462,14 +6534,6 @@ fn test_aarch64_binemit() { "lduminal x25, x26, [x27]", )); - insns.push(( - Inst::AtomicRMWLoop { - ty: I32, - op: inst_common::AtomicRmwOp::Xchg, - }, - "3BFF5F88FC031AAA3CFF1888B8FFFFB5", - "atomically { 32_bits_at_[x25]) Xchg= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }", - )); insns.push(( Inst::AtomicCAS { rs: writable_xreg(28), diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 83fa3ec870..c0b7e541b8 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -688,12 +688,14 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { &Inst::CCmpImm { rn, .. } => { collector.add_use(rn); } - &Inst::AtomicRMWLoop { .. } => { + &Inst::AtomicRMWLoop { op, .. } => { collector.add_use(xreg(25)); collector.add_use(xreg(26)); collector.add_def(writable_xreg(24)); collector.add_def(writable_xreg(27)); - collector.add_def(writable_xreg(28)); + if op != AtomicRmwOp::Xchg { + collector.add_def(writable_xreg(28)); + } } &Inst::AtomicRMW { rs, rt, rn, .. } => { collector.add_use(rs); @@ -2399,9 +2401,60 @@ impl Inst { format!("{}{} {}, {}, [{}]", op, ty_suffix, rs, rt, rn) } &Inst::AtomicRMWLoop { ty, op, .. 
} => { - format!( - "atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}", - ty.bits(), op) + let ty_suffix = match ty { + I8 => "b", + I16 => "h", + _ => "", + }; + let size = OperandSize::from_ty(ty); + let r_status = show_ireg_sized(xreg(24), mb_rru, OperandSize::Size32); + let r_arg2 = show_ireg_sized(xreg(26), mb_rru, size); + let r_tmp = show_ireg_sized(xreg(27), mb_rru, size); + let mut r_dst = show_ireg_sized(xreg(28), mb_rru, size); + + let mut loop_str: String = "1: ".to_string(); + loop_str.push_str(&format!("ldaxr{} {}, [x25]; ", ty_suffix, r_tmp)); + + let op_str = match op { + inst_common::AtomicRmwOp::Add => "add", + inst_common::AtomicRmwOp::Sub => "sub", + inst_common::AtomicRmwOp::Xor => "eor", + inst_common::AtomicRmwOp::Or => "orr", + inst_common::AtomicRmwOp::And => "and", + _ => "", + }; + + if op_str.is_empty() { + match op { + inst_common::AtomicRmwOp::Xchg => r_dst = r_arg2, + inst_common::AtomicRmwOp::Nand => { + loop_str.push_str(&format!("and {}, {}, {}; ", r_dst, r_tmp, r_arg2)); + loop_str.push_str(&format!("mvn {}, {}; ", r_dst, r_dst)); + } + _ => { + loop_str.push_str(&format!("cmp {}, {}; ", r_tmp, r_arg2)); + let cond = match op { + inst_common::AtomicRmwOp::Smin => "lt", + inst_common::AtomicRmwOp::Smax => "gt", + inst_common::AtomicRmwOp::Umin => "lo", + inst_common::AtomicRmwOp::Umax => "hi", + _ => unreachable!(), + }; + loop_str.push_str(&format!( + "csel {}, {}, {}, {}; ", + r_dst, r_tmp, r_arg2, cond + )); + } + }; + } else { + loop_str.push_str(&format!("{} {}, {}, {}; ", op_str, r_dst, r_tmp, r_arg2)); + } + loop_str.push_str(&format!( + "stlxr{} {}, {}, [x25]; ", + ty_suffix, r_status, r_dst + )); + loop_str.push_str(&format!("cbnz {}, 1b", r_status)); + loop_str } &Inst::AtomicCAS { rs, rt, rn, ty } => { let op = match ty { diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif index 68964cae88..ca3c26fbcc 100644 --- a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif +++ b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif @@ -16,8 +16,8 @@ block0(v0: i64, v1: i64): ; Inst 1: ret ; }} -function %atomic_rmw_add_i32(i32, i32) { -block0(v0: i32, v1: i32): +function %atomic_rmw_add_i32(i64, i32) { +block0(v0: i64, v1: i32): v2 = atomic_rmw.i32 add v0, v1 return } @@ -31,6 +31,36 @@ block0(v0: i32, v1: i32): ; Inst 1: ret ; }} +function %atomic_rmw_add_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 add v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 2) +; Inst 0: ldaddalh w1, w0, [x0] +; Inst 1: ret +; }} + +function %atomic_rmw_add_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 add v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 
2) +; Inst 0: ldaddalb w1, w0, [x0] +; Inst 1: ret +; }} + function %atomic_rmw_and_i64(i64, i64) { block0(v0: i64, v1: i64): v2 = atomic_rmw.i64 and v0, v1 @@ -46,8 +76,8 @@ block0(v0: i64, v1: i64): ; Inst 1: ret ; }} -function %atomic_rmw_and_i32(i32, i32) { -block0(v0: i32, v1: i32): +function %atomic_rmw_and_i32(i64, i32) { +block0(v0: i64, v1: i32): v2 = atomic_rmw.i32 and v0, v1 return } @@ -61,6 +91,140 @@ block0(v0: i32, v1: i32): ; Inst 1: ret ; }} +function %atomic_rmw_and_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 and v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 2) +; Inst 0: ldclralh w1, w0, [x0] +; Inst 1: ret +; }} + +function %atomic_rmw_and_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 and v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 2) +; Inst 0: ldclralb w1, w0, [x0] +; Inst 1: ret +; }} + +function %atomic_rmw_nand_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 nand v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_nand_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 nand v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_nand_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 nand v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_nand_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 nand v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! 
+; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + function %atomic_rmw_or_i64(i64, i64) { block0(v0: i64, v1: i64): v2 = atomic_rmw.i64 or v0, v1 @@ -76,8 +240,8 @@ block0(v0: i64, v1: i64): ; Inst 1: ret ; }} -function %atomic_rmw_or_i32(i32, i32) { -block0(v0: i32, v1: i32): +function %atomic_rmw_or_i32(i64, i32) { +block0(v0: i64, v1: i32): v2 = atomic_rmw.i32 or v0, v1 return } @@ -91,6 +255,36 @@ block0(v0: i32, v1: i32): ; Inst 1: ret ; }} +function %atomic_rmw_or_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 or v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 2) +; Inst 0: ldsetalh w1, w0, [x0] +; Inst 1: ret +; }} + +function %atomic_rmw_or_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 or v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 2) +; Inst 0: ldsetalb w1, w0, [x0] +; Inst 1: ret +; }} + function %atomic_rmw_xor_i64(i64, i64) { block0(v0: i64, v1: i64): v2 = atomic_rmw.i64 xor v0, v1 @@ -106,8 +300,8 @@ block0(v0: i64, v1: i64): ; Inst 1: ret ; }} -function %atomic_rmw_xor_i32(i32, i32) { -block0(v0: i32, v1: i32): +function %atomic_rmw_xor_i32(i64, i32) { +block0(v0: i64, v1: i32): v2 = atomic_rmw.i32 xor v0, v1 return } @@ -121,6 +315,36 @@ block0(v0: i32, v1: i32): ; Inst 1: ret ; }} +function %atomic_rmw_xor_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 xor v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 2) +; Inst 0: ldeoralh w1, w0, [x0] +; Inst 1: ret +; }} + +function %atomic_rmw_xor_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 xor v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 2) +; Inst 0: ldeoralb w1, w0, [x0] +; Inst 1: ret +; }} + function %atomic_rmw_smax_i64(i64, i64) { block0(v0: i64, v1: i64): v2 = atomic_rmw.i64 smax v0, v1 @@ -136,8 +360,8 @@ block0(v0: i64, v1: i64): ; Inst 1: ret ; }} -function %atomic_rmw_smax_i32(i32, i32) { -block0(v0: i32, v1: i32): +function %atomic_rmw_smax_i32(i64, i32) { +block0(v0: i64, v1: i32): v2 = atomic_rmw.i32 smax v0, v1 return } @@ -151,6 +375,36 @@ block0(v0: i32, v1: i32): ; Inst 1: ret ; }} +function %atomic_rmw_smax_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 smax v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 2) +; Inst 0: ldsmaxalh w1, w0, [x0] +; Inst 1: ret +; }} + +function %atomic_rmw_smax_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 smax v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 
2) +; Inst 0: ldsmaxalb w1, w0, [x0] +; Inst 1: ret +; }} + function %atomic_rmw_umax_i64(i64, i64) { block0(v0: i64, v1: i64): v2 = atomic_rmw.i64 umax v0, v1 @@ -166,8 +420,8 @@ block0(v0: i64, v1: i64): ; Inst 1: ret ; }} -function %atomic_rmw_umax_i32(i32, i32) { -block0(v0: i32, v1: i32): +function %atomic_rmw_umax_i32(i64, i32) { +block0(v0: i64, v1: i32): v2 = atomic_rmw.i32 umax v0, v1 return } @@ -181,6 +435,36 @@ block0(v0: i32, v1: i32): ; Inst 1: ret ; }} +function %atomic_rmw_umax_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 umax v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 2) +; Inst 0: ldumaxalh w1, w0, [x0] +; Inst 1: ret +; }} + +function %atomic_rmw_umax_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 umax v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 2) +; Inst 0: ldumaxalb w1, w0, [x0] +; Inst 1: ret +; }} + function %atomic_rmw_smin_i64(i64, i64) { block0(v0: i64, v1: i64): v2 = atomic_rmw.i64 smin v0, v1 @@ -196,8 +480,8 @@ block0(v0: i64, v1: i64): ; Inst 1: ret ; }} -function %atomic_rmw_smin_i32(i32, i32) { -block0(v0: i32, v1: i32): +function %atomic_rmw_smin_i32(i64, i32) { +block0(v0: i64, v1: i32): v2 = atomic_rmw.i32 smin v0, v1 return } @@ -211,6 +495,36 @@ block0(v0: i32, v1: i32): ; Inst 1: ret ; }} +function %atomic_rmw_smin_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 smin v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 2) +; Inst 0: ldsminalh w1, w0, [x0] +; Inst 1: ret +; }} + +function %atomic_rmw_smin_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 smin v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 2) +; Inst 0: ldsminalb w1, w0, [x0] +; Inst 1: ret +; }} + function %atomic_rmw_umin_i64(i64, i64) { block0(v0: i64, v1: i64): v2 = atomic_rmw.i64 umin v0, v1 @@ -226,8 +540,8 @@ block0(v0: i64, v1: i64): ; Inst 1: ret ; }} -function %atomic_rmw_umin_i32(i32, i32) { -block0(v0: i32, v1: i32): +function %atomic_rmw_umin_i32(i64, i32) { +block0(v0: i64, v1: i32): v2 = atomic_rmw.i32 umin v0, v1 return } @@ -241,3 +555,33 @@ block0(v0: i32, v1: i32): ; Inst 1: ret ; }} +function %atomic_rmw_umin_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 umin v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 2) +; Inst 0: lduminalh w1, w0, [x0] +; Inst 1: ret +; }} + +function %atomic_rmw_umin_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 umin v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 
2) +; Inst 0: lduminalb w1, w0, [x0] +; Inst 1: ret +; }} + diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw.clif b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw.clif new file mode 100644 index 0000000000..b793cd27ae --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw.clif @@ -0,0 +1,939 @@ +test compile precise-output +target aarch64 + +function %atomic_rmw_add_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 add v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr x27, [x25]; add x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_add_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 add v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr w27, [x25]; add w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_add_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 add v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrh w27, [x25]; add w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_add_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 add v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_and_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 and v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! 
+; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_and_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 and v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr w27, [x25]; and w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_and_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 and v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrh w27, [x25]; and w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_and_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 and v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrb w27, [x25]; and w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_nand_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 nand v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_nand_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 nand v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! 
+; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_nand_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 nand v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_nand_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 nand v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_or_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 or v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr x27, [x25]; orr x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_or_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 or v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_or_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 or v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! 
+; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrh w27, [x25]; orr w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_or_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 or v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrb w27, [x25]; orr w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_xor_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 xor v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr x27, [x25]; eor x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_xor_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 xor v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr w27, [x25]; eor w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_xor_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 xor v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_xor_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 xor v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! 
+; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrb w27, [x25]; eor w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_smax_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 smax v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_smax_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 smax v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxr w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_smax_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 smax v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxrh w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_smax_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 smax v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_umax_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 umax v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! 
+; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, hi; stlxr w24, x28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_umax_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 umax v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxr w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_umax_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 umax v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_umax_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 umax v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrb w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_smin_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 smin v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lt; stlxr w24, x28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_smin_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 smin v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! 
+; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_smin_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 smin v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_smin_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 smin v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxrb w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_umin_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 umin v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lo; stlxr w24, x28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_umin_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 umin v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxr w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_umin_i16(i64, i16) { +block0(v0: i64, v1: i16): + v2 = atomic_rmw.i16 umin v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! 
+; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrh w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} + +function %atomic_rmw_umin_i8(i64, i8) { +block0(v0: i64, v1: i8): + v2 = atomic_rmw.i8 umin v0, v1 + return +} + +; VCode_ShowWithRRU {{ +; Entry block: 0 +; Block 0: +; (original IR block: block0) +; (instruction range: 0 .. 13) +; Inst 0: stp fp, lr, [sp, #-16]! +; Inst 1: mov fp, sp +; Inst 2: str x28, [sp, #-16]! +; Inst 3: stp x26, x27, [sp, #-16]! +; Inst 4: stp x24, x25, [sp, #-16]! +; Inst 5: mov x25, x0 +; Inst 6: mov x26, x1 +; Inst 7: 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b +; Inst 8: ldp x24, x25, [sp], #16 +; Inst 9: ldp x26, x27, [sp], #16 +; Inst 10: ldr x28, [sp], #16 +; Inst 11: ldp fp, lr, [sp], #16 +; Inst 12: ret +; }} +
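
Note: as a reading aid for the ldaxr/stlxr loop bodies above, the value written back for each AtomicRmwOp can be modeled by the short Rust sketch below. This is not Cranelift code; the enum and function names are illustrative, and it assumes 64-bit operands (narrower widths use the sized w-register forms shown in the expected output, now that the ALU op width follows OperandSize::from_ty).

// Sketch only: models the value the emitted loop stores back via stlxr.
// `old` is the value loaded by ldaxr (x27); `arg` is the second operand (x26).
enum RmwOp { Add, Sub, And, Nand, Or, Xor, Xchg, Smin, Smax, Umin, Umax }

fn store_value(op: RmwOp, old: u64, arg: u64) -> u64 {
    match op {
        RmwOp::Add => old.wrapping_add(arg),
        RmwOp::Sub => old.wrapping_sub(arg),
        RmwOp::And => old & arg,
        // Nand is emitted as `and` followed by `mvn`.
        RmwOp::Nand => !(old & arg),
        RmwOp::Or => old | arg,
        RmwOp::Xor => old ^ arg,
        // With this patch, Xchg feeds x26 straight into stlxr instead of
        // first copying it into the scratch register x28.
        RmwOp::Xchg => arg,
        // The min/max cases are emitted as `cmp` + `csel` with conditions
        // lt/gt (signed) and lo/hi (unsigned).
        RmwOp::Smin => (old as i64).min(arg as i64) as u64,
        RmwOp::Smax => (old as i64).max(arg as i64) as u64,
        RmwOp::Umin => old.min(arg),
        RmwOp::Umax => old.max(arg),
    }
}

fn main() {
    assert_eq!(store_value(RmwOp::Nand, 0b1100, 0b1010), !0b1000u64);
    assert_eq!(store_value(RmwOp::Xchg, 5, 9), 9);
}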