Also fix and extend the current implementation:
- AtomicRMWOp::Clr != AtomicRmwOp::And, as the input needs to be inverted first.
- Inputs to the cmp for the RMWLoop case are sign-extended when needed.
- Lower Xchg to Swp.
- Lower Sub to Add with a negated input.
- Add more runtests.

Copyright (c) 2022, Arm Limited.
884 lines
20 KiB
Plaintext
884 lines
20 KiB
Plaintext
test compile precise-output
|
|
target aarch64
|
|
|
|
;; Compile test for a 64-bit `atomic_rmw add`: v0 is used as the memory address
;; and v1 as the addend; the result v2 is not used. The expected code after the
;; function is an ldaxr / add / stlxr loop on x registers that branches back to
;; the load while the store status in w24 is non-zero.
function %atomic_rmw_add_i64(i64, i64) {
|
|
block0(v0: i64, v1: i64):
|
|
v2 = atomic_rmw.i64 add v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr x27, [x25]; add x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 32-bit `atomic_rmw add`: v0 is the address, v1 the addend,
;; and the result v2 is not used. The expected loop uses ldaxr/stlxr with w
;; registers for the data.
function %atomic_rmw_add_i32(i64, i32) {
|
|
block0(v0: i64, v1: i32):
|
|
v2 = atomic_rmw.i32 add v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr w27, [x25]; add w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 16-bit `atomic_rmw add`: v0 is the address, v1 the addend,
;; and the result v2 is not used. The expected loop uses the halfword
;; exclusives ldaxrh/stlxrh around a 32-bit add.
function %atomic_rmw_add_i16(i64, i16) {
|
|
block0(v0: i64, v1: i16):
|
|
v2 = atomic_rmw.i16 add v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrh w27, [x25]; add w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for an 8-bit `atomic_rmw add`: v0 is the address, v1 the addend,
;; and the result v2 is not used. The expected loop uses the byte exclusives
;; ldaxrb/stlxrb around a 32-bit add.
function %atomic_rmw_add_i8(i64, i8) {
|
|
block0(v0: i64, v1: i8):
|
|
v2 = atomic_rmw.i8 add v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 64-bit `atomic_rmw sub`: v0 is the address and v1 is
;; subtracted from the loaded value; the result v2 is not used. The expected
;; code is an ldaxr / sub / stlxr loop on x registers.
function %atomic_rmw_sub_i64(i64, i64) {
|
|
block0(v0: i64, v1: i64):
|
|
v2 = atomic_rmw.i64 sub v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr x27, [x25]; sub x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 32-bit `atomic_rmw sub`: v0 is the address and v1 is
;; subtracted from the loaded value; the result v2 is not used. The expected
;; loop uses ldaxr/stlxr with w registers for the data.
function %atomic_rmw_sub_i32(i64, i32) {
|
|
block0(v0: i64, v1: i32):
|
|
v2 = atomic_rmw.i32 sub v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr w27, [x25]; sub w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 16-bit `atomic_rmw sub`: v0 is the address and v1 is
;; subtracted from the loaded value; the result v2 is not used. The expected
;; loop uses ldaxrh/stlxrh around a 32-bit sub.
function %atomic_rmw_sub_i16(i64, i16) {
|
|
block0(v0: i64, v1: i16):
|
|
v2 = atomic_rmw.i16 sub v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrh w27, [x25]; sub w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for an 8-bit `atomic_rmw sub`: v0 is the address and v1 is
;; subtracted from the loaded value; the result v2 is not used. The expected
;; loop uses ldaxrb/stlxrb around a 32-bit sub.
function %atomic_rmw_sub_i8(i64, i8) {
|
|
block0(v0: i64, v1: i8):
|
|
v2 = atomic_rmw.i8 sub v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrb w27, [x25]; sub w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 64-bit `atomic_rmw and`: v0 is the address and v1 the
;; mask operand; the result v2 is not used. The expected code is an
;; ldaxr / and / stlxr loop on x registers.
function %atomic_rmw_and_i64(i64, i64) {
|
|
block0(v0: i64, v1: i64):
|
|
v2 = atomic_rmw.i64 and v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 32-bit `atomic_rmw and`: v0 is the address and v1 the
;; mask operand; the result v2 is not used. The expected loop uses ldaxr/stlxr
;; with w registers for the data.
function %atomic_rmw_and_i32(i64, i32) {
|
|
block0(v0: i64, v1: i32):
|
|
v2 = atomic_rmw.i32 and v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr w27, [x25]; and w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 16-bit `atomic_rmw and`: v0 is the address and v1 the
;; mask operand; the result v2 is not used. The expected loop uses
;; ldaxrh/stlxrh around a 32-bit and.
function %atomic_rmw_and_i16(i64, i16) {
|
|
block0(v0: i64, v1: i16):
|
|
v2 = atomic_rmw.i16 and v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrh w27, [x25]; and w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for an 8-bit `atomic_rmw and`: v0 is the address and v1 the
;; mask operand; the result v2 is not used. The expected loop uses
;; ldaxrb/stlxrb around a 32-bit and.
function %atomic_rmw_and_i8(i64, i8) {
|
|
block0(v0: i64, v1: i8):
|
|
v2 = atomic_rmw.i8 and v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrb w27, [x25]; and w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 64-bit `atomic_rmw nand`: v0 is the address and v1 the
;; second operand; the result v2 is not used. In the expected loop the nand is
;; produced as `and` followed by `mvn` on the same x register before the stlxr.
function %atomic_rmw_nand_i64(i64, i64) {
|
|
block0(v0: i64, v1: i64):
|
|
v2 = atomic_rmw.i64 nand v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 32-bit `atomic_rmw nand`: v0 is the address and v1 the
;; second operand; the result v2 is not used. The expected loop computes
;; `and` then `mvn` on w registers between ldaxr and stlxr.
function %atomic_rmw_nand_i32(i64, i32) {
|
|
block0(v0: i64, v1: i32):
|
|
v2 = atomic_rmw.i32 nand v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 16-bit `atomic_rmw nand`: v0 is the address and v1 the
;; second operand; the result v2 is not used. The expected loop computes
;; `and` then `mvn` between ldaxrh and stlxrh.
function %atomic_rmw_nand_i16(i64, i16) {
|
|
block0(v0: i64, v1: i16):
|
|
v2 = atomic_rmw.i16 nand v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for an 8-bit `atomic_rmw nand`: v0 is the address and v1 the
;; second operand; the result v2 is not used. The expected loop computes
;; `and` then `mvn` between ldaxrb and stlxrb.
function %atomic_rmw_nand_i8(i64, i8) {
|
|
block0(v0: i64, v1: i8):
|
|
v2 = atomic_rmw.i8 nand v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 64-bit `atomic_rmw or`: v0 is the address and v1 the
;; second operand; the result v2 is not used. The expected code is an
;; ldaxr / orr / stlxr loop on x registers.
function %atomic_rmw_or_i64(i64, i64) {
|
|
block0(v0: i64, v1: i64):
|
|
v2 = atomic_rmw.i64 or v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr x27, [x25]; orr x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 32-bit `atomic_rmw or`: v0 is the address and v1 the
;; second operand; the result v2 is not used. The expected loop uses
;; ldaxr/stlxr with w registers and an orr.
function %atomic_rmw_or_i32(i64, i32) {
|
|
block0(v0: i64, v1: i32):
|
|
v2 = atomic_rmw.i32 or v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 16-bit `atomic_rmw or`: v0 is the address and v1 the
;; second operand; the result v2 is not used. The expected loop uses
;; ldaxrh/stlxrh around a 32-bit orr.
function %atomic_rmw_or_i16(i64, i16) {
|
|
block0(v0: i64, v1: i16):
|
|
v2 = atomic_rmw.i16 or v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrh w27, [x25]; orr w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for an 8-bit `atomic_rmw or`: v0 is the address and v1 the
;; second operand; the result v2 is not used. The expected loop uses
;; ldaxrb/stlxrb around a 32-bit orr.
function %atomic_rmw_or_i8(i64, i8) {
|
|
block0(v0: i64, v1: i8):
|
|
v2 = atomic_rmw.i8 or v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrb w27, [x25]; orr w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 64-bit `atomic_rmw xor`: v0 is the address and v1 the
;; second operand; the result v2 is not used. The expected code is an
;; ldaxr / eor / stlxr loop on x registers.
function %atomic_rmw_xor_i64(i64, i64) {
|
|
block0(v0: i64, v1: i64):
|
|
v2 = atomic_rmw.i64 xor v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr x27, [x25]; eor x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 32-bit `atomic_rmw xor`: v0 is the address and v1 the
;; second operand; the result v2 is not used. The expected loop uses
;; ldaxr/stlxr with w registers and an eor.
function %atomic_rmw_xor_i32(i64, i32) {
|
|
block0(v0: i64, v1: i32):
|
|
v2 = atomic_rmw.i32 xor v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr w27, [x25]; eor w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 16-bit `atomic_rmw xor`: v0 is the address and v1 the
;; second operand; the result v2 is not used. The expected loop uses
;; ldaxrh/stlxrh around a 32-bit eor.
function %atomic_rmw_xor_i16(i64, i16) {
|
|
block0(v0: i64, v1: i16):
|
|
v2 = atomic_rmw.i16 xor v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for an 8-bit `atomic_rmw xor`: v0 is the address and v1 the
;; second operand; the result v2 is not used. The expected loop uses
;; ldaxrb/stlxrb around a 32-bit eor.
function %atomic_rmw_xor_i8(i64, i8) {
|
|
block0(v0: i64, v1: i8):
|
|
v2 = atomic_rmw.i8 xor v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrb w27, [x25]; eor w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 64-bit `atomic_rmw smax`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. The expected loop
;; compares the loaded value with v1 and uses `csel ..., gt` to keep the loaded
;; value when it is greater, otherwise v1, before the stlxr.
function %atomic_rmw_smax_i64(i64, i64) {
|
|
block0(v0: i64, v1: i64):
|
|
v2 = atomic_rmw.i64 smax v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 32-bit `atomic_rmw smax`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. The expected loop
;; is ldaxr / cmp / csel (condition gt) / stlxr on w registers.
function %atomic_rmw_smax_i32(i64, i32) {
|
|
block0(v0: i64, v1: i32):
|
|
v2 = atomic_rmw.i32 smax v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxr w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 16-bit `atomic_rmw smax`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. In the expected
;; loop both compare inputs are sign-extended from 16 bits: the loaded value
;; via `sxth`, and v1 via the `sxth` extend on the cmp operand. The select
;; condition is gt.
function %atomic_rmw_smax_i16(i64, i16) {
|
|
block0(v0: i64, v1: i16):
|
|
v2 = atomic_rmw.i16 smax v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, gt; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for an 8-bit `atomic_rmw smax`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. In the expected
;; loop both compare inputs are sign-extended from 8 bits: the loaded value
;; via `sxtb`, and v1 via the `sxtb` extend on the cmp operand. The select
;; condition is gt.
function %atomic_rmw_smax_i8(i64, i8) {
|
|
block0(v0: i64, v1: i8):
|
|
v2 = atomic_rmw.i8 smax v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 64-bit `atomic_rmw umax`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. The expected loop
;; is ldaxr / cmp / csel with the unsigned condition hi / stlxr on x registers.
function %atomic_rmw_umax_i64(i64, i64) {
|
|
block0(v0: i64, v1: i64):
|
|
v2 = atomic_rmw.i64 umax v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, hi; stlxr w24, x28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 32-bit `atomic_rmw umax`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. The expected loop
;; is ldaxr / cmp / csel (condition hi) / stlxr on w registers.
function %atomic_rmw_umax_i32(i64, i32) {
|
|
block0(v0: i64, v1: i32):
|
|
v2 = atomic_rmw.i32 umax v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxr w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 16-bit `atomic_rmw umax`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. The expected loop
;; is ldaxrh / cmp / csel (condition hi) / stlxrh.
;; NOTE(review): unlike the signed i16 case, the expected `cmp w27, w26` applies
;; no extension to the register holding v1 -- confirm the upper bits of an i16
;; argument are guaranteed to be zero here.
function %atomic_rmw_umax_i16(i64, i16) {
|
|
block0(v0: i64, v1: i16):
|
|
v2 = atomic_rmw.i16 umax v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for an 8-bit `atomic_rmw umax`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. The expected loop
;; is ldaxrb / cmp / csel (condition hi) / stlxrb.
;; NOTE(review): unlike the signed i8 case, the expected `cmp w27, w26` applies
;; no extension to the register holding v1 -- confirm the upper bits of an i8
;; argument are guaranteed to be zero here.
function %atomic_rmw_umax_i8(i64, i8) {
|
|
block0(v0: i64, v1: i8):
|
|
v2 = atomic_rmw.i8 umax v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 64-bit `atomic_rmw smin`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. The expected loop
;; compares the loaded value with v1 and uses `csel ..., lt` to keep the loaded
;; value when it is smaller, otherwise v1, before the stlxr.
function %atomic_rmw_smin_i64(i64, i64) {
|
|
block0(v0: i64, v1: i64):
|
|
v2 = atomic_rmw.i64 smin v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lt; stlxr w24, x28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 32-bit `atomic_rmw smin`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. The expected loop
;; is ldaxr / cmp / csel (condition lt) / stlxr on w registers.
function %atomic_rmw_smin_i32(i64, i32) {
|
|
block0(v0: i64, v1: i32):
|
|
v2 = atomic_rmw.i32 smin v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 16-bit `atomic_rmw smin`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. In the expected
;; loop both compare inputs are sign-extended from 16 bits: the loaded value
;; via `sxth`, and v1 via the `sxth` extend on the cmp operand. The select
;; condition is lt.
function %atomic_rmw_smin_i16(i64, i16) {
|
|
block0(v0: i64, v1: i16):
|
|
v2 = atomic_rmw.i16 smin v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for an 8-bit `atomic_rmw smin`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. In the expected
;; loop both compare inputs are sign-extended from 8 bits: the loaded value
;; via `sxtb`, and v1 via the `sxtb` extend on the cmp operand. The select
;; condition is lt.
function %atomic_rmw_smin_i8(i64, i8) {
|
|
block0(v0: i64, v1: i8):
|
|
v2 = atomic_rmw.i8 smin v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, lt; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 64-bit `atomic_rmw umin`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. The expected loop
;; is ldaxr / cmp / csel with the unsigned condition lo / stlxr on x registers.
function %atomic_rmw_umin_i64(i64, i64) {
|
|
block0(v0: i64, v1: i64):
|
|
v2 = atomic_rmw.i64 umin v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lo; stlxr w24, x28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 32-bit `atomic_rmw umin`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. The expected loop
;; is ldaxr / cmp / csel (condition lo) / stlxr on w registers.
function %atomic_rmw_umin_i32(i64, i32) {
|
|
block0(v0: i64, v1: i32):
|
|
v2 = atomic_rmw.i32 umin v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxr w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for a 16-bit `atomic_rmw umin`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. The expected loop
;; is ldaxrh / cmp / csel (condition lo) / stlxrh.
;; NOTE(review): unlike the signed i16 case, the expected `cmp w27, w26` applies
;; no extension to the register holding v1 -- confirm the upper bits of an i16
;; argument are guaranteed to be zero here.
function %atomic_rmw_umin_i16(i64, i16) {
|
|
block0(v0: i64, v1: i16):
|
|
v2 = atomic_rmw.i16 umin v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|
|
;; Compile test for an 8-bit `atomic_rmw umin`: v0 is the address and v1 the
;; value compared against memory; the result v2 is not used. The expected loop
;; is ldaxrb / cmp / csel (condition lo) / stlxrb.
;; NOTE(review): unlike the signed i8 case, the expected `cmp w27, w26` applies
;; no extension to the register holding v1 -- confirm the upper bits of an i8
;; argument are guaranteed to be zero here.
function %atomic_rmw_umin_i8(i64, i8) {
|
|
block0(v0: i64, v1: i8):
|
|
v2 = atomic_rmw.i8 umin v0, v1
|
|
return
|
|
}
|
|
|
|
; stp fp, lr, [sp, #-16]!
|
|
; mov fp, sp
|
|
; str x28, [sp, #-16]!
|
|
; stp x26, x27, [sp, #-16]!
|
|
; stp x24, x25, [sp, #-16]!
|
|
; block0:
|
|
; mov x25, x0
|
|
; mov x4, x1
|
|
; mov x26, x4
|
|
; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
|
; ldp x24, x25, [sp], #16
|
|
; ldp x26, x27, [sp], #16
|
|
; ldr x28, [sp], #16
|
|
; ldp fp, lr, [sp], #16
|
|
; ret
|
|
|