The AArch64 support was slightly broken and was using Armv7-style barriers, which aren't required with Armv8 acquire-release loads/stores. The fallback CAS loops and RMW sequences for AArch64 have also been updated to use acquire-release exclusive instructions, which again removes the need for barriers. The CAS loop has been further optimised by using the extending form of the cmp instruction.

Copyright (c) 2021, Arm Limited.
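
As a rough illustration of the pattern described above (a hand-written sketch,
not the exact code Cranelift emits), a byte-sized CAS loop on AArch64 can be
built entirely from acquire-release exclusives, with the extending cmp folding
the zero-extension of the expected value:

    ; x0 = address, w1 = expected value, w2 = replacement value
    again:
      ldaxrb  w3, [x0]          ; load-acquire exclusive, zero-extends the byte
      cmp     w3, w1, uxtb      ; extending compare against the expected byte
      b.ne    done              ; value differs, give up
      stlxrb  w4, w2, [x0]      ; store-release exclusive, w4 = status
      cbnz    w4, again         ; retry if the exclusive store failed
    done: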

test compile
target s390x
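
; Little-endian atomic loads on big-endian s390x lower to the byte-reversing
; loads (lrvg/lrv/lrvh); the i8 case needs no byte reversal and uses llc.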

function %atomic_load_i64(i64) -> i64 {
block0(v0: i64):
  v1 = atomic_load.i64 little v0
  return v1
}

; check: lrvg %r2, 0(%r2)
; nextln: br %r14
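
; The *_sym variants below load through a colocated symbol: larl materializes
; the address of %sym and the same load is then issued against it.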

function %atomic_load_i64_sym() -> i64 {
  gv0 = symbol colocated %sym

block0:
  v0 = symbol_value.i64 gv0
  v1 = atomic_load.i64 little v0
  return v1
}

; check: larl %r1, %sym + 0 ; lrvg %r2, 0(%r1)
; nextln: br %r14

function %atomic_load_i32(i64) -> i32 {
block0(v0: i64):
  v1 = atomic_load.i32 little v0
  return v1
}

; check: lrv %r2, 0(%r2)
; nextln: br %r14

function %atomic_load_i32_sym() -> i32 {
  gv0 = symbol colocated %sym

block0:
  v0 = symbol_value.i64 gv0
  v1 = atomic_load.i32 little v0
  return v1
}

; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1)
; nextln: br %r14

function %atomic_load_i16(i64) -> i32 {
block0(v0: i64):
  v1 = atomic_uload16.i32 little v0
  return v1
}

; check: lrvh %r2, 0(%r2)
; nextln: br %r14

function %atomic_load_i16_sym() -> i32 {
  gv0 = symbol colocated %sym

block0:
  v0 = symbol_value.i64 gv0
  v1 = atomic_uload16.i32 little v0
  return v1
}

; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1)
; nextln: br %r14

function %atomic_load_i8(i64) -> i32 {
block0(v0: i64):
  v1 = atomic_uload8.i32 little v0
  return v1
}

; check: llc %r2, 0(%r2)
; nextln: br %r14