Re-implement atomic loads and stores

The AArch64 support was a bit broken and used Armv7-style barriers,
which aren't required with Armv8 acquire-release loads and stores.
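
As an illustration only (a sketch, not the exact code the backend emits
before or after this change), a barrier-based mapping of an atomic load
pairs a plain load with an explicit dmb, whereas Armv8 expresses the
same ordering with a single load-acquire:

  // Armv7-style mapping (illustrative): plain load plus explicit barrier
  ldr  x0, [x0]
  dmb  ish

  // Armv8 mapping: the acquire ordering is part of the load itself
  ldar x0, [x0]

Store-release (stlr) plays the same role for atomic stores, replacing
the dmb-bracketed plain store.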

The AArch64 fallback CAS and RMW loops have also been updated to use
acquire-release exclusive instructions, which again removes the need
for barriers. The CAS loop has also been further optimised by using
the extending form of the cmp instruction.
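
A minimal sketch of the kind of loop this refers to, assuming a
hypothetical register assignment (x0 = address, w1 = expected value,
w2 = replacement); the backend's actual register allocation and exact
sequence may differ:

  // illustrative 8-bit CAS loop using acquire-release exclusives
  cas_loop:
    ldaxrb w0, [x0]        // load-acquire exclusive, zero-extends the byte
    cmp    w0, w1, uxtb    // extending cmp: no separate zero-extension of w1
    b.ne   cas_done
    stlxrb w3, w2, [x0]    // store-release exclusive, w3 = status (0 = success)
    cbnz   w3, cas_loop    // exclusive store failed, retry
  cas_done:

The acquire-release exclusives give the loop its ordering without any
dmb, and the extending cmp folds the narrowing of the expected value
into the comparison itself.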

Copyright (c) 2021, Arm Limited.
Author: Sam Parker
Date:   2021-07-29 15:41:45 +01:00
parent 85f16f488d
commit cbb7229457
12 changed files with 564 additions and 220 deletions


@@ -0,0 +1,72 @@
test compile
target aarch64

function %atomic_load_i64(i64) -> i64 {
block0(v0: i64):
v1 = atomic_load.i64 v0
return v1
}
; check: ldar x0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_load_i32(i64) -> i32 {
block0(v0: i64):
v1 = atomic_load.i32 v0
return v1
}
; check: ldar w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_uload_i32_i64(i64) -> i64 {
block0(v0: i64):
v1 = atomic_uload32.i64 v0
return v1
}
; check: ldar w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_uload_i16_i32(i64) -> i32 {
block0(v0: i64):
v1 = atomic_uload16.i32 v0
return v1
}
; check: ldarh w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_uload_i16_i64(i64) -> i64 {
block0(v0: i64):
v1 = atomic_uload16.i64 v0
return v1
}
; check: ldarh w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_uload_i8_i32(i64) -> i32 {
block0(v0: i64):
v1 = atomic_uload8.i32 v0
return v1
}
; check: ldarb w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_uload_i8_i64(i64) -> i64 {
block0(v0: i64):
v1 = atomic_uload8.i64 v0
return v1
}
; check: ldarb w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret


@@ -0,0 +1,72 @@
test compile
target aarch64

function %atomic_store_i64(i64, i64) {
block0(v0: i64, v1: i64):
atomic_store.i64 v0, v1
return
}
; check: stlr x0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_store_i32(i32, i64) {
block0(v0: i32, v1: i64):
atomic_store.i32 v0, v1
return
}
; check: stlr w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_ustore_i32_i64(i64, i64) {
block0(v0: i64, v1: i64):
atomic_store32.i64 v0, v1
return
}
; check: stlr w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_ustore_i16_i32(i32, i64) {
block0(v0: i32, v1: i64):
atomic_store16.i32 v0, v1
return
}
; check: stlrh w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_ustore_i16_i64(i64, i64) {
block0(v0: i64, v1: i64):
atomic_store16.i64 v0, v1
return
}
; check: stlrh w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_ustore_i8_i32(i32, i64) {
block0(v0: i32, v1: i64):
atomic_store8.i32 v0, v1
return
}
; check: stlrb w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_ustore_i8_i64(i64, i64) {
block0(v0: i64, v1: i64):
atomic_store8.i64 v0, v1
return
}
; check: stlrb w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret


@@ -41,29 +41,29 @@ block0:
; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1)
; nextln: br %r14

- function %atomic_load_i16(i64) -> i16 {
+ function %atomic_load_i16(i64) -> i32 {
block0(v0: i64):
- v1 = atomic_load.i16 little v0
+ v1 = atomic_uload16.i32 little v0
return v1
}
; check: lrvh %r2, 0(%r2)
; nextln: br %r14

- function %atomic_load_i16_sym() -> i16 {
+ function %atomic_load_i16_sym() -> i32 {
gv0 = symbol colocated %sym
block0:
v0 = symbol_value.i64 gv0
- v1 = atomic_load.i16 little v0
+ v1 = atomic_uload16.i32 little v0
return v1
}
; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1)
; nextln: br %r14

- function %atomic_load_i8(i64) -> i8 {
+ function %atomic_load_i8(i64) -> i32 {
block0(v0: i64):
- v1 = atomic_load.i8 little v0
+ v1 = atomic_uload8.i32 little v0
return v1
}


@@ -41,29 +41,29 @@ block0:
; check: lrl %r2, %sym + 0
; nextln: br %r14

- function %atomic_load_i16(i64) -> i16 {
+ function %atomic_load_i16(i64) -> i32 {
block0(v0: i64):
- v1 = atomic_load.i16 v0
+ v1 = atomic_uload16.i32 v0
return v1
}
; check: llh %r2, 0(%r2)
; nextln: br %r14

- function %atomic_load_i16_sym() -> i16 {
+ function %atomic_load_i16_sym() -> i32 {
gv0 = symbol colocated %sym
block0:
v0 = symbol_value.i64 gv0
- v1 = atomic_load.i16 v0
+ v1 = atomic_uload16.i32 v0
return v1
}
; check: llhrl %r2, %sym + 0
; nextln: br %r14

- function %atomic_load_i8(i64) -> i8 {
+ function %atomic_load_i8(i64) -> i32 {
block0(v0: i64):
- v1 = atomic_load.i8 v0
+ v1 = atomic_uload8.i32 v0
return v1
}