Re-implement atomic loads and stores

The AArch64 support was a bit broken and used Armv7-style barriers,
which aren't required with Armv8 acquire-release loads and stores.
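
As an illustration only (a sketch, not the exact code the backend emits
before or after this change), a barrier-based mapping of an atomic load
pairs a plain load with an explicit dmb, whereas Armv8 expresses the
same ordering with a single load-acquire:

  // Armv7-style mapping (illustrative): plain load plus explicit barrier
  ldr  x0, [x0]
  dmb  ish

  // Armv8 mapping: the acquire ordering is part of the load itself
  ldar x0, [x0]

Store-release (stlr) plays the same role for atomic stores, replacing
the dmb-bracketed plain store.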

The AArch64 fallback CAS and RMW loops have also been updated to use
acquire-release exclusive instructions, which again removes the need
for barriers. The CAS loop has also been further optimised by using
the extending form of the cmp instruction.
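
A minimal sketch of the kind of loop this refers to, assuming a
hypothetical register assignment (x0 = address, w1 = expected value,
w2 = replacement); the backend's actual register allocation and exact
sequence may differ:

  // illustrative 8-bit CAS loop using acquire-release exclusives
  cas_loop:
    ldaxrb w0, [x0]        // load-acquire exclusive, zero-extends the byte
    cmp    w0, w1, uxtb    // extending cmp: no separate zero-extension of w1
    b.ne   cas_done
    stlxrb w3, w2, [x0]    // store-release exclusive, w3 = status (0 = success)
    cbnz   w3, cas_loop    // exclusive store failed, retry
  cas_done:

The acquire-release exclusives give the loop its ordering without any
dmb, and the extending cmp folds the narrowing of the expected value
into the comparison itself.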

Copyright (c) 2021, Arm Limited.
Author: Sam Parker
Date:   2021-07-29 15:41:45 +01:00
parent 85f16f488d
commit cbb7229457
12 changed files with 564 additions and 220 deletions


@@ -0,0 +1,72 @@
test compile
target aarch64

function %atomic_load_i64(i64) -> i64 {
block0(v0: i64):
v1 = atomic_load.i64 v0
return v1
}
; check: ldar x0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_load_i32(i64) -> i32 {
block0(v0: i64):
v1 = atomic_load.i32 v0
return v1
}
; check: ldar w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_uload_i32_i64(i64) -> i64 {
block0(v0: i64):
v1 = atomic_uload32.i64 v0
return v1
}
; check: ldar w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_uload_i16_i32(i64) -> i32 {
block0(v0: i64):
v1 = atomic_uload16.i32 v0
return v1
}
; check: ldarh w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_uload_i16_i64(i64) -> i64 {
block0(v0: i64):
v1 = atomic_uload16.i64 v0
return v1
}
; check: ldarh w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_uload_i8_i32(i64) -> i32 {
block0(v0: i64):
v1 = atomic_uload8.i32 v0
return v1
}
; check: ldarb w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_uload_i8_i64(i64) -> i64 {
block0(v0: i64):
v1 = atomic_uload8.i64 v0
return v1
}
; check: ldarb w0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret


@@ -0,0 +1,72 @@
test compile
target aarch64

function %atomic_store_i64(i64, i64) {
block0(v0: i64, v1: i64):
atomic_store.i64 v0, v1
return
}
; check: stlr x0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_store_i32(i32, i64) {
block0(v0: i32, v1: i64):
atomic_store.i32 v0, v1
return
}
; check: stlr w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_ustore_i32_i64(i64, i64) {
block0(v0: i64, v1: i64):
atomic_store32.i64 v0, v1
return
}
; check: stlr w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_ustore_i16_i32(i32, i64) {
block0(v0: i32, v1: i64):
atomic_store16.i32 v0, v1
return
}
; check: stlrh w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_ustore_i16_i64(i64, i64) {
block0(v0: i64, v1: i64):
atomic_store16.i64 v0, v1
return
}
; check: stlrh w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_ustore_i8_i32(i32, i64) {
block0(v0: i32, v1: i64):
atomic_store8.i32 v0, v1
return
}
; check: stlrb w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %atomic_ustore_i8_i64(i64, i64) {
block0(v0: i64, v1: i64):
atomic_store8.i64 v0, v1
return
}
; check: stlrb w0, [x1]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret


@@ -41,29 +41,29 @@ block0:
; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1)
; nextln: br %r14

- function %atomic_load_i16(i64) -> i16 {
+ function %atomic_load_i16(i64) -> i32 {
block0(v0: i64):
- v1 = atomic_load.i16 little v0
+ v1 = atomic_uload16.i32 little v0
return v1
}
; check: lrvh %r2, 0(%r2)
; nextln: br %r14

- function %atomic_load_i16_sym() -> i16 {
+ function %atomic_load_i16_sym() -> i32 {
gv0 = symbol colocated %sym
block0:
v0 = symbol_value.i64 gv0
- v1 = atomic_load.i16 little v0
+ v1 = atomic_uload16.i32 little v0
return v1
}
; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1)
; nextln: br %r14

- function %atomic_load_i8(i64) -> i8 {
+ function %atomic_load_i8(i64) -> i32 {
block0(v0: i64):
- v1 = atomic_load.i8 little v0
+ v1 = atomic_uload8.i32 little v0
return v1
}


@@ -41,29 +41,29 @@ block0:
; check: lrl %r2, %sym + 0
; nextln: br %r14

- function %atomic_load_i16(i64) -> i16 {
+ function %atomic_load_i16(i64) -> i32 {
block0(v0: i64):
- v1 = atomic_load.i16 v0
+ v1 = atomic_uload16.i32 v0
return v1
}
; check: llh %r2, 0(%r2)
; nextln: br %r14

- function %atomic_load_i16_sym() -> i16 {
+ function %atomic_load_i16_sym() -> i32 {
gv0 = symbol colocated %sym
block0:
v0 = symbol_value.i64 gv0
- v1 = atomic_load.i16 v0
+ v1 = atomic_uload16.i32 v0
return v1
}
; check: llhrl %r2, %sym + 0
; nextln: br %r14

- function %atomic_load_i8(i64) -> i8 {
+ function %atomic_load_i8(i64) -> i32 {
block0(v0: i64):
- v1 = atomic_load.i8 v0
+ v1 = atomic_uload8.i32 v0
return v1
}