[AArch64] Port atomic rmw to ISLE (#4021)
Also fix and extend the current implementation:
- AtomicRMWOp::Clr != AtomicRmwOp::And, as the input needs to be inverted first.
- Inputs to the cmp for the RMWLoop case are sign-extended when needed.
- Lower Xchg to Swp.
- Lower Sub to Add with a negated input.
- Added more runtests.

Copyright (c) 2022, Arm Limited.
This commit is contained in:
@@ -41,6 +41,50 @@ block0(v0: i64, v1: i8):
|
||||
; ldaddalb w1, w4, [x0]
|
||||
; ret
|
||||
|
||||
;; Atomically subtracts v1 from the 64-bit value in memory addressed by v0.
;; The result v2 is not used; the function returns nothing.
;; The expected output that follows negates v1 (sub from xzr) and issues ldaddal.
function %atomic_rmw_sub_i64(i64, i64) {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = atomic_rmw.i64 sub v0, v1
|
||||
return
|
||||
}
|
||||
|
||||
; block0:
|
||||
; sub x4, xzr, x1
|
||||
; ldaddal x4, x6, [x0]
|
||||
; ret
|
||||
|
||||
;; Atomically subtracts v1 from the 32-bit value in memory addressed by v0.
;; The result v2 is not used; the function returns nothing.
;; The expected output that follows negates v1 (sub from wzr) and issues ldaddal.
function %atomic_rmw_sub_i32(i64, i32) {
|
||||
block0(v0: i64, v1: i32):
|
||||
v2 = atomic_rmw.i32 sub v0, v1
|
||||
return
|
||||
}
|
||||
|
||||
; block0:
|
||||
; sub w4, wzr, w1
|
||||
; ldaddal w4, w6, [x0]
|
||||
; ret
|
||||
|
||||
;; Atomically subtracts v1 from the 16-bit value in memory addressed by v0.
;; The result v2 is not used; the function returns nothing.
;; The expected output that follows negates v1 (sub from wzr) and issues ldaddalh.
function %atomic_rmw_sub_i16(i64, i16) {
|
||||
block0(v0: i64, v1: i16):
|
||||
v2 = atomic_rmw.i16 sub v0, v1
|
||||
return
|
||||
}
|
||||
|
||||
; block0:
|
||||
; sub w4, wzr, w1
|
||||
; ldaddalh w4, w6, [x0]
|
||||
; ret
|
||||
|
||||
;; Atomically subtracts v1 from the 8-bit value in memory addressed by v0.
;; The result v2 is not used; the function returns nothing.
;; The expected output that follows negates v1 (sub from wzr) and issues ldaddalb.
function %atomic_rmw_sub_i8(i64, i8) {
|
||||
block0(v0: i64, v1: i8):
|
||||
v2 = atomic_rmw.i8 sub v0, v1
|
||||
return
|
||||
}
|
||||
|
||||
; block0:
|
||||
; sub w4, wzr, w1
|
||||
; ldaddalb w4, w6, [x0]
|
||||
; ret
|
||||
|
||||
function %atomic_rmw_and_i64(i64, i64) {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = atomic_rmw.i64 and v0, v1
|
||||
@@ -48,7 +92,8 @@ block0(v0: i64, v1: i64):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; ldclral x1, x4, [x0]
|
||||
; eon x4, x1, xzr
|
||||
; ldclral x4, x6, [x0]
|
||||
; ret
|
||||
|
||||
function %atomic_rmw_and_i32(i64, i32) {
|
||||
@@ -58,7 +103,8 @@ block0(v0: i64, v1: i32):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; ldclral w1, w4, [x0]
|
||||
; eon w4, w1, wzr
|
||||
; ldclral w4, w6, [x0]
|
||||
; ret
|
||||
|
||||
function %atomic_rmw_and_i16(i64, i16) {
|
||||
@@ -68,7 +114,8 @@ block0(v0: i64, v1: i16):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; ldclralh w1, w4, [x0]
|
||||
; eon w4, w1, wzr
|
||||
; ldclralh w4, w6, [x0]
|
||||
; ret
|
||||
|
||||
function %atomic_rmw_and_i8(i64, i8) {
|
||||
@@ -78,7 +125,8 @@ block0(v0: i64, v1: i8):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; ldclralb w1, w4, [x0]
|
||||
; eon w4, w1, wzr
|
||||
; ldclralb w4, w6, [x0]
|
||||
; ret
|
||||
|
||||
function %atomic_rmw_nand_i64(i64, i64) {
|
||||
|
||||
@@ -89,6 +89,94 @@ block0(v0: i64, v1: i8):
|
||||
; ldp fp, lr, [sp], #16
|
||||
; ret
|
||||
|
||||
;; Atomically subtracts v1 from the 64-bit value in memory addressed by v0.
;; The result v2 is not used; the function returns nothing.
;; The expected output that follows uses an ldaxr / sub / stlxr loop that
;; retries (cbnz) until the exclusive store succeeds.
function %atomic_rmw_sub_i64(i64, i64) {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = atomic_rmw.i64 sub v0, v1
|
||||
return
|
||||
}
|
||||
|
||||
; stp fp, lr, [sp, #-16]!
|
||||
; mov fp, sp
|
||||
; str x28, [sp, #-16]!
|
||||
; stp x26, x27, [sp, #-16]!
|
||||
; stp x24, x25, [sp, #-16]!
|
||||
; block0:
|
||||
; mov x25, x0
|
||||
; mov x4, x1
|
||||
; mov x26, x4
|
||||
; 1: ldaxr x27, [x25]; sub x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b
|
||||
; ldp x24, x25, [sp], #16
|
||||
; ldp x26, x27, [sp], #16
|
||||
; ldr x28, [sp], #16
|
||||
; ldp fp, lr, [sp], #16
|
||||
; ret
|
||||
|
||||
;; Atomically subtracts v1 from the 32-bit value in memory addressed by v0.
;; The result v2 is not used; the function returns nothing.
;; The expected output that follows uses an ldaxr / sub / stlxr loop on
;; w-registers that retries (cbnz) until the exclusive store succeeds.
function %atomic_rmw_sub_i32(i64, i32) {
|
||||
block0(v0: i64, v1: i32):
|
||||
v2 = atomic_rmw.i32 sub v0, v1
|
||||
return
|
||||
}
|
||||
|
||||
; stp fp, lr, [sp, #-16]!
|
||||
; mov fp, sp
|
||||
; str x28, [sp, #-16]!
|
||||
; stp x26, x27, [sp, #-16]!
|
||||
; stp x24, x25, [sp, #-16]!
|
||||
; block0:
|
||||
; mov x25, x0
|
||||
; mov x4, x1
|
||||
; mov x26, x4
|
||||
; 1: ldaxr w27, [x25]; sub w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b
|
||||
; ldp x24, x25, [sp], #16
|
||||
; ldp x26, x27, [sp], #16
|
||||
; ldr x28, [sp], #16
|
||||
; ldp fp, lr, [sp], #16
|
||||
; ret
|
||||
|
||||
;; Atomically subtracts v1 from the 16-bit value in memory addressed by v0.
;; The result v2 is not used; the function returns nothing.
;; The expected output that follows uses an ldaxrh / sub / stlxrh loop that
;; retries (cbnz) until the exclusive store succeeds.
function %atomic_rmw_sub_i16(i64, i16) {
|
||||
block0(v0: i64, v1: i16):
|
||||
v2 = atomic_rmw.i16 sub v0, v1
|
||||
return
|
||||
}
|
||||
|
||||
; stp fp, lr, [sp, #-16]!
|
||||
; mov fp, sp
|
||||
; str x28, [sp, #-16]!
|
||||
; stp x26, x27, [sp, #-16]!
|
||||
; stp x24, x25, [sp, #-16]!
|
||||
; block0:
|
||||
; mov x25, x0
|
||||
; mov x4, x1
|
||||
; mov x26, x4
|
||||
; 1: ldaxrh w27, [x25]; sub w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
||||
; ldp x24, x25, [sp], #16
|
||||
; ldp x26, x27, [sp], #16
|
||||
; ldr x28, [sp], #16
|
||||
; ldp fp, lr, [sp], #16
|
||||
; ret
|
||||
|
||||
;; Atomically subtracts v1 from the 8-bit value in memory addressed by v0.
;; The result v2 is not used; the function returns nothing.
;; The expected output that follows uses an ldaxrb / sub / stlxrb loop that
;; retries (cbnz) until the exclusive store succeeds.
function %atomic_rmw_sub_i8(i64, i8) {
|
||||
block0(v0: i64, v1: i8):
|
||||
v2 = atomic_rmw.i8 sub v0, v1
|
||||
return
|
||||
}
|
||||
|
||||
; stp fp, lr, [sp, #-16]!
|
||||
; mov fp, sp
|
||||
; str x28, [sp, #-16]!
|
||||
; stp x26, x27, [sp, #-16]!
|
||||
; stp x24, x25, [sp, #-16]!
|
||||
; block0:
|
||||
; mov x25, x0
|
||||
; mov x4, x1
|
||||
; mov x26, x4
|
||||
; 1: ldaxrb w27, [x25]; sub w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
||||
; ldp x24, x25, [sp], #16
|
||||
; ldp x26, x27, [sp], #16
|
||||
; ldr x28, [sp], #16
|
||||
; ldp fp, lr, [sp], #16
|
||||
; ret
|
||||
|
||||
function %atomic_rmw_and_i64(i64, i64) {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = atomic_rmw.i64 and v0, v1
|
||||
@@ -500,7 +588,7 @@ block0(v0: i64, v1: i16):
|
||||
; mov x25, x0
|
||||
; mov x4, x1
|
||||
; mov x26, x4
|
||||
; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
||||
; 1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, gt; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
||||
; ldp x24, x25, [sp], #16
|
||||
; ldp x26, x27, [sp], #16
|
||||
; ldr x28, [sp], #16
|
||||
@@ -522,7 +610,7 @@ block0(v0: i64, v1: i8):
|
||||
; mov x25, x0
|
||||
; mov x4, x1
|
||||
; mov x26, x4
|
||||
; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
||||
; 1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
||||
; ldp x24, x25, [sp], #16
|
||||
; ldp x26, x27, [sp], #16
|
||||
; ldr x28, [sp], #16
|
||||
@@ -676,7 +764,7 @@ block0(v0: i64, v1: i16):
|
||||
; mov x25, x0
|
||||
; mov x4, x1
|
||||
; mov x26, x4
|
||||
; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
||||
; 1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b
|
||||
; ldp x24, x25, [sp], #16
|
||||
; ldp x26, x27, [sp], #16
|
||||
; ldr x28, [sp], #16
|
||||
@@ -698,7 +786,7 @@ block0(v0: i64, v1: i8):
|
||||
; mov x25, x0
|
||||
; mov x4, x1
|
||||
; mov x26, x4
|
||||
; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
||||
; 1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, lt; stlxrb w24, w28, [x25]; cbnz w24, 1b
|
||||
; ldp x24, x25, [sp], #16
|
||||
; ldp x26, x27, [sp], #16
|
||||
; ldr x28, [sp], #16
|
||||
|
||||
Reference in New Issue
Block a user