Allocate temporary intermediates when loading constants on aarch64 (#5366)

Loading a constant on aarch64 can take up to 4 instructions, so we need to plumb through some additional registers to hold the temporary intermediates. Rather than passing in a fixed list of registers, pass an allocation function.
This commit is contained in:
Trevor Elliott
2022-12-01 14:29:36 -08:00
committed by GitHub
parent 03715dda9d
commit d54a27d0ea
11 changed files with 158 additions and 126 deletions

View File

@@ -37,9 +37,9 @@ block0(v0: i32):
}
; block0:
; movz w2, #4369
; movk w2, w2, #17, LSL #16
; subs wzr, w0, w2
; movz w3, #4369
; movk w3, w3, #17, LSL #16
; subs wzr, w0, w3
; cset x0, hs
; ret
@@ -51,9 +51,9 @@ block0(v0: i32):
}
; block0:
; movz w2, #4368
; movk w2, w2, #17, LSL #16
; subs wzr, w0, w2
; movz w3, #4368
; movk w3, w3, #17, LSL #16
; subs wzr, w0, w3
; cset x0, hs
; ret
@@ -89,9 +89,9 @@ block0(v0: i32):
}
; block0:
; movz w2, #4369
; movk w2, w2, #17, LSL #16
; subs wzr, w0, w2
; movz w3, #4369
; movk w3, w3, #17, LSL #16
; subs wzr, w0, w3
; cset x0, ge
; ret
@@ -103,9 +103,9 @@ block0(v0: i32):
}
; block0:
; movz w2, #4368
; movk w2, w2, #17, LSL #16
; subs wzr, w0, w2
; movz w3, #4368
; movk w3, w3, #17, LSL #16
; subs wzr, w0, w3
; cset x0, ge
; ret

View File

@@ -14,11 +14,11 @@ block0(v0: i8x16):
; movk x5, x5, #8208, LSL #32
; movk x5, x5, #32832, LSL #48
; dup v16.2d, x5
; and v19.16b, v2.16b, v16.16b
; ext v21.16b, v19.16b, v19.16b, #8
; zip1 v23.16b, v19.16b, v21.16b
; addv h25, v23.8h
; umov w0, v25.h[0]
; and v22.16b, v2.16b, v16.16b
; ext v24.16b, v22.16b, v22.16b, #8
; zip1 v26.16b, v22.16b, v24.16b
; addv h28, v26.8h
; umov w0, v28.h[0]
; ret
function %f2(i8x16) -> i16 {
@@ -34,11 +34,11 @@ block0(v0: i8x16):
; movk x5, x5, #8208, LSL #32
; movk x5, x5, #32832, LSL #48
; dup v16.2d, x5
; and v19.16b, v2.16b, v16.16b
; ext v21.16b, v19.16b, v19.16b, #8
; zip1 v23.16b, v19.16b, v21.16b
; addv h25, v23.8h
; umov w0, v25.h[0]
; and v22.16b, v2.16b, v16.16b
; ext v24.16b, v22.16b, v22.16b, #8
; zip1 v26.16b, v22.16b, v24.16b
; addv h28, v26.8h
; umov w0, v28.h[0]
; ret
function %f3(i16x8) -> i8 {