aarch64: fix up regalloc2 semantics. (#4830)

This PR removes all uses of modify-operands in the aarch64 backend,
replacing them with reused-input operands. This eliminates a number of
move instructions and represents instruction inputs and outputs more
clearly.
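
For context, the distinction looks roughly like this. The sketch below uses
illustrative stand-in types, not the actual regalloc2 or Cranelift API, and
takes AArch64's `fmla` (vd += vn * vm) as the running example:

```rust
// A "modify" operand ties one vreg as both input and output, so lowering must
// emit explicit moves to preserve values around the instruction. A
// "reused-input" def keeps the input and output as distinct vregs and only
// constrains them to land in the same physical register, leaving any needed
// move to the register allocator.
#[derive(Debug, Clone, Copy)]
enum Operand {
    Use(u32),             // read this vreg
    Mod(u32),             // read and write the same vreg (old style)
    ReuseDef(u32, usize), // def constrained to the preg of the operand at this index (new style)
}

// `fmla` reads and writes its accumulator. Old style: one Mod operand hides
// that dataflow from the allocator's SSA-level view.
fn fmla_operands_old(acc: u32, n: u32, m: u32) -> Vec<Operand> {
    vec![Operand::Mod(acc), Operand::Use(n), Operand::Use(m)]
}

// New style: a fresh result vreg that reuses operand #1 (the accumulator use),
// so inputs and outputs stay distinct.
fn fmla_operands_new(result: u32, acc: u32, n: u32, m: u32) -> Vec<Operand> {
    vec![
        Operand::ReuseDef(result, 1),
        Operand::Use(acc),
        Operand::Use(n),
        Operand::Use(m),
    ]
}

fn main() {
    println!("old: {:?}", fmla_operands_old(2, 0, 1));
    println!("new: {:?}", fmla_operands_new(3, 2, 0, 1));
}
```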

This PR also removes the explicit use of pinned vregs in the aarch64
backend, instead using fixed-register constraints on the operands when
insts or pseudo-inst sequences require certain registers.
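
The second change, sketched in the same illustrative style (stand-in types and
a hypothetical pseudo-inst, not the real API):

```rust
// With pinned vregs, some virtual registers are permanently bound to physical
// registers, and lowering emits moves into and out of them around any
// instruction with a register requirement. With fixed-register constraints,
// the requirement is attached to the operand itself, at exactly the program
// point where it applies, and the allocator does the shuffling.
#[derive(Debug, Clone, Copy)]
struct PReg(u8); // a physical register; PReg(0) stands in for x0 here

#[derive(Debug, Clone, Copy)]
enum Operand {
    FixedUse(u32, PReg), // this vreg must sit in the given preg when read
    FixedDef(u32, PReg), // this vreg is produced in the given preg
}

// A hypothetical pseudo-inst that consumes its argument in x0 and defines its
// result in x0: no pinned vregs and no explicit moves in the lowered code.
fn pseudo_inst_operands(arg: u32, result: u32) -> Vec<Operand> {
    let x0 = PReg(0);
    vec![Operand::FixedUse(arg, x0), Operand::FixedDef(result, x0)]
}

fn main() {
    println!("operands: {:?}", pseudo_inst_operands(7, 8));
}
```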

This is the second PR in the regalloc-semantics cleanup series; after
the remaining backend (s390x) and the ABI code are cleaned up as well,
we'll be able to simplify the regalloc2 frontend.
Chris Fallin, 2022-09-01 14:25:20 -07:00, committed by GitHub
parent ac2d4c4818
commit ae5fe8a728
25 changed files with 1098 additions and 886 deletions

@@ -309,8 +309,8 @@ block0(v0: f32, v1: f32):
}
; block0:
-; ushr v7.2s, v1.2s, #31
-; sli v0.2s, v7.2s, #31
+; ushr v6.2s, v1.2s, #31
+; sli v0.2s, v0.2s, v6.2s, #31
; ret
function %f32(f64, f64) -> f64 {
@@ -320,8 +320,8 @@ block0(v0: f64, v1: f64):
}
; block0:
-; ushr d7, d1, #63
-; sli d0, d7, #63
+; ushr d6, d1, #63
+; sli d0, d0, d6, #63
; ret
function %f33(f32) -> i32 {
@@ -918,9 +918,8 @@ block0(v0: f32x4, v1: f32x4, v2: f32x4):
}
; block0:
-; mov v17.16b, v0.16b
+; fmla v2.4s, v2.4s, v0.4s, v1.4s
; mov v0.16b, v2.16b
-; fmla v0.4s, v17.4s, v1.4s
; ret
function %f79(f32x2, f32x2, f32x2) -> f32x2 {
@@ -930,9 +929,8 @@ block0(v0: f32x2, v1: f32x2, v2: f32x2):
}
; block0:
-; mov v17.16b, v0.16b
+; fmla v2.2s, v2.2s, v0.2s, v1.2s
; mov v0.16b, v2.16b
-; fmla v0.2s, v17.2s, v1.2s
; ret
function %f80(f64x2, f64x2, f64x2) -> f64x2 {
@@ -942,9 +940,8 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2):
}
; block0:
-; mov v17.16b, v0.16b
+; fmla v2.2d, v2.2d, v0.2d, v1.2d
; mov v0.16b, v2.16b
-; fmla v0.2d, v17.2d, v1.2d
; ret
function %f81(f32x2, f32x2) -> f32x2 {
@@ -954,8 +951,8 @@ block0(v0: f32x2, v1: f32x2):
}
; block0:
-; ushr v7.2s, v1.2s, #31
-; sli v0.2s, v7.2s, #31
+; ushr v6.2s, v1.2s, #31
+; sli v0.2s, v0.2s, v6.2s, #31
; ret
function %f82(f32x4, f32x4) -> f32x4 {
@@ -965,8 +962,8 @@ block0(v0: f32x4, v1: f32x4):
}
; block0:
-; ushr v7.4s, v1.4s, #31
-; sli v0.4s, v7.4s, #31
+; ushr v6.4s, v1.4s, #31
+; sli v0.4s, v0.4s, v6.4s, #31
; ret
function %f83(f64x2, f64x2) -> f64x2 {
@@ -976,6 +973,7 @@ block0(v0: f64x2, v1: f64x2):
}
; block0:
-; ushr v7.2d, v1.2d, #63
-; sli v0.2d, v7.2d, #63
+; ushr v6.2d, v1.2d, #63
+; sli v0.2d, v0.2d, v6.2d, #63
; ret