aarch64: fix up regalloc2 semantics. (#4830)

This PR removes all uses of modify-operands in the aarch64 backend,
replacing them with reused-input operands. This eliminates a number of
move instructions and represents instruction inputs and outputs more
clearly.
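
For context, the distinction looks roughly like this. The sketch below uses
illustrative stand-in types, not the actual regalloc2 or Cranelift API, and
takes AArch64's `fmla` (vd += vn * vm) as the running example:

```rust
// A "modify" operand ties one vreg as both input and output, so lowering must
// emit explicit moves to preserve values around the instruction. A
// "reused-input" def keeps the input and output as distinct vregs and only
// constrains them to land in the same physical register, leaving any needed
// move to the register allocator.
#[derive(Debug, Clone, Copy)]
enum Operand {
    Use(u32),             // read this vreg
    Mod(u32),             // read and write the same vreg (old style)
    ReuseDef(u32, usize), // def constrained to the preg of the operand at this index (new style)
}

// `fmla` reads and writes its accumulator. Old style: one Mod operand hides
// that dataflow from the allocator's SSA-level view.
fn fmla_operands_old(acc: u32, n: u32, m: u32) -> Vec<Operand> {
    vec![Operand::Mod(acc), Operand::Use(n), Operand::Use(m)]
}

// New style: a fresh result vreg that reuses operand #1 (the accumulator use),
// so inputs and outputs stay distinct.
fn fmla_operands_new(result: u32, acc: u32, n: u32, m: u32) -> Vec<Operand> {
    vec![
        Operand::ReuseDef(result, 1),
        Operand::Use(acc),
        Operand::Use(n),
        Operand::Use(m),
    ]
}

fn main() {
    println!("old: {:?}", fmla_operands_old(2, 0, 1));
    println!("new: {:?}", fmla_operands_new(3, 2, 0, 1));
}
```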

This PR also removes the explicit use of pinned vregs in the aarch64
backend, instead using fixed-register constraints on the operands when
insts or pseudo-inst sequences require certain registers.
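
The second change, sketched in the same illustrative style (stand-in types and
a hypothetical pseudo-inst, not the real API):

```rust
// With pinned vregs, some virtual registers are permanently bound to physical
// registers, and lowering emits moves into and out of them around any
// instruction with a register requirement. With fixed-register constraints,
// the requirement is attached to the operand itself, at exactly the program
// point where it applies, and the allocator does the shuffling.
#[derive(Debug, Clone, Copy)]
struct PReg(u8); // a physical register; PReg(0) stands in for x0 here

#[derive(Debug, Clone, Copy)]
enum Operand {
    FixedUse(u32, PReg), // this vreg must sit in the given preg when read
    FixedDef(u32, PReg), // this vreg is produced in the given preg
}

// A hypothetical pseudo-inst that consumes its argument in x0 and defines its
// result in x0: no pinned vregs and no explicit moves in the lowered code.
fn pseudo_inst_operands(arg: u32, result: u32) -> Vec<Operand> {
    let x0 = PReg(0);
    vec![Operand::FixedUse(arg, x0), Operand::FixedDef(result, x0)]
}

fn main() {
    println!("operands: {:?}", pseudo_inst_operands(7, 8));
}
```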

This is the second PR in the regalloc-semantics cleanup series; after
the remaining backend (s390x) and the ABI code are cleaned up as well,
we'll be able to simplify the regalloc2 frontend.
Chris Fallin, 2022-09-01 14:25:20 -07:00, committed by GitHub
parent ac2d4c4818
commit ae5fe8a728
25 changed files with 1098 additions and 886 deletions

@@ -309,8 +309,8 @@ block0(v0: f32, v1: f32):
}
; block0:
-; ushr v7.2s, v1.2s, #31
-; sli v0.2s, v7.2s, #31
+; ushr v6.2s, v1.2s, #31
+; sli v0.2s, v0.2s, v6.2s, #31
; ret
function %f32(f64, f64) -> f64 {
@@ -320,8 +320,8 @@ block0(v0: f64, v1: f64):
}
; block0:
-; ushr d7, d1, #63
-; sli d0, d7, #63
+; ushr d6, d1, #63
+; sli d0, d0, d6, #63
; ret
function %f33(f32) -> i32 {
@@ -918,9 +918,8 @@ block0(v0: f32x4, v1: f32x4, v2: f32x4):
}
; block0:
-; mov v17.16b, v0.16b
+; fmla v2.4s, v2.4s, v0.4s, v1.4s
; mov v0.16b, v2.16b
-; fmla v0.4s, v17.4s, v1.4s
; ret
function %f79(f32x2, f32x2, f32x2) -> f32x2 {
@@ -930,9 +929,8 @@ block0(v0: f32x2, v1: f32x2, v2: f32x2):
}
; block0:
-; mov v17.16b, v0.16b
+; fmla v2.2s, v2.2s, v0.2s, v1.2s
; mov v0.16b, v2.16b
-; fmla v0.2s, v17.2s, v1.2s
; ret
function %f80(f64x2, f64x2, f64x2) -> f64x2 {
@@ -942,9 +940,8 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2):
}
; block0:
-; mov v17.16b, v0.16b
+; fmla v2.2d, v2.2d, v0.2d, v1.2d
; mov v0.16b, v2.16b
-; fmla v0.2d, v17.2d, v1.2d
; ret
function %f81(f32x2, f32x2) -> f32x2 {
@@ -954,8 +951,8 @@ block0(v0: f32x2, v1: f32x2):
}
; block0:
-; ushr v7.2s, v1.2s, #31
-; sli v0.2s, v7.2s, #31
+; ushr v6.2s, v1.2s, #31
+; sli v0.2s, v0.2s, v6.2s, #31
; ret
function %f82(f32x4, f32x4) -> f32x4 {
@@ -965,8 +962,8 @@ block0(v0: f32x4, v1: f32x4):
}
; block0:
-; ushr v7.4s, v1.4s, #31
-; sli v0.4s, v7.4s, #31
+; ushr v6.4s, v1.4s, #31
+; sli v0.4s, v0.4s, v6.4s, #31
; ret
function %f83(f64x2, f64x2) -> f64x2 {
@@ -976,6 +973,7 @@ block0(v0: f64x2, v1: f64x2):
}
; block0:
-; ushr v7.2d, v1.2d, #63
-; sli v0.2d, v7.2d, #63
+; ushr v6.2d, v1.2d, #63
+; sli v0.2d, v0.2d, v6.2d, #63
; ret