Update lots of isa/*/*.clif tests to precise-output (#3677)

* Update lots of `isa/*/*.clif` tests to `precise-output`

This commit goes through the `aarch64` and `x64` subdirectories and
changes a subjective selection of tests from `test compile` to
`test compile precise-output`. All affected test expectations were then
regenerated, so in the future they can be updated automatically instead
of by hand. Not all tests were migrated; the choice was largely a
judgment call, based mainly on whether a test was looking for a few
specific instructions or checking the whole assembly output.
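
For illustration, a migrated test ends up looking like the following
(the `%splat_i32` case, reproduced from the x64 diff below): the header
gains the `precise-output` flag, and the full lowered code is recorded
as a `;`-prefixed expectation block after the function, which the test
harness can rewrite wholesale when the expected output changes.

test compile precise-output
set enable_simd
target x86_64 has_ssse3 has_sse41

function %splat_i32(i32) -> i32x4 {
block0(v0: i32):
  v1 = splat.i32x4 v0
  return v1
}

; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 8)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: uninit %xmm0
; Inst 3: pinsrd $0, %rdi, %xmm0
; Inst 4: pshufd $0, %xmm0, %xmm0
; Inst 5: movq %rbp, %rsp
; Inst 6: popq %rbp
; Inst 7: ret
; }}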

* Filter out `;;` comments from test expectations

It turns out the Cranelift parser picks up all comments, not just those
trailing the function, so adopt a convention where `;;` marks
human-readable comments in test cases and `;`-prefixed comments form
the test expectation.
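
Concretely, the two comment styles look like this (both lines are
reproduced from the diff below); the `;;` line is documentation for
human readers and is filtered out, while the `;` line is parsed as part
of the expected output and is rewritten whenever the expectation is
regenerated:

;; pick the second lane of v1, the rest use the first lane of v0
; Inst 5: pshufb %xmm2, %xmm1
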
Alex Crichton
2022-01-10 13:38:23 -06:00
committed by GitHub
parent a8ea0ec097
commit 1ef0abb12c
58 changed files with 6883 additions and 3386 deletions


@@ -1,4 +1,4 @@
test compile
test compile precise-output
set enable_simd
target x86_64 has_ssse3 has_sse41
@@ -8,31 +8,50 @@ function %shuffle_different_ssa_values() -> i8x16 {
block0:
v0 = vconst.i8x16 0x00
v1 = vconst.i8x16 0x01
v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0
v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ;; pick the second lane of v1, the rest use the first lane of v0
return v2
}
; check: load_const VCodeConstant(3), %xmm1
; nextln: load_const VCodeConstant(2), %xmm0
; nextln: load_const VCodeConstant(0), %xmm2
; nextln: pshufb %xmm2, %xmm1
; nextln: load_const VCodeConstant(1), %xmm2
; nextln: pshufb %xmm2, %xmm0
; nextln: orps %xmm1, %xmm0
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 12)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: load_const VCodeConstant(3), %xmm1
; Inst 3: load_const VCodeConstant(2), %xmm0
; Inst 4: load_const VCodeConstant(0), %xmm2
; Inst 5: pshufb %xmm2, %xmm1
; Inst 6: load_const VCodeConstant(1), %xmm2
; Inst 7: pshufb %xmm2, %xmm0
; Inst 8: orps %xmm1, %xmm0
; Inst 9: movq %rbp, %rsp
; Inst 10: popq %rbp
; Inst 11: ret
; }}
function %shuffle_same_ssa_value() -> i8x16 {
block0:
v1 = vconst.i8x16 0x01
v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1
v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ;; pick the fourth lane of v1 and the rest from the first lane of v1
return v2
}
; check: load_const VCodeConstant(1), %xmm0
; nextln: load_const VCodeConstant(0), %xmm1
; nextln: pshufb %xmm1, %xmm0
;; swizzle
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 8)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: load_const VCodeConstant(1), %xmm0
; Inst 3: load_const VCodeConstant(0), %xmm1
; Inst 4: pshufb %xmm1, %xmm0
; Inst 5: movq %rbp, %rsp
; Inst 6: popq %rbp
; Inst 7: ret
; }}
function %swizzle() -> i8x16 {
block0:
@@ -41,26 +60,46 @@ block0:
v2 = swizzle.i8x16 v0, v1
return v2
}
; check: load_const VCodeConstant(1), %xmm1
; nextln: load_const VCodeConstant(1), %xmm0
; nextln: load_const VCodeConstant(0), %xmm2
; nextln: paddusb %xmm2, %xmm0
; nextln: pshufb %xmm0, %xmm1
; nextln: movdqa %xmm1, %xmm0
;; splat
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 11)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: load_const VCodeConstant(1), %xmm1
; Inst 3: load_const VCodeConstant(1), %xmm0
; Inst 4: load_const VCodeConstant(0), %xmm2
; Inst 5: paddusb %xmm2, %xmm0
; Inst 6: pshufb %xmm0, %xmm1
; Inst 7: movdqa %xmm1, %xmm0
; Inst 8: movq %rbp, %rsp
; Inst 9: popq %rbp
; Inst 10: ret
; }}
function %splat_i8(i8) -> i8x16 {
block0(v0: i8):
v1 = splat.i8x16 v0
return v1
}
; check: uninit %xmm0
; nextln: pinsrb $$0, %rdi, %xmm0
; nextln: pxor %xmm1, %xmm1
; nextln: pshufb %xmm1, %xmm0
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 9)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: uninit %xmm0
; Inst 3: pinsrb $0, %rdi, %xmm0
; Inst 4: pxor %xmm1, %xmm1
; Inst 5: pshufb %xmm1, %xmm0
; Inst 6: movq %rbp, %rsp
; Inst 7: popq %rbp
; Inst 8: ret
; }}
function %splat_b16() -> b16x8 {
block0:
@@ -68,56 +107,121 @@ block0:
v1 = splat.b16x8 v0
return v1
}
; check: uninit %xmm0
; nextln: pinsrw $$0, %rsi, %xmm0
; nextln: pinsrw $$1, %rsi, %xmm0
; nextln: pshufd $$0, %xmm0, %xmm0
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 10)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movl $65535, %esi
; Inst 3: uninit %xmm0
; Inst 4: pinsrw $0, %rsi, %xmm0
; Inst 5: pinsrw $1, %rsi, %xmm0
; Inst 6: pshufd $0, %xmm0, %xmm0
; Inst 7: movq %rbp, %rsp
; Inst 8: popq %rbp
; Inst 9: ret
; }}
function %splat_i32(i32) -> i32x4 {
block0(v0: i32):
v1 = splat.i32x4 v0
return v1
}
; check: uninit %xmm0
; nextln: pinsrd $$0, %rdi, %xmm0
; nextln: pshufd $$0, %xmm0, %xmm0
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 8)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: uninit %xmm0
; Inst 3: pinsrd $0, %rdi, %xmm0
; Inst 4: pshufd $0, %xmm0, %xmm0
; Inst 5: movq %rbp, %rsp
; Inst 6: popq %rbp
; Inst 7: ret
; }}
function %splat_f64(f64) -> f64x2 {
block0(v0: f64):
v1 = splat.f64x2 v0
return v1
}
; check: uninit %xmm1
; nextln: movsd %xmm0, %xmm1
; nextln: movlhps %xmm0, %xmm1
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 9)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: uninit %xmm1
; Inst 3: movsd %xmm0, %xmm1
; Inst 4: movlhps %xmm0, %xmm1
; Inst 5: movdqa %xmm1, %xmm0
; Inst 6: movq %rbp, %rsp
; Inst 7: popq %rbp
; Inst 8: ret
; }}
;; load*_zero
; Verify that a `load` followed by a `scalar_to_vector` (the CLIF translation of `load32_zero`) is
; lowered to a single MOVSS instruction.
function %load32_zero_coalesced(i64) -> i32x4 {
block0(v0: i64):
v1 = load.i32 v0
v2 = scalar_to_vector.i32x4 v1
; check: movss 0(%rdi), %xmm0
return v2
}
;; Verify that `scalar_to_vector` (used by `load32_zero`), lowers as expected.
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 6)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movss 0(%rdi), %xmm0
; Inst 3: movq %rbp, %rsp
; Inst 4: popq %rbp
; Inst 5: ret
; }}
function %load32_zero_int(i32) -> i32x4 {
block0(v0: i32):
v1 = scalar_to_vector.i32x4 v0
; check: movd %edi, %xmm0
return v1
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 6)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movd %edi, %xmm0
; Inst 3: movq %rbp, %rsp
; Inst 4: popq %rbp
; Inst 5: ret
; }}
function %load32_zero_float(f32) -> f32x4 {
block0(v0: f32):
v1 = scalar_to_vector.f32x4 v0
; regex: MOV=movap*
; check: pushq
; not: $MOV
; check: ret
return v1
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 5)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rbp, %rsp
; Inst 3: popq %rbp
; Inst 4: ret
; }}