Update lots of isa/*/*.clif tests to precise-output (#3677)

* Update lots of `isa/*/*.clif` tests to `precise-output`

This commit goes through the `aarch64` and `x64` subdirectories and
changes a subjective selection of tests from `test compile` to
`test compile precise-output`. All affected test expectations were then
regenerated, so in the future they can be updated automatically instead
of by hand. Not all tests were migrated; the choice was largely a
judgment call, based mainly on whether a test was looking for a few
specific instructions or checking the whole assembly output.
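
For illustration, a migrated test ends up looking like the following
(the `%splat_i32` case, reproduced from the x64 diff below): the header
gains the `precise-output` flag, and the full lowered code is recorded
as a `;`-prefixed expectation block after the function, which the test
harness can rewrite wholesale when the expected output changes.

test compile precise-output
set enable_simd
target x86_64 has_ssse3 has_sse41

function %splat_i32(i32) -> i32x4 {
block0(v0: i32):
  v1 = splat.i32x4 v0
  return v1
}

; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 8)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: uninit %xmm0
; Inst 3: pinsrd $0, %rdi, %xmm0
; Inst 4: pshufd $0, %xmm0, %xmm0
; Inst 5: movq %rbp, %rsp
; Inst 6: popq %rbp
; Inst 7: ret
; }}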

* Filter out `;;` comments from test expectations

It turns out the Cranelift parser picks up all comments, not just those
trailing the function, so adopt a convention where `;;` marks
human-readable comments in test cases and `;`-prefixed comments form
the test expectation.
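
Concretely, the two comment styles look like this (both lines are
reproduced from the diff below); the `;;` line is documentation for
human readers and is filtered out, while the `;` line is parsed as part
of the expected output and is rewritten whenever the expectation is
regenerated:

;; pick the second lane of v1, the rest use the first lane of v0
; Inst 5: pshufb %xmm2, %xmm1
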
Alex Crichton
2022-01-10 13:38:23 -06:00
committed by GitHub
parent a8ea0ec097
commit 1ef0abb12c
58 changed files with 6883 additions and 3386 deletions


@@ -1,4 +1,4 @@
test compile
test compile precise-output
set enable_simd
target x86_64 has_ssse3 has_sse41
@@ -8,31 +8,50 @@ function %shuffle_different_ssa_values() -> i8x16 {
block0:
v0 = vconst.i8x16 0x00
v1 = vconst.i8x16 0x01
v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0
v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ;; pick the second lane of v1, the rest use the first lane of v0
return v2
}
; check: load_const VCodeConstant(3), %xmm1
; nextln: load_const VCodeConstant(2), %xmm0
; nextln: load_const VCodeConstant(0), %xmm2
; nextln: pshufb %xmm2, %xmm1
; nextln: load_const VCodeConstant(1), %xmm2
; nextln: pshufb %xmm2, %xmm0
; nextln: orps %xmm1, %xmm0
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 12)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: load_const VCodeConstant(3), %xmm1
; Inst 3: load_const VCodeConstant(2), %xmm0
; Inst 4: load_const VCodeConstant(0), %xmm2
; Inst 5: pshufb %xmm2, %xmm1
; Inst 6: load_const VCodeConstant(1), %xmm2
; Inst 7: pshufb %xmm2, %xmm0
; Inst 8: orps %xmm1, %xmm0
; Inst 9: movq %rbp, %rsp
; Inst 10: popq %rbp
; Inst 11: ret
; }}
function %shuffle_same_ssa_value() -> i8x16 {
block0:
v1 = vconst.i8x16 0x01
v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1
v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ;; pick the fourth lane of v1 and the rest from the first lane of v1
return v2
}
; check: load_const VCodeConstant(1), %xmm0
; nextln: load_const VCodeConstant(0), %xmm1
; nextln: pshufb %xmm1, %xmm0
;; swizzle
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 8)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: load_const VCodeConstant(1), %xmm0
; Inst 3: load_const VCodeConstant(0), %xmm1
; Inst 4: pshufb %xmm1, %xmm0
; Inst 5: movq %rbp, %rsp
; Inst 6: popq %rbp
; Inst 7: ret
; }}
function %swizzle() -> i8x16 {
block0:
@@ -41,26 +60,46 @@ block0:
v2 = swizzle.i8x16 v0, v1
return v2
}
; check: load_const VCodeConstant(1), %xmm1
; nextln: load_const VCodeConstant(1), %xmm0
; nextln: load_const VCodeConstant(0), %xmm2
; nextln: paddusb %xmm2, %xmm0
; nextln: pshufb %xmm0, %xmm1
; nextln: movdqa %xmm1, %xmm0
;; splat
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 11)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: load_const VCodeConstant(1), %xmm1
; Inst 3: load_const VCodeConstant(1), %xmm0
; Inst 4: load_const VCodeConstant(0), %xmm2
; Inst 5: paddusb %xmm2, %xmm0
; Inst 6: pshufb %xmm0, %xmm1
; Inst 7: movdqa %xmm1, %xmm0
; Inst 8: movq %rbp, %rsp
; Inst 9: popq %rbp
; Inst 10: ret
; }}
function %splat_i8(i8) -> i8x16 {
block0(v0: i8):
v1 = splat.i8x16 v0
return v1
}
; check: uninit %xmm0
; nextln: pinsrb $$0, %rdi, %xmm0
; nextln: pxor %xmm1, %xmm1
; nextln: pshufb %xmm1, %xmm0
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 9)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: uninit %xmm0
; Inst 3: pinsrb $0, %rdi, %xmm0
; Inst 4: pxor %xmm1, %xmm1
; Inst 5: pshufb %xmm1, %xmm0
; Inst 6: movq %rbp, %rsp
; Inst 7: popq %rbp
; Inst 8: ret
; }}
function %splat_b16() -> b16x8 {
block0:
@@ -68,56 +107,121 @@ block0:
v1 = splat.b16x8 v0
return v1
}
; check: uninit %xmm0
; nextln: pinsrw $$0, %rsi, %xmm0
; nextln: pinsrw $$1, %rsi, %xmm0
; nextln: pshufd $$0, %xmm0, %xmm0
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 10)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movl $65535, %esi
; Inst 3: uninit %xmm0
; Inst 4: pinsrw $0, %rsi, %xmm0
; Inst 5: pinsrw $1, %rsi, %xmm0
; Inst 6: pshufd $0, %xmm0, %xmm0
; Inst 7: movq %rbp, %rsp
; Inst 8: popq %rbp
; Inst 9: ret
; }}
function %splat_i32(i32) -> i32x4 {
block0(v0: i32):
v1 = splat.i32x4 v0
return v1
}
; check: uninit %xmm0
; nextln: pinsrd $$0, %rdi, %xmm0
; nextln: pshufd $$0, %xmm0, %xmm0
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 8)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: uninit %xmm0
; Inst 3: pinsrd $0, %rdi, %xmm0
; Inst 4: pshufd $0, %xmm0, %xmm0
; Inst 5: movq %rbp, %rsp
; Inst 6: popq %rbp
; Inst 7: ret
; }}
function %splat_f64(f64) -> f64x2 {
block0(v0: f64):
v1 = splat.f64x2 v0
return v1
}
; check: uninit %xmm1
; nextln: movsd %xmm0, %xmm1
; nextln: movlhps %xmm0, %xmm1
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 9)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: uninit %xmm1
; Inst 3: movsd %xmm0, %xmm1
; Inst 4: movlhps %xmm0, %xmm1
; Inst 5: movdqa %xmm1, %xmm0
; Inst 6: movq %rbp, %rsp
; Inst 7: popq %rbp
; Inst 8: ret
; }}
;; load*_zero
; Verify that a `load` followed by a `scalar_to_vector` (the CLIF translation of `load32_zero`) is
; lowered to a single MOVSS instruction.
function %load32_zero_coalesced(i64) -> i32x4 {
block0(v0: i64):
v1 = load.i32 v0
v2 = scalar_to_vector.i32x4 v1
; check: movss 0(%rdi), %xmm0
return v2
}
;; Verify that `scalar_to_vector` (used by `load32_zero`), lowers as expected.
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 6)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movss 0(%rdi), %xmm0
; Inst 3: movq %rbp, %rsp
; Inst 4: popq %rbp
; Inst 5: ret
; }}
function %load32_zero_int(i32) -> i32x4 {
block0(v0: i32):
v1 = scalar_to_vector.i32x4 v0
; check: movd %edi, %xmm0
return v1
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 6)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movd %edi, %xmm0
; Inst 3: movq %rbp, %rsp
; Inst 4: popq %rbp
; Inst 5: ret
; }}
function %load32_zero_float(f32) -> f32x4 {
block0(v0: f32):
v1 = scalar_to_vector.f32x4 v0
; regex: MOV=movap*
; check: pushq
; not: $MOV
; check: ret
return v1
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 5)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rbp, %rsp
; Inst 3: popq %rbp
; Inst 4: ret
; }}