Merge raw_bitcast and bitcast (#5175)

- Allow bitcast for vectors with differing lane widths
- Remove raw_bitcast IR instruction
- Change all users of raw_bitcast to bitcast
- Implement support for no-op bitcast cases across backends

This implements the second step of the plan outlined here:
https://github.com/bytecodealliance/wasmtime/issues/4566#issuecomment-1234819394
2022-11-02 18:16:27 +01:00
parent e0c8a7f477
commit 961107ec63
26 changed files with 95 additions and 130 deletions
--- a/cranelift/filetests/filetests/isa/x64/move-elision.clif
+++ b/cranelift/filetests/filetests/isa/x64/move-elision.clif
@@ -7,9 +7,9 @@ block0(v0: i32x4):
    ;; In the x64 backend, all of these pseudo-instructions are lowered to moves between registers (e.g. MOVAPD, MOVDQA,
    ;; etc.). Because these have been marked as moves, no instructions are emitted by this function besides the prologue
    ;; and epilogue.
-    v1 = raw_bitcast.f32x4 v0
-    v2 = raw_bitcast.f64x2 v1
-    v3 = raw_bitcast.i8x16 v2
+    v1 = bitcast.f32x4 v0
+    v2 = bitcast.f64x2 v1
+    v3 = bitcast.i8x16 v2
    return v3
 }

--- a/cranelift/filetests/filetests/isa/x64/simd-issue-3951.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-issue-3951.clif
@@ -12,7 +12,7 @@ function %check_issue_3951(i64 vmctx) -> i8x16 fast {
    v4 = global_value.i64 gv0
    v5 = load.i8x16 notrap aligned v4+8
    v6 = icmp ugt v3, v5
-    v7 = raw_bitcast.i8x16 v6
+    v7 = bitcast.i8x16 v6
    jump block1(v7)
  block1(v1: i8x16):
    return v1
--- a/cranelift/filetests/filetests/runtests/bitcast-ref64.clif
+++ b/cranelift/filetests/filetests/runtests/bitcast-ref64.clif
@@ -1,7 +1,8 @@
 test run
 target aarch64
-; the interpreter, x86_64, and s390x do not support bitcasting to/from
-; references
+target x86_64
+target s390x
+; the interpreter does not support bitcasting to/from references

 function %bitcast_ir64(i64) -> i8 {
 block0(v0: i64):
--- a/cranelift/filetests/filetests/runtests/bitcast-same-type.clif
+++ b/cranelift/filetests/filetests/runtests/bitcast-same-type.clif
@@ -1,7 +1,9 @@
 test interpret
 test run
+set enable_llvm_abi_extensions=true
 target aarch64
-; x86_64 and s390x do not support bitcasting to the same type as the input.
+target x86_64
+target s390x

 function %bitcast_i8(i8) -> i8 {
 block0(v0: i8):
--- a/cranelift/filetests/filetests/runtests/ref64-invalid-null.clif
+++ b/cranelift/filetests/filetests/runtests/ref64-invalid-null.clif
@@ -14,7 +14,7 @@ block0:

 function %is_null_r64(i64) -> i8 {
 block0(v0: i64):
-  v1 = raw_bitcast.r64 v0
+  v1 = bitcast.r64 v0
  v2 = is_null v1
  return v2
 }
@@ -24,7 +24,7 @@ block0(v0: i64):

 function %is_invalid_r64(i64) -> i8 {
 block0(v0: i64):
-  v1 = raw_bitcast.r64 v0
+  v1 = bitcast.r64 v0
  v2 = is_invalid v1
  return v2
 }
--- a/cranelift/filetests/filetests/runtests/simd-bitcast-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/simd-bitcast-aarch64.clif
@@ -0,0 +1,21 @@
+test interpret
+test run
+target aarch64
+;; 64-bit vector types only supported on aarch64
+
+function %bitcast_if32x2(i32x2) -> f32x2 {
+block0(v0: i32x2):
+  v1 = bitcast.f32x2 v0
+  return v1
+}
+; run: %bitcast_if32x2([0 4294967295]) == [0x0.0 -NaN:0x3fffff]
+; run: %bitcast_if32x2([-1 127]) == [-NaN:0x3fffff 0x0.0000fep-126]
+
+function %bitcast_fi32x2(f32x2) -> i32x2 {
+block0(v0: f32x2):
+  v1 = bitcast.i32x2 v0
+  return v1
+}
+; run: %bitcast_fi32x2([0x0.0 -NaN:0x3fffff]) == [0 4294967295]
+; run: %bitcast_fi32x2([-NaN:0x3fffff 0x0.0000fep-126]) == [-1 127]
+
--- a/cranelift/filetests/filetests/runtests/simd-bitcast.clif
+++ b/cranelift/filetests/filetests/runtests/simd-bitcast.clif
@@ -1,23 +1,8 @@
 test interpret
 test run
 target aarch64
-; x86_64 and s390x do not support vector bitcasts.
-
-function %bitcast_if32x2(i32x2) -> f32x2 {
-block0(v0: i32x2):
-  v1 = bitcast.f32x2 v0
-  return v1
-}
-; run: %bitcast_if32x2([0 4294967295]) == [0x0.0 -NaN:0x3fffff]
-; run: %bitcast_if32x2([-1 127]) == [-NaN:0x3fffff 0x0.0000fep-126]
-
-function %bitcast_fi32x2(f32x2) -> i32x2 {
-block0(v0: f32x2):
-  v1 = bitcast.i32x2 v0
-  return v1
-}
-; run: %bitcast_fi32x2([0x0.0 -NaN:0x3fffff]) == [0 4294967295]
-; run: %bitcast_fi32x2([-NaN:0x3fffff 0x0.0000fep-126]) == [-1 127]
+target x86_64
+target s390x

 function %bitcast_if32x4(i32x4) -> f32x4 {
 block0(v0: i32x4):
--- a/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif
+++ b/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif
@@ -8,7 +8,7 @@ target x86_64 skylake
 function %mask_from_icmp(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
    v2 = icmp sge v0, v1
-    v3 = raw_bitcast.i32x4 v2
+    v3 = bitcast.i32x4 v2
    v4 = bitselect v3, v0, v1
    return v4
 }
@@ -16,7 +16,7 @@ block0(v0: i32x4, v1: i32x4):

 function %mask_casted(i64x2, i64x2, i32x4) -> i64x2 {
 block0(v0: i64x2, v1: i64x2, v2: i32x4):
-    v3 = raw_bitcast.i64x2 v2
+    v3 = bitcast.i64x2 v2
    v4 = bitselect v3, v0, v1
    return v4
 }
--- a/cranelift/filetests/filetests/runtests/simd-comparison.clif
+++ b/cranelift/filetests/filetests/runtests/simd-comparison.clif
@@ -51,7 +51,7 @@ block0:
    v0 = vconst.i8x16 [0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
    v1 = vconst.i8x16 [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
    v2 = icmp sgt v0, v1
-    v3 = raw_bitcast.i8x16 v2
+    v3 = bitcast.i8x16 v2
    v4 = vconst.i8x16 [0 0 0xff 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
    v7 = icmp eq v3, v4
    v8 = vall_true v7
@@ -126,7 +126,7 @@ block0:
    v1 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
    v2 = icmp ult v0, v1
    v3 = vconst.i16x8 0x00
-    v4 = raw_bitcast.i16x8 v2
+    v4 = bitcast.i16x8 v2
    v5 = icmp eq v3, v4
    v8 = vall_true v5
    return v8
@@ -200,7 +200,7 @@ block0:
    v2 = fcmp gt v0, v1
    ; now check that the result v2 is all zeroes
    v3 = vconst.i32x4 0x00
-    v4 = raw_bitcast.i32x4 v2
+    v4 = bitcast.i32x4 v2
    v5 = icmp eq v3, v4
    v8 = vall_true v5
    return v8
--- a/cranelift/filetests/filetests/runtests/simd-lane-access.clif
+++ b/cranelift/filetests/filetests/runtests/simd-lane-access.clif
@@ -26,10 +26,10 @@ block0:
 function %shuffle_i32x4_in_same_place() -> i32x4 {
 block0:
    v1 = vconst.i32x4 [0 1 2 3]
-    v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
+    v2 = bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
    ; keep each lane in place from the first vector
    v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
-    v4 = raw_bitcast.i32x4 v3
+    v4 = bitcast.i32x4 v3
    return v4
 }
 ; run: %shuffle_in_same_place() == [0 1 2 3]
@@ -37,10 +37,10 @@ block0:
 function %shuffle_i32x4_to_all_true() -> i32x4 {
 block0:
    v1 = vconst.i32x4 [-1 0 -1 0]
-    v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
+    v2 = bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
    ; pair up the true values to make the entire vector true
    v3 = shuffle v2, v2, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11]
-    v4 = raw_bitcast.i32x4 v3 ; TODO store.i32x4 is unavailable; see https://github.com/bytecodealliance/wasmtime/issues/2237
+    v4 = bitcast.i32x4 v3 ; TODO store.i32x4 is unavailable; see https://github.com/bytecodealliance/wasmtime/issues/2237
    return v4
 }
 ; run: %shuffle_i32x4_to_all_true() == [0xffffffff 0xffffffff 0xffffffff 0xffffffff]
@@ -100,7 +100,7 @@ block0:
    v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 -1 0 0
    0 0 0]
    v2 = extractlane v1, 10
-    v3 = raw_bitcast.i8 v2
+    v3 = bitcast.i8 v2
    return v3
 }
 ; run: %extractlane_i8x16_last() == 0xff
--- a/cranelift/filetests/filetests/simple_preopt/bitselect.clif
+++ b/cranelift/filetests/filetests/simple_preopt/bitselect.clif
@@ -16,7 +16,7 @@ block0(v0: i8x16, v1: i8x16):
 ;; can't remove the bitselect in this case.
 function %mask_casted(i8x16, i8x16, i32x4) -> i8x16 {
 block0(v0: i8x16, v1: i8x16, v2: i32x4):
-    v3 = raw_bitcast.i8x16 v2
+    v3 = bitcast.i8x16 v2
    v4 = bitselect v3, v0, v1
    ; check: v4 = bitselect v3, v0, v1
    return v4
@@ -26,7 +26,7 @@ function %good_const_mask_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
    v3 = vconst.i8x16 [0 0 0xFF 0 0 0xFF 0 0 0 0 0xFF 0 0 0 0 0xFF]
    v4 = bitselect v3, v0, v1
-    ; check:  v5 = raw_bitcast.i8x16 v3
+    ; check:  v5 = bitcast.i8x16 v3
    ; nextln: v4 = vselect v5, v0, v1
    return v4
 }
@@ -35,11 +35,11 @@ function %good_const_mask_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
    v3 = vconst.i16x8 [0x0000 0xFF00 0x0000 0x00FF 0x0000 0xFFFF 0x00FF 0xFFFF]
    v4 = bitselect v3, v0, v1
-    ; check:  v5 = raw_bitcast.i8x16 v3
-    ; nextln: v6 = raw_bitcast.i8x16 v0
-    ; nextln: v7 = raw_bitcast.i8x16 v1
+    ; check:  v5 = bitcast.i8x16 v3
+    ; nextln: v6 = bitcast.i8x16 v0
+    ; nextln: v7 = bitcast.i8x16 v1
    ; nextln: v8 = vselect v5, v6, v7
-    ; nextln: v4 = raw_bitcast.i16x8 v8
+    ; nextln: v4 = bitcast.i16x8 v8
    return v4
 }

--- a/cranelift/filetests/filetests/verifier/bitcast.clif
+++ b/cranelift/filetests/filetests/verifier/bitcast.clif
@@ -10,14 +10,14 @@ block0(v0: i32):
 ; bitcast to a type larger than the operand is not ok
 function %valid_bitcast2(i32) -> i64 {
 block0(v0: i32):
-    v1 = bitcast.i64 v0 ; error: The bitcast argument v0 has a lane type of 32 bits, which doesn't match an expected type of 64 bits
+    v1 = bitcast.i64 v0 ; error: The bitcast argument v0 has a type of 32 bits, which doesn't match an expected type of 64 bits
    return v1
 }

 ; bitcast to a smaller type is not ok
 function %bad_bitcast(i64) -> i32 {
 block0(v0: i64):
-    v1 = bitcast.i32 v0 ; error: The bitcast argument v0 has a lane type of 64 bits, which doesn't match an expected type of 32 bits
+    v1 = bitcast.i32 v0 ; error: The bitcast argument v0 has a type of 64 bits, which doesn't match an expected type of 32 bits
    return v1
 }