Merge raw_bitcast and bitcast (#5175)

- Allow bitcast for vectors with differing lane widths
- Remove raw_bitcast IR instruction
- Change all users of raw_bitcast to bitcast
- Implement support for no-op bitcast cases across backends

This implements the second step of the plan outlined here:
https://github.com/bytecodealliance/wasmtime/issues/4566#issuecomment-1234819394
2022-11-02 18:16:27 +01:00
parent e0c8a7f477
commit 961107ec63
26 changed files with 95 additions and 130 deletions
--- a/cranelift/filetests/filetests/runtests/bitcast-ref64.clif
+++ b/cranelift/filetests/filetests/runtests/bitcast-ref64.clif
@@ -1,7 +1,8 @@
 test run
 target aarch64
-; the interpreter, x86_64, and s390x do not support bitcasting to/from
-; references
+target x86_64
+target s390x
+; the interpreter does not support bitcasting to/from references

 function %bitcast_ir64(i64) -> i8 {
 block0(v0: i64):
--- a/cranelift/filetests/filetests/runtests/bitcast-same-type.clif
+++ b/cranelift/filetests/filetests/runtests/bitcast-same-type.clif
@@ -1,7 +1,9 @@
 test interpret
 test run
+set enable_llvm_abi_extensions=true
 target aarch64
-; x86_64 and s390x do not support bitcasting to the same type as the input.
+target x86_64
+target s390x

 function %bitcast_i8(i8) -> i8 {
 block0(v0: i8):
--- a/cranelift/filetests/filetests/runtests/ref64-invalid-null.clif
+++ b/cranelift/filetests/filetests/runtests/ref64-invalid-null.clif
@@ -14,7 +14,7 @@ block0:

 function %is_null_r64(i64) -> i8 {
 block0(v0: i64):
-  v1 = raw_bitcast.r64 v0
+  v1 = bitcast.r64 v0
  v2 = is_null v1
  return v2
 }
@@ -24,7 +24,7 @@ block0(v0: i64):

 function %is_invalid_r64(i64) -> i8 {
 block0(v0: i64):
-  v1 = raw_bitcast.r64 v0
+  v1 = bitcast.r64 v0
  v2 = is_invalid v1
  return v2
 }
--- a/cranelift/filetests/filetests/runtests/simd-bitcast-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/simd-bitcast-aarch64.clif
@@ -0,0 +1,21 @@
+test interpret
+test run
+target aarch64
+;; 64-bit vector types only supported on aarch64
+
+function %bitcast_if32x2(i32x2) -> f32x2 {
+block0(v0: i32x2):
+  v1 = bitcast.f32x2 v0
+  return v1
+}
+; run: %bitcast_if32x2([0 4294967295]) == [0x0.0 -NaN:0x3fffff]
+; run: %bitcast_if32x2([-1 127]) == [-NaN:0x3fffff 0x0.0000fep-126]
+
+function %bitcast_fi32x2(f32x2) -> i32x2 {
+block0(v0: f32x2):
+  v1 = bitcast.i32x2 v0
+  return v1
+}
+; run: %bitcast_fi32x2([0x0.0 -NaN:0x3fffff]) == [0 4294967295]
+; run: %bitcast_fi32x2([-NaN:0x3fffff 0x0.0000fep-126]) == [-1 127]
+
--- a/cranelift/filetests/filetests/runtests/simd-bitcast.clif
+++ b/cranelift/filetests/filetests/runtests/simd-bitcast.clif
@@ -1,23 +1,8 @@
 test interpret
 test run
 target aarch64
-; x86_64 and s390x do not support vector bitcasts.
-
-function %bitcast_if32x2(i32x2) -> f32x2 {
-block0(v0: i32x2):
-  v1 = bitcast.f32x2 v0
-  return v1
-}
-; run: %bitcast_if32x2([0 4294967295]) == [0x0.0 -NaN:0x3fffff]
-; run: %bitcast_if32x2([-1 127]) == [-NaN:0x3fffff 0x0.0000fep-126]
-
-function %bitcast_fi32x2(f32x2) -> i32x2 {
-block0(v0: f32x2):
-  v1 = bitcast.i32x2 v0
-  return v1
-}
-; run: %bitcast_fi32x2([0x0.0 -NaN:0x3fffff]) == [0 4294967295]
-; run: %bitcast_fi32x2([-NaN:0x3fffff 0x0.0000fep-126]) == [-1 127]
+target x86_64
+target s390x

 function %bitcast_if32x4(i32x4) -> f32x4 {
 block0(v0: i32x4):
--- a/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif
+++ b/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif
@@ -8,7 +8,7 @@ target x86_64 skylake
 function %mask_from_icmp(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
    v2 = icmp sge v0, v1
-    v3 = raw_bitcast.i32x4 v2
+    v3 = bitcast.i32x4 v2
    v4 = bitselect v3, v0, v1
    return v4
 }
@@ -16,7 +16,7 @@ block0(v0: i32x4, v1: i32x4):

 function %mask_casted(i64x2, i64x2, i32x4) -> i64x2 {
 block0(v0: i64x2, v1: i64x2, v2: i32x4):
-    v3 = raw_bitcast.i64x2 v2
+    v3 = bitcast.i64x2 v2
    v4 = bitselect v3, v0, v1
    return v4
 }
--- a/cranelift/filetests/filetests/runtests/simd-comparison.clif
+++ b/cranelift/filetests/filetests/runtests/simd-comparison.clif
@@ -51,7 +51,7 @@ block0:
    v0 = vconst.i8x16 [0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
    v1 = vconst.i8x16 [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
    v2 = icmp sgt v0, v1
-    v3 = raw_bitcast.i8x16 v2
+    v3 = bitcast.i8x16 v2
    v4 = vconst.i8x16 [0 0 0xff 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
    v7 = icmp eq v3, v4
    v8 = vall_true v7
@@ -126,7 +126,7 @@ block0:
    v1 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
    v2 = icmp ult v0, v1
    v3 = vconst.i16x8 0x00
-    v4 = raw_bitcast.i16x8 v2
+    v4 = bitcast.i16x8 v2
    v5 = icmp eq v3, v4
    v8 = vall_true v5
    return v8
@@ -200,7 +200,7 @@ block0:
    v2 = fcmp gt v0, v1
    ; now check that the result v2 is all zeroes
    v3 = vconst.i32x4 0x00
-    v4 = raw_bitcast.i32x4 v2
+    v4 = bitcast.i32x4 v2
    v5 = icmp eq v3, v4
    v8 = vall_true v5
    return v8
--- a/cranelift/filetests/filetests/runtests/simd-lane-access.clif
+++ b/cranelift/filetests/filetests/runtests/simd-lane-access.clif
@@ -26,10 +26,10 @@ block0:
 function %shuffle_i32x4_in_same_place() -> i32x4 {
 block0:
    v1 = vconst.i32x4 [0 1 2 3]
-    v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
+    v2 = bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
    ; keep each lane in place from the first vector
    v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
-    v4 = raw_bitcast.i32x4 v3
+    v4 = bitcast.i32x4 v3
    return v4
 }
 ; run: %shuffle_in_same_place() == [0 1 2 3]
@@ -37,10 +37,10 @@ block0:
 function %shuffle_i32x4_to_all_true() -> i32x4 {
 block0:
    v1 = vconst.i32x4 [-1 0 -1 0]
-    v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
+    v2 = bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
    ; pair up the true values to make the entire vector true
    v3 = shuffle v2, v2, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11]
-    v4 = raw_bitcast.i32x4 v3 ; TODO store.i32x4 is unavailable; see https://github.com/bytecodealliance/wasmtime/issues/2237
+    v4 = bitcast.i32x4 v3 ; TODO store.i32x4 is unavailable; see https://github.com/bytecodealliance/wasmtime/issues/2237
    return v4
 }
 ; run: %shuffle_i32x4_to_all_true() == [0xffffffff 0xffffffff 0xffffffff 0xffffffff]
@@ -100,7 +100,7 @@ block0:
    v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 -1 0 0
    0 0 0]
    v2 = extractlane v1, 10
-    v3 = raw_bitcast.i8 v2
+    v3 = bitcast.i8 v2
    return v3
 }
 ; run: %extractlane_i8x16_last() == 0xff