Merge raw_bitcast and bitcast (#5175)

- Allow bitcast for vectors with differing lane widths
- Remove raw_bitcast IR instruction
- Change all users of raw_bitcast to bitcast
- Implement support for no-op bitcast cases across backends

This implements the second step of the plan outlined here: https://github.com/bytecodealliance/wasmtime/issues/4566#issuecomment-1234819394
2022-11-02 18:16:27 +01:00
parent e0c8a7f477
commit 961107ec63
26 changed files with 95 additions and 130 deletions
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -683,8 +683,6 @@ pub(crate) fn define(
            .build(),
    );

-    let AnyTo = &TypeVar::copy_from(Any, "AnyTo".to_string());
-
    let Mem = &TypeVar::new(
        "Mem",
        "Any type that can be stored in memory",
@@ -3148,32 +3146,6 @@ pub(crate) fn define(
        The input and output types must be storable to memory and of the same
        size. A bitcast is equivalent to storing one type and loading the other
        type from the same address.
-
-        For vector types, the lane types must also be the same size (see
-        `raw_bitcast` for changing the lane size).
-        "#,
-            &formats.unary,
-        )
-        .operands_in(vec![x])
-        .operands_out(vec![a]),
-    );
-
-    let x = &Operand::new("x", Any);
-    let a = &Operand::new("a", AnyTo).with_doc("Bits of `x` reinterpreted");
-
-    ig.push(
-        Inst::new(
-            "raw_bitcast",
-            r#"
-        Cast the bits in `x` as a different type of the same bit width.
-
-        This instruction does not change the data's representation but allows
-        data in registers to be used as different types, e.g. an i32x4 as a
-        b8x16. The only constraint on the result `a` is that it can be
-        `raw_bitcast` back to the original type. Also, in a raw_bitcast between
-        vector types with the same number of lanes, the value of each result
-        lane is a raw_bitcast of the corresponding operand lane. TODO there is
-        currently no mechanism for enforcing the bit width constraint.
        "#,
            &formats.unary,
        )
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -2212,8 +2212,8 @@
 ;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ; SIMD&FP <=> SIMD&FP
-(rule 5 (lower (has_type (ty_float_or_vec out_ty) (bitcast x @ (value_type (ty_float_or_vec _)))))
-      (fpu_move out_ty x))
+(rule 5 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type (ty_float_or_vec _)))))
+      x)

 ; GPR => SIMD&FP
 (rule 4 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type in_ty))))
@@ -2232,11 +2232,6 @@
      x)
 (rule 1 (lower (has_type $I128 (bitcast x @ (value_type $I128)))) x)

-;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule (lower (raw_bitcast val))
-      val)
-
 ;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; extractlane with lane 0 can pass through the value unchanged; upper
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -207,8 +207,6 @@ pub(crate) fn lower_insn_to_regs(

        Opcode::Vconst => implemented_in_isle(ctx),

-        Opcode::RawBitcast => implemented_in_isle(ctx),
-
        Opcode::Extractlane => implemented_in_isle(ctx),

        Opcode::Insertlane => implemented_in_isle(ctx),
--- a/cranelift/codegen/src/isa/riscv64/lower.isle
+++ b/cranelift/codegen/src/isa/riscv64/lower.isle
@@ -814,11 +814,6 @@
   (lower (has_type out (bitcast v @ (value_type in_ty))))
   (gen_moves v in_ty out))

-;;;;;  Rules for `raw_bitcast`;;;;;;;;;
-(rule
-   (lower (has_type out (raw_bitcast v @ (value_type in_ty))))
-   (gen_moves v in_ty out))
-
 ;;;;;  Rules for `ceil`;;;;;;;;;
 (rule 
  (lower (has_type ty (ceil x)))
--- a/cranelift/codegen/src/isa/s390x/lower.isle
+++ b/cranelift/codegen/src/isa/s390x/lower.isle
@@ -1760,16 +1760,25 @@
 (rule (lower (has_type $I32 (bitcast x @ (value_type $F32))))
      (vec_extract_lane $F32X4 x 0 (zero_reg)))

+;; Bitcast between types residing in GPRs is a no-op.
+(rule 1 (lower (has_type (gpr32_ty _)
+                         (bitcast x @ (value_type (gpr32_ty _))))) x)
+(rule 2 (lower (has_type (gpr64_ty _)
+                         (bitcast x @ (value_type (gpr64_ty _))))) x)

-;;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Bitcast between types residing in FPRs is a no-op.
+(rule 3 (lower (has_type (ty_scalar_float _)
+                         (bitcast x @ (value_type (ty_scalar_float _))))) x)

-;; FIXME: There are two flavors of raw_bitcast, which are currently not
+;; Bitcast between types residing in VRs is a no-op.
+;; FIXME: There are two flavors of vector bitcast, which are currently not
 ;; distinguished in CLIF IR.  Those generated by Wasmtime assume little-endian
 ;; lane order, and those generated elsewhere assume big-endian lane order.
-;; Raw bitcast is a no-op if current lane order matches that assumed lane order.
+;; Bitcast is a no-op if current lane order matches that assumed lane order.
 ;; However, due to our choice of lane order depending on the current function
 ;; ABI, every bitcast we currently see here is indeed a no-op.
-(rule (lower (raw_bitcast x)) x)
+(rule 4 (lower (has_type (vr128_ty _)
+                         (bitcast x @ (value_type (vr128_ty _))))) x)


 ;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
--- a/cranelift/codegen/src/isa/s390x/lower.rs
+++ b/cranelift/codegen/src/isa/s390x/lower.rs
@@ -141,7 +141,6 @@ impl LowerBackend for S390xBackend {
            | Opcode::ScalarToVector
            | Opcode::VhighBits
            | Opcode::Bitcast
-            | Opcode::RawBitcast
            | Opcode::Load
            | Opcode::Uload8
            | Opcode::Sload8
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -3303,6 +3303,14 @@
 (rule (lower (has_type $F64 (bitcast src @ (value_type $I64))))
      (bitcast_gpr_to_xmm $I64 src))

+;; Bitcast between types residing in GPR registers is a no-op.
+(rule 1 (lower (has_type (is_gpr_type _)
+                         (bitcast x @ (value_type (is_gpr_type _))))) x)
+
+;; Bitcast between types residing in XMM registers is a no-op.
+(rule 2 (lower (has_type (is_xmm_type _)
+                         (bitcast x @ (value_type (is_xmm_type _))))) x)
+
 ;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (has_type $F32 (fcopysign a @ (value_type $F32) b)))
@@ -3472,15 +3480,6 @@
      ;; TODO use Inst::gen_constant() instead.
      (x64_xmm_load_const ty (const_to_vconst const)))

-;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; A raw_bitcast is just a mechanism for correcting the type of V128 values (see
-;; https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR
-;; instruction should emit no machine code but a move is necessary to give the
-;; register allocator a definition for the output virtual register.
-(rule (lower (raw_bitcast val))
-      (put_in_regs val))
-
 ;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; If `lhs` and `rhs` are the same we can use a single PSHUFB to shuffle the XMM
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -453,7 +453,6 @@ fn lower_insn_to_regs(
        | Opcode::GetPinnedReg
        | Opcode::SetPinnedReg
        | Opcode::Vconst
-        | Opcode::RawBitcast
        | Opcode::Insertlane
        | Opcode::Shuffle
        | Opcode::Swizzle
--- a/cranelift/codegen/src/nan_canonicalization.rs
+++ b/cranelift/codegen/src/nan_canonicalization.rs
@@ -70,11 +70,11 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst) {
            .select(is_nan, canon_nan, new_res);
    };
    let vector_select = |pos: &mut FuncCursor, canon_nan: Value| {
-        let cond = pos.ins().raw_bitcast(types::I8X16, is_nan);
-        let canon_nan = pos.ins().raw_bitcast(types::I8X16, canon_nan);
-        let result = pos.ins().raw_bitcast(types::I8X16, new_res);
+        let cond = pos.ins().bitcast(types::I8X16, is_nan);
+        let canon_nan = pos.ins().bitcast(types::I8X16, canon_nan);
+        let result = pos.ins().bitcast(types::I8X16, new_res);
        let bitmask = pos.ins().bitselect(cond, canon_nan, result);
-        pos.ins().with_result(val).raw_bitcast(val_type, bitmask);
+        pos.ins().with_result(val).bitcast(val_type, bitmask);
    };

    match val_type {
--- a/cranelift/codegen/src/simple_preopt.rs
+++ b/cranelift/codegen/src/simple_preopt.rs
@@ -863,7 +863,7 @@ mod simplify {
                                return;
                            }
                            let new_type = I8.by(old_cond_type.bytes()).unwrap();
-                            (pos.ins().raw_bitcast(new_type, args[0]), new_type)
+                            (pos.ins().bitcast(new_type, args[0]), new_type)
                        }
                        _ => return,
                    };
@@ -874,10 +874,10 @@ mod simplify {

                    if arg_type != old_arg_type {
                        // Operands types must match, we need to add bitcasts.
-                        let arg1 = pos.ins().raw_bitcast(arg_type, args[1]);
-                        let arg2 = pos.ins().raw_bitcast(arg_type, args[2]);
+                        let arg1 = pos.ins().bitcast(arg_type, args[1]);
+                        let arg2 = pos.ins().bitcast(arg_type, args[2]);
                        let ret = pos.ins().vselect(cond_val, arg1, arg2);
-                        pos.func.dfg.replace(inst).raw_bitcast(old_arg_type, ret);
+                        pos.func.dfg.replace(inst).bitcast(old_arg_type, ret);
                    } else {
                        pos.func
                            .dfg
--- a/cranelift/codegen/src/verifier/mod.rs
+++ b/cranelift/codegen/src/verifier/mod.rs
@@ -1078,17 +1078,7 @@ impl<'a> Verifier<'a> {
        let typ = self.func.dfg.ctrl_typevar(inst);
        let value_type = self.func.dfg.value_type(arg);

-        if typ.lane_bits() != value_type.lane_bits() {
-            errors.fatal((
-                inst,
-                format!(
-                    "The bitcast argument {} has a lane type of {} bits, which doesn't match an expected type of {} bits",
-                    arg,
-                    value_type.lane_bits(),
-                    typ.lane_bits()
-                ),
-            ))
-        } else if typ.bits() != value_type.bits() {
+        if typ.bits() != value_type.bits() {
            errors.fatal((
                inst,
                format!(