Merge raw_bitcast and bitcast (#5175)

- Allow bitcast for vectors with differing lane widths
- Remove raw_bitcast IR instruction
- Change all users of raw_bitcast to bitcast
- Implement support for no-op bitcast cases across backends

This implements the second step of the plan outlined here:
https://github.com/bytecodealliance/wasmtime/issues/4566#issuecomment-1234819394
This commit is contained in:
Ulrich Weigand
2022-11-02 18:16:27 +01:00
committed by GitHub
parent e0c8a7f477
commit 961107ec63
26 changed files with 95 additions and 130 deletions

View File

@@ -2212,8 +2212,8 @@
;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; SIMD&FP <=> SIMD&FP
(rule 5 (lower (has_type (ty_float_or_vec out_ty) (bitcast x @ (value_type (ty_float_or_vec _)))))
(fpu_move out_ty x))
(rule 5 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type (ty_float_or_vec _)))))
x)
; GPR => SIMD&FP
(rule 4 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type in_ty))))
@@ -2232,11 +2232,6 @@
x)
(rule 1 (lower (has_type $I128 (bitcast x @ (value_type $I128)))) x)
;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (raw_bitcast val))
val)
;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; extractlane with lane 0 can pass through the value unchanged; upper

View File

@@ -207,8 +207,6 @@ pub(crate) fn lower_insn_to_regs(
Opcode::Vconst => implemented_in_isle(ctx),
Opcode::RawBitcast => implemented_in_isle(ctx),
Opcode::Extractlane => implemented_in_isle(ctx),
Opcode::Insertlane => implemented_in_isle(ctx),

View File

@@ -814,11 +814,6 @@
(lower (has_type out (bitcast v @ (value_type in_ty))))
(gen_moves v in_ty out))
;;;;; Rules for `raw_bitcast`;;;;;;;;;
(rule
(lower (has_type out (raw_bitcast v @ (value_type in_ty))))
(gen_moves v in_ty out))
;;;;; Rules for `ceil`;;;;;;;;;
(rule
(lower (has_type ty (ceil x)))

View File

@@ -1760,16 +1760,25 @@
(rule (lower (has_type $I32 (bitcast x @ (value_type $F32))))
(vec_extract_lane $F32X4 x 0 (zero_reg)))
;; Bitcast between types residing in GPRs is a no-op.
(rule 1 (lower (has_type (gpr32_ty _)
(bitcast x @ (value_type (gpr32_ty _))))) x)
(rule 2 (lower (has_type (gpr64_ty _)
(bitcast x @ (value_type (gpr64_ty _))))) x)
;;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitcast between types residing in FPRs is a no-op.
(rule 3 (lower (has_type (ty_scalar_float _)
(bitcast x @ (value_type (ty_scalar_float _))))) x)
;; FIXME: There are two flavors of raw_bitcast, which are currently not
;; Bitcast between types residing in VRs is a no-op.
;; FIXME: There are two flavors of vector bitcast, which are currently not
;; distinguished in CLIF IR. Those generated by Wasmtime assume little-endian
;; lane order, and those generated elsewhere assume big-endian lane order.
;; Raw bitcast is a no-op if current lane order matches that assumed lane order.
;; Bitcast is a no-op if current lane order matches that assumed lane order.
;; However, due to our choice of lane order depending on the current function
;; ABI, every bitcast we currently see here is indeed a no-op.
(rule (lower (raw_bitcast x)) x)
(rule 4 (lower (has_type (vr128_ty _)
(bitcast x @ (value_type (vr128_ty _))))) x)
;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@@ -141,7 +141,6 @@ impl LowerBackend for S390xBackend {
| Opcode::ScalarToVector
| Opcode::VhighBits
| Opcode::Bitcast
| Opcode::RawBitcast
| Opcode::Load
| Opcode::Uload8
| Opcode::Sload8

View File

@@ -3303,6 +3303,14 @@
(rule (lower (has_type $F64 (bitcast src @ (value_type $I64))))
(bitcast_gpr_to_xmm $I64 src))
;; Bitcast between types residing in GPR registers is a no-op.
(rule 1 (lower (has_type (is_gpr_type _)
(bitcast x @ (value_type (is_gpr_type _))))) x)
;; Bitcast between types residing in XMM registers is a no-op.
(rule 2 (lower (has_type (is_xmm_type _)
(bitcast x @ (value_type (is_xmm_type _))))) x)
;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (fcopysign a @ (value_type $F32) b)))
@@ -3472,15 +3480,6 @@
;; TODO use Inst::gen_constant() instead.
(x64_xmm_load_const ty (const_to_vconst const)))
;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; A raw_bitcast is just a mechanism for correcting the type of V128 values (see
;; https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR
;; instruction should emit no machine code but a move is necessary to give the
;; register allocator a definition for the output virtual register.
(rule (lower (raw_bitcast val))
(put_in_regs val))
;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; If `lhs` and `rhs` are the same we can use a single PSHUFB to shuffle the XMM

View File

@@ -453,7 +453,6 @@ fn lower_insn_to_regs(
| Opcode::GetPinnedReg
| Opcode::SetPinnedReg
| Opcode::Vconst
| Opcode::RawBitcast
| Opcode::Insertlane
| Opcode::Shuffle
| Opcode::Swizzle