Merge raw_bitcast and bitcast (#5175)
- Allow bitcast for vectors with differing lane widths
- Remove raw_bitcast IR instruction
- Change all users of raw_bitcast to bitcast
- Implement support for no-op bitcast cases across backends

This implements the second step of the plan outlined here:
https://github.com/bytecodealliance/wasmtime/issues/4566#issuecomment-1234819394
This commit is contained in:
@@ -683,8 +683,6 @@ pub(crate) fn define(
|
||||
.build(),
|
||||
);
|
||||
|
||||
let AnyTo = &TypeVar::copy_from(Any, "AnyTo".to_string());
|
||||
|
||||
let Mem = &TypeVar::new(
|
||||
"Mem",
|
||||
"Any type that can be stored in memory",
|
||||
@@ -3148,32 +3146,6 @@ pub(crate) fn define(
|
||||
The input and output types must be storable to memory and of the same
|
||||
size. A bitcast is equivalent to storing one type and loading the other
|
||||
type from the same address.
|
||||
|
||||
For vector types, the lane types must also be the same size (see
|
||||
`raw_bitcast` for changing the lane size).
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", Any);
|
||||
let a = &Operand::new("a", AnyTo).with_doc("Bits of `x` reinterpreted");
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"raw_bitcast",
|
||||
r#"
|
||||
Cast the bits in `x` as a different type of the same bit width.
|
||||
|
||||
This instruction does not change the data's representation but allows
|
||||
data in registers to be used as different types, e.g. an i32x4 as a
|
||||
b8x16. The only constraint on the result `a` is that it can be
|
||||
`raw_bitcast` back to the original type. Also, in a raw_bitcast between
|
||||
vector types with the same number of lanes, the value of each result
|
||||
lane is a raw_bitcast of the corresponding operand lane. TODO there is
|
||||
currently no mechanism for enforcing the bit width constraint.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
|
||||
@@ -2212,8 +2212,8 @@
|
||||
;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; SIMD&FP <=> SIMD&FP
|
||||
(rule 5 (lower (has_type (ty_float_or_vec out_ty) (bitcast x @ (value_type (ty_float_or_vec _)))))
|
||||
(fpu_move out_ty x))
|
||||
(rule 5 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type (ty_float_or_vec _)))))
|
||||
x)
|
||||
|
||||
; GPR => SIMD&FP
|
||||
(rule 4 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type in_ty))))
|
||||
@@ -2232,11 +2232,6 @@
|
||||
x)
|
||||
(rule 1 (lower (has_type $I128 (bitcast x @ (value_type $I128)))) x)
|
||||
|
||||
;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (raw_bitcast val))
|
||||
val)
|
||||
|
||||
;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; extractlane with lane 0 can pass through the value unchanged; upper
|
||||
|
||||
@@ -207,8 +207,6 @@ pub(crate) fn lower_insn_to_regs(
|
||||
|
||||
Opcode::Vconst => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::RawBitcast => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::Extractlane => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::Insertlane => implemented_in_isle(ctx),
|
||||
|
||||
@@ -814,11 +814,6 @@
|
||||
(lower (has_type out (bitcast v @ (value_type in_ty))))
|
||||
(gen_moves v in_ty out))
|
||||
|
||||
;;;;; Rules for `raw_bitcast`;;;;;;;;;
|
||||
(rule
|
||||
(lower (has_type out (raw_bitcast v @ (value_type in_ty))))
|
||||
(gen_moves v in_ty out))
|
||||
|
||||
;;;;; Rules for `ceil`;;;;;;;;;
|
||||
(rule
|
||||
(lower (has_type ty (ceil x)))
|
||||
|
||||
@@ -1760,16 +1760,25 @@
|
||||
(rule (lower (has_type $I32 (bitcast x @ (value_type $F32))))
|
||||
(vec_extract_lane $F32X4 x 0 (zero_reg)))
|
||||
|
||||
;; Bitcast between types residing in GPRs is a no-op.
|
||||
(rule 1 (lower (has_type (gpr32_ty _)
|
||||
(bitcast x @ (value_type (gpr32_ty _))))) x)
|
||||
(rule 2 (lower (has_type (gpr64_ty _)
|
||||
(bitcast x @ (value_type (gpr64_ty _))))) x)
|
||||
|
||||
;;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Bitcast between types residing in FPRs is a no-op.
|
||||
(rule 3 (lower (has_type (ty_scalar_float _)
|
||||
(bitcast x @ (value_type (ty_scalar_float _))))) x)
|
||||
|
||||
;; FIXME: There are two flavors of raw_bitcast, which are currently not
|
||||
;; Bitcast between types residing in VRs is a no-op.
|
||||
;; FIXME: There are two flavors of vector bitcast, which are currently not
|
||||
;; distinguished in CLIF IR. Those generated by Wasmtime assume little-endian
|
||||
;; lane order, and those generated elsewhere assume big-endian lane order.
|
||||
;; Raw bitcast is a no-op if current lane order matches that assumed lane order.
|
||||
;; Bitcast is a no-op if current lane order matches that assumed lane order.
|
||||
;; However, due to our choice of lane order depending on the current function
|
||||
;; ABI, every bitcast we currently see here is indeed a no-op.
|
||||
(rule (lower (raw_bitcast x)) x)
|
||||
(rule 4 (lower (has_type (vr128_ty _)
|
||||
(bitcast x @ (value_type (vr128_ty _))))) x)
|
||||
|
||||
|
||||
;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -141,7 +141,6 @@ impl LowerBackend for S390xBackend {
|
||||
| Opcode::ScalarToVector
|
||||
| Opcode::VhighBits
|
||||
| Opcode::Bitcast
|
||||
| Opcode::RawBitcast
|
||||
| Opcode::Load
|
||||
| Opcode::Uload8
|
||||
| Opcode::Sload8
|
||||
|
||||
@@ -3303,6 +3303,14 @@
|
||||
(rule (lower (has_type $F64 (bitcast src @ (value_type $I64))))
|
||||
(bitcast_gpr_to_xmm $I64 src))
|
||||
|
||||
;; Bitcast between types residing in GPR registers is a no-op.
|
||||
(rule 1 (lower (has_type (is_gpr_type _)
|
||||
(bitcast x @ (value_type (is_gpr_type _))))) x)
|
||||
|
||||
;; Bitcast between types residing in XMM registers is a no-op.
|
||||
(rule 2 (lower (has_type (is_xmm_type _)
|
||||
(bitcast x @ (value_type (is_xmm_type _))))) x)
|
||||
|
||||
;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type $F32 (fcopysign a @ (value_type $F32) b)))
|
||||
@@ -3472,15 +3480,6 @@
|
||||
;; TODO use Inst::gen_constant() instead.
|
||||
(x64_xmm_load_const ty (const_to_vconst const)))
|
||||
|
||||
;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; A raw_bitcast is just a mechanism for correcting the type of V128 values (see
|
||||
;; https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR
|
||||
;; instruction should emit no machine code but a move is necessary to give the
|
||||
;; register allocator a definition for the output virtual register.
|
||||
(rule (lower (raw_bitcast val))
|
||||
(put_in_regs val))
|
||||
|
||||
;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; If `lhs` and `rhs` are the same we can use a single PSHUFB to shuffle the XMM
|
||||
|
||||
@@ -453,7 +453,6 @@ fn lower_insn_to_regs(
|
||||
| Opcode::GetPinnedReg
|
||||
| Opcode::SetPinnedReg
|
||||
| Opcode::Vconst
|
||||
| Opcode::RawBitcast
|
||||
| Opcode::Insertlane
|
||||
| Opcode::Shuffle
|
||||
| Opcode::Swizzle
|
||||
|
||||
@@ -70,11 +70,11 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst) {
|
||||
.select(is_nan, canon_nan, new_res);
|
||||
};
|
||||
let vector_select = |pos: &mut FuncCursor, canon_nan: Value| {
|
||||
let cond = pos.ins().raw_bitcast(types::I8X16, is_nan);
|
||||
let canon_nan = pos.ins().raw_bitcast(types::I8X16, canon_nan);
|
||||
let result = pos.ins().raw_bitcast(types::I8X16, new_res);
|
||||
let cond = pos.ins().bitcast(types::I8X16, is_nan);
|
||||
let canon_nan = pos.ins().bitcast(types::I8X16, canon_nan);
|
||||
let result = pos.ins().bitcast(types::I8X16, new_res);
|
||||
let bitmask = pos.ins().bitselect(cond, canon_nan, result);
|
||||
pos.ins().with_result(val).raw_bitcast(val_type, bitmask);
|
||||
pos.ins().with_result(val).bitcast(val_type, bitmask);
|
||||
};
|
||||
|
||||
match val_type {
|
||||
|
||||
@@ -863,7 +863,7 @@ mod simplify {
|
||||
return;
|
||||
}
|
||||
let new_type = I8.by(old_cond_type.bytes()).unwrap();
|
||||
(pos.ins().raw_bitcast(new_type, args[0]), new_type)
|
||||
(pos.ins().bitcast(new_type, args[0]), new_type)
|
||||
}
|
||||
_ => return,
|
||||
};
|
||||
@@ -874,10 +874,10 @@ mod simplify {
|
||||
|
||||
if arg_type != old_arg_type {
|
||||
// Operands types must match, we need to add bitcasts.
|
||||
let arg1 = pos.ins().raw_bitcast(arg_type, args[1]);
|
||||
let arg2 = pos.ins().raw_bitcast(arg_type, args[2]);
|
||||
let arg1 = pos.ins().bitcast(arg_type, args[1]);
|
||||
let arg2 = pos.ins().bitcast(arg_type, args[2]);
|
||||
let ret = pos.ins().vselect(cond_val, arg1, arg2);
|
||||
pos.func.dfg.replace(inst).raw_bitcast(old_arg_type, ret);
|
||||
pos.func.dfg.replace(inst).bitcast(old_arg_type, ret);
|
||||
} else {
|
||||
pos.func
|
||||
.dfg
|
||||
|
||||
@@ -1078,17 +1078,7 @@ impl<'a> Verifier<'a> {
|
||||
let typ = self.func.dfg.ctrl_typevar(inst);
|
||||
let value_type = self.func.dfg.value_type(arg);
|
||||
|
||||
if typ.lane_bits() != value_type.lane_bits() {
|
||||
errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
"The bitcast argument {} has a lane type of {} bits, which doesn't match an expected type of {} bits",
|
||||
arg,
|
||||
value_type.lane_bits(),
|
||||
typ.lane_bits()
|
||||
),
|
||||
))
|
||||
} else if typ.bits() != value_type.bits() {
|
||||
if typ.bits() != value_type.bits() {
|
||||
errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
|
||||
Reference in New Issue
Block a user