Merge raw_bitcast and bitcast (#5175)
- Allow bitcast for vectors with differing lane widths
- Remove raw_bitcast IR instruction
- Change all users of raw_bitcast to bitcast
- Implement support for no-op bitcast cases across backends

This implements the second step of the plan outlined here:
https://github.com/bytecodealliance/wasmtime/issues/4566#issuecomment-1234819394
This commit is contained in:
@@ -683,8 +683,6 @@ pub(crate) fn define(
|
|||||||
.build(),
|
.build(),
|
||||||
);
|
);
|
||||||
|
|
||||||
let AnyTo = &TypeVar::copy_from(Any, "AnyTo".to_string());
|
|
||||||
|
|
||||||
let Mem = &TypeVar::new(
|
let Mem = &TypeVar::new(
|
||||||
"Mem",
|
"Mem",
|
||||||
"Any type that can be stored in memory",
|
"Any type that can be stored in memory",
|
||||||
@@ -3148,32 +3146,6 @@ pub(crate) fn define(
|
|||||||
The input and output types must be storable to memory and of the same
|
The input and output types must be storable to memory and of the same
|
||||||
size. A bitcast is equivalent to storing one type and loading the other
|
size. A bitcast is equivalent to storing one type and loading the other
|
||||||
type from the same address.
|
type from the same address.
|
||||||
|
|
||||||
For vector types, the lane types must also be the same size (see
|
|
||||||
`raw_bitcast` for changing the lane size).
|
|
||||||
"#,
|
|
||||||
&formats.unary,
|
|
||||||
)
|
|
||||||
.operands_in(vec![x])
|
|
||||||
.operands_out(vec![a]),
|
|
||||||
);
|
|
||||||
|
|
||||||
let x = &Operand::new("x", Any);
|
|
||||||
let a = &Operand::new("a", AnyTo).with_doc("Bits of `x` reinterpreted");
|
|
||||||
|
|
||||||
ig.push(
|
|
||||||
Inst::new(
|
|
||||||
"raw_bitcast",
|
|
||||||
r#"
|
|
||||||
Cast the bits in `x` as a different type of the same bit width.
|
|
||||||
|
|
||||||
This instruction does not change the data's representation but allows
|
|
||||||
data in registers to be used as different types, e.g. an i32x4 as a
|
|
||||||
b8x16. The only constraint on the result `a` is that it can be
|
|
||||||
`raw_bitcast` back to the original type. Also, in a raw_bitcast between
|
|
||||||
vector types with the same number of lanes, the value of each result
|
|
||||||
lane is a raw_bitcast of the corresponding operand lane. TODO there is
|
|
||||||
currently no mechanism for enforcing the bit width constraint.
|
|
||||||
"#,
|
"#,
|
||||||
&formats.unary,
|
&formats.unary,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -2212,8 +2212,8 @@
|
|||||||
;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
; SIMD&FP <=> SIMD&FP
|
; SIMD&FP <=> SIMD&FP
|
||||||
(rule 5 (lower (has_type (ty_float_or_vec out_ty) (bitcast x @ (value_type (ty_float_or_vec _)))))
|
(rule 5 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type (ty_float_or_vec _)))))
|
||||||
(fpu_move out_ty x))
|
x)
|
||||||
|
|
||||||
; GPR => SIMD&FP
|
; GPR => SIMD&FP
|
||||||
(rule 4 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type in_ty))))
|
(rule 4 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type in_ty))))
|
||||||
@@ -2232,11 +2232,6 @@
|
|||||||
x)
|
x)
|
||||||
(rule 1 (lower (has_type $I128 (bitcast x @ (value_type $I128)))) x)
|
(rule 1 (lower (has_type $I128 (bitcast x @ (value_type $I128)))) x)
|
||||||
|
|
||||||
;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
|
|
||||||
(rule (lower (raw_bitcast val))
|
|
||||||
val)
|
|
||||||
|
|
||||||
;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
;; extractlane with lane 0 can pass through the value unchanged; upper
|
;; extractlane with lane 0 can pass through the value unchanged; upper
|
||||||
|
|||||||
@@ -207,8 +207,6 @@ pub(crate) fn lower_insn_to_regs(
|
|||||||
|
|
||||||
Opcode::Vconst => implemented_in_isle(ctx),
|
Opcode::Vconst => implemented_in_isle(ctx),
|
||||||
|
|
||||||
Opcode::RawBitcast => implemented_in_isle(ctx),
|
|
||||||
|
|
||||||
Opcode::Extractlane => implemented_in_isle(ctx),
|
Opcode::Extractlane => implemented_in_isle(ctx),
|
||||||
|
|
||||||
Opcode::Insertlane => implemented_in_isle(ctx),
|
Opcode::Insertlane => implemented_in_isle(ctx),
|
||||||
|
|||||||
@@ -814,11 +814,6 @@
|
|||||||
(lower (has_type out (bitcast v @ (value_type in_ty))))
|
(lower (has_type out (bitcast v @ (value_type in_ty))))
|
||||||
(gen_moves v in_ty out))
|
(gen_moves v in_ty out))
|
||||||
|
|
||||||
;;;;; Rules for `raw_bitcast`;;;;;;;;;
|
|
||||||
(rule
|
|
||||||
(lower (has_type out (raw_bitcast v @ (value_type in_ty))))
|
|
||||||
(gen_moves v in_ty out))
|
|
||||||
|
|
||||||
;;;;; Rules for `ceil`;;;;;;;;;
|
;;;;; Rules for `ceil`;;;;;;;;;
|
||||||
(rule
|
(rule
|
||||||
(lower (has_type ty (ceil x)))
|
(lower (has_type ty (ceil x)))
|
||||||
|
|||||||
@@ -1760,16 +1760,25 @@
|
|||||||
(rule (lower (has_type $I32 (bitcast x @ (value_type $F32))))
|
(rule (lower (has_type $I32 (bitcast x @ (value_type $F32))))
|
||||||
(vec_extract_lane $F32X4 x 0 (zero_reg)))
|
(vec_extract_lane $F32X4 x 0 (zero_reg)))
|
||||||
|
|
||||||
|
;; Bitcast between types residing in GPRs is a no-op.
|
||||||
|
(rule 1 (lower (has_type (gpr32_ty _)
|
||||||
|
(bitcast x @ (value_type (gpr32_ty _))))) x)
|
||||||
|
(rule 2 (lower (has_type (gpr64_ty _)
|
||||||
|
(bitcast x @ (value_type (gpr64_ty _))))) x)
|
||||||
|
|
||||||
;;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; Bitcast between types residing in FPRs is a no-op.
|
||||||
|
(rule 3 (lower (has_type (ty_scalar_float _)
|
||||||
|
(bitcast x @ (value_type (ty_scalar_float _))))) x)
|
||||||
|
|
||||||
;; FIXME: There are two flavors of raw_bitcast, which are currently not
|
;; Bitcast between types residing in VRs is a no-op.
|
||||||
|
;; FIXME: There are two flavors of vector bitcast, which are currently not
|
||||||
;; distinguished in CLIF IR. Those generated by Wasmtime assume little-endian
|
;; distinguished in CLIF IR. Those generated by Wasmtime assume little-endian
|
||||||
;; lane order, and those generated elsewhere assume big-endian lane order.
|
;; lane order, and those generated elsewhere assume big-endian lane order.
|
||||||
;; Raw bitcast is a no-op if current lane order matches that assumed lane order.
|
;; Bitcast is a no-op if current lane order matches that assumed lane order.
|
||||||
;; However, due to our choice of lane order depending on the current function
|
;; However, due to our choice of lane order depending on the current function
|
||||||
;; ABI, every bitcast we currently see here is indeed a no-op.
|
;; ABI, every bitcast we currently see here is indeed a no-op.
|
||||||
(rule (lower (raw_bitcast x)) x)
|
(rule 4 (lower (has_type (vr128_ty _)
|
||||||
|
(bitcast x @ (value_type (vr128_ty _))))) x)
|
||||||
|
|
||||||
|
|
||||||
;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|||||||
@@ -141,7 +141,6 @@ impl LowerBackend for S390xBackend {
|
|||||||
| Opcode::ScalarToVector
|
| Opcode::ScalarToVector
|
||||||
| Opcode::VhighBits
|
| Opcode::VhighBits
|
||||||
| Opcode::Bitcast
|
| Opcode::Bitcast
|
||||||
| Opcode::RawBitcast
|
|
||||||
| Opcode::Load
|
| Opcode::Load
|
||||||
| Opcode::Uload8
|
| Opcode::Uload8
|
||||||
| Opcode::Sload8
|
| Opcode::Sload8
|
||||||
|
|||||||
@@ -3303,6 +3303,14 @@
|
|||||||
(rule (lower (has_type $F64 (bitcast src @ (value_type $I64))))
|
(rule (lower (has_type $F64 (bitcast src @ (value_type $I64))))
|
||||||
(bitcast_gpr_to_xmm $I64 src))
|
(bitcast_gpr_to_xmm $I64 src))
|
||||||
|
|
||||||
|
;; Bitcast between types residing in GPR registers is a no-op.
|
||||||
|
(rule 1 (lower (has_type (is_gpr_type _)
|
||||||
|
(bitcast x @ (value_type (is_gpr_type _))))) x)
|
||||||
|
|
||||||
|
;; Bitcast between types residing in XMM registers is a no-op.
|
||||||
|
(rule 2 (lower (has_type (is_xmm_type _)
|
||||||
|
(bitcast x @ (value_type (is_xmm_type _))))) x)
|
||||||
|
|
||||||
;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(rule (lower (has_type $F32 (fcopysign a @ (value_type $F32) b)))
|
(rule (lower (has_type $F32 (fcopysign a @ (value_type $F32) b)))
|
||||||
@@ -3472,15 +3480,6 @@
|
|||||||
;; TODO use Inst::gen_constant() instead.
|
;; TODO use Inst::gen_constant() instead.
|
||||||
(x64_xmm_load_const ty (const_to_vconst const)))
|
(x64_xmm_load_const ty (const_to_vconst const)))
|
||||||
|
|
||||||
;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
|
|
||||||
;; A raw_bitcast is just a mechanism for correcting the type of V128 values (see
|
|
||||||
;; https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR
|
|
||||||
;; instruction should emit no machine code but a move is necessary to give the
|
|
||||||
;; register allocator a definition for the output virtual register.
|
|
||||||
(rule (lower (raw_bitcast val))
|
|
||||||
(put_in_regs val))
|
|
||||||
|
|
||||||
;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
;; If `lhs` and `rhs` are the same we can use a single PSHUFB to shuffle the XMM
|
;; If `lhs` and `rhs` are the same we can use a single PSHUFB to shuffle the XMM
|
||||||
|
|||||||
@@ -453,7 +453,6 @@ fn lower_insn_to_regs(
|
|||||||
| Opcode::GetPinnedReg
|
| Opcode::GetPinnedReg
|
||||||
| Opcode::SetPinnedReg
|
| Opcode::SetPinnedReg
|
||||||
| Opcode::Vconst
|
| Opcode::Vconst
|
||||||
| Opcode::RawBitcast
|
|
||||||
| Opcode::Insertlane
|
| Opcode::Insertlane
|
||||||
| Opcode::Shuffle
|
| Opcode::Shuffle
|
||||||
| Opcode::Swizzle
|
| Opcode::Swizzle
|
||||||
|
|||||||
@@ -70,11 +70,11 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst) {
|
|||||||
.select(is_nan, canon_nan, new_res);
|
.select(is_nan, canon_nan, new_res);
|
||||||
};
|
};
|
||||||
let vector_select = |pos: &mut FuncCursor, canon_nan: Value| {
|
let vector_select = |pos: &mut FuncCursor, canon_nan: Value| {
|
||||||
let cond = pos.ins().raw_bitcast(types::I8X16, is_nan);
|
let cond = pos.ins().bitcast(types::I8X16, is_nan);
|
||||||
let canon_nan = pos.ins().raw_bitcast(types::I8X16, canon_nan);
|
let canon_nan = pos.ins().bitcast(types::I8X16, canon_nan);
|
||||||
let result = pos.ins().raw_bitcast(types::I8X16, new_res);
|
let result = pos.ins().bitcast(types::I8X16, new_res);
|
||||||
let bitmask = pos.ins().bitselect(cond, canon_nan, result);
|
let bitmask = pos.ins().bitselect(cond, canon_nan, result);
|
||||||
pos.ins().with_result(val).raw_bitcast(val_type, bitmask);
|
pos.ins().with_result(val).bitcast(val_type, bitmask);
|
||||||
};
|
};
|
||||||
|
|
||||||
match val_type {
|
match val_type {
|
||||||
|
|||||||
@@ -863,7 +863,7 @@ mod simplify {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
let new_type = I8.by(old_cond_type.bytes()).unwrap();
|
let new_type = I8.by(old_cond_type.bytes()).unwrap();
|
||||||
(pos.ins().raw_bitcast(new_type, args[0]), new_type)
|
(pos.ins().bitcast(new_type, args[0]), new_type)
|
||||||
}
|
}
|
||||||
_ => return,
|
_ => return,
|
||||||
};
|
};
|
||||||
@@ -874,10 +874,10 @@ mod simplify {
|
|||||||
|
|
||||||
if arg_type != old_arg_type {
|
if arg_type != old_arg_type {
|
||||||
// Operands types must match, we need to add bitcasts.
|
// Operands types must match, we need to add bitcasts.
|
||||||
let arg1 = pos.ins().raw_bitcast(arg_type, args[1]);
|
let arg1 = pos.ins().bitcast(arg_type, args[1]);
|
||||||
let arg2 = pos.ins().raw_bitcast(arg_type, args[2]);
|
let arg2 = pos.ins().bitcast(arg_type, args[2]);
|
||||||
let ret = pos.ins().vselect(cond_val, arg1, arg2);
|
let ret = pos.ins().vselect(cond_val, arg1, arg2);
|
||||||
pos.func.dfg.replace(inst).raw_bitcast(old_arg_type, ret);
|
pos.func.dfg.replace(inst).bitcast(old_arg_type, ret);
|
||||||
} else {
|
} else {
|
||||||
pos.func
|
pos.func
|
||||||
.dfg
|
.dfg
|
||||||
|
|||||||
@@ -1078,17 +1078,7 @@ impl<'a> Verifier<'a> {
|
|||||||
let typ = self.func.dfg.ctrl_typevar(inst);
|
let typ = self.func.dfg.ctrl_typevar(inst);
|
||||||
let value_type = self.func.dfg.value_type(arg);
|
let value_type = self.func.dfg.value_type(arg);
|
||||||
|
|
||||||
if typ.lane_bits() != value_type.lane_bits() {
|
if typ.bits() != value_type.bits() {
|
||||||
errors.fatal((
|
|
||||||
inst,
|
|
||||||
format!(
|
|
||||||
"The bitcast argument {} has a lane type of {} bits, which doesn't match an expected type of {} bits",
|
|
||||||
arg,
|
|
||||||
value_type.lane_bits(),
|
|
||||||
typ.lane_bits()
|
|
||||||
),
|
|
||||||
))
|
|
||||||
} else if typ.bits() != value_type.bits() {
|
|
||||||
errors.fatal((
|
errors.fatal((
|
||||||
inst,
|
inst,
|
||||||
format!(
|
format!(
|
||||||
|
|||||||
@@ -7,9 +7,9 @@ block0(v0: i32x4):
|
|||||||
;; In the x64 backend, all of these pseudo-instructions are lowered to moves between registers (e.g. MOVAPD, MOVDQA,
|
;; In the x64 backend, all of these pseudo-instructions are lowered to moves between registers (e.g. MOVAPD, MOVDQA,
|
||||||
;; etc.). Because these have been marked as moves, no instructions are emitted by this function besides the prologue
|
;; etc.). Because these have been marked as moves, no instructions are emitted by this function besides the prologue
|
||||||
;; and epilogue.
|
;; and epilogue.
|
||||||
v1 = raw_bitcast.f32x4 v0
|
v1 = bitcast.f32x4 v0
|
||||||
v2 = raw_bitcast.f64x2 v1
|
v2 = bitcast.f64x2 v1
|
||||||
v3 = raw_bitcast.i8x16 v2
|
v3 = bitcast.i8x16 v2
|
||||||
return v3
|
return v3
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ function %check_issue_3951(i64 vmctx) -> i8x16 fast {
|
|||||||
v4 = global_value.i64 gv0
|
v4 = global_value.i64 gv0
|
||||||
v5 = load.i8x16 notrap aligned v4+8
|
v5 = load.i8x16 notrap aligned v4+8
|
||||||
v6 = icmp ugt v3, v5
|
v6 = icmp ugt v3, v5
|
||||||
v7 = raw_bitcast.i8x16 v6
|
v7 = bitcast.i8x16 v6
|
||||||
jump block1(v7)
|
jump block1(v7)
|
||||||
block1(v1: i8x16):
|
block1(v1: i8x16):
|
||||||
return v1
|
return v1
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
test run
|
test run
|
||||||
target aarch64
|
target aarch64
|
||||||
; the interpreter, x86_64, and s390x do not support bitcasting to/from
|
target x86_64
|
||||||
; references
|
target s390x
|
||||||
|
; the interpreter does not support bitcasting to/from references
|
||||||
|
|
||||||
function %bitcast_ir64(i64) -> i8 {
|
function %bitcast_ir64(i64) -> i8 {
|
||||||
block0(v0: i64):
|
block0(v0: i64):
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
test interpret
|
test interpret
|
||||||
test run
|
test run
|
||||||
|
set enable_llvm_abi_extensions=true
|
||||||
target aarch64
|
target aarch64
|
||||||
; x86_64 and s390x do not support bitcasting to the same type as the input.
|
target x86_64
|
||||||
|
target s390x
|
||||||
|
|
||||||
function %bitcast_i8(i8) -> i8 {
|
function %bitcast_i8(i8) -> i8 {
|
||||||
block0(v0: i8):
|
block0(v0: i8):
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ block0:
|
|||||||
|
|
||||||
function %is_null_r64(i64) -> i8 {
|
function %is_null_r64(i64) -> i8 {
|
||||||
block0(v0: i64):
|
block0(v0: i64):
|
||||||
v1 = raw_bitcast.r64 v0
|
v1 = bitcast.r64 v0
|
||||||
v2 = is_null v1
|
v2 = is_null v1
|
||||||
return v2
|
return v2
|
||||||
}
|
}
|
||||||
@@ -24,7 +24,7 @@ block0(v0: i64):
|
|||||||
|
|
||||||
function %is_invalid_r64(i64) -> i8 {
|
function %is_invalid_r64(i64) -> i8 {
|
||||||
block0(v0: i64):
|
block0(v0: i64):
|
||||||
v1 = raw_bitcast.r64 v0
|
v1 = bitcast.r64 v0
|
||||||
v2 = is_invalid v1
|
v2 = is_invalid v1
|
||||||
return v2
|
return v2
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,21 @@
|
|||||||
|
test interpret
|
||||||
|
test run
|
||||||
|
target aarch64
|
||||||
|
;; 64-bit vector types only supported on aarch64
|
||||||
|
|
||||||
|
function %bitcast_if32x2(i32x2) -> f32x2 {
|
||||||
|
block0(v0: i32x2):
|
||||||
|
v1 = bitcast.f32x2 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %bitcast_if32x2([0 4294967295]) == [0x0.0 -NaN:0x3fffff]
|
||||||
|
; run: %bitcast_if32x2([-1 127]) == [-NaN:0x3fffff 0x0.0000fep-126]
|
||||||
|
|
||||||
|
function %bitcast_fi32x2(f32x2) -> i32x2 {
|
||||||
|
block0(v0: f32x2):
|
||||||
|
v1 = bitcast.i32x2 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %bitcast_fi32x2([0x0.0 -NaN:0x3fffff]) == [0 4294967295]
|
||||||
|
; run: %bitcast_fi32x2([-NaN:0x3fffff 0x0.0000fep-126]) == [-1 127]
|
||||||
|
|
||||||
@@ -1,23 +1,8 @@
|
|||||||
test interpret
|
test interpret
|
||||||
test run
|
test run
|
||||||
target aarch64
|
target aarch64
|
||||||
; x86_64 and s390x do not support vector bitcasts.
|
target x86_64
|
||||||
|
target s390x
|
||||||
function %bitcast_if32x2(i32x2) -> f32x2 {
|
|
||||||
block0(v0: i32x2):
|
|
||||||
v1 = bitcast.f32x2 v0
|
|
||||||
return v1
|
|
||||||
}
|
|
||||||
; run: %bitcast_if32x2([0 4294967295]) == [0x0.0 -NaN:0x3fffff]
|
|
||||||
; run: %bitcast_if32x2([-1 127]) == [-NaN:0x3fffff 0x0.0000fep-126]
|
|
||||||
|
|
||||||
function %bitcast_fi32x2(f32x2) -> i32x2 {
|
|
||||||
block0(v0: f32x2):
|
|
||||||
v1 = bitcast.i32x2 v0
|
|
||||||
return v1
|
|
||||||
}
|
|
||||||
; run: %bitcast_fi32x2([0x0.0 -NaN:0x3fffff]) == [0 4294967295]
|
|
||||||
; run: %bitcast_fi32x2([-NaN:0x3fffff 0x0.0000fep-126]) == [-1 127]
|
|
||||||
|
|
||||||
function %bitcast_if32x4(i32x4) -> f32x4 {
|
function %bitcast_if32x4(i32x4) -> f32x4 {
|
||||||
block0(v0: i32x4):
|
block0(v0: i32x4):
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ target x86_64 skylake
|
|||||||
function %mask_from_icmp(i32x4, i32x4) -> i32x4 {
|
function %mask_from_icmp(i32x4, i32x4) -> i32x4 {
|
||||||
block0(v0: i32x4, v1: i32x4):
|
block0(v0: i32x4, v1: i32x4):
|
||||||
v2 = icmp sge v0, v1
|
v2 = icmp sge v0, v1
|
||||||
v3 = raw_bitcast.i32x4 v2
|
v3 = bitcast.i32x4 v2
|
||||||
v4 = bitselect v3, v0, v1
|
v4 = bitselect v3, v0, v1
|
||||||
return v4
|
return v4
|
||||||
}
|
}
|
||||||
@@ -16,7 +16,7 @@ block0(v0: i32x4, v1: i32x4):
|
|||||||
|
|
||||||
function %mask_casted(i64x2, i64x2, i32x4) -> i64x2 {
|
function %mask_casted(i64x2, i64x2, i32x4) -> i64x2 {
|
||||||
block0(v0: i64x2, v1: i64x2, v2: i32x4):
|
block0(v0: i64x2, v1: i64x2, v2: i32x4):
|
||||||
v3 = raw_bitcast.i64x2 v2
|
v3 = bitcast.i64x2 v2
|
||||||
v4 = bitselect v3, v0, v1
|
v4 = bitselect v3, v0, v1
|
||||||
return v4
|
return v4
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ block0:
|
|||||||
v0 = vconst.i8x16 [0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
|
v0 = vconst.i8x16 [0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
v1 = vconst.i8x16 [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
|
v1 = vconst.i8x16 [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
|
||||||
v2 = icmp sgt v0, v1
|
v2 = icmp sgt v0, v1
|
||||||
v3 = raw_bitcast.i8x16 v2
|
v3 = bitcast.i8x16 v2
|
||||||
v4 = vconst.i8x16 [0 0 0xff 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
|
v4 = vconst.i8x16 [0 0 0xff 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
|
||||||
v7 = icmp eq v3, v4
|
v7 = icmp eq v3, v4
|
||||||
v8 = vall_true v7
|
v8 = vall_true v7
|
||||||
@@ -126,7 +126,7 @@ block0:
|
|||||||
v1 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
|
v1 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
|
||||||
v2 = icmp ult v0, v1
|
v2 = icmp ult v0, v1
|
||||||
v3 = vconst.i16x8 0x00
|
v3 = vconst.i16x8 0x00
|
||||||
v4 = raw_bitcast.i16x8 v2
|
v4 = bitcast.i16x8 v2
|
||||||
v5 = icmp eq v3, v4
|
v5 = icmp eq v3, v4
|
||||||
v8 = vall_true v5
|
v8 = vall_true v5
|
||||||
return v8
|
return v8
|
||||||
@@ -200,7 +200,7 @@ block0:
|
|||||||
v2 = fcmp gt v0, v1
|
v2 = fcmp gt v0, v1
|
||||||
; now check that the result v2 is all zeroes
|
; now check that the result v2 is all zeroes
|
||||||
v3 = vconst.i32x4 0x00
|
v3 = vconst.i32x4 0x00
|
||||||
v4 = raw_bitcast.i32x4 v2
|
v4 = bitcast.i32x4 v2
|
||||||
v5 = icmp eq v3, v4
|
v5 = icmp eq v3, v4
|
||||||
v8 = vall_true v5
|
v8 = vall_true v5
|
||||||
return v8
|
return v8
|
||||||
|
|||||||
@@ -26,10 +26,10 @@ block0:
|
|||||||
function %shuffle_i32x4_in_same_place() -> i32x4 {
|
function %shuffle_i32x4_in_same_place() -> i32x4 {
|
||||||
block0:
|
block0:
|
||||||
v1 = vconst.i32x4 [0 1 2 3]
|
v1 = vconst.i32x4 [0 1 2 3]
|
||||||
v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
|
v2 = bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
|
||||||
; keep each lane in place from the first vector
|
; keep each lane in place from the first vector
|
||||||
v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
|
v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
|
||||||
v4 = raw_bitcast.i32x4 v3
|
v4 = bitcast.i32x4 v3
|
||||||
return v4
|
return v4
|
||||||
}
|
}
|
||||||
; run: %shuffle_in_same_place() == [0 1 2 3]
|
; run: %shuffle_in_same_place() == [0 1 2 3]
|
||||||
@@ -37,10 +37,10 @@ block0:
|
|||||||
function %shuffle_i32x4_to_all_true() -> i32x4 {
|
function %shuffle_i32x4_to_all_true() -> i32x4 {
|
||||||
block0:
|
block0:
|
||||||
v1 = vconst.i32x4 [-1 0 -1 0]
|
v1 = vconst.i32x4 [-1 0 -1 0]
|
||||||
v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
|
v2 = bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
|
||||||
; pair up the true values to make the entire vector true
|
; pair up the true values to make the entire vector true
|
||||||
v3 = shuffle v2, v2, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11]
|
v3 = shuffle v2, v2, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11]
|
||||||
v4 = raw_bitcast.i32x4 v3 ; TODO store.i32x4 is unavailable; see https://github.com/bytecodealliance/wasmtime/issues/2237
|
v4 = bitcast.i32x4 v3 ; TODO store.i32x4 is unavailable; see https://github.com/bytecodealliance/wasmtime/issues/2237
|
||||||
return v4
|
return v4
|
||||||
}
|
}
|
||||||
; run: %shuffle_i32x4_to_all_true() == [0xffffffff 0xffffffff 0xffffffff 0xffffffff]
|
; run: %shuffle_i32x4_to_all_true() == [0xffffffff 0xffffffff 0xffffffff 0xffffffff]
|
||||||
@@ -100,7 +100,7 @@ block0:
|
|||||||
v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 -1 0 0
|
v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 -1 0 0
|
||||||
0 0 0]
|
0 0 0]
|
||||||
v2 = extractlane v1, 10
|
v2 = extractlane v1, 10
|
||||||
v3 = raw_bitcast.i8 v2
|
v3 = bitcast.i8 v2
|
||||||
return v3
|
return v3
|
||||||
}
|
}
|
||||||
; run: %extractlane_i8x16_last() == 0xff
|
; run: %extractlane_i8x16_last() == 0xff
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ block0(v0: i8x16, v1: i8x16):
|
|||||||
;; can't remove the bitselect in this case.
|
;; can't remove the bitselect in this case.
|
||||||
function %mask_casted(i8x16, i8x16, i32x4) -> i8x16 {
|
function %mask_casted(i8x16, i8x16, i32x4) -> i8x16 {
|
||||||
block0(v0: i8x16, v1: i8x16, v2: i32x4):
|
block0(v0: i8x16, v1: i8x16, v2: i32x4):
|
||||||
v3 = raw_bitcast.i8x16 v2
|
v3 = bitcast.i8x16 v2
|
||||||
v4 = bitselect v3, v0, v1
|
v4 = bitselect v3, v0, v1
|
||||||
; check: v4 = bitselect v3, v0, v1
|
; check: v4 = bitselect v3, v0, v1
|
||||||
return v4
|
return v4
|
||||||
@@ -26,7 +26,7 @@ function %good_const_mask_i8x16(i8x16, i8x16) -> i8x16 {
|
|||||||
block0(v0: i8x16, v1: i8x16):
|
block0(v0: i8x16, v1: i8x16):
|
||||||
v3 = vconst.i8x16 [0 0 0xFF 0 0 0xFF 0 0 0 0 0xFF 0 0 0 0 0xFF]
|
v3 = vconst.i8x16 [0 0 0xFF 0 0 0xFF 0 0 0 0 0xFF 0 0 0 0 0xFF]
|
||||||
v4 = bitselect v3, v0, v1
|
v4 = bitselect v3, v0, v1
|
||||||
; check: v5 = raw_bitcast.i8x16 v3
|
; check: v5 = bitcast.i8x16 v3
|
||||||
; nextln: v4 = vselect v5, v0, v1
|
; nextln: v4 = vselect v5, v0, v1
|
||||||
return v4
|
return v4
|
||||||
}
|
}
|
||||||
@@ -35,11 +35,11 @@ function %good_const_mask_i16x8(i16x8, i16x8) -> i16x8 {
|
|||||||
block0(v0: i16x8, v1: i16x8):
|
block0(v0: i16x8, v1: i16x8):
|
||||||
v3 = vconst.i16x8 [0x0000 0xFF00 0x0000 0x00FF 0x0000 0xFFFF 0x00FF 0xFFFF]
|
v3 = vconst.i16x8 [0x0000 0xFF00 0x0000 0x00FF 0x0000 0xFFFF 0x00FF 0xFFFF]
|
||||||
v4 = bitselect v3, v0, v1
|
v4 = bitselect v3, v0, v1
|
||||||
; check: v5 = raw_bitcast.i8x16 v3
|
; check: v5 = bitcast.i8x16 v3
|
||||||
; nextln: v6 = raw_bitcast.i8x16 v0
|
; nextln: v6 = bitcast.i8x16 v0
|
||||||
; nextln: v7 = raw_bitcast.i8x16 v1
|
; nextln: v7 = bitcast.i8x16 v1
|
||||||
; nextln: v8 = vselect v5, v6, v7
|
; nextln: v8 = vselect v5, v6, v7
|
||||||
; nextln: v4 = raw_bitcast.i16x8 v8
|
; nextln: v4 = bitcast.i16x8 v8
|
||||||
return v4
|
return v4
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -10,14 +10,14 @@ block0(v0: i32):
|
|||||||
; bitcast to a type larger than the operand is not ok
|
; bitcast to a type larger than the operand is not ok
|
||||||
function %valid_bitcast2(i32) -> i64 {
|
function %valid_bitcast2(i32) -> i64 {
|
||||||
block0(v0: i32):
|
block0(v0: i32):
|
||||||
v1 = bitcast.i64 v0 ; error: The bitcast argument v0 has a lane type of 32 bits, which doesn't match an expected type of 64 bits
|
v1 = bitcast.i64 v0 ; error: The bitcast argument v0 has a type of 32 bits, which doesn't match an expected type of 64 bits
|
||||||
return v1
|
return v1
|
||||||
}
|
}
|
||||||
|
|
||||||
; bitcast to a smaller type is not ok
|
; bitcast to a smaller type is not ok
|
||||||
function %bad_bitcast(i64) -> i32 {
|
function %bad_bitcast(i64) -> i32 {
|
||||||
block0(v0: i64):
|
block0(v0: i64):
|
||||||
v1 = bitcast.i32 v0 ; error: The bitcast argument v0 has a lane type of 64 bits, which doesn't match an expected type of 32 bits
|
v1 = bitcast.i32 v0 ; error: The bitcast argument v0 has a type of 64 bits, which doesn't match an expected type of 32 bits
|
||||||
return v1
|
return v1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -947,7 +947,7 @@ where
|
|||||||
Opcode::Nearest => assign(Value::nearest(arg(0)?)?),
|
Opcode::Nearest => assign(Value::nearest(arg(0)?)?),
|
||||||
Opcode::IsNull => unimplemented!("IsNull"),
|
Opcode::IsNull => unimplemented!("IsNull"),
|
||||||
Opcode::IsInvalid => unimplemented!("IsInvalid"),
|
Opcode::IsInvalid => unimplemented!("IsInvalid"),
|
||||||
Opcode::Bitcast | Opcode::RawBitcast | Opcode::ScalarToVector => {
|
Opcode::Bitcast | Opcode::ScalarToVector => {
|
||||||
let input_ty = inst_context.type_of(inst_context.args()[0]).unwrap();
|
let input_ty = inst_context.type_of(inst_context.args()[0]).unwrap();
|
||||||
let arg0 = extractlanes(&arg(0)?, input_ty)?;
|
let arg0 = extractlanes(&arg(0)?, input_ty)?;
|
||||||
|
|
||||||
|
|||||||
@@ -335,7 +335,7 @@ impl Value for DataValue {
|
|||||||
fn convert(self, kind: ValueConversionKind) -> ValueResult<Self> {
|
fn convert(self, kind: ValueConversionKind) -> ValueResult<Self> {
|
||||||
Ok(match kind {
|
Ok(match kind {
|
||||||
ValueConversionKind::Exact(ty) => match (self, ty) {
|
ValueConversionKind::Exact(ty) => match (self, ty) {
|
||||||
// TODO a lot to do here: from bmask to ireduce to raw_bitcast...
|
// TODO a lot to do here: from bmask to ireduce to bitcast...
|
||||||
(val, ty) if val.ty().is_int() && ty.is_int() => {
|
(val, ty) if val.ty().is_int() && ty.is_int() => {
|
||||||
DataValue::from_integer(val.into_int()?, ty)?
|
DataValue::from_integer(val.into_int()?, ty)?
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1427,7 +1427,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
let data = value.bytes().to_vec().into();
|
let data = value.bytes().to_vec().into();
|
||||||
let handle = builder.func.dfg.constants.insert(data);
|
let handle = builder.func.dfg.constants.insert(data);
|
||||||
let value = builder.ins().vconst(I8X16, handle);
|
let value = builder.ins().vconst(I8X16, handle);
|
||||||
// the v128.const is typed in CLIF as a I8x16 but raw_bitcast to a different type
|
// the v128.const is typed in CLIF as a I8x16 but bitcast to a different type
|
||||||
// before use
|
// before use
|
||||||
state.push1(value)
|
state.push1(value)
|
||||||
}
|
}
|
||||||
@@ -1536,7 +1536,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
let shuffled = builder.ins().shuffle(a, b, mask);
|
let shuffled = builder.ins().shuffle(a, b, mask);
|
||||||
state.push1(shuffled)
|
state.push1(shuffled)
|
||||||
// At this point the original types of a and b are lost; users of this value (i.e. this
|
// At this point the original types of a and b are lost; users of this value (i.e. this
|
||||||
// WASM-to-CLIF translator) may need to raw_bitcast for type-correctness. This is due
|
// WASM-to-CLIF translator) may need to bitcast for type-correctness. This is due
|
||||||
// to WASM using the less specific v128 type for certain operations and more specific
|
// to WASM using the less specific v128 type for certain operations and more specific
|
||||||
// types (e.g. i8x16) for others.
|
// types (e.g. i8x16) for others.
|
||||||
}
|
}
|
||||||
@@ -2895,14 +2895,14 @@ fn type_of(operator: &Operator) -> Type {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Some SIMD operations only operate on I8X16 in CLIF; this will convert them to that type by
|
/// Some SIMD operations only operate on I8X16 in CLIF; this will convert them to that type by
|
||||||
/// adding a raw_bitcast if necessary.
|
/// adding a bitcast if necessary.
|
||||||
fn optionally_bitcast_vector(
|
fn optionally_bitcast_vector(
|
||||||
value: Value,
|
value: Value,
|
||||||
needed_type: Type,
|
needed_type: Type,
|
||||||
builder: &mut FunctionBuilder,
|
builder: &mut FunctionBuilder,
|
||||||
) -> Value {
|
) -> Value {
|
||||||
if builder.func.dfg.value_type(value) != needed_type {
|
if builder.func.dfg.value_type(value) != needed_type {
|
||||||
builder.ins().raw_bitcast(needed_type, value)
|
builder.ins().bitcast(needed_type, value)
|
||||||
} else {
|
} else {
|
||||||
value
|
value
|
||||||
}
|
}
|
||||||
@@ -2937,7 +2937,7 @@ fn canonicalise_v128_values<'a>(
|
|||||||
// Otherwise we'll have to cast, and push the resulting `Value`s into `canonicalised`.
|
// Otherwise we'll have to cast, and push the resulting `Value`s into `canonicalised`.
|
||||||
for v in values {
|
for v in values {
|
||||||
tmp_canonicalised.push(if is_non_canonical_v128(builder.func.dfg.value_type(*v)) {
|
tmp_canonicalised.push(if is_non_canonical_v128(builder.func.dfg.value_type(*v)) {
|
||||||
builder.ins().raw_bitcast(I8X16, *v)
|
builder.ins().bitcast(I8X16, *v)
|
||||||
} else {
|
} else {
|
||||||
*v
|
*v
|
||||||
});
|
});
|
||||||
@@ -3048,7 +3048,7 @@ fn bitcast_arguments<'a>(
|
|||||||
|
|
||||||
/// A helper for bitcasting a sequence of return values for the function currently being built. If
|
/// A helper for bitcasting a sequence of return values for the function currently being built. If
|
||||||
/// a value is a vector type that does not match its expected type, this will modify the value in
|
/// a value is a vector type that does not match its expected type, this will modify the value in
|
||||||
/// place to point to the result of a `raw_bitcast`. This conversion is necessary to translate Wasm
|
/// place to point to the result of a `bitcast`. This conversion is necessary to translate Wasm
|
||||||
/// code that uses `V128` as function parameters (or implicitly in block parameters) and still use
|
/// code that uses `V128` as function parameters (or implicitly in block parameters) and still use
|
||||||
/// specific CLIF types (e.g. `I32X4`) in the function body.
|
/// specific CLIF types (e.g. `I32X4`) in the function body.
|
||||||
pub fn bitcast_wasm_returns<FE: FuncEnvironment + ?Sized>(
|
pub fn bitcast_wasm_returns<FE: FuncEnvironment + ?Sized>(
|
||||||
@@ -3060,7 +3060,7 @@ pub fn bitcast_wasm_returns<FE: FuncEnvironment + ?Sized>(
|
|||||||
environ.is_wasm_return(&builder.func.signature, i)
|
environ.is_wasm_return(&builder.func.signature, i)
|
||||||
});
|
});
|
||||||
for (t, arg) in changes {
|
for (t, arg) in changes {
|
||||||
*arg = builder.ins().raw_bitcast(t, *arg);
|
*arg = builder.ins().bitcast(t, *arg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3076,6 +3076,6 @@ fn bitcast_wasm_params<FE: FuncEnvironment + ?Sized>(
|
|||||||
environ.is_wasm_parameter(&callee_signature, i)
|
environ.is_wasm_parameter(&callee_signature, i)
|
||||||
});
|
});
|
||||||
for (t, arg) in changes {
|
for (t, arg) in changes {
|
||||||
*arg = builder.ins().raw_bitcast(t, *arg);
|
*arg = builder.ins().bitcast(t, *arg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user