x64: Fix codegen for the i8x16.swizzle instruction (#4318)
This commit fixes a mistake in the `Swizzle` opcode implementation in the x64 backend of Cranelift. Previously, an input register was cast to a writable register and then modified in place, which I believe instructions are not supposed to do. The mask is now copied into a freshly allocated temporary register, and that temporary is modified instead. This was discovered as part of my investigation into #4315.
This commit is contained in:
@@ -2554,17 +2554,18 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
ctx.emit(Inst::xmm_load_const(constant, zero_mask, ty));
|
ctx.emit(Inst::xmm_load_const(constant, zero_mask, ty));
|
||||||
|
|
||||||
// Use the `zero_mask` on a writable `swizzle_mask`.
|
// Use the `zero_mask` on a writable `swizzle_mask`.
|
||||||
let swizzle_mask = Writable::from_reg(swizzle_mask);
|
let swizzle_mask_tmp = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::gen_move(swizzle_mask_tmp, swizzle_mask, ty));
|
||||||
ctx.emit(Inst::xmm_rm_r(
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
SseOpcode::Paddusb,
|
SseOpcode::Paddusb,
|
||||||
RegMem::from(zero_mask),
|
RegMem::from(zero_mask),
|
||||||
swizzle_mask,
|
swizzle_mask_tmp,
|
||||||
));
|
));
|
||||||
|
|
||||||
// Shuffle `dst` using the fixed-up `swizzle_mask`.
|
// Shuffle `dst` using the fixed-up `swizzle_mask`.
|
||||||
ctx.emit(Inst::xmm_rm_r(
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
SseOpcode::Pshufb,
|
SseOpcode::Pshufb,
|
||||||
RegMem::from(swizzle_mask),
|
RegMem::from(swizzle_mask_tmp),
|
||||||
dst,
|
dst,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user