x64: Fix codegen for the i8x16.swizzle instruction (#4318)
This commit fixes a mistake in the `Swizzle` opcode implementation in the x64 backend of Cranelift. Previously, an input register was cast to a writable register and then modified, which I believe instructions are not supposed to do. This was discovered as part of my investigation into #4315.
This commit is contained in:
@@ -2554,17 +2554,18 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx.emit(Inst::xmm_load_const(constant, zero_mask, ty));
|
||||
|
||||
// Use the `zero_mask` on a writable `swizzle_mask`.
|
||||
let swizzle_mask = Writable::from_reg(swizzle_mask);
|
||||
let swizzle_mask_tmp = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(swizzle_mask_tmp, swizzle_mask, ty));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Paddusb,
|
||||
RegMem::from(zero_mask),
|
||||
swizzle_mask,
|
||||
swizzle_mask_tmp,
|
||||
));
|
||||
|
||||
// Shuffle `dst` using the fixed-up `swizzle_mask`.
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pshufb,
|
||||
RegMem::from(swizzle_mask),
|
||||
RegMem::from(swizzle_mask_tmp),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user