Fix for #3089: x64 ext_mul_i8x16 has incorrect lowering

Also removes an unnecessary temporary register.
This commit is contained in:
Johnnie Birch
2021-07-18 11:47:43 -07:00
parent 766774e1f5
commit ffec1f9b41

View File

@@ -1705,20 +1705,17 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
match (input0_ty, input1_ty, output_ty) { match (input0_ty, input1_ty, output_ty) {
(types::I8X16, types::I8X16, types::I16X8) => { (types::I8X16, types::I8X16, types::I16X8) => {
// i16x8.extmul_high_i8x16_s // i16x8.extmul_high_i8x16_s
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp_reg, lhs, output_ty));
ctx.emit(Inst::xmm_rm_r_imm( ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Palignr, SseOpcode::Palignr,
RegMem::reg(lhs), RegMem::reg(lhs),
tmp_reg, Writable::from_reg(lhs),
8, 8,
OperandSize::Size32, OperandSize::Size32,
)); ));
ctx.emit(Inst::xmm_mov( ctx.emit(Inst::xmm_mov(
SseOpcode::Pmovsxbw, SseOpcode::Pmovsxbw,
RegMem::reg(lhs), RegMem::reg(lhs),
tmp_reg, Writable::from_reg(lhs),
)); ));
ctx.emit(Inst::gen_move(dst, rhs, output_ty)); ctx.emit(Inst::gen_move(dst, rhs, output_ty));
@@ -1729,12 +1726,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
8, 8,
OperandSize::Size32, OperandSize::Size32,
)); ));
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(rhs), dst)); ctx.emit(Inst::xmm_mov(
ctx.emit(Inst::xmm_rm_r( SseOpcode::Pmovsxbw,
SseOpcode::Pmullw, RegMem::reg(dst.to_reg()),
RegMem::reg(tmp_reg.to_reg()),
dst, dst,
)); ));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(lhs), dst));
} }
(types::I16X8, types::I16X8, types::I32X4) => { (types::I16X8, types::I16X8, types::I32X4) => {
// i32x4.extmul_high_i16x8_s // i32x4.extmul_high_i16x8_s
@@ -1882,19 +1879,17 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
match (input0_ty, input1_ty, output_ty) { match (input0_ty, input1_ty, output_ty) {
(types::I8X16, types::I8X16, types::I16X8) => { (types::I8X16, types::I8X16, types::I16X8) => {
// i16x8.extmul_high_i8x16_u // i16x8.extmul_high_i8x16_u
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp_reg, lhs, output_ty));
ctx.emit(Inst::xmm_rm_r_imm( ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Palignr, SseOpcode::Palignr,
RegMem::reg(lhs), RegMem::reg(lhs),
tmp_reg, Writable::from_reg(lhs),
8, 8,
OperandSize::Size32, OperandSize::Size32,
)); ));
ctx.emit(Inst::xmm_mov( ctx.emit(Inst::xmm_mov(
SseOpcode::Pmovzxbw, SseOpcode::Pmovzxbw,
RegMem::reg(lhs), RegMem::reg(lhs),
tmp_reg, Writable::from_reg(lhs),
)); ));
ctx.emit(Inst::gen_move(dst, rhs, output_ty)); ctx.emit(Inst::gen_move(dst, rhs, output_ty));
ctx.emit(Inst::xmm_rm_r_imm( ctx.emit(Inst::xmm_rm_r_imm(
@@ -1904,12 +1899,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
8, 8,
OperandSize::Size32, OperandSize::Size32,
)); ));
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(rhs), dst)); ctx.emit(Inst::xmm_mov(
ctx.emit(Inst::xmm_rm_r( SseOpcode::Pmovzxbw,
SseOpcode::Pmullw, RegMem::reg(dst.to_reg()),
RegMem::reg(tmp_reg.to_reg()),
dst, dst,
)); ));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(lhs), dst));
} }
(types::I16X8, types::I16X8, types::I32X4) => { (types::I16X8, types::I16X8, types::I32X4) => {
// i32x4.extmul_high_i16x8_u // i32x4.extmul_high_i16x8_u