Refactor lowering structure for ext_mul on x64 and add comments

This commit is contained in:
Johnnie Birch
2021-07-14 23:17:40 -07:00
parent e5b6bee968
commit 2452a4cd74

View File

@@ -1663,8 +1663,20 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Imul => { Opcode::Imul => {
let ty = ty.unwrap(); let ty = ty.unwrap();
// First check for ext_mul_* instructions. Where possible ext_mul_* lowerings // Check for ext_mul_* instructions which are being shared here under imul. We must
// are based on optimized lowerings here: https://github.com/WebAssembly/simd/pull/376 // check first for operands that are opcodes since checking for types is not enough.
if let Some(_) = matches_input_any(
ctx,
inputs[0],
&[
Opcode::SwidenHigh,
Opcode::SwidenLow,
Opcode::UwidenHigh,
Opcode::UwidenLow,
],
) {
// Where possible, the ext_mul_* lowerings below follow the optimized
// instruction sequences described here: https://github.com/WebAssembly/simd/pull/376
if let Some(swiden0_high) = matches_input(ctx, inputs[0], Opcode::SwidenHigh) { if let Some(swiden0_high) = matches_input(ctx, inputs[0], Opcode::SwidenHigh) {
if let Some(swiden1_high) = matches_input(ctx, inputs[1], Opcode::SwidenHigh) { if let Some(swiden1_high) = matches_input(ctx, inputs[1], Opcode::SwidenHigh) {
let swiden_input = &[ let swiden_input = &[
@@ -1687,6 +1699,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
match (input0_ty, input1_ty, output_ty) { match (input0_ty, input1_ty, output_ty) {
(types::I8X16, types::I8X16, types::I16X8) => { (types::I8X16, types::I8X16, types::I16X8) => {
// i16x8.extmul_high_i8x16_s // i16x8.extmul_high_i8x16_s
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp_reg, lhs, output_ty)); ctx.emit(Inst::gen_move(tmp_reg, lhs, output_ty));
ctx.emit(Inst::xmm_rm_r_imm( ctx.emit(Inst::xmm_rm_r_imm(
@@ -1723,7 +1736,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty)); ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst)); ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmulhw, RegMem::reg(rhs), tmp_reg)); ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmulhw,
RegMem::reg(rhs),
tmp_reg,
));
ctx.emit(Inst::xmm_rm_r( ctx.emit(Inst::xmm_rm_r(
SseOpcode::Punpckhwd, SseOpcode::Punpckhwd,
RegMem::from(tmp_reg), RegMem::from(tmp_reg),
@@ -1753,6 +1770,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
dst, dst,
)); ));
} }
// Note swiden_high only allows types: I8X16, I16X8, and I32X4
_ => panic!("Unsupported extmul_low_signed type"), _ => panic!("Unsupported extmul_low_signed type"),
} }
} }
@@ -1797,7 +1815,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty)); ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst)); ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmulhw, RegMem::reg(rhs), tmp_reg)); ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmulhw,
RegMem::reg(rhs),
tmp_reg,
));
ctx.emit(Inst::xmm_rm_r( ctx.emit(Inst::xmm_rm_r(
SseOpcode::Punpcklwd, SseOpcode::Punpcklwd,
RegMem::from(tmp_reg), RegMem::from(tmp_reg),
@@ -1827,10 +1849,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
dst, dst,
)); ));
} }
// Note swiden_low only allows types: I8X16, I16X8, and I32X4
_ => panic!("Unsupported extmul_low_signed type"), _ => panic!("Unsupported extmul_low_signed type"),
} }
} }
} else if let Some(uwiden0_high) = matches_input(ctx, inputs[0], Opcode::UwidenHigh) { } else if let Some(uwiden0_high) = matches_input(ctx, inputs[0], Opcode::UwidenHigh)
{
if let Some(uwiden1_high) = matches_input(ctx, inputs[1], Opcode::UwidenHigh) { if let Some(uwiden1_high) = matches_input(ctx, inputs[1], Opcode::UwidenHigh) {
let uwiden_input = &[ let uwiden_input = &[
InsnInput { InsnInput {
@@ -1921,7 +1945,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
dst, dst,
)); ));
} }
_ => panic!("Unsupported extmul_low_signed type"), // Note uwiden_high only allows types: I8X16, I16X8, and I32X4
_ => panic!("Unsupported extmul_high_unsigned type"),
} }
} }
} else if let Some(uwiden0_low) = matches_input(ctx, inputs[0], Opcode::UwidenLow) { } else if let Some(uwiden0_low) = matches_input(ctx, inputs[0], Opcode::UwidenLow) {
@@ -2000,9 +2025,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
dst, dst,
)); ));
} }
_ => panic!("Unsupported extmul_low_signed type"), // Note uwiden_low only allows types: I8X16, I16X8, and I32X4
_ => panic!("Unsupported extmul_low_unsigned type"),
} }
} }
} else {
panic!("Unsupported imul operation for type: {}", ty);
}
} else if ty == types::I64X2 { } else if ty == types::I64X2 {
// Eventually one of these should be `input_to_reg_mem` (TODO). // Eventually one of these should be `input_to_reg_mem` (TODO).
let lhs = put_input_in_reg(ctx, inputs[0]); let lhs = put_input_in_reg(ctx, inputs[0]);