CL/aarch64: implement the wasm SIMD i32x4.dot_i16x8_s instruction
This patch implements, for aarch64, the following wasm SIMD extensions i32x4.dot_i16x8_s instruction https://github.com/WebAssembly/simd/pull/127 It also updates dependencies as follows, in order that the new instruction can be parsed, decoded, etc: wat to 1.0.27 wast to 26.0.1 wasmparser to 0.65.0 wasmprinter to 0.2.12 The changes are straightforward: * new CLIF instruction `widening_pairwise_dot_product_s` * translation from wasm into `widening_pairwise_dot_product_s` * new AArch64 instructions `smull`, `smull2` (part of the `VecRRR` group) * translation from `widening_pairwise_dot_product_s` to `smull ; smull2 ; addv` There is no testcase in this commit, because that is a separate repo. The implementation has been tested, nevertheless.
This commit is contained in:
committed by
julian-seward1
parent
54a97f784e
commit
5a5fb11979
@@ -2375,6 +2375,47 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
});
|
||||
}
|
||||
|
||||
Opcode::WideningPairwiseDotProductS => {
|
||||
let r_y = get_output_reg(ctx, outputs[0]);
|
||||
let r_a = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let r_b = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
if ty == I32X4 {
|
||||
let tmp = ctx.alloc_tmp(RegClass::V128, I8X16);
|
||||
// The args have type I16X8.
|
||||
// "y = i32x4.dot_i16x8_s(a, b)"
|
||||
// => smull tmp, a, b
|
||||
// smull2 y, a, b
|
||||
// addp y, tmp, y
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Smull,
|
||||
rd: tmp,
|
||||
rn: r_a,
|
||||
rm: r_b,
|
||||
size: VectorSize::Size16x8,
|
||||
});
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Smull2,
|
||||
rd: r_y,
|
||||
rn: r_a,
|
||||
rm: r_b,
|
||||
size: VectorSize::Size16x8,
|
||||
});
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Addp,
|
||||
rd: r_y,
|
||||
rn: tmp.to_reg(),
|
||||
rm: r_y.to_reg(),
|
||||
size: VectorSize::Size32x4,
|
||||
});
|
||||
} else {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"Opcode::WideningPairwiseDotProductS: unsupported laneage: {:?}",
|
||||
ty
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv | Opcode::Fmin | Opcode::Fmax => {
|
||||
let ty = ty.unwrap();
|
||||
let bits = ty_bits(ty);
|
||||
|
||||
Reference in New Issue
Block a user