Add support for Saturating Rounding Q-format Multiplication for x64
This commit is contained in:
1
build.rs
1
build.rs
@@ -191,7 +191,6 @@ fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
|||||||
|
|
||||||
match (testsuite, testname) {
|
match (testsuite, testname) {
|
||||||
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
|
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
|
||||||
("simd", "simd_i16x8_q15mulr_sat_s") => return true,
|
|
||||||
("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
|
("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
|
||||||
("simd", "simd_i32x4_trunc_sat_f64x2") => return true,
|
("simd", "simd_i32x4_trunc_sat_f64x2") => return true,
|
||||||
("simd", "simd_int_to_int_extend") => return true,
|
("simd", "simd_int_to_int_extend") => return true,
|
||||||
|
|||||||
@@ -596,6 +596,7 @@ pub enum SseOpcode {
|
|||||||
Pmuldq,
|
Pmuldq,
|
||||||
Pmulhw,
|
Pmulhw,
|
||||||
Pmulhuw,
|
Pmulhuw,
|
||||||
|
Pmulhrsw,
|
||||||
Pmulld,
|
Pmulld,
|
||||||
Pmullw,
|
Pmullw,
|
||||||
Pmuludq,
|
Pmuludq,
|
||||||
@@ -785,6 +786,7 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Pabsw
|
| SseOpcode::Pabsw
|
||||||
| SseOpcode::Pabsd
|
| SseOpcode::Pabsd
|
||||||
| SseOpcode::Palignr
|
| SseOpcode::Palignr
|
||||||
|
| SseOpcode::Pmulhrsw
|
||||||
| SseOpcode::Pshufb => SSSE3,
|
| SseOpcode::Pshufb => SSSE3,
|
||||||
|
|
||||||
SseOpcode::Blendvpd
|
SseOpcode::Blendvpd
|
||||||
@@ -966,6 +968,7 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Pmuldq => "pmuldq",
|
SseOpcode::Pmuldq => "pmuldq",
|
||||||
SseOpcode::Pmulhw => "pmulhw",
|
SseOpcode::Pmulhw => "pmulhw",
|
||||||
SseOpcode::Pmulhuw => "pmulhuw",
|
SseOpcode::Pmulhuw => "pmulhuw",
|
||||||
|
SseOpcode::Pmulhrsw => "pmulhrsw",
|
||||||
SseOpcode::Pmulld => "pmulld",
|
SseOpcode::Pmulld => "pmulld",
|
||||||
SseOpcode::Pmullw => "pmullw",
|
SseOpcode::Pmullw => "pmullw",
|
||||||
SseOpcode::Pmuludq => "pmuludq",
|
SseOpcode::Pmuludq => "pmuludq",
|
||||||
|
|||||||
@@ -1510,6 +1510,7 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Pminud => (LegacyPrefixes::_66, 0x0F383B, 3),
|
SseOpcode::Pminud => (LegacyPrefixes::_66, 0x0F383B, 3),
|
||||||
SseOpcode::Pmuldq => (LegacyPrefixes::_66, 0x0F3828, 3),
|
SseOpcode::Pmuldq => (LegacyPrefixes::_66, 0x0F3828, 3),
|
||||||
SseOpcode::Pmulhw => (LegacyPrefixes::_66, 0x0FE5, 2),
|
SseOpcode::Pmulhw => (LegacyPrefixes::_66, 0x0FE5, 2),
|
||||||
|
SseOpcode::Pmulhrsw => (LegacyPrefixes::_66, 0x0F380B, 3),
|
||||||
SseOpcode::Pmulhuw => (LegacyPrefixes::_66, 0x0FE4, 2),
|
SseOpcode::Pmulhuw => (LegacyPrefixes::_66, 0x0FE4, 2),
|
||||||
SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
|
SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
|
||||||
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
|
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
|
||||||
@@ -1754,7 +1755,6 @@ pub(crate) fn emit(
|
|||||||
let (prefix, opcode) = match op {
|
let (prefix, opcode) = match op {
|
||||||
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F29),
|
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F29),
|
||||||
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F29),
|
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F29),
|
||||||
SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F7F),
|
|
||||||
SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F7F),
|
SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F7F),
|
||||||
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F11),
|
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F11),
|
||||||
SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F11),
|
SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F11),
|
||||||
|
|||||||
@@ -6406,6 +6406,42 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
Opcode::SqmulRoundSat => {
|
||||||
|
// Lane-wise saturating rounding multiplication in Q15 format
|
||||||
|
// Optimal lowering taken from instruction proposal https://github.com/WebAssembly/simd/pull/365
|
||||||
|
// y = i16x8.q15mulr_sat_s(a, b) is lowered to:
|
||||||
|
//MOVDQA xmm_y, xmm_a
|
||||||
|
//MOVDQA xmm_tmp, wasm_i16x8_splat(0x8000)
|
||||||
|
//PMULHRSW xmm_y, xmm_b
|
||||||
|
//PCMPEQW xmm_tmp, xmm_y
|
||||||
|
//PXOR xmm_y, xmm_tmp
|
||||||
|
let input_ty = ctx.input_ty(insn, 0);
|
||||||
|
let src1 = put_input_in_reg(ctx, inputs[0]);
|
||||||
|
let src2 = put_input_in_reg(ctx, inputs[1]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
|
||||||
|
ctx.emit(Inst::gen_move(dst, src1, input_ty));
|
||||||
|
static SAT_MASK: [u8; 16] = [
|
||||||
|
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
|
||||||
|
0x00, 0x80,
|
||||||
|
];
|
||||||
|
let mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&SAT_MASK));
|
||||||
|
let mask = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::xmm_load_const(mask_const, mask, types::I16X8));
|
||||||
|
|
||||||
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmulhrsw, RegMem::reg(src2), dst));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pcmpeqw,
|
||||||
|
RegMem::reg(dst.to_reg()),
|
||||||
|
mask,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pxor,
|
||||||
|
RegMem::reg(mask.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
// Unimplemented opcodes below. These are not currently used by Wasm
|
// Unimplemented opcodes below. These are not currently used by Wasm
|
||||||
// lowering or other known embeddings, but should be either supported or
|
// lowering or other known embeddings, but should be either supported or
|
||||||
// removed eventually.
|
// removed eventually.
|
||||||
@@ -6436,8 +6472,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
unimplemented!("Vector split/concat ops not implemented.");
|
unimplemented!("Vector split/concat ops not implemented.");
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::SqmulRoundSat | Opcode::Uunarrow => {
|
Opcode::Uunarrow => {
|
||||||
unimplemented!("unimplemented lowering for opcode {:?}", op)
|
unimplemented!("unimplemented lowering for opcode {:?}", op);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Opcodes that should be removed by legalization. These should
|
// Opcodes that should be removed by legalization. These should
|
||||||
|
|||||||
Reference in New Issue
Block a user