Add simd_extmul_* support for x64
This commit is contained in:
8
build.rs
8
build.rs
@@ -190,12 +190,9 @@ fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
|||||||
|
|
||||||
match (testsuite, testname) {
|
match (testsuite, testname) {
|
||||||
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
|
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
|
||||||
("simd", "simd_i16x8_extmul_i8x16") => return true,
|
|
||||||
("simd", "simd_i16x8_q15mulr_sat_s") => return true,
|
("simd", "simd_i16x8_q15mulr_sat_s") => return true,
|
||||||
("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
|
("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
|
||||||
("simd", "simd_i32x4_extmul_i16x8") => return true,
|
|
||||||
("simd", "simd_i32x4_trunc_sat_f64x2") => return true,
|
("simd", "simd_i32x4_trunc_sat_f64x2") => return true,
|
||||||
("simd", "simd_i64x2_extmul_i32x4") => return true,
|
|
||||||
("simd", "simd_int_to_int_extend") => return true,
|
("simd", "simd_int_to_int_extend") => return true,
|
||||||
("simd", _) => return false,
|
("simd", _) => return false,
|
||||||
_ => {}
|
_ => {}
|
||||||
@@ -229,10 +226,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
|||||||
// These are new instructions that are not really implemented in any backend.
|
// These are new instructions that are not really implemented in any backend.
|
||||||
("simd", "simd_conversions")
|
("simd", "simd_conversions")
|
||||||
| ("simd", "simd_i16x8_extadd_pairwise_i8x16")
|
| ("simd", "simd_i16x8_extadd_pairwise_i8x16")
|
||||||
| ("simd", "simd_i16x8_extmul_i8x16")
|
| ("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
|
||||||
| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
|
|
||||||
| ("simd", "simd_i32x4_extmul_i16x8")
|
|
||||||
| ("simd", "simd_i64x2_extmul_i32x4") => return true,
|
|
||||||
|
|
||||||
_ => {}
|
_ => {}
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -593,6 +593,9 @@ pub enum SseOpcode {
|
|||||||
Pmovzxwd,
|
Pmovzxwd,
|
||||||
Pmovzxwq,
|
Pmovzxwq,
|
||||||
Pmovzxdq,
|
Pmovzxdq,
|
||||||
|
Pmuldq,
|
||||||
|
Pmulhw,
|
||||||
|
Pmulhuw,
|
||||||
Pmulld,
|
Pmulld,
|
||||||
Pmullw,
|
Pmullw,
|
||||||
Pmuludq,
|
Pmuludq,
|
||||||
@@ -617,7 +620,9 @@ pub enum SseOpcode {
|
|||||||
Psubusw,
|
Psubusw,
|
||||||
Ptest,
|
Ptest,
|
||||||
Punpckhbw,
|
Punpckhbw,
|
||||||
|
Punpckhwd,
|
||||||
Punpcklbw,
|
Punpcklbw,
|
||||||
|
Punpcklwd,
|
||||||
Pxor,
|
Pxor,
|
||||||
Rcpss,
|
Rcpss,
|
||||||
Roundps,
|
Roundps,
|
||||||
@@ -742,6 +747,8 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Pminsw
|
| SseOpcode::Pminsw
|
||||||
| SseOpcode::Pminub
|
| SseOpcode::Pminub
|
||||||
| SseOpcode::Pmovmskb
|
| SseOpcode::Pmovmskb
|
||||||
|
| SseOpcode::Pmulhw
|
||||||
|
| SseOpcode::Pmulhuw
|
||||||
| SseOpcode::Pmullw
|
| SseOpcode::Pmullw
|
||||||
| SseOpcode::Pmuludq
|
| SseOpcode::Pmuludq
|
||||||
| SseOpcode::Por
|
| SseOpcode::Por
|
||||||
@@ -763,7 +770,9 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Psubusb
|
| SseOpcode::Psubusb
|
||||||
| SseOpcode::Psubusw
|
| SseOpcode::Psubusw
|
||||||
| SseOpcode::Punpckhbw
|
| SseOpcode::Punpckhbw
|
||||||
|
| SseOpcode::Punpckhwd
|
||||||
| SseOpcode::Punpcklbw
|
| SseOpcode::Punpcklbw
|
||||||
|
| SseOpcode::Punpcklwd
|
||||||
| SseOpcode::Pxor
|
| SseOpcode::Pxor
|
||||||
| SseOpcode::Sqrtpd
|
| SseOpcode::Sqrtpd
|
||||||
| SseOpcode::Sqrtsd
|
| SseOpcode::Sqrtsd
|
||||||
@@ -808,6 +817,7 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Pmovzxwd
|
| SseOpcode::Pmovzxwd
|
||||||
| SseOpcode::Pmovzxwq
|
| SseOpcode::Pmovzxwq
|
||||||
| SseOpcode::Pmovzxdq
|
| SseOpcode::Pmovzxdq
|
||||||
|
| SseOpcode::Pmuldq
|
||||||
| SseOpcode::Pmulld
|
| SseOpcode::Pmulld
|
||||||
| SseOpcode::Ptest
|
| SseOpcode::Ptest
|
||||||
| SseOpcode::Roundps
|
| SseOpcode::Roundps
|
||||||
@@ -953,6 +963,9 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Pmovzxwd => "pmovzxwd",
|
SseOpcode::Pmovzxwd => "pmovzxwd",
|
||||||
SseOpcode::Pmovzxwq => "pmovzxwq",
|
SseOpcode::Pmovzxwq => "pmovzxwq",
|
||||||
SseOpcode::Pmovzxdq => "pmovzxdq",
|
SseOpcode::Pmovzxdq => "pmovzxdq",
|
||||||
|
SseOpcode::Pmuldq => "pmuldq",
|
||||||
|
SseOpcode::Pmulhw => "pmulhw",
|
||||||
|
SseOpcode::Pmulhuw => "pmulhuw",
|
||||||
SseOpcode::Pmulld => "pmulld",
|
SseOpcode::Pmulld => "pmulld",
|
||||||
SseOpcode::Pmullw => "pmullw",
|
SseOpcode::Pmullw => "pmullw",
|
||||||
SseOpcode::Pmuludq => "pmuludq",
|
SseOpcode::Pmuludq => "pmuludq",
|
||||||
@@ -977,7 +990,9 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Psubusw => "psubusw",
|
SseOpcode::Psubusw => "psubusw",
|
||||||
SseOpcode::Ptest => "ptest",
|
SseOpcode::Ptest => "ptest",
|
||||||
SseOpcode::Punpckhbw => "punpckhbw",
|
SseOpcode::Punpckhbw => "punpckhbw",
|
||||||
|
SseOpcode::Punpckhwd => "punpckhwd",
|
||||||
SseOpcode::Punpcklbw => "punpcklbw",
|
SseOpcode::Punpcklbw => "punpcklbw",
|
||||||
|
SseOpcode::Punpcklwd => "punpcklwd",
|
||||||
SseOpcode::Pxor => "pxor",
|
SseOpcode::Pxor => "pxor",
|
||||||
SseOpcode::Rcpss => "rcpss",
|
SseOpcode::Rcpss => "rcpss",
|
||||||
SseOpcode::Roundps => "roundps",
|
SseOpcode::Roundps => "roundps",
|
||||||
|
|||||||
@@ -1509,6 +1509,9 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Pminub => (LegacyPrefixes::_66, 0x0FDA, 2),
|
SseOpcode::Pminub => (LegacyPrefixes::_66, 0x0FDA, 2),
|
||||||
SseOpcode::Pminuw => (LegacyPrefixes::_66, 0x0F383A, 3),
|
SseOpcode::Pminuw => (LegacyPrefixes::_66, 0x0F383A, 3),
|
||||||
SseOpcode::Pminud => (LegacyPrefixes::_66, 0x0F383B, 3),
|
SseOpcode::Pminud => (LegacyPrefixes::_66, 0x0F383B, 3),
|
||||||
|
SseOpcode::Pmuldq => (LegacyPrefixes::_66, 0x0F3828, 3),
|
||||||
|
SseOpcode::Pmulhw => (LegacyPrefixes::_66, 0x0FE5, 2),
|
||||||
|
SseOpcode::Pmulhuw => (LegacyPrefixes::_66, 0x0FE4, 2),
|
||||||
SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
|
SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
|
||||||
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
|
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
|
||||||
SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2),
|
SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2),
|
||||||
@@ -1523,7 +1526,9 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Psubusb => (LegacyPrefixes::_66, 0x0FD8, 2),
|
SseOpcode::Psubusb => (LegacyPrefixes::_66, 0x0FD8, 2),
|
||||||
SseOpcode::Psubusw => (LegacyPrefixes::_66, 0x0FD9, 2),
|
SseOpcode::Psubusw => (LegacyPrefixes::_66, 0x0FD9, 2),
|
||||||
SseOpcode::Punpckhbw => (LegacyPrefixes::_66, 0x0F68, 2),
|
SseOpcode::Punpckhbw => (LegacyPrefixes::_66, 0x0F68, 2),
|
||||||
|
SseOpcode::Punpckhwd => (LegacyPrefixes::_66, 0x0F69, 2),
|
||||||
SseOpcode::Punpcklbw => (LegacyPrefixes::_66, 0x0F60, 2),
|
SseOpcode::Punpcklbw => (LegacyPrefixes::_66, 0x0F60, 2),
|
||||||
|
SseOpcode::Punpcklwd => (LegacyPrefixes::_66, 0x0F61, 2),
|
||||||
SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2),
|
SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2),
|
||||||
SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
|
SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
|
||||||
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
|
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
|
||||||
|
|||||||
@@ -1662,7 +1662,348 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
|
|
||||||
Opcode::Imul => {
|
Opcode::Imul => {
|
||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
if ty == types::I64X2 {
|
|
||||||
|
// First check for ext_mul_* instructions. Where possible ext_mul_* lowerings
|
||||||
|
// are based on optimized lowerings here: https://github.com/WebAssembly/simd/pull/376
|
||||||
|
if let Some(swiden0_high) = matches_input(ctx, inputs[0], Opcode::SwidenHigh) {
|
||||||
|
if let Some(swiden1_high) = matches_input(ctx, inputs[1], Opcode::SwidenHigh) {
|
||||||
|
let swiden_input = &[
|
||||||
|
InsnInput {
|
||||||
|
insn: swiden0_high,
|
||||||
|
input: 0,
|
||||||
|
},
|
||||||
|
InsnInput {
|
||||||
|
insn: swiden1_high,
|
||||||
|
input: 0,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
let input0_ty = ctx.input_ty(swiden0_high, 0);
|
||||||
|
let input1_ty = ctx.input_ty(swiden1_high, 0);
|
||||||
|
let output_ty = ctx.output_ty(insn, 0);
|
||||||
|
let lhs = put_input_in_reg(ctx, swiden_input[0]);
|
||||||
|
let rhs = put_input_in_reg(ctx, swiden_input[1]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
|
||||||
|
match (input0_ty, input1_ty, output_ty) {
|
||||||
|
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||||
|
// i16x8.extmul_high_i8x16_s
|
||||||
|
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::gen_move(tmp_reg, lhs, output_ty));
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(
|
||||||
|
SseOpcode::Palignr,
|
||||||
|
RegMem::reg(lhs),
|
||||||
|
tmp_reg,
|
||||||
|
8,
|
||||||
|
OperandSize::Size32,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_mov(
|
||||||
|
SseOpcode::Pmovsxbw,
|
||||||
|
RegMem::reg(lhs),
|
||||||
|
tmp_reg,
|
||||||
|
));
|
||||||
|
|
||||||
|
ctx.emit(Inst::gen_move(dst, rhs, output_ty));
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(
|
||||||
|
SseOpcode::Palignr,
|
||||||
|
RegMem::reg(rhs),
|
||||||
|
dst,
|
||||||
|
8,
|
||||||
|
OperandSize::Size32,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(rhs), dst));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pmullw,
|
||||||
|
RegMem::reg(tmp_reg.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||||
|
// i32x4.extmul_high_i16x8_s
|
||||||
|
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||||
|
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmulhw, RegMem::reg(rhs), tmp_reg));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Punpckhwd,
|
||||||
|
RegMem::from(tmp_reg),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||||
|
// i64x2.extmul_high_i32x4_s
|
||||||
|
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(
|
||||||
|
SseOpcode::Pshufd,
|
||||||
|
RegMem::reg(lhs),
|
||||||
|
tmp_reg,
|
||||||
|
0xFA,
|
||||||
|
OperandSize::Size32,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(
|
||||||
|
SseOpcode::Pshufd,
|
||||||
|
RegMem::reg(rhs),
|
||||||
|
dst,
|
||||||
|
0xFA,
|
||||||
|
OperandSize::Size32,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pmuldq,
|
||||||
|
RegMem::reg(tmp_reg.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
_ => panic!("Unsupported extmul_low_signed type"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if let Some(swiden0_low) = matches_input(ctx, inputs[0], Opcode::SwidenLow) {
|
||||||
|
if let Some(swiden1_low) = matches_input(ctx, inputs[1], Opcode::SwidenLow) {
|
||||||
|
let swiden_input = &[
|
||||||
|
InsnInput {
|
||||||
|
insn: swiden0_low,
|
||||||
|
input: 0,
|
||||||
|
},
|
||||||
|
InsnInput {
|
||||||
|
insn: swiden1_low,
|
||||||
|
input: 0,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
let input0_ty = ctx.input_ty(swiden0_low, 0);
|
||||||
|
let input1_ty = ctx.input_ty(swiden1_low, 0);
|
||||||
|
let output_ty = ctx.output_ty(insn, 0);
|
||||||
|
let lhs = put_input_in_reg(ctx, swiden_input[0]);
|
||||||
|
let rhs = put_input_in_reg(ctx, swiden_input[1]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
|
||||||
|
match (input0_ty, input1_ty, output_ty) {
|
||||||
|
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||||
|
// i16x8.extmul_low_i8x16_s
|
||||||
|
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::xmm_mov(
|
||||||
|
SseOpcode::Pmovsxbw,
|
||||||
|
RegMem::reg(lhs),
|
||||||
|
tmp_reg,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(rhs), dst));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pmullw,
|
||||||
|
RegMem::reg(tmp_reg.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||||
|
// i32x4.extmul_low_i16x8_s
|
||||||
|
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||||
|
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmulhw, RegMem::reg(rhs), tmp_reg));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Punpcklwd,
|
||||||
|
RegMem::from(tmp_reg),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||||
|
// i64x2.extmul_low_i32x4_s
|
||||||
|
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(
|
||||||
|
SseOpcode::Pshufd,
|
||||||
|
RegMem::reg(lhs),
|
||||||
|
tmp_reg,
|
||||||
|
0x50,
|
||||||
|
OperandSize::Size32,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(
|
||||||
|
SseOpcode::Pshufd,
|
||||||
|
RegMem::reg(rhs),
|
||||||
|
dst,
|
||||||
|
0x50,
|
||||||
|
OperandSize::Size32,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pmuldq,
|
||||||
|
RegMem::reg(tmp_reg.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
_ => panic!("Unsupported extmul_low_signed type"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if let Some(uwiden0_high) = matches_input(ctx, inputs[0], Opcode::UwidenHigh) {
|
||||||
|
if let Some(uwiden1_high) = matches_input(ctx, inputs[1], Opcode::UwidenHigh) {
|
||||||
|
let uwiden_input = &[
|
||||||
|
InsnInput {
|
||||||
|
insn: uwiden0_high,
|
||||||
|
input: 0,
|
||||||
|
},
|
||||||
|
InsnInput {
|
||||||
|
insn: uwiden1_high,
|
||||||
|
input: 0,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
let input0_ty = ctx.input_ty(uwiden0_high, 0);
|
||||||
|
let input1_ty = ctx.input_ty(uwiden1_high, 0);
|
||||||
|
let output_ty = ctx.output_ty(insn, 0);
|
||||||
|
let lhs = put_input_in_reg(ctx, uwiden_input[0]);
|
||||||
|
let rhs = put_input_in_reg(ctx, uwiden_input[1]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
|
||||||
|
match (input0_ty, input1_ty, output_ty) {
|
||||||
|
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||||
|
// i16x8.extmul_high_i8x16_u
|
||||||
|
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::gen_move(tmp_reg, lhs, output_ty));
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(
|
||||||
|
SseOpcode::Palignr,
|
||||||
|
RegMem::reg(lhs),
|
||||||
|
tmp_reg,
|
||||||
|
8,
|
||||||
|
OperandSize::Size32,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_mov(
|
||||||
|
SseOpcode::Pmovzxbw,
|
||||||
|
RegMem::reg(lhs),
|
||||||
|
tmp_reg,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::gen_move(dst, rhs, output_ty));
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(
|
||||||
|
SseOpcode::Palignr,
|
||||||
|
RegMem::reg(rhs),
|
||||||
|
dst,
|
||||||
|
8,
|
||||||
|
OperandSize::Size32,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(rhs), dst));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pmullw,
|
||||||
|
RegMem::reg(tmp_reg.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||||
|
// i32x4.extmul_high_i16x8_u
|
||||||
|
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||||
|
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pmulhuw,
|
||||||
|
RegMem::reg(rhs),
|
||||||
|
tmp_reg,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Punpckhwd,
|
||||||
|
RegMem::from(tmp_reg),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||||
|
// i64x2.extmul_high_i32x4_u
|
||||||
|
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(
|
||||||
|
SseOpcode::Pshufd,
|
||||||
|
RegMem::reg(lhs),
|
||||||
|
tmp_reg,
|
||||||
|
0xFA,
|
||||||
|
OperandSize::Size32,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(
|
||||||
|
SseOpcode::Pshufd,
|
||||||
|
RegMem::reg(rhs),
|
||||||
|
dst,
|
||||||
|
0xFA,
|
||||||
|
OperandSize::Size32,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pmuludq,
|
||||||
|
RegMem::reg(tmp_reg.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
_ => panic!("Unsupported extmul_low_signed type"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if let Some(uwiden0_low) = matches_input(ctx, inputs[0], Opcode::UwidenLow) {
|
||||||
|
if let Some(uwiden1_low) = matches_input(ctx, inputs[1], Opcode::UwidenLow) {
|
||||||
|
let uwiden_input = &[
|
||||||
|
InsnInput {
|
||||||
|
insn: uwiden0_low,
|
||||||
|
input: 0,
|
||||||
|
},
|
||||||
|
InsnInput {
|
||||||
|
insn: uwiden1_low,
|
||||||
|
input: 0,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
let input0_ty = ctx.input_ty(uwiden0_low, 0);
|
||||||
|
let input1_ty = ctx.input_ty(uwiden1_low, 0);
|
||||||
|
let output_ty = ctx.output_ty(insn, 0);
|
||||||
|
let lhs = put_input_in_reg(ctx, uwiden_input[0]);
|
||||||
|
let rhs = put_input_in_reg(ctx, uwiden_input[1]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
|
||||||
|
match (input0_ty, input1_ty, output_ty) {
|
||||||
|
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||||
|
// i16x8.extmul_low_i8x16_u
|
||||||
|
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::xmm_mov(
|
||||||
|
SseOpcode::Pmovzxbw,
|
||||||
|
RegMem::reg(lhs),
|
||||||
|
tmp_reg,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(rhs), dst));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pmullw,
|
||||||
|
RegMem::reg(tmp_reg.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||||
|
// i32x4.extmul_low_i16x8_u
|
||||||
|
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||||
|
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pmulhuw,
|
||||||
|
RegMem::reg(rhs),
|
||||||
|
tmp_reg,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Punpcklwd,
|
||||||
|
RegMem::from(tmp_reg),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||||
|
// i64x2.extmul_low_i32x4_u
|
||||||
|
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(
|
||||||
|
SseOpcode::Pshufd,
|
||||||
|
RegMem::reg(lhs),
|
||||||
|
tmp_reg,
|
||||||
|
0x50,
|
||||||
|
OperandSize::Size32,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(
|
||||||
|
SseOpcode::Pshufd,
|
||||||
|
RegMem::reg(rhs),
|
||||||
|
dst,
|
||||||
|
0x50,
|
||||||
|
OperandSize::Size32,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pmuludq,
|
||||||
|
RegMem::reg(tmp_reg.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
_ => panic!("Unsupported extmul_low_signed type"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if ty == types::I64X2 {
|
||||||
// Eventually one of these should be `input_to_reg_mem` (TODO).
|
// Eventually one of these should be `input_to_reg_mem` (TODO).
|
||||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||||
let rhs = put_input_in_reg(ctx, inputs[1]);
|
let rhs = put_input_in_reg(ctx, inputs[1]);
|
||||||
|
|||||||
@@ -1911,19 +1911,79 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
|
|
||||||
state.push1(builder.ins().sqmul_round_sat(a, b))
|
state.push1(builder.ins().sqmul_round_sat(a, b))
|
||||||
}
|
}
|
||||||
Operator::I16x8ExtMulLowI8x16S
|
Operator::I16x8ExtMulLowI8x16S => {
|
||||||
| Operator::I16x8ExtMulHighI8x16S
|
let (a, b) = pop2_with_bitcast(state, I8X16, builder);
|
||||||
| Operator::I16x8ExtMulLowI8x16U
|
let a_low = builder.ins().swiden_low(a);
|
||||||
| Operator::I16x8ExtMulHighI8x16U
|
let b_low = builder.ins().swiden_low(b);
|
||||||
| Operator::I32x4ExtMulLowI16x8S
|
state.push1(builder.ins().imul(a_low, b_low));
|
||||||
| Operator::I32x4ExtMulHighI16x8S
|
}
|
||||||
| Operator::I32x4ExtMulLowI16x8U
|
Operator::I16x8ExtMulHighI8x16S => {
|
||||||
| Operator::I32x4ExtMulHighI16x8U
|
let (a, b) = pop2_with_bitcast(state, I8X16, builder);
|
||||||
| Operator::I64x2ExtMulLowI32x4S
|
let a_high = builder.ins().swiden_high(a);
|
||||||
| Operator::I64x2ExtMulHighI32x4S
|
let b_high = builder.ins().swiden_high(b);
|
||||||
| Operator::I64x2ExtMulLowI32x4U
|
state.push1(builder.ins().imul(a_high, b_high));
|
||||||
| Operator::I64x2ExtMulHighI32x4U
|
}
|
||||||
| Operator::I16x8ExtAddPairwiseI8x16S
|
Operator::I16x8ExtMulLowI8x16U => {
|
||||||
|
let (a, b) = pop2_with_bitcast(state, I8X16, builder);
|
||||||
|
let a_low = builder.ins().uwiden_low(a);
|
||||||
|
let b_low = builder.ins().uwiden_low(b);
|
||||||
|
state.push1(builder.ins().imul(a_low, b_low));
|
||||||
|
}
|
||||||
|
Operator::I16x8ExtMulHighI8x16U => {
|
||||||
|
let (a, b) = pop2_with_bitcast(state, I8X16, builder);
|
||||||
|
let a_high = builder.ins().uwiden_high(a);
|
||||||
|
let b_high = builder.ins().uwiden_high(b);
|
||||||
|
state.push1(builder.ins().imul(a_high, b_high));
|
||||||
|
}
|
||||||
|
Operator::I32x4ExtMulLowI16x8S => {
|
||||||
|
let (a, b) = pop2_with_bitcast(state, I16X8, builder);
|
||||||
|
let a_low = builder.ins().swiden_low(a);
|
||||||
|
let b_low = builder.ins().swiden_low(b);
|
||||||
|
state.push1(builder.ins().imul(a_low, b_low));
|
||||||
|
}
|
||||||
|
Operator::I32x4ExtMulHighI16x8S => {
|
||||||
|
let (a, b) = pop2_with_bitcast(state, I16X8, builder);
|
||||||
|
let a_high = builder.ins().swiden_high(a);
|
||||||
|
let b_high = builder.ins().swiden_high(b);
|
||||||
|
state.push1(builder.ins().imul(a_high, b_high));
|
||||||
|
}
|
||||||
|
Operator::I32x4ExtMulLowI16x8U => {
|
||||||
|
let (a, b) = pop2_with_bitcast(state, I16X8, builder);
|
||||||
|
let a_low = builder.ins().uwiden_low(a);
|
||||||
|
let b_low = builder.ins().uwiden_low(b);
|
||||||
|
state.push1(builder.ins().imul(a_low, b_low));
|
||||||
|
}
|
||||||
|
Operator::I32x4ExtMulHighI16x8U => {
|
||||||
|
let (a, b) = pop2_with_bitcast(state, I16X8, builder);
|
||||||
|
let a_high = builder.ins().uwiden_high(a);
|
||||||
|
let b_high = builder.ins().uwiden_high(b);
|
||||||
|
state.push1(builder.ins().imul(a_high, b_high));
|
||||||
|
}
|
||||||
|
Operator::I64x2ExtMulLowI32x4S => {
|
||||||
|
let (a, b) = pop2_with_bitcast(state, I32X4, builder);
|
||||||
|
let a_low = builder.ins().swiden_low(a);
|
||||||
|
let b_low = builder.ins().swiden_low(b);
|
||||||
|
state.push1(builder.ins().imul(a_low, b_low));
|
||||||
|
}
|
||||||
|
Operator::I64x2ExtMulHighI32x4S => {
|
||||||
|
let (a, b) = pop2_with_bitcast(state, I32X4, builder);
|
||||||
|
let a_high = builder.ins().swiden_high(a);
|
||||||
|
let b_high = builder.ins().swiden_high(b);
|
||||||
|
state.push1(builder.ins().imul(a_high, b_high));
|
||||||
|
}
|
||||||
|
Operator::I64x2ExtMulLowI32x4U => {
|
||||||
|
let (a, b) = pop2_with_bitcast(state, I32X4, builder);
|
||||||
|
let a_low = builder.ins().uwiden_low(a);
|
||||||
|
let b_low = builder.ins().uwiden_low(b);
|
||||||
|
state.push1(builder.ins().imul(a_low, b_low));
|
||||||
|
}
|
||||||
|
Operator::I64x2ExtMulHighI32x4U => {
|
||||||
|
let (a, b) = pop2_with_bitcast(state, I32X4, builder);
|
||||||
|
let a_high = builder.ins().uwiden_high(a);
|
||||||
|
let b_high = builder.ins().uwiden_high(b);
|
||||||
|
state.push1(builder.ins().imul(a_high, b_high));
|
||||||
|
}
|
||||||
|
Operator::I16x8ExtAddPairwiseI8x16S
|
||||||
| Operator::I16x8ExtAddPairwiseI8x16U
|
| Operator::I16x8ExtAddPairwiseI8x16U
|
||||||
| Operator::I32x4ExtAddPairwiseI16x8S
|
| Operator::I32x4ExtAddPairwiseI16x8S
|
||||||
| Operator::I32x4ExtAddPairwiseI16x8U => {
|
| Operator::I32x4ExtAddPairwiseI16x8U => {
|
||||||
|
|||||||
Reference in New Issue
Block a user