Add simd_extmul_* support for x64
This commit is contained in:
8
build.rs
8
build.rs
@@ -190,12 +190,9 @@ fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
||||
|
||||
match (testsuite, testname) {
|
||||
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
|
||||
("simd", "simd_i16x8_extmul_i8x16") => return true,
|
||||
("simd", "simd_i16x8_q15mulr_sat_s") => return true,
|
||||
("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
|
||||
("simd", "simd_i32x4_extmul_i16x8") => return true,
|
||||
("simd", "simd_i32x4_trunc_sat_f64x2") => return true,
|
||||
("simd", "simd_i64x2_extmul_i32x4") => return true,
|
||||
("simd", "simd_int_to_int_extend") => return true,
|
||||
("simd", _) => return false,
|
||||
_ => {}
|
||||
@@ -229,10 +226,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
||||
// These are new instructions that are not really implemented in any backend.
|
||||
("simd", "simd_conversions")
|
||||
| ("simd", "simd_i16x8_extadd_pairwise_i8x16")
|
||||
| ("simd", "simd_i16x8_extmul_i8x16")
|
||||
| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
|
||||
| ("simd", "simd_i32x4_extmul_i16x8")
|
||||
| ("simd", "simd_i64x2_extmul_i32x4") => return true,
|
||||
| ("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
|
||||
|
||||
_ => {}
|
||||
},
|
||||
|
||||
@@ -593,6 +593,9 @@ pub enum SseOpcode {
|
||||
Pmovzxwd,
|
||||
Pmovzxwq,
|
||||
Pmovzxdq,
|
||||
Pmuldq,
|
||||
Pmulhw,
|
||||
Pmulhuw,
|
||||
Pmulld,
|
||||
Pmullw,
|
||||
Pmuludq,
|
||||
@@ -617,7 +620,9 @@ pub enum SseOpcode {
|
||||
Psubusw,
|
||||
Ptest,
|
||||
Punpckhbw,
|
||||
Punpckhwd,
|
||||
Punpcklbw,
|
||||
Punpcklwd,
|
||||
Pxor,
|
||||
Rcpss,
|
||||
Roundps,
|
||||
@@ -742,6 +747,8 @@ impl SseOpcode {
|
||||
| SseOpcode::Pminsw
|
||||
| SseOpcode::Pminub
|
||||
| SseOpcode::Pmovmskb
|
||||
| SseOpcode::Pmulhw
|
||||
| SseOpcode::Pmulhuw
|
||||
| SseOpcode::Pmullw
|
||||
| SseOpcode::Pmuludq
|
||||
| SseOpcode::Por
|
||||
@@ -763,7 +770,9 @@ impl SseOpcode {
|
||||
| SseOpcode::Psubusb
|
||||
| SseOpcode::Psubusw
|
||||
| SseOpcode::Punpckhbw
|
||||
| SseOpcode::Punpckhwd
|
||||
| SseOpcode::Punpcklbw
|
||||
| SseOpcode::Punpcklwd
|
||||
| SseOpcode::Pxor
|
||||
| SseOpcode::Sqrtpd
|
||||
| SseOpcode::Sqrtsd
|
||||
@@ -808,6 +817,7 @@ impl SseOpcode {
|
||||
| SseOpcode::Pmovzxwd
|
||||
| SseOpcode::Pmovzxwq
|
||||
| SseOpcode::Pmovzxdq
|
||||
| SseOpcode::Pmuldq
|
||||
| SseOpcode::Pmulld
|
||||
| SseOpcode::Ptest
|
||||
| SseOpcode::Roundps
|
||||
@@ -953,6 +963,9 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Pmovzxwd => "pmovzxwd",
|
||||
SseOpcode::Pmovzxwq => "pmovzxwq",
|
||||
SseOpcode::Pmovzxdq => "pmovzxdq",
|
||||
SseOpcode::Pmuldq => "pmuldq",
|
||||
SseOpcode::Pmulhw => "pmulhw",
|
||||
SseOpcode::Pmulhuw => "pmulhuw",
|
||||
SseOpcode::Pmulld => "pmulld",
|
||||
SseOpcode::Pmullw => "pmullw",
|
||||
SseOpcode::Pmuludq => "pmuludq",
|
||||
@@ -977,7 +990,9 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Psubusw => "psubusw",
|
||||
SseOpcode::Ptest => "ptest",
|
||||
SseOpcode::Punpckhbw => "punpckhbw",
|
||||
SseOpcode::Punpckhwd => "punpckhwd",
|
||||
SseOpcode::Punpcklbw => "punpcklbw",
|
||||
SseOpcode::Punpcklwd => "punpcklwd",
|
||||
SseOpcode::Pxor => "pxor",
|
||||
SseOpcode::Rcpss => "rcpss",
|
||||
SseOpcode::Roundps => "roundps",
|
||||
|
||||
@@ -1509,6 +1509,9 @@ pub(crate) fn emit(
|
||||
SseOpcode::Pminub => (LegacyPrefixes::_66, 0x0FDA, 2),
|
||||
SseOpcode::Pminuw => (LegacyPrefixes::_66, 0x0F383A, 3),
|
||||
SseOpcode::Pminud => (LegacyPrefixes::_66, 0x0F383B, 3),
|
||||
SseOpcode::Pmuldq => (LegacyPrefixes::_66, 0x0F3828, 3),
|
||||
SseOpcode::Pmulhw => (LegacyPrefixes::_66, 0x0FE5, 2),
|
||||
SseOpcode::Pmulhuw => (LegacyPrefixes::_66, 0x0FE4, 2),
|
||||
SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
|
||||
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
|
||||
SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2),
|
||||
@@ -1523,7 +1526,9 @@ pub(crate) fn emit(
|
||||
SseOpcode::Psubusb => (LegacyPrefixes::_66, 0x0FD8, 2),
|
||||
SseOpcode::Psubusw => (LegacyPrefixes::_66, 0x0FD9, 2),
|
||||
SseOpcode::Punpckhbw => (LegacyPrefixes::_66, 0x0F68, 2),
|
||||
SseOpcode::Punpckhwd => (LegacyPrefixes::_66, 0x0F69, 2),
|
||||
SseOpcode::Punpcklbw => (LegacyPrefixes::_66, 0x0F60, 2),
|
||||
SseOpcode::Punpcklwd => (LegacyPrefixes::_66, 0x0F61, 2),
|
||||
SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2),
|
||||
SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
|
||||
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
|
||||
|
||||
@@ -1662,7 +1662,348 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
|
||||
Opcode::Imul => {
|
||||
let ty = ty.unwrap();
|
||||
if ty == types::I64X2 {
|
||||
|
||||
// First check for ext_mul_* instructions. Where possible ext_mul_* lowerings
|
||||
// are based on optimized lowerings here: https://github.com/WebAssembly/simd/pull/376
|
||||
if let Some(swiden0_high) = matches_input(ctx, inputs[0], Opcode::SwidenHigh) {
|
||||
if let Some(swiden1_high) = matches_input(ctx, inputs[1], Opcode::SwidenHigh) {
|
||||
let swiden_input = &[
|
||||
InsnInput {
|
||||
insn: swiden0_high,
|
||||
input: 0,
|
||||
},
|
||||
InsnInput {
|
||||
insn: swiden1_high,
|
||||
input: 0,
|
||||
},
|
||||
];
|
||||
let input0_ty = ctx.input_ty(swiden0_high, 0);
|
||||
let input1_ty = ctx.input_ty(swiden1_high, 0);
|
||||
let output_ty = ctx.output_ty(insn, 0);
|
||||
let lhs = put_input_in_reg(ctx, swiden_input[0]);
|
||||
let rhs = put_input_in_reg(ctx, swiden_input[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
match (input0_ty, input1_ty, output_ty) {
|
||||
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||
// i16x8.extmul_high_i8x16_s
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(tmp_reg, lhs, output_ty));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Palignr,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
8,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovsxbw,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
));
|
||||
|
||||
ctx.emit(Inst::gen_move(dst, rhs, output_ty));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Palignr,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
8,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmullw,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||
// i32x4.extmul_high_i16x8_s
|
||||
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmulhw, RegMem::reg(rhs), tmp_reg));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Punpckhwd,
|
||||
RegMem::from(tmp_reg),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||
// i64x2.extmul_high_i32x4_s
|
||||
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
0xFA,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
0xFA,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuldq,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
_ => panic!("Unsupported extmul_low_signed type"),
|
||||
}
|
||||
}
|
||||
} else if let Some(swiden0_low) = matches_input(ctx, inputs[0], Opcode::SwidenLow) {
|
||||
if let Some(swiden1_low) = matches_input(ctx, inputs[1], Opcode::SwidenLow) {
|
||||
let swiden_input = &[
|
||||
InsnInput {
|
||||
insn: swiden0_low,
|
||||
input: 0,
|
||||
},
|
||||
InsnInput {
|
||||
insn: swiden1_low,
|
||||
input: 0,
|
||||
},
|
||||
];
|
||||
let input0_ty = ctx.input_ty(swiden0_low, 0);
|
||||
let input1_ty = ctx.input_ty(swiden1_low, 0);
|
||||
let output_ty = ctx.output_ty(insn, 0);
|
||||
let lhs = put_input_in_reg(ctx, swiden_input[0]);
|
||||
let rhs = put_input_in_reg(ctx, swiden_input[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
match (input0_ty, input1_ty, output_ty) {
|
||||
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||
// i16x8.extmul_low_i8x16_s
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovsxbw,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmullw,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||
// i32x4.extmul_low_i16x8_s
|
||||
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmulhw, RegMem::reg(rhs), tmp_reg));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Punpcklwd,
|
||||
RegMem::from(tmp_reg),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||
// i64x2.extmul_low_i32x4_s
|
||||
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
0x50,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
0x50,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuldq,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
_ => panic!("Unsupported extmul_low_signed type"),
|
||||
}
|
||||
}
|
||||
} else if let Some(uwiden0_high) = matches_input(ctx, inputs[0], Opcode::UwidenHigh) {
|
||||
if let Some(uwiden1_high) = matches_input(ctx, inputs[1], Opcode::UwidenHigh) {
|
||||
let uwiden_input = &[
|
||||
InsnInput {
|
||||
insn: uwiden0_high,
|
||||
input: 0,
|
||||
},
|
||||
InsnInput {
|
||||
insn: uwiden1_high,
|
||||
input: 0,
|
||||
},
|
||||
];
|
||||
let input0_ty = ctx.input_ty(uwiden0_high, 0);
|
||||
let input1_ty = ctx.input_ty(uwiden1_high, 0);
|
||||
let output_ty = ctx.output_ty(insn, 0);
|
||||
let lhs = put_input_in_reg(ctx, uwiden_input[0]);
|
||||
let rhs = put_input_in_reg(ctx, uwiden_input[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
match (input0_ty, input1_ty, output_ty) {
|
||||
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||
// i16x8.extmul_high_i8x16_u
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(tmp_reg, lhs, output_ty));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Palignr,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
8,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovzxbw,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::gen_move(dst, rhs, output_ty));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Palignr,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
8,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmullw,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||
// i32x4.extmul_high_i16x8_u
|
||||
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmulhuw,
|
||||
RegMem::reg(rhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Punpckhwd,
|
||||
RegMem::from(tmp_reg),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||
// i64x2.extmul_high_i32x4_u
|
||||
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
0xFA,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
0xFA,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuludq,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
_ => panic!("Unsupported extmul_low_signed type"),
|
||||
}
|
||||
}
|
||||
} else if let Some(uwiden0_low) = matches_input(ctx, inputs[0], Opcode::UwidenLow) {
|
||||
if let Some(uwiden1_low) = matches_input(ctx, inputs[1], Opcode::UwidenLow) {
|
||||
let uwiden_input = &[
|
||||
InsnInput {
|
||||
insn: uwiden0_low,
|
||||
input: 0,
|
||||
},
|
||||
InsnInput {
|
||||
insn: uwiden1_low,
|
||||
input: 0,
|
||||
},
|
||||
];
|
||||
|
||||
let input0_ty = ctx.input_ty(uwiden0_low, 0);
|
||||
let input1_ty = ctx.input_ty(uwiden1_low, 0);
|
||||
let output_ty = ctx.output_ty(insn, 0);
|
||||
let lhs = put_input_in_reg(ctx, uwiden_input[0]);
|
||||
let rhs = put_input_in_reg(ctx, uwiden_input[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
match (input0_ty, input1_ty, output_ty) {
|
||||
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||
// i16x8.extmul_low_i8x16_u
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovzxbw,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmullw,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||
// i32x4.extmul_low_i16x8_u
|
||||
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmulhuw,
|
||||
RegMem::reg(rhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Punpcklwd,
|
||||
RegMem::from(tmp_reg),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||
// i64x2.extmul_low_i32x4_u
|
||||
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
0x50,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
0x50,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuludq,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
_ => panic!("Unsupported extmul_low_signed type"),
|
||||
}
|
||||
}
|
||||
} else if ty == types::I64X2 {
|
||||
// Eventually one of these should be `input_to_reg_mem` (TODO).
|
||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||
let rhs = put_input_in_reg(ctx, inputs[1]);
|
||||
|
||||
@@ -1911,19 +1911,79 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
||||
|
||||
state.push1(builder.ins().sqmul_round_sat(a, b))
|
||||
}
|
||||
Operator::I16x8ExtMulLowI8x16S
|
||||
| Operator::I16x8ExtMulHighI8x16S
|
||||
| Operator::I16x8ExtMulLowI8x16U
|
||||
| Operator::I16x8ExtMulHighI8x16U
|
||||
| Operator::I32x4ExtMulLowI16x8S
|
||||
| Operator::I32x4ExtMulHighI16x8S
|
||||
| Operator::I32x4ExtMulLowI16x8U
|
||||
| Operator::I32x4ExtMulHighI16x8U
|
||||
| Operator::I64x2ExtMulLowI32x4S
|
||||
| Operator::I64x2ExtMulHighI32x4S
|
||||
| Operator::I64x2ExtMulLowI32x4U
|
||||
| Operator::I64x2ExtMulHighI32x4U
|
||||
| Operator::I16x8ExtAddPairwiseI8x16S
|
||||
Operator::I16x8ExtMulLowI8x16S => {
|
||||
let (a, b) = pop2_with_bitcast(state, I8X16, builder);
|
||||
let a_low = builder.ins().swiden_low(a);
|
||||
let b_low = builder.ins().swiden_low(b);
|
||||
state.push1(builder.ins().imul(a_low, b_low));
|
||||
}
|
||||
Operator::I16x8ExtMulHighI8x16S => {
|
||||
let (a, b) = pop2_with_bitcast(state, I8X16, builder);
|
||||
let a_high = builder.ins().swiden_high(a);
|
||||
let b_high = builder.ins().swiden_high(b);
|
||||
state.push1(builder.ins().imul(a_high, b_high));
|
||||
}
|
||||
Operator::I16x8ExtMulLowI8x16U => {
|
||||
let (a, b) = pop2_with_bitcast(state, I8X16, builder);
|
||||
let a_low = builder.ins().uwiden_low(a);
|
||||
let b_low = builder.ins().uwiden_low(b);
|
||||
state.push1(builder.ins().imul(a_low, b_low));
|
||||
}
|
||||
Operator::I16x8ExtMulHighI8x16U => {
|
||||
let (a, b) = pop2_with_bitcast(state, I8X16, builder);
|
||||
let a_high = builder.ins().uwiden_high(a);
|
||||
let b_high = builder.ins().uwiden_high(b);
|
||||
state.push1(builder.ins().imul(a_high, b_high));
|
||||
}
|
||||
Operator::I32x4ExtMulLowI16x8S => {
|
||||
let (a, b) = pop2_with_bitcast(state, I16X8, builder);
|
||||
let a_low = builder.ins().swiden_low(a);
|
||||
let b_low = builder.ins().swiden_low(b);
|
||||
state.push1(builder.ins().imul(a_low, b_low));
|
||||
}
|
||||
Operator::I32x4ExtMulHighI16x8S => {
|
||||
let (a, b) = pop2_with_bitcast(state, I16X8, builder);
|
||||
let a_high = builder.ins().swiden_high(a);
|
||||
let b_high = builder.ins().swiden_high(b);
|
||||
state.push1(builder.ins().imul(a_high, b_high));
|
||||
}
|
||||
Operator::I32x4ExtMulLowI16x8U => {
|
||||
let (a, b) = pop2_with_bitcast(state, I16X8, builder);
|
||||
let a_low = builder.ins().uwiden_low(a);
|
||||
let b_low = builder.ins().uwiden_low(b);
|
||||
state.push1(builder.ins().imul(a_low, b_low));
|
||||
}
|
||||
Operator::I32x4ExtMulHighI16x8U => {
|
||||
let (a, b) = pop2_with_bitcast(state, I16X8, builder);
|
||||
let a_high = builder.ins().uwiden_high(a);
|
||||
let b_high = builder.ins().uwiden_high(b);
|
||||
state.push1(builder.ins().imul(a_high, b_high));
|
||||
}
|
||||
Operator::I64x2ExtMulLowI32x4S => {
|
||||
let (a, b) = pop2_with_bitcast(state, I32X4, builder);
|
||||
let a_low = builder.ins().swiden_low(a);
|
||||
let b_low = builder.ins().swiden_low(b);
|
||||
state.push1(builder.ins().imul(a_low, b_low));
|
||||
}
|
||||
Operator::I64x2ExtMulHighI32x4S => {
|
||||
let (a, b) = pop2_with_bitcast(state, I32X4, builder);
|
||||
let a_high = builder.ins().swiden_high(a);
|
||||
let b_high = builder.ins().swiden_high(b);
|
||||
state.push1(builder.ins().imul(a_high, b_high));
|
||||
}
|
||||
Operator::I64x2ExtMulLowI32x4U => {
|
||||
let (a, b) = pop2_with_bitcast(state, I32X4, builder);
|
||||
let a_low = builder.ins().uwiden_low(a);
|
||||
let b_low = builder.ins().uwiden_low(b);
|
||||
state.push1(builder.ins().imul(a_low, b_low));
|
||||
}
|
||||
Operator::I64x2ExtMulHighI32x4U => {
|
||||
let (a, b) = pop2_with_bitcast(state, I32X4, builder);
|
||||
let a_high = builder.ins().uwiden_high(a);
|
||||
let b_high = builder.ins().uwiden_high(b);
|
||||
state.push1(builder.ins().imul(a_high, b_high));
|
||||
}
|
||||
Operator::I16x8ExtAddPairwiseI8x16S
|
||||
| Operator::I16x8ExtAddPairwiseI8x16U
|
||||
| Operator::I32x4ExtAddPairwiseI16x8S
|
||||
| Operator::I32x4ExtAddPairwiseI16x8U => {
|
||||
|
||||
Reference in New Issue
Block a user