x64: lower i8x16.shuffle to VPERMI2B when possible
When the two inputs of an `i8x16.shuffle` come from different registers, the SSE-only x64 lowering must first shuffle each register separately and then OR the two results together. `VPERMI2B`, available when the CPU supports both AVX512VL and AVX512VBMI, performs the whole two-register shuffle in a single instruction once the shuffle mask has been moved into the destination register. This change lowers the two-register case to `VPERMI2B` when the CPU supports it.
This commit is contained in:
@@ -3573,6 +3573,18 @@ fn test_x64_emit() {
|
||||
"vpmullq %xmm14, %xmm10, %xmm1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r_evex(Avx512Opcode::Vpermi2b, RegMem::reg(xmm14), xmm10, w_xmm1),
|
||||
"62D22D0875CE",
|
||||
"vpermi2b %xmm14, %xmm10, %xmm1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r_evex(Avx512Opcode::Vpermi2b, RegMem::reg(xmm1), xmm0, w_xmm2),
|
||||
"62F27D0875D1",
|
||||
"vpermi2b %xmm1, %xmm0, %xmm2",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9),
|
||||
"66450FF4C8",
|
||||
@@ -4315,6 +4327,7 @@ fn test_x64_emit() {
|
||||
isa_flag_builder.enable("has_avx512f").unwrap();
|
||||
isa_flag_builder.enable("has_avx512dq").unwrap();
|
||||
isa_flag_builder.enable("has_avx512vl").unwrap();
|
||||
isa_flag_builder.enable("has_avx512vbmi").unwrap();
|
||||
let isa_flags = x64::settings::Flags::new(&flags, isa_flag_builder);
|
||||
|
||||
let rru = regs::create_reg_universe_systemv(&flags);
|
||||
|
||||
Reference in New Issue
Block a user