x64: Improve codegen for splats (#6025)

This commit goes through the lowerings for the CLIF `splat` instruction
and improves the support for each operator. Many of these lowerings are
mirrored from v8/SpiderMonkey and there are a number of improvements:

* AVX2 `v{p,}broadcast*` instructions are added and used when available.
* Float-based splats are much simpler and are always a single instruction.
* Integer-based splats don't insert into an uninit xmm value and instead
  start out with a `movd` to move into an `xmm` register. This
  theoretically breaks dependencies with prior instructions since `movd`
  creates a fresh new value in the destination register.
* Loads are now sunk into all of the instructions. A new extractor,
  `sinkable_load_exact`, was added to sink the i8/i16 loads.
This commit is contained in:
Alex Crichton
2023-03-15 16:33:56 -05:00
committed by GitHub
parent a10c50afe9
commit d76f7ee52e
12 changed files with 1216 additions and 82 deletions

View File

@@ -11,7 +11,7 @@ use crate::{isle_common_prelude_methods, isle_lower_prelude_methods};
use generated_code::{Context, MInst, RegisterClass};
// Types that the generated ISLE code uses via `use super::*`.
use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode};
use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode, MergeableLoadSize};
use crate::ir::LibCall;
use crate::isa::x64::lower::emit_vm_call;
use crate::isa::x64::X64Backend;
@@ -174,6 +174,11 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
self.backend.x64_flags.has_avx()
}
/// Returns the result of `has_avx2()` on the backend's x64 flags.
#[inline]
fn has_avx2(&mut self) -> bool {
self.backend.x64_flags.has_avx2()
}
#[inline]
fn avx512vl_enabled(&mut self, _: Type) -> bool {
self.backend.x64_flags.use_avx512vl_simd()
@@ -268,7 +273,25 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
/// Returns a `SinkableLoad` for `val` when its source is
/// `InputSourceInst::UniqueUse(inst, 0)` and `is_mergeable_load` yields an
/// address input and offset for `inst`; returns `None` otherwise.
fn sinkable_load(&mut self, val: Value) -> Option<SinkableLoad> {
let input = self.lower_ctx.get_value_as_source_or_const(val);
// Only proceed when the source matches `UniqueUse` with index 0.
if let InputSourceInst::UniqueUse(inst, 0) = input.inst {
// NOTE(review): the two-argument call on the next line and the
// three-argument call after it look like the before/after sides of this
// diff hunk rendered together — confirm against the actual file.
if let Some((addr_input, offset)) = is_mergeable_load(self.lower_ctx, inst) {
if let Some((addr_input, offset)) =
// NOTE(review): `Min32` presumably limits merging to loads of at least
// 32 bits — confirm against the `MergeableLoadSize` definition.
is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Min32)
{
return Some(SinkableLoad {
inst,
addr_input,
offset,
});
}
}
None
}
fn sinkable_load_exact(&mut self, val: Value) -> Option<SinkableLoad> {
let input = self.lower_ctx.get_value_as_source_or_const(val);
if let InputSourceInst::UniqueUse(inst, 0) = input.inst {
if let Some((addr_input, offset)) =
is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Exact)
{
return Some(SinkableLoad {
inst,
addr_input,