x64: Improve codegen for splats (#6025)
This commit goes through the lowerings for the CLIF `splat` instruction
and improves the support for each operator. Many of these lowerings are
mirrored from v8/SpiderMonkey and there are a number of improvements:
* AVX2 `v{p,}broadcast*` instructions are added and used when available.
* Float-based splats are much simpler and always a single-instruction
* Integer-based splats don't insert into an uninit xmm value and instead
start out with a `movd` to move into an `xmm` register. This
thoeretically breaks dependencies with prior instructions since `movd`
creates a fresh new value in the destination register.
* Loads are now sunk into all of the instructions. A new extractor,
`sinkable_load_exact`, was added to sink the i8/i16 loads.
This commit is contained in:
@@ -11,7 +11,7 @@ use crate::{isle_common_prelude_methods, isle_lower_prelude_methods};
|
||||
use generated_code::{Context, MInst, RegisterClass};
|
||||
|
||||
// Types that the generated ISLE code uses via `use super::*`.
|
||||
use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode};
|
||||
use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode, MergeableLoadSize};
|
||||
use crate::ir::LibCall;
|
||||
use crate::isa::x64::lower::emit_vm_call;
|
||||
use crate::isa::x64::X64Backend;
|
||||
@@ -174,6 +174,11 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
|
||||
self.backend.x64_flags.has_avx()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn has_avx2(&mut self) -> bool {
|
||||
self.backend.x64_flags.has_avx2()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn avx512vl_enabled(&mut self, _: Type) -> bool {
|
||||
self.backend.x64_flags.use_avx512vl_simd()
|
||||
@@ -268,7 +273,25 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
|
||||
fn sinkable_load(&mut self, val: Value) -> Option<SinkableLoad> {
|
||||
let input = self.lower_ctx.get_value_as_source_or_const(val);
|
||||
if let InputSourceInst::UniqueUse(inst, 0) = input.inst {
|
||||
if let Some((addr_input, offset)) = is_mergeable_load(self.lower_ctx, inst) {
|
||||
if let Some((addr_input, offset)) =
|
||||
is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Min32)
|
||||
{
|
||||
return Some(SinkableLoad {
|
||||
inst,
|
||||
addr_input,
|
||||
offset,
|
||||
});
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn sinkable_load_exact(&mut self, val: Value) -> Option<SinkableLoad> {
|
||||
let input = self.lower_ctx.get_value_as_source_or_const(val);
|
||||
if let InputSourceInst::UniqueUse(inst, 0) = input.inst {
|
||||
if let Some((addr_input, offset)) =
|
||||
is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Exact)
|
||||
{
|
||||
return Some(SinkableLoad {
|
||||
inst,
|
||||
addr_input,
|
||||
|
||||
Reference in New Issue
Block a user