Move bitselect->vselect optimization to x64 back-end (#5191)

The simplifier was performing an optimization to replace bitselect
with vselect if all bytes of the condition mask could be shown
to be all ones or all zeros.

This optimization only ever made any difference in codegen on the
x64 target.  Therefore, move this optimization to the x64 back-end
and perform it in ISLE instead.  Resulting codegen should be
unchanged, with slightly improved compile time.

This also eliminates a few endian-dependent bitcast operations.
This commit is contained in:
Ulrich Weigand
2022-11-03 21:17:36 +01:00
committed by GitHub
parent 3ef30b5b67
commit 137a8b710f
6 changed files with 152 additions and 135 deletions

View File

@@ -1255,6 +1255,26 @@
(b Xmm (sse_and_not ty cond_xmm if_false)))
(sse_or ty b a)))
;; If every byte of the condition is guaranteed to be all ones or all zeroes,
;; we can use x64_blend like vselect does.
;;
;; The rule is guarded by `all_ones_or_all_zeros`, so it only applies when that
;; helper matches the condition value.
;; NOTE(review): the explicit priority 1 is presumably there so this rule is
;; preferred over the generic and/and-not/or lowering of `bitselect` -- confirm.
(rule 1 (lower (has_type ty @ (multi_lane _bits _lanes)
(bitselect condition
if_true
if_false)))
(if (all_ones_or_all_zeros condition))
(x64_blend ty
condition
if_true
if_false))
;; Pure helper used as an `if` guard: it yields `$true` for the value shapes
;; listed below and has no rule for anything else.
(decl pure all_ones_or_all_zeros (Value) bool)
;; An `icmp` whose value type is multi-lane.
;; NOTE(review): presumably relies on vector comparisons producing
;; all-ones/all-zeros lanes -- confirm against the CLIF instruction docs.
(rule (all_ones_or_all_zeros (and (icmp _ _ _) (value_type (multi_lane _ _)))) $true)
;; An `fcmp` whose value type is multi-lane (same assumption as for `icmp`).
(rule (all_ones_or_all_zeros (and (fcmp _ _ _) (value_type (multi_lane _ _)))) $true)
;; A `vconst` whose constant is accepted by `vconst_all_ones_or_all_zeros`.
(rule (all_ones_or_all_zeros (vconst (vconst_all_ones_or_all_zeros))) $true)
;; Extractor over a `Constant` that binds no sub-values; it is declared
;; `extern`, so the matching logic is supplied outside of ISLE.
;; NOTE(review): expected to match only constants whose bytes are each 0x00 or
;; 0xFF -- verify against the external implementation of the same name.
(decl pure vconst_all_ones_or_all_zeros () Constant)
(extern extractor vconst_all_ones_or_all_zeros vconst_all_ones_or_all_zeros)
;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty @ (multi_lane _bits _lanes)

View File

@@ -713,6 +713,15 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
targets.len() as u32
}
/// Extractor hook: matches a constant whose data consists solely of `0x00`
/// and `0xFF` bytes.
///
/// Returns `Some(())` when every byte of the constant's data is either `0x00`
/// or `0xFF` (which includes the case of empty data), and `None` as soon as
/// any other byte value is present.
#[inline]
fn vconst_all_ones_or_all_zeros(&mut self, constant: Constant) -> Option<()> {
    let bytes = self.lower_ctx.get_constant_data(constant);
    // A single "mixed" byte is enough to reject the constant.
    let has_mixed_byte = bytes.iter().any(|&byte| byte != 0x00 && byte != 0xFF);
    if has_mixed_byte {
        None
    } else {
        Some(())
    }
}
#[inline]
fn fcvt_uint_mask_const(&mut self) -> VCodeConstant {
self.lower_ctx