Rework the ISA flag checking extractors for x64 (#4878)

Using fallible extractors that produce no values for flag checks means
that it's not possible to pattern match on cases where those flags are
false. This change reworks the existing flag-checking extractors to be
infallible, returning the flag's boolean value from the context instead,
so rules now match the flag value explicitly (e.g. `(use_sse41 $true)`).
This commit is contained in:
Trevor Elliott
2022-09-07 13:49:35 -07:00
committed by GitHub
parent f063082474
commit caad14826c
3 changed files with 74 additions and 114 deletions

View File

@@ -1386,35 +1386,35 @@
;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl avx512vl_enabled () Type)
(extern extractor avx512vl_enabled avx512vl_enabled)
(decl avx512vl_enabled (bool) Type)
(extern extractor infallible avx512vl_enabled avx512vl_enabled)
(decl avx512dq_enabled () Type)
(extern extractor avx512dq_enabled avx512dq_enabled)
(decl avx512dq_enabled (bool) Type)
(extern extractor infallible avx512dq_enabled avx512dq_enabled)
(decl avx512f_enabled () Type)
(extern extractor avx512f_enabled avx512f_enabled)
(decl avx512f_enabled (bool) Type)
(extern extractor infallible avx512f_enabled avx512f_enabled)
(decl avx512bitalg_enabled () Type)
(extern extractor avx512bitalg_enabled avx512bitalg_enabled)
(decl avx512bitalg_enabled (bool) Type)
(extern extractor infallible avx512bitalg_enabled avx512bitalg_enabled)
(decl avx512vbmi_enabled () Type)
(extern extractor avx512vbmi_enabled avx512vbmi_enabled)
(decl avx512vbmi_enabled (bool) Type)
(extern extractor infallible avx512vbmi_enabled avx512vbmi_enabled)
(decl use_lzcnt () Type)
(extern extractor use_lzcnt use_lzcnt)
(decl use_lzcnt (bool) Type)
(extern extractor infallible use_lzcnt use_lzcnt)
(decl use_bmi1 () Type)
(extern extractor use_bmi1 use_bmi1)
(decl use_bmi1 (bool) Type)
(extern extractor infallible use_bmi1 use_bmi1)
(decl use_popcnt () Type)
(extern extractor use_popcnt use_popcnt)
(decl use_popcnt (bool) Type)
(extern extractor infallible use_popcnt use_popcnt)
(decl use_fma () Type)
(extern extractor use_fma use_fma)
(decl use_fma (bool) Type)
(extern extractor infallible use_fma use_fma)
(decl use_sse41 () Type)
(extern extractor use_sse41 use_sse41)
(decl use_sse41 (bool) Type)
(extern extractor infallible use_sse41 use_sse41)
;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@@ -939,8 +939,8 @@
;; With AVX-512 we can implement `i64x2` multiplication with a single
;; instruction.
(rule (lower (has_type (and (avx512vl_enabled)
(avx512dq_enabled)
(rule (lower (has_type (and (avx512vl_enabled $true)
(avx512dq_enabled $true)
(multi_lane 64 2))
(imul x y)))
(x64_vpmullq x y))
@@ -1167,8 +1167,8 @@
(x64_pabsd x))
;; When AVX512 is available, we can use a single `vpabsq` instruction.
(rule (lower (has_type (and (avx512vl_enabled)
(avx512f_enabled)
(rule (lower (has_type (and (avx512vl_enabled $true)
(avx512f_enabled $true)
$I64X2)
(iabs x)))
(x64_vpabsq x))
@@ -1733,7 +1733,7 @@
(rule 1 (lower
(has_type (and
(ty_32_or_64 ty)
(use_lzcnt))
(use_lzcnt $true))
(clz src)))
(x64_lzcnt ty src))
@@ -1775,7 +1775,7 @@
(rule 1 (lower
(has_type (and
(ty_32_or_64 ty)
(use_bmi1))
(use_bmi1 $true))
(ctz src)))
(x64_tzcnt ty src))
@@ -1811,21 +1811,21 @@
(rule 1 (lower
(has_type (and
(ty_32_or_64 ty)
(use_popcnt))
(use_popcnt $true))
(popcnt src)))
(x64_popcnt ty src))
(rule 1 (lower
(has_type (and
(ty_8_or_16 ty)
(use_popcnt))
(use_popcnt $true))
(popcnt src)))
(x64_popcnt $I32 (extend_to_gpr src $I32 (ExtendKind.Zero))))
(rule 1 (lower
(has_type (and
$I128
(use_popcnt))
(use_popcnt $true))
(popcnt src)))
(let ((lo_count Gpr (x64_popcnt $I64 (value_regs_get_gpr src 0)))
(hi_count Gpr (x64_popcnt $I64 (value_regs_get_gpr src 1))))
@@ -1916,8 +1916,8 @@
(rule 1 (lower (has_type (and
$I8X16
(avx512vl_enabled)
(avx512bitalg_enabled))
(avx512vl_enabled $true)
(avx512bitalg_enabled $true))
(popcnt src)))
(x64_vpopcntb src))
@@ -2480,13 +2480,13 @@
(libcall_3 (LibCall.FmaF32) x y z))
(rule (lower (has_type $F64 (fma x y z)))
(libcall_3 (LibCall.FmaF64) x y z))
(rule 1 (lower (has_type (and (use_fma) $F32) (fma x y z)))
(rule 1 (lower (has_type (and (use_fma $true) $F32) (fma x y z)))
(x64_vfmadd213ss x y z))
(rule 1 (lower (has_type (and (use_fma) $F64) (fma x y z)))
(rule 1 (lower (has_type (and (use_fma $true) $F64) (fma x y z)))
(x64_vfmadd213sd x y z))
(rule (lower (has_type (and (use_fma) $F32X4) (fma x y z)))
(rule (lower (has_type (and (use_fma $true) $F32X4) (fma x y z)))
(x64_vfmadd213ps x y z))
(rule (lower (has_type (and (use_fma) $F64X2) (fma x y z)))
(rule (lower (has_type (and (use_fma $true) $F64X2) (fma x y z)))
(x64_vfmadd213pd x y z))
;; Rules for `load*` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -2993,7 +2993,7 @@
;;
;; NOTE: the priority of 1 here is to break ties with the next case for $F32X4,
;; as it doesn't require either of the avx512 extensions to be enabled.
(rule 1 (lower (has_type (and (avx512vl_enabled) (avx512f_enabled) $F32X4)
(rule 1 (lower (has_type (and (avx512vl_enabled $true) (avx512f_enabled $true) $F32X4)
(fcvt_from_uint src)))
(x64_vcvtudq2ps src))
@@ -3332,82 +3332,82 @@
;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F32))))
(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F32))))
(x64_roundss a (RoundImm.RoundUp)))
(rule (lower (ceil a @ (value_type $F32)))
(libcall_1 (LibCall.CeilF32) a))
(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F64))))
(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F64))))
(x64_roundsd a (RoundImm.RoundUp)))
(rule (lower (ceil a @ (value_type $F64)))
(libcall_1 (LibCall.CeilF64) a))
(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F32X4))))
(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F32X4))))
(x64_roundps a (RoundImm.RoundUp)))
(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F64X2))))
(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F64X2))))
(x64_roundpd a (RoundImm.RoundUp)))
;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (use_sse41) (floor a @ (value_type $F32))))
(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F32))))
(x64_roundss a (RoundImm.RoundDown)))
(rule (lower (floor a @ (value_type $F32)))
(libcall_1 (LibCall.FloorF32) a))
(rule (lower (has_type (use_sse41) (floor a @ (value_type $F64))))
(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F64))))
(x64_roundsd a (RoundImm.RoundDown)))
(rule (lower (floor a @ (value_type $F64)))
(libcall_1 (LibCall.FloorF64) a))
(rule (lower (has_type (use_sse41) (floor a @ (value_type $F32X4))))
(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F32X4))))
(x64_roundps a (RoundImm.RoundDown)))
(rule (lower (has_type (use_sse41) (floor a @ (value_type $F64X2))))
(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F64X2))))
(x64_roundpd a (RoundImm.RoundDown)))
;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F32))))
(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F32))))
(x64_roundss a (RoundImm.RoundNearest)))
(rule (lower (nearest a @ (value_type $F32)))
(libcall_1 (LibCall.NearestF32) a))
(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F64))))
(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F64))))
(x64_roundsd a (RoundImm.RoundNearest)))
(rule (lower (nearest a @ (value_type $F64)))
(libcall_1 (LibCall.NearestF64) a))
(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F32X4))))
(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F32X4))))
(x64_roundps a (RoundImm.RoundNearest)))
(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F64X2))))
(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F64X2))))
(x64_roundpd a (RoundImm.RoundNearest)))
;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F32))))
(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F32))))
(x64_roundss a (RoundImm.RoundZero)))
(rule (lower (trunc a @ (value_type $F32)))
(libcall_1 (LibCall.TruncF32) a))
(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F64))))
(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F64))))
(x64_roundsd a (RoundImm.RoundZero)))
(rule (lower (trunc a @ (value_type $F64)))
(libcall_1 (LibCall.TruncF64) a))
(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F32X4))))
(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F32X4))))
(x64_roundps a (RoundImm.RoundZero)))
(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F64X2))))
(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F64X2))))
(x64_roundpd a (RoundImm.RoundZero)))
;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -3506,7 +3506,7 @@
;; For the case where the shuffle mask contains out-of-bounds values (values
;; greater than 31) we must mask off those resulting values in the result of
;; `vpermi2b`.
(rule (lower (has_type (and (avx512vl_enabled) (avx512vbmi_enabled))
(rule (lower (has_type (and (avx512vl_enabled $true) (avx512vbmi_enabled $true))
(shuffle a b (vec_mask_from_immediate
(perm_from_mask_with_zeros mask zeros)))))
(x64_andps
@@ -3515,7 +3515,7 @@
;; However, if the shuffle mask contains no out-of-bounds values, we can use
;; `vpermi2b` without any masking.
(rule (lower (has_type (and (avx512vl_enabled) (avx512vbmi_enabled))
(rule (lower (has_type (and (avx512vl_enabled $true) (avx512vbmi_enabled $true))
(shuffle a b (vec_mask_from_immediate mask))))
(x64_vpermi2b b a (x64_xmm_load_const $I8X16 (perm_from_mask mask))))

View File

@@ -209,93 +209,53 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
}
#[inline]
fn avx512vl_enabled(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_avx512vl_simd() {
Some(())
} else {
None
}
fn avx512vl_enabled(&mut self, _: Type) -> bool {
self.isa_flags.use_avx512vl_simd()
}
#[inline]
fn avx512dq_enabled(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_avx512dq_simd() {
Some(())
} else {
None
}
fn avx512dq_enabled(&mut self, _: Type) -> bool {
self.isa_flags.use_avx512dq_simd()
}
#[inline]
fn avx512f_enabled(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_avx512f_simd() {
Some(())
} else {
None
}
fn avx512f_enabled(&mut self, _: Type) -> bool {
self.isa_flags.use_avx512f_simd()
}
#[inline]
fn avx512bitalg_enabled(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_avx512bitalg_simd() {
Some(())
} else {
None
}
fn avx512bitalg_enabled(&mut self, _: Type) -> bool {
self.isa_flags.use_avx512bitalg_simd()
}
#[inline]
fn avx512vbmi_enabled(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_avx512vbmi_simd() {
Some(())
} else {
None
}
fn avx512vbmi_enabled(&mut self, _: Type) -> bool {
self.isa_flags.use_avx512vbmi_simd()
}
#[inline]
fn use_lzcnt(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_lzcnt() {
Some(())
} else {
None
}
fn use_lzcnt(&mut self, _: Type) -> bool {
self.isa_flags.use_lzcnt()
}
#[inline]
fn use_bmi1(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_bmi1() {
Some(())
} else {
None
}
fn use_bmi1(&mut self, _: Type) -> bool {
self.isa_flags.use_bmi1()
}
#[inline]
fn use_popcnt(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_popcnt() {
Some(())
} else {
None
}
fn use_popcnt(&mut self, _: Type) -> bool {
self.isa_flags.use_popcnt()
}
#[inline]
fn use_fma(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_fma() {
Some(())
} else {
None
}
fn use_fma(&mut self, _: Type) -> bool {
self.isa_flags.use_fma()
}
#[inline]
fn use_sse41(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_sse41() {
Some(())
} else {
None
}
fn use_sse41(&mut self, _: Type) -> bool {
self.isa_flags.use_sse41()
}
#[inline]