Rework the ISA flag checking extractors for x64 (#4878)
Using fallible extractors that produce no values for flag checks means that it's not possible to pattern match on cases where those flags are false. This change reworks the existing flag-checking extractors to be infallible, returning the flag's boolean value from the context instead.
This commit is contained in:
@@ -1386,35 +1386,35 @@
|
||||
|
||||
;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl avx512vl_enabled () Type)
|
||||
(extern extractor avx512vl_enabled avx512vl_enabled)
|
||||
(decl avx512vl_enabled (bool) Type)
|
||||
(extern extractor infallible avx512vl_enabled avx512vl_enabled)
|
||||
|
||||
(decl avx512dq_enabled () Type)
|
||||
(extern extractor avx512dq_enabled avx512dq_enabled)
|
||||
(decl avx512dq_enabled (bool) Type)
|
||||
(extern extractor infallible avx512dq_enabled avx512dq_enabled)
|
||||
|
||||
(decl avx512f_enabled () Type)
|
||||
(extern extractor avx512f_enabled avx512f_enabled)
|
||||
(decl avx512f_enabled (bool) Type)
|
||||
(extern extractor infallible avx512f_enabled avx512f_enabled)
|
||||
|
||||
(decl avx512bitalg_enabled () Type)
|
||||
(extern extractor avx512bitalg_enabled avx512bitalg_enabled)
|
||||
(decl avx512bitalg_enabled (bool) Type)
|
||||
(extern extractor infallible avx512bitalg_enabled avx512bitalg_enabled)
|
||||
|
||||
(decl avx512vbmi_enabled () Type)
|
||||
(extern extractor avx512vbmi_enabled avx512vbmi_enabled)
|
||||
(decl avx512vbmi_enabled (bool) Type)
|
||||
(extern extractor infallible avx512vbmi_enabled avx512vbmi_enabled)
|
||||
|
||||
(decl use_lzcnt () Type)
|
||||
(extern extractor use_lzcnt use_lzcnt)
|
||||
(decl use_lzcnt (bool) Type)
|
||||
(extern extractor infallible use_lzcnt use_lzcnt)
|
||||
|
||||
(decl use_bmi1 () Type)
|
||||
(extern extractor use_bmi1 use_bmi1)
|
||||
(decl use_bmi1 (bool) Type)
|
||||
(extern extractor infallible use_bmi1 use_bmi1)
|
||||
|
||||
(decl use_popcnt () Type)
|
||||
(extern extractor use_popcnt use_popcnt)
|
||||
(decl use_popcnt (bool) Type)
|
||||
(extern extractor infallible use_popcnt use_popcnt)
|
||||
|
||||
(decl use_fma () Type)
|
||||
(extern extractor use_fma use_fma)
|
||||
(decl use_fma (bool) Type)
|
||||
(extern extractor infallible use_fma use_fma)
|
||||
|
||||
(decl use_sse41 () Type)
|
||||
(extern extractor use_sse41 use_sse41)
|
||||
(decl use_sse41 (bool) Type)
|
||||
(extern extractor infallible use_sse41 use_sse41)
|
||||
|
||||
;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
@@ -939,8 +939,8 @@
|
||||
|
||||
;; With AVX-512 we can implement `i64x2` multiplication with a single
|
||||
;; instruction.
|
||||
(rule (lower (has_type (and (avx512vl_enabled)
|
||||
(avx512dq_enabled)
|
||||
(rule (lower (has_type (and (avx512vl_enabled $true)
|
||||
(avx512dq_enabled $true)
|
||||
(multi_lane 64 2))
|
||||
(imul x y)))
|
||||
(x64_vpmullq x y))
|
||||
@@ -1167,8 +1167,8 @@
|
||||
(x64_pabsd x))
|
||||
|
||||
;; When AVX512 is available, we can use a single `vpabsq` instruction.
|
||||
(rule (lower (has_type (and (avx512vl_enabled)
|
||||
(avx512f_enabled)
|
||||
(rule (lower (has_type (and (avx512vl_enabled $true)
|
||||
(avx512f_enabled $true)
|
||||
$I64X2)
|
||||
(iabs x)))
|
||||
(x64_vpabsq x))
|
||||
@@ -1733,7 +1733,7 @@
|
||||
(rule 1 (lower
|
||||
(has_type (and
|
||||
(ty_32_or_64 ty)
|
||||
(use_lzcnt))
|
||||
(use_lzcnt $true))
|
||||
(clz src)))
|
||||
(x64_lzcnt ty src))
|
||||
|
||||
@@ -1775,7 +1775,7 @@
|
||||
(rule 1 (lower
|
||||
(has_type (and
|
||||
(ty_32_or_64 ty)
|
||||
(use_bmi1))
|
||||
(use_bmi1 $true))
|
||||
(ctz src)))
|
||||
(x64_tzcnt ty src))
|
||||
|
||||
@@ -1811,21 +1811,21 @@
|
||||
(rule 1 (lower
|
||||
(has_type (and
|
||||
(ty_32_or_64 ty)
|
||||
(use_popcnt))
|
||||
(use_popcnt $true))
|
||||
(popcnt src)))
|
||||
(x64_popcnt ty src))
|
||||
|
||||
(rule 1 (lower
|
||||
(has_type (and
|
||||
(ty_8_or_16 ty)
|
||||
(use_popcnt))
|
||||
(use_popcnt $true))
|
||||
(popcnt src)))
|
||||
(x64_popcnt $I32 (extend_to_gpr src $I32 (ExtendKind.Zero))))
|
||||
|
||||
(rule 1 (lower
|
||||
(has_type (and
|
||||
$I128
|
||||
(use_popcnt))
|
||||
(use_popcnt $true))
|
||||
(popcnt src)))
|
||||
(let ((lo_count Gpr (x64_popcnt $I64 (value_regs_get_gpr src 0)))
|
||||
(hi_count Gpr (x64_popcnt $I64 (value_regs_get_gpr src 1))))
|
||||
@@ -1916,8 +1916,8 @@
|
||||
|
||||
(rule 1 (lower (has_type (and
|
||||
$I8X16
|
||||
(avx512vl_enabled)
|
||||
(avx512bitalg_enabled))
|
||||
(avx512vl_enabled $true)
|
||||
(avx512bitalg_enabled $true))
|
||||
(popcnt src)))
|
||||
(x64_vpopcntb src))
|
||||
|
||||
@@ -2480,13 +2480,13 @@
|
||||
(libcall_3 (LibCall.FmaF32) x y z))
|
||||
(rule (lower (has_type $F64 (fma x y z)))
|
||||
(libcall_3 (LibCall.FmaF64) x y z))
|
||||
(rule 1 (lower (has_type (and (use_fma) $F32) (fma x y z)))
|
||||
(rule 1 (lower (has_type (and (use_fma $true) $F32) (fma x y z)))
|
||||
(x64_vfmadd213ss x y z))
|
||||
(rule 1 (lower (has_type (and (use_fma) $F64) (fma x y z)))
|
||||
(rule 1 (lower (has_type (and (use_fma $true) $F64) (fma x y z)))
|
||||
(x64_vfmadd213sd x y z))
|
||||
(rule (lower (has_type (and (use_fma) $F32X4) (fma x y z)))
|
||||
(rule (lower (has_type (and (use_fma $true) $F32X4) (fma x y z)))
|
||||
(x64_vfmadd213ps x y z))
|
||||
(rule (lower (has_type (and (use_fma) $F64X2) (fma x y z)))
|
||||
(rule (lower (has_type (and (use_fma $true) $F64X2) (fma x y z)))
|
||||
(x64_vfmadd213pd x y z))
|
||||
|
||||
;; Rules for `load*` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
@@ -2993,7 +2993,7 @@
|
||||
;;
|
||||
;; NOTE: the priority of 1 here is to break ties with the next case for $F32X4,
|
||||
;; as it doesn't require either of the avx512 extensions to be enabled.
|
||||
(rule 1 (lower (has_type (and (avx512vl_enabled) (avx512f_enabled) $F32X4)
|
||||
(rule 1 (lower (has_type (and (avx512vl_enabled $true) (avx512f_enabled $true) $F32X4)
|
||||
(fcvt_from_uint src)))
|
||||
(x64_vcvtudq2ps src))
|
||||
|
||||
@@ -3332,82 +3332,82 @@
|
||||
|
||||
;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F32))))
|
||||
(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F32))))
|
||||
(x64_roundss a (RoundImm.RoundUp)))
|
||||
|
||||
(rule (lower (ceil a @ (value_type $F32)))
|
||||
(libcall_1 (LibCall.CeilF32) a))
|
||||
|
||||
(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F64))))
|
||||
(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F64))))
|
||||
(x64_roundsd a (RoundImm.RoundUp)))
|
||||
|
||||
(rule (lower (ceil a @ (value_type $F64)))
|
||||
(libcall_1 (LibCall.CeilF64) a))
|
||||
|
||||
(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F32X4))))
|
||||
(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F32X4))))
|
||||
(x64_roundps a (RoundImm.RoundUp)))
|
||||
|
||||
(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F64X2))))
|
||||
(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F64X2))))
|
||||
(x64_roundpd a (RoundImm.RoundUp)))
|
||||
|
||||
;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (use_sse41) (floor a @ (value_type $F32))))
|
||||
(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F32))))
|
||||
(x64_roundss a (RoundImm.RoundDown)))
|
||||
|
||||
(rule (lower (floor a @ (value_type $F32)))
|
||||
(libcall_1 (LibCall.FloorF32) a))
|
||||
|
||||
(rule (lower (has_type (use_sse41) (floor a @ (value_type $F64))))
|
||||
(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F64))))
|
||||
(x64_roundsd a (RoundImm.RoundDown)))
|
||||
|
||||
(rule (lower (floor a @ (value_type $F64)))
|
||||
(libcall_1 (LibCall.FloorF64) a))
|
||||
|
||||
(rule (lower (has_type (use_sse41) (floor a @ (value_type $F32X4))))
|
||||
(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F32X4))))
|
||||
(x64_roundps a (RoundImm.RoundDown)))
|
||||
|
||||
(rule (lower (has_type (use_sse41) (floor a @ (value_type $F64X2))))
|
||||
(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F64X2))))
|
||||
(x64_roundpd a (RoundImm.RoundDown)))
|
||||
|
||||
;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F32))))
|
||||
(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F32))))
|
||||
(x64_roundss a (RoundImm.RoundNearest)))
|
||||
|
||||
(rule (lower (nearest a @ (value_type $F32)))
|
||||
(libcall_1 (LibCall.NearestF32) a))
|
||||
|
||||
(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F64))))
|
||||
(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F64))))
|
||||
(x64_roundsd a (RoundImm.RoundNearest)))
|
||||
|
||||
(rule (lower (nearest a @ (value_type $F64)))
|
||||
(libcall_1 (LibCall.NearestF64) a))
|
||||
|
||||
(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F32X4))))
|
||||
(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F32X4))))
|
||||
(x64_roundps a (RoundImm.RoundNearest)))
|
||||
|
||||
(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F64X2))))
|
||||
(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F64X2))))
|
||||
(x64_roundpd a (RoundImm.RoundNearest)))
|
||||
|
||||
;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F32))))
|
||||
(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F32))))
|
||||
(x64_roundss a (RoundImm.RoundZero)))
|
||||
|
||||
(rule (lower (trunc a @ (value_type $F32)))
|
||||
(libcall_1 (LibCall.TruncF32) a))
|
||||
|
||||
(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F64))))
|
||||
(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F64))))
|
||||
(x64_roundsd a (RoundImm.RoundZero)))
|
||||
|
||||
(rule (lower (trunc a @ (value_type $F64)))
|
||||
(libcall_1 (LibCall.TruncF64) a))
|
||||
|
||||
(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F32X4))))
|
||||
(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F32X4))))
|
||||
(x64_roundps a (RoundImm.RoundZero)))
|
||||
|
||||
(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F64X2))))
|
||||
(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F64X2))))
|
||||
(x64_roundpd a (RoundImm.RoundZero)))
|
||||
|
||||
;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
@@ -3506,7 +3506,7 @@
|
||||
;; For the case where the shuffle mask contains out-of-bounds values (values
|
||||
;; greater than 31) we must mask off those resulting values in the result of
|
||||
;; `vpermi2b`.
|
||||
(rule (lower (has_type (and (avx512vl_enabled) (avx512vbmi_enabled))
|
||||
(rule (lower (has_type (and (avx512vl_enabled $true) (avx512vbmi_enabled $true))
|
||||
(shuffle a b (vec_mask_from_immediate
|
||||
(perm_from_mask_with_zeros mask zeros)))))
|
||||
(x64_andps
|
||||
@@ -3515,7 +3515,7 @@
|
||||
|
||||
;; However, if the shuffle mask contains no out-of-bounds values, we can use
|
||||
;; `vpermi2b` without any masking.
|
||||
(rule (lower (has_type (and (avx512vl_enabled) (avx512vbmi_enabled))
|
||||
(rule (lower (has_type (and (avx512vl_enabled $true) (avx512vbmi_enabled $true))
|
||||
(shuffle a b (vec_mask_from_immediate mask))))
|
||||
(x64_vpermi2b b a (x64_xmm_load_const $I8X16 (perm_from_mask mask))))
|
||||
|
||||
|
||||
@@ -209,93 +209,53 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn avx512vl_enabled(&mut self, _: Type) -> Option<()> {
|
||||
if self.isa_flags.use_avx512vl_simd() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
fn avx512vl_enabled(&mut self, _: Type) -> bool {
|
||||
self.isa_flags.use_avx512vl_simd()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn avx512dq_enabled(&mut self, _: Type) -> Option<()> {
|
||||
if self.isa_flags.use_avx512dq_simd() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
fn avx512dq_enabled(&mut self, _: Type) -> bool {
|
||||
self.isa_flags.use_avx512dq_simd()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn avx512f_enabled(&mut self, _: Type) -> Option<()> {
|
||||
if self.isa_flags.use_avx512f_simd() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
fn avx512f_enabled(&mut self, _: Type) -> bool {
|
||||
self.isa_flags.use_avx512f_simd()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn avx512bitalg_enabled(&mut self, _: Type) -> Option<()> {
|
||||
if self.isa_flags.use_avx512bitalg_simd() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
fn avx512bitalg_enabled(&mut self, _: Type) -> bool {
|
||||
self.isa_flags.use_avx512bitalg_simd()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn avx512vbmi_enabled(&mut self, _: Type) -> Option<()> {
|
||||
if self.isa_flags.use_avx512vbmi_simd() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
fn avx512vbmi_enabled(&mut self, _: Type) -> bool {
|
||||
self.isa_flags.use_avx512vbmi_simd()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn use_lzcnt(&mut self, _: Type) -> Option<()> {
|
||||
if self.isa_flags.use_lzcnt() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
fn use_lzcnt(&mut self, _: Type) -> bool {
|
||||
self.isa_flags.use_lzcnt()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn use_bmi1(&mut self, _: Type) -> Option<()> {
|
||||
if self.isa_flags.use_bmi1() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
fn use_bmi1(&mut self, _: Type) -> bool {
|
||||
self.isa_flags.use_bmi1()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn use_popcnt(&mut self, _: Type) -> Option<()> {
|
||||
if self.isa_flags.use_popcnt() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
fn use_popcnt(&mut self, _: Type) -> bool {
|
||||
self.isa_flags.use_popcnt()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn use_fma(&mut self, _: Type) -> Option<()> {
|
||||
if self.isa_flags.use_fma() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
fn use_fma(&mut self, _: Type) -> bool {
|
||||
self.isa_flags.use_fma()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn use_sse41(&mut self, _: Type) -> Option<()> {
|
||||
if self.isa_flags.use_sse41() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
fn use_sse41(&mut self, _: Type) -> bool {
|
||||
self.isa_flags.use_sse41()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
||||
Reference in New Issue
Block a user