diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 97540660b8..0bd0d859ff 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1639,8 +1639,8 @@ (decl pure use_fma () bool) (extern constructor use_fma use_fma) -(decl use_sse41 (bool) Type) -(extern extractor infallible use_sse41 use_sse41) +(decl pure use_sse41 () bool) +(extern constructor use_sse41 use_sse41) (decl pure use_sse42 () bool) (extern constructor use_sse42 use_sse42) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 05649b820f..255f73bb82 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -941,7 +941,7 @@ ;; ;; A * B = (Al * Bl) + ((Ah * Bl) + (Al * Bh)) << 32 ;; -;; Note, the algorithm will use `pmuldq` which operates directly on the lower +;; Note, the algorithm will use `pmuludq` which operates directly on the lower ;; 32-bit (`Al` or `Bl`) of a lane and writes the result to the full 64-bits of ;; the lane of the destination. For this reason we don't need shifts to isolate ;; the lower 32-bits, however, we will need to use shifts to isolate the high @@ -999,6 +999,7 @@ x)) (swiden_high (and (value_type (multi_lane 32 4)) y))))) + (if-let $true (use_sse41)) (let ((x2 Xmm (x64_pshufd x 0xFA)) (y2 Xmm (x64_pshufd y 0xFA))) (x64_pmuldq x2 y2))) @@ -1031,6 +1032,7 @@ x)) (swiden_low (and (value_type (multi_lane 32 4)) y))))) + (if-let $true (use_sse41)) (let ((x2 Xmm (x64_pshufd x 0x50)) (y2 Xmm (x64_pshufd y 0x50))) (x64_pmuldq x2 y2))) @@ -3354,84 +3356,100 @@ ;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F32)))) - (x64_roundss a (RoundImm.RoundUp))) +(rule 1 (lower (ceil a @ (value_type $F32))) + (if-let $true (use_sse41)) + (x64_roundss a (RoundImm.RoundUp))) -(rule (lower (has_type (use_sse41 $false) (ceil a @ (value_type $F32)))) +(rule 1 (lower (ceil a @ (value_type $F64))) + (if-let $true (use_sse41)) + (x64_roundsd a (RoundImm.RoundUp))) + +(rule 1 (lower (ceil a @ (value_type $F32X4))) + (if-let $true (use_sse41)) + (x64_roundps a (RoundImm.RoundUp))) + +(rule 1 (lower (ceil a @ (value_type $F64X2))) + (if-let $true (use_sse41)) + (x64_roundpd a (RoundImm.RoundUp))) + +(rule (lower (ceil a @ (value_type $F32))) (libcall_1 (LibCall.CeilF32) a)) -(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F64)))) - (x64_roundsd a (RoundImm.RoundUp))) - -(rule (lower (has_type (use_sse41 $false) (ceil a @ (value_type $F64)))) +(rule (lower (ceil a @ (value_type $F64))) (libcall_1 (LibCall.CeilF64) a)) -(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F32X4)))) - (x64_roundps a (RoundImm.RoundUp))) - -(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F64X2)))) - (x64_roundpd a (RoundImm.RoundUp))) - ;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F32)))) - (x64_roundss a (RoundImm.RoundDown))) +(rule 1 (lower (floor a @ (value_type $F32))) + (if-let $true (use_sse41)) + (x64_roundss a (RoundImm.RoundDown))) -(rule (lower (has_type (use_sse41 $false) (floor a @ (value_type $F32)))) +(rule 1 (lower (floor a @ (value_type $F64))) + (if-let $true (use_sse41)) + (x64_roundsd a (RoundImm.RoundDown))) + +(rule 1 (lower (floor a @ (value_type $F32X4))) + (if-let $true (use_sse41)) + (x64_roundps a (RoundImm.RoundDown))) + +(rule 1 (lower (floor a @ (value_type $F64X2))) + (if-let $true (use_sse41)) + (x64_roundpd a (RoundImm.RoundDown))) + +(rule (lower (floor a @ (value_type $F32))) (libcall_1 (LibCall.FloorF32) a)) -(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F64)))) - (x64_roundsd a (RoundImm.RoundDown))) - -(rule (lower (has_type (use_sse41 $false) (floor a @ (value_type $F64)))) +(rule (lower (floor a @ (value_type $F64))) (libcall_1 (LibCall.FloorF64) a)) -(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F32X4)))) - (x64_roundps a (RoundImm.RoundDown))) - -(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F64X2)))) - (x64_roundpd a (RoundImm.RoundDown))) - ;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F32)))) - (x64_roundss a (RoundImm.RoundNearest))) +(rule 1 (lower (nearest a @ (value_type $F32))) + (if-let $true (use_sse41)) + (x64_roundss a (RoundImm.RoundNearest))) -(rule (lower (has_type (use_sse41 $false) (nearest a @ (value_type $F32)))) +(rule 1 (lower (nearest a @ (value_type $F64))) + (if-let $true (use_sse41)) + (x64_roundsd a (RoundImm.RoundNearest))) + +(rule 1 (lower (nearest a @ (value_type $F32X4))) + (if-let $true (use_sse41)) + (x64_roundps a (RoundImm.RoundNearest))) + +(rule 1 (lower (nearest a @ (value_type $F64X2))) + (if-let $true (use_sse41)) + (x64_roundpd a (RoundImm.RoundNearest))) + +(rule (lower (nearest a @ (value_type $F32))) (libcall_1 (LibCall.NearestF32) a)) -(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F64)))) - (x64_roundsd a (RoundImm.RoundNearest))) - -(rule (lower (has_type (use_sse41 $false) (nearest a @ (value_type $F64)))) +(rule (lower (nearest a @ (value_type $F64))) (libcall_1 (LibCall.NearestF64) a)) -(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F32X4)))) - (x64_roundps a (RoundImm.RoundNearest))) - -(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F64X2)))) - (x64_roundpd a (RoundImm.RoundNearest))) - ;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F32)))) - (x64_roundss a (RoundImm.RoundZero))) +(rule 1 (lower (trunc a @ (value_type $F32))) + (if-let $true (use_sse41)) + (x64_roundss a (RoundImm.RoundZero))) -(rule (lower (has_type (use_sse41 $false) (trunc a @ (value_type $F32)))) +(rule 1 (lower (trunc a @ (value_type $F64))) + (if-let $true (use_sse41)) + (x64_roundsd a (RoundImm.RoundZero))) + +(rule 1 (lower (trunc a @ (value_type $F32X4))) + (if-let $true (use_sse41)) + (x64_roundps a (RoundImm.RoundZero))) + +(rule 1 (lower (trunc a @ (value_type $F64X2))) + (if-let $true (use_sse41)) + (x64_roundpd a (RoundImm.RoundZero))) + +(rule (lower (trunc a @ (value_type $F32))) (libcall_1 (LibCall.TruncF32) a)) -(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F64)))) - (x64_roundsd a (RoundImm.RoundZero))) - -(rule (lower (has_type (use_sse41 $false) (trunc a @ (value_type $F64)))) +(rule (lower (trunc a @ (value_type $F64))) (libcall_1 (LibCall.TruncF64) a)) -(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F32X4)))) - (x64_roundps a (RoundImm.RoundZero))) - -(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F64X2)))) - (x64_roundpd a (RoundImm.RoundZero))) - ;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (stack_addr stack_slot offset)) @@ -3624,6 +3642,7 @@ ;; 0 chooses the corresponding 16-it lane from `a` and a bit of 1 chooses the ;; corresponding 16-bit lane from `b`. (rule 14 (lower (shuffle a b (pblendw_imm n))) + (if-let $true (use_sse41)) (x64_pblendw a b n)) (decl pblendw_imm (u8) Immediate) (extern extractor pblendw_imm pblendw_imm) diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 36302e39c5..425b68cbc2 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -225,7 +225,7 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { } #[inline] - fn use_sse41(&mut self, _: Type) -> bool { + fn use_sse41(&mut self) -> bool { self.backend.x64_flags.use_sse41() }