x64: Optimize store-of-extract-lane-0 (#5924)
* x64: Optimize store-of-extract-lane-0 The `movss` and `movsd` instructions can be used to store the 0th lane of a 32x4 or a 64x2 vector (e.g. `f32x4` or `f64x2`) into memory, enabling fusing a `store` and an `extractlane` instruction. * Fix merge conflict with `main`
This commit is contained in:
@@ -2653,6 +2653,24 @@
|
||||
(x64_movrm $I64 addr_lo value_lo)
|
||||
(x64_movrm $I64 addr_hi value_hi)))))
|
||||
|
||||
;; Slightly optimize storing the first lane of a vector to memory: a `store`
|
||||
;; whose operand is an `extractlane`. When lane 0 specifically is selected the
|
||||
;; standard `movss` and `movsd` instructions can be used as if we were storing an
|
||||
;; f32 or f64, even though the source may be an integer vector, since the
|
||||
;; result of the instruction is the same.
|
||||
;; 32-bit case: a `store` whose stored operand is an `extractlane` of lane 0 is
;; lowered to a single `x64_movss_store`. The vector operand of the
;; `extractlane` (`vec`) is handed to the store directly, and the destination
;; is the amode built from the store's own flags, address and offset.
(rule 2
  (lower
    (store flags
           (has_type (ty_32 _)
                     (extractlane vec (u8_from_uimm8 0)))
           addr
           offset))
  (side_effect
    (x64_movss_store (to_amode flags addr offset) vec)))
|
||||
;; 64-bit case: a `store` whose stored operand is an `extractlane` of lane 0 is
;; lowered to a single `x64_movsd_store`. The vector operand of the
;; `extractlane` (`vec`) is handed to the store directly, and the destination
;; is the amode built from the store's own flags, address and offset.
(rule 3
  (lower
    (store flags
           (has_type (ty_64 _)
                     (extractlane vec (u8_from_uimm8 0)))
           addr
           offset))
  (side_effect
    (x64_movsd_store (to_amode flags addr offset) vec)))
|
||||
|
||||
;; Rules for `load*` + ALU op + `store*` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Add mem, reg
|
||||
|
||||
Reference in New Issue
Block a user