x64: Improve codegen for i8x16.shr_u (#5906)
This catches a case that #5880 didn't previously handle, allowing a constant load to be folded into the instruction that uses it rather than forcing the constant to be loaded into a temporary register first.
@@ -522,12 +522,10 @@
             ;; Shift `src` using 16x8. Unfortunately, a 16x8 shift will only be
             ;; correct for half of the lanes; the others must be fixed up with
             ;; the mask below.
-            (unmasked Xmm (x64_psrlw src (mov_rmi_to_xmm masked_amt)))
-            (mask_addr SyntheticAmode (ushr_i8x16_mask masked_amt))
-            (mask Reg (x64_load $I8X16 mask_addr (ExtKind.None))))
+            (unmasked Xmm (x64_psrlw src (mov_rmi_to_xmm masked_amt))))
         (sse_and $I8X16
                  unmasked
-                 (RegMem.Reg mask))))
+                 (ushr_i8x16_mask masked_amt))))
 
 ;; Get the address of the mask to use when fixing up the lanes that weren't
 ;; correctly generated by the 16x8 shift.
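To illustrate the effect, here is a rough sketch of the x64 this change avoids versus what it permits. The register choices and the constant-pool label `const_mask` are hypothetical, not taken from actual Cranelift output.

Before (mask loaded into a temporary register):

    movdqu  xmm1, [rip + const_mask]  ; load the fix-up mask into a temporary
    pand    xmm0, xmm1                ; apply the mask to the shifted lanes

After (constant load folded into the `and` as a memory operand):

    pand    xmm0, [rip + const_mask]  ; mask applied directly from memory

Folding the load saves an instruction and frees a temporary XMM register; this works because SSE `pand` accepts an xmm/m128 source operand, which is what passing the mask's address straight to `sse_and` exposes.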