Use regalloc constraints for sse blend operations (#5251)

Rather than naming xmm0 explicitly as the mask argument to instructions like blendvpd, use regalloc constraints to pin the mask operand to xmm0.
This commit is contained in:
Trevor Elliott
2022-11-14 16:44:34 -08:00
committed by GitHub
parent 72eda0c6ef
commit dece901d16
7 changed files with 135 additions and 77 deletions

View File

@@ -16,9 +16,9 @@ block0(v0: i8x16, v1: i8x16):
; pcmpeqb %xmm4, %xmm1, %xmm4
; movdqa %xmm0, %xmm7
; movdqa %xmm4, %xmm0
; movdqa %xmm1, %xmm5
; pblendvb %xmm5, %xmm7, %xmm5
; movdqa %xmm5, %xmm0
; movdqa %xmm1, %xmm4
; pblendvb %xmm4, %xmm7, %xmm4
; movdqa %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@@ -34,9 +34,9 @@ block0(v0: f32x4, v1: f32x4, v2: i32x4, v3: i32x4):
; movq %rsp, %rbp
; block0:
; cmpps $0, %xmm0, %xmm1, %xmm0
; movdqa %xmm3, %xmm7
; pblendvb %xmm7, %xmm2, %xmm7
; movdqa %xmm7, %xmm0
; movdqa %xmm3, %xmm6
; pblendvb %xmm6, %xmm2, %xmm6
; movdqa %xmm6, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@@ -72,10 +72,10 @@ block0(v0: i8x16, v1: i8x16):
; block0:
; movdqa %xmm0, %xmm5
; movdqu const(0), %xmm0
; movdqa %xmm5, %xmm7
; movdqa %xmm1, %xmm5
; pblendvb %xmm5, %xmm7, %xmm5
; movdqa %xmm5, %xmm0
; movdqa %xmm5, %xmm6
; movdqa %xmm1, %xmm4
; pblendvb %xmm4, %xmm6, %xmm4
; movdqa %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@@ -92,10 +92,10 @@ block0(v0: i16x8, v1: i16x8):
; block0:
; movdqa %xmm0, %xmm5
; movdqu const(0), %xmm0
; movdqa %xmm5, %xmm7
; movdqa %xmm1, %xmm5
; pblendvb %xmm5, %xmm7, %xmm5
; movdqa %xmm5, %xmm0
; movdqa %xmm5, %xmm6
; movdqa %xmm1, %xmm4
; pblendvb %xmm4, %xmm6, %xmm4
; movdqa %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

View File

@@ -137,9 +137,9 @@ block0(v0: i16x8, v1: i16x8, v2: i16x8):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movdqa %xmm2, %xmm5
; pblendvb %xmm5, %xmm1, %xmm5
; movdqa %xmm5, %xmm0
; movdqa %xmm2, %xmm4
; pblendvb %xmm4, %xmm1, %xmm4
; movdqa %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@@ -153,9 +153,9 @@ block0(v0: i32x4, v1: f32x4, v2: f32x4):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movdqa %xmm2, %xmm5
; blendvps %xmm5, %xmm1, %xmm5
; movdqa %xmm5, %xmm0
; movdqa %xmm2, %xmm4
; blendvps %xmm4, %xmm1, %xmm4
; movdqa %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@@ -169,9 +169,9 @@ block0(v0: i64x2, v1: f64x2, v2: f64x2):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movdqa %xmm2, %xmm5
; blendvpd %xmm5, %xmm1, %xmm5
; movdqa %xmm5, %xmm0
; movdqa %xmm2, %xmm4
; blendvpd %xmm4, %xmm1, %xmm4
; movdqa %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret