x64: Add support for the pblendw instruction (#6023)

This commit adds another case to `shuffle` lowering in the x64 backend,
targeting the `{,v}pblendw` instruction. This instruction selects 16-bit
lanes from either of its two inputs according to an 8-bit immediate mask,
where each bit chooses which input the corresponding lane is taken from.
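
As a rough sketch of the lane-selection semantics (plain Rust for
illustration, not code from this commit):

fn pblendw(a: [u16; 8], b: [u16; 8], imm: u8) -> [u16; 8] {
    // Bit i of the immediate picks 16-bit lane i from `b` when set,
    // and from `a` otherwise.
    let mut out = [0u16; 8];
    for i in 0..8 {
        out[i] = if imm & (1 << i) != 0 { b[i] } else { a[i] };
    }
    out
}

fn main() {
    // The tests below use the immediate 0b10011001 (153, or 0x99).
    let a = [1, 2, 3, 4, 5, 6, 7, 8];
    let b = [9, 10, 11, 12, 13, 14, 15, 16];
    assert_eq!(pblendw(a, b, 0b1001_1001), [9, 2, 3, 12, 13, 6, 7, 16]);
}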
Alex Crichton
2023-03-15 12:20:43 -05:00
committed by GitHub
parent fcddb9ca81
commit 6ed90f86c8
8 changed files with 132 additions and 14 deletions

@@ -114,3 +114,31 @@ block0(v0: i64x2, v1: i64x2):
 ; popq %rbp
 ; retq
+function %pblendw_0b10011001(i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8):
+v2 = bitcast.i8x16 little v0
+v3 = bitcast.i8x16 little v1
+v4 = shuffle v2, v3, [16 17 2 3 4 5 22 23 24 25 10 11 12 13 30 31]
+v5 = bitcast.i16x8 little v4
+return v5
+}
+; VCode:
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; vpblendw $153, %xmm0, %xmm1, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; pushq %rbp
+; movq %rsp, %rbp
+; block1: ; offset 0x4
+; vpblendw $0x99, %xmm1, %xmm0, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; retq
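
Note how the byte-level shuffle mask above maps onto the `$153` (0x99)
immediate: each pair of byte indices forms an in-place 16-bit lane drawn
wholly from one input. A hypothetical recognizer for such masks
(`pblendw_imm` is illustrative only, not the actual ISLE lowering rule)
could look like:

fn pblendw_imm(mask: [u8; 16]) -> Option<u8> {
    let mut imm = 0u8;
    for lane in 0..8 {
        let (lo, hi) = (mask[lane * 2], mask[lane * 2 + 1]);
        // Each 16-bit lane must be a consecutive byte pair that stays in
        // place and comes entirely from one input (bytes 0..16 or 16..32).
        if hi != lo + 1 || usize::from(lo % 16) != lane * 2 {
            return None;
        }
        if lo >= 16 {
            imm |= 1 << lane; // lane taken from the second input
        }
    }
    Some(imm)
}

// For [16 17 2 3 4 5 22 23 24 25 10 11 12 13 30 31] this yields
// Some(0b10011001), i.e. the 153/0x99 immediate seen above.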

@@ -654,9 +654,7 @@ block0(v0: i8x16, v1: i8x16):
 ; pushq %rbp
 ; movq %rsp, %rbp
 ; block0:
-; movdqa %xmm0, %xmm4
-; movdqa %xmm1, %xmm0
-; palignr $0, %xmm0, %xmm4, %xmm0
+; pblendw $0, %xmm0, %xmm1, %xmm0
 ; movq %rbp, %rsp
 ; popq %rbp
 ; ret
@@ -666,9 +664,7 @@ block0(v0: i8x16, v1: i8x16):
 ; pushq %rbp
 ; movq %rsp, %rbp
 ; block1: ; offset 0x4
-; movdqa %xmm0, %xmm4
-; movdqa %xmm1, %xmm0
-; palignr $0, %xmm4, %xmm0
+; pblendw $0, %xmm1, %xmm0
 ; movq %rbp, %rsp
 ; popq %rbp
 ; retq
@@ -770,9 +766,7 @@ block0(v0: i8x16, v1: i8x16):
 ; pushq %rbp
 ; movq %rsp, %rbp
 ; block0:
-; movdqa %xmm0, %xmm4
-; movdqa %xmm1, %xmm0
-; palignr $16, %xmm0, %xmm4, %xmm0
+; pblendw $255, %xmm0, %xmm1, %xmm0
 ; movq %rbp, %rsp
 ; popq %rbp
 ; ret
@@ -782,9 +776,35 @@ block0(v0: i8x16, v1: i8x16):
 ; pushq %rbp
 ; movq %rsp, %rbp
 ; block1: ; offset 0x4
-; movdqa %xmm0, %xmm4
-; movdqa %xmm1, %xmm0
-; palignr $0x10, %xmm4, %xmm0
+; pblendw $0xff, %xmm1, %xmm0
 ; movq %rbp, %rsp
 ; popq %rbp
 ; retq
+function %pblendw_0b10011001(i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8):
+v2 = bitcast.i8x16 little v0
+v3 = bitcast.i8x16 little v1
+v4 = shuffle v2, v3, [16 17 2 3 4 5 22 23 24 25 10 11 12 13 30 31]
+v5 = bitcast.i16x8 little v4
+return v5
+}
+; VCode:
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; pblendw $153, %xmm0, %xmm1, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; pushq %rbp
+; movq %rsp, %rbp
+; block1: ; offset 0x4
+; pblendw $0x99, %xmm1, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; retq

@@ -553,3 +553,13 @@ block0(v0: i64x2, v1: i64x2):
 return v5
 }
 ; run: %aarch64_rev64_words([0x0102030405060708 0x0807060504030201], [0 0]) == [0x0506070801020304 0x0403020108070605]
+function %pblendw_0b10011001(i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8):
+v2 = bitcast.i8x16 little v0
+v3 = bitcast.i8x16 little v1
+v4 = shuffle v2, v3, [16 17 2 3 4 5 22 23 24 25 10 11 12 13 30 31]
+v5 = bitcast.i16x8 little v4
+return v5
+}
+; run: %pblendw_0b10011001([1 2 3 4 5 6 7 8], [9 10 11 12 13 14 15 16]) == [9 2 3 12 13 6 7 16]