x64: Lower widening and narrowing operations in ISLE (#4722)

Lower uwiden_high, uwiden_low, swiden_high, swiden_low, snarrow, and unarrow in ISLE.
Author: Trevor Elliott
Date: 2022-08-18 11:53:24 -07:00 (committed by GitHub)
Parent: 7d9a359f51
Commit: 8b6019909b
6 changed files with 387 additions and 207 deletions
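
For context: each of these lowerings is expressed as an ISLE rule that matches a typed CLIF instruction and calls an x64 instruction constructor. The sketch below shows the general shape for a single-instruction case such as uwiden_low; the constructor name x64_pmovzxbw and the exact pattern syntax are illustrative assumptions, not a quotation of this commit.

;; Sketch only: lower a low-half unsigned widen of an i8x16 input to a
;; single pmovzxbw. Constructor and pattern names are illustrative.
(rule (lower (has_type $I16X8 (uwiden_low val @ (value_type $I8X16))))
      (x64_pmovzxbw val))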


@@ -0,0 +1,80 @@
test compile precise-output
target x86_64

function %f1(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
  v2 = snarrow v0, v1
  return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; packsswb %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f2(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
  v2 = snarrow v0, v1
  return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; packssdw %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f3(f64x2) -> i32x4 {
block0(v0: f64x2):
  v1 = fcvt_to_sint_sat.i64x2 v0
  v2 = vconst.i64x2 0x00
  v3 = snarrow v1, v2
  return v3
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movdqa %xmm0, %xmm5
; cmppd $0, %xmm5, %xmm0, %xmm5
; load_const VCodeConstant(0), %xmm6
; andps %xmm5, %xmm6, %xmm5
; minpd %xmm0, %xmm5, %xmm0
; cvttpd2dq %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f4(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
  v2 = unarrow v0, v1
  return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; packuswb %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f5(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
  v2 = unarrow v0, v1
  return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; packusdw %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
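
Each narrowing case above lowers to a single SSE pack instruction, so the ISLE side amounts to one rule per (opcode, lane-width) pair. A hedged sketch of that mapping, with assumed x64_pack* constructor names:

;; Sketch only: saturating narrows map directly to pack instructions.
(rule (lower (has_type $I8X16 (snarrow a b))) (x64_packsswb a b))
(rule (lower (has_type $I16X8 (snarrow a b))) (x64_packssdw a b))
(rule (lower (has_type $I8X16 (unarrow a b))) (x64_packuswb a b))
(rule (lower (has_type $I16X8 (unarrow a b))) (x64_packusdw a b))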


@@ -0,0 +1,177 @@
test compile precise-output
target x86_64

function %f1(i8x16) -> i16x8 {
block0(v0: i8x16):
  v1 = swiden_low v0
  return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; pmovsxbw %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f2(i16x8) -> i32x4 {
block0(v0: i16x8):
  v1 = swiden_low v0
  return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; pmovsxwd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f3(i32x4) -> i64x2 {
block0(v0: i32x4):
  v1 = swiden_low v0
  return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; pmovsxdq %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f4(i8x16) -> i16x8 {
block0(v0: i8x16):
  v1 = swiden_high v0
  return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; palignr $8, %xmm0, %xmm0, %xmm0
; pmovsxbw %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f5(i16x8) -> i32x4 {
block0(v0: i16x8):
  v1 = swiden_high v0
  return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; palignr $8, %xmm0, %xmm0, %xmm0
; pmovsxwd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f6(i32x4) -> i64x2 {
block0(v0: i32x4):
  v1 = swiden_high v0
  return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; pshufd $238, %xmm0, %xmm3
; pmovsxdq %xmm3, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f7(i8x16) -> i16x8 {
block0(v0: i8x16):
  v1 = uwiden_low v0
  return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; pmovzxbw %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f8(i16x8) -> i32x4 {
block0(v0: i16x8):
  v1 = uwiden_low v0
  return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; pmovzxwd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f9(i32x4) -> i64x2 {
block0(v0: i32x4):
  v1 = uwiden_low v0
  return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; pmovzxdq %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f10(i8x16) -> i16x8 {
block0(v0: i8x16):
  v1 = uwiden_high v0
  return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; palignr $8, %xmm0, %xmm0, %xmm0
; pmovzxbw %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f11(i16x8) -> i32x4 {
block0(v0: i16x8):
  v1 = uwiden_high v0
  return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; palignr $8, %xmm0, %xmm0, %xmm0
; pmovzxwd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f12(i32x4) -> i64x2 {
block0(v0: i32x4):
  v1 = uwiden_high v0
  return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; pshufd $238, %xmm0, %xmm3
; pmovzxdq %xmm3, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
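
The *_high cases need an extra step because pmovsx/pmovzx read only the low 64 bits of their source: the expected output above first shuffles the high half down (palignr for 8- and 16-bit lanes, pshufd $238 for 32-bit lanes) and then extends it. A hedged ISLE sketch of that two-instruction pattern, with assumed constructor names and signatures:

;; Sketch only: move the high half into the low half, then sign-extend.
;; The x64_palignr signature and constructor names are illustrative.
(rule (lower (has_type $I16X8 (swiden_high val @ (value_type $I8X16))))
      (let ((hi Xmm (x64_palignr val val 8)))
        (x64_pmovsxbw hi)))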