test run
target aarch64
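
; Run tests for dynamic SIMD narrowing (snarrow, unarrow, uunarrow) and
; widening (swiden_high, swiden_low). Each function splats its scalar
; arguments into dynamic vectors, whose types (dt0, dt1) pair a fixed vector
; type with a target-constant scale obtained from dyn_scale_target_const,
; applies the operation under test, and uses extract_vector to return a
; fixed-size subvector that the run directives can check.

; `snarrow` combines two vectors into one with twice the lanes at half the
; lane width, saturating out-of-range values to the signed minimum and
; maximum; lanes from the first operand form the low half of the result.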
function %snarrow_i16x8(i16, i16) -> i8x16 {
    gv0 = dyn_scale_target_const.i16x8
    gv1 = dyn_scale_target_const.i8x16
    dt0 = i16x8*gv0
    dt1 = i8x16*gv1

block0(v0: i16, v1: i16):
    v2 = splat.dt0 v0
    v3 = splat.dt0 v1
    v4 = snarrow.dt0 v2, v3
    v5 = extract_vector v4, 0
    return v5
}
; run: %snarrow_i16x8(1, -1) == [1 1 1 1 1 1 1 1 -1 -1 -1 -1 -1 -1 -1 -1]
; run: %snarrow_i16x8(32767, -32768) == [127 127 127 127 127 127 127 127 -128 -128 -128 -128 -128 -128 -128 -128]

function %snarrow_i32x4(i32, i32) -> i16x8 {
    gv0 = dyn_scale_target_const.i32x4
    gv1 = dyn_scale_target_const.i16x8
    dt0 = i32x4*gv0
    dt1 = i16x8*gv1

block0(v0: i32, v1: i32):
    v2 = splat.dt0 v0
    v3 = splat.dt0 v1
    v4 = snarrow.dt0 v2, v3
    v5 = extract_vector v4, 0
    return v5
}
; run: %snarrow_i32x4(1, -1) == [1 1 1 1 -1 -1 -1 -1]
; run: %snarrow_i32x4(-65536, 65535) == [-32768 -32768 -32768 -32768 32767 32767 32767 32767]

function %snarrow_i64x2(i64, i64) -> i32x4 {
    gv0 = dyn_scale_target_const.i64x2
    gv1 = dyn_scale_target_const.i32x4
    dt0 = i64x2*gv0
    dt1 = i32x4*gv1

block0(v0: i64, v1: i64):
    v2 = splat.dt0 v0
    v3 = splat.dt0 v1
    v4 = snarrow.dt0 v2, v3
    v5 = extract_vector v4, 0
    return v5
}
; run: %snarrow_i64x2(-65536, -5000000000) == [-65536 -65536 -2147483648 -2147483648]
; run: %snarrow_i64x2(65535, 5000000000) == [65535 65535 2147483647 2147483647]

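; `unarrow` also narrows to twice the lanes at half the width, but while the
; inputs are interpreted as signed, out-of-range values saturate to the
; unsigned range: negative lanes clamp to 0 and overflowing lanes to the
; unsigned maximum.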
function %unarrow_i16x8(i16, i16) -> i8x16 {
    gv0 = dyn_scale_target_const.i16x8
    gv1 = dyn_scale_target_const.i8x16
    dt0 = i16x8*gv0
    dt1 = i8x16*gv1

block0(v0: i16, v1: i16):
    v2 = splat.dt0 v0
    v3 = splat.dt0 v1
    v4 = unarrow.dt0 v2, v3
    v5 = extract_vector v4, 0
    return v5
}
; run: %unarrow_i16x8(1, -1) == [1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0]
; run: %unarrow_i16x8(32767, -32768) == [255 255 255 255 255 255 255 255 0 0 0 0 0 0 0 0]

function %unarrow_i32x4(i32, i32) -> i16x8 {
    gv0 = dyn_scale_target_const.i32x4
    gv1 = dyn_scale_target_const.i16x8
    dt0 = i32x4*gv0
    dt1 = i16x8*gv1

block0(v0: i32, v1: i32):
    v2 = splat.dt0 v0
    v3 = splat.dt0 v1
    v4 = unarrow.dt0 v2, v3
    v5 = extract_vector v4, 0
    return v5
}
; run: %unarrow_i32x4(1, -1) == [1 1 1 1 0 0 0 0]
; run: %unarrow_i32x4(65536, -65536) == [65535 65535 65535 65535 0 0 0 0]

function %unarrow_i64x2(i64, i64) -> i32x4 {
    gv0 = dyn_scale_target_const.i64x2
    gv1 = dyn_scale_target_const.i32x4
    dt0 = i64x2*gv0
    dt1 = i32x4*gv1

block0(v0: i64, v1: i64):
    v2 = splat.dt0 v0
    v3 = splat.dt0 v1
    v4 = unarrow.dt0 v2, v3
    v5 = extract_vector v4, 0
    return v5
}
; run: %unarrow_i64x2(1, -1) == [1 1 0 0]
; run: %unarrow_i64x2(4294967296, 1) == [4294967295 4294967295 1 1]

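; `uunarrow` narrows in the same way but interprets its inputs as unsigned,
; so negative scalars splatted in appear as large unsigned lanes and saturate
; to the unsigned maximum of the narrow type.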
function %uunarrow_i16x8(i16, i16) -> i8x16 {
    gv0 = dyn_scale_target_const.i16x8
    gv1 = dyn_scale_target_const.i8x16
    dt0 = i16x8*gv0
    dt1 = i8x16*gv1

block0(v0: i16, v1: i16):
    v2 = splat.dt0 v0
    v3 = splat.dt0 v1
    v4 = uunarrow.dt0 v2, v3
    v5 = extract_vector v4, 0
    return v5
}
; run: %uunarrow_i16x8(1, -1) == [1 1 1 1 1 1 1 1 255 255 255 255 255 255 255 255]
; run: %uunarrow_i16x8(32767, -32768) == [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255]

function %uunarrow_i32x4(i32, i32) -> i16x8 {
    gv0 = dyn_scale_target_const.i32x4
    gv1 = dyn_scale_target_const.i16x8
    dt0 = i32x4*gv0
    dt1 = i16x8*gv1

block0(v0: i32, v1: i32):
    v2 = splat.dt0 v0
    v3 = splat.dt0 v1
    v4 = uunarrow.dt0 v2, v3
    v5 = extract_vector v4, 0
    return v5
}
; run: %uunarrow_i32x4(1, -1) == [1 1 1 1 65535 65535 65535 65535]
; run: %uunarrow_i32x4(65536, -65536) == [65535 65535 65535 65535 65535 65535 65535 65535]

function %uunarrow_i64x2(i64, i64) -> i32x4 {
    gv0 = dyn_scale_target_const.i64x2
    gv1 = dyn_scale_target_const.i32x4
    dt0 = i64x2*gv0
    dt1 = i32x4*gv1

block0(v0: i64, v1: i64):
    v2 = splat.dt0 v0
    v3 = splat.dt0 v1
    v4 = uunarrow.dt0 v2, v3
    v5 = extract_vector v4, 0
    return v5
}
; run: %uunarrow_i64x2(1, -1) == [1 1 4294967295 4294967295]
; run: %uunarrow_i64x2(4294967296, 1) == [4294967295 4294967295 1 1]

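; `swiden_high` sign-extends the high-half lanes of a vector to twice the
; lane width; since each input here is a splat, every widened lane carries
; the original scalar value.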
function %swidenhigh_i8x16(i8) -> i16x8 {
    gv0 = dyn_scale_target_const.i16x8
    gv1 = dyn_scale_target_const.i8x16
    dt0 = i8x16*gv1
    dt1 = i16x8*gv0

block0(v0: i8):
    v1 = splat.dt0 v0
    v2 = swiden_high v1
    v3 = extract_vector v2, 0
    return v3
}
; run: %swidenhigh_i8x16(9) == [9 9 9 9 9 9 9 9]

function %swidenhigh_i16x8(i16) -> i32x4 {
    gv0 = dyn_scale_target_const.i32x4
    gv1 = dyn_scale_target_const.i16x8
    dt0 = i16x8*gv1
    dt1 = i32x4*gv0

block0(v0: i16):
    v1 = splat.dt0 v0
    v2 = swiden_high v1
    v3 = extract_vector v2, 0
    return v3
}
; run: %swidenhigh_i16x8(-8) == [-8 -8 -8 -8]

function %swidenhigh_i32x4(i32) -> i64x2 {
    gv0 = dyn_scale_target_const.i32x4
    gv1 = dyn_scale_target_const.i64x2
    dt0 = i64x2*gv1
    dt1 = i32x4*gv0

block0(v0: i32):
    v1 = splat.dt1 v0
    v2 = swiden_high v1
    v3 = extract_vector v2, 0
    return v3
}
; run: %swidenhigh_i32x4(-4) == [-4 -4]

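; `swiden_low` is the counterpart that sign-extends the low-half lanes.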
function %swidenlow_i8x16(i8) -> i16x8 {
    gv0 = dyn_scale_target_const.i16x8
    gv1 = dyn_scale_target_const.i8x16
    dt0 = i8x16*gv1
    dt1 = i16x8*gv0

block0(v0: i8):
    v1 = splat.dt0 v0
    v2 = swiden_low v1
    v3 = extract_vector v2, 0
    return v3
}
; run: %swidenlow_i8x16(9) == [9 9 9 9 9 9 9 9]

function %swidenlow_i16x8(i16) -> i32x4 {
    gv0 = dyn_scale_target_const.i32x4
    gv1 = dyn_scale_target_const.i16x8
    dt0 = i16x8*gv1
    dt1 = i32x4*gv0

block0(v0: i16):
    v1 = splat.dt0 v0
    v2 = swiden_low v1
    v3 = extract_vector v2, 0
    return v3
}
; run: %swidenlow_i16x8(-8) == [-8 -8 -8 -8]

function %swidenlow_i32x4(i32) -> i64x2 {
    gv0 = dyn_scale_target_const.i32x4
    gv1 = dyn_scale_target_const.i64x2
    dt0 = i64x2*gv1
    dt1 = i32x4*gv0

block0(v0: i32):
    v1 = splat.dt1 v0
    v2 = swiden_low v1
    v3 = extract_vector v2, 0
    return v3
}
; run: %swidenlow_i32x4(-4) == [-4 -4]