Port Fcopysign..FcvtToSintSat to ISLE (AArch64) (#4753)

* Port `Fcopysign`..`FcvtToSintSat` to ISLE (AArch64)

Ported the existing implementations of the following opcodes to ISLE on
AArch64:
- `Fcopysign`
  - Also introduced missing support for `fcopysign` on vector values, as
    per the docs.
  - This introduces the vector encoding for the `SLI` machine
    instruction.
- `FcvtToUint`
- `FcvtToSint`
- `FcvtFromUint`
- `FcvtFromSint`
- `FcvtToUintSat`
- `FcvtToSintSat`

Copyright (c) 2022 Arm Limited

* Document helpers and abstract conversion checks
This commit is contained in:
Damian Heaton
2022-08-24 18:37:14 +01:00
committed by GitHub
parent 7e3c481f4e
commit 94bcbe8446
12 changed files with 863 additions and 548 deletions

View File

@@ -9,8 +9,8 @@ block0(v0: i8):
}
; block0:
; uxtb w4, w0
; ucvtf s0, w4
; uxtb w3, w0
; ucvtf s0, w3
; ret
function u0:0(i8) -> f64 {
@@ -20,8 +20,8 @@ block0(v0: i8):
}
; block0:
; uxtb w4, w0
; ucvtf d0, w4
; uxtb w3, w0
; ucvtf d0, w3
; ret
function u0:0(i16) -> f32 {
@@ -31,8 +31,8 @@ block0(v0: i16):
}
; block0:
; uxth w4, w0
; ucvtf s0, w4
; uxth w3, w0
; ucvtf s0, w3
; ret
function u0:0(i16) -> f64 {
@@ -42,8 +42,8 @@ block0(v0: i16):
}
; block0:
; uxth w4, w0
; ucvtf d0, w4
; uxth w3, w0
; ucvtf d0, w3
; ret
function u0:0(f32) -> i8 {
@@ -55,13 +55,13 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; fmov s6, #-1
; fcmp s0, s6
; fmov s5, #-1
; fcmp s0, s5
; b.gt 8 ; udf
; movz x10, #17280, LSL #16
; fmov s6, w10
; fcmp s0, s6
; b.mi 8 ; udf
; fmov s18, w10
; fcmp s0, s18
; b.lt 8 ; udf
; fcvtzu w0, s0
; ret
@@ -74,13 +74,13 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; fmov d6, #-1
; fcmp d0, d6
; fmov d5, #-1
; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #16496, LSL #48
; fmov d6, x10
; fcmp d0, d6
; b.mi 8 ; udf
; fmov d18, x10
; fcmp d0, d18
; b.lt 8 ; udf
; fcvtzu w0, d0
; ret
@@ -93,13 +93,13 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; fmov s6, #-1
; fcmp s0, s6
; fmov s5, #-1
; fcmp s0, s5
; b.gt 8 ; udf
; movz x10, #18304, LSL #16
; fmov s6, w10
; fcmp s0, s6
; b.mi 8 ; udf
; fmov s18, w10
; fcmp s0, s18
; b.lt 8 ; udf
; fcvtzu w0, s0
; ret
@@ -112,13 +112,13 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; fmov d6, #-1
; fcmp d0, d6
; fmov d5, #-1
; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #16624, LSL #48
; fmov d6, x10
; fcmp d0, d6
; b.mi 8 ; udf
; fmov d18, x10
; fcmp d0, d18
; b.lt 8 ; udf
; fcvtzu w0, d0
; ret

View File

@@ -333,13 +333,13 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; fmov s6, #-1
; fcmp s0, s6
; fmov s5, #-1
; fcmp s0, s5
; b.gt 8 ; udf
; movz x10, #20352, LSL #16
; fmov s6, w10
; fcmp s0, s6
; b.mi 8 ; udf
; fmov s18, w10
; fcmp s0, s18
; b.lt 8 ; udf
; fcvtzu w0, s0
; ret
@@ -352,14 +352,14 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; movz x7, #52992, LSL #16
; fmov s7, w7
; fcmp s0, s7
; movz x6, #52992, LSL #16
; fmov s6, w6
; fcmp s0, s6
; b.ge 8 ; udf
; movz x12, #20224, LSL #16
; fmov s7, w12
; fcmp s0, s7
; b.mi 8 ; udf
; fmov s20, w12
; fcmp s0, s20
; b.lt 8 ; udf
; fcvtzs w0, s0
; ret
@@ -372,13 +372,13 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; fmov s6, #-1
; fcmp s0, s6
; fmov s5, #-1
; fcmp s0, s5
; b.gt 8 ; udf
; movz x10, #24448, LSL #16
; fmov s6, w10
; fcmp s0, s6
; b.mi 8 ; udf
; fmov s18, w10
; fcmp s0, s18
; b.lt 8 ; udf
; fcvtzu x0, s0
; ret
@@ -391,14 +391,14 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; movz x7, #57088, LSL #16
; fmov s7, w7
; fcmp s0, s7
; movz x6, #57088, LSL #16
; fmov s6, w6
; fcmp s0, s6
; b.ge 8 ; udf
; movz x12, #24320, LSL #16
; fmov s7, w12
; fcmp s0, s7
; b.mi 8 ; udf
; fmov s20, w12
; fcmp s0, s20
; b.lt 8 ; udf
; fcvtzs x0, s0
; ret
@@ -411,13 +411,13 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; fmov d6, #-1
; fcmp d0, d6
; fmov d5, #-1
; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #16880, LSL #48
; fmov d6, x10
; fcmp d0, d6
; b.mi 8 ; udf
; fmov d18, x10
; fcmp d0, d18
; b.lt 8 ; udf
; fcvtzu w0, d0
; ret
@@ -430,13 +430,13 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; ldr d6, pc+8 ; b 12 ; data.f64 -2147483649
; fcmp d0, d6
; ldr d5, pc+8 ; b 12 ; data.f64 -2147483649
; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #16864, LSL #48
; fmov d6, x10
; fcmp d0, d6
; b.mi 8 ; udf
; fmov d18, x10
; fcmp d0, d18
; b.lt 8 ; udf
; fcvtzs w0, d0
; ret
@@ -449,13 +449,13 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; fmov d6, #-1
; fcmp d0, d6
; fmov d5, #-1
; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #17392, LSL #48
; fmov d6, x10
; fcmp d0, d6
; b.mi 8 ; udf
; fmov d18, x10
; fcmp d0, d18
; b.lt 8 ; udf
; fcvtzu x0, d0
; ret
@@ -468,14 +468,14 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; movz x7, #50144, LSL #48
; fmov d7, x7
; fcmp d0, d7
; movz x6, #50144, LSL #48
; fmov d6, x6
; fcmp d0, d6
; b.ge 8 ; udf
; movz x12, #17376, LSL #48
; fmov d7, x12
; fcmp d0, d7
; b.mi 8 ; udf
; fmov d20, x12
; fcmp d0, d20
; b.lt 8 ; udf
; fcvtzs x0, d0
; ret
@@ -566,14 +566,14 @@ block0(v0: f32):
}
; block0:
; movz x6, #20352, LSL #16
; fmov s5, w6
; fmin s7, s0, s5
; movi v5.2s, #0
; fmax s7, s7, s5
; movz x4, #20352, LSL #16
; fmov s4, w4
; fmin s7, s0, s4
; movi v17.2s, #0
; fmax s19, s7, s17
; fcmp s0, s0
; fcsel s7, s5, s7, ne
; fcvtzu w0, s7
; fcsel s22, s17, s19, ne
; fcvtzu w0, s22
; ret
function %f50(f32) -> i32 {
@@ -583,16 +583,16 @@ block0(v0: f32):
}
; block0:
; movz x6, #20224, LSL #16
; fmov s5, w6
; fmin s7, s0, s5
; movz x4, #20224, LSL #16
; fmov s4, w4
; fmin s7, s0, s4
; movz x10, #52992, LSL #16
; fmov s5, w10
; fmax s7, s7, s5
; movi v5.2s, #0
; fmov s18, w10
; fmax s21, s7, s18
; movi v23.16b, #0
; fcmp s0, s0
; fcsel s7, s5, s7, ne
; fcvtzs w0, s7
; fcsel s26, s23, s21, ne
; fcvtzs w0, s26
; ret
function %f51(f32) -> i64 {
@@ -602,14 +602,14 @@ block0(v0: f32):
}
; block0:
; movz x6, #24448, LSL #16
; fmov s5, w6
; fmin s7, s0, s5
; movi v5.2s, #0
; fmax s7, s7, s5
; movz x4, #24448, LSL #16
; fmov s4, w4
; fmin s7, s0, s4
; movi v17.2s, #0
; fmax s19, s7, s17
; fcmp s0, s0
; fcsel s7, s5, s7, ne
; fcvtzu x0, s7
; fcsel s22, s17, s19, ne
; fcvtzu x0, s22
; ret
function %f52(f32) -> i64 {
@@ -619,16 +619,16 @@ block0(v0: f32):
}
; block0:
; movz x6, #24320, LSL #16
; fmov s5, w6
; fmin s7, s0, s5
; movz x4, #24320, LSL #16
; fmov s4, w4
; fmin s7, s0, s4
; movz x10, #57088, LSL #16
; fmov s5, w10
; fmax s7, s7, s5
; movi v5.2s, #0
; fmov s18, w10
; fmax s21, s7, s18
; movi v23.16b, #0
; fcmp s0, s0
; fcsel s7, s5, s7, ne
; fcvtzs x0, s7
; fcsel s26, s23, s21, ne
; fcvtzs x0, s26
; ret
function %f53(f64) -> i32 {
@@ -638,13 +638,13 @@ block0(v0: f64):
}
; block0:
; ldr d4, pc+8 ; b 12 ; data.f64 4294967295
; fmin d6, d0, d4
; movi v4.2s, #0
; fmax d6, d6, d4
; ldr d3, pc+8 ; b 12 ; data.f64 4294967295
; fmin d5, d0, d3
; movi v7.2s, #0
; fmax d17, d5, d7
; fcmp d0, d0
; fcsel d6, d4, d6, ne
; fcvtzu w0, d6
; fcsel d20, d7, d17, ne
; fcvtzu w0, d20
; ret
function %f54(f64) -> i32 {
@@ -654,15 +654,15 @@ block0(v0: f64):
}
; block0:
; ldr d4, pc+8 ; b 12 ; data.f64 2147483647
; fmin d6, d0, d4
; ldr d3, pc+8 ; b 12 ; data.f64 2147483647
; fmin d5, d0, d3
; movz x8, #49632, LSL #48
; fmov d4, x8
; fmax d6, d6, d4
; movi v4.2s, #0
; fmov d16, x8
; fmax d19, d5, d16
; movi v21.16b, #0
; fcmp d0, d0
; fcsel d6, d4, d6, ne
; fcvtzs w0, d6
; fcsel d24, d21, d19, ne
; fcvtzs w0, d24
; ret
function %f55(f64) -> i64 {
@@ -672,14 +672,14 @@ block0(v0: f64):
}
; block0:
; movz x6, #17392, LSL #48
; fmov d5, x6
; fmin d7, d0, d5
; movi v5.2s, #0
; fmax d7, d7, d5
; movz x4, #17392, LSL #48
; fmov d4, x4
; fmin d7, d0, d4
; movi v17.2s, #0
; fmax d19, d7, d17
; fcmp d0, d0
; fcsel d7, d5, d7, ne
; fcvtzu x0, d7
; fcsel d22, d17, d19, ne
; fcvtzu x0, d22
; ret
function %f56(f64) -> i64 {
@@ -689,16 +689,16 @@ block0(v0: f64):
}
; block0:
; movz x6, #17376, LSL #48
; fmov d5, x6
; fmin d7, d0, d5
; movz x4, #17376, LSL #48
; fmov d4, x4
; fmin d7, d0, d4
; movz x10, #50144, LSL #48
; fmov d5, x10
; fmax d7, d7, d5
; movi v5.2s, #0
; fmov d18, x10
; fmax d21, d7, d18
; movi v23.16b, #0
; fcmp d0, d0
; fcsel d7, d5, d7, ne
; fcvtzs x0, d7
; fcsel d26, d23, d21, ne
; fcvtzs x0, d26
; ret
function %f57(f32x2) -> f32x2 {
@@ -946,3 +946,36 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2):
; mov v0.16b, v2.16b
; fmla v0.2d, v17.2d, v1.2d
; ret
function %f81(f32x2, f32x2) -> f32x2 {
block0(v0: f32x2, v1: f32x2):
v2 = fcopysign v0, v1
return v2
}
; block0:
; ushr v7.2s, v1.2s, #31
; sli v0.2s, v7.2s, #31
; ret
function %f82(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcopysign v0, v1
return v2
}
; block0:
; ushr v7.4s, v1.4s, #31
; sli v0.4s, v7.4s, #31
; ret
function %f83(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcopysign v0, v1
return v2
}
; block0:
; ushr v7.2d, v1.2d, #63
; sli v0.2d, v7.2d, #63
; ret

View File

@@ -0,0 +1,37 @@
test interpret
test run
target aarch64
; x86_64 and s390x do not support 64-bit vectors in `fcopysign`.
function %fcopysign_f32x2(f32x2, f32x2) -> f32x2 {
block0(v0: f32x2, v1: f32x2):
v2 = fcopysign v0, v1
return v2
}
; run: %fcopysign_f32x2([0x9.0 -0x9.0], [0x9.0 0x9.0]) == [0x9.0 0x9.0]
; run: %fcopysign_f32x2([0x9.0 -0x9.0], [-0x9.0 -0x9.0]) == [-0x9.0 -0x9.0]
; run: %fcopysign_f32x2([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-0x0.0 0x0.0]
; F32 Inf
; run: %fcopysign_f32x2([Inf -Inf], [Inf Inf]) == [Inf Inf]
; run: %fcopysign_f32x2([Inf -Inf], [-Inf -Inf]) == [-Inf -Inf]
; F32 Epsilon / Max / Min Positive
; run: %fcopysign_f32x2([0x1.000000p-23 -0x1.000000p-23], [-0x0.0 0x0.0]) == [-0x1.000000p-23 0x1.000000p-23]
; run: %fcopysign_f32x2([0x1.fffffep127 -0x1.fffffep127], [-0x0.0 0x0.0]) == [-0x1.fffffep127 0x1.fffffep127]
; run: %fcopysign_f32x2([0x1.000000p-126 -0x1.000000p-126], [-0x0.0 0x0.0]) == [-0x1.000000p-126 0x1.000000p-126]
; F32 Subnormals
; run: %fcopysign_f32x2([0x0.800000p-126 -0x0.800000p-126], [-0x0.0 0x0.0]) == [-0x0.800000p-126 0x0.800000p-126]
; run: %fcopysign_f32x2([0x0.000002p-126 -0x0.000002p-126], [-0x0.0 0x0.0]) == [-0x0.000002p-126 0x0.000002p-126]
; F32 NaN's
; Unlike with other operations, fcopysign is guaranteed to only affect the sign bit
; run: %fcopysign_f32x2([0x0.0 0x3.0], [-NaN +sNaN:0x1]) == [-0x0.0 0x3.0]
; run: %fcopysign_f32x2([Inf +NaN], [-NaN -NaN]) == [-Inf -NaN]
; run: %fcopysign_f32x2([-NaN +NaN:0x0], [+NaN -NaN]) == [+NaN -NaN:0x0]
; run: %fcopysign_f32x2([+NaN:0x1 +NaN:0x300001], [-NaN -NaN]) == [-NaN:0x1 -NaN:0x300001]
; run: %fcopysign_f32x2([-NaN:0x0 -NaN:0x1], [+NaN +NaN]) == [+NaN:0x0 +NaN:0x1]
; run: %fcopysign_f32x2([-NaN:0x300001 +sNaN:0x1], [+NaN -NaN]) == [+NaN:0x300001 -sNaN:0x1]
; run: %fcopysign_f32x2([-sNaN:0x1 +sNaN:0x200001], [+NaN -NaN]) == [+sNaN:0x1 -sNaN:0x200001]
; run: %fcopysign_f32x2([-sNaN:0x200001 -sNaN:0x200001], [+NaN +NaN]) == [+sNaN:0x200001 +sNaN:0x200001]

View File

@@ -0,0 +1,63 @@
test interpret
test run
target s390x
target aarch64
; x86_64 does not support SIMD fcopysign.
function %fcopysign_f32x4(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcopysign v0, v1
return v2
}
; run: %fcopysign_f32x4([0x9.0 -0x9.0 0x9.0 -0x9.0], [0x9.0 0x9.0 -0x9.0 -0x9.0]) == [0x9.0 0x9.0 -0x9.0 -0x9.0]
; run: %fcopysign_f32x4([0x0.0 -0x0.0 0x0.0 -0x0.0], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x0.0 0x0.0 -0x0.0 0x0.0]
; F32 Inf
; run: %fcopysign_f32x4([Inf -Inf Inf -Inf], [Inf Inf -Inf -Inf]) == [Inf Inf -Inf -Inf]
; F32 Epsilon / Max / Min Positive
; run: %fcopysign_f32x4([0x1.000000p-23 -0x1.000000p-23 0x1.fffffep127 -0x1.fffffep127], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x1.000000p-23 0x1.000000p-23 -0x1.fffffep127 0x1.fffffep127]
; run: %fcopysign_f32x4([0x1.000000p-126 -0x1.000000p-126 0x1.000000p-126 -0x1.000000p-126], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x1.000000p-126 0x1.000000p-126 -0x1.000000p-126 0x1.000000p-126]
; F32 Subnormals
; run: %fcopysign_f32x4([0x0.800000p-126 -0x0.800000p-126 0x0.000002p-126 -0x0.000002p-126], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x0.800000p-126 0x0.800000p-126 -0x0.000002p-126 0x0.000002p-126]
; F32 NaN's
; Unlike with other operations, fcopysign is guaranteed to only affect the sign bit
; run: %fcopysign_f32x4([0x0.0 0x3.0 Inf +NaN], [-NaN +sNaN:0x1 -NaN -NaN]) == [-0x0.0 0x3.0 -Inf -NaN]
; run: %fcopysign_f32x4([-NaN +NaN:0x0 +NaN:0x1 +NaN:0x300001], [+NaN -NaN -NaN -NaN]) == [+NaN -NaN:0x0 -NaN:0x1 -NaN:0x300001]
; run: %fcopysign_f32x4([-NaN:0x0 -NaN:0x1 -NaN:0x300001 +sNaN:0x1], [+NaN +NaN +NaN -NaN]) == [+NaN:0x0 +NaN:0x1 +NaN:0x300001 -sNaN:0x1]
; run: %fcopysign_f32x4([-sNaN:0x1 +sNaN:0x200001 -sNaN:0x200001 -sNaN:0x200001], [+NaN -NaN +NaN +NaN]) == [+sNaN:0x1 -sNaN:0x200001 +sNaN:0x200001 +sNaN:0x200001]
function %fcopysign_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcopysign v0, v1
return v2
}
; run: %fcopysign_f64x2([0x9.0 -0x9.0], [0x9.0 0x9.0]) == [0x9.0 0x9.0]
; run: %fcopysign_f64x2([0x9.0 -0x9.0], [-0x9.0 -0x9.0]) == [-0x9.0 -0x9.0]
; run: %fcopysign_f64x2([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-0x0.0 0x0.0]
; F64 Inf
; run: %fcopysign_f64x2([Inf -Inf], [Inf Inf]) == [Inf Inf]
; run: %fcopysign_f64x2([Inf -Inf], [-Inf -Inf]) == [-Inf -Inf]
; F64 Epsilon / Max / Min Positive
; run: %fcopysign_f64x2([0x1.0000000000000p-52 -0x1.0000000000000p-52], [-0x0.0 0x0.0]) == [-0x1.0000000000000p-52 0x1.0000000000000p-52]
; run: %fcopysign_f64x2([0x1.fffffffffffffp1023 -0x1.fffffffffffffp1023], [-0x0.0 0x0.0]) == [-0x1.fffffffffffffp1023 0x1.fffffffffffffp1023]
; run: %fcopysign_f64x2([0x1.0000000000000p-1022 -0x1.0000000000000p-1022], [-0x0.0 0x0.0]) == [-0x1.0000000000000p-1022 0x1.0000000000000p-1022]
; F64 Subnormals
; run: %fcopysign_f64x2([0x0.8000000000000p-1022 -0x0.8000000000000p-1022], [-0x0.0 0x0.0]) == [-0x0.8000000000000p-1022 0x0.8000000000000p-1022]
; run: %fcopysign_f64x2([0x0.0000000000001p-1022 -0x0.0000000000001p-1022], [-0x0.0 0x0.0]) == [-0x0.0000000000001p-1022 0x0.0000000000001p-1022]
; F64 NaN's
; Unlike with other operations, fcopysign is guaranteed to only affect the sign bit
; run: %fcopysign_f64x2([0x0.0 0x3.0], [-NaN +sNaN:0x1]) == [-0x0.0 0x3.0]
; run: %fcopysign_f64x2([Inf +NaN], [-NaN -NaN]) == [-Inf -NaN]
; run: %fcopysign_f64x2([-NaN +NaN:0x0], [+NaN -NaN]) == [+NaN -NaN:0x0]
; run: %fcopysign_f64x2([+NaN:0x1 +NaN:0x4000000000001], [-NaN -NaN]) == [-NaN:0x1 -NaN:0x4000000000001]
; run: %fcopysign_f64x2([-NaN:0x0 -NaN:0x1], [+NaN +NaN]) == [+NaN:0x0 +NaN:0x1]
; run: %fcopysign_f64x2([-NaN:0x4000000000001 +sNaN:0x1], [+NaN -NaN]) == [+NaN:0x4000000000001 -sNaN:0x1]
; run: %fcopysign_f64x2([-sNaN:0x1 +sNaN:0x4000000000001], [+NaN -NaN]) == [+sNaN:0x1 -sNaN:0x4000000000001]
; run: %fcopysign_f64x2([-sNaN:0x4000000000001 -sNaN:0x4000000000001], [+NaN +NaN]) == [+sNaN:0x4000000000001 +sNaN:0x4000000000001]