Port Fcopysign..FcvtToSintSat to ISLE (AArch64) (#4753)
* Port `Fcopysign`..`FcvtToSintSat` to ISLE (AArch64)
Ported the existing implementations of the following opcodes to ISLE on
AArch64:
- `Fcopysign`
- Also introduced missing support for `fcopysign` on vector values, as
per the docs.
- This introduces the vector encoding for the `SLI` machine
instruction.
- `FcvtToUint`
- `FcvtToSint`
- `FcvtFromUint`
- `FcvtFromSint`
- `FcvtToUintSat`
- `FcvtToSintSat`
Copyright (c) 2022 Arm Limited
* Document helpers and abstract conversion checks
This commit is contained in:
@@ -9,8 +9,8 @@ block0(v0: i8):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; uxtb w4, w0
|
||||
; ucvtf s0, w4
|
||||
; uxtb w3, w0
|
||||
; ucvtf s0, w3
|
||||
; ret
|
||||
|
||||
function u0:0(i8) -> f64 {
|
||||
@@ -20,8 +20,8 @@ block0(v0: i8):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; uxtb w4, w0
|
||||
; ucvtf d0, w4
|
||||
; uxtb w3, w0
|
||||
; ucvtf d0, w3
|
||||
; ret
|
||||
|
||||
function u0:0(i16) -> f32 {
|
||||
@@ -31,8 +31,8 @@ block0(v0: i16):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; uxth w4, w0
|
||||
; ucvtf s0, w4
|
||||
; uxth w3, w0
|
||||
; ucvtf s0, w3
|
||||
; ret
|
||||
|
||||
function u0:0(i16) -> f64 {
|
||||
@@ -42,8 +42,8 @@ block0(v0: i16):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; uxth w4, w0
|
||||
; ucvtf d0, w4
|
||||
; uxth w3, w0
|
||||
; ucvtf d0, w3
|
||||
; ret
|
||||
|
||||
function u0:0(f32) -> i8 {
|
||||
@@ -55,13 +55,13 @@ block0(v0: f32):
|
||||
; block0:
|
||||
; fcmp s0, s0
|
||||
; b.vc 8 ; udf
|
||||
; fmov s6, #-1
|
||||
; fcmp s0, s6
|
||||
; fmov s5, #-1
|
||||
; fcmp s0, s5
|
||||
; b.gt 8 ; udf
|
||||
; movz x10, #17280, LSL #16
|
||||
; fmov s6, w10
|
||||
; fcmp s0, s6
|
||||
; b.mi 8 ; udf
|
||||
; fmov s18, w10
|
||||
; fcmp s0, s18
|
||||
; b.lt 8 ; udf
|
||||
; fcvtzu w0, s0
|
||||
; ret
|
||||
|
||||
@@ -74,13 +74,13 @@ block0(v0: f64):
|
||||
; block0:
|
||||
; fcmp d0, d0
|
||||
; b.vc 8 ; udf
|
||||
; fmov d6, #-1
|
||||
; fcmp d0, d6
|
||||
; fmov d5, #-1
|
||||
; fcmp d0, d5
|
||||
; b.gt 8 ; udf
|
||||
; movz x10, #16496, LSL #48
|
||||
; fmov d6, x10
|
||||
; fcmp d0, d6
|
||||
; b.mi 8 ; udf
|
||||
; fmov d18, x10
|
||||
; fcmp d0, d18
|
||||
; b.lt 8 ; udf
|
||||
; fcvtzu w0, d0
|
||||
; ret
|
||||
|
||||
@@ -93,13 +93,13 @@ block0(v0: f32):
|
||||
; block0:
|
||||
; fcmp s0, s0
|
||||
; b.vc 8 ; udf
|
||||
; fmov s6, #-1
|
||||
; fcmp s0, s6
|
||||
; fmov s5, #-1
|
||||
; fcmp s0, s5
|
||||
; b.gt 8 ; udf
|
||||
; movz x10, #18304, LSL #16
|
||||
; fmov s6, w10
|
||||
; fcmp s0, s6
|
||||
; b.mi 8 ; udf
|
||||
; fmov s18, w10
|
||||
; fcmp s0, s18
|
||||
; b.lt 8 ; udf
|
||||
; fcvtzu w0, s0
|
||||
; ret
|
||||
|
||||
@@ -112,13 +112,13 @@ block0(v0: f64):
|
||||
; block0:
|
||||
; fcmp d0, d0
|
||||
; b.vc 8 ; udf
|
||||
; fmov d6, #-1
|
||||
; fcmp d0, d6
|
||||
; fmov d5, #-1
|
||||
; fcmp d0, d5
|
||||
; b.gt 8 ; udf
|
||||
; movz x10, #16624, LSL #48
|
||||
; fmov d6, x10
|
||||
; fcmp d0, d6
|
||||
; b.mi 8 ; udf
|
||||
; fmov d18, x10
|
||||
; fcmp d0, d18
|
||||
; b.lt 8 ; udf
|
||||
; fcvtzu w0, d0
|
||||
; ret
|
||||
|
||||
|
||||
@@ -333,13 +333,13 @@ block0(v0: f32):
|
||||
; block0:
|
||||
; fcmp s0, s0
|
||||
; b.vc 8 ; udf
|
||||
; fmov s6, #-1
|
||||
; fcmp s0, s6
|
||||
; fmov s5, #-1
|
||||
; fcmp s0, s5
|
||||
; b.gt 8 ; udf
|
||||
; movz x10, #20352, LSL #16
|
||||
; fmov s6, w10
|
||||
; fcmp s0, s6
|
||||
; b.mi 8 ; udf
|
||||
; fmov s18, w10
|
||||
; fcmp s0, s18
|
||||
; b.lt 8 ; udf
|
||||
; fcvtzu w0, s0
|
||||
; ret
|
||||
|
||||
@@ -352,14 +352,14 @@ block0(v0: f32):
|
||||
; block0:
|
||||
; fcmp s0, s0
|
||||
; b.vc 8 ; udf
|
||||
; movz x7, #52992, LSL #16
|
||||
; fmov s7, w7
|
||||
; fcmp s0, s7
|
||||
; movz x6, #52992, LSL #16
|
||||
; fmov s6, w6
|
||||
; fcmp s0, s6
|
||||
; b.ge 8 ; udf
|
||||
; movz x12, #20224, LSL #16
|
||||
; fmov s7, w12
|
||||
; fcmp s0, s7
|
||||
; b.mi 8 ; udf
|
||||
; fmov s20, w12
|
||||
; fcmp s0, s20
|
||||
; b.lt 8 ; udf
|
||||
; fcvtzs w0, s0
|
||||
; ret
|
||||
|
||||
@@ -372,13 +372,13 @@ block0(v0: f32):
|
||||
; block0:
|
||||
; fcmp s0, s0
|
||||
; b.vc 8 ; udf
|
||||
; fmov s6, #-1
|
||||
; fcmp s0, s6
|
||||
; fmov s5, #-1
|
||||
; fcmp s0, s5
|
||||
; b.gt 8 ; udf
|
||||
; movz x10, #24448, LSL #16
|
||||
; fmov s6, w10
|
||||
; fcmp s0, s6
|
||||
; b.mi 8 ; udf
|
||||
; fmov s18, w10
|
||||
; fcmp s0, s18
|
||||
; b.lt 8 ; udf
|
||||
; fcvtzu x0, s0
|
||||
; ret
|
||||
|
||||
@@ -391,14 +391,14 @@ block0(v0: f32):
|
||||
; block0:
|
||||
; fcmp s0, s0
|
||||
; b.vc 8 ; udf
|
||||
; movz x7, #57088, LSL #16
|
||||
; fmov s7, w7
|
||||
; fcmp s0, s7
|
||||
; movz x6, #57088, LSL #16
|
||||
; fmov s6, w6
|
||||
; fcmp s0, s6
|
||||
; b.ge 8 ; udf
|
||||
; movz x12, #24320, LSL #16
|
||||
; fmov s7, w12
|
||||
; fcmp s0, s7
|
||||
; b.mi 8 ; udf
|
||||
; fmov s20, w12
|
||||
; fcmp s0, s20
|
||||
; b.lt 8 ; udf
|
||||
; fcvtzs x0, s0
|
||||
; ret
|
||||
|
||||
@@ -411,13 +411,13 @@ block0(v0: f64):
|
||||
; block0:
|
||||
; fcmp d0, d0
|
||||
; b.vc 8 ; udf
|
||||
; fmov d6, #-1
|
||||
; fcmp d0, d6
|
||||
; fmov d5, #-1
|
||||
; fcmp d0, d5
|
||||
; b.gt 8 ; udf
|
||||
; movz x10, #16880, LSL #48
|
||||
; fmov d6, x10
|
||||
; fcmp d0, d6
|
||||
; b.mi 8 ; udf
|
||||
; fmov d18, x10
|
||||
; fcmp d0, d18
|
||||
; b.lt 8 ; udf
|
||||
; fcvtzu w0, d0
|
||||
; ret
|
||||
|
||||
@@ -430,13 +430,13 @@ block0(v0: f64):
|
||||
; block0:
|
||||
; fcmp d0, d0
|
||||
; b.vc 8 ; udf
|
||||
; ldr d6, pc+8 ; b 12 ; data.f64 -2147483649
|
||||
; fcmp d0, d6
|
||||
; ldr d5, pc+8 ; b 12 ; data.f64 -2147483649
|
||||
; fcmp d0, d5
|
||||
; b.gt 8 ; udf
|
||||
; movz x10, #16864, LSL #48
|
||||
; fmov d6, x10
|
||||
; fcmp d0, d6
|
||||
; b.mi 8 ; udf
|
||||
; fmov d18, x10
|
||||
; fcmp d0, d18
|
||||
; b.lt 8 ; udf
|
||||
; fcvtzs w0, d0
|
||||
; ret
|
||||
|
||||
@@ -449,13 +449,13 @@ block0(v0: f64):
|
||||
; block0:
|
||||
; fcmp d0, d0
|
||||
; b.vc 8 ; udf
|
||||
; fmov d6, #-1
|
||||
; fcmp d0, d6
|
||||
; fmov d5, #-1
|
||||
; fcmp d0, d5
|
||||
; b.gt 8 ; udf
|
||||
; movz x10, #17392, LSL #48
|
||||
; fmov d6, x10
|
||||
; fcmp d0, d6
|
||||
; b.mi 8 ; udf
|
||||
; fmov d18, x10
|
||||
; fcmp d0, d18
|
||||
; b.lt 8 ; udf
|
||||
; fcvtzu x0, d0
|
||||
; ret
|
||||
|
||||
@@ -468,14 +468,14 @@ block0(v0: f64):
|
||||
; block0:
|
||||
; fcmp d0, d0
|
||||
; b.vc 8 ; udf
|
||||
; movz x7, #50144, LSL #48
|
||||
; fmov d7, x7
|
||||
; fcmp d0, d7
|
||||
; movz x6, #50144, LSL #48
|
||||
; fmov d6, x6
|
||||
; fcmp d0, d6
|
||||
; b.ge 8 ; udf
|
||||
; movz x12, #17376, LSL #48
|
||||
; fmov d7, x12
|
||||
; fcmp d0, d7
|
||||
; b.mi 8 ; udf
|
||||
; fmov d20, x12
|
||||
; fcmp d0, d20
|
||||
; b.lt 8 ; udf
|
||||
; fcvtzs x0, d0
|
||||
; ret
|
||||
|
||||
@@ -566,14 +566,14 @@ block0(v0: f32):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; movz x6, #20352, LSL #16
|
||||
; fmov s5, w6
|
||||
; fmin s7, s0, s5
|
||||
; movi v5.2s, #0
|
||||
; fmax s7, s7, s5
|
||||
; movz x4, #20352, LSL #16
|
||||
; fmov s4, w4
|
||||
; fmin s7, s0, s4
|
||||
; movi v17.2s, #0
|
||||
; fmax s19, s7, s17
|
||||
; fcmp s0, s0
|
||||
; fcsel s7, s5, s7, ne
|
||||
; fcvtzu w0, s7
|
||||
; fcsel s22, s17, s19, ne
|
||||
; fcvtzu w0, s22
|
||||
; ret
|
||||
|
||||
function %f50(f32) -> i32 {
|
||||
@@ -583,16 +583,16 @@ block0(v0: f32):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; movz x6, #20224, LSL #16
|
||||
; fmov s5, w6
|
||||
; fmin s7, s0, s5
|
||||
; movz x4, #20224, LSL #16
|
||||
; fmov s4, w4
|
||||
; fmin s7, s0, s4
|
||||
; movz x10, #52992, LSL #16
|
||||
; fmov s5, w10
|
||||
; fmax s7, s7, s5
|
||||
; movi v5.2s, #0
|
||||
; fmov s18, w10
|
||||
; fmax s21, s7, s18
|
||||
; movi v23.16b, #0
|
||||
; fcmp s0, s0
|
||||
; fcsel s7, s5, s7, ne
|
||||
; fcvtzs w0, s7
|
||||
; fcsel s26, s23, s21, ne
|
||||
; fcvtzs w0, s26
|
||||
; ret
|
||||
|
||||
function %f51(f32) -> i64 {
|
||||
@@ -602,14 +602,14 @@ block0(v0: f32):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; movz x6, #24448, LSL #16
|
||||
; fmov s5, w6
|
||||
; fmin s7, s0, s5
|
||||
; movi v5.2s, #0
|
||||
; fmax s7, s7, s5
|
||||
; movz x4, #24448, LSL #16
|
||||
; fmov s4, w4
|
||||
; fmin s7, s0, s4
|
||||
; movi v17.2s, #0
|
||||
; fmax s19, s7, s17
|
||||
; fcmp s0, s0
|
||||
; fcsel s7, s5, s7, ne
|
||||
; fcvtzu x0, s7
|
||||
; fcsel s22, s17, s19, ne
|
||||
; fcvtzu x0, s22
|
||||
; ret
|
||||
|
||||
function %f52(f32) -> i64 {
|
||||
@@ -619,16 +619,16 @@ block0(v0: f32):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; movz x6, #24320, LSL #16
|
||||
; fmov s5, w6
|
||||
; fmin s7, s0, s5
|
||||
; movz x4, #24320, LSL #16
|
||||
; fmov s4, w4
|
||||
; fmin s7, s0, s4
|
||||
; movz x10, #57088, LSL #16
|
||||
; fmov s5, w10
|
||||
; fmax s7, s7, s5
|
||||
; movi v5.2s, #0
|
||||
; fmov s18, w10
|
||||
; fmax s21, s7, s18
|
||||
; movi v23.16b, #0
|
||||
; fcmp s0, s0
|
||||
; fcsel s7, s5, s7, ne
|
||||
; fcvtzs x0, s7
|
||||
; fcsel s26, s23, s21, ne
|
||||
; fcvtzs x0, s26
|
||||
; ret
|
||||
|
||||
function %f53(f64) -> i32 {
|
||||
@@ -638,13 +638,13 @@ block0(v0: f64):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; ldr d4, pc+8 ; b 12 ; data.f64 4294967295
|
||||
; fmin d6, d0, d4
|
||||
; movi v4.2s, #0
|
||||
; fmax d6, d6, d4
|
||||
; ldr d3, pc+8 ; b 12 ; data.f64 4294967295
|
||||
; fmin d5, d0, d3
|
||||
; movi v7.2s, #0
|
||||
; fmax d17, d5, d7
|
||||
; fcmp d0, d0
|
||||
; fcsel d6, d4, d6, ne
|
||||
; fcvtzu w0, d6
|
||||
; fcsel d20, d7, d17, ne
|
||||
; fcvtzu w0, d20
|
||||
; ret
|
||||
|
||||
function %f54(f64) -> i32 {
|
||||
@@ -654,15 +654,15 @@ block0(v0: f64):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; ldr d4, pc+8 ; b 12 ; data.f64 2147483647
|
||||
; fmin d6, d0, d4
|
||||
; ldr d3, pc+8 ; b 12 ; data.f64 2147483647
|
||||
; fmin d5, d0, d3
|
||||
; movz x8, #49632, LSL #48
|
||||
; fmov d4, x8
|
||||
; fmax d6, d6, d4
|
||||
; movi v4.2s, #0
|
||||
; fmov d16, x8
|
||||
; fmax d19, d5, d16
|
||||
; movi v21.16b, #0
|
||||
; fcmp d0, d0
|
||||
; fcsel d6, d4, d6, ne
|
||||
; fcvtzs w0, d6
|
||||
; fcsel d24, d21, d19, ne
|
||||
; fcvtzs w0, d24
|
||||
; ret
|
||||
|
||||
function %f55(f64) -> i64 {
|
||||
@@ -672,14 +672,14 @@ block0(v0: f64):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; movz x6, #17392, LSL #48
|
||||
; fmov d5, x6
|
||||
; fmin d7, d0, d5
|
||||
; movi v5.2s, #0
|
||||
; fmax d7, d7, d5
|
||||
; movz x4, #17392, LSL #48
|
||||
; fmov d4, x4
|
||||
; fmin d7, d0, d4
|
||||
; movi v17.2s, #0
|
||||
; fmax d19, d7, d17
|
||||
; fcmp d0, d0
|
||||
; fcsel d7, d5, d7, ne
|
||||
; fcvtzu x0, d7
|
||||
; fcsel d22, d17, d19, ne
|
||||
; fcvtzu x0, d22
|
||||
; ret
|
||||
|
||||
function %f56(f64) -> i64 {
|
||||
@@ -689,16 +689,16 @@ block0(v0: f64):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; movz x6, #17376, LSL #48
|
||||
; fmov d5, x6
|
||||
; fmin d7, d0, d5
|
||||
; movz x4, #17376, LSL #48
|
||||
; fmov d4, x4
|
||||
; fmin d7, d0, d4
|
||||
; movz x10, #50144, LSL #48
|
||||
; fmov d5, x10
|
||||
; fmax d7, d7, d5
|
||||
; movi v5.2s, #0
|
||||
; fmov d18, x10
|
||||
; fmax d21, d7, d18
|
||||
; movi v23.16b, #0
|
||||
; fcmp d0, d0
|
||||
; fcsel d7, d5, d7, ne
|
||||
; fcvtzs x0, d7
|
||||
; fcsel d26, d23, d21, ne
|
||||
; fcvtzs x0, d26
|
||||
; ret
|
||||
|
||||
function %f57(f32x2) -> f32x2 {
|
||||
@@ -946,3 +946,36 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2):
|
||||
; mov v0.16b, v2.16b
|
||||
; fmla v0.2d, v17.2d, v1.2d
|
||||
; ret
|
||||
|
||||
function %f81(f32x2, f32x2) -> f32x2 {
|
||||
block0(v0: f32x2, v1: f32x2):
|
||||
v2 = fcopysign v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; block0:
|
||||
; ushr v7.2s, v1.2s, #31
|
||||
; sli v0.2s, v7.2s, #31
|
||||
; ret
|
||||
|
||||
function %f82(f32x4, f32x4) -> f32x4 {
|
||||
block0(v0: f32x4, v1: f32x4):
|
||||
v2 = fcopysign v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; block0:
|
||||
; ushr v7.4s, v1.4s, #31
|
||||
; sli v0.4s, v7.4s, #31
|
||||
; ret
|
||||
|
||||
function %f83(f64x2, f64x2) -> f64x2 {
|
||||
block0(v0: f64x2, v1: f64x2):
|
||||
v2 = fcopysign v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; block0:
|
||||
; ushr v7.2d, v1.2d, #63
|
||||
; sli v0.2d, v7.2d, #63
|
||||
; ret
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
test interpret
|
||||
test run
|
||||
target aarch64
|
||||
; x86_64 and s390x do not support 64-bit vectors in `fcopysign`.
|
||||
|
||||
function %fcopysign_f32x2(f32x2, f32x2) -> f32x2 {
|
||||
block0(v0: f32x2, v1: f32x2):
|
||||
v2 = fcopysign v0, v1
|
||||
return v2
|
||||
}
|
||||
; run: %fcopysign_f32x2([0x9.0 -0x9.0], [0x9.0 0x9.0]) == [0x9.0 0x9.0]
|
||||
; run: %fcopysign_f32x2([0x9.0 -0x9.0], [-0x9.0 -0x9.0]) == [-0x9.0 -0x9.0]
|
||||
; run: %fcopysign_f32x2([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-0x0.0 0x0.0]
|
||||
|
||||
; F32 Inf
|
||||
; run: %fcopysign_f32x2([Inf -Inf], [Inf Inf]) == [Inf Inf]
|
||||
; run: %fcopysign_f32x2([Inf -Inf], [-Inf -Inf]) == [-Inf -Inf]
|
||||
|
||||
; F32 Epsilon / Max / Min Positive
|
||||
; run: %fcopysign_f32x2([0x1.000000p-23 -0x1.000000p-23], [-0x0.0 0x0.0]) == [-0x1.000000p-23 0x1.000000p-23]
|
||||
; run: %fcopysign_f32x2([0x1.fffffep127 -0x1.fffffep127], [-0x0.0 0x0.0]) == [-0x1.fffffep127 0x1.fffffep127]
|
||||
; run: %fcopysign_f32x2([0x1.000000p-126 -0x1.000000p-126], [-0x0.0 0x0.0]) == [-0x1.000000p-126 0x1.000000p-126]
|
||||
|
||||
; F32 Subnormals
|
||||
; run: %fcopysign_f32x2([0x0.800000p-126 -0x0.800000p-126], [-0x0.0 0x0.0]) == [-0x0.800000p-126 0x0.800000p-126]
|
||||
; run: %fcopysign_f32x2([0x0.000002p-126 -0x0.000002p-126], [-0x0.0 0x0.0]) == [-0x0.000002p-126 0x0.000002p-126]
|
||||
|
||||
; F32 NaN's
|
||||
; Unlike with other operations fcopysign is guaranteed to only affect the sign bit
|
||||
; run: %fcopysign_f32x2([0x0.0 0x3.0], [-NaN +sNaN:0x1]) == [-0x0.0 0x3.0]
|
||||
; run: %fcopysign_f32x2([Inf +NaN], [-NaN -NaN]) == [-Inf -NaN]
|
||||
; run: %fcopysign_f32x2([-NaN +NaN:0x0], [+NaN -NaN]) == [+NaN -NaN:0x0]
|
||||
; run: %fcopysign_f32x2([+NaN:0x1 +NaN:0x300001], [-NaN -NaN]) == [-NaN:0x1 -NaN:0x300001]
|
||||
; run: %fcopysign_f32x2([-NaN:0x0 -NaN:0x1], [+NaN +NaN]) == [+NaN:0x0 +NaN:0x1]
|
||||
; run: %fcopysign_f32x2([-NaN:0x300001 +sNaN:0x1], [+NaN -NaN]) == [+NaN:0x300001 -sNaN:0x1]
|
||||
; run: %fcopysign_f32x2([-sNaN:0x1 +sNaN:0x200001], [+NaN -NaN]) == [+sNaN:0x1 -sNaN:0x200001]
|
||||
; run: %fcopysign_f32x2([-sNaN:0x200001 -sNaN:0x200001], [+NaN +NaN]) == [+sNaN:0x200001 +sNaN:0x200001]
|
||||
63
cranelift/filetests/filetests/runtests/simd-fcopysign.clif
Normal file
@@ -0,0 +1,63 @@
|
||||
test interpret
|
||||
test run
|
||||
target s390x
|
||||
target aarch64
|
||||
; x86_64 does not support SIMD fcopysign.
|
||||
|
||||
function %fcopysign_f32x4(f32x4, f32x4) -> f32x4 {
|
||||
block0(v0: f32x4, v1: f32x4):
|
||||
v2 = fcopysign v0, v1
|
||||
return v2
|
||||
}
|
||||
; run: %fcopysign_f32x4([0x9.0 -0x9.0 0x9.0 -0x9.0], [0x9.0 0x9.0 -0x9.0 -0x9.0]) == [0x9.0 0x9.0 -0x9.0 -0x9.0]
|
||||
; run: %fcopysign_f32x4([0x0.0 -0x0.0 0x0.0 -0x0.0], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x0.0 0x0.0 -0x0.0 0x0.0]
|
||||
|
||||
; F32 Inf
|
||||
; run: %fcopysign_f32x4([Inf -Inf Inf -Inf], [Inf Inf -Inf -Inf]) == [Inf Inf -Inf -Inf]
|
||||
|
||||
; F32 Epsilon / Max / Min Positive
|
||||
; run: %fcopysign_f32x4([0x1.000000p-23 -0x1.000000p-23 0x1.fffffep127 -0x1.fffffep127], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x1.000000p-23 0x1.000000p-23 -0x1.fffffep127 0x1.fffffep127]
|
||||
; run: %fcopysign_f32x4([0x1.000000p-126 -0x1.000000p-126 0x1.000000p-126 -0x1.000000p-126], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x1.000000p-126 0x1.000000p-126 -0x1.000000p-126 0x1.000000p-126]
|
||||
|
||||
; F32 Subnormals
|
||||
; run: %fcopysign_f32x4([0x0.800000p-126 -0x0.800000p-126 0x0.000002p-126 -0x0.000002p-126], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x0.800000p-126 0x0.800000p-126 -0x0.000002p-126 0x0.000002p-126]
|
||||
|
||||
; F32 NaN's
|
||||
; Unlike with other operations fcopysign is guaranteed to only affect the sign bit
|
||||
; run: %fcopysign_f32x4([0x0.0 0x3.0 Inf +NaN], [-NaN +sNaN:0x1 -NaN -NaN]) == [-0x0.0 0x3.0 -Inf -NaN]
|
||||
; run: %fcopysign_f32x4([-NaN +NaN:0x0 +NaN:0x1 +NaN:0x300001], [+NaN -NaN -NaN -NaN]) == [+NaN -NaN:0x0 -NaN:0x1 -NaN:0x300001]
|
||||
; run: %fcopysign_f32x4([-NaN:0x0 -NaN:0x1 -NaN:0x300001 +sNaN:0x1], [+NaN +NaN +NaN -NaN]) == [+NaN:0x0 +NaN:0x1 +NaN:0x300001 -sNaN:0x1]
|
||||
; run: %fcopysign_f32x4([-sNaN:0x1 +sNaN:0x200001 -sNaN:0x200001 -sNaN:0x200001], [+NaN -NaN +NaN +NaN]) == [+sNaN:0x1 -sNaN:0x200001 +sNaN:0x200001 +sNaN:0x200001]
|
||||
|
||||
function %fcopysign_f64x2(f64x2, f64x2) -> f64x2 {
|
||||
block0(v0: f64x2, v1: f64x2):
|
||||
v2 = fcopysign v0, v1
|
||||
return v2
|
||||
}
|
||||
; run: %fcopysign_f64x2([0x9.0 -0x9.0], [0x9.0 0x9.0]) == [0x9.0 0x9.0]
|
||||
; run: %fcopysign_f64x2([0x9.0 -0x9.0], [-0x9.0 -0x9.0]) == [-0x9.0 -0x9.0]
|
||||
; run: %fcopysign_f64x2([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-0x0.0 0x0.0]
|
||||
|
||||
; F64 Inf
|
||||
; run: %fcopysign_f64x2([Inf -Inf], [Inf Inf]) == [Inf Inf]
|
||||
; run: %fcopysign_f64x2([Inf -Inf], [-Inf -Inf]) == [-Inf -Inf]
|
||||
|
||||
; F64 Epsilon / Max / Min Positive
|
||||
; run: %fcopysign_f64x2([0x1.0000000000000p-52 -0x1.0000000000000p-52], [-0x0.0 0x0.0]) == [-0x1.0000000000000p-52 0x1.0000000000000p-52]
|
||||
; run: %fcopysign_f64x2([0x1.fffffffffffffp1023 -0x1.fffffffffffffp1023], [-0x0.0 0x0.0]) == [-0x1.fffffffffffffp1023 0x1.fffffffffffffp1023]
|
||||
; run: %fcopysign_f64x2([0x1.0000000000000p-1022 -0x1.0000000000000p-1022], [-0x0.0 0x0.0]) == [-0x1.0000000000000p-1022 0x1.0000000000000p-1022]
|
||||
|
||||
; F64 Subnormals
|
||||
; run: %fcopysign_f64x2([0x0.8000000000000p-1022 -0x0.8000000000000p-1022], [-0x0.0 0x0.0]) == [-0x0.8000000000000p-1022 0x0.8000000000000p-1022]
|
||||
; run: %fcopysign_f64x2([0x0.0000000000001p-1022 -0x0.0000000000001p-1022], [-0x0.0 0x0.0]) == [-0x0.0000000000001p-1022 0x0.0000000000001p-1022]
|
||||
|
||||
; F64 NaN's
|
||||
; Unlike with other operations fcopysign is guaranteed to only affect the sign bit
|
||||
; run: %fcopysign_f64x2([0x0.0 0x3.0], [-NaN +sNaN:0x1]) == [-0x0.0 0x3.0]
|
||||
; run: %fcopysign_f64x2([Inf +NaN], [-NaN -NaN]) == [-Inf -NaN]
|
||||
; run: %fcopysign_f64x2([-NaN +NaN:0x0], [+NaN -NaN]) == [+NaN -NaN:0x0]
|
||||
; run: %fcopysign_f64x2([+NaN:0x1 +NaN:0x4000000000001], [-NaN -NaN]) == [-NaN:0x1 -NaN:0x4000000000001]
|
||||
; run: %fcopysign_f64x2([-NaN:0x0 -NaN:0x1], [+NaN +NaN]) == [+NaN:0x0 +NaN:0x1]
|
||||
; run: %fcopysign_f64x2([-NaN:0x4000000000001 +sNaN:0x1], [+NaN -NaN]) == [+NaN:0x4000000000001 -sNaN:0x1]
|
||||
; run: %fcopysign_f64x2([-sNaN:0x1 +sNaN:0x4000000000001], [+NaN -NaN]) == [+sNaN:0x1 -sNaN:0x4000000000001]
|
||||
; run: %fcopysign_f64x2([-sNaN:0x4000000000001 -sNaN:0x4000000000001], [+NaN +NaN]) == [+sNaN:0x4000000000001 +sNaN:0x4000000000001]
|
||||
Reference in New Issue
Block a user