Cranelift AArch64: Migrate Splat to ISLE (#4521)

Copyright (c) 2022, Arm Limited.
Anton Kirilov authored this commit on 2022-07-26 18:57:15 +01:00 (committed by GitHub)
parent 1321c234e5
commit ead6edb0c5
21 changed files with 593 additions and 338 deletions
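
This commit moves the hand-written Rust lowering of `splat` on AArch64 into ISLE rules, which is why the virtual-register numbering shifts in the precise-output expectations below (e.g. `v2`/`v4` becoming `v5`/`v6`/`v16`). As a rough illustration only, the GPR and FPR cases of such a lowering can be sketched as the two ISLE rules below; the constructor and extractor names (`vec_dup`, `vec_dup_from_fpu`, `ty_int_bool_ref_scalar_64`, `ty_scalar_float`, `vector_size`) are assumptions for the sketch, not necessarily the exact helpers used in the commit.

;; Sketch only: splat of a value held in a general-purpose register,
;; lowered to `dup Vd.<T>, Wn/Xn`, which replicates the scalar into every lane.
(rule (lower (has_type (ty_vec128 ty) (splat x @ (value_type in_ty))))
      (if (ty_int_bool_ref_scalar_64 in_ty))
      (vec_dup x (vector_size ty)))

;; Sketch only: splat of a value held in a floating-point/SIMD register,
;; lowered to `dup Vd.<T>, Vn.<Ts>[0]`.
(rule (lower (has_type (ty_vec128 ty) (splat x @ (value_type (ty_scalar_float in_ty)))))
      (vec_dup_from_fpu x (vector_size ty)))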


@@ -244,18 +244,13 @@ block0(v0: i128):
return v1
}
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; stp d11, d13, [sp, #-16]!
; block0:
; fmov d6, x0
; mov v6.d[1], x1
; cnt v11.16b, v6.16b
; addv b13, v11.16b
; umov w0, v13.b[0]
; cnt v19.16b, v6.16b
; addv b21, v19.16b
; umov w0, v21.b[0]
; movz w1, #0
; ldp d11, d13, [sp], #16
; ldp fp, lr, [sp], #16
; ret
function %d(i64) -> i64 {


@@ -15,9 +15,9 @@ block0(v0: i16):
}
; block0:
; dup v2.4h, w0
; mov v7.16b, v2.16b
; mov v7.d[1], v2.d[0]
; dup v6.4h, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; sqxtn v0.8b, v7.8h
; ret
@@ -35,9 +35,9 @@ block0(v0: i16):
}
; block0:
; dup v2.8h, w0
; sqxtn v0.8b, v2.8h
; sqxtn2 v0.16b, v2.8h
; dup v6.8h, w0
; sqxtn v0.8b, v6.8h
; sqxtn2 v0.16b, v6.8h
; ret
function %snarrow_i32x2(i32) -> i16x4 {
@@ -54,9 +54,9 @@ block0(v0: i32):
}
; block0:
; dup v2.2s, w0
; mov v7.16b, v2.16b
; mov v7.d[1], v2.d[0]
; dup v6.2s, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; sqxtn v0.4h, v7.4s
; ret
@@ -74,9 +74,9 @@ block0(v0: i32):
}
; block0:
; dup v2.4s, w0
; sqxtn v0.4h, v2.4s
; sqxtn2 v0.8h, v2.4s
; dup v6.4s, w0
; sqxtn v0.4h, v6.4s
; sqxtn2 v0.8h, v6.4s
; ret
function %snarrow_i64x2(i64) -> i32x4 {
@@ -93,9 +93,9 @@ block0(v0: i64):
}
; block0:
; dup v2.2d, x0
; sqxtn v0.2s, v2.2d
; sqxtn2 v0.4s, v2.2d
; dup v6.2d, x0
; sqxtn v0.2s, v6.2d
; sqxtn2 v0.4s, v6.2d
; ret
function %unarrow_i16x4(i16) -> i8x8 {
@@ -112,9 +112,9 @@ block0(v0: i16):
}
; block0:
; dup v2.4h, w0
; mov v7.16b, v2.16b
; mov v7.d[1], v2.d[0]
; dup v6.4h, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; sqxtun v0.8b, v7.8h
; ret
@@ -132,9 +132,9 @@ block0(v0: i16):
}
; block0:
; dup v2.8h, w0
; sqxtun v0.8b, v2.8h
; sqxtun2 v0.16b, v2.8h
; dup v6.8h, w0
; sqxtun v0.8b, v6.8h
; sqxtun2 v0.16b, v6.8h
; ret
function %unarrow_i32x2(i32) -> i16x4 {
@@ -151,9 +151,9 @@ block0(v0: i32):
}
; block0:
; dup v2.2s, w0
; mov v7.16b, v2.16b
; mov v7.d[1], v2.d[0]
; dup v6.2s, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; sqxtun v0.4h, v7.4s
; ret
@@ -171,9 +171,9 @@ block0(v0: i32):
}
; block0:
; dup v2.4s, w0
; sqxtun v0.4h, v2.4s
; sqxtun2 v0.8h, v2.4s
; dup v6.4s, w0
; sqxtun v0.4h, v6.4s
; sqxtun2 v0.8h, v6.4s
; ret
function %unarrow_i64x2(i64) -> i32x4 {
@@ -190,9 +190,9 @@ block0(v0: i64):
}
; block0:
; dup v2.2d, x0
; sqxtun v0.2s, v2.2d
; sqxtun2 v0.4s, v2.2d
; dup v6.2d, x0
; sqxtun v0.2s, v6.2d
; sqxtun2 v0.4s, v6.2d
; ret
function %uunarrow_i16x4(i16) -> i8x8 {
@@ -209,9 +209,9 @@ block0(v0: i16):
}
; block0:
; dup v2.4h, w0
; mov v7.16b, v2.16b
; mov v7.d[1], v2.d[0]
; dup v6.4h, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; uqxtn v0.8b, v7.8h
; ret
@@ -229,9 +229,9 @@ block0(v0: i16):
}
; block0:
; dup v2.8h, w0
; uqxtn v0.8b, v2.8h
; uqxtn2 v0.16b, v2.8h
; dup v6.8h, w0
; uqxtn v0.8b, v6.8h
; uqxtn2 v0.16b, v6.8h
; ret
function %uunarrow_i32x2(i32) -> i16x4 {
@@ -248,9 +248,9 @@ block0(v0: i32):
}
; block0:
; dup v2.2s, w0
; mov v7.16b, v2.16b
; mov v7.d[1], v2.d[0]
; dup v6.2s, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; uqxtn v0.4h, v7.4s
; ret
@@ -268,9 +268,9 @@ block0(v0: i32):
}
; block0:
; dup v2.4s, w0
; uqxtn v0.4h, v2.4s
; uqxtn2 v0.8h, v2.4s
; dup v6.4s, w0
; uqxtn v0.4h, v6.4s
; uqxtn2 v0.8h, v6.4s
; ret
function %uunarrow_i64x2(i64) -> i32x4 {
@@ -287,8 +287,7 @@ block0(v0: i64):
}
; block0:
; dup v2.2d, x0
; uqxtn v0.2s, v2.2d
; uqxtn2 v0.4s, v2.2d
; dup v6.2d, x0
; uqxtn v0.2s, v6.2d
; uqxtn2 v0.4s, v6.2d
; ret


@@ -1,4 +1,4 @@
test compile
test compile precise-output
target aarch64
function %i8x16_splat_add(i8, i8) -> i8x16 {
@@ -13,10 +13,11 @@ block0(v0: i8, v1: i8):
return v5
}
; check: dup v4.16b, w0
; nextln: dup v6.16b, w1
; nextln: add v0.16b, v4.16b, v6.16b
; nextln: ret
; block0:
; dup v16.16b, w0
; dup v17.16b, w1
; add v0.16b, v16.16b, v17.16b
; ret
function %i16x8_splat_add(i16, i16) -> i16x8 {
gv0 = dyn_scale_target_const.i16x8
@@ -30,10 +31,11 @@ block0(v0: i16, v1: i16):
return v5
}
; check: dup v4.8h, w0
; nextln: dup v6.8h, w1
; nextln: add v0.8h, v4.8h, v6.8h
; nextln: ret
; block0:
; dup v16.8h, w0
; dup v17.8h, w1
; add v0.8h, v16.8h, v17.8h
; ret
function %i32x4_splat_mul(i32, i32) -> i32x4 {
gv0 = dyn_scale_target_const.i32x4
@@ -47,10 +49,11 @@ block0(v0: i32, v1: i32):
return v5
}
; check: dup v4.4s, w0
; nextln: dup v6.4s, w1
; nextln: mul v0.4s, v4.4s, v6.4s
; nextln: ret
; block0:
; dup v16.4s, w0
; dup v17.4s, w1
; mul v0.4s, v16.4s, v17.4s
; ret
function %i64x2_splat_sub(i64, i64) -> i64x2 {
gv0 = dyn_scale_target_const.i64x2
@@ -64,10 +67,11 @@ block0(v0: i64, v1: i64):
return v5
}
; check: dup v4.2d, x0
; nextln: dup v6.2d, x1
; nextln: sub v0.2d, v4.2d, v6.2d
; nextln: ret
; block0:
; dup v16.2d, x0
; dup v17.2d, x1
; sub v0.2d, v16.2d, v17.2d
; ret
function %f32x4_splat_add(f32, f32) -> f32x4 {
gv0 = dyn_scale_target_const.f32x4
@@ -81,10 +85,11 @@ block0(v0: f32, v1: f32):
return v5
}
; check: dup v4.4s, v0.s[0]
; nextln: dup v6.4s, v1.s[0]
; nextln: fadd v0.4s, v4.4s, v6.4s
; nextln: ret
; block0:
; dup v16.4s, v0.s[0]
; dup v17.4s, v1.s[0]
; fadd v0.4s, v16.4s, v17.4s
; ret
function %f64x2_splat_sub(f64, f64) -> f64x2 {
gv0 = dyn_scale_target_const.f64x2
@@ -98,10 +103,11 @@ block0(v0: f64, v1: f64):
return v5
}
; check: dup v4.2d, v0.d[0]
; nextln: dup v6.2d, v1.d[0]
; nextln: fsub v0.2d, v4.2d, v6.2d
; nextln: ret
; block0:
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fsub v0.2d, v16.2d, v17.2d
; ret
function %f64x2_splat_mul(f64, f64) -> f64x2 {
gv0 = dyn_scale_target_const.f64x2
@@ -115,10 +121,11 @@ block0(v0: f64, v1: f64):
return v5
}
; check: dup v4.2d, v0.d[0]
; nextln: dup v6.2d, v1.d[0]
; nextln: fmul v0.2d, v4.2d, v6.2d
; nextln: ret
; block0:
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fmul v0.2d, v16.2d, v17.2d
; ret
function %f64x2_splat_div(f64, f64) -> f64x2 {
gv0 = dyn_scale_target_const.f64x2
@@ -132,10 +139,11 @@ block0(v0: f64, v1: f64):
return v5
}
; check: dup v4.2d, v0.d[0]
; nextln: dup v6.2d, v1.d[0]
; nextln: fdiv v0.2d, v4.2d, v6.2d
; nextln: ret
; block0:
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fdiv v0.2d, v16.2d, v17.2d
; ret
function %f64x2_splat_min(f64, f64) -> f64x2 {
gv0 = dyn_scale_target_const.f64x2
@@ -149,10 +157,11 @@ block0(v0: f64, v1: f64):
return v5
}
; check: dup v4.2d, v0.d[0]
; nextln: dup v6.2d, v1.d[0]
; nextln: fmin v0.2d, v4.2d, v6.2d
; nextln: ret
; block0:
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fmin v0.2d, v16.2d, v17.2d
; ret
function %f64x2_splat_max(f64, f64) -> f64x2 {
gv0 = dyn_scale_target_const.f64x2
@@ -166,10 +175,11 @@ block0(v0: f64, v1: f64):
return v5
}
; check: dup v4.2d, v0.d[0]
; nextln: dup v6.2d, v1.d[0]
; nextln: fmax v0.2d, v4.2d, v6.2d
; nextln: ret
; block0:
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fmax v0.2d, v16.2d, v17.2d
; ret
function %f64x2_splat_min_pseudo(f64, f64) -> f64x2 {
gv0 = dyn_scale_target_const.f64x2
@@ -183,11 +193,12 @@ block0(v0: f64, v1: f64):
return v5
}
; check: dup v4.2d, v0.d[0]
; nextln: dup v6.2d, v1.d[0]
; nextln: fcmgt v0.2d, v4.2d, v6.2d
; nextln: bsl v0.16b, v6.16b, v4.16b
; nextln: ret
; block0:
; dup v17.2d, v0.d[0]
; dup v18.2d, v1.d[0]
; fcmgt v0.2d, v17.2d, v18.2d
; bsl v0.16b, v18.16b, v17.16b
; ret
function %f64x2_splat_max_pseudo(f64, f64) -> f64x2 {
gv0 = dyn_scale_target_const.f64x2
@@ -201,8 +212,9 @@ block0(v0: f64, v1: f64):
return v5
}
; check: dup v4.2d, v0.d[0]
; nextln: dup v6.2d, v1.d[0]
; nextln: fcmgt v0.2d, v6.2d, v4.2d
; nextln: bsl v0.16b, v6.16b, v4.16b
; nextln: ret
; block0:
; dup v17.2d, v0.d[0]
; dup v18.2d, v1.d[0]
; fcmgt v0.2d, v18.2d, v17.2d
; bsl v0.16b, v18.16b, v17.16b
; ret


@@ -1,4 +1,4 @@
test compile
test compile precise-output
target aarch64
function %swidenhigh_i8x16(i8) -> i16x8 {
@@ -14,9 +14,10 @@ block0(v0: i8):
return v3
}
; check: dup v2.16b, w0
; nextln: sxtl2 v0.8h, v2.16b
; nextln: ret
; block0:
; dup v5.16b, w0
; sxtl2 v0.8h, v5.16b
; ret
function %swidenhigh_i16x8(i16) -> i32x4 {
gv0 = dyn_scale_target_const.i32x4
@@ -31,9 +32,10 @@ block0(v0: i16):
return v3
}
; check: dup v2.8h, w0
; nextln: sxtl2 v0.4s, v2.8h
; nextln: ret
; block0:
; dup v5.8h, w0
; sxtl2 v0.4s, v5.8h
; ret
function %swidenhigh_i32x4(i32) -> i64x2 {
gv0 = dyn_scale_target_const.i32x4
@@ -48,9 +50,10 @@ block0(v0: i32):
return v3
}
; check: dup v2.4s, w0
; nextln: sxtl2 v0.2d, v2.4s
; nextln: ret
; block0:
; dup v5.4s, w0
; sxtl2 v0.2d, v5.4s
; ret
function %swidenlow_i8x16(i8) -> i16x8 {
gv0 = dyn_scale_target_const.i16x8
@@ -65,9 +68,10 @@ block0(v0: i8):
return v3
}
; check: dup v2.16b, w0
; nextln: sxtl v0.8h, v2.8b
; nextln: ret
; block0:
; dup v5.16b, w0
; sxtl v0.8h, v5.8b
; ret
function %swidenlow_i16x8(i16) -> i32x4 {
gv0 = dyn_scale_target_const.i32x4
@@ -82,9 +86,10 @@ block0(v0: i16):
return v3
}
; check: dup v2.8h, w0
; nextln: sxtl v0.4s, v2.4h
; nextln: ret
; block0:
; dup v5.8h, w0
; sxtl v0.4s, v5.4h
; ret
function %swidenlow_i32x4(i32) -> i64x2 {
gv0 = dyn_scale_target_const.i32x4
@@ -99,6 +104,7 @@ block0(v0: i32):
return v3
}
; check: dup v2.4s, w0
; nextln: sxtl v0.2d, v2.2s
; nextln: ret
; block0:
; dup v5.4s, w0
; sxtl v0.2d, v5.2s
; ret


@@ -58,9 +58,9 @@ block0(v0: i32):
; mov fp, sp
; sub sp, sp, #16
; block0:
; dup v2.4s, w0
; mov x4, sp
; str q2, [x4]
; dup v3.4s, w0
; mov x3, sp
; str q3, [x3]
; add sp, sp, #16
; ldp fp, lr, [sp], #16
; ret
@@ -101,9 +101,9 @@ block0(v0: i32):
; mov fp, sp
; sub sp, sp, #16
; block0:
; dup v2.4s, w0
; mov x4, sp
; str q2, [x4]
; dup v3.4s, w0
; mov x3, sp
; str q3, [x3]
; add sp, sp, #16
; ldp fp, lr, [sp], #16
; ret


@@ -82,29 +82,6 @@ block0(v0: f64):
; stp d10, d11, [sp, #-16]!
; stp d8, d9, [sp, #-16]!
; block0:
; fadd d1, d0, d0
; fadd d2, d0, d0
; fadd d3, d0, d0
; fadd d4, d0, d0
; fadd d5, d0, d0
; fadd d6, d0, d0
; fadd d7, d0, d0
; fadd d8, d0, d0
; fadd d9, d0, d0
; fadd d10, d0, d0
; fadd d11, d0, d0
; fadd d12, d0, d0
; fadd d13, d0, d0
; fadd d14, d0, d0
; fadd d15, d0, d0
; fadd d16, d0, d0
; fadd d17, d0, d0
; fadd d18, d0, d0
; fadd d19, d0, d0
; fadd d20, d0, d0
; fadd d21, d0, d0
; fadd d22, d0, d0
; fadd d23, d0, d0
; fadd d24, d0, d0
; fadd d25, d0, d0
; fadd d26, d0, d0
@@ -113,37 +90,60 @@ block0(v0: f64):
; fadd d29, d0, d0
; fadd d30, d0, d0
; fadd d31, d0, d0
; fadd d0, d0, d1
; fadd d1, d2, d3
; fadd d2, d4, d5
; fadd d3, d6, d7
; fadd d1, d0, d0
; fadd d2, d0, d0
; fadd d3, d0, d0
; fadd d4, d0, d0
; fadd d5, d0, d0
; fadd d6, d0, d0
; fadd d7, d0, d0
; fadd d16, d0, d0
; fadd d17, d0, d0
; fadd d18, d0, d0
; fadd d19, d0, d0
; fadd d20, d0, d0
; fadd d21, d0, d0
; fadd d22, d0, d0
; fadd d23, d0, d0
; fadd d8, d0, d0
; fadd d9, d0, d0
; fadd d10, d0, d0
; fadd d11, d0, d0
; fadd d12, d0, d0
; fadd d13, d0, d0
; fadd d14, d0, d0
; fadd d15, d0, d0
; fadd d24, d0, d24
; fadd d25, d25, d26
; fadd d26, d27, d28
; fadd d27, d29, d30
; fadd d28, d31, d1
; fadd d29, d2, d3
; fadd d30, d4, d5
; fadd d31, d6, d7
; fadd d0, d16, d17
; fadd d1, d18, d19
; fadd d2, d20, d21
; fadd d3, d22, d23
; fadd d4, d8, d9
; fadd d5, d10, d11
; fadd d6, d12, d13
; fadd d7, d14, d15
; fadd d8, d16, d17
; fadd d9, d18, d19
; fadd d10, d20, d21
; fadd d11, d22, d23
; fadd d12, d24, d25
; fadd d13, d26, d27
; fadd d14, d28, d29
; fadd d15, d30, d31
; fadd d0, d0, d1
; fadd d1, d2, d3
; fadd d2, d4, d5
; fadd d3, d6, d7
; fadd d4, d8, d9
; fadd d5, d10, d11
; fadd d6, d12, d13
; fadd d7, d14, d15
; fadd d0, d0, d1
; fadd d1, d2, d3
; fadd d2, d4, d5
; fadd d3, d6, d7
; fadd d0, d0, d1
; fadd d1, d2, d3
; fadd d0, d0, d1
; fadd d24, d24, d25
; fadd d25, d26, d27
; fadd d26, d28, d29
; fadd d27, d30, d31
; fadd d28, d0, d1
; fadd d29, d2, d3
; fadd d30, d4, d5
; fadd d31, d6, d7
; fadd d24, d24, d25
; fadd d25, d26, d27
; fadd d26, d28, d29
; fadd d27, d30, d31
; fadd d24, d24, d25
; fadd d25, d26, d27
; fadd d0, d24, d25
; ldp d8, d9, [sp], #16
; ldp d10, d11, [sp], #16
; ldp d12, d13, [sp], #16
@@ -242,4 +242,3 @@ block0(v0: i64):
; ldr x28, [sp], #16
; ldp fp, lr, [sp], #16
; ret


@@ -1,4 +1,4 @@
test interpret
; test interpret TODO: Not yet implemented
test run
target aarch64
target s390x
@@ -10,6 +10,8 @@ block0(v0: i8):
v1 = splat.i8x16 v0
return v1
}
; run: %splat_i8x16(-1) == [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
; run: %splat_i8x16(0) == [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
; run: %splat_i8x16(1) == [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
function %splat_i16x8(i16) -> i16x8 {
@@ -17,6 +19,8 @@ block0(v0: i16):
v1 = splat.i16x8 v0
return v1
}
; run: %splat_i16x8(-1) == [-1 -1 -1 -1 -1 -1 -1 -1]
; run: %splat_i16x8(0) == [0 0 0 0 0 0 0 0]
; run: %splat_i16x8(512) == [512 512 512 512 512 512 512 512]
function %splat_i32x4(i32) -> i32x4 {
@@ -24,6 +28,8 @@ block0(v0: i32):
v1 = splat.i32x4 v0
return v1
}
; run: %splat_i32x4(-1) == [-1 -1 -1 -1]
; run: %splat_i32x4(0) == [0 0 0 0]
; run: %splat_i32x4(2000000) == [2000000 2000000 2000000 2000000]
function %splat_i64x2(i64) -> i64x2 {
@@ -31,4 +37,189 @@ block0(v0: i64):
v1 = splat.i64x2 v0
return v1
}
; run: %splat_i64x2(-1) == [-1 -1]
; run: %splat_i64x2(0) == [0 0]
; run: %splat_i64x2(5000000000) == [5000000000 5000000000]
function %splat_f32x4(f32) -> f32x4 {
block0(v0: f32):
v1 = splat.f32x4 v0
return v1
}
; run: %splat_f32x4(-0x0.0) == [-0x0.0 -0x0.0 -0x0.0 -0x0.0]
; run: %splat_f32x4(0x1.0) == [0x1.0 0x1.0 0x1.0 0x1.0]
; run: %splat_f32x4(NaN) == [NaN NaN NaN NaN]
function %splat_f64x2(f64) -> f64x2 {
block0(v0: f64):
v1 = splat.f64x2 v0
return v1
}
; run: %splat_f64x2(0x0.0) == [0x0.0 0x0.0]
; run: %splat_f64x2(0x2.0) == [0x2.0 0x2.0]
; run: %splat_f64x2(NaN) == [NaN NaN]
; TODO: Test combinations of `bconst` and `splat`, potentially with `breduce` in
; the middle
function %splat_i8x16_2(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i8 116
v2 = splat.i8x16 v1
v3 = iadd v0, v2
return v3
}
; run: %splat_i8x16_2([-128 -101 -75 -59 -22 -12 -7 -1 0 3 17 34 68 92 111 127]) == [-12 15 41 57 94 104 109 115 116 119 -123 -106 -72 -48 -29 -13]
function %splat_i8x16_3(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i16 116
v2 = ireduce.i8 v1
v3 = splat.i8x16 v2
v4 = iadd v0, v3
return v4
}
; run: %splat_i8x16_3([-128 -101 -75 -59 -22 -12 -7 -1 0 3 17 34 68 92 111 127]) == [-12 15 41 57 94 104 109 115 116 119 -123 -106 -72 -48 -29 -13]
function %splat_i16x8_2(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i16 42
v2 = splat.i16x8 v1
v3 = iadd v0, v2
return v3
}
; run: %splat_i16x8_2([-32768 -1500 -1 0 42 200 8576 32767]) == [-32726 -1458 41 42 84 242 8618 -32727]
function %splat_i16x8_3(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i64 42
v2 = ireduce.i16 v1
v3 = splat.i16x8 v2
v4 = iadd v0, v3
return v4
}
; run: %splat_i16x8_3([-32768 -1500 -1 0 42 200 8576 32767]) == [-32726 -1458 41 42 84 242 8618 -32727]
function %splat_i32x4_2(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 1024
v2 = splat.i32x4 v1
v3 = iadd v0, v2
return v3
}
; run: %splat_i32x4_2([-2147483648 -1 0 2147483647]) == [-2147482624 1023 1024 -2147482625]
function %splat_i32x4_3(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i64 1024
v2 = ireduce.i32 v1
v3 = splat.i32x4 v2
v4 = iadd v0, v3
return v4
}
; run: %splat_i32x4_3([-2147483648 -1 0 2147483647]) == [-2147482624 1023 1024 -2147482625]
function %splat_i64x2_2(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i64 -1
v2 = splat.i64x2 v1
v3 = iadd v0, v2
return v3
}
; run: %splat_i64x2_2([-1 0]) == [-2 -1]
function %splat_f32x4_2(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = f32const 0x1.5
v2 = splat.f32x4 v1
v3 = fadd v0, v2
return v3
}
; run: %splat_f32x4_2([0x0.0 NaN 0x1.0 0x2.0]) == [0x1.5 NaN 0x2.5 0x3.5]
function %splat_f64x2_2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = f64const 0x7.5
v2 = splat.f64x2 v1
v3 = fadd v0, v2
return v3
}
; run: %splat_f64x2_2([0x0.0 0x1.0]) == [0x7.5 0x8.5]
function %load_splat_i8x16(i8) -> i8x16 {
ss0 = explicit_slot 8
block0(v0: i8):
stack_store.i8 v0, ss0
v1 = stack_load.i8 ss0
v2 = splat.i8x16 v1
return v2
}
; run: %load_splat_i8x16(-1) == [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
; run: %load_splat_i8x16(0) == [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
; run: %load_splat_i8x16(1) == [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
function %load_splat_i16x8(i16) -> i16x8 {
ss0 = explicit_slot 8
block0(v0: i16):
stack_store.i16 v0, ss0
v1 = stack_load.i16 ss0
v2 = splat.i16x8 v1
return v2
}
; run: %load_splat_i16x8(-1) == [-1 -1 -1 -1 -1 -1 -1 -1]
; run: %load_splat_i16x8(0) == [0 0 0 0 0 0 0 0]
; run: %load_splat_i16x8(512) == [512 512 512 512 512 512 512 512]
function %load_splat_i32x4(i32) -> i32x4 {
ss0 = explicit_slot 8
block0(v0: i32):
stack_store.i32 v0, ss0
v1 = stack_load.i32 ss0
v2 = splat.i32x4 v1
return v2
}
; run: %load_splat_i32x4(-1) == [-1 -1 -1 -1]
; run: %load_splat_i32x4(0) == [0 0 0 0]
; run: %load_splat_i32x4(2000000) == [2000000 2000000 2000000 2000000]
function %load_splat_i64x2(i64) -> i64x2 {
ss0 = explicit_slot 8
block0(v0: i64):
stack_store.i64 v0, ss0
v1 = stack_load.i64 ss0
v2 = splat.i64x2 v1
return v2
}
; run: %load_splat_i64x2(-1) == [-1 -1]
; run: %load_splat_i64x2(0) == [0 0]
; run: %load_splat_i64x2(5000000000) == [5000000000 5000000000]
function %load_splat_f32x4(f32) -> f32x4 {
ss0 = explicit_slot 8
block0(v0: f32):
stack_store.f32 v0, ss0
v1 = stack_load.f32 ss0
v2 = splat.f32x4 v1
return v2
}
; run: %load_splat_f32x4(-0x0.0) == [-0x0.0 -0x0.0 -0x0.0 -0x0.0]
; run: %load_splat_f32x4(0x1.0) == [0x1.0 0x1.0 0x1.0 0x1.0]
; run: %load_splat_f32x4(NaN) == [NaN NaN NaN NaN]
function %load_splat_f64x2(f64) -> f64x2 {
ss0 = explicit_slot 8
block0(v0: f64):
stack_store.f64 v0, ss0
v1 = stack_load.f64 ss0
v2 = splat.f64x2 v1
return v2
}
; run: %load_splat_f64x2(0x0.0) == [0x0.0 0x0.0]
; run: %load_splat_f64x2(0x2.0) == [0x2.0 0x2.0]
; run: %load_splat_f64x2(NaN) == [NaN NaN]