Cranelift AArch64: Improve code generation for vector constants
In particular, introduce initial support for the MOVI and MVNI instructions with 8-bit elements. Also, when a vector constant's value allows it, treat it as a 32- or 64-bit floating-point number, relying on the architectural zero extension of the remaining upper bits of the vector register. Finally, stop generating literal loads for 32-bit constants. Copyright (c) 2020, Arm Limited.
This commit is contained in:
@@ -9,7 +9,7 @@ block0:
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: movz x0, #1
|
||||
; nextln: movz x0, #255
|
||||
; nextln: mov sp, fp
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
@@ -60,10 +60,12 @@ block0(v0: f32):
|
||||
v1 = fcvt_to_uint.i8 v0
|
||||
; check: fcmp s0, s0
|
||||
; check: b.vc 8 ; udf
|
||||
; check: ldr s1, pc+8 ; b 8 ; data.f32 -1
|
||||
; check: movz x0, #49024, LSL #16
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp s0, s1
|
||||
; check: b.gt 8 ; udf
|
||||
; check: ldr s1, pc+8 ; b 8 ; data.f32 256
|
||||
; check: movz x0, #17280, LSL #16
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp s0, s1
|
||||
; check: b.mi 8 ; udf
|
||||
; check: fcvtzu w0, s0
|
||||
@@ -80,10 +82,12 @@ block0(v0: f64):
|
||||
v1 = fcvt_to_uint.i8 v0
|
||||
; check: fcmp d0, d0
|
||||
; check: b.vc 8 ; udf
|
||||
; check: ldr d1, pc+8 ; b 12 ; data.f64 -1
|
||||
; check: movz x0, #49136, LSL #48
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp d0, d1
|
||||
; check: b.gt 8 ; udf
|
||||
; check: ldr d1, pc+8 ; b 12 ; data.f64 256
|
||||
; check: movz x0, #16496, LSL #48
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp d0, d1
|
||||
; check: b.mi 8 ; udf
|
||||
; check: fcvtzu w0, d0
|
||||
@@ -100,10 +104,12 @@ block0(v0: f32):
|
||||
v1 = fcvt_to_uint.i16 v0
|
||||
; check: fcmp s0, s0
|
||||
; check: b.vc 8 ; udf
|
||||
; check: ldr s1, pc+8 ; b 8 ; data.f32 -1
|
||||
; check: movz x0, #49024, LSL #16
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp s0, s1
|
||||
; check: b.gt 8 ; udf
|
||||
; check: ldr s1, pc+8 ; b 8 ; data.f32 65536
|
||||
; check: movz x0, #18304, LSL #16
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp s0, s1
|
||||
; check: b.mi 8 ; udf
|
||||
; check: fcvtzu w0, s0
|
||||
@@ -120,10 +126,12 @@ block0(v0: f64):
|
||||
v1 = fcvt_to_uint.i16 v0
|
||||
; check: fcmp d0, d0
|
||||
; check: b.vc 8 ; udf
|
||||
; check: ldr d1, pc+8 ; b 12 ; data.f64 -1
|
||||
; check: movz x0, #49136, LSL #48
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp d0, d1
|
||||
; check: b.gt 8 ; udf
|
||||
; check: ldr d1, pc+8 ; b 12 ; data.f64 65536
|
||||
; check: movz x0, #16624, LSL #48
|
||||
; check: fmov d1, x0
|
||||
; check: fcmp d0, d1
|
||||
; check: b.mi 8 ; udf
|
||||
; check: fcvtzu w0, d0
|
||||
|
||||
@@ -427,10 +427,12 @@ block0(v0: f32):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: b.vc 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 -1
|
||||
; nextln: movz x0, #49024, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.gt 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 4294967300
|
||||
; nextln: movz x0, #20352, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzu w0, s0
|
||||
@@ -448,10 +450,12 @@ block0(v0: f32):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: b.vc 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 -2147483600
|
||||
; nextln: movz x0, #52992, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.ge 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 2147483600
|
||||
; nextln: movz x0, #20224, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzs w0, s0
|
||||
@@ -469,10 +473,12 @@ block0(v0: f32):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: b.vc 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 -1
|
||||
; nextln: movz x0, #49024, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.gt 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 18446744000000000000
|
||||
; nextln: movz x0, #24448, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzu x0, s0
|
||||
@@ -490,10 +496,12 @@ block0(v0: f32):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: b.vc 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 -9223372000000000000
|
||||
; nextln: movz x0, #57088, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.ge 8 ; udf
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 9223372000000000000
|
||||
; nextln: movz x0, #24320, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp s0, s1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzs x0, s0
|
||||
@@ -511,10 +519,12 @@ block0(v0: f64):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: fcmp d0, d0
|
||||
; nextln: b.vc 8 ; udf
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 -1
|
||||
; nextln: movz x0, #49136, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.gt 8 ; udf
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 4294967296
|
||||
; nextln: movz x0, #16880, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzu w0, d0
|
||||
@@ -535,7 +545,8 @@ block0(v0: f64):
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 -2147483649
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.gt 8 ; udf
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 2147483648
|
||||
; nextln: movz x0, #16864, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzs w0, d0
|
||||
@@ -553,10 +564,12 @@ block0(v0: f64):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: fcmp d0, d0
|
||||
; nextln: b.vc 8 ; udf
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 -1
|
||||
; nextln: movz x0, #49136, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.gt 8 ; udf
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 18446744073709552000
|
||||
; nextln: movz x0, #17392, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzu x0, d0
|
||||
@@ -574,10 +587,12 @@ block0(v0: f64):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: fcmp d0, d0
|
||||
; nextln: b.vc 8 ; udf
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 -9223372036854776000
|
||||
; nextln: movz x0, #50144, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.ge 8 ; udf
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 9223372036854776000
|
||||
; nextln: movz x0, #17376, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fcmp d0, d1
|
||||
; nextln: b.mi 8 ; udf
|
||||
; nextln: fcvtzs x0, d0
|
||||
@@ -697,9 +712,10 @@ block0(v0: f32):
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 4294967300
|
||||
; nextln: movz x0, #20352, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fmin s2, s0, s1
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 0
|
||||
; nextln: movi v1.8b, #0
|
||||
; nextln: fmax s2, s2, s1
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: fcsel s0, s1, s2, ne
|
||||
@@ -716,11 +732,13 @@ block0(v0: f32):
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 2147483600
|
||||
; nextln: movz x0, #20224, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fmin s1, s0, s1
|
||||
; nextln: ldr s2, pc+8 ; b 8 ; data.f32 -2147483600
|
||||
; nextln: movz x0, #52992, LSL #16
|
||||
; nextln: fmov d2, x0
|
||||
; nextln: fmax s1, s1, s2
|
||||
; nextln: ldr s2, pc+8 ; b 8 ; data.f32 0
|
||||
; nextln: movi v2.8b, #0
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: fcsel s0, s2, s1, ne
|
||||
; nextln: fcvtzs w0, s0
|
||||
@@ -736,9 +754,10 @@ block0(v0: f32):
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 18446744000000000000
|
||||
; nextln: movz x0, #24448, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fmin s2, s0, s1
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 0
|
||||
; nextln: movi v1.8b, #0
|
||||
; nextln: fmax s2, s2, s1
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: fcsel s0, s1, s2, ne
|
||||
@@ -755,11 +774,13 @@ block0(v0: f32):
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr s1, pc+8 ; b 8 ; data.f32 9223372000000000000
|
||||
; nextln: movz x0, #24320, LSL #16
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fmin s1, s0, s1
|
||||
; nextln: ldr s2, pc+8 ; b 8 ; data.f32 -9223372000000000000
|
||||
; nextln: movz x0, #57088, LSL #16
|
||||
; nextln: fmov d2, x0
|
||||
; nextln: fmax s1, s1, s2
|
||||
; nextln: ldr s2, pc+8 ; b 8 ; data.f32 0
|
||||
; nextln: movi v2.8b, #0
|
||||
; nextln: fcmp s0, s0
|
||||
; nextln: fcsel s0, s2, s1, ne
|
||||
; nextln: fcvtzs x0, s0
|
||||
@@ -777,7 +798,7 @@ block0(v0: f64):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 4294967295
|
||||
; nextln: fmin d2, d0, d1
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 0
|
||||
; nextln: movi v1.8b, #0
|
||||
; nextln: fmax d2, d2, d1
|
||||
; nextln: fcmp d0, d0
|
||||
; nextln: fcsel d0, d1, d2, ne
|
||||
@@ -796,9 +817,10 @@ block0(v0: f64):
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 2147483647
|
||||
; nextln: fmin d1, d0, d1
|
||||
; nextln: ldr d2, pc+8 ; b 12 ; data.f64 -2147483648
|
||||
; nextln: movz x0, #49632, LSL #48
|
||||
; nextln: fmov d2, x0
|
||||
; nextln: fmax d1, d1, d2
|
||||
; nextln: ldr d2, pc+8 ; b 12 ; data.f64 0
|
||||
; nextln: movi v2.8b, #0
|
||||
; nextln: fcmp d0, d0
|
||||
; nextln: fcsel d0, d2, d1, ne
|
||||
; nextln: fcvtzs w0, d0
|
||||
@@ -814,9 +836,10 @@ block0(v0: f64):
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 18446744073709552000
|
||||
; nextln: movz x0, #17392, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fmin d2, d0, d1
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 0
|
||||
; nextln: movi v1.8b, #0
|
||||
; nextln: fmax d2, d2, d1
|
||||
; nextln: fcmp d0, d0
|
||||
; nextln: fcsel d0, d1, d2, ne
|
||||
@@ -833,11 +856,13 @@ block0(v0: f64):
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 9223372036854776000
|
||||
; nextln: movz x0, #17376, LSL #48
|
||||
; nextln: fmov d1, x0
|
||||
; nextln: fmin d1, d0, d1
|
||||
; nextln: ldr d2, pc+8 ; b 12 ; data.f64 -9223372036854776000
|
||||
; nextln: movz x0, #50144, LSL #48
|
||||
; nextln: fmov d2, x0
|
||||
; nextln: fmax d1, d1, d2
|
||||
; nextln: ldr d2, pc+8 ; b 12 ; data.f64 0
|
||||
; nextln: movi v2.8b, #0
|
||||
; nextln: fcmp d0, d0
|
||||
; nextln: fcsel d0, d2, d1, ne
|
||||
; nextln: fcvtzs x0, d0
|
||||
|
||||
49
cranelift/filetests/filetests/isa/aarch64/simd.clif
Normal file
49
cranelift/filetests/filetests/isa/aarch64/simd.clif
Normal file
@@ -0,0 +1,49 @@
|
||||
test compile
|
||||
target aarch64
|
||||
|
||||
; %f1 splats a 64-bit integer constant into both lanes of an i64x2 vector.
; 281474976710657 is 0x0001_0000_0000_0001 (2^48 + 1).
function %f1() -> i64x2 {
|
||||
block0:
|
||||
v0 = iconst.i64 281474976710657
|
||||
v1 = splat.i64x2 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; The expected output builds the constant in x0 (movz sets bits 0-15, movk
; sets bits 48-63) and then broadcasts x0 to both 64-bit lanes with dup.
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: movz x0, #1
|
||||
; nextln: movk x0, #1, LSL #48
|
||||
; nextln: dup v0.2d, x0
|
||||
; nextln: mov sp, fp
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
; %f2 splats a 16-bit integer constant into all eight lanes of an i16x8 vector.
; The value 42679 (0xA6B7) is created as an i32 and narrowed to i16 by ireduce.
function %f2() -> i16x8 {
|
||||
block0:
|
||||
v0 = iconst.i32 42679
|
||||
v1 = ireduce.i16 v0
|
||||
v2 = splat.i16x8 v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; The expected output loads the constant into x0 with a single movz and then
; broadcasts w0 to the eight 16-bit lanes with dup.
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: movz x0, #42679
|
||||
; nextln: dup v0.8h, w0
|
||||
; nextln: mov sp, fp
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
; %f3 splats a boolean true into all sixteen lanes of a b8x16 vector.
; The value is created as a b32 and narrowed to b8 by breduce.
function %f3() -> b8x16 {
|
||||
block0:
|
||||
v0 = bconst.b32 true
|
||||
v1 = breduce.b8 v0
|
||||
v2 = splat.b8x16 v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; The expected output is a single movi that writes #255 to every byte lane
; of v0, with no general-purpose register move or dup between the prologue
; and the epilogue.
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: movi v0.16b, #255
|
||||
; nextln: mov sp, fp
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
Reference in New Issue
Block a user