Cranelift AArch64: Further vector constant improvements

Introduce support for MOVI/MVNI with 16-, 32-, and 64-bit elements,
and the vector variant of FMOV.

Copyright (c) 2020, Arm Limited.
This commit is contained in:
Anton Kirilov
2020-10-29 13:29:03 +00:00
parent b93381e126
commit f59b274d22
8 changed files with 498 additions and 24 deletions

View File

@@ -715,7 +715,7 @@ block0(v0: f32):
; nextln: movz x0, #20352, LSL #16
; nextln: fmov d1, x0
; nextln: fmin s2, s0, s1
; nextln: movi v1.8b, #0
; nextln: movi v1.2s, #0
; nextln: fmax s2, s2, s1
; nextln: fcmp s0, s0
; nextln: fcsel s0, s1, s2, ne
@@ -738,7 +738,7 @@ block0(v0: f32):
; nextln: movz x0, #52992, LSL #16
; nextln: fmov d2, x0
; nextln: fmax s1, s1, s2
; nextln: movi v2.8b, #0
; nextln: movi v2.2s, #0
; nextln: fcmp s0, s0
; nextln: fcsel s0, s2, s1, ne
; nextln: fcvtzs w0, s0
@@ -757,7 +757,7 @@ block0(v0: f32):
; nextln: movz x0, #24448, LSL #16
; nextln: fmov d1, x0
; nextln: fmin s2, s0, s1
; nextln: movi v1.8b, #0
; nextln: movi v1.2s, #0
; nextln: fmax s2, s2, s1
; nextln: fcmp s0, s0
; nextln: fcsel s0, s1, s2, ne
@@ -780,7 +780,7 @@ block0(v0: f32):
; nextln: movz x0, #57088, LSL #16
; nextln: fmov d2, x0
; nextln: fmax s1, s1, s2
; nextln: movi v2.8b, #0
; nextln: movi v2.2s, #0
; nextln: fcmp s0, s0
; nextln: fcsel s0, s2, s1, ne
; nextln: fcvtzs x0, s0
@@ -798,7 +798,7 @@ block0(v0: f64):
; nextln: mov fp, sp
; nextln: ldr d1, pc+8 ; b 12 ; data.f64 4294967295
; nextln: fmin d2, d0, d1
; nextln: movi v1.8b, #0
; nextln: movi v1.2s, #0
; nextln: fmax d2, d2, d1
; nextln: fcmp d0, d0
; nextln: fcsel d0, d1, d2, ne
@@ -820,7 +820,7 @@ block0(v0: f64):
; nextln: movz x0, #49632, LSL #48
; nextln: fmov d2, x0
; nextln: fmax d1, d1, d2
; nextln: movi v2.8b, #0
; nextln: movi v2.2s, #0
; nextln: fcmp d0, d0
; nextln: fcsel d0, d2, d1, ne
; nextln: fcvtzs w0, d0
@@ -839,7 +839,7 @@ block0(v0: f64):
; nextln: movz x0, #17392, LSL #48
; nextln: fmov d1, x0
; nextln: fmin d2, d0, d1
; nextln: movi v1.8b, #0
; nextln: movi v1.2s, #0
; nextln: fmax d2, d2, d1
; nextln: fcmp d0, d0
; nextln: fcsel d0, d1, d2, ne
@@ -862,7 +862,7 @@ block0(v0: f64):
; nextln: movz x0, #50144, LSL #48
; nextln: fmov d2, x0
; nextln: fmax d1, d1, d2
; nextln: movi v2.8b, #0
; nextln: movi v2.2s, #0
; nextln: fcmp d0, d0
; nextln: fcsel d0, d2, d1, ne
; nextln: fcvtzs x0, d0

View File

@@ -127,3 +127,46 @@ block0(v0: i64, v1: i64):
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f9() -> i32x2 {
block0:
v0 = iconst.i32 4278190335
v1 = splat.i32x2 v0
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movi v0.2d, #18374687579166474495
; nextln: fmov d0, d0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f10() -> i32x4 {
block0:
v0 = iconst.i32 4293918720
v1 = splat.i32x4 v0
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mvni v0.4s, #15, MSL #16
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f11() -> f32x4 {
block0:
v0 = f32const 0x1.5
v1 = splat.f32x4 v0
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: fmov v0.4s, #1.3125
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret