aarch64: Add more lowerings for the CLIF fma (#6150)
This commit adds new lowerings to the AArch64 backend that use the element-based forms of the `fmla` and `fmls` instructions. These forms take one of the multiplicands as an implicit broadcast of a single lane of another register, which can remove the `shuffle` or `dup` instructions that would otherwise be needed to materialize the broadcast operand.
This commit is contained in:
@@ -87,3 +87,39 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2):
|
||||
; run: %fma_is_nan_f64x2([0x0.0 0x0.0], [+NaN 0x0.0], [0x0.0 +NaN]) == 1
|
||||
; run: %fma_is_nan_f64x2([-NaN 0x0.0], [0x0.0 -NaN], [0x0.0 0x0.0]) == 1
|
||||
; run: %fma_is_nan_f64x2([0x0.0 NaN], [0x0.0 NaN], [-NaN NaN]) == 1
|
||||
|
||||
; Computes v0 * splat(v1) + v2 on f32x4: the scalar v1 is broadcast to every
; lane and passed as the SECOND multiplicand of `fma`.
function %fma_f32x4_splat1(f32x4, f32, f32x4) -> f32x4 {
|
||||
block0(v0: f32x4, v1: f32, v2: f32x4):
|
||||
v3 = splat.f32x4 v1
|
||||
v4 = fma v0, v3, v2
|
||||
return v4
|
||||
}
|
||||
; run: %fma_f32x4_splat1([0x9.0 0x9.0 0x9.0 0x9.0], 0x9.0, [0x9.0 0x9.0 0x9.0 0x9.0]) == [0x1.680000p6 0x1.680000p6 0x1.680000p6 0x1.680000p6]
|
||||
; run: %fma_f32x4_splat1([0x1.0 0x2.0 0x3.0 0x4.0], 0x0.0, [0x5.0 0x6.0 0x7.0 0x8.0]) == [0x5.0 0x6.0 0x7.0 0x8.0]
|
||||
|
||||
; Computes splat(v0) * v1 + v2 on f32x4: the scalar v0 is broadcast to every
; lane and passed as the FIRST multiplicand of `fma`.
function %fma_f32x4_splat2(f32, f32x4, f32x4) -> f32x4 {
|
||||
block0(v0: f32, v1: f32x4, v2: f32x4):
|
||||
v3 = splat.f32x4 v0
|
||||
v4 = fma v3, v1, v2
|
||||
return v4
|
||||
}
|
||||
; run: %fma_f32x4_splat2(0x9.0, [0x9.0 0x9.0 0x9.0 0x9.0], [0x9.0 0x9.0 0x9.0 0x9.0]) == [0x1.680000p6 0x1.680000p6 0x1.680000p6 0x1.680000p6]
|
||||
; run: %fma_f32x4_splat2(0x0.0, [0x1.0 0x2.0 0x3.0 0x4.0], [0x5.0 0x6.0 0x7.0 0x8.0]) == [0x5.0 0x6.0 0x7.0 0x8.0]
|
||||
|
||||
; Computes v0 * splat(v1) + v2 on f64x2: the scalar v1 is broadcast to both
; lanes and passed as the SECOND multiplicand of `fma`.
function %fma_f64x2_splat1(f64x2, f64, f64x2) -> f64x2 {
|
||||
block0(v0: f64x2, v1: f64, v2: f64x2):
|
||||
v3 = splat.f64x2 v1
|
||||
v4 = fma v0, v3, v2
|
||||
return v4
|
||||
}
|
||||
; run: %fma_f64x2_splat1([0x9.0 0x9.0], 0x9.0, [0x9.0 0x9.0]) == [0x1.680000p6 0x1.680000p6]
|
||||
; run: %fma_f64x2_splat1([0x1.0 0x2.0], 0x0.0, [0x5.0 0x6.0]) == [0x5.0 0x6.0]
|
||||
|
||||
; Computes splat(v0) * v1 + v2 on f64x2: the scalar v0 is broadcast to both
; lanes and passed as the FIRST multiplicand of `fma`.
function %fma_f64x2_splat2(f64, f64x2, f64x2) -> f64x2 {
|
||||
block0(v0: f64, v1: f64x2, v2: f64x2):
|
||||
v3 = splat.f64x2 v0
|
||||
v4 = fma v3, v1, v2
|
||||
return v4
|
||||
}
|
||||
; run: %fma_f64x2_splat2(0x9.0, [0x9.0 0x9.0], [0x9.0 0x9.0]) == [0x1.680000p6 0x1.680000p6]
|
||||
; run: %fma_f64x2_splat2(0x0.0, [0x1.0 0x2.0], [0x5.0 0x6.0]) == [0x5.0 0x6.0]
|
||||
|
||||
Reference in New Issue
Block a user