aarch64: Add support for the fmls instruction (#5895)
This commit adds lowerings to the AArch64 backend for the `fmls` instruction, which is intended to be used by the relaxed-simd proposal for WebAssembly. This should allow slightly more efficient codegen for this operator than using the `fmla` instruction plus a separate negation instruction.
This commit is contained in:
@@ -852,7 +852,7 @@
|
||||
(rd WritableReg)
|
||||
;; Offset in range -2^20 .. 2^20.
|
||||
(off i32))
|
||||
|
||||
|
||||
;; Compute the address (using a PC-relative offset) of a 4KB page.
|
||||
(Adrp
|
||||
(rd WritableReg)
|
||||
@@ -1401,6 +1401,8 @@
|
||||
(Bsl)
|
||||
;; Floating-point fused multiply-add vectors
|
||||
(Fmla)
|
||||
;; Floating-point fused multiply-subtract vectors
|
||||
(Fmls)
|
||||
))
|
||||
|
||||
;; A Vector miscellaneous operation with two registers.
|
||||
|
||||
@@ -2906,6 +2906,9 @@ impl MachInstEmit for Inst {
|
||||
VecALUModOp::Fmla => {
|
||||
(0b000_01110_00_1 | (size.enc_float_size() << 1), 0b110011)
|
||||
}
|
||||
VecALUModOp::Fmls => {
|
||||
(0b000_01110_10_1 | (size.enc_float_size() << 1), 0b110011)
|
||||
}
|
||||
};
|
||||
sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
|
||||
}
|
||||
|
||||
@@ -2363,6 +2363,7 @@ impl Inst {
|
||||
let (op, size) = match alu_op {
|
||||
VecALUModOp::Bsl => ("bsl", VectorSize::Size8x16),
|
||||
VecALUModOp::Fmla => ("fmla", size),
|
||||
VecALUModOp::Fmls => ("fmls", size),
|
||||
};
|
||||
let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
|
||||
let ri = pretty_print_vreg_vector(ri, size, allocs);
|
||||
|
||||
@@ -404,7 +404,13 @@
|
||||
(rule (lower (has_type ty @ (multi_lane _ _) (fma x y z)))
|
||||
(vec_rrr_mod (VecALUModOp.Fmla) z x y (vector_size ty)))
|
||||
|
||||
(rule 1 (lower (has_type (ty_scalar_float ty) (fma x y z)))
|
||||
(rule 1 (lower (has_type ty @ (multi_lane _ _) (fma (fneg x) y z)))
|
||||
(vec_rrr_mod (VecALUModOp.Fmls) z x y (vector_size ty)))
|
||||
|
||||
(rule 2 (lower (has_type ty @ (multi_lane _ _) (fma x (fneg y) z)))
|
||||
(vec_rrr_mod (VecALUModOp.Fmls) z x y (vector_size ty)))
|
||||
|
||||
(rule 3 (lower (has_type (ty_scalar_float ty) (fma x y z)))
|
||||
(fpu_rrrr (FPUOp3.MAdd) (scalar_size ty) x y z))
|
||||
|
||||
;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
Reference in New Issue
Block a user