Split Fmla and Bsl out into new VecRRRMod op (#4638)
Separates the following opcodes for AArch64 into a separate `VecALUModOp` enum, which is emitted via the `VecRRRMod` instruction. This separates vector ALU instructions which modify a register from instructions which write to a new register: - `Bsl` - `Fmla` Addresses [a discussion](https://github.com/bytecodealliance/wasmtime/pull/4608#discussion_r937975581) in #4608. Copyright (c) 2022 Arm Limited
This commit is contained in:
@@ -576,6 +576,14 @@
|
|||||||
(rm Reg)
|
(rm Reg)
|
||||||
(size VectorSize))
|
(size VectorSize))
|
||||||
|
|
||||||
|
;; A vector ALU op modifying a source register.
|
||||||
|
(VecRRRMod
|
||||||
|
(alu_op VecALUModOp)
|
||||||
|
(rd WritableReg)
|
||||||
|
(rn Reg)
|
||||||
|
(rm Reg)
|
||||||
|
(size VectorSize))
|
||||||
|
|
||||||
;; Vector two register miscellaneous instruction.
|
;; Vector two register miscellaneous instruction.
|
||||||
(VecMisc
|
(VecMisc
|
||||||
(op VecMisc2)
|
(op VecMisc2)
|
||||||
@@ -1108,10 +1116,6 @@
|
|||||||
(Orr)
|
(Orr)
|
||||||
;; Bitwise exclusive or
|
;; Bitwise exclusive or
|
||||||
(Eor)
|
(Eor)
|
||||||
;; Bitwise select
|
|
||||||
;; This opcode should only be used with the `vec_rrr_inplace`
|
|
||||||
;; constructor.
|
|
||||||
(Bsl)
|
|
||||||
;; Unsigned maximum pairwise
|
;; Unsigned maximum pairwise
|
||||||
(Umaxp)
|
(Umaxp)
|
||||||
;; Add
|
;; Add
|
||||||
@@ -1146,10 +1150,6 @@
|
|||||||
(Fmin)
|
(Fmin)
|
||||||
;; Floating-point multiply
|
;; Floating-point multiply
|
||||||
(Fmul)
|
(Fmul)
|
||||||
;; Floating-point fused multiply-add vectors
|
|
||||||
;; This opcode should only be used with the `vec_rrr_inplace`
|
|
||||||
;; constructor.
|
|
||||||
(Fmla)
|
|
||||||
;; Add pairwise
|
;; Add pairwise
|
||||||
(Addp)
|
(Addp)
|
||||||
;; Zip vectors (primary) [meaning, high halves]
|
;; Zip vectors (primary) [meaning, high halves]
|
||||||
@@ -1158,6 +1158,15 @@
|
|||||||
(Sqrdmulh)
|
(Sqrdmulh)
|
||||||
))
|
))
|
||||||
|
|
||||||
|
;; A Vector ALU operation which modifies a source register.
|
||||||
|
(type VecALUModOp
|
||||||
|
(enum
|
||||||
|
;; Bitwise select
|
||||||
|
(Bsl)
|
||||||
|
;; Floating-point fused multiply-add vectors
|
||||||
|
(Fmla)
|
||||||
|
))
|
||||||
|
|
||||||
;; A Vector miscellaneous operation with two registers.
|
;; A Vector miscellaneous operation with two registers.
|
||||||
(type VecMisc2
|
(type VecMisc2
|
||||||
(enum
|
(enum
|
||||||
@@ -1508,11 +1517,11 @@
|
|||||||
|
|
||||||
;; Helper for emitting `MInst.VecRRR` instructions which use three registers,
|
;; Helper for emitting `MInst.VecRRR` instructions which use three registers,
|
||||||
;; one of which is both source and output.
|
;; one of which is both source and output.
|
||||||
(decl vec_rrr_inplace (VecALUOp Reg Reg Reg VectorSize) Reg)
|
(decl vec_rrr_mod (VecALUModOp Reg Reg Reg VectorSize) Reg)
|
||||||
(rule (vec_rrr_inplace op src1 src2 src3 size)
|
(rule (vec_rrr_mod op src1 src2 src3 size)
|
||||||
(let ((dst WritableReg (temp_writable_reg $I8X16))
|
(let ((dst WritableReg (temp_writable_reg $I8X16))
|
||||||
(_1 Unit (emit (MInst.FpuMove128 dst src1)))
|
(_1 Unit (emit (MInst.FpuMove128 dst src1)))
|
||||||
(_2 Unit (emit (MInst.VecRRR op dst src2 src3 size))))
|
(_2 Unit (emit (MInst.VecRRRMod op dst src2 src3 size))))
|
||||||
dst))
|
dst))
|
||||||
|
|
||||||
;; Helper for emitting `MInst.FpuRRR` instructions.
|
;; Helper for emitting `MInst.FpuRRR` instructions.
|
||||||
@@ -2198,10 +2207,7 @@
|
|||||||
|
|
||||||
(decl bsl (Type Reg Reg Reg) Reg)
|
(decl bsl (Type Reg Reg Reg) Reg)
|
||||||
(rule (bsl ty c x y)
|
(rule (bsl ty c x y)
|
||||||
(let ((dst WritableReg (temp_writable_reg ty))
|
(vec_rrr_mod (VecALUModOp.Bsl) c x y (vector_size ty)))
|
||||||
(_ Unit (emit (MInst.FpuMove128 dst c)))
|
|
||||||
(_ Unit (emit (MInst.VecRRR (VecALUOp.Bsl) dst x y (vector_size ty)))))
|
|
||||||
dst))
|
|
||||||
|
|
||||||
;; Helper for generating a `udf` instruction.
|
;; Helper for generating a `udf` instruction.
|
||||||
|
|
||||||
|
|||||||
@@ -752,6 +752,16 @@ impl VectorSize {
|
|||||||
|
|
||||||
(q, size)
|
(q, size)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return the encoding bit that is used by some floating-point SIMD
|
||||||
|
/// instructions for a particular operand size.
|
||||||
|
pub fn enc_float_size(&self) -> u32 {
|
||||||
|
match self.lane_size() {
|
||||||
|
ScalarSize::Size32 => 0b0,
|
||||||
|
ScalarSize::Size64 => 0b1,
|
||||||
|
size => panic!("Unsupported floating-point size for vector op: {:?}", size),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn dynamic_to_fixed(ty: Type) -> Type {
|
pub(crate) fn dynamic_to_fixed(ty: Type) -> Type {
|
||||||
|
|||||||
@@ -2543,17 +2543,9 @@ impl MachInstEmit for Inst {
|
|||||||
| VecALUOp::Fdiv
|
| VecALUOp::Fdiv
|
||||||
| VecALUOp::Fmax
|
| VecALUOp::Fmax
|
||||||
| VecALUOp::Fmin
|
| VecALUOp::Fmin
|
||||||
| VecALUOp::Fmul
|
| VecALUOp::Fmul => true,
|
||||||
| VecALUOp::Fmla => true,
|
|
||||||
_ => false,
|
_ => false,
|
||||||
};
|
};
|
||||||
let enc_float_size = match (is_float, size) {
|
|
||||||
(true, VectorSize::Size32x2) => 0b0,
|
|
||||||
(true, VectorSize::Size32x4) => 0b0,
|
|
||||||
(true, VectorSize::Size64x2) => 0b1,
|
|
||||||
(true, _) => unimplemented!(),
|
|
||||||
_ => 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
let (top11, bit15_10) = match alu_op {
|
let (top11, bit15_10) = match alu_op {
|
||||||
VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
|
VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
|
||||||
@@ -2574,7 +2566,6 @@ impl MachInstEmit for Inst {
|
|||||||
VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
|
VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
|
||||||
VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
|
VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
|
||||||
VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
|
VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
|
||||||
VecALUOp::Bsl => (0b001_01110_01_1, 0b000111),
|
|
||||||
VecALUOp::Umaxp => {
|
VecALUOp::Umaxp => {
|
||||||
debug_assert_ne!(size, VectorSize::Size64x2);
|
debug_assert_ne!(size, VectorSize::Size64x2);
|
||||||
|
|
||||||
@@ -2619,7 +2610,6 @@ impl MachInstEmit for Inst {
|
|||||||
VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
|
VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
|
||||||
VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
|
VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
|
||||||
VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
|
VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
|
||||||
VecALUOp::Fmla => (0b000_01110_00_1, 0b110011),
|
|
||||||
VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
|
VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
|
||||||
VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
|
VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
|
||||||
VecALUOp::Sqrdmulh => {
|
VecALUOp::Sqrdmulh => {
|
||||||
@@ -2632,12 +2622,32 @@ impl MachInstEmit for Inst {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
let top11 = if is_float {
|
let top11 = if is_float {
|
||||||
top11 | enc_float_size << 1
|
top11 | size.enc_float_size() << 1
|
||||||
} else {
|
} else {
|
||||||
top11
|
top11
|
||||||
};
|
};
|
||||||
sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
|
sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
|
||||||
}
|
}
|
||||||
|
&Inst::VecRRRMod {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
alu_op,
|
||||||
|
size,
|
||||||
|
} => {
|
||||||
|
let rd = allocs.next_writable(rd);
|
||||||
|
let rn = allocs.next(rn);
|
||||||
|
let rm = allocs.next(rm);
|
||||||
|
let (q, _enc_size) = size.enc_size();
|
||||||
|
|
||||||
|
let (top11, bit15_10) = match alu_op {
|
||||||
|
VecALUModOp::Bsl => (0b001_01110_01_1, 0b000111),
|
||||||
|
VecALUModOp::Fmla => {
|
||||||
|
(0b000_01110_00_1 | (size.enc_float_size() << 1), 0b110011)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
|
||||||
|
}
|
||||||
&Inst::VecLoadReplicate {
|
&Inst::VecLoadReplicate {
|
||||||
rd,
|
rd,
|
||||||
rn,
|
rn,
|
||||||
|
|||||||
@@ -3383,8 +3383,8 @@ fn test_aarch64_binemit() {
|
|||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::VecRRR {
|
Inst::VecRRRMod {
|
||||||
alu_op: VecALUOp::Bsl,
|
alu_op: VecALUModOp::Bsl,
|
||||||
rd: writable_vreg(8),
|
rd: writable_vreg(8),
|
||||||
rn: vreg(9),
|
rn: vreg(9),
|
||||||
rm: vreg(1),
|
rm: vreg(1),
|
||||||
@@ -4055,8 +4055,8 @@ fn test_aarch64_binemit() {
|
|||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::VecRRR {
|
Inst::VecRRRMod {
|
||||||
alu_op: VecALUOp::Fmla,
|
alu_op: VecALUModOp::Fmla,
|
||||||
rd: writable_vreg(2),
|
rd: writable_vreg(2),
|
||||||
rn: vreg(0),
|
rn: vreg(0),
|
||||||
rm: vreg(5),
|
rm: vreg(5),
|
||||||
@@ -4067,8 +4067,8 @@ fn test_aarch64_binemit() {
|
|||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::VecRRR {
|
Inst::VecRRRMod {
|
||||||
alu_op: VecALUOp::Fmla,
|
alu_op: VecALUModOp::Fmla,
|
||||||
rd: writable_vreg(2),
|
rd: writable_vreg(2),
|
||||||
rn: vreg(0),
|
rn: vreg(0),
|
||||||
rm: vreg(5),
|
rm: vreg(5),
|
||||||
@@ -4079,8 +4079,8 @@ fn test_aarch64_binemit() {
|
|||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::VecRRR {
|
Inst::VecRRRMod {
|
||||||
alu_op: VecALUOp::Fmla,
|
alu_op: VecALUModOp::Fmla,
|
||||||
rd: writable_vreg(2),
|
rd: writable_vreg(2),
|
||||||
rn: vreg(0),
|
rn: vreg(0),
|
||||||
rm: vreg(5),
|
rm: vreg(5),
|
||||||
|
|||||||
@@ -37,9 +37,9 @@ mod emit_tests;
|
|||||||
|
|
||||||
pub use crate::isa::aarch64::lower::isle::generated_code::{
|
pub use crate::isa::aarch64::lower::isle::generated_code::{
|
||||||
ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3,
|
ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3,
|
||||||
FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUOp, VecExtendOp,
|
FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp,
|
||||||
VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecRRRLongOp,
|
VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp,
|
||||||
VecShiftImmOp,
|
VecRRRLongOp, VecShiftImmOp,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
|
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
|
||||||
@@ -957,14 +957,13 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
|
|||||||
collector.reg_def(rd);
|
collector.reg_def(rd);
|
||||||
collector.reg_use(rn);
|
collector.reg_use(rn);
|
||||||
}
|
}
|
||||||
&Inst::VecRRR {
|
&Inst::VecRRR { rd, rn, rm, .. } => {
|
||||||
alu_op, rd, rn, rm, ..
|
|
||||||
} => {
|
|
||||||
if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Fmla {
|
|
||||||
collector.reg_mod(rd);
|
|
||||||
} else {
|
|
||||||
collector.reg_def(rd);
|
collector.reg_def(rd);
|
||||||
|
collector.reg_use(rn);
|
||||||
|
collector.reg_use(rm);
|
||||||
}
|
}
|
||||||
|
&Inst::VecRRRMod { rd, rn, rm, .. } => {
|
||||||
|
collector.reg_mod(rd);
|
||||||
collector.reg_use(rn);
|
collector.reg_use(rn);
|
||||||
collector.reg_use(rm);
|
collector.reg_use(rm);
|
||||||
}
|
}
|
||||||
@@ -2208,7 +2207,6 @@ impl Inst {
|
|||||||
VecALUOp::Bic => ("bic", VectorSize::Size8x16),
|
VecALUOp::Bic => ("bic", VectorSize::Size8x16),
|
||||||
VecALUOp::Orr => ("orr", VectorSize::Size8x16),
|
VecALUOp::Orr => ("orr", VectorSize::Size8x16),
|
||||||
VecALUOp::Eor => ("eor", VectorSize::Size8x16),
|
VecALUOp::Eor => ("eor", VectorSize::Size8x16),
|
||||||
VecALUOp::Bsl => ("bsl", VectorSize::Size8x16),
|
|
||||||
VecALUOp::Umaxp => ("umaxp", size),
|
VecALUOp::Umaxp => ("umaxp", size),
|
||||||
VecALUOp::Add => ("add", size),
|
VecALUOp::Add => ("add", size),
|
||||||
VecALUOp::Sub => ("sub", size),
|
VecALUOp::Sub => ("sub", size),
|
||||||
@@ -2226,7 +2224,6 @@ impl Inst {
|
|||||||
VecALUOp::Fmax => ("fmax", size),
|
VecALUOp::Fmax => ("fmax", size),
|
||||||
VecALUOp::Fmin => ("fmin", size),
|
VecALUOp::Fmin => ("fmin", size),
|
||||||
VecALUOp::Fmul => ("fmul", size),
|
VecALUOp::Fmul => ("fmul", size),
|
||||||
VecALUOp::Fmla => ("fmla", size),
|
|
||||||
VecALUOp::Addp => ("addp", size),
|
VecALUOp::Addp => ("addp", size),
|
||||||
VecALUOp::Zip1 => ("zip1", size),
|
VecALUOp::Zip1 => ("zip1", size),
|
||||||
VecALUOp::Sqrdmulh => ("sqrdmulh", size),
|
VecALUOp::Sqrdmulh => ("sqrdmulh", size),
|
||||||
@@ -2236,6 +2233,22 @@ impl Inst {
|
|||||||
let rm = pretty_print_vreg_vector(rm, size, allocs);
|
let rm = pretty_print_vreg_vector(rm, size, allocs);
|
||||||
format!("{} {}, {}, {}", op, rd, rn, rm)
|
format!("{} {}, {}, {}", op, rd, rn, rm)
|
||||||
}
|
}
|
||||||
|
&Inst::VecRRRMod {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
alu_op,
|
||||||
|
size,
|
||||||
|
} => {
|
||||||
|
let (op, size) = match alu_op {
|
||||||
|
VecALUModOp::Bsl => ("bsl", VectorSize::Size8x16),
|
||||||
|
VecALUModOp::Fmla => ("fmla", size),
|
||||||
|
};
|
||||||
|
let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
|
||||||
|
let rn = pretty_print_vreg_vector(rn, size, allocs);
|
||||||
|
let rm = pretty_print_vreg_vector(rm, size, allocs);
|
||||||
|
format!("{} {}, {}, {}", op, rd, rn, rm)
|
||||||
|
}
|
||||||
&Inst::VecRRRLong {
|
&Inst::VecRRRLong {
|
||||||
rd,
|
rd,
|
||||||
rn,
|
rn,
|
||||||
|
|||||||
@@ -380,7 +380,7 @@
|
|||||||
;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(rule (lower (has_type ty @ (multi_lane _ _) (fma x y z)))
|
(rule (lower (has_type ty @ (multi_lane _ _) (fma x y z)))
|
||||||
(vec_rrr_inplace (VecALUOp.Fmla) z x y (vector_size ty)))
|
(vec_rrr_mod (VecALUModOp.Fmla) z x y (vector_size ty)))
|
||||||
|
|
||||||
(rule (lower (has_type (ty_scalar_float ty) (fma x y z)))
|
(rule (lower (has_type (ty_scalar_float ty) (fma x y z)))
|
||||||
(fpu_rrrr (FPUOp3.MAdd) (scalar_size ty) x y z))
|
(fpu_rrrr (FPUOp3.MAdd) (scalar_size ty) x y z))
|
||||||
|
|||||||
Reference in New Issue
Block a user