Convert fadd..fmax_pseudo to ISLE (AArch64) (#4452)

Converted the existing implementations for the following Opcodes to ISLE on AArch64:
- `fadd`
- `fsub`
- `fmul`
- `fdiv`
- `fmin`
- `fmax`
- `fmin_pseudo`
- `fmax_pseudo`

Copyright (c) 2022 Arm Limited
This commit is contained in:
Damian Heaton
2022-07-19 20:03:05 +01:00
committed by GitHub
parent b18c9bee15
commit 00ac18c866
7 changed files with 447 additions and 154 deletions

View File

@@ -895,6 +895,16 @@
Size64
Size128))
;; Map a scalar type onto the `ScalarSize` variant with the same bit width.
;; Integer and floating-point types of equal width share a variant.
(decl scalar_size (Type) ScalarSize)

;; 8 and 16 bits (integer only).
(rule (scalar_size $I8)   (ScalarSize.Size8))
(rule (scalar_size $I16)  (ScalarSize.Size16))

;; 32 bits.
(rule (scalar_size $I32)  (ScalarSize.Size32))
(rule (scalar_size $F32)  (ScalarSize.Size32))

;; 64 bits.
(rule (scalar_size $I64)  (ScalarSize.Size64))
(rule (scalar_size $F64)  (ScalarSize.Size64))

;; 128 bits (integer only).
(rule (scalar_size $I128) (ScalarSize.Size128))
(type Cond extern
(enum
(Eq)
@@ -1460,6 +1470,19 @@
(_ Unit (emit (MInst.VecRRR op dst src1 src2 size))))
dst))
;; Emit an `MInst.FpuRRR` performing `op` on `rn` and `rm` at scalar size
;; `size`, and return the register that receives the result.
;; The temporary is always requested with type `$F64`, whatever `size` is.
(decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg)
(rule (fpu_rrr op rn rm size)
      (let ((rd WritableReg (temp_writable_reg $F64))
            (_ Unit (emit (MInst.FpuRRR op size rd rn rm))))
        rd))
;; Build an `MInst.FpuCmp` of `lhs` against `rhs` at scalar size `size`.
;; The instruction is not emitted here; it is returned wrapped in
;; `ProducesFlags.ProducesFlagsSideEffect` so it can be paired with a
;; flags consumer.
(decl fpu_cmp (ScalarSize Reg Reg) ProducesFlags)
(rule (fpu_cmp size lhs rhs)
      (ProducesFlags.ProducesFlagsSideEffect
        (MInst.FpuCmp size lhs rhs)))
;; Helper for emitting `MInst.VecLanes` instructions.
(decl vec_lanes (VecLanesOp Reg VectorSize) Reg)
(rule (vec_lanes op src size)
@@ -1612,6 +1635,22 @@
(_ Unit (emit (MInst.VecRRLong op dst src high_half))))
dst))
;; Build a floating-point conditional select as a flags consumer.
;; `$F32` uses `MInst.FpuCSel32` and `$F64` uses `MInst.FpuCSel64`; in both
;; cases a fresh temporary of the matching type is allocated and returned
;; as the result register of the `ConsumesFlagsReturnsReg`.
(decl fpu_csel (Type Cond Reg Reg) ConsumesFlags)

;; 32-bit variant.
(rule (fpu_csel $F32 cond then_reg else_reg)
      (let ((rd WritableReg (temp_writable_reg $F32)))
        (ConsumesFlags.ConsumesFlagsReturnsReg
          (MInst.FpuCSel32 rd then_reg else_reg cond)
          rd)))

;; 64-bit variant.
(rule (fpu_csel $F64 cond then_reg else_reg)
      (let ((rd WritableReg (temp_writable_reg $F64)))
        (ConsumesFlags.ConsumesFlagsReturnsReg
          (MInst.FpuCSel64 rd then_reg else_reg cond)
          rd)))
;; Helper for emitting `MInst.MovToFpu` instructions.
(decl mov_to_fpu (Reg ScalarSize) Reg)
(rule (mov_to_fpu x size)

View File

@@ -164,6 +164,72 @@
;; `iabs` on types matched by `fits_in_32`: the operand is fetched with
;; `put_in_reg_sext32` and `abs` is applied at `OperandSize.Size32`.
(rule (lower (has_type (fits_in_32 ty) (iabs val)))
      (abs (OperandSize.Size32) (put_in_reg_sext32 val)))
;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Multi-lane vector types: `vec_rrr` with the `Fadd` vector ALU op.
(rule (lower (has_type vty @ (multi_lane _ _) (fadd lhs rhs)))
      (vec_rrr (VecALUOp.Fadd) lhs rhs (vector_size vty)))

;; Scalar float types: `fpu_rrr` with the `Add` op.
(rule (lower (has_type (ty_scalar_float fty) (fadd lhs rhs)))
      (fpu_rrr (FPUOp2.Add) lhs rhs (scalar_size fty)))
;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Multi-lane vector types: `vec_rrr` with the `Fsub` vector ALU op.
(rule (lower (has_type vty @ (multi_lane _ _) (fsub lhs rhs)))
      (vec_rrr (VecALUOp.Fsub) lhs rhs (vector_size vty)))

;; Scalar float types: `fpu_rrr` with the `Sub` op.
(rule (lower (has_type (ty_scalar_float fty) (fsub lhs rhs)))
      (fpu_rrr (FPUOp2.Sub) lhs rhs (scalar_size fty)))
;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Multi-lane vector types: `vec_rrr` with the `Fmul` vector ALU op.
(rule (lower (has_type vty @ (multi_lane _ _) (fmul lhs rhs)))
      (vec_rrr (VecALUOp.Fmul) lhs rhs (vector_size vty)))

;; Scalar float types: `fpu_rrr` with the `Mul` op.
(rule (lower (has_type (ty_scalar_float fty) (fmul lhs rhs)))
      (fpu_rrr (FPUOp2.Mul) lhs rhs (scalar_size fty)))
;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Multi-lane vector types: `vec_rrr` with the `Fdiv` vector ALU op.
(rule (lower (has_type vty @ (multi_lane _ _) (fdiv lhs rhs)))
      (vec_rrr (VecALUOp.Fdiv) lhs rhs (vector_size vty)))

;; Scalar float types: `fpu_rrr` with the `Div` op.
(rule (lower (has_type (ty_scalar_float fty) (fdiv lhs rhs)))
      (fpu_rrr (FPUOp2.Div) lhs rhs (scalar_size fty)))
;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Multi-lane vector types: `vec_rrr` with the `Fmin` vector ALU op.
(rule (lower (has_type vty @ (multi_lane _ _) (fmin lhs rhs)))
      (vec_rrr (VecALUOp.Fmin) lhs rhs (vector_size vty)))

;; Scalar float types: `fpu_rrr` with the `Min` op.
(rule (lower (has_type (ty_scalar_float fty) (fmin lhs rhs)))
      (fpu_rrr (FPUOp2.Min) lhs rhs (scalar_size fty)))
;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Multi-lane vector types: `vec_rrr` with the `Fmax` vector ALU op.
(rule (lower (has_type vty @ (multi_lane _ _) (fmax lhs rhs)))
      (vec_rrr (VecALUOp.Fmax) lhs rhs (vector_size vty)))

;; Scalar float types: `fpu_rrr` with the `Max` op.
(rule (lower (has_type (ty_scalar_float fty) (fmax lhs rhs)))
      (fpu_rrr (FPUOp2.Max) lhs rhs (scalar_size fty)))
;;;; Rules for `fmin_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Multi-lane vector types, with operands `a` and `b` in instruction order:
;; the `Fcmgt a, b` result is used as the selector of `bsl`, whose remaining
;; operands are `b` then `a`.
(rule (lower (has_type vty @ (multi_lane _ _) (fmin_pseudo a b)))
      (bsl vty (vec_rrr (VecALUOp.Fcmgt) a b (vector_size vty)) b a))

;; Scalar float types: compare `a` against `b`, then conditionally select on
;; `Gt` with `b` as the if-true operand and `a` as the if-false operand.
(rule (lower (has_type (ty_scalar_float fty) (fmin_pseudo a b)))
      (with_flags (fpu_cmp (scalar_size fty) a b)
                  (fpu_csel fty (Cond.Gt) b a)))
;;;; Rules for `fmax_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Multi-lane vector types, with operands `a` and `b` in instruction order:
;; the `Fcmgt b, a` result is used as the selector of `bsl`, whose remaining
;; operands are `b` then `a`.
(rule (lower (has_type vty @ (multi_lane _ _) (fmax_pseudo a b)))
      (bsl vty (vec_rrr (VecALUOp.Fcmgt) b a (vector_size vty)) b a))

;; Scalar float types: compare `b` against `a`, then conditionally select on
;; `Gt` with `b` as the if-true operand and `a` as the if-false operand.
(rule (lower (has_type (ty_scalar_float fty) (fmax_pseudo a b)))
      (with_flags (fpu_cmp (scalar_size fty) b a)
                  (fpu_csel fty (Cond.Gt) b a)))
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller

View File

@@ -19,6 +19,34 @@
;; `fsub` on dynamic-lane vector types: `vec_rrr` with the `Fsub` vector ALU op.
(rule (lower (has_type vty @ (dynamic_lane _ _) (fsub lhs rhs)))
      (value_reg (vec_rrr (VecALUOp.Fsub)
                          (put_in_reg lhs)
                          (put_in_reg rhs)
                          (vector_size vty))))
;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Dynamic-lane vector types: `vec_rrr` with the `Fmul` vector ALU op.
(rule (lower (has_type vty @ (dynamic_lane _ _) (fmul lhs rhs)))
      (value_reg (vec_rrr (VecALUOp.Fmul)
                          (put_in_reg lhs)
                          (put_in_reg rhs)
                          (vector_size vty))))
;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Dynamic-lane vector types: `vec_rrr` with the `Fdiv` vector ALU op.
(rule (lower (has_type vty @ (dynamic_lane _ _) (fdiv lhs rhs)))
      (value_reg (vec_rrr (VecALUOp.Fdiv)
                          (put_in_reg lhs)
                          (put_in_reg rhs)
                          (vector_size vty))))
;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Dynamic-lane vector types: `vec_rrr` with the `Fmin` vector ALU op.
(rule (lower (has_type vty @ (dynamic_lane _ _) (fmin lhs rhs)))
      (value_reg (vec_rrr (VecALUOp.Fmin)
                          (put_in_reg lhs)
                          (put_in_reg rhs)
                          (vector_size vty))))
;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Dynamic-lane vector types: `vec_rrr` with the `Fmax` vector ALU op.
(rule (lower (has_type vty @ (dynamic_lane _ _) (fmax lhs rhs)))
      (value_reg (vec_rrr (VecALUOp.Fmax)
                          (put_in_reg lhs)
                          (put_in_reg rhs)
                          (vector_size vty))))
;;;; Rules for `fmin_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Dynamic-lane vector types, with operands `a` and `b` in instruction order:
;; the `Fcmgt a, b` result is used as the selector of `bsl`, whose remaining
;; operands are `b` then `a`.
(rule (lower (has_type vty @ (dynamic_lane _ _) (fmin_pseudo a b)))
      (value_reg
        (bsl vty
             (vec_rrr (VecALUOp.Fcmgt)
                      (put_in_reg a)
                      (put_in_reg b)
                      (vector_size vty))
             (put_in_reg b)
             (put_in_reg a))))
;;;; Rules for `fmax_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Dynamic-lane vector types, with operands `a` and `b` in instruction order:
;; the `Fcmgt b, a` result is used as the selector of `bsl`, whose remaining
;; operands are `b` then `a`.
(rule (lower (has_type vty @ (dynamic_lane _ _) (fmax_pseudo a b)))
      (value_reg
        (bsl vty
             (vec_rrr (VecALUOp.Fcmgt)
                      (put_in_reg b)
                      (put_in_reg a)
                      (vector_size vty))
             (put_in_reg b)
             (put_in_reg a))))
;;; Rules for `dynamic_stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (dynamic_stack_addr stack_slot))
(let ((dst WritableReg (temp_writable_reg $I64))

View File

@@ -1244,110 +1244,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv | Opcode::Fmin | Opcode::Fmax => {
let ty = ty.unwrap();
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if !ty.is_vector() && !ty.is_dynamic_vector() {
let fpu_op = match op {
Opcode::Fadd => FPUOp2::Add,
Opcode::Fsub => FPUOp2::Sub,
Opcode::Fmul => FPUOp2::Mul,
Opcode::Fdiv => FPUOp2::Div,
Opcode::Fmin => FPUOp2::Min,
Opcode::Fmax => FPUOp2::Max,
_ => unreachable!(),
};
ctx.emit(Inst::FpuRRR {
fpu_op,
size: ScalarSize::from_ty(ty),
rd,
rn,
rm,
});
} else {
let alu_op = match op {
Opcode::Fadd => VecALUOp::Fadd,
Opcode::Fsub => VecALUOp::Fsub,
Opcode::Fdiv => VecALUOp::Fdiv,
Opcode::Fmax => VecALUOp::Fmax,
Opcode::Fmin => VecALUOp::Fmin,
Opcode::Fmul => VecALUOp::Fmul,
_ => unreachable!(),
};
ctx.emit(Inst::VecRRR {
rd,
rn,
rm,
alu_op,
size: VectorSize::from_ty(ty),
});
}
implemented_in_isle(ctx)
}
Opcode::FminPseudo | Opcode::FmaxPseudo => {
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let (ra, rb) = if op == Opcode::FminPseudo {
(rm, rn)
} else {
(rn, rm)
};
let ty = ty.unwrap();
let lane_type = ty.lane_type();
debug_assert!(lane_type == F32 || lane_type == F64);
if ty.is_vector() || ty.is_dynamic_vector() {
let size = VectorSize::from_ty(ty);
// pmin(a,b) => bitsel(b, a, cmpgt(a, b))
// pmax(a,b) => bitsel(b, a, cmpgt(b, a))
// Since we're going to write the output register `rd` anyway, we might as well
// first use it to hold the comparison result. This has the slightly unusual
// effect that we modify the output register in the first instruction (`fcmgt`)
// but read both the inputs again in the second instruction (`bsl`), which means
// that the output register can't be either of the input registers. Regalloc
// should handle this correctly, nevertheless.
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Fcmgt,
rd,
rn: ra,
rm: rb,
size,
});
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Bsl,
rd,
rn,
rm,
size,
});
} else {
ctx.emit(Inst::FpuCmp {
size: ScalarSize::from_ty(lane_type),
rn: ra,
rm: rb,
});
if lane_type == F32 {
ctx.emit(Inst::FpuCSel32 {
rd,
rn,
rm,
cond: Cond::Gt,
});
} else {
ctx.emit(Inst::FpuCSel64 {
rd,
rn,
rm,
cond: Cond::Gt,
});
}
}
}
Opcode::FminPseudo | Opcode::FmaxPseudo => implemented_in_isle(ctx),
Opcode::Sqrt | Opcode::Fneg | Opcode::Fabs | Opcode::Fpromote | Opcode::Fdemote => {
let ty = ty.unwrap();