Convert sqrt..nearest to ISLE (AArch64) (#4508)
Converted the existing implementations of the following opcodes to ISLE on AArch64: `sqrt`, `fneg`, `fabs`, `fpromote`, `fdemote`, `ceil`, `floor`, `trunc`, and `nearest`.
Copyright (c) 2022 Arm Limited
This commit is contained in:
@@ -1464,6 +1464,13 @@
|
||||
(_ Unit (emit (MInst.VecRRR op dst src1 src2 size))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.FpuRR` instructions.
;;
;; Takes the unary FPU operation `op`, the source register `src` and the
;; scalar operand `size`. It allocates a fresh temporary as the destination,
;; emits the instruction, and returns that destination register.
|
||||
(decl fpu_rr (FPUOp1 Reg ScalarSize) Reg)
|
||||
(rule (fpu_rr op src size)
|
||||
;; The temporary is always requested with type `$F64`, independent of `size`.
;; NOTE(review): presumably only the register class matters here - confirm.
(let ((dst WritableReg (temp_writable_reg $F64))
|
||||
(_ Unit (emit (MInst.FpuRR op size dst src))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.FpuRRR` instructions.
|
||||
(decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg)
|
||||
(rule (fpu_rrr op src1 src2 size)
|
||||
@@ -1644,6 +1651,12 @@
|
||||
(MInst.FpuCSel64 dst if_true if_false cond)
|
||||
dst)))
|
||||
|
||||
;; Helper for emitting `MInst.FpuRound` instructions.
;;
;; Takes the rounding mode `op` and the source register `rn`. It allocates a
;; fresh temporary as the destination, emits the instruction, and returns that
;; destination register. There is no separate size argument: the callers in
;; the lowering rules pick a width-specific `FpuRoundMode` variant
;; (e.g. `Plus32` vs. `Plus64`).
|
||||
(decl fpu_round (FpuRoundMode Reg) Reg)
|
||||
(rule (fpu_round op rn)
|
||||
;; The temporary is always requested with type `$F64`, even for the 32-bit
;; rounding modes.
;; NOTE(review): presumably only the register class matters here - confirm.
(let ((dst WritableReg (temp_writable_reg $F64))
|
||||
(_ Unit (emit (MInst.FpuRound op dst rn))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.MovToFpu` instructions.
|
||||
(decl mov_to_fpu (Reg ScalarSize) Reg)
|
||||
|
||||
@@ -4426,6 +4426,17 @@ fn test_aarch64_binemit() {
|
||||
"abs v1.2d, v10.2d",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Fabs,
|
||||
rd: writable_vreg(15),
|
||||
rn: vreg(16),
|
||||
size: VectorSize::Size32x2,
|
||||
},
|
||||
"0FFAA00E",
|
||||
"fabs v15.2s, v16.2s",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Fabs,
|
||||
@@ -4448,6 +4459,17 @@ fn test_aarch64_binemit() {
|
||||
"fabs v3.2d, v22.2d",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Fneg,
|
||||
rd: writable_vreg(31),
|
||||
rn: vreg(0),
|
||||
size: VectorSize::Size32x2,
|
||||
},
|
||||
"1FF8A02E",
|
||||
"fneg v31.2s, v0.2s",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Fneg,
|
||||
@@ -4481,6 +4503,17 @@ fn test_aarch64_binemit() {
|
||||
"fsqrt v18.2s, v25.2s",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Fsqrt,
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(25),
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"32FBA16E",
|
||||
"fsqrt v18.4s, v25.4s",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Fsqrt,
|
||||
|
||||
@@ -230,6 +230,85 @@
|
||||
(with_flags (fpu_cmp (scalar_size ty) rn rm)
|
||||
(fpu_csel ty (Cond.Gt) rn rm)))
|
||||
|
||||
;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Vector case: any `multi_lane` result type is lowered through `vec_misc`
;; with `VecMisc2.Fsqrt`, sized by `(vector_size ty)`.
(rule (lower (has_type ty @ (multi_lane _ _) (sqrt x)))
|
||||
(vec_misc (VecMisc2.Fsqrt) x (vector_size ty)))
|
||||
|
||||
;; Scalar case: a scalar float result type is lowered through `fpu_rr` with
;; `FPUOp1.Sqrt`, sized by `(scalar_size ty)`.
(rule (lower (has_type (ty_scalar_float ty) (sqrt x)))
|
||||
(fpu_rr (FPUOp1.Sqrt) x (scalar_size ty)))
|
||||
|
||||
;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Vector case: any `multi_lane` result type is lowered through `vec_misc`
;; with `VecMisc2.Fneg`, sized by `(vector_size ty)`.
(rule (lower (has_type ty @ (multi_lane _ _) (fneg x)))
|
||||
(vec_misc (VecMisc2.Fneg) x (vector_size ty)))
|
||||
|
||||
;; Scalar case: a scalar float result type is lowered through `fpu_rr` with
;; `FPUOp1.Neg`, sized by `(scalar_size ty)`.
(rule (lower (has_type (ty_scalar_float ty) (fneg x)))
|
||||
(fpu_rr (FPUOp1.Neg) x (scalar_size ty)))
|
||||
|
||||
;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Vector case: any `multi_lane` result type is lowered through `vec_misc`
;; with `VecMisc2.Fabs`, sized by `(vector_size ty)`.
(rule (lower (has_type ty @ (multi_lane _ _) (fabs x)))
|
||||
(vec_misc (VecMisc2.Fabs) x (vector_size ty)))
|
||||
|
||||
;; Scalar case: a scalar float result type is lowered through `fpu_rr` with
;; `FPUOp1.Abs`, sized by `(scalar_size ty)`.
(rule (lower (has_type (ty_scalar_float ty) (fabs x)))
|
||||
(fpu_rr (FPUOp1.Abs) x (scalar_size ty)))
|
||||
|
||||
;;;; Rules for `fpromote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Only an `$F64` result is matched. The size handed to `fpu_rr` is
;; `ScalarSize.Size32`, i.e. the width on the source side of
;; `FPUOp1.Cvt32To64`, not the width of the result type.
(rule (lower (has_type $F64 (fpromote x)))
|
||||
(fpu_rr (FPUOp1.Cvt32To64) x (ScalarSize.Size32)))
|
||||
|
||||
;;;; Rules for `fdemote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Only an `$F32` result is matched. The size handed to `fpu_rr` is
;; `ScalarSize.Size64`, i.e. the width on the source side of
;; `FPUOp1.Cvt64To32`, not the width of the result type.
(rule (lower (has_type $F32 (fdemote x)))
|
||||
(fpu_rr (FPUOp1.Cvt64To32) x (ScalarSize.Size64)))
|
||||
|
||||
;;;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Vector case: lowered through `vec_misc` with `VecMisc2.Frintp`.
;; NOTE(review): this matches every `multi_lane` type, whereas the Rust
;; lowering it replaces accepted only F32X4 and F64X2 - confirm that other
;; vector types cannot reach this rule.
(rule (lower (has_type ty @ (multi_lane _ _) (ceil x)))
|
||||
(vec_misc (VecMisc2.Frintp) x (vector_size ty)))
|
||||
|
||||
;; Scalar `$F32` case: `fpu_round` with the 32-bit `Plus` rounding mode.
(rule (lower (has_type $F32 (ceil x)))
|
||||
(fpu_round (FpuRoundMode.Plus32) x))
|
||||
|
||||
;; Scalar `$F64` case: `fpu_round` with the 64-bit `Plus` rounding mode.
(rule (lower (has_type $F64 (ceil x)))
|
||||
(fpu_round (FpuRoundMode.Plus64) x))
|
||||
|
||||
;;;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Vector case: lowered through `vec_misc` with `VecMisc2.Frintm`.
;; NOTE(review): this matches every `multi_lane` type, whereas the Rust
;; lowering it replaces accepted only F32X4 and F64X2 - confirm that other
;; vector types cannot reach this rule.
(rule (lower (has_type ty @ (multi_lane _ _) (floor x)))
|
||||
(vec_misc (VecMisc2.Frintm) x (vector_size ty)))
|
||||
|
||||
;; Scalar `$F32` case: `fpu_round` with the 32-bit `Minus` rounding mode.
(rule (lower (has_type $F32 (floor x)))
|
||||
(fpu_round (FpuRoundMode.Minus32) x))
|
||||
|
||||
;; Scalar `$F64` case: `fpu_round` with the 64-bit `Minus` rounding mode.
(rule (lower (has_type $F64 (floor x)))
|
||||
(fpu_round (FpuRoundMode.Minus64) x))
|
||||
|
||||
;;;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Vector case: lowered through `vec_misc` with `VecMisc2.Frintz`.
;; NOTE(review): this matches every `multi_lane` type, whereas the Rust
;; lowering it replaces accepted only F32X4 and F64X2 - confirm that other
;; vector types cannot reach this rule.
(rule (lower (has_type ty @ (multi_lane _ _) (trunc x)))
|
||||
(vec_misc (VecMisc2.Frintz) x (vector_size ty)))
|
||||
|
||||
;; Scalar `$F32` case: `fpu_round` with the 32-bit `Zero` rounding mode.
(rule (lower (has_type $F32 (trunc x)))
|
||||
(fpu_round (FpuRoundMode.Zero32) x))
|
||||
|
||||
;; Scalar `$F64` case: `fpu_round` with the 64-bit `Zero` rounding mode.
(rule (lower (has_type $F64 (trunc x)))
|
||||
(fpu_round (FpuRoundMode.Zero64) x))
|
||||
|
||||
;;;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Vector case: lowered through `vec_misc` with `VecMisc2.Frintn`.
;; NOTE(review): this matches every `multi_lane` type, whereas the Rust
;; lowering it replaces accepted only F32X4 and F64X2 - confirm that other
;; vector types cannot reach this rule.
(rule (lower (has_type ty @ (multi_lane _ _) (nearest x)))
|
||||
(vec_misc (VecMisc2.Frintn) x (vector_size ty)))
|
||||
|
||||
;; Scalar `$F32` case: `fpu_round` with the 32-bit `Nearest` rounding mode.
(rule (lower (has_type $F32 (nearest x)))
|
||||
(fpu_round (FpuRoundMode.Nearest32) x))
|
||||
|
||||
;; Scalar `$F64` case: `fpu_round` with the 64-bit `Nearest` rounding mode.
(rule (lower (has_type $F64 (nearest x)))
|
||||
(fpu_round (FpuRoundMode.Nearest64) x))
|
||||
|
||||
|
||||
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller
|
||||
|
||||
@@ -1250,107 +1250,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
Opcode::FminPseudo | Opcode::FmaxPseudo => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::Sqrt | Opcode::Fneg | Opcode::Fabs | Opcode::Fpromote | Opcode::Fdemote => {
|
||||
let ty = ty.unwrap();
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
if !ty.is_vector() {
|
||||
let fpu_op = match op {
|
||||
Opcode::Sqrt => FPUOp1::Sqrt,
|
||||
Opcode::Fneg => FPUOp1::Neg,
|
||||
Opcode::Fabs => FPUOp1::Abs,
|
||||
Opcode::Fpromote => {
|
||||
if ty != F64 {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"Fpromote: Unsupported type: {:?}",
|
||||
ty
|
||||
)));
|
||||
}
|
||||
FPUOp1::Cvt32To64
|
||||
}
|
||||
Opcode::Fdemote => {
|
||||
if ty != F32 {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"Fdemote: Unsupported type: {:?}",
|
||||
ty
|
||||
)));
|
||||
}
|
||||
FPUOp1::Cvt64To32
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
ctx.emit(Inst::FpuRR {
|
||||
fpu_op,
|
||||
size: ScalarSize::from_ty(ctx.input_ty(insn, 0)),
|
||||
rd,
|
||||
rn,
|
||||
});
|
||||
} else {
|
||||
let op = match op {
|
||||
Opcode::Fabs => VecMisc2::Fabs,
|
||||
Opcode::Fneg => VecMisc2::Fneg,
|
||||
Opcode::Sqrt => VecMisc2::Fsqrt,
|
||||
_ => {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"{}: Unsupported type: {:?}",
|
||||
op, ty
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op,
|
||||
rd,
|
||||
rn,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
implemented_in_isle(ctx)
|
||||
}
|
||||
|
||||
Opcode::Ceil | Opcode::Floor | Opcode::Trunc | Opcode::Nearest => {
|
||||
let ty = ctx.output_ty(insn, 0);
|
||||
if !ty.is_vector() {
|
||||
let bits = ty_bits(ty);
|
||||
let op = match (op, bits) {
|
||||
(Opcode::Ceil, 32) => FpuRoundMode::Plus32,
|
||||
(Opcode::Ceil, 64) => FpuRoundMode::Plus64,
|
||||
(Opcode::Floor, 32) => FpuRoundMode::Minus32,
|
||||
(Opcode::Floor, 64) => FpuRoundMode::Minus64,
|
||||
(Opcode::Trunc, 32) => FpuRoundMode::Zero32,
|
||||
(Opcode::Trunc, 64) => FpuRoundMode::Zero64,
|
||||
(Opcode::Nearest, 32) => FpuRoundMode::Nearest32,
|
||||
(Opcode::Nearest, 64) => FpuRoundMode::Nearest64,
|
||||
_ => {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"{}: Unsupported type: {:?}",
|
||||
op, ty
|
||||
)))
|
||||
}
|
||||
};
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
ctx.emit(Inst::FpuRound { op, rd, rn });
|
||||
} else {
|
||||
let (op, size) = match (op, ty) {
|
||||
(Opcode::Ceil, F32X4) => (VecMisc2::Frintp, VectorSize::Size32x4),
|
||||
(Opcode::Ceil, F64X2) => (VecMisc2::Frintp, VectorSize::Size64x2),
|
||||
(Opcode::Floor, F32X4) => (VecMisc2::Frintm, VectorSize::Size32x4),
|
||||
(Opcode::Floor, F64X2) => (VecMisc2::Frintm, VectorSize::Size64x2),
|
||||
(Opcode::Trunc, F32X4) => (VecMisc2::Frintz, VectorSize::Size32x4),
|
||||
(Opcode::Trunc, F64X2) => (VecMisc2::Frintz, VectorSize::Size64x2),
|
||||
(Opcode::Nearest, F32X4) => (VecMisc2::Frintn, VectorSize::Size32x4),
|
||||
(Opcode::Nearest, F64X2) => (VecMisc2::Frintn, VectorSize::Size64x2),
|
||||
_ => {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"{}: Unsupported type: {:?}",
|
||||
op, ty
|
||||
)))
|
||||
}
|
||||
};
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
ctx.emit(Inst::VecMisc { op, rd, rn, size });
|
||||
}
|
||||
}
|
||||
Opcode::Ceil | Opcode::Floor | Opcode::Trunc | Opcode::Nearest => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::Fma => {
|
||||
let ty = ty.unwrap();
|
||||
|
||||
Reference in New Issue
Block a user