Improve fcvt_to_{u,s}int_sat lowering (AArch64) (#4913)

Improved the instruction lowering for the following opcodes on AArch64,
and introduced support for converting to integers narrower than 32 bits,
as per the docs:
- `FcvtToSintSat`
- `FcvtToUintSat`

Copyright (c) 2022 Arm Limited
This commit is contained in:
Damian Heaton
2022-09-21 18:16:09 +01:00
committed by GitHub
parent e786bda002
commit 352c7595c6
6 changed files with 326 additions and 356 deletions

View File

@@ -1635,22 +1635,6 @@
(decl max_fp_value (bool u8 u8) Reg)
(extern constructor max_fp_value max_fp_value)
;; Calculate the minimum acceptable floating-point value for a saturating
;; conversion from a floating-point type to an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
;; Returns a register holding that bound as a floating-point constant.
(decl min_fp_value_sat (bool u8 u8) Reg)
(extern constructor min_fp_value_sat min_fp_value_sat)
;; Calculate the maximum acceptable floating-point value for a saturating
;; conversion from a floating-point type to an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
;; Returns a register holding that bound as a floating-point constant.
(decl max_fp_value_sat (bool u8 u8) Reg)
(extern constructor max_fp_value_sat max_fp_value_sat)
;; Constructs an FPUOpRI.Ushr* given the size in bits of the value (or lane)
;; and the amount to shift by.
(decl fpu_op_ri_ushr (u8 u8) FPUOpRI)
@@ -3147,32 +3131,37 @@
;; floating-point value to an integer, saturating if the value
;; does not fit in the target type.
;; Accepts the specific conversion op, the source register,
;; whether the output is signed, and finally the input and output
;; types.
;; Five-argument form: conversion op, source register, signedness flag,
;; floating-point input type, integer output type. Yields the converted
;; integer in a register.
(decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type Type) Reg)
;; Signed case ($true): clamp `src` into the representable range in the
;; floating-point domain, force the value to zero when `src` is NaN, and only
;; then run the conversion op.
(rule (fpu_to_int_cvt_sat op src $true in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
;; Upper clamp: tmp = fmin(src, max).
(max Reg (max_fp_value_sat $true in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
;; Lower clamp: tmp = fmax(tmp, min).
(min Reg (min_fp_value_sat $true in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
(zero Reg (constant_f128 0))
;; Comparing `src` with itself is "not equal" only for NaN; in that case
;; select `zero` instead of the clamped value.
(tmp ValueRegs (with_flags (fpu_cmp size src src)
(fpu_csel in_ty (Cond.Ne) zero tmp))))
(fpu_to_int op (value_regs_get tmp 0))))
;; Unsigned case ($false): same clamp-then-convert sequence as the signed
;; rule, except that the NaN fallback reuses the `min` register rather than
;; materializing a separate zero constant.
(rule (fpu_to_int_cvt_sat op src $false in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
;; Upper clamp: tmp = fmin(src, max).
(max Reg (max_fp_value_sat $false in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
;; Lower clamp: tmp = fmax(tmp, min).
(min Reg (min_fp_value_sat $false in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
;; `src` != `src` holds only for NaN; select `min` in that case.
(tmp ValueRegs (with_flags (fpu_cmp size src src)
(fpu_csel in_ty (Cond.Ne) min tmp))))
(fpu_to_int op (value_regs_get tmp 0))))
;; whether the output is signed, and finally the output type.
;; Four-argument form: conversion op, source register, signedness flag, and
;; the integer output type. Yields the converted integer in a register.
(decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type) Reg)
;; 64-bit output: the result of the conversion op is used as-is, with no
;; additional clamping emitted here.
;; NOTE(review): presumably relies on the AArch64 convert instruction itself
;; saturating and mapping NaN to zero -- confirm against the emitted opcode.
(rule (fpu_to_int_cvt_sat op src _ $I64)
(fpu_to_int op src))
;; 32-bit output: likewise, the conversion result is returned directly.
(rule (fpu_to_int_cvt_sat op src _ $I32)
(fpu_to_int op src))
;; Unsigned output of at most 16 bits: run the conversion op, then clamp the
;; integer result from above in the integer domain.
(rule (fpu_to_int_cvt_sat op src $false (fits_in_16 out_ty))
(let ((result Reg (fpu_to_int op src))
;; -1 materialized for `out_ty` with zero extension.
;; NOTE(review): presumably this yields the all-ones value of `out_ty`
;; (0xff / 0xffff) -- confirm against `imm`.
(max Reg (imm out_ty (ImmExtend.Zero) -1)))
;; 32-bit compare; if `result` is unsigned-higher than `max`, pick `max`.
(with_flags_reg
(cmp (OperandSize.Size32) result max)
(csel (Cond.Hi) max result))))
;; Signed output of at most 16 bits: run the conversion op, then clamp the
;; integer result on both sides using the bounds from `signed_max` and
;; `signed_min`.
(rule (fpu_to_int_cvt_sat op src $true (fits_in_16 out_ty))
(let ((result Reg (fpu_to_int op src))
;; Bounds are materialized as sign-extended I32 immediates.
(max Reg (imm $I32 (ImmExtend.Sign) (signed_max out_ty)))
(min Reg (imm $I32 (ImmExtend.Sign) (signed_min out_ty)))
;; Upper clamp: if result > max (signed), take max.
(result Reg (with_flags_reg
(cmp (operand_size out_ty) result max)
(csel (Cond.Gt) max result)))
;; Lower clamp: if result < min (signed), take min.
(result Reg (with_flags_reg
(cmp (operand_size out_ty) result min)
(csel (Cond.Lt) min result))))
result))
;; Smallest value of a signed integer type, returned as a u64 immediate.
;; Only $I8 and $I16 have rules here.
(decl signed_min (Type) u64)
(rule (signed_min $I8) -128)
(rule (signed_min $I16) -32768)
;; Largest value of a signed integer type, returned as a u64 immediate.
;; Only $I8 and $I16 have rules here.
(decl signed_max (Type) u64)
(rule (signed_max $I8) 127)
(rule (signed_max $I16) 32767)
(decl fpu_to_int (FpuToIntOp Reg) Reg)
(rule (fpu_to_int op src)

View File

@@ -472,17 +472,17 @@
;; Vectors with 64-bit lanes on both sides: one vector Fcvtzu over the input.
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 64 _)))))
(vec_misc (VecMisc2.Fcvtzu) x (vector_size ty)))
;; Scalar F32 source, exactly-I32 result.
(rule (lower (has_type $I32 (fcvt_to_uint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU32) x $false $F32 $I32))
;; Scalar F32 source, any result type of at most 32 bits; the F32ToU32 op is
;; used and `out_ty` is forwarded so the helper can narrow the result.
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU32) x $false out_ty))
;; Scalar F32 source, I64 result.
(rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU64) x $false $F32 $I64))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU64) x $false $I64))
;; Scalar F64 source, exactly-I32 result.
(rule (lower (has_type $I32 (fcvt_to_uint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU32) x $false $F64 $I32))
;; Scalar F64 source, any result type of at most 32 bits; the F64ToU32 op is
;; used and `out_ty` is forwarded so the helper can narrow the result.
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU32) x $false out_ty))
;; Scalar F64 source, I64 result.
(rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU64) x $false $F64 $I64))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU64) x $false $I64))
;;;; Rules for `fcvt_to_sint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -492,17 +492,17 @@
;; Vectors with 64-bit lanes on both sides: one vector Fcvtzs over the input.
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 64 _)))))
(vec_misc (VecMisc2.Fcvtzs) x (vector_size ty)))
;; Scalar F32 source, exactly-I32 result.
(rule (lower (has_type $I32 (fcvt_to_sint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI32) x $true $F32 $I32))
;; Scalar F32 source, any result type of at most 32 bits; the F32ToI32 op is
;; used and `out_ty` is forwarded so the helper can narrow the result.
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI32) x $true out_ty))
;; Scalar F32 source, I64 result.
(rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI64) x $true $F32 $I64))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI64) x $true $I64))
;; Scalar F64 source, exactly-I32 result.
(rule (lower (has_type $I32 (fcvt_to_sint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI32) x $true $F64 $I32))
;; Scalar F64 source, any result type of at most 32 bits; the F64ToI32 op is
;; used and `out_ty` is forwarded so the helper can narrow the result.
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI32) x $true out_ty))
;; Scalar F64 source, I64 result.
(rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI64) x $true $F64 $I64))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI64) x $true $I64))
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@@ -637,68 +637,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
tmp.to_reg()
}
/// Loads the lower saturation bound for a float-to-integer conversion into a
/// freshly allocated temporary register and returns that register.
///
/// * `signed` - whether the integer result is signed.
/// * `in_bits` - width of the floating-point type the constant is emitted as
///   (32 or 64).
/// * `out_bits` - width of the integer result (32 or 64).
///
/// # Panics
///
/// Hits `unimplemented!` for any other `in_bits` or `out_bits` value.
fn min_fp_value_sat(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
    let dst = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();

    // Unsigned results bottom out at zero; signed ones at the integer MIN.
    let lower_bound: f64 = match (signed, out_bits) {
        (false, 32 | 64) => 0.0,
        (true, 32) => f64::from(i32::MIN),
        (true, 64) => i64::MIN as f64,
        _ => {
            let signedness = if signed { "signed" } else { "unsigned" };
            unimplemented!("unexpected {} output size of {} bits", signedness, out_bits)
        }
    };

    // Emit the constant at the precision of the floating-point input.
    match in_bits {
        32 => lower_constant_f32(self.lower_ctx, dst, lower_bound as f32),
        64 => lower_constant_f64(self.lower_ctx, dst, lower_bound),
        _ => unimplemented!(
            "unexpected input size for min_fp_value_sat: {} (signed: {}, output size: {})",
            in_bits,
            signed,
            out_bits
        ),
    }

    dst.to_reg()
}
/// Loads the upper saturation bound for a float-to-integer conversion into a
/// freshly allocated temporary register and returns that register.
///
/// * `signed` - whether the integer result is signed.
/// * `in_bits` - width of the floating-point type the constant is emitted as
///   (32 or 64).
/// * `out_bits` - width of the integer result (32 or 64).
///
/// # Panics
///
/// Hits `unimplemented!` for any other `in_bits` or `out_bits` value.
fn max_fp_value_sat(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
    let dst = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();

    // The bound is the MAX of the integer type selected by (signed, out_bits).
    let upper_bound: f64 = match (signed, out_bits) {
        (true, 32) => f64::from(i32::MAX),
        (false, 32) => f64::from(u32::MAX),
        (true, 64) => i64::MAX as f64,
        (false, 64) => u64::MAX as f64,
        _ => {
            let signedness = if signed { "signed" } else { "unsigned" };
            unimplemented!("unexpected {} output size of {} bits", signedness, out_bits)
        }
    };

    // Emit the constant at the precision of the floating-point input.
    match in_bits {
        32 => lower_constant_f32(self.lower_ctx, dst, upper_bound as f32),
        64 => lower_constant_f64(self.lower_ctx, dst, upper_bound),
        _ => unimplemented!(
            "unexpected input size for max_fp_value_sat: {} (signed: {}, output size: {})",
            in_bits,
            signed,
            out_bits
        ),
    }

    dst.to_reg()
}
fn fpu_op_ri_ushr(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI {
if ty_bits == 32 {
FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(shift, ty_bits).unwrap())