Port Fcopysign..FcvtToSintSat to ISLE (AArch64) (#4753)

* Port `Fcopysign`..``FcvtToSintSat` to ISLE (AArch64)

Ported the existing implementations of the following opcodes to ISLE on
AArch64:
- `Fcopysign`
  - Also introduced missing support for `fcopysign` on vector values, as
    per the docs.
  - This introduces the vector encoding for the `SLI` machine
    instruction.
- `FcvtToUint`
- `FcvtToSint`
- `FcvtFromUint`
- `FcvtFromSint`
- `FcvtToUintSat`
- `FcvtToSintSat`

Copyright (c) 2022 Arm Limited

* Document helpers and abstract conversion checks
This commit is contained in:
Damian Heaton
2022-08-24 18:37:14 +01:00
committed by GitHub
parent 7e3c481f4e
commit 94bcbe8446
12 changed files with 863 additions and 548 deletions

View File

@@ -619,6 +619,14 @@
(size VectorSize)
(imm u8))
;; Destructive vector shift by immediate.
(VecShiftImmMod
(op VecShiftImmModOp)
(rd WritableReg)
(rn Reg)
(size VectorSize)
(imm u8))
;; Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes
;; of `rm` followed by the uppermost `16 - imm4` bytes of `rn`.
(VecExtract
@@ -1315,6 +1323,13 @@
(Sshr)
))
;; Destructive shift-by-immediate operation on each lane of a vector.
(type VecShiftImmModOp
(enum
;; Shift left and insert
(Sli)
))
;; Atomic read-modify-write operations with acquire-release semantics
(type AtomicRMWOp
(enum
@@ -1386,6 +1401,48 @@
(decl u64_into_imm_logic (Type u64) ImmLogic)
(extern constructor u64_into_imm_logic u64_into_imm_logic)
;; Calculate the minimum floating-point bound for a conversion to floating
;; point from an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl min_fp_value (bool u8 u8) Reg)
(extern constructor min_fp_value min_fp_value)
;; Calculate the maximum floating-point bound for a conversion to floating
;; point from an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl max_fp_value (bool u8 u8) Reg)
(extern constructor max_fp_value max_fp_value)
;; Calculate the minimum acceptable floating-point value for a conversion to
;; floating point from an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl min_fp_value_sat (bool u8 u8) Reg)
(extern constructor min_fp_value_sat min_fp_value_sat)
;; Calculate the maximum acceptable floating-point value for a conversion to
;; floating point from an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl max_fp_value_sat (bool u8 u8) Reg)
(extern constructor max_fp_value_sat max_fp_value_sat)
;; Constructs an FPUOpRI.Ushr* given the size in bits of the value (or lane)
;; and the amount to shift by.
(decl fpu_op_ri_ushr (u8 u8) FPUOpRI)
(extern constructor fpu_op_ri_ushr fpu_op_ri_ushr)
;; Constructs an FPUOpRI.Sli* given the size in bits of the value (or lane)
;; and the amount to shift by.
(decl fpu_op_ri_sli (u8 u8) FPUOpRI)
(extern constructor fpu_op_ri_sli fpu_op_ri_sli)
(decl imm12_from_negated_u64 (Imm12) u64)
(extern extractor imm12_from_negated_u64 imm12_from_negated_u64)
@@ -1533,6 +1590,12 @@
(_2 Unit (emit (MInst.VecRRRMod op dst src2 src3 size))))
dst))
(decl fpu_rri (FPUOpRI Reg) Reg)
(rule (fpu_rri op src)
(let ((dst WritableReg (temp_writable_reg $F64))
(_ Unit (emit (MInst.FpuRRI op dst src))))
dst))
;; Helper for emitting `MInst.FpuRRR` instructions.
(decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg)
(rule (fpu_rrr op src1 src2 size)
@@ -2611,3 +2674,147 @@
;; to clobber LR.
(let ((_ Unit (emit (MInst.Xpaclri))))
(mov_preg (preg_link))))
;; Helper for getting the maximum shift amount for a type.
(decl max_shift (Type) u8)
(rule (max_shift $F64) 63)
(rule (max_shift $F32) 31)
;; Helper for generating `fcopysign` instruction sequences.
(decl fcopy_sign (Reg Reg Type) Reg)
(rule (fcopy_sign x y (ty_scalar_float ty))
(let ((dst WritableReg (temp_writable_reg $F64))
(_ Unit (emit (MInst.FpuMove64 dst x)))
(tmp Reg (fpu_rri (fpu_op_ri_ushr (ty_bits ty) (max_shift ty)) y))
(_ Unit (emit (MInst.FpuRRI (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst tmp))))
dst))
(rule (fcopy_sign x y ty @ (multi_lane _ _))
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.FpuMove128 dst x)))
(tmp Reg (vec_shift_imm (VecShiftImmOp.Ushr) (max_shift (lane_type ty)) y (vector_size ty)))
(_ Unit (emit (MInst.VecShiftImmMod (VecShiftImmModOp.Sli) dst tmp (vector_size ty) (max_shift (lane_type ty))))))
dst))
;; Helpers for generating `MInst.FpuToInt` instructions.
(decl fpu_to_int_nan_check (ScalarSize Reg) Reg)
(rule (fpu_to_int_nan_check size src)
(let ((r ValueRegs
(with_flags (fpu_cmp size src src)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Vs))
(trap_code_bad_conversion_to_integer))
src))))
(value_regs_get r 0)))
;; Checks that the value is not less than the minimum bound,
;; accepting a boolean (whether the type is signed), input type,
;; output type, and registers containing the source and minimum bound.
(decl fpu_to_int_underflow_check (bool Type Type Reg Reg) Reg)
(rule (fpu_to_int_underflow_check $true $F32 (fits_in_16 out_ty) src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (ScalarSize.Size32) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Le))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
(rule (fpu_to_int_underflow_check $true $F64 (fits_in_32 out_ty) src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (ScalarSize.Size64) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Le))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
(rule -1 (fpu_to_int_underflow_check $true in_ty _out_ty src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (scalar_size in_ty) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Lt))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
(rule (fpu_to_int_underflow_check $false in_ty _out_ty src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (scalar_size in_ty) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Le))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
(decl fpu_to_int_overflow_check (ScalarSize Reg Reg) Reg)
(rule (fpu_to_int_overflow_check size src max)
(let ((r ValueRegs
(with_flags (fpu_cmp size src max)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Ge))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
;; Emits the appropriate instruction sequence to convert a
;; floating-point value to an integer, trapping if the value
;; is a NaN or does not fit in the target type.
;; Accepts the specific conversion op, the source register,
;; whether the input is signed, and finally the input and output
;; types.
(decl fpu_to_int_cvt (FpuToIntOp Reg bool Type Type) Reg)
(rule (fpu_to_int_cvt op src signed in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
(src Reg (fpu_to_int_nan_check size src))
(min Reg (min_fp_value signed in_bits out_bits))
(src Reg (fpu_to_int_underflow_check signed in_ty out_ty src min))
(max Reg (max_fp_value signed in_bits out_bits))
(src Reg (fpu_to_int_overflow_check size src max)))
(fpu_to_int op src)))
;; Emits the appropriate instruction sequence to convert a
;; floating-point value to an integer, saturating if the value
;; does not fit in the target type.
;; Accepts the specific conversion op, the source register,
;; whether the input is signed, and finally the input and output
;; types.
(decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type Type) Reg)
(rule (fpu_to_int_cvt_sat op src $true in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
(max Reg (max_fp_value_sat $true in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
(min Reg (min_fp_value_sat $true in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
(zero Reg (constant_f128 0))
(tmp ValueRegs (with_flags (fpu_cmp size src src)
(fpu_csel in_ty (Cond.Ne) zero tmp))))
(fpu_to_int op (value_regs_get tmp 0))))
(rule (fpu_to_int_cvt_sat op src $false in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
(max Reg (max_fp_value_sat $false in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
(min Reg (min_fp_value_sat $false in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
(tmp ValueRegs (with_flags (fpu_cmp size src src)
(fpu_csel in_ty (Cond.Ne) min tmp))))
(fpu_to_int op (value_regs_get tmp 0))))
(decl fpu_to_int (FpuToIntOp Reg) Reg)
(rule (fpu_to_int op src)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.FpuToInt op dst src))))
dst))
;; Helper for generating `MInst.IntToFpu` instructions.
(decl int_to_fpu (IntToFpuOp Reg) Reg)
(rule (int_to_fpu op src)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.IntToFpu op dst src))))
dst))

View File

@@ -2033,6 +2033,50 @@ impl MachInstEmit for Inst {
let rd_enc = machreg_to_vec(rd.to_reg());
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
}
&Inst::VecShiftImmMod {
op,
rd,
rn,
size,
imm,
} => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let (is_shr, mut template) = match op {
VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
};
if size.is_128bits() {
template |= 0b1 << 30;
}
let imm = imm as u32;
// Deal with the somewhat strange encoding scheme for, and limits on,
// the shift amount.
let immh_immb = match (size.lane_size(), is_shr) {
(ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
0b_1000_000_u32 | (64 - imm)
}
(ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
0b_0100_000_u32 | (32 - imm)
}
(ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
0b_0010_000_u32 | (16 - imm)
}
(ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
0b_0001_000_u32 | (8 - imm)
}
(ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
(ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
(ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
(ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
_ => panic!(
"aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {:?}, {:?}, {:?}",
op, size, imm
),
};
let rn_enc = machreg_to_vec(rn);
let rd_enc = machreg_to_vec(rd.to_reg());
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
}
&Inst::VecExtract { rd, rn, rm, imm4 } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);

View File

@@ -39,7 +39,7 @@ pub use crate::isa::aarch64::lower::isle::generated_code::{
ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3,
FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp,
VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp,
VecRRRLongOp, VecShiftImmOp,
VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
};
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
@@ -767,6 +767,10 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::VecShiftImmMod { rd, rn, .. } => {
collector.reg_mod(rd);
collector.reg_use(rn);
}
&Inst::VecExtract { rd, rn, rm, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
@@ -2371,6 +2375,20 @@ impl Inst {
let rn = pretty_print_vreg_vector(rn, size, allocs);
format!("{} {}, {}, #{}", op, rd, rn, imm)
}
&Inst::VecShiftImmMod {
op,
rd,
rn,
size,
imm,
} => {
let op = match op {
VecShiftImmModOp::Sli => "sli",
};
let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
let rn = pretty_print_vreg_vector(rn, size, allocs);
format!("{} {}, {}, #{}", op, rd, rn, imm)
}
&Inst::VecExtract { rd, rn, rm, imm4 } => {
let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs);

View File

@@ -406,6 +406,119 @@
(rule (lower (has_type (ty_scalar_float ty) (fma x y z)))
(fpu_rrrr (FPUOp3.MAdd) (scalar_size ty) x y z))
;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (fcopysign x y)))
(fcopy_sign x y ty))
;;;; Rules for `fcvt_to_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint x @ (value_type $F32))))
(fpu_to_int_cvt (FpuToIntOp.F32ToU32) x $false $F32 out_ty))
(rule (lower (has_type $I64 (fcvt_to_uint x @ (value_type $F32))))
(fpu_to_int_cvt (FpuToIntOp.F32ToU64) x $false $F32 $I64))
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint x @ (value_type $F64))))
(fpu_to_int_cvt (FpuToIntOp.F64ToU32) x $false $F64 out_ty))
(rule (lower (has_type $I64 (fcvt_to_uint x @ (value_type $F64))))
(fpu_to_int_cvt (FpuToIntOp.F64ToU64) x $false $F64 $I64))
;;;; Rules for `fcvt_to_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint x @ (value_type $F32))))
(fpu_to_int_cvt (FpuToIntOp.F32ToI32) x $true $F32 out_ty))
(rule (lower (has_type $I64 (fcvt_to_sint x @ (value_type $F32))))
(fpu_to_int_cvt (FpuToIntOp.F32ToI64) x $true $F32 $I64))
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint x @ (value_type $F64))))
(fpu_to_int_cvt (FpuToIntOp.F64ToI32) x $true $F64 out_ty))
(rule (lower (has_type $I64 (fcvt_to_sint x @ (value_type $F64))))
(fpu_to_int_cvt (FpuToIntOp.F64ToI64) x $true $F64 $I64))
;;;; Rules for `fcvt_from_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_from_uint x @ (value_type (multi_lane 32 _)))))
(vec_misc (VecMisc2.Ucvtf) x (vector_size ty)))
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_from_uint x @ (value_type (multi_lane 64 _)))))
(vec_misc (VecMisc2.Ucvtf) x (vector_size ty)))
(rule (lower (has_type $F32 (fcvt_from_uint x @ (value_type (fits_in_32 _)))))
(int_to_fpu (IntToFpuOp.U32ToF32) (put_in_reg_zext32 x)))
(rule (lower (has_type $F64 (fcvt_from_uint x @ (value_type (fits_in_32 _)))))
(int_to_fpu (IntToFpuOp.U32ToF64) (put_in_reg_zext32 x)))
(rule (lower (has_type $F32 (fcvt_from_uint x @ (value_type $I64))))
(int_to_fpu (IntToFpuOp.U64ToF32) x))
(rule (lower (has_type $F64 (fcvt_from_uint x @ (value_type $I64))))
(int_to_fpu (IntToFpuOp.U64ToF64) x))
;;;; Rules for `fcvt_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_from_sint x @ (value_type (multi_lane 32 _)))))
(vec_misc (VecMisc2.Scvtf) x (vector_size ty)))
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_from_sint x @ (value_type (multi_lane 64 _)))))
(vec_misc (VecMisc2.Scvtf) x (vector_size ty)))
(rule (lower (has_type $F32 (fcvt_from_sint x @ (value_type (fits_in_32 _)))))
(int_to_fpu (IntToFpuOp.I32ToF32) (put_in_reg_sext32 x)))
(rule (lower (has_type $F64 (fcvt_from_sint x @ (value_type (fits_in_32 _)))))
(int_to_fpu (IntToFpuOp.I32ToF64) (put_in_reg_sext32 x)))
(rule (lower (has_type $F32 (fcvt_from_sint x @ (value_type $I64))))
(int_to_fpu (IntToFpuOp.I64ToF32) x))
(rule (lower (has_type $F64 (fcvt_from_sint x @ (value_type $I64))))
(int_to_fpu (IntToFpuOp.I64ToF64) x))
;;;; Rules for `fcvt_to_uint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 32 _)))))
(vec_misc (VecMisc2.Fcvtzu) x (vector_size ty)))
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 64 _)))))
(vec_misc (VecMisc2.Fcvtzu) x (vector_size ty)))
(rule (lower (has_type $I32 (fcvt_to_uint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU32) x $false $F32 $I32))
(rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU64) x $false $F32 $I64))
(rule (lower (has_type $I32 (fcvt_to_uint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU32) x $false $F64 $I32))
(rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU64) x $false $F64 $I64))
;;;; Rules for `fcvt_to_sint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 32 _)))))
(vec_misc (VecMisc2.Fcvtzs) x (vector_size ty)))
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 64 _)))))
(vec_misc (VecMisc2.Fcvtzs) x (vector_size ty)))
(rule (lower (has_type $I32 (fcvt_to_sint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI32) x $true $F32 $I32))
(rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI64) x $true $F32 $I64))
(rule (lower (has_type $I32 (fcvt_to_sint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI32) x $true $F64 $I32))
(rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI64) x $true $F64 $I64))
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller

View File

@@ -1065,17 +1065,6 @@ pub(crate) fn condcode_is_signed(cc: IntCC) -> bool {
//=============================================================================
// Helpers for instruction lowering.
pub(crate) fn choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T {
let bits = ty_bits(ty);
if bits <= 32 {
op32
} else if bits == 64 {
op64
} else {
panic!("choose_32_64 on > 64 bits!")
}
}
/// Checks for an instance of `op` feeding the given input.
pub(crate) fn maybe_input_insn(
c: &mut Lower<Inst>,

View File

@@ -5,12 +5,13 @@ pub mod generated_code;
// Types that the generated ISLE code uses via `use super::*`.
use super::{
insn_inputs, lower_constant_f128, lower_constant_f64, writable_zero_reg, zero_reg, AMode,
ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp,
FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo, MachLabel,
MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg, ScalarSize,
ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
insn_inputs, lower_constant_f128, lower_constant_f32, lower_constant_f64, writable_zero_reg,
zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond,
CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC,
JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize,
PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
};
use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm};
use crate::isa::aarch64::lower::{lower_address, lower_splat_const};
use crate::isa::aarch64::settings::Flags as IsaFlags;
use crate::machinst::{isle::*, InputSourceInst};
@@ -519,4 +520,198 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
fn preg_link(&mut self) -> PReg {
super::regs::link_reg().to_real_reg().unwrap().into()
}
fn min_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
if in_bits == 32 {
// From float32.
let min = match (signed, out_bits) {
(true, 8) => i8::MIN as f32 - 1.,
(true, 16) => i16::MIN as f32 - 1.,
(true, 32) => i32::MIN as f32, // I32_MIN - 1 isn't precisely representable as a f32.
(true, 64) => i64::MIN as f32, // I64_MIN - 1 isn't precisely representable as a f32.
(false, _) => -1.,
_ => unimplemented!(
"unexpected {} output size of {} bits for 32-bit input",
if signed { "signed" } else { "unsigned" },
out_bits
),
};
lower_constant_f32(self.lower_ctx, tmp, min);
} else if in_bits == 64 {
// From float64.
let min = match (signed, out_bits) {
(true, 8) => i8::MIN as f64 - 1.,
(true, 16) => i16::MIN as f64 - 1.,
(true, 32) => i32::MIN as f64 - 1.,
(true, 64) => i64::MIN as f64,
(false, _) => -1.,
_ => unimplemented!(
"unexpected {} output size of {} bits for 64-bit input",
if signed { "signed" } else { "unsigned" },
out_bits
),
};
lower_constant_f64(self.lower_ctx, tmp, min);
} else {
unimplemented!(
"unexpected input size for min_fp_value: {} (signed: {}, output size: {})",
in_bits,
signed,
out_bits
);
}
tmp.to_reg()
}
fn max_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
if in_bits == 32 {
// From float32.
let max = match (signed, out_bits) {
(true, 8) => i8::MAX as f32 + 1.,
(true, 16) => i16::MAX as f32 + 1.,
(true, 32) => (i32::MAX as u64 + 1) as f32,
(true, 64) => (i64::MAX as u64 + 1) as f32,
(false, 8) => u8::MAX as f32 + 1.,
(false, 16) => u16::MAX as f32 + 1.,
(false, 32) => (u32::MAX as u64 + 1) as f32,
(false, 64) => (u64::MAX as u128 + 1) as f32,
_ => unimplemented!(
"unexpected {} output size of {} bits for 32-bit input",
if signed { "signed" } else { "unsigned" },
out_bits
),
};
lower_constant_f32(self.lower_ctx, tmp, max);
} else if in_bits == 64 {
// From float64.
let max = match (signed, out_bits) {
(true, 8) => i8::MAX as f64 + 1.,
(true, 16) => i16::MAX as f64 + 1.,
(true, 32) => i32::MAX as f64 + 1.,
(true, 64) => (i64::MAX as u64 + 1) as f64,
(false, 8) => u8::MAX as f64 + 1.,
(false, 16) => u16::MAX as f64 + 1.,
(false, 32) => u32::MAX as f64 + 1.,
(false, 64) => (u64::MAX as u128 + 1) as f64,
_ => unimplemented!(
"unexpected {} output size of {} bits for 64-bit input",
if signed { "signed" } else { "unsigned" },
out_bits
),
};
lower_constant_f64(self.lower_ctx, tmp, max);
} else {
unimplemented!(
"unexpected input size for max_fp_value: {} (signed: {}, output size: {})",
in_bits,
signed,
out_bits
);
}
tmp.to_reg()
}
fn min_fp_value_sat(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
let min: f64 = match (out_bits, signed) {
(32, true) => i32::MIN as f64,
(32, false) => 0.0,
(64, true) => i64::MIN as f64,
(64, false) => 0.0,
_ => unimplemented!(
"unexpected {} output size of {} bits",
if signed { "signed" } else { "unsigned" },
out_bits
),
};
if in_bits == 32 {
lower_constant_f32(self.lower_ctx, tmp, min as f32)
} else if in_bits == 64 {
lower_constant_f64(self.lower_ctx, tmp, min)
} else {
unimplemented!(
"unexpected input size for min_fp_value_sat: {} (signed: {}, output size: {})",
in_bits,
signed,
out_bits
);
}
tmp.to_reg()
}
fn max_fp_value_sat(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
let max = match (out_bits, signed) {
(32, true) => i32::MAX as f64,
(32, false) => u32::MAX as f64,
(64, true) => i64::MAX as f64,
(64, false) => u64::MAX as f64,
_ => unimplemented!(
"unexpected {} output size of {} bits",
if signed { "signed" } else { "unsigned" },
out_bits
),
};
if in_bits == 32 {
lower_constant_f32(self.lower_ctx, tmp, max as f32)
} else if in_bits == 64 {
lower_constant_f64(self.lower_ctx, tmp, max)
} else {
unimplemented!(
"unexpected input size for max_fp_value_sat: {} (signed: {}, output size: {})",
in_bits,
signed,
out_bits
);
}
tmp.to_reg()
}
fn fpu_op_ri_ushr(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI {
if ty_bits == 32 {
FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
} else if ty_bits == 64 {
FPUOpRI::UShr64(FPURightShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
} else {
unimplemented!(
"unexpected input size for fpu_op_ri_ushr: {} (shift: {})",
ty_bits,
shift
);
}
}
fn fpu_op_ri_sli(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI {
if ty_bits == 32 {
FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
} else if ty_bits == 64 {
FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
} else {
unimplemented!(
"unexpected input size for fpu_op_ri_sli: {} (shift: {})",
ty_bits,
shift
);
}
}
}

View File

@@ -2,10 +2,9 @@
use super::lower::*;
use crate::binemit::CodeOffset;
use crate::ir::condcodes::FloatCC;
use crate::ir::types::*;
use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode};
use crate::ir::{InstructionData, Opcode};
use crate::isa::aarch64::abi::*;
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::settings as aarch64_settings;
@@ -978,408 +977,13 @@ pub(crate) fn lower_insn_to_regs(
Opcode::Fma => implemented_in_isle(ctx),
Opcode::Fcopysign => {
// Copy the sign bit from inputs[1] to inputs[0]. We use the following sequence:
//
// This is a scalar Fcopysign.
// This uses scalar NEON operations for 64-bit and vector operations (2S) for 32-bit.
// In the latter case it still sets all bits except the lowest 32 to 0.
//
// mov vd, vn
// ushr vtmp, vm, #63 / #31
// sli vd, vtmp, #63 / #31
Opcode::Fcopysign => implemented_in_isle(ctx),
let ty = ctx.output_ty(insn, 0);
Opcode::FcvtToUint | Opcode::FcvtToSint => implemented_in_isle(ctx),
if ty != F32 && ty != F64 {
return Err(CodegenError::Unsupported(format!(
"Fcopysign: Unsupported type: {:?}",
ty
)));
}
Opcode::FcvtFromUint | Opcode::FcvtFromSint => implemented_in_isle(ctx),
let bits = ty_bits(ty) as u8;
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let tmp = ctx.alloc_tmp(F64).only_reg().unwrap();
// Copy LHS to rd.
ctx.emit(Inst::gen_move(rd, rn, ty));
// Copy the sign bit to the lowest bit in tmp.
let imm = FPURightShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
ctx.emit(Inst::FpuRRI {
fpu_op: choose_32_64(ty, FPUOpRI::UShr32(imm), FPUOpRI::UShr64(imm)),
rd: tmp,
rn: rm,
});
// Insert the bit from tmp into the sign bit of rd.
let imm = FPULeftShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
ctx.emit(Inst::FpuRRI {
fpu_op: choose_32_64(ty, FPUOpRI::Sli32(imm), FPUOpRI::Sli64(imm)),
rd,
rn: tmp.to_reg(),
});
}
Opcode::FcvtToUint | Opcode::FcvtToSint => {
let input_ty = ctx.input_ty(insn, 0);
let in_bits = ty_bits(input_ty);
let output_ty = ty.unwrap();
let out_bits = ty_bits(output_ty);
let signed = op == Opcode::FcvtToSint;
let op = match (signed, in_bits, out_bits) {
(false, 32, 8) | (false, 32, 16) | (false, 32, 32) => FpuToIntOp::F32ToU32,
(true, 32, 8) | (true, 32, 16) | (true, 32, 32) => FpuToIntOp::F32ToI32,
(false, 32, 64) => FpuToIntOp::F32ToU64,
(true, 32, 64) => FpuToIntOp::F32ToI64,
(false, 64, 8) | (false, 64, 16) | (false, 64, 32) => FpuToIntOp::F64ToU32,
(true, 64, 8) | (true, 64, 16) | (true, 64, 32) => FpuToIntOp::F64ToI32,
(false, 64, 64) => FpuToIntOp::F64ToU64,
(true, 64, 64) => FpuToIntOp::F64ToI64,
_ => {
return Err(CodegenError::Unsupported(format!(
"{}: Unsupported types: {:?} -> {:?}",
op, input_ty, output_ty
)))
}
};
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
// First, check the output: it's important to carry the NaN conversion before the
// in-bounds conversion, per wasm semantics.
// Check that the input is not a NaN.
ctx.emit(Inst::FpuCmp {
size: ScalarSize::from_ty(input_ty),
rn,
rm: rn,
});
let trap_code = TrapCode::BadConversionToInteger;
ctx.emit(Inst::TrapIf {
trap_code,
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Unordered)),
});
let tmp = ctx.alloc_tmp(I8X16).only_reg().unwrap();
// Check that the input is in range, with "truncate towards zero" semantics. This means
// we allow values that are slightly out of range:
// - for signed conversions, we allow values strictly greater than INT_MIN-1 (when this
// can be represented), and strictly less than INT_MAX+1 (when this can be
// represented).
// - for unsigned conversions, we allow values strictly greater than -1, and strictly
// less than UINT_MAX+1 (when this can be represented).
if in_bits == 32 {
// From float32.
let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
(true, 8) => (
i8::min_value() as f32 - 1.,
FloatCC::GreaterThan,
i8::max_value() as f32 + 1.,
),
(true, 16) => (
i16::min_value() as f32 - 1.,
FloatCC::GreaterThan,
i16::max_value() as f32 + 1.,
),
(true, 32) => (
i32::min_value() as f32, // I32_MIN - 1 isn't precisely representable as a f32.
FloatCC::GreaterThanOrEqual,
i32::max_value() as f32 + 1.,
),
(true, 64) => (
i64::min_value() as f32, // I64_MIN - 1 isn't precisely representable as a f32.
FloatCC::GreaterThanOrEqual,
i64::max_value() as f32 + 1.,
),
(false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f32 + 1.),
(false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f32 + 1.),
(false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f32 + 1.),
(false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f32 + 1.),
_ => unreachable!(),
};
// >= low_bound
lower_constant_f32(ctx, tmp, low_bound);
ctx.emit(Inst::FpuCmp {
size: ScalarSize::Size32,
rn,
rm: tmp.to_reg(),
});
let trap_code = TrapCode::IntegerOverflow;
ctx.emit(Inst::TrapIf {
trap_code,
kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
});
// <= high_bound
lower_constant_f32(ctx, tmp, high_bound);
ctx.emit(Inst::FpuCmp {
size: ScalarSize::Size32,
rn,
rm: tmp.to_reg(),
});
let trap_code = TrapCode::IntegerOverflow;
ctx.emit(Inst::TrapIf {
trap_code,
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
});
} else {
// From float64.
let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
(true, 8) => (
i8::min_value() as f64 - 1.,
FloatCC::GreaterThan,
i8::max_value() as f64 + 1.,
),
(true, 16) => (
i16::min_value() as f64 - 1.,
FloatCC::GreaterThan,
i16::max_value() as f64 + 1.,
),
(true, 32) => (
i32::min_value() as f64 - 1.,
FloatCC::GreaterThan,
i32::max_value() as f64 + 1.,
),
(true, 64) => (
i64::min_value() as f64, // I64_MIN - 1 is not precisely representable as an i64.
FloatCC::GreaterThanOrEqual,
i64::max_value() as f64 + 1.,
),
(false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f64 + 1.),
(false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f64 + 1.),
(false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f64 + 1.),
(false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f64 + 1.),
_ => unreachable!(),
};
// >= low_bound
lower_constant_f64(ctx, tmp, low_bound);
ctx.emit(Inst::FpuCmp {
size: ScalarSize::Size64,
rn,
rm: tmp.to_reg(),
});
let trap_code = TrapCode::IntegerOverflow;
ctx.emit(Inst::TrapIf {
trap_code,
kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
});
// <= high_bound
lower_constant_f64(ctx, tmp, high_bound);
ctx.emit(Inst::FpuCmp {
size: ScalarSize::Size64,
rn,
rm: tmp.to_reg(),
});
let trap_code = TrapCode::IntegerOverflow;
ctx.emit(Inst::TrapIf {
trap_code,
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
});
};
// Do the conversion.
ctx.emit(Inst::FpuToInt { op, rd, rn });
}
Opcode::FcvtFromUint | Opcode::FcvtFromSint => {
let input_ty = ctx.input_ty(insn, 0);
let ty = ty.unwrap();
let signed = op == Opcode::FcvtFromSint;
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if ty.is_vector() {
if input_ty.lane_bits() != ty.lane_bits() {
return Err(CodegenError::Unsupported(format!(
"{}: Unsupported types: {:?} -> {:?}",
op, input_ty, ty
)));
}
let op = if signed {
VecMisc2::Scvtf
} else {
VecMisc2::Ucvtf
};
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(Inst::VecMisc {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
});
} else {
let in_bits = ty_bits(input_ty);
let out_bits = ty_bits(ty);
let op = match (signed, in_bits, out_bits) {
(false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
(true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
(false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
(true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
(false, 64, 32) => IntToFpuOp::U64ToF32,
(true, 64, 32) => IntToFpuOp::I64ToF32,
(false, 64, 64) => IntToFpuOp::U64ToF64,
(true, 64, 64) => IntToFpuOp::I64ToF64,
_ => {
return Err(CodegenError::Unsupported(format!(
"{}: Unsupported types: {:?} -> {:?}",
op, input_ty, ty
)))
}
};
let narrow_mode = match (signed, in_bits) {
(false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
(true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
(false, 64) => NarrowValueMode::ZeroExtend64,
(true, 64) => NarrowValueMode::SignExtend64,
_ => unreachable!(),
};
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
ctx.emit(Inst::IntToFpu { op, rd, rn });
}
}
Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => {
let in_ty = ctx.input_ty(insn, 0);
let ty = ty.unwrap();
let out_signed = op == Opcode::FcvtToSintSat;
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if ty.is_vector() {
if in_ty.lane_bits() != ty.lane_bits() {
return Err(CodegenError::Unsupported(format!(
"{}: Unsupported types: {:?} -> {:?}",
op, in_ty, ty
)));
}
let op = if out_signed {
VecMisc2::Fcvtzs
} else {
VecMisc2::Fcvtzu
};
ctx.emit(Inst::VecMisc {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
});
} else {
let in_bits = ty_bits(in_ty);
let out_bits = ty_bits(ty);
// FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
// FMIN Vtmp2, Vin, Vtmp1
// FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN
// FMAX Vtmp2, Vtmp2, Vtmp1
// (if signed) FIMM Vtmp1, 0
// FCMP Vin, Vin
// FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0
// convert Rout, Vtmp2
assert!(in_ty.is_float() && (in_bits == 32 || in_bits == 64));
assert!(out_bits == 32 || out_bits == 64);
let min: f64 = match (out_bits, out_signed) {
(32, true) => std::i32::MIN as f64,
(32, false) => 0.0,
(64, true) => std::i64::MIN as f64,
(64, false) => 0.0,
_ => unreachable!(),
};
let max = match (out_bits, out_signed) {
(32, true) => std::i32::MAX as f64,
(32, false) => std::u32::MAX as f64,
(64, true) => std::i64::MAX as f64,
(64, false) => std::u64::MAX as f64,
_ => unreachable!(),
};
let rtmp1 = ctx.alloc_tmp(in_ty).only_reg().unwrap();
let rtmp2 = ctx.alloc_tmp(in_ty).only_reg().unwrap();
if in_bits == 32 {
lower_constant_f32(ctx, rtmp1, max as f32);
} else {
lower_constant_f64(ctx, rtmp1, max);
}
ctx.emit(Inst::FpuRRR {
fpu_op: FPUOp2::Min,
size: ScalarSize::from_ty(in_ty),
rd: rtmp2,
rn,
rm: rtmp1.to_reg(),
});
if in_bits == 32 {
lower_constant_f32(ctx, rtmp1, min as f32);
} else {
lower_constant_f64(ctx, rtmp1, min);
}
ctx.emit(Inst::FpuRRR {
fpu_op: FPUOp2::Max,
size: ScalarSize::from_ty(in_ty),
rd: rtmp2,
rn: rtmp2.to_reg(),
rm: rtmp1.to_reg(),
});
if out_signed {
if in_bits == 32 {
lower_constant_f32(ctx, rtmp1, 0.0);
} else {
lower_constant_f64(ctx, rtmp1, 0.0);
}
}
ctx.emit(Inst::FpuCmp {
size: ScalarSize::from_ty(in_ty),
rn,
rm: rn,
});
if in_bits == 32 {
ctx.emit(Inst::FpuCSel32 {
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
});
} else {
ctx.emit(Inst::FpuCSel64 {
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
});
}
let cvt = match (in_bits, out_bits, out_signed) {
(32, 32, false) => FpuToIntOp::F32ToU32,
(32, 32, true) => FpuToIntOp::F32ToI32,
(32, 64, false) => FpuToIntOp::F32ToU64,
(32, 64, true) => FpuToIntOp::F32ToI64,
(64, 32, false) => FpuToIntOp::F64ToU32,
(64, 32, true) => FpuToIntOp::F64ToI32,
(64, 64, false) => FpuToIntOp::F64ToU64,
(64, 64, true) => FpuToIntOp::F64ToI64,
_ => unreachable!(),
};
ctx.emit(Inst::FpuToInt {
op: cvt,
rd,
rn: rtmp2.to_reg(),
});
}
}
Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => implemented_in_isle(ctx),
Opcode::IaddIfcout => {
// This is a two-output instruction that is needed for the

View File

@@ -9,8 +9,8 @@ block0(v0: i8):
}
; block0:
; uxtb w4, w0
; ucvtf s0, w4
; uxtb w3, w0
; ucvtf s0, w3
; ret
function u0:0(i8) -> f64 {
@@ -20,8 +20,8 @@ block0(v0: i8):
}
; block0:
; uxtb w4, w0
; ucvtf d0, w4
; uxtb w3, w0
; ucvtf d0, w3
; ret
function u0:0(i16) -> f32 {
@@ -31,8 +31,8 @@ block0(v0: i16):
}
; block0:
; uxth w4, w0
; ucvtf s0, w4
; uxth w3, w0
; ucvtf s0, w3
; ret
function u0:0(i16) -> f64 {
@@ -42,8 +42,8 @@ block0(v0: i16):
}
; block0:
; uxth w4, w0
; ucvtf d0, w4
; uxth w3, w0
; ucvtf d0, w3
; ret
function u0:0(f32) -> i8 {
@@ -55,13 +55,13 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; fmov s6, #-1
; fcmp s0, s6
; fmov s5, #-1
; fcmp s0, s5
; b.gt 8 ; udf
; movz x10, #17280, LSL #16
; fmov s6, w10
; fcmp s0, s6
; b.mi 8 ; udf
; fmov s18, w10
; fcmp s0, s18
; b.lt 8 ; udf
; fcvtzu w0, s0
; ret
@@ -74,13 +74,13 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; fmov d6, #-1
; fcmp d0, d6
; fmov d5, #-1
; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #16496, LSL #48
; fmov d6, x10
; fcmp d0, d6
; b.mi 8 ; udf
; fmov d18, x10
; fcmp d0, d18
; b.lt 8 ; udf
; fcvtzu w0, d0
; ret
@@ -93,13 +93,13 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; fmov s6, #-1
; fcmp s0, s6
; fmov s5, #-1
; fcmp s0, s5
; b.gt 8 ; udf
; movz x10, #18304, LSL #16
; fmov s6, w10
; fcmp s0, s6
; b.mi 8 ; udf
; fmov s18, w10
; fcmp s0, s18
; b.lt 8 ; udf
; fcvtzu w0, s0
; ret
@@ -112,13 +112,13 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; fmov d6, #-1
; fcmp d0, d6
; fmov d5, #-1
; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #16624, LSL #48
; fmov d6, x10
; fcmp d0, d6
; b.mi 8 ; udf
; fmov d18, x10
; fcmp d0, d18
; b.lt 8 ; udf
; fcvtzu w0, d0
; ret

View File

@@ -333,13 +333,13 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; fmov s6, #-1
; fcmp s0, s6
; fmov s5, #-1
; fcmp s0, s5
; b.gt 8 ; udf
; movz x10, #20352, LSL #16
; fmov s6, w10
; fcmp s0, s6
; b.mi 8 ; udf
; fmov s18, w10
; fcmp s0, s18
; b.lt 8 ; udf
; fcvtzu w0, s0
; ret
@@ -352,14 +352,14 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; movz x7, #52992, LSL #16
; fmov s7, w7
; fcmp s0, s7
; movz x6, #52992, LSL #16
; fmov s6, w6
; fcmp s0, s6
; b.ge 8 ; udf
; movz x12, #20224, LSL #16
; fmov s7, w12
; fcmp s0, s7
; b.mi 8 ; udf
; fmov s20, w12
; fcmp s0, s20
; b.lt 8 ; udf
; fcvtzs w0, s0
; ret
@@ -372,13 +372,13 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; fmov s6, #-1
; fcmp s0, s6
; fmov s5, #-1
; fcmp s0, s5
; b.gt 8 ; udf
; movz x10, #24448, LSL #16
; fmov s6, w10
; fcmp s0, s6
; b.mi 8 ; udf
; fmov s18, w10
; fcmp s0, s18
; b.lt 8 ; udf
; fcvtzu x0, s0
; ret
@@ -391,14 +391,14 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; movz x7, #57088, LSL #16
; fmov s7, w7
; fcmp s0, s7
; movz x6, #57088, LSL #16
; fmov s6, w6
; fcmp s0, s6
; b.ge 8 ; udf
; movz x12, #24320, LSL #16
; fmov s7, w12
; fcmp s0, s7
; b.mi 8 ; udf
; fmov s20, w12
; fcmp s0, s20
; b.lt 8 ; udf
; fcvtzs x0, s0
; ret
@@ -411,13 +411,13 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; fmov d6, #-1
; fcmp d0, d6
; fmov d5, #-1
; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #16880, LSL #48
; fmov d6, x10
; fcmp d0, d6
; b.mi 8 ; udf
; fmov d18, x10
; fcmp d0, d18
; b.lt 8 ; udf
; fcvtzu w0, d0
; ret
@@ -430,13 +430,13 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; ldr d6, pc+8 ; b 12 ; data.f64 -2147483649
; fcmp d0, d6
; ldr d5, pc+8 ; b 12 ; data.f64 -2147483649
; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #16864, LSL #48
; fmov d6, x10
; fcmp d0, d6
; b.mi 8 ; udf
; fmov d18, x10
; fcmp d0, d18
; b.lt 8 ; udf
; fcvtzs w0, d0
; ret
@@ -449,13 +449,13 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; fmov d6, #-1
; fcmp d0, d6
; fmov d5, #-1
; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #17392, LSL #48
; fmov d6, x10
; fcmp d0, d6
; b.mi 8 ; udf
; fmov d18, x10
; fcmp d0, d18
; b.lt 8 ; udf
; fcvtzu x0, d0
; ret
@@ -468,14 +468,14 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; movz x7, #50144, LSL #48
; fmov d7, x7
; fcmp d0, d7
; movz x6, #50144, LSL #48
; fmov d6, x6
; fcmp d0, d6
; b.ge 8 ; udf
; movz x12, #17376, LSL #48
; fmov d7, x12
; fcmp d0, d7
; b.mi 8 ; udf
; fmov d20, x12
; fcmp d0, d20
; b.lt 8 ; udf
; fcvtzs x0, d0
; ret
@@ -566,14 +566,14 @@ block0(v0: f32):
}
; block0:
; movz x6, #20352, LSL #16
; fmov s5, w6
; fmin s7, s0, s5
; movi v5.2s, #0
; fmax s7, s7, s5
; movz x4, #20352, LSL #16
; fmov s4, w4
; fmin s7, s0, s4
; movi v17.2s, #0
; fmax s19, s7, s17
; fcmp s0, s0
; fcsel s7, s5, s7, ne
; fcvtzu w0, s7
; fcsel s22, s17, s19, ne
; fcvtzu w0, s22
; ret
function %f50(f32) -> i32 {
@@ -583,16 +583,16 @@ block0(v0: f32):
}
; block0:
; movz x6, #20224, LSL #16
; fmov s5, w6
; fmin s7, s0, s5
; movz x4, #20224, LSL #16
; fmov s4, w4
; fmin s7, s0, s4
; movz x10, #52992, LSL #16
; fmov s5, w10
; fmax s7, s7, s5
; movi v5.2s, #0
; fmov s18, w10
; fmax s21, s7, s18
; movi v23.16b, #0
; fcmp s0, s0
; fcsel s7, s5, s7, ne
; fcvtzs w0, s7
; fcsel s26, s23, s21, ne
; fcvtzs w0, s26
; ret
function %f51(f32) -> i64 {
@@ -602,14 +602,14 @@ block0(v0: f32):
}
; block0:
; movz x6, #24448, LSL #16
; fmov s5, w6
; fmin s7, s0, s5
; movi v5.2s, #0
; fmax s7, s7, s5
; movz x4, #24448, LSL #16
; fmov s4, w4
; fmin s7, s0, s4
; movi v17.2s, #0
; fmax s19, s7, s17
; fcmp s0, s0
; fcsel s7, s5, s7, ne
; fcvtzu x0, s7
; fcsel s22, s17, s19, ne
; fcvtzu x0, s22
; ret
function %f52(f32) -> i64 {
@@ -619,16 +619,16 @@ block0(v0: f32):
}
; block0:
; movz x6, #24320, LSL #16
; fmov s5, w6
; fmin s7, s0, s5
; movz x4, #24320, LSL #16
; fmov s4, w4
; fmin s7, s0, s4
; movz x10, #57088, LSL #16
; fmov s5, w10
; fmax s7, s7, s5
; movi v5.2s, #0
; fmov s18, w10
; fmax s21, s7, s18
; movi v23.16b, #0
; fcmp s0, s0
; fcsel s7, s5, s7, ne
; fcvtzs x0, s7
; fcsel s26, s23, s21, ne
; fcvtzs x0, s26
; ret
function %f53(f64) -> i32 {
@@ -638,13 +638,13 @@ block0(v0: f64):
}
; block0:
; ldr d4, pc+8 ; b 12 ; data.f64 4294967295
; fmin d6, d0, d4
; movi v4.2s, #0
; fmax d6, d6, d4
; ldr d3, pc+8 ; b 12 ; data.f64 4294967295
; fmin d5, d0, d3
; movi v7.2s, #0
; fmax d17, d5, d7
; fcmp d0, d0
; fcsel d6, d4, d6, ne
; fcvtzu w0, d6
; fcsel d20, d7, d17, ne
; fcvtzu w0, d20
; ret
function %f54(f64) -> i32 {
@@ -654,15 +654,15 @@ block0(v0: f64):
}
; block0:
; ldr d4, pc+8 ; b 12 ; data.f64 2147483647
; fmin d6, d0, d4
; ldr d3, pc+8 ; b 12 ; data.f64 2147483647
; fmin d5, d0, d3
; movz x8, #49632, LSL #48
; fmov d4, x8
; fmax d6, d6, d4
; movi v4.2s, #0
; fmov d16, x8
; fmax d19, d5, d16
; movi v21.16b, #0
; fcmp d0, d0
; fcsel d6, d4, d6, ne
; fcvtzs w0, d6
; fcsel d24, d21, d19, ne
; fcvtzs w0, d24
; ret
function %f55(f64) -> i64 {
@@ -672,14 +672,14 @@ block0(v0: f64):
}
; block0:
; movz x6, #17392, LSL #48
; fmov d5, x6
; fmin d7, d0, d5
; movi v5.2s, #0
; fmax d7, d7, d5
; movz x4, #17392, LSL #48
; fmov d4, x4
; fmin d7, d0, d4
; movi v17.2s, #0
; fmax d19, d7, d17
; fcmp d0, d0
; fcsel d7, d5, d7, ne
; fcvtzu x0, d7
; fcsel d22, d17, d19, ne
; fcvtzu x0, d22
; ret
function %f56(f64) -> i64 {
@@ -689,16 +689,16 @@ block0(v0: f64):
}
; block0:
; movz x6, #17376, LSL #48
; fmov d5, x6
; fmin d7, d0, d5
; movz x4, #17376, LSL #48
; fmov d4, x4
; fmin d7, d0, d4
; movz x10, #50144, LSL #48
; fmov d5, x10
; fmax d7, d7, d5
; movi v5.2s, #0
; fmov d18, x10
; fmax d21, d7, d18
; movi v23.16b, #0
; fcmp d0, d0
; fcsel d7, d5, d7, ne
; fcvtzs x0, d7
; fcsel d26, d23, d21, ne
; fcvtzs x0, d26
; ret
function %f57(f32x2) -> f32x2 {
@@ -946,3 +946,36 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2):
; mov v0.16b, v2.16b
; fmla v0.2d, v17.2d, v1.2d
; ret
function %f81(f32x2, f32x2) -> f32x2 {
block0(v0: f32x2, v1: f32x2):
v2 = fcopysign v0, v1
return v2
}
; block0:
; ushr v7.2s, v1.2s, #31
; sli v0.2s, v7.2s, #31
; ret
function %f82(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcopysign v0, v1
return v2
}
; block0:
; ushr v7.4s, v1.4s, #31
; sli v0.4s, v7.4s, #31
; ret
function %f83(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcopysign v0, v1
return v2
}
; block0:
; ushr v7.2d, v1.2d, #63
; sli v0.2d, v7.2d, #63
; ret

View File

@@ -0,0 +1,37 @@
test interpret
test run
target aarch64
; x86_64 and s390x do not support 64-bit vectors in `fcopysign`.
function %fcopysign_f32x2(f32x2, f32x2) -> f32x2 {
block0(v0: f32x2, v1: f32x2):
v2 = fcopysign v0, v1
return v2
}
; run: %fcopysign_f32x2([0x9.0 -0x9.0], [0x9.0 0x9.0]) == [0x9.0 0x9.0]
; run: %fcopysign_f32x2([0x9.0 -0x9.0], [-0x9.0 -0x9.0]) == [-0x9.0 -0x9.0]
; run: %fcopysign_f32x2([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-0x0.0 0x0.0]
; F32 Inf
; run: %fcopysign_f32x2([Inf -Inf], [Inf Inf]) == [Inf Inf]
; run: %fcopysign_f32x2([Inf -Inf], [-Inf -Inf]) == [-Inf -Inf]
; F32 Epsilon / Max / Min Positive
; run: %fcopysign_f32x2([0x1.000000p-23 -0x1.000000p-23], [-0x0.0 0x0.0]) == [-0x1.000000p-23 0x1.000000p-23]
; run: %fcopysign_f32x2([0x1.fffffep127 -0x1.fffffep127], [-0x0.0 0x0.0]) == [-0x1.fffffep127 0x1.fffffep127]
; run: %fcopysign_f32x2([0x1.000000p-126 -0x1.000000p-126], [-0x0.0 0x0.0]) == [-0x1.000000p-126 0x1.000000p-126]
; F32 Subnormals
; run: %fcopysign_f32x2([0x0.800000p-126 -0x0.800000p-126], [-0x0.0 0x0.0]) == [-0x0.800000p-126 0x0.800000p-126]
; run: %fcopysign_f32x2([0x0.000002p-126 -0x0.000002p-126], [-0x0.0 0x0.0]) == [-0x0.000002p-126 0x0.000002p-126]
; F32 NaN's
; Unlike with other operations fcopysign is guaranteed to only affect the sign bit
; run: %fcopysign_f32x2([0x0.0 0x3.0], [-NaN +sNaN:0x1]) == [-0x0.0 0x3.0]
; run: %fcopysign_f32x2([Inf +NaN], [-NaN -NaN]) == [-Inf -NaN]
; run: %fcopysign_f32x2([-NaN +NaN:0x0], [+NaN -NaN]) == [+NaN -NaN:0x0]
; run: %fcopysign_f32x2([+NaN:0x1 +NaN:0x300001], [-NaN -NaN]) == [-NaN:0x1 -NaN:0x300001]
; run: %fcopysign_f32x2([-NaN:0x0 -NaN:0x1], [+NaN +NaN]) == [+NaN:0x0 +NaN:0x1]
; run: %fcopysign_f32x2([-NaN:0x300001 +sNaN:0x1], [+NaN -NaN]) == [+NaN:0x300001 -sNaN:0x1]
; run: %fcopysign_f32x2([-sNaN:0x1 +sNaN:0x200001], [+NaN -NaN]) == [+sNaN:0x1 -sNaN:0x200001]
; run: %fcopysign_f32x2([-sNaN:0x200001 -sNaN:0x200001], [+NaN +NaN]) == [+sNaN:0x200001 +sNaN:0x200001]

View File

@@ -0,0 +1,63 @@
test interpret
test run
target s390x
target aarch64
; x86_64 does not support SIMD fcopysign.
function %fcopysign_f32x4(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcopysign v0, v1
return v2
}
; run: %fcopysign_f32x4([0x9.0 -0x9.0 0x9.0 -0x9.0], [0x9.0 0x9.0 -0x9.0 -0x9.0]) == [0x9.0 0x9.0 -0x9.0 -0x9.0]
; run: %fcopysign_f32x4([0x0.0 -0x0.0 0x0.0 -0x0.0], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x0.0 0x0.0 -0x0.0 0x0.0]
; F32 Inf
; run: %fcopysign_f32x4([Inf -Inf Inf -Inf], [Inf Inf -Inf -Inf]) == [Inf Inf -Inf -Inf]
; F32 Epsilon / Max / Min Positive
; run: %fcopysign_f32x4([0x1.000000p-23 -0x1.000000p-23 0x1.fffffep127 -0x1.fffffep127], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x1.000000p-23 0x1.000000p-23 -0x1.fffffep127 0x1.fffffep127]
; run: %fcopysign_f32x4([0x1.000000p-126 -0x1.000000p-126 0x1.000000p-126 -0x1.000000p-126], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x1.000000p-126 0x1.000000p-126 -0x1.000000p-126 0x1.000000p-126]
; F32 Subnormals
; run: %fcopysign_f32x4([0x0.800000p-126 -0x0.800000p-126 0x0.000002p-126 -0x0.000002p-126], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x0.800000p-126 0x0.800000p-126 -0x0.000002p-126 0x0.000002p-126]
; F32 NaN's
; Unlike with other operations fcopysign is guaranteed to only affect the sign bit
; run: %fcopysign_f32x4([0x0.0 0x3.0 Inf +NaN], [-NaN +sNaN:0x1 -NaN -NaN]) == [-0x0.0 0x3.0 -Inf -NaN]
; run: %fcopysign_f32x4([-NaN +NaN:0x0 +NaN:0x1 +NaN:0x300001], [+NaN -NaN -NaN -NaN]) == [+NaN -NaN:0x0 -NaN:0x1 -NaN:0x300001]
; run: %fcopysign_f32x4([-NaN:0x0 -NaN:0x1 -NaN:0x300001 +sNaN:0x1], [+NaN +NaN +NaN -NaN]) == [+NaN:0x0 +NaN:0x1 +NaN:0x300001 -sNaN:0x1]
; run: %fcopysign_f32x4([-sNaN:0x1 +sNaN:0x200001 -sNaN:0x200001 -sNaN:0x200001], [+NaN -NaN +NaN +NaN]) == [+sNaN:0x1 -sNaN:0x200001 +sNaN:0x200001 +sNaN:0x200001]
function %fcopysign_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcopysign v0, v1
return v2
}
; run: %fcopysign_f64x2([0x9.0 -0x9.0], [0x9.0 0x9.0]) == [0x9.0 0x9.0]
; run: %fcopysign_f64x2([0x9.0 -0x9.0], [-0x9.0 -0x9.0]) == [-0x9.0 -0x9.0]
; run: %fcopysign_f64x2([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-0x0.0 0x0.0]
; F64 Inf
; run: %fcopysign_f64x2([Inf -Inf], [Inf Inf]) == [Inf Inf]
; run: %fcopysign_f64x2([Inf -Inf], [-Inf -Inf]) == [-Inf -Inf]
; F64 Epsilon / Max / Min Positive
; run: %fcopysign_f64x2([0x1.0000000000000p-52 -0x1.0000000000000p-52], [-0x0.0 0x0.0]) == [-0x1.0000000000000p-52 0x1.0000000000000p-52]
; run: %fcopysign_f64x2([0x1.fffffffffffffp1023 -0x1.fffffffffffffp1023], [-0x0.0 0x0.0]) == [-0x1.fffffffffffffp1023 0x1.fffffffffffffp1023]
; run: %fcopysign_f64x2([0x1.0000000000000p-1022 -0x1.0000000000000p-1022], [-0x0.0 0x0.0]) == [-0x1.0000000000000p-1022 0x1.0000000000000p-1022]
; F64 Subnormals
; run: %fcopysign_f64x2([0x0.8000000000000p-1022 -0x0.8000000000000p-1022], [-0x0.0 0x0.0]) == [-0x0.8000000000000p-1022 0x0.8000000000000p-1022]
; run: %fcopysign_f64x2([0x0.0000000000001p-1022 -0x0.0000000000001p-1022], [-0x0.0 0x0.0]) == [-0x0.0000000000001p-1022 0x0.0000000000001p-1022]
; F64 NaN's
; Unlike with other operations fcopysign is guaranteed to only affect the sign bit
; run: %fcopysign_f64x2([0x0.0 0x3.0], [-NaN +sNaN:0x1]) == [-0x0.0 0x3.0]
; run: %fcopysign_f64x2([Inf +NaN], [-NaN -NaN]) == [-Inf -NaN]
; run: %fcopysign_f64x2([-NaN +NaN:0x0], [+NaN -NaN]) == [+NaN -NaN:0x0]
; run: %fcopysign_f64x2([+NaN:0x1 +NaN:0x4000000000001], [-NaN -NaN]) == [-NaN:0x1 -NaN:0x4000000000001]
; run: %fcopysign_f64x2([-NaN:0x0 -NaN:0x1], [+NaN +NaN]) == [+NaN:0x0 +NaN:0x1]
; run: %fcopysign_f64x2([-NaN:0x4000000000001 +sNaN:0x1], [+NaN -NaN]) == [+NaN:0x4000000000001 -sNaN:0x1]
; run: %fcopysign_f64x2([-sNaN:0x1 +sNaN:0x4000000000001], [+NaN -NaN]) == [+sNaN:0x1 -sNaN:0x4000000000001]
; run: %fcopysign_f64x2([-sNaN:0x4000000000001 -sNaN:0x4000000000001], [+NaN +NaN]) == [+sNaN:0x4000000000001 +sNaN:0x4000000000001]

View File

@@ -808,7 +808,19 @@ where
}
Opcode::Fneg => assign(Value::neg(arg(0)?)?),
Opcode::Fabs => assign(Value::abs(arg(0)?)?),
Opcode::Fcopysign => binary(Value::copysign, arg(0)?, arg(1)?)?,
Opcode::Fcopysign => {
let arg0 = extractlanes(&arg(0)?, ctrl_ty)?;
let arg1 = extractlanes(&arg(1)?, ctrl_ty)?;
assign(vectorizelanes(
&arg0
.into_iter()
.zip(arg1.into_iter())
.map(|(x, y)| V::copysign(x, y))
.collect::<ValueResult<SimdVec<V>>>()?,
ctrl_ty,
)?)
}
Opcode::Fmin => assign(match (arg(0)?, arg(1)?) {
(a, _) if a.is_nan()? => a,
(_, b) if b.is_nan()? => b,