Port Fcopysign..FcvtToSintSat to ISLE (AArch64) (#4753)

* Port `Fcopysign`..`FcvtToSintSat` to ISLE (AArch64)

Ported the existing implementations of the following opcodes to ISLE on
AArch64:
- `Fcopysign`
  - Also introduced missing support for `fcopysign` on vector values, as
    per the docs.
  - This introduces the vector encoding for the `SLI` machine
    instruction (see the lowering sketch after this list).
- `FcvtToUint`
- `FcvtToSint`
- `FcvtFromUint`
- `FcvtFromSint`
- `FcvtToUintSat`
- `FcvtToSintSat`
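
As an illustrative sketch (registers taken from the precompile tests added
below), the vector lowering of `fcopysign` reuses the scalar `ushr`/`sli`
pattern lane-wise, e.g. for `f32x4`:

; ushr v7.4s, v1.4s, #31  ; move each lane's sign bit down into bit 0
; sli v0.4s, v7.4s, #31   ; shift it back up and insert it, leaving all other bits of v0 intact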

Copyright (c) 2022 Arm Limited

* Document helpers and abstract conversion checks
Damian Heaton, 2022-08-24 18:37:14 +01:00 (committed by GitHub)
parent 7e3c481f4e
commit 94bcbe8446
12 changed files with 863 additions and 548 deletions


@@ -619,6 +619,14 @@
(size VectorSize)
(imm u8))
;; Destructive vector shift by immediate.
(VecShiftImmMod
(op VecShiftImmModOp)
(rd WritableReg)
(rn Reg)
(size VectorSize)
(imm u8))
;; Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes
;; of `rm` followed by the uppermost `16 - imm4` bytes of `rn`.
(VecExtract
@@ -1315,6 +1323,13 @@
(Sshr)
))
;; Destructive shift-by-immediate operation on each lane of a vector.
(type VecShiftImmModOp
(enum
;; Shift left and insert
(Sli)
))
;; Atomic read-modify-write operations with acquire-release semantics
(type AtomicRMWOp
(enum
@@ -1386,6 +1401,48 @@
(decl u64_into_imm_logic (Type u64) ImmLogic)
(extern constructor u64_into_imm_logic u64_into_imm_logic)
;; Calculate the minimum floating-point bound for a conversion from a
;; floating-point type to an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating-point type in bits, and the size of the output integer type
;; in bits.
(decl min_fp_value (bool u8 u8) Reg)
(extern constructor min_fp_value min_fp_value)
;; Calculate the maximum floating-point bound for a conversion from a
;; floating-point type to an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating-point type in bits, and the size of the output integer type
;; in bits.
(decl max_fp_value (bool u8 u8) Reg)
(extern constructor max_fp_value max_fp_value)
;; Calculate the minimum acceptable floating-point value for a saturating
;; conversion from a floating-point type to an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating-point type in bits, and the size of the output integer type
;; in bits.
(decl min_fp_value_sat (bool u8 u8) Reg)
(extern constructor min_fp_value_sat min_fp_value_sat)
;; Calculate the maximum acceptable floating-point value for a saturating
;; conversion from a floating-point type to an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating-point type in bits, and the size of the output integer type
;; in bits.
(decl max_fp_value_sat (bool u8 u8) Reg)
(extern constructor max_fp_value_sat max_fp_value_sat)
;; Constructs an FPUOpRI.Ushr* given the size in bits of the value (or lane)
;; and the amount to shift by.
(decl fpu_op_ri_ushr (u8 u8) FPUOpRI)
(extern constructor fpu_op_ri_ushr fpu_op_ri_ushr)
;; Constructs an FPUOpRI.Sli* given the size in bits of the value (or lane)
;; and the amount to shift by.
(decl fpu_op_ri_sli (u8 u8) FPUOpRI)
(extern constructor fpu_op_ri_sli fpu_op_ri_sli)
(decl imm12_from_negated_u64 (Imm12) u64)
(extern extractor imm12_from_negated_u64 imm12_from_negated_u64)
@@ -1533,6 +1590,12 @@
(_2 Unit (emit (MInst.VecRRRMod op dst src2 src3 size))))
dst))
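;; Helper for emitting `MInst.FpuRRI` instructions.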
(decl fpu_rri (FPUOpRI Reg) Reg)
(rule (fpu_rri op src)
(let ((dst WritableReg (temp_writable_reg $F64))
(_ Unit (emit (MInst.FpuRRI op dst src))))
dst))
;; Helper for emitting `MInst.FpuRRR` instructions.
(decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg)
(rule (fpu_rrr op src1 src2 size)
@@ -2611,3 +2674,147 @@
;; to clobber LR.
(let ((_ Unit (emit (MInst.Xpaclri))))
(mov_preg (preg_link))))
;; Helper for getting the maximum shift amount for a type.
(decl max_shift (Type) u8)
(rule (max_shift $F64) 63)
(rule (max_shift $F32) 31)
;; Helper for generating `fcopysign` instruction sequences.
(decl fcopy_sign (Reg Reg Type) Reg)
(rule (fcopy_sign x y (ty_scalar_float ty))
(let ((dst WritableReg (temp_writable_reg $F64))
(_ Unit (emit (MInst.FpuMove64 dst x)))
(tmp Reg (fpu_rri (fpu_op_ri_ushr (ty_bits ty) (max_shift ty)) y))
(_ Unit (emit (MInst.FpuRRI (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst tmp))))
dst))
(rule (fcopy_sign x y ty @ (multi_lane _ _))
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.FpuMove128 dst x)))
(tmp Reg (vec_shift_imm (VecShiftImmOp.Ushr) (max_shift (lane_type ty)) y (vector_size ty)))
(_ Unit (emit (MInst.VecShiftImmMod (VecShiftImmModOp.Sli) dst tmp (vector_size ty) (max_shift (lane_type ty))))))
dst))
;; Helpers for generating `MInst.FpuToInt` instructions.
(decl fpu_to_int_nan_check (ScalarSize Reg) Reg)
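;; Note: an `fcmp` of a value against itself sets the V (unordered) flag
;; exactly when the value is a NaN, so trapping on `Cond.Vs` rejects NaN
;; inputs before any bounds checks run.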
(rule (fpu_to_int_nan_check size src)
(let ((r ValueRegs
(with_flags (fpu_cmp size src src)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Vs))
(trap_code_bad_conversion_to_integer))
src))))
(value_regs_get r 0)))
;; Checks that the value is not less than the minimum bound,
;; accepting a boolean (whether the type is signed), input type,
;; output type, and registers containing the source and minimum bound.
(decl fpu_to_int_underflow_check (bool Type Type Reg Reg) Reg)
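;; Note the asymmetric conditions below: when `INT_MIN - 1` is exactly
;; representable in the input float type, the bound itself is already out of
;; range and we trap on `<=` (`Cond.Le`); when it is not (the wider signed
;; cases, where `min_fp_value` returns `INT_MIN` itself), the bound is in
;; range and we trap only on `<` (`Cond.Lt`).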
(rule (fpu_to_int_underflow_check $true $F32 (fits_in_16 out_ty) src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (ScalarSize.Size32) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Le))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
(rule (fpu_to_int_underflow_check $true $F64 (fits_in_32 out_ty) src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (ScalarSize.Size64) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Le))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
(rule -1 (fpu_to_int_underflow_check $true in_ty _out_ty src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (scalar_size in_ty) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Lt))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
(rule (fpu_to_int_underflow_check $false in_ty _out_ty src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (scalar_size in_ty) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Le))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
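;; Checks that the value is strictly less than the maximum bound, accepting
;; the input size and registers containing the source and maximum bound.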
(decl fpu_to_int_overflow_check (ScalarSize Reg Reg) Reg)
(rule (fpu_to_int_overflow_check size src max)
(let ((r ValueRegs
(with_flags (fpu_cmp size src max)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Ge))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
;; Emits the appropriate instruction sequence to convert a
;; floating-point value to an integer, trapping if the value
;; is a NaN or does not fit in the target type.
;; Accepts the specific conversion op, the source register,
;; whether the input is signed, and finally the input and output
;; types.
(decl fpu_to_int_cvt (FpuToIntOp Reg bool Type Type) Reg)
(rule (fpu_to_int_cvt op src signed in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
(src Reg (fpu_to_int_nan_check size src))
(min Reg (min_fp_value signed in_bits out_bits))
(src Reg (fpu_to_int_underflow_check signed in_ty out_ty src min))
(max Reg (max_fp_value signed in_bits out_bits))
(src Reg (fpu_to_int_overflow_check size src max)))
(fpu_to_int op src)))
;; Emits the appropriate instruction sequence to convert a
;; floating-point value to an integer, saturating if the value
;; does not fit in the target type.
;; Accepts the specific conversion op, the source register,
;; whether the input is signed, and finally the input and output
;; types.
(decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type Type) Reg)
(rule (fpu_to_int_cvt_sat op src $true in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
(max Reg (max_fp_value_sat $true in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
(min Reg (min_fp_value_sat $true in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
(zero Reg (constant_f128 0))
(tmp ValueRegs (with_flags (fpu_cmp size src src)
(fpu_csel in_ty (Cond.Ne) zero tmp))))
(fpu_to_int op (value_regs_get tmp 0))))
(rule (fpu_to_int_cvt_sat op src $false in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
(max Reg (max_fp_value_sat $false in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
(min Reg (min_fp_value_sat $false in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
(tmp ValueRegs (with_flags (fpu_cmp size src src)
(fpu_csel in_ty (Cond.Ne) min tmp))))
(fpu_to_int op (value_regs_get tmp 0))))
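;; Helper for emitting `MInst.FpuToInt` instructions.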
(decl fpu_to_int (FpuToIntOp Reg) Reg)
(rule (fpu_to_int op src)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.FpuToInt op dst src))))
dst))
;; Helper for generating `MInst.IntToFpu` instructions.
(decl int_to_fpu (IntToFpuOp Reg) Reg)
(rule (int_to_fpu op src)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.IntToFpu op dst src))))
dst))


@@ -2033,6 +2033,50 @@ impl MachInstEmit for Inst {
let rd_enc = machreg_to_vec(rd.to_reg());
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
}
&Inst::VecShiftImmMod {
op,
rd,
rn,
size,
imm,
} => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let (is_shr, mut template) = match op {
VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
};
if size.is_128bits() {
template |= 0b1 << 30;
}
let imm = imm as u32;
// Deal with the somewhat strange encoding scheme for, and limits on,
// the shift amount.
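// For example, a 32-bit left shift (`sli`) by 31 encodes `immh:immb` as
// 0b0100000 | 31 == 0b0111111.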
let immh_immb = match (size.lane_size(), is_shr) {
(ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
0b_1000_000_u32 | (64 - imm)
}
(ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
0b_0100_000_u32 | (32 - imm)
}
(ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
0b_0010_000_u32 | (16 - imm)
}
(ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
0b_0001_000_u32 | (8 - imm)
}
(ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
(ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
(ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
(ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
_ => panic!(
"aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {:?}, {:?}, {:?}",
op, size, imm
),
};
let rn_enc = machreg_to_vec(rn);
let rd_enc = machreg_to_vec(rd.to_reg());
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
}
&Inst::VecExtract { rd, rn, rm, imm4 } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);


@@ -39,7 +39,7 @@ pub use crate::isa::aarch64::lower::isle::generated_code::{
ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3,
FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp,
VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp,
-VecRRRLongOp, VecShiftImmOp,
+VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
};
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
@@ -767,6 +767,10 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::VecShiftImmMod { rd, rn, .. } => {
collector.reg_mod(rd);
collector.reg_use(rn);
}
&Inst::VecExtract { rd, rn, rm, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
@@ -2371,6 +2375,20 @@ impl Inst {
let rn = pretty_print_vreg_vector(rn, size, allocs);
format!("{} {}, {}, #{}", op, rd, rn, imm)
}
&Inst::VecShiftImmMod {
op,
rd,
rn,
size,
imm,
} => {
let op = match op {
VecShiftImmModOp::Sli => "sli",
};
let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
let rn = pretty_print_vreg_vector(rn, size, allocs);
format!("{} {}, {}, #{}", op, rd, rn, imm)
}
&Inst::VecExtract { rd, rn, rm, imm4 } => {
let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs);


@@ -406,6 +406,119 @@
(rule (lower (has_type (ty_scalar_float ty) (fma x y z)))
(fpu_rrrr (FPUOp3.MAdd) (scalar_size ty) x y z))
;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (fcopysign x y)))
(fcopy_sign x y ty))
;;;; Rules for `fcvt_to_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint x @ (value_type $F32))))
(fpu_to_int_cvt (FpuToIntOp.F32ToU32) x $false $F32 out_ty))
(rule (lower (has_type $I64 (fcvt_to_uint x @ (value_type $F32))))
(fpu_to_int_cvt (FpuToIntOp.F32ToU64) x $false $F32 $I64))
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint x @ (value_type $F64))))
(fpu_to_int_cvt (FpuToIntOp.F64ToU32) x $false $F64 out_ty))
(rule (lower (has_type $I64 (fcvt_to_uint x @ (value_type $F64))))
(fpu_to_int_cvt (FpuToIntOp.F64ToU64) x $false $F64 $I64))
;;;; Rules for `fcvt_to_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint x @ (value_type $F32))))
(fpu_to_int_cvt (FpuToIntOp.F32ToI32) x $true $F32 out_ty))
(rule (lower (has_type $I64 (fcvt_to_sint x @ (value_type $F32))))
(fpu_to_int_cvt (FpuToIntOp.F32ToI64) x $true $F32 $I64))
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint x @ (value_type $F64))))
(fpu_to_int_cvt (FpuToIntOp.F64ToI32) x $true $F64 out_ty))
(rule (lower (has_type $I64 (fcvt_to_sint x @ (value_type $F64))))
(fpu_to_int_cvt (FpuToIntOp.F64ToI64) x $true $F64 $I64))
;;;; Rules for `fcvt_from_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_from_uint x @ (value_type (multi_lane 32 _)))))
(vec_misc (VecMisc2.Ucvtf) x (vector_size ty)))
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_from_uint x @ (value_type (multi_lane 64 _)))))
(vec_misc (VecMisc2.Ucvtf) x (vector_size ty)))
(rule (lower (has_type $F32 (fcvt_from_uint x @ (value_type (fits_in_32 _)))))
(int_to_fpu (IntToFpuOp.U32ToF32) (put_in_reg_zext32 x)))
(rule (lower (has_type $F64 (fcvt_from_uint x @ (value_type (fits_in_32 _)))))
(int_to_fpu (IntToFpuOp.U32ToF64) (put_in_reg_zext32 x)))
(rule (lower (has_type $F32 (fcvt_from_uint x @ (value_type $I64))))
(int_to_fpu (IntToFpuOp.U64ToF32) x))
(rule (lower (has_type $F64 (fcvt_from_uint x @ (value_type $I64))))
(int_to_fpu (IntToFpuOp.U64ToF64) x))
;;;; Rules for `fcvt_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_from_sint x @ (value_type (multi_lane 32 _)))))
(vec_misc (VecMisc2.Scvtf) x (vector_size ty)))
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_from_sint x @ (value_type (multi_lane 64 _)))))
(vec_misc (VecMisc2.Scvtf) x (vector_size ty)))
(rule (lower (has_type $F32 (fcvt_from_sint x @ (value_type (fits_in_32 _)))))
(int_to_fpu (IntToFpuOp.I32ToF32) (put_in_reg_sext32 x)))
(rule (lower (has_type $F64 (fcvt_from_sint x @ (value_type (fits_in_32 _)))))
(int_to_fpu (IntToFpuOp.I32ToF64) (put_in_reg_sext32 x)))
(rule (lower (has_type $F32 (fcvt_from_sint x @ (value_type $I64))))
(int_to_fpu (IntToFpuOp.I64ToF32) x))
(rule (lower (has_type $F64 (fcvt_from_sint x @ (value_type $I64))))
(int_to_fpu (IntToFpuOp.I64ToF64) x))
;;;; Rules for `fcvt_to_uint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 32 _)))))
(vec_misc (VecMisc2.Fcvtzu) x (vector_size ty)))
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 64 _)))))
(vec_misc (VecMisc2.Fcvtzu) x (vector_size ty)))
(rule (lower (has_type $I32 (fcvt_to_uint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU32) x $false $F32 $I32))
(rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU64) x $false $F32 $I64))
(rule (lower (has_type $I32 (fcvt_to_uint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU32) x $false $F64 $I32))
(rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU64) x $false $F64 $I64))
;;;; Rules for `fcvt_to_sint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 32 _)))))
(vec_misc (VecMisc2.Fcvtzs) x (vector_size ty)))
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 64 _)))))
(vec_misc (VecMisc2.Fcvtzs) x (vector_size ty)))
(rule (lower (has_type $I32 (fcvt_to_sint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI32) x $true $F32 $I32))
(rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI64) x $true $F32 $I64))
(rule (lower (has_type $I32 (fcvt_to_sint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI32) x $true $F64 $I32))
(rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI64) x $true $F64 $I64))
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller


@@ -1065,17 +1065,6 @@ pub(crate) fn condcode_is_signed(cc: IntCC) -> bool {
//=============================================================================
// Helpers for instruction lowering.
pub(crate) fn choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T {
let bits = ty_bits(ty);
if bits <= 32 {
op32
} else if bits == 64 {
op64
} else {
panic!("choose_32_64 on > 64 bits!")
}
}
/// Checks for an instance of `op` feeding the given input.
pub(crate) fn maybe_input_insn(
c: &mut Lower<Inst>,


@@ -5,12 +5,13 @@ pub mod generated_code;
// Types that the generated ISLE code uses via `use super::*`.
use super::{
-insn_inputs, lower_constant_f128, lower_constant_f64, writable_zero_reg, zero_reg, AMode,
-ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp,
-FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo, MachLabel,
-MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg, ScalarSize,
-ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
+insn_inputs, lower_constant_f128, lower_constant_f32, lower_constant_f64, writable_zero_reg,
+zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond,
+CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC,
+JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize,
+PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
};
+use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm};
use crate::isa::aarch64::lower::{lower_address, lower_splat_const};
use crate::isa::aarch64::settings::Flags as IsaFlags;
use crate::machinst::{isle::*, InputSourceInst};
@@ -519,4 +520,198 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
fn preg_link(&mut self) -> PReg {
super::regs::link_reg().to_real_reg().unwrap().into()
}
fn min_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
if in_bits == 32 {
// From float32.
let min = match (signed, out_bits) {
(true, 8) => i8::MIN as f32 - 1.,
(true, 16) => i16::MIN as f32 - 1.,
(true, 32) => i32::MIN as f32, // I32_MIN - 1 isn't precisely representable as a f32.
(true, 64) => i64::MIN as f32, // I64_MIN - 1 isn't precisely representable as a f32.
(false, _) => -1.,
_ => unimplemented!(
"unexpected {} output size of {} bits for 32-bit input",
if signed { "signed" } else { "unsigned" },
out_bits
),
};
lower_constant_f32(self.lower_ctx, tmp, min);
} else if in_bits == 64 {
// From float64.
let min = match (signed, out_bits) {
(true, 8) => i8::MIN as f64 - 1.,
(true, 16) => i16::MIN as f64 - 1.,
(true, 32) => i32::MIN as f64 - 1.,
(true, 64) => i64::MIN as f64,
(false, _) => -1.,
_ => unimplemented!(
"unexpected {} output size of {} bits for 64-bit input",
if signed { "signed" } else { "unsigned" },
out_bits
),
};
lower_constant_f64(self.lower_ctx, tmp, min);
} else {
unimplemented!(
"unexpected input size for min_fp_value: {} (signed: {}, output size: {})",
in_bits,
signed,
out_bits
);
}
tmp.to_reg()
}
fn max_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
if in_bits == 32 {
// From float32.
let max = match (signed, out_bits) {
(true, 8) => i8::MAX as f32 + 1.,
(true, 16) => i16::MAX as f32 + 1.,
(true, 32) => (i32::MAX as u64 + 1) as f32,
(true, 64) => (i64::MAX as u64 + 1) as f32,
(false, 8) => u8::MAX as f32 + 1.,
(false, 16) => u16::MAX as f32 + 1.,
(false, 32) => (u32::MAX as u64 + 1) as f32,
(false, 64) => (u64::MAX as u128 + 1) as f32,
_ => unimplemented!(
"unexpected {} output size of {} bits for 32-bit input",
if signed { "signed" } else { "unsigned" },
out_bits
),
};
lower_constant_f32(self.lower_ctx, tmp, max);
} else if in_bits == 64 {
// From float64.
let max = match (signed, out_bits) {
(true, 8) => i8::MAX as f64 + 1.,
(true, 16) => i16::MAX as f64 + 1.,
(true, 32) => i32::MAX as f64 + 1.,
(true, 64) => (i64::MAX as u64 + 1) as f64,
(false, 8) => u8::MAX as f64 + 1.,
(false, 16) => u16::MAX as f64 + 1.,
(false, 32) => u32::MAX as f64 + 1.,
(false, 64) => (u64::MAX as u128 + 1) as f64,
_ => unimplemented!(
"unexpected {} output size of {} bits for 64-bit input",
if signed { "signed" } else { "unsigned" },
out_bits
),
};
lower_constant_f64(self.lower_ctx, tmp, max);
} else {
unimplemented!(
"unexpected input size for max_fp_value: {} (signed: {}, output size: {})",
in_bits,
signed,
out_bits
);
}
tmp.to_reg()
}
fn min_fp_value_sat(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
let min: f64 = match (out_bits, signed) {
(32, true) => i32::MIN as f64,
(32, false) => 0.0,
(64, true) => i64::MIN as f64,
(64, false) => 0.0,
_ => unimplemented!(
"unexpected {} output size of {} bits",
if signed { "signed" } else { "unsigned" },
out_bits
),
};
if in_bits == 32 {
lower_constant_f32(self.lower_ctx, tmp, min as f32)
} else if in_bits == 64 {
lower_constant_f64(self.lower_ctx, tmp, min)
} else {
unimplemented!(
"unexpected input size for min_fp_value_sat: {} (signed: {}, output size: {})",
in_bits,
signed,
out_bits
);
}
tmp.to_reg()
}
fn max_fp_value_sat(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
let max = match (out_bits, signed) {
(32, true) => i32::MAX as f64,
(32, false) => u32::MAX as f64,
(64, true) => i64::MAX as f64,
(64, false) => u64::MAX as f64,
_ => unimplemented!(
"unexpected {} output size of {} bits",
if signed { "signed" } else { "unsigned" },
out_bits
),
};
if in_bits == 32 {
lower_constant_f32(self.lower_ctx, tmp, max as f32)
} else if in_bits == 64 {
lower_constant_f64(self.lower_ctx, tmp, max)
} else {
unimplemented!(
"unexpected input size for max_fp_value_sat: {} (signed: {}, output size: {})",
in_bits,
signed,
out_bits
);
}
tmp.to_reg()
}
fn fpu_op_ri_ushr(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI {
if ty_bits == 32 {
FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
} else if ty_bits == 64 {
FPUOpRI::UShr64(FPURightShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
} else {
unimplemented!(
"unexpected input size for fpu_op_ri_ushr: {} (shift: {})",
ty_bits,
shift
);
}
}
fn fpu_op_ri_sli(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI {
if ty_bits == 32 {
FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
} else if ty_bits == 64 {
FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
} else {
unimplemented!(
"unexpected input size for fpu_op_ri_sli: {} (shift: {})",
ty_bits,
shift
);
}
}
}


@@ -2,10 +2,9 @@
use super::lower::*;
use crate::binemit::CodeOffset;
-use crate::ir::condcodes::FloatCC;
use crate::ir::types::*;
use crate::ir::Inst as IRInst;
-use crate::ir::{InstructionData, Opcode, TrapCode};
+use crate::ir::{InstructionData, Opcode};
use crate::isa::aarch64::abi::*;
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::settings as aarch64_settings;
@@ -978,408 +977,13 @@ pub(crate) fn lower_insn_to_regs(
Opcode::Fma => implemented_in_isle(ctx),
+Opcode::Fcopysign => implemented_in_isle(ctx),
+Opcode::FcvtToUint | Opcode::FcvtToSint => implemented_in_isle(ctx),
+Opcode::FcvtFromUint | Opcode::FcvtFromSint => implemented_in_isle(ctx),
+Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => implemented_in_isle(ctx),
Opcode::Fcopysign => {
// Copy the sign bit from inputs[1] to inputs[0]. We use the following sequence:
//
// This is a scalar Fcopysign.
// This uses scalar NEON operations for 64-bit and vector operations (2S) for 32-bit.
// In the latter case it still sets all bits except the lowest 32 to 0.
//
// mov vd, vn
// ushr vtmp, vm, #63 / #31
// sli vd, vtmp, #63 / #31
let ty = ctx.output_ty(insn, 0);
if ty != F32 && ty != F64 {
return Err(CodegenError::Unsupported(format!(
"Fcopysign: Unsupported type: {:?}",
ty
)));
}
let bits = ty_bits(ty) as u8;
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let tmp = ctx.alloc_tmp(F64).only_reg().unwrap();
// Copy LHS to rd.
ctx.emit(Inst::gen_move(rd, rn, ty));
// Copy the sign bit to the lowest bit in tmp.
let imm = FPURightShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
ctx.emit(Inst::FpuRRI {
fpu_op: choose_32_64(ty, FPUOpRI::UShr32(imm), FPUOpRI::UShr64(imm)),
rd: tmp,
rn: rm,
});
// Insert the bit from tmp into the sign bit of rd.
let imm = FPULeftShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
ctx.emit(Inst::FpuRRI {
fpu_op: choose_32_64(ty, FPUOpRI::Sli32(imm), FPUOpRI::Sli64(imm)),
rd,
rn: tmp.to_reg(),
});
}
Opcode::FcvtToUint | Opcode::FcvtToSint => {
let input_ty = ctx.input_ty(insn, 0);
let in_bits = ty_bits(input_ty);
let output_ty = ty.unwrap();
let out_bits = ty_bits(output_ty);
let signed = op == Opcode::FcvtToSint;
let op = match (signed, in_bits, out_bits) {
(false, 32, 8) | (false, 32, 16) | (false, 32, 32) => FpuToIntOp::F32ToU32,
(true, 32, 8) | (true, 32, 16) | (true, 32, 32) => FpuToIntOp::F32ToI32,
(false, 32, 64) => FpuToIntOp::F32ToU64,
(true, 32, 64) => FpuToIntOp::F32ToI64,
(false, 64, 8) | (false, 64, 16) | (false, 64, 32) => FpuToIntOp::F64ToU32,
(true, 64, 8) | (true, 64, 16) | (true, 64, 32) => FpuToIntOp::F64ToI32,
(false, 64, 64) => FpuToIntOp::F64ToU64,
(true, 64, 64) => FpuToIntOp::F64ToI64,
_ => {
return Err(CodegenError::Unsupported(format!(
"{}: Unsupported types: {:?} -> {:?}",
op, input_ty, output_ty
)))
}
};
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
// First, check the input: it's important to perform the NaN check before the
// in-bounds checks, per wasm semantics.
// Check that the input is not a NaN.
ctx.emit(Inst::FpuCmp {
size: ScalarSize::from_ty(input_ty),
rn,
rm: rn,
});
let trap_code = TrapCode::BadConversionToInteger;
ctx.emit(Inst::TrapIf {
trap_code,
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Unordered)),
});
let tmp = ctx.alloc_tmp(I8X16).only_reg().unwrap();
// Check that the input is in range, with "truncate towards zero" semantics. This means
// we allow values that are slightly out of range:
// - for signed conversions, we allow values strictly greater than INT_MIN-1 (when this
// can be represented), and strictly less than INT_MAX+1 (when this can be
// represented).
// - for unsigned conversions, we allow values strictly greater than -1, and strictly
// less than UINT_MAX+1 (when this can be represented).
if in_bits == 32 {
// From float32.
let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
(true, 8) => (
i8::min_value() as f32 - 1.,
FloatCC::GreaterThan,
i8::max_value() as f32 + 1.,
),
(true, 16) => (
i16::min_value() as f32 - 1.,
FloatCC::GreaterThan,
i16::max_value() as f32 + 1.,
),
(true, 32) => (
i32::min_value() as f32, // I32_MIN - 1 isn't precisely representable as a f32.
FloatCC::GreaterThanOrEqual,
i32::max_value() as f32 + 1.,
),
(true, 64) => (
i64::min_value() as f32, // I64_MIN - 1 isn't precisely representable as a f32.
FloatCC::GreaterThanOrEqual,
i64::max_value() as f32 + 1.,
),
(false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f32 + 1.),
(false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f32 + 1.),
(false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f32 + 1.),
(false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f32 + 1.),
_ => unreachable!(),
};
// >= low_bound
lower_constant_f32(ctx, tmp, low_bound);
ctx.emit(Inst::FpuCmp {
size: ScalarSize::Size32,
rn,
rm: tmp.to_reg(),
});
let trap_code = TrapCode::IntegerOverflow;
ctx.emit(Inst::TrapIf {
trap_code,
kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
});
// <= high_bound
lower_constant_f32(ctx, tmp, high_bound);
ctx.emit(Inst::FpuCmp {
size: ScalarSize::Size32,
rn,
rm: tmp.to_reg(),
});
let trap_code = TrapCode::IntegerOverflow;
ctx.emit(Inst::TrapIf {
trap_code,
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
});
} else {
// From float64.
let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
(true, 8) => (
i8::min_value() as f64 - 1.,
FloatCC::GreaterThan,
i8::max_value() as f64 + 1.,
),
(true, 16) => (
i16::min_value() as f64 - 1.,
FloatCC::GreaterThan,
i16::max_value() as f64 + 1.,
),
(true, 32) => (
i32::min_value() as f64 - 1.,
FloatCC::GreaterThan,
i32::max_value() as f64 + 1.,
),
(true, 64) => (
i64::min_value() as f64, // I64_MIN - 1 is not precisely representable as an f64.
FloatCC::GreaterThanOrEqual,
i64::max_value() as f64 + 1.,
),
(false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f64 + 1.),
(false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f64 + 1.),
(false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f64 + 1.),
(false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f64 + 1.),
_ => unreachable!(),
};
// >= low_bound
lower_constant_f64(ctx, tmp, low_bound);
ctx.emit(Inst::FpuCmp {
size: ScalarSize::Size64,
rn,
rm: tmp.to_reg(),
});
let trap_code = TrapCode::IntegerOverflow;
ctx.emit(Inst::TrapIf {
trap_code,
kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
});
// <= high_bound
lower_constant_f64(ctx, tmp, high_bound);
ctx.emit(Inst::FpuCmp {
size: ScalarSize::Size64,
rn,
rm: tmp.to_reg(),
});
let trap_code = TrapCode::IntegerOverflow;
ctx.emit(Inst::TrapIf {
trap_code,
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
});
};
// Do the conversion.
ctx.emit(Inst::FpuToInt { op, rd, rn });
}
Opcode::FcvtFromUint | Opcode::FcvtFromSint => {
let input_ty = ctx.input_ty(insn, 0);
let ty = ty.unwrap();
let signed = op == Opcode::FcvtFromSint;
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if ty.is_vector() {
if input_ty.lane_bits() != ty.lane_bits() {
return Err(CodegenError::Unsupported(format!(
"{}: Unsupported types: {:?} -> {:?}",
op, input_ty, ty
)));
}
let op = if signed {
VecMisc2::Scvtf
} else {
VecMisc2::Ucvtf
};
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(Inst::VecMisc {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
});
} else {
let in_bits = ty_bits(input_ty);
let out_bits = ty_bits(ty);
let op = match (signed, in_bits, out_bits) {
(false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
(true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
(false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
(true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
(false, 64, 32) => IntToFpuOp::U64ToF32,
(true, 64, 32) => IntToFpuOp::I64ToF32,
(false, 64, 64) => IntToFpuOp::U64ToF64,
(true, 64, 64) => IntToFpuOp::I64ToF64,
_ => {
return Err(CodegenError::Unsupported(format!(
"{}: Unsupported types: {:?} -> {:?}",
op, input_ty, ty
)))
}
};
let narrow_mode = match (signed, in_bits) {
(false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
(true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
(false, 64) => NarrowValueMode::ZeroExtend64,
(true, 64) => NarrowValueMode::SignExtend64,
_ => unreachable!(),
};
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
ctx.emit(Inst::IntToFpu { op, rd, rn });
}
}
Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => {
let in_ty = ctx.input_ty(insn, 0);
let ty = ty.unwrap();
let out_signed = op == Opcode::FcvtToSintSat;
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if ty.is_vector() {
if in_ty.lane_bits() != ty.lane_bits() {
return Err(CodegenError::Unsupported(format!(
"{}: Unsupported types: {:?} -> {:?}",
op, in_ty, ty
)));
}
let op = if out_signed {
VecMisc2::Fcvtzs
} else {
VecMisc2::Fcvtzu
};
ctx.emit(Inst::VecMisc {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
});
} else {
let in_bits = ty_bits(in_ty);
let out_bits = ty_bits(ty);
// FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
// FMIN Vtmp2, Vin, Vtmp1
// FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN
// FMAX Vtmp2, Vtmp2, Vtmp1
// (if signed) FIMM Vtmp1, 0
// FCMP Vin, Vin
// FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0
// convert Rout, Vtmp2
assert!(in_ty.is_float() && (in_bits == 32 || in_bits == 64));
assert!(out_bits == 32 || out_bits == 64);
let min: f64 = match (out_bits, out_signed) {
(32, true) => std::i32::MIN as f64,
(32, false) => 0.0,
(64, true) => std::i64::MIN as f64,
(64, false) => 0.0,
_ => unreachable!(),
};
let max = match (out_bits, out_signed) {
(32, true) => std::i32::MAX as f64,
(32, false) => std::u32::MAX as f64,
(64, true) => std::i64::MAX as f64,
(64, false) => std::u64::MAX as f64,
_ => unreachable!(),
};
let rtmp1 = ctx.alloc_tmp(in_ty).only_reg().unwrap();
let rtmp2 = ctx.alloc_tmp(in_ty).only_reg().unwrap();
if in_bits == 32 {
lower_constant_f32(ctx, rtmp1, max as f32);
} else {
lower_constant_f64(ctx, rtmp1, max);
}
ctx.emit(Inst::FpuRRR {
fpu_op: FPUOp2::Min,
size: ScalarSize::from_ty(in_ty),
rd: rtmp2,
rn,
rm: rtmp1.to_reg(),
});
if in_bits == 32 {
lower_constant_f32(ctx, rtmp1, min as f32);
} else {
lower_constant_f64(ctx, rtmp1, min);
}
ctx.emit(Inst::FpuRRR {
fpu_op: FPUOp2::Max,
size: ScalarSize::from_ty(in_ty),
rd: rtmp2,
rn: rtmp2.to_reg(),
rm: rtmp1.to_reg(),
});
if out_signed {
if in_bits == 32 {
lower_constant_f32(ctx, rtmp1, 0.0);
} else {
lower_constant_f64(ctx, rtmp1, 0.0);
}
}
ctx.emit(Inst::FpuCmp {
size: ScalarSize::from_ty(in_ty),
rn,
rm: rn,
});
if in_bits == 32 {
ctx.emit(Inst::FpuCSel32 {
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
});
} else {
ctx.emit(Inst::FpuCSel64 {
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
});
}
let cvt = match (in_bits, out_bits, out_signed) {
(32, 32, false) => FpuToIntOp::F32ToU32,
(32, 32, true) => FpuToIntOp::F32ToI32,
(32, 64, false) => FpuToIntOp::F32ToU64,
(32, 64, true) => FpuToIntOp::F32ToI64,
(64, 32, false) => FpuToIntOp::F64ToU32,
(64, 32, true) => FpuToIntOp::F64ToI32,
(64, 64, false) => FpuToIntOp::F64ToU64,
(64, 64, true) => FpuToIntOp::F64ToI64,
_ => unreachable!(),
};
ctx.emit(Inst::FpuToInt {
op: cvt,
rd,
rn: rtmp2.to_reg(),
});
}
}
Opcode::IaddIfcout => {
// This is a two-output instruction that is needed for the


@@ -9,8 +9,8 @@ block0(v0: i8):
}
; block0:
-; uxtb w4, w0
-; ucvtf s0, w4
+; uxtb w3, w0
+; ucvtf s0, w3
; ret
function u0:0(i8) -> f64 {
@@ -20,8 +20,8 @@ block0(v0: i8):
}
; block0:
-; uxtb w4, w0
-; ucvtf d0, w4
+; uxtb w3, w0
+; ucvtf d0, w3
; ret
function u0:0(i16) -> f32 {
@@ -31,8 +31,8 @@ block0(v0: i16):
}
; block0:
-; uxth w4, w0
-; ucvtf s0, w4
+; uxth w3, w0
+; ucvtf s0, w3
; ret
function u0:0(i16) -> f64 {
@@ -42,8 +42,8 @@ block0(v0: i16):
}
; block0:
-; uxth w4, w0
-; ucvtf d0, w4
+; uxth w3, w0
+; ucvtf d0, w3
; ret
function u0:0(f32) -> i8 {
@@ -55,13 +55,13 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
-; fmov s6, #-1
-; fcmp s0, s6
+; fmov s5, #-1
+; fcmp s0, s5
; b.gt 8 ; udf
; movz x10, #17280, LSL #16
-; fmov s6, w10
-; fcmp s0, s6
-; b.mi 8 ; udf
+; fmov s18, w10
+; fcmp s0, s18
+; b.lt 8 ; udf
; fcvtzu w0, s0
; ret
@@ -74,13 +74,13 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
-; fmov d6, #-1
-; fcmp d0, d6
+; fmov d5, #-1
+; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #16496, LSL #48
-; fmov d6, x10
-; fcmp d0, d6
-; b.mi 8 ; udf
+; fmov d18, x10
+; fcmp d0, d18
+; b.lt 8 ; udf
; fcvtzu w0, d0
; ret
@@ -93,13 +93,13 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
-; fmov s6, #-1
-; fcmp s0, s6
+; fmov s5, #-1
+; fcmp s0, s5
; b.gt 8 ; udf
; movz x10, #18304, LSL #16
-; fmov s6, w10
-; fcmp s0, s6
-; b.mi 8 ; udf
+; fmov s18, w10
+; fcmp s0, s18
+; b.lt 8 ; udf
; fcvtzu w0, s0
; ret
@@ -112,13 +112,13 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
-; fmov d6, #-1
-; fcmp d0, d6
+; fmov d5, #-1
+; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #16624, LSL #48
-; fmov d6, x10
-; fcmp d0, d6
-; b.mi 8 ; udf
+; fmov d18, x10
+; fcmp d0, d18
+; b.lt 8 ; udf
; fcvtzu w0, d0
; ret


@@ -333,13 +333,13 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
-; fmov s6, #-1
-; fcmp s0, s6
+; fmov s5, #-1
+; fcmp s0, s5
; b.gt 8 ; udf
; movz x10, #20352, LSL #16
-; fmov s6, w10
-; fcmp s0, s6
-; b.mi 8 ; udf
+; fmov s18, w10
+; fcmp s0, s18
+; b.lt 8 ; udf
; fcvtzu w0, s0
; ret
@@ -352,14 +352,14 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
-; movz x7, #52992, LSL #16
-; fmov s7, w7
-; fcmp s0, s7
+; movz x6, #52992, LSL #16
+; fmov s6, w6
+; fcmp s0, s6
; b.ge 8 ; udf
; movz x12, #20224, LSL #16
-; fmov s7, w12
-; fcmp s0, s7
-; b.mi 8 ; udf
+; fmov s20, w12
+; fcmp s0, s20
+; b.lt 8 ; udf
; fcvtzs w0, s0
; ret
@@ -372,13 +372,13 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
-; fmov s6, #-1
-; fcmp s0, s6
+; fmov s5, #-1
+; fcmp s0, s5
; b.gt 8 ; udf
; movz x10, #24448, LSL #16
-; fmov s6, w10
-; fcmp s0, s6
-; b.mi 8 ; udf
+; fmov s18, w10
+; fcmp s0, s18
+; b.lt 8 ; udf
; fcvtzu x0, s0
; ret
@@ -391,14 +391,14 @@ block0(v0: f32):
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
-; movz x7, #57088, LSL #16
-; fmov s7, w7
-; fcmp s0, s7
+; movz x6, #57088, LSL #16
+; fmov s6, w6
+; fcmp s0, s6
; b.ge 8 ; udf
; movz x12, #24320, LSL #16
-; fmov s7, w12
-; fcmp s0, s7
-; b.mi 8 ; udf
+; fmov s20, w12
+; fcmp s0, s20
+; b.lt 8 ; udf
; fcvtzs x0, s0
; ret
@@ -411,13 +411,13 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
-; fmov d6, #-1
-; fcmp d0, d6
+; fmov d5, #-1
+; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #16880, LSL #48
-; fmov d6, x10
-; fcmp d0, d6
-; b.mi 8 ; udf
+; fmov d18, x10
+; fcmp d0, d18
+; b.lt 8 ; udf
; fcvtzu w0, d0
; ret
@@ -430,13 +430,13 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
-; ldr d6, pc+8 ; b 12 ; data.f64 -2147483649
-; fcmp d0, d6
+; ldr d5, pc+8 ; b 12 ; data.f64 -2147483649
+; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #16864, LSL #48
-; fmov d6, x10
-; fcmp d0, d6
-; b.mi 8 ; udf
+; fmov d18, x10
+; fcmp d0, d18
+; b.lt 8 ; udf
; fcvtzs w0, d0
; ret
@@ -449,13 +449,13 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
-; fmov d6, #-1
-; fcmp d0, d6
+; fmov d5, #-1
+; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #17392, LSL #48
-; fmov d6, x10
-; fcmp d0, d6
-; b.mi 8 ; udf
+; fmov d18, x10
+; fcmp d0, d18
+; b.lt 8 ; udf
; fcvtzu x0, d0
; ret
@@ -468,14 +468,14 @@ block0(v0: f64):
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
-; movz x7, #50144, LSL #48
-; fmov d7, x7
-; fcmp d0, d7
+; movz x6, #50144, LSL #48
+; fmov d6, x6
+; fcmp d0, d6
; b.ge 8 ; udf
; movz x12, #17376, LSL #48
-; fmov d7, x12
-; fcmp d0, d7
-; b.mi 8 ; udf
+; fmov d20, x12
+; fcmp d0, d20
+; b.lt 8 ; udf
; fcvtzs x0, d0
; ret
@@ -566,14 +566,14 @@ block0(v0: f32):
}
; block0:
-; movz x6, #20352, LSL #16
-; fmov s5, w6
-; fmin s7, s0, s5
-; movi v5.2s, #0
-; fmax s7, s7, s5
+; movz x4, #20352, LSL #16
+; fmov s4, w4
+; fmin s7, s0, s4
+; movi v17.2s, #0
+; fmax s19, s7, s17
; fcmp s0, s0
-; fcsel s7, s5, s7, ne
-; fcvtzu w0, s7
+; fcsel s22, s17, s19, ne
+; fcvtzu w0, s22
; ret
function %f50(f32) -> i32 {
@@ -583,16 +583,16 @@ block0(v0: f32):
}
; block0:
-; movz x6, #20224, LSL #16
-; fmov s5, w6
-; fmin s7, s0, s5
+; movz x4, #20224, LSL #16
+; fmov s4, w4
+; fmin s7, s0, s4
; movz x10, #52992, LSL #16
-; fmov s5, w10
-; fmax s7, s7, s5
-; movi v5.2s, #0
+; fmov s18, w10
+; fmax s21, s7, s18
+; movi v23.16b, #0
; fcmp s0, s0
-; fcsel s7, s5, s7, ne
-; fcvtzs w0, s7
+; fcsel s26, s23, s21, ne
+; fcvtzs w0, s26
; ret
function %f51(f32) -> i64 {
@@ -602,14 +602,14 @@ block0(v0: f32):
}
; block0:
-; movz x6, #24448, LSL #16
-; fmov s5, w6
-; fmin s7, s0, s5
-; movi v5.2s, #0
-; fmax s7, s7, s5
+; movz x4, #24448, LSL #16
+; fmov s4, w4
+; fmin s7, s0, s4
+; movi v17.2s, #0
+; fmax s19, s7, s17
; fcmp s0, s0
-; fcsel s7, s5, s7, ne
-; fcvtzu x0, s7
+; fcsel s22, s17, s19, ne
+; fcvtzu x0, s22
; ret
function %f52(f32) -> i64 {
@@ -619,16 +619,16 @@ block0(v0: f32):
}
; block0:
-; movz x6, #24320, LSL #16
-; fmov s5, w6
-; fmin s7, s0, s5
+; movz x4, #24320, LSL #16
+; fmov s4, w4
+; fmin s7, s0, s4
; movz x10, #57088, LSL #16
-; fmov s5, w10
-; fmax s7, s7, s5
-; movi v5.2s, #0
+; fmov s18, w10
+; fmax s21, s7, s18
+; movi v23.16b, #0
; fcmp s0, s0
-; fcsel s7, s5, s7, ne
-; fcvtzs x0, s7
+; fcsel s26, s23, s21, ne
+; fcvtzs x0, s26
; ret
function %f53(f64) -> i32 {
@@ -638,13 +638,13 @@ block0(v0: f64):
}
; block0:
-; ldr d4, pc+8 ; b 12 ; data.f64 4294967295
-; fmin d6, d0, d4
-; movi v4.2s, #0
-; fmax d6, d6, d4
+; ldr d3, pc+8 ; b 12 ; data.f64 4294967295
+; fmin d5, d0, d3
+; movi v7.2s, #0
+; fmax d17, d5, d7
; fcmp d0, d0
-; fcsel d6, d4, d6, ne
-; fcvtzu w0, d6
+; fcsel d20, d7, d17, ne
+; fcvtzu w0, d20
; ret
function %f54(f64) -> i32 {
@@ -654,15 +654,15 @@ block0(v0: f64):
}
; block0:
-; ldr d4, pc+8 ; b 12 ; data.f64 2147483647
-; fmin d6, d0, d4
+; ldr d3, pc+8 ; b 12 ; data.f64 2147483647
+; fmin d5, d0, d3
; movz x8, #49632, LSL #48
-; fmov d4, x8
-; fmax d6, d6, d4
-; movi v4.2s, #0
+; fmov d16, x8
+; fmax d19, d5, d16
+; movi v21.16b, #0
; fcmp d0, d0
-; fcsel d6, d4, d6, ne
-; fcvtzs w0, d6
+; fcsel d24, d21, d19, ne
+; fcvtzs w0, d24
; ret
function %f55(f64) -> i64 {
@@ -672,14 +672,14 @@ block0(v0: f64):
}
; block0:
-; movz x6, #17392, LSL #48
-; fmov d5, x6
-; fmin d7, d0, d5
-; movi v5.2s, #0
-; fmax d7, d7, d5
+; movz x4, #17392, LSL #48
+; fmov d4, x4
+; fmin d7, d0, d4
+; movi v17.2s, #0
+; fmax d19, d7, d17
; fcmp d0, d0
-; fcsel d7, d5, d7, ne
-; fcvtzu x0, d7
+; fcsel d22, d17, d19, ne
+; fcvtzu x0, d22
; ret
function %f56(f64) -> i64 {
@@ -689,16 +689,16 @@ block0(v0: f64):
}
; block0:
-; movz x6, #17376, LSL #48
-; fmov d5, x6
-; fmin d7, d0, d5
+; movz x4, #17376, LSL #48
+; fmov d4, x4
+; fmin d7, d0, d4
; movz x10, #50144, LSL #48
-; fmov d5, x10
-; fmax d7, d7, d5
-; movi v5.2s, #0
+; fmov d18, x10
+; fmax d21, d7, d18
+; movi v23.16b, #0
; fcmp d0, d0
-; fcsel d7, d5, d7, ne
-; fcvtzs x0, d7
+; fcsel d26, d23, d21, ne
+; fcvtzs x0, d26
; ret
function %f57(f32x2) -> f32x2 {
@@ -946,3 +946,36 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2):
; mov v0.16b, v2.16b
; fmla v0.2d, v17.2d, v1.2d
; ret
function %f81(f32x2, f32x2) -> f32x2 {
block0(v0: f32x2, v1: f32x2):
v2 = fcopysign v0, v1
return v2
}
; block0:
; ushr v7.2s, v1.2s, #31
; sli v0.2s, v7.2s, #31
; ret
function %f82(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcopysign v0, v1
return v2
}
; block0:
; ushr v7.4s, v1.4s, #31
; sli v0.4s, v7.4s, #31
; ret
function %f83(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcopysign v0, v1
return v2
}
; block0:
; ushr v7.2d, v1.2d, #63
; sli v0.2d, v7.2d, #63
; ret


@@ -0,0 +1,37 @@
test interpret
test run
target aarch64
; x86_64 and s390x do not support 64-bit vectors in `fcopysign`.
function %fcopysign_f32x2(f32x2, f32x2) -> f32x2 {
block0(v0: f32x2, v1: f32x2):
v2 = fcopysign v0, v1
return v2
}
; run: %fcopysign_f32x2([0x9.0 -0x9.0], [0x9.0 0x9.0]) == [0x9.0 0x9.0]
; run: %fcopysign_f32x2([0x9.0 -0x9.0], [-0x9.0 -0x9.0]) == [-0x9.0 -0x9.0]
; run: %fcopysign_f32x2([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-0x0.0 0x0.0]
; F32 Inf
; run: %fcopysign_f32x2([Inf -Inf], [Inf Inf]) == [Inf Inf]
; run: %fcopysign_f32x2([Inf -Inf], [-Inf -Inf]) == [-Inf -Inf]
; F32 Epsilon / Max / Min Positive
; run: %fcopysign_f32x2([0x1.000000p-23 -0x1.000000p-23], [-0x0.0 0x0.0]) == [-0x1.000000p-23 0x1.000000p-23]
; run: %fcopysign_f32x2([0x1.fffffep127 -0x1.fffffep127], [-0x0.0 0x0.0]) == [-0x1.fffffep127 0x1.fffffep127]
; run: %fcopysign_f32x2([0x1.000000p-126 -0x1.000000p-126], [-0x0.0 0x0.0]) == [-0x1.000000p-126 0x1.000000p-126]
; F32 Subnormals
; run: %fcopysign_f32x2([0x0.800000p-126 -0x0.800000p-126], [-0x0.0 0x0.0]) == [-0x0.800000p-126 0x0.800000p-126]
; run: %fcopysign_f32x2([0x0.000002p-126 -0x0.000002p-126], [-0x0.0 0x0.0]) == [-0x0.000002p-126 0x0.000002p-126]
; F32 NaNs
; Unlike other operations, fcopysign is guaranteed to affect only the sign bit
; run: %fcopysign_f32x2([0x0.0 0x3.0], [-NaN +sNaN:0x1]) == [-0x0.0 0x3.0]
; run: %fcopysign_f32x2([Inf +NaN], [-NaN -NaN]) == [-Inf -NaN]
; run: %fcopysign_f32x2([-NaN +NaN:0x0], [+NaN -NaN]) == [+NaN -NaN:0x0]
; run: %fcopysign_f32x2([+NaN:0x1 +NaN:0x300001], [-NaN -NaN]) == [-NaN:0x1 -NaN:0x300001]
; run: %fcopysign_f32x2([-NaN:0x0 -NaN:0x1], [+NaN +NaN]) == [+NaN:0x0 +NaN:0x1]
; run: %fcopysign_f32x2([-NaN:0x300001 +sNaN:0x1], [+NaN -NaN]) == [+NaN:0x300001 -sNaN:0x1]
; run: %fcopysign_f32x2([-sNaN:0x1 +sNaN:0x200001], [+NaN -NaN]) == [+sNaN:0x1 -sNaN:0x200001]
; run: %fcopysign_f32x2([-sNaN:0x200001 -sNaN:0x200001], [+NaN +NaN]) == [+sNaN:0x200001 +sNaN:0x200001]


@@ -0,0 +1,63 @@
test interpret
test run
target s390x
target aarch64
; x86_64 does not support SIMD fcopysign.
function %fcopysign_f32x4(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcopysign v0, v1
return v2
}
; run: %fcopysign_f32x4([0x9.0 -0x9.0 0x9.0 -0x9.0], [0x9.0 0x9.0 -0x9.0 -0x9.0]) == [0x9.0 0x9.0 -0x9.0 -0x9.0]
; run: %fcopysign_f32x4([0x0.0 -0x0.0 0x0.0 -0x0.0], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x0.0 0x0.0 -0x0.0 0x0.0]
; F32 Inf
; run: %fcopysign_f32x4([Inf -Inf Inf -Inf], [Inf Inf -Inf -Inf]) == [Inf Inf -Inf -Inf]
; F32 Epsilon / Max / Min Positive
; run: %fcopysign_f32x4([0x1.000000p-23 -0x1.000000p-23 0x1.fffffep127 -0x1.fffffep127], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x1.000000p-23 0x1.000000p-23 -0x1.fffffep127 0x1.fffffep127]
; run: %fcopysign_f32x4([0x1.000000p-126 -0x1.000000p-126 0x1.000000p-126 -0x1.000000p-126], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x1.000000p-126 0x1.000000p-126 -0x1.000000p-126 0x1.000000p-126]
; F32 Subnormals
; run: %fcopysign_f32x4([0x0.800000p-126 -0x0.800000p-126 0x0.000002p-126 -0x0.000002p-126], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x0.800000p-126 0x0.800000p-126 -0x0.000002p-126 0x0.000002p-126]
; F32 NaNs
; Unlike other operations, fcopysign is guaranteed to affect only the sign bit
; run: %fcopysign_f32x4([0x0.0 0x3.0 Inf +NaN], [-NaN +sNaN:0x1 -NaN -NaN]) == [-0x0.0 0x3.0 -Inf -NaN]
; run: %fcopysign_f32x4([-NaN +NaN:0x0 +NaN:0x1 +NaN:0x300001], [+NaN -NaN -NaN -NaN]) == [+NaN -NaN:0x0 -NaN:0x1 -NaN:0x300001]
; run: %fcopysign_f32x4([-NaN:0x0 -NaN:0x1 -NaN:0x300001 +sNaN:0x1], [+NaN +NaN +NaN -NaN]) == [+NaN:0x0 +NaN:0x1 +NaN:0x300001 -sNaN:0x1]
; run: %fcopysign_f32x4([-sNaN:0x1 +sNaN:0x200001 -sNaN:0x200001 -sNaN:0x200001], [+NaN -NaN +NaN +NaN]) == [+sNaN:0x1 -sNaN:0x200001 +sNaN:0x200001 +sNaN:0x200001]
function %fcopysign_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcopysign v0, v1
return v2
}
; run: %fcopysign_f64x2([0x9.0 -0x9.0], [0x9.0 0x9.0]) == [0x9.0 0x9.0]
; run: %fcopysign_f64x2([0x9.0 -0x9.0], [-0x9.0 -0x9.0]) == [-0x9.0 -0x9.0]
; run: %fcopysign_f64x2([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-0x0.0 0x0.0]
; F64 Inf
; run: %fcopysign_f64x2([Inf -Inf], [Inf Inf]) == [Inf Inf]
; run: %fcopysign_f64x2([Inf -Inf], [-Inf -Inf]) == [-Inf -Inf]
; F64 Epsilon / Max / Min Positive
; run: %fcopysign_f64x2([0x1.0000000000000p-52 -0x1.0000000000000p-52], [-0x0.0 0x0.0]) == [-0x1.0000000000000p-52 0x1.0000000000000p-52]
; run: %fcopysign_f64x2([0x1.fffffffffffffp1023 -0x1.fffffffffffffp1023], [-0x0.0 0x0.0]) == [-0x1.fffffffffffffp1023 0x1.fffffffffffffp1023]
; run: %fcopysign_f64x2([0x1.0000000000000p-1022 -0x1.0000000000000p-1022], [-0x0.0 0x0.0]) == [-0x1.0000000000000p-1022 0x1.0000000000000p-1022]
; F64 Subnormals
; run: %fcopysign_f64x2([0x0.8000000000000p-1022 -0x0.8000000000000p-1022], [-0x0.0 0x0.0]) == [-0x0.8000000000000p-1022 0x0.8000000000000p-1022]
; run: %fcopysign_f64x2([0x0.0000000000001p-1022 -0x0.0000000000001p-1022], [-0x0.0 0x0.0]) == [-0x0.0000000000001p-1022 0x0.0000000000001p-1022]
; F64 NaNs
; Unlike other operations, fcopysign is guaranteed to affect only the sign bit
; run: %fcopysign_f64x2([0x0.0 0x3.0], [-NaN +sNaN:0x1]) == [-0x0.0 0x3.0]
; run: %fcopysign_f64x2([Inf +NaN], [-NaN -NaN]) == [-Inf -NaN]
; run: %fcopysign_f64x2([-NaN +NaN:0x0], [+NaN -NaN]) == [+NaN -NaN:0x0]
; run: %fcopysign_f64x2([+NaN:0x1 +NaN:0x4000000000001], [-NaN -NaN]) == [-NaN:0x1 -NaN:0x4000000000001]
; run: %fcopysign_f64x2([-NaN:0x0 -NaN:0x1], [+NaN +NaN]) == [+NaN:0x0 +NaN:0x1]
; run: %fcopysign_f64x2([-NaN:0x4000000000001 +sNaN:0x1], [+NaN -NaN]) == [+NaN:0x4000000000001 -sNaN:0x1]
; run: %fcopysign_f64x2([-sNaN:0x1 +sNaN:0x4000000000001], [+NaN -NaN]) == [+sNaN:0x1 -sNaN:0x4000000000001]
; run: %fcopysign_f64x2([-sNaN:0x4000000000001 -sNaN:0x4000000000001], [+NaN +NaN]) == [+sNaN:0x4000000000001 +sNaN:0x4000000000001]


@@ -808,7 +808,19 @@ where
}
Opcode::Fneg => assign(Value::neg(arg(0)?)?),
Opcode::Fabs => assign(Value::abs(arg(0)?)?),
-Opcode::Fcopysign => binary(Value::copysign, arg(0)?, arg(1)?)?,
+Opcode::Fcopysign => {
let arg0 = extractlanes(&arg(0)?, ctrl_ty)?;
let arg1 = extractlanes(&arg(1)?, ctrl_ty)?;
assign(vectorizelanes(
&arg0
.into_iter()
.zip(arg1.into_iter())
.map(|(x, y)| V::copysign(x, y))
.collect::<ValueResult<SimdVec<V>>>()?,
ctrl_ty,
)?)
}
Opcode::Fmin => assign(match (arg(0)?, arg(1)?) {
(a, _) if a.is_nan()? => a,
(_, b) if b.is_nan()? => b,