Port Fcopysign..FcvtToSintSat to ISLE (AArch64) (#4753)
* Port `Fcopysign`..``FcvtToSintSat` to ISLE (AArch64)
Ported the existing implementations of the following opcodes to ISLE on
AArch64:
- `Fcopysign`
- Also introduced missing support for `fcopysign` on vector values, as
per the docs.
- This introduces the vector encoding for the `SLI` machine
instruction.
- `FcvtToUint`
- `FcvtToSint`
- `FcvtFromUint`
- `FcvtFromSint`
- `FcvtToUintSat`
- `FcvtToSintSat`
Copyright (c) 2022 Arm Limited
* Document helpers and abstract conversion checks
This commit is contained in:
@@ -619,6 +619,14 @@
|
|||||||
(size VectorSize)
|
(size VectorSize)
|
||||||
(imm u8))
|
(imm u8))
|
||||||
|
|
||||||
|
;; Destructive vector shift by immediate.
|
||||||
|
(VecShiftImmMod
|
||||||
|
(op VecShiftImmModOp)
|
||||||
|
(rd WritableReg)
|
||||||
|
(rn Reg)
|
||||||
|
(size VectorSize)
|
||||||
|
(imm u8))
|
||||||
|
|
||||||
;; Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes
|
;; Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes
|
||||||
;; of `rm` followed by the uppermost `16 - imm4` bytes of `rn`.
|
;; of `rm` followed by the uppermost `16 - imm4` bytes of `rn`.
|
||||||
(VecExtract
|
(VecExtract
|
||||||
@@ -1315,6 +1323,13 @@
|
|||||||
(Sshr)
|
(Sshr)
|
||||||
))
|
))
|
||||||
|
|
||||||
|
;; Destructive shift-by-immediate operation on each lane of a vector.
|
||||||
|
(type VecShiftImmModOp
|
||||||
|
(enum
|
||||||
|
;; Shift left and insert
|
||||||
|
(Sli)
|
||||||
|
))
|
||||||
|
|
||||||
;; Atomic read-modify-write operations with acquire-release semantics
|
;; Atomic read-modify-write operations with acquire-release semantics
|
||||||
(type AtomicRMWOp
|
(type AtomicRMWOp
|
||||||
(enum
|
(enum
|
||||||
@@ -1386,6 +1401,48 @@
|
|||||||
(decl u64_into_imm_logic (Type u64) ImmLogic)
|
(decl u64_into_imm_logic (Type u64) ImmLogic)
|
||||||
(extern constructor u64_into_imm_logic u64_into_imm_logic)
|
(extern constructor u64_into_imm_logic u64_into_imm_logic)
|
||||||
|
|
||||||
|
;; Calculate the minimum floating-point bound for a conversion to floating
|
||||||
|
;; point from an integer type.
|
||||||
|
;; Accepts whether the output is signed, the size of the input
|
||||||
|
;; floating point type in bits, and the size of the output integer type
|
||||||
|
;; in bits.
|
||||||
|
(decl min_fp_value (bool u8 u8) Reg)
|
||||||
|
(extern constructor min_fp_value min_fp_value)
|
||||||
|
|
||||||
|
;; Calculate the maximum floating-point bound for a conversion to floating
|
||||||
|
;; point from an integer type.
|
||||||
|
;; Accepts whether the output is signed, the size of the input
|
||||||
|
;; floating point type in bits, and the size of the output integer type
|
||||||
|
;; in bits.
|
||||||
|
(decl max_fp_value (bool u8 u8) Reg)
|
||||||
|
(extern constructor max_fp_value max_fp_value)
|
||||||
|
|
||||||
|
;; Calculate the minimum acceptable floating-point value for a conversion to
|
||||||
|
;; floating point from an integer type.
|
||||||
|
;; Accepts whether the output is signed, the size of the input
|
||||||
|
;; floating point type in bits, and the size of the output integer type
|
||||||
|
;; in bits.
|
||||||
|
(decl min_fp_value_sat (bool u8 u8) Reg)
|
||||||
|
(extern constructor min_fp_value_sat min_fp_value_sat)
|
||||||
|
|
||||||
|
;; Calculate the maximum acceptable floating-point value for a conversion to
|
||||||
|
;; floating point from an integer type.
|
||||||
|
;; Accepts whether the output is signed, the size of the input
|
||||||
|
;; floating point type in bits, and the size of the output integer type
|
||||||
|
;; in bits.
|
||||||
|
(decl max_fp_value_sat (bool u8 u8) Reg)
|
||||||
|
(extern constructor max_fp_value_sat max_fp_value_sat)
|
||||||
|
|
||||||
|
;; Constructs an FPUOpRI.Ushr* given the size in bits of the value (or lane)
|
||||||
|
;; and the amount to shift by.
|
||||||
|
(decl fpu_op_ri_ushr (u8 u8) FPUOpRI)
|
||||||
|
(extern constructor fpu_op_ri_ushr fpu_op_ri_ushr)
|
||||||
|
|
||||||
|
;; Constructs an FPUOpRI.Sli* given the size in bits of the value (or lane)
|
||||||
|
;; and the amount to shift by.
|
||||||
|
(decl fpu_op_ri_sli (u8 u8) FPUOpRI)
|
||||||
|
(extern constructor fpu_op_ri_sli fpu_op_ri_sli)
|
||||||
|
|
||||||
(decl imm12_from_negated_u64 (Imm12) u64)
|
(decl imm12_from_negated_u64 (Imm12) u64)
|
||||||
(extern extractor imm12_from_negated_u64 imm12_from_negated_u64)
|
(extern extractor imm12_from_negated_u64 imm12_from_negated_u64)
|
||||||
|
|
||||||
@@ -1533,6 +1590,12 @@
|
|||||||
(_2 Unit (emit (MInst.VecRRRMod op dst src2 src3 size))))
|
(_2 Unit (emit (MInst.VecRRRMod op dst src2 src3 size))))
|
||||||
dst))
|
dst))
|
||||||
|
|
||||||
|
(decl fpu_rri (FPUOpRI Reg) Reg)
|
||||||
|
(rule (fpu_rri op src)
|
||||||
|
(let ((dst WritableReg (temp_writable_reg $F64))
|
||||||
|
(_ Unit (emit (MInst.FpuRRI op dst src))))
|
||||||
|
dst))
|
||||||
|
|
||||||
;; Helper for emitting `MInst.FpuRRR` instructions.
|
;; Helper for emitting `MInst.FpuRRR` instructions.
|
||||||
(decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg)
|
(decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg)
|
||||||
(rule (fpu_rrr op src1 src2 size)
|
(rule (fpu_rrr op src1 src2 size)
|
||||||
@@ -2611,3 +2674,147 @@
|
|||||||
;; to clobber LR.
|
;; to clobber LR.
|
||||||
(let ((_ Unit (emit (MInst.Xpaclri))))
|
(let ((_ Unit (emit (MInst.Xpaclri))))
|
||||||
(mov_preg (preg_link))))
|
(mov_preg (preg_link))))
|
||||||
|
|
||||||
|
;; Helper for getting the maximum shift amount for a type.
|
||||||
|
|
||||||
|
(decl max_shift (Type) u8)
|
||||||
|
(rule (max_shift $F64) 63)
|
||||||
|
(rule (max_shift $F32) 31)
|
||||||
|
|
||||||
|
;; Helper for generating `fcopysign` instruction sequences.
|
||||||
|
|
||||||
|
(decl fcopy_sign (Reg Reg Type) Reg)
|
||||||
|
(rule (fcopy_sign x y (ty_scalar_float ty))
|
||||||
|
(let ((dst WritableReg (temp_writable_reg $F64))
|
||||||
|
(_ Unit (emit (MInst.FpuMove64 dst x)))
|
||||||
|
(tmp Reg (fpu_rri (fpu_op_ri_ushr (ty_bits ty) (max_shift ty)) y))
|
||||||
|
(_ Unit (emit (MInst.FpuRRI (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst tmp))))
|
||||||
|
dst))
|
||||||
|
(rule (fcopy_sign x y ty @ (multi_lane _ _))
|
||||||
|
(let ((dst WritableReg (temp_writable_reg $I8X16))
|
||||||
|
(_ Unit (emit (MInst.FpuMove128 dst x)))
|
||||||
|
(tmp Reg (vec_shift_imm (VecShiftImmOp.Ushr) (max_shift (lane_type ty)) y (vector_size ty)))
|
||||||
|
(_ Unit (emit (MInst.VecShiftImmMod (VecShiftImmModOp.Sli) dst tmp (vector_size ty) (max_shift (lane_type ty))))))
|
||||||
|
dst))
|
||||||
|
|
||||||
|
;; Helpers for generating `MInst.FpuToInt` instructions.
|
||||||
|
|
||||||
|
(decl fpu_to_int_nan_check (ScalarSize Reg) Reg)
|
||||||
|
(rule (fpu_to_int_nan_check size src)
|
||||||
|
(let ((r ValueRegs
|
||||||
|
(with_flags (fpu_cmp size src src)
|
||||||
|
(ConsumesFlags.ConsumesFlagsReturnsReg
|
||||||
|
(MInst.TrapIf (cond_br_cond (Cond.Vs))
|
||||||
|
(trap_code_bad_conversion_to_integer))
|
||||||
|
src))))
|
||||||
|
(value_regs_get r 0)))
|
||||||
|
|
||||||
|
;; Checks that the value is not less than the minimum bound,
|
||||||
|
;; accepting a boolean (whether the type is signed), input type,
|
||||||
|
;; output type, and registers containing the source and minimum bound.
|
||||||
|
(decl fpu_to_int_underflow_check (bool Type Type Reg Reg) Reg)
|
||||||
|
(rule (fpu_to_int_underflow_check $true $F32 (fits_in_16 out_ty) src min)
|
||||||
|
(let ((r ValueRegs
|
||||||
|
(with_flags (fpu_cmp (ScalarSize.Size32) src min)
|
||||||
|
(ConsumesFlags.ConsumesFlagsReturnsReg
|
||||||
|
(MInst.TrapIf (cond_br_cond (Cond.Le))
|
||||||
|
(trap_code_integer_overflow))
|
||||||
|
src))))
|
||||||
|
(value_regs_get r 0)))
|
||||||
|
(rule (fpu_to_int_underflow_check $true $F64 (fits_in_32 out_ty) src min)
|
||||||
|
(let ((r ValueRegs
|
||||||
|
(with_flags (fpu_cmp (ScalarSize.Size64) src min)
|
||||||
|
(ConsumesFlags.ConsumesFlagsReturnsReg
|
||||||
|
(MInst.TrapIf (cond_br_cond (Cond.Le))
|
||||||
|
(trap_code_integer_overflow))
|
||||||
|
src))))
|
||||||
|
(value_regs_get r 0)))
|
||||||
|
(rule -1 (fpu_to_int_underflow_check $true in_ty _out_ty src min)
|
||||||
|
(let ((r ValueRegs
|
||||||
|
(with_flags (fpu_cmp (scalar_size in_ty) src min)
|
||||||
|
(ConsumesFlags.ConsumesFlagsReturnsReg
|
||||||
|
(MInst.TrapIf (cond_br_cond (Cond.Lt))
|
||||||
|
(trap_code_integer_overflow))
|
||||||
|
src))))
|
||||||
|
(value_regs_get r 0)))
|
||||||
|
(rule (fpu_to_int_underflow_check $false in_ty _out_ty src min)
|
||||||
|
(let ((r ValueRegs
|
||||||
|
(with_flags (fpu_cmp (scalar_size in_ty) src min)
|
||||||
|
(ConsumesFlags.ConsumesFlagsReturnsReg
|
||||||
|
(MInst.TrapIf (cond_br_cond (Cond.Le))
|
||||||
|
(trap_code_integer_overflow))
|
||||||
|
src))))
|
||||||
|
(value_regs_get r 0)))
|
||||||
|
|
||||||
|
(decl fpu_to_int_overflow_check (ScalarSize Reg Reg) Reg)
|
||||||
|
(rule (fpu_to_int_overflow_check size src max)
|
||||||
|
(let ((r ValueRegs
|
||||||
|
(with_flags (fpu_cmp size src max)
|
||||||
|
(ConsumesFlags.ConsumesFlagsReturnsReg
|
||||||
|
(MInst.TrapIf (cond_br_cond (Cond.Ge))
|
||||||
|
(trap_code_integer_overflow))
|
||||||
|
src))))
|
||||||
|
(value_regs_get r 0)))
|
||||||
|
|
||||||
|
;; Emits the appropriate instruction sequence to convert a
|
||||||
|
;; floating-point value to an integer, trapping if the value
|
||||||
|
;; is a NaN or does not fit in the target type.
|
||||||
|
;; Accepts the specific conversion op, the source register,
|
||||||
|
;; whether the input is signed, and finally the input and output
|
||||||
|
;; types.
|
||||||
|
(decl fpu_to_int_cvt (FpuToIntOp Reg bool Type Type) Reg)
|
||||||
|
(rule (fpu_to_int_cvt op src signed in_ty out_ty)
|
||||||
|
(let ((size ScalarSize (scalar_size in_ty))
|
||||||
|
(in_bits u8 (ty_bits in_ty))
|
||||||
|
(out_bits u8 (ty_bits out_ty))
|
||||||
|
(src Reg (fpu_to_int_nan_check size src))
|
||||||
|
(min Reg (min_fp_value signed in_bits out_bits))
|
||||||
|
(src Reg (fpu_to_int_underflow_check signed in_ty out_ty src min))
|
||||||
|
(max Reg (max_fp_value signed in_bits out_bits))
|
||||||
|
(src Reg (fpu_to_int_overflow_check size src max)))
|
||||||
|
(fpu_to_int op src)))
|
||||||
|
|
||||||
|
;; Emits the appropriate instruction sequence to convert a
|
||||||
|
;; floating-point value to an integer, saturating if the value
|
||||||
|
;; does not fit in the target type.
|
||||||
|
;; Accepts the specific conversion op, the source register,
|
||||||
|
;; whether the input is signed, and finally the input and output
|
||||||
|
;; types.
|
||||||
|
(decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type Type) Reg)
|
||||||
|
(rule (fpu_to_int_cvt_sat op src $true in_ty out_ty)
|
||||||
|
(let ((size ScalarSize (scalar_size in_ty))
|
||||||
|
(in_bits u8 (ty_bits in_ty))
|
||||||
|
(out_bits u8 (ty_bits out_ty))
|
||||||
|
(max Reg (max_fp_value_sat $true in_bits out_bits))
|
||||||
|
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
|
||||||
|
(min Reg (min_fp_value_sat $true in_bits out_bits))
|
||||||
|
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
|
||||||
|
(zero Reg (constant_f128 0))
|
||||||
|
(tmp ValueRegs (with_flags (fpu_cmp size src src)
|
||||||
|
(fpu_csel in_ty (Cond.Ne) zero tmp))))
|
||||||
|
(fpu_to_int op (value_regs_get tmp 0))))
|
||||||
|
(rule (fpu_to_int_cvt_sat op src $false in_ty out_ty)
|
||||||
|
(let ((size ScalarSize (scalar_size in_ty))
|
||||||
|
(in_bits u8 (ty_bits in_ty))
|
||||||
|
(out_bits u8 (ty_bits out_ty))
|
||||||
|
(max Reg (max_fp_value_sat $false in_bits out_bits))
|
||||||
|
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
|
||||||
|
(min Reg (min_fp_value_sat $false in_bits out_bits))
|
||||||
|
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
|
||||||
|
(tmp ValueRegs (with_flags (fpu_cmp size src src)
|
||||||
|
(fpu_csel in_ty (Cond.Ne) min tmp))))
|
||||||
|
(fpu_to_int op (value_regs_get tmp 0))))
|
||||||
|
|
||||||
|
(decl fpu_to_int (FpuToIntOp Reg) Reg)
|
||||||
|
(rule (fpu_to_int op src)
|
||||||
|
(let ((dst WritableReg (temp_writable_reg $I64))
|
||||||
|
(_ Unit (emit (MInst.FpuToInt op dst src))))
|
||||||
|
dst))
|
||||||
|
|
||||||
|
;; Helper for generating `MInst.IntToFpu` instructions.
|
||||||
|
|
||||||
|
(decl int_to_fpu (IntToFpuOp Reg) Reg)
|
||||||
|
(rule (int_to_fpu op src)
|
||||||
|
(let ((dst WritableReg (temp_writable_reg $I8X16))
|
||||||
|
(_ Unit (emit (MInst.IntToFpu op dst src))))
|
||||||
|
dst))
|
||||||
|
|||||||
@@ -2033,6 +2033,50 @@ impl MachInstEmit for Inst {
|
|||||||
let rd_enc = machreg_to_vec(rd.to_reg());
|
let rd_enc = machreg_to_vec(rd.to_reg());
|
||||||
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
|
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
|
||||||
}
|
}
|
||||||
|
&Inst::VecShiftImmMod {
|
||||||
|
op,
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
size,
|
||||||
|
imm,
|
||||||
|
} => {
|
||||||
|
let rd = allocs.next_writable(rd);
|
||||||
|
let rn = allocs.next(rn);
|
||||||
|
let (is_shr, mut template) = match op {
|
||||||
|
VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
|
||||||
|
};
|
||||||
|
if size.is_128bits() {
|
||||||
|
template |= 0b1 << 30;
|
||||||
|
}
|
||||||
|
let imm = imm as u32;
|
||||||
|
// Deal with the somewhat strange encoding scheme for, and limits on,
|
||||||
|
// the shift amount.
|
||||||
|
let immh_immb = match (size.lane_size(), is_shr) {
|
||||||
|
(ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
|
||||||
|
0b_1000_000_u32 | (64 - imm)
|
||||||
|
}
|
||||||
|
(ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
|
||||||
|
0b_0100_000_u32 | (32 - imm)
|
||||||
|
}
|
||||||
|
(ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
|
||||||
|
0b_0010_000_u32 | (16 - imm)
|
||||||
|
}
|
||||||
|
(ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
|
||||||
|
0b_0001_000_u32 | (8 - imm)
|
||||||
|
}
|
||||||
|
(ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
|
||||||
|
(ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
|
||||||
|
(ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
|
||||||
|
(ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
|
||||||
|
_ => panic!(
|
||||||
|
"aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {:?}, {:?}, {:?}",
|
||||||
|
op, size, imm
|
||||||
|
),
|
||||||
|
};
|
||||||
|
let rn_enc = machreg_to_vec(rn);
|
||||||
|
let rd_enc = machreg_to_vec(rd.to_reg());
|
||||||
|
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
|
||||||
|
}
|
||||||
&Inst::VecExtract { rd, rn, rm, imm4 } => {
|
&Inst::VecExtract { rd, rn, rm, imm4 } => {
|
||||||
let rd = allocs.next_writable(rd);
|
let rd = allocs.next_writable(rd);
|
||||||
let rn = allocs.next(rn);
|
let rn = allocs.next(rn);
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ pub use crate::isa::aarch64::lower::isle::generated_code::{
|
|||||||
ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3,
|
ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3,
|
||||||
FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp,
|
FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp,
|
||||||
VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp,
|
VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp,
|
||||||
VecRRRLongOp, VecShiftImmOp,
|
VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
|
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
|
||||||
@@ -767,6 +767,10 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
|
|||||||
collector.reg_def(rd);
|
collector.reg_def(rd);
|
||||||
collector.reg_use(rn);
|
collector.reg_use(rn);
|
||||||
}
|
}
|
||||||
|
&Inst::VecShiftImmMod { rd, rn, .. } => {
|
||||||
|
collector.reg_mod(rd);
|
||||||
|
collector.reg_use(rn);
|
||||||
|
}
|
||||||
&Inst::VecExtract { rd, rn, rm, .. } => {
|
&Inst::VecExtract { rd, rn, rm, .. } => {
|
||||||
collector.reg_def(rd);
|
collector.reg_def(rd);
|
||||||
collector.reg_use(rn);
|
collector.reg_use(rn);
|
||||||
@@ -2371,6 +2375,20 @@ impl Inst {
|
|||||||
let rn = pretty_print_vreg_vector(rn, size, allocs);
|
let rn = pretty_print_vreg_vector(rn, size, allocs);
|
||||||
format!("{} {}, {}, #{}", op, rd, rn, imm)
|
format!("{} {}, {}, #{}", op, rd, rn, imm)
|
||||||
}
|
}
|
||||||
|
&Inst::VecShiftImmMod {
|
||||||
|
op,
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
size,
|
||||||
|
imm,
|
||||||
|
} => {
|
||||||
|
let op = match op {
|
||||||
|
VecShiftImmModOp::Sli => "sli",
|
||||||
|
};
|
||||||
|
let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
|
||||||
|
let rn = pretty_print_vreg_vector(rn, size, allocs);
|
||||||
|
format!("{} {}, {}, #{}", op, rd, rn, imm)
|
||||||
|
}
|
||||||
&Inst::VecExtract { rd, rn, rm, imm4 } => {
|
&Inst::VecExtract { rd, rn, rm, imm4 } => {
|
||||||
let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
|
let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
|
||||||
let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs);
|
let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs);
|
||||||
|
|||||||
@@ -406,6 +406,119 @@
|
|||||||
(rule (lower (has_type (ty_scalar_float ty) (fma x y z)))
|
(rule (lower (has_type (ty_scalar_float ty) (fma x y z)))
|
||||||
(fpu_rrrr (FPUOp3.MAdd) (scalar_size ty) x y z))
|
(fpu_rrrr (FPUOp3.MAdd) (scalar_size ty) x y z))
|
||||||
|
|
||||||
|
;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(rule (lower (has_type ty (fcopysign x y)))
|
||||||
|
(fcopy_sign x y ty))
|
||||||
|
|
||||||
|
;;;; Rules for `fcvt_to_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint x @ (value_type $F32))))
|
||||||
|
(fpu_to_int_cvt (FpuToIntOp.F32ToU32) x $false $F32 out_ty))
|
||||||
|
|
||||||
|
(rule (lower (has_type $I64 (fcvt_to_uint x @ (value_type $F32))))
|
||||||
|
(fpu_to_int_cvt (FpuToIntOp.F32ToU64) x $false $F32 $I64))
|
||||||
|
|
||||||
|
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint x @ (value_type $F64))))
|
||||||
|
(fpu_to_int_cvt (FpuToIntOp.F64ToU32) x $false $F64 out_ty))
|
||||||
|
|
||||||
|
(rule (lower (has_type $I64 (fcvt_to_uint x @ (value_type $F64))))
|
||||||
|
(fpu_to_int_cvt (FpuToIntOp.F64ToU64) x $false $F64 $I64))
|
||||||
|
|
||||||
|
;;;; Rules for `fcvt_to_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint x @ (value_type $F32))))
|
||||||
|
(fpu_to_int_cvt (FpuToIntOp.F32ToI32) x $true $F32 out_ty))
|
||||||
|
|
||||||
|
(rule (lower (has_type $I64 (fcvt_to_sint x @ (value_type $F32))))
|
||||||
|
(fpu_to_int_cvt (FpuToIntOp.F32ToI64) x $true $F32 $I64))
|
||||||
|
|
||||||
|
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint x @ (value_type $F64))))
|
||||||
|
(fpu_to_int_cvt (FpuToIntOp.F64ToI32) x $true $F64 out_ty))
|
||||||
|
|
||||||
|
(rule (lower (has_type $I64 (fcvt_to_sint x @ (value_type $F64))))
|
||||||
|
(fpu_to_int_cvt (FpuToIntOp.F64ToI64) x $true $F64 $I64))
|
||||||
|
|
||||||
|
;;;; Rules for `fcvt_from_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_from_uint x @ (value_type (multi_lane 32 _)))))
|
||||||
|
(vec_misc (VecMisc2.Ucvtf) x (vector_size ty)))
|
||||||
|
|
||||||
|
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_from_uint x @ (value_type (multi_lane 64 _)))))
|
||||||
|
(vec_misc (VecMisc2.Ucvtf) x (vector_size ty)))
|
||||||
|
|
||||||
|
(rule (lower (has_type $F32 (fcvt_from_uint x @ (value_type (fits_in_32 _)))))
|
||||||
|
(int_to_fpu (IntToFpuOp.U32ToF32) (put_in_reg_zext32 x)))
|
||||||
|
|
||||||
|
(rule (lower (has_type $F64 (fcvt_from_uint x @ (value_type (fits_in_32 _)))))
|
||||||
|
(int_to_fpu (IntToFpuOp.U32ToF64) (put_in_reg_zext32 x)))
|
||||||
|
|
||||||
|
(rule (lower (has_type $F32 (fcvt_from_uint x @ (value_type $I64))))
|
||||||
|
(int_to_fpu (IntToFpuOp.U64ToF32) x))
|
||||||
|
|
||||||
|
(rule (lower (has_type $F64 (fcvt_from_uint x @ (value_type $I64))))
|
||||||
|
(int_to_fpu (IntToFpuOp.U64ToF64) x))
|
||||||
|
|
||||||
|
;;;; Rules for `fcvt_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_from_sint x @ (value_type (multi_lane 32 _)))))
|
||||||
|
(vec_misc (VecMisc2.Scvtf) x (vector_size ty)))
|
||||||
|
|
||||||
|
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_from_sint x @ (value_type (multi_lane 64 _)))))
|
||||||
|
(vec_misc (VecMisc2.Scvtf) x (vector_size ty)))
|
||||||
|
|
||||||
|
(rule (lower (has_type $F32 (fcvt_from_sint x @ (value_type (fits_in_32 _)))))
|
||||||
|
(int_to_fpu (IntToFpuOp.I32ToF32) (put_in_reg_sext32 x)))
|
||||||
|
|
||||||
|
(rule (lower (has_type $F64 (fcvt_from_sint x @ (value_type (fits_in_32 _)))))
|
||||||
|
(int_to_fpu (IntToFpuOp.I32ToF64) (put_in_reg_sext32 x)))
|
||||||
|
|
||||||
|
(rule (lower (has_type $F32 (fcvt_from_sint x @ (value_type $I64))))
|
||||||
|
(int_to_fpu (IntToFpuOp.I64ToF32) x))
|
||||||
|
|
||||||
|
(rule (lower (has_type $F64 (fcvt_from_sint x @ (value_type $I64))))
|
||||||
|
(int_to_fpu (IntToFpuOp.I64ToF64) x))
|
||||||
|
|
||||||
|
;;;; Rules for `fcvt_to_uint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 32 _)))))
|
||||||
|
(vec_misc (VecMisc2.Fcvtzu) x (vector_size ty)))
|
||||||
|
|
||||||
|
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 64 _)))))
|
||||||
|
(vec_misc (VecMisc2.Fcvtzu) x (vector_size ty)))
|
||||||
|
|
||||||
|
(rule (lower (has_type $I32 (fcvt_to_uint_sat x @ (value_type $F32))))
|
||||||
|
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU32) x $false $F32 $I32))
|
||||||
|
|
||||||
|
(rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F32))))
|
||||||
|
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU64) x $false $F32 $I64))
|
||||||
|
|
||||||
|
(rule (lower (has_type $I32 (fcvt_to_uint_sat x @ (value_type $F64))))
|
||||||
|
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU32) x $false $F64 $I32))
|
||||||
|
|
||||||
|
(rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F64))))
|
||||||
|
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU64) x $false $F64 $I64))
|
||||||
|
|
||||||
|
;;;; Rules for `fcvt_to_sint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 32 _)))))
|
||||||
|
(vec_misc (VecMisc2.Fcvtzs) x (vector_size ty)))
|
||||||
|
|
||||||
|
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 64 _)))))
|
||||||
|
(vec_misc (VecMisc2.Fcvtzs) x (vector_size ty)))
|
||||||
|
|
||||||
|
(rule (lower (has_type $I32 (fcvt_to_sint_sat x @ (value_type $F32))))
|
||||||
|
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI32) x $true $F32 $I32))
|
||||||
|
|
||||||
|
(rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F32))))
|
||||||
|
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI64) x $true $F32 $I64))
|
||||||
|
|
||||||
|
(rule (lower (has_type $I32 (fcvt_to_sint_sat x @ (value_type $F64))))
|
||||||
|
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI32) x $true $F64 $I32))
|
||||||
|
|
||||||
|
(rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F64))))
|
||||||
|
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI64) x $true $F64 $I64))
|
||||||
|
|
||||||
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
;; `i64` and smaller
|
;; `i64` and smaller
|
||||||
|
|||||||
@@ -1065,17 +1065,6 @@ pub(crate) fn condcode_is_signed(cc: IntCC) -> bool {
|
|||||||
//=============================================================================
|
//=============================================================================
|
||||||
// Helpers for instruction lowering.
|
// Helpers for instruction lowering.
|
||||||
|
|
||||||
pub(crate) fn choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T {
|
|
||||||
let bits = ty_bits(ty);
|
|
||||||
if bits <= 32 {
|
|
||||||
op32
|
|
||||||
} else if bits == 64 {
|
|
||||||
op64
|
|
||||||
} else {
|
|
||||||
panic!("choose_32_64 on > 64 bits!")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Checks for an instance of `op` feeding the given input.
|
/// Checks for an instance of `op` feeding the given input.
|
||||||
pub(crate) fn maybe_input_insn(
|
pub(crate) fn maybe_input_insn(
|
||||||
c: &mut Lower<Inst>,
|
c: &mut Lower<Inst>,
|
||||||
|
|||||||
@@ -5,12 +5,13 @@ pub mod generated_code;
|
|||||||
|
|
||||||
// Types that the generated ISLE code uses via `use super::*`.
|
// Types that the generated ISLE code uses via `use super::*`.
|
||||||
use super::{
|
use super::{
|
||||||
insn_inputs, lower_constant_f128, lower_constant_f64, writable_zero_reg, zero_reg, AMode,
|
insn_inputs, lower_constant_f128, lower_constant_f32, lower_constant_f64, writable_zero_reg,
|
||||||
ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp,
|
zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond,
|
||||||
FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo, MachLabel,
|
CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC,
|
||||||
MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg, ScalarSize,
|
JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize,
|
||||||
ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
|
PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
|
||||||
};
|
};
|
||||||
|
use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm};
|
||||||
use crate::isa::aarch64::lower::{lower_address, lower_splat_const};
|
use crate::isa::aarch64::lower::{lower_address, lower_splat_const};
|
||||||
use crate::isa::aarch64::settings::Flags as IsaFlags;
|
use crate::isa::aarch64::settings::Flags as IsaFlags;
|
||||||
use crate::machinst::{isle::*, InputSourceInst};
|
use crate::machinst::{isle::*, InputSourceInst};
|
||||||
@@ -519,4 +520,198 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
|
|||||||
fn preg_link(&mut self) -> PReg {
|
fn preg_link(&mut self) -> PReg {
|
||||||
super::regs::link_reg().to_real_reg().unwrap().into()
|
super::regs::link_reg().to_real_reg().unwrap().into()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn min_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
|
||||||
|
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
|
||||||
|
|
||||||
|
if in_bits == 32 {
|
||||||
|
// From float32.
|
||||||
|
let min = match (signed, out_bits) {
|
||||||
|
(true, 8) => i8::MIN as f32 - 1.,
|
||||||
|
(true, 16) => i16::MIN as f32 - 1.,
|
||||||
|
(true, 32) => i32::MIN as f32, // I32_MIN - 1 isn't precisely representable as a f32.
|
||||||
|
(true, 64) => i64::MIN as f32, // I64_MIN - 1 isn't precisely representable as a f32.
|
||||||
|
|
||||||
|
(false, _) => -1.,
|
||||||
|
_ => unimplemented!(
|
||||||
|
"unexpected {} output size of {} bits for 32-bit input",
|
||||||
|
if signed { "signed" } else { "unsigned" },
|
||||||
|
out_bits
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
lower_constant_f32(self.lower_ctx, tmp, min);
|
||||||
|
} else if in_bits == 64 {
|
||||||
|
// From float64.
|
||||||
|
let min = match (signed, out_bits) {
|
||||||
|
(true, 8) => i8::MIN as f64 - 1.,
|
||||||
|
(true, 16) => i16::MIN as f64 - 1.,
|
||||||
|
(true, 32) => i32::MIN as f64 - 1.,
|
||||||
|
(true, 64) => i64::MIN as f64,
|
||||||
|
|
||||||
|
(false, _) => -1.,
|
||||||
|
_ => unimplemented!(
|
||||||
|
"unexpected {} output size of {} bits for 64-bit input",
|
||||||
|
if signed { "signed" } else { "unsigned" },
|
||||||
|
out_bits
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
lower_constant_f64(self.lower_ctx, tmp, min);
|
||||||
|
} else {
|
||||||
|
unimplemented!(
|
||||||
|
"unexpected input size for min_fp_value: {} (signed: {}, output size: {})",
|
||||||
|
in_bits,
|
||||||
|
signed,
|
||||||
|
out_bits
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp.to_reg()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn max_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
|
||||||
|
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
|
||||||
|
|
||||||
|
if in_bits == 32 {
|
||||||
|
// From float32.
|
||||||
|
let max = match (signed, out_bits) {
|
||||||
|
(true, 8) => i8::MAX as f32 + 1.,
|
||||||
|
(true, 16) => i16::MAX as f32 + 1.,
|
||||||
|
(true, 32) => (i32::MAX as u64 + 1) as f32,
|
||||||
|
(true, 64) => (i64::MAX as u64 + 1) as f32,
|
||||||
|
|
||||||
|
(false, 8) => u8::MAX as f32 + 1.,
|
||||||
|
(false, 16) => u16::MAX as f32 + 1.,
|
||||||
|
(false, 32) => (u32::MAX as u64 + 1) as f32,
|
||||||
|
(false, 64) => (u64::MAX as u128 + 1) as f32,
|
||||||
|
_ => unimplemented!(
|
||||||
|
"unexpected {} output size of {} bits for 32-bit input",
|
||||||
|
if signed { "signed" } else { "unsigned" },
|
||||||
|
out_bits
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
lower_constant_f32(self.lower_ctx, tmp, max);
|
||||||
|
} else if in_bits == 64 {
|
||||||
|
// From float64.
|
||||||
|
let max = match (signed, out_bits) {
|
||||||
|
(true, 8) => i8::MAX as f64 + 1.,
|
||||||
|
(true, 16) => i16::MAX as f64 + 1.,
|
||||||
|
(true, 32) => i32::MAX as f64 + 1.,
|
||||||
|
(true, 64) => (i64::MAX as u64 + 1) as f64,
|
||||||
|
|
||||||
|
(false, 8) => u8::MAX as f64 + 1.,
|
||||||
|
(false, 16) => u16::MAX as f64 + 1.,
|
||||||
|
(false, 32) => u32::MAX as f64 + 1.,
|
||||||
|
(false, 64) => (u64::MAX as u128 + 1) as f64,
|
||||||
|
_ => unimplemented!(
|
||||||
|
"unexpected {} output size of {} bits for 64-bit input",
|
||||||
|
if signed { "signed" } else { "unsigned" },
|
||||||
|
out_bits
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
lower_constant_f64(self.lower_ctx, tmp, max);
|
||||||
|
} else {
|
||||||
|
unimplemented!(
|
||||||
|
"unexpected input size for max_fp_value: {} (signed: {}, output size: {})",
|
||||||
|
in_bits,
|
||||||
|
signed,
|
||||||
|
out_bits
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp.to_reg()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn min_fp_value_sat(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
|
||||||
|
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
|
||||||
|
|
||||||
|
let min: f64 = match (out_bits, signed) {
|
||||||
|
(32, true) => i32::MIN as f64,
|
||||||
|
(32, false) => 0.0,
|
||||||
|
(64, true) => i64::MIN as f64,
|
||||||
|
(64, false) => 0.0,
|
||||||
|
_ => unimplemented!(
|
||||||
|
"unexpected {} output size of {} bits",
|
||||||
|
if signed { "signed" } else { "unsigned" },
|
||||||
|
out_bits
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
if in_bits == 32 {
|
||||||
|
lower_constant_f32(self.lower_ctx, tmp, min as f32)
|
||||||
|
} else if in_bits == 64 {
|
||||||
|
lower_constant_f64(self.lower_ctx, tmp, min)
|
||||||
|
} else {
|
||||||
|
unimplemented!(
|
||||||
|
"unexpected input size for min_fp_value_sat: {} (signed: {}, output size: {})",
|
||||||
|
in_bits,
|
||||||
|
signed,
|
||||||
|
out_bits
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp.to_reg()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn max_fp_value_sat(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
|
||||||
|
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
|
||||||
|
|
||||||
|
let max = match (out_bits, signed) {
|
||||||
|
(32, true) => i32::MAX as f64,
|
||||||
|
(32, false) => u32::MAX as f64,
|
||||||
|
(64, true) => i64::MAX as f64,
|
||||||
|
(64, false) => u64::MAX as f64,
|
||||||
|
_ => unimplemented!(
|
||||||
|
"unexpected {} output size of {} bits",
|
||||||
|
if signed { "signed" } else { "unsigned" },
|
||||||
|
out_bits
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
if in_bits == 32 {
|
||||||
|
lower_constant_f32(self.lower_ctx, tmp, max as f32)
|
||||||
|
} else if in_bits == 64 {
|
||||||
|
lower_constant_f64(self.lower_ctx, tmp, max)
|
||||||
|
} else {
|
||||||
|
unimplemented!(
|
||||||
|
"unexpected input size for max_fp_value_sat: {} (signed: {}, output size: {})",
|
||||||
|
in_bits,
|
||||||
|
signed,
|
||||||
|
out_bits
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp.to_reg()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fpu_op_ri_ushr(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI {
|
||||||
|
if ty_bits == 32 {
|
||||||
|
FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
|
||||||
|
} else if ty_bits == 64 {
|
||||||
|
FPUOpRI::UShr64(FPURightShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
|
||||||
|
} else {
|
||||||
|
unimplemented!(
|
||||||
|
"unexpected input size for fpu_op_ri_ushr: {} (shift: {})",
|
||||||
|
ty_bits,
|
||||||
|
shift
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fpu_op_ri_sli(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI {
|
||||||
|
if ty_bits == 32 {
|
||||||
|
FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
|
||||||
|
} else if ty_bits == 64 {
|
||||||
|
FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
|
||||||
|
} else {
|
||||||
|
unimplemented!(
|
||||||
|
"unexpected input size for fpu_op_ri_sli: {} (shift: {})",
|
||||||
|
ty_bits,
|
||||||
|
shift
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,10 +2,9 @@
|
|||||||
|
|
||||||
use super::lower::*;
|
use super::lower::*;
|
||||||
use crate::binemit::CodeOffset;
|
use crate::binemit::CodeOffset;
|
||||||
use crate::ir::condcodes::FloatCC;
|
|
||||||
use crate::ir::types::*;
|
use crate::ir::types::*;
|
||||||
use crate::ir::Inst as IRInst;
|
use crate::ir::Inst as IRInst;
|
||||||
use crate::ir::{InstructionData, Opcode, TrapCode};
|
use crate::ir::{InstructionData, Opcode};
|
||||||
use crate::isa::aarch64::abi::*;
|
use crate::isa::aarch64::abi::*;
|
||||||
use crate::isa::aarch64::inst::*;
|
use crate::isa::aarch64::inst::*;
|
||||||
use crate::isa::aarch64::settings as aarch64_settings;
|
use crate::isa::aarch64::settings as aarch64_settings;
|
||||||
@@ -978,408 +977,13 @@ pub(crate) fn lower_insn_to_regs(
|
|||||||
|
|
||||||
Opcode::Fma => implemented_in_isle(ctx),
|
Opcode::Fma => implemented_in_isle(ctx),
|
||||||
|
|
||||||
Opcode::Fcopysign => {
|
Opcode::Fcopysign => implemented_in_isle(ctx),
|
||||||
// Copy the sign bit from inputs[1] to inputs[0]. We use the following sequence:
|
|
||||||
//
|
|
||||||
// This is a scalar Fcopysign.
|
|
||||||
// This uses scalar NEON operations for 64-bit and vector operations (2S) for 32-bit.
|
|
||||||
// In the latter case it still sets all bits except the lowest 32 to 0.
|
|
||||||
//
|
|
||||||
// mov vd, vn
|
|
||||||
// ushr vtmp, vm, #63 / #31
|
|
||||||
// sli vd, vtmp, #63 / #31
|
|
||||||
|
|
||||||
let ty = ctx.output_ty(insn, 0);
|
Opcode::FcvtToUint | Opcode::FcvtToSint => implemented_in_isle(ctx),
|
||||||
|
|
||||||
if ty != F32 && ty != F64 {
|
Opcode::FcvtFromUint | Opcode::FcvtFromSint => implemented_in_isle(ctx),
|
||||||
return Err(CodegenError::Unsupported(format!(
|
|
||||||
"Fcopysign: Unsupported type: {:?}",
|
|
||||||
ty
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
let bits = ty_bits(ty) as u8;
|
Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => implemented_in_isle(ctx),
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
|
||||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
|
||||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
|
||||||
let tmp = ctx.alloc_tmp(F64).only_reg().unwrap();
|
|
||||||
|
|
||||||
// Copy LHS to rd.
|
|
||||||
ctx.emit(Inst::gen_move(rd, rn, ty));
|
|
||||||
|
|
||||||
// Copy the sign bit to the lowest bit in tmp.
|
|
||||||
let imm = FPURightShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
|
|
||||||
ctx.emit(Inst::FpuRRI {
|
|
||||||
fpu_op: choose_32_64(ty, FPUOpRI::UShr32(imm), FPUOpRI::UShr64(imm)),
|
|
||||||
rd: tmp,
|
|
||||||
rn: rm,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Insert the bit from tmp into the sign bit of rd.
|
|
||||||
let imm = FPULeftShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
|
|
||||||
ctx.emit(Inst::FpuRRI {
|
|
||||||
fpu_op: choose_32_64(ty, FPUOpRI::Sli32(imm), FPUOpRI::Sli64(imm)),
|
|
||||||
rd,
|
|
||||||
rn: tmp.to_reg(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
Opcode::FcvtToUint | Opcode::FcvtToSint => {
|
|
||||||
let input_ty = ctx.input_ty(insn, 0);
|
|
||||||
let in_bits = ty_bits(input_ty);
|
|
||||||
let output_ty = ty.unwrap();
|
|
||||||
let out_bits = ty_bits(output_ty);
|
|
||||||
let signed = op == Opcode::FcvtToSint;
|
|
||||||
let op = match (signed, in_bits, out_bits) {
|
|
||||||
(false, 32, 8) | (false, 32, 16) | (false, 32, 32) => FpuToIntOp::F32ToU32,
|
|
||||||
(true, 32, 8) | (true, 32, 16) | (true, 32, 32) => FpuToIntOp::F32ToI32,
|
|
||||||
(false, 32, 64) => FpuToIntOp::F32ToU64,
|
|
||||||
(true, 32, 64) => FpuToIntOp::F32ToI64,
|
|
||||||
(false, 64, 8) | (false, 64, 16) | (false, 64, 32) => FpuToIntOp::F64ToU32,
|
|
||||||
(true, 64, 8) | (true, 64, 16) | (true, 64, 32) => FpuToIntOp::F64ToI32,
|
|
||||||
(false, 64, 64) => FpuToIntOp::F64ToU64,
|
|
||||||
(true, 64, 64) => FpuToIntOp::F64ToI64,
|
|
||||||
_ => {
|
|
||||||
return Err(CodegenError::Unsupported(format!(
|
|
||||||
"{}: Unsupported types: {:?} -> {:?}",
|
|
||||||
op, input_ty, output_ty
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
|
||||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
|
||||||
|
|
||||||
// First, check the output: it's important to carry the NaN conversion before the
|
|
||||||
// in-bounds conversion, per wasm semantics.
|
|
||||||
|
|
||||||
// Check that the input is not a NaN.
|
|
||||||
ctx.emit(Inst::FpuCmp {
|
|
||||||
size: ScalarSize::from_ty(input_ty),
|
|
||||||
rn,
|
|
||||||
rm: rn,
|
|
||||||
});
|
|
||||||
let trap_code = TrapCode::BadConversionToInteger;
|
|
||||||
ctx.emit(Inst::TrapIf {
|
|
||||||
trap_code,
|
|
||||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Unordered)),
|
|
||||||
});
|
|
||||||
|
|
||||||
let tmp = ctx.alloc_tmp(I8X16).only_reg().unwrap();
|
|
||||||
|
|
||||||
// Check that the input is in range, with "truncate towards zero" semantics. This means
|
|
||||||
// we allow values that are slightly out of range:
|
|
||||||
// - for signed conversions, we allow values strictly greater than INT_MIN-1 (when this
|
|
||||||
// can be represented), and strictly less than INT_MAX+1 (when this can be
|
|
||||||
// represented).
|
|
||||||
// - for unsigned conversions, we allow values strictly greater than -1, and strictly
|
|
||||||
// less than UINT_MAX+1 (when this can be represented).
|
|
||||||
|
|
||||||
if in_bits == 32 {
|
|
||||||
// From float32.
|
|
||||||
let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
|
|
||||||
(true, 8) => (
|
|
||||||
i8::min_value() as f32 - 1.,
|
|
||||||
FloatCC::GreaterThan,
|
|
||||||
i8::max_value() as f32 + 1.,
|
|
||||||
),
|
|
||||||
(true, 16) => (
|
|
||||||
i16::min_value() as f32 - 1.,
|
|
||||||
FloatCC::GreaterThan,
|
|
||||||
i16::max_value() as f32 + 1.,
|
|
||||||
),
|
|
||||||
(true, 32) => (
|
|
||||||
i32::min_value() as f32, // I32_MIN - 1 isn't precisely representable as a f32.
|
|
||||||
FloatCC::GreaterThanOrEqual,
|
|
||||||
i32::max_value() as f32 + 1.,
|
|
||||||
),
|
|
||||||
(true, 64) => (
|
|
||||||
i64::min_value() as f32, // I64_MIN - 1 isn't precisely representable as a f32.
|
|
||||||
FloatCC::GreaterThanOrEqual,
|
|
||||||
i64::max_value() as f32 + 1.,
|
|
||||||
),
|
|
||||||
(false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f32 + 1.),
|
|
||||||
(false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f32 + 1.),
|
|
||||||
(false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f32 + 1.),
|
|
||||||
(false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f32 + 1.),
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
|
|
||||||
// >= low_bound
|
|
||||||
lower_constant_f32(ctx, tmp, low_bound);
|
|
||||||
ctx.emit(Inst::FpuCmp {
|
|
||||||
size: ScalarSize::Size32,
|
|
||||||
rn,
|
|
||||||
rm: tmp.to_reg(),
|
|
||||||
});
|
|
||||||
let trap_code = TrapCode::IntegerOverflow;
|
|
||||||
ctx.emit(Inst::TrapIf {
|
|
||||||
trap_code,
|
|
||||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
|
|
||||||
});
|
|
||||||
|
|
||||||
// <= high_bound
|
|
||||||
lower_constant_f32(ctx, tmp, high_bound);
|
|
||||||
ctx.emit(Inst::FpuCmp {
|
|
||||||
size: ScalarSize::Size32,
|
|
||||||
rn,
|
|
||||||
rm: tmp.to_reg(),
|
|
||||||
});
|
|
||||||
let trap_code = TrapCode::IntegerOverflow;
|
|
||||||
ctx.emit(Inst::TrapIf {
|
|
||||||
trap_code,
|
|
||||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
// From float64.
|
|
||||||
let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
|
|
||||||
(true, 8) => (
|
|
||||||
i8::min_value() as f64 - 1.,
|
|
||||||
FloatCC::GreaterThan,
|
|
||||||
i8::max_value() as f64 + 1.,
|
|
||||||
),
|
|
||||||
(true, 16) => (
|
|
||||||
i16::min_value() as f64 - 1.,
|
|
||||||
FloatCC::GreaterThan,
|
|
||||||
i16::max_value() as f64 + 1.,
|
|
||||||
),
|
|
||||||
(true, 32) => (
|
|
||||||
i32::min_value() as f64 - 1.,
|
|
||||||
FloatCC::GreaterThan,
|
|
||||||
i32::max_value() as f64 + 1.,
|
|
||||||
),
|
|
||||||
(true, 64) => (
|
|
||||||
i64::min_value() as f64, // I64_MIN - 1 is not precisely representable as an i64.
|
|
||||||
FloatCC::GreaterThanOrEqual,
|
|
||||||
i64::max_value() as f64 + 1.,
|
|
||||||
),
|
|
||||||
(false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f64 + 1.),
|
|
||||||
(false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f64 + 1.),
|
|
||||||
(false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f64 + 1.),
|
|
||||||
(false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f64 + 1.),
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
|
|
||||||
// >= low_bound
|
|
||||||
lower_constant_f64(ctx, tmp, low_bound);
|
|
||||||
ctx.emit(Inst::FpuCmp {
|
|
||||||
size: ScalarSize::Size64,
|
|
||||||
rn,
|
|
||||||
rm: tmp.to_reg(),
|
|
||||||
});
|
|
||||||
let trap_code = TrapCode::IntegerOverflow;
|
|
||||||
ctx.emit(Inst::TrapIf {
|
|
||||||
trap_code,
|
|
||||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
|
|
||||||
});
|
|
||||||
|
|
||||||
// <= high_bound
|
|
||||||
lower_constant_f64(ctx, tmp, high_bound);
|
|
||||||
ctx.emit(Inst::FpuCmp {
|
|
||||||
size: ScalarSize::Size64,
|
|
||||||
rn,
|
|
||||||
rm: tmp.to_reg(),
|
|
||||||
});
|
|
||||||
let trap_code = TrapCode::IntegerOverflow;
|
|
||||||
ctx.emit(Inst::TrapIf {
|
|
||||||
trap_code,
|
|
||||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
// Do the conversion.
|
|
||||||
ctx.emit(Inst::FpuToInt { op, rd, rn });
|
|
||||||
}
|
|
||||||
|
|
||||||
Opcode::FcvtFromUint | Opcode::FcvtFromSint => {
|
|
||||||
let input_ty = ctx.input_ty(insn, 0);
|
|
||||||
let ty = ty.unwrap();
|
|
||||||
let signed = op == Opcode::FcvtFromSint;
|
|
||||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
|
||||||
|
|
||||||
if ty.is_vector() {
|
|
||||||
if input_ty.lane_bits() != ty.lane_bits() {
|
|
||||||
return Err(CodegenError::Unsupported(format!(
|
|
||||||
"{}: Unsupported types: {:?} -> {:?}",
|
|
||||||
op, input_ty, ty
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
let op = if signed {
|
|
||||||
VecMisc2::Scvtf
|
|
||||||
} else {
|
|
||||||
VecMisc2::Ucvtf
|
|
||||||
};
|
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
|
||||||
|
|
||||||
ctx.emit(Inst::VecMisc {
|
|
||||||
op,
|
|
||||||
rd,
|
|
||||||
rn,
|
|
||||||
size: VectorSize::from_ty(ty),
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
let in_bits = ty_bits(input_ty);
|
|
||||||
let out_bits = ty_bits(ty);
|
|
||||||
let op = match (signed, in_bits, out_bits) {
|
|
||||||
(false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
|
|
||||||
(true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
|
|
||||||
(false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
|
|
||||||
(true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
|
|
||||||
(false, 64, 32) => IntToFpuOp::U64ToF32,
|
|
||||||
(true, 64, 32) => IntToFpuOp::I64ToF32,
|
|
||||||
(false, 64, 64) => IntToFpuOp::U64ToF64,
|
|
||||||
(true, 64, 64) => IntToFpuOp::I64ToF64,
|
|
||||||
_ => {
|
|
||||||
return Err(CodegenError::Unsupported(format!(
|
|
||||||
"{}: Unsupported types: {:?} -> {:?}",
|
|
||||||
op, input_ty, ty
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let narrow_mode = match (signed, in_bits) {
|
|
||||||
(false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
|
|
||||||
(true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
|
|
||||||
(false, 64) => NarrowValueMode::ZeroExtend64,
|
|
||||||
(true, 64) => NarrowValueMode::SignExtend64,
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
|
||||||
ctx.emit(Inst::IntToFpu { op, rd, rn });
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => {
|
|
||||||
let in_ty = ctx.input_ty(insn, 0);
|
|
||||||
let ty = ty.unwrap();
|
|
||||||
let out_signed = op == Opcode::FcvtToSintSat;
|
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
|
||||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
|
||||||
|
|
||||||
if ty.is_vector() {
|
|
||||||
if in_ty.lane_bits() != ty.lane_bits() {
|
|
||||||
return Err(CodegenError::Unsupported(format!(
|
|
||||||
"{}: Unsupported types: {:?} -> {:?}",
|
|
||||||
op, in_ty, ty
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
let op = if out_signed {
|
|
||||||
VecMisc2::Fcvtzs
|
|
||||||
} else {
|
|
||||||
VecMisc2::Fcvtzu
|
|
||||||
};
|
|
||||||
|
|
||||||
ctx.emit(Inst::VecMisc {
|
|
||||||
op,
|
|
||||||
rd,
|
|
||||||
rn,
|
|
||||||
size: VectorSize::from_ty(ty),
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
let in_bits = ty_bits(in_ty);
|
|
||||||
let out_bits = ty_bits(ty);
|
|
||||||
// FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
|
|
||||||
// FMIN Vtmp2, Vin, Vtmp1
|
|
||||||
// FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN
|
|
||||||
// FMAX Vtmp2, Vtmp2, Vtmp1
|
|
||||||
// (if signed) FIMM Vtmp1, 0
|
|
||||||
// FCMP Vin, Vin
|
|
||||||
// FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0
|
|
||||||
// convert Rout, Vtmp2
|
|
||||||
|
|
||||||
assert!(in_ty.is_float() && (in_bits == 32 || in_bits == 64));
|
|
||||||
assert!(out_bits == 32 || out_bits == 64);
|
|
||||||
|
|
||||||
let min: f64 = match (out_bits, out_signed) {
|
|
||||||
(32, true) => std::i32::MIN as f64,
|
|
||||||
(32, false) => 0.0,
|
|
||||||
(64, true) => std::i64::MIN as f64,
|
|
||||||
(64, false) => 0.0,
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let max = match (out_bits, out_signed) {
|
|
||||||
(32, true) => std::i32::MAX as f64,
|
|
||||||
(32, false) => std::u32::MAX as f64,
|
|
||||||
(64, true) => std::i64::MAX as f64,
|
|
||||||
(64, false) => std::u64::MAX as f64,
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let rtmp1 = ctx.alloc_tmp(in_ty).only_reg().unwrap();
|
|
||||||
let rtmp2 = ctx.alloc_tmp(in_ty).only_reg().unwrap();
|
|
||||||
|
|
||||||
if in_bits == 32 {
|
|
||||||
lower_constant_f32(ctx, rtmp1, max as f32);
|
|
||||||
} else {
|
|
||||||
lower_constant_f64(ctx, rtmp1, max);
|
|
||||||
}
|
|
||||||
ctx.emit(Inst::FpuRRR {
|
|
||||||
fpu_op: FPUOp2::Min,
|
|
||||||
size: ScalarSize::from_ty(in_ty),
|
|
||||||
rd: rtmp2,
|
|
||||||
rn,
|
|
||||||
rm: rtmp1.to_reg(),
|
|
||||||
});
|
|
||||||
if in_bits == 32 {
|
|
||||||
lower_constant_f32(ctx, rtmp1, min as f32);
|
|
||||||
} else {
|
|
||||||
lower_constant_f64(ctx, rtmp1, min);
|
|
||||||
}
|
|
||||||
ctx.emit(Inst::FpuRRR {
|
|
||||||
fpu_op: FPUOp2::Max,
|
|
||||||
size: ScalarSize::from_ty(in_ty),
|
|
||||||
rd: rtmp2,
|
|
||||||
rn: rtmp2.to_reg(),
|
|
||||||
rm: rtmp1.to_reg(),
|
|
||||||
});
|
|
||||||
if out_signed {
|
|
||||||
if in_bits == 32 {
|
|
||||||
lower_constant_f32(ctx, rtmp1, 0.0);
|
|
||||||
} else {
|
|
||||||
lower_constant_f64(ctx, rtmp1, 0.0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ctx.emit(Inst::FpuCmp {
|
|
||||||
size: ScalarSize::from_ty(in_ty),
|
|
||||||
rn,
|
|
||||||
rm: rn,
|
|
||||||
});
|
|
||||||
if in_bits == 32 {
|
|
||||||
ctx.emit(Inst::FpuCSel32 {
|
|
||||||
rd: rtmp2,
|
|
||||||
rn: rtmp1.to_reg(),
|
|
||||||
rm: rtmp2.to_reg(),
|
|
||||||
cond: Cond::Ne,
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
ctx.emit(Inst::FpuCSel64 {
|
|
||||||
rd: rtmp2,
|
|
||||||
rn: rtmp1.to_reg(),
|
|
||||||
rm: rtmp2.to_reg(),
|
|
||||||
cond: Cond::Ne,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
let cvt = match (in_bits, out_bits, out_signed) {
|
|
||||||
(32, 32, false) => FpuToIntOp::F32ToU32,
|
|
||||||
(32, 32, true) => FpuToIntOp::F32ToI32,
|
|
||||||
(32, 64, false) => FpuToIntOp::F32ToU64,
|
|
||||||
(32, 64, true) => FpuToIntOp::F32ToI64,
|
|
||||||
(64, 32, false) => FpuToIntOp::F64ToU32,
|
|
||||||
(64, 32, true) => FpuToIntOp::F64ToI32,
|
|
||||||
(64, 64, false) => FpuToIntOp::F64ToU64,
|
|
||||||
(64, 64, true) => FpuToIntOp::F64ToI64,
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
ctx.emit(Inst::FpuToInt {
|
|
||||||
op: cvt,
|
|
||||||
rd,
|
|
||||||
rn: rtmp2.to_reg(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Opcode::IaddIfcout => {
|
Opcode::IaddIfcout => {
|
||||||
// This is a two-output instruction that is needed for the
|
// This is a two-output instruction that is needed for the
|
||||||
|
|||||||
@@ -9,8 +9,8 @@ block0(v0: i8):
|
|||||||
}
|
}
|
||||||
|
|
||||||
; block0:
|
; block0:
|
||||||
; uxtb w4, w0
|
; uxtb w3, w0
|
||||||
; ucvtf s0, w4
|
; ucvtf s0, w3
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
function u0:0(i8) -> f64 {
|
function u0:0(i8) -> f64 {
|
||||||
@@ -20,8 +20,8 @@ block0(v0: i8):
|
|||||||
}
|
}
|
||||||
|
|
||||||
; block0:
|
; block0:
|
||||||
; uxtb w4, w0
|
; uxtb w3, w0
|
||||||
; ucvtf d0, w4
|
; ucvtf d0, w3
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
function u0:0(i16) -> f32 {
|
function u0:0(i16) -> f32 {
|
||||||
@@ -31,8 +31,8 @@ block0(v0: i16):
|
|||||||
}
|
}
|
||||||
|
|
||||||
; block0:
|
; block0:
|
||||||
; uxth w4, w0
|
; uxth w3, w0
|
||||||
; ucvtf s0, w4
|
; ucvtf s0, w3
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
function u0:0(i16) -> f64 {
|
function u0:0(i16) -> f64 {
|
||||||
@@ -42,8 +42,8 @@ block0(v0: i16):
|
|||||||
}
|
}
|
||||||
|
|
||||||
; block0:
|
; block0:
|
||||||
; uxth w4, w0
|
; uxth w3, w0
|
||||||
; ucvtf d0, w4
|
; ucvtf d0, w3
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
function u0:0(f32) -> i8 {
|
function u0:0(f32) -> i8 {
|
||||||
@@ -55,13 +55,13 @@ block0(v0: f32):
|
|||||||
; block0:
|
; block0:
|
||||||
; fcmp s0, s0
|
; fcmp s0, s0
|
||||||
; b.vc 8 ; udf
|
; b.vc 8 ; udf
|
||||||
; fmov s6, #-1
|
; fmov s5, #-1
|
||||||
; fcmp s0, s6
|
; fcmp s0, s5
|
||||||
; b.gt 8 ; udf
|
; b.gt 8 ; udf
|
||||||
; movz x10, #17280, LSL #16
|
; movz x10, #17280, LSL #16
|
||||||
; fmov s6, w10
|
; fmov s18, w10
|
||||||
; fcmp s0, s6
|
; fcmp s0, s18
|
||||||
; b.mi 8 ; udf
|
; b.lt 8 ; udf
|
||||||
; fcvtzu w0, s0
|
; fcvtzu w0, s0
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
@@ -74,13 +74,13 @@ block0(v0: f64):
|
|||||||
; block0:
|
; block0:
|
||||||
; fcmp d0, d0
|
; fcmp d0, d0
|
||||||
; b.vc 8 ; udf
|
; b.vc 8 ; udf
|
||||||
; fmov d6, #-1
|
; fmov d5, #-1
|
||||||
; fcmp d0, d6
|
; fcmp d0, d5
|
||||||
; b.gt 8 ; udf
|
; b.gt 8 ; udf
|
||||||
; movz x10, #16496, LSL #48
|
; movz x10, #16496, LSL #48
|
||||||
; fmov d6, x10
|
; fmov d18, x10
|
||||||
; fcmp d0, d6
|
; fcmp d0, d18
|
||||||
; b.mi 8 ; udf
|
; b.lt 8 ; udf
|
||||||
; fcvtzu w0, d0
|
; fcvtzu w0, d0
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
@@ -93,13 +93,13 @@ block0(v0: f32):
|
|||||||
; block0:
|
; block0:
|
||||||
; fcmp s0, s0
|
; fcmp s0, s0
|
||||||
; b.vc 8 ; udf
|
; b.vc 8 ; udf
|
||||||
; fmov s6, #-1
|
; fmov s5, #-1
|
||||||
; fcmp s0, s6
|
; fcmp s0, s5
|
||||||
; b.gt 8 ; udf
|
; b.gt 8 ; udf
|
||||||
; movz x10, #18304, LSL #16
|
; movz x10, #18304, LSL #16
|
||||||
; fmov s6, w10
|
; fmov s18, w10
|
||||||
; fcmp s0, s6
|
; fcmp s0, s18
|
||||||
; b.mi 8 ; udf
|
; b.lt 8 ; udf
|
||||||
; fcvtzu w0, s0
|
; fcvtzu w0, s0
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
@@ -112,13 +112,13 @@ block0(v0: f64):
|
|||||||
; block0:
|
; block0:
|
||||||
; fcmp d0, d0
|
; fcmp d0, d0
|
||||||
; b.vc 8 ; udf
|
; b.vc 8 ; udf
|
||||||
; fmov d6, #-1
|
; fmov d5, #-1
|
||||||
; fcmp d0, d6
|
; fcmp d0, d5
|
||||||
; b.gt 8 ; udf
|
; b.gt 8 ; udf
|
||||||
; movz x10, #16624, LSL #48
|
; movz x10, #16624, LSL #48
|
||||||
; fmov d6, x10
|
; fmov d18, x10
|
||||||
; fcmp d0, d6
|
; fcmp d0, d18
|
||||||
; b.mi 8 ; udf
|
; b.lt 8 ; udf
|
||||||
; fcvtzu w0, d0
|
; fcvtzu w0, d0
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
|
|||||||
@@ -333,13 +333,13 @@ block0(v0: f32):
|
|||||||
; block0:
|
; block0:
|
||||||
; fcmp s0, s0
|
; fcmp s0, s0
|
||||||
; b.vc 8 ; udf
|
; b.vc 8 ; udf
|
||||||
; fmov s6, #-1
|
; fmov s5, #-1
|
||||||
; fcmp s0, s6
|
; fcmp s0, s5
|
||||||
; b.gt 8 ; udf
|
; b.gt 8 ; udf
|
||||||
; movz x10, #20352, LSL #16
|
; movz x10, #20352, LSL #16
|
||||||
; fmov s6, w10
|
; fmov s18, w10
|
||||||
; fcmp s0, s6
|
; fcmp s0, s18
|
||||||
; b.mi 8 ; udf
|
; b.lt 8 ; udf
|
||||||
; fcvtzu w0, s0
|
; fcvtzu w0, s0
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
@@ -352,14 +352,14 @@ block0(v0: f32):
|
|||||||
; block0:
|
; block0:
|
||||||
; fcmp s0, s0
|
; fcmp s0, s0
|
||||||
; b.vc 8 ; udf
|
; b.vc 8 ; udf
|
||||||
; movz x7, #52992, LSL #16
|
; movz x6, #52992, LSL #16
|
||||||
; fmov s7, w7
|
; fmov s6, w6
|
||||||
; fcmp s0, s7
|
; fcmp s0, s6
|
||||||
; b.ge 8 ; udf
|
; b.ge 8 ; udf
|
||||||
; movz x12, #20224, LSL #16
|
; movz x12, #20224, LSL #16
|
||||||
; fmov s7, w12
|
; fmov s20, w12
|
||||||
; fcmp s0, s7
|
; fcmp s0, s20
|
||||||
; b.mi 8 ; udf
|
; b.lt 8 ; udf
|
||||||
; fcvtzs w0, s0
|
; fcvtzs w0, s0
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
@@ -372,13 +372,13 @@ block0(v0: f32):
|
|||||||
; block0:
|
; block0:
|
||||||
; fcmp s0, s0
|
; fcmp s0, s0
|
||||||
; b.vc 8 ; udf
|
; b.vc 8 ; udf
|
||||||
; fmov s6, #-1
|
; fmov s5, #-1
|
||||||
; fcmp s0, s6
|
; fcmp s0, s5
|
||||||
; b.gt 8 ; udf
|
; b.gt 8 ; udf
|
||||||
; movz x10, #24448, LSL #16
|
; movz x10, #24448, LSL #16
|
||||||
; fmov s6, w10
|
; fmov s18, w10
|
||||||
; fcmp s0, s6
|
; fcmp s0, s18
|
||||||
; b.mi 8 ; udf
|
; b.lt 8 ; udf
|
||||||
; fcvtzu x0, s0
|
; fcvtzu x0, s0
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
@@ -391,14 +391,14 @@ block0(v0: f32):
|
|||||||
; block0:
|
; block0:
|
||||||
; fcmp s0, s0
|
; fcmp s0, s0
|
||||||
; b.vc 8 ; udf
|
; b.vc 8 ; udf
|
||||||
; movz x7, #57088, LSL #16
|
; movz x6, #57088, LSL #16
|
||||||
; fmov s7, w7
|
; fmov s6, w6
|
||||||
; fcmp s0, s7
|
; fcmp s0, s6
|
||||||
; b.ge 8 ; udf
|
; b.ge 8 ; udf
|
||||||
; movz x12, #24320, LSL #16
|
; movz x12, #24320, LSL #16
|
||||||
; fmov s7, w12
|
; fmov s20, w12
|
||||||
; fcmp s0, s7
|
; fcmp s0, s20
|
||||||
; b.mi 8 ; udf
|
; b.lt 8 ; udf
|
||||||
; fcvtzs x0, s0
|
; fcvtzs x0, s0
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
@@ -411,13 +411,13 @@ block0(v0: f64):
 ; block0:
 ; fcmp d0, d0
 ; b.vc 8 ; udf
-; fmov d6, #-1
-; fcmp d0, d6
+; fmov d5, #-1
+; fcmp d0, d5
 ; b.gt 8 ; udf
 ; movz x10, #16880, LSL #48
-; fmov d6, x10
-; fcmp d0, d6
-; b.mi 8 ; udf
+; fmov d18, x10
+; fcmp d0, d18
+; b.lt 8 ; udf
 ; fcvtzu w0, d0
 ; ret

@@ -430,13 +430,13 @@ block0(v0: f64):
 ; block0:
 ; fcmp d0, d0
 ; b.vc 8 ; udf
-; ldr d6, pc+8 ; b 12 ; data.f64 -2147483649
-; fcmp d0, d6
+; ldr d5, pc+8 ; b 12 ; data.f64 -2147483649
+; fcmp d0, d5
 ; b.gt 8 ; udf
 ; movz x10, #16864, LSL #48
-; fmov d6, x10
-; fcmp d0, d6
-; b.mi 8 ; udf
+; fmov d18, x10
+; fcmp d0, d18
+; b.lt 8 ; udf
 ; fcvtzs w0, d0
 ; ret

@@ -449,13 +449,13 @@ block0(v0: f64):
 ; block0:
 ; fcmp d0, d0
 ; b.vc 8 ; udf
-; fmov d6, #-1
-; fcmp d0, d6
+; fmov d5, #-1
+; fcmp d0, d5
 ; b.gt 8 ; udf
 ; movz x10, #17392, LSL #48
-; fmov d6, x10
-; fcmp d0, d6
-; b.mi 8 ; udf
+; fmov d18, x10
+; fcmp d0, d18
+; b.lt 8 ; udf
 ; fcvtzu x0, d0
 ; ret

@@ -468,14 +468,14 @@ block0(v0: f64):
 ; block0:
 ; fcmp d0, d0
 ; b.vc 8 ; udf
-; movz x7, #50144, LSL #48
-; fmov d7, x7
-; fcmp d0, d7
+; movz x6, #50144, LSL #48
+; fmov d6, x6
+; fcmp d0, d6
 ; b.ge 8 ; udf
 ; movz x12, #17376, LSL #48
-; fmov d7, x12
-; fcmp d0, d7
-; b.mi 8 ; udf
+; fmov d20, x12
+; fcmp d0, d20
+; b.lt 8 ; udf
 ; fcvtzs x0, d0
 ; ret

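In the trapping conversions above, the self-comparison plus `b.vc` traps on NaN, and the two `fcmp`/conditional-branch pairs trap when the input lies outside the representable range before the final `fcvtz*`. The bound constants are powers of two, so only the top 16 bits of their IEEE-754 encoding are non-zero and a single `movz ..., LSL #48` can materialize them. A minimal Rust sketch of that relationship (the function name and shape are illustrative, not the backend's bound-computing helpers):

    // Upper bound for a trapping f64 -> uN conversion is 2^N; only its top
    // 16 bits are non-zero, which is exactly what `movz ..., LSL #48` loads.
    fn f64_upper_bound_movz_imm(out_bits: i32) -> u16 {
        let bound = 2.0_f64.powi(out_bits);          // e.g. 2^32 for a u32 result
        let bits = bound.to_bits();
        assert_eq!(bits & 0x0000_ffff_ffff_ffff, 0); // the low 48 bits are zero
        (bits >> 48) as u16
    }

    fn main() {
        assert_eq!(f64_upper_bound_movz_imm(32), 16880); // `movz x10, #16880, LSL #48` above
        assert_eq!(f64_upper_bound_movz_imm(64), 17392); // `movz x10, #17392, LSL #48` above
    }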
@@ -566,14 +566,14 @@ block0(v0: f32):
 }

 ; block0:
-; movz x6, #20352, LSL #16
-; fmov s5, w6
-; fmin s7, s0, s5
-; movi v5.2s, #0
-; fmax s7, s7, s5
+; movz x4, #20352, LSL #16
+; fmov s4, w4
+; fmin s7, s0, s4
+; movi v17.2s, #0
+; fmax s19, s7, s17
 ; fcmp s0, s0
-; fcsel s7, s5, s7, ne
-; fcvtzu w0, s7
+; fcsel s22, s17, s19, ne
+; fcvtzu w0, s22
 ; ret

 function %f50(f32) -> i32 {
@@ -583,16 +583,16 @@ block0(v0: f32):
 }

 ; block0:
-; movz x6, #20224, LSL #16
-; fmov s5, w6
-; fmin s7, s0, s5
+; movz x4, #20224, LSL #16
+; fmov s4, w4
+; fmin s7, s0, s4
 ; movz x10, #52992, LSL #16
-; fmov s5, w10
-; fmax s7, s7, s5
-; movi v5.2s, #0
+; fmov s18, w10
+; fmax s21, s7, s18
+; movi v23.16b, #0
 ; fcmp s0, s0
-; fcsel s7, s5, s7, ne
-; fcvtzs w0, s7
+; fcsel s26, s23, s21, ne
+; fcvtzs w0, s26
 ; ret

 function %f51(f32) -> i64 {
@@ -602,14 +602,14 @@ block0(v0: f32):
 }

 ; block0:
-; movz x6, #24448, LSL #16
-; fmov s5, w6
-; fmin s7, s0, s5
-; movi v5.2s, #0
-; fmax s7, s7, s5
+; movz x4, #24448, LSL #16
+; fmov s4, w4
+; fmin s7, s0, s4
+; movi v17.2s, #0
+; fmax s19, s7, s17
 ; fcmp s0, s0
-; fcsel s7, s5, s7, ne
-; fcvtzu x0, s7
+; fcsel s22, s17, s19, ne
+; fcvtzu x0, s22
 ; ret

 function %f52(f32) -> i64 {
@@ -619,16 +619,16 @@ block0(v0: f32):
 }

 ; block0:
-; movz x6, #24320, LSL #16
-; fmov s5, w6
-; fmin s7, s0, s5
+; movz x4, #24320, LSL #16
+; fmov s4, w4
+; fmin s7, s0, s4
 ; movz x10, #57088, LSL #16
-; fmov s5, w10
-; fmax s7, s7, s5
-; movi v5.2s, #0
+; fmov s18, w10
+; fmax s21, s7, s18
+; movi v23.16b, #0
 ; fcmp s0, s0
-; fcsel s7, s5, s7, ne
-; fcvtzs x0, s7
+; fcsel s26, s23, s21, ne
+; fcvtzs x0, s26
 ; ret

 function %f53(f64) -> i32 {
@@ -638,13 +638,13 @@ block0(v0: f64):
 }

 ; block0:
-; ldr d4, pc+8 ; b 12 ; data.f64 4294967295
-; fmin d6, d0, d4
-; movi v4.2s, #0
-; fmax d6, d6, d4
+; ldr d3, pc+8 ; b 12 ; data.f64 4294967295
+; fmin d5, d0, d3
+; movi v7.2s, #0
+; fmax d17, d5, d7
 ; fcmp d0, d0
-; fcsel d6, d4, d6, ne
-; fcvtzu w0, d6
+; fcsel d20, d7, d17, ne
+; fcvtzu w0, d20
 ; ret

 function %f54(f64) -> i32 {
@@ -654,15 +654,15 @@ block0(v0: f64):
 }

 ; block0:
-; ldr d4, pc+8 ; b 12 ; data.f64 2147483647
-; fmin d6, d0, d4
+; ldr d3, pc+8 ; b 12 ; data.f64 2147483647
+; fmin d5, d0, d3
 ; movz x8, #49632, LSL #48
-; fmov d4, x8
-; fmax d6, d6, d4
-; movi v4.2s, #0
+; fmov d16, x8
+; fmax d19, d5, d16
+; movi v21.16b, #0
 ; fcmp d0, d0
-; fcsel d6, d4, d6, ne
-; fcvtzs w0, d6
+; fcsel d24, d21, d19, ne
+; fcvtzs w0, d24
 ; ret

 function %f55(f64) -> i64 {
@@ -672,14 +672,14 @@ block0(v0: f64):
 }

 ; block0:
-; movz x6, #17392, LSL #48
-; fmov d5, x6
-; fmin d7, d0, d5
-; movi v5.2s, #0
-; fmax d7, d7, d5
+; movz x4, #17392, LSL #48
+; fmov d4, x4
+; fmin d7, d0, d4
+; movi v17.2s, #0
+; fmax d19, d7, d17
 ; fcmp d0, d0
-; fcsel d7, d5, d7, ne
-; fcvtzu x0, d7
+; fcsel d22, d17, d19, ne
+; fcvtzu x0, d22
 ; ret

 function %f56(f64) -> i64 {
@@ -689,16 +689,16 @@ block0(v0: f64):
 }

 ; block0:
-; movz x6, #17376, LSL #48
-; fmov d5, x6
-; fmin d7, d0, d5
+; movz x4, #17376, LSL #48
+; fmov d4, x4
+; fmin d7, d0, d4
 ; movz x10, #50144, LSL #48
-; fmov d5, x10
-; fmax d7, d7, d5
-; movi v5.2s, #0
+; fmov d18, x10
+; fmax d21, d7, d18
+; movi v23.16b, #0
 ; fcmp d0, d0
-; fcsel d7, d5, d7, ne
-; fcvtzs x0, d7
+; fcsel d26, d23, d21, ne
+; fcvtzs x0, d26
 ; ret

 function %f57(f32x2) -> f32x2 {
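The saturating lowerings above avoid traps entirely: `fmin`/`fmax` clamp the input to the representable range, the self-`fcmp` plus `fcsel ..., ne` substitutes zero when the input is NaN, and only then does `fcvtz*` convert. A rough scalar model in Rust, taking the f32-to-i32 case as representative (this models the semantics, it is not the generated code):

    // Mirrors fmin -> fmax -> fcmp/fcsel -> fcvtzs for one f32 input.
    fn fcvt_to_sint_sat_i32(x: f32) -> i32 {
        let clamped = x.min(2147483648.0_f32)       // fmin against the upper bound (2^31)
                       .max(-2147483648.0_f32);     // fmax against the lower bound (-2^31)
        let selected = if x.is_nan() { 0.0 } else { clamped }; // fcsel picks 0.0 for NaN
        selected as i32                             // fcvtzs; `as` also saturates in Rust
    }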
@@ -946,3 +946,36 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2):
 ; mov v0.16b, v2.16b
 ; fmla v0.2d, v17.2d, v1.2d
 ; ret
+
+function %f81(f32x2, f32x2) -> f32x2 {
+block0(v0: f32x2, v1: f32x2):
+v2 = fcopysign v0, v1
+return v2
+}
+
+; block0:
+; ushr v7.2s, v1.2s, #31
+; sli v0.2s, v7.2s, #31
+; ret
+
+function %f82(f32x4, f32x4) -> f32x4 {
+block0(v0: f32x4, v1: f32x4):
+v2 = fcopysign v0, v1
+return v2
+}
+
+; block0:
+; ushr v7.4s, v1.4s, #31
+; sli v0.4s, v7.4s, #31
+; ret
+
+function %f83(f64x2, f64x2) -> f64x2 {
+block0(v0: f64x2, v1: f64x2):
+v2 = fcopysign v0, v1
+return v2
+}
+
+; block0:
+; ushr v7.2d, v1.2d, #63
+; sli v0.2d, v7.2d, #63
+; ret
@@ -0,0 +1,37 @@
+test interpret
+test run
+target aarch64
+; x86_64 and s390x do not support 64-bit vectors in `fcopysign`.
+
+function %fcopysign_f32x2(f32x2, f32x2) -> f32x2 {
+block0(v0: f32x2, v1: f32x2):
+v2 = fcopysign v0, v1
+return v2
+}
+; run: %fcopysign_f32x2([0x9.0 -0x9.0], [0x9.0 0x9.0]) == [0x9.0 0x9.0]
+; run: %fcopysign_f32x2([0x9.0 -0x9.0], [-0x9.0 -0x9.0]) == [-0x9.0 -0x9.0]
+; run: %fcopysign_f32x2([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-0x0.0 0x0.0]
+
+; F32 Inf
+; run: %fcopysign_f32x2([Inf -Inf], [Inf Inf]) == [Inf Inf]
+; run: %fcopysign_f32x2([Inf -Inf], [-Inf -Inf]) == [-Inf -Inf]
+
+; F32 Epsilon / Max / Min Positive
+; run: %fcopysign_f32x2([0x1.000000p-23 -0x1.000000p-23], [-0x0.0 0x0.0]) == [-0x1.000000p-23 0x1.000000p-23]
+; run: %fcopysign_f32x2([0x1.fffffep127 -0x1.fffffep127], [-0x0.0 0x0.0]) == [-0x1.fffffep127 0x1.fffffep127]
+; run: %fcopysign_f32x2([0x1.000000p-126 -0x1.000000p-126], [-0x0.0 0x0.0]) == [-0x1.000000p-126 0x1.000000p-126]
+
+; F32 Subnormals
+; run: %fcopysign_f32x2([0x0.800000p-126 -0x0.800000p-126], [-0x0.0 0x0.0]) == [-0x0.800000p-126 0x0.800000p-126]
+; run: %fcopysign_f32x2([0x0.000002p-126 -0x0.000002p-126], [-0x0.0 0x0.0]) == [-0x0.000002p-126 0x0.000002p-126]
+
+; F32 NaN's
+; Unlike with other operations fcopysign is guaranteed to only affect the sign bit
+; run: %fcopysign_f32x2([0x0.0 0x3.0], [-NaN +sNaN:0x1]) == [-0x0.0 0x3.0]
+; run: %fcopysign_f32x2([Inf +NaN], [-NaN -NaN]) == [-Inf -NaN]
+; run: %fcopysign_f32x2([-NaN +NaN:0x0], [+NaN -NaN]) == [+NaN -NaN:0x0]
+; run: %fcopysign_f32x2([+NaN:0x1 +NaN:0x300001], [-NaN -NaN]) == [-NaN:0x1 -NaN:0x300001]
+; run: %fcopysign_f32x2([-NaN:0x0 -NaN:0x1], [+NaN +NaN]) == [+NaN:0x0 +NaN:0x1]
+; run: %fcopysign_f32x2([-NaN:0x300001 +sNaN:0x1], [+NaN -NaN]) == [+NaN:0x300001 -sNaN:0x1]
+; run: %fcopysign_f32x2([-sNaN:0x1 +sNaN:0x200001], [+NaN -NaN]) == [+sNaN:0x1 -sNaN:0x200001]
+; run: %fcopysign_f32x2([-sNaN:0x200001 -sNaN:0x200001], [+NaN +NaN]) == [+sNaN:0x200001 +sNaN:0x200001]
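Each `; run:` line invokes the function with the bracketed lane values and compares the result lane by lane. The first case for %fcopysign_f32x2, worked through in plain Rust for illustration (0x9.0 is the CLIF hex-float notation for 9.0; this is not the filetest harness itself):

    fn first_run_line() {
        let v0 = [9.0_f32, -9.0];
        let v1 = [9.0_f32, 9.0];
        let got = [v0[0].copysign(v1[0]), v0[1].copysign(v1[1])]; // per-lane fcopysign
        assert_eq!(got, [9.0_f32, 9.0]);                          // == [0x9.0 0x9.0]
    }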
cranelift/filetests/filetests/runtests/simd-fcopysign.clif (new file, 63 lines)
@@ -0,0 +1,63 @@
+test interpret
+test run
+target s390x
+target aarch64
+; x86_64 does not support SIMD fcopysign.
+
+function %fcopysign_f32x4(f32x4, f32x4) -> f32x4 {
+block0(v0: f32x4, v1: f32x4):
+v2 = fcopysign v0, v1
+return v2
+}
+; run: %fcopysign_f32x4([0x9.0 -0x9.0 0x9.0 -0x9.0], [0x9.0 0x9.0 -0x9.0 -0x9.0]) == [0x9.0 0x9.0 -0x9.0 -0x9.0]
+; run: %fcopysign_f32x4([0x0.0 -0x0.0 0x0.0 -0x0.0], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x0.0 0x0.0 -0x0.0 0x0.0]
+
+; F32 Inf
+; run: %fcopysign_f32x4([Inf -Inf Inf -Inf], [Inf Inf -Inf -Inf]) == [Inf Inf -Inf -Inf]
+
+; F32 Epsilon / Max / Min Positive
+; run: %fcopysign_f32x4([0x1.000000p-23 -0x1.000000p-23 0x1.fffffep127 -0x1.fffffep127], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x1.000000p-23 0x1.000000p-23 -0x1.fffffep127 0x1.fffffep127]
+; run: %fcopysign_f32x4([0x1.000000p-126 -0x1.000000p-126 0x1.000000p-126 -0x1.000000p-126], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x1.000000p-126 0x1.000000p-126 -0x1.000000p-126 0x1.000000p-126]
+
+; F32 Subnormals
+; run: %fcopysign_f32x4([0x0.800000p-126 -0x0.800000p-126 0x0.000002p-126 -0x0.000002p-126], [-0x0.0 0x0.0 -0x0.0 0x0.0]) == [-0x0.800000p-126 0x0.800000p-126 -0x0.000002p-126 0x0.000002p-126]
+
+; F32 NaN's
+; Unlike with other operations fcopysign is guaranteed to only affect the sign bit
+; run: %fcopysign_f32x4([0x0.0 0x3.0 Inf +NaN], [-NaN +sNaN:0x1 -NaN -NaN]) == [-0x0.0 0x3.0 -Inf -NaN]
+; run: %fcopysign_f32x4([-NaN +NaN:0x0 +NaN:0x1 +NaN:0x300001], [+NaN -NaN -NaN -NaN]) == [+NaN -NaN:0x0 -NaN:0x1 -NaN:0x300001]
+; run: %fcopysign_f32x4([-NaN:0x0 -NaN:0x1 -NaN:0x300001 +sNaN:0x1], [+NaN +NaN +NaN -NaN]) == [+NaN:0x0 +NaN:0x1 +NaN:0x300001 -sNaN:0x1]
+; run: %fcopysign_f32x4([-sNaN:0x1 +sNaN:0x200001 -sNaN:0x200001 -sNaN:0x200001], [+NaN -NaN +NaN +NaN]) == [+sNaN:0x1 -sNaN:0x200001 +sNaN:0x200001 +sNaN:0x200001]
+
+function %fcopysign_f64x2(f64x2, f64x2) -> f64x2 {
+block0(v0: f64x2, v1: f64x2):
+v2 = fcopysign v0, v1
+return v2
+}
+; run: %fcopysign_f64x2([0x9.0 -0x9.0], [0x9.0 0x9.0]) == [0x9.0 0x9.0]
+; run: %fcopysign_f64x2([0x9.0 -0x9.0], [-0x9.0 -0x9.0]) == [-0x9.0 -0x9.0]
+; run: %fcopysign_f64x2([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-0x0.0 0x0.0]
+
+; F64 Inf
+; run: %fcopysign_f64x2([Inf -Inf], [Inf Inf]) == [Inf Inf]
+; run: %fcopysign_f64x2([Inf -Inf], [-Inf -Inf]) == [-Inf -Inf]
+
+; F64 Epsilon / Max / Min Positive
+; run: %fcopysign_f64x2([0x1.0000000000000p-52 -0x1.0000000000000p-52], [-0x0.0 0x0.0]) == [-0x1.0000000000000p-52 0x1.0000000000000p-52]
+; run: %fcopysign_f64x2([0x1.fffffffffffffp1023 -0x1.fffffffffffffp1023], [-0x0.0 0x0.0]) == [-0x1.fffffffffffffp1023 0x1.fffffffffffffp1023]
+; run: %fcopysign_f64x2([0x1.0000000000000p-1022 -0x1.0000000000000p-1022], [-0x0.0 0x0.0]) == [-0x1.0000000000000p-1022 0x1.0000000000000p-1022]
+
+; F64 Subnormals
+; run: %fcopysign_f64x2([0x0.8000000000000p-1022 -0x0.8000000000000p-1022], [-0x0.0 0x0.0]) == [-0x0.8000000000000p-1022 0x0.8000000000000p-1022]
+; run: %fcopysign_f64x2([0x0.0000000000001p-1022 -0x0.0000000000001p-1022], [-0x0.0 0x0.0]) == [-0x0.0000000000001p-1022 0x0.0000000000001p-1022]
+
+; F64 NaN's
+; Unlike with other operations fcopysign is guaranteed to only affect the sign bit
+; run: %fcopysign_f64x2([0x0.0 0x3.0], [-NaN +sNaN:0x1]) == [-0x0.0 0x3.0]
+; run: %fcopysign_f64x2([Inf +NaN], [-NaN -NaN]) == [-Inf -NaN]
+; run: %fcopysign_f64x2([-NaN +NaN:0x0], [+NaN -NaN]) == [+NaN -NaN:0x0]
+; run: %fcopysign_f64x2([+NaN:0x1 +NaN:0x4000000000001], [-NaN -NaN]) == [-NaN:0x1 -NaN:0x4000000000001]
+; run: %fcopysign_f64x2([-NaN:0x0 -NaN:0x1], [+NaN +NaN]) == [+NaN:0x0 +NaN:0x1]
+; run: %fcopysign_f64x2([-NaN:0x4000000000001 +sNaN:0x1], [+NaN -NaN]) == [+NaN:0x4000000000001 -sNaN:0x1]
+; run: %fcopysign_f64x2([-sNaN:0x1 +sNaN:0x4000000000001], [+NaN -NaN]) == [+sNaN:0x1 -sNaN:0x4000000000001]
+; run: %fcopysign_f64x2([-sNaN:0x4000000000001 -sNaN:0x4000000000001], [+NaN +NaN]) == [+sNaN:0x4000000000001 +sNaN:0x4000000000001]
@@ -808,7 +808,19 @@ where
 }
 Opcode::Fneg => assign(Value::neg(arg(0)?)?),
 Opcode::Fabs => assign(Value::abs(arg(0)?)?),
-Opcode::Fcopysign => binary(Value::copysign, arg(0)?, arg(1)?)?,
+Opcode::Fcopysign => {
+    let arg0 = extractlanes(&arg(0)?, ctrl_ty)?;
+    let arg1 = extractlanes(&arg(1)?, ctrl_ty)?;
+
+    assign(vectorizelanes(
+        &arg0
+            .into_iter()
+            .zip(arg1.into_iter())
+            .map(|(x, y)| V::copysign(x, y))
+            .collect::<ValueResult<SimdVec<V>>>()?,
+        ctrl_ty,
+    )?)
+}
 Opcode::Fmin => assign(match (arg(0)?, arg(1)?) {
 (a, _) if a.is_nan()? => a,
 (_, b) if b.is_nan()? => b,
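The interpreter change splits both operands into lanes with `extractlanes`, applies `copysign` pair-wise, and reassembles the lanes with `vectorizelanes`. Reduced to plain Rust over fixed-size arrays (illustrative only, with std's `f64::copysign` standing in for `Value::copysign`):

    fn copysign_lanes<const N: usize>(a: [f64; N], b: [f64; N]) -> [f64; N] {
        let mut out = [0.0; N];
        for i in 0..N {
            out[i] = a[i].copysign(b[i]); // per lane: magnitude from a, sign from b
        }
        out
    }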