Port fcmp to ISLE (AArch64) (#4819)
Ported the existing implementation of `fcmp` for AArch64 to ISLE. This also ports the `lower_vector_compare` function to ISLE.
Copyright (c) 2022 Arm Limited
This commit is contained in:
@@ -2638,6 +2638,11 @@
|
|||||||
;; TODO: Port lower_splat_const() to ISLE.
|
;; TODO: Port lower_splat_const() to ISLE.
|
||||||
(extern constructor splat_const splat_const)
|
(extern constructor splat_const splat_const)
|
||||||
|
|
||||||
|
;; Lower a FloatCC to a Cond.
|
||||||
|
(decl fp_cond_code (FloatCC) Cond)
|
||||||
|
;; TODO: Port lower_fp_condcode() to ISLE.
|
||||||
|
(extern constructor fp_cond_code fp_cond_code)
|
||||||
|
|
||||||
;; Generate comparison to zero operator from input condition code
|
;; Generate comparison to zero operator from input condition code
|
||||||
(decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
|
(decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
|
||||||
(extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)
|
(extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)
|
||||||
@@ -2966,3 +2971,100 @@
|
|||||||
(let ((dst WritableReg (temp_writable_reg $I64))
|
(let ((dst WritableReg (temp_writable_reg $I64))
|
||||||
(_ Unit (emit (abi_stackslot_addr dst stack_slot offset))))
|
(_ Unit (emit (abi_stackslot_addr dst stack_slot offset))))
|
||||||
dst))
|
dst))
|
||||||
|
|
||||||
|
;; Helper for emitting instruction sequences to perform a vector comparison.
|
||||||
|
|
||||||
|
(decl vec_cmp_vc (Reg Reg VectorSize) Reg)
|
||||||
|
(rule (vec_cmp_vc rn rm size)
|
||||||
|
(let ((dst Reg (vec_rrr (VecALUOp.Fcmeq) rn rn size))
|
||||||
|
(tmp Reg (vec_rrr (VecALUOp.Fcmeq) rm rm size))
|
||||||
|
(dst Reg (vec_rrr (VecALUOp.And) dst tmp size)))
|
||||||
|
dst))
|
||||||
|
|
||||||
|
(decl vec_cmp (Reg Reg Type Cond) Reg)
|
||||||
|
|
||||||
|
;; Floating-point Vc / Vs: Vs is computed as the bitwise NOT of the Vc result.
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Vc))
|
||||||
|
(if (ty_vector_float ty))
|
||||||
|
(vec_cmp_vc rn rm (vector_size ty)))
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Vs))
|
||||||
|
(if (ty_vector_float ty))
|
||||||
|
(let ((tmp Reg (vec_cmp_vc rn rm (vector_size ty))))
|
||||||
|
(vec_misc (VecMisc2.Not) tmp (vector_size ty))))
|
||||||
|
|
||||||
|
;; 'Less than' operations are implemented by swapping the order of
|
||||||
|
;; operands and using the 'greater than' instructions.
|
||||||
|
;; 'Not equal' is implemented with 'equal' and inverting the result.
|
||||||
|
|
||||||
|
;; Floating-point
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Eq))
|
||||||
|
(if (ty_vector_float ty))
|
||||||
|
(vec_rrr (VecALUOp.Fcmeq) rn rm (vector_size ty)))
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Ne))
|
||||||
|
(if (ty_vector_float ty))
|
||||||
|
(let ((tmp Reg (vec_rrr (VecALUOp.Fcmeq) rn rm (vector_size ty))))
|
||||||
|
(vec_misc (VecMisc2.Not) tmp (vector_size ty))))
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Ge))
|
||||||
|
(if (ty_vector_float ty))
|
||||||
|
(vec_rrr (VecALUOp.Fcmge) rn rm (vector_size ty)))
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Gt))
|
||||||
|
(if (ty_vector_float ty))
|
||||||
|
(vec_rrr (VecALUOp.Fcmgt) rn rm (vector_size ty)))
|
||||||
|
;; Floating-point swapped-operands
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Mi))
|
||||||
|
(if (ty_vector_float ty))
|
||||||
|
(vec_rrr (VecALUOp.Fcmgt) rm rn (vector_size ty)))
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Ls))
|
||||||
|
(if (ty_vector_float ty))
|
||||||
|
(vec_rrr (VecALUOp.Fcmge) rm rn (vector_size ty)))
|
||||||
|
|
||||||
|
;; Integer
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Eq))
|
||||||
|
(if (ty_vector_not_float ty))
|
||||||
|
(vec_rrr (VecALUOp.Cmeq) rn rm (vector_size ty)))
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Ne))
|
||||||
|
(if (ty_vector_not_float ty))
|
||||||
|
(let ((tmp Reg (vec_rrr (VecALUOp.Cmeq) rn rm (vector_size ty))))
|
||||||
|
(vec_misc (VecMisc2.Not) tmp (vector_size ty))))
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Ge))
|
||||||
|
(if (ty_vector_not_float ty))
|
||||||
|
(vec_rrr (VecALUOp.Cmge) rn rm (vector_size ty)))
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Gt))
|
||||||
|
(if (ty_vector_not_float ty))
|
||||||
|
(vec_rrr (VecALUOp.Cmgt) rn rm (vector_size ty)))
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Hs))
|
||||||
|
(if (ty_vector_not_float ty))
|
||||||
|
(vec_rrr (VecALUOp.Cmhs) rn rm (vector_size ty)))
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Hi))
|
||||||
|
(if (ty_vector_not_float ty))
|
||||||
|
(vec_rrr (VecALUOp.Cmhi) rn rm (vector_size ty)))
|
||||||
|
;; Integer swapped-operands
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Le))
|
||||||
|
(if (ty_vector_not_float ty))
|
||||||
|
(vec_rrr (VecALUOp.Cmge) rm rn (vector_size ty)))
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Lt))
|
||||||
|
(if (ty_vector_not_float ty))
|
||||||
|
(vec_rrr (VecALUOp.Cmgt) rm rn (vector_size ty)))
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Ls))
|
||||||
|
(if (ty_vector_not_float ty))
|
||||||
|
(vec_rrr (VecALUOp.Cmhs) rm rn (vector_size ty)))
|
||||||
|
(rule (vec_cmp rn rm ty (Cond.Lo))
|
||||||
|
(if (ty_vector_not_float ty))
|
||||||
|
(vec_rrr (VecALUOp.Cmhi) rm rn (vector_size ty)))
|
||||||
|
|
||||||
|
;; Helper for determining if any value in a vector is true.
|
||||||
|
;; This operation is implemented by using umaxp to create a scalar value, which
|
||||||
|
;; is then compared against zero.
|
||||||
|
;;
|
||||||
|
;; umaxp vn.4s, vm.4s, vm.4s
|
||||||
|
;; mov xm, vn.d[0]
|
||||||
|
;; cmp xm, #0
|
||||||
|
(decl vanytrue (Reg Type) ProducesFlags)
|
||||||
|
(rule (vanytrue src (ty_vec128 ty))
|
||||||
|
(let ((src Reg (vec_rrr (VecALUOp.Umaxp) src src (VectorSize.Size32x4)))
|
||||||
|
(src Reg (mov_from_vec src 0 (ScalarSize.Size64))))
|
||||||
|
(cmp_imm (OperandSize.Size64) src (u8_into_imm12 0))))
|
||||||
|
(rule (vanytrue src ty)
|
||||||
|
(if (ty_vec64 ty))
|
||||||
|
(let ((src Reg (mov_from_vec src 0 (ScalarSize.Size64))))
|
||||||
|
(cmp_imm (OperandSize.Size64) src (u8_into_imm12 0))))
|
||||||
|
|||||||
@@ -2349,13 +2349,15 @@ impl MachInstEmit for Inst {
|
|||||||
&Inst::VecDupFromFpu { rd, rn, size } => {
|
&Inst::VecDupFromFpu { rd, rn, size } => {
|
||||||
let rd = allocs.next_writable(rd);
|
let rd = allocs.next_writable(rd);
|
||||||
let rn = allocs.next(rn);
|
let rn = allocs.next(rn);
|
||||||
let imm5 = match size {
|
let q = size.is_128bits() as u32;
|
||||||
VectorSize::Size32x4 => 0b00100,
|
let imm5 = match size.lane_size() {
|
||||||
VectorSize::Size64x2 => 0b01000,
|
ScalarSize::Size32 => 0b00100,
|
||||||
|
ScalarSize::Size64 => 0b01000,
|
||||||
_ => unimplemented!(),
|
_ => unimplemented!(),
|
||||||
};
|
};
|
||||||
sink.put4(
|
sink.put4(
|
||||||
0b010_01110000_00000_000001_00000_00000
|
0b000_01110000_00000_000001_00000_00000
|
||||||
|
| (q << 30)
|
||||||
| (imm5 << 16)
|
| (imm5 << 16)
|
||||||
| (machreg_to_vec(rn) << 5)
|
| (machreg_to_vec(rn) << 5)
|
||||||
| machreg_to_vec(rd.to_reg()),
|
| machreg_to_vec(rd.to_reg()),
|
||||||
|
|||||||
@@ -182,24 +182,9 @@
|
|||||||
|
|
||||||
;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
;; This operation is implemented by using umaxp to create a scalar value, which
|
(rule (lower (has_type out_ty (vany_true x @ (value_type in_ty))))
|
||||||
;; is then compared against zero.
|
(with_flags (vanytrue x in_ty)
|
||||||
;;
|
(materialize_bool_result (ty_bits out_ty) (Cond.Ne))))
|
||||||
;; umaxp vn.4s, vm.4s, vm.4s
|
|
||||||
;; mov xm, vn.d[0]
|
|
||||||
;; cmp xm, #0
|
|
||||||
;; cset xm, ne
|
|
||||||
(rule (lower (vany_true x @ (value_type (ty_vec128 ty))))
|
|
||||||
(let ((x1 Reg (vec_rrr (VecALUOp.Umaxp) x x (VectorSize.Size32x4)))
|
|
||||||
(x2 Reg (mov_from_vec x1 0 (ScalarSize.Size64))))
|
|
||||||
(with_flags (cmp_imm (OperandSize.Size64) x2 (u8_into_imm12 0))
|
|
||||||
(materialize_bool_result (ty_bits ty) (Cond.Ne)))))
|
|
||||||
|
|
||||||
(rule (lower (vany_true x @ (value_type ty)))
|
|
||||||
(if (ty_vec64 ty))
|
|
||||||
(let ((x1 Reg (mov_from_vec x 0 (ScalarSize.Size64))))
|
|
||||||
(with_flags (cmp_imm (OperandSize.Size64) x1 (u8_into_imm12 0))
|
|
||||||
(materialize_bool_result (ty_bits ty) (Cond.Ne)))))
|
|
||||||
|
|
||||||
;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
@@ -1704,6 +1689,16 @@
|
|||||||
(vec_size VectorSize (vector_size ty)))
|
(vec_size VectorSize (vector_size ty)))
|
||||||
(value_reg (float_cmp_zero_swap cond rn vec_size))))
|
(value_reg (float_cmp_zero_swap cond rn vec_size))))
|
||||||
|
|
||||||
|
(rule (lower (has_type out_ty
|
||||||
|
(fcmp cond x @ (value_type (ty_scalar_float in_ty)) y)))
|
||||||
|
(with_flags (fpu_cmp (scalar_size in_ty) x y)
|
||||||
|
(materialize_bool_result
|
||||||
|
(ty_bits out_ty)
|
||||||
|
(fp_cond_code cond))))
|
||||||
|
|
||||||
|
(rule (lower (has_type out_ty (fcmp cond x @ (value_type in_ty) y)))
|
||||||
|
(if (ty_vector_float in_ty))
|
||||||
|
(vec_cmp x y in_ty (fp_cond_code cond)))
|
||||||
|
|
||||||
;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
@@ -1879,7 +1874,6 @@
|
|||||||
(atomic_cas flags addr src1 src2))))
|
(atomic_cas flags addr src1 src2))))
|
||||||
(atomic_cas_loop addr src1 src2 ty))
|
(atomic_cas_loop addr src1 src2 ty))
|
||||||
|
|
||||||
|
|
||||||
;;;; Rules for 'fvdemote' ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for 'fvdemote' ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
(rule (lower (fvdemote x))
|
(rule (lower (fvdemote x))
|
||||||
(fcvtn x (ScalarSize.Size32)))
|
(fcvtn x (ScalarSize.Size32)))
|
||||||
|
|||||||
@@ -6,12 +6,14 @@ use generated_code::Context;
|
|||||||
|
|
||||||
// Types that the generated ISLE code uses via `use super::*`.
|
// Types that the generated ISLE code uses via `use super::*`.
|
||||||
use super::{
|
use super::{
|
||||||
lower_constant_f128, lower_constant_f32, lower_constant_f64, writable_zero_reg, zero_reg,
|
lower_constant_f128, lower_constant_f32, lower_constant_f64, lower_fp_condcode,
|
||||||
AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind,
|
writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo,
|
||||||
ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo,
|
CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
|
||||||
MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg,
|
Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode,
|
||||||
ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
|
Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize,
|
||||||
|
NZCV,
|
||||||
};
|
};
|
||||||
|
use crate::ir::condcodes;
|
||||||
use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm};
|
use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm};
|
||||||
use crate::isa::aarch64::lower::{lower_address, lower_pair_address, lower_splat_const};
|
use crate::isa::aarch64::lower::{lower_address, lower_pair_address, lower_splat_const};
|
||||||
use crate::isa::aarch64::settings::Flags as IsaFlags;
|
use crate::isa::aarch64::settings::Flags as IsaFlags;
|
||||||
@@ -520,6 +522,10 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
|||||||
rd.to_reg()
|
rd.to_reg()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn fp_cond_code(&mut self, cc: &condcodes::FloatCC) -> Cond {
|
||||||
|
lower_fp_condcode(*cc)
|
||||||
|
}
|
||||||
|
|
||||||
fn preg_sp(&mut self) -> PReg {
|
fn preg_sp(&mut self) -> PReg {
|
||||||
super::regs::stack_reg().to_real_reg().unwrap().into()
|
super::regs::stack_reg().to_real_reg().unwrap().into()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -299,25 +299,7 @@ pub(crate) fn lower_insn_to_regs(
|
|||||||
lower_icmp(ctx, insn, condcode, IcmpOutput::Register(rd))?;
|
lower_icmp(ctx, insn, condcode, IcmpOutput::Register(rd))?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Fcmp => {
|
Opcode::Fcmp => implemented_in_isle(ctx),
|
||||||
let condcode = ctx.data(insn).fp_cond_code().unwrap();
|
|
||||||
let cond = lower_fp_condcode(condcode);
|
|
||||||
let ty = ctx.input_ty(insn, 0);
|
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
|
||||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
|
||||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
|
||||||
|
|
||||||
if !ty.is_vector() {
|
|
||||||
ctx.emit(Inst::FpuCmp {
|
|
||||||
size: ScalarSize::from_ty(ty),
|
|
||||||
rn,
|
|
||||||
rm,
|
|
||||||
});
|
|
||||||
materialize_bool_result(ctx, insn, rd, cond);
|
|
||||||
} else {
|
|
||||||
lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Opcode::Debugtrap => implemented_in_isle(ctx),
|
Opcode::Debugtrap => implemented_in_isle(ctx),
|
||||||
|
|
||||||
|
|||||||
@@ -397,6 +397,23 @@ macro_rules! isle_prelude_methods {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn ty_vector_float(&mut self, ty: Type) -> Option<Type> {
|
||||||
|
if ty.is_vector() && ty.lane_type().is_float() {
|
||||||
|
Some(ty)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn ty_vector_not_float(&mut self, ty: Type) -> Option<Type> {
|
||||||
|
if ty.is_vector() && !ty.lane_type().is_float() {
|
||||||
|
Some(ty)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn ty_vec64_ctor(&mut self, ty: Type) -> Option<Type> {
|
fn ty_vec64_ctor(&mut self, ty: Type) -> Option<Type> {
|
||||||
if ty.is_vector() && ty.bits() == 64 {
|
if ty.is_vector() && ty.bits() == 64 {
|
||||||
|
|||||||
@@ -381,6 +381,15 @@
|
|||||||
(decl ty_float_or_vec (Type) Type)
|
(decl ty_float_or_vec (Type) Type)
|
||||||
(extern extractor ty_float_or_vec ty_float_or_vec)
|
(extern extractor ty_float_or_vec ty_float_or_vec)
|
||||||
|
|
||||||
|
;; A pure constructor that only matches vector floating-point types.
|
||||||
|
(decl pure ty_vector_float (Type) Type)
|
||||||
|
(extern constructor ty_vector_float ty_vector_float)
|
||||||
|
|
||||||
|
;; A pure constructor that only matches vector types with lanes which
|
||||||
|
;; are not floating-point.
|
||||||
|
(decl pure ty_vector_not_float (Type) Type)
|
||||||
|
(extern constructor ty_vector_not_float ty_vector_not_float)
|
||||||
|
|
||||||
;; A pure constructor/extractor that only matches 64-bit vector types.
|
;; A pure constructor/extractor that only matches 64-bit vector types.
|
||||||
(decl pure ty_vec64 (Type) Type)
|
(decl pure ty_vec64 (Type) Type)
|
||||||
(extern constructor ty_vec64 ty_vec64_ctor)
|
(extern constructor ty_vec64 ty_vec64_ctor)
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ block0(v0: b32x4):
|
|||||||
; umaxp v3.4s, v0.4s, v0.4s
|
; umaxp v3.4s, v0.4s, v0.4s
|
||||||
; mov x5, v3.d[0]
|
; mov x5, v3.d[0]
|
||||||
; subs xzr, x5, #0
|
; subs xzr, x5, #0
|
||||||
; csetm x0, ne
|
; cset x0, ne
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
function %vall_true_i64x2(i64x2) -> b1 {
|
function %vall_true_i64x2(i64x2) -> b1 {
|
||||||
|
|||||||
@@ -39,3 +39,11 @@ block0(v0:f32x4):
|
|||||||
; run: %fcvt_to_uint_sat([-0x8.1 -0x0.0 0x0.0 -0x1.0p100]) == [0 0 0 0]
|
; run: %fcvt_to_uint_sat([-0x8.1 -0x0.0 0x0.0 -0x1.0p100]) == [0 0 0 0]
|
||||||
; run: %fcvt_to_uint_sat([0xB2D05E00.0 0.0 0.0 0.0]) == [3000000000 0 0 0]
|
; run: %fcvt_to_uint_sat([0xB2D05E00.0 0.0 0.0 0.0]) == [3000000000 0 0 0]
|
||||||
; run: %fcvt_to_uint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0]
|
; run: %fcvt_to_uint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0]
|
||||||
|
|
||||||
|
function %fcvt_low_from_sint(i32x4) -> f64x2 {
|
||||||
|
block0(v0: i32x4):
|
||||||
|
v1 = fcvt_low_from_sint.f64x2 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %fcvt_low_from_sint([0 1 -1 65535]) == [0x0.0 0x1.0]
|
||||||
|
; run: %fcvt_low_from_sint([-1 123456789 0 1]) == [-0x1.0 0x1.d6f3454p26]
|
||||||
|
|||||||
Reference in New Issue
Block a user