Port fcmp to ISLE (AArch64) (#4819)

Ported the existing implementation of `fcmp` for AArch64 to ISLE.

This also ports the `lower_vector_compare` function to ISLE.

Copyright (c) 2022 Arm Limited
This commit is contained in:
Damian Heaton
2022-08-30 17:06:15 +01:00
committed by GitHub
parent e910b8fbfb
commit 3d9d759380
9 changed files with 171 additions and 51 deletions

View File

@@ -2638,6 +2638,11 @@
;; TODO: Port lower_splat_const() to ISLE. ;; TODO: Port lower_splat_const() to ISLE.
(extern constructor splat_const splat_const) (extern constructor splat_const splat_const)
;; Lower a FloatCC to a Cond.
;;
;; This is declared as an external constructor, so the mapping itself is not
;; expressed as ISLE rules here; it is supplied by the embedding Rust code.
(decl fp_cond_code (FloatCC) Cond)
;; TODO: Port lower_fp_condcode() to ISLE.
(extern constructor fp_cond_code fp_cond_code)
;; Generate comparison to zero operator from input condition code ;; Generate comparison to zero operator from input condition code
(decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2) (decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
(extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op) (extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)
@@ -2966,3 +2971,100 @@
(let ((dst WritableReg (temp_writable_reg $I64)) (let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (abi_stackslot_addr dst stack_slot offset)))) (_ Unit (emit (abi_stackslot_addr dst stack_slot offset))))
dst)) dst))
;; Helper for emitting instruction sequences to perform a vector comparison.
;; Builds the `Vc` lane mask for `rn` and `rm`: each operand is compared for
;; equality against itself with `Fcmeq`, and the two resulting masks are then
;; combined with `And`.
(decl vec_cmp_vc (Reg Reg VectorSize) Reg)
(rule (vec_cmp_vc rn rm size)
      (let ((rn_self_eq Reg (vec_rrr (VecALUOp.Fcmeq) rn rn size))
            (rm_self_eq Reg (vec_rrr (VecALUOp.Fcmeq) rm rm size)))
        (vec_rrr (VecALUOp.And) rn_self_eq rm_self_eq size)))
;; Emits a lane-wise comparison of `rn` against `rm` for the vector type `ty`
;; under the given `Cond`, returning the register that holds the result.
(decl vec_cmp (Reg Reg Type Cond) Reg)

;; Floating point Vs / Vc
;; `Vc` is exactly the mask produced by `vec_cmp_vc`.
(rule (vec_cmp rn rm ty (Cond.Vc))
      (if (ty_vector_float ty))
      (vec_cmp_vc rn rm (vector_size ty)))

;; `Vs` is the `Vc` mask passed through `VecMisc2.Not`.
(rule (vec_cmp rn rm ty (Cond.Vs))
      (if (ty_vector_float ty))
      (let ((size VectorSize (vector_size ty))
            (vc_mask Reg (vec_cmp_vc rn rm size)))
        (vec_misc (VecMisc2.Not) vc_mask size)))
;; 'Less than' operations are implemented by swapping the order of
;; operands and using the 'greater than' instructions.
;; 'Not equal' is implemented with 'equal' and inverting the result.
;; Floating-point
;; Every rule in this group is guarded by `ty_vector_float`, so it only fires
;; for vector types with floating-point lanes.
;; Eq: a single Fcmeq.
(rule (vec_cmp rn rm ty (Cond.Eq))
(if (ty_vector_float ty))
(vec_rrr (VecALUOp.Fcmeq) rn rm (vector_size ty)))
;; Ne: compare with Fcmeq, then invert the resulting mask with Not.
(rule (vec_cmp rn rm ty (Cond.Ne))
(if (ty_vector_float ty))
(let ((tmp Reg (vec_rrr (VecALUOp.Fcmeq) rn rm (vector_size ty))))
(vec_misc (VecMisc2.Not) tmp (vector_size ty))))
;; Ge / Gt: Fcmge / Fcmgt with the operands in their original order.
(rule (vec_cmp rn rm ty (Cond.Ge))
(if (ty_vector_float ty))
(vec_rrr (VecALUOp.Fcmge) rn rm (vector_size ty)))
(rule (vec_cmp rn rm ty (Cond.Gt))
(if (ty_vector_float ty))
(vec_rrr (VecALUOp.Fcmgt) rn rm (vector_size ty)))
;; Floating-point swapped-operands
;; Mi is emitted as Fcmgt and Ls as Fcmge, in both cases with `rm` and `rn`
;; exchanged (i.e. `rm > rn` and `rm >= rn` respectively).
(rule (vec_cmp rn rm ty (Cond.Mi))
(if (ty_vector_float ty))
(vec_rrr (VecALUOp.Fcmgt) rm rn (vector_size ty)))
(rule (vec_cmp rn rm ty (Cond.Ls))
(if (ty_vector_float ty))
(vec_rrr (VecALUOp.Fcmge) rm rn (vector_size ty)))
;; Integer
;; Every rule in this group is guarded by `ty_vector_not_float`, so it only
;; fires for vector types whose lanes are not floating-point.
;; Eq: a single Cmeq.
(rule (vec_cmp rn rm ty (Cond.Eq))
(if (ty_vector_not_float ty))
(vec_rrr (VecALUOp.Cmeq) rn rm (vector_size ty)))
;; Ne: compare with Cmeq, then invert the resulting mask with Not.
(rule (vec_cmp rn rm ty (Cond.Ne))
(if (ty_vector_not_float ty))
(let ((tmp Reg (vec_rrr (VecALUOp.Cmeq) rn rm (vector_size ty))))
(vec_misc (VecMisc2.Not) tmp (vector_size ty))))
;; Ge / Gt / Hs / Hi map one-to-one onto Cmge / Cmgt / Cmhs / Cmhi with the
;; operands in their original order.
(rule (vec_cmp rn rm ty (Cond.Ge))
(if (ty_vector_not_float ty))
(vec_rrr (VecALUOp.Cmge) rn rm (vector_size ty)))
(rule (vec_cmp rn rm ty (Cond.Gt))
(if (ty_vector_not_float ty))
(vec_rrr (VecALUOp.Cmgt) rn rm (vector_size ty)))
(rule (vec_cmp rn rm ty (Cond.Hs))
(if (ty_vector_not_float ty))
(vec_rrr (VecALUOp.Cmhs) rn rm (vector_size ty)))
(rule (vec_cmp rn rm ty (Cond.Hi))
(if (ty_vector_not_float ty))
(vec_rrr (VecALUOp.Cmhi) rn rm (vector_size ty)))
;; Integer swapped-operands
;; Le / Lt / Ls / Lo each reuse the instruction of the mirrored condition
;; (Cmge / Cmgt / Cmhs / Cmhi) with `rm` and `rn` exchanged.
(rule (vec_cmp rn rm ty (Cond.Le))
(if (ty_vector_not_float ty))
(vec_rrr (VecALUOp.Cmge) rm rn (vector_size ty)))
(rule (vec_cmp rn rm ty (Cond.Lt))
(if (ty_vector_not_float ty))
(vec_rrr (VecALUOp.Cmgt) rm rn (vector_size ty)))
(rule (vec_cmp rn rm ty (Cond.Ls))
(if (ty_vector_not_float ty))
(vec_rrr (VecALUOp.Cmhs) rm rn (vector_size ty)))
(rule (vec_cmp rn rm ty (Cond.Lo))
(if (ty_vector_not_float ty))
(vec_rrr (VecALUOp.Cmhi) rm rn (vector_size ty)))
;; Helper for determining if any value in a vector is true.
;; This operation is implemented by using umaxp to create a scalar value, which
;; is then compared against zero.
;;
;; umaxp vn.4s, vm.4s, vm.4s
;; mov xm, vn.d[0]
;; cmp xm, #0
;; Returns the flag-producing comparison against zero for the vector `src`;
;; the consumer of the flags chooses which condition to test.
(decl vanytrue (Reg Type) ProducesFlags)

;; 128-bit vectors: fold the vector onto itself with a pairwise `Umaxp`, then
;; move lane 0 out as a 64-bit scalar and compare it with zero.
(rule (vanytrue src (ty_vec128 ty))
      (let ((pairwise_max Reg (vec_rrr (VecALUOp.Umaxp) src src (VectorSize.Size32x4)))
            (low64 Reg (mov_from_vec pairwise_max 0 (ScalarSize.Size64))))
        (cmp_imm (OperandSize.Size64) low64 (u8_into_imm12 0))))

;; 64-bit vectors: lane 0, read as a 64-bit scalar, is compared with zero
;; directly; no `Umaxp` step is emitted.
(rule (vanytrue src ty)
      (if (ty_vec64 ty))
      (let ((low64 Reg (mov_from_vec src 0 (ScalarSize.Size64))))
        (cmp_imm (OperandSize.Size64) low64 (u8_into_imm12 0))))

View File

@@ -2349,13 +2349,15 @@ impl MachInstEmit for Inst {
&Inst::VecDupFromFpu { rd, rn, size } => { &Inst::VecDupFromFpu { rd, rn, size } => {
let rd = allocs.next_writable(rd); let rd = allocs.next_writable(rd);
let rn = allocs.next(rn); let rn = allocs.next(rn);
let imm5 = match size { let q = size.is_128bits() as u32;
VectorSize::Size32x4 => 0b00100, let imm5 = match size.lane_size() {
VectorSize::Size64x2 => 0b01000, ScalarSize::Size32 => 0b00100,
ScalarSize::Size64 => 0b01000,
_ => unimplemented!(), _ => unimplemented!(),
}; };
sink.put4( sink.put4(
0b010_01110000_00000_000001_00000_00000 0b000_01110000_00000_000001_00000_00000
| (q << 30)
| (imm5 << 16) | (imm5 << 16)
| (machreg_to_vec(rn) << 5) | (machreg_to_vec(rn) << 5)
| machreg_to_vec(rd.to_reg()), | machreg_to_vec(rd.to_reg()),

View File

@@ -182,24 +182,9 @@
;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; This operation is implemented by using umaxp to create a scalar value, which (rule (lower (has_type out_ty (vany_true x @ (value_type in_ty))))
;; is then compared against zero. (with_flags (vanytrue x in_ty)
;; (materialize_bool_result (ty_bits out_ty) (Cond.Ne))))
;; umaxp vn.4s, vm.4s, vm.4s
;; mov xm, vn.d[0]
;; cmp xm, #0
;; cset xm, ne
(rule (lower (vany_true x @ (value_type (ty_vec128 ty))))
(let ((x1 Reg (vec_rrr (VecALUOp.Umaxp) x x (VectorSize.Size32x4)))
(x2 Reg (mov_from_vec x1 0 (ScalarSize.Size64))))
(with_flags (cmp_imm (OperandSize.Size64) x2 (u8_into_imm12 0))
(materialize_bool_result (ty_bits ty) (Cond.Ne)))))
(rule (lower (vany_true x @ (value_type ty)))
(if (ty_vec64 ty))
(let ((x1 Reg (mov_from_vec x 0 (ScalarSize.Size64))))
(with_flags (cmp_imm (OperandSize.Size64) x1 (u8_into_imm12 0))
(materialize_bool_result (ty_bits ty) (Cond.Ne)))))
;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1704,6 +1689,16 @@
(vec_size VectorSize (vector_size ty))) (vec_size VectorSize (vector_size ty)))
(value_reg (float_cmp_zero_swap cond rn vec_size)))) (value_reg (float_cmp_zero_swap cond rn vec_size))))
;; Scalar floats: `fpu_cmp` produces the flags, and the boolean result is
;; materialized from them using the condition translated by `fp_cond_code`.
(rule (lower (has_type out_ty
                       (fcmp cond x @ (value_type (ty_scalar_float in_ty)) y)))
      (let ((flag_cond Cond (fp_cond_code cond)))
        (with_flags (fpu_cmp (scalar_size in_ty) x y)
                    (materialize_bool_result (ty_bits out_ty) flag_cond))))

;; Float vectors: handed off to `vec_cmp` together with the translated
;; condition.
(rule (lower (has_type out_ty (fcmp cond x @ (value_type in_ty) y)))
      (if (ty_vector_float in_ty))
      (let ((lane_cond Cond (fp_cond_code cond)))
        (vec_cmp x y in_ty lane_cond)))
;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1879,7 +1874,6 @@
(atomic_cas flags addr src1 src2)))) (atomic_cas flags addr src1 src2))))
(atomic_cas_loop addr src1 src2 ty)) (atomic_cas_loop addr src1 src2 ty))
;;;; Rules for 'fvdemote' ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for 'fvdemote' ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (fvdemote x)) (rule (lower (fvdemote x))
(fcvtn x (ScalarSize.Size32))) (fcvtn x (ScalarSize.Size32)))

View File

@@ -6,12 +6,14 @@ use generated_code::Context;
// Types that the generated ISLE code uses via `use super::*`. // Types that the generated ISLE code uses via `use super::*`.
use super::{ use super::{
lower_constant_f128, lower_constant_f32, lower_constant_f64, writable_zero_reg, zero_reg, lower_constant_f128, lower_constant_f32, lower_constant_f64, lower_fp_condcode,
AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo,
ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg, Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode,
ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV, Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize,
NZCV,
}; };
use crate::ir::condcodes;
use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm}; use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm};
use crate::isa::aarch64::lower::{lower_address, lower_pair_address, lower_splat_const}; use crate::isa::aarch64::lower::{lower_address, lower_pair_address, lower_splat_const};
use crate::isa::aarch64::settings::Flags as IsaFlags; use crate::isa::aarch64::settings::Flags as IsaFlags;
@@ -520,6 +522,10 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
rd.to_reg() rd.to_reg()
} }
// Rust-side implementation of the ISLE `fp_cond_code` constructor: copies the
// borrowed float condition code and forwards it to `lower_fp_condcode`,
// returning the `Cond` that function produces.
fn fp_cond_code(&mut self, cc: &condcodes::FloatCC) -> Cond {
lower_fp_condcode(*cc)
}
fn preg_sp(&mut self) -> PReg { fn preg_sp(&mut self) -> PReg {
super::regs::stack_reg().to_real_reg().unwrap().into() super::regs::stack_reg().to_real_reg().unwrap().into()
} }

View File

@@ -299,25 +299,7 @@ pub(crate) fn lower_insn_to_regs(
lower_icmp(ctx, insn, condcode, IcmpOutput::Register(rd))?; lower_icmp(ctx, insn, condcode, IcmpOutput::Register(rd))?;
} }
Opcode::Fcmp => { Opcode::Fcmp => implemented_in_isle(ctx),
let condcode = ctx.data(insn).fp_cond_code().unwrap();
let cond = lower_fp_condcode(condcode);
let ty = ctx.input_ty(insn, 0);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if !ty.is_vector() {
ctx.emit(Inst::FpuCmp {
size: ScalarSize::from_ty(ty),
rn,
rm,
});
materialize_bool_result(ctx, insn, rd, cond);
} else {
lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
}
}
Opcode::Debugtrap => implemented_in_isle(ctx), Opcode::Debugtrap => implemented_in_isle(ctx),

View File

@@ -397,6 +397,23 @@ macro_rules! isle_prelude_methods {
} }
} }
// Returns `Some(ty)` when `ty` is a vector type whose lanes are
// floating-point, and `None` for every other type.
//
// Marked `#[inline]` for consistency with the sibling type-predicate helpers
// (`ty_vector_not_float`, `ty_vec64_ctor`), which all carry the attribute.
#[inline]
fn ty_vector_float(&mut self, ty: Type) -> Option<Type> {
    if ty.is_vector() && ty.lane_type().is_float() {
        Some(ty)
    } else {
        None
    }
}
// Returns `Some(ty)` when `ty` is a vector type whose lanes are not
// floating-point, and `None` for every other type.
#[inline]
fn ty_vector_not_float(&mut self, ty: Type) -> Option<Type> {
    // Non-vectors are rejected first, so the lane type is only inspected for
    // actual vector types.
    if !ty.is_vector() || ty.lane_type().is_float() {
        return None;
    }
    Some(ty)
}
#[inline] #[inline]
fn ty_vec64_ctor(&mut self, ty: Type) -> Option<Type> { fn ty_vec64_ctor(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && ty.bits() == 64 { if ty.is_vector() && ty.bits() == 64 {

View File

@@ -381,6 +381,15 @@
(decl ty_float_or_vec (Type) Type) (decl ty_float_or_vec (Type) Type)
(extern extractor ty_float_or_vec ty_float_or_vec) (extern extractor ty_float_or_vec ty_float_or_vec)
;; A pure constructor that only matches vector floating-point types.
;; On a match the input type is returned unchanged. Because it is a
;; constructor, it is used as a rule guard, e.g. `(if (ty_vector_float ty))`.
(decl pure ty_vector_float (Type) Type)
(extern constructor ty_vector_float ty_vector_float)
;; A pure constructor that only matches vector types with lanes which
;; are not floating-point.
;; On a match the input type is returned unchanged. It is used as a rule
;; guard in the same way, e.g. `(if (ty_vector_not_float ty))`.
(decl pure ty_vector_not_float (Type) Type)
(extern constructor ty_vector_not_float ty_vector_not_float)
;; A pure constructor/extractor that only matches 64-bit vector types. ;; A pure constructor/extractor that only matches 64-bit vector types.
(decl pure ty_vec64 (Type) Type) (decl pure ty_vec64 (Type) Type)
(extern constructor ty_vec64 ty_vec64_ctor) (extern constructor ty_vec64 ty_vec64_ctor)

View File

@@ -22,7 +22,7 @@ block0(v0: b32x4):
; umaxp v3.4s, v0.4s, v0.4s ; umaxp v3.4s, v0.4s, v0.4s
; mov x5, v3.d[0] ; mov x5, v3.d[0]
; subs xzr, x5, #0 ; subs xzr, x5, #0
; csetm x0, ne ; cset x0, ne
; ret ; ret
function %vall_true_i64x2(i64x2) -> b1 { function %vall_true_i64x2(i64x2) -> b1 {

View File

@@ -39,3 +39,11 @@ block0(v0:f32x4):
; run: %fcvt_to_uint_sat([-0x8.1 -0x0.0 0x0.0 -0x1.0p100]) == [0 0 0 0] ; run: %fcvt_to_uint_sat([-0x8.1 -0x0.0 0x0.0 -0x1.0p100]) == [0 0 0 0]
; run: %fcvt_to_uint_sat([0xB2D05E00.0 0.0 0.0 0.0]) == [3000000000 0 0 0] ; run: %fcvt_to_uint_sat([0xB2D05E00.0 0.0 0.0 0.0]) == [3000000000 0 0 0]
; run: %fcvt_to_uint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0] ; run: %fcvt_to_uint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0]
; Run test for `fcvt_low_from_sint`, i32x4 -> f64x2. In the expectations below,
; the two result lanes are the first two input lanes converted to f64; the
; last two input lanes do not appear in the result.
function %fcvt_low_from_sint(i32x4) -> f64x2 {
block0(v0: i32x4):
v1 = fcvt_low_from_sint.f64x2 v0
return v1
}
; run: %fcvt_low_from_sint([0 1 -1 65535]) == [0x0.0 0x1.0]
; run: %fcvt_low_from_sint([-1 123456789 0 1]) == [-0x1.0 0x1.d6f3454p26]