From e9b08b856de1ac0a70b823ce87a56db41d810508 Mon Sep 17 00:00:00 2001 From: Damian Heaton <87125748+dheaton-arm@users.noreply.github.com> Date: Tue, 13 Sep 2022 16:56:50 +0100 Subject: [PATCH] Port `icmp` to ISLE (AArch64) (#4898) * Port `icmp` to ISLE (AArch64) Ported the existing implementation of `icmp` (and, by extension, the `lower_icmp` function) to ISLE for AArch64. Copyright (c) 2022 Arm Limited * Allow 'producer chains', eliminating `Nop0`s Copyright (c) 2022 Arm Limited --- cranelift/codegen/src/isa/aarch64/inst.isle | 190 ++++++++++++++++++ cranelift/codegen/src/isa/aarch64/lower.isle | 3 + .../codegen/src/isa/aarch64/lower/isle.rs | 12 +- .../codegen/src/isa/aarch64/lower_inst.rs | 6 +- cranelift/codegen/src/isa/x64/inst.isle | 6 - cranelift/codegen/src/isa/x64/lower/isle.rs | 5 - cranelift/codegen/src/machinst/isle.rs | 25 ++- cranelift/codegen/src/prelude.isle | 53 +++++ .../filetests/isa/aarch64/condbr.clif | 48 ++--- .../isa/aarch64/iconst-icmp-small.clif | 12 +- .../isa/aarch64/simd-comparison-legalize.clif | 4 +- 11 files changed, 310 insertions(+), 54 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 0f753726ed..5a95b22d66 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -1960,6 +1960,12 @@ (MInst.AluRRImm12 (ALUOp.AddS) size (writable_zero_reg) src1 src2))) +(decl cmp (OperandSize Reg Reg) ProducesFlags) +(rule (cmp size src1 src2) + (ProducesFlags.ProducesFlagsSideEffect + (MInst.AluRRR (ALUOp.SubS) size (writable_zero_reg) + src1 src2))) + (decl cmp_imm (OperandSize Reg Imm12) ProducesFlags) (rule (cmp_imm size src1 src2) (ProducesFlags.ProducesFlagsSideEffect @@ -1970,6 +1976,12 @@ (rule (cmp64_imm src1 src2) (cmp_imm (OperandSize.Size64) src1 src2)) +(decl cmp_extend (OperandSize Reg Reg ExtendOp) ProducesFlags) +(rule (cmp_extend size src1 src2 extend) + (ProducesFlags.ProducesFlagsSideEffect + 
(MInst.AluRRRExtend (ALUOp.SubS) size (writable_zero_reg) + src1 src2 extend))) + ;; Helper for emitting `sbc` instructions. (decl sbc_paired (Type Reg Reg) ConsumesFlags) (rule (sbc_paired ty src1 src2) @@ -2199,6 +2211,13 @@ (MInst.CSNeg dst cond if_true if_false) dst))) +;; Helper for generating `MInst.CCmp` instructions. +;; Creates a new `ProducesFlags` from the supplied `ProducesFlags` followed +;; immediately by the `MInst.CCmp` instruction. +(decl ccmp (OperandSize Reg Reg NZCV Cond ProducesFlags) ProducesFlags) +(rule (ccmp size rn rm nzcv cond inst_input) + (produces_flags_append inst_input (MInst.CCmp size rn rm nzcv cond))) + ;; Helper for generating `MInst.CCmpImm` instructions. (decl ccmp_imm (OperandSize u8 Reg UImm5 NZCV Cond) ConsumesFlags) (rule (ccmp_imm size 1 rn imm nzcv cond) @@ -2845,6 +2864,11 @@ ;; TODO: Port lower_fp_condcode() to ISLE. (extern constructor fp_cond_code fp_cond_code) +;; Lower an integer cond code. +(decl cond_code (IntCC) Cond) +;; TODO: Port lower_condcode() to ISLE. +(extern constructor cond_code cond_code) + ;; Generate comparison to zero operator from input condition code (decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2) (extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op) @@ -3280,3 +3304,169 @@ (let ((dst WritableReg (temp_writable_reg $I64)) (_ Unit (emit (MInst.ElfTlsGetAddr name dst)))) dst)) + +;; Helpers for lowering `icmp` sequences. +;; `lower_icmp` contains shared functionality for lowering `icmp` +;; sequences, which `lower_icmp_into_{reg,flags}` extend from. +(decl lower_icmp (IntCC Value Value Type) ProducesFlags) +(decl lower_icmp_into_reg (IntCC Value Value Type Type) ValueRegs) +(decl lower_icmp_into_flags (IntCC Value Value Type) ProducesFlags) +;; For most cases, `lower_icmp_into_flags` is the same as `lower_icmp`, +;; except for some I128 cases (see below). +(rule -1 (lower_icmp_into_flags cond x y ty) (lower_icmp cond x y ty)) + +;; Vectors. 
+;; `icmp` into flags for vectors is invalid. +(rule (lower_icmp_into_reg cond x y in_ty @ (multi_lane _ _) _out_ty) + (let ((cond Cond (cond_code cond)) + (rn Reg (put_in_reg x)) + (rm Reg (put_in_reg y))) + (vec_cmp rn rm in_ty cond))) + +;; Determines the appropriate extend op given the value type and whether it is signed. +(decl lower_icmp_extend (Type bool) ExtendOp) +(rule (lower_icmp_extend $I8 $true) (ExtendOp.SXTB)) +(rule (lower_icmp_extend $I16 $true) (ExtendOp.SXTH)) +(rule (lower_icmp_extend $I8 $false) (ExtendOp.UXTB)) +(rule (lower_icmp_extend $I16 $false) (ExtendOp.UXTH)) + +;; Integers <= 64-bits. +(rule (lower_icmp_into_reg cond rn rm in_ty out_ty) + (if (ty_int_bool_ref_scalar_64 in_ty)) + (let ((cc Cond (cond_code cond))) + (with_flags + (lower_icmp cond rn rm in_ty) + (materialize_bool_result (ty_bits out_ty) cc)))) + +(rule 1 (lower_icmp cond rn rm (fits_in_16 ty)) + (if (signed_cond_code cond)) + (let ((rn Reg (put_in_reg_sext32 rn))) + (cmp_extend (operand_size ty) rn rm (lower_icmp_extend ty $true)))) +(rule (lower_icmp cond rn (imm12_from_value rm) (fits_in_16 ty)) + (let ((rn Reg (put_in_reg_zext32 rn))) + (cmp_imm (operand_size ty) rn rm))) +(rule -1 (lower_icmp cond rn rm (fits_in_16 ty)) + (let ((rn Reg (put_in_reg_zext32 rn))) + (cmp_extend (operand_size ty) rn rm (lower_icmp_extend ty $false)))) +(rule -2 (lower_icmp cond rn (imm12_from_value rm) ty) + (if (ty_int_bool_ref_scalar_64 ty)) + (cmp_imm (operand_size ty) rn rm)) +(rule -3 (lower_icmp cond rn rm ty) + (if (ty_int_bool_ref_scalar_64 ty)) + (cmp (operand_size ty) rn rm)) + +;; 128-bit integers. 
+(rule (lower_icmp_into_reg cond @ (IntCC.Equal) rn rm $I128 out_ty) + (let ((cc Cond (cond_code cond))) + (with_flags + (lower_icmp cond rn rm $I128) + (materialize_bool_result (ty_bits out_ty) cc)))) +(rule (lower_icmp_into_reg cond @ (IntCC.NotEqual) rn rm $I128 out_ty) + (let ((cc Cond (cond_code cond))) + (with_flags + (lower_icmp cond rn rm $I128) + (materialize_bool_result (ty_bits out_ty) cc)))) + +;; cmp lhs_lo, rhs_lo +;; ccmp lhs_hi, rhs_hi, #0, eq +(decl lower_icmp_i128_eq_ne (Value Value) ProducesFlags) +(rule (lower_icmp_i128_eq_ne lhs rhs) + (let ((lhs_lo Reg (value_regs_get lhs 0)) + (lhs_hi Reg (value_regs_get lhs 1)) + (rhs_lo Reg (value_regs_get rhs 0)) + (rhs_hi Reg (value_regs_get rhs 1)) + (cmp_inst ProducesFlags (cmp (OperandSize.Size64) lhs_lo rhs_lo))) + (ccmp (OperandSize.Size64) lhs_hi rhs_hi + (nzcv $false $false $false $false) (Cond.Eq) cmp_inst))) + +(rule (lower_icmp (IntCC.Equal) lhs rhs $I128) + (lower_icmp_i128_eq_ne lhs rhs)) +(rule (lower_icmp (IntCC.NotEqual) lhs rhs $I128) + (lower_icmp_i128_eq_ne lhs rhs)) + +;; cmp lhs_lo, rhs_lo +;; cset tmp1, unsigned_cond +;; cmp lhs_hi, rhs_hi +;; cset tmp2, cond +;; csel dst, tmp1, tmp2, eq +(rule -1 (lower_icmp_into_reg cond lhs rhs $I128 out_ty) + (let ((unsigned_cond Cond (cond_code (intcc_unsigned cond))) + (cond Cond (cond_code cond)) + (lhs_lo Reg (value_regs_get lhs 0)) + (lhs_hi Reg (value_regs_get lhs 1)) + (rhs_lo Reg (value_regs_get rhs 0)) + (rhs_hi Reg (value_regs_get rhs 1)) + (tmp1 ValueRegs + (with_flags (cmp (OperandSize.Size64) lhs_lo rhs_lo) + (materialize_bool_result + (ty_bits out_ty) unsigned_cond))) + (tmp1 Reg (value_regs_get tmp1 0)) + (dst ValueRegs + (with_flags (cmp (OperandSize.Size64) lhs_hi rhs_hi) + (lower_icmp_i128_consumer cond (ty_bits out_ty) + tmp1 lhs_hi rhs_hi)))) + dst)) + +(decl lower_icmp_i128_consumer (Cond u8 Reg Reg Reg) ConsumesFlags) +(rule (lower_icmp_i128_consumer cond 1 tmp1 lhs_hi rhs_hi) + (let ((tmp2 WritableReg (temp_writable_reg 
$I64)) + (dst WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs + (MInst.CSet tmp2 cond) + (MInst.CSel dst (Cond.Eq) tmp1 tmp2) + (value_reg dst)))) +(rule (lower_icmp_i128_consumer cond 128 tmp1 lhs_hi rhs_hi) + (let ((tmp2 WritableReg (temp_writable_reg $I64)) + (dst WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs + (MInst.CSetm tmp2 cond) + (MInst.CSel dst (Cond.Eq) tmp1 tmp2) + (value_regs dst dst)))) +(rule -1 (lower_icmp_i128_consumer cond _out_ty_bits tmp1 lhs_hi rhs_hi) + (let ((tmp2 WritableReg (temp_writable_reg $I64)) + (dst WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs + (MInst.CSetm tmp2 cond) + (MInst.CSel dst (Cond.Eq) tmp1 tmp2) + (value_reg dst)))) + +;; Exceptional `lower_icmp_into_flags` rules. +;; We need to guarantee that the flags for `cond` are correct, so we +;; compare `dst` with 1. +(rule (lower_icmp_into_flags cond @ (IntCC.SignedGreaterThanOrEqual) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1)) + (dst Reg (value_regs_get dst 0)) + (tmp Reg (imm $I64 (ImmExtend.Sign) 1))) ;; mov tmp, #1 + (cmp (OperandSize.Size64) dst tmp))) +(rule (lower_icmp_into_flags cond @ (IntCC.UnsignedGreaterThanOrEqual) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1)) + (dst Reg (value_regs_get dst 0)) + (tmp Reg (imm $I64 (ImmExtend.Zero) 1))) + (cmp (OperandSize.Size64) dst tmp))) +(rule (lower_icmp_into_flags cond @ (IntCC.SignedLessThanOrEqual) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1)) + (dst Reg (value_regs_get dst 0)) + (tmp Reg (imm $I64 (ImmExtend.Sign) 1))) + (cmp (OperandSize.Size64) tmp dst))) +(rule (lower_icmp_into_flags cond @ (IntCC.UnsignedLessThanOrEqual) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1)) + (dst Reg (value_regs_get dst 0)) + (tmp Reg (imm $I64 
(ImmExtend.Zero) 1))) + (cmp (OperandSize.Size64) tmp dst))) +;; For strict comparisons, we compare with 0. +(rule (lower_icmp_into_flags cond @ (IntCC.SignedGreaterThan) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1)) + (dst Reg (value_regs_get dst 0))) + (cmp (OperandSize.Size64) dst (zero_reg)))) +(rule (lower_icmp_into_flags cond @ (IntCC.UnsignedGreaterThan) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1)) + (dst Reg (value_regs_get dst 0))) + (cmp (OperandSize.Size64) dst (zero_reg)))) +(rule (lower_icmp_into_flags cond @ (IntCC.SignedLessThan) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1)) + (dst Reg (value_regs_get dst 0))) + (cmp (OperandSize.Size64) (zero_reg) dst))) +(rule (lower_icmp_into_flags cond @ (IntCC.UnsignedLessThan) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1)) + (dst Reg (value_regs_get dst 0))) + (cmp (OperandSize.Size64) (zero_reg) dst))) diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 323e9c9af6..1ef60dd728 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -1730,6 +1730,9 @@ (vec_size VectorSize (vector_size ty))) (value_reg (int_cmp_zero_swap cond rn vec_size)))) +(rule -1 (lower (has_type out_ty (icmp cond x @ (value_type in_ty) y))) + (lower_icmp_into_reg cond x y in_ty out_ty)) + ;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (trap trap_code)) diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index edeb32f75b..075917ae2c 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -6,10 +6,10 @@ use generated_code::Context; // Types that the generated ISLE code uses via `use super::*`. 
use super::{ - fp_reg, lower_constant_f128, lower_constant_f32, lower_constant_f64, lower_fp_condcode, - stack_reg, writable_link_reg, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, - ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, - FPUOpRIMod, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo, + fp_reg, lower_condcode, lower_constant_f128, lower_constant_f32, lower_constant_f64, + lower_fp_condcode, stack_reg, writable_link_reg, writable_zero_reg, zero_reg, AMode, + ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, + FPUOpRI, FPUOpRIMod, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MemLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg, SImm9, ScalarSize, ShiftOpAndAmt, UImm12Scaled, UImm5, VecMisc2, VectorSize, NZCV, @@ -517,6 +517,10 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { lower_fp_condcode(*cc) } + fn cond_code(&mut self, cc: &condcodes::IntCC) -> Cond { + lower_condcode(*cc) + } + fn preg_sp(&mut self) -> PReg { super::regs::stack_reg().to_real_reg().unwrap().into() } diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index c9a09049cf..c15d93bbc3 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -293,11 +293,7 @@ pub(crate) fn lower_insn_to_regs( panic!("Should never reach ifcmp as isel root!"); } - Opcode::Icmp => { - let condcode = ctx.data(insn).cond_code().unwrap(); - let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - lower_icmp(ctx, insn, condcode, IcmpOutput::Register(rd))?; - } + Opcode::Icmp => implemented_in_isle(ctx), Opcode::Fcmp => implemented_in_isle(ctx), diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index c343d08715..bef45b9b18 100644 --- 
a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1362,12 +1362,6 @@ (decl intcc_without_eq (IntCC) IntCC) (extern constructor intcc_without_eq intcc_without_eq) -;; This is a direct import of `IntCC::unsigned`. -;; Get the corresponding IntCC with the signed component removed. -;; For conditions without a signed component, this is a no-op. -(decl intcc_unsigned (IntCC) IntCC) -(extern constructor intcc_unsigned intcc_unsigned) - ;;;; Helpers for Getting Particular Physical Registers ;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; These should only be used for legalization purposes, when we can't otherwise diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 188a5b48f2..a39db95c89 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -596,11 +596,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { x.without_equal() } - #[inline] - fn intcc_unsigned(&mut self, x: &IntCC) -> IntCC { - x.unsigned() - } - #[inline] fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC { CC::from_intcc(*intcc) diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index a5a60b44de..feff699d4e 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -8,8 +8,8 @@ use target_lexicon::Triple; pub use super::MachLabel; pub use crate::ir::{ - dynamic_to_fixed, ArgumentExtension, Constant, DynamicStackSlot, ExternalName, FuncRef, - GlobalValue, Immediate, SigRef, StackSlot, + condcodes, dynamic_to_fixed, ArgumentExtension, Constant, DynamicStackSlot, ExternalName, + FuncRef, GlobalValue, Immediate, SigRef, StackSlot, }; pub use crate::isa::unwind::UnwindInst; pub use crate::machinst::{ @@ -1087,6 +1087,27 @@ macro_rules! 
isle_prelude_methods { fn gen_move(&mut self, ty: Type, dst: WritableReg, src: Reg) -> MInst { MInst::gen_move(dst, src, ty) } + + #[inline] + fn intcc_unsigned(&mut self, x: &IntCC) -> IntCC { + x.unsigned() + } + + #[inline] + fn signed_cond_code(&mut self, cc: &condcodes::IntCC) -> Option<condcodes::IntCC> { + match cc { + IntCC::Equal + | IntCC::UnsignedGreaterThanOrEqual + | IntCC::UnsignedGreaterThan + | IntCC::UnsignedLessThanOrEqual + | IntCC::UnsignedLessThan + | IntCC::NotEqual => None, + IntCC::SignedGreaterThanOrEqual + | IntCC::SignedGreaterThan + | IntCC::SignedLessThanOrEqual + | IntCC::SignedLessThan => Some(*cc), + } + } }; } diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 5decea26a4..d8a4458a2a 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -571,6 +571,16 @@ (decl pure is_sinkable_inst (Value) Inst) (extern constructor is_sinkable_inst is_sinkable_inst) +;; This is a direct import of `IntCC::unsigned`. +;; Get the corresponding IntCC with the signed component removed. +;; For conditions without a signed component, this is a no-op. +(decl intcc_unsigned (IntCC) IntCC) +(extern constructor intcc_unsigned intcc_unsigned) + +;; Pure constructor that only matches signed integer cond codes. +(decl pure signed_cond_code (IntCC) IntCC) +(extern constructor signed_cond_code signed_cond_code) + ;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Emit an instruction. @@ -657,11 +667,17 @@ ;; them. (AlreadyExistingFlags) (ProducesFlagsSideEffect (inst MInst)) + (ProducesFlagsTwiceSideEffect (inst1 MInst) (inst2 MInst)) ;; Not directly combinable with a ConsumesFlags; ;; used in s390x and unwrapped directly by `trapif`. (ProducesFlagsReturnsReg (inst MInst) (result Reg)) (ProducesFlagsReturnsResultWithConsumer (inst MInst) (result Reg)))) +;; Chain another producer to a `ProducesFlags`. 
+(decl produces_flags_append (ProducesFlags MInst) ProducesFlags) +(rule (produces_flags_append (ProducesFlags.ProducesFlagsSideEffect inst1) inst2) + (ProducesFlags.ProducesFlagsTwiceSideEffect inst1 inst2)) + ;; Newtype wrapper around `MInst` for instructions that consume flags. ;; ;; Variant determines how result is given when combined with a @@ -761,6 +777,43 @@ (_v Unit (emit consumer_inst_4))) consumer_result)) +(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2) + (ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result)) + (let ((_ Unit (emit producer_inst1)) + (_ Unit (emit producer_inst2)) + (_ Unit (emit consumer_inst))) + (value_reg consumer_result))) + +(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1 + consumer_inst_2 + consumer_result)) + ;; We must emit these instructions in order as the creator of + ;; the ConsumesFlags may be relying on dataflow dependencies + ;; amongst them. + (let ((_ Unit (emit producer_inst1)) + (_ Unit (emit producer_inst2)) + (_ Unit (emit consumer_inst_1)) + (_ Unit (emit consumer_inst_2))) + consumer_result)) + +(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1 + consumer_inst_2 + consumer_inst_3 + consumer_inst_4 + consumer_result)) + ;; We must emit these instructions in order as the creator of + ;; the ConsumesFlags may be relying on dataflow dependencies + ;; amongst them. 
+ (let ((_ Unit (emit producer_inst1)) + (_ Unit (emit producer_inst2)) + (_ Unit (emit consumer_inst_1)) + (_ Unit (emit consumer_inst_2)) + (_ Unit (emit consumer_inst_3)) + (_ Unit (emit consumer_inst_4))) + consumer_result)) + (decl with_flags_reg (ProducesFlags ConsumesFlags) Reg) (rule (with_flags_reg p c) (let ((v ValueRegs (with_flags p c))) diff --git a/cranelift/filetests/filetests/isa/aarch64/condbr.clif b/cranelift/filetests/filetests/isa/aarch64/condbr.clif index 3e6d753ec9..923233a5ba 100644 --- a/cranelift/filetests/filetests/isa/aarch64/condbr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/condbr.clif @@ -45,10 +45,10 @@ block0(v0: i128, v1: i128): ; block0: ; subs xzr, x0, x2 -; cset x8, lo +; cset x7, lo ; subs xzr, x1, x3 -; cset x11, lt -; csel x0, x8, x11, eq +; cset x10, lt +; csel x0, x7, x10, eq ; ret function %icmp_ult_i128(i128, i128) -> b1 { @@ -59,10 +59,10 @@ block0(v0: i128, v1: i128): ; block0: ; subs xzr, x0, x2 -; cset x8, lo +; cset x7, lo ; subs xzr, x1, x3 -; cset x11, lo -; csel x0, x8, x11, eq +; cset x10, lo +; csel x0, x7, x10, eq ; ret function %icmp_sle_i128(i128, i128) -> b1 { @@ -73,10 +73,10 @@ block0(v0: i128, v1: i128): ; block0: ; subs xzr, x0, x2 -; cset x8, ls +; cset x7, ls ; subs xzr, x1, x3 -; cset x11, le -; csel x0, x8, x11, eq +; cset x10, le +; csel x0, x7, x10, eq ; ret function %icmp_ule_i128(i128, i128) -> b1 { @@ -87,10 +87,10 @@ block0(v0: i128, v1: i128): ; block0: ; subs xzr, x0, x2 -; cset x8, ls +; cset x7, ls ; subs xzr, x1, x3 -; cset x11, ls -; csel x0, x8, x11, eq +; cset x10, ls +; csel x0, x7, x10, eq ; ret function %icmp_sgt_i128(i128, i128) -> b1 { @@ -101,10 +101,10 @@ block0(v0: i128, v1: i128): ; block0: ; subs xzr, x0, x2 -; cset x8, hi +; cset x7, hi ; subs xzr, x1, x3 -; cset x11, gt -; csel x0, x8, x11, eq +; cset x10, gt +; csel x0, x7, x10, eq ; ret function %icmp_ugt_i128(i128, i128) -> b1 { @@ -115,10 +115,10 @@ block0(v0: i128, v1: i128): ; block0: ; subs xzr, x0, x2 -; cset 
x8, hi +; cset x7, hi ; subs xzr, x1, x3 -; cset x11, hi -; csel x0, x8, x11, eq +; cset x10, hi +; csel x0, x7, x10, eq ; ret function %icmp_sge_i128(i128, i128) -> b1 { @@ -129,10 +129,10 @@ block0(v0: i128, v1: i128): ; block0: ; subs xzr, x0, x2 -; cset x8, hs +; cset x7, hs ; subs xzr, x1, x3 -; cset x11, ge -; csel x0, x8, x11, eq +; cset x10, ge +; csel x0, x7, x10, eq ; ret function %icmp_uge_i128(i128, i128) -> b1 { @@ -143,10 +143,10 @@ block0(v0: i128, v1: i128): ; block0: ; subs xzr, x0, x2 -; cset x8, hs +; cset x7, hs ; subs xzr, x1, x3 -; cset x11, hs -; csel x0, x8, x11, eq +; cset x10, hs +; csel x0, x7, x10, eq ; ret function %f(i64, i64) -> i64 { diff --git a/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif b/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif index b6be2e7bcb..bac108aadb 100644 --- a/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif +++ b/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif @@ -15,11 +15,11 @@ block0: } ; block0: -; movz x3, #56780 -; uxth w5, w3 -; movz x7, #56780 -; subs wzr, w5, w7, UXTH -; cset x4, ne -; and w0, w4, #1 +; movz x2, #56780 +; uxth w4, w2 +; movz x6, #56780 +; subs wzr, w4, w6, UXTH +; cset x9, ne +; and w0, w9, #1 ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/aarch64/simd-comparison-legalize.clif index b16a6bef53..5f724d03fc 100644 --- a/cranelift/filetests/filetests/isa/aarch64/simd-comparison-legalize.clif +++ b/cranelift/filetests/filetests/isa/aarch64/simd-comparison-legalize.clif @@ -9,8 +9,8 @@ block0(v0: i32x4, v1: i32x4): } ; block0: -; cmeq v0.4s, v0.4s, v1.4s -; mvn v0.16b, v0.16b +; cmeq v4.4s, v0.4s, v1.4s +; mvn v0.16b, v4.16b ; ret function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 {