Port branches to ISLE (AArch64) (#4943)
* Port branches to ISLE (AArch64)

  Ported the existing implementations of the following opcodes for AArch64 to ISLE:
  - `Brz`
  - `Brnz`
  - `Brif`
  - `Brff`
  - `BrIcmp`
  - `Jump`
  - `BrTable`

  Copyright (c) 2022 Arm Limited

* Remove dead code

  Copyright (c) 2022 Arm Limited
This commit is contained in:
@@ -1619,6 +1619,18 @@
|
|||||||
(decl u64_into_imm_logic (Type u64) ImmLogic)
|
(decl u64_into_imm_logic (Type u64) ImmLogic)
|
||||||
(extern constructor u64_into_imm_logic u64_into_imm_logic)
|
(extern constructor u64_into_imm_logic u64_into_imm_logic)
|
||||||
|
|
||||||
|
;; Build a `BranchTarget` from a vector of machine labels and a `u8` argument.
;; The branch lowering rules call this with 0 for the `taken` target and 1 for
;; the `not_taken` target, so the `u8` is presumably an index into the vector
;; (implemented externally in Rust -- confirm there).
(decl branch_target (VecMachLabel u8) BranchTarget)
|
||||||
|
(extern constructor branch_target branch_target)
|
||||||
|
|
||||||
|
;; Compute a `u32` size from a vector of machine labels. The `br_table`
;; lowering widens the result to `u64` and uses it as the bound that the index
;; register is compared against. Implemented externally in Rust.
(decl targets_jt_size (VecMachLabel) u32)
|
||||||
|
(extern constructor targets_jt_size targets_jt_size)
|
||||||
|
|
||||||
|
;; Compute a `CodeOffset` from a vector of machine labels. The `br_table`
;; lowering passes the result to `emit_island` as the needed space.
;; Implemented externally in Rust.
(decl targets_jt_space (VecMachLabel) CodeOffset)
|
||||||
|
(extern constructor targets_jt_space targets_jt_space)
|
||||||
|
|
||||||
|
;; Build the `BoxJTSequenceInfo` value that `br_table_impl` hands to
;; `jt_sequence`, which in turn attaches it to the `MInst.JTSequence`
;; instruction. Implemented externally in Rust.
(decl targets_jt_info (VecMachLabel) BoxJTSequenceInfo)
|
||||||
|
(extern constructor targets_jt_info targets_jt_info)
|
||||||
|
|
||||||
;; Calculate the minimum floating-point bound for a conversion to floating
|
;; Calculate the minimum floating-point bound for a conversion to floating
|
||||||
;; point from an integer type.
|
;; point from an integer type.
|
||||||
;; Accepts whether the output is signed, the size of the input
|
;; Accepts whether the output is signed, the size of the input
|
||||||
@@ -1698,6 +1710,9 @@
|
|||||||
(decl cond_br_zero (Reg) CondBrKind)
|
(decl cond_br_zero (Reg) CondBrKind)
|
||||||
(extern constructor cond_br_zero cond_br_zero)
|
(extern constructor cond_br_zero cond_br_zero)
|
||||||
|
|
||||||
|
;; Build the `CondBrKind` that the standard `brnz` lowerings pass to `cond_br`
;; for a branch on a register; the counterpart of `cond_br_zero`. Implemented
;; externally in Rust (presumably "branch if the register is non-zero" --
;; confirm there).
(decl cond_br_not_zero (Reg) CondBrKind)
|
||||||
|
(extern constructor cond_br_not_zero cond_br_not_zero)
|
||||||
|
|
||||||
(decl cond_br_cond (Cond) CondBrKind)
|
(decl cond_br_cond (Cond) CondBrKind)
|
||||||
(extern constructor cond_br_cond cond_br_cond)
|
(extern constructor cond_br_cond cond_br_cond)
|
||||||
|
|
||||||
@@ -2893,6 +2908,11 @@
|
|||||||
;; TODO: Port lower_condcode() to ISLE.
|
;; TODO: Port lower_condcode() to ISLE.
|
||||||
(extern constructor cond_code cond_code)
|
(extern constructor cond_code cond_code)
|
||||||
|
|
||||||
|
;; Invert a condition code.
;; The `brz`-after-compare lowerings apply this to the condition derived from
;; the compare in order to negate it.
|
||||||
|
(decl invert_cond (Cond) Cond)
|
||||||
|
;; TODO: Port cond.invert() to ISLE.
|
||||||
|
(extern constructor invert_cond invert_cond)
|
||||||
|
|
||||||
;; Generate comparison to zero operator from input condition code
|
;; Generate comparison to zero operator from input condition code
|
||||||
(decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
|
(decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
|
||||||
(extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)
|
(extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)
|
||||||
@@ -3530,3 +3550,65 @@
|
|||||||
(rule (lower_select flags cond ty rn rm)
|
(rule (lower_select flags cond ty rn rm)
|
||||||
(if (ty_int_bool_ref_scalar_64 ty))
|
(if (ty_int_bool_ref_scalar_64 ty))
|
||||||
(with_flags flags (csel cond rn rm)))
|
(with_flags flags (csel cond rn rm)))
|
||||||
|
|
||||||
|
;; Helper for emitting `MInst.Jump` instructions.
|
||||||
|
;; `target` is passed straight through to `MInst.Jump`. The instruction is
;; wrapped in `SideEffectNoResult.Inst`, i.e. it is emitted purely for its
;; side effect and yields no result value.
(decl aarch64_jump (BranchTarget) SideEffectNoResult)
|
||||||
|
(rule (aarch64_jump target)
|
||||||
|
(SideEffectNoResult.Inst (MInst.Jump target)))
|
||||||
|
|
||||||
|
;; Helper for emitting `MInst.JTSequence` instructions.
|
||||||
|
;; Emit the compound instruction that does:
|
||||||
|
;;
|
||||||
|
;; b.hs default
|
||||||
|
;; csel rB, xzr, rIndex, hs
|
||||||
|
;; csdb
|
||||||
|
;; adr rA, jt
|
||||||
|
;; ldrsw rB, [rA, rB, uxtw #2]
|
||||||
|
;; add rA, rA, rB
|
||||||
|
;; br rA
|
||||||
|
;; [jt entries]
|
||||||
|
;;
|
||||||
|
;; This must be *one* instruction in the vcode because
|
||||||
|
;; we cannot allow regalloc to insert any spills/fills
|
||||||
|
;; in the middle of the sequence; otherwise, the ADR's
|
||||||
|
;; PC-rel offset to the jumptable would be incorrect.
|
||||||
|
;; (The alternative is to introduce a relocation pass
|
||||||
|
;; for inlined jumptables, which is much worse, IMHO.)
|
||||||
|
;; `ridx` is the index register and `info` the jump-table description. The
;; rule allocates two fresh `$I64` temporaries and hands them to the
;; instruction (presumably the rA/rB scratch registers of the sequence --
;; confirm against the `MInst.JTSequence` definition). The result is a
;; `ConsumesFlags`, so it must be paired with a flags producer, as
;; `br_table_impl` does with a compare.
(decl jt_sequence (Reg BoxJTSequenceInfo) ConsumesFlags)
|
||||||
|
(rule (jt_sequence ridx info)
|
||||||
|
(let ((rtmp1 WritableReg (temp_writable_reg $I64))
|
||||||
|
(rtmp2 WritableReg (temp_writable_reg $I64)))
|
||||||
|
(ConsumesFlags.ConsumesFlagsSideEffect
|
||||||
|
(MInst.JTSequence info ridx rtmp1 rtmp2))))
|
||||||
|
|
||||||
|
;; Helper for emitting `MInst.CondBr` instructions.
|
||||||
|
;; `taken` and `not_taken` are the two branch targets and `kind` selects the
;; branch condition. The result is a `ConsumesFlags`, so it has to be combined
;; with a flags producer; the branch lowering rules do so through
;; `with_flags_side_effect`.
(decl cond_br (BranchTarget BranchTarget CondBrKind) ConsumesFlags)
|
||||||
|
(rule (cond_br taken not_taken kind)
|
||||||
|
(ConsumesFlags.ConsumesFlagsSideEffect
|
||||||
|
(MInst.CondBr taken not_taken kind)))
|
||||||
|
|
||||||
|
;; Helper for emitting `MInst.MovToNZCV` instructions.
|
||||||
|
;; `rn` is the source register. The instruction is returned as a
;; `ProducesFlags` side effect; the `brif`/`brff` fallback rules use it when
;; the flags value has been placed in a register and must be moved back into
;; the flags before branching.
(decl mov_to_nzcv (Reg) ProducesFlags)
|
||||||
|
(rule (mov_to_nzcv rn)
|
||||||
|
(ProducesFlags.ProducesFlagsSideEffect
|
||||||
|
(MInst.MovToNZCV rn)))
|
||||||
|
|
||||||
|
;; Helper for emitting `MInst.EmitIsland` instructions.
|
||||||
|
;; `needed_space` is forwarded unchanged to the instruction; the `br_table`
;; lowering computes it with `targets_jt_space`. The instruction is emitted
;; purely for its side effect and yields no result value.
(decl emit_island (CodeOffset) SideEffectNoResult)
|
||||||
|
(rule (emit_island needed_space)
|
||||||
|
(SideEffectNoResult.Inst
|
||||||
|
(MInst.EmitIsland needed_space)))
|
||||||
|
|
||||||
|
;; Helper for emitting `br_table` sequences.
|
||||||
|
;; Arguments: the jump-table size, the index register, and the vector of
;; branch targets. Both rules emit a 32-bit compare of the index against the
;; size, paired with the `jt_sequence` that consumes the resulting flags.
(decl br_table_impl (u64 Reg VecMachLabel) InstOutput)
|
||||||
|
;; Preferred form: the size matches `imm12_from_u64`, so the compare uses the
;; immediate directly (`cmp_imm`).
(rule (br_table_impl (imm12_from_u64 jt_size) ridx targets)
|
||||||
|
(let ((jt_info BoxJTSequenceInfo (targets_jt_info targets)))
|
||||||
|
(side_effect (with_flags_side_effect
|
||||||
|
(cmp_imm (OperandSize.Size32) ridx jt_size)
|
||||||
|
(jt_sequence ridx jt_info)))))
|
||||||
|
;; Lower-priority (-1) fallback for any other size: materialize it in a
;; register with `imm` and use a register-register compare (`cmp`).
(rule -1 (br_table_impl jt_size ridx targets)
|
||||||
|
(let ((jt_size Reg (imm $I64 (ImmExtend.Zero) jt_size))
|
||||||
|
(jt_info BoxJTSequenceInfo (targets_jt_info targets)))
|
||||||
|
(side_effect (with_flags_side_effect
|
||||||
|
(cmp (OperandSize.Size32) ridx jt_size)
|
||||||
|
(jt_sequence ridx jt_info)))))
|
||||||
|
|||||||
@@ -558,18 +558,6 @@ pub enum ScalarSize {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl ScalarSize {
|
impl ScalarSize {
|
||||||
/// Convert from a needed width to the smallest size that fits.
|
|
||||||
pub fn from_bits<I: Into<usize>>(bits: I) -> ScalarSize {
|
|
||||||
match bits.into().next_power_of_two() {
|
|
||||||
8 => ScalarSize::Size8,
|
|
||||||
16 => ScalarSize::Size16,
|
|
||||||
32 => ScalarSize::Size32,
|
|
||||||
64 => ScalarSize::Size64,
|
|
||||||
128 => ScalarSize::Size128,
|
|
||||||
w => panic!("Unexpected type width: {}", w),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Convert to an integer operand size.
|
/// Convert to an integer operand size.
|
||||||
pub fn operand_size(&self) -> OperandSize {
|
pub fn operand_size(&self) -> OperandSize {
|
||||||
match self {
|
match self {
|
||||||
@@ -579,13 +567,6 @@ impl ScalarSize {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Convert from a type into the smallest size that fits.
|
|
||||||
pub fn from_ty(ty: Type) -> ScalarSize {
|
|
||||||
debug_assert!(!ty.is_vector());
|
|
||||||
|
|
||||||
Self::from_bits(ty_bits(ty))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return the encoding bits that are used by some scalar FP instructions
|
/// Return the encoding bits that are used by some scalar FP instructions
|
||||||
/// for a particular operand size.
|
/// for a particular operand size.
|
||||||
pub fn ftype(&self) -> u32 {
|
pub fn ftype(&self) -> u32 {
|
||||||
@@ -645,32 +626,6 @@ impl VectorSize {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Convert from a type into a vector operand size.
|
|
||||||
pub fn from_ty(ty: Type) -> VectorSize {
|
|
||||||
debug_assert!(ty.is_vector());
|
|
||||||
|
|
||||||
match ty {
|
|
||||||
B8X8 => VectorSize::Size8x8,
|
|
||||||
B8X16 => VectorSize::Size8x16,
|
|
||||||
B16X4 => VectorSize::Size16x4,
|
|
||||||
B16X8 => VectorSize::Size16x8,
|
|
||||||
B32X2 => VectorSize::Size32x2,
|
|
||||||
B32X4 => VectorSize::Size32x4,
|
|
||||||
B64X2 => VectorSize::Size64x2,
|
|
||||||
F32X2 => VectorSize::Size32x2,
|
|
||||||
F32X4 => VectorSize::Size32x4,
|
|
||||||
F64X2 => VectorSize::Size64x2,
|
|
||||||
I8X8 => VectorSize::Size8x8,
|
|
||||||
I8X16 => VectorSize::Size8x16,
|
|
||||||
I16X4 => VectorSize::Size16x4,
|
|
||||||
I16X8 => VectorSize::Size16x8,
|
|
||||||
I32X2 => VectorSize::Size32x2,
|
|
||||||
I32X4 => VectorSize::Size32x4,
|
|
||||||
I64X2 => VectorSize::Size64x2,
|
|
||||||
_ => unimplemented!("Unsupported type: {}", ty),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get the integer operand size that corresponds to a lane of a vector with a certain size.
|
/// Get the integer operand size that corresponds to a lane of a vector with a certain size.
|
||||||
pub fn operand_size(&self) -> OperandSize {
|
pub fn operand_size(&self) -> OperandSize {
|
||||||
match self {
|
match self {
|
||||||
|
|||||||
@@ -4,6 +4,16 @@
|
|||||||
;; register(s) within which the lowered instruction's result values live.
|
;; register(s) within which the lowered instruction's result values live.
|
||||||
(decl lower (Inst) InstOutput)
|
(decl lower (Inst) InstOutput)
|
||||||
|
|
||||||
|
;; Variant of the main lowering constructor term, which receives an
|
||||||
|
;; additional argument (a vector of branch targets to be used) for
|
||||||
|
;; implementing branches.
|
||||||
|
;; For two-branch instructions, the first target is `taken` and the second
|
||||||
|
;; `not_taken`, even if it is a Fallthrough instruction: because we reorder
|
||||||
|
;; blocks while we lower, the fallthrough in the new order is not (necessarily)
|
||||||
|
;; the same as the fallthrough in CLIF. So, we use the explicitly-provided
|
||||||
|
;; target.
|
||||||
|
;; Arguments: the branch instruction and the vector of branch targets. The
;; two-target rules obtain `taken` via `(branch_target targets 0)` and
;; `not_taken` via `(branch_target targets 1)`.
(decl lower_branch (Inst VecMachLabel) InstOutput)
|
||||||
|
|
||||||
;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(rule (lower (has_type ty (iconst (u64_from_imm64 n))))
|
(rule (lower (has_type ty (iconst (u64_from_imm64 n))))
|
||||||
@@ -2497,12 +2507,185 @@
|
|||||||
|
|
||||||
;;; Rules for `brz`/`brnz`/`brif`/`brff`/`bricmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;; Rules for `brz`/`brnz`/`brif`/`brff`/`bricmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
;; TODO: requires icmp/fcmp first.
|
;; `brz` following `icmp`, possibly converted via `bint`.
|
||||||
|
;; Direct `icmp` operand: the compare is lowered into the flags with
;; `lower_icmp_into_flags`, and the branch uses the condition code of `cc`
;; after inverting it with `invert_cond`.
(rule (lower_branch (brz (icmp cc x @ (value_type ty) y) _ _) targets)
|
||||||
|
(let ((cond Cond (cond_code cc))
|
||||||
|
(cond Cond (invert_cond cond)) ;; negate for `brz`
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect (lower_icmp_into_flags cc x y ty)
|
||||||
|
(cond_br taken not_taken
|
||||||
|
(cond_br_cond cond))))))
|
||||||
|
;; `brz` on a `bint` of an `icmp`: the `bint` is matched through and the
;; lowering is identical to the direct `icmp` case (flags from
;; `lower_icmp_into_flags`, inverted condition).
(rule (lower_branch (brz (bint (icmp cc x @ (value_type ty) y)) _ _) targets)
|
||||||
|
(let ((cond Cond (cond_code cc))
|
||||||
|
(cond Cond (invert_cond cond)) ;; negate for `brz`
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect (lower_icmp_into_flags cc x y ty)
|
||||||
|
(cond_br taken not_taken
|
||||||
|
(cond_br_cond cond))))))
|
||||||
|
;; `brnz` following `icmp`, possibly converted via `bint`.
|
||||||
|
;; Direct `icmp` operand: the compare is lowered into the flags with
;; `lower_icmp_into_flags`, and the branch uses the condition code of `cc`
;; as-is (no `invert_cond`, unlike the `brz` rules).
(rule (lower_branch (brnz (icmp cc x @ (value_type ty) y) _ _) targets)
|
||||||
|
(let ((cond Cond (cond_code cc))
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect (lower_icmp_into_flags cc x y ty)
|
||||||
|
(cond_br taken not_taken
|
||||||
|
(cond_br_cond cond))))))
|
||||||
|
;; `brnz` on a `bint` of an `icmp`: the `bint` is matched through and the
;; lowering is identical to the direct `icmp` case (flags from
;; `lower_icmp_into_flags`, condition used as-is).
(rule (lower_branch (brnz (bint (icmp cc x @ (value_type ty) y)) _ _) targets)
|
||||||
|
(let ((cond Cond (cond_code cc))
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect (lower_icmp_into_flags cc x y ty)
|
||||||
|
(cond_br taken not_taken
|
||||||
|
(cond_br_cond cond))))))
|
||||||
|
;; `brz` following `fcmp`, possibly converted via `bint`.
|
||||||
|
;; Direct `fcmp` operand; only matches when the operand type satisfies
;; `ty_scalar_float`. The flags come from `fpu_cmp` and the condition from
;; `fp_cond_code`, inverted with `invert_cond`.
(rule (lower_branch (brz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets)
|
||||||
|
(let ((cond Cond (fp_cond_code cc))
|
||||||
|
(cond Cond (invert_cond cond)) ;; negate for `brz`
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
|
||||||
|
(cond_br taken not_taken
|
||||||
|
(cond_br_cond cond))))))
|
||||||
|
;; `brz` on a `bint` of an `fcmp` with `ty_scalar_float` operands: the `bint`
;; is matched through and the lowering is identical to the direct `fcmp` case
;; (flags from `fpu_cmp`, inverted `fp_cond_code` condition).
(rule (lower_branch (brz (bint (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) _ _) targets)
|
||||||
|
(let ((cond Cond (fp_cond_code cc))
|
||||||
|
(cond Cond (invert_cond cond)) ;; negate for `brz`
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
|
||||||
|
(cond_br taken not_taken
|
||||||
|
(cond_br_cond cond))))))
|
||||||
|
;; `brnz` following `fcmp`, possibly converted via `bint`.
|
||||||
|
;; Direct `fcmp` operand; only matches when the operand type satisfies
;; `ty_scalar_float`. The flags come from `fpu_cmp` and the condition from
;; `fp_cond_code`, used as-is (no `invert_cond`).
(rule (lower_branch (brnz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets)
|
||||||
|
(let ((cond Cond (fp_cond_code cc))
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
|
||||||
|
(cond_br taken not_taken
|
||||||
|
(cond_br_cond cond))))))
|
||||||
|
;; `brnz` on a `bint` of an `fcmp` with `ty_scalar_float` operands: the `bint`
;; is matched through and the lowering is identical to the direct `fcmp` case
;; (flags from `fpu_cmp`, `fp_cond_code` condition used as-is).
(rule (lower_branch (brnz (bint (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) _ _) targets)
|
||||||
|
(let ((cond Cond (fp_cond_code cc))
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
|
||||||
|
(cond_br taken not_taken
|
||||||
|
(cond_br_cond cond))))))
|
||||||
|
;; standard `brz`
|
||||||
|
;; 128-bit operand: the value occupies two registers, which are OR-ed into a
;; single 64-bit register; the branch then tests that register with
;; `cond_br_zero`. `flags_to_producesflags` (defined elsewhere) supplies the
;; flags producer that `with_flags_side_effect` pairs with the branch.
(rule (lower_branch (brz c @ (value_type $I128) _ _) targets)
|
||||||
|
(let ((flags ProducesFlags (flags_to_producesflags c))
|
||||||
|
(c ValueRegs (put_in_regs c))
|
||||||
|
(c_lo Reg (value_regs_get c 0))
|
||||||
|
(c_hi Reg (value_regs_get c 1))
|
||||||
|
(rt Reg (orr $I64 c_lo c_hi))
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect flags
|
||||||
|
(cond_br taken not_taken (cond_br_zero rt))))))
|
||||||
|
;; Operand types accepted by `ty_int_bool_ref_scalar_64`: the value is placed
;; in a register via `put_in_reg_zext64` and the branch tests that register
;; with `cond_br_zero`. `flags_to_producesflags` (defined elsewhere) supplies
;; the flags producer that `with_flags_side_effect` pairs with the branch.
(rule (lower_branch (brz c @ (value_type ty) _ _) targets)
|
||||||
|
(if (ty_int_bool_ref_scalar_64 ty))
|
||||||
|
(let ((flags ProducesFlags (flags_to_producesflags c))
|
||||||
|
(rt Reg (put_in_reg_zext64 c))
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect flags
|
||||||
|
(cond_br taken not_taken (cond_br_zero rt))))))
|
||||||
|
;; standard `brnz`
|
||||||
|
;; 128-bit operand: the value occupies two registers, which are OR-ed into a
;; single 64-bit register; the branch then tests that register with
;; `cond_br_not_zero`. `flags_to_producesflags` (defined elsewhere) supplies
;; the flags producer that `with_flags_side_effect` pairs with the branch.
(rule (lower_branch (brnz c @ (value_type $I128) _ _) targets)
|
||||||
|
(let ((flags ProducesFlags (flags_to_producesflags c))
|
||||||
|
(c ValueRegs (put_in_regs c))
|
||||||
|
(c_lo Reg (value_regs_get c 0))
|
||||||
|
(c_hi Reg (value_regs_get c 1))
|
||||||
|
(rt Reg (orr $I64 c_lo c_hi))
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect flags
|
||||||
|
(cond_br taken not_taken (cond_br_not_zero rt))))))
|
||||||
|
;; Operand types accepted by `ty_int_bool_ref_scalar_64`: the value is placed
;; in a register via `put_in_reg_zext64` and the branch tests that register
;; with `cond_br_not_zero`. `flags_to_producesflags` (defined elsewhere)
;; supplies the flags producer that `with_flags_side_effect` pairs with the
;; branch.
(rule (lower_branch (brnz c @ (value_type ty) _ _) targets)
|
||||||
|
(if (ty_int_bool_ref_scalar_64 ty))
|
||||||
|
(let ((flags ProducesFlags (flags_to_producesflags c))
|
||||||
|
(rt Reg (put_in_reg_zext64 c))
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect flags
|
||||||
|
(cond_br taken not_taken (cond_br_not_zero rt))))))
|
||||||
|
|
||||||
|
;; `br_icmp`
|
||||||
|
;; `br_icmp` carries its own compare operands: the flags are set with
;; `lower_icmp_into_flags` and the branch uses the condition code of `cc`
;; unchanged.
(rule (lower_branch (br_icmp cc x @ (value_type ty) y _ _) targets)
|
||||||
|
(let ((cond Cond (cond_code cc))
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect (lower_icmp_into_flags cc x y ty)
|
||||||
|
(cond_br taken not_taken
|
||||||
|
(cond_br_cond cond))))))
|
||||||
|
|
||||||
|
;; `brif`
|
||||||
|
;; The flags operand is produced directly by an `ifcmp`: that compare is
;; lowered into the flags with `lower_icmp_into_flags`, and the branch uses
;; the condition code of `cc`.
(rule (lower_branch (brif cc (ifcmp x @ (value_type ty) y) _ _) targets)
|
||||||
|
(let ((cond Cond (cond_code cc))
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect (lower_icmp_into_flags cc x y ty)
|
||||||
|
(cond_br taken not_taken
|
||||||
|
(cond_br_cond cond))))))
|
||||||
|
;; If the `ifcmp` result is actually placed in a register, we need to move it
|
||||||
|
;; back into the flags.
|
||||||
|
;; Priority -1, so the rule that matches a direct `ifcmp` operand is preferred
;; whenever it applies. Here the flags value `f` is put in a register and
;; moved into the flags with `mov_to_nzcv` before the conditional branch.
(rule -1 (lower_branch (brif cc f _ _) targets)
|
||||||
|
(let ((cond Cond (cond_code cc))
|
||||||
|
(rn Reg (put_in_reg f))
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect (mov_to_nzcv rn)
|
||||||
|
(cond_br taken not_taken
|
||||||
|
(cond_br_cond cond))))))
|
||||||
|
|
||||||
|
;; `brff`
|
||||||
|
;; The flags operand is produced directly by an `ffcmp`: the flags are set
;; with `fpu_cmp` at the scalar size of `ty`, and the branch uses the
;; condition obtained from `fp_cond_code`.
(rule (lower_branch (brff cc (ffcmp x @ (value_type ty) y) _ _) targets)
|
||||||
|
(let ((cond Cond (fp_cond_code cc))
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
|
||||||
|
(cond_br taken not_taken
|
||||||
|
(cond_br_cond cond))))))
|
||||||
|
;; If the `ffcmp` result is actually placed in a register, we need to move it
|
||||||
|
;; back into the flags.
|
||||||
|
;; Priority -1, so the rule that matches a direct `ffcmp` operand is preferred
;; whenever it applies. Here the flags value `f` is put in a register and
;; moved into the flags with `mov_to_nzcv` before the conditional branch.
(rule -1 (lower_branch (brff cc f _ _) targets)
|
||||||
|
(let ((cond Cond (fp_cond_code cc))
|
||||||
|
(rn Reg (put_in_reg f))
|
||||||
|
(taken BranchTarget (branch_target targets 0))
|
||||||
|
(not_taken BranchTarget (branch_target targets 1)))
|
||||||
|
(side_effect
|
||||||
|
(with_flags_side_effect (mov_to_nzcv rn)
|
||||||
|
(cond_br taken not_taken
|
||||||
|
(cond_br_cond cond))))))
|
||||||
|
|
||||||
;;; Rules for `jump` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;; Rules for `jump` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
;; TODO.
|
;; Unconditional jump to `(branch_target targets 0)`; the operands of the
;; `jump` instruction itself are ignored (`_ _`).
(rule (lower_branch (jump _ _) targets)
|
||||||
|
(side_effect (aarch64_jump (branch_target targets 0))))
|
||||||
|
|
||||||
;;; Rules for `br_table` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;; Rules for `br_table` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
;; TODO.
|
;; `targets` contains the default target with the list of branch targets
|
||||||
|
;; concatenated.
|
||||||
|
;; Steps, in binding order: compute the table size with `targets_jt_size`;
;; emit an island sized by `targets_jt_space` (the `_` binding exists only for
;; that side effect, its `InstOutput` is discarded); place the index in a
;; register via `put_in_reg_zext32`; then dispatch through `br_table_impl`
;; with the size widened to `u64`.
(rule (lower_branch (br_table idx _ _) targets)
|
||||||
|
(let ((jt_size u32 (targets_jt_size targets))
|
||||||
|
(_ InstOutput (side_effect
|
||||||
|
(emit_island (targets_jt_space targets))))
|
||||||
|
(ridx Reg (put_in_reg_zext32 idx)))
|
||||||
|
(br_table_impl (u32_as_u64 jt_size) ridx targets)))
|
||||||
|
|||||||
@@ -16,112 +16,29 @@ use crate::isa::aarch64::inst::*;
|
|||||||
use crate::isa::aarch64::AArch64Backend;
|
use crate::isa::aarch64::AArch64Backend;
|
||||||
use crate::machinst::lower::*;
|
use crate::machinst::lower::*;
|
||||||
use crate::machinst::{Reg, Writable};
|
use crate::machinst::{Reg, Writable};
|
||||||
|
use crate::CodegenResult;
|
||||||
use crate::{machinst::*, trace};
|
use crate::{machinst::*, trace};
|
||||||
use crate::{CodegenError, CodegenResult};
|
|
||||||
use smallvec::{smallvec, SmallVec};
|
use smallvec::{smallvec, SmallVec};
|
||||||
use std::cmp;
|
|
||||||
|
|
||||||
pub mod isle;
|
pub mod isle;
|
||||||
|
|
||||||
//============================================================================
|
|
||||||
// Result enum types.
|
|
||||||
//
|
|
||||||
// Lowering of a given value results in one of these enums, depending on the
|
|
||||||
// modes in which we can accept the value.
|
|
||||||
|
|
||||||
/// A lowering result: register, register-shift. An SSA value can always be
|
|
||||||
/// lowered into one of these options; the register form is the fallback.
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
enum ResultRS {
|
|
||||||
Reg(Reg),
|
|
||||||
RegShift(Reg, ShiftOpAndAmt),
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A lowering result: register, register-shift, register-extend. An SSA value can always be
|
|
||||||
/// lowered into one of these options; the register form is the fallback.
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
enum ResultRSE {
|
|
||||||
Reg(Reg),
|
|
||||||
RegShift(Reg, ShiftOpAndAmt),
|
|
||||||
RegExtend(Reg, ExtendOp),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ResultRSE {
|
|
||||||
fn from_rs(rs: ResultRS) -> ResultRSE {
|
|
||||||
match rs {
|
|
||||||
ResultRS::Reg(r) => ResultRSE::Reg(r),
|
|
||||||
ResultRS::RegShift(r, s) => ResultRSE::RegShift(r, s),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A lowering result: register, register-shift, register-extend, or 12-bit immediate form.
|
|
||||||
/// An SSA value can always be lowered into one of these options; the register form is the
|
|
||||||
/// fallback.
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub(crate) enum ResultRSEImm12 {
|
|
||||||
Reg(Reg),
|
|
||||||
RegShift(Reg, ShiftOpAndAmt),
|
|
||||||
RegExtend(Reg, ExtendOp),
|
|
||||||
Imm12(Imm12),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ResultRSEImm12 {
|
|
||||||
fn from_rse(rse: ResultRSE) -> ResultRSEImm12 {
|
|
||||||
match rse {
|
|
||||||
ResultRSE::Reg(r) => ResultRSEImm12::Reg(r),
|
|
||||||
ResultRSE::RegShift(r, s) => ResultRSEImm12::RegShift(r, s),
|
|
||||||
ResultRSE::RegExtend(r, e) => ResultRSEImm12::RegExtend(r, e),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//============================================================================
|
//============================================================================
|
||||||
// Lowering: convert instruction inputs to forms that we can use.
|
// Lowering: convert instruction inputs to forms that we can use.
|
||||||
|
|
||||||
/// Lower an instruction input to a 64-bit constant, if possible.
|
|
||||||
pub(crate) fn input_to_const(ctx: &mut Lower<Inst>, input: InsnInput) -> Option<u64> {
|
|
||||||
let input = ctx.get_input_as_source_or_const(input.insn, input.input);
|
|
||||||
input.constant
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Lower an instruction input to a constant register-shift amount, if possible.
|
|
||||||
pub(crate) fn input_to_shiftimm(
|
|
||||||
ctx: &mut Lower<Inst>,
|
|
||||||
input: InsnInput,
|
|
||||||
) -> Option<ShiftOpShiftImm> {
|
|
||||||
input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// How to handle narrow values loaded into registers; see note on `narrow_mode`
|
/// How to handle narrow values loaded into registers; see note on `narrow_mode`
|
||||||
/// parameter to `put_input_in_*` below.
|
/// parameter to `put_input_in_*` below.
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
pub(crate) enum NarrowValueMode {
|
pub(crate) enum NarrowValueMode {
|
||||||
None,
|
None,
|
||||||
/// Zero-extend to 32 bits if original is < 32 bits.
|
|
||||||
ZeroExtend32,
|
|
||||||
/// Sign-extend to 32 bits if original is < 32 bits.
|
|
||||||
SignExtend32,
|
|
||||||
/// Zero-extend to 64 bits if original is < 64 bits.
|
/// Zero-extend to 64 bits if original is < 64 bits.
|
||||||
ZeroExtend64,
|
ZeroExtend64,
|
||||||
/// Sign-extend to 64 bits if original is < 64 bits.
|
|
||||||
SignExtend64,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl NarrowValueMode {
|
impl NarrowValueMode {
|
||||||
fn is_32bit(&self) -> bool {
|
fn is_32bit(&self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
NarrowValueMode::None => false,
|
NarrowValueMode::None => false,
|
||||||
NarrowValueMode::ZeroExtend32 | NarrowValueMode::SignExtend32 => true,
|
NarrowValueMode::ZeroExtend64 => false,
|
||||||
NarrowValueMode::ZeroExtend64 | NarrowValueMode::SignExtend64 => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_signed(&self) -> bool {
|
|
||||||
match self {
|
|
||||||
NarrowValueMode::SignExtend32 | NarrowValueMode::SignExtend64 => true,
|
|
||||||
NarrowValueMode::ZeroExtend32 | NarrowValueMode::ZeroExtend64 => false,
|
|
||||||
NarrowValueMode::None => false,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -159,29 +76,6 @@ fn extend_reg(
|
|||||||
let from_bits = ty_bits(ty) as u8;
|
let from_bits = ty_bits(ty) as u8;
|
||||||
match (narrow_mode, from_bits) {
|
match (narrow_mode, from_bits) {
|
||||||
(NarrowValueMode::None, _) => in_reg,
|
(NarrowValueMode::None, _) => in_reg,
|
||||||
(NarrowValueMode::ZeroExtend32, n) if n < 32 => {
|
|
||||||
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
|
|
||||||
ctx.emit(Inst::Extend {
|
|
||||||
rd: tmp,
|
|
||||||
rn: in_reg,
|
|
||||||
signed: false,
|
|
||||||
from_bits,
|
|
||||||
to_bits: 32,
|
|
||||||
});
|
|
||||||
tmp.to_reg()
|
|
||||||
}
|
|
||||||
(NarrowValueMode::SignExtend32, n) if n < 32 => {
|
|
||||||
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
|
|
||||||
ctx.emit(Inst::Extend {
|
|
||||||
rd: tmp,
|
|
||||||
rn: in_reg,
|
|
||||||
signed: true,
|
|
||||||
from_bits,
|
|
||||||
to_bits: 32,
|
|
||||||
});
|
|
||||||
tmp.to_reg()
|
|
||||||
}
|
|
||||||
(NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,
|
|
||||||
|
|
||||||
(NarrowValueMode::ZeroExtend64, n) if n < 64 => {
|
(NarrowValueMode::ZeroExtend64, n) if n < 64 => {
|
||||||
if is_const {
|
if is_const {
|
||||||
@@ -199,17 +93,6 @@ fn extend_reg(
|
|||||||
tmp.to_reg()
|
tmp.to_reg()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(NarrowValueMode::SignExtend64, n) if n < 64 => {
|
|
||||||
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
|
|
||||||
ctx.emit(Inst::Extend {
|
|
||||||
rd: tmp,
|
|
||||||
rn: in_reg,
|
|
||||||
signed: true,
|
|
||||||
from_bits,
|
|
||||||
to_bits: 64,
|
|
||||||
});
|
|
||||||
tmp.to_reg()
|
|
||||||
}
|
|
||||||
(_, 64) => in_reg,
|
(_, 64) => in_reg,
|
||||||
(_, 128) => in_reg,
|
(_, 128) => in_reg,
|
||||||
|
|
||||||
@@ -261,72 +144,6 @@ fn put_value_in_reg(ctx: &mut Lower<Inst>, value: Value, narrow_mode: NarrowValu
|
|||||||
extend_reg(ctx, ty, reg, is_const, narrow_mode)
|
extend_reg(ctx, ty, reg, is_const, narrow_mode)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Lower an instruction input to multiple regs
|
|
||||||
pub(crate) fn put_input_in_regs(ctx: &mut Lower<Inst>, input: InsnInput) -> ValueRegs<Reg> {
|
|
||||||
let value = ctx.input_as_value(input.insn, input.input);
|
|
||||||
let (in_regs, _, _) = lower_value_to_regs(ctx, value);
|
|
||||||
in_regs
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
|
|
||||||
///
|
|
||||||
/// The `narrow_mode` flag indicates whether the consumer of this value needs
|
|
||||||
/// the high bits clear. For many operations, such as an add/sub/mul or any
|
|
||||||
/// bitwise logical operation, the low-bit results depend only on the low-bit
|
|
||||||
/// inputs, so e.g. we can do an 8 bit add on 32 bit registers where the 8-bit
|
|
||||||
/// value is stored in the low 8 bits of the register and the high 24 bits are
|
|
||||||
/// undefined. If the op truly needs the high N bits clear (such as for a
|
|
||||||
/// divide or a right-shift or a compare-to-zero), `narrow_mode` should be
|
|
||||||
/// set to `ZeroExtend` or `SignExtend` as appropriate, and the resulting
|
|
||||||
/// register will be provided the extended value.
|
|
||||||
fn put_input_in_rs(
|
|
||||||
ctx: &mut Lower<Inst>,
|
|
||||||
input: InsnInput,
|
|
||||||
narrow_mode: NarrowValueMode,
|
|
||||||
) -> ResultRS {
|
|
||||||
let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
|
|
||||||
// Unique or non-unique use is fine for merging here.
|
|
||||||
if let Some((insn, 0)) = inputs.inst.as_inst() {
|
|
||||||
let op = ctx.data(insn).opcode();
|
|
||||||
|
|
||||||
if op == Opcode::Ishl {
|
|
||||||
let shiftee = InsnInput { insn, input: 0 };
|
|
||||||
let shift_amt = InsnInput { insn, input: 1 };
|
|
||||||
|
|
||||||
// Can we get the shift amount as an immediate?
|
|
||||||
if let Some(shiftimm) = input_to_shiftimm(ctx, shift_amt) {
|
|
||||||
let shiftee_bits = ty_bits(ctx.input_ty(insn, 0));
|
|
||||||
if shiftee_bits <= std::u8::MAX as usize {
|
|
||||||
let shiftimm = shiftimm.mask(shiftee_bits as u8);
|
|
||||||
let reg = put_input_in_reg(ctx, shiftee, narrow_mode);
|
|
||||||
return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ResultRS::Reg(put_input_in_reg(ctx, input, narrow_mode))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
|
|
||||||
/// This does not actually codegen the source instruction; it just uses the
|
|
||||||
/// vreg into which the source instruction will generate its value.
|
|
||||||
///
|
|
||||||
/// See note on `put_input_in_rs` for a description of `narrow_mode`.
|
|
||||||
fn put_input_in_rse(
|
|
||||||
ctx: &mut Lower<Inst>,
|
|
||||||
input: InsnInput,
|
|
||||||
narrow_mode: NarrowValueMode,
|
|
||||||
) -> ResultRSE {
|
|
||||||
let value = ctx.input_as_value(input.insn, input.input);
|
|
||||||
if let Some((val, extendop)) = get_as_extended_value(ctx, value, narrow_mode) {
|
|
||||||
let reg = put_value_in_reg(ctx, val, NarrowValueMode::None);
|
|
||||||
return ResultRSE::RegExtend(reg, extendop);
|
|
||||||
}
|
|
||||||
|
|
||||||
ResultRSE::from_rs(put_input_in_rs(ctx, input, narrow_mode))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_as_extended_value(
|
fn get_as_extended_value(
|
||||||
ctx: &mut Lower<Inst>,
|
ctx: &mut Lower<Inst>,
|
||||||
val: Value,
|
val: Value,
|
||||||
@@ -351,13 +168,8 @@ fn get_as_extended_value(
|
|||||||
// A single zero-extend or sign-extend is equal to itself.
|
// A single zero-extend or sign-extend is equal to itself.
|
||||||
(_, NarrowValueMode::None) => true,
|
(_, NarrowValueMode::None) => true,
|
||||||
// Two zero-extends or sign-extends in a row is equal to a single zero-extend or sign-extend.
|
// Two zero-extends or sign-extends in a row is equal to a single zero-extend or sign-extend.
|
||||||
(false, NarrowValueMode::ZeroExtend32) | (false, NarrowValueMode::ZeroExtend64) => true,
|
(false, NarrowValueMode::ZeroExtend64) => true,
|
||||||
(true, NarrowValueMode::SignExtend32) | (true, NarrowValueMode::SignExtend64) => true,
|
(true, NarrowValueMode::ZeroExtend64) => false,
|
||||||
// A zero-extend and a sign-extend in a row is not equal to a single zero-extend or sign-extend
|
|
||||||
(false, NarrowValueMode::SignExtend32) | (false, NarrowValueMode::SignExtend64) => {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
(true, NarrowValueMode::ZeroExtend32) | (true, NarrowValueMode::ZeroExtend64) => false,
|
|
||||||
} {
|
} {
|
||||||
let extendop = match (sign_extend, inner_bits) {
|
let extendop = match (sign_extend, inner_bits) {
|
||||||
(true, 8) => ExtendOp::SXTB,
|
(true, 8) => ExtendOp::SXTB,
|
||||||
@@ -379,25 +191,9 @@ fn get_as_extended_value(
|
|||||||
&& ((narrow_mode.is_32bit() && out_bits < 32) || (!narrow_mode.is_32bit() && out_bits < 64))
|
&& ((narrow_mode.is_32bit() && out_bits < 32) || (!narrow_mode.is_32bit() && out_bits < 64))
|
||||||
{
|
{
|
||||||
let extendop = match (narrow_mode, out_bits) {
|
let extendop = match (narrow_mode, out_bits) {
|
||||||
(NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
|
(NarrowValueMode::ZeroExtend64, 1) => ExtendOp::UXTB,
|
||||||
ExtendOp::SXTB
|
(NarrowValueMode::ZeroExtend64, 8) => ExtendOp::UXTB,
|
||||||
}
|
(NarrowValueMode::ZeroExtend64, 16) => ExtendOp::UXTH,
|
||||||
(NarrowValueMode::ZeroExtend32, 1) | (NarrowValueMode::ZeroExtend64, 1) => {
|
|
||||||
ExtendOp::UXTB
|
|
||||||
}
|
|
||||||
(NarrowValueMode::SignExtend32, 8) | (NarrowValueMode::SignExtend64, 8) => {
|
|
||||||
ExtendOp::SXTB
|
|
||||||
}
|
|
||||||
(NarrowValueMode::ZeroExtend32, 8) | (NarrowValueMode::ZeroExtend64, 8) => {
|
|
||||||
ExtendOp::UXTB
|
|
||||||
}
|
|
||||||
(NarrowValueMode::SignExtend32, 16) | (NarrowValueMode::SignExtend64, 16) => {
|
|
||||||
ExtendOp::SXTH
|
|
||||||
}
|
|
||||||
(NarrowValueMode::ZeroExtend32, 16) | (NarrowValueMode::ZeroExtend64, 16) => {
|
|
||||||
ExtendOp::UXTH
|
|
||||||
}
|
|
||||||
(NarrowValueMode::SignExtend64, 32) => ExtendOp::SXTW,
|
|
||||||
(NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
|
(NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
@@ -406,73 +202,6 @@ fn get_as_extended_value(
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn put_input_in_rse_imm12(
|
|
||||||
ctx: &mut Lower<Inst>,
|
|
||||||
input: InsnInput,
|
|
||||||
narrow_mode: NarrowValueMode,
|
|
||||||
) -> ResultRSEImm12 {
|
|
||||||
if let Some(imm_value) = input_to_const(ctx, input) {
|
|
||||||
if let Some(i) = Imm12::maybe_from_u64(imm_value) {
|
|
||||||
let out_ty_bits = ty_bits(ctx.input_ty(input.insn, input.input));
|
|
||||||
let is_negative = (i.bits as u64) & (1 << (cmp::max(out_ty_bits, 1) - 1)) != 0;
|
|
||||||
|
|
||||||
// This condition can happen if we matched a value that overflows the output type of
|
|
||||||
// its `iconst` when viewed as a signed value (i.e. iconst.i8 200).
|
|
||||||
// When that happens we need to lower as a negative value, which we cannot do here.
|
|
||||||
if !(narrow_mode.is_signed() && is_negative) {
|
|
||||||
return ResultRSEImm12::Imm12(i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ResultRSEImm12::from_rse(put_input_in_rse(ctx, input, narrow_mode))
|
|
||||||
}
|
|
||||||
|
|
||||||
//============================================================================
|
|
||||||
// ALU instruction constructors.
|
|
||||||
|
|
||||||
pub(crate) fn alu_inst_imm12(
|
|
||||||
op: ALUOp,
|
|
||||||
ty: Type,
|
|
||||||
rd: Writable<Reg>,
|
|
||||||
rn: Reg,
|
|
||||||
rm: ResultRSEImm12,
|
|
||||||
) -> Inst {
|
|
||||||
let size = OperandSize::from_ty(ty);
|
|
||||||
match rm {
|
|
||||||
ResultRSEImm12::Imm12(imm12) => Inst::AluRRImm12 {
|
|
||||||
alu_op: op,
|
|
||||||
size,
|
|
||||||
rd,
|
|
||||||
rn,
|
|
||||||
imm12,
|
|
||||||
},
|
|
||||||
ResultRSEImm12::Reg(rm) => Inst::AluRRR {
|
|
||||||
alu_op: op,
|
|
||||||
size,
|
|
||||||
rd,
|
|
||||||
rn,
|
|
||||||
rm,
|
|
||||||
},
|
|
||||||
ResultRSEImm12::RegShift(rm, shiftop) => Inst::AluRRRShift {
|
|
||||||
alu_op: op,
|
|
||||||
size,
|
|
||||||
rd,
|
|
||||||
rn,
|
|
||||||
rm,
|
|
||||||
shiftop,
|
|
||||||
},
|
|
||||||
ResultRSEImm12::RegExtend(rm, extendop) => Inst::AluRRRExtend {
|
|
||||||
alu_op: op,
|
|
||||||
size,
|
|
||||||
rd,
|
|
||||||
rn,
|
|
||||||
rm,
|
|
||||||
extendop,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//============================================================================
|
//============================================================================
|
||||||
// Lowering: addressing mode support. Takes instruction directly, rather
|
// Lowering: addressing mode support. Takes instruction directly, rather
|
||||||
// than an `InsnInput`, to do more introspection.
|
// than an `InsnInput`, to do more introspection.
|
||||||
@@ -967,129 +696,6 @@ pub(crate) fn lower_fp_condcode(cc: FloatCC) -> Cond {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn lower_vector_compare(
|
|
||||||
ctx: &mut Lower<Inst>,
|
|
||||||
rd: Writable<Reg>,
|
|
||||||
mut rn: Reg,
|
|
||||||
mut rm: Reg,
|
|
||||||
ty: Type,
|
|
||||||
cond: Cond,
|
|
||||||
) -> CodegenResult<()> {
|
|
||||||
let is_float = ty.lane_type().is_float();
|
|
||||||
let size = VectorSize::from_ty(ty);
|
|
||||||
|
|
||||||
if is_float && (cond == Cond::Vc || cond == Cond::Vs) {
|
|
||||||
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
|
|
||||||
|
|
||||||
ctx.emit(Inst::VecRRR {
|
|
||||||
alu_op: VecALUOp::Fcmeq,
|
|
||||||
rd,
|
|
||||||
rn,
|
|
||||||
rm: rn,
|
|
||||||
size,
|
|
||||||
});
|
|
||||||
ctx.emit(Inst::VecRRR {
|
|
||||||
alu_op: VecALUOp::Fcmeq,
|
|
||||||
rd: tmp,
|
|
||||||
rn: rm,
|
|
||||||
rm,
|
|
||||||
size,
|
|
||||||
});
|
|
||||||
ctx.emit(Inst::VecRRR {
|
|
||||||
alu_op: VecALUOp::And,
|
|
||||||
rd,
|
|
||||||
rn: rd.to_reg(),
|
|
||||||
rm: tmp.to_reg(),
|
|
||||||
size,
|
|
||||||
});
|
|
||||||
|
|
||||||
if cond == Cond::Vs {
|
|
||||||
ctx.emit(Inst::VecMisc {
|
|
||||||
op: VecMisc2::Not,
|
|
||||||
rd,
|
|
||||||
rn: rd.to_reg(),
|
|
||||||
size,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// 'Less than' operations are implemented by swapping
|
|
||||||
// the order of operands and using the 'greater than'
|
|
||||||
// instructions.
|
|
||||||
// 'Not equal' is implemented with 'equal' and inverting
|
|
||||||
// the result.
|
|
||||||
let (alu_op, swap) = match (is_float, cond) {
|
|
||||||
(false, Cond::Eq) => (VecALUOp::Cmeq, false),
|
|
||||||
(false, Cond::Ne) => (VecALUOp::Cmeq, false),
|
|
||||||
(false, Cond::Ge) => (VecALUOp::Cmge, false),
|
|
||||||
(false, Cond::Gt) => (VecALUOp::Cmgt, false),
|
|
||||||
(false, Cond::Le) => (VecALUOp::Cmge, true),
|
|
||||||
(false, Cond::Lt) => (VecALUOp::Cmgt, true),
|
|
||||||
(false, Cond::Hs) => (VecALUOp::Cmhs, false),
|
|
||||||
(false, Cond::Hi) => (VecALUOp::Cmhi, false),
|
|
||||||
(false, Cond::Ls) => (VecALUOp::Cmhs, true),
|
|
||||||
(false, Cond::Lo) => (VecALUOp::Cmhi, true),
|
|
||||||
(true, Cond::Eq) => (VecALUOp::Fcmeq, false),
|
|
||||||
(true, Cond::Ne) => (VecALUOp::Fcmeq, false),
|
|
||||||
(true, Cond::Mi) => (VecALUOp::Fcmgt, true),
|
|
||||||
(true, Cond::Ls) => (VecALUOp::Fcmge, true),
|
|
||||||
(true, Cond::Ge) => (VecALUOp::Fcmge, false),
|
|
||||||
(true, Cond::Gt) => (VecALUOp::Fcmgt, false),
|
|
||||||
_ => {
|
|
||||||
return Err(CodegenError::Unsupported(format!(
|
|
||||||
"Unsupported {} SIMD vector comparison: {:?}",
|
|
||||||
if is_float {
|
|
||||||
"floating-point"
|
|
||||||
} else {
|
|
||||||
"integer"
|
|
||||||
},
|
|
||||||
cond
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if swap {
|
|
||||||
std::mem::swap(&mut rn, &mut rm);
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx.emit(Inst::VecRRR {
|
|
||||||
alu_op,
|
|
||||||
rd,
|
|
||||||
rn,
|
|
||||||
rm,
|
|
||||||
size,
|
|
||||||
});
|
|
||||||
|
|
||||||
if cond == Cond::Ne {
|
|
||||||
ctx.emit(Inst::VecMisc {
|
|
||||||
op: VecMisc2::Not,
|
|
||||||
rd,
|
|
||||||
rn: rd.to_reg(),
|
|
||||||
size,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Determines whether this condcode interprets inputs as signed or unsigned. See the
|
|
||||||
/// documentation for the `icmp` instruction in cranelift-codegen/meta/src/shared/instructions.rs
|
|
||||||
/// for further insights into this.
|
|
||||||
pub(crate) fn condcode_is_signed(cc: IntCC) -> bool {
|
|
||||||
match cc {
|
|
||||||
IntCC::Equal
|
|
||||||
| IntCC::UnsignedGreaterThanOrEqual
|
|
||||||
| IntCC::UnsignedGreaterThan
|
|
||||||
| IntCC::UnsignedLessThanOrEqual
|
|
||||||
| IntCC::UnsignedLessThan
|
|
||||||
| IntCC::NotEqual => false,
|
|
||||||
IntCC::SignedGreaterThanOrEqual
|
|
||||||
| IntCC::SignedGreaterThan
|
|
||||||
| IntCC::SignedLessThanOrEqual
|
|
||||||
| IntCC::SignedLessThan => true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//=============================================================================
|
//=============================================================================
|
||||||
// Helpers for instruction lowering.
|
// Helpers for instruction lowering.
|
||||||
|
|
||||||
@@ -1142,256 +748,6 @@ pub(crate) fn maybe_value_multi(
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
|
|
||||||
/// Bint or a bitcast).
|
|
||||||
///
|
|
||||||
/// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
|
|
||||||
/// a bit more generic.
|
|
||||||
pub(crate) fn maybe_input_insn_via_conv(
|
|
||||||
c: &mut Lower<Inst>,
|
|
||||||
input: InsnInput,
|
|
||||||
op: Opcode,
|
|
||||||
conv: Opcode,
|
|
||||||
) -> Option<IRInst> {
|
|
||||||
let inputs = c.get_input_as_source_or_const(input.insn, input.input);
|
|
||||||
if let Some((src_inst, _)) = inputs.inst.as_inst() {
|
|
||||||
let data = c.data(src_inst);
|
|
||||||
if data.opcode() == op {
|
|
||||||
return Some(src_inst);
|
|
||||||
}
|
|
||||||
if data.opcode() == conv {
|
|
||||||
let inputs = c.get_input_as_source_or_const(src_inst, 0);
|
|
||||||
if let Some((src_inst, _)) = inputs.inst.as_inst() {
|
|
||||||
let data = c.data(src_inst);
|
|
||||||
if data.opcode() == op {
|
|
||||||
return Some(src_inst);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Specifies what [lower_icmp] should do when lowering
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
|
||||||
pub(crate) enum IcmpOutput {
|
|
||||||
/// Lowers the comparison into a cond code, discarding the results. The cond code emitted can
|
|
||||||
/// be checked in the resulting [IcmpResult].
|
|
||||||
CondCode,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl IcmpOutput {
|
|
||||||
pub fn reg(&self) -> Option<Writable<Reg>> {
|
|
||||||
match self {
|
|
||||||
IcmpOutput::CondCode => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The output of an Icmp lowering.
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
|
||||||
pub(crate) enum IcmpResult {
|
|
||||||
/// The result was output into the given [Cond]. Callers may perform operations using this [Cond]
|
|
||||||
/// and its inverse, other [Cond]'s are not guaranteed to be correct.
|
|
||||||
CondCode(Cond),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl IcmpResult {
|
|
||||||
pub fn unwrap_cond(&self) -> Cond {
|
|
||||||
match self {
|
|
||||||
IcmpResult::CondCode(c) => *c,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Lower an icmp comparision
|
|
||||||
///
|
|
||||||
/// We can lower into the status flags, or materialize the result into a register
|
|
||||||
/// This is controlled by the `output` parameter.
|
|
||||||
pub(crate) fn lower_icmp(
|
|
||||||
ctx: &mut Lower<Inst>,
|
|
||||||
insn: IRInst,
|
|
||||||
condcode: IntCC,
|
|
||||||
output: IcmpOutput,
|
|
||||||
) -> CodegenResult<IcmpResult> {
|
|
||||||
trace!(
|
|
||||||
"lower_icmp: insn {}, condcode: {}, output: {:?}",
|
|
||||||
insn,
|
|
||||||
condcode,
|
|
||||||
output
|
|
||||||
);
|
|
||||||
|
|
||||||
let rd = output.reg().unwrap_or(writable_zero_reg());
|
|
||||||
let inputs = insn_inputs(ctx, insn);
|
|
||||||
let cond = lower_condcode(condcode);
|
|
||||||
let is_signed = condcode_is_signed(condcode);
|
|
||||||
let ty = ctx.input_ty(insn, 0);
|
|
||||||
let bits = ty_bits(ty);
|
|
||||||
let narrow_mode = match (bits <= 32, is_signed) {
|
|
||||||
(true, true) => NarrowValueMode::SignExtend32,
|
|
||||||
(true, false) => NarrowValueMode::ZeroExtend32,
|
|
||||||
(false, true) => NarrowValueMode::SignExtend64,
|
|
||||||
(false, false) => NarrowValueMode::ZeroExtend64,
|
|
||||||
};
|
|
||||||
let mut should_materialize = output.reg().is_some();
|
|
||||||
|
|
||||||
let out_condcode = if ty == I128 {
|
|
||||||
let lhs = put_input_in_regs(ctx, inputs[0]);
|
|
||||||
let rhs = put_input_in_regs(ctx, inputs[1]);
|
|
||||||
|
|
||||||
let tmp1 = ctx.alloc_tmp(I64).only_reg().unwrap();
|
|
||||||
let tmp2 = ctx.alloc_tmp(I64).only_reg().unwrap();
|
|
||||||
|
|
||||||
match condcode {
|
|
||||||
IntCC::Equal | IntCC::NotEqual => {
|
|
||||||
// cmp lhs_lo, rhs_lo
|
|
||||||
// ccmp lhs_hi, rhs_hi, #0, eq
|
|
||||||
// cset dst, {eq, ne}
|
|
||||||
|
|
||||||
ctx.emit(Inst::AluRRR {
|
|
||||||
alu_op: ALUOp::SubS,
|
|
||||||
size: OperandSize::Size64,
|
|
||||||
rd: writable_zero_reg(),
|
|
||||||
rn: lhs.regs()[0],
|
|
||||||
rm: rhs.regs()[0],
|
|
||||||
});
|
|
||||||
ctx.emit(Inst::CCmp {
|
|
||||||
size: OperandSize::Size64,
|
|
||||||
rn: lhs.regs()[1],
|
|
||||||
rm: rhs.regs()[1],
|
|
||||||
nzcv: NZCV::new(false, false, false, false),
|
|
||||||
cond: Cond::Eq,
|
|
||||||
});
|
|
||||||
cond
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
// cmp lhs_lo, rhs_lo
|
|
||||||
// cset tmp1, unsigned_cond
|
|
||||||
// cmp lhs_hi, rhs_hi
|
|
||||||
// cset tmp2, cond
|
|
||||||
// csel dst, tmp1, tmp2, eq
|
|
||||||
|
|
||||||
let rd = output.reg().unwrap_or(tmp1);
|
|
||||||
let unsigned_cond = lower_condcode(condcode.unsigned());
|
|
||||||
|
|
||||||
ctx.emit(Inst::AluRRR {
|
|
||||||
alu_op: ALUOp::SubS,
|
|
||||||
size: OperandSize::Size64,
|
|
||||||
rd: writable_zero_reg(),
|
|
||||||
rn: lhs.regs()[0],
|
|
||||||
rm: rhs.regs()[0],
|
|
||||||
});
|
|
||||||
materialize_bool_result(ctx, insn, tmp1, unsigned_cond);
|
|
||||||
ctx.emit(Inst::AluRRR {
|
|
||||||
alu_op: ALUOp::SubS,
|
|
||||||
size: OperandSize::Size64,
|
|
||||||
rd: writable_zero_reg(),
|
|
||||||
rn: lhs.regs()[1],
|
|
||||||
rm: rhs.regs()[1],
|
|
||||||
});
|
|
||||||
materialize_bool_result(ctx, insn, tmp2, cond);
|
|
||||||
ctx.emit(Inst::CSel {
|
|
||||||
cond: Cond::Eq,
|
|
||||||
rd,
|
|
||||||
rn: tmp1.to_reg(),
|
|
||||||
rm: tmp2.to_reg(),
|
|
||||||
});
|
|
||||||
|
|
||||||
if output == IcmpOutput::CondCode {
|
|
||||||
// We only need to guarantee that the flags for `cond` are correct, so we can
|
|
||||||
// compare rd with 0 or 1
|
|
||||||
|
|
||||||
// If we are doing compare or equal, we want to compare with 1 instead of zero
|
|
||||||
if condcode.without_equal() != condcode {
|
|
||||||
lower_constant_u64(ctx, tmp2, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
let xzr = zero_reg();
|
|
||||||
let rd = rd.to_reg();
|
|
||||||
let tmp2 = tmp2.to_reg();
|
|
||||||
let (rn, rm) = match condcode {
|
|
||||||
IntCC::SignedGreaterThanOrEqual => (rd, tmp2),
|
|
||||||
IntCC::UnsignedGreaterThanOrEqual => (rd, tmp2),
|
|
||||||
IntCC::SignedLessThanOrEqual => (tmp2, rd),
|
|
||||||
IntCC::UnsignedLessThanOrEqual => (tmp2, rd),
|
|
||||||
IntCC::SignedGreaterThan => (rd, xzr),
|
|
||||||
IntCC::UnsignedGreaterThan => (rd, xzr),
|
|
||||||
IntCC::SignedLessThan => (xzr, rd),
|
|
||||||
IntCC::UnsignedLessThan => (xzr, rd),
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
|
|
||||||
ctx.emit(Inst::AluRRR {
|
|
||||||
alu_op: ALUOp::SubS,
|
|
||||||
size: OperandSize::Size64,
|
|
||||||
rd: writable_zero_reg(),
|
|
||||||
rn,
|
|
||||||
rm,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Prevent a second materialize_bool_result to be emitted at the end of the function
|
|
||||||
should_materialize = false;
|
|
||||||
cond
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if ty.is_vector() {
|
|
||||||
assert_ne!(output, IcmpOutput::CondCode);
|
|
||||||
should_materialize = false;
|
|
||||||
|
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
|
||||||
let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
|
|
||||||
lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
|
|
||||||
cond
|
|
||||||
} else {
|
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
|
||||||
let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
|
|
||||||
ctx.emit(alu_inst_imm12(ALUOp::SubS, ty, writable_zero_reg(), rn, rm));
|
|
||||||
cond
|
|
||||||
};
|
|
||||||
|
|
||||||
// Most of the comparisons above produce flags by default, if the caller requested the result
|
|
||||||
// in a register we materialize those flags into a register. Some branches do end up producing
|
|
||||||
// the result as a register by default, so we ignore those.
|
|
||||||
if should_materialize {
|
|
||||||
materialize_bool_result(ctx, insn, rd, out_condcode);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(match output {
|
|
||||||
IcmpOutput::CondCode => IcmpResult::CondCode(out_condcode),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn lower_fcmp_or_ffcmp_to_flags(ctx: &mut Lower<Inst>, insn: IRInst) {
|
|
||||||
let ty = ctx.input_ty(insn, 0);
|
|
||||||
let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
|
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
|
||||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
|
||||||
ctx.emit(Inst::FpuCmp {
|
|
||||||
size: ScalarSize::from_ty(ty),
|
|
||||||
rn,
|
|
||||||
rm,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Materialize a boolean value into a register from the flags
|
|
||||||
/// (e.g set by a comparison).
|
|
||||||
/// A 0 / -1 (all-ones) result as expected for bool operations.
|
|
||||||
pub(crate) fn materialize_bool_result(
|
|
||||||
ctx: &mut Lower<Inst>,
|
|
||||||
insn: IRInst,
|
|
||||||
rd: Writable<Reg>,
|
|
||||||
cond: Cond,
|
|
||||||
) {
|
|
||||||
// A boolean is 0 / -1; if output width is > 1 use `csetm`,
|
|
||||||
// otherwise use `cset`.
|
|
||||||
if ty_bits(ctx.output_ty(insn, 0)) > 1 {
|
|
||||||
ctx.emit(Inst::CSetm { rd, cond });
|
|
||||||
} else {
|
|
||||||
ctx.emit(Inst::CSet { rd, cond });
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//=============================================================================
|
//=============================================================================
|
||||||
// Lowering-backend trait implementation.
|
// Lowering-backend trait implementation.
|
||||||
|
|
||||||
@@ -1408,7 +764,33 @@ impl LowerBackend for AArch64Backend {
|
|||||||
branches: &[IRInst],
|
branches: &[IRInst],
|
||||||
targets: &[MachLabel],
|
targets: &[MachLabel],
|
||||||
) -> CodegenResult<()> {
|
) -> CodegenResult<()> {
|
||||||
lower_inst::lower_branch(ctx, branches, targets)
|
// A block should end with at most two branches. The first may be a
|
||||||
|
// conditional branch; a conditional branch can be followed only by an
|
||||||
|
// unconditional branch or fallthrough. Otherwise, if only one branch,
|
||||||
|
// it may be an unconditional branch, a fallthrough, a return, or a
|
||||||
|
// trap. These conditions are verified by `is_ebb_basic()` during the
|
||||||
|
// verifier pass.
|
||||||
|
assert!(branches.len() <= 2);
|
||||||
|
if branches.len() == 2 {
|
||||||
|
let op1 = ctx.data(branches[1]).opcode();
|
||||||
|
assert!(op1 == Opcode::Jump);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Ok(()) = super::lower::isle::lower_branch(
|
||||||
|
ctx,
|
||||||
|
&self.triple,
|
||||||
|
&self.flags,
|
||||||
|
&self.isa_flags,
|
||||||
|
branches[0],
|
||||||
|
targets,
|
||||||
|
) {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
unreachable!(
|
||||||
|
"implemented in ISLE: branch = `{}`",
|
||||||
|
ctx.dfg().display_inst(branches[0]),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn maybe_pinned_reg(&self) -> Option<Reg> {
|
fn maybe_pinned_reg(&self) -> Option<Reg> {
|
||||||
|
|||||||
@@ -67,6 +67,25 @@ pub(crate) fn lower(
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn lower_branch(
|
||||||
|
lower_ctx: &mut Lower<MInst>,
|
||||||
|
triple: &Triple,
|
||||||
|
flags: &Flags,
|
||||||
|
isa_flags: &IsaFlags,
|
||||||
|
branch: Inst,
|
||||||
|
targets: &[MachLabel],
|
||||||
|
) -> Result<(), ()> {
|
||||||
|
lower_common(
|
||||||
|
lower_ctx,
|
||||||
|
triple,
|
||||||
|
flags,
|
||||||
|
isa_flags,
|
||||||
|
&[],
|
||||||
|
branch,
|
||||||
|
|cx, insn| generated_code::constructor_lower_branch(cx, insn, &targets.to_vec()),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
pub struct ExtendedValue {
|
pub struct ExtendedValue {
|
||||||
val: Value,
|
val: Value,
|
||||||
extend: ExtendOp,
|
extend: ExtendOp,
|
||||||
@@ -342,6 +361,10 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
|||||||
CondBrKind::Zero(reg)
|
CondBrKind::Zero(reg)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn cond_br_not_zero(&mut self, reg: Reg) -> CondBrKind {
|
||||||
|
CondBrKind::NotZero(reg)
|
||||||
|
}
|
||||||
|
|
||||||
fn cond_br_cond(&mut self, cond: &Cond) -> CondBrKind {
|
fn cond_br_cond(&mut self, cond: &Cond) -> CondBrKind {
|
||||||
CondBrKind::Cond(*cond)
|
CondBrKind::Cond(*cond)
|
||||||
}
|
}
|
||||||
@@ -521,6 +544,9 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
|||||||
lower_condcode(*cc)
|
lower_condcode(*cc)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// ISLE extern constructor: returns `cond.invert()`.
fn invert_cond(&mut self, cond: &Cond) -> Cond {
    let original = *cond;
    original.invert()
}
|
||||||
fn preg_sp(&mut self) -> PReg {
|
fn preg_sp(&mut self) -> PReg {
|
||||||
super::regs::stack_reg().to_real_reg().unwrap().into()
|
super::regs::stack_reg().to_real_reg().unwrap().into()
|
||||||
}
|
}
|
||||||
@@ -533,6 +559,34 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
|||||||
super::regs::link_reg().to_real_reg().unwrap().into()
|
super::regs::link_reg().to_real_reg().unwrap().into()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn branch_target(&mut self, elements: &VecMachLabel, idx: u8) -> BranchTarget {
|
||||||
|
BranchTarget::Label(elements[idx as usize])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// ISLE extern constructor: number of jump-table entries described by
/// `elements`, i.e. every label except the first one (`targets_jt_info`
/// uses `elements[0]` as the default target).
///
/// # Panics
///
/// Panics if `elements` is empty or if the entry count does not fit in a
/// `u32`. The plain `(len - 1) as u32` would instead wrap or truncate
/// silently in release builds.
fn targets_jt_size(&mut self, elements: &VecMachLabel) -> u32 {
    let jt_size = elements
        .len()
        .checked_sub(1)
        .expect("branch target list must contain at least the default target");
    assert!(jt_size <= u32::MAX as usize);
    jt_size as u32
}
|
||||||
|
|
||||||
|
/// ISLE extern constructor: number of bytes needed for the jump-table
/// sequence built from `elements`.
fn targets_jt_space(&mut self, elements: &VecMachLabel) -> CodeOffset {
    // The sequence is 8 instructions plus one slot per jump-table entry, and
    // each of those takes 4 bytes.
    const BASE_INSTRUCTIONS: CodeOffset = 8;
    const BYTES_PER_SLOT: CodeOffset = 4;

    let entries = self.targets_jt_size(elements);
    (BASE_INSTRUCTIONS + entries) * BYTES_PER_SLOT
}
|
||||||
|
|
||||||
|
fn targets_jt_info(&mut self, elements: &VecMachLabel) -> BoxJTSequenceInfo {
|
||||||
|
let targets: Vec<BranchTarget> = elements
|
||||||
|
.iter()
|
||||||
|
.skip(1)
|
||||||
|
.map(|bix| BranchTarget::Label(*bix))
|
||||||
|
.collect();
|
||||||
|
let default_target = BranchTarget::Label(elements[0]);
|
||||||
|
Box::new(JTSequenceInfo {
|
||||||
|
targets,
|
||||||
|
default_target,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
fn min_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
|
fn min_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
|
||||||
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
|
let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,5 @@
|
|||||||
//! Lower a single Cranelift instruction into vcode.
|
//! Lower a single Cranelift instruction into vcode.
|
||||||
|
|
||||||
use super::lower::*;
|
|
||||||
use crate::binemit::CodeOffset;
|
|
||||||
use crate::ir::types::*;
|
|
||||||
use crate::ir::Inst as IRInst;
|
use crate::ir::Inst as IRInst;
|
||||||
use crate::ir::Opcode;
|
use crate::ir::Opcode;
|
||||||
use crate::isa::aarch64::inst::*;
|
use crate::isa::aarch64::inst::*;
|
||||||
@@ -11,8 +8,6 @@ use crate::machinst::lower::*;
|
|||||||
use crate::machinst::*;
|
use crate::machinst::*;
|
||||||
use crate::settings::Flags;
|
use crate::settings::Flags;
|
||||||
use crate::{CodegenError, CodegenResult};
|
use crate::{CodegenError, CodegenResult};
|
||||||
use alloc::boxed::Box;
|
|
||||||
use alloc::vec::Vec;
|
|
||||||
use target_lexicon::Triple;
|
use target_lexicon::Triple;
|
||||||
|
|
||||||
/// Actually codegen an instruction's results into registers.
|
/// Actually codegen an instruction's results into registers.
|
||||||
@@ -323,269 +318,3 @@ pub(crate) fn lower_insn_to_regs(
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn lower_branch(
|
|
||||||
ctx: &mut Lower<Inst>,
|
|
||||||
branches: &[IRInst],
|
|
||||||
targets: &[MachLabel],
|
|
||||||
) -> CodegenResult<()> {
|
|
||||||
// A block should end with at most two branches. The first may be a
|
|
||||||
// conditional branch; a conditional branch can be followed only by an
|
|
||||||
// unconditional branch or fallthrough. Otherwise, if only one branch,
|
|
||||||
// it may be an unconditional branch, a fallthrough, a return, or a
|
|
||||||
// trap. These conditions are verified by `is_ebb_basic()` during the
|
|
||||||
// verifier pass.
|
|
||||||
assert!(branches.len() <= 2);
|
|
||||||
|
|
||||||
if branches.len() == 2 {
|
|
||||||
// Must be a conditional branch followed by an unconditional branch.
|
|
||||||
let op0 = ctx.data(branches[0]).opcode();
|
|
||||||
let op1 = ctx.data(branches[1]).opcode();
|
|
||||||
|
|
||||||
assert!(op1 == Opcode::Jump);
|
|
||||||
let taken = BranchTarget::Label(targets[0]);
|
|
||||||
// not_taken target is the target of the second branch, even if it is a Fallthrough
|
|
||||||
// instruction: because we reorder blocks while we lower, the fallthrough in the new
|
|
||||||
// order is not (necessarily) the same as the fallthrough in CLIF. So we use the
|
|
||||||
// explicitly-provided target.
|
|
||||||
let not_taken = BranchTarget::Label(targets[1]);
|
|
||||||
|
|
||||||
match op0 {
|
|
||||||
Opcode::Brz | Opcode::Brnz => {
|
|
||||||
let ty = ctx.input_ty(branches[0], 0);
|
|
||||||
let flag_input = InsnInput {
|
|
||||||
insn: branches[0],
|
|
||||||
input: 0,
|
|
||||||
};
|
|
||||||
if let Some(icmp_insn) =
|
|
||||||
maybe_input_insn_via_conv(ctx, flag_input, Opcode::Icmp, Opcode::Bint)
|
|
||||||
{
|
|
||||||
let condcode = ctx.data(icmp_insn).cond_code().unwrap();
|
|
||||||
let cond =
|
|
||||||
lower_icmp(ctx, icmp_insn, condcode, IcmpOutput::CondCode)?.unwrap_cond();
|
|
||||||
let negated = op0 == Opcode::Brz;
|
|
||||||
let cond = if negated { cond.invert() } else { cond };
|
|
||||||
|
|
||||||
ctx.emit(Inst::CondBr {
|
|
||||||
taken,
|
|
||||||
not_taken,
|
|
||||||
kind: CondBrKind::Cond(cond),
|
|
||||||
});
|
|
||||||
} else if let Some(fcmp_insn) =
|
|
||||||
maybe_input_insn_via_conv(ctx, flag_input, Opcode::Fcmp, Opcode::Bint)
|
|
||||||
{
|
|
||||||
let condcode = ctx.data(fcmp_insn).fp_cond_code().unwrap();
|
|
||||||
let cond = lower_fp_condcode(condcode);
|
|
||||||
let negated = op0 == Opcode::Brz;
|
|
||||||
let cond = if negated { cond.invert() } else { cond };
|
|
||||||
|
|
||||||
lower_fcmp_or_ffcmp_to_flags(ctx, fcmp_insn);
|
|
||||||
ctx.emit(Inst::CondBr {
|
|
||||||
taken,
|
|
||||||
not_taken,
|
|
||||||
kind: CondBrKind::Cond(cond),
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
let rt = if ty == I128 {
|
|
||||||
let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
|
|
||||||
let input = put_input_in_regs(ctx, flag_input);
|
|
||||||
ctx.emit(Inst::AluRRR {
|
|
||||||
alu_op: ALUOp::Orr,
|
|
||||||
size: OperandSize::Size64,
|
|
||||||
rd: tmp,
|
|
||||||
rn: input.regs()[0],
|
|
||||||
rm: input.regs()[1],
|
|
||||||
});
|
|
||||||
tmp.to_reg()
|
|
||||||
} else {
|
|
||||||
put_input_in_reg(ctx, flag_input, NarrowValueMode::ZeroExtend64)
|
|
||||||
};
|
|
||||||
let kind = match op0 {
|
|
||||||
Opcode::Brz => CondBrKind::Zero(rt),
|
|
||||||
Opcode::Brnz => CondBrKind::NotZero(rt),
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
ctx.emit(Inst::CondBr {
|
|
||||||
taken,
|
|
||||||
not_taken,
|
|
||||||
kind,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Opcode::BrIcmp => {
|
|
||||||
let condcode = ctx.data(branches[0]).cond_code().unwrap();
|
|
||||||
let cond =
|
|
||||||
lower_icmp(ctx, branches[0], condcode, IcmpOutput::CondCode)?.unwrap_cond();
|
|
||||||
|
|
||||||
ctx.emit(Inst::CondBr {
|
|
||||||
taken,
|
|
||||||
not_taken,
|
|
||||||
kind: CondBrKind::Cond(cond),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
Opcode::Brif => {
|
|
||||||
let condcode = ctx.data(branches[0]).cond_code().unwrap();
|
|
||||||
|
|
||||||
let flag_input = InsnInput {
|
|
||||||
insn: branches[0],
|
|
||||||
input: 0,
|
|
||||||
};
|
|
||||||
if let Some(ifcmp_insn) = maybe_input_insn(ctx, flag_input, Opcode::Ifcmp) {
|
|
||||||
let cond =
|
|
||||||
lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::CondCode)?.unwrap_cond();
|
|
||||||
ctx.emit(Inst::CondBr {
|
|
||||||
taken,
|
|
||||||
not_taken,
|
|
||||||
kind: CondBrKind::Cond(cond),
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
// If the ifcmp result is actually placed in a
|
|
||||||
// register, we need to move it back into the flags.
|
|
||||||
let rn = put_input_in_reg(ctx, flag_input, NarrowValueMode::None);
|
|
||||||
ctx.emit(Inst::MovToNZCV { rn });
|
|
||||||
ctx.emit(Inst::CondBr {
|
|
||||||
taken,
|
|
||||||
not_taken,
|
|
||||||
kind: CondBrKind::Cond(lower_condcode(condcode)),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Opcode::Brff => {
|
|
||||||
let condcode = ctx.data(branches[0]).fp_cond_code().unwrap();
|
|
||||||
let cond = lower_fp_condcode(condcode);
|
|
||||||
let kind = CondBrKind::Cond(cond);
|
|
||||||
let flag_input = InsnInput {
|
|
||||||
insn: branches[0],
|
|
||||||
input: 0,
|
|
||||||
};
|
|
||||||
if let Some(ffcmp_insn) = maybe_input_insn(ctx, flag_input, Opcode::Ffcmp) {
|
|
||||||
lower_fcmp_or_ffcmp_to_flags(ctx, ffcmp_insn);
|
|
||||||
ctx.emit(Inst::CondBr {
|
|
||||||
taken,
|
|
||||||
not_taken,
|
|
||||||
kind,
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
// If the ffcmp result is actually placed in a
|
|
||||||
// register, we need to move it back into the flags.
|
|
||||||
let rn = put_input_in_reg(ctx, flag_input, NarrowValueMode::None);
|
|
||||||
ctx.emit(Inst::MovToNZCV { rn });
|
|
||||||
ctx.emit(Inst::CondBr {
|
|
||||||
taken,
|
|
||||||
not_taken,
|
|
||||||
kind,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
_ => unimplemented!(),
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Must be an unconditional branch or an indirect branch.
|
|
||||||
let op = ctx.data(branches[0]).opcode();
|
|
||||||
match op {
|
|
||||||
Opcode::Jump => {
|
|
||||||
assert!(branches.len() == 1);
|
|
||||||
ctx.emit(Inst::Jump {
|
|
||||||
dest: BranchTarget::Label(targets[0]),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
Opcode::BrTable => {
|
|
||||||
// Expand `br_table index, default, JT` to:
|
|
||||||
//
|
|
||||||
// emit_island // this forces an island at this point
|
|
||||||
// // if the jumptable would push us past
|
|
||||||
// // the deadline
|
|
||||||
// cmp idx, #jt_size
|
|
||||||
// b.hs default
|
|
||||||
// csel vTmp2, xzr, idx, hs
|
|
||||||
// csdb
|
|
||||||
// adr vTmp1, PC+16
|
|
||||||
// ldr vTmp2, [vTmp1, vTmp2, uxtw #2]
|
|
||||||
// add vTmp1, vTmp1, vTmp2
|
|
||||||
// br vTmp1
|
|
||||||
// [jumptable offsets relative to JT base]
|
|
||||||
let jt_size = targets.len() - 1;
|
|
||||||
assert!(jt_size <= std::u32::MAX as usize);
|
|
||||||
|
|
||||||
ctx.emit(Inst::EmitIsland {
|
|
||||||
needed_space: 4 * (8 + jt_size) as CodeOffset,
|
|
||||||
});
|
|
||||||
|
|
||||||
let ridx = put_input_in_reg(
|
|
||||||
ctx,
|
|
||||||
InsnInput {
|
|
||||||
insn: branches[0],
|
|
||||||
input: 0,
|
|
||||||
},
|
|
||||||
NarrowValueMode::ZeroExtend32,
|
|
||||||
);
|
|
||||||
|
|
||||||
let rtmp1 = ctx.alloc_tmp(I32).only_reg().unwrap();
|
|
||||||
let rtmp2 = ctx.alloc_tmp(I32).only_reg().unwrap();
|
|
||||||
|
|
||||||
// Bounds-check, leaving condition codes for JTSequence's
|
|
||||||
// branch to default target below.
|
|
||||||
if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {
|
|
||||||
ctx.emit(Inst::AluRRImm12 {
|
|
||||||
alu_op: ALUOp::SubS,
|
|
||||||
size: OperandSize::Size32,
|
|
||||||
rd: writable_zero_reg(),
|
|
||||||
rn: ridx,
|
|
||||||
imm12,
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
lower_constant_u64(ctx, rtmp1, jt_size as u64);
|
|
||||||
ctx.emit(Inst::AluRRR {
|
|
||||||
alu_op: ALUOp::SubS,
|
|
||||||
size: OperandSize::Size32,
|
|
||||||
rd: writable_zero_reg(),
|
|
||||||
rn: ridx,
|
|
||||||
rm: rtmp1.to_reg(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Emit the compound instruction that does:
|
|
||||||
//
|
|
||||||
// b.hs default
|
|
||||||
// csel rB, xzr, rIndex, hs
|
|
||||||
// csdb
|
|
||||||
// adr rA, jt
|
|
||||||
// ldrsw rB, [rA, rB, uxtw #2]
|
|
||||||
// add rA, rA, rB
|
|
||||||
// br rA
|
|
||||||
// [jt entries]
|
|
||||||
//
|
|
||||||
// This must be *one* instruction in the vcode because
|
|
||||||
// we cannot allow regalloc to insert any spills/fills
|
|
||||||
// in the middle of the sequence; otherwise, the ADR's
|
|
||||||
// PC-rel offset to the jumptable would be incorrect.
|
|
||||||
// (The alternative is to introduce a relocation pass
|
|
||||||
// for inlined jumptables, which is much worse, IMHO.)
|
|
||||||
|
|
||||||
let jt_targets: Vec<BranchTarget> = targets
|
|
||||||
.iter()
|
|
||||||
.skip(1)
|
|
||||||
.map(|bix| BranchTarget::Label(*bix))
|
|
||||||
.collect();
|
|
||||||
let default_target = BranchTarget::Label(targets[0]);
|
|
||||||
ctx.emit(Inst::JTSequence {
|
|
||||||
ridx,
|
|
||||||
rtmp1,
|
|
||||||
rtmp2,
|
|
||||||
info: Box::new(JTSequenceInfo {
|
|
||||||
targets: jt_targets,
|
|
||||||
default_target,
|
|
||||||
}),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
_ => panic!("Unknown branch type!"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -24,12 +24,6 @@ pub(crate) struct InsnOutput {
|
|||||||
pub(crate) output: usize,
|
pub(crate) output: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn insn_inputs<I: VCodeInst>(ctx: &Lower<I>, insn: IRInst) -> SmallVec<[InsnInput; 4]> {
|
|
||||||
(0..ctx.num_inputs(insn))
|
|
||||||
.map(|i| InsnInput { insn, input: i })
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn insn_outputs<I: VCodeInst>(
|
pub(crate) fn insn_outputs<I: VCodeInst>(
|
||||||
ctx: &Lower<I>,
|
ctx: &Lower<I>,
|
||||||
insn: IRInst,
|
insn: IRInst,
|
||||||
|
|||||||
@@ -853,6 +853,11 @@
|
|||||||
(ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2))
|
(ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2))
|
||||||
(SideEffectNoResult.Inst3 p c1 c2))
|
(SideEffectNoResult.Inst3 p c1 c2))
|
||||||
|
|
||||||
|
(rule (with_flags_side_effect
|
||||||
|
(ProducesFlags.ProducesFlagsTwiceSideEffect p1 p2)
|
||||||
|
(ConsumesFlags.ConsumesFlagsSideEffect c))
|
||||||
|
(SideEffectNoResult.Inst3 p1 p2 c))
|
||||||
|
|
||||||
;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(decl trap_code_division_by_zero () TrapCode)
|
(decl trap_code_division_by_zero () TrapCode)
|
||||||
|
|||||||
@@ -290,8 +290,8 @@ block1:
|
|||||||
; cset x6, lo
|
; cset x6, lo
|
||||||
; subs xzr, x1, x3
|
; subs xzr, x1, x3
|
||||||
; cset x9, lt
|
; cset x9, lt
|
||||||
; csel x6, x6, x9, eq
|
; csel x11, x6, x9, eq
|
||||||
; subs xzr, xzr, x6
|
; subs xzr, xzr, x11
|
||||||
; b.lt label1 ; b label2
|
; b.lt label1 ; b label2
|
||||||
; block1:
|
; block1:
|
||||||
; b label3
|
; b label3
|
||||||
@@ -314,8 +314,8 @@ block1:
|
|||||||
; cset x6, lo
|
; cset x6, lo
|
||||||
; subs xzr, x1, x3
|
; subs xzr, x1, x3
|
||||||
; cset x9, lo
|
; cset x9, lo
|
||||||
; csel x6, x6, x9, eq
|
; csel x11, x6, x9, eq
|
||||||
; subs xzr, xzr, x6
|
; subs xzr, xzr, x11
|
||||||
; b.lo label1 ; b label2
|
; b.lo label1 ; b label2
|
||||||
; block1:
|
; block1:
|
||||||
; b label3
|
; b label3
|
||||||
@@ -338,9 +338,9 @@ block1:
|
|||||||
; cset x6, ls
|
; cset x6, ls
|
||||||
; subs xzr, x1, x3
|
; subs xzr, x1, x3
|
||||||
; cset x9, le
|
; cset x9, le
|
||||||
; csel x6, x6, x9, eq
|
; csel x11, x6, x9, eq
|
||||||
; movz x9, #1
|
; movz w13, #1
|
||||||
; subs xzr, x9, x6
|
; subs xzr, x13, x11
|
||||||
; b.le label1 ; b label2
|
; b.le label1 ; b label2
|
||||||
; block1:
|
; block1:
|
||||||
; b label3
|
; b label3
|
||||||
@@ -363,9 +363,9 @@ block1:
|
|||||||
; cset x6, ls
|
; cset x6, ls
|
||||||
; subs xzr, x1, x3
|
; subs xzr, x1, x3
|
||||||
; cset x9, ls
|
; cset x9, ls
|
||||||
; csel x6, x6, x9, eq
|
; csel x11, x6, x9, eq
|
||||||
; movz x9, #1
|
; orr x13, xzr, #1
|
||||||
; subs xzr, x9, x6
|
; subs xzr, x13, x11
|
||||||
; b.ls label1 ; b label2
|
; b.ls label1 ; b label2
|
||||||
; block1:
|
; block1:
|
||||||
; b label3
|
; b label3
|
||||||
@@ -388,8 +388,8 @@ block1:
|
|||||||
; cset x6, hi
|
; cset x6, hi
|
||||||
; subs xzr, x1, x3
|
; subs xzr, x1, x3
|
||||||
; cset x9, gt
|
; cset x9, gt
|
||||||
; csel x6, x6, x9, eq
|
; csel x11, x6, x9, eq
|
||||||
; subs xzr, x6, xzr
|
; subs xzr, x11, xzr
|
||||||
; b.gt label1 ; b label2
|
; b.gt label1 ; b label2
|
||||||
; block1:
|
; block1:
|
||||||
; b label3
|
; b label3
|
||||||
@@ -412,8 +412,8 @@ block1:
|
|||||||
; cset x6, hi
|
; cset x6, hi
|
||||||
; subs xzr, x1, x3
|
; subs xzr, x1, x3
|
||||||
; cset x9, hi
|
; cset x9, hi
|
||||||
; csel x6, x6, x9, eq
|
; csel x11, x6, x9, eq
|
||||||
; subs xzr, x6, xzr
|
; subs xzr, x11, xzr
|
||||||
; b.hi label1 ; b label2
|
; b.hi label1 ; b label2
|
||||||
; block1:
|
; block1:
|
||||||
; b label3
|
; b label3
|
||||||
@@ -436,9 +436,9 @@ block1:
|
|||||||
; cset x6, hs
|
; cset x6, hs
|
||||||
; subs xzr, x1, x3
|
; subs xzr, x1, x3
|
||||||
; cset x9, ge
|
; cset x9, ge
|
||||||
; csel x6, x6, x9, eq
|
; csel x11, x6, x9, eq
|
||||||
; movz x9, #1
|
; movz w13, #1
|
||||||
; subs xzr, x6, x9
|
; subs xzr, x11, x13
|
||||||
; b.ge label1 ; b label2
|
; b.ge label1 ; b label2
|
||||||
; block1:
|
; block1:
|
||||||
; b label3
|
; b label3
|
||||||
@@ -461,9 +461,9 @@ block1:
|
|||||||
; cset x6, hs
|
; cset x6, hs
|
||||||
; subs xzr, x1, x3
|
; subs xzr, x1, x3
|
||||||
; cset x9, hs
|
; cset x9, hs
|
||||||
; csel x6, x6, x9, eq
|
; csel x11, x6, x9, eq
|
||||||
; movz x9, #1
|
; orr x13, xzr, #1
|
||||||
; subs xzr, x6, x9
|
; subs xzr, x11, x13
|
||||||
; b.hs label1 ; b label2
|
; b.hs label1 ; b label2
|
||||||
; block1:
|
; block1:
|
||||||
; b label3
|
; b label3
|
||||||
|
|||||||
Reference in New Issue
Block a user