Port branches to ISLE (AArch64) (#4943)

* Port branches to ISLE (AArch64)

  Ported the existing implementations of the following opcodes for AArch64
  to ISLE:
  - `Brz`
  - `Brnz`
  - `Brif`
  - `Brff`
  - `BrIcmp`
  - `Jump`
  - `BrTable`

  Copyright (c) 2022 Arm Limited

* Remove dead code

  Copyright (c) 2022 Arm Limited
2022-09-26 09:45:32 +01:00
parent 11e90049d2
commit 3a2b32bf4d
9 changed files with 381 additions and 997 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1619,6 +1619,18 @@
 (decl u64_into_imm_logic (Type u64) ImmLogic)
 (extern constructor u64_into_imm_logic u64_into_imm_logic)

+(decl branch_target (VecMachLabel u8) BranchTarget)
+(extern constructor branch_target branch_target)
+
+(decl targets_jt_size (VecMachLabel) u32)
+(extern constructor targets_jt_size targets_jt_size)
+
+(decl targets_jt_space (VecMachLabel) CodeOffset)
+(extern constructor targets_jt_space targets_jt_space)
+
+(decl targets_jt_info (VecMachLabel) BoxJTSequenceInfo)
+(extern constructor targets_jt_info targets_jt_info)
+
 ;; Calculate the minimum floating-point bound for a conversion to floating
 ;; point from an integer type.
 ;; Accepts whether the output is signed, the size of the input
@@ -1698,6 +1710,9 @@
 (decl cond_br_zero (Reg) CondBrKind)
 (extern constructor cond_br_zero cond_br_zero)

+(decl cond_br_not_zero (Reg) CondBrKind)
+(extern constructor cond_br_not_zero cond_br_not_zero)
+
 (decl cond_br_cond (Cond) CondBrKind)
 (extern constructor cond_br_cond cond_br_cond)

@@ -2893,6 +2908,11 @@
 ;; TODO: Port lower_condcode() to ISLE.
 (extern constructor cond_code cond_code)

+;; Invert a condition code.
+(decl invert_cond (Cond) Cond)
+;; TODO: Port cond.invert() to ISLE.
+(extern constructor invert_cond invert_cond)
+
 ;; Generate comparison to zero operator from input condition code
 (decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
 (extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)
@@ -3530,3 +3550,65 @@
 (rule (lower_select flags cond ty rn rm)
      (if (ty_int_bool_ref_scalar_64 ty))
      (with_flags flags (csel cond rn rm)))
+
+;; Helper for emitting `MInst.Jump` instructions.
+(decl aarch64_jump (BranchTarget) SideEffectNoResult)
+(rule (aarch64_jump target)
+      (SideEffectNoResult.Inst (MInst.Jump target)))
+
+;; Helper for emitting `MInst.JTSequence` instructions.
+;; Emit the compound instruction that does:
+;;
+;; b.hs default
+;; csel rB, xzr, rIndex, hs
+;; csdb
+;; adr rA, jt
+;; ldrsw rB, [rA, rB, uxtw #2]
+;; add rA, rA, rB
+;; br rA
+;; [jt entries]
+;;
+;; This must be *one* instruction in the vcode because
+;; we cannot allow regalloc to insert any spills/fills
+;; in the middle of the sequence; otherwise, the ADR's
+;; PC-rel offset to the jumptable would be incorrect.
+;; (The alternative is to introduce a relocation pass
+;; for inlined jumptables, which is much worse, IMHO.)
+(decl jt_sequence (Reg BoxJTSequenceInfo) ConsumesFlags)
+(rule (jt_sequence ridx info)
+      (let ((rtmp1 WritableReg (temp_writable_reg $I64))
+            (rtmp2 WritableReg (temp_writable_reg $I64)))
+       (ConsumesFlags.ConsumesFlagsSideEffect
+        (MInst.JTSequence info ridx rtmp1 rtmp2))))
+
+;; Helper for emitting `MInst.CondBr` instructions.
+(decl cond_br (BranchTarget BranchTarget CondBrKind) ConsumesFlags)
+(rule (cond_br taken not_taken kind)
+      (ConsumesFlags.ConsumesFlagsSideEffect
+       (MInst.CondBr taken not_taken kind)))
+
+;; Helper for emitting `MInst.MovToNZCV` instructions.
+(decl mov_to_nzcv (Reg) ProducesFlags)
+(rule (mov_to_nzcv rn)
+      (ProducesFlags.ProducesFlagsSideEffect
+       (MInst.MovToNZCV rn)))
+
+;; Helper for emitting `MInst.EmitIsland` instructions.
+(decl emit_island (CodeOffset) SideEffectNoResult)
+(rule (emit_island needed_space)
+      (SideEffectNoResult.Inst
+       (MInst.EmitIsland needed_space)))
+
+;; Helper for emitting `br_table` sequences.
+(decl br_table_impl (u64 Reg VecMachLabel) InstOutput)
+(rule (br_table_impl (imm12_from_u64 jt_size) ridx targets)
+      (let ((jt_info BoxJTSequenceInfo (targets_jt_info targets)))
+       (side_effect (with_flags_side_effect
+            (cmp_imm (OperandSize.Size32) ridx jt_size)
+            (jt_sequence ridx jt_info)))))
+(rule -1 (br_table_impl jt_size ridx targets)
+      (let ((jt_size Reg (imm $I64 (ImmExtend.Zero) jt_size))
+            (jt_info BoxJTSequenceInfo (targets_jt_info targets)))
+       (side_effect (with_flags_side_effect
+            (cmp (OperandSize.Size32) ridx jt_size)
+            (jt_sequence ridx jt_info)))))
--- a/cranelift/codegen/src/isa/aarch64/inst/args.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs
@@ -558,18 +558,6 @@ pub enum ScalarSize {
 }

 impl ScalarSize {
-    /// Convert from a needed width to the smallest size that fits.
-    pub fn from_bits<I: Into<usize>>(bits: I) -> ScalarSize {
-        match bits.into().next_power_of_two() {
-            8 => ScalarSize::Size8,
-            16 => ScalarSize::Size16,
-            32 => ScalarSize::Size32,
-            64 => ScalarSize::Size64,
-            128 => ScalarSize::Size128,
-            w => panic!("Unexpected type width: {}", w),
-        }
-    }
-
    /// Convert to an integer operand size.
    pub fn operand_size(&self) -> OperandSize {
        match self {
@@ -579,13 +567,6 @@ impl ScalarSize {
        }
    }

-    /// Convert from a type into the smallest size that fits.
-    pub fn from_ty(ty: Type) -> ScalarSize {
-        debug_assert!(!ty.is_vector());
-
-        Self::from_bits(ty_bits(ty))
-    }
-
    /// Return the encoding bits that are used by some scalar FP instructions
    /// for a particular operand size.
    pub fn ftype(&self) -> u32 {
@@ -645,32 +626,6 @@ impl VectorSize {
        }
    }

-    /// Convert from a type into a vector operand size.
-    pub fn from_ty(ty: Type) -> VectorSize {
-        debug_assert!(ty.is_vector());
-
-        match ty {
-            B8X8 => VectorSize::Size8x8,
-            B8X16 => VectorSize::Size8x16,
-            B16X4 => VectorSize::Size16x4,
-            B16X8 => VectorSize::Size16x8,
-            B32X2 => VectorSize::Size32x2,
-            B32X4 => VectorSize::Size32x4,
-            B64X2 => VectorSize::Size64x2,
-            F32X2 => VectorSize::Size32x2,
-            F32X4 => VectorSize::Size32x4,
-            F64X2 => VectorSize::Size64x2,
-            I8X8 => VectorSize::Size8x8,
-            I8X16 => VectorSize::Size8x16,
-            I16X4 => VectorSize::Size16x4,
-            I16X8 => VectorSize::Size16x8,
-            I32X2 => VectorSize::Size32x2,
-            I32X4 => VectorSize::Size32x4,
-            I64X2 => VectorSize::Size64x2,
-            _ => unimplemented!("Unsupported type: {}", ty),
-        }
-    }
-
    /// Get the integer operand size that corresponds to a lane of a vector with a certain size.
    pub fn operand_size(&self) -> OperandSize {
        match self {
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -4,6 +4,16 @@
 ;; register(s) within which the lowered instruction's result values live.
 (decl lower (Inst) InstOutput)

+;; Variant of the main lowering constructor term, which receives an
+;; additional argument (a vector of branch targets to be used) for
+;; implementing branches.
+;; For two-branch instructions, the first target is `taken` and the second
+;; `not_taken`, even if it is a Fallthrough instruction: because we reorder
+;; blocks while we lower, the fallthrough in the new order is not (necessarily)
+;; the same as the fallthrough in CLIF. So, we use the explicitly-provided
+;; target.
+(decl lower_branch (Inst VecMachLabel) InstOutput)
+
 ;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (has_type ty (iconst (u64_from_imm64 n))))
@@ -2497,12 +2507,185 @@

 ;;; Rules for `brz`/`brnz`/`brif`/`brff`/`bricmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-;; TODO: requires icmp/fcmp first.
+;; `brz` following `icmp`, possibly converted via `bint`.
+(rule (lower_branch (brz (icmp cc x @ (value_type ty) y) _ _) targets)
+      (let ((cond Cond (cond_code cc))
+            (cond Cond (invert_cond cond)) ;; negate for `brz`
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect (lower_icmp_into_flags cc x y ty)
+                                (cond_br taken not_taken
+                                 (cond_br_cond cond))))))
+(rule (lower_branch (brz (bint (icmp cc x @ (value_type ty) y)) _ _) targets)
+      (let ((cond Cond (cond_code cc))
+            (cond Cond (invert_cond cond)) ;; negate for `brz`
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect (lower_icmp_into_flags cc x y ty)
+                                (cond_br taken not_taken
+                                 (cond_br_cond cond))))))
+;; `brnz` following `icmp`, possibly converted via `bint`.
+(rule (lower_branch (brnz (icmp cc x @ (value_type ty) y) _ _) targets)
+      (let ((cond Cond (cond_code cc))
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect (lower_icmp_into_flags cc x y ty)
+                                (cond_br taken not_taken
+                                 (cond_br_cond cond))))))
+(rule (lower_branch (brnz (bint (icmp cc x @ (value_type ty) y)) _ _) targets)
+      (let ((cond Cond (cond_code cc))
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect (lower_icmp_into_flags cc x y ty)
+                                (cond_br taken not_taken
+                                 (cond_br_cond cond))))))
+;; `brz` following `fcmp`, possibly converted via `bint`.
+(rule (lower_branch (brz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets)
+      (let ((cond Cond (fp_cond_code cc))
+            (cond Cond (invert_cond cond)) ;; negate for `brz`
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
+                                (cond_br taken not_taken
+                                 (cond_br_cond cond))))))
+(rule (lower_branch (brz (bint (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) _ _) targets)
+      (let ((cond Cond (fp_cond_code cc))
+            (cond Cond (invert_cond cond)) ;; negate for `brz`
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
+                                (cond_br taken not_taken
+                                 (cond_br_cond cond))))))
+;; `brnz` following `fcmp`, possibly converted via `bint`.
+(rule (lower_branch (brnz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets)
+      (let ((cond Cond (fp_cond_code cc))
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
+                                (cond_br taken not_taken
+                                 (cond_br_cond cond))))))
+(rule (lower_branch (brnz (bint (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) _ _) targets)
+      (let ((cond Cond (fp_cond_code cc))
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
+                                (cond_br taken not_taken
+                                 (cond_br_cond cond))))))
+;; standard `brz`
+(rule (lower_branch (brz c @ (value_type $I128) _ _) targets)
+      (let ((flags ProducesFlags (flags_to_producesflags c))
+            (c ValueRegs (put_in_regs c))
+            (c_lo Reg (value_regs_get c 0))
+            (c_hi Reg (value_regs_get c 1))
+            (rt Reg (orr $I64 c_lo c_hi))
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect flags
+         (cond_br taken not_taken (cond_br_zero rt))))))
+(rule (lower_branch (brz c @ (value_type ty) _ _) targets)
+      (if (ty_int_bool_ref_scalar_64 ty))
+      (let ((flags ProducesFlags (flags_to_producesflags c))
+            (rt Reg (put_in_reg_zext64 c))
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect flags
+         (cond_br taken not_taken (cond_br_zero rt))))))
+;; standard `brnz`
+(rule (lower_branch (brnz c @ (value_type $I128) _ _) targets)
+      (let ((flags ProducesFlags (flags_to_producesflags c))
+            (c ValueRegs (put_in_regs c))
+            (c_lo Reg (value_regs_get c 0))
+            (c_hi Reg (value_regs_get c 1))
+            (rt Reg (orr $I64 c_lo c_hi))
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect flags
+         (cond_br taken not_taken (cond_br_not_zero rt))))))
+(rule (lower_branch (brnz c @ (value_type ty) _ _) targets)
+      (if (ty_int_bool_ref_scalar_64 ty))
+      (let ((flags ProducesFlags (flags_to_producesflags c))
+            (rt Reg (put_in_reg_zext64 c))
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect flags
+         (cond_br taken not_taken (cond_br_not_zero rt))))))
+
+;; `br_icmp`
+(rule (lower_branch (br_icmp cc x @ (value_type ty) y _ _) targets)
+      (let ((cond Cond (cond_code cc))
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect (lower_icmp_into_flags cc x y ty)
+                                (cond_br taken not_taken
+                                 (cond_br_cond cond))))))
+
+;; `brif`
+(rule (lower_branch (brif cc (ifcmp x @ (value_type ty) y) _ _) targets)
+      (let ((cond Cond (cond_code cc))
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect (lower_icmp_into_flags cc x y ty)
+                                (cond_br taken not_taken
+                                 (cond_br_cond cond))))))
+;; If the `ifcmp` result is actually placed in a register, we need to move it
+;; back into the flags.
+(rule -1 (lower_branch (brif cc f _ _) targets)
+      (let ((cond Cond (cond_code cc))
+            (rn Reg (put_in_reg f))
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect (mov_to_nzcv rn)
+                                (cond_br taken not_taken
+                                 (cond_br_cond cond))))))
+
+;; `brff`
+(rule (lower_branch (brff cc (ffcmp x @ (value_type ty) y) _ _) targets)
+      (let ((cond Cond (fp_cond_code cc))
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
+                                (cond_br taken not_taken
+                                 (cond_br_cond cond))))))
+;; If the `ffcmp` result is actually placed in a register, we need to move it
+;; back into the flags.
+(rule -1 (lower_branch (brff cc f _ _) targets)
+      (let ((cond Cond (fp_cond_code cc))
+            (rn Reg (put_in_reg f))
+            (taken BranchTarget (branch_target targets 0))
+            (not_taken BranchTarget (branch_target targets 1)))
+       (side_effect
+        (with_flags_side_effect (mov_to_nzcv rn)
+                                (cond_br taken not_taken
+                                 (cond_br_cond cond))))))

 ;;; Rules for `jump` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-;; TODO.
+(rule (lower_branch (jump _ _) targets)
+      (side_effect (aarch64_jump (branch_target targets 0))))

 ;;; Rules for `br_table` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-;; TODO.
+;; `targets` contains the default target with the list of branch targets
+;; concatenated.
+(rule (lower_branch (br_table idx _ _) targets)
+      (let ((jt_size u32 (targets_jt_size targets))
+            (_ InstOutput (side_effect
+                  (emit_island (targets_jt_space targets))))
+            (ridx Reg (put_in_reg_zext32 idx)))
+       (br_table_impl (u32_as_u64 jt_size) ridx targets)))
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -16,112 +16,29 @@ use crate::isa::aarch64::inst::*;
 use crate::isa::aarch64::AArch64Backend;
 use crate::machinst::lower::*;
 use crate::machinst::{Reg, Writable};
+use crate::CodegenResult;
 use crate::{machinst::*, trace};
-use crate::{CodegenError, CodegenResult};
 use smallvec::{smallvec, SmallVec};
-use std::cmp;

 pub mod isle;

-//============================================================================
-// Result enum types.
-//
-// Lowering of a given value results in one of these enums, depending on the
-// modes in which we can accept the value.
-
-/// A lowering result: register, register-shift.  An SSA value can always be
-/// lowered into one of these options; the register form is the fallback.
-#[derive(Clone, Debug)]
-enum ResultRS {
-    Reg(Reg),
-    RegShift(Reg, ShiftOpAndAmt),
-}
-
-/// A lowering result: register, register-shift, register-extend.  An SSA value can always be
-/// lowered into one of these options; the register form is the fallback.
-#[derive(Clone, Debug)]
-enum ResultRSE {
-    Reg(Reg),
-    RegShift(Reg, ShiftOpAndAmt),
-    RegExtend(Reg, ExtendOp),
-}
-
-impl ResultRSE {
-    fn from_rs(rs: ResultRS) -> ResultRSE {
-        match rs {
-            ResultRS::Reg(r) => ResultRSE::Reg(r),
-            ResultRS::RegShift(r, s) => ResultRSE::RegShift(r, s),
-        }
-    }
-}
-
-/// A lowering result: register, register-shift, register-extend, or 12-bit immediate form.
-/// An SSA value can always be lowered into one of these options; the register form is the
-/// fallback.
-#[derive(Clone, Debug)]
-pub(crate) enum ResultRSEImm12 {
-    Reg(Reg),
-    RegShift(Reg, ShiftOpAndAmt),
-    RegExtend(Reg, ExtendOp),
-    Imm12(Imm12),
-}
-
-impl ResultRSEImm12 {
-    fn from_rse(rse: ResultRSE) -> ResultRSEImm12 {
-        match rse {
-            ResultRSE::Reg(r) => ResultRSEImm12::Reg(r),
-            ResultRSE::RegShift(r, s) => ResultRSEImm12::RegShift(r, s),
-            ResultRSE::RegExtend(r, e) => ResultRSEImm12::RegExtend(r, e),
-        }
-    }
-}
-
 //============================================================================
 // Lowering: convert instruction inputs to forms that we can use.

-/// Lower an instruction input to a 64-bit constant, if possible.
-pub(crate) fn input_to_const(ctx: &mut Lower<Inst>, input: InsnInput) -> Option<u64> {
-    let input = ctx.get_input_as_source_or_const(input.insn, input.input);
-    input.constant
-}
-
-/// Lower an instruction input to a constant register-shift amount, if possible.
-pub(crate) fn input_to_shiftimm(
-    ctx: &mut Lower<Inst>,
-    input: InsnInput,
-) -> Option<ShiftOpShiftImm> {
-    input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
-}
-
 /// How to handle narrow values loaded into registers; see note on `narrow_mode`
 /// parameter to `put_input_in_*` below.
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub(crate) enum NarrowValueMode {
    None,
-    /// Zero-extend to 32 bits if original is < 32 bits.
-    ZeroExtend32,
-    /// Sign-extend to 32 bits if original is < 32 bits.
-    SignExtend32,
    /// Zero-extend to 64 bits if original is < 64 bits.
    ZeroExtend64,
-    /// Sign-extend to 64 bits if original is < 64 bits.
-    SignExtend64,
 }

 impl NarrowValueMode {
    fn is_32bit(&self) -> bool {
        match self {
            NarrowValueMode::None => false,
-            NarrowValueMode::ZeroExtend32 | NarrowValueMode::SignExtend32 => true,
-            NarrowValueMode::ZeroExtend64 | NarrowValueMode::SignExtend64 => false,
-        }
-    }
-
-    fn is_signed(&self) -> bool {
-        match self {
-            NarrowValueMode::SignExtend32 | NarrowValueMode::SignExtend64 => true,
-            NarrowValueMode::ZeroExtend32 | NarrowValueMode::ZeroExtend64 => false,
-            NarrowValueMode::None => false,
+            NarrowValueMode::ZeroExtend64 => false,
        }
    }
 }
@@ -159,29 +76,6 @@ fn extend_reg(
    let from_bits = ty_bits(ty) as u8;
    match (narrow_mode, from_bits) {
        (NarrowValueMode::None, _) => in_reg,
-        (NarrowValueMode::ZeroExtend32, n) if n < 32 => {
-            let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
-            ctx.emit(Inst::Extend {
-                rd: tmp,
-                rn: in_reg,
-                signed: false,
-                from_bits,
-                to_bits: 32,
-            });
-            tmp.to_reg()
-        }
-        (NarrowValueMode::SignExtend32, n) if n < 32 => {
-            let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
-            ctx.emit(Inst::Extend {
-                rd: tmp,
-                rn: in_reg,
-                signed: true,
-                from_bits,
-                to_bits: 32,
-            });
-            tmp.to_reg()
-        }
-        (NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,

        (NarrowValueMode::ZeroExtend64, n) if n < 64 => {
            if is_const {
@@ -199,17 +93,6 @@ fn extend_reg(
                tmp.to_reg()
            }
        }
-        (NarrowValueMode::SignExtend64, n) if n < 64 => {
-            let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
-            ctx.emit(Inst::Extend {
-                rd: tmp,
-                rn: in_reg,
-                signed: true,
-                from_bits,
-                to_bits: 64,
-            });
-            tmp.to_reg()
-        }
        (_, 64) => in_reg,
        (_, 128) => in_reg,

@@ -261,72 +144,6 @@ fn put_value_in_reg(ctx: &mut Lower<Inst>, value: Value, narrow_mode: NarrowValu
    extend_reg(ctx, ty, reg, is_const, narrow_mode)
 }

-/// Lower an instruction input to multiple regs
-pub(crate) fn put_input_in_regs(ctx: &mut Lower<Inst>, input: InsnInput) -> ValueRegs<Reg> {
-    let value = ctx.input_as_value(input.insn, input.input);
-    let (in_regs, _, _) = lower_value_to_regs(ctx, value);
-    in_regs
-}
-
-/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
-///
-/// The `narrow_mode` flag indicates whether the consumer of this value needs
-/// the high bits clear. For many operations, such as an add/sub/mul or any
-/// bitwise logical operation, the low-bit results depend only on the low-bit
-/// inputs, so e.g. we can do an 8 bit add on 32 bit registers where the 8-bit
-/// value is stored in the low 8 bits of the register and the high 24 bits are
-/// undefined. If the op truly needs the high N bits clear (such as for a
-/// divide or a right-shift or a compare-to-zero), `narrow_mode` should be
-/// set to `ZeroExtend` or `SignExtend` as appropriate, and the resulting
-/// register will be provided the extended value.
-fn put_input_in_rs(
-    ctx: &mut Lower<Inst>,
-    input: InsnInput,
-    narrow_mode: NarrowValueMode,
-) -> ResultRS {
-    let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
-    // Unique or non-unique use is fine for merging here.
-    if let Some((insn, 0)) = inputs.inst.as_inst() {
-        let op = ctx.data(insn).opcode();
-
-        if op == Opcode::Ishl {
-            let shiftee = InsnInput { insn, input: 0 };
-            let shift_amt = InsnInput { insn, input: 1 };
-
-            // Can we get the shift amount as an immediate?
-            if let Some(shiftimm) = input_to_shiftimm(ctx, shift_amt) {
-                let shiftee_bits = ty_bits(ctx.input_ty(insn, 0));
-                if shiftee_bits <= std::u8::MAX as usize {
-                    let shiftimm = shiftimm.mask(shiftee_bits as u8);
-                    let reg = put_input_in_reg(ctx, shiftee, narrow_mode);
-                    return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
-                }
-            }
-        }
-    }
-
-    ResultRS::Reg(put_input_in_reg(ctx, input, narrow_mode))
-}
-
-/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
-/// This does not actually codegen the source instruction; it just uses the
-/// vreg into which the source instruction will generate its value.
-///
-/// See note on `put_input_in_rs` for a description of `narrow_mode`.
-fn put_input_in_rse(
-    ctx: &mut Lower<Inst>,
-    input: InsnInput,
-    narrow_mode: NarrowValueMode,
-) -> ResultRSE {
-    let value = ctx.input_as_value(input.insn, input.input);
-    if let Some((val, extendop)) = get_as_extended_value(ctx, value, narrow_mode) {
-        let reg = put_value_in_reg(ctx, val, NarrowValueMode::None);
-        return ResultRSE::RegExtend(reg, extendop);
-    }
-
-    ResultRSE::from_rs(put_input_in_rs(ctx, input, narrow_mode))
-}
-
 fn get_as_extended_value(
    ctx: &mut Lower<Inst>,
    val: Value,
@@ -351,13 +168,8 @@ fn get_as_extended_value(
            // A single zero-extend or sign-extend is equal to itself.
            (_, NarrowValueMode::None) => true,
            // Two zero-extends or sign-extends in a row is equal to a single zero-extend or sign-extend.
-            (false, NarrowValueMode::ZeroExtend32) | (false, NarrowValueMode::ZeroExtend64) => true,
-            (true, NarrowValueMode::SignExtend32) | (true, NarrowValueMode::SignExtend64) => true,
-            // A zero-extend and a sign-extend in a row is not equal to a single zero-extend or sign-extend
-            (false, NarrowValueMode::SignExtend32) | (false, NarrowValueMode::SignExtend64) => {
-                false
-            }
-            (true, NarrowValueMode::ZeroExtend32) | (true, NarrowValueMode::ZeroExtend64) => false,
+            (false, NarrowValueMode::ZeroExtend64) => true,
+            (true, NarrowValueMode::ZeroExtend64) => false,
        } {
            let extendop = match (sign_extend, inner_bits) {
                (true, 8) => ExtendOp::SXTB,
@@ -379,25 +191,9 @@ fn get_as_extended_value(
        && ((narrow_mode.is_32bit() && out_bits < 32) || (!narrow_mode.is_32bit() && out_bits < 64))
    {
        let extendop = match (narrow_mode, out_bits) {
-            (NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
-                ExtendOp::SXTB
-            }
-            (NarrowValueMode::ZeroExtend32, 1) | (NarrowValueMode::ZeroExtend64, 1) => {
-                ExtendOp::UXTB
-            }
-            (NarrowValueMode::SignExtend32, 8) | (NarrowValueMode::SignExtend64, 8) => {
-                ExtendOp::SXTB
-            }
-            (NarrowValueMode::ZeroExtend32, 8) | (NarrowValueMode::ZeroExtend64, 8) => {
-                ExtendOp::UXTB
-            }
-            (NarrowValueMode::SignExtend32, 16) | (NarrowValueMode::SignExtend64, 16) => {
-                ExtendOp::SXTH
-            }
-            (NarrowValueMode::ZeroExtend32, 16) | (NarrowValueMode::ZeroExtend64, 16) => {
-                ExtendOp::UXTH
-            }
-            (NarrowValueMode::SignExtend64, 32) => ExtendOp::SXTW,
+            (NarrowValueMode::ZeroExtend64, 1) => ExtendOp::UXTB,
+            (NarrowValueMode::ZeroExtend64, 8) => ExtendOp::UXTB,
+            (NarrowValueMode::ZeroExtend64, 16) => ExtendOp::UXTH,
            (NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
            _ => unreachable!(),
        };
@@ -406,73 +202,6 @@ fn get_as_extended_value(
    None
 }

-pub(crate) fn put_input_in_rse_imm12(
-    ctx: &mut Lower<Inst>,
-    input: InsnInput,
-    narrow_mode: NarrowValueMode,
-) -> ResultRSEImm12 {
-    if let Some(imm_value) = input_to_const(ctx, input) {
-        if let Some(i) = Imm12::maybe_from_u64(imm_value) {
-            let out_ty_bits = ty_bits(ctx.input_ty(input.insn, input.input));
-            let is_negative = (i.bits as u64) & (1 << (cmp::max(out_ty_bits, 1) - 1)) != 0;
-
-            // This condition can happen if we matched a value that overflows the output type of
-            // its `iconst` when viewed as a signed value (i.e. iconst.i8 200).
-            // When that happens we need to lower as a negative value, which we cannot do here.
-            if !(narrow_mode.is_signed() && is_negative) {
-                return ResultRSEImm12::Imm12(i);
-            }
-        }
-    }
-
-    ResultRSEImm12::from_rse(put_input_in_rse(ctx, input, narrow_mode))
-}
-
-//============================================================================
-// ALU instruction constructors.
-
-pub(crate) fn alu_inst_imm12(
-    op: ALUOp,
-    ty: Type,
-    rd: Writable<Reg>,
-    rn: Reg,
-    rm: ResultRSEImm12,
-) -> Inst {
-    let size = OperandSize::from_ty(ty);
-    match rm {
-        ResultRSEImm12::Imm12(imm12) => Inst::AluRRImm12 {
-            alu_op: op,
-            size,
-            rd,
-            rn,
-            imm12,
-        },
-        ResultRSEImm12::Reg(rm) => Inst::AluRRR {
-            alu_op: op,
-            size,
-            rd,
-            rn,
-            rm,
-        },
-        ResultRSEImm12::RegShift(rm, shiftop) => Inst::AluRRRShift {
-            alu_op: op,
-            size,
-            rd,
-            rn,
-            rm,
-            shiftop,
-        },
-        ResultRSEImm12::RegExtend(rm, extendop) => Inst::AluRRRExtend {
-            alu_op: op,
-            size,
-            rd,
-            rn,
-            rm,
-            extendop,
-        },
-    }
-}
-
 //============================================================================
 // Lowering: addressing mode support. Takes instruction directly, rather
 // than an `InsnInput`, to do more introspection.
@@ -967,129 +696,6 @@ pub(crate) fn lower_fp_condcode(cc: FloatCC) -> Cond {
    }
 }

-pub(crate) fn lower_vector_compare(
-    ctx: &mut Lower<Inst>,
-    rd: Writable<Reg>,
-    mut rn: Reg,
-    mut rm: Reg,
-    ty: Type,
-    cond: Cond,
-) -> CodegenResult<()> {
-    let is_float = ty.lane_type().is_float();
-    let size = VectorSize::from_ty(ty);
-
-    if is_float && (cond == Cond::Vc || cond == Cond::Vs) {
-        let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
-
-        ctx.emit(Inst::VecRRR {
-            alu_op: VecALUOp::Fcmeq,
-            rd,
-            rn,
-            rm: rn,
-            size,
-        });
-        ctx.emit(Inst::VecRRR {
-            alu_op: VecALUOp::Fcmeq,
-            rd: tmp,
-            rn: rm,
-            rm,
-            size,
-        });
-        ctx.emit(Inst::VecRRR {
-            alu_op: VecALUOp::And,
-            rd,
-            rn: rd.to_reg(),
-            rm: tmp.to_reg(),
-            size,
-        });
-
-        if cond == Cond::Vs {
-            ctx.emit(Inst::VecMisc {
-                op: VecMisc2::Not,
-                rd,
-                rn: rd.to_reg(),
-                size,
-            });
-        }
-    } else {
-        // 'Less than' operations are implemented by swapping
-        // the order of operands and using the 'greater than'
-        // instructions.
-        // 'Not equal' is implemented with 'equal' and inverting
-        // the result.
-        let (alu_op, swap) = match (is_float, cond) {
-            (false, Cond::Eq) => (VecALUOp::Cmeq, false),
-            (false, Cond::Ne) => (VecALUOp::Cmeq, false),
-            (false, Cond::Ge) => (VecALUOp::Cmge, false),
-            (false, Cond::Gt) => (VecALUOp::Cmgt, false),
-            (false, Cond::Le) => (VecALUOp::Cmge, true),
-            (false, Cond::Lt) => (VecALUOp::Cmgt, true),
-            (false, Cond::Hs) => (VecALUOp::Cmhs, false),
-            (false, Cond::Hi) => (VecALUOp::Cmhi, false),
-            (false, Cond::Ls) => (VecALUOp::Cmhs, true),
-            (false, Cond::Lo) => (VecALUOp::Cmhi, true),
-            (true, Cond::Eq) => (VecALUOp::Fcmeq, false),
-            (true, Cond::Ne) => (VecALUOp::Fcmeq, false),
-            (true, Cond::Mi) => (VecALUOp::Fcmgt, true),
-            (true, Cond::Ls) => (VecALUOp::Fcmge, true),
-            (true, Cond::Ge) => (VecALUOp::Fcmge, false),
-            (true, Cond::Gt) => (VecALUOp::Fcmgt, false),
-            _ => {
-                return Err(CodegenError::Unsupported(format!(
-                    "Unsupported {} SIMD vector comparison: {:?}",
-                    if is_float {
-                        "floating-point"
-                    } else {
-                        "integer"
-                    },
-                    cond
-                )))
-            }
-        };
-
-        if swap {
-            std::mem::swap(&mut rn, &mut rm);
-        }
-
-        ctx.emit(Inst::VecRRR {
-            alu_op,
-            rd,
-            rn,
-            rm,
-            size,
-        });
-
-        if cond == Cond::Ne {
-            ctx.emit(Inst::VecMisc {
-                op: VecMisc2::Not,
-                rd,
-                rn: rd.to_reg(),
-                size,
-            });
-        }
-    }
-
-    Ok(())
-}
-
-/// Determines whether this condcode interprets inputs as signed or unsigned.  See the
-/// documentation for the `icmp` instruction in cranelift-codegen/meta/src/shared/instructions.rs
-/// for further insights into this.
-pub(crate) fn condcode_is_signed(cc: IntCC) -> bool {
-    match cc {
-        IntCC::Equal
-        | IntCC::UnsignedGreaterThanOrEqual
-        | IntCC::UnsignedGreaterThan
-        | IntCC::UnsignedLessThanOrEqual
-        | IntCC::UnsignedLessThan
-        | IntCC::NotEqual => false,
-        IntCC::SignedGreaterThanOrEqual
-        | IntCC::SignedGreaterThan
-        | IntCC::SignedLessThanOrEqual
-        | IntCC::SignedLessThan => true,
-    }
-}
-
 //=============================================================================
 // Helpers for instruction lowering.

@@ -1142,256 +748,6 @@ pub(crate) fn maybe_value_multi(
    None
 }

-/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
-/// Bint or a bitcast).
-///
-/// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
-/// a bit more generic.
-pub(crate) fn maybe_input_insn_via_conv(
-    c: &mut Lower<Inst>,
-    input: InsnInput,
-    op: Opcode,
-    conv: Opcode,
-) -> Option<IRInst> {
-    let inputs = c.get_input_as_source_or_const(input.insn, input.input);
-    if let Some((src_inst, _)) = inputs.inst.as_inst() {
-        let data = c.data(src_inst);
-        if data.opcode() == op {
-            return Some(src_inst);
-        }
-        if data.opcode() == conv {
-            let inputs = c.get_input_as_source_or_const(src_inst, 0);
-            if let Some((src_inst, _)) = inputs.inst.as_inst() {
-                let data = c.data(src_inst);
-                if data.opcode() == op {
-                    return Some(src_inst);
-                }
-            }
-        }
-    }
-    None
-}
-
-/// Specifies what [lower_icmp] should do when lowering
-#[derive(Debug, Clone, PartialEq)]
-pub(crate) enum IcmpOutput {
-    /// Lowers the comparison into a cond code, discarding the results. The cond code emitted can
-    /// be checked in the resulting [IcmpResult].
-    CondCode,
-}
-
-impl IcmpOutput {
-    pub fn reg(&self) -> Option<Writable<Reg>> {
-        match self {
-            IcmpOutput::CondCode => None,
-        }
-    }
-}
-
-/// The output of an Icmp lowering.
-#[derive(Debug, Clone, PartialEq)]
-pub(crate) enum IcmpResult {
-    /// The result was output into the given [Cond]. Callers may perform operations using this [Cond]
-    /// and its inverse, other [Cond]'s are not guaranteed to be correct.
-    CondCode(Cond),
-}
-
-impl IcmpResult {
-    pub fn unwrap_cond(&self) -> Cond {
-        match self {
-            IcmpResult::CondCode(c) => *c,
-        }
-    }
-}
-
-/// Lower an icmp comparision
-///
-/// We can lower into the status flags, or materialize the result into a register
-/// This is controlled by the `output` parameter.
-pub(crate) fn lower_icmp(
-    ctx: &mut Lower<Inst>,
-    insn: IRInst,
-    condcode: IntCC,
-    output: IcmpOutput,
-) -> CodegenResult<IcmpResult> {
-    trace!(
-        "lower_icmp: insn {}, condcode: {}, output: {:?}",
-        insn,
-        condcode,
-        output
-    );
-
-    let rd = output.reg().unwrap_or(writable_zero_reg());
-    let inputs = insn_inputs(ctx, insn);
-    let cond = lower_condcode(condcode);
-    let is_signed = condcode_is_signed(condcode);
-    let ty = ctx.input_ty(insn, 0);
-    let bits = ty_bits(ty);
-    let narrow_mode = match (bits <= 32, is_signed) {
-        (true, true) => NarrowValueMode::SignExtend32,
-        (true, false) => NarrowValueMode::ZeroExtend32,
-        (false, true) => NarrowValueMode::SignExtend64,
-        (false, false) => NarrowValueMode::ZeroExtend64,
-    };
-    let mut should_materialize = output.reg().is_some();
-
-    let out_condcode = if ty == I128 {
-        let lhs = put_input_in_regs(ctx, inputs[0]);
-        let rhs = put_input_in_regs(ctx, inputs[1]);
-
-        let tmp1 = ctx.alloc_tmp(I64).only_reg().unwrap();
-        let tmp2 = ctx.alloc_tmp(I64).only_reg().unwrap();
-
-        match condcode {
-            IntCC::Equal | IntCC::NotEqual => {
-                // cmp lhs_lo, rhs_lo
-                // ccmp lhs_hi, rhs_hi, #0, eq
-                // cset dst, {eq, ne}
-
-                ctx.emit(Inst::AluRRR {
-                    alu_op: ALUOp::SubS,
-                    size: OperandSize::Size64,
-                    rd: writable_zero_reg(),
-                    rn: lhs.regs()[0],
-                    rm: rhs.regs()[0],
-                });
-                ctx.emit(Inst::CCmp {
-                    size: OperandSize::Size64,
-                    rn: lhs.regs()[1],
-                    rm: rhs.regs()[1],
-                    nzcv: NZCV::new(false, false, false, false),
-                    cond: Cond::Eq,
-                });
-                cond
-            }
-            _ => {
-                // cmp     lhs_lo, rhs_lo
-                // cset    tmp1, unsigned_cond
-                // cmp     lhs_hi, rhs_hi
-                // cset    tmp2, cond
-                // csel    dst, tmp1, tmp2, eq
-
-                let rd = output.reg().unwrap_or(tmp1);
-                let unsigned_cond = lower_condcode(condcode.unsigned());
-
-                ctx.emit(Inst::AluRRR {
-                    alu_op: ALUOp::SubS,
-                    size: OperandSize::Size64,
-                    rd: writable_zero_reg(),
-                    rn: lhs.regs()[0],
-                    rm: rhs.regs()[0],
-                });
-                materialize_bool_result(ctx, insn, tmp1, unsigned_cond);
-                ctx.emit(Inst::AluRRR {
-                    alu_op: ALUOp::SubS,
-                    size: OperandSize::Size64,
-                    rd: writable_zero_reg(),
-                    rn: lhs.regs()[1],
-                    rm: rhs.regs()[1],
-                });
-                materialize_bool_result(ctx, insn, tmp2, cond);
-                ctx.emit(Inst::CSel {
-                    cond: Cond::Eq,
-                    rd,
-                    rn: tmp1.to_reg(),
-                    rm: tmp2.to_reg(),
-                });
-
-                if output == IcmpOutput::CondCode {
-                    // We only need to guarantee that the flags for `cond` are correct, so we can
-                    // compare rd with 0 or 1
-
-                    // If we are doing compare or equal, we want to compare with 1 instead of zero
-                    if condcode.without_equal() != condcode {
-                        lower_constant_u64(ctx, tmp2, 1);
-                    }
-
-                    let xzr = zero_reg();
-                    let rd = rd.to_reg();
-                    let tmp2 = tmp2.to_reg();
-                    let (rn, rm) = match condcode {
-                        IntCC::SignedGreaterThanOrEqual => (rd, tmp2),
-                        IntCC::UnsignedGreaterThanOrEqual => (rd, tmp2),
-                        IntCC::SignedLessThanOrEqual => (tmp2, rd),
-                        IntCC::UnsignedLessThanOrEqual => (tmp2, rd),
-                        IntCC::SignedGreaterThan => (rd, xzr),
-                        IntCC::UnsignedGreaterThan => (rd, xzr),
-                        IntCC::SignedLessThan => (xzr, rd),
-                        IntCC::UnsignedLessThan => (xzr, rd),
-                        _ => unreachable!(),
-                    };
-
-                    ctx.emit(Inst::AluRRR {
-                        alu_op: ALUOp::SubS,
-                        size: OperandSize::Size64,
-                        rd: writable_zero_reg(),
-                        rn,
-                        rm,
-                    });
-                }
-
-                // Prevent a second materialize_bool_result to be emitted at the end of the function
-                should_materialize = false;
-                cond
-            }
-        }
-    } else if ty.is_vector() {
-        assert_ne!(output, IcmpOutput::CondCode);
-        should_materialize = false;
-
-        let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
-        let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
-        lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
-        cond
-    } else {
-        let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
-        let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
-        ctx.emit(alu_inst_imm12(ALUOp::SubS, ty, writable_zero_reg(), rn, rm));
-        cond
-    };
-
-    // Most of the comparisons above produce flags by default, if the caller requested the result
-    // in a register we materialize those flags into a register. Some branches do end up producing
-    // the result as a register by default, so we ignore those.
-    if should_materialize {
-        materialize_bool_result(ctx, insn, rd, out_condcode);
-    }
-
-    Ok(match output {
-        IcmpOutput::CondCode => IcmpResult::CondCode(out_condcode),
-    })
-}
-
-pub(crate) fn lower_fcmp_or_ffcmp_to_flags(ctx: &mut Lower<Inst>, insn: IRInst) {
-    let ty = ctx.input_ty(insn, 0);
-    let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
-    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-    ctx.emit(Inst::FpuCmp {
-        size: ScalarSize::from_ty(ty),
-        rn,
-        rm,
-    });
-}
-
-/// Materialize a boolean value into a register from the flags
-/// (e.g set by a comparison).
-/// A 0 / -1 (all-ones) result as expected for bool operations.
-pub(crate) fn materialize_bool_result(
-    ctx: &mut Lower<Inst>,
-    insn: IRInst,
-    rd: Writable<Reg>,
-    cond: Cond,
-) {
-    // A boolean is 0 / -1; if output width is > 1 use `csetm`,
-    // otherwise use `cset`.
-    if ty_bits(ctx.output_ty(insn, 0)) > 1 {
-        ctx.emit(Inst::CSetm { rd, cond });
-    } else {
-        ctx.emit(Inst::CSet { rd, cond });
-    }
-}
-
 //=============================================================================
 // Lowering-backend trait implementation.

@@ -1408,7 +764,33 @@ impl LowerBackend for AArch64Backend {
        branches: &[IRInst],
        targets: &[MachLabel],
    ) -> CodegenResult<()> {
-        lower_inst::lower_branch(ctx, branches, targets)
+        // A block should end with at most two branches. The first may be a
+        // conditional branch; a conditional branch can be followed only by an
+        // unconditional branch or fallthrough. Otherwise, if only one branch,
+        // it may be an unconditional branch, a fallthrough, a return, or a
+        // trap. These conditions are verified by `is_ebb_basic()` during the
+        // verifier pass.
+        assert!(branches.len() <= 2);
+        if branches.len() == 2 {
+            let op1 = ctx.data(branches[1]).opcode();
+            assert!(op1 == Opcode::Jump);
+        }
+
+        if let Ok(()) = super::lower::isle::lower_branch(
+            ctx,
+            &self.triple,
+            &self.flags,
+            &self.isa_flags,
+            branches[0],
+            targets,
+        ) {
+            return Ok(());
+        }
+
+        unreachable!(
+            "implemented in ISLE: branch = `{}`",
+            ctx.dfg().display_inst(branches[0]),
+        );
    }

    fn maybe_pinned_reg(&self) -> Option<Reg> {
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -67,6 +67,25 @@ pub(crate) fn lower(
    )
 }

+/// Lower the branch instruction `branch` through the ISLE-generated
+/// `constructor_lower_branch`, handing it the block's branch `targets`.
+///
+/// The result is whatever `lower_common` returns for the supplied closure.
+pub(crate) fn lower_branch(
+    lower_ctx: &mut Lower<MInst>,
+    triple: &Triple,
+    flags: &Flags,
+    isa_flags: &IsaFlags,
+    branch: Inst,
+    targets: &[MachLabel],
+) -> Result<(), ()> {
+    // The generated constructor takes the labels as an owned vector by
+    // reference, so copy the slice once up front.
+    let target_labels = targets.to_vec();
+    // Branches have no outputs, hence the empty output slice.
+    lower_common(lower_ctx, triple, flags, isa_flags, &[], branch, |cx, insn| {
+        generated_code::constructor_lower_branch(cx, insn, &target_labels)
+    })
+}
+
 pub struct ExtendedValue {
    val: Value,
    extend: ExtendOp,
@@ -342,6 +361,10 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
        CondBrKind::Zero(reg)
    }

+    /// Wrap `reg` in the `NotZero` conditional-branch kind.
+    fn cond_br_not_zero(&mut self, reg: Reg) -> CondBrKind {
+        let kind = CondBrKind::NotZero(reg);
+        kind
+    }
+
    fn cond_br_cond(&mut self, cond: &Cond) -> CondBrKind {
        CondBrKind::Cond(*cond)
    }
@@ -521,6 +544,9 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
        lower_condcode(*cc)
    }

+    /// Return the inverse of the condition code `cond`.
+    fn invert_cond(&mut self, cond: &Cond) -> Cond {
+        cond.invert()
+    }
    fn preg_sp(&mut self) -> PReg {
        super::regs::stack_reg().to_real_reg().unwrap().into()
    }
@@ -533,6 +559,34 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
        super::regs::link_reg().to_real_reg().unwrap().into()
    }

+    /// Pick the label at position `idx` in `elements` and wrap it as a
+    /// label branch target.
+    ///
+    /// Panics if `idx` is out of bounds for `elements`.
+    fn branch_target(&mut self, elements: &VecMachLabel, idx: u8) -> BranchTarget {
+        let label = elements[usize::from(idx)];
+        BranchTarget::Label(label)
+    }
+
+    /// Number of jump-table entries described by `elements`.
+    ///
+    /// `elements[0]` is used as the default target (see `targets_jt_info`),
+    /// so the table itself has one entry fewer than `elements`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the entry count does not fit in a `u32`.
+    fn targets_jt_size(&mut self, elements: &VecMachLabel) -> u32 {
+        let jt_size = elements.len() - 1;
+        // Check the bound before narrowing so that an oversized table fails
+        // loudly instead of being silently truncated by the cast.
+        assert!(jt_size <= u32::MAX as usize);
+        jt_size as u32
+    }
+
+    /// Bytes of code space needed for the jump-table sequence built from
+    /// `elements`.
+    fn targets_jt_space(&mut self, elements: &VecMachLabel) -> CodeOffset {
+        // Each instruction is 4 bytes; the sequence is 8 instructions plus
+        // one 4-byte slot per jump-table entry.
+        let table_entries = self.targets_jt_size(elements);
+        4 * (8 + table_entries)
+    }
+
+    /// Build the boxed `JTSequenceInfo` for `elements`: the first label is
+    /// the default target and every following label is a table entry.
+    ///
+    /// Panics if `elements` is empty.
+    fn targets_jt_info(&mut self, elements: &VecMachLabel) -> BoxJTSequenceInfo {
+        Box::new(JTSequenceInfo {
+            // Skip the default target; the rest are the table entries.
+            targets: elements
+                .iter()
+                .skip(1)
+                .copied()
+                .map(BranchTarget::Label)
+                .collect(),
+            default_target: BranchTarget::Label(elements[0]),
+        })
+    }
+
    fn min_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
        let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();

--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1,8 +1,5 @@
 //! Lower a single Cranelift instruction into vcode.

-use super::lower::*;
-use crate::binemit::CodeOffset;
-use crate::ir::types::*;
 use crate::ir::Inst as IRInst;
 use crate::ir::Opcode;
 use crate::isa::aarch64::inst::*;
@@ -11,8 +8,6 @@ use crate::machinst::lower::*;
 use crate::machinst::*;
 use crate::settings::Flags;
 use crate::{CodegenError, CodegenResult};
-use alloc::boxed::Box;
-use alloc::vec::Vec;
 use target_lexicon::Triple;

 /// Actually codegen an instruction's results into registers.
@@ -323,269 +318,3 @@ pub(crate) fn lower_insn_to_regs(

    Ok(())
 }
-
-pub(crate) fn lower_branch(
-    ctx: &mut Lower<Inst>,
-    branches: &[IRInst],
-    targets: &[MachLabel],
-) -> CodegenResult<()> {
-    // A block should end with at most two branches. The first may be a
-    // conditional branch; a conditional branch can be followed only by an
-    // unconditional branch or fallthrough. Otherwise, if only one branch,
-    // it may be an unconditional branch, a fallthrough, a return, or a
-    // trap. These conditions are verified by `is_ebb_basic()` during the
-    // verifier pass.
-    assert!(branches.len() <= 2);
-
-    if branches.len() == 2 {
-        // Must be a conditional branch followed by an unconditional branch.
-        let op0 = ctx.data(branches[0]).opcode();
-        let op1 = ctx.data(branches[1]).opcode();
-
-        assert!(op1 == Opcode::Jump);
-        let taken = BranchTarget::Label(targets[0]);
-        // not_taken target is the target of the second branch, even if it is a Fallthrough
-        // instruction: because we reorder blocks while we lower, the fallthrough in the new
-        // order is not (necessarily) the same as the fallthrough in CLIF. So we use the
-        // explicitly-provided target.
-        let not_taken = BranchTarget::Label(targets[1]);
-
-        match op0 {
-            Opcode::Brz | Opcode::Brnz => {
-                let ty = ctx.input_ty(branches[0], 0);
-                let flag_input = InsnInput {
-                    insn: branches[0],
-                    input: 0,
-                };
-                if let Some(icmp_insn) =
-                    maybe_input_insn_via_conv(ctx, flag_input, Opcode::Icmp, Opcode::Bint)
-                {
-                    let condcode = ctx.data(icmp_insn).cond_code().unwrap();
-                    let cond =
-                        lower_icmp(ctx, icmp_insn, condcode, IcmpOutput::CondCode)?.unwrap_cond();
-                    let negated = op0 == Opcode::Brz;
-                    let cond = if negated { cond.invert() } else { cond };
-
-                    ctx.emit(Inst::CondBr {
-                        taken,
-                        not_taken,
-                        kind: CondBrKind::Cond(cond),
-                    });
-                } else if let Some(fcmp_insn) =
-                    maybe_input_insn_via_conv(ctx, flag_input, Opcode::Fcmp, Opcode::Bint)
-                {
-                    let condcode = ctx.data(fcmp_insn).fp_cond_code().unwrap();
-                    let cond = lower_fp_condcode(condcode);
-                    let negated = op0 == Opcode::Brz;
-                    let cond = if negated { cond.invert() } else { cond };
-
-                    lower_fcmp_or_ffcmp_to_flags(ctx, fcmp_insn);
-                    ctx.emit(Inst::CondBr {
-                        taken,
-                        not_taken,
-                        kind: CondBrKind::Cond(cond),
-                    });
-                } else {
-                    let rt = if ty == I128 {
-                        let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
-                        let input = put_input_in_regs(ctx, flag_input);
-                        ctx.emit(Inst::AluRRR {
-                            alu_op: ALUOp::Orr,
-                            size: OperandSize::Size64,
-                            rd: tmp,
-                            rn: input.regs()[0],
-                            rm: input.regs()[1],
-                        });
-                        tmp.to_reg()
-                    } else {
-                        put_input_in_reg(ctx, flag_input, NarrowValueMode::ZeroExtend64)
-                    };
-                    let kind = match op0 {
-                        Opcode::Brz => CondBrKind::Zero(rt),
-                        Opcode::Brnz => CondBrKind::NotZero(rt),
-                        _ => unreachable!(),
-                    };
-                    ctx.emit(Inst::CondBr {
-                        taken,
-                        not_taken,
-                        kind,
-                    });
-                }
-            }
-            Opcode::BrIcmp => {
-                let condcode = ctx.data(branches[0]).cond_code().unwrap();
-                let cond =
-                    lower_icmp(ctx, branches[0], condcode, IcmpOutput::CondCode)?.unwrap_cond();
-
-                ctx.emit(Inst::CondBr {
-                    taken,
-                    not_taken,
-                    kind: CondBrKind::Cond(cond),
-                });
-            }
-
-            Opcode::Brif => {
-                let condcode = ctx.data(branches[0]).cond_code().unwrap();
-
-                let flag_input = InsnInput {
-                    insn: branches[0],
-                    input: 0,
-                };
-                if let Some(ifcmp_insn) = maybe_input_insn(ctx, flag_input, Opcode::Ifcmp) {
-                    let cond =
-                        lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::CondCode)?.unwrap_cond();
-                    ctx.emit(Inst::CondBr {
-                        taken,
-                        not_taken,
-                        kind: CondBrKind::Cond(cond),
-                    });
-                } else {
-                    // If the ifcmp result is actually placed in a
-                    // register, we need to move it back into the flags.
-                    let rn = put_input_in_reg(ctx, flag_input, NarrowValueMode::None);
-                    ctx.emit(Inst::MovToNZCV { rn });
-                    ctx.emit(Inst::CondBr {
-                        taken,
-                        not_taken,
-                        kind: CondBrKind::Cond(lower_condcode(condcode)),
-                    });
-                }
-            }
-
-            Opcode::Brff => {
-                let condcode = ctx.data(branches[0]).fp_cond_code().unwrap();
-                let cond = lower_fp_condcode(condcode);
-                let kind = CondBrKind::Cond(cond);
-                let flag_input = InsnInput {
-                    insn: branches[0],
-                    input: 0,
-                };
-                if let Some(ffcmp_insn) = maybe_input_insn(ctx, flag_input, Opcode::Ffcmp) {
-                    lower_fcmp_or_ffcmp_to_flags(ctx, ffcmp_insn);
-                    ctx.emit(Inst::CondBr {
-                        taken,
-                        not_taken,
-                        kind,
-                    });
-                } else {
-                    // If the ffcmp result is actually placed in a
-                    // register, we need to move it back into the flags.
-                    let rn = put_input_in_reg(ctx, flag_input, NarrowValueMode::None);
-                    ctx.emit(Inst::MovToNZCV { rn });
-                    ctx.emit(Inst::CondBr {
-                        taken,
-                        not_taken,
-                        kind,
-                    });
-                }
-            }
-
-            _ => unimplemented!(),
-        }
-    } else {
-        // Must be an unconditional branch or an indirect branch.
-        let op = ctx.data(branches[0]).opcode();
-        match op {
-            Opcode::Jump => {
-                assert!(branches.len() == 1);
-                ctx.emit(Inst::Jump {
-                    dest: BranchTarget::Label(targets[0]),
-                });
-            }
-
-            Opcode::BrTable => {
-                // Expand `br_table index, default, JT` to:
-                //
-                //   emit_island  // this forces an island at this point
-                //                // if the jumptable would push us past
-                //                // the deadline
-                //   cmp idx, #jt_size
-                //   b.hs default
-                //   csel vTmp2, xzr, idx, hs
-                //   csdb
-                //   adr vTmp1, PC+16
-                //   ldr vTmp2, [vTmp1, vTmp2, uxtw #2]
-                //   add vTmp1, vTmp1, vTmp2
-                //   br vTmp1
-                //   [jumptable offsets relative to JT base]
-                let jt_size = targets.len() - 1;
-                assert!(jt_size <= std::u32::MAX as usize);
-
-                ctx.emit(Inst::EmitIsland {
-                    needed_space: 4 * (8 + jt_size) as CodeOffset,
-                });
-
-                let ridx = put_input_in_reg(
-                    ctx,
-                    InsnInput {
-                        insn: branches[0],
-                        input: 0,
-                    },
-                    NarrowValueMode::ZeroExtend32,
-                );
-
-                let rtmp1 = ctx.alloc_tmp(I32).only_reg().unwrap();
-                let rtmp2 = ctx.alloc_tmp(I32).only_reg().unwrap();
-
-                // Bounds-check, leaving condition codes for JTSequence's
-                // branch to default target below.
-                if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {
-                    ctx.emit(Inst::AluRRImm12 {
-                        alu_op: ALUOp::SubS,
-                        size: OperandSize::Size32,
-                        rd: writable_zero_reg(),
-                        rn: ridx,
-                        imm12,
-                    });
-                } else {
-                    lower_constant_u64(ctx, rtmp1, jt_size as u64);
-                    ctx.emit(Inst::AluRRR {
-                        alu_op: ALUOp::SubS,
-                        size: OperandSize::Size32,
-                        rd: writable_zero_reg(),
-                        rn: ridx,
-                        rm: rtmp1.to_reg(),
-                    });
-                }
-
-                // Emit the compound instruction that does:
-                //
-                // b.hs default
-                // csel rB, xzr, rIndex, hs
-                // csdb
-                // adr rA, jt
-                // ldrsw rB, [rA, rB, uxtw #2]
-                // add rA, rA, rB
-                // br rA
-                // [jt entries]
-                //
-                // This must be *one* instruction in the vcode because
-                // we cannot allow regalloc to insert any spills/fills
-                // in the middle of the sequence; otherwise, the ADR's
-                // PC-rel offset to the jumptable would be incorrect.
-                // (The alternative is to introduce a relocation pass
-                // for inlined jumptables, which is much worse, IMHO.)
-
-                let jt_targets: Vec<BranchTarget> = targets
-                    .iter()
-                    .skip(1)
-                    .map(|bix| BranchTarget::Label(*bix))
-                    .collect();
-                let default_target = BranchTarget::Label(targets[0]);
-                ctx.emit(Inst::JTSequence {
-                    ridx,
-                    rtmp1,
-                    rtmp2,
-                    info: Box::new(JTSequenceInfo {
-                        targets: jt_targets,
-                        default_target,
-                    }),
-                });
-            }
-
-            _ => panic!("Unknown branch type!"),
-        }
-    }
-
-    Ok(())
-}
--- a/cranelift/codegen/src/machinst/inst_common.rs
+++ b/cranelift/codegen/src/machinst/inst_common.rs
@@ -24,12 +24,6 @@ pub(crate) struct InsnOutput {
    pub(crate) output: usize,
 }

-pub(crate) fn insn_inputs<I: VCodeInst>(ctx: &Lower<I>, insn: IRInst) -> SmallVec<[InsnInput; 4]> {
-    (0..ctx.num_inputs(insn))
-        .map(|i| InsnInput { insn, input: i })
-        .collect()
-}
-
 pub(crate) fn insn_outputs<I: VCodeInst>(
    ctx: &Lower<I>,
    insn: IRInst,
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -853,6 +853,11 @@
        (ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2))
      (SideEffectNoResult.Inst3 p c1 c2))

+;; A two-instruction flags producer paired with a single side-effecting
+;; consumer: combine them as `Inst3`, both producer instructions first and
+;; the consumer last.
+(rule (with_flags_side_effect
+        (ProducesFlags.ProducesFlagsTwiceSideEffect p1 p2)
+        (ConsumesFlags.ConsumesFlagsSideEffect c))
+      (SideEffectNoResult.Inst3 p1 p2 c))
+
 ;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (decl trap_code_division_by_zero () TrapCode)
--- a/cranelift/filetests/filetests/isa/aarch64/condbr.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/condbr.clif
@@ -290,8 +290,8 @@ block1:
 ;   cset x6, lo
 ;   subs xzr, x1, x3
 ;   cset x9, lt
-;   csel x6, x6, x9, eq
-;   subs xzr, xzr, x6
+;   csel x11, x6, x9, eq
+;   subs xzr, xzr, x11
 ;   b.lt label1 ; b label2
 ; block1:
 ;   b label3
@@ -314,8 +314,8 @@ block1:
 ;   cset x6, lo
 ;   subs xzr, x1, x3
 ;   cset x9, lo
-;   csel x6, x6, x9, eq
-;   subs xzr, xzr, x6
+;   csel x11, x6, x9, eq
+;   subs xzr, xzr, x11
 ;   b.lo label1 ; b label2
 ; block1:
 ;   b label3
@@ -338,9 +338,9 @@ block1:
 ;   cset x6, ls
 ;   subs xzr, x1, x3
 ;   cset x9, le
-;   csel x6, x6, x9, eq
-;   movz x9, #1
-;   subs xzr, x9, x6
+;   csel x11, x6, x9, eq
+;   movz w13, #1
+;   subs xzr, x13, x11
 ;   b.le label1 ; b label2
 ; block1:
 ;   b label3
@@ -363,9 +363,9 @@ block1:
 ;   cset x6, ls
 ;   subs xzr, x1, x3
 ;   cset x9, ls
-;   csel x6, x6, x9, eq
-;   movz x9, #1
-;   subs xzr, x9, x6
+;   csel x11, x6, x9, eq
+;   orr x13, xzr, #1
+;   subs xzr, x13, x11
 ;   b.ls label1 ; b label2
 ; block1:
 ;   b label3
@@ -388,8 +388,8 @@ block1:
 ;   cset x6, hi
 ;   subs xzr, x1, x3
 ;   cset x9, gt
-;   csel x6, x6, x9, eq
-;   subs xzr, x6, xzr
+;   csel x11, x6, x9, eq
+;   subs xzr, x11, xzr
 ;   b.gt label1 ; b label2
 ; block1:
 ;   b label3
@@ -412,8 +412,8 @@ block1:
 ;   cset x6, hi
 ;   subs xzr, x1, x3
 ;   cset x9, hi
-;   csel x6, x6, x9, eq
-;   subs xzr, x6, xzr
+;   csel x11, x6, x9, eq
+;   subs xzr, x11, xzr
 ;   b.hi label1 ; b label2
 ; block1:
 ;   b label3
@@ -436,9 +436,9 @@ block1:
 ;   cset x6, hs
 ;   subs xzr, x1, x3
 ;   cset x9, ge
-;   csel x6, x6, x9, eq
-;   movz x9, #1
-;   subs xzr, x6, x9
+;   csel x11, x6, x9, eq
+;   movz w13, #1
+;   subs xzr, x11, x13
 ;   b.ge label1 ; b label2
 ; block1:
 ;   b label3
@@ -461,9 +461,9 @@ block1:
 ;   cset x6, hs
 ;   subs xzr, x1, x3
 ;   cset x9, hs
-;   csel x6, x6, x9, eq
-;   movz x9, #1
-;   subs xzr, x6, x9
+;   csel x11, x6, x9, eq
+;   orr x13, xzr, #1
+;   subs xzr, x11, x13
 ;   b.hs label1 ; b label2
 ; block1:
 ;   b label3