AArch64: avoid branches with explicit offsets at the lowering stage.

In discussions with @bnjbvr, it came up that generating `OneWayCondBr`s
with explicit, hardcoded PC-offsets as part of lowered instruction
sequences is actually unsafe, because the register allocator *might*
insert a spill or reload into the middle of our sequence. We were
careful about this in some cases but somehow missed that it was a
general restriction. Conceptually, all inter-instruction references
should be via labels at the VCode level; explicit offsets are only ever
known at emission time, and resolved by the `MachBuffer`.
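
As a concrete illustration, here is a contrived, self-contained sketch of the
hazard (toy code, not the actual backend): each instruction is 4 bytes, and
the lowered branch carries a hardcoded +8 offset meant to skip the trap.

```rust
fn main() {
    // What lowering emits: the branch is meant to skip the trap.
    let lowered = ["cbnz rm, #8", "udf", "sdiv rd, rn, rm"];
    // Regalloc is free to insert a reload anywhere in the sequence:
    let after_regalloc = ["cbnz rm, #8", "ldr x1, [sp, #16]", "udf", "sdiv rd, rn, rm"];

    // A +8 offset always lands two 4-byte instructions past the branch.
    let branch_lands_on = |seq: &[&str]| seq[8 / 4];
    assert_eq!(branch_lands_on(&lowered), "sdiv rd, rn, rm"); // intended
    assert_eq!(branch_lands_on(&after_regalloc), "udf"); // now lands on the trap!
    println!("hardcoded offsets break once regalloc inserts code");
}
```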

To allow for conditional trap checks without modifying the CFG (as seen
by regalloc) during lowering, this PR instead adds a `TrapIf`
pseudo-instruction that conditionally skips a single embedded trap
instruction. Because the branch and the trap are emitted together as one
unit, regalloc cannot insert anything between them. The result is the
same `condbr label ; trap ; label: ...` sequence as before, but the
branch-target offset is computed at emission time rather than hardcoded
in the lowering code.
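
For intuition, here is a minimal sketch of label-based emission in the style
the commit message describes. The toy buffer below stands in for the real
`MachBuffer`; all names and encodings (`ToyBuffer`, `cond_branch_to`, the
word-delta "encoding") are illustrative assumptions, not Cranelift's actual
API.

```rust
/// A toy emission buffer: labels are bound to offsets, and branch
/// displacements are patched only when emission is finished.
struct ToyBuffer {
    words: Vec<u32>,
    fixups: Vec<(usize, usize)>, // (index of branch word, label id)
    label_offsets: Vec<Option<usize>>,
}

impl ToyBuffer {
    fn new() -> Self {
        ToyBuffer { words: vec![], fixups: vec![], label_offsets: vec![] }
    }
    fn get_label(&mut self) -> usize {
        self.label_offsets.push(None);
        self.label_offsets.len() - 1
    }
    fn bind_label(&mut self, label: usize) {
        self.label_offsets[label] = Some(self.words.len());
    }
    /// Emit a conditional branch whose displacement is patched later.
    fn cond_branch_to(&mut self, label: usize) {
        self.fixups.push((self.words.len(), label));
        self.words.push(0); // placeholder displacement
    }
    fn finish(mut self) -> Vec<u32> {
        for &(at, label) in &self.fixups {
            let target = self.label_offsets[label].expect("unbound label");
            self.words[at] = (target - at) as u32; // toy "encoding": word delta
        }
        self.words
    }
}

/// Emission of a `TrapIf`-style pseudo-instruction: the skip offset is
/// computed here, at emission time, so nothing regalloc did earlier can
/// invalidate it -- and regalloc can never split the pair, because the
/// branch and trap are a single instruction at the VCode level.
fn emit_trap_if(buf: &mut ToyBuffer) {
    let skip = buf.get_label();
    buf.cond_branch_to(skip); // b.<inverted cond> skip
    buf.words.push(0xDEAD);   // udf (the embedded trap)
    buf.bind_label(skip);     // skip: execution continues here
}

fn main() {
    let mut buf = ToyBuffer::new();
    buf.words.push(0x1111); // some earlier instruction
    emit_trap_if(&mut buf);
    buf.words.push(0x2222); // the instruction the branch should reach
    assert_eq!(buf.finish(), vec![0x1111, 2, 0xDEAD, 0x2222]);
}
```
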
commit b7ecad1d74
parent f2dd1535d5
Author: Chris Fallin
Date:   2020-07-01 16:28:41 -07:00

11 changed files with 267 additions and 312 deletions

@@ -282,14 +282,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         // msub rd, rd, rm, rn ; rd = rn - rd * rm
         // Check for divide by 0.
-        let branch_size = 8;
-        ctx.emit(Inst::OneWayCondBr {
-            target: BranchTarget::ResolvedOffset(branch_size),
-            kind: CondBrKind::NotZero(rm),
-        });
         let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
-        ctx.emit(Inst::Udf { trap_info });
+        ctx.emit(Inst::TrapIf {
+            trap_info,
+            kind: CondBrKind::Zero(rm),
+        });
         ctx.emit(Inst::AluRRRR {
             alu_op: ALUOp::MSub64,
@@ -300,17 +297,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         });
     } else {
         if div_op == ALUOp::SDiv64 {
-            // cbz rm, #20
+            // cbnz rm, #8
+            // udf ; divide by zero
             // cmn rm, 1
             // ccmp rn, 1, #nzcv, eq
-            // b.vc 12
+            // b.vc #8
             // udf ; signed overflow
-            // udf ; divide by zero
             // Check for divide by 0.
-            let branch_size = 20;
-            ctx.emit(Inst::OneWayCondBr {
-                target: BranchTarget::ResolvedOffset(branch_size),
+            let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
+            ctx.emit(Inst::TrapIf {
+                trap_info,
                 kind: CondBrKind::Zero(rm),
             });
@@ -336,27 +333,22 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 nzcv: NZCV::new(false, false, false, false),
                 cond: Cond::Eq,
             });
-            ctx.emit(Inst::OneWayCondBr {
-                target: BranchTarget::ResolvedOffset(12),
-                kind: CondBrKind::Cond(Cond::Vc),
-            });
             let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
-            ctx.emit(Inst::Udf { trap_info });
+            ctx.emit(Inst::TrapIf {
+                trap_info,
+                kind: CondBrKind::Cond(Cond::Vs),
+            });
         } else {
             // cbnz rm, #8
             // udf ; divide by zero
             // Check for divide by 0.
-            let branch_size = 8;
-            ctx.emit(Inst::OneWayCondBr {
-                target: BranchTarget::ResolvedOffset(branch_size),
-                kind: CondBrKind::NotZero(rm),
+            let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
+            ctx.emit(Inst::TrapIf {
+                trap_info,
+                kind: CondBrKind::Zero(rm),
             });
         }
-        let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
-        ctx.emit(Inst::Udf { trap_info });
     }
 }
@@ -1324,15 +1316,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         cond
     };
-    // Branch around the break instruction with inverted cond. Go straight to lowered
-    // one-target form; this is logically part of a single-in single-out template lowering.
-    let cond = cond.invert();
-    ctx.emit(Inst::OneWayCondBr {
-        target: BranchTarget::ResolvedOffset(8),
+    ctx.emit(Inst::TrapIf {
+        trap_info,
         kind: CondBrKind::Cond(cond),
     });
-    ctx.emit(Inst::Udf { trap_info })
 }
 Opcode::Safepoint => {
@@ -1711,12 +1698,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
     } else {
         ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
     }
-    ctx.emit(Inst::OneWayCondBr {
-        target: BranchTarget::ResolvedOffset(8),
-        kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Ordered)),
-    });
     let trap_info = (ctx.srcloc(insn), TrapCode::BadConversionToInteger);
-    ctx.emit(Inst::Udf { trap_info });
+    ctx.emit(Inst::TrapIf {
+        trap_info,
+        kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Unordered)),
+    });
     let tmp = ctx.alloc_tmp(RegClass::V128, I128);
@@ -1752,12 +1738,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         rn,
         rm: tmp.to_reg(),
     });
-    ctx.emit(Inst::OneWayCondBr {
-        target: BranchTarget::ResolvedOffset(8),
-        kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
-    });
     let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
-    ctx.emit(Inst::Udf { trap_info });
+    ctx.emit(Inst::TrapIf {
+        trap_info,
+        kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
+    });
     // <= high_bound
     lower_constant_f32(ctx, tmp, high_bound);
@@ -1765,12 +1750,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         rn,
         rm: tmp.to_reg(),
     });
-    ctx.emit(Inst::OneWayCondBr {
-        target: BranchTarget::ResolvedOffset(8),
-        kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
-    });
     let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
-    ctx.emit(Inst::Udf { trap_info });
+    ctx.emit(Inst::TrapIf {
+        trap_info,
+        kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
+    });
 } else {
     // From float64.
     let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
@@ -1795,12 +1779,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         rn,
         rm: tmp.to_reg(),
     });
-    ctx.emit(Inst::OneWayCondBr {
-        target: BranchTarget::ResolvedOffset(8),
-        kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
-    });
     let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
-    ctx.emit(Inst::Udf { trap_info });
+    ctx.emit(Inst::TrapIf {
+        trap_info,
+        kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
+    });
     // <= high_bound
     lower_constant_f64(ctx, tmp, high_bound);
@@ -1808,12 +1791,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         rn,
         rm: tmp.to_reg(),
     });
-    ctx.emit(Inst::OneWayCondBr {
-        target: BranchTarget::ResolvedOffset(8),
-        kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
-    });
     let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
-    ctx.emit(Inst::Udf { trap_info });
+    ctx.emit(Inst::TrapIf {
+        trap_info,
+        kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
+    });
 };
 // Do the conversion.
// Do the conversion.
@@ -2307,7 +2289,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
 let rtmp1 = ctx.alloc_tmp(RegClass::I64, I32);
 let rtmp2 = ctx.alloc_tmp(RegClass::I64, I32);
-// Bounds-check and branch to default.
+// Bounds-check, leaving condition codes for JTSequence's
+// branch to default target below.
 if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {
     ctx.emit(Inst::AluRRImm12 {
         alu_op: ALUOp::SubS32,
@@ -2324,14 +2307,10 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
         rm: rtmp1.to_reg(),
     });
 }
-let default_target = BranchTarget::Label(targets[0]);
-ctx.emit(Inst::OneWayCondBr {
-    target: default_target.clone(),
-    kind: CondBrKind::Cond(Cond::Hs), // unsigned >=
-});
 // Emit the compound instruction that does:
 //
+// b.hs default
 // adr rA, jt
 // ldrsw rB, [rA, rIndex, UXTW 2]
 // add rA, rA, rB
@@ -2350,6 +2329,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
     .skip(1)
     .map(|bix| BranchTarget::Label(*bix))
     .collect();
+let default_target = BranchTarget::Label(targets[0]);
 let targets_for_term: Vec<MachLabel> = targets.to_vec();
 ctx.emit(Inst::JTSequence {
     ridx,
@@ -2357,6 +2337,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
     rtmp2,
     info: Box::new(JTSequenceInfo {
         targets: jt_targets,
+        default_target,
         targets_for_term: targets_for_term,
     }),
 });