Port branches to ISLE (AArch64) (#4943)

* Port branches to ISLE (AArch64)

Ported the existing hand-written lowerings of the following opcodes for
AArch64 to ISLE (a sketch of the new rule shape follows the list):
- `Brz`
- `Brnz`
- `Brif`
- `Brff`
- `BrIcmp`
- `Jump`
- `BrTable`
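
For readers unfamiliar with ISLE, the port replaces the hand-written
Rust `match` in `lower_branch` (deleted in the diff below) with
declarative rewrite rules in `lower.isle`. A minimal sketch of the
shape such a rule takes; the term names used here (`single_target`,
`aarch64_jump`) are illustrative assumptions, not quotes from the
committed rules:

    ;; Hypothetical sketch only; term names are illustrative.
    (rule (lower_branch (jump _) (single_target label))
          (emit_side_effect (aarch64_jump label)))

Each ported opcode becomes one or more such rules, which the ISLE
compiler translates back into Rust at build time.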

Copyright (c) 2022 Arm Limited

* Remove dead code

Copyright (c) 2022 Arm Limited
Author: Damian Heaton
Date: 2022-09-26 09:45:32 +01:00
Committed by: GitHub
Parent: 11e90049d2
Commit: 3a2b32bf4d
9 changed files with 381 additions and 997 deletions

@@ -1,8 +1,5 @@
//! Lower a single Cranelift instruction into vcode.
use super::lower::*;
use crate::binemit::CodeOffset;
use crate::ir::types::*;
use crate::ir::Inst as IRInst;
use crate::ir::Opcode;
use crate::isa::aarch64::inst::*;
@@ -11,8 +8,6 @@ use crate::machinst::lower::*;
use crate::machinst::*;
use crate::settings::Flags;
use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use target_lexicon::Triple;
/// Actually codegen an instruction's results into registers.
@@ -323,269 +318,3 @@ pub(crate) fn lower_insn_to_regs(
Ok(())
}
pub(crate) fn lower_branch(
ctx: &mut Lower<Inst>,
branches: &[IRInst],
targets: &[MachLabel],
) -> CodegenResult<()> {
// A block should end with at most two branches. The first may be a
// conditional branch; a conditional branch can be followed only by an
// unconditional branch or fallthrough. Otherwise, if only one branch,
// it may be an unconditional branch, a fallthrough, a return, or a
// trap. These conditions are verified by `is_ebb_basic()` during the
// verifier pass.
assert!(branches.len() <= 2);
if branches.len() == 2 {
// Must be a conditional branch followed by an unconditional branch.
let op0 = ctx.data(branches[0]).opcode();
let op1 = ctx.data(branches[1]).opcode();
assert!(op1 == Opcode::Jump);
let taken = BranchTarget::Label(targets[0]);
// not_taken target is the target of the second branch, even if it is a Fallthrough
// instruction: because we reorder blocks while we lower, the fallthrough in the new
// order is not (necessarily) the same as the fallthrough in CLIF. So we use the
// explicitly-provided target.
let not_taken = BranchTarget::Label(targets[1]);
match op0 {
Opcode::Brz | Opcode::Brnz => {
let ty = ctx.input_ty(branches[0], 0);
let flag_input = InsnInput {
insn: branches[0],
input: 0,
};
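// If the branch condition comes directly from an icmp or fcmp
// (possibly through a `bint`), fold the comparison into the
// conditional branch instead of materializing a boolean first.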
if let Some(icmp_insn) =
maybe_input_insn_via_conv(ctx, flag_input, Opcode::Icmp, Opcode::Bint)
{
let condcode = ctx.data(icmp_insn).cond_code().unwrap();
let cond =
lower_icmp(ctx, icmp_insn, condcode, IcmpOutput::CondCode)?.unwrap_cond();
let negated = op0 == Opcode::Brz;
let cond = if negated { cond.invert() } else { cond };
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind: CondBrKind::Cond(cond),
});
} else if let Some(fcmp_insn) =
maybe_input_insn_via_conv(ctx, flag_input, Opcode::Fcmp, Opcode::Bint)
{
let condcode = ctx.data(fcmp_insn).fp_cond_code().unwrap();
let cond = lower_fp_condcode(condcode);
let negated = op0 == Opcode::Brz;
let cond = if negated { cond.invert() } else { cond };
lower_fcmp_or_ffcmp_to_flags(ctx, fcmp_insn);
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind: CondBrKind::Cond(cond),
});
} else {
let rt = if ty == I128 {
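// A 128-bit value is nonzero iff either 64-bit half is nonzero,
// so OR the two halves together and test the single result.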
let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
let input = put_input_in_regs(ctx, flag_input);
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Orr,
size: OperandSize::Size64,
rd: tmp,
rn: input.regs()[0],
rm: input.regs()[1],
});
tmp.to_reg()
} else {
put_input_in_reg(ctx, flag_input, NarrowValueMode::ZeroExtend64)
};
let kind = match op0 {
Opcode::Brz => CondBrKind::Zero(rt),
Opcode::Brnz => CondBrKind::NotZero(rt),
_ => unreachable!(),
};
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind,
});
}
}
Opcode::BrIcmp => {
let condcode = ctx.data(branches[0]).cond_code().unwrap();
let cond =
lower_icmp(ctx, branches[0], condcode, IcmpOutput::CondCode)?.unwrap_cond();
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind: CondBrKind::Cond(cond),
});
}
Opcode::Brif => {
let condcode = ctx.data(branches[0]).cond_code().unwrap();
let flag_input = InsnInput {
insn: branches[0],
input: 0,
};
if let Some(ifcmp_insn) = maybe_input_insn(ctx, flag_input, Opcode::Ifcmp) {
let cond =
lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::CondCode)?.unwrap_cond();
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind: CondBrKind::Cond(cond),
});
} else {
// If the ifcmp result is actually placed in a
// register, we need to move it back into the flags.
let rn = put_input_in_reg(ctx, flag_input, NarrowValueMode::None);
ctx.emit(Inst::MovToNZCV { rn });
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind: CondBrKind::Cond(lower_condcode(condcode)),
});
}
}
Opcode::Brff => {
let condcode = ctx.data(branches[0]).fp_cond_code().unwrap();
let cond = lower_fp_condcode(condcode);
let kind = CondBrKind::Cond(cond);
let flag_input = InsnInput {
insn: branches[0],
input: 0,
};
if let Some(ffcmp_insn) = maybe_input_insn(ctx, flag_input, Opcode::Ffcmp) {
lower_fcmp_or_ffcmp_to_flags(ctx, ffcmp_insn);
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind,
});
} else {
// If the ffcmp result is actually placed in a
// register, we need to move it back into the flags.
let rn = put_input_in_reg(ctx, flag_input, NarrowValueMode::None);
ctx.emit(Inst::MovToNZCV { rn });
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind,
});
}
}
_ => unimplemented!(),
}
} else {
// Must be an unconditional branch or an indirect branch.
let op = ctx.data(branches[0]).opcode();
match op {
Opcode::Jump => {
assert!(branches.len() == 1);
ctx.emit(Inst::Jump {
dest: BranchTarget::Label(targets[0]),
});
}
Opcode::BrTable => {
// Expand `br_table index, default, JT` to:
//
// emit_island // this forces an island at this point
// // if the jumptable would push us past
// // the deadline
// cmp idx, #jt_size
// b.hs default
// csel vTmp2, xzr, idx, hs
// csdb
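//   (the csel/csdb pair clamps the index even under speculative
//   execution, hardening the bounds check against Spectre)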
// adr vTmp1, PC+16
// ldr vTmp2, [vTmp1, vTmp2, uxtw #2]
// add vTmp1, vTmp1, vTmp2
// br vTmp1
// [jumptable offsets relative to JT base]
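// Note: targets[0] is the default target; the remaining labels
// form the table entries (see the skip(1) below).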
let jt_size = targets.len() - 1;
assert!(jt_size <= std::u32::MAX as usize);
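// The island must hold the 8-instruction sequence plus one 32-bit
// offset per table entry, at 4 bytes each.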
ctx.emit(Inst::EmitIsland {
needed_space: 4 * (8 + jt_size) as CodeOffset,
});
let ridx = put_input_in_reg(
ctx,
InsnInput {
insn: branches[0],
input: 0,
},
NarrowValueMode::ZeroExtend32,
);
let rtmp1 = ctx.alloc_tmp(I32).only_reg().unwrap();
let rtmp2 = ctx.alloc_tmp(I32).only_reg().unwrap();
// Bounds-check, leaving condition codes for JTSequence's
// branch to default target below.
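// (`SubS` writing to the zero register discards the result and
// only sets the flags, i.e. it is a plain `cmp`.)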
if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {
ctx.emit(Inst::AluRRImm12 {
alu_op: ALUOp::SubS,
size: OperandSize::Size32,
rd: writable_zero_reg(),
rn: ridx,
imm12,
});
} else {
lower_constant_u64(ctx, rtmp1, jt_size as u64);
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::SubS,
size: OperandSize::Size32,
rd: writable_zero_reg(),
rn: ridx,
rm: rtmp1.to_reg(),
});
}
// Emit the compound instruction that does:
//
// b.hs default
// csel rB, xzr, rIndex, hs
// csdb
// adr rA, jt
// ldrsw rB, [rA, rB, uxtw #2]
// add rA, rA, rB
// br rA
// [jt entries]
//
// This must be *one* instruction in the vcode because
// we cannot allow regalloc to insert any spills/fills
// in the middle of the sequence; otherwise, the ADR's
// PC-rel offset to the jumptable would be incorrect.
// (The alternative is to introduce a relocation pass
// for inlined jumptables, which is much worse, IMHO.)
let jt_targets: Vec<BranchTarget> = targets
.iter()
.skip(1)
.map(|bix| BranchTarget::Label(*bix))
.collect();
let default_target = BranchTarget::Label(targets[0]);
ctx.emit(Inst::JTSequence {
ridx,
rtmp1,
rtmp2,
info: Box::new(JTSequenceInfo {
targets: jt_targets,
default_target,
}),
});
}
_ => panic!("Unknown branch type!"),
}
}
Ok(())
}