Address review comments.

Chris Fallin
2020-05-18 15:40:17 -07:00
parent 687aca00fe
commit bdd2873c8c
7 changed files with 313 additions and 89 deletions

View File

@@ -352,6 +352,13 @@ impl MachInstEmit for Inst {
type State = EmitState;
fn emit(&self, sink: &mut MachBuffer<Inst>, flags: &settings::Flags, state: &mut EmitState) {
+// N.B.: we *must* not exceed the "worst-case size" used to compute
+// where to insert islands, except when islands are explicitly triggered
+// (with an `EmitIsland`). We check this in debug builds. This is `mut`
+// to allow disabling the check for `JTSequence`, which is always
+// emitted following an `EmitIsland`.
+let mut start_off = sink.cur_offset();
match self {
&Inst::AluRRR { alu_op, rd, rn, rm } => {
let top11 = match alu_op {
@@ -1307,6 +1314,10 @@ impl MachInstEmit for Inst {
LabelUse::PCRel32,
);
}
+// Lowering produces an EmitIsland before using a JTSequence, so we can safely
+// disable the worst-case-size check in this case.
+start_off = sink.cur_offset();
}
&Inst::LoadConst64 { rd, const_data } => {
let inst = Inst::ULoad64 {
@@ -1418,5 +1429,8 @@ impl MachInstEmit for Inst {
}
}
}
+let end_off = sink.cur_offset();
+debug_assert!((end_off - start_off) <= Inst::worst_case_size());
}
}
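In sketch form, the check added above amounts to the following pattern; this is a self-contained illustration with a plain byte buffer standing in for `MachBuffer`, not the real emission path.

// Sketch only: `worst_case_size` is whatever upper bound the instruction
// advertises; `emit` appends the instruction's bytes.
fn emit_with_size_check(buf: &mut Vec<u8>, worst_case_size: usize, emit: impl FnOnce(&mut Vec<u8>)) {
    let start_off = buf.len();
    emit(buf);
    let end_off = buf.len();
    // Mirrors the debug_assert above: an instruction must not grow past the
    // bound that was used when deciding where islands may be needed.
    debug_assert!(end_off - start_off <= worst_case_size);
}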

View File

@@ -657,6 +657,15 @@ pub enum Inst {
/// A one-way conditional branch, invisible to the CFG processing; used *only* as part of
/// straight-line sequences in code to be emitted.
+///
+/// In more detail:
+/// - This branch is lowered to a branch at the machine-code level, but does not end a basic
+///   block, and does not create edges in the CFG seen by regalloc.
+/// - Thus, it is *only* valid to use as part of a single-in, single-out sequence that is
+///   lowered from a single CLIF instruction. For example, an arithmetic operation may use
+///   such a branch to handle conditions such as overflow by branching around a trap.
+///
+/// See, e.g., the lowering of `trapif` (conditional trap) for an example.
OneWayCondBr {
target: BranchTarget,
kind: CondBrKind,
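A shape-only sketch of such a straight-line sequence (the target, condition, and trap code below are placeholders, not taken from the actual `trapif` lowering): the guard branch jumps just past the trap when the trapping condition does not hold, and no CFG edge is created for regalloc.

ctx.emit(Inst::OneWayCondBr {
    // Placeholders: `skip` points just past the `udf` below, and `inverted_cond`
    // is a CondBrKind testing the inverse of the trapping condition.
    target: skip,
    kind: inverted_cond,
});
ctx.emit(Inst::Udf {
    trap_info: (ctx.srcloc(insn), TrapCode::IntegerOverflow),
});
// Straight-line code continues here; no basic-block boundary was introduced.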
@@ -678,7 +687,7 @@ pub enum Inst {
trap_info: (SourceLoc, TrapCode),
},
-/// Load the address (using a PC-relative offset) of a memory location, using the `ADR`
+/// Compute the address (using a PC-relative offset) of a memory location, using the `ADR`
/// instruction. Note that we take a simple offset, not a `MemLabel`, here, because `Adr` is
/// only used for now in fixed lowering sequences with hardcoded offsets. In the future we may
/// need full `MemLabel` support.
@@ -734,9 +743,26 @@ pub enum Inst {
offset: i64,
},
-/// Meta-insn, no-op in generated code: emit constant/branch veneer island at this point (with
-/// a guard jump around it) if less than the needed space is available before the next branch
-/// deadline.
+/// Meta-insn, no-op in generated code: emit constant/branch veneer island
+/// at this point (with a guard jump around it) if less than the needed
+/// space is available before the next branch deadline. See the `MachBuffer`
+/// implementation in `machinst/buffer.rs` for the overall algorithm. In
+/// brief, we retain a set of "pending/unresolved label references" from
+/// branches as we scan forward through instructions to emit machine code;
+/// if we notice we're about to go out of range on an unresolved reference,
+/// we stop, emit a bunch of "veneers" (branches in a form that has a longer
+/// range, e.g. a 26-bit-offset unconditional jump), and point the original
+/// label references to those. This is an "island" because it comes in the
+/// middle of the code.
+///
+/// This meta-instruction is a necessary part of the logic that determines
+/// where to place islands. Ordinarily, we want to place them between basic
+/// blocks, so we compute the worst-case size of each block, and emit the
+/// island before starting a block if we would exceed a deadline before the
+/// end of the block. However, some sequences (such as an inline jumptable)
+/// are variable-length and not accounted for by this logic; so these
+/// lowered sequences include an `EmitIsland` to trigger island generation
+/// where necessary.
EmitIsland {
/// The needed space before the next deadline.
needed_space: CodeOffset,
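In sketch form, the deadline decision described above looks like the following (illustrative names only, not the actual `MachBuffer` code):

// Decide whether to emit a veneer island before emitting a block: if the
// block's worst-case size could carry us past the deadline of some pending
// short-range label reference, stop and emit veneers now.
fn needs_island(cur_offset: u32, block_worst_case_size: u32, nearest_deadline: u32) -> bool {
    cur_offset + block_worst_case_size > nearest_deadline
}

fn example() {
    // A pending 19-bit conditional branch can reach roughly 1 MiB ahead; a
    // 2 KiB worst-case block fits comfortably until we get close to that.
    let deadline = 0x10_0000;
    assert!(!needs_island(0x08_0000, 0x800, deadline));
    assert!(needs_island(0x0F_FC00, 0x800, deadline));
}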
@@ -1770,6 +1796,18 @@ impl MachInst for Inst {
));
ret
} else {
+// Must be an integer type.
+debug_assert!(
+    ty == B1
+        || ty == I8
+        || ty == B8
+        || ty == I16
+        || ty == B16
+        || ty == I32
+        || ty == B32
+        || ty == I64
+        || ty == B64
+);
Inst::load_constant(to_reg, value)
}
}
@@ -2601,7 +2639,8 @@ pub enum LabelUse {
/// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
/// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
Adr21,
-/// 32-bit PC relative constant offset (from address of constant itself). Used in jump tables.
+/// 32-bit PC relative constant offset (from address of constant itself),
+/// signed. Used in jump tables.
PCRel32,
}
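As a worked illustration of the `Adr21` layout described above (a sketch only, not the actual fixup code in `MachBuffer`), patching a 21-bit signed byte offset into an ADR-class instruction word splits it across the two immediate fields:

fn patch_adr21(insn_word: u32, offset: i32) -> u32 {
    // The offset must fit in 21 signed bits.
    debug_assert!(offset >= -(1 << 20) && offset < (1 << 20));
    let imm = (offset as u32) & 0x1f_ffff; // 21 significant bits
    let immlo = imm & 0b11; // low 2 bits -> instruction bits 30:29
    let immhi = imm >> 2; // high 19 bits -> instruction bits 23:5
    let mask = (0b11 << 29) | (0x7ffff << 5);
    (insn_word & !mask) | (immlo << 29) | (immhi << 5)
}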

View File

@@ -188,7 +188,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
let inputs = ctx.get_input(input.insn, input.input);
let in_reg = if let Some(c) = inputs.constant {
// Generate constants fresh at each use to minimize long-range register pressure.
-let to_reg = ctx.tmp(Inst::rc_for_type(ty).unwrap(), ty);
+let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
for inst in Inst::gen_constant(to_reg, c, ty).into_iter() {
ctx.emit(inst);
}
@@ -201,7 +201,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
match (narrow_mode, from_bits) {
(NarrowValueMode::None, _) => in_reg,
(NarrowValueMode::ZeroExtend32, n) if n < 32 => {
-let tmp = ctx.tmp(RegClass::I64, I32);
+let tmp = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@@ -212,7 +212,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
tmp.to_reg()
}
(NarrowValueMode::SignExtend32, n) if n < 32 => {
-let tmp = ctx.tmp(RegClass::I64, I32);
+let tmp = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@@ -229,7 +229,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
// Constants are zero-extended to full 64-bit width on load already.
in_reg
} else {
-let tmp = ctx.tmp(RegClass::I64, I32);
+let tmp = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@@ -241,7 +241,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
}
}
(NarrowValueMode::SignExtend64, n) if n < 64 => {
-let tmp = ctx.tmp(RegClass::I64, I32);
+let tmp = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@@ -529,7 +529,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
}
// Otherwise, generate add instructions.
-let addr = ctx.tmp(RegClass::I64, I64);
+let addr = ctx.alloc_tmp(RegClass::I64, I64);
// Get the const into a reg.
lower_constant_u64(ctx, addr.clone(), offset as u64);
@@ -541,7 +541,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
// In an addition, the stack register is the zero register, so divert it to another
// register just before doing the actual add.
let reg = if reg == stack_reg() {
-let tmp = ctx.tmp(RegClass::I64, I64);
+let tmp = ctx.alloc_tmp(RegClass::I64, I64);
ctx.emit(Inst::Mov {
rd: tmp,
rm: stack_reg(),

View File

@@ -84,8 +84,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} else {
VecALUOp::UQAddScalar
};
-let va = ctx.tmp(RegClass::V128, I128);
-let vb = ctx.tmp(RegClass::V128, I128);
+let va = ctx.alloc_tmp(RegClass::V128, I128);
+let vb = ctx.alloc_tmp(RegClass::V128, I128);
let ra = input_to_reg(ctx, inputs[0], narrow_mode);
let rb = input_to_reg(ctx, inputs[1], narrow_mode);
let rd = output_to_reg(ctx, outputs[0]);
@@ -115,8 +115,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} else {
VecALUOp::UQSubScalar
};
-let va = ctx.tmp(RegClass::V128, I128);
-let vb = ctx.tmp(RegClass::V128, I128);
+let va = ctx.alloc_tmp(RegClass::V128, I128);
+let vb = ctx.alloc_tmp(RegClass::V128, I128);
let ra = input_to_reg(ctx, inputs[0], narrow_mode);
let rb = input_to_reg(ctx, inputs[1], narrow_mode);
let rd = output_to_reg(ctx, outputs[0]);
@@ -498,7 +498,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// ignored (because of the implicit masking done by the instruction),
// so this is equivalent to negating the input.
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
-let tmp = ctx.tmp(RegClass::I64, ty);
+let tmp = ctx.alloc_tmp(RegClass::I64, ty);
ctx.emit(Inst::AluRRR {
alu_op,
rd: tmp,
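The equivalence relied on in the comment above can be checked in isolation: because the hardware masks the rotate amount to its low bits, negating the amount gives the same result as subtracting it from the bit width. A self-contained check:

fn rotl32_via_ror(x: u32, shamt: u32) -> u32 {
    // (0 - shamt) and (32 - shamt) agree in their low five bits, so a plain
    // negate (a subtract from zero) is all the lowering needs.
    x.rotate_right(0u32.wrapping_sub(shamt) & 31)
}

#[test]
fn rotl_via_ror_matches() {
    for s in 0..32 {
        assert_eq!(rotl32_via_ror(0x1234_5678, s), 0x1234_5678u32.rotate_left(s));
    }
}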
@@ -521,7 +521,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// Really ty_bits_size - rn, but the upper bits of the result are
// ignored (because of the implicit masking done by the instruction),
// so this is equivalent to negating the input.
-let tmp = ctx.tmp(RegClass::I64, I32);
+let tmp = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Sub32,
rd: tmp,
@@ -534,7 +534,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
};
// Explicitly mask the rotation count.
-let tmp_masked_rm = ctx.tmp(RegClass::I64, I32);
+let tmp_masked_rm = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::AluRRImmLogic {
alu_op: ALUOp::And32,
rd: tmp_masked_rm,
@@ -543,8 +543,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
});
let tmp_masked_rm = tmp_masked_rm.to_reg();
-let tmp1 = ctx.tmp(RegClass::I64, I32);
-let tmp2 = ctx.tmp(RegClass::I64, I32);
+let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
+let tmp2 = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::AluRRImm12 {
alu_op: ALUOp::Sub32,
rd: tmp1,
@@ -583,7 +583,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
immshift.imm &= ty_bits_size - 1;
-let tmp1 = ctx.tmp(RegClass::I64, I32);
+let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::AluRRImmShift {
alu_op: ALUOp::Lsr32,
rd: tmp1,
@@ -688,7 +688,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// and fix the sequence below to work properly for this.
let narrow_mode = NarrowValueMode::ZeroExtend64;
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
-let tmp = ctx.tmp(RegClass::I64, I64);
+let tmp = ctx.alloc_tmp(RegClass::I64, I64);
// If this is a 32-bit Popcnt, use Lsr32 to clear the top 32 bits of the register, then
// the rest of the code is identical to the 64-bit version.
@@ -997,7 +997,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Bitselect => {
-let tmp = ctx.tmp(RegClass::I64, I64);
+let tmp = ctx.alloc_tmp(RegClass::I64, I64);
let rd = output_to_reg(ctx, outputs[0]);
let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
@@ -1475,8 +1475,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
-let tmp1 = ctx.tmp(RegClass::I64, I64);
-let tmp2 = ctx.tmp(RegClass::I64, I64);
+let tmp1 = ctx.alloc_tmp(RegClass::I64, I64);
+let tmp2 = ctx.alloc_tmp(RegClass::I64, I64);
ctx.emit(Inst::MovFromVec64 { rd: tmp1, rn: rn });
ctx.emit(Inst::MovFromVec64 { rd: tmp2, rn: rm });
let imml = if bits == 32 {
@@ -1546,7 +1546,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let trap_info = (ctx.srcloc(insn), TrapCode::BadConversionToInteger);
ctx.emit(Inst::Udf { trap_info });
-let tmp = ctx.tmp(RegClass::V128, I128);
+let tmp = ctx.alloc_tmp(RegClass::V128, I128);
// Check that the input is in range, with "truncate towards zero" semantics. This means
// we allow values that are slightly out of range:
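Concretely, taking f64-to-i32 as one illustrative case (the lowering below performs this check with FP comparisons against loaded boundary constants): any input that truncates back into the i32 range is accepted, even though it may lie slightly beyond i32::MIN or i32::MAX themselves.

// Sketch of the accepted range for an f64 -> i32 truncating conversion: NaN
// and anything outside the open interval (-2147483649.0, 2147483648.0) traps.
fn fits_in_i32_after_trunc(x: f64) -> bool {
    x > -2147483649.0 && x < 2147483648.0
}

fn example() {
    assert!(fits_in_i32_after_trunc(2147483647.9)); // truncates to i32::MAX
    assert!(fits_in_i32_after_trunc(-2147483648.9)); // truncates to i32::MIN
    assert!(!fits_in_i32_after_trunc(2147483648.0)); // out of range: must trap
}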
@@ -1712,8 +1712,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
_ => unreachable!(),
};
-let rtmp1 = ctx.tmp(RegClass::V128, in_ty);
-let rtmp2 = ctx.tmp(RegClass::V128, in_ty);
+let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
+let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);
if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
@@ -2072,7 +2072,9 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
Opcode::BrTable => {
// Expand `br_table index, default, JT` to:
//
-// (emit island with guard jump if needed)
+//   emit_island          // this forces an island at this point
+//                        // if the jumptable would push us past
+//                        // the deadline
// subs idx, #jt_size
// b.hs default
// adr vTmp1, PC+16
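Each jump-table entry emitted by this sequence is a `LabelUse::PCRel32` value: a signed 32-bit offset relative to the entry's own address. Resolving a target from a loaded entry is, in sketch form (not the emitted code itself):

fn jump_table_target(entry_addr: u64, entry_value: i32) -> u64 {
    // Sign-extend the 32-bit entry and add it to the address it was loaded from.
    entry_addr.wrapping_add(entry_value as i64 as u64)
}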
@@ -2096,8 +2098,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
NarrowValueMode::ZeroExtend32,
);
-let rtmp1 = ctx.tmp(RegClass::I64, I32);
-let rtmp2 = ctx.tmp(RegClass::I64, I32);
+let rtmp1 = ctx.alloc_tmp(RegClass::I64, I32);
+let rtmp2 = ctx.alloc_tmp(RegClass::I64, I32);
// Bounds-check and branch to default.
if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {