Address review comments.
@@ -352,6 +352,13 @@ impl MachInstEmit for Inst {
 type State = EmitState;

 fn emit(&self, sink: &mut MachBuffer<Inst>, flags: &settings::Flags, state: &mut EmitState) {
+// N.B.: we *must* not exceed the "worst-case size" used to compute
+// where to insert islands, except when islands are explicitly triggered
+// (with an `EmitIsland`). We check this in debug builds. This is `mut`
+// to allow disabling the check for `JTSequence`, which is always
+// emitted following an `EmitIsland`.
+let mut start_off = sink.cur_offset();
+
 match self {
 &Inst::AluRRR { alu_op, rd, rn, rm } => {
 let top11 = match alu_op {
@@ -1307,6 +1314,10 @@ impl MachInstEmit for Inst {
 LabelUse::PCRel32,
 );
 }
+
+// Lowering produces an EmitIsland before using a JTSequence, so we can safely
+// disable the worst-case-size check in this case.
+start_off = sink.cur_offset();
 }
 &Inst::LoadConst64 { rd, const_data } => {
 let inst = Inst::ULoad64 {
@@ -1418,5 +1429,8 @@ impl MachInstEmit for Inst {
 }
 }
 }
+
+let end_off = sink.cur_offset();
+debug_assert!((end_off - start_off) <= Inst::worst_case_size());
 }
 }
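To make the check above concrete: the idea is to snapshot the buffer offset before an instruction is emitted, compare the emitted size against the per-instruction worst-case bound afterwards, and move the snapshot forward whenever an explicit island (or a sequence that follows one, like JTSequence) was just emitted. A minimal standalone sketch, using a toy buffer and bound rather than MachBuffer's real API:

    // Toy model of the worst-case-size check in `emit` above. The buffer,
    // the bound, and `resets_check` are stand-ins, not Cranelift's real types.
    const WORST_CASE_SIZE: u32 = 44; // assumed per-instruction upper bound

    struct ToyBuffer {
        bytes: Vec<u8>,
    }

    impl ToyBuffer {
        fn cur_offset(&self) -> u32 {
            self.bytes.len() as u32
        }
        fn put4(&mut self, word: u32) {
            self.bytes.extend_from_slice(&word.to_le_bytes());
        }
    }

    fn emit_one(buf: &mut ToyBuffer, words: &[u32], resets_check: bool) {
        let mut start_off = buf.cur_offset();
        for &w in words {
            buf.put4(w);
        }
        if resets_check {
            // Sequences emitted right after an explicit island (e.g. JTSequence)
            // may exceed the bound, so restart the measurement here.
            start_off = buf.cur_offset();
        }
        let end_off = buf.cur_offset();
        debug_assert!(end_off - start_off <= WORST_CASE_SIZE);
    }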
@@ -657,6 +657,15 @@ pub enum Inst {

 /// A one-way conditional branch, invisible to the CFG processing; used *only* as part of
 /// straight-line sequences in code to be emitted.
+///
+/// In more detail:
+/// - This branch is lowered to a branch at the machine-code level, but does not end a basic
+/// block, and does not create edges in the CFG seen by regalloc.
+/// - Thus, it is *only* valid to use as part of a single-in, single-out sequence that is
+/// lowered from a single CLIF instruction. For example, certain arithmetic operations may
+/// use these branches to handle certain conditions, such as overflows, traps, etc.
+///
+/// See, e.g., the lowering of `trapif` (conditional trap) for an example.
 OneWayCondBr {
 target: BranchTarget,
 kind: CondBrKind,
@@ -678,7 +687,7 @@ pub enum Inst {
 trap_info: (SourceLoc, TrapCode),
 },

-/// Load the address (using a PC-relative offset) of a memory location, using the `ADR`
+/// Compute the address (using a PC-relative offset) of a memory location, using the `ADR`
 /// instruction. Note that we take a simple offset, not a `MemLabel`, here, because `Adr` is
 /// only used for now in fixed lowering sequences with hardcoded offsets. In the future we may
 /// need full `MemLabel` support.
@@ -734,9 +743,26 @@ pub enum Inst {
 offset: i64,
 },

-/// Meta-insn, no-op in generated code: emit constant/branch veneer island at this point (with
-/// a guard jump around it) if less than the needed space is available before the next branch
-/// deadline.
+/// Meta-insn, no-op in generated code: emit constant/branch veneer island
+/// at this point (with a guard jump around it) if less than the needed
+/// space is available before the next branch deadline. See the `MachBuffer`
+/// implementation in `machinst/buffer.rs` for the overall algorithm. In
+/// brief, we retain a set of "pending/unresolved label references" from
+/// branches as we scan forward through instructions to emit machine code;
+/// if we notice we're about to go out of range on an unresolved reference,
+/// we stop, emit a bunch of "veneers" (branches in a form that has a longer
+/// range, e.g. a 26-bit-offset unconditional jump), and point the original
+/// label references to those. This is an "island" because it comes in the
+/// middle of the code.
+///
+/// This meta-instruction is a necessary part of the logic that determines
+/// where to place islands. Ordinarily, we want to place them between basic
+/// blocks, so we compute the worst-case size of each block, and emit the
+/// island before starting a block if we would exceed a deadline before the
+/// end of the block. However, some sequences (such as an inline jumptable)
+/// are variable-length and not accounted for by this logic; so these
+/// lowered sequences include an `EmitIsland` to trigger island generation
+/// where necessary.
 EmitIsland {
 /// The needed space before the next deadline.
 needed_space: CodeOffset,
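A minimal standalone sketch of the deadline logic this comment describes: each unresolved branch reference imposes a deadline, and an island must be forced whenever the next chunk of code could cross the earliest one. The struct, the guard-jump size, and the function names below are illustrative, not MachBuffer's actual interface:

    // Toy model of island placement; offsets, ranges, and sizes are illustrative.
    struct PendingRef {
        at_offset: u32, // where the branch fixup lives in the buffer
        max_range: u32, // how far that branch form can reach
    }

    fn earliest_deadline(refs: &[PendingRef]) -> u32 {
        refs.iter()
            .map(|r| r.at_offset + r.max_range)
            .min()
            .unwrap_or(u32::MAX)
    }

    /// Must an island be emitted before writing `needed_space` more bytes
    /// of code starting at `cur_offset`?
    fn island_needed(cur_offset: u32, needed_space: u32, refs: &[PendingRef]) -> bool {
        let guard_jump = 4; // assumed size of the jump around the island
        cur_offset + needed_space + guard_jump > earliest_deadline(refs)
    }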
@@ -1770,6 +1796,18 @@ impl MachInst for Inst {
 ));
 ret
 } else {
+// Must be an integer type.
+debug_assert!(
+ty == B1
+|| ty == I8
+|| ty == B8
+|| ty == I16
+|| ty == B16
+|| ty == I32
+|| ty == B32
+|| ty == I64
+|| ty == B64
+);
 Inst::load_constant(to_reg, value)
 }
 }
@@ -2601,7 +2639,8 @@ pub enum LabelUse {
 /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
 /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
 Adr21,
-/// 32-bit PC relative constant offset (from address of constant itself). Used in jump tables.
+/// 32-bit PC relative constant offset (from address of constant itself),
+/// signed. Used in jump tables.
 PCRel32,
 }
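As a rough illustration of the two label-use kinds above (standalone helpers, not the backend's actual patching code): Adr21 splits a signed 21-bit offset across the ADR immediate fields exactly as the doc comment states, and PCRel32 is a signed word read relative to the constant's own address.

    /// Patch a signed 21-bit PC-relative offset into an ADR instruction word:
    /// the low 2 bits go to bits 30:29 and the high 19 bits to bits 23:5.
    /// Sketch only; assumes the offset was already range-checked.
    fn patch_adr21(insn: u32, off: i32) -> u32 {
        debug_assert!(off >= -(1 << 20) && off < (1 << 20));
        let imm = (off as u32) & 0x1f_ffff; // 21 significant bits
        let immlo = imm & 0b11;
        let immhi = imm >> 2;
        let mask = (0b11u32 << 29) | (0x7_ffff << 5);
        (insn & !mask) | (immlo << 29) | (immhi << 5)
    }

    /// Resolve a PCRel32 jump-table entry: the 32-bit constant at `entry_addr`
    /// is a signed offset relative to its own address.
    fn pcrel32_target(entry_addr: u64, stored: i32) -> u64 {
        entry_addr.wrapping_add(stored as i64 as u64)
    }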
@@ -188,7 +188,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
 let inputs = ctx.get_input(input.insn, input.input);
 let in_reg = if let Some(c) = inputs.constant {
 // Generate constants fresh at each use to minimize long-range register pressure.
-let to_reg = ctx.tmp(Inst::rc_for_type(ty).unwrap(), ty);
+let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
 for inst in Inst::gen_constant(to_reg, c, ty).into_iter() {
 ctx.emit(inst);
 }
@@ -201,7 +201,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
 match (narrow_mode, from_bits) {
 (NarrowValueMode::None, _) => in_reg,
 (NarrowValueMode::ZeroExtend32, n) if n < 32 => {
-let tmp = ctx.tmp(RegClass::I64, I32);
+let tmp = ctx.alloc_tmp(RegClass::I64, I32);
 ctx.emit(Inst::Extend {
 rd: tmp,
 rn: in_reg,
@@ -212,7 +212,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
 tmp.to_reg()
 }
 (NarrowValueMode::SignExtend32, n) if n < 32 => {
-let tmp = ctx.tmp(RegClass::I64, I32);
+let tmp = ctx.alloc_tmp(RegClass::I64, I32);
 ctx.emit(Inst::Extend {
 rd: tmp,
 rn: in_reg,
@@ -229,7 +229,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
 // Constants are zero-extended to full 64-bit width on load already.
 in_reg
 } else {
-let tmp = ctx.tmp(RegClass::I64, I32);
+let tmp = ctx.alloc_tmp(RegClass::I64, I32);
 ctx.emit(Inst::Extend {
 rd: tmp,
 rn: in_reg,
@@ -241,7 +241,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
 }
 }
 (NarrowValueMode::SignExtend64, n) if n < 64 => {
-let tmp = ctx.tmp(RegClass::I64, I32);
+let tmp = ctx.alloc_tmp(RegClass::I64, I32);
 ctx.emit(Inst::Extend {
 rd: tmp,
 rn: in_reg,
@@ -529,7 +529,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
 }

 // Otherwise, generate add instructions.
-let addr = ctx.tmp(RegClass::I64, I64);
+let addr = ctx.alloc_tmp(RegClass::I64, I64);

 // Get the const into a reg.
 lower_constant_u64(ctx, addr.clone(), offset as u64);
@@ -541,7 +541,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
 // In an addition, the stack register is the zero register, so divert it to another
 // register just before doing the actual add.
 let reg = if reg == stack_reg() {
-let tmp = ctx.tmp(RegClass::I64, I64);
+let tmp = ctx.alloc_tmp(RegClass::I64, I64);
 ctx.emit(Inst::Mov {
 rd: tmp,
 rm: stack_reg(),
@@ -84,8 +84,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 } else {
 VecALUOp::UQAddScalar
 };
-let va = ctx.tmp(RegClass::V128, I128);
-let vb = ctx.tmp(RegClass::V128, I128);
+let va = ctx.alloc_tmp(RegClass::V128, I128);
+let vb = ctx.alloc_tmp(RegClass::V128, I128);
 let ra = input_to_reg(ctx, inputs[0], narrow_mode);
 let rb = input_to_reg(ctx, inputs[1], narrow_mode);
 let rd = output_to_reg(ctx, outputs[0]);
@@ -115,8 +115,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 } else {
 VecALUOp::UQSubScalar
 };
-let va = ctx.tmp(RegClass::V128, I128);
-let vb = ctx.tmp(RegClass::V128, I128);
+let va = ctx.alloc_tmp(RegClass::V128, I128);
+let vb = ctx.alloc_tmp(RegClass::V128, I128);
 let ra = input_to_reg(ctx, inputs[0], narrow_mode);
 let rb = input_to_reg(ctx, inputs[1], narrow_mode);
 let rd = output_to_reg(ctx, outputs[0]);
@@ -498,7 +498,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 // ignored (because of the implicit masking done by the instruction),
 // so this is equivalent to negating the input.
 let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
-let tmp = ctx.tmp(RegClass::I64, ty);
+let tmp = ctx.alloc_tmp(RegClass::I64, ty);
 ctx.emit(Inst::AluRRR {
 alu_op,
 rd: tmp,
@@ -521,7 +521,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 // Really ty_bits_size - rn, but the upper bits of the result are
 // ignored (because of the implicit masking done by the instruction),
 // so this is equivalent to negating the input.
-let tmp = ctx.tmp(RegClass::I64, I32);
+let tmp = ctx.alloc_tmp(RegClass::I64, I32);
 ctx.emit(Inst::AluRRR {
 alu_op: ALUOp::Sub32,
 rd: tmp,
@@ -534,7 +534,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 };

 // Explicitly mask the rotation count.
-let tmp_masked_rm = ctx.tmp(RegClass::I64, I32);
+let tmp_masked_rm = ctx.alloc_tmp(RegClass::I64, I32);
 ctx.emit(Inst::AluRRImmLogic {
 alu_op: ALUOp::And32,
 rd: tmp_masked_rm,
@@ -543,8 +543,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 });
 let tmp_masked_rm = tmp_masked_rm.to_reg();

-let tmp1 = ctx.tmp(RegClass::I64, I32);
-let tmp2 = ctx.tmp(RegClass::I64, I32);
+let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
+let tmp2 = ctx.alloc_tmp(RegClass::I64, I32);
 ctx.emit(Inst::AluRRImm12 {
 alu_op: ALUOp::Sub32,
 rd: tmp1,
@@ -583,7 +583,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 }
 immshift.imm &= ty_bits_size - 1;

-let tmp1 = ctx.tmp(RegClass::I64, I32);
+let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
 ctx.emit(Inst::AluRRImmShift {
 alu_op: ALUOp::Lsr32,
 rd: tmp1,
@@ -688,7 +688,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 // and fix the sequence below to work properly for this.
 let narrow_mode = NarrowValueMode::ZeroExtend64;
 let rn = input_to_reg(ctx, inputs[0], narrow_mode);
-let tmp = ctx.tmp(RegClass::I64, I64);
+let tmp = ctx.alloc_tmp(RegClass::I64, I64);

 // If this is a 32-bit Popcnt, use Lsr32 to clear the top 32 bits of the register, then
 // the rest of the code is identical to the 64-bit version.
@@ -997,7 +997,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 }

 Opcode::Bitselect => {
-let tmp = ctx.tmp(RegClass::I64, I64);
+let tmp = ctx.alloc_tmp(RegClass::I64, I64);
 let rd = output_to_reg(ctx, outputs[0]);
 let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
 let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
@@ -1475,8 +1475,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
 let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
 let rd = output_to_reg(ctx, outputs[0]);
-let tmp1 = ctx.tmp(RegClass::I64, I64);
-let tmp2 = ctx.tmp(RegClass::I64, I64);
+let tmp1 = ctx.alloc_tmp(RegClass::I64, I64);
+let tmp2 = ctx.alloc_tmp(RegClass::I64, I64);
 ctx.emit(Inst::MovFromVec64 { rd: tmp1, rn: rn });
 ctx.emit(Inst::MovFromVec64 { rd: tmp2, rn: rm });
 let imml = if bits == 32 {
@@ -1546,7 +1546,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 let trap_info = (ctx.srcloc(insn), TrapCode::BadConversionToInteger);
 ctx.emit(Inst::Udf { trap_info });

-let tmp = ctx.tmp(RegClass::V128, I128);
+let tmp = ctx.alloc_tmp(RegClass::V128, I128);

 // Check that the input is in range, with "truncate towards zero" semantics. This means
 // we allow values that are slightly out of range:
@@ -1712,8 +1712,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 _ => unreachable!(),
 };

-let rtmp1 = ctx.tmp(RegClass::V128, in_ty);
-let rtmp2 = ctx.tmp(RegClass::V128, in_ty);
+let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
+let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);

 if in_bits == 32 {
 ctx.emit(Inst::LoadFpuConst32 {
@@ -2072,7 +2072,9 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
 Opcode::BrTable => {
 // Expand `br_table index, default, JT` to:
 //
-// (emit island with guard jump if needed)
+// emit_island // this forces an island at this point
+// // if the jumptable would push us past
+// // the deadline
 // subs idx, #jt_size
 // b.hs default
 // adr vTmp1, PC+16
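Read as source-level semantics, the expanded sequence behaves roughly like the following standalone model. The entry-relative interpretation of the stored word follows the PCRel32 description above, and the 4-byte entry width is an assumption based on its 32-bit size; everything else is illustrative:

    // Toy model of the br_table dispatch: bounds-check the index against the
    // table size, then add the signed 32-bit entry to the entry's own address.
    fn br_table_target(jt_base: u64, entries: &[i32], idx: u32, default: u64) -> u64 {
        if (idx as usize) >= entries.len() {
            return default; // corresponds to `subs idx, #jt_size` + `b.hs default`
        }
        let entry_addr = jt_base + 4 * idx as u64;
        entry_addr.wrapping_add(entries[idx as usize] as i64 as u64)
    }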
@@ -2096,8 +2098,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
 NarrowValueMode::ZeroExtend32,
 );

-let rtmp1 = ctx.tmp(RegClass::I64, I32);
-let rtmp2 = ctx.tmp(RegClass::I64, I32);
+let rtmp1 = ctx.alloc_tmp(RegClass::I64, I32);
+let rtmp2 = ctx.alloc_tmp(RegClass::I64, I32);

 // Bounds-check and branch to default.
 if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {