Address review comments.
@@ -352,6 +352,13 @@ impl MachInstEmit for Inst {
     type State = EmitState;
 
     fn emit(&self, sink: &mut MachBuffer<Inst>, flags: &settings::Flags, state: &mut EmitState) {
+        // N.B.: we *must* not exceed the "worst-case size" used to compute
+        // where to insert islands, except when islands are explicitly triggered
+        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
+        // to allow disabling the check for `JTSequence`, which is always
+        // emitted following an `EmitIsland`.
+        let mut start_off = sink.cur_offset();
+
         match self {
             &Inst::AluRRR { alu_op, rd, rn, rm } => {
                 let top11 = match alu_op {

@@ -1307,6 +1314,10 @@ impl MachInstEmit for Inst {
                         LabelUse::PCRel32,
                     );
                 }
+
+                // Lowering produces an EmitIsland before using a JTSequence, so we can safely
+                // disable the worst-case-size check in this case.
+                start_off = sink.cur_offset();
             }
             &Inst::LoadConst64 { rd, const_data } => {
                 let inst = Inst::ULoad64 {

@@ -1418,5 +1429,8 @@ impl MachInstEmit for Inst {
                 }
             }
         }
+
+        let end_off = sink.cur_offset();
+        debug_assert!((end_off - start_off) <= Inst::worst_case_size());
     }
 }
@@ -657,6 +657,15 @@ pub enum Inst {
 
     /// A one-way conditional branch, invisible to the CFG processing; used *only* as part of
     /// straight-line sequences in code to be emitted.
+    ///
+    /// In more detail:
+    /// - This branch is lowered to a branch at the machine-code level, but does not end a basic
+    ///   block, and does not create edges in the CFG seen by regalloc.
+    /// - Thus, it is *only* valid to use as part of a single-in, single-out sequence that is
+    ///   lowered from a single CLIF instruction. For example, certain arithmetic operations may
+    ///   use these branches to handle certain conditions, such as overflows, traps, etc.
+    ///
+    /// See, e.g., the lowering of `trapif` (conditional trap) for an example.
     OneWayCondBr {
         target: BranchTarget,
         kind: CondBrKind,

@@ -678,7 +687,7 @@ pub enum Inst {
         trap_info: (SourceLoc, TrapCode),
     },
 
-    /// Load the address (using a PC-relative offset) of a memory location, using the `ADR`
+    /// Compute the address (using a PC-relative offset) of a memory location, using the `ADR`
     /// instruction. Note that we take a simple offset, not a `MemLabel`, here, because `Adr` is
     /// only used for now in fixed lowering sequences with hardcoded offsets. In the future we may
     /// need full `MemLabel` support.

@@ -734,9 +743,26 @@ pub enum Inst {
         offset: i64,
     },
 
-    /// Meta-insn, no-op in generated code: emit constant/branch veneer island at this point (with
-    /// a guard jump around it) if less than the needed space is available before the next branch
-    /// deadline.
+    /// Meta-insn, no-op in generated code: emit constant/branch veneer island
+    /// at this point (with a guard jump around it) if less than the needed
+    /// space is available before the next branch deadline. See the `MachBuffer`
+    /// implementation in `machinst/buffer.rs` for the overall algorithm. In
+    /// brief, we retain a set of "pending/unresolved label references" from
+    /// branches as we scan forward through instructions to emit machine code;
+    /// if we notice we're about to go out of range on an unresolved reference,
+    /// we stop, emit a bunch of "veneers" (branches in a form that has a longer
+    /// range, e.g. a 26-bit-offset unconditional jump), and point the original
+    /// label references to those. This is an "island" because it comes in the
+    /// middle of the code.
+    ///
+    /// This meta-instruction is a necessary part of the logic that determines
+    /// where to place islands. Ordinarily, we want to place them between basic
+    /// blocks, so we compute the worst-case size of each block, and emit the
+    /// island before starting a block if we would exceed a deadline before the
+    /// end of the block. However, some sequences (such as an inline jumptable)
+    /// are variable-length and not accounted for by this logic; so these
+    /// lowered sequences include an `EmitIsland` to trigger island generation
+    /// where necessary.
    EmitIsland {
         /// The needed space before the next deadline.
         needed_space: CodeOffset,
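The `EmitIsland` documentation above boils down to a budget check: before emitting something whose size is not covered by the per-block worst-case accounting, make sure no pending label reference would go out of range. A minimal, self-contained toy of that decision is sketched below; the function and variable names are illustrative only and do not appear in this commit or in `MachBuffer`.

```rust
// Toy model of the island "deadline" decision; names and size numbers are
// illustrative only and do not come from this commit.
fn island_needed(cur_offset: u32, worst_case_size: u32, earliest_deadline: u32) -> bool {
    // If emitting up to `worst_case_size` more bytes could carry us past the
    // point where some pending label reference goes out of range, we must emit
    // veneers (an island) now, while every reference is still reachable.
    cur_offset + worst_case_size > earliest_deadline
}

fn main() {
    // e.g. a conditional branch at offset 100 with a +/- 1 MB range.
    let deadline = 100 + (1 << 20);
    // A small block fits comfortably before the deadline: no island needed.
    assert!(!island_needed(104, 64, deadline));
    // A large inline jump table might not; this is the case `EmitIsland` covers.
    assert!(island_needed(104, 2 << 20, deadline));
}
```

In the real code the deadline comes from the pending fixups, and `Inst::EmitIsland { needed_space }` is the hook a variable-length lowered sequence uses to trigger this check mid-block.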
@@ -1770,6 +1796,18 @@ impl MachInst for Inst {
             ));
             ret
         } else {
+            // Must be an integer type.
+            debug_assert!(
+                ty == B1
+                    || ty == I8
+                    || ty == B8
+                    || ty == I16
+                    || ty == B16
+                    || ty == I32
+                    || ty == B32
+                    || ty == I64
+                    || ty == B64
+            );
             Inst::load_constant(to_reg, value)
         }
     }

@@ -2601,7 +2639,8 @@ pub enum LabelUse {
     /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
     /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
     Adr21,
-    /// 32-bit PC relative constant offset (from address of constant itself). Used in jump tables.
+    /// 32-bit PC relative constant offset (from address of constant itself),
+    /// signed. Used in jump tables.
     PCRel32,
 }
 
@@ -188,7 +188,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
     let inputs = ctx.get_input(input.insn, input.input);
     let in_reg = if let Some(c) = inputs.constant {
         // Generate constants fresh at each use to minimize long-range register pressure.
-        let to_reg = ctx.tmp(Inst::rc_for_type(ty).unwrap(), ty);
+        let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
         for inst in Inst::gen_constant(to_reg, c, ty).into_iter() {
             ctx.emit(inst);
         }

@@ -201,7 +201,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
     match (narrow_mode, from_bits) {
         (NarrowValueMode::None, _) => in_reg,
         (NarrowValueMode::ZeroExtend32, n) if n < 32 => {
-            let tmp = ctx.tmp(RegClass::I64, I32);
+            let tmp = ctx.alloc_tmp(RegClass::I64, I32);
             ctx.emit(Inst::Extend {
                 rd: tmp,
                 rn: in_reg,

@@ -212,7 +212,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
             tmp.to_reg()
         }
         (NarrowValueMode::SignExtend32, n) if n < 32 => {
-            let tmp = ctx.tmp(RegClass::I64, I32);
+            let tmp = ctx.alloc_tmp(RegClass::I64, I32);
             ctx.emit(Inst::Extend {
                 rd: tmp,
                 rn: in_reg,

@@ -229,7 +229,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
                 // Constants are zero-extended to full 64-bit width on load already.
                 in_reg
             } else {
-                let tmp = ctx.tmp(RegClass::I64, I32);
+                let tmp = ctx.alloc_tmp(RegClass::I64, I32);
                 ctx.emit(Inst::Extend {
                     rd: tmp,
                     rn: in_reg,

@@ -241,7 +241,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
             }
         }
         (NarrowValueMode::SignExtend64, n) if n < 64 => {
-            let tmp = ctx.tmp(RegClass::I64, I32);
+            let tmp = ctx.alloc_tmp(RegClass::I64, I32);
             ctx.emit(Inst::Extend {
                 rd: tmp,
                 rn: in_reg,

@@ -529,7 +529,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
     }
 
     // Otherwise, generate add instructions.
-    let addr = ctx.tmp(RegClass::I64, I64);
+    let addr = ctx.alloc_tmp(RegClass::I64, I64);
 
     // Get the const into a reg.
     lower_constant_u64(ctx, addr.clone(), offset as u64);

@@ -541,7 +541,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
         // In an addition, the stack register is the zero register, so divert it to another
         // register just before doing the actual add.
         let reg = if reg == stack_reg() {
-            let tmp = ctx.tmp(RegClass::I64, I64);
+            let tmp = ctx.alloc_tmp(RegClass::I64, I64);
             ctx.emit(Inst::Mov {
                 rd: tmp,
                 rm: stack_reg(),
@@ -84,8 +84,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             } else {
                 VecALUOp::UQAddScalar
             };
-            let va = ctx.tmp(RegClass::V128, I128);
-            let vb = ctx.tmp(RegClass::V128, I128);
+            let va = ctx.alloc_tmp(RegClass::V128, I128);
+            let vb = ctx.alloc_tmp(RegClass::V128, I128);
             let ra = input_to_reg(ctx, inputs[0], narrow_mode);
             let rb = input_to_reg(ctx, inputs[1], narrow_mode);
             let rd = output_to_reg(ctx, outputs[0]);

@@ -115,8 +115,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             } else {
                 VecALUOp::UQSubScalar
             };
-            let va = ctx.tmp(RegClass::V128, I128);
-            let vb = ctx.tmp(RegClass::V128, I128);
+            let va = ctx.alloc_tmp(RegClass::V128, I128);
+            let vb = ctx.alloc_tmp(RegClass::V128, I128);
             let ra = input_to_reg(ctx, inputs[0], narrow_mode);
             let rb = input_to_reg(ctx, inputs[1], narrow_mode);
             let rd = output_to_reg(ctx, outputs[0]);

@@ -498,7 +498,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             // ignored (because of the implicit masking done by the instruction),
             // so this is equivalent to negating the input.
             let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
-            let tmp = ctx.tmp(RegClass::I64, ty);
+            let tmp = ctx.alloc_tmp(RegClass::I64, ty);
             ctx.emit(Inst::AluRRR {
                 alu_op,
                 rd: tmp,

@@ -521,7 +521,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 // Really ty_bits_size - rn, but the upper bits of the result are
                 // ignored (because of the implicit masking done by the instruction),
                 // so this is equivalent to negating the input.
-                let tmp = ctx.tmp(RegClass::I64, I32);
+                let tmp = ctx.alloc_tmp(RegClass::I64, I32);
                 ctx.emit(Inst::AluRRR {
                     alu_op: ALUOp::Sub32,
                     rd: tmp,

@@ -534,7 +534,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             };
 
             // Explicitly mask the rotation count.
-            let tmp_masked_rm = ctx.tmp(RegClass::I64, I32);
+            let tmp_masked_rm = ctx.alloc_tmp(RegClass::I64, I32);
             ctx.emit(Inst::AluRRImmLogic {
                 alu_op: ALUOp::And32,
                 rd: tmp_masked_rm,

@@ -543,8 +543,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             });
             let tmp_masked_rm = tmp_masked_rm.to_reg();
 
-            let tmp1 = ctx.tmp(RegClass::I64, I32);
-            let tmp2 = ctx.tmp(RegClass::I64, I32);
+            let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
+            let tmp2 = ctx.alloc_tmp(RegClass::I64, I32);
             ctx.emit(Inst::AluRRImm12 {
                 alu_op: ALUOp::Sub32,
                 rd: tmp1,

@@ -583,7 +583,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 }
                 immshift.imm &= ty_bits_size - 1;
 
-                let tmp1 = ctx.tmp(RegClass::I64, I32);
+                let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
                 ctx.emit(Inst::AluRRImmShift {
                     alu_op: ALUOp::Lsr32,
                     rd: tmp1,

@@ -688,7 +688,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             // and fix the sequence below to work properly for this.
             let narrow_mode = NarrowValueMode::ZeroExtend64;
             let rn = input_to_reg(ctx, inputs[0], narrow_mode);
-            let tmp = ctx.tmp(RegClass::I64, I64);
+            let tmp = ctx.alloc_tmp(RegClass::I64, I64);
 
             // If this is a 32-bit Popcnt, use Lsr32 to clear the top 32 bits of the register, then
             // the rest of the code is identical to the 64-bit version.

@@ -997,7 +997,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         }
 
         Opcode::Bitselect => {
-            let tmp = ctx.tmp(RegClass::I64, I64);
+            let tmp = ctx.alloc_tmp(RegClass::I64, I64);
             let rd = output_to_reg(ctx, outputs[0]);
             let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
             let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);

@@ -1475,8 +1475,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
             let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
             let rd = output_to_reg(ctx, outputs[0]);
-            let tmp1 = ctx.tmp(RegClass::I64, I64);
-            let tmp2 = ctx.tmp(RegClass::I64, I64);
+            let tmp1 = ctx.alloc_tmp(RegClass::I64, I64);
+            let tmp2 = ctx.alloc_tmp(RegClass::I64, I64);
             ctx.emit(Inst::MovFromVec64 { rd: tmp1, rn: rn });
             ctx.emit(Inst::MovFromVec64 { rd: tmp2, rn: rm });
             let imml = if bits == 32 {

@@ -1546,7 +1546,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let trap_info = (ctx.srcloc(insn), TrapCode::BadConversionToInteger);
             ctx.emit(Inst::Udf { trap_info });
 
-            let tmp = ctx.tmp(RegClass::V128, I128);
+            let tmp = ctx.alloc_tmp(RegClass::V128, I128);
 
             // Check that the input is in range, with "truncate towards zero" semantics. This means
             // we allow values that are slightly out of range:

@@ -1712,8 +1712,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 _ => unreachable!(),
             };
 
-            let rtmp1 = ctx.tmp(RegClass::V128, in_ty);
-            let rtmp2 = ctx.tmp(RegClass::V128, in_ty);
+            let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
+            let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);
 
             if in_bits == 32 {
                 ctx.emit(Inst::LoadFpuConst32 {

@@ -2072,7 +2072,9 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
         Opcode::BrTable => {
             // Expand `br_table index, default, JT` to:
             //
-            //   (emit island with guard jump if needed)
+            //   emit_island  // this forces an island at this point
+            //                // if the jumptable would push us past
+            //                // the deadline
             //   subs idx, #jt_size
             //   b.hs default
             //   adr vTmp1, PC+16

@@ -2096,8 +2098,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
                 NarrowValueMode::ZeroExtend32,
             );
 
-            let rtmp1 = ctx.tmp(RegClass::I64, I32);
-            let rtmp2 = ctx.tmp(RegClass::I64, I32);
+            let rtmp1 = ctx.alloc_tmp(RegClass::I64, I32);
+            let rtmp2 = ctx.alloc_tmp(RegClass::I64, I32);
 
             // Bounds-check and branch to default.
             if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {
@@ -3,12 +3,54 @@
 //! This module handles the translation from CLIF BBs to VCode BBs.
 //!
 //! The basic idea is that we compute a sequence of "lowered blocks" that
-//! correspond to subgraphs of the CLIF CFG plus an implicit block on *every*
-//! edge (not just critical edges). Conceptually, the lowering pipeline wants to
-//! insert moves for phi-nodes on every block-to-block transfer; these blocks
-//! always conceptually exist, but may be merged with an "original" CLIF block
-//! (and hence not actually exist; this is equivalent to inserting the blocks
-//! only on critical edges).
+//! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit
+//! block on *every* edge). Conceptually, the lowering pipeline wants to insert
+//! moves for phi-nodes on every block-to-block transfer; these blocks always
+//! conceptually exist, but may be merged with an "original" CLIF block (and
+//! hence not actually exist; this is equivalent to inserting the blocks only on
+//! critical edges).
+//!
+//! In other words, starting from a CFG like this (where each "CLIF block" and
+//! "(edge N->M)" is a separate basic block):
+//!
+//! ```plain
+//!
+//!           CLIF block 0
+//!            /         \
+//!   (edge 0->1)       (edge 0->2)
+//!        |                 |
+//!   CLIF block 1       CLIF block 2
+//!        \                 /
+//!    (edge 1->3)      (edge 2->3)
+//!            \           /
+//!            CLIF block 3
+//! ```
+//!
+//! We can produce a CFG of lowered blocks like so:
+//!
+//! ```plain
+//!         +--------------+
+//!         | CLIF block 0 |
+//!         +--------------+
+//!           /          \
+//!  +--------------+   +--------------+
+//!  | (edge 0->1)  |   | (edge 0->2)  |
+//!  | CLIF block 1 |   | CLIF block 2 |
+//!  +--------------+   +--------------+
+//!           \            /
+//!     +-----------+   +-----------+
+//!     |(edge 1->3)|   |(edge 2->3)|
+//!     +-----------+   +-----------+
+//!            \          /
+//!          +------------+
+//!          |CLIF block 3|
+//!          +------------+
+//! ```
+//!
+//! (note that the edges into CLIF blocks 1 and 2 could be merged with those
+//! blocks' original bodies, but the out-edges could not because for simplicity
+//! in the successor-function definition, we only ever merge an edge onto one
+//! side of an original CLIF block.)
 //!
 //! Each `LoweredBlock` names just an original CLIF block, an original CLIF
 //! block prepended or appended with an edge block (never both, though), or just

@@ -23,6 +65,9 @@
 //! have content, because this computation happens as part of lowering *before*
 //! regalloc, and regalloc may or may not insert moves/spills/reloads on any
 //! particular edge. But it works relatively well and is conceptually simple.
+//! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like
+//! branch editing that in practice elides empty blocks and simplifies some of
+//! the other redundancies that this scheme produces.
 
 use crate::entity::SecondaryMap;
 use crate::fx::{FxHashMap, FxHashSet};
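For readers new to this module, the `LoweredBlock` cases described above (an original CLIF block, an original block with one edge block merged before or after it, or a bare edge block) can be pictured roughly as the following self-contained sketch; the names and fields are illustrative and are not the actual definitions in this file.

```rust
// Illustrative stand-ins only; not the actual types in this module.
type Block = u32;
type Edge = (Block, Block); // identified by its (from, to) CLIF blocks

#[allow(dead_code)]
enum LoweredBlock {
    /// Just an original CLIF block, with no edge block merged in.
    Orig { block: Block },
    /// An edge block merged onto the *front* of an original CLIF block.
    EdgeAndOrig { edge: Edge, block: Block },
    /// An edge block merged onto the *end* of an original CLIF block
    /// (never both sides at once, per the comment above).
    OrigAndEdge { block: Block, edge: Edge },
    /// A bare edge block, e.g. on a critical edge that could not be merged.
    Edge { edge: Edge },
}

fn main() {
    // The in-edges of blocks 1 and 2 in the diagram above merge with their
    // target blocks; the edges into block 3 remain bare edge blocks.
    let _merged = LoweredBlock::EdgeAndOrig { edge: (0, 1), block: 1 };
    let _bare = LoweredBlock::Edge { edge: (1, 3) };
}
```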
@@ -1,12 +1,116 @@
 //! In-memory representation of compiled machine code, with labels and fixups to
 //! refer to those labels. Handles constant-pool island insertion and also
 //! veneer insertion for out-of-range jumps.
+//!
+//! This code exists to solve three problems:
+//!
+//! - Branch targets for forward branches are not known until later, when we
+//! emit code in a single pass through the instruction structs.
+//!
+//! - On many architectures, address references or offsets have limited range.
+//! For example, on AArch64, conditional branches can only target code +/- 1MB
+//! from the branch itself.
+//!
+//! - The lowering of control flow from the CFG-with-edges produced by
+//! [BlockLoweringOrder], combined with many empty edge blocks when the register
+//! allocator does not need to insert any spills/reloads/moves in edge blocks,
+//! results in many suboptimal branch patterns. The lowering also pays no
+//! attention to block order, and so two-target conditional forms (cond-br
+//! followed by uncond-br) can often be avoided because one of the targets is
+//! the fallthrough. There are several cases here where we can simplify to use
+//! fewer branches.
+//!
+//! This "buffer" implements a single-pass code emission strategy (with a later
+//! "fixup" pass, but only through recorded fixups, not all instructions). The
+//! basic idea is:
+//!
+//! - Emit branches as they are, including two-target (cond/uncond) compound
+//! forms, but with zero offsets and optimistically assuming the target will be
+//! in range. Record the "fixup" for later. Targets are denoted instead by
+//! symbolic "labels" that are then bound to certain offsets in the buffer as
+//! we emit code. (Nominally, there is a label at the start of every basic
+//! block.)
+//!
+//! - As we do this, track the offset in the buffer at which the first label
+//! reference "goes out of range". We call this the "deadline". If we reach the
+//! deadline and we still have not bound the label to which an unresolved branch
+//! refers, we have a problem!
+//!
+//! - To solve this problem, we emit "islands" full of "veneers". An island is
+//! simply a chunk of code inserted in the middle of the code actually produced
+//! by the emitter (e.g., vcode iterating over instruction structs). The emitter
+//! has some awareness of this: it either asks for an island between blocks, so
+//! it is not accidentally executed, or else it emits a branch around the island
+//! when all other options fail (see [Inst::EmitIsland] meta-instruction).
+//!
+//! - A "veneer" is an instruction (or sequence of instructions) in an "island"
+//! that implements a longer-range reference to a label. The idea is that, for
+//! example, a branch with a limited range can branch to a "veneer" instead,
+//! which is simply a branch in a form that can use a longer-range reference. On
+//! AArch64, for example, conditionals have a +/- 1 MB range, but a conditional
+//! can branch to an unconditional branch which has a +/- 128 MB range. Hence, a
+//! conditional branch's label reference can be fixed up with a "veneer" to
+//! achieve a longer range.
+//!
+//! - To implement all of this, we require the backend to provide a `LabelUse`
+//! type that implements a trait. This is nominally an enum that records one of
+//! several kinds of references to an offset in code -- basically, a relocation
+//! type -- and will usually correspond to different instruction formats. The
+//! `LabelUse` implementation specifies the maximum range, how to patch in the
+//! actual label location when known, and how to generate a veneer to extend the
+//! range.
+//!
+//! That satisfies label references, but we still may have suboptimal branch
+//! patterns. To clean up the branches, we do a simple "peephole"-style
+//! optimization on the fly. To do so, the emitter (e.g., `Inst::emit()`)
+//! informs the buffer of branches in the code and, in the case of conditionals,
+//! the code that would have been emitted to invert this branch's condition. We
+//! track the "latest branches": these are branches that are contiguous up to
+//! the current offset. (If any code is emitted after a branch, that branch or
+//! run of contiguous branches is no longer "latest".) The latest branches are
+//! those that we can edit by simply truncating the buffer and doing something
+//! else instead.
+//!
+//! To optimize branches, we implement several simple rules, and try to apply
+//! them to the "latest branches" when possible:
+//!
+//! - A branch with a label target, when that label is bound to the ending
+//! offset of the branch (the fallthrough location), can be removed altogether,
+//! because the branch would have no effect.
+//!
+//! - An unconditional branch that starts at a label location, and branches to
+//! another label, results in a "label alias": all references to the label bound
+//! *to* this branch instruction are instead resolved to the *target* of the
+//! branch instruction. This effectively removes empty blocks that just
+//! unconditionally branch to the next block. We call this "branch threading".
+//!
+//! - A conditional followed by an unconditional, when the conditional branches
+//! to the unconditional's fallthrough, results in (i) the truncation of the
+//! unconditional, (ii) the inversion of the condition's condition, and (iii)
+//! replacement of the conditional's target (using the original target of the
+//! unconditional). This is a fancy way of saying "we can flip a two-target
+//! conditional branch's taken/not-taken targets if it works better with our
+//! fallthrough". To make this work, the emitter actually gives the buffer
+//! *both* forms of every conditional branch: the true form is emitted into the
+//! buffer, and the "inverted" machine-code bytes are provided as part of the
+//! branch-fixup metadata.
+//!
+//! - An unconditional B preceded by another unconditional P, when B's label(s) have
+//! been redirected to target(B), can be removed entirely. This is an extension
+//! of the branch-threading optimization, and is valid because if we know there
+//! will be no fallthrough into this branch instruction (the prior instruction
+//! is an unconditional jump), and if we know we have successfully redirected
+//! all labels, then this branch instruction is unreachable. Note that this
+//! works because the redirection happens before the label is ever resolved
+//! (fixups happen at island emission time, at which point latest-branches are
+//! cleared, or at the end of emission), so we are sure to catch and redirect
+//! all possible paths to this instruction.
 
 use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc};
 use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};
 use crate::machinst::{BlockIndex, MachInstLabelUse, VCodeInst};
 
-use log::debug;
+use log::trace;
 use smallvec::SmallVec;
 use std::mem;
 
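The backend-provided `LabelUse` type described in the new module comment is exercised later in this diff through calls such as `kind.patch_size()`, `kind.patch(...)`, `kind.supports_veneer()`, `kind.veneer_size()`, `kind.generate_veneer(...)` and the `ALIGN` constant. A rough, self-contained sketch of that shape is shown below; it is an illustration of the idea, not the actual `MachInstLabelUse` trait (which, per the comment above, also specifies the maximum in-range distance used to compute deadlines).

```rust
// Illustrative sketch only; not the actual `MachInstLabelUse` trait.
type CodeOffset = u32;

#[allow(dead_code)]
trait LabelUseSketch: Copy + std::fmt::Debug {
    /// Alignment (in bytes) required for a veneer for this kind of use.
    const ALIGN: CodeOffset;

    /// Number of bytes that `patch()` rewrites at the use site.
    fn patch_size(&self) -> CodeOffset;

    /// Rewrite the bytes at `use_offset` now that the label's offset is known.
    fn patch(&self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset);

    /// Can this kind of use be redirected through a longer-range veneer?
    fn supports_veneer(&self) -> bool;

    /// Size in bytes of the veneer code, if supported.
    fn veneer_size(&self) -> CodeOffset;

    /// Write veneer code into `buffer` (which sits at `veneer_offset`) and
    /// return the offset and kind of the label use embedded in the veneer
    /// itself, so it can be fixed up in turn.
    fn generate_veneer(&self, buffer: &mut [u8], veneer_offset: CodeOffset) -> (CodeOffset, Self);
}

fn main() {}
```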
@@ -35,10 +139,11 @@ pub struct MachBuffer<I: VCodeInst> {
     cur_srcloc: Option<(CodeOffset, SourceLoc)>,
     /// Known label offsets; `UNKNOWN_LABEL_OFFSET` if unknown.
     label_offsets: SmallVec<[CodeOffset; 16]>,
-    /// Label aliases: one label points to an unconditional jump to another
-    /// label, so references to the first should be resolved as references
-    /// to the second. (We don't chase arbitrarily deep to avoid problems
-    /// with cycles.)
+    /// Label aliases: when one label points to an unconditional jump, and that
+    /// jump points to another label, we can redirect references to the first
+    /// label immediately to the second. (We don't chase arbitrarily deep to
+    /// avoid problems with cycles, but rather only one level, i.e. through one
+    /// jump.)
     label_aliases: SmallVec<[MachLabel; 16]>,
     /// Constants that must be emitted at some point.
     pending_constants: SmallVec<[MachLabelConstant; 16]>,
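The `label_aliases` comment above describes a single level of redirection; the real buffer applies it in `resolve_label_offset`, which appears further down in this diff. A self-contained toy of how that one-hop resolution behaves might look like this; the types here are simplified stand-ins, not `MachBuffer` internals.

```rust
// Toy illustration of the "one level of label aliasing" rule described above.
const UNKNOWN_LABEL: u32 = u32::MAX;

struct Labels {
    offsets: Vec<u32>, // code offset per label, by label index
    aliases: Vec<u32>, // UNKNOWN_LABEL, or the label this one is redirected to
}

impl Labels {
    // Follow at most one alias hop (so cycles cannot cause infinite loops),
    // then return the label's bound offset.
    fn resolve_offset(&self, mut label: u32) -> u32 {
        if self.aliases[label as usize] != UNKNOWN_LABEL {
            label = self.aliases[label as usize];
        }
        self.offsets[label as usize]
    }
}

fn main() {
    let labels = Labels {
        offsets: vec![0, 8, 16],
        aliases: vec![UNKNOWN_LABEL, 2, UNKNOWN_LABEL],
    };
    // Label 1 was bound to an unconditional jump to label 2, so it now
    // resolves straight to label 2's offset.
    assert_eq!(labels.resolve_offset(1), 16);
}
```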
@@ -129,13 +234,13 @@ impl<I: VCodeInst> MachBuffer<I> {
 
     /// Add a byte.
     pub fn put1(&mut self, value: u8) {
-        debug!("MachBuffer: put byte @ {}: {:x}", self.cur_offset(), value);
+        trace!("MachBuffer: put byte @ {}: {:x}", self.cur_offset(), value);
         self.data.push(value);
     }
 
     /// Add 2 bytes.
     pub fn put2(&mut self, value: u16) {
-        debug!(
+        trace!(
             "MachBuffer: put 16-bit word @ {}: {:x}",
             self.cur_offset(),
             value

@@ -146,7 +251,7 @@ impl<I: VCodeInst> MachBuffer<I> {
 
     /// Add 4 bytes.
     pub fn put4(&mut self, value: u32) {
-        debug!(
+        trace!(
             "MachBuffer: put 32-bit word @ {}: {:x}",
             self.cur_offset(),
             value

@@ -157,7 +262,7 @@ impl<I: VCodeInst> MachBuffer<I> {
 
     /// Add 8 bytes.
     pub fn put8(&mut self, value: u64) {
-        debug!(
+        trace!(
             "MachBuffer: put 64-bit word @ {}: {:x}",
             self.cur_offset(),
             value

@@ -168,7 +273,7 @@ impl<I: VCodeInst> MachBuffer<I> {
 
     /// Add a slice of bytes.
     pub fn put_data(&mut self, data: &[u8]) {
-        debug!(
+        trace!(
             "MachBuffer: put data @ {}: len {}",
             self.cur_offset(),
             data.len()

@@ -178,7 +283,7 @@ impl<I: VCodeInst> MachBuffer<I> {
 
     /// Reserve appended space and return a mutable slice referring to it.
     pub fn get_appended_space(&mut self, len: usize) -> &mut [u8] {
-        debug!("MachBuffer: put data @ {}: len {}", self.cur_offset(), len);
+        trace!("MachBuffer: put data @ {}: len {}", self.cur_offset(), len);
         let off = self.data.len();
         let new_len = self.data.len() + len;
         self.data.resize(new_len, 0);

@@ -187,7 +292,7 @@ impl<I: VCodeInst> MachBuffer<I> {
 
     /// Align up to the given alignment.
     pub fn align_to(&mut self, align_to: CodeOffset) {
-        debug!("MachBuffer: align to {}", align_to);
+        trace!("MachBuffer: align to {}", align_to);
         assert!(align_to.is_power_of_two());
         while self.cur_offset() & (align_to - 1) != 0 {
             self.put1(0);

@@ -200,13 +305,13 @@ impl<I: VCodeInst> MachBuffer<I> {
         let l = self.label_offsets.len() as u32;
         self.label_offsets.push(UNKNOWN_LABEL_OFFSET);
         self.label_aliases.push(UNKNOWN_LABEL);
-        debug!("MachBuffer: new label -> {:?}", MachLabel(l));
+        trace!("MachBuffer: new label -> {:?}", MachLabel(l));
         MachLabel(l)
     }
 
     /// Reserve the first N MachLabels for blocks.
     pub fn reserve_labels_for_blocks(&mut self, blocks: BlockIndex) {
-        debug!("MachBuffer: first {} labels are for blocks", blocks);
+        trace!("MachBuffer: first {} labels are for blocks", blocks);
         debug_assert!(self.label_offsets.is_empty());
         self.label_offsets
             .resize(blocks as usize, UNKNOWN_LABEL_OFFSET);

@@ -215,7 +320,7 @@ impl<I: VCodeInst> MachBuffer<I> {
 
     /// Bind a label to the current offset.
     pub fn bind_label(&mut self, label: MachLabel) {
-        debug!(
+        trace!(
             "MachBuffer: bind label {:?} at offset {}",
             label,
             self.cur_offset()

@@ -244,9 +349,11 @@ impl<I: VCodeInst> MachBuffer<I> {
     /// happen immediately, the buffer must already contain bytes at `offset` up
     /// to `offset + kind.patch_size()`.
     pub fn use_label_at_offset(&mut self, offset: CodeOffset, label: MachLabel, kind: I::LabelUse) {
-        debug!(
+        trace!(
             "MachBuffer: use_label_at_offset: offset {} label {:?} kind {:?}",
-            offset, label, kind
+            offset,
+            label,
+            kind
         );
         debug_assert!(offset + kind.patch_size() <= self.cur_offset());
 

@@ -310,14 +417,15 @@ impl<I: VCodeInst> MachBuffer<I> {
         self.data.truncate(b.start as usize);
         self.fixup_records.truncate(b.fixup);
         let cur_off = self.cur_offset();
-        debug!(
+        trace!(
             "truncate_last_branch: truncated {:?}; off now {}",
-            b, cur_off
+            b,
+            cur_off
         );
         for &mut (l, ref mut off) in self.labels_by_offset.iter_mut().rev() {
             if *off > cur_off {
                 *off = cur_off;
-                debug!(" -> label {:?} reassigned to {}", l, cur_off);
+                trace!(" -> label {:?} reassigned to {}", l, cur_off);
                 self.label_offsets[l.0 as usize] = cur_off;
             } else {
                 break;

@@ -326,13 +434,15 @@ impl<I: VCodeInst> MachBuffer<I> {
     }
 
     fn optimize_branches(&mut self) {
-        debug!(
+        trace!(
             "enter optimize_branches:\n b = {:?}\n l = {:?}\n f = {:?}",
-            self.latest_branches, self.labels_by_offset, self.fixup_records
+            self.latest_branches,
+            self.labels_by_offset,
+            self.fixup_records
         );
         while let Some(b) = self.latest_branches.last() {
             let cur_off = self.cur_offset();
-            debug!("optimize_branches: last branch {:?} at off {}", b, cur_off);
+            trace!("optimize_branches: last branch {:?} at off {}", b, cur_off);
             // If there has been any code emission since the end of the last branch or
             // label definition, then there's nothing we can edit (because we
             // don't move code once placed, only back up and overwrite), so

@@ -359,11 +469,11 @@ impl<I: VCodeInst> MachBuffer<I> {
                 // Set any label equal to current branch's start as an alias of
                 // the branch's target.
                 for &(l, off) in self.labels_by_offset.iter().rev() {
-                    debug!(" -> uncond: latest label {:?} at off {}", l, off);
+                    trace!(" -> uncond: latest label {:?} at off {}", l, off);
                     if off > b.start {
                         continue;
                     } else if off == b.start {
-                        debug!(" -> setting alias to {:?}", b.target);
+                        trace!(" -> setting alias to {:?}", b.target);
                         self.label_aliases[l.0 as usize] = b.target;
                     } else {
                         break;

@@ -375,12 +485,12 @@ impl<I: VCodeInst> MachBuffer<I> {
                 // Examine any immediately preceding branch.
                 if self.latest_branches.len() > 1 {
                     let prev_b = &self.latest_branches[self.latest_branches.len() - 2];
-                    debug!(" -> more than one branch; prev_b = {:?}", prev_b);
+                    trace!(" -> more than one branch; prev_b = {:?}", prev_b);
                     // This uncond is immediately after another uncond; we've
                     // already redirected labels to this uncond away; so we can
                     // truncate this uncond.
                     if prev_b.is_uncond() && prev_b.end == b.start {
-                        debug!(" -> uncond follows another uncond; truncating");
+                        trace!(" -> uncond follows another uncond; truncating");
                         self.truncate_last_branch();
                         continue;
                     }

@@ -395,7 +505,7 @@ impl<I: VCodeInst> MachBuffer<I> {
                         && prev_b.end == b.start
                         && self.resolve_label_offset(prev_b.target) == cur_off
                     {
-                        debug!(" -> uncond follows a conditional, and conditional's target resolves to current offset");
+                        trace!(" -> uncond follows a conditional, and conditional's target resolves to current offset");
                         let target = b.target;
                         let data = prev_b.inverted.clone().unwrap();
                         self.truncate_last_branch();

@@ -407,7 +517,7 @@ impl<I: VCodeInst> MachBuffer<I> {
                         self.data.extend_from_slice(&data[..]);
                         prev_b.inverted = Some(not_inverted);
                         self.fixup_records[prev_b.fixup].label = target;
-                        debug!(" -> reassigning target of condbr to {:?}", target);
+                        trace!(" -> reassigning target of condbr to {:?}", target);
                         prev_b.target = target;
                         continue;
                     }

@@ -420,7 +530,7 @@ impl<I: VCodeInst> MachBuffer<I> {
             // the current offset (end of branch) to the truncated
             // end-of-code.
             if self.resolve_label_offset(b.target) == cur_off {
-                debug!("branch with target == cur off; truncating");
+                trace!("branch with target == cur off; truncating");
                 self.truncate_last_branch();
             }
 

@@ -430,9 +540,11 @@ impl<I: VCodeInst> MachBuffer<I> {
 
         self.purge_latest_branches();
 
-        debug!(
+        trace!(
             "leave optimize_branches:\n b = {:?}\n l = {:?}\n f = {:?}",
-            self.latest_branches, self.labels_by_offset, self.fixup_records
+            self.latest_branches,
+            self.labels_by_offset,
+            self.fixup_records
         );
     }
 

@@ -440,7 +552,7 @@ impl<I: VCodeInst> MachBuffer<I> {
         let cur_off = self.cur_offset();
         if let Some(l) = self.latest_branches.last() {
             if l.end < cur_off {
-                debug!("purge_latest_branches: removing branch {:?}", l);
+                trace!("purge_latest_branches: removing branch {:?}", l);
                 self.latest_branches.clear();
             }
         }

@@ -498,9 +610,11 @@ impl<I: VCodeInst> MachBuffer<I> {
             kind,
         } in fixup_records.into_iter()
         {
-            debug!(
+            trace!(
                 "emit_island: fixup for label {:?} at offset {} kind {:?}",
-                label, offset, kind
+                label,
+                offset,
+                kind
             );
             // We eagerly perform fixups whose label targets are known, if not out
             // of range, to avoid unnecessary veneers.

@@ -516,7 +630,7 @@ impl<I: VCodeInst> MachBuffer<I> {
                 false
             };
 
-            debug!(
+            trace!(
                 " -> label_offset = {}, known = {}, in_range = {} (pos {} neg {})",
                 label_offset,
                 known,

@@ -530,7 +644,7 @@ impl<I: VCodeInst> MachBuffer<I> {
             if in_range {
                 debug_assert!(known); // implied by in_range.
                 let slice = &mut self.data[start..end];
-                debug!("patching in-range!");
+                trace!("patching in-range!");
                 kind.patch(slice, offset, label_offset);
             } else if !known && !kind.supports_veneer() {
                 // Nothing for now. Keep it for next round.

@@ -543,21 +657,23 @@ impl<I: VCodeInst> MachBuffer<I> {
                 // Allocate space for a veneer in the island.
                 self.align_to(I::LabelUse::ALIGN);
                 let veneer_offset = self.cur_offset();
-                debug!("making a veneer at {}", veneer_offset);
+                trace!("making a veneer at {}", veneer_offset);
                 let slice = &mut self.data[start..end];
                 // Patch the original label use to refer to the veneer.
-                debug!(
+                trace!(
                     "patching original at offset {} to veneer offset {}",
-                    offset, veneer_offset
+                    offset,
+                    veneer_offset
                 );
                 kind.patch(slice, offset, veneer_offset);
                 // Generate the veneer.
                 let veneer_slice = self.get_appended_space(kind.veneer_size() as usize);
                 let (veneer_fixup_off, veneer_label_use) =
                     kind.generate_veneer(veneer_slice, veneer_offset);
-                debug!(
+                trace!(
                     "generated veneer; fixup offset {}, label_use {:?}",
-                    veneer_fixup_off, veneer_label_use
+                    veneer_fixup_off,
+                    veneer_label_use
                 );
                 // If the label is known (but was just out of range), do the
                 // veneer label-use fixup now too; otherwise, save it for later.

@@ -565,7 +681,7 @@ impl<I: VCodeInst> MachBuffer<I> {
                     let start = veneer_fixup_off as usize;
                     let end = (veneer_fixup_off + veneer_label_use.patch_size()) as usize;
                     let veneer_slice = &mut self.data[start..end];
-                    debug!("doing veneer fixup right away too");
+                    trace!("doing veneer fixup right away too");
                     veneer_label_use.patch(veneer_slice, veneer_fixup_off, label_offset);
                 } else {
                     new_fixups.push(MachLabelFixup {
@@ -23,9 +23,9 @@ use alloc::vec::Vec;
 use log::debug;
 use smallvec::SmallVec;
 
-/// An "instruction color" partitions instructions by side-effecting ops. All
-/// instructions with the same "color" are guaranteed not to be separated by any
-/// side-effecting op (for this purpose, loads are also considered
+/// An "instruction color" partitions CLIF instructions by side-effecting ops.
+/// All instructions with the same "color" are guaranteed not to be separated by
+/// any side-effecting op (for this purpose, loads are also considered
 /// side-effecting, to avoid subtle questions w.r.t. the memory model), and
 /// furthermore, it is guaranteed that for any two instructions A and B such
 /// that color(A) == color(B), either A dominates B and B postdominates A, or

@@ -33,7 +33,8 @@ use smallvec::SmallVec;
 /// have the same color, trivially providing the second condition.) Intuitively,
 /// this means that the ops of the same color must always execute "together", as
 /// part of one atomic contiguous section of the dynamic execution trace, and
-/// they can be freely permuted without affecting program behavior.
+/// they can be freely permuted (modulo true dataflow dependencies) without
+/// affecting program behavior.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 pub struct InstColor(u32);
 impl InstColor {
@@ -122,7 +123,11 @@ pub trait LowerCtx {
     /// If the backend uses the register, rather than one of the other
     /// forms (constant or merging of the producing op), it must call
     /// `use_input_reg()` to ensure the producing inst is actually lowered
-    /// as well.
+    /// as well. Failing to do so may result in the instruction that generates
+    /// this value never being generated, thus resulting in incorrect execution.
+    /// For correctness, backends should thus wrap `get_input()` and
+    /// `use_input_regs()` with helpers that return a register only after
+    /// ensuring it is marked as used.
     fn get_input(&self, ir_inst: Inst, idx: usize) -> LowerInput;
     /// Get the `idx`th output register of the given IR instruction. When
     /// `backend.lower_inst_to_regs(ctx, inst)` is called, it is expected that
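The doc comment added above recommends that backends wrap `get_input()` and `use_input_reg()` in a helper, so a register is only handed out after being marked as used; the aarch64 `input_to_reg` helper seen earlier in this diff is the kind of helper being described. A self-contained toy of that pattern is sketched below; the types and method bodies are stand-ins, not the real `LowerCtx`.

```rust
// Self-contained toy of the wrapper pattern; not the real `LowerCtx`.
#[derive(Clone, Copy)]
struct LowerInputSketch {
    reg: usize,             // stand-in for the register of the producing inst
    _constant: Option<u64>, // Some(_) if the input is a known constant value
}

struct CtxSketch {
    used: Vec<bool>, // per-register flag: "this value is really read"
}

impl CtxSketch {
    fn get_input(&self, _ir_inst: u32, _idx: usize) -> LowerInputSketch {
        LowerInputSketch { reg: 0, _constant: None }
    }
    fn use_input_reg(&mut self, input: LowerInputSketch) {
        self.used[input.reg] = true;
    }
    /// The recommended helper: hand out the register only after marking the
    /// producing instruction as used, so it cannot be silently dropped.
    fn input_to_reg(&mut self, ir_inst: u32, idx: usize) -> usize {
        let input = self.get_input(ir_inst, idx);
        self.use_input_reg(input);
        input.reg
    }
}

fn main() {
    let mut ctx = CtxSketch { used: vec![false; 4] };
    let r = ctx.input_to_reg(0, 0);
    assert!(ctx.used[r]); // the producing instruction will be lowered too
}
```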
@@ -133,7 +138,7 @@ pub trait LowerCtx {
     // ask for an input to be gen'd into a register.
 
     /// Get a new temp.
-    fn tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg>;
+    fn alloc_tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg>;
     /// Emit a machine instruction.
     fn emit(&mut self, mach_inst: Self::I);
     /// Indicate that the given input uses the register returned by

@@ -477,7 +482,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
             // There's some overlap, so play safe and copy via temps.
             let mut tmp_regs: SmallVec<[Writable<Reg>; 16]> = SmallVec::new();
             for &ty in &phi_classes {
-                tmp_regs.push(self.tmp(I::rc_for_type(ty)?, ty));
+                tmp_regs.push(self.alloc_tmp(I::rc_for_type(ty)?, ty));
             }
 
             debug!("phi_temps = {:?}", tmp_regs);

@@ -721,6 +726,9 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
         Ok(vcode)
     }
 
+    /// Get the actual inputs for a value. This is the implementation for
+    /// `get_input()` but starting from the SSA value, which is not exposed to
+    /// the backend.
     fn get_input_for_val(&self, at_inst: Inst, val: Value) -> LowerInput {
         debug!("get_input_for_val: val {} at inst {}", val, at_inst);
         let mut reg = self.value_regs[val];

@@ -889,7 +897,7 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
         Writable::from_reg(self.value_regs[val])
     }
 
-    fn tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg> {
+    fn alloc_tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg> {
         let v = self.next_vreg;
         self.next_vreg += 1;
         let vreg = Reg::new_virtual(rc, v);