Implement initial emission of constants
This approach increases compile-time memory use, because extra bookkeeping is kept in order to de-duplicate the emitted constants and so reduce runtime memory size. As a first step, though, it provides an end-to-end mechanism for constants to be emitted in the MachBuffer islands.
This commit is contained in:
@@ -1979,29 +1979,10 @@ pub(crate) fn emit(
|
|||||||
sink.put1(*imm);
|
sink.put1(*imm);
|
||||||
}
|
}
|
||||||
|
|
||||||
Inst::XmmLoadConstSeq { val, dst, ty } => {
|
Inst::XmmLoadConst { src, dst, ty } => {
|
||||||
// This sequence is *one* instruction in the vcode, and is expanded only here at
|
let load_offset = Amode::rip_relative(sink.get_label_for_constant(*src));
|
||||||
// emission time, because we cannot allow the regalloc to insert spills/reloads in
|
|
||||||
// the middle; we depend on hardcoded PC-rel addressing below. TODO Eventually this
|
|
||||||
// "constant inline" code should be replaced by constant pool integration.
|
|
||||||
|
|
||||||
// Load the inline constant.
|
|
||||||
let constant_start_label = sink.get_label();
|
|
||||||
let load_offset = Amode::rip_relative(constant_start_label);
|
|
||||||
let load = Inst::load(*ty, load_offset, *dst, ExtKind::None, None);
|
let load = Inst::load(*ty, load_offset, *dst, ExtKind::None, None);
|
||||||
load.emit(sink, info, state);
|
load.emit(sink, info, state);
|
||||||
|
|
||||||
// Jump over the constant.
|
|
||||||
let constant_end_label = sink.get_label();
|
|
||||||
let jump = Inst::jmp_known(constant_end_label);
|
|
||||||
jump.emit(sink, info, state);
|
|
||||||
|
|
||||||
// Emit the constant.
|
|
||||||
sink.bind_label(constant_start_label);
|
|
||||||
for i in val.iter() {
|
|
||||||
sink.put1(*i);
|
|
||||||
}
|
|
||||||
sink.bind_label(constant_end_label);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Inst::XmmUninitializedValue { .. } => {
|
Inst::XmmUninitializedValue { .. } => {
|
||||||
|
|||||||
@@ -240,8 +240,8 @@ pub enum Inst {
|
|||||||
},
|
},
|
||||||
|
|
||||||
/// XMM (vector) unary op (to move a constant value into an xmm register): movups
|
/// XMM (vector) unary op (to move a constant value into an xmm register): movups
|
||||||
XmmLoadConstSeq {
|
XmmLoadConst {
|
||||||
val: Vec<u8>,
|
src: VCodeConstant,
|
||||||
dst: Writable<Reg>,
|
dst: Writable<Reg>,
|
||||||
ty: Type,
|
ty: Type,
|
||||||
},
|
},
|
||||||
@@ -553,7 +553,7 @@ impl Inst {
|
|||||||
| Inst::VirtualSPOffsetAdj { .. }
|
| Inst::VirtualSPOffsetAdj { .. }
|
||||||
| Inst::XmmCmove { .. }
|
| Inst::XmmCmove { .. }
|
||||||
| Inst::XmmCmpRmR { .. }
|
| Inst::XmmCmpRmR { .. }
|
||||||
| Inst::XmmLoadConstSeq { .. }
|
| Inst::XmmLoadConst { .. }
|
||||||
| Inst::XmmMinMaxSeq { .. }
|
| Inst::XmmMinMaxSeq { .. }
|
||||||
| Inst::XmmUninitializedValue { .. } => None,
|
| Inst::XmmUninitializedValue { .. } => None,
|
||||||
|
|
||||||
@@ -695,11 +695,10 @@ impl Inst {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn xmm_load_const_seq(val: Vec<u8>, dst: Writable<Reg>, ty: Type) -> Inst {
|
pub(crate) fn xmm_load_const(src: VCodeConstant, dst: Writable<Reg>, ty: Type) -> Inst {
|
||||||
debug_assert!(val.len() == 16);
|
|
||||||
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
|
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
|
||||||
debug_assert!(ty.is_vector() && ty.bits() == 128);
|
debug_assert!(ty.is_vector() && ty.bits() == 128);
|
||||||
Inst::XmmLoadConstSeq { val, dst, ty }
|
Inst::XmmLoadConst { src, dst, ty }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Convenient helper for unary float operations.
|
/// Convenient helper for unary float operations.
|
||||||
@@ -1506,8 +1505,8 @@ impl PrettyPrint for Inst {
|
|||||||
dst.show_rru(mb_rru),
|
dst.show_rru(mb_rru),
|
||||||
),
|
),
|
||||||
|
|
||||||
Inst::XmmLoadConstSeq { val, dst, .. } => {
|
Inst::XmmLoadConst { src, dst, .. } => {
|
||||||
format!("load_const ${:?}, {}", val, dst.show_rru(mb_rru),)
|
format!("load_const {:?}, {}", src, dst.show_rru(mb_rru),)
|
||||||
}
|
}
|
||||||
|
|
||||||
Inst::XmmToGpr {
|
Inst::XmmToGpr {
|
||||||
@@ -1937,7 +1936,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Inst::XmmUninitializedValue { dst } => collector.add_def(*dst),
|
Inst::XmmUninitializedValue { dst } => collector.add_def(*dst),
|
||||||
Inst::XmmLoadConstSeq { dst, .. } => collector.add_def(*dst),
|
Inst::XmmLoadConst { dst, .. } => collector.add_def(*dst),
|
||||||
Inst::XmmMinMaxSeq { lhs, rhs_dst, .. } => {
|
Inst::XmmMinMaxSeq { lhs, rhs_dst, .. } => {
|
||||||
collector.add_use(*lhs);
|
collector.add_use(*lhs);
|
||||||
collector.add_mod(*rhs_dst);
|
collector.add_mod(*rhs_dst);
|
||||||
@@ -2274,7 +2273,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
Inst::XmmUninitializedValue { ref mut dst, .. } => {
|
Inst::XmmUninitializedValue { ref mut dst, .. } => {
|
||||||
map_def(mapper, dst);
|
map_def(mapper, dst);
|
||||||
}
|
}
|
||||||
Inst::XmmLoadConstSeq { ref mut dst, .. } => {
|
Inst::XmmLoadConst { ref mut dst, .. } => {
|
||||||
map_def(mapper, dst);
|
map_def(mapper, dst);
|
||||||
}
|
}
|
||||||
Inst::XmmMinMaxSeq {
|
Inst::XmmMinMaxSeq {
|
||||||
@@ -2685,7 +2684,7 @@ impl MachInst for Inst {
|
|||||||
} else {
|
} else {
|
||||||
ret.push(Inst::imm(
|
ret.push(Inst::imm(
|
||||||
OperandSize::from_bytes(ty.bytes()),
|
OperandSize::from_bytes(ty.bytes()),
|
||||||
value,
|
value.into(),
|
||||||
to_reg,
|
to_reg,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3345,17 +3345,21 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Vconst => {
|
Opcode::Vconst => {
|
||||||
let val = if let &InstructionData::UnaryConst {
|
let used_constant = if let &InstructionData::UnaryConst {
|
||||||
constant_handle, ..
|
constant_handle, ..
|
||||||
} = ctx.data(insn)
|
} = ctx.data(insn)
|
||||||
{
|
{
|
||||||
ctx.get_constant_data(constant_handle).clone().into_vec()
|
ctx.use_constant(VCodeConstantData::Pool(
|
||||||
|
constant_handle,
|
||||||
|
ctx.get_constant_data(constant_handle).clone(),
|
||||||
|
))
|
||||||
} else {
|
} else {
|
||||||
unreachable!("vconst should always have unary_const format")
|
unreachable!("vconst should always have unary_const format")
|
||||||
};
|
};
|
||||||
|
// TODO use Inst::gen_constant() instead.
|
||||||
let dst = get_output_reg(ctx, outputs[0]);
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
ctx.emit(Inst::xmm_load_const_seq(val, dst, ty));
|
ctx.emit(Inst::xmm_load_const(used_constant, dst, ty));
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::RawBitcast => {
|
Opcode::RawBitcast => {
|
||||||
@@ -3396,8 +3400,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
.map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
|
.map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
|
||||||
.map(zero_unknown_lane_index)
|
.map(zero_unknown_lane_index)
|
||||||
.collect();
|
.collect();
|
||||||
|
let constant = ctx.use_constant(VCodeConstantData::Generated(constructed_mask));
|
||||||
let tmp = ctx.alloc_tmp(RegClass::V128, types::I8X16);
|
let tmp = ctx.alloc_tmp(RegClass::V128, types::I8X16);
|
||||||
ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp, ty));
|
ctx.emit(Inst::xmm_load_const(constant, tmp, ty));
|
||||||
// After loading the constructed mask in a temporary register, we use this to
|
// After loading the constructed mask in a temporary register, we use this to
|
||||||
// shuffle the `dst` register (remember that, in this case, it is the same as
|
// shuffle the `dst` register (remember that, in this case, it is the same as
|
||||||
// `src` so we disregard this register).
|
// `src` so we disregard this register).
|
||||||
@@ -3416,8 +3421,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let tmp0 = ctx.alloc_tmp(RegClass::V128, lhs_ty);
|
let tmp0 = ctx.alloc_tmp(RegClass::V128, lhs_ty);
|
||||||
ctx.emit(Inst::gen_move(tmp0, lhs, lhs_ty));
|
ctx.emit(Inst::gen_move(tmp0, lhs, lhs_ty));
|
||||||
let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect();
|
let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect();
|
||||||
|
let constant = ctx.use_constant(VCodeConstantData::Generated(constructed_mask));
|
||||||
let tmp1 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
|
let tmp1 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
|
||||||
ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp1, ty));
|
ctx.emit(Inst::xmm_load_const(constant, tmp1, ty));
|
||||||
ctx.emit(Inst::xmm_rm_r(
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
SseOpcode::Pshufb,
|
SseOpcode::Pshufb,
|
||||||
RegMem::from(tmp1),
|
RegMem::from(tmp1),
|
||||||
@@ -3431,8 +3437,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
.map(|b| b.wrapping_sub(16))
|
.map(|b| b.wrapping_sub(16))
|
||||||
.map(zero_unknown_lane_index)
|
.map(zero_unknown_lane_index)
|
||||||
.collect();
|
.collect();
|
||||||
|
let constant = ctx.use_constant(VCodeConstantData::Generated(constructed_mask));
|
||||||
let tmp2 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
|
let tmp2 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
|
||||||
ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp2, ty));
|
ctx.emit(Inst::xmm_load_const(constant, tmp2, ty));
|
||||||
ctx.emit(Inst::xmm_rm_r(
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
SseOpcode::Pshufb,
|
SseOpcode::Pshufb,
|
||||||
RegMem::from(tmp2),
|
RegMem::from(tmp2),
|
||||||
@@ -3469,11 +3476,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
|
|
||||||
// Create a mask for zeroing out-of-bounds lanes of the swizzle mask.
|
// Create a mask for zeroing out-of-bounds lanes of the swizzle mask.
|
||||||
let zero_mask = ctx.alloc_tmp(RegClass::V128, types::I8X16);
|
let zero_mask = ctx.alloc_tmp(RegClass::V128, types::I8X16);
|
||||||
let zero_mask_value = vec![
|
static ZERO_MASK_VALUE: [u8; 16] = [
|
||||||
0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
|
0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
|
||||||
0x70, 0x70,
|
0x70, 0x70,
|
||||||
];
|
];
|
||||||
ctx.emit(Inst::xmm_load_const_seq(zero_mask_value, zero_mask, ty));
|
let constant = ctx.use_constant(VCodeConstantData::WellKnown(&ZERO_MASK_VALUE));
|
||||||
|
ctx.emit(Inst::xmm_load_const(constant, zero_mask, ty));
|
||||||
|
|
||||||
// Use the `zero_mask` on a writable `swizzle_mask`.
|
// Use the `zero_mask` on a writable `swizzle_mask`.
|
||||||
let swizzle_mask = Writable::from_reg(swizzle_mask);
|
let swizzle_mask = Writable::from_reg(swizzle_mask);
|
||||||
|
|||||||
@@ -142,8 +142,9 @@
|
|||||||
|
|
||||||
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, StackMap};
|
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, StackMap};
|
||||||
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};
|
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};
|
||||||
use crate::machinst::{BlockIndex, MachInstLabelUse, VCodeInst};
|
use crate::machinst::{BlockIndex, MachInstLabelUse, VCodeConstant, VCodeConstants, VCodeInst};
|
||||||
use crate::timing;
|
use crate::timing;
|
||||||
|
use cranelift_entity::{entity_impl, SecondaryMap};
|
||||||
|
|
||||||
use log::trace;
|
use log::trace;
|
||||||
use smallvec::SmallVec;
|
use smallvec::SmallVec;
|
||||||
@@ -218,6 +219,8 @@ pub struct MachBuffer<I: VCodeInst> {
|
|||||||
/// when the offset has grown past this (`labels_at_tail_off`) point.
|
/// when the offset has grown past this (`labels_at_tail_off`) point.
|
||||||
/// Always <= `cur_offset()`.
|
/// Always <= `cur_offset()`.
|
||||||
labels_at_tail_off: CodeOffset,
|
labels_at_tail_off: CodeOffset,
|
||||||
|
/// Map used constants to their [MachLabel].
|
||||||
|
constant_labels: SecondaryMap<VCodeConstant, MachLabel>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A `MachBuffer` once emission is completed: holds generated code and records,
|
/// A `MachBuffer` once emission is completed: holds generated code and records,
|
||||||
@@ -248,6 +251,7 @@ static UNKNOWN_LABEL: MachLabel = MachLabel(0xffff_ffff);
|
|||||||
/// appropriately when the label's location is eventually known.
|
/// appropriately when the label's location is eventually known.
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
pub struct MachLabel(u32);
|
pub struct MachLabel(u32);
|
||||||
|
entity_impl!(MachLabel);
|
||||||
|
|
||||||
impl MachLabel {
|
impl MachLabel {
|
||||||
/// Get a label for a block. (The first N MachLabels are always reserved for
|
/// Get a label for a block. (The first N MachLabels are always reserved for
|
||||||
@@ -267,6 +271,12 @@ impl MachLabel {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Default for MachLabel {
|
||||||
|
fn default() -> Self {
|
||||||
|
UNKNOWN_LABEL
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A stack map extent, when creating a stack map.
|
/// A stack map extent, when creating a stack map.
|
||||||
pub enum StackMapExtent {
|
pub enum StackMapExtent {
|
||||||
/// The stack map starts at this instruction, and ends after the number of upcoming bytes
|
/// The stack map starts at this instruction, and ends after the number of upcoming bytes
|
||||||
@@ -299,6 +309,7 @@ impl<I: VCodeInst> MachBuffer<I> {
|
|||||||
latest_branches: SmallVec::new(),
|
latest_branches: SmallVec::new(),
|
||||||
labels_at_tail: SmallVec::new(),
|
labels_at_tail: SmallVec::new(),
|
||||||
labels_at_tail_off: 0,
|
labels_at_tail_off: 0,
|
||||||
|
constant_labels: SecondaryMap::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -468,6 +479,24 @@ impl<I: VCodeInst> MachBuffer<I> {
|
|||||||
// Post-invariant: as for `get_label()`.
|
// Post-invariant: as for `get_label()`.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reserve the next N MachLabels for constants.
|
||||||
|
pub fn reserve_labels_for_constants(&mut self, constants: &VCodeConstants) {
|
||||||
|
trace!(
|
||||||
|
"MachBuffer: next {} labels are for constants",
|
||||||
|
constants.len()
|
||||||
|
);
|
||||||
|
for c in constants.keys() {
|
||||||
|
self.constant_labels[c] = self.get_label();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Post-invariant: as for `get_label()`.
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Retrieve the reserved label for a constant.
|
||||||
|
pub fn get_label_for_constant(&self, constant: VCodeConstant) -> MachLabel {
|
||||||
|
self.constant_labels[constant]
|
||||||
|
}
|
||||||
|
|
||||||
/// Bind a label to the current offset. A label can only be bound once.
|
/// Bind a label to the current offset. A label can only be bound once.
|
||||||
pub fn bind_label(&mut self, label: MachLabel) {
|
pub fn bind_label(&mut self, label: MachLabel) {
|
||||||
trace!(
|
trace!(
|
||||||
@@ -998,7 +1027,13 @@ impl<I: VCodeInst> MachBuffer<I> {
|
|||||||
data: &[u8],
|
data: &[u8],
|
||||||
max_distance: CodeOffset,
|
max_distance: CodeOffset,
|
||||||
) {
|
) {
|
||||||
let deadline = self.cur_offset() + max_distance;
|
trace!(
|
||||||
|
"defer_constant: eventually emit {} bytes aligned to {} at label {:?}",
|
||||||
|
data.len(),
|
||||||
|
align,
|
||||||
|
label
|
||||||
|
);
|
||||||
|
let deadline = self.cur_offset().saturating_add(max_distance);
|
||||||
self.island_worst_case_size += data.len() as CodeOffset;
|
self.island_worst_case_size += data.len() as CodeOffset;
|
||||||
self.island_worst_case_size &= !(I::LabelUse::ALIGN - 1);
|
self.island_worst_case_size &= !(I::LabelUse::ALIGN - 1);
|
||||||
self.pending_constants.push(MachLabelConstant {
|
self.pending_constants.push(MachLabelConstant {
|
||||||
@@ -1136,14 +1171,6 @@ impl<I: VCodeInst> MachBuffer<I> {
|
|||||||
pub fn finish(mut self) -> MachBufferFinalized {
|
pub fn finish(mut self) -> MachBufferFinalized {
|
||||||
let _tt = timing::vcode_emit_finish();
|
let _tt = timing::vcode_emit_finish();
|
||||||
|
|
||||||
// Ensure that all labels are defined. This is a full (release-mode)
|
|
||||||
// assert because we must avoid looping indefinitely below; an
|
|
||||||
// unresolved label will prevent the fixup_records vec from emptying.
|
|
||||||
assert!(self
|
|
||||||
.label_offsets
|
|
||||||
.iter()
|
|
||||||
.all(|&off| off != UNKNOWN_LABEL_OFFSET));
|
|
||||||
|
|
||||||
while !self.pending_constants.is_empty() || !self.fixup_records.is_empty() {
|
while !self.pending_constants.is_empty() || !self.fixup_records.is_empty() {
|
||||||
// `emit_island()` will emit any pending veneers and constants, and
|
// `emit_island()` will emit any pending veneers and constants, and
|
||||||
// as a side-effect, will also take care of any fixups with resolved
|
// as a side-effect, will also take care of any fixups with resolved
|
||||||
@@ -1151,6 +1178,11 @@ impl<I: VCodeInst> MachBuffer<I> {
|
|||||||
self.emit_island();
|
self.emit_island();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ensure that all labels have been fixed up after the last island is emitted. This is a
|
||||||
|
// full (release-mode) assert because an unresolved label means the emitted code is
|
||||||
|
// incorrect.
|
||||||
|
assert!(self.fixup_records.is_empty());
|
||||||
|
|
||||||
MachBufferFinalized {
|
MachBufferFinalized {
|
||||||
data: self.data,
|
data: self.data,
|
||||||
relocs: self.relocs,
|
relocs: self.relocs,
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ use crate::ir::{
|
|||||||
};
|
};
|
||||||
use crate::machinst::{
|
use crate::machinst::{
|
||||||
ABICallee, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode, VCodeBuilder,
|
ABICallee, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode, VCodeBuilder,
|
||||||
VCodeInst,
|
VCodeConstant, VCodeConstantData, VCodeConstants, VCodeInst,
|
||||||
};
|
};
|
||||||
use crate::CodegenResult;
|
use crate::CodegenResult;
|
||||||
|
|
||||||
@@ -162,6 +162,8 @@ pub trait LowerCtx {
|
|||||||
fn is_reg_needed(&self, ir_inst: Inst, reg: Reg) -> bool;
|
fn is_reg_needed(&self, ir_inst: Inst, reg: Reg) -> bool;
|
||||||
/// Retrieve constant data given a handle.
|
/// Retrieve constant data given a handle.
|
||||||
fn get_constant_data(&self, constant_handle: Constant) -> &ConstantData;
|
fn get_constant_data(&self, constant_handle: Constant) -> &ConstantData;
|
||||||
|
/// Indicate that a constant should be emitted.
|
||||||
|
fn use_constant(&mut self, constant: VCodeConstantData) -> VCodeConstant;
|
||||||
/// Retrieve the value immediate from an instruction. This will perform necessary lookups on the
|
/// Retrieve the value immediate from an instruction. This will perform necessary lookups on the
|
||||||
/// `DataFlowGraph` to retrieve even large immediates.
|
/// `DataFlowGraph` to retrieve even large immediates.
|
||||||
fn get_immediate(&self, ir_inst: Inst) -> Option<DataValue>;
|
fn get_immediate(&self, ir_inst: Inst) -> Option<DataValue>;
|
||||||
@@ -318,7 +320,8 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
|||||||
emit_info: I::Info,
|
emit_info: I::Info,
|
||||||
block_order: BlockLoweringOrder,
|
block_order: BlockLoweringOrder,
|
||||||
) -> CodegenResult<Lower<'func, I>> {
|
) -> CodegenResult<Lower<'func, I>> {
|
||||||
let mut vcode = VCodeBuilder::new(abi, emit_info, block_order);
|
let constants = VCodeConstants::with_capacity(f.dfg.constants.len());
|
||||||
|
let mut vcode = VCodeBuilder::new(abi, emit_info, block_order, constants);
|
||||||
|
|
||||||
let mut next_vreg: u32 = 0;
|
let mut next_vreg: u32 = 0;
|
||||||
|
|
||||||
@@ -1010,6 +1013,10 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
|
|||||||
self.f.dfg.constants.get(constant_handle)
|
self.f.dfg.constants.get(constant_handle)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn use_constant(&mut self, constant: VCodeConstantData) -> VCodeConstant {
|
||||||
|
self.vcode.constants().insert(constant)
|
||||||
|
}
|
||||||
|
|
||||||
fn get_immediate(&self, ir_inst: Inst) -> Option<DataValue> {
|
fn get_immediate(&self, ir_inst: Inst) -> Option<DataValue> {
|
||||||
let inst_data = self.data(ir_inst);
|
let inst_data = self.data(ir_inst);
|
||||||
match inst_data {
|
match inst_data {
|
||||||
|
|||||||
@@ -17,7 +17,7 @@
|
|||||||
//! See the main module comment in `mod.rs` for more details on the VCode-based
|
//! See the main module comment in `mod.rs` for more details on the VCode-based
|
||||||
//! backend pipeline.
|
//! backend pipeline.
|
||||||
|
|
||||||
use crate::ir::{self, types, SourceLoc};
|
use crate::ir::{self, types, Constant, ConstantData, SourceLoc};
|
||||||
use crate::machinst::*;
|
use crate::machinst::*;
|
||||||
use crate::settings;
|
use crate::settings;
|
||||||
use crate::timing;
|
use crate::timing;
|
||||||
@@ -31,7 +31,9 @@ use regalloc::{
|
|||||||
|
|
||||||
use alloc::boxed::Box;
|
use alloc::boxed::Box;
|
||||||
use alloc::{borrow::Cow, vec::Vec};
|
use alloc::{borrow::Cow, vec::Vec};
|
||||||
|
use cranelift_entity::{entity_impl, Keys, PrimaryMap};
|
||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::iter;
|
use std::iter;
|
||||||
use std::string::String;
|
use std::string::String;
|
||||||
@@ -110,6 +112,9 @@ pub struct VCode<I: VCodeInst> {
|
|||||||
|
|
||||||
/// Instruction end offsets
|
/// Instruction end offsets
|
||||||
insts_layout: RefCell<(Vec<u32>, u32)>,
|
insts_layout: RefCell<(Vec<u32>, u32)>,
|
||||||
|
|
||||||
|
/// Constants.
|
||||||
|
constants: VCodeConstants,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A builder for a VCode function body. This builder is designed for the
|
/// A builder for a VCode function body. This builder is designed for the
|
||||||
@@ -149,9 +154,10 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||||||
abi: Box<dyn ABICallee<I = I>>,
|
abi: Box<dyn ABICallee<I = I>>,
|
||||||
emit_info: I::Info,
|
emit_info: I::Info,
|
||||||
block_order: BlockLoweringOrder,
|
block_order: BlockLoweringOrder,
|
||||||
|
constants: VCodeConstants,
|
||||||
) -> VCodeBuilder<I> {
|
) -> VCodeBuilder<I> {
|
||||||
let reftype_class = I::ref_type_regclass(abi.flags());
|
let reftype_class = I::ref_type_regclass(abi.flags());
|
||||||
let vcode = VCode::new(abi, emit_info, block_order);
|
let vcode = VCode::new(abi, emit_info, block_order, constants);
|
||||||
let stack_map_info = StackmapRequestInfo {
|
let stack_map_info = StackmapRequestInfo {
|
||||||
reftype_class,
|
reftype_class,
|
||||||
reftyped_vregs: vec![],
|
reftyped_vregs: vec![],
|
||||||
@@ -255,6 +261,11 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||||||
self.cur_srcloc = srcloc;
|
self.cur_srcloc = srcloc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Access the constants.
|
||||||
|
pub fn constants(&mut self) -> &mut VCodeConstants {
|
||||||
|
&mut self.vcode.constants
|
||||||
|
}
|
||||||
|
|
||||||
/// Build the final VCode, returning the vcode itself as well as auxiliary
|
/// Build the final VCode, returning the vcode itself as well as auxiliary
|
||||||
/// information, such as the stack map request information.
|
/// information, such as the stack map request information.
|
||||||
pub fn build(self) -> (VCode<I>, StackmapRequestInfo) {
|
pub fn build(self) -> (VCode<I>, StackmapRequestInfo) {
|
||||||
@@ -284,6 +295,7 @@ impl<I: VCodeInst> VCode<I> {
|
|||||||
abi: Box<dyn ABICallee<I = I>>,
|
abi: Box<dyn ABICallee<I = I>>,
|
||||||
emit_info: I::Info,
|
emit_info: I::Info,
|
||||||
block_order: BlockLoweringOrder,
|
block_order: BlockLoweringOrder,
|
||||||
|
constants: VCodeConstants,
|
||||||
) -> VCode<I> {
|
) -> VCode<I> {
|
||||||
VCode {
|
VCode {
|
||||||
liveins: abi.liveins(),
|
liveins: abi.liveins(),
|
||||||
@@ -303,6 +315,7 @@ impl<I: VCodeInst> VCode<I> {
|
|||||||
safepoint_slots: vec![],
|
safepoint_slots: vec![],
|
||||||
prologue_epilogue_ranges: None,
|
prologue_epilogue_ranges: None,
|
||||||
insts_layout: RefCell::new((vec![], 0)),
|
insts_layout: RefCell::new((vec![], 0)),
|
||||||
|
constants,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -466,7 +479,10 @@ impl<I: VCodeInst> VCode<I> {
|
|||||||
let mut buffer = MachBuffer::new();
|
let mut buffer = MachBuffer::new();
|
||||||
let mut state = I::State::new(&*self.abi);
|
let mut state = I::State::new(&*self.abi);
|
||||||
|
|
||||||
buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex); // first N MachLabels are simply block indices.
|
// The first M MachLabels are reserved for block indices, the next N MachLabels for
|
||||||
|
// constants.
|
||||||
|
buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex);
|
||||||
|
buffer.reserve_labels_for_constants(&self.constants);
|
||||||
|
|
||||||
let mut insts_layout = vec![0; self.insts.len()];
|
let mut insts_layout = vec![0; self.insts.len()];
|
||||||
|
|
||||||
@@ -530,6 +546,12 @@ impl<I: VCodeInst> VCode<I> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emit the constants used by the function.
|
||||||
|
for (constant, data) in self.constants.iter() {
|
||||||
|
let label = buffer.get_label_for_constant(constant);
|
||||||
|
buffer.defer_constant(label, data.alignment(), data.as_slice(), u32::max_value());
|
||||||
|
}
|
||||||
|
|
||||||
*self.insts_layout.borrow_mut() = (insts_layout, buffer.cur_offset());
|
*self.insts_layout.borrow_mut() = (insts_layout, buffer.cur_offset());
|
||||||
|
|
||||||
buffer
|
buffer
|
||||||
@@ -735,3 +757,141 @@ impl<I: VCodeInst> PrettyPrint for VCode<I> {
|
|||||||
s
|
s
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// This structure tracks the large constants used in VCode that will be emitted separately by the
|
||||||
|
/// [MachBuffer].
|
||||||
|
///
|
||||||
|
/// First, during the lowering phase, constants are inserted using
|
||||||
|
/// [VCodeConstants::insert]; an intermediate handle, [VCodeConstant], tracks what constants are
|
||||||
|
/// used in this phase. Some deduplication is performed, when possible, as constant
|
||||||
|
/// values are inserted.
|
||||||
|
///
|
||||||
|
/// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
|
||||||
|
/// constants so that instructions can refer to the value's memory location. The [MachBuffer]
|
||||||
|
/// then writes the constant values to the buffer.
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct VCodeConstants {
|
||||||
|
constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
|
||||||
|
pool_uses: HashMap<Constant, VCodeConstant>,
|
||||||
|
well_known_uses: HashMap<*const [u8], VCodeConstant>,
|
||||||
|
}
|
||||||
|
impl VCodeConstants {
|
||||||
|
/// Initialize the structure with the expected number of constants.
|
||||||
|
pub fn with_capacity(expected_num_constants: usize) -> Self {
|
||||||
|
Self {
|
||||||
|
constants: PrimaryMap::with_capacity(expected_num_constants),
|
||||||
|
pool_uses: HashMap::with_capacity(expected_num_constants),
|
||||||
|
well_known_uses: HashMap::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Insert a constant; using this method indicates that a constant value will be used and thus
|
||||||
|
/// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
|
||||||
|
/// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not
|
||||||
|
/// [VCodeConstantData::Generated].
|
||||||
|
pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
|
||||||
|
match data {
|
||||||
|
VCodeConstantData::Generated(_) => self.constants.push(data),
|
||||||
|
VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
|
||||||
|
None => {
|
||||||
|
let vcode_constant = self.constants.push(data);
|
||||||
|
self.pool_uses.insert(constant, vcode_constant);
|
||||||
|
vcode_constant
|
||||||
|
}
|
||||||
|
Some(&vcode_constant) => vcode_constant,
|
||||||
|
},
|
||||||
|
VCodeConstantData::WellKnown(data_ref) => {
|
||||||
|
match self.well_known_uses.get(&(data_ref as *const [u8])) {
|
||||||
|
None => {
|
||||||
|
let vcode_constant = self.constants.push(data);
|
||||||
|
self.well_known_uses
|
||||||
|
.insert(data_ref as *const [u8], vcode_constant);
|
||||||
|
vcode_constant
|
||||||
|
}
|
||||||
|
Some(&vcode_constant) => vcode_constant,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Retrieve a byte slice for the given [VCodeConstant], if available.
|
||||||
|
pub fn get(&self, constant: VCodeConstant) -> Option<&[u8]> {
|
||||||
|
self.constants.get(constant).map(|d| d.as_slice())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the number of constants inserted.
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
self.constants.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterate over the [VCodeConstant] keys inserted in this structure.
|
||||||
|
pub fn keys(&self) -> Keys<VCodeConstant> {
|
||||||
|
self.constants.keys()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterate over the [VCodeConstant] keys and the associated [VCodeConstantData] inserted in this
|
||||||
|
/// structure.
|
||||||
|
pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
|
||||||
|
self.constants.iter()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A use of a constant by one or more VCode instructions; see [VCodeConstants].
|
||||||
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
|
pub struct VCodeConstant(u32);
|
||||||
|
entity_impl!(VCodeConstant);
|
||||||
|
|
||||||
|
/// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
|
||||||
|
/// these separately instead of as raw byte buffers allows us to avoid some duplication.
|
||||||
|
pub enum VCodeConstantData {
|
||||||
|
/// A constant already present in the Cranelift IR
|
||||||
|
/// [ConstantPool](crate::ir::constant::ConstantPool).
|
||||||
|
Pool(Constant, ConstantData),
|
||||||
|
/// A reference to a well-known constant value that is statically encoded within the compiler.
|
||||||
|
WellKnown(&'static [u8]),
|
||||||
|
/// A constant value generated during lowering; the value may depend on the instruction context
|
||||||
|
/// which makes it difficult to de-duplicate--if possible, use other variants.
|
||||||
|
Generated(ConstantData),
|
||||||
|
}
|
||||||
|
impl VCodeConstantData {
|
||||||
|
/// Retrieve the constant data as a byte slice.
|
||||||
|
pub fn as_slice(&self) -> &[u8] {
|
||||||
|
match self {
|
||||||
|
VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
|
||||||
|
VCodeConstantData::WellKnown(d) => d,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculate the alignment of the constant data.
|
||||||
|
pub fn alignment(&self) -> u32 {
|
||||||
|
if self.as_slice().len() <= 8 {
|
||||||
|
8
|
||||||
|
} else {
|
||||||
|
16
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use super::*;
|
||||||
|
use std::mem::{size_of, size_of_val};
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn size_of_constant_structs() {
|
||||||
|
assert_eq!(size_of::<Constant>(), 4);
|
||||||
|
assert_eq!(size_of::<VCodeConstant>(), 4);
|
||||||
|
assert_eq!(size_of::<ConstantData>(), 24);
|
||||||
|
assert_eq!(size_of::<VCodeConstantData>(), 32);
|
||||||
|
assert_eq!(
|
||||||
|
size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
|
||||||
|
24
|
||||||
|
);
|
||||||
|
assert_eq!(size_of::<HashMap<Constant, VCodeConstant>>(), 48);
|
||||||
|
assert_eq!(size_of::<HashMap<*const [u8], VCodeConstant>>(), 48);
|
||||||
|
assert_eq!(size_of::<VCodeConstants>(), 120);
|
||||||
|
assert_eq!(size_of_val(&VCodeConstants::with_capacity(0)), 120);
|
||||||
|
// TODO This structure could use some significant memory-size optimization. The use of
|
||||||
|
// HashMap to deduplicate both pool and well-known constants is clearly an issue.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -12,11 +12,11 @@ block0:
|
|||||||
v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0
|
v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0
|
||||||
return v2
|
return v2
|
||||||
}
|
}
|
||||||
; check: load_const $$[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], %xmm1
|
; check: load_const VCodeConstant(3), %xmm1
|
||||||
; nextln: load_const $$[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], %xmm0
|
; nextln: load_const VCodeConstant(2), %xmm0
|
||||||
; nextln: load_const $$[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128], %xmm2
|
; nextln: load_const VCodeConstant(0), %xmm2
|
||||||
; nextln: pshufb %xmm2, %xmm1
|
; nextln: pshufb %xmm2, %xmm1
|
||||||
; nextln: load_const $$[128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 1], %xmm2
|
; nextln: load_const VCodeConstant(1), %xmm2
|
||||||
; nextln: pshufb %xmm2, %xmm0
|
; nextln: pshufb %xmm2, %xmm0
|
||||||
; nextln: orps %xmm1, %xmm0
|
; nextln: orps %xmm1, %xmm0
|
||||||
|
|
||||||
@@ -27,8 +27,8 @@ block0:
|
|||||||
v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1
|
v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1
|
||||||
return v2
|
return v2
|
||||||
}
|
}
|
||||||
; check: load_const $$[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], %xmm0
|
; check: load_const VCodeConstant(1), %xmm0
|
||||||
; nextln: load_const $$[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3], %xmm1
|
; nextln: load_const VCodeConstant(0), %xmm1
|
||||||
; nextln: pshufb %xmm1, %xmm0
|
; nextln: pshufb %xmm1, %xmm0
|
||||||
|
|
||||||
|
|
||||||
@@ -42,9 +42,9 @@ block0:
|
|||||||
v2 = swizzle.i8x16 v0, v1
|
v2 = swizzle.i8x16 v0, v1
|
||||||
return v2
|
return v2
|
||||||
}
|
}
|
||||||
; check: load_const $$[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], %xmm1
|
; check: load_const VCodeConstant(1), %xmm1
|
||||||
; nextln: load_const $$[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], %xmm0
|
; nextln: load_const VCodeConstant(1), %xmm0
|
||||||
; nextln: load_const $$[112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112], %xmm2
|
; nextln: load_const VCodeConstant(0), %xmm2
|
||||||
; nextln: paddusb %xmm2, %xmm0
|
; nextln: paddusb %xmm2, %xmm0
|
||||||
; nextln: pshufb %xmm0, %xmm1
|
; nextln: pshufb %xmm0, %xmm1
|
||||||
; nextln: movdqa %xmm1, %xmm0
|
; nextln: movdqa %xmm1, %xmm0
|
||||||
|
|||||||
Reference in New Issue
Block a user