Implement initial emission of constants

This approach suffers from compile-time memory bloat, a consequence of de-duplicating the emitted constants to reduce runtime memory size. As a first step, though, this provides an end-to-end mechanism for emitting constants in the MachBuffer islands.
This commit is contained in:
Andrew Brown
2020-10-27 11:54:15 -07:00
parent efa87d4c17
commit 83f182b390
7 changed files with 251 additions and 64 deletions

View File

@@ -1979,29 +1979,10 @@ pub(crate) fn emit(
sink.put1(*imm);
}
Inst::XmmLoadConstSeq { val, dst, ty } => {
// This sequence is *one* instruction in the vcode, and is expanded only here at
// emission time, because we cannot allow the regalloc to insert spills/reloads in
// the middle; we depend on hardcoded PC-rel addressing below. TODO Eventually this
// "constant inline" code should be replaced by constant pool integration.
// Load the inline constant.
let constant_start_label = sink.get_label();
let load_offset = Amode::rip_relative(constant_start_label);
Inst::XmmLoadConst { src, dst, ty } => {
let load_offset = Amode::rip_relative(sink.get_label_for_constant(*src));
let load = Inst::load(*ty, load_offset, *dst, ExtKind::None, None);
load.emit(sink, info, state);
// Jump over the constant.
let constant_end_label = sink.get_label();
let jump = Inst::jmp_known(constant_end_label);
jump.emit(sink, info, state);
// Emit the constant.
sink.bind_label(constant_start_label);
for i in val.iter() {
sink.put1(*i);
}
sink.bind_label(constant_end_label);
}
Inst::XmmUninitializedValue { .. } => {

View File

@@ -240,8 +240,8 @@ pub enum Inst {
},
/// XMM (vector) unary op (to move a constant value into an xmm register): movups
XmmLoadConstSeq {
val: Vec<u8>,
XmmLoadConst {
src: VCodeConstant,
dst: Writable<Reg>,
ty: Type,
},
@@ -553,7 +553,7 @@ impl Inst {
| Inst::VirtualSPOffsetAdj { .. }
| Inst::XmmCmove { .. }
| Inst::XmmCmpRmR { .. }
| Inst::XmmLoadConstSeq { .. }
| Inst::XmmLoadConst { .. }
| Inst::XmmMinMaxSeq { .. }
| Inst::XmmUninitializedValue { .. } => None,
@@ -695,11 +695,10 @@ impl Inst {
}
}
pub(crate) fn xmm_load_const_seq(val: Vec<u8>, dst: Writable<Reg>, ty: Type) -> Inst {
debug_assert!(val.len() == 16);
pub(crate) fn xmm_load_const(src: VCodeConstant, dst: Writable<Reg>, ty: Type) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
debug_assert!(ty.is_vector() && ty.bits() == 128);
Inst::XmmLoadConstSeq { val, dst, ty }
Inst::XmmLoadConst { src, dst, ty }
}
/// Convenient helper for unary float operations.
@@ -1506,8 +1505,8 @@ impl PrettyPrint for Inst {
dst.show_rru(mb_rru),
),
Inst::XmmLoadConstSeq { val, dst, .. } => {
format!("load_const ${:?}, {}", val, dst.show_rru(mb_rru),)
Inst::XmmLoadConst { src, dst, .. } => {
format!("load_const {:?}, {}", src, dst.show_rru(mb_rru),)
}
Inst::XmmToGpr {
@@ -1937,7 +1936,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
}
}
Inst::XmmUninitializedValue { dst } => collector.add_def(*dst),
Inst::XmmLoadConstSeq { dst, .. } => collector.add_def(*dst),
Inst::XmmLoadConst { dst, .. } => collector.add_def(*dst),
Inst::XmmMinMaxSeq { lhs, rhs_dst, .. } => {
collector.add_use(*lhs);
collector.add_mod(*rhs_dst);
@@ -2274,7 +2273,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
Inst::XmmUninitializedValue { ref mut dst, .. } => {
map_def(mapper, dst);
}
Inst::XmmLoadConstSeq { ref mut dst, .. } => {
Inst::XmmLoadConst { ref mut dst, .. } => {
map_def(mapper, dst);
}
Inst::XmmMinMaxSeq {
@@ -2685,7 +2684,7 @@ impl MachInst for Inst {
} else {
ret.push(Inst::imm(
OperandSize::from_bytes(ty.bytes()),
value,
value.into(),
to_reg,
));
}

View File

@@ -3345,17 +3345,21 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Vconst => {
let val = if let &InstructionData::UnaryConst {
let used_constant = if let &InstructionData::UnaryConst {
constant_handle, ..
} = ctx.data(insn)
{
ctx.get_constant_data(constant_handle).clone().into_vec()
ctx.use_constant(VCodeConstantData::Pool(
constant_handle,
ctx.get_constant_data(constant_handle).clone(),
))
} else {
unreachable!("vconst should always have unary_const format")
};
// TODO use Inst::gen_constant() instead.
let dst = get_output_reg(ctx, outputs[0]);
let ty = ty.unwrap();
ctx.emit(Inst::xmm_load_const_seq(val, dst, ty));
ctx.emit(Inst::xmm_load_const(used_constant, dst, ty));
}
Opcode::RawBitcast => {
@@ -3396,8 +3400,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
.map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
.map(zero_unknown_lane_index)
.collect();
let constant = ctx.use_constant(VCodeConstantData::Generated(constructed_mask));
let tmp = ctx.alloc_tmp(RegClass::V128, types::I8X16);
ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp, ty));
ctx.emit(Inst::xmm_load_const(constant, tmp, ty));
// After loading the constructed mask in a temporary register, we use this to
// shuffle the `dst` register (remember that, in this case, it is the same as
// `src` so we disregard this register).
@@ -3416,8 +3421,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let tmp0 = ctx.alloc_tmp(RegClass::V128, lhs_ty);
ctx.emit(Inst::gen_move(tmp0, lhs, lhs_ty));
let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect();
let constant = ctx.use_constant(VCodeConstantData::Generated(constructed_mask));
let tmp1 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp1, ty));
ctx.emit(Inst::xmm_load_const(constant, tmp1, ty));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pshufb,
RegMem::from(tmp1),
@@ -3431,8 +3437,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
.map(|b| b.wrapping_sub(16))
.map(zero_unknown_lane_index)
.collect();
let constant = ctx.use_constant(VCodeConstantData::Generated(constructed_mask));
let tmp2 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp2, ty));
ctx.emit(Inst::xmm_load_const(constant, tmp2, ty));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pshufb,
RegMem::from(tmp2),
@@ -3469,11 +3476,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// Create a mask for zeroing out-of-bounds lanes of the swizzle mask.
let zero_mask = ctx.alloc_tmp(RegClass::V128, types::I8X16);
let zero_mask_value = vec![
static ZERO_MASK_VALUE: [u8; 16] = [
0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
0x70, 0x70,
];
ctx.emit(Inst::xmm_load_const_seq(zero_mask_value, zero_mask, ty));
let constant = ctx.use_constant(VCodeConstantData::WellKnown(&ZERO_MASK_VALUE));
ctx.emit(Inst::xmm_load_const(constant, zero_mask, ty));
// Use the `zero_mask` on a writable `swizzle_mask`.
let swizzle_mask = Writable::from_reg(swizzle_mask);