Implement initial emission of constants
This approach suffers from memory-size bloat at compile time, a consequence of de-duplicating the emitted constants in order to reduce runtime memory size. As a first step, though, it provides an end-to-end mechanism for emitting constants in MachBuffer islands.
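For intuition, the de-duplication mentioned above can be pictured as a map from constant bytes to a pool slot. The sketch below is a toy model only: `ConstIdx` and `ConstantPool` are invented for illustration and are not Cranelift's actual `VCodeConstants` API. The compile-time bloat shows up as the extra copies of the bytes held in the map.

    use std::collections::HashMap;

    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    struct ConstIdx(u32);

    // Toy constant pool: registering the same bytes twice returns the same slot.
    #[derive(Default)]
    struct ConstantPool {
        pool: Vec<Vec<u8>>,                // compile-time copy of every constant
        dedup: HashMap<Vec<u8>, ConstIdx>, // second copy of the bytes, used as keys
    }

    impl ConstantPool {
        fn use_constant(&mut self, data: Vec<u8>) -> ConstIdx {
            if let Some(&idx) = self.dedup.get(&data) {
                return idx; // already registered: reuse the slot
            }
            let idx = ConstIdx(self.pool.len() as u32);
            self.pool.push(data.clone());
            self.dedup.insert(data, idx);
            idx
        }
    }

    fn main() {
        let mut pool = ConstantPool::default();
        let a = pool.use_constant(vec![0x70; 16]);
        let b = pool.use_constant(vec![0x70; 16]);
        assert_eq!(a, b); // de-duplicated: one runtime copy of the bytes
    }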
@@ -1979,29 +1979,10 @@ pub(crate) fn emit(
             sink.put1(*imm);
         }
 
-        Inst::XmmLoadConstSeq { val, dst, ty } => {
-            // This sequence is *one* instruction in the vcode, and is expanded only here at
-            // emission time, because we cannot allow the regalloc to insert spills/reloads in
-            // the middle; we depend on hardcoded PC-rel addressing below. TODO Eventually this
-            // "constant inline" code should be replaced by constant pool integration.
-
-            // Load the inline constant.
-            let constant_start_label = sink.get_label();
-            let load_offset = Amode::rip_relative(constant_start_label);
+        Inst::XmmLoadConst { src, dst, ty } => {
+            let load_offset = Amode::rip_relative(sink.get_label_for_constant(*src));
             let load = Inst::load(*ty, load_offset, *dst, ExtKind::None, None);
             load.emit(sink, info, state);
-
-            // Jump over the constant.
-            let constant_end_label = sink.get_label();
-            let jump = Inst::jmp_known(constant_end_label);
-            jump.emit(sink, info, state);
-
-            // Emit the constant.
-            sink.bind_label(constant_start_label);
-            for i in val.iter() {
-                sink.put1(*i);
-            }
-            sink.bind_label(constant_end_label);
         }
 
         Inst::XmmUninitializedValue { .. } => {
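The hunk above replaces the old inline pattern (emit a RIP-relative load, jump over the constant bytes, emit them in place) with a single load against a label obtained from the sink; the bytes themselves are emitted later in a MachBuffer island. A minimal sketch of that label-and-fixup mechanism follows, using hypothetical `Sink`/`Label` types rather than the real MachBuffer API:

    // Toy sketch of label-based constant emission; `Sink` and `Label` are
    // stand-ins invented here, not the real MachBuffer types.
    #[derive(Clone, Copy)]
    struct Label(usize);

    #[derive(Default)]
    struct Sink {
        code: Vec<u8>,
        fixups: Vec<(usize, Label)>,       // (offset of disp32 to patch, target)
        label_offsets: Vec<Option<usize>>, // resolved offset per label
        pending: Vec<(Label, Vec<u8>)>,    // constants awaiting island emission
    }

    impl Sink {
        /// Register a constant and get a label that will point at its bytes.
        fn get_label_for_constant(&mut self, data: Vec<u8>) -> Label {
            let label = Label(self.label_offsets.len());
            self.label_offsets.push(None);
            self.pending.push((label, data));
            label
        }

        /// Emit a 4-byte placeholder displacement, patched once the label binds.
        fn emit_disp32(&mut self, label: Label) {
            self.fixups.push((self.code.len(), label));
            self.code.extend_from_slice(&[0; 4]);
        }

        /// Place all pending constants after the code (the "island") and patch
        /// the RIP-relative displacements, measured from the end of the field.
        fn emit_island(&mut self) {
            for (label, data) in std::mem::take(&mut self.pending) {
                self.label_offsets[label.0] = Some(self.code.len());
                self.code.extend_from_slice(&data);
            }
            for (at, label) in std::mem::take(&mut self.fixups) {
                let target = self.label_offsets[label.0].expect("label bound");
                let disp = (target as i64 - (at as i64 + 4)) as i32;
                self.code[at..at + 4].copy_from_slice(&disp.to_le_bytes());
            }
        }
    }

    fn main() {
        let mut sink = Sink::default();
        let label = sink.get_label_for_constant(vec![0u8; 16]);
        sink.emit_disp32(label); // stands in for the load's displacement field
        sink.emit_island();
        assert_eq!(sink.code.len(), 4 + 16);
    }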
@@ -240,8 +240,8 @@ pub enum Inst {
     },
 
     /// XMM (vector) unary op (to move a constant value into an xmm register): movups
-    XmmLoadConstSeq {
-        val: Vec<u8>,
+    XmmLoadConst {
+        src: VCodeConstant,
         dst: Writable<Reg>,
         ty: Type,
     },
@@ -553,7 +553,7 @@ impl Inst {
            | Inst::VirtualSPOffsetAdj { .. }
            | Inst::XmmCmove { .. }
            | Inst::XmmCmpRmR { .. }
-           | Inst::XmmLoadConstSeq { .. }
+           | Inst::XmmLoadConst { .. }
            | Inst::XmmMinMaxSeq { .. }
            | Inst::XmmUninitializedValue { .. } => None,
 
@@ -695,11 +695,10 @@ impl Inst {
         }
     }
 
-    pub(crate) fn xmm_load_const_seq(val: Vec<u8>, dst: Writable<Reg>, ty: Type) -> Inst {
-        debug_assert!(val.len() == 16);
+    pub(crate) fn xmm_load_const(src: VCodeConstant, dst: Writable<Reg>, ty: Type) -> Inst {
         debug_assert!(dst.to_reg().get_class() == RegClass::V128);
         debug_assert!(ty.is_vector() && ty.bits() == 128);
-        Inst::XmmLoadConstSeq { val, dst, ty }
+        Inst::XmmLoadConst { src, dst, ty }
     }
 
     /// Convenient helper for unary float operations.
@@ -1506,8 +1505,8 @@ impl PrettyPrint for Inst {
                 dst.show_rru(mb_rru),
             ),
 
-            Inst::XmmLoadConstSeq { val, dst, .. } => {
-                format!("load_const ${:?}, {}", val, dst.show_rru(mb_rru),)
+            Inst::XmmLoadConst { src, dst, .. } => {
+                format!("load_const {:?}, {}", src, dst.show_rru(mb_rru),)
             }
 
             Inst::XmmToGpr {
@@ -1937,7 +1936,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            }
        }
        Inst::XmmUninitializedValue { dst } => collector.add_def(*dst),
-       Inst::XmmLoadConstSeq { dst, .. } => collector.add_def(*dst),
+       Inst::XmmLoadConst { dst, .. } => collector.add_def(*dst),
        Inst::XmmMinMaxSeq { lhs, rhs_dst, .. } => {
            collector.add_use(*lhs);
            collector.add_mod(*rhs_dst);
@@ -2274,7 +2273,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
        Inst::XmmUninitializedValue { ref mut dst, .. } => {
            map_def(mapper, dst);
        }
-       Inst::XmmLoadConstSeq { ref mut dst, .. } => {
+       Inst::XmmLoadConst { ref mut dst, .. } => {
            map_def(mapper, dst);
        }
        Inst::XmmMinMaxSeq {
@@ -2685,7 +2684,7 @@ impl MachInst for Inst {
        } else {
            ret.push(Inst::imm(
                OperandSize::from_bytes(ty.bytes()),
-               value,
+               value.into(),
                to_reg,
            ));
        }
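The lowering hunks below feed constants into the pool in three ways. The variant names come straight from the diff; the field types in this sketch are stand-ins chosen so it compiles, not Cranelift's real definitions:

    // Stand-in types (assumptions for this sketch).
    type Constant = u32;         // handle into the IR-level constant pool
    type ConstantData = Vec<u8>; // raw constant bytes

    #[allow(dead_code)]
    enum VCodeConstantData {
        /// A constant already in the IR pool, as used by `Opcode::Vconst`.
        Pool(Constant, ConstantData),
        /// Bytes built during lowering, e.g. the shuffle masks below.
        Generated(ConstantData),
        /// A static, well-known value such as `ZERO_MASK_VALUE`.
        WellKnown(&'static [u8]),
    }

    fn main() {
        // e.g. a lowering-generated shuffle mask:
        let _mask = VCodeConstantData::Generated(vec![0u8; 16]);
    }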
@@ -3345,17 +3345,21 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        }
 
        Opcode::Vconst => {
-           let val = if let &InstructionData::UnaryConst {
+           let used_constant = if let &InstructionData::UnaryConst {
                constant_handle, ..
            } = ctx.data(insn)
            {
-               ctx.get_constant_data(constant_handle).clone().into_vec()
+               ctx.use_constant(VCodeConstantData::Pool(
+                   constant_handle,
+                   ctx.get_constant_data(constant_handle).clone(),
+               ))
            } else {
                unreachable!("vconst should always have unary_const format")
            };
            // TODO use Inst::gen_constant() instead.
            let dst = get_output_reg(ctx, outputs[0]);
            let ty = ty.unwrap();
-           ctx.emit(Inst::xmm_load_const_seq(val, dst, ty));
+           ctx.emit(Inst::xmm_load_const(used_constant, dst, ty));
        }
 
        Opcode::RawBitcast => {
@@ -3396,8 +3400,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                    .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
                    .map(zero_unknown_lane_index)
                    .collect();
+               let constant = ctx.use_constant(VCodeConstantData::Generated(constructed_mask));
                let tmp = ctx.alloc_tmp(RegClass::V128, types::I8X16);
-               ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp, ty));
+               ctx.emit(Inst::xmm_load_const(constant, tmp, ty));
                // After loading the constructed mask in a temporary register, we use this to
                // shuffle the `dst` register (remember that, in this case, it is the same as
                // `src` so we disregard this register).
@@ -3416,8 +3421,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                let tmp0 = ctx.alloc_tmp(RegClass::V128, lhs_ty);
                ctx.emit(Inst::gen_move(tmp0, lhs, lhs_ty));
                let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect();
+               let constant = ctx.use_constant(VCodeConstantData::Generated(constructed_mask));
                let tmp1 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
-               ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp1, ty));
+               ctx.emit(Inst::xmm_load_const(constant, tmp1, ty));
                ctx.emit(Inst::xmm_rm_r(
                    SseOpcode::Pshufb,
                    RegMem::from(tmp1),
@@ -3431,8 +3437,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                    .map(|b| b.wrapping_sub(16))
                    .map(zero_unknown_lane_index)
                    .collect();
+               let constant = ctx.use_constant(VCodeConstantData::Generated(constructed_mask));
                let tmp2 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
-               ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp2, ty));
+               ctx.emit(Inst::xmm_load_const(constant, tmp2, ty));
                ctx.emit(Inst::xmm_rm_r(
                    SseOpcode::Pshufb,
                    RegMem::from(tmp2),
@@ -3469,11 +3476,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
            // Create a mask for zeroing out-of-bounds lanes of the swizzle mask.
            let zero_mask = ctx.alloc_tmp(RegClass::V128, types::I8X16);
-           let zero_mask_value = vec![
+           static ZERO_MASK_VALUE: [u8; 16] = [
                0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
                0x70, 0x70,
            ];
-           ctx.emit(Inst::xmm_load_const_seq(zero_mask_value, zero_mask, ty));
+           let constant = ctx.use_constant(VCodeConstantData::WellKnown(&ZERO_MASK_VALUE));
+           ctx.emit(Inst::xmm_load_const(constant, zero_mask, ty));
 
            // Use the `zero_mask` on a writable `swizzle_mask`.
            let swizzle_mask = Writable::from_reg(swizzle_mask);