machinst x64: fold address modes on loads/stores;

This commit is contained in:
Benjamin Bouvier
2020-08-18 19:31:20 +02:00
parent b830ee79de
commit efff43e769

View File

@@ -124,10 +124,14 @@ struct InsnOutput {
output: usize,
}
fn matches_input<C: LowerCtx<I = Inst>>(c: &mut C, input: InsnInput, op: Opcode) -> Option<IRInst> {
let inputs = c.get_input(input.insn, input.input);
fn matches_input<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
op: Opcode,
) -> Option<IRInst> {
let inputs = ctx.get_input(input.insn, input.input);
if let Some((src_inst, _)) = inputs.inst {
let data = c.data(src_inst);
let data = ctx.data(src_inst);
if data.opcode() == op {
return Some(src_inst);
}
@@ -214,6 +218,10 @@ fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option<u32> {
})
}
/// Looks up the input designated by `spec` and returns its `constant` field.
///
/// Presumably this is `Some(value)` only when the lowering context knows the input to be a
/// compile-time constant, and `None` otherwise -- confirm against `LowerCtx::get_input`.
fn input_to_imm(ctx: Ctx, spec: InsnInput) -> Option<u64> {
    let input = ctx.get_input(spec.insn, spec.input);
    input.constant
}
/// Put the given input into an immediate, a register or a memory operand.
/// Effectful: may mark the given input as used, when returning the register form.
fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
@@ -340,6 +348,80 @@ fn emit_vm_call<C: LowerCtx<I = Inst>>(
Ok(())
}
/// Checks whether the given input is produced by a left shift (`ishl`) whose shift amount is a
/// constant less than or equal to 3.
///
/// On a match, returns the shifted operand (input 0 of the shift) together with the shift
/// amount; otherwise returns `None`. The goal is to embed the shift within an address mode
/// (presumably as the scale applied to the index register).
fn matches_small_cst_shift<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    spec: InsnInput,
) -> Option<(InsnInput, u8)> {
    // The input must come from an `ishl` instruction...
    let shift = matches_input(ctx, spec, Opcode::Ishl)?;

    // ...whose second operand (the shift amount) is a known constant...
    let amount_input = InsnInput {
        insn: shift,
        input: 1,
    };
    let shift_amt = input_to_imm(ctx, amount_input)?;

    // ...that is small enough to be folded.
    if shift_amt > 3 {
        return None;
    }

    let shifted_input = InsnInput {
        insn: shift,
        input: 0,
    };
    // The cast cannot truncate: `shift_amt` is at most 3 here.
    Some((shifted_input, shift_amt as u8))
}
/// Builds the addressing mode for a memory access whose address is given by `spec`, plus the
/// constant `offset`.
///
/// - If the address is produced by an `iadd`, both operands of the add are folded into a
///   base + index addressing mode. When one operand is itself a left shift by a small constant
///   (see `matches_small_cst_shift`), the shifted value becomes the index and the shift amount
///   is folded in as well.
/// - Otherwise, the address is put in a register and used as the base, with `offset` as the
///   displacement.
fn lower_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: u32) -> Amode {
    // We now either have an add that we must materialize, or some other input; as well as the
    // final offset.
    let add = match matches_input(ctx, spec, Opcode::Iadd) {
        Some(add) => add,
        None => {
            // Not an add: plain base register plus displacement.
            let base = input_to_reg(ctx, spec);
            return Amode::imm_reg(offset, base);
        }
    };

    let lhs = InsnInput {
        insn: add,
        input: 0,
    };
    let rhs = InsnInput {
        insn: add,
        input: 1,
    };

    // TODO heap_addr legalization generates a uext64 *after* the shift, so these optimizations
    // aren't happening in the wasm case. We could do better, given some range analysis.

    // Left operand is a small constant shift: the right operand is the base.
    if let Some((shifted, amount)) = matches_small_cst_shift(ctx, lhs) {
        let base = input_to_reg(ctx, rhs);
        let index = input_to_reg(ctx, shifted);
        return Amode::imm_reg_reg_shift(offset, base, index, amount);
    }

    // Right operand is a small constant shift: the left operand is the base.
    if let Some((shifted, amount)) = matches_small_cst_shift(ctx, rhs) {
        let base = input_to_reg(ctx, lhs);
        let index = input_to_reg(ctx, shifted);
        return Amode::imm_reg_reg_shift(offset, base, index, amount);
    }

    // No shift to fold: use both add operands as-is, with a shift of zero.
    let base = input_to_reg(ctx, lhs);
    let index = input_to_reg(ctx, rhs);
    Amode::imm_reg_reg_shift(offset, base, index, 0)
}
//=============================================================================
// Top-level instruction lowering entry point, for one instruction.
@@ -1660,7 +1742,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
_ => false,
};
let addr = match op {
let amode = match op {
Opcode::Load
| Opcode::Uload8
| Opcode::Sload8
@@ -1669,8 +1751,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Uload32
| Opcode::Sload32 => {
assert_eq!(inputs.len(), 1, "only one input for load operands");
let base = input_to_reg(ctx, inputs[0]);
Amode::imm_reg(offset as u32, base)
lower_amode(ctx, inputs[0], offset as u32)
}
Opcode::LoadComplex
@@ -1704,7 +1785,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// so ext-mode is defined in this case.
ctx.emit(Inst::movsx_rm_r(
ext_mode.unwrap(),
RegMem::mem(addr),
RegMem::mem(amode),
dst,
srcloc,
));
@@ -1712,12 +1793,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(false, false) => {
if elem_ty.bytes() == 8 {
// Use a plain load.
ctx.emit(Inst::mov64_m_r(addr, dst, srcloc))
ctx.emit(Inst::mov64_m_r(amode, dst, srcloc))
} else {
// Use a zero-extended load.
ctx.emit(Inst::movzx_rm_r(
ext_mode.unwrap(),
RegMem::mem(addr),
RegMem::mem(amode),
dst,
srcloc,
))
@@ -1726,13 +1807,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(_, true) => {
ctx.emit(match elem_ty {
types::F32 => {
Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(addr), dst, srcloc)
Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(amode), dst, srcloc)
}
types::F64 => {
Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(addr), dst, srcloc)
Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(amode), dst, srcloc)
}
_ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(addr), dst, srcloc)
Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(amode), dst, srcloc)
} // TODO Specialize for different types: MOVUPD, MOVDQU
_ => unreachable!("unexpected type for load: {:?}", elem_ty),
});
@@ -1761,9 +1842,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let addr = match op {
Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
assert_eq!(inputs.len(), 2, "only one input for store memory operands");
let base = input_to_reg(ctx, inputs[1]);
// TODO sign?
Amode::imm_reg(offset as u32, base)
lower_amode(ctx, inputs[1], offset as u32)
}
Opcode::StoreComplex