machinst x64: fold address modes on loads/stores;
This commit is contained in:
@@ -124,10 +124,14 @@ struct InsnOutput {
|
|||||||
output: usize,
|
output: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn matches_input<C: LowerCtx<I = Inst>>(c: &mut C, input: InsnInput, op: Opcode) -> Option<IRInst> {
|
fn matches_input<C: LowerCtx<I = Inst>>(
|
||||||
let inputs = c.get_input(input.insn, input.input);
|
ctx: &mut C,
|
||||||
|
input: InsnInput,
|
||||||
|
op: Opcode,
|
||||||
|
) -> Option<IRInst> {
|
||||||
|
let inputs = ctx.get_input(input.insn, input.input);
|
||||||
if let Some((src_inst, _)) = inputs.inst {
|
if let Some((src_inst, _)) = inputs.inst {
|
||||||
let data = c.data(src_inst);
|
let data = ctx.data(src_inst);
|
||||||
if data.opcode() == op {
|
if data.opcode() == op {
|
||||||
return Some(src_inst);
|
return Some(src_inst);
|
||||||
}
|
}
|
||||||
@@ -214,6 +218,10 @@ fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option<u32> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn input_to_imm(ctx: Ctx, spec: InsnInput) -> Option<u64> {
|
||||||
|
ctx.get_input(spec.insn, spec.input).constant
|
||||||
|
}
|
||||||
|
|
||||||
/// Put the given input into an immediate, a register or a memory operand.
|
/// Put the given input into an immediate, a register or a memory operand.
|
||||||
/// Effectful: may mark the given input as used, when returning the register form.
|
/// Effectful: may mark the given input as used, when returning the register form.
|
||||||
fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
|
fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
|
||||||
@@ -340,6 +348,80 @@ fn emit_vm_call<C: LowerCtx<I = Inst>>(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns whether the given input is a shift by a constant value less or equal than 3.
|
||||||
|
/// The goal is to embed it within an address mode.
|
||||||
|
fn matches_small_cst_shift<C: LowerCtx<I = Inst>>(
|
||||||
|
ctx: &mut C,
|
||||||
|
spec: InsnInput,
|
||||||
|
) -> Option<(InsnInput, u8)> {
|
||||||
|
if let Some(shift) = matches_input(ctx, spec, Opcode::Ishl) {
|
||||||
|
if let Some(shift_amt) = input_to_imm(
|
||||||
|
ctx,
|
||||||
|
InsnInput {
|
||||||
|
insn: shift,
|
||||||
|
input: 1,
|
||||||
|
},
|
||||||
|
) {
|
||||||
|
if shift_amt <= 3 {
|
||||||
|
return Some((
|
||||||
|
InsnInput {
|
||||||
|
insn: shift,
|
||||||
|
input: 0,
|
||||||
|
},
|
||||||
|
shift_amt as u8,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lower_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: u32) -> Amode {
|
||||||
|
// We now either have an add that we must materialize, or some other input; as well as the
|
||||||
|
// final offset.
|
||||||
|
if let Some(add) = matches_input(ctx, spec, Opcode::Iadd) {
|
||||||
|
let add_inputs = &[
|
||||||
|
InsnInput {
|
||||||
|
insn: add,
|
||||||
|
input: 0,
|
||||||
|
},
|
||||||
|
InsnInput {
|
||||||
|
insn: add,
|
||||||
|
input: 1,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
// TODO heap_addr legalization generates a uext64 *after* the shift, so these optimizations
|
||||||
|
// aren't happening in the wasm case. We could do better, given some range analysis.
|
||||||
|
let (base, index, shift) = if let Some((shift_input, shift_amt)) =
|
||||||
|
matches_small_cst_shift(ctx, add_inputs[0])
|
||||||
|
{
|
||||||
|
(
|
||||||
|
input_to_reg(ctx, add_inputs[1]),
|
||||||
|
input_to_reg(ctx, shift_input),
|
||||||
|
shift_amt,
|
||||||
|
)
|
||||||
|
} else if let Some((shift_input, shift_amt)) = matches_small_cst_shift(ctx, add_inputs[1]) {
|
||||||
|
(
|
||||||
|
input_to_reg(ctx, add_inputs[0]),
|
||||||
|
input_to_reg(ctx, shift_input),
|
||||||
|
shift_amt,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
(
|
||||||
|
input_to_reg(ctx, add_inputs[0]),
|
||||||
|
input_to_reg(ctx, add_inputs[1]),
|
||||||
|
0,
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
return Amode::imm_reg_reg_shift(offset, base, index, shift);
|
||||||
|
}
|
||||||
|
|
||||||
|
let input = input_to_reg(ctx, spec);
|
||||||
|
Amode::imm_reg(offset, input)
|
||||||
|
}
|
||||||
|
|
||||||
//=============================================================================
|
//=============================================================================
|
||||||
// Top-level instruction lowering entry point, for one instruction.
|
// Top-level instruction lowering entry point, for one instruction.
|
||||||
|
|
||||||
@@ -1660,7 +1742,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
_ => false,
|
_ => false,
|
||||||
};
|
};
|
||||||
|
|
||||||
let addr = match op {
|
let amode = match op {
|
||||||
Opcode::Load
|
Opcode::Load
|
||||||
| Opcode::Uload8
|
| Opcode::Uload8
|
||||||
| Opcode::Sload8
|
| Opcode::Sload8
|
||||||
@@ -1669,8 +1751,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::Uload32
|
| Opcode::Uload32
|
||||||
| Opcode::Sload32 => {
|
| Opcode::Sload32 => {
|
||||||
assert_eq!(inputs.len(), 1, "only one input for load operands");
|
assert_eq!(inputs.len(), 1, "only one input for load operands");
|
||||||
let base = input_to_reg(ctx, inputs[0]);
|
lower_amode(ctx, inputs[0], offset as u32)
|
||||||
Amode::imm_reg(offset as u32, base)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::LoadComplex
|
Opcode::LoadComplex
|
||||||
@@ -1704,7 +1785,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
// so ext-mode is defined in this case.
|
// so ext-mode is defined in this case.
|
||||||
ctx.emit(Inst::movsx_rm_r(
|
ctx.emit(Inst::movsx_rm_r(
|
||||||
ext_mode.unwrap(),
|
ext_mode.unwrap(),
|
||||||
RegMem::mem(addr),
|
RegMem::mem(amode),
|
||||||
dst,
|
dst,
|
||||||
srcloc,
|
srcloc,
|
||||||
));
|
));
|
||||||
@@ -1712,12 +1793,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
(false, false) => {
|
(false, false) => {
|
||||||
if elem_ty.bytes() == 8 {
|
if elem_ty.bytes() == 8 {
|
||||||
// Use a plain load.
|
// Use a plain load.
|
||||||
ctx.emit(Inst::mov64_m_r(addr, dst, srcloc))
|
ctx.emit(Inst::mov64_m_r(amode, dst, srcloc))
|
||||||
} else {
|
} else {
|
||||||
// Use a zero-extended load.
|
// Use a zero-extended load.
|
||||||
ctx.emit(Inst::movzx_rm_r(
|
ctx.emit(Inst::movzx_rm_r(
|
||||||
ext_mode.unwrap(),
|
ext_mode.unwrap(),
|
||||||
RegMem::mem(addr),
|
RegMem::mem(amode),
|
||||||
dst,
|
dst,
|
||||||
srcloc,
|
srcloc,
|
||||||
))
|
))
|
||||||
@@ -1726,13 +1807,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
(_, true) => {
|
(_, true) => {
|
||||||
ctx.emit(match elem_ty {
|
ctx.emit(match elem_ty {
|
||||||
types::F32 => {
|
types::F32 => {
|
||||||
Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(addr), dst, srcloc)
|
Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(amode), dst, srcloc)
|
||||||
}
|
}
|
||||||
types::F64 => {
|
types::F64 => {
|
||||||
Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(addr), dst, srcloc)
|
Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(amode), dst, srcloc)
|
||||||
}
|
}
|
||||||
_ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
|
_ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
|
||||||
Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(addr), dst, srcloc)
|
Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(amode), dst, srcloc)
|
||||||
} // TODO Specialize for different types: MOVUPD, MOVDQU
|
} // TODO Specialize for different types: MOVUPD, MOVDQU
|
||||||
_ => unreachable!("unexpected type for load: {:?}", elem_ty),
|
_ => unreachable!("unexpected type for load: {:?}", elem_ty),
|
||||||
});
|
});
|
||||||
@@ -1761,9 +1842,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let addr = match op {
|
let addr = match op {
|
||||||
Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
|
Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
|
||||||
assert_eq!(inputs.len(), 2, "only one input for store memory operands");
|
assert_eq!(inputs.len(), 2, "only one input for store memory operands");
|
||||||
let base = input_to_reg(ctx, inputs[1]);
|
lower_amode(ctx, inputs[1], offset as u32)
|
||||||
// TODO sign?
|
|
||||||
Amode::imm_reg(offset as u32, base)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::StoreComplex
|
Opcode::StoreComplex
|
||||||
|
|||||||
Reference in New Issue
Block a user