From efff43e7697ad6716e532e47dadd264c89c683f7 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Tue, 18 Aug 2020 19:31:20 +0200 Subject: [PATCH] machinst x64: fold address modes on loads/stores; --- cranelift/codegen/src/isa/x64/lower.rs | 109 +++++++++++++++++++++---- 1 file changed, 94 insertions(+), 15 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 7aec77e5dc..047eac99d8 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -124,10 +124,14 @@ struct InsnOutput { output: usize, } -fn matches_input>(c: &mut C, input: InsnInput, op: Opcode) -> Option { - let inputs = c.get_input(input.insn, input.input); +fn matches_input>( + ctx: &mut C, + input: InsnInput, + op: Opcode, +) -> Option { + let inputs = ctx.get_input(input.insn, input.input); if let Some((src_inst, _)) = inputs.inst { - let data = c.data(src_inst); + let data = ctx.data(src_inst); if data.opcode() == op { return Some(src_inst); } @@ -214,6 +218,10 @@ fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option { }) } +fn input_to_imm(ctx: Ctx, spec: InsnInput) -> Option { + ctx.get_input(spec.insn, spec.input).constant +} + /// Put the given input into an immediate, a register or a memory operand. /// Effectful: may mark the given input as used, when returning the register form. fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm { @@ -340,6 +348,80 @@ fn emit_vm_call>( Ok(()) } +/// Returns the shifted input and shift amount if the given input is a shift by a constant value less than or equal to 3 (the maximum scale encodable in an x64 SIB byte). +/// The goal is to embed it within an address mode. 
+fn matches_small_cst_shift>( + ctx: &mut C, + spec: InsnInput, +) -> Option<(InsnInput, u8)> { + if let Some(shift) = matches_input(ctx, spec, Opcode::Ishl) { + if let Some(shift_amt) = input_to_imm( + ctx, + InsnInput { + insn: shift, + input: 1, + }, + ) { + if shift_amt <= 3 { + return Some(( + InsnInput { + insn: shift, + input: 0, + }, + shift_amt as u8, + )); + } + } + } + None +} + +fn lower_amode>(ctx: &mut C, spec: InsnInput, offset: u32) -> Amode { + // We now either have an add that we must materialize, or some other input; as well as the + // final offset. + if let Some(add) = matches_input(ctx, spec, Opcode::Iadd) { + let add_inputs = &[ + InsnInput { + insn: add, + input: 0, + }, + InsnInput { + insn: add, + input: 1, + }, + ]; + + // TODO heap_addr legalization generates a uext64 *after* the shift, so these optimizations + // aren't happening in the wasm case. We could do better, given some range analysis. + let (base, index, shift) = if let Some((shift_input, shift_amt)) = + matches_small_cst_shift(ctx, add_inputs[0]) + { + ( + input_to_reg(ctx, add_inputs[1]), + input_to_reg(ctx, shift_input), + shift_amt, + ) + } else if let Some((shift_input, shift_amt)) = matches_small_cst_shift(ctx, add_inputs[1]) { + ( + input_to_reg(ctx, add_inputs[0]), + input_to_reg(ctx, shift_input), + shift_amt, + ) + } else { + ( + input_to_reg(ctx, add_inputs[0]), + input_to_reg(ctx, add_inputs[1]), + 0, + ) + }; + + return Amode::imm_reg_reg_shift(offset, base, index, shift); + } + + let input = input_to_reg(ctx, spec); + Amode::imm_reg(offset, input) +} + //============================================================================= // Top-level instruction lowering entry point, for one instruction. 
@@ -1660,7 +1742,7 @@ fn lower_insn_to_regs>( _ => false, }; - let addr = match op { + let amode = match op { Opcode::Load | Opcode::Uload8 | Opcode::Sload8 @@ -1669,8 +1751,7 @@ fn lower_insn_to_regs>( | Opcode::Uload32 | Opcode::Sload32 => { assert_eq!(inputs.len(), 1, "only one input for load operands"); - let base = input_to_reg(ctx, inputs[0]); - Amode::imm_reg(offset as u32, base) + lower_amode(ctx, inputs[0], offset as u32) } Opcode::LoadComplex @@ -1704,7 +1785,7 @@ fn lower_insn_to_regs>( // so ext-mode is defined in this case. ctx.emit(Inst::movsx_rm_r( ext_mode.unwrap(), - RegMem::mem(addr), + RegMem::mem(amode), dst, srcloc, )); @@ -1712,12 +1793,12 @@ fn lower_insn_to_regs>( (false, false) => { if elem_ty.bytes() == 8 { // Use a plain load. - ctx.emit(Inst::mov64_m_r(addr, dst, srcloc)) + ctx.emit(Inst::mov64_m_r(amode, dst, srcloc)) } else { // Use a zero-extended load. ctx.emit(Inst::movzx_rm_r( ext_mode.unwrap(), - RegMem::mem(addr), + RegMem::mem(amode), dst, srcloc, )) @@ -1726,13 +1807,13 @@ fn lower_insn_to_regs>( (_, true) => { ctx.emit(match elem_ty { types::F32 => { - Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(addr), dst, srcloc) + Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(amode), dst, srcloc) } types::F64 => { - Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(addr), dst, srcloc) + Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(amode), dst, srcloc) } _ if elem_ty.is_vector() && elem_ty.bits() == 128 => { - Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(addr), dst, srcloc) + Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(amode), dst, srcloc) } // TODO Specialize for different types: MOVUPD, MOVDQU _ => unreachable!("unexpected type for load: {:?}", elem_ty), }); @@ -1761,9 +1842,7 @@ fn lower_insn_to_regs>( let addr = match op { Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => { assert_eq!(inputs.len(), 2, "only one input for store memory operands"); - let base = input_to_reg(ctx, inputs[1]); - // TODO sign? 
- Amode::imm_reg(offset as u32, base) + lower_amode(ctx, inputs[1], offset as u32) } Opcode::StoreComplex