Merge pull request #2389 from cfallin/x64-load-op

x64 backend: merge loads into ALU ops when appropriate.
This commit is contained in:
Chris Fallin
2020-11-17 11:42:19 -08:00
committed by GitHub
2 changed files with 166 additions and 42 deletions

View File

@@ -22,9 +22,6 @@ use smallvec::SmallVec;
use std::convert::TryFrom;
use target_lexicon::Triple;
/// Context passed to all lowering functions.
type Ctx<'a> = &'a mut dyn LowerCtx<I = Inst>;
//=============================================================================
// Helpers for instruction lowering.
@@ -89,13 +86,9 @@ fn matches_input_any<C: LowerCtx<I = Inst>>(
})
}
/// Put the given input into a register, and mark it as used (side-effect).
fn put_input_in_reg(ctx: Ctx, spec: InsnInput) -> Reg {
let input = ctx.get_input_as_source_or_const(spec.insn, spec.input);
if let Some(c) = input.constant {
// Generate constants fresh at each use to minimize long-range register pressure.
let ty = ctx.input_ty(spec.insn, spec.input);
/// Emits instruction(s) to generate the given 64-bit constant value into a newly-allocated
/// temporary register, returning that register.
fn generate_constant<C: LowerCtx<I = Inst>>(ctx: &mut C, ty: Type, c: u64) -> Reg {
let from_bits = ty_bits(ty);
let masked = if from_bits < 64 {
c & ((1u64 << from_bits) - 1)
@@ -112,11 +105,87 @@ fn put_input_in_reg(ctx: Ctx, spec: InsnInput) -> Reg {
ctx.emit(inst);
}
cst_copy.to_reg()
}
/// Put the given input into a register, and mark it as used (side-effect).
fn put_input_in_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput) -> Reg {
let ty = ctx.input_ty(spec.insn, spec.input);
let input = ctx.get_input_as_source_or_const(spec.insn, spec.input);
if let Some(c) = input.constant {
// Generate constants fresh at each use to minimize long-range register pressure.
generate_constant(ctx, ty, c)
} else {
ctx.put_input_in_reg(spec.insn, spec.input)
}
}
/// Determines whether a load operation (indicated by `src_insn`) can be merged
/// into the current lowering point. If so, returns the address-base source (as
/// an `InsnInput`) and an offset from that address from which to perform the
/// load.
fn is_mergeable_load<C: LowerCtx<I = Inst>>(
ctx: &mut C,
src_insn: IRInst,
) -> Option<(InsnInput, i32)> {
let insn_data = ctx.data(src_insn);
let inputs = ctx.num_inputs(src_insn);
if inputs != 1 {
return None;
}
let load_ty = ctx.output_ty(src_insn, 0);
if ty_bits(load_ty) < 32 {
// Narrower values are handled by ALU insts that are at least 32 bits
// wide, which is normally OK as we ignore upper buts; but, if we
// generate, e.g., a direct-from-memory 32-bit add for a byte value and
// the byte is the last byte in a page, the extra data that we load is
// incorrectly accessed. So we only allow loads to merge for
// 32-bit-and-above widths.
return None;
}
// Just testing the opcode is enough, because the width will always match if
// the type does (and the type should match if the CLIF is properly
// constructed).
if insn_data.opcode() == Opcode::Load {
let offset = insn_data
.load_store_offset()
.expect("load should have offset");
Some((
InsnInput {
insn: src_insn,
input: 0,
},
offset,
))
} else {
None
}
}
/// Put the given input into a register or a memory operand.
/// Effectful: may mark the given input as used, when returning the register form.
fn input_to_reg_mem<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput) -> RegMem {
let inputs = ctx.get_input_as_source_or_const(spec.insn, spec.input);
if let Some(c) = inputs.constant {
// Generate constants fresh at each use to minimize long-range register pressure.
let ty = ctx.input_ty(spec.insn, spec.input);
return RegMem::reg(generate_constant(ctx, ty, c));
}
if let Some((src_insn, 0)) = inputs.inst {
if let Some((addr_input, offset)) = is_mergeable_load(ctx, src_insn) {
ctx.sink_inst(src_insn);
let amode = lower_to_amode(ctx, addr_input, offset);
return RegMem::mem(amode);
}
}
RegMem::reg(ctx.put_input_in_reg(spec.insn, spec.input))
}
/// An extension specification for `extend_input_to_reg`.
#[derive(Clone, Copy)]
enum ExtSpec {
@@ -128,7 +197,11 @@ enum ExtSpec {
/// Put the given input into a register, marking it as used, and do a zero- or signed- extension if
/// required. (This obviously causes side-effects.)
fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg {
fn extend_input_to_reg<C: LowerCtx<I = Inst>>(
ctx: &mut C,
spec: InsnInput,
ext_spec: ExtSpec,
) -> Reg {
let requested_size = match ext_spec {
ExtSpec::ZeroExtendTo32 | ExtSpec::SignExtendTo32 => 32,
ExtSpec::ZeroExtendTo64 | ExtSpec::SignExtendTo64 => 64,
@@ -160,13 +233,6 @@ fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg {
dst.to_reg()
}
/// Put the given input into a register or a memory operand.
/// Effectful: may mark the given input as used, when returning the register form.
fn input_to_reg_mem(ctx: Ctx, spec: InsnInput) -> RegMem {
// TODO handle memory; merge a load directly, if possible.
RegMem::reg(ctx.put_input_in_reg(spec.insn, spec.input))
}
/// Returns whether the given input is an immediate that can be properly sign-extended, without any
/// possible side-effect.
fn non_reg_input_to_sext_imm(input: NonRegInput, input_ty: Type) -> Option<u32> {
@@ -182,20 +248,20 @@ fn non_reg_input_to_sext_imm(input: NonRegInput, input_ty: Type) -> Option<u32>
})
}
fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option<u32> {
fn input_to_sext_imm<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput) -> Option<u32> {
let input = ctx.get_input_as_source_or_const(spec.insn, spec.input);
let input_ty = ctx.input_ty(spec.insn, spec.input);
non_reg_input_to_sext_imm(input, input_ty)
}
fn input_to_imm(ctx: Ctx, spec: InsnInput) -> Option<u64> {
fn input_to_imm<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput) -> Option<u64> {
ctx.get_input_as_source_or_const(spec.insn, spec.input)
.constant
}
/// Put the given input into an immediate, a register or a memory operand.
/// Effectful: may mark the given input as used, when returning the register form.
fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
fn input_to_reg_mem_imm<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput) -> RegMemImm {
let input = ctx.get_input_as_source_or_const(spec.insn, spec.input);
let input_ty = ctx.input_ty(spec.insn, spec.input);
match non_reg_input_to_sext_imm(input, input_ty) {
@@ -305,7 +371,7 @@ fn emit_extract_lane<C: LowerCtx<I = Inst>>(
///
/// Note: make sure that there are no instructions modifying the flags between a call to this
/// function and the use of the flags!
fn emit_cmp(ctx: Ctx, insn: IRInst) {
fn emit_cmp<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
let ty = ctx.input_ty(insn, 0);
let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
@@ -355,7 +421,12 @@ enum FcmpCondResult {
///
/// Note: make sure that there are no instructions modifying the flags between a call to this
/// function and the use of the flags!
fn emit_fcmp(ctx: Ctx, insn: IRInst, mut cond_code: FloatCC, spec: FcmpSpec) -> FcmpCondResult {
fn emit_fcmp<C: LowerCtx<I = Inst>>(
ctx: &mut C,
insn: IRInst,
mut cond_code: FloatCC,
spec: FcmpSpec,
) -> FcmpCondResult {
let (flip_operands, inverted_equal) = match cond_code {
FloatCC::LessThan
| FloatCC::LessThanOrEqual
@@ -407,7 +478,12 @@ fn emit_fcmp(ctx: Ctx, insn: IRInst, mut cond_code: FloatCC, spec: FcmpSpec) ->
cond_result
}
fn make_libcall_sig(ctx: Ctx, insn: IRInst, call_conv: CallConv, ptr_ty: Type) -> Signature {
fn make_libcall_sig<C: LowerCtx<I = Inst>>(
ctx: &mut C,
insn: IRInst,
call_conv: CallConv,
ptr_ty: Type,
) -> Signature {
let mut sig = Signature::new(call_conv);
for i in 0..ctx.num_inputs(insn) {
sig.params.push(AbiParam::new(ctx.input_ty(insn, i)));
@@ -827,14 +903,16 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Bor
| Opcode::Bxor => {
// For commutative operations, try to commute operands if one is an
// immediate.
if let Some(imm) = input_to_sext_imm(ctx, inputs[0]) {
(put_input_in_reg(ctx, inputs[1]), RegMemImm::imm(imm))
// immediate or direct memory reference. Do so by converting LHS to RMI; if
// reg, then always convert RHS to RMI; else, use LHS as RMI and convert
// RHS to reg.
let lhs = input_to_reg_mem_imm(ctx, inputs[0]);
if let RegMemImm::Reg { reg: lhs_reg } = lhs {
let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
(lhs_reg, rhs)
} else {
(
put_input_in_reg(ctx, inputs[0]),
input_to_reg_mem_imm(ctx, inputs[1]),
)
let rhs_reg = put_input_in_reg(ctx, inputs[1]);
(rhs_reg, lhs)
}
}
Opcode::Isub => (

View File

@@ -0,0 +1,46 @@
test compile
target x86_64
feature "experimental_x64"
function %add_from_mem_u32_1(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = load.i32 v0
v3 = iadd.i32 v2, v1
; check: addl 0(%rdi), %r12d
return v3
}
function %add_from_mem_u32_2(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = load.i32 v0
v3 = iadd.i32 v1, v2
; check: addl 0(%rdi), %r12d
return v3
}
function %add_from_mem_u64_1(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = load.i64 v0
v3 = iadd.i64 v2, v1
; check: addq 0(%rdi), %r12
return v3
}
function %add_from_mem_u64_2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = load.i64 v0
v3 = iadd.i64 v1, v2
; check: addq 0(%rdi), %r12
return v3
}
; test narrow loads: 8-bit load should not merge because the `addl` is 32 bits
; and would load 32 bits from memory, which may go beyond the end of the heap.
function %add_from_mem_not_narrow(i64, i8) -> i8 {
block0(v0: i64, v1: i8):
v2 = load.i8 v0
v3 = iadd.i8 v2, v1
; check: movzbq 0(%rdi), %r12
; nextln: addl %esi, %r12d
return v3
}