Merge pull request #784 from julian-seward1/issue779
Only create copy_nop instructions for types for which an encoding exi…
This commit is contained in:
@@ -3,12 +3,13 @@ RISC-V Encodings.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from base import instructions as base
|
||||
from base import types
|
||||
from base.immediates import intcc
|
||||
from .defs import RV32, RV64
|
||||
from .recipes import OPIMM, OPIMM32, OP, OP32, LUI, BRANCH, JALR, JAL
|
||||
from .recipes import LOAD, STORE
|
||||
from .recipes import R, Rshamt, Ricmp, Ii, Iz, Iicmp, Iret, Icall, Icopy
|
||||
from .recipes import U, UJ, UJcall, SB, SBzero, GPsp, GPfi, Irmov
|
||||
from .recipes import U, UJ, UJcall, SB, SBzero, GPsp, GPfi, Irmov, stacknull
|
||||
from .settings import use_m
|
||||
from cdsl.ast import Var
|
||||
from base.legalize import narrow, expand
|
||||
@@ -160,3 +161,9 @@ RV32.enc(base.copy.b1, Icopy, OPIMM(0b000))
|
||||
RV64.enc(base.copy.b1, Icopy, OPIMM(0b000))
|
||||
RV32.enc(base.regmove.b1, Irmov, OPIMM(0b000))
|
||||
RV64.enc(base.regmove.b1, Irmov, OPIMM(0b000))
|
||||
|
||||
# Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
|
||||
# into a no-op.
|
||||
for ty in [types.i64, types.i32, types.i16, types.i8, types.f64, types.f32]:
|
||||
RV64.enc(base.copy_nop.bind(ty), stacknull, 0)
|
||||
RV32.enc(base.copy_nop.bind(ty), stacknull, 0)
|
||||
|
||||
@@ -223,3 +223,8 @@ GPfi = EncRecipe(
|
||||
'GPfi', Unary, base_size=4,
|
||||
ins=Stack(GPR), outs=GPR,
|
||||
emit='unimplemented!();')
|
||||
|
||||
# Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
|
||||
# into a no-op.
|
||||
stacknull = EncRecipe('stacknull', Unary, base_size=0,
|
||||
ins=Stack(GPR), outs=Stack(GPR), emit='')
|
||||
|
||||
@@ -342,11 +342,23 @@ X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1))
|
||||
X86_32.enc(base.copy_special, *r.copysp(0x89))
|
||||
|
||||
# Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
|
||||
# into a no-op.
|
||||
X86_64.enc(base.copy_nop.i64, r.stacknull, 0)
|
||||
X86_64.enc(base.copy_nop.i32, r.stacknull, 0)
|
||||
X86_64.enc(base.copy_nop.f64, r.stacknull, 0)
|
||||
X86_64.enc(base.copy_nop.f32, r.stacknull, 0)
|
||||
# into a no-op. Ideally we could to make this encoding available for
|
||||
# all types, and write `base.copy_nop.any`, but it appears that the
|
||||
# controlling type variable must not polymorphic. So we make do with
|
||||
# the following limited set, and guard the generating transformation in
|
||||
# regalloc/reload.rs accordingly.
|
||||
#
|
||||
# The same encoding is generated for both the 64- and 32-bit architectures.
|
||||
# Note that we can't use `enc_both` here, because that attempts to create a
|
||||
# variant with a REX prefix in the 64-bit-architecture case. But since
|
||||
# there's no actual instruction for the REX prefix to modify the meaning of,
|
||||
# it will modify the meaning of whatever instruction happens to follow this
|
||||
# one, which is obviously wrong. Note also that we can and indeed *must*
|
||||
# claim that there's a 64-bit encoding for the 32-bit arch case, even though
|
||||
# no such single instruction actually exists for the 32-bit arch case.
|
||||
for ty in [types.i64, types.i32, types.i16, types.i8, types.f64, types.f32]:
|
||||
X86_64.enc(base.copy_nop.bind(ty), r.stacknull, 0)
|
||||
X86_32.enc(base.copy_nop.bind(ty), r.stacknull, 0)
|
||||
|
||||
# Adjust SP down by a dynamic value (or up, with a negative operand).
|
||||
X86_32.enc(base.adjust_sp_down.i32, *r.adjustsp(0x29))
|
||||
|
||||
@@ -222,18 +222,29 @@ impl<'a> Context<'a> {
|
||||
{
|
||||
let dst_vals = self.cur.func.dfg.inst_results(inst);
|
||||
if dst_vals.len() == 1 {
|
||||
let dst_val = dst_vals[0];
|
||||
let can_transform = match (
|
||||
self.cur.func.locations[arg],
|
||||
self.cur.func.locations[dst_vals[0]],
|
||||
self.cur.func.locations[dst_val],
|
||||
) {
|
||||
(ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => src_slot == dst_slot,
|
||||
(ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => {
|
||||
src_slot == dst_slot && {
|
||||
let src_ty = self.cur.func.dfg.value_type(arg);
|
||||
let dst_ty = self.cur.func.dfg.value_type(dst_val);
|
||||
debug_assert!(src_ty == dst_ty);
|
||||
// This limits the transformation to copies of the
|
||||
// types: I64 I32 I16 I8 F64 and F32, since that's
|
||||
// the set of `copy_nop` encodings available.
|
||||
src_ty.is_int() || src_ty.is_float()
|
||||
}
|
||||
}
|
||||
_ => false,
|
||||
};
|
||||
if can_transform {
|
||||
// Convert the instruction into a `copy_nop`.
|
||||
self.cur.func.dfg.replace(inst).copy_nop(arg);
|
||||
let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
|
||||
debug_assert!(ok);
|
||||
debug_assert!(ok, "copy_nop encoding missing for this type");
|
||||
|
||||
// And move on to the next insn.
|
||||
self.reloads.clear();
|
||||
|
||||
23
cranelift/filetests/filetests/regalloc/reload-779.clif
Normal file
23
cranelift/filetests/filetests/regalloc/reload-779.clif
Normal file
@@ -0,0 +1,23 @@
|
||||
test compile
|
||||
target x86_64
|
||||
|
||||
; Filed as https://github.com/CraneStation/cranelift/issues/779
|
||||
;
|
||||
; The copy_nop optimisation to reload (see Issue 773) was creating
|
||||
; copy_nop instructions for types for which there were no encoding.
|
||||
|
||||
function u0:0(i64, i64, i64) system_v {
|
||||
sig0 = () system_v
|
||||
sig1 = (i16) system_v
|
||||
fn1 = u0:94 sig0
|
||||
fn2 = u0:95 sig1
|
||||
|
||||
ebb0(v0: i64, v1: i64, v2: i64):
|
||||
v3 = iconst.i16 0
|
||||
jump ebb1(v3)
|
||||
|
||||
ebb1(v4: i16):
|
||||
call fn1()
|
||||
call fn2(v4)
|
||||
jump ebb1(v4)
|
||||
}
|
||||
Reference in New Issue
Block a user