Merge pull request #784 from julian-seward1/issue779
Only create copy_nop instructions for types for which an encoding exi…
This commit is contained in:
@@ -3,12 +3,13 @@ RISC-V Encodings.
|
|||||||
"""
|
"""
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
from base import instructions as base
|
from base import instructions as base
|
||||||
|
from base import types
|
||||||
from base.immediates import intcc
|
from base.immediates import intcc
|
||||||
from .defs import RV32, RV64
|
from .defs import RV32, RV64
|
||||||
from .recipes import OPIMM, OPIMM32, OP, OP32, LUI, BRANCH, JALR, JAL
|
from .recipes import OPIMM, OPIMM32, OP, OP32, LUI, BRANCH, JALR, JAL
|
||||||
from .recipes import LOAD, STORE
|
from .recipes import LOAD, STORE
|
||||||
from .recipes import R, Rshamt, Ricmp, Ii, Iz, Iicmp, Iret, Icall, Icopy
|
from .recipes import R, Rshamt, Ricmp, Ii, Iz, Iicmp, Iret, Icall, Icopy
|
||||||
from .recipes import U, UJ, UJcall, SB, SBzero, GPsp, GPfi, Irmov
|
from .recipes import U, UJ, UJcall, SB, SBzero, GPsp, GPfi, Irmov, stacknull
|
||||||
from .settings import use_m
|
from .settings import use_m
|
||||||
from cdsl.ast import Var
|
from cdsl.ast import Var
|
||||||
from base.legalize import narrow, expand
|
from base.legalize import narrow, expand
|
||||||
@@ -160,3 +161,9 @@ RV32.enc(base.copy.b1, Icopy, OPIMM(0b000))
|
|||||||
RV64.enc(base.copy.b1, Icopy, OPIMM(0b000))
|
RV64.enc(base.copy.b1, Icopy, OPIMM(0b000))
|
||||||
RV32.enc(base.regmove.b1, Irmov, OPIMM(0b000))
|
RV32.enc(base.regmove.b1, Irmov, OPIMM(0b000))
|
||||||
RV64.enc(base.regmove.b1, Irmov, OPIMM(0b000))
|
RV64.enc(base.regmove.b1, Irmov, OPIMM(0b000))
|
||||||
|
|
||||||
|
# Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
|
||||||
|
# into a no-op.
|
||||||
|
for ty in [types.i64, types.i32, types.i16, types.i8, types.f64, types.f32]:
|
||||||
|
RV64.enc(base.copy_nop.bind(ty), stacknull, 0)
|
||||||
|
RV32.enc(base.copy_nop.bind(ty), stacknull, 0)
|
||||||
|
|||||||
@@ -223,3 +223,8 @@ GPfi = EncRecipe(
|
|||||||
'GPfi', Unary, base_size=4,
|
'GPfi', Unary, base_size=4,
|
||||||
ins=Stack(GPR), outs=GPR,
|
ins=Stack(GPR), outs=GPR,
|
||||||
emit='unimplemented!();')
|
emit='unimplemented!();')
|
||||||
|
|
||||||
|
# Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
|
||||||
|
# into a no-op.
|
||||||
|
stacknull = EncRecipe('stacknull', Unary, base_size=0,
|
||||||
|
ins=Stack(GPR), outs=Stack(GPR), emit='')
|
||||||
|
|||||||
@@ -342,11 +342,23 @@ X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1))
|
|||||||
X86_32.enc(base.copy_special, *r.copysp(0x89))
|
X86_32.enc(base.copy_special, *r.copysp(0x89))
|
||||||
|
|
||||||
# Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
|
# Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
|
||||||
# into a no-op.
|
# into a no-op. Ideally we could to make this encoding available for
|
||||||
X86_64.enc(base.copy_nop.i64, r.stacknull, 0)
|
# all types, and write `base.copy_nop.any`, but it appears that the
|
||||||
X86_64.enc(base.copy_nop.i32, r.stacknull, 0)
|
# controlling type variable must not polymorphic. So we make do with
|
||||||
X86_64.enc(base.copy_nop.f64, r.stacknull, 0)
|
# the following limited set, and guard the generating transformation in
|
||||||
X86_64.enc(base.copy_nop.f32, r.stacknull, 0)
|
# regalloc/reload.rs accordingly.
|
||||||
|
#
|
||||||
|
# The same encoding is generated for both the 64- and 32-bit architectures.
|
||||||
|
# Note that we can't use `enc_both` here, because that attempts to create a
|
||||||
|
# variant with a REX prefix in the 64-bit-architecture case. But since
|
||||||
|
# there's no actual instruction for the REX prefix to modify the meaning of,
|
||||||
|
# it will modify the meaning of whatever instruction happens to follow this
|
||||||
|
# one, which is obviously wrong. Note also that we can and indeed *must*
|
||||||
|
# claim that there's a 64-bit encoding for the 32-bit arch case, even though
|
||||||
|
# no such single instruction actually exists for the 32-bit arch case.
|
||||||
|
for ty in [types.i64, types.i32, types.i16, types.i8, types.f64, types.f32]:
|
||||||
|
X86_64.enc(base.copy_nop.bind(ty), r.stacknull, 0)
|
||||||
|
X86_32.enc(base.copy_nop.bind(ty), r.stacknull, 0)
|
||||||
|
|
||||||
# Adjust SP down by a dynamic value (or up, with a negative operand).
|
# Adjust SP down by a dynamic value (or up, with a negative operand).
|
||||||
X86_32.enc(base.adjust_sp_down.i32, *r.adjustsp(0x29))
|
X86_32.enc(base.adjust_sp_down.i32, *r.adjustsp(0x29))
|
||||||
|
|||||||
@@ -222,18 +222,29 @@ impl<'a> Context<'a> {
|
|||||||
{
|
{
|
||||||
let dst_vals = self.cur.func.dfg.inst_results(inst);
|
let dst_vals = self.cur.func.dfg.inst_results(inst);
|
||||||
if dst_vals.len() == 1 {
|
if dst_vals.len() == 1 {
|
||||||
|
let dst_val = dst_vals[0];
|
||||||
let can_transform = match (
|
let can_transform = match (
|
||||||
self.cur.func.locations[arg],
|
self.cur.func.locations[arg],
|
||||||
self.cur.func.locations[dst_vals[0]],
|
self.cur.func.locations[dst_val],
|
||||||
) {
|
) {
|
||||||
(ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => src_slot == dst_slot,
|
(ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => {
|
||||||
|
src_slot == dst_slot && {
|
||||||
|
let src_ty = self.cur.func.dfg.value_type(arg);
|
||||||
|
let dst_ty = self.cur.func.dfg.value_type(dst_val);
|
||||||
|
debug_assert!(src_ty == dst_ty);
|
||||||
|
// This limits the transformation to copies of the
|
||||||
|
// types: I64 I32 I16 I8 F64 and F32, since that's
|
||||||
|
// the set of `copy_nop` encodings available.
|
||||||
|
src_ty.is_int() || src_ty.is_float()
|
||||||
|
}
|
||||||
|
}
|
||||||
_ => false,
|
_ => false,
|
||||||
};
|
};
|
||||||
if can_transform {
|
if can_transform {
|
||||||
// Convert the instruction into a `copy_nop`.
|
// Convert the instruction into a `copy_nop`.
|
||||||
self.cur.func.dfg.replace(inst).copy_nop(arg);
|
self.cur.func.dfg.replace(inst).copy_nop(arg);
|
||||||
let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
|
let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
|
||||||
debug_assert!(ok);
|
debug_assert!(ok, "copy_nop encoding missing for this type");
|
||||||
|
|
||||||
// And move on to the next insn.
|
// And move on to the next insn.
|
||||||
self.reloads.clear();
|
self.reloads.clear();
|
||||||
|
|||||||
23
cranelift/filetests/filetests/regalloc/reload-779.clif
Normal file
23
cranelift/filetests/filetests/regalloc/reload-779.clif
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
test compile
|
||||||
|
target x86_64
|
||||||
|
|
||||||
|
; Filed as https://github.com/CraneStation/cranelift/issues/779
|
||||||
|
;
|
||||||
|
; The copy_nop optimisation to reload (see Issue 773) was creating
|
||||||
|
; copy_nop instructions for types for which there were no encoding.
|
||||||
|
|
||||||
|
function u0:0(i64, i64, i64) system_v {
|
||||||
|
sig0 = () system_v
|
||||||
|
sig1 = (i16) system_v
|
||||||
|
fn1 = u0:94 sig0
|
||||||
|
fn2 = u0:95 sig1
|
||||||
|
|
||||||
|
ebb0(v0: i64, v1: i64, v2: i64):
|
||||||
|
v3 = iconst.i16 0
|
||||||
|
jump ebb1(v3)
|
||||||
|
|
||||||
|
ebb1(v4: i16):
|
||||||
|
call fn1()
|
||||||
|
call fn2(v4)
|
||||||
|
jump ebb1(v4)
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user