Cranelift: implement redundant fill removal on tree-shaped CFG regions. Mozilla bug 1570584. (#906)

This commit is contained in:
julian-seward1
2019-08-25 19:37:34 +02:00
committed by GitHub
parent cc57e84cbd
commit b8fb52446c
19 changed files with 1262 additions and 24 deletions

View File

@@ -9,6 +9,7 @@ use crate::cdsl::settings::SettingGroup;
use crate::shared::types::Bool::B1;
use crate::shared::types::Float::{F32, F64};
use crate::shared::types::Int::{I16, I32, I64, I8};
use crate::shared::types::Reference::{R32, R64};
use crate::shared::Definitions as SharedDefinitions;
use super::recipes::RecipeGroup;
@@ -121,7 +122,9 @@ pub fn define<'defs>(
let call_indirect = shared.by_name("call_indirect");
let copy = shared.by_name("copy");
let copy_nop = shared.by_name("copy_nop");
let copy_to_ssa = shared.by_name("copy_to_ssa");
let fill = shared.by_name("fill");
let fill_nop = shared.by_name("fill_nop");
let iadd = shared.by_name("iadd");
let iadd_imm = shared.by_name("iadd_imm");
let iconst = shared.by_name("iconst");
@@ -141,6 +144,8 @@ pub fn define<'defs>(
let return_ = shared.by_name("return");
// Recipes shorthands, prefixed with r_.
let r_copytossa = recipes.by_name("copytossa");
let r_fillnull = recipes.by_name("fillnull");
let r_icall = recipes.by_name("Icall");
let r_icopy = recipes.by_name("Icopy");
let r_ii = recipes.by_name("Ii");
@@ -368,6 +373,14 @@ pub fn define<'defs>(
e.add64(enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
e.add64(enc(fill.bind(I64), r_gp_fi, load_bits(0b011)));
// No-op fills, created by late-stage redundant-fill removal.
for &ty in &[I64, I32] {
e.add64(enc(fill_nop.bind(ty), r_fillnull, 0));
e.add32(enc(fill_nop.bind(ty), r_fillnull, 0));
}
e.add64(enc(fill_nop.bind(B1), r_fillnull, 0));
e.add32(enc(fill_nop.bind(B1), r_fillnull, 0));
// Register copies.
e.add32(enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0)));
e.add64(enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0)));
@@ -394,5 +407,34 @@ pub fn define<'defs>(
e.add64(enc(copy_nop.bind(ty), r_stacknull, 0));
}
// Copy-to-SSA
e.add32(enc(
copy_to_ssa.bind(I32),
r_copytossa,
opimm_bits(0b000, 0),
));
e.add64(enc(
copy_to_ssa.bind(I64),
r_copytossa,
opimm_bits(0b000, 0),
));
e.add64(enc(
copy_to_ssa.bind(I32),
r_copytossa,
opimm32_bits(0b000, 0),
));
e.add32(enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
e.add64(enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
e.add32(enc(
copy_to_ssa.bind_ref(R32),
r_copytossa,
opimm_bits(0b000, 0),
));
e.add64(enc(
copy_to_ssa.bind_ref(R64),
r_copytossa,
opimm_bits(0b000, 0),
));
e
}

View File

@@ -63,6 +63,7 @@ pub fn define<'formats>(
let f_branch_icmp = formats.by_name("BranchIcmp");
let f_call = formats.by_name("Call");
let f_call_indirect = formats.by_name("CallIndirect");
let f_copy_to_ssa = formats.by_name("CopyToSsa");
let f_int_compare = formats.by_name("IntCompare");
let f_int_compare_imm = formats.by_name("IntCompareImm");
let f_jump = formats.by_name("Jump");
@@ -185,6 +186,14 @@ pub fn define<'formats>(
.emit("put_i(bits, src, 0, dst, sink);"),
);
// Same for copy-to-SSA -- GPR regmove.
recipes.push(
EncodingRecipeBuilder::new("copytossa", f_copy_to_ssa, 4)
// No operands_in: the source register is not an SSA value operand; it is given directly
// by the `src` register-unit immediate of the CopyToSsa format.
.operands_out(vec![gpr])
.emit("put_i(bits, src, 0, out_reg0, sink);"),
);
// U-type instructions have a 20-bit immediate that targets bits 12-31.
let format = formats.get(f_unary_imm);
recipes.push(
@@ -271,5 +280,14 @@ pub fn define<'formats>(
.emit(""),
);
// No-op fills, created by late-stage redundant-fill removal.
recipes.push(
EncodingRecipeBuilder::new("fillnull", f_unary, 0)
.operands_in(vec![Stack::new(gpr)])
.operands_out(vec![gpr])
.clobbers_flags(false)
.emit(""),
);
recipes
}

View File

@@ -340,6 +340,7 @@ pub fn define(
let copy = shared.by_name("copy");
let copy_nop = shared.by_name("copy_nop");
let copy_special = shared.by_name("copy_special");
let copy_to_ssa = shared.by_name("copy_to_ssa");
let ctz = shared.by_name("ctz");
let debugtrap = shared.by_name("debugtrap");
let extractlane = shared.by_name("extractlane");
@@ -352,6 +353,7 @@ pub fn define(
let fdiv = shared.by_name("fdiv");
let ffcmp = shared.by_name("ffcmp");
let fill = shared.by_name("fill");
let fill_nop = shared.by_name("fill_nop");
let floor = shared.by_name("floor");
let fmul = shared.by_name("fmul");
let fpromote = shared.by_name("fpromote");
@@ -468,7 +470,9 @@ pub fn define(
let rec_fax = r.template("fax");
let rec_fcmp = r.template("fcmp");
let rec_fcscc = r.template("fcscc");
let rec_ffillnull = r.recipe("ffillnull");
let rec_ffillSib32 = r.template("ffillSib32");
let rec_fillnull = r.recipe("fillnull");
let rec_fillSib32 = r.template("fillSib32");
let rec_fld = r.template("fld");
let rec_fldDisp32 = r.template("fldDisp32");
@@ -490,6 +494,7 @@ pub fn define(
let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32");
let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8");
let rec_furm = r.template("furm");
let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa");
let rec_furmi_rnd = r.template("furmi_rnd");
let rec_got_fnaddr8 = r.template("got_fnaddr8");
let rec_got_gvaddr8 = r.template("got_gvaddr8");
@@ -568,6 +573,7 @@ pub fn define(
let rec_trapff = r.recipe("trapff");
let rec_u_id = r.template("u_id");
let rec_umr = r.template("umr");
let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa");
let rec_ur = r.template("ur");
let rec_urm = r.template("urm");
let rec_urm_noflags = r.template("urm_noflags");
@@ -921,6 +927,18 @@ pub fn define(
e.enc_r32_r64(fill, rec_fillSib32.opcodes(vec![0x8b]));
e.enc_r32_r64(regfill, rec_regfill32.opcodes(vec![0x8b]));
// No-op fills, created by late-stage redundant-fill removal.
for &ty in &[I64, I32, I16, I8] {
e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0);
e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0);
}
e.enc64_rec(fill_nop.bind(B1), rec_fillnull, 0);
e.enc32_rec(fill_nop.bind(B1), rec_fillnull, 0);
for &ty in &[F64, F32] {
e.enc64_rec(fill_nop.bind(ty), rec_ffillnull, 0);
e.enc32_rec(fill_nop.bind(ty), rec_ffillnull, 0);
}
// Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above.
e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(vec![0x8b]));
@@ -943,6 +961,24 @@ pub fn define(
e.enc64(copy_special, rec_copysp.opcodes(vec![0x89]).rex().w());
e.enc32(copy_special, rec_copysp.opcodes(vec![0x89]));
// Copy to SSA
e.enc_i32_i64(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89]));
e.enc_r32_r64(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89]));
e.enc_both(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(vec![0x89]));
e.enc_both(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(vec![0x89]));
e.enc_both(
copy_to_ssa.bind(I16),
rec_umr_reg_to_ssa.opcodes(vec![0x89]),
);
e.enc_both(
copy_to_ssa.bind(F64),
rec_furm_reg_to_ssa.opcodes(vec![0xf2, 0x0f, 0x10]),
);
e.enc_both(
copy_to_ssa.bind(F32),
rec_furm_reg_to_ssa.opcodes(vec![0xf3, 0x0f, 0x10]),
);
// Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
// into a no-op.
// The same encoding is generated for both the 64- and 32-bit architectures.

View File

@@ -367,6 +367,7 @@ pub fn define<'shared>(
let f_call = formats.by_name("Call");
let f_call_indirect = formats.by_name("CallIndirect");
let f_copy_special = formats.by_name("CopySpecial");
let f_copy_to_ssa = formats.by_name("CopyToSsa");
let f_extract_lane = formats.by_name("ExtractLane"); // TODO this would preferably retrieve a BinaryImm8 format but because formats are compared structurally and ExtractLane has the same structure this is impossible--if we rename ExtractLane, it may even impact parsing
let f_float_compare = formats.by_name("FloatCompare");
let f_float_cond = formats.by_name("FloatCond");
@@ -426,6 +427,22 @@ pub fn define<'shared>(
.emit(""),
);
// No-op fills, created by late-stage redundant-fill removal.
recipes.add_recipe(
EncodingRecipeBuilder::new("fillnull", f_unary, 0)
.operands_in(vec![stack_gpr32])
.operands_out(vec![gpr])
.clobbers_flags(false)
.emit(""),
);
recipes.add_recipe(
EncodingRecipeBuilder::new("ffillnull", f_unary, 0)
.operands_in(vec![stack_gpr32])
.operands_out(vec![fpr])
.clobbers_flags(false)
.emit(""),
);
recipes
.add_recipe(EncodingRecipeBuilder::new("debugtrap", f_nullary, 1).emit("sink.put1(0xcc);"));
@@ -570,6 +587,20 @@ pub fn define<'shared>(
),
);
// Same as umr, but with the source register specified directly.
recipes.add_template_recipe(
EncodingRecipeBuilder::new("umr_reg_to_ssa", f_copy_to_ssa, 1)
// No operands_in: the source register is not an SSA value operand; it is given directly
// by the `src` register-unit immediate of the CopyToSsa format.
.operands_out(vec![gpr])
.clobbers_flags(false)
.emit(
r#"
{{PUT_OP}}(bits, rex2(out_reg0, src), sink);
modrm_rr(out_reg0, src, sink);
"#,
),
);
// XX /r, but for a unary operator with separate input/output register.
// RM form. Clobbers FLAGS.
recipes.add_template_recipe(
@@ -631,6 +662,20 @@ pub fn define<'shared>(
),
);
// Same as furm, but with the source register specified directly.
recipes.add_template_recipe(
EncodingRecipeBuilder::new("furm_reg_to_ssa", f_copy_to_ssa, 1)
// No operands_in: the source register is not an SSA value operand; it is given directly
// by the `src` register-unit immediate of the CopyToSsa format.
.operands_out(vec![fpr])
.clobbers_flags(false)
.emit(
r#"
{{PUT_OP}}(bits, rex2(src, out_reg0), sink);
modrm_rr(src, out_reg0, sink);
"#,
),
);
// XX /r, RM form, GPR -> FPR.
recipes.add_template_recipe(
EncodingRecipeBuilder::new("frurm", f_unary, 1)

View File

@@ -157,6 +157,7 @@ pub fn define(immediates: &OperandKinds, entities: &OperandKinds) -> FormatRegis
.imm(("src", regunit))
.imm(("dst", regunit)),
);
registry.insert(Builder::new("CopyToSsa").imm(("src", regunit)));
registry.insert(
Builder::new("RegSpill")
.value()

View File

@@ -1194,6 +1194,22 @@ pub fn define(
.can_load(true),
);
ig.push(
Inst::new(
"fill_nop",
r#"
This is identical to `fill`, except it has no encoding, since it is a no-op.
This instruction is created only during late-stage redundant-reload removal, after all
registers and stack slots have been assigned. It is used to replace `fill`s that have
been identified as redundant.
"#,
)
.operands_in(vec![x])
.operands_out(vec![a])
.can_load(true),
);
let src = &operand("src", regunit);
let dst = &operand("dst", regunit);
@@ -1233,6 +1249,23 @@ pub fn define(
.other_side_effects(true),
);
ig.push(
Inst::new(
"copy_to_ssa",
r#"
Copies the contents of ''src'' register to ''a'' SSA name.
This instruction copies the contents of one register, regardless of its SSA name, to
another register, creating a new SSA name. In that sense it is a one-sided version
of ''copy_special''. This instruction is internal and should not be created by
Cranelift users.
"#,
)
.operands_in(vec![src])
.operands_out(vec![a])
.other_side_effects(true),
);
ig.push(
Inst::new(
"copy_nop",