Cranelift: implement redundant fill removal on tree-shaped CFG regions. Mozilla bug 1570584. (#906)
This commit is contained in:
@@ -9,6 +9,7 @@ use crate::cdsl::settings::SettingGroup;
|
||||
use crate::shared::types::Bool::B1;
|
||||
use crate::shared::types::Float::{F32, F64};
|
||||
use crate::shared::types::Int::{I16, I32, I64, I8};
|
||||
use crate::shared::types::Reference::{R32, R64};
|
||||
use crate::shared::Definitions as SharedDefinitions;
|
||||
|
||||
use super::recipes::RecipeGroup;
|
||||
@@ -121,7 +122,9 @@ pub fn define<'defs>(
|
||||
let call_indirect = shared.by_name("call_indirect");
|
||||
let copy = shared.by_name("copy");
|
||||
let copy_nop = shared.by_name("copy_nop");
|
||||
let copy_to_ssa = shared.by_name("copy_to_ssa");
|
||||
let fill = shared.by_name("fill");
|
||||
let fill_nop = shared.by_name("fill_nop");
|
||||
let iadd = shared.by_name("iadd");
|
||||
let iadd_imm = shared.by_name("iadd_imm");
|
||||
let iconst = shared.by_name("iconst");
|
||||
@@ -141,6 +144,8 @@ pub fn define<'defs>(
|
||||
let return_ = shared.by_name("return");
|
||||
|
||||
// Recipe shorthands, prefixed with r_.
|
||||
let r_copytossa = recipes.by_name("copytossa");
|
||||
let r_fillnull = recipes.by_name("fillnull");
|
||||
let r_icall = recipes.by_name("Icall");
|
||||
let r_icopy = recipes.by_name("Icopy");
|
||||
let r_ii = recipes.by_name("Ii");
|
||||
@@ -368,6 +373,14 @@ pub fn define<'defs>(
|
||||
e.add64(enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
|
||||
e.add64(enc(fill.bind(I64), r_gp_fi, load_bits(0b011)));
|
||||
|
||||
// No-op fills, created by late-stage redundant-fill removal.
|
||||
for &ty in &[I64, I32] {
|
||||
e.add64(enc(fill_nop.bind(ty), r_fillnull, 0));
|
||||
e.add32(enc(fill_nop.bind(ty), r_fillnull, 0));
|
||||
}
|
||||
e.add64(enc(fill_nop.bind(B1), r_fillnull, 0));
|
||||
e.add32(enc(fill_nop.bind(B1), r_fillnull, 0));
|
||||
|
||||
// Register copies.
|
||||
e.add32(enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0)));
|
||||
e.add64(enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0)));
|
||||
@@ -394,5 +407,34 @@ pub fn define<'defs>(
|
||||
e.add64(enc(copy_nop.bind(ty), r_stacknull, 0));
|
||||
}
|
||||
|
||||
// Copy-to-SSA
|
||||
e.add32(enc(
|
||||
copy_to_ssa.bind(I32),
|
||||
r_copytossa,
|
||||
opimm_bits(0b000, 0),
|
||||
));
|
||||
e.add64(enc(
|
||||
copy_to_ssa.bind(I64),
|
||||
r_copytossa,
|
||||
opimm_bits(0b000, 0),
|
||||
));
|
||||
e.add64(enc(
|
||||
copy_to_ssa.bind(I32),
|
||||
r_copytossa,
|
||||
opimm32_bits(0b000, 0),
|
||||
));
|
||||
e.add32(enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
|
||||
e.add64(enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
|
||||
e.add32(enc(
|
||||
copy_to_ssa.bind_ref(R32),
|
||||
r_copytossa,
|
||||
opimm_bits(0b000, 0),
|
||||
));
|
||||
e.add64(enc(
|
||||
copy_to_ssa.bind_ref(R64),
|
||||
r_copytossa,
|
||||
opimm_bits(0b000, 0),
|
||||
));
|
||||
|
||||
e
|
||||
}
|
||||
|
||||
@@ -63,6 +63,7 @@ pub fn define<'formats>(
|
||||
let f_branch_icmp = formats.by_name("BranchIcmp");
|
||||
let f_call = formats.by_name("Call");
|
||||
let f_call_indirect = formats.by_name("CallIndirect");
|
||||
let f_copy_to_ssa = formats.by_name("CopyToSsa");
|
||||
let f_int_compare = formats.by_name("IntCompare");
|
||||
let f_int_compare_imm = formats.by_name("IntCompareImm");
|
||||
let f_jump = formats.by_name("Jump");
|
||||
@@ -185,6 +186,14 @@ pub fn define<'formats>(
|
||||
.emit("put_i(bits, src, 0, dst, sink);"),
|
||||
);
|
||||
|
||||
// Same for copy-to-SSA -- GPR regmove.
|
||||
recipes.push(
|
||||
EncodingRecipeBuilder::new("copytossa", f_copy_to_ssa, 4)
|
||||
// No operands_in to mention, because a source register is specified directly.
|
||||
.operands_out(vec![gpr])
|
||||
.emit("put_i(bits, src, 0, out_reg0, sink);"),
|
||||
);
|
||||
|
||||
// U-type instructions have a 20-bit immediate that targets bits 12-31.
|
||||
let format = formats.get(f_unary_imm);
|
||||
recipes.push(
|
||||
@@ -271,5 +280,14 @@ pub fn define<'formats>(
|
||||
.emit(""),
|
||||
);
|
||||
|
||||
// No-op fills, created by late-stage redundant-fill removal.
|
||||
recipes.push(
|
||||
EncodingRecipeBuilder::new("fillnull", f_unary, 0)
|
||||
.operands_in(vec![Stack::new(gpr)])
|
||||
.operands_out(vec![gpr])
|
||||
.clobbers_flags(false)
|
||||
.emit(""),
|
||||
);
|
||||
|
||||
recipes
|
||||
}
|
||||
|
||||
@@ -340,6 +340,7 @@ pub fn define(
|
||||
let copy = shared.by_name("copy");
|
||||
let copy_nop = shared.by_name("copy_nop");
|
||||
let copy_special = shared.by_name("copy_special");
|
||||
let copy_to_ssa = shared.by_name("copy_to_ssa");
|
||||
let ctz = shared.by_name("ctz");
|
||||
let debugtrap = shared.by_name("debugtrap");
|
||||
let extractlane = shared.by_name("extractlane");
|
||||
@@ -352,6 +353,7 @@ pub fn define(
|
||||
let fdiv = shared.by_name("fdiv");
|
||||
let ffcmp = shared.by_name("ffcmp");
|
||||
let fill = shared.by_name("fill");
|
||||
let fill_nop = shared.by_name("fill_nop");
|
||||
let floor = shared.by_name("floor");
|
||||
let fmul = shared.by_name("fmul");
|
||||
let fpromote = shared.by_name("fpromote");
|
||||
@@ -468,7 +470,9 @@ pub fn define(
|
||||
let rec_fax = r.template("fax");
|
||||
let rec_fcmp = r.template("fcmp");
|
||||
let rec_fcscc = r.template("fcscc");
|
||||
let rec_ffillnull = r.recipe("ffillnull");
|
||||
let rec_ffillSib32 = r.template("ffillSib32");
|
||||
let rec_fillnull = r.recipe("fillnull");
|
||||
let rec_fillSib32 = r.template("fillSib32");
|
||||
let rec_fld = r.template("fld");
|
||||
let rec_fldDisp32 = r.template("fldDisp32");
|
||||
@@ -490,6 +494,7 @@ pub fn define(
|
||||
let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32");
|
||||
let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8");
|
||||
let rec_furm = r.template("furm");
|
||||
let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa");
|
||||
let rec_furmi_rnd = r.template("furmi_rnd");
|
||||
let rec_got_fnaddr8 = r.template("got_fnaddr8");
|
||||
let rec_got_gvaddr8 = r.template("got_gvaddr8");
|
||||
@@ -568,6 +573,7 @@ pub fn define(
|
||||
let rec_trapff = r.recipe("trapff");
|
||||
let rec_u_id = r.template("u_id");
|
||||
let rec_umr = r.template("umr");
|
||||
let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa");
|
||||
let rec_ur = r.template("ur");
|
||||
let rec_urm = r.template("urm");
|
||||
let rec_urm_noflags = r.template("urm_noflags");
|
||||
@@ -921,6 +927,18 @@ pub fn define(
|
||||
e.enc_r32_r64(fill, rec_fillSib32.opcodes(vec![0x8b]));
|
||||
e.enc_r32_r64(regfill, rec_regfill32.opcodes(vec![0x8b]));
|
||||
|
||||
// No-op fills, created by late-stage redundant-fill removal.
|
||||
for &ty in &[I64, I32, I16, I8] {
|
||||
e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0);
|
||||
e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0);
|
||||
}
|
||||
e.enc64_rec(fill_nop.bind(B1), rec_fillnull, 0);
|
||||
e.enc32_rec(fill_nop.bind(B1), rec_fillnull, 0);
|
||||
for &ty in &[F64, F32] {
|
||||
e.enc64_rec(fill_nop.bind(ty), rec_ffillnull, 0);
|
||||
e.enc32_rec(fill_nop.bind(ty), rec_ffillnull, 0);
|
||||
}
|
||||
|
||||
// Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above.
|
||||
|
||||
e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(vec![0x8b]));
|
||||
@@ -943,6 +961,24 @@ pub fn define(
|
||||
e.enc64(copy_special, rec_copysp.opcodes(vec![0x89]).rex().w());
|
||||
e.enc32(copy_special, rec_copysp.opcodes(vec![0x89]));
|
||||
|
||||
// Copy to SSA
|
||||
e.enc_i32_i64(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89]));
|
||||
e.enc_r32_r64(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89]));
|
||||
e.enc_both(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(vec![0x89]));
|
||||
e.enc_both(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(vec![0x89]));
|
||||
e.enc_both(
|
||||
copy_to_ssa.bind(I16),
|
||||
rec_umr_reg_to_ssa.opcodes(vec![0x89]),
|
||||
);
|
||||
e.enc_both(
|
||||
copy_to_ssa.bind(F64),
|
||||
rec_furm_reg_to_ssa.opcodes(vec![0xf2, 0x0f, 0x10]),
|
||||
);
|
||||
e.enc_both(
|
||||
copy_to_ssa.bind(F32),
|
||||
rec_furm_reg_to_ssa.opcodes(vec![0xf3, 0x0f, 0x10]),
|
||||
);
|
||||
|
||||
// Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
|
||||
// into a no-op.
|
||||
// The same encoding is generated for both the 64- and 32-bit architectures.
|
||||
|
||||
@@ -367,6 +367,7 @@ pub fn define<'shared>(
|
||||
let f_call = formats.by_name("Call");
|
||||
let f_call_indirect = formats.by_name("CallIndirect");
|
||||
let f_copy_special = formats.by_name("CopySpecial");
|
||||
let f_copy_to_ssa = formats.by_name("CopyToSsa");
|
||||
let f_extract_lane = formats.by_name("ExtractLane"); // TODO this would preferably retrieve a BinaryImm8 format but because formats are compared structurally and ExtractLane has the same structure this is impossible--if we rename ExtractLane, it may even impact parsing
|
||||
let f_float_compare = formats.by_name("FloatCompare");
|
||||
let f_float_cond = formats.by_name("FloatCond");
|
||||
@@ -426,6 +427,22 @@ pub fn define<'shared>(
|
||||
.emit(""),
|
||||
);
|
||||
|
||||
// No-op fills, created by late-stage redundant-fill removal.
|
||||
recipes.add_recipe(
|
||||
EncodingRecipeBuilder::new("fillnull", f_unary, 0)
|
||||
.operands_in(vec![stack_gpr32])
|
||||
.operands_out(vec![gpr])
|
||||
.clobbers_flags(false)
|
||||
.emit(""),
|
||||
);
|
||||
recipes.add_recipe(
|
||||
EncodingRecipeBuilder::new("ffillnull", f_unary, 0)
|
||||
.operands_in(vec![stack_gpr32])
|
||||
.operands_out(vec![fpr])
|
||||
.clobbers_flags(false)
|
||||
.emit(""),
|
||||
);
|
||||
|
||||
recipes
|
||||
.add_recipe(EncodingRecipeBuilder::new("debugtrap", f_nullary, 1).emit("sink.put1(0xcc);"));
|
||||
|
||||
@@ -570,6 +587,20 @@ pub fn define<'shared>(
|
||||
),
|
||||
);
|
||||
|
||||
// Same as umr, but with the source register specified directly.
|
||||
recipes.add_template_recipe(
|
||||
EncodingRecipeBuilder::new("umr_reg_to_ssa", f_copy_to_ssa, 1)
|
||||
// No operands_in to mention, because a source register is specified directly.
|
||||
.operands_out(vec![gpr])
|
||||
.clobbers_flags(false)
|
||||
.emit(
|
||||
r#"
|
||||
{{PUT_OP}}(bits, rex2(out_reg0, src), sink);
|
||||
modrm_rr(out_reg0, src, sink);
|
||||
"#,
|
||||
),
|
||||
);
|
||||
|
||||
// XX /r, but for a unary operator with separate input/output register.
|
||||
// RM form. Clobbers FLAGS.
|
||||
recipes.add_template_recipe(
|
||||
@@ -631,6 +662,20 @@ pub fn define<'shared>(
|
||||
),
|
||||
);
|
||||
|
||||
// Same as furm, but with the source register specified directly.
|
||||
recipes.add_template_recipe(
|
||||
EncodingRecipeBuilder::new("furm_reg_to_ssa", f_copy_to_ssa, 1)
|
||||
// No operands_in to mention, because a source register is specified directly.
|
||||
.operands_out(vec![fpr])
|
||||
.clobbers_flags(false)
|
||||
.emit(
|
||||
r#"
|
||||
{{PUT_OP}}(bits, rex2(src, out_reg0), sink);
|
||||
modrm_rr(src, out_reg0, sink);
|
||||
"#,
|
||||
),
|
||||
);
|
||||
|
||||
// XX /r, RM form, GPR -> FPR.
|
||||
recipes.add_template_recipe(
|
||||
EncodingRecipeBuilder::new("frurm", f_unary, 1)
|
||||
|
||||
@@ -157,6 +157,7 @@ pub fn define(immediates: &OperandKinds, entities: &OperandKinds) -> FormatRegis
|
||||
.imm(("src", regunit))
|
||||
.imm(("dst", regunit)),
|
||||
);
|
||||
registry.insert(Builder::new("CopyToSsa").imm(("src", regunit)));
|
||||
registry.insert(
|
||||
Builder::new("RegSpill")
|
||||
.value()
|
||||
|
||||
@@ -1194,6 +1194,22 @@ pub fn define(
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"fill_nop",
|
||||
r#"
|
||||
This is identical to `fill`, except that it is a no-op: its encoding is zero bytes long and emits no machine code.
|
||||
|
||||
This instruction is created only during late-stage redundant-reload removal, after all
|
||||
registers and stack slots have been assigned. It is used to replace `fill`s that have
|
||||
been identified as redundant.
|
||||
"#,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
let src = &operand("src", regunit);
|
||||
let dst = &operand("dst", regunit);
|
||||
|
||||
@@ -1233,6 +1249,23 @@ pub fn define(
|
||||
.other_side_effects(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"copy_to_ssa",
|
||||
r#"
|
||||
Copies the contents of ''src'' register to ''a'' SSA name.
|
||||
|
||||
This instruction copies the contents of one register, regardless of its SSA name, to
|
||||
another register, creating a new SSA name. In that sense it is a one-sided version
|
||||
of ''copy_special''. This instruction is internal and should not be created by
|
||||
Cranelift users.
|
||||
"#,
|
||||
)
|
||||
.operands_in(vec![src])
|
||||
.operands_out(vec![a])
|
||||
.other_side_effects(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"copy_nop",
|
||||
|
||||
Reference in New Issue
Block a user