egraph-based midend: draw the rest of the owl (productionized). (#4953)
* egraph-based midend: draw the rest of the owl.
* Rename `egg` submodule of cranelift-codegen to `egraph`.
* Apply some feedback from @jsharp during code walkthrough.
* Remove recursion from find_best_node by doing a single pass.
  Rather than recursively computing the lowest-cost node for a given eclass and memoizing the answer at each eclass node, we can do a single forward pass; because every eclass node refers only to earlier nodes, this is sufficient. The behavior may slightly differ from the earlier behavior because we cannot short-circuit costs to zero once a node is elaborated; but in practice this should not matter.
* Make elaboration non-recursive.
  Use an explicit stack instead (with `ElabStackEntry` entries, alongside a result stack).
* Make elaboration traversal of the domtree non-recursive/stack-safe.
* Work analysis logic in Cranelift-side egraph glue into a general analysis framework in cranelift-egraph.
* Apply static recursion limit to rule application.
* Fix aarch64 wrt dynamic-vector support -- broken rebase.
* Topo-sort cranelift-egraph before cranelift-codegen in publish script, like the comment instructs me to!
* Fix multi-result call testcase.
* Include `cranelift-egraph` in `PUBLISHED_CRATES`.
* Fix atomic_rmw: not really a load.
* Remove now-unnecessary PartialOrd/Ord derivations.
* Address some code-review comments.
* Review feedback.
* Review feedback.
* No overlap in mid-end rules, because we are defining a multi-constructor.
* rustfmt
* Review feedback.
* Review feedback.
* Review feedback.
* Review feedback.
* Remove redundant `mut`.
* Add comment noting what rules can do.
* Review feedback.
* Clarify comment wording.
* Update `has_memory_fence_semantics`.
* Apply @jameysharp's improved loop-level computation.
  Co-authored-by: Jamey Sharp <jamey@minilop.net>
* Fix suggestion commit.
* Fix off-by-one in new loop-nest analysis.
* Review feedback.
* Review feedback.
* Review feedback.
* Use `Default`, not `std::default::Default`, as per @fitzgen
  Co-authored-by: Nick Fitzgerald <fitzgen@gmail.com>
* Apply @fitzgen's comment elaboration to a doc-comment.
  Co-authored-by: Nick Fitzgerald <fitzgen@gmail.com>
* Add stat for hitting the rewrite-depth limit.
* Some code motion in split prelude to make the diff a little clearer wrt `main`.
* Take @jameysharp's suggested `try_into()` usage for blockparam indices.
  Co-authored-by: Jamey Sharp <jamey@minilop.net>
* Take @jameysharp's suggestion to avoid double-match on load op.
  Co-authored-by: Jamey Sharp <jamey@minilop.net>
* Fix suggestion (add import).
* Review feedback.
* Fix stack_load handling.
* Remove redundant can_store case.
* Take @jameysharp's suggested improvement to FuncEGraph::build() logic
  Co-authored-by: Jamey Sharp <jamey@minilop.net>
* Tweaks to FuncEGraph::build() on top of suggestion.
* Take @jameysharp's suggested clarified condition
  Co-authored-by: Jamey Sharp <jamey@minilop.net>
* Clean up after suggestion (unused variable).
* Fix loop analysis.
* loop level asserts
* Revert constant-space loop analysis -- edge cases were incorrect, so let's go with the simple thing for now.
* Take @jameysharp's suggestion re: result_tys
  Co-authored-by: Jamey Sharp <jamey@minilop.net>
* Fix up after suggestion
* Take @jameysharp's suggestion to use fold rather than reduce
  Co-authored-by: Jamey Sharp <jamey@minilop.net>
* Fixup after suggestion
* Take @jameysharp's suggestion to remove elaborate_eclass_use's return value.
* Clarifying comment in terminator insts.

Co-authored-by: Jamey Sharp <jamey@minilop.net>
Co-authored-by: Nick Fitzgerald <fitzgen@gmail.com>
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -537,6 +537,7 @@ dependencies = [
|
||||
"cranelift-bforest",
|
||||
"cranelift-codegen-meta",
|
||||
"cranelift-codegen-shared",
|
||||
"cranelift-egraph",
|
||||
"cranelift-entity",
|
||||
"cranelift-isle",
|
||||
"criterion",
|
||||
|
||||
@@ -129,6 +129,7 @@ wasmtime-fuzzing = { path = "crates/fuzzing" }
|
||||
|
||||
cranelift-wasm = { path = "cranelift/wasm", version = "0.90.0" }
|
||||
cranelift-codegen = { path = "cranelift/codegen", version = "0.90.0" }
|
||||
cranelift-egraph = { path = "cranelift/egraph", version = "0.90.0" }
|
||||
cranelift-frontend = { path = "cranelift/frontend", version = "0.90.0" }
|
||||
cranelift-entity = { path = "cranelift/entity", version = "0.90.0" }
|
||||
cranelift-native = { path = "cranelift/native", version = "0.90.0" }
|
||||
|
||||
@@ -18,6 +18,7 @@ bumpalo = "3"
|
||||
cranelift-codegen-shared = { path = "./shared", version = "0.90.0" }
|
||||
cranelift-entity = { workspace = true }
|
||||
cranelift-bforest = { workspace = true }
|
||||
cranelift-egraph = { workspace = true }
|
||||
hashbrown = { workspace = true, optional = true }
|
||||
target-lexicon = { workspace = true }
|
||||
log = { workspace = true }
|
||||
|
||||
@@ -177,9 +177,19 @@ fn get_isle_compilations(
|
||||
) -> Result<IsleCompilations, std::io::Error> {
|
||||
let cur_dir = std::env::current_dir()?;
|
||||
|
||||
let clif_isle = out_dir.join("clif.isle");
|
||||
// Preludes.
|
||||
let clif_lower_isle = out_dir.join("clif_lower.isle");
|
||||
let clif_opt_isle = out_dir.join("clif_opt.isle");
|
||||
let prelude_isle =
|
||||
make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("prelude.isle"));
|
||||
let prelude_opt_isle =
|
||||
make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("prelude_opt.isle"));
|
||||
let prelude_lower_isle =
|
||||
make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("prelude_lower.isle"));
|
||||
|
||||
// Directory for mid-end optimizations.
|
||||
let src_opts = make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("opts"));
|
||||
// Directories for lowering backends.
|
||||
let src_isa_x64 =
|
||||
make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("isa").join("x64"));
|
||||
let src_isa_aarch64 =
|
||||
@@ -204,47 +214,62 @@ fn get_isle_compilations(
|
||||
// `cranelift/codegen/src/isa/*/lower/isle/generated_code.rs`!
|
||||
Ok(IsleCompilations {
|
||||
items: vec![
|
||||
// The mid-end optimization rules.
|
||||
IsleCompilation {
|
||||
output: out_dir.join("isle_opt.rs"),
|
||||
inputs: vec![
|
||||
prelude_isle.clone(),
|
||||
prelude_opt_isle.clone(),
|
||||
src_opts.join("algebraic.isle"),
|
||||
src_opts.join("cprop.isle"),
|
||||
],
|
||||
untracked_inputs: vec![clif_opt_isle.clone()],
|
||||
},
|
||||
// The x86-64 instruction selector.
|
||||
IsleCompilation {
|
||||
output: out_dir.join("isle_x64.rs"),
|
||||
inputs: vec![
|
||||
prelude_isle.clone(),
|
||||
prelude_lower_isle.clone(),
|
||||
src_isa_x64.join("inst.isle"),
|
||||
src_isa_x64.join("lower.isle"),
|
||||
],
|
||||
untracked_inputs: vec![clif_isle.clone()],
|
||||
untracked_inputs: vec![clif_lower_isle.clone()],
|
||||
},
|
||||
// The aarch64 instruction selector.
|
||||
IsleCompilation {
|
||||
output: out_dir.join("isle_aarch64.rs"),
|
||||
inputs: vec![
|
||||
prelude_isle.clone(),
|
||||
prelude_lower_isle.clone(),
|
||||
src_isa_aarch64.join("inst.isle"),
|
||||
src_isa_aarch64.join("inst_neon.isle"),
|
||||
src_isa_aarch64.join("lower.isle"),
|
||||
src_isa_aarch64.join("lower_dynamic_neon.isle"),
|
||||
],
|
||||
untracked_inputs: vec![clif_isle.clone()],
|
||||
untracked_inputs: vec![clif_lower_isle.clone()],
|
||||
},
|
||||
// The s390x instruction selector.
|
||||
IsleCompilation {
|
||||
output: out_dir.join("isle_s390x.rs"),
|
||||
inputs: vec![
|
||||
prelude_isle.clone(),
|
||||
prelude_lower_isle.clone(),
|
||||
src_isa_s390x.join("inst.isle"),
|
||||
src_isa_s390x.join("lower.isle"),
|
||||
],
|
||||
untracked_inputs: vec![clif_isle.clone()],
|
||||
untracked_inputs: vec![clif_lower_isle.clone()],
|
||||
},
|
||||
// The risc-v instruction selector.
|
||||
IsleCompilation {
|
||||
output: out_dir.join("isle_riscv64.rs"),
|
||||
inputs: vec![
|
||||
prelude_isle.clone(),
|
||||
prelude_lower_isle.clone(),
|
||||
src_isa_risc_v.join("inst.isle"),
|
||||
src_isa_risc_v.join("lower.isle"),
|
||||
],
|
||||
untracked_inputs: vec![clif_isle.clone()],
|
||||
untracked_inputs: vec![clif_lower_isle.clone()],
|
||||
},
|
||||
],
|
||||
})
|
||||
|
||||
@@ -60,36 +60,52 @@ fn gen_formats(formats: &[&InstructionFormat], fmt: &mut Formatter) {
|
||||
fmt.empty_line();
|
||||
}
|
||||
|
||||
/// Generate the InstructionData enum.
|
||||
/// Generate the InstructionData and InstructionImms enums.
|
||||
///
|
||||
/// Every variant must contain an `opcode` field. The size of `InstructionData` should be kept at
|
||||
/// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a
|
||||
/// `ValueList` to store the additional information out of line.
|
||||
///
|
||||
/// `InstructionImms` stores everything about an instruction except for the arguments: in other
|
||||
/// words, the `Opcode` and any immediates or other parameters. `InstructionData` stores this, plus
|
||||
/// the SSA `Value` arguments.
|
||||
fn gen_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) {
|
||||
fmt.line("#[derive(Clone, Debug, PartialEq, Hash)]");
|
||||
fmt.line(r#"#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]"#);
|
||||
fmt.line("#[allow(missing_docs)]");
|
||||
fmt.line("pub enum InstructionData {");
|
||||
fmt.indent(|fmt| {
|
||||
for format in formats {
|
||||
fmtln!(fmt, "{} {{", format.name);
|
||||
fmt.indent(|fmt| {
|
||||
fmt.line("opcode: Opcode,");
|
||||
if format.has_value_list {
|
||||
fmt.line("args: ValueList,");
|
||||
} else if format.num_value_operands == 1 {
|
||||
fmt.line("arg: Value,");
|
||||
} else if format.num_value_operands > 0 {
|
||||
fmtln!(fmt, "args: [Value; {}],", format.num_value_operands);
|
||||
}
|
||||
for field in &format.imm_fields {
|
||||
fmtln!(fmt, "{}: {},", field.member, field.kind.rust_type);
|
||||
}
|
||||
});
|
||||
fmtln!(fmt, "},");
|
||||
for (name, include_args) in &[("InstructionData", true), ("InstructionImms", false)] {
|
||||
fmt.line("#[derive(Clone, Debug, PartialEq, Hash)]");
|
||||
if !include_args {
|
||||
// `InstructionImms` gets some extra derives: it acts like
|
||||
// a sort of extended opcode and we want to allow for
|
||||
// hashconsing via Eq. `Copy` also turns out to be useful.
|
||||
fmt.line("#[derive(Copy, Eq)]");
|
||||
}
|
||||
});
|
||||
fmt.line("}");
|
||||
fmt.line(r#"#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]"#);
|
||||
fmt.line("#[allow(missing_docs)]");
|
||||
// generate `enum InstructionData` or `enum InstructionImms`.
|
||||
// (This comment exists so one can grep for `enum InstructionData`!)
|
||||
fmtln!(fmt, "pub enum {} {{", name);
|
||||
fmt.indent(|fmt| {
|
||||
for format in formats {
|
||||
fmtln!(fmt, "{} {{", format.name);
|
||||
fmt.indent(|fmt| {
|
||||
fmt.line("opcode: Opcode,");
|
||||
if *include_args {
|
||||
if format.has_value_list {
|
||||
fmt.line("args: ValueList,");
|
||||
} else if format.num_value_operands == 1 {
|
||||
fmt.line("arg: Value,");
|
||||
} else if format.num_value_operands > 0 {
|
||||
fmtln!(fmt, "args: [Value; {}],", format.num_value_operands);
|
||||
}
|
||||
}
|
||||
for field in &format.imm_fields {
|
||||
fmtln!(fmt, "{}: {},", field.member, field.kind.rust_type);
|
||||
}
|
||||
});
|
||||
fmtln!(fmt, "},");
|
||||
}
|
||||
});
|
||||
fmt.line("}");
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_arguments_method(formats: &[&InstructionFormat], fmt: &mut Formatter, is_mut: bool) {
|
||||
@@ -150,6 +166,122 @@ fn gen_arguments_method(formats: &[&InstructionFormat], fmt: &mut Formatter, is_
|
||||
fmtln!(fmt, "}");
|
||||
}
|
||||
|
||||
/// Generate the conversion from `InstructionData` to `InstructionImms`, stripping out the
|
||||
/// `Value`s.
|
||||
fn gen_instruction_data_to_instruction_imms(formats: &[&InstructionFormat], fmt: &mut Formatter) {
|
||||
fmt.line("impl std::convert::From<&InstructionData> for InstructionImms {");
|
||||
fmt.indent(|fmt| {
|
||||
fmt.doc_comment("Convert an `InstructionData` into an `InstructionImms`.");
|
||||
fmt.line("fn from(data: &InstructionData) -> InstructionImms {");
|
||||
fmt.indent(|fmt| {
|
||||
fmt.line("match data {");
|
||||
fmt.indent(|fmt| {
|
||||
for format in formats {
|
||||
fmtln!(fmt, "InstructionData::{} {{", format.name);
|
||||
fmt.indent(|fmt| {
|
||||
fmt.line("opcode,");
|
||||
for field in &format.imm_fields {
|
||||
fmtln!(fmt, "{},", field.member);
|
||||
}
|
||||
fmt.line("..");
|
||||
});
|
||||
fmtln!(fmt, "}} => InstructionImms::{} {{", format.name);
|
||||
fmt.indent(|fmt| {
|
||||
fmt.line("opcode: *opcode,");
|
||||
for field in &format.imm_fields {
|
||||
fmtln!(fmt, "{}: {}.clone(),", field.member, field.member);
|
||||
}
|
||||
});
|
||||
fmt.line("},");
|
||||
}
|
||||
});
|
||||
fmt.line("}");
|
||||
});
|
||||
fmt.line("}");
|
||||
});
|
||||
fmt.line("}");
|
||||
fmt.empty_line();
|
||||
}
|
||||
|
||||
/// Generate the conversion from `InstructionImms` to `InstructionData`, adding the
|
||||
/// `Value`s.
|
||||
fn gen_instruction_imms_to_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) {
|
||||
fmt.line("impl InstructionImms {");
|
||||
fmt.indent(|fmt| {
|
||||
fmt.doc_comment("Convert an `InstructionImms` into an `InstructionData` by adding args.");
|
||||
fmt.line(
|
||||
"pub fn with_args(&self, values: &[Value], value_list: &mut ValueListPool) -> InstructionData {",
|
||||
);
|
||||
fmt.indent(|fmt| {
|
||||
fmt.line("match self {");
|
||||
fmt.indent(|fmt| {
|
||||
for format in formats {
|
||||
fmtln!(fmt, "InstructionImms::{} {{", format.name);
|
||||
fmt.indent(|fmt| {
|
||||
fmt.line("opcode,");
|
||||
for field in &format.imm_fields {
|
||||
fmtln!(fmt, "{},", field.member);
|
||||
}
|
||||
});
|
||||
fmt.line("} => {");
|
||||
if format.has_value_list {
|
||||
fmtln!(fmt, "let args = ValueList::from_slice(values, value_list);");
|
||||
}
|
||||
fmt.indent(|fmt| {
|
||||
fmtln!(fmt, "InstructionData::{} {{", format.name);
|
||||
fmt.indent(|fmt| {
|
||||
fmt.line("opcode: *opcode,");
|
||||
for field in &format.imm_fields {
|
||||
fmtln!(fmt, "{}: {}.clone(),", field.member, field.member);
|
||||
}
|
||||
if format.has_value_list {
|
||||
fmtln!(fmt, "args,");
|
||||
} else if format.num_value_operands == 1 {
|
||||
fmtln!(fmt, "arg: values[0],");
|
||||
} else if format.num_value_operands > 0 {
|
||||
let mut args = vec![];
|
||||
for i in 0..format.num_value_operands {
|
||||
args.push(format!("values[{}]", i));
|
||||
}
|
||||
fmtln!(fmt, "args: [{}],", args.join(", "));
|
||||
}
|
||||
});
|
||||
fmt.line("}");
|
||||
});
|
||||
fmt.line("},");
|
||||
}
|
||||
});
|
||||
fmt.line("}");
|
||||
});
|
||||
fmt.line("}");
|
||||
});
|
||||
fmt.line("}");
|
||||
fmt.empty_line();
|
||||
}
|
||||
|
||||
/// Generate the `opcode` method on InstructionImms.
|
||||
fn gen_instruction_imms_impl(formats: &[&InstructionFormat], fmt: &mut Formatter) {
|
||||
fmt.line("impl InstructionImms {");
|
||||
fmt.indent(|fmt| {
|
||||
fmt.doc_comment("Get the opcode of this instruction.");
|
||||
fmt.line("pub fn opcode(&self) -> Opcode {");
|
||||
fmt.indent(|fmt| {
|
||||
let mut m = Match::new("*self");
|
||||
for format in formats {
|
||||
m.arm(
|
||||
format!("Self::{}", format.name),
|
||||
vec!["opcode", ".."],
|
||||
"opcode".to_string(),
|
||||
);
|
||||
}
|
||||
fmt.add_match(m);
|
||||
});
|
||||
fmt.line("}");
|
||||
});
|
||||
fmt.line("}");
|
||||
fmt.empty_line();
|
||||
}
|
||||
|
||||
/// Generate the boring parts of the InstructionData implementation.
|
||||
///
|
||||
/// These methods in `impl InstructionData` can be generated automatically from the instruction
|
||||
@@ -1070,7 +1202,12 @@ fn gen_inst_builder(inst: &Instruction, format: &InstructionFormat, fmt: &mut Fo
|
||||
fmtln!(fmt, "}")
|
||||
}
|
||||
|
||||
fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt: &mut Formatter) {
|
||||
fn gen_common_isle(
|
||||
formats: &[&InstructionFormat],
|
||||
instructions: &AllInstructions,
|
||||
fmt: &mut Formatter,
|
||||
is_lower: bool,
|
||||
) {
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::fmt::Write;
|
||||
|
||||
@@ -1123,40 +1260,46 @@ fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt:
|
||||
gen_isle_enum(name, variants, fmt)
|
||||
}
|
||||
|
||||
// Generate all of the value arrays we need for `InstructionData` as well as
|
||||
// the constructors and extractors for them.
|
||||
fmt.line(";;;; Value Arrays ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;");
|
||||
fmt.empty_line();
|
||||
let value_array_arities: BTreeSet<_> = formats
|
||||
.iter()
|
||||
.filter(|f| f.typevar_operand.is_some() && !f.has_value_list && f.num_value_operands != 1)
|
||||
.map(|f| f.num_value_operands)
|
||||
.collect();
|
||||
for n in value_array_arities {
|
||||
fmtln!(fmt, ";; ISLE representation of `[Value; {}]`.", n);
|
||||
fmtln!(fmt, "(type ValueArray{} extern (enum))", n);
|
||||
if is_lower {
|
||||
// Generate all of the value arrays we need for `InstructionData` as well as
|
||||
// the constructors and extractors for them.
|
||||
fmt.line(
|
||||
";;;; Value Arrays ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;",
|
||||
);
|
||||
fmt.empty_line();
|
||||
let value_array_arities: BTreeSet<_> = formats
|
||||
.iter()
|
||||
.filter(|f| {
|
||||
f.typevar_operand.is_some() && !f.has_value_list && f.num_value_operands != 1
|
||||
})
|
||||
.map(|f| f.num_value_operands)
|
||||
.collect();
|
||||
for n in value_array_arities {
|
||||
fmtln!(fmt, ";; ISLE representation of `[Value; {}]`.", n);
|
||||
fmtln!(fmt, "(type ValueArray{} extern (enum))", n);
|
||||
fmt.empty_line();
|
||||
|
||||
fmtln!(
|
||||
fmt,
|
||||
"(decl value_array_{} ({}) ValueArray{})",
|
||||
n,
|
||||
(0..n).map(|_| "Value").collect::<Vec<_>>().join(" "),
|
||||
n
|
||||
);
|
||||
fmtln!(
|
||||
fmt,
|
||||
"(extern constructor value_array_{} pack_value_array_{})",
|
||||
n,
|
||||
n
|
||||
);
|
||||
fmtln!(
|
||||
fmt,
|
||||
"(extern extractor infallible value_array_{} unpack_value_array_{})",
|
||||
n,
|
||||
n
|
||||
);
|
||||
fmt.empty_line();
|
||||
fmtln!(
|
||||
fmt,
|
||||
"(decl value_array_{} ({}) ValueArray{})",
|
||||
n,
|
||||
(0..n).map(|_| "Value").collect::<Vec<_>>().join(" "),
|
||||
n
|
||||
);
|
||||
fmtln!(
|
||||
fmt,
|
||||
"(extern constructor value_array_{} pack_value_array_{})",
|
||||
n,
|
||||
n
|
||||
);
|
||||
fmtln!(
|
||||
fmt,
|
||||
"(extern extractor infallible value_array_{} unpack_value_array_{})",
|
||||
n,
|
||||
n
|
||||
);
|
||||
fmt.empty_line();
|
||||
}
|
||||
}
|
||||
|
||||
// Generate the extern type declaration for `Opcode`.
|
||||
@@ -1175,21 +1318,33 @@ fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt:
|
||||
fmt.line(")");
|
||||
fmt.empty_line();
|
||||
|
||||
// Generate the extern type declaration for `InstructionData`.
|
||||
fmt.line(";;;; `InstructionData` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;");
|
||||
// Generate the extern type declaration for `InstructionData`
|
||||
// (lowering) or `InstructionImms` (opt).
|
||||
let inst_data_name = if is_lower {
|
||||
"InstructionData"
|
||||
} else {
|
||||
"InstructionImms"
|
||||
};
|
||||
fmtln!(
|
||||
fmt,
|
||||
";;;; `{}` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;",
|
||||
inst_data_name
|
||||
);
|
||||
fmt.empty_line();
|
||||
fmt.line("(type InstructionData extern");
|
||||
fmtln!(fmt, "(type {} extern", inst_data_name);
|
||||
fmt.indent(|fmt| {
|
||||
fmt.line("(enum");
|
||||
fmt.indent(|fmt| {
|
||||
for format in formats {
|
||||
let mut s = format!("({} (opcode Opcode)", format.name);
|
||||
if format.has_value_list {
|
||||
s.push_str(" (args ValueList)");
|
||||
} else if format.num_value_operands == 1 {
|
||||
s.push_str(" (arg Value)");
|
||||
} else if format.num_value_operands > 1 {
|
||||
write!(&mut s, " (args ValueArray{})", format.num_value_operands).unwrap();
|
||||
if is_lower {
|
||||
if format.has_value_list {
|
||||
s.push_str(" (args ValueList)");
|
||||
} else if format.num_value_operands == 1 {
|
||||
s.push_str(" (arg Value)");
|
||||
} else if format.num_value_operands > 1 {
|
||||
write!(&mut s, " (args ValueArray{})", format.num_value_operands).unwrap();
|
||||
}
|
||||
}
|
||||
for field in &format.imm_fields {
|
||||
write!(
|
||||
@@ -1210,85 +1365,157 @@ fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt:
|
||||
fmt.empty_line();
|
||||
|
||||
// Generate the helper extractors for each opcode's full instruction.
|
||||
//
|
||||
// TODO: if/when we port our peephole optimization passes to ISLE we will
|
||||
// want helper constructors as well.
|
||||
fmt.line(";;;; Extracting Opcode, Operands, and Immediates from `InstructionData` ;;;;;;;;");
|
||||
fmtln!(
|
||||
fmt,
|
||||
";;;; Extracting Opcode, Operands, and Immediates from `{}` ;;;;;;;;",
|
||||
inst_data_name
|
||||
);
|
||||
fmt.empty_line();
|
||||
let ret_ty = if is_lower { "Inst" } else { "Id" };
|
||||
for inst in instructions {
|
||||
if !is_lower && inst.format.has_value_list {
|
||||
continue;
|
||||
}
|
||||
|
||||
fmtln!(
|
||||
fmt,
|
||||
"(decl {} ({}) Inst)",
|
||||
"(decl {} ({}{}) {})",
|
||||
inst.name,
|
||||
if is_lower { "" } else { "Type " },
|
||||
inst.operands_in
|
||||
.iter()
|
||||
.map(|o| {
|
||||
let ty = o.kind.rust_type;
|
||||
if ty == "&[Value]" {
|
||||
"ValueSlice"
|
||||
if is_lower {
|
||||
if ty == "&[Value]" {
|
||||
"ValueSlice"
|
||||
} else {
|
||||
ty.rsplit("::").next().unwrap()
|
||||
}
|
||||
} else {
|
||||
ty.rsplit("::").next().unwrap()
|
||||
if ty == "&[Value]" {
|
||||
panic!("value slice in mid-end extractor");
|
||||
} else if ty == "Value" || ty == "ir::Value" {
|
||||
"Id"
|
||||
} else {
|
||||
ty.rsplit("::").next().unwrap()
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
.join(" "),
|
||||
ret_ty
|
||||
);
|
||||
fmtln!(fmt, "(extractor");
|
||||
fmt.indent(|fmt| {
|
||||
fmtln!(
|
||||
fmt,
|
||||
"({} {})",
|
||||
"({} {}{})",
|
||||
inst.name,
|
||||
if is_lower { "" } else { "ty " },
|
||||
inst.operands_in
|
||||
.iter()
|
||||
.map(|o| { o.name })
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
);
|
||||
let mut s = format!(
|
||||
"(inst_data (InstructionData.{} (Opcode.{})",
|
||||
inst.format.name, inst.camel_name
|
||||
);
|
||||
|
||||
// Value and varargs operands.
|
||||
if inst.format.has_value_list {
|
||||
// The instruction format uses a value list, but the
|
||||
// instruction itself might have not only a `&[Value]`
|
||||
// varargs operand, but also one or more `Value` operands as
|
||||
// well. If this is the case, then we need to read them off
|
||||
// the front of the `ValueList`.
|
||||
let values: Vec<_> = inst
|
||||
.operands_in
|
||||
.iter()
|
||||
.filter(|o| o.is_value())
|
||||
.map(|o| o.name)
|
||||
.collect();
|
||||
let varargs = inst
|
||||
.operands_in
|
||||
.iter()
|
||||
.find(|o| o.is_varargs())
|
||||
.unwrap()
|
||||
.name;
|
||||
if values.is_empty() {
|
||||
write!(&mut s, " (value_list_slice {})", varargs).unwrap();
|
||||
} else {
|
||||
if is_lower {
|
||||
let mut s = format!(
|
||||
"(inst_data (InstructionData.{} (Opcode.{})",
|
||||
inst.format.name, inst.camel_name
|
||||
);
|
||||
|
||||
// Value and varargs operands.
|
||||
if inst.format.has_value_list {
|
||||
// The instruction format uses a value list, but the
|
||||
// instruction itself might have not only a `&[Value]`
|
||||
// varargs operand, but also one or more `Value` operands as
|
||||
// well. If this is the case, then we need to read them off
|
||||
// the front of the `ValueList`.
|
||||
let values: Vec<_> = inst
|
||||
.operands_in
|
||||
.iter()
|
||||
.filter(|o| o.is_value())
|
||||
.map(|o| o.name)
|
||||
.collect();
|
||||
let varargs = inst
|
||||
.operands_in
|
||||
.iter()
|
||||
.find(|o| o.is_varargs())
|
||||
.unwrap()
|
||||
.name;
|
||||
if values.is_empty() {
|
||||
write!(&mut s, " (value_list_slice {})", varargs).unwrap();
|
||||
} else {
|
||||
write!(
|
||||
&mut s,
|
||||
" (unwrap_head_value_list_{} {} {})",
|
||||
values.len(),
|
||||
values.join(" "),
|
||||
varargs
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
} else if inst.format.num_value_operands == 1 {
|
||||
write!(
|
||||
&mut s,
|
||||
" (unwrap_head_value_list_{} {} {})",
|
||||
values.len(),
|
||||
values.join(" "),
|
||||
varargs
|
||||
" {}",
|
||||
inst.operands_in.iter().find(|o| o.is_value()).unwrap().name
|
||||
)
|
||||
.unwrap();
|
||||
} else if inst.format.num_value_operands > 1 {
|
||||
let values = inst
|
||||
.operands_in
|
||||
.iter()
|
||||
.filter(|o| o.is_value())
|
||||
.map(|o| o.name)
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(values.len(), inst.format.num_value_operands);
|
||||
let values = values.join(" ");
|
||||
write!(
|
||||
&mut s,
|
||||
" (value_array_{} {})",
|
||||
inst.format.num_value_operands, values,
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
} else if inst.format.num_value_operands == 1 {
|
||||
write!(
|
||||
&mut s,
|
||||
" {}",
|
||||
inst.operands_in.iter().find(|o| o.is_value()).unwrap().name
|
||||
)
|
||||
.unwrap();
|
||||
} else if inst.format.num_value_operands > 1 {
|
||||
|
||||
// Immediates.
|
||||
let imm_operands: Vec<_> = inst
|
||||
.operands_in
|
||||
.iter()
|
||||
.filter(|o| !o.is_value() && !o.is_varargs())
|
||||
.collect();
|
||||
assert_eq!(imm_operands.len(), inst.format.imm_fields.len());
|
||||
for op in imm_operands {
|
||||
write!(&mut s, " {}", op.name).unwrap();
|
||||
}
|
||||
|
||||
s.push_str("))");
|
||||
fmt.line(&s);
|
||||
} else {
|
||||
// Mid-end case.
|
||||
let mut s = format!(
|
||||
"(enodes ty (InstructionImms.{} (Opcode.{})",
|
||||
inst.format.name, inst.camel_name
|
||||
);
|
||||
|
||||
// Immediates.
|
||||
let imm_operands: Vec<_> = inst
|
||||
.operands_in
|
||||
.iter()
|
||||
.filter(|o| !o.is_value() && !o.is_varargs())
|
||||
.collect();
|
||||
assert_eq!(imm_operands.len(), inst.format.imm_fields.len());
|
||||
for op in imm_operands {
|
||||
write!(&mut s, " {}", op.name).unwrap();
|
||||
}
|
||||
// End of `InstructionImms`.
|
||||
s.push_str(")");
|
||||
|
||||
// Second arg to `enode`: value args.
|
||||
assert!(!inst.operands_in.iter().any(|op| op.is_varargs()));
|
||||
let values = inst
|
||||
.operands_in
|
||||
.iter()
|
||||
@@ -1299,31 +1526,83 @@ fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt:
|
||||
let values = values.join(" ");
|
||||
write!(
|
||||
&mut s,
|
||||
" (value_array_{} {})",
|
||||
" (id_array_{} {})",
|
||||
inst.format.num_value_operands, values,
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// Immediates.
|
||||
let imm_operands: Vec<_> = inst
|
||||
.operands_in
|
||||
.iter()
|
||||
.filter(|o| !o.is_value() && !o.is_varargs())
|
||||
.collect();
|
||||
assert_eq!(imm_operands.len(), inst.format.imm_fields.len());
|
||||
for op in imm_operands {
|
||||
write!(&mut s, " {}", op.name).unwrap();
|
||||
s.push_str(")");
|
||||
fmt.line(&s);
|
||||
}
|
||||
|
||||
s.push_str("))");
|
||||
fmt.line(&s);
|
||||
});
|
||||
fmt.line(")");
|
||||
|
||||
// Generate a constructor if this is the mid-end prelude.
|
||||
if !is_lower {
|
||||
fmtln!(
|
||||
fmt,
|
||||
"(rule ({} ty {})",
|
||||
inst.name,
|
||||
inst.operands_in
|
||||
.iter()
|
||||
.map(|o| o.name)
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
);
|
||||
fmt.indent(|fmt| {
|
||||
let mut s = format!(
|
||||
"(pure_enode ty (InstructionImms.{} (Opcode.{})",
|
||||
inst.format.name, inst.camel_name
|
||||
);
|
||||
|
||||
for o in inst
|
||||
.operands_in
|
||||
.iter()
|
||||
.filter(|o| !o.is_value() && !o.is_varargs())
|
||||
{
|
||||
write!(&mut s, " {}", o.name).unwrap();
|
||||
}
|
||||
s.push_str(")");
|
||||
|
||||
let values = inst
|
||||
.operands_in
|
||||
.iter()
|
||||
.filter(|o| o.is_value())
|
||||
.map(|o| o.name)
|
||||
.collect::<Vec<_>>();
|
||||
let values = values.join(" ");
|
||||
write!(
|
||||
&mut s,
|
||||
" (id_array_{} {})",
|
||||
inst.format.num_value_operands, values
|
||||
)
|
||||
.unwrap();
|
||||
s.push_str(")");
|
||||
fmt.line(&s);
|
||||
});
|
||||
fmt.line(")");
|
||||
}
|
||||
|
||||
fmt.empty_line();
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_opt_isle(
|
||||
formats: &[&InstructionFormat],
|
||||
instructions: &AllInstructions,
|
||||
fmt: &mut Formatter,
|
||||
) {
|
||||
gen_common_isle(formats, instructions, fmt, /* is_lower = */ false);
|
||||
}
|
||||
|
||||
fn gen_lower_isle(
|
||||
formats: &[&InstructionFormat],
|
||||
instructions: &AllInstructions,
|
||||
fmt: &mut Formatter,
|
||||
) {
|
||||
gen_common_isle(formats, instructions, fmt, /* is_lower = */ true);
|
||||
}
|
||||
|
||||
/// Generate an `enum` immediate in ISLE.
|
||||
fn gen_isle_enum(name: &str, mut variants: Vec<&str>, fmt: &mut Formatter) {
|
||||
variants.sort();
|
||||
@@ -1388,7 +1667,8 @@ pub(crate) fn generate(
|
||||
all_inst: &AllInstructions,
|
||||
opcode_filename: &str,
|
||||
inst_builder_filename: &str,
|
||||
isle_filename: &str,
|
||||
isle_opt_filename: &str,
|
||||
isle_lower_filename: &str,
|
||||
out_dir: &str,
|
||||
isle_dir: &str,
|
||||
) -> Result<(), error::Error> {
|
||||
@@ -1398,16 +1678,24 @@ pub(crate) fn generate(
|
||||
gen_instruction_data(&formats, &mut fmt);
|
||||
fmt.empty_line();
|
||||
gen_instruction_data_impl(&formats, &mut fmt);
|
||||
gen_instruction_data_to_instruction_imms(&formats, &mut fmt);
|
||||
gen_instruction_imms_impl(&formats, &mut fmt);
|
||||
gen_instruction_imms_to_instruction_data(&formats, &mut fmt);
|
||||
fmt.empty_line();
|
||||
gen_opcodes(all_inst, &mut fmt);
|
||||
fmt.empty_line();
|
||||
gen_type_constraints(all_inst, &mut fmt);
|
||||
fmt.update_file(opcode_filename, out_dir)?;
|
||||
|
||||
// ISLE DSL.
|
||||
// ISLE DSL: mid-end ("opt") generated bindings.
|
||||
let mut fmt = Formatter::new();
|
||||
gen_isle(&formats, all_inst, &mut fmt);
|
||||
fmt.update_file(isle_filename, isle_dir)?;
|
||||
gen_opt_isle(&formats, all_inst, &mut fmt);
|
||||
fmt.update_file(isle_opt_filename, isle_dir)?;
|
||||
|
||||
// ISLE DSL: lowering generated bindings.
|
||||
let mut fmt = Formatter::new();
|
||||
gen_lower_isle(&formats, all_inst, &mut fmt);
|
||||
fmt.update_file(isle_lower_filename, isle_dir)?;
|
||||
|
||||
// Instruction builder.
|
||||
let mut fmt = Formatter::new();
|
||||
|
||||
@@ -47,7 +47,8 @@ pub fn generate(isas: &[isa::Isa], out_dir: &str, isle_dir: &str) -> Result<(),
|
||||
&shared_defs.all_instructions,
|
||||
"opcodes.rs",
|
||||
"inst_builder.rs",
|
||||
"clif.isle",
|
||||
"clif_opt.isle",
|
||||
"clif_lower.isle",
|
||||
&out_dir,
|
||||
isle_dir,
|
||||
)?;
|
||||
|
||||
@@ -53,6 +53,17 @@ pub(crate) fn define() -> SettingGroup {
|
||||
true,
|
||||
);
|
||||
|
||||
settings.add_bool(
|
||||
"use_egraphs",
|
||||
"Enable egraph-based optimization.",
|
||||
r#"
|
||||
This enables an optimization phase that converts CLIF to an egraph (equivalence graph)
|
||||
representation, performs various rewrites, and then converts it back. This can result in
|
||||
better optimization, but is currently considered experimental.
|
||||
"#,
|
||||
false,
|
||||
);
|
||||
|
||||
settings.add_bool(
|
||||
"enable_verifier",
|
||||
"Run the Cranelift IR verifier at strategic times during compilation.",
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
use crate::alias_analysis::AliasAnalysis;
|
||||
use crate::dce::do_dce;
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::egraph::FuncEGraph;
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::Function;
|
||||
use crate::isa::TargetIsa;
|
||||
@@ -104,15 +105,20 @@ impl Context {
|
||||
|
||||
/// Compile the function, and emit machine code into a `Vec<u8>`.
|
||||
///
|
||||
/// Run the function through all the passes necessary to generate code for the target ISA
|
||||
/// represented by `isa`, as well as the final step of emitting machine code into a
|
||||
/// `Vec<u8>`. The machine code is not relocated. Instead, any relocations can be obtained
|
||||
/// from `compiled_code()`.
|
||||
/// Run the function through all the passes necessary to generate
|
||||
/// code for the target ISA represented by `isa`, as well as the
|
||||
/// final step of emitting machine code into a `Vec<u8>`. The
|
||||
/// machine code is not relocated. Instead, any relocations can be
|
||||
/// obtained from `compiled_code()`.
|
||||
///
|
||||
/// Performs any optimizations that are enabled, unless
|
||||
/// `optimize()` was already invoked.
|
||||
///
|
||||
/// This function calls `compile`, taking care to resize `mem` as
|
||||
/// needed, so it provides a safe interface.
|
||||
/// needed.
|
||||
///
|
||||
/// Returns information about the function's code and read-only data.
|
||||
/// Returns information about the function's code and read-only
|
||||
/// data.
|
||||
pub fn compile_and_emit(
|
||||
&mut self,
|
||||
isa: &dyn TargetIsa,
|
||||
@@ -131,15 +137,26 @@ impl Context {
|
||||
|
||||
self.verify_if(isa)?;
|
||||
|
||||
self.optimize(isa)?;
|
||||
|
||||
isa.compile_function(&self.func, self.want_disasm)
|
||||
}
|
||||
|
||||
/// Optimize the function, performing all compilation steps up to
|
||||
/// but not including machine-code lowering and register
|
||||
/// allocation.
|
||||
///
|
||||
/// Public only for testing purposes.
|
||||
pub fn optimize(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
|
||||
let opt_level = isa.flags().opt_level();
|
||||
log::trace!(
|
||||
"Compiling (opt level {:?}):\n{}",
|
||||
"Optimizing (opt level {:?}):\n{}",
|
||||
opt_level,
|
||||
self.func.display()
|
||||
);
|
||||
|
||||
self.compute_cfg();
|
||||
if opt_level != OptLevel::None {
|
||||
if !isa.flags().use_egraphs() && opt_level != OptLevel::None {
|
||||
self.preopt(isa)?;
|
||||
}
|
||||
if isa.flags().enable_nan_canonicalization() {
|
||||
@@ -147,7 +164,8 @@ impl Context {
|
||||
}
|
||||
|
||||
self.legalize(isa)?;
|
||||
if opt_level != OptLevel::None {
|
||||
|
||||
if !isa.flags().use_egraphs() && opt_level != OptLevel::None {
|
||||
self.compute_domtree();
|
||||
self.compute_loop_analysis();
|
||||
self.licm(isa)?;
|
||||
@@ -156,18 +174,29 @@ impl Context {
|
||||
|
||||
self.compute_domtree();
|
||||
self.eliminate_unreachable_code(isa)?;
|
||||
if opt_level != OptLevel::None {
|
||||
|
||||
if isa.flags().use_egraphs() || opt_level != OptLevel::None {
|
||||
self.dce(isa)?;
|
||||
}
|
||||
|
||||
self.remove_constant_phis(isa)?;
|
||||
|
||||
if opt_level != OptLevel::None && isa.flags().enable_alias_analysis() {
|
||||
if isa.flags().use_egraphs() {
|
||||
log::debug!(
|
||||
"About to optimize with egraph phase:\n{}",
|
||||
self.func.display()
|
||||
);
|
||||
self.compute_loop_analysis();
|
||||
let mut eg = FuncEGraph::new(&self.func, &self.domtree, &self.loop_analysis, &self.cfg);
|
||||
eg.elaborate(&mut self.func);
|
||||
log::debug!("After egraph optimization:\n{}", self.func.display());
|
||||
log::info!("egraph stats: {:?}", eg.stats);
|
||||
} else if opt_level != OptLevel::None && isa.flags().enable_alias_analysis() {
|
||||
self.replace_redundant_loads()?;
|
||||
self.simple_gvn(isa)?;
|
||||
}
|
||||
|
||||
isa.compile_function(&self.func, self.want_disasm)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Compile the function.
|
||||
|
||||
414
cranelift/codegen/src/egraph.rs
Normal file
414
cranelift/codegen/src/egraph.rs
Normal file
@@ -0,0 +1,414 @@
|
||||
//! Egraph-based mid-end optimization framework.
|
||||
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::loop_analysis::{LoopAnalysis, LoopLevel};
|
||||
use crate::trace;
|
||||
use crate::{
|
||||
fx::{FxHashMap, FxHashSet},
|
||||
inst_predicates::has_side_effect,
|
||||
ir::{Block, Function, Inst, InstructionData, InstructionImms, Opcode, Type},
|
||||
};
|
||||
use alloc::vec::Vec;
|
||||
use core::ops::Range;
|
||||
use cranelift_egraph::{EGraph, Id, Language, NewOrExisting};
|
||||
use cranelift_entity::EntityList;
|
||||
use cranelift_entity::SecondaryMap;
|
||||
|
||||
mod domtree;
|
||||
mod elaborate;
|
||||
mod node;
|
||||
mod stores;
|
||||
|
||||
use elaborate::Elaborator;
|
||||
pub use node::{Node, NodeCtx};
|
||||
pub use stores::{AliasAnalysis, MemoryState};
|
||||
|
||||
pub struct FuncEGraph<'a> {
|
||||
/// Dominator tree, used for elaboration pass.
|
||||
domtree: &'a DominatorTree,
|
||||
/// Loop analysis results, used for built-in LICM during elaboration.
|
||||
loop_analysis: &'a LoopAnalysis,
|
||||
/// Last-store tracker for integrated alias analysis during egraph build.
|
||||
alias_analysis: AliasAnalysis,
|
||||
/// The egraph itself.
|
||||
pub(crate) egraph: EGraph<NodeCtx, Analysis>,
|
||||
/// "node context", containing arenas for node data.
|
||||
pub(crate) node_ctx: NodeCtx,
|
||||
/// Ranges in `side_effect_ids` for sequences of side-effecting
|
||||
/// eclasses per block.
|
||||
side_effects: SecondaryMap<Block, Range<u32>>,
|
||||
side_effect_ids: Vec<Id>,
|
||||
/// Map from store instructions to their nodes; used for store-to-load forwarding.
|
||||
pub(crate) store_nodes: FxHashMap<Inst, (Type, Id)>,
|
||||
/// Ranges in `blockparam_ids_tys` for sequences of blockparam
|
||||
/// eclass IDs and types per block.
|
||||
blockparams: SecondaryMap<Block, Range<u32>>,
|
||||
blockparam_ids_tys: Vec<(Id, Type)>,
|
||||
/// Which canonical node IDs do we want to rematerialize in each
|
||||
/// block where they're used?
|
||||
pub(crate) remat_ids: FxHashSet<Id>,
|
||||
/// Which canonical node IDs have an enode whose value subsumes
|
||||
/// all others it's unioned with?
|
||||
pub(crate) subsume_ids: FxHashSet<Id>,
|
||||
/// Statistics recorded during the process of building,
|
||||
/// optimizing, and lowering out of this egraph.
|
||||
pub(crate) stats: Stats,
|
||||
/// Current rewrite-recursion depth. Used to enforce a finite
|
||||
/// limit on rewrite rule application so that we don't get stuck
|
||||
/// in an infinite chain.
|
||||
pub(crate) rewrite_depth: usize,
|
||||
}
|
||||
|
||||
/// Event counters for one egraph build/optimize/elaborate cycle.
/// Every counter starts at zero (via `Default`).
#[derive(Debug, Default, Clone)]
pub(crate) struct Stats {
    // --- Node construction (egraph build) ---
    /// Nodes added to the egraph while building, of any kind.
    pub(crate) node_created: u64,
    /// Block-parameter nodes.
    pub(crate) node_param: u64,
    /// Result-projection nodes (one per result of a multi-result
    /// instruction).
    pub(crate) node_result: u64,
    /// Pure nodes.
    pub(crate) node_pure: u64,
    /// Side-effecting instruction nodes.
    pub(crate) node_inst: u64,
    /// Load nodes.
    pub(crate) node_load: u64,
    /// Node insertions for which deduplication was applicable.
    pub(crate) node_dedup_query: u64,
    /// Of those, insertions that resolved to an existing node.
    pub(crate) node_dedup_hit: u64,
    /// Of those, insertions that created a new node.
    pub(crate) node_dedup_miss: u64,

    // --- Counters not updated by the build/elaborate code in this
    // module; NOTE(review): presumably maintained by the rewrite-rule
    // glue -- confirm against `crate::opts`. ---
    pub(crate) node_ctor_created: u64,
    pub(crate) node_ctor_deduped: u64,
    pub(crate) node_union: u64,
    pub(crate) node_subsume: u64,

    /// Store instructions recorded in the store map.
    pub(crate) store_map_insert: u64,
    /// New non-pure nodes appended to a block's side-effect list.
    pub(crate) side_effect_nodes: u64,

    // --- See NOTE(review) above. ---
    pub(crate) rewrite_rule_invoked: u64,
    pub(crate) rewrite_depth_limit: u64,
    pub(crate) store_to_load_forward: u64,

    // --- Elaboration ---
    /// Eclass uses visited by the elaborator.
    pub(crate) elaborate_visit_node: u64,
    /// Visits answered from the id-to-value map.
    pub(crate) elaborate_memoize_hit: u64,
    /// Visits that had to (re)compute the value.
    pub(crate) elaborate_memoize_miss: u64,
    /// Misses caused by ignoring a memoized value in order to
    /// rematerialize it.
    pub(crate) elaborate_memoize_miss_remat: u64,
    pub(crate) elaborate_licm_hoist: u64,
    pub(crate) elaborate_func: u64,
    pub(crate) elaborate_func_pre_insts: u64,
    pub(crate) elaborate_func_post_insts: u64,
}
|
||||
|
||||
impl<'a> FuncEGraph<'a> {
|
||||
/// Create a new EGraph for the given function. Requires the
|
||||
/// domtree to be precomputed as well; the domtree is used for
|
||||
/// scheduling when lowering out of the egraph.
|
||||
pub fn new(
|
||||
func: &Function,
|
||||
domtree: &'a DominatorTree,
|
||||
loop_analysis: &'a LoopAnalysis,
|
||||
cfg: &ControlFlowGraph,
|
||||
) -> FuncEGraph<'a> {
|
||||
let node_count_estimate = func.dfg.num_values() * 2;
|
||||
let alias_analysis = AliasAnalysis::new(func, cfg);
|
||||
let mut this = Self {
|
||||
domtree,
|
||||
loop_analysis,
|
||||
alias_analysis,
|
||||
egraph: EGraph::with_capacity(node_count_estimate, Some(Analysis)),
|
||||
node_ctx: NodeCtx::with_capacity_for_dfg(&func.dfg),
|
||||
side_effects: SecondaryMap::default(),
|
||||
side_effect_ids: vec![],
|
||||
store_nodes: FxHashMap::default(),
|
||||
blockparams: SecondaryMap::default(),
|
||||
blockparam_ids_tys: vec![],
|
||||
remat_ids: FxHashSet::default(),
|
||||
subsume_ids: FxHashSet::default(),
|
||||
stats: Default::default(),
|
||||
rewrite_depth: 0,
|
||||
};
|
||||
this.build(func);
|
||||
this
|
||||
}
|
||||
|
||||
fn build(&mut self, func: &Function) {
|
||||
// Mapping of SSA `Value` to eclass ID.
|
||||
let mut value_to_id = FxHashMap::default();
|
||||
|
||||
// For each block in RPO, create an enode for block entry, for
|
||||
// each block param, and for each instruction.
|
||||
for &block in self.domtree.cfg_postorder().iter().rev() {
|
||||
let loop_level = self.loop_analysis.loop_level(block);
|
||||
let blockparam_start =
|
||||
u32::try_from(self.blockparam_ids_tys.len()).expect("Overflow in blockparam count");
|
||||
for (i, &value) in func.dfg.block_params(block).iter().enumerate() {
|
||||
let ty = func.dfg.value_type(value);
|
||||
let param = self
|
||||
.egraph
|
||||
.add(
|
||||
Node::Param {
|
||||
block,
|
||||
index: i
|
||||
.try_into()
|
||||
.expect("blockparam index should fit in Node::Param"),
|
||||
ty,
|
||||
loop_level,
|
||||
},
|
||||
&mut self.node_ctx,
|
||||
)
|
||||
.get();
|
||||
value_to_id.insert(value, param);
|
||||
self.blockparam_ids_tys.push((param, ty));
|
||||
self.stats.node_created += 1;
|
||||
self.stats.node_param += 1;
|
||||
}
|
||||
let blockparam_end =
|
||||
u32::try_from(self.blockparam_ids_tys.len()).expect("Overflow in blockparam count");
|
||||
self.blockparams[block] = blockparam_start..blockparam_end;
|
||||
|
||||
let side_effect_start =
|
||||
u32::try_from(self.side_effect_ids.len()).expect("Overflow in side-effect count");
|
||||
for inst in func.layout.block_insts(block) {
|
||||
// Build args from SSA values.
|
||||
let args = EntityList::from_iter(
|
||||
func.dfg.inst_args(inst).iter().map(|&arg| {
|
||||
let arg = func.dfg.resolve_aliases(arg);
|
||||
*value_to_id
|
||||
.get(&arg)
|
||||
.expect("Must have seen def before this use")
|
||||
}),
|
||||
&mut self.node_ctx.args,
|
||||
);
|
||||
|
||||
let results = func.dfg.inst_results(inst);
|
||||
|
||||
let types = self
|
||||
.node_ctx
|
||||
.types
|
||||
.from_iter(results.iter().map(|&val| func.dfg.value_type(val)));
|
||||
let types = types.freeze(&mut self.node_ctx.types);
|
||||
|
||||
let load_mem_state = self.alias_analysis.get_state_for_load(inst);
|
||||
let is_readonly_load = match func.dfg[inst] {
|
||||
InstructionData::Load {
|
||||
opcode: Opcode::Load,
|
||||
flags,
|
||||
..
|
||||
} => flags.readonly() && flags.notrap(),
|
||||
_ => false,
|
||||
};
|
||||
|
||||
// Create the egraph node.
|
||||
let op = InstructionImms::from(&func.dfg[inst]);
|
||||
let opcode = op.opcode();
|
||||
let srcloc = func.srclocs[inst];
|
||||
|
||||
let node = if is_readonly_load {
|
||||
self.stats.node_created += 1;
|
||||
self.stats.node_pure += 1;
|
||||
Node::Pure { op, args, types }
|
||||
} else if let Some(load_mem_state) = load_mem_state {
|
||||
let addr = args.as_slice(&self.node_ctx.args)[0];
|
||||
let ty = types.as_slice(&self.node_ctx.types)[0];
|
||||
trace!("load at inst {} has mem state {:?}", inst, load_mem_state);
|
||||
self.stats.node_created += 1;
|
||||
self.stats.node_load += 1;
|
||||
Node::Load {
|
||||
op,
|
||||
ty,
|
||||
inst,
|
||||
addr,
|
||||
mem_state: load_mem_state,
|
||||
srcloc,
|
||||
}
|
||||
} else if has_side_effect(func, inst) || opcode.can_load() {
|
||||
self.stats.node_created += 1;
|
||||
self.stats.node_inst += 1;
|
||||
Node::Inst {
|
||||
op,
|
||||
inst,
|
||||
args,
|
||||
types,
|
||||
srcloc,
|
||||
loop_level,
|
||||
}
|
||||
} else {
|
||||
self.stats.node_created += 1;
|
||||
self.stats.node_pure += 1;
|
||||
Node::Pure { op, args, types }
|
||||
};
|
||||
let dedup_needed = self.node_ctx.needs_dedup(&node);
|
||||
let is_pure = matches!(node, Node::Pure { .. });
|
||||
|
||||
let mut id = self.egraph.add(node, &mut self.node_ctx);
|
||||
|
||||
if dedup_needed {
|
||||
self.stats.node_dedup_query += 1;
|
||||
match id {
|
||||
NewOrExisting::New(_) => {
|
||||
self.stats.node_dedup_miss += 1;
|
||||
}
|
||||
NewOrExisting::Existing(_) => {
|
||||
self.stats.node_dedup_hit += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if opcode == Opcode::Store {
|
||||
let store_data_ty = func.dfg.value_type(func.dfg.inst_args(inst)[0]);
|
||||
self.store_nodes.insert(inst, (store_data_ty, id.get()));
|
||||
self.stats.store_map_insert += 1;
|
||||
}
|
||||
|
||||
// Loads that did not already merge into an existing
|
||||
// load: try to forward from a store (store-to-load
|
||||
// forwarding).
|
||||
if let NewOrExisting::New(new_id) = id {
|
||||
if load_mem_state.is_some() {
|
||||
let opt_id = crate::opts::store_to_load(new_id, self);
|
||||
trace!("store_to_load: {} -> {}", new_id, opt_id);
|
||||
if opt_id != new_id {
|
||||
id = NewOrExisting::Existing(opt_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now either optimize (for new pure nodes), or add to
|
||||
// the side-effecting list (for all other new nodes).
|
||||
let id = match id {
|
||||
NewOrExisting::Existing(id) => id,
|
||||
NewOrExisting::New(id) if is_pure => {
|
||||
// Apply all optimization rules immediately; the
|
||||
// aegraph (acyclic egraph) works best when we do
|
||||
// this so all uses pick up the eclass with all
|
||||
// possible enodes.
|
||||
crate::opts::optimize_eclass(id, self)
|
||||
}
|
||||
NewOrExisting::New(id) => {
|
||||
self.side_effect_ids.push(id);
|
||||
self.stats.side_effect_nodes += 1;
|
||||
id
|
||||
}
|
||||
};
|
||||
|
||||
// Create results and save in Value->Id map.
|
||||
match results {
|
||||
&[] => {}
|
||||
&[one_result] => {
|
||||
trace!("build: value {} -> id {}", one_result, id);
|
||||
value_to_id.insert(one_result, id);
|
||||
}
|
||||
many_results => {
|
||||
debug_assert!(many_results.len() > 1);
|
||||
for (i, &result) in many_results.iter().enumerate() {
|
||||
let ty = func.dfg.value_type(result);
|
||||
let projection = self
|
||||
.egraph
|
||||
.add(
|
||||
Node::Result {
|
||||
value: id,
|
||||
result: i,
|
||||
ty,
|
||||
},
|
||||
&mut self.node_ctx,
|
||||
)
|
||||
.get();
|
||||
self.stats.node_created += 1;
|
||||
self.stats.node_result += 1;
|
||||
trace!("build: value {} -> id {}", result, projection);
|
||||
value_to_id.insert(result, projection);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let side_effect_end =
|
||||
u32::try_from(self.side_effect_ids.len()).expect("Overflow in side-effect count");
|
||||
let side_effect_range = side_effect_start..side_effect_end;
|
||||
self.side_effects[block] = side_effect_range;
|
||||
}
|
||||
}
|
||||
|
||||
/// Scoped elaboration: compute a final ordering of op computation
|
||||
/// for each block and replace the given Func body.
|
||||
///
|
||||
/// This works in concert with the domtree. We do a preorder
|
||||
/// traversal of the domtree, tracking a scoped map from Id to
|
||||
/// (new) Value. The map's scopes correspond to levels in the
|
||||
/// domtree.
|
||||
///
|
||||
/// At each block, we iterate forward over the side-effecting
|
||||
/// eclasses, and recursively generate their arg eclasses, then
|
||||
/// emit the ops themselves.
|
||||
///
|
||||
/// To use an eclass in a given block, we first look it up in the
|
||||
/// scoped map, and get the Value if already present. If not, we
|
||||
/// need to generate it. We emit the extracted enode for this
|
||||
/// eclass after recursively generating its args. Eclasses are
|
||||
/// thus computed "as late as possible", but then memoized into
|
||||
/// the Id-to-Value map and available to all dominated blocks and
|
||||
/// for the rest of this block. (This subsumes GVN.)
|
||||
pub fn elaborate(&mut self, func: &mut Function) {
|
||||
let mut elab = Elaborator::new(
|
||||
func,
|
||||
self.domtree,
|
||||
self.loop_analysis,
|
||||
&self.egraph,
|
||||
&self.node_ctx,
|
||||
&self.remat_ids,
|
||||
&mut self.stats,
|
||||
);
|
||||
elab.elaborate(
|
||||
|block| {
|
||||
let blockparam_range = self.blockparams[block].clone();
|
||||
&self.blockparam_ids_tys
|
||||
[blockparam_range.start as usize..blockparam_range.end as usize]
|
||||
},
|
||||
|block| {
|
||||
let side_effect_range = self.side_effects[block].clone();
|
||||
&self.side_effect_ids
|
||||
[side_effect_range.start as usize..side_effect_range.end as usize]
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// The egraph analysis used by the mid-end. It carries no state of
/// its own; the per-eclass results live in `AnalysisValue`.
pub(crate) struct Analysis;
|
||||
|
||||
/// Analysis results for each eclass id.
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct AnalysisValue {
|
||||
pub(crate) loop_level: LoopLevel,
|
||||
}
|
||||
|
||||
impl Default for AnalysisValue {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
loop_level: LoopLevel::root(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl cranelift_egraph::Analysis for Analysis {
|
||||
type L = NodeCtx;
|
||||
type Value = AnalysisValue;
|
||||
|
||||
fn for_node(
|
||||
&self,
|
||||
ctx: &NodeCtx,
|
||||
n: &Node,
|
||||
values: &SecondaryMap<Id, AnalysisValue>,
|
||||
) -> AnalysisValue {
|
||||
let loop_level = match n {
|
||||
&Node::Pure { ref args, .. } => args
|
||||
.as_slice(&ctx.args)
|
||||
.iter()
|
||||
.map(|&arg| values[arg].loop_level)
|
||||
.max()
|
||||
.unwrap_or(LoopLevel::root()),
|
||||
&Node::Load { addr, .. } => values[addr].loop_level,
|
||||
&Node::Result { value, .. } => values[value].loop_level,
|
||||
&Node::Inst { loop_level, .. } | &Node::Param { loop_level, .. } => loop_level,
|
||||
};
|
||||
|
||||
AnalysisValue { loop_level }
|
||||
}
|
||||
|
||||
fn meet(&self, _ctx: &NodeCtx, v1: &AnalysisValue, v2: &AnalysisValue) -> AnalysisValue {
|
||||
AnalysisValue {
|
||||
loop_level: std::cmp::max(v1.loop_level, v2.loop_level),
|
||||
}
|
||||
}
|
||||
}
|
||||
69
cranelift/codegen/src/egraph/domtree.rs
Normal file
69
cranelift/codegen/src/egraph/domtree.rs
Normal file
@@ -0,0 +1,69 @@
|
||||
//! Extended domtree with various traversal support.
|
||||
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::ir::{Block, Function};
|
||||
use cranelift_entity::{packed_option::PackedOption, SecondaryMap};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct DomTreeWithChildren {
|
||||
nodes: SecondaryMap<Block, DomTreeNode>,
|
||||
root: Block,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default)]
|
||||
struct DomTreeNode {
|
||||
children: PackedOption<Block>,
|
||||
next: PackedOption<Block>,
|
||||
}
|
||||
|
||||
impl DomTreeWithChildren {
|
||||
pub(crate) fn new(func: &Function, domtree: &DominatorTree) -> DomTreeWithChildren {
|
||||
let mut nodes: SecondaryMap<Block, DomTreeNode> =
|
||||
SecondaryMap::with_capacity(func.dfg.num_blocks());
|
||||
|
||||
for block in func.layout.blocks() {
|
||||
let idom_inst = match domtree.idom(block) {
|
||||
Some(idom_inst) => idom_inst,
|
||||
None => continue,
|
||||
};
|
||||
let idom = func
|
||||
.layout
|
||||
.inst_block(idom_inst)
|
||||
.expect("Dominating instruction should be part of a block");
|
||||
|
||||
nodes[block].next = nodes[idom].children;
|
||||
nodes[idom].children = block.into();
|
||||
}
|
||||
|
||||
let root = func.layout.entry_block().unwrap();
|
||||
|
||||
Self { nodes, root }
|
||||
}
|
||||
|
||||
pub(crate) fn root(&self) -> Block {
|
||||
self.root
|
||||
}
|
||||
|
||||
pub(crate) fn children<'a>(&'a self, block: Block) -> DomTreeChildIter<'a> {
|
||||
let block = self.nodes[block].children;
|
||||
DomTreeChildIter {
|
||||
domtree: self,
|
||||
block,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct DomTreeChildIter<'a> {
|
||||
domtree: &'a DomTreeWithChildren,
|
||||
block: PackedOption<Block>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for DomTreeChildIter<'a> {
|
||||
type Item = Block;
|
||||
fn next(&mut self) -> Option<Block> {
|
||||
self.block.expand().map(|block| {
|
||||
self.block = self.domtree.nodes[block].next;
|
||||
block
|
||||
})
|
||||
}
|
||||
}
|
||||
612
cranelift/codegen/src/egraph/elaborate.rs
Normal file
612
cranelift/codegen/src/egraph/elaborate.rs
Normal file
@@ -0,0 +1,612 @@
|
||||
//! Elaboration phase: lowers EGraph back to sequences of operations
|
||||
//! in CFG nodes.
|
||||
|
||||
use super::domtree::DomTreeWithChildren;
|
||||
use super::node::{op_cost, Cost, Node, NodeCtx};
|
||||
use super::Analysis;
|
||||
use super::Stats;
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::fx::FxHashSet;
|
||||
use crate::ir::{Block, Function, Inst, Opcode, RelSourceLoc, Type, Value, ValueList};
|
||||
use crate::loop_analysis::LoopAnalysis;
|
||||
use crate::scoped_hash_map::ScopedHashMap;
|
||||
use crate::trace;
|
||||
use alloc::vec::Vec;
|
||||
use cranelift_egraph::{EGraph, Id, Language, NodeKey};
|
||||
use cranelift_entity::{packed_option::PackedOption, SecondaryMap};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::ops::Add;
|
||||
|
||||
/// Loop nesting depth as tracked during elaboration (the number of
/// entries on the elaborator's loop stack).
type LoopDepth = u32;
|
||||
|
||||
pub(crate) struct Elaborator<'a> {
|
||||
func: &'a mut Function,
|
||||
domtree: &'a DominatorTree,
|
||||
loop_analysis: &'a LoopAnalysis,
|
||||
node_ctx: &'a NodeCtx,
|
||||
egraph: &'a EGraph<NodeCtx, Analysis>,
|
||||
id_to_value: ScopedHashMap<Id, IdValue>,
|
||||
id_to_best_cost_and_node: SecondaryMap<Id, (Cost, Id)>,
|
||||
/// Stack of blocks and loops in current elaboration path.
|
||||
loop_stack: SmallVec<[LoopStackEntry; 8]>,
|
||||
cur_block: Option<Block>,
|
||||
first_branch: SecondaryMap<Block, PackedOption<Inst>>,
|
||||
remat_ids: &'a FxHashSet<Id>,
|
||||
/// Explicitly-unrolled value elaboration stack.
|
||||
elab_stack: Vec<ElabStackEntry>,
|
||||
elab_result_stack: Vec<IdValue>,
|
||||
/// Explicitly-unrolled block elaboration stack.
|
||||
block_stack: Vec<BlockStackEntry>,
|
||||
stats: &'a mut Stats,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct LoopStackEntry {
|
||||
/// The hoist point: a block that immediately dominates this
|
||||
/// loop. May not be an immediate predecessor, but will be a valid
|
||||
/// point to place all loop-invariant ops: they must depend only
|
||||
/// on inputs that dominate the loop, so are available at (the end
|
||||
/// of) this block.
|
||||
hoist_block: Block,
|
||||
/// The depth in the scope map.
|
||||
scope_depth: u32,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum ElabStackEntry {
|
||||
/// Next action is to resolve this id into a node and elaborate
|
||||
/// args.
|
||||
Start { id: Id },
|
||||
/// Args have been pushed; waiting for results.
|
||||
PendingNode {
|
||||
canonical: Id,
|
||||
node_key: NodeKey,
|
||||
remat: bool,
|
||||
num_args: usize,
|
||||
},
|
||||
/// Waiting for a result to return one projected value of a
|
||||
/// multi-value result.
|
||||
PendingProjection { canonical: Id, index: usize },
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum BlockStackEntry {
|
||||
Elaborate { block: Block, idom: Option<Block> },
|
||||
Pop,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum IdValue {
|
||||
/// A single value.
|
||||
Value {
|
||||
depth: LoopDepth,
|
||||
block: Block,
|
||||
value: Value,
|
||||
},
|
||||
/// Multiple results; indices in `node_args`.
|
||||
Values {
|
||||
depth: LoopDepth,
|
||||
block: Block,
|
||||
values: ValueList,
|
||||
},
|
||||
}
|
||||
|
||||
impl IdValue {
|
||||
fn block(&self) -> Block {
|
||||
match self {
|
||||
IdValue::Value { block, .. } | IdValue::Values { block, .. } => *block,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Elaborator<'a> {
|
||||
pub(crate) fn new(
|
||||
func: &'a mut Function,
|
||||
domtree: &'a DominatorTree,
|
||||
loop_analysis: &'a LoopAnalysis,
|
||||
egraph: &'a EGraph<NodeCtx, Analysis>,
|
||||
node_ctx: &'a NodeCtx,
|
||||
remat_ids: &'a FxHashSet<Id>,
|
||||
stats: &'a mut Stats,
|
||||
) -> Self {
|
||||
let num_blocks = func.dfg.num_blocks();
|
||||
let mut id_to_best_cost_and_node =
|
||||
SecondaryMap::with_default((Cost::infinity(), Id::invalid()));
|
||||
id_to_best_cost_and_node.resize(egraph.classes.len());
|
||||
Self {
|
||||
func,
|
||||
domtree,
|
||||
loop_analysis,
|
||||
egraph,
|
||||
node_ctx,
|
||||
id_to_value: ScopedHashMap::with_capacity(egraph.classes.len()),
|
||||
id_to_best_cost_and_node,
|
||||
loop_stack: smallvec![],
|
||||
cur_block: None,
|
||||
first_branch: SecondaryMap::with_capacity(num_blocks),
|
||||
remat_ids,
|
||||
elab_stack: vec![],
|
||||
elab_result_stack: vec![],
|
||||
block_stack: vec![],
|
||||
stats,
|
||||
}
|
||||
}
|
||||
|
||||
/// Current loop depth: the number of entries on the loop stack.
fn cur_loop_depth(&self) -> LoopDepth {
    let entries = self.loop_stack.len();
    entries as LoopDepth
}
|
||||
|
||||
fn start_block(&mut self, idom: Option<Block>, block: Block, block_params: &[(Id, Type)]) {
|
||||
trace!(
|
||||
"start_block: block {:?} with idom {:?} at loop depth {} scope depth {}",
|
||||
block,
|
||||
idom,
|
||||
self.cur_loop_depth(),
|
||||
self.id_to_value.depth()
|
||||
);
|
||||
|
||||
// Note that if the *entry* block is a loop header, we will
|
||||
// not make note of the loop here because it will not have an
|
||||
// immediate dominator. We must disallow this case because we
|
||||
// will skip adding the `LoopStackEntry` here but our
|
||||
// `LoopAnalysis` will otherwise still make note of this loop
|
||||
// and loop depths will not match.
|
||||
if let Some(idom) = idom {
|
||||
if self.loop_analysis.is_loop_header(block).is_some() {
|
||||
self.loop_stack.push(LoopStackEntry {
|
||||
// Any code hoisted out of this loop will have code
|
||||
// placed in `idom`, and will have def mappings
|
||||
// inserted in to the scoped hashmap at that block's
|
||||
// level.
|
||||
hoist_block: idom,
|
||||
scope_depth: (self.id_to_value.depth() - 1) as u32,
|
||||
});
|
||||
trace!(
|
||||
" -> loop header, pushing; depth now {}",
|
||||
self.loop_stack.len()
|
||||
);
|
||||
}
|
||||
} else {
|
||||
debug_assert!(
|
||||
self.loop_analysis.is_loop_header(block).is_none(),
|
||||
"Entry block (domtree root) cannot be a loop header!"
|
||||
);
|
||||
}
|
||||
|
||||
self.cur_block = Some(block);
|
||||
for &(id, ty) in block_params {
|
||||
let value = self.func.dfg.append_block_param(block, ty);
|
||||
trace!(" -> block param id {:?} value {:?}", id, value);
|
||||
self.id_to_value.insert_if_absent(
|
||||
id,
|
||||
IdValue::Value {
|
||||
depth: self.cur_loop_depth(),
|
||||
block,
|
||||
value,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn add_node(&mut self, node: &Node, args: &[Value], to_block: Block) -> ValueList {
|
||||
let (instdata, result_tys) = match node {
|
||||
Node::Pure { op, types, .. } | Node::Inst { op, types, .. } => (
|
||||
op.with_args(args, &mut self.func.dfg.value_lists),
|
||||
types.as_slice(&self.node_ctx.types),
|
||||
),
|
||||
Node::Load { op, ty, .. } => (
|
||||
op.with_args(args, &mut self.func.dfg.value_lists),
|
||||
std::slice::from_ref(ty),
|
||||
),
|
||||
_ => panic!("Cannot `add_node()` on block param or projection"),
|
||||
};
|
||||
let srcloc = match node {
|
||||
Node::Inst { srcloc, .. } | Node::Load { srcloc, .. } => *srcloc,
|
||||
_ => RelSourceLoc::default(),
|
||||
};
|
||||
let opcode = instdata.opcode();
|
||||
// Is this instruction either an actual terminator (an
|
||||
// instruction that must end the block), or at least in the
|
||||
// group of branches at the end (including conditional
|
||||
// branches that may be followed by an actual terminator)? We
|
||||
// call this the "terminator group", and we record the first
|
||||
// inst in this group (`first_branch` below) so that we do not
|
||||
// insert instructions needed only by args of later
|
||||
// instructions in the terminator group in the middle of the
|
||||
// terminator group.
|
||||
//
|
||||
// E.g., for the original sequence
|
||||
// v1 = op ...
|
||||
// brnz vCond, block1
|
||||
// jump block2(v1)
|
||||
//
|
||||
// elaboration would naively produce
|
||||
//
|
||||
// brnz vCond, block1
|
||||
// v1 = op ...
|
||||
// jump block2(v1)
|
||||
//
|
||||
// but we use the `first_branch` mechanism below to ensure
|
||||
// that once we've emitted at least one branch, all other
|
||||
// elaborated insts have to go before that. So we emit brnz
|
||||
// first, then as we elaborate the jump, we find we need the
|
||||
// `op`; we `insert_inst` it *before* the brnz (which is the
|
||||
// `first_branch`).
|
||||
let is_terminator_group_inst =
|
||||
opcode.is_branch() || opcode.is_return() || opcode == Opcode::Trap;
|
||||
let inst = self.func.dfg.make_inst(instdata);
|
||||
self.func.srclocs[inst] = srcloc;
|
||||
|
||||
for &ty in result_tys {
|
||||
self.func.dfg.append_result(inst, ty);
|
||||
}
|
||||
|
||||
if is_terminator_group_inst {
|
||||
self.func.layout.append_inst(inst, to_block);
|
||||
if self.first_branch[to_block].is_none() {
|
||||
self.first_branch[to_block] = Some(inst).into();
|
||||
}
|
||||
} else if let Some(branch) = self.first_branch[to_block].into() {
|
||||
self.func.layout.insert_inst(inst, branch);
|
||||
} else {
|
||||
self.func.layout.append_inst(inst, to_block);
|
||||
}
|
||||
self.func.dfg.inst_results_list(inst)
|
||||
}
|
||||
|
||||
fn compute_best_nodes(&mut self) {
|
||||
let best = &mut self.id_to_best_cost_and_node;
|
||||
for (eclass_id, eclass) in &self.egraph.classes {
|
||||
trace!("computing best for eclass {:?}", eclass_id);
|
||||
if let Some(child1) = eclass.child1() {
|
||||
trace!(" -> child {:?}", child1);
|
||||
best[eclass_id] = best[child1];
|
||||
}
|
||||
if let Some(child2) = eclass.child2() {
|
||||
trace!(" -> child {:?}", child2);
|
||||
if best[child2].0 < best[eclass_id].0 {
|
||||
best[eclass_id] = best[child2];
|
||||
}
|
||||
}
|
||||
if let Some(node_key) = eclass.get_node() {
|
||||
let node = node_key.node(&self.egraph.nodes);
|
||||
trace!(" -> eclass {:?}: node {:?}", eclass_id, node);
|
||||
let (cost, id) = match node {
|
||||
Node::Param { .. }
|
||||
| Node::Inst { .. }
|
||||
| Node::Load { .. }
|
||||
| Node::Result { .. } => (Cost::zero(), eclass_id),
|
||||
Node::Pure { op, .. } => {
|
||||
let args_cost = self
|
||||
.node_ctx
|
||||
.children(node)
|
||||
.iter()
|
||||
.map(|&arg_id| {
|
||||
trace!(" -> arg {:?}", arg_id);
|
||||
best[arg_id].0
|
||||
})
|
||||
// Can't use `.sum()` for `Cost` types; do
|
||||
// an explicit reduce instead.
|
||||
.fold(Cost::zero(), Cost::add);
|
||||
let level = self.egraph.analysis_value(eclass_id).loop_level;
|
||||
let cost = op_cost(op).at_level(level) + args_cost;
|
||||
(cost, eclass_id)
|
||||
}
|
||||
};
|
||||
|
||||
if cost < best[eclass_id].0 {
|
||||
best[eclass_id] = (cost, id);
|
||||
}
|
||||
}
|
||||
debug_assert_ne!(best[eclass_id].0, Cost::infinity());
|
||||
debug_assert_ne!(best[eclass_id].1, Id::invalid());
|
||||
trace!("best for eclass {:?}: {:?}", eclass_id, best[eclass_id]);
|
||||
}
|
||||
}
|
||||
|
||||
fn elaborate_eclass_use(&mut self, id: Id) {
|
||||
self.elab_stack.push(ElabStackEntry::Start { id });
|
||||
self.process_elab_stack();
|
||||
debug_assert_eq!(self.elab_result_stack.len(), 1);
|
||||
self.elab_result_stack.clear();
|
||||
}
|
||||
|
||||
fn process_elab_stack(&mut self) {
|
||||
while let Some(entry) = self.elab_stack.last() {
|
||||
match entry {
|
||||
&ElabStackEntry::Start { id } => {
|
||||
// We always replace the Start entry, so pop it now.
|
||||
self.elab_stack.pop();
|
||||
|
||||
self.stats.elaborate_visit_node += 1;
|
||||
let canonical = self.egraph.canonical_id(id);
|
||||
trace!("elaborate: id {}", id);
|
||||
|
||||
let remat = if let Some(val) = self.id_to_value.get(&canonical) {
|
||||
// Look at the defined block, and determine whether this
|
||||
// node kind allows rematerialization if the value comes
|
||||
// from another block. If so, ignore the hit and recompute
|
||||
// below.
|
||||
let remat = val.block() != self.cur_block.unwrap()
|
||||
&& self.remat_ids.contains(&canonical);
|
||||
if !remat {
|
||||
trace!("elaborate: id {} -> {:?}", id, val);
|
||||
self.stats.elaborate_memoize_hit += 1;
|
||||
self.elab_result_stack.push(val.clone());
|
||||
continue;
|
||||
}
|
||||
trace!("elaborate: id {} -> remat", id);
|
||||
self.stats.elaborate_memoize_miss_remat += 1;
|
||||
// The op is pure at this point, so it is always valid to
|
||||
// remove from this map.
|
||||
self.id_to_value.remove(&canonical);
|
||||
true
|
||||
} else {
|
||||
self.remat_ids.contains(&canonical)
|
||||
};
|
||||
self.stats.elaborate_memoize_miss += 1;
|
||||
|
||||
// Get the best option; we use `id` (latest id) here so we
|
||||
// have a full view of the eclass.
|
||||
let (_, best_node_eclass) = self.id_to_best_cost_and_node[id];
|
||||
debug_assert_ne!(best_node_eclass, Id::invalid());
|
||||
|
||||
trace!(
|
||||
"elaborate: id {} -> best {} -> eclass node {:?}",
|
||||
id,
|
||||
best_node_eclass,
|
||||
self.egraph.classes[best_node_eclass]
|
||||
);
|
||||
let node_key = self.egraph.classes[best_node_eclass].get_node().unwrap();
|
||||
let node = node_key.node(&self.egraph.nodes);
|
||||
trace!(" -> enode {:?}", node);
|
||||
|
||||
// Is the node a block param? We should never get here if so
|
||||
// (they are inserted when first visiting the block).
|
||||
if matches!(node, Node::Param { .. }) {
|
||||
unreachable!("Param nodes should already be inserted");
|
||||
}
|
||||
|
||||
// Is the node a result projection? If so, resolve
|
||||
// the value we are projecting a part of, then
|
||||
// eventually return here (saving state with a
|
||||
// PendingProjection).
|
||||
if let Node::Result { value, result, .. } = node {
|
||||
trace!(" -> result; pushing arg value {}", value);
|
||||
self.elab_stack.push(ElabStackEntry::PendingProjection {
|
||||
index: *result,
|
||||
canonical,
|
||||
});
|
||||
self.elab_stack.push(ElabStackEntry::Start { id: *value });
|
||||
continue;
|
||||
}
|
||||
|
||||
// We're going to need to emit this
|
||||
// operator. First, enqueue all args to be
|
||||
// elaborated. Push state to receive the results
|
||||
// and later elab this node.
|
||||
let num_args = self.node_ctx.children(&node).len();
|
||||
self.elab_stack.push(ElabStackEntry::PendingNode {
|
||||
canonical,
|
||||
node_key,
|
||||
remat,
|
||||
num_args,
|
||||
});
|
||||
// Push args in reverse order so we process the
|
||||
// first arg first.
|
||||
for &arg_id in self.node_ctx.children(&node).iter().rev() {
|
||||
self.elab_stack.push(ElabStackEntry::Start { id: arg_id });
|
||||
}
|
||||
}
|
||||
|
||||
&ElabStackEntry::PendingNode {
|
||||
canonical,
|
||||
node_key,
|
||||
remat,
|
||||
num_args,
|
||||
} => {
|
||||
self.elab_stack.pop();
|
||||
|
||||
let node = node_key.node(&self.egraph.nodes);
|
||||
|
||||
// We should have all args resolved at this point.
|
||||
let arg_idx = self.elab_result_stack.len() - num_args;
|
||||
let args = &self.elab_result_stack[arg_idx..];
|
||||
|
||||
// Gather the individual output-CLIF `Value`s.
|
||||
let arg_values: SmallVec<[Value; 8]> = args
|
||||
.iter()
|
||||
.map(|idvalue| match idvalue {
|
||||
IdValue::Value { value, .. } => *value,
|
||||
IdValue::Values { .. } => {
|
||||
panic!("enode depends directly on multi-value result")
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Compute max loop depth.
|
||||
let max_loop_depth = args
|
||||
.iter()
|
||||
.map(|idvalue| match idvalue {
|
||||
IdValue::Value { depth, .. } => *depth,
|
||||
IdValue::Values { .. } => unreachable!(),
|
||||
})
|
||||
.max()
|
||||
.unwrap_or(0);
|
||||
|
||||
// Remove args from result stack.
|
||||
self.elab_result_stack.truncate(arg_idx);
|
||||
|
||||
// Determine the location at which we emit it. This is the
|
||||
// current block *unless* we hoist above a loop when all args
|
||||
// are loop-invariant (and this op is pure).
|
||||
let (loop_depth, scope_depth, block) = if node.is_non_pure() {
|
||||
// Non-pure op: always at the current location.
|
||||
(
|
||||
self.cur_loop_depth(),
|
||||
self.id_to_value.depth(),
|
||||
self.cur_block.unwrap(),
|
||||
)
|
||||
} else if max_loop_depth == self.cur_loop_depth() || remat {
|
||||
// Pure op, but depends on some value at the current loop
|
||||
// depth, or remat forces it here: as above.
|
||||
(
|
||||
self.cur_loop_depth(),
|
||||
self.id_to_value.depth(),
|
||||
self.cur_block.unwrap(),
|
||||
)
|
||||
} else {
|
||||
// Pure op, and does not depend on any args at current
|
||||
// loop depth: hoist out of loop.
|
||||
self.stats.elaborate_licm_hoist += 1;
|
||||
let data = &self.loop_stack[max_loop_depth as usize];
|
||||
(max_loop_depth, data.scope_depth as usize, data.hoist_block)
|
||||
};
|
||||
// Loop scopes are a subset of all scopes.
|
||||
debug_assert!(scope_depth >= loop_depth as usize);
|
||||
|
||||
// This is an actual operation; emit the node in sequence now.
|
||||
let results = self.add_node(node, &arg_values[..], block);
|
||||
let results_slice = results.as_slice(&self.func.dfg.value_lists);
|
||||
|
||||
// Build the result and memoize in the id-to-value map.
|
||||
let result = if results_slice.len() == 1 {
|
||||
IdValue::Value {
|
||||
depth: loop_depth,
|
||||
block,
|
||||
value: results_slice[0],
|
||||
}
|
||||
} else {
|
||||
IdValue::Values {
|
||||
depth: loop_depth,
|
||||
block,
|
||||
values: results,
|
||||
}
|
||||
};
|
||||
|
||||
self.id_to_value.insert_if_absent_with_depth(
|
||||
canonical,
|
||||
result.clone(),
|
||||
scope_depth,
|
||||
);
|
||||
|
||||
// Push onto the elab-results stack.
|
||||
self.elab_result_stack.push(result)
|
||||
}
|
||||
&ElabStackEntry::PendingProjection { index, canonical } => {
|
||||
self.elab_stack.pop();
|
||||
|
||||
// Grab the input from the elab-result stack.
|
||||
let value = self.elab_result_stack.pop().expect("Should have result");
|
||||
|
||||
let (depth, block, values) = match value {
|
||||
IdValue::Values {
|
||||
depth,
|
||||
block,
|
||||
values,
|
||||
..
|
||||
} => (depth, block, values),
|
||||
IdValue::Value { .. } => {
|
||||
unreachable!("Projection nodes should not be used on single results");
|
||||
}
|
||||
};
|
||||
let values = values.as_slice(&self.func.dfg.value_lists);
|
||||
let value = IdValue::Value {
|
||||
depth,
|
||||
block,
|
||||
value: values[index],
|
||||
};
|
||||
self.id_to_value.insert_if_absent(canonical, value.clone());
|
||||
|
||||
self.elab_result_stack.push(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn elaborate_block<'b, PF: Fn(Block) -> &'b [(Id, Type)], SEF: Fn(Block) -> &'b [Id]>(
|
||||
&mut self,
|
||||
idom: Option<Block>,
|
||||
block: Block,
|
||||
block_params_fn: &PF,
|
||||
block_side_effects_fn: &SEF,
|
||||
) {
|
||||
let blockparam_ids_tys = (block_params_fn)(block);
|
||||
self.start_block(idom, block, blockparam_ids_tys);
|
||||
for &id in (block_side_effects_fn)(block) {
|
||||
self.elaborate_eclass_use(id);
|
||||
}
|
||||
}
|
||||
|
||||
fn elaborate_domtree<'b, PF: Fn(Block) -> &'b [(Id, Type)], SEF: Fn(Block) -> &'b [Id]>(
|
||||
&mut self,
|
||||
block_params_fn: &PF,
|
||||
block_side_effects_fn: &SEF,
|
||||
domtree: &DomTreeWithChildren,
|
||||
) {
|
||||
let root = domtree.root();
|
||||
self.block_stack.push(BlockStackEntry::Elaborate {
|
||||
block: root,
|
||||
idom: None,
|
||||
});
|
||||
while let Some(top) = self.block_stack.pop() {
|
||||
match top {
|
||||
BlockStackEntry::Elaborate { block, idom } => {
|
||||
self.block_stack.push(BlockStackEntry::Pop);
|
||||
self.id_to_value.increment_depth();
|
||||
|
||||
self.elaborate_block(idom, block, block_params_fn, block_side_effects_fn);
|
||||
|
||||
// Push children. We are doing a preorder
|
||||
// traversal so we do this after processing this
|
||||
// block above.
|
||||
let block_stack_end = self.block_stack.len();
|
||||
for child in domtree.children(block) {
|
||||
self.block_stack.push(BlockStackEntry::Elaborate {
|
||||
block: child,
|
||||
idom: Some(block),
|
||||
});
|
||||
}
|
||||
// Reverse what we just pushed so we elaborate in
|
||||
// original block order. (The domtree iter is a
|
||||
// single-ended iter over a singly-linked list so
|
||||
// we can't `.rev()` above.)
|
||||
self.block_stack[block_stack_end..].reverse();
|
||||
}
|
||||
BlockStackEntry::Pop => {
|
||||
self.id_to_value.decrement_depth();
|
||||
if let Some(innermost_loop) = self.loop_stack.last() {
|
||||
if innermost_loop.scope_depth as usize == self.id_to_value.depth() {
|
||||
self.loop_stack.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn clear_func_body(&mut self) {
|
||||
// Clear all instructions and args/results from the DFG. We
|
||||
// rebuild them entirely during elaboration. (TODO: reuse the
|
||||
// existing inst for the *first* copy of a given node.)
|
||||
self.func.dfg.clear_insts();
|
||||
// Clear the instructions in every block, but leave the list
|
||||
// of blocks and their layout unmodified.
|
||||
self.func.layout.clear_insts();
|
||||
self.func.srclocs.clear();
|
||||
}
|
||||
|
||||
pub(crate) fn elaborate<'b, PF: Fn(Block) -> &'b [(Id, Type)], SEF: Fn(Block) -> &'b [Id]>(
|
||||
&mut self,
|
||||
block_params_fn: PF,
|
||||
block_side_effects_fn: SEF,
|
||||
) {
|
||||
let domtree = DomTreeWithChildren::new(self.func, self.domtree);
|
||||
self.stats.elaborate_func += 1;
|
||||
self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64;
|
||||
self.clear_func_body();
|
||||
self.compute_best_nodes();
|
||||
self.elaborate_domtree(&block_params_fn, &block_side_effects_fn, &domtree);
|
||||
self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64;
|
||||
}
|
||||
}
|
||||
376
cranelift/codegen/src/egraph/node.rs
Normal file
376
cranelift/codegen/src/egraph/node.rs
Normal file
@@ -0,0 +1,376 @@
|
||||
//! Node definition for EGraph representation.
|
||||
|
||||
use super::MemoryState;
|
||||
use crate::ir::{Block, DataFlowGraph, Inst, InstructionImms, Opcode, RelSourceLoc, Type};
|
||||
use crate::loop_analysis::LoopLevel;
|
||||
use cranelift_egraph::{BumpArena, BumpSlice, CtxEq, CtxHash, Id, Language, UnionFind};
|
||||
use cranelift_entity::{EntityList, ListPool};
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Node {
|
||||
/// A blockparam. Effectively an input/root; does not refer to
|
||||
/// predecessors' branch arguments, because this would create
|
||||
/// cycles.
|
||||
Param {
|
||||
/// CLIF block this param comes from.
|
||||
block: Block,
|
||||
/// Index of blockparam within block.
|
||||
index: u32,
|
||||
/// Type of the value.
|
||||
ty: Type,
|
||||
/// The loop level of this Param.
|
||||
loop_level: LoopLevel,
|
||||
},
|
||||
/// A CLIF instruction that is pure (has no side-effects). Not
|
||||
/// tied to any location; we will compute a set of locations at
|
||||
/// which to compute this node during lowering back out of the
|
||||
/// egraph.
|
||||
Pure {
|
||||
/// The instruction data, without SSA values.
|
||||
op: InstructionImms,
|
||||
/// eclass arguments to the operator.
|
||||
args: EntityList<Id>,
|
||||
/// Types of results.
|
||||
types: BumpSlice<Type>,
|
||||
},
|
||||
/// A CLIF instruction that has side-effects or is otherwise not
|
||||
/// representable by `Pure`.
|
||||
Inst {
|
||||
/// The instruction data, without SSA values.
|
||||
op: InstructionImms,
|
||||
/// eclass arguments to the operator.
|
||||
args: EntityList<Id>,
|
||||
/// Types of results.
|
||||
types: BumpSlice<Type>,
|
||||
/// The index of the original instruction. We include this so
|
||||
/// that the `Inst`s are not deduplicated: every instance is a
|
||||
/// logically separate and unique side-effect. However,
|
||||
/// because we clear the DataFlowGraph before elaboration,
|
||||
/// this `Inst` is *not* valid to fetch any details from the
|
||||
/// original instruction.
|
||||
inst: Inst,
|
||||
/// The source location to preserve.
|
||||
srcloc: RelSourceLoc,
|
||||
/// The loop level of this Inst.
|
||||
loop_level: LoopLevel,
|
||||
},
|
||||
/// A projection of one result of an `Inst` or `Pure`.
|
||||
Result {
|
||||
/// `Inst` or `Pure` node.
|
||||
value: Id,
|
||||
/// Index of the result we want.
|
||||
result: usize,
|
||||
/// Type of the value.
|
||||
ty: Type,
|
||||
},
|
||||
|
||||
/// A load instruction. Nominally a side-effecting `Inst` (and
|
||||
/// included in the list of side-effecting roots so it will always
|
||||
/// be elaborated), but represented as a distinct kind of node so
|
||||
/// that we can leverage deduplication to do
|
||||
/// redundant-load-elimination for free (and make store-to-load
|
||||
/// forwarding much easier).
|
||||
Load {
|
||||
// -- identity depends on:
|
||||
/// The original load operation. Must have one argument, the
|
||||
/// address.
|
||||
op: InstructionImms,
|
||||
/// The type of the load result.
|
||||
ty: Type,
|
||||
/// Address argument. Actual address has an offset, which is
|
||||
/// included in `op` (and thus already considered as part of
|
||||
/// the key).
|
||||
addr: Id,
|
||||
/// The abstract memory state that this load accesses.
|
||||
mem_state: MemoryState,
|
||||
|
||||
// -- not included in dedup key:
|
||||
/// The `Inst` we will use for a trap location for this
|
||||
/// load. Excluded from Eq/Hash so that loads that are
|
||||
/// identical except for the specific instance will dedup on
|
||||
/// top of each other.
|
||||
inst: Inst,
|
||||
/// Source location, for traps. Not included in Eq/Hash.
|
||||
srcloc: RelSourceLoc,
|
||||
},
|
||||
}
|
||||
|
||||
impl Node {
|
||||
pub(crate) fn is_non_pure(&self) -> bool {
|
||||
match self {
|
||||
Node::Inst { .. } | Node::Load { .. } => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Shared pools for type and id lists in nodes.
|
||||
pub struct NodeCtx {
|
||||
/// Arena for result-type arrays.
|
||||
pub types: BumpArena<Type>,
|
||||
/// Arena for arg eclass-ID lists.
|
||||
pub args: ListPool<Id>,
|
||||
}
|
||||
|
||||
impl NodeCtx {
|
||||
pub(crate) fn with_capacity_for_dfg(dfg: &DataFlowGraph) -> Self {
|
||||
let n_types = dfg.num_values();
|
||||
let n_args = dfg.value_lists.capacity();
|
||||
Self {
|
||||
types: BumpArena::arena_with_capacity(n_types),
|
||||
args: ListPool::with_capacity(n_args),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl NodeCtx {
|
||||
fn ids_eq(&self, a: &EntityList<Id>, b: &EntityList<Id>, uf: &mut UnionFind) -> bool {
|
||||
let a = a.as_slice(&self.args);
|
||||
let b = b.as_slice(&self.args);
|
||||
a.len() == b.len() && a.iter().zip(b.iter()).all(|(&a, &b)| uf.equiv_id_mut(a, b))
|
||||
}
|
||||
|
||||
fn hash_ids<H: Hasher>(&self, a: &EntityList<Id>, hash: &mut H, uf: &mut UnionFind) {
|
||||
let a = a.as_slice(&self.args);
|
||||
for &id in a {
|
||||
uf.hash_id_mut(hash, id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl CtxEq<Node, Node> for NodeCtx {
|
||||
fn ctx_eq(&self, a: &Node, b: &Node, uf: &mut UnionFind) -> bool {
|
||||
match (a, b) {
|
||||
(
|
||||
&Node::Param {
|
||||
block,
|
||||
index,
|
||||
ty,
|
||||
loop_level: _,
|
||||
},
|
||||
&Node::Param {
|
||||
block: other_block,
|
||||
index: other_index,
|
||||
ty: other_ty,
|
||||
loop_level: _,
|
||||
},
|
||||
) => block == other_block && index == other_index && ty == other_ty,
|
||||
(
|
||||
&Node::Result { value, result, ty },
|
||||
&Node::Result {
|
||||
value: other_value,
|
||||
result: other_result,
|
||||
ty: other_ty,
|
||||
},
|
||||
) => uf.equiv_id_mut(value, other_value) && result == other_result && ty == other_ty,
|
||||
(
|
||||
&Node::Pure {
|
||||
ref op,
|
||||
ref args,
|
||||
ref types,
|
||||
},
|
||||
&Node::Pure {
|
||||
op: ref other_op,
|
||||
args: ref other_args,
|
||||
types: ref other_types,
|
||||
},
|
||||
) => {
|
||||
*op == *other_op
|
||||
&& self.ids_eq(args, other_args, uf)
|
||||
&& types.as_slice(&self.types) == other_types.as_slice(&self.types)
|
||||
}
|
||||
(
|
||||
&Node::Inst { inst, ref args, .. },
|
||||
&Node::Inst {
|
||||
inst: other_inst,
|
||||
args: ref other_args,
|
||||
..
|
||||
},
|
||||
) => inst == other_inst && self.ids_eq(args, other_args, uf),
|
||||
(
|
||||
&Node::Load {
|
||||
ref op,
|
||||
ty,
|
||||
addr,
|
||||
mem_state,
|
||||
..
|
||||
},
|
||||
&Node::Load {
|
||||
op: ref other_op,
|
||||
ty: other_ty,
|
||||
addr: other_addr,
|
||||
mem_state: other_mem_state,
|
||||
// Explicitly exclude: `inst` and `srcloc`. We
|
||||
// want loads to merge if identical in
|
||||
// opcode/offset, address expression, and last
|
||||
// store (this does implicit
|
||||
// redundant-load-elimination.)
|
||||
//
|
||||
// Note however that we *do* include `ty` (the
|
||||
// type) and match on that: we otherwise would
|
||||
// have no way of disambiguating loads of
|
||||
// different widths to the same address.
|
||||
..
|
||||
},
|
||||
) => {
|
||||
op == other_op
|
||||
&& ty == other_ty
|
||||
&& uf.equiv_id_mut(addr, other_addr)
|
||||
&& mem_state == other_mem_state
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl CtxHash<Node> for NodeCtx {
|
||||
fn ctx_hash(&self, value: &Node, uf: &mut UnionFind) -> u64 {
|
||||
let mut state = crate::fx::FxHasher::default();
|
||||
std::mem::discriminant(value).hash(&mut state);
|
||||
match value {
|
||||
&Node::Param {
|
||||
block,
|
||||
index,
|
||||
ty: _,
|
||||
loop_level: _,
|
||||
} => {
|
||||
block.hash(&mut state);
|
||||
index.hash(&mut state);
|
||||
}
|
||||
&Node::Result {
|
||||
value,
|
||||
result,
|
||||
ty: _,
|
||||
} => {
|
||||
uf.hash_id_mut(&mut state, value);
|
||||
result.hash(&mut state);
|
||||
}
|
||||
&Node::Pure {
|
||||
ref op,
|
||||
ref args,
|
||||
types: _,
|
||||
} => {
|
||||
op.hash(&mut state);
|
||||
self.hash_ids(args, &mut state, uf);
|
||||
// Don't hash `types`: it requires an indirection
|
||||
// (hence cache misses), and result type *should* be
|
||||
// fully determined by op and args.
|
||||
}
|
||||
&Node::Inst { inst, ref args, .. } => {
|
||||
inst.hash(&mut state);
|
||||
self.hash_ids(args, &mut state, uf);
|
||||
}
|
||||
&Node::Load {
|
||||
ref op,
|
||||
ty,
|
||||
addr,
|
||||
mem_state,
|
||||
..
|
||||
} => {
|
||||
op.hash(&mut state);
|
||||
ty.hash(&mut state);
|
||||
uf.hash_id_mut(&mut state, addr);
|
||||
mem_state.hash(&mut state);
|
||||
}
|
||||
}
|
||||
|
||||
state.finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// Heuristic cost of computing a node, stored as a saturating `u32`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct Cost(u32);
|
||||
impl Cost {
|
||||
pub(crate) fn at_level(&self, loop_level: LoopLevel) -> Cost {
|
||||
let loop_level = std::cmp::min(2, loop_level.level());
|
||||
let multiplier = 1u32 << ((10 * loop_level) as u32);
|
||||
Cost(self.0.saturating_mul(multiplier)).finite()
|
||||
}
|
||||
|
||||
pub(crate) fn infinity() -> Cost {
|
||||
// 2^32 - 1 is, uh, pretty close to infinite... (we use `Cost`
|
||||
// only for heuristics and always saturate so this suffices!)
|
||||
Cost(u32::MAX)
|
||||
}
|
||||
|
||||
pub(crate) fn zero() -> Cost {
|
||||
Cost(0)
|
||||
}
|
||||
|
||||
/// Clamp this cost at a "finite" value. Can be used in
|
||||
/// conjunction with saturating ops to avoid saturating into
|
||||
/// `infinity()`.
|
||||
fn finite(self) -> Cost {
|
||||
Cost(std::cmp::min(u32::MAX - 1, self.0))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::default::Default for Cost {
    /// The default cost is `Cost::zero()`.
    fn default() -> Cost {
        Self::zero()
    }
}
|
||||
|
||||
impl std::ops::Add<Cost> for Cost {
|
||||
type Output = Cost;
|
||||
fn add(self, other: Cost) -> Cost {
|
||||
Cost(self.0.saturating_add(other.0)).finite()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn op_cost(op: &InstructionImms) -> Cost {
|
||||
match op.opcode() {
|
||||
// Constants.
|
||||
Opcode::Iconst | Opcode::F32const | Opcode::F64const | Opcode::Bconst => Cost(0),
|
||||
// Extends/reduces.
|
||||
Opcode::Bextend
|
||||
| Opcode::Breduce
|
||||
| Opcode::Uextend
|
||||
| Opcode::Sextend
|
||||
| Opcode::Ireduce
|
||||
| Opcode::Iconcat
|
||||
| Opcode::Isplit => Cost(1),
|
||||
// "Simple" arithmetic.
|
||||
Opcode::Iadd
|
||||
| Opcode::Isub
|
||||
| Opcode::Band
|
||||
| Opcode::BandNot
|
||||
| Opcode::Bor
|
||||
| Opcode::BorNot
|
||||
| Opcode::Bxor
|
||||
| Opcode::BxorNot
|
||||
| Opcode::Bnot => Cost(2),
|
||||
// Everything else.
|
||||
_ => Cost(3),
|
||||
}
|
||||
}
|
||||
|
||||
impl Language for NodeCtx {
|
||||
type Node = Node;
|
||||
|
||||
fn children<'a>(&'a self, node: &'a Node) -> &'a [Id] {
|
||||
match node {
|
||||
Node::Param { .. } => &[],
|
||||
Node::Pure { args, .. } | Node::Inst { args, .. } => args.as_slice(&self.args),
|
||||
Node::Load { addr, .. } => std::slice::from_ref(addr),
|
||||
Node::Result { value, .. } => std::slice::from_ref(value),
|
||||
}
|
||||
}
|
||||
|
||||
fn children_mut<'a>(&'a mut self, node: &'a mut Node) -> &'a mut [Id] {
|
||||
match node {
|
||||
Node::Param { .. } => &mut [],
|
||||
Node::Pure { args, .. } | Node::Inst { args, .. } => args.as_mut_slice(&mut self.args),
|
||||
Node::Load { addr, .. } => std::slice::from_mut(addr),
|
||||
Node::Result { value, .. } => std::slice::from_mut(value),
|
||||
}
|
||||
}
|
||||
|
||||
fn needs_dedup(&self, node: &Node) -> bool {
|
||||
match node {
|
||||
Node::Pure { .. } | Node::Load { .. } => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
266
cranelift/codegen/src/egraph/stores.rs
Normal file
266
cranelift/codegen/src/egraph/stores.rs
Normal file
@@ -0,0 +1,266 @@
|
||||
//! Last-store tracking via alias analysis.
|
||||
//!
|
||||
//! We partition memory state into several *disjoint pieces* of
|
||||
//! "abstract state". There are a finite number of such pieces:
|
||||
//! currently, we call them "heap", "table", "vmctx", and "other". Any
|
||||
//! given address in memory belongs to exactly one disjoint piece.
|
||||
//!
|
||||
//! One never tracks which piece a concrete address belongs to at
|
||||
//! runtime; this is a purely static concept. Instead, all
|
||||
//! memory-accessing instructions (loads and stores) are labeled with
|
||||
//! one of these four categories in the `MemFlags`. It is forbidden
|
||||
//! for a load or store to access memory under one category and a
|
||||
//! later load or store to access the same memory under a different
|
||||
//! category. This is ensured to be true by construction during
|
||||
//! frontend translation into CLIF and during legalization.
|
||||
//!
|
||||
//! Given that this non-aliasing property is ensured by the producer
|
||||
//! of CLIF, we can compute a *may-alias* property: one load or store
|
||||
//! may-alias another load or store if both access the same category
|
||||
//! of abstract state.
|
||||
//!
|
||||
//! The "last store" pass helps to compute this aliasing: we perform a
|
||||
//! fixpoint analysis to track the last instruction that *might have*
|
||||
//! written to a given part of abstract state. We also track the block
|
||||
//! containing this store.
|
||||
//!
|
||||
//! We can't say for sure that the "last store" *did* actually write
|
||||
//! that state, but we know for sure that no instruction *later* than
|
||||
//! it (up to the current instruction) did. However, we can get a
|
||||
//! must-alias property from this: if at a given load or store, we
|
||||
//! look backward to the "last store", *AND* we find that it has
|
||||
//! exactly the same address expression and value type, then we know
|
||||
//! that the current instruction's access *must* be to the same memory
|
||||
//! location.
|
||||
//!
|
||||
//! To get this must-alias property, we leverage the node
|
||||
//! hashconsing. We design the Eq/Hash (node identity relation
|
||||
//! definition) of the `Node` struct so that all loads with (i) the
|
||||
//! same "last store", and (ii) the same address expression, and (iii)
|
||||
//! the same opcode-and-offset, will deduplicate (the first will be
|
||||
//! computed, and the later ones will use the same value). Furthermore
|
||||
//! we have an optimization that rewrites a load into the stored value
|
||||
//! of the last store *if* the last store has the same address
|
||||
//! expression and constant offset.
|
||||
//!
|
||||
//! This gives us two optimizations, "redundant load elimination" and
|
||||
//! "store-to-load forwarding".
|
||||
//!
|
||||
//! In theory we could also do *dead-store elimination*, where if a
|
||||
//! store overwrites a value earlier written by another store, *and*
|
||||
//! if no other load/store to the abstract state category occurred,
|
||||
//! *and* no other trapping instruction occurred (at which point we
|
||||
//! need an up-to-date memory state because post-trap-termination
|
||||
//! memory state can be observed), *and* we can prove the original
|
||||
//! store could not have trapped, then we can eliminate the original
|
||||
//! store. Because this is so complex, and the conditions for doing it
|
||||
//! correctly when post-trap state must be correct likely reduce the
|
||||
//! potential benefit, we don't yet do this.
|
||||
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::fx::{FxHashMap, FxHashSet};
|
||||
use crate::inst_predicates::has_memory_fence_semantics;
|
||||
use crate::ir::{Block, Function, Inst, InstructionData, MemFlags, Opcode};
|
||||
use crate::trace;
|
||||
use cranelift_entity::SecondaryMap;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
|
||||
/// For a given program point, the vector of last-store instruction
|
||||
/// indices for each disjoint category of abstract state.
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
|
||||
struct LastStores {
|
||||
heap: MemoryState,
|
||||
table: MemoryState,
|
||||
vmctx: MemoryState,
|
||||
other: MemoryState,
|
||||
}
|
||||
|
||||
/// State of memory seen by a load.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
|
||||
pub enum MemoryState {
|
||||
/// State at function entry: nothing is known (but it is one
|
||||
/// consistent value, so two loads from "entry" state at the same
|
||||
/// address will still provide the same result).
|
||||
#[default]
|
||||
Entry,
|
||||
/// State just after a store by the given instruction. The
|
||||
/// instruction is a store from which we can forward.
|
||||
Store(Inst),
|
||||
/// State just before the given instruction. Used for abstract
|
||||
/// value merges at merge-points when we cannot name a single
|
||||
/// producing site.
|
||||
BeforeInst(Inst),
|
||||
/// State just after the given instruction. Used when the
|
||||
/// instruction may update the associated state, but is not a
|
||||
/// store whose value we can cleanly forward. (E.g., perhaps a
|
||||
/// barrier of some sort.)
|
||||
AfterInst(Inst),
|
||||
}
|
||||
|
||||
impl LastStores {
|
||||
fn update(&mut self, func: &Function, inst: Inst) {
|
||||
let opcode = func.dfg[inst].opcode();
|
||||
if has_memory_fence_semantics(opcode) {
|
||||
self.heap = MemoryState::AfterInst(inst);
|
||||
self.table = MemoryState::AfterInst(inst);
|
||||
self.vmctx = MemoryState::AfterInst(inst);
|
||||
self.other = MemoryState::AfterInst(inst);
|
||||
} else if opcode.can_store() {
|
||||
if let Some(memflags) = func.dfg[inst].memflags() {
|
||||
*self.for_flags(memflags) = MemoryState::Store(inst);
|
||||
} else {
|
||||
self.heap = MemoryState::AfterInst(inst);
|
||||
self.table = MemoryState::AfterInst(inst);
|
||||
self.vmctx = MemoryState::AfterInst(inst);
|
||||
self.other = MemoryState::AfterInst(inst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn for_flags(&mut self, memflags: MemFlags) -> &mut MemoryState {
|
||||
if memflags.heap() {
|
||||
&mut self.heap
|
||||
} else if memflags.table() {
|
||||
&mut self.table
|
||||
} else if memflags.vmctx() {
|
||||
&mut self.vmctx
|
||||
} else {
|
||||
&mut self.other
|
||||
}
|
||||
}
|
||||
|
||||
fn meet_from(&mut self, other: &LastStores, loc: Inst) {
|
||||
let meet = |a: MemoryState, b: MemoryState| -> MemoryState {
|
||||
match (a, b) {
|
||||
(a, b) if a == b => a,
|
||||
_ => MemoryState::BeforeInst(loc),
|
||||
}
|
||||
};
|
||||
|
||||
self.heap = meet(self.heap, other.heap);
|
||||
self.table = meet(self.table, other.table);
|
||||
self.vmctx = meet(self.vmctx, other.vmctx);
|
||||
self.other = meet(self.other, other.other);
|
||||
}
|
||||
}
|
||||
|
||||
/// An alias-analysis pass.
|
||||
pub struct AliasAnalysis {
|
||||
/// Last-store instruction (or none) for a given load. Use a hash map
|
||||
/// instead of a `SecondaryMap` because this is sparse.
|
||||
load_mem_state: FxHashMap<Inst, MemoryState>,
|
||||
}
|
||||
|
||||
impl AliasAnalysis {
|
||||
/// Perform an alias analysis pass.
|
||||
pub fn new(func: &Function, cfg: &ControlFlowGraph) -> AliasAnalysis {
|
||||
log::trace!("alias analysis: input is:\n{:?}", func);
|
||||
let block_input = Self::compute_block_input_states(func, cfg);
|
||||
let load_mem_state = Self::compute_load_last_stores(func, block_input);
|
||||
AliasAnalysis { load_mem_state }
|
||||
}
|
||||
|
||||
fn compute_block_input_states(
|
||||
func: &Function,
|
||||
cfg: &ControlFlowGraph,
|
||||
) -> SecondaryMap<Block, Option<LastStores>> {
|
||||
let mut block_input = SecondaryMap::with_capacity(func.dfg.num_blocks());
|
||||
let mut worklist: SmallVec<[Block; 8]> = smallvec![];
|
||||
let mut worklist_set = FxHashSet::default();
|
||||
let entry = func.layout.entry_block().unwrap();
|
||||
worklist.push(entry);
|
||||
worklist_set.insert(entry);
|
||||
block_input[entry] = Some(LastStores::default());
|
||||
|
||||
while let Some(block) = worklist.pop() {
|
||||
worklist_set.remove(&block);
|
||||
let state = block_input[block].clone().unwrap();
|
||||
|
||||
trace!("alias analysis: input to {} is {:?}", block, state);
|
||||
|
||||
let state = func
|
||||
.layout
|
||||
.block_insts(block)
|
||||
.fold(state, |mut state, inst| {
|
||||
state.update(func, inst);
|
||||
trace!("after {}: state is {:?}", inst, state);
|
||||
state
|
||||
});
|
||||
|
||||
for succ in cfg.succ_iter(block) {
|
||||
let succ_first_inst = func.layout.first_inst(succ).unwrap();
|
||||
let succ_state = &mut block_input[succ];
|
||||
let old = succ_state.clone();
|
||||
if let Some(succ_state) = succ_state.as_mut() {
|
||||
succ_state.meet_from(&state, succ_first_inst);
|
||||
} else {
|
||||
*succ_state = Some(state);
|
||||
};
|
||||
let updated = *succ_state != old;
|
||||
|
||||
if updated && worklist_set.insert(succ) {
|
||||
worklist.push(succ);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
block_input
|
||||
}
|
||||
|
||||
fn compute_load_last_stores(
|
||||
func: &Function,
|
||||
block_input: SecondaryMap<Block, Option<LastStores>>,
|
||||
) -> FxHashMap<Inst, MemoryState> {
|
||||
let mut load_mem_state = FxHashMap::default();
|
||||
|
||||
for block in func.layout.blocks() {
|
||||
let mut state = block_input[block].clone().unwrap();
|
||||
|
||||
for inst in func.layout.block_insts(block) {
|
||||
trace!(
|
||||
"alias analysis: scanning at {} with state {:?} ({:?})",
|
||||
inst,
|
||||
state,
|
||||
func.dfg[inst],
|
||||
);
|
||||
|
||||
// N.B.: we match `Load` specifically, and not any
|
||||
// other kinds of loads (or any opcode such that
|
||||
// `opcode.can_load()` returns true), because some
|
||||
// "can load" instructions actually have very
|
||||
// different semantics (are not just a load of a
|
||||
// particularly-typed value). For example, atomic
|
||||
// (load/store, RMW, CAS) instructions "can load" but
|
||||
// definitely should not participate in store-to-load
|
||||
// forwarding or redundant-load elimination. Our goal
|
||||
// here is to provide a `MemoryState` just for plain
|
||||
// old loads whose semantics we can completely reason
|
||||
// about.
|
||||
if let InstructionData::Load {
|
||||
opcode: Opcode::Load,
|
||||
flags,
|
||||
..
|
||||
} = func.dfg[inst]
|
||||
{
|
||||
let mem_state = *state.for_flags(flags);
|
||||
trace!(
|
||||
"alias analysis: at {}: load with mem_state {:?}",
|
||||
inst,
|
||||
mem_state,
|
||||
);
|
||||
|
||||
load_mem_state.insert(inst, mem_state);
|
||||
}
|
||||
|
||||
state.update(func, inst);
|
||||
}
|
||||
}
|
||||
|
||||
load_mem_state
|
||||
}
|
||||
|
||||
/// Get the state seen by a load, if any.
|
||||
pub fn get_state_for_load(&self, inst: Inst) -> Option<MemoryState> {
|
||||
self.load_mem_state.get(&inst).copied()
|
||||
}
|
||||
}
|
||||
@@ -11,6 +11,7 @@ pub fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) ->
|
||||
}
|
||||
|
||||
/// Test whether the given opcode is unsafe to even consider as side-effect-free.
|
||||
#[inline(always)]
|
||||
fn trivially_has_side_effects(opcode: Opcode) -> bool {
|
||||
opcode.is_call()
|
||||
|| opcode.is_branch()
|
||||
@@ -24,6 +25,7 @@ fn trivially_has_side_effects(opcode: Opcode) -> bool {
|
||||
/// Load instructions without the `notrap` flag are defined to trap when
|
||||
/// operating on inaccessible memory, so we can't treat them as side-effect-free even if the loaded
|
||||
/// value is unused.
|
||||
#[inline(always)]
|
||||
fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool {
|
||||
if !opcode.can_load() {
|
||||
return false;
|
||||
@@ -37,6 +39,7 @@ fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool
|
||||
|
||||
/// Does the given instruction have any side-effect that would preclude it from being removed when
|
||||
/// its value is unused?
|
||||
#[inline(always)]
|
||||
pub fn has_side_effect(func: &Function, inst: Inst) -> bool {
|
||||
let data = &func.dfg[inst];
|
||||
let opcode = data.opcode();
|
||||
@@ -123,8 +126,10 @@ pub fn has_memory_fence_semantics(op: Opcode) -> bool {
|
||||
| Opcode::AtomicCas
|
||||
| Opcode::AtomicLoad
|
||||
| Opcode::AtomicStore
|
||||
| Opcode::Fence => true,
|
||||
| Opcode::Fence
|
||||
| Opcode::Debugtrap => true,
|
||||
Opcode::Call | Opcode::CallIndirect => true,
|
||||
op if op.can_trap() => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -120,6 +120,23 @@ impl DataFlowGraph {
|
||||
self.immediates.clear();
|
||||
}
|
||||
|
||||
/// Clear all instructions, but keep blocks and other metadata
|
||||
/// (signatures, constants, immediates). Everything to do with
|
||||
/// `Value`s is cleared, including block params and debug info.
|
||||
///
|
||||
/// Used during egraph-based optimization to clear out the pre-opt
|
||||
/// body so that we can regenerate it from the egraph.
|
||||
pub(crate) fn clear_insts(&mut self) {
|
||||
self.insts.clear();
|
||||
self.results.clear();
|
||||
self.value_lists.clear();
|
||||
self.values.clear();
|
||||
self.values_labels = None;
|
||||
for block in self.blocks.values_mut() {
|
||||
block.params = ValueList::new();
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the total number of instructions created in this function, whether they are currently
|
||||
/// inserted in the layout or not.
|
||||
///
|
||||
|
||||
@@ -189,7 +189,7 @@ pub struct FunctionStencil {
|
||||
///
|
||||
/// Track the original source location for each instruction. The source locations are not
|
||||
/// interpreted by Cranelift, only preserved.
|
||||
srclocs: SourceLocs,
|
||||
pub srclocs: SourceLocs,
|
||||
|
||||
/// An optional global value which represents an expression evaluating to
|
||||
/// the stack limit for this function. This `GlobalValue` will be
|
||||
|
||||
@@ -61,6 +61,18 @@ impl Layout {
|
||||
self.last_block = None;
|
||||
}
|
||||
|
||||
/// Clear instructions from every block, but keep the blocks.
|
||||
///
|
||||
/// Used by the egraph-based optimization to clear out the
|
||||
/// function body but keep the CFG skeleton.
|
||||
pub(crate) fn clear_insts(&mut self) {
|
||||
self.insts.clear();
|
||||
for block in self.blocks.values_mut() {
|
||||
block.first_inst = None.into();
|
||||
block.last_inst = None.into();
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the capacity of the `BlockData` map.
|
||||
pub fn block_capacity(&self) -> usize {
|
||||
self.blocks.capacity()
|
||||
|
||||
@@ -48,7 +48,7 @@ pub use crate::ir::function::{DisplayFunctionAnnotations, Function};
|
||||
pub use crate::ir::globalvalue::GlobalValueData;
|
||||
pub use crate::ir::heap::{HeapData, HeapStyle};
|
||||
pub use crate::ir::instructions::{
|
||||
InstructionData, Opcode, ValueList, ValueListPool, VariableArgs,
|
||||
InstructionData, InstructionImms, Opcode, ValueList, ValueListPool, VariableArgs,
|
||||
};
|
||||
pub use crate::ir::jumptable::JumpTableData;
|
||||
pub use crate::ir::known_symbol::KnownSymbol;
|
||||
|
||||
@@ -14,7 +14,7 @@ use serde::{Deserialize, Serialize};
|
||||
///
|
||||
/// The default source location uses the all-ones bit pattern `!0`. It is used for instructions
|
||||
/// that can't be given a real source location.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
|
||||
pub struct SourceLoc(u32);
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@ use crate::{
|
||||
abi::ArgPair, ty_bits, InsnOutput, Lower, MachInst, VCodeConstant, VCodeConstantData,
|
||||
},
|
||||
};
|
||||
use crate::{isle_common_prelude_methods, isle_lower_prelude_methods};
|
||||
use regalloc2::PReg;
|
||||
use std::boxed::Box;
|
||||
use std::convert::TryFrom;
|
||||
@@ -96,7 +97,7 @@ impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
||||
}
|
||||
|
||||
impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
||||
isle_prelude_methods!();
|
||||
isle_lower_prelude_methods!();
|
||||
isle_prelude_caller_methods!(crate::isa::aarch64::abi::AArch64MachineDeps, AArch64Caller);
|
||||
|
||||
fn sign_return_address_disabled(&mut self) -> Option<()> {
|
||||
|
||||
@@ -41,10 +41,25 @@ pub(crate) fn lower_insn_to_regs(
|
||||
match op {
|
||||
Opcode::Iconst | Opcode::Bconst | Opcode::Null => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::F32const | Opcode::F64const => unreachable!(
|
||||
"Should never see constant ops at top level lowering entry
|
||||
point, as constants are rematerialized at use-sites"
|
||||
),
|
||||
Opcode::F32const => {
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let val = ctx.get_constant(insn).unwrap();
|
||||
for inst in
|
||||
Inst::load_fp_constant32(rd, val as u32, |ty| ctx.alloc_tmp(ty).only_reg().unwrap())
|
||||
{
|
||||
ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::F64const => {
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let val = ctx.get_constant(insn).unwrap();
|
||||
for inst in
|
||||
Inst::load_fp_constant64(rd, val, |ty| ctx.alloc_tmp(ty).only_reg().unwrap())
|
||||
{
|
||||
ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::GetFramePointer | Opcode::GetStackPointer | Opcode::GetReturnAddress => {
|
||||
implemented_in_isle(ctx)
|
||||
|
||||
@@ -62,7 +62,15 @@ impl AArch64Backend {
|
||||
let emit_info = EmitInfo::new(flags.clone());
|
||||
let sigs = SigSet::new::<abi::AArch64MachineDeps>(func, &self.flags)?;
|
||||
let abi = abi::AArch64Callee::new(func, self, &self.isa_flags, &sigs)?;
|
||||
compile::compile::<AArch64Backend>(func, self, abi, &self.machine_env, emit_info, sigs)
|
||||
compile::compile::<AArch64Backend>(
|
||||
func,
|
||||
flags,
|
||||
self,
|
||||
abi,
|
||||
&self.machine_env,
|
||||
emit_info,
|
||||
sigs,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,17 +5,14 @@
|
||||
pub mod generated_code;
|
||||
use generated_code::{Context, MInst};
|
||||
|
||||
use target_lexicon::Triple;
|
||||
|
||||
// Types that the generated ISLE code uses via `use super::*`.
|
||||
use super::{writable_zero_reg, zero_reg};
|
||||
use std::vec::Vec;
|
||||
|
||||
use crate::isa::riscv64::abi::Riscv64ABICaller;
|
||||
use crate::isa::riscv64::settings::Flags as IsaFlags;
|
||||
use crate::machinst::Reg;
|
||||
use crate::machinst::{isle::*, MachInst, SmallInstVec};
|
||||
use crate::settings::Flags;
|
||||
|
||||
use crate::machinst::{VCodeConstant, VCodeConstantData};
|
||||
use crate::settings::Flags;
|
||||
use crate::{
|
||||
ir::{
|
||||
immediates::*, types::*, AtomicRmwOp, ExternalName, Inst, InstructionData, MemFlags,
|
||||
@@ -24,13 +21,12 @@ use crate::{
|
||||
isa::riscv64::inst::*,
|
||||
machinst::{ArgPair, InsnOutput, Lower},
|
||||
};
|
||||
use crate::{isle_common_prelude_methods, isle_lower_prelude_methods};
|
||||
use regalloc2::PReg;
|
||||
|
||||
use crate::isa::riscv64::abi::Riscv64ABICaller;
|
||||
use std::boxed::Box;
|
||||
use std::convert::TryFrom;
|
||||
|
||||
use crate::machinst::Reg;
|
||||
use std::vec::Vec;
|
||||
use target_lexicon::Triple;
|
||||
|
||||
type BoxCallInfo = Box<CallInfo>;
|
||||
type BoxCallIndInfo = Box<CallIndInfo>;
|
||||
@@ -64,7 +60,7 @@ impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
||||
}
|
||||
|
||||
impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
||||
isle_prelude_methods!();
|
||||
isle_lower_prelude_methods!();
|
||||
isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICaller);
|
||||
|
||||
fn vec_writable_to_regs(&mut self, val: &VecWritableReg) -> ValueRegs {
|
||||
|
||||
@@ -62,7 +62,7 @@ impl Riscv64Backend {
|
||||
let emit_info = EmitInfo::new(flags.clone(), self.isa_flags.clone());
|
||||
let sigs = SigSet::new::<abi::Riscv64MachineDeps>(func, &self.flags)?;
|
||||
let abi = abi::Riscv64Callee::new(func, self, &self.isa_flags, &sigs)?;
|
||||
compile::compile::<Riscv64Backend>(func, self, abi, &self.mach_env, emit_info, sigs)
|
||||
compile::compile::<Riscv64Backend>(func, flags, self, abi, &self.mach_env, emit_info, sigs)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ use crate::{
|
||||
machinst::abi::ABIMachineSpec,
|
||||
machinst::{ArgPair, InsnOutput, Lower, MachInst, VCodeConstant, VCodeConstantData},
|
||||
};
|
||||
use crate::{isle_common_prelude_methods, isle_lower_prelude_methods};
|
||||
use regalloc2::PReg;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::boxed::Box;
|
||||
@@ -88,7 +89,7 @@ pub(crate) fn lower_branch(
|
||||
}
|
||||
|
||||
impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
||||
isle_prelude_methods!();
|
||||
isle_lower_prelude_methods!();
|
||||
|
||||
fn abi_sig(&mut self, sig_ref: SigRef) -> Sig {
|
||||
self.lower_ctx.sigs().abi_sig_for_sig_ref(sig_ref)
|
||||
|
||||
@@ -60,7 +60,15 @@ impl S390xBackend {
|
||||
let emit_info = EmitInfo::new(self.isa_flags.clone());
|
||||
let sigs = SigSet::new::<abi::S390xMachineDeps>(func, &self.flags)?;
|
||||
let abi = abi::S390xCallee::new(func, self, &self.isa_flags, &sigs)?;
|
||||
compile::compile::<S390xBackend>(func, self, abi, &self.machine_env, emit_info, sigs)
|
||||
compile::compile::<S390xBackend>(
|
||||
func,
|
||||
self.flags.clone(),
|
||||
self,
|
||||
abi,
|
||||
&self.machine_env,
|
||||
emit_info,
|
||||
sigs,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ use crate::{
|
||||
ir::AtomicRmwOp,
|
||||
machinst::{InputSourceInst, Reg, Writable},
|
||||
};
|
||||
use crate::{isle_common_prelude_methods, isle_lower_prelude_methods};
|
||||
use generated_code::{Context, MInst, RegisterClass};
|
||||
|
||||
// Types that the generated ISLE code uses via `use super::*`.
|
||||
@@ -92,7 +93,7 @@ pub(crate) fn lower_branch(
|
||||
}
|
||||
|
||||
impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
||||
isle_prelude_methods!();
|
||||
isle_lower_prelude_methods!();
|
||||
isle_prelude_caller_methods!(X64ABIMachineSpec, X64Caller);
|
||||
|
||||
#[inline]
|
||||
|
||||
@@ -55,7 +55,7 @@ impl X64Backend {
|
||||
let emit_info = EmitInfo::new(flags.clone(), self.x64_flags.clone());
|
||||
let sigs = SigSet::new::<abi::X64ABIMachineSpec>(func, &self.flags)?;
|
||||
let abi = abi::X64Callee::new(&func, self, &self.x64_flags, &sigs)?;
|
||||
compile::compile::<Self>(&func, self, abi, &self.reg_env, emit_info, sigs)
|
||||
compile::compile::<Self>(&func, flags, self, abi, &self.reg_env, emit_info, sigs)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
604
cranelift/codegen/src/isle_prelude.rs
Normal file
604
cranelift/codegen/src/isle_prelude.rs
Normal file
@@ -0,0 +1,604 @@
|
||||
//! Shared ISLE prelude implementation for optimization (mid-end) and
|
||||
//! lowering (backend) ISLE environments.
|
||||
|
||||
/// Expands to the `prelude.isle` helper methods that are shared by every
/// ISLE environment. Invoke it inside an `impl Context for ...` block.
///
/// The expansion refers to IR names such as `Type`, `Imm64`, `TrapCode`,
/// `IntCC` and `dynamic_to_fixed` unqualified, so those must be in scope
/// wherever the macro is invoked.
#[macro_export]
#[doc(hidden)]
macro_rules! isle_common_prelude_methods {
    () => {
        /// ISLE has no direct way to spell a `()` value, so provide one.
        #[inline]
        fn unit(&mut self) -> Unit {
            ()
        }

        /// Widen a `u8` to `u32`.
        #[inline]
        fn u8_as_u32(&mut self, x: u8) -> Option<u32> {
            Some(u32::from(x))
        }

        /// Widen a `u8` to `u64`.
        #[inline]
        fn u8_as_u64(&mut self, x: u8) -> Option<u64> {
            Some(u64::from(x))
        }

        /// Widen a `u16` to `u64`.
        #[inline]
        fn u16_as_u64(&mut self, x: u16) -> Option<u64> {
            Some(u64::from(x))
        }

        /// Widen a `u32` to `u64`.
        #[inline]
        fn u32_as_u64(&mut self, x: u32) -> Option<u64> {
            Some(u64::from(x))
        }

        /// Reinterpret the bits of an `i64` as a `u64`.
        #[inline]
        fn i64_as_u64(&mut self, x: i64) -> Option<u64> {
            let bits = x as u64;
            Some(bits)
        }

        /// Wrapping 64-bit addition.
        #[inline]
        fn u64_add(&mut self, x: u64, y: u64) -> Option<u64> {
            let sum = x.wrapping_add(y);
            Some(sum)
        }

        /// Wrapping 64-bit subtraction.
        #[inline]
        fn u64_sub(&mut self, x: u64, y: u64) -> Option<u64> {
            let difference = x.wrapping_sub(y);
            Some(difference)
        }

        /// Wrapping 64-bit multiplication.
        #[inline]
        fn u64_mul(&mut self, x: u64, y: u64) -> Option<u64> {
            let product = x.wrapping_mul(y);
            Some(product)
        }

        /// Signed division on the operands' bit patterns; `None` whenever
        /// `i64::checked_div` fails (zero divisor or overflow).
        #[inline]
        fn u64_sdiv(&mut self, x: u64, y: u64) -> Option<u64> {
            (x as i64).checked_div(y as i64).map(|quotient| quotient as u64)
        }

        /// Unsigned division; `None` on a zero divisor.
        #[inline]
        fn u64_udiv(&mut self, x: u64, y: u64) -> Option<u64> {
            x.checked_div(y)
        }

        /// Bitwise AND.
        #[inline]
        fn u64_and(&mut self, x: u64, y: u64) -> Option<u64> {
            let bits = x & y;
            Some(bits)
        }

        /// Bitwise OR.
        #[inline]
        fn u64_or(&mut self, x: u64, y: u64) -> Option<u64> {
            let bits = x | y;
            Some(bits)
        }

        /// Bitwise XOR.
        #[inline]
        fn u64_xor(&mut self, x: u64, y: u64) -> Option<u64> {
            let bits = x ^ y;
            Some(bits)
        }

        /// Bitwise NOT.
        #[inline]
        fn u64_not(&mut self, x: u64) -> Option<u64> {
            let bits = !x;
            Some(bits)
        }

        /// Is `value` zero?
        #[inline]
        fn u64_is_zero(&mut self, value: u64) -> bool {
            value == 0
        }

        /// Sign-extend the low 32 bits of `x` to 64 bits.
        #[inline]
        fn u64_sextend_u32(&mut self, x: u64) -> Option<u64> {
            let low = x as u32 as i32;
            Some(low as i64 as u64)
        }

        /// Bit width of `ty` as a `u8`; panics if the width does not fit.
        #[inline]
        fn ty_bits(&mut self, ty: Type) -> Option<u8> {
            use std::convert::TryInto;
            let width = ty.bits().try_into().unwrap();
            Some(width)
        }

        /// Bit width of `ty`, cast to `u16`.
        #[inline]
        fn ty_bits_u16(&mut self, ty: Type) -> u16 {
            ty.bits() as u16
        }

        /// Bit width of `ty`, cast to `u64`.
        #[inline]
        fn ty_bits_u64(&mut self, ty: Type) -> u64 {
            ty.bits() as u64
        }

        /// Byte size of `ty` as a `u16`; panics if the size does not fit.
        #[inline]
        fn ty_bytes(&mut self, ty: Type) -> u16 {
            u16::try_from(ty.bytes()).unwrap()
        }

        /// All-ones mask as wide as `ty`; only widths 1, 8, 16, 32 and 64
        /// are implemented.
        #[inline]
        fn ty_mask(&mut self, ty: Type) -> u64 {
            let width = ty.bits();
            match width {
                64 => 0xffff_ffff_ffff_ffff,
                32 => 0xffff_ffff,
                16 => 0xffff,
                8 => 0xff,
                1 => 1,
                _ => unimplemented!(),
            }
        }

        /// Pass `ty` through when it is at most 16 bits wide and not a
        /// dynamic vector.
        fn fits_in_16(&mut self, ty: Type) -> Option<Type> {
            (ty.bits() <= 16 && !ty.is_dynamic_vector()).then(|| ty)
        }

        /// Pass `ty` through when it is at most 32 bits wide and not a
        /// dynamic vector.
        #[inline]
        fn fits_in_32(&mut self, ty: Type) -> Option<Type> {
            (ty.bits() <= 32 && !ty.is_dynamic_vector()).then(|| ty)
        }

        /// Pass `ty` through when it is a (fixed or dynamic) vector whose
        /// lane type is at most 32 bits wide.
        #[inline]
        fn lane_fits_in_32(&mut self, ty: Type) -> Option<Type> {
            let is_any_vector = ty.is_vector() || ty.is_dynamic_vector();
            (is_any_vector && ty.lane_type().bits() <= 32).then(|| ty)
        }

        /// Pass `ty` through when it is at most 64 bits wide and not a
        /// dynamic vector.
        #[inline]
        fn fits_in_64(&mut self, ty: Type) -> Option<Type> {
            (ty.bits() <= 64 && !ty.is_dynamic_vector()).then(|| ty)
        }

        /// Pass `ty` through when it is at most 64 bits wide and neither a
        /// float nor a vector.
        #[inline]
        fn ty_int_bool_ref_scalar_64(&mut self, ty: Type) -> Option<Type> {
            (ty.bits() <= 64 && !ty.is_float() && !ty.is_vector()).then(|| ty)
        }

        /// Pass `ty` through when it is exactly 32 bits wide.
        #[inline]
        fn ty_32(&mut self, ty: Type) -> Option<Type> {
            (ty.bits() == 32).then(|| ty)
        }

        /// Pass `ty` through when it is exactly 64 bits wide.
        #[inline]
        fn ty_64(&mut self, ty: Type) -> Option<Type> {
            (ty.bits() == 64).then(|| ty)
        }

        /// Pass `ty` through when it is exactly 32 or 64 bits wide.
        #[inline]
        fn ty_32_or_64(&mut self, ty: Type) -> Option<Type> {
            (ty.bits() == 32 || ty.bits() == 64).then(|| ty)
        }

        /// Pass `ty` through when it is exactly 8 or 16 bits wide.
        #[inline]
        fn ty_8_or_16(&mut self, ty: Type) -> Option<Type> {
            (ty.bits() == 8 || ty.bits() == 16).then(|| ty)
        }

        /// Pass `ty` through for `I8`/`I16`/`I32` and `B8`/`B16`/`B32`.
        #[inline]
        fn int_bool_fits_in_32(&mut self, ty: Type) -> Option<Type> {
            matches!(ty, I8 | I16 | I32 | B8 | B16 | B32).then(|| ty)
        }

        /// Pass `ty` through for `I64` and `B64`.
        #[inline]
        fn ty_int_bool_64(&mut self, ty: Type) -> Option<Type> {
            matches!(ty, I64 | B64).then(|| ty)
        }

        /// Pass `ty` through for `I64`, `B64` and `R64`.
        #[inline]
        fn ty_int_bool_ref_64(&mut self, ty: Type) -> Option<Type> {
            matches!(ty, I64 | B64 | R64).then(|| ty)
        }

        /// Pass `ty` through for `I128` and `B128`.
        #[inline]
        fn ty_int_bool_128(&mut self, ty: Type) -> Option<Type> {
            matches!(ty, I128 | B128).then(|| ty)
        }

        /// Pass `ty` through when `ty.is_int()` holds.
        #[inline]
        fn ty_int(&mut self, ty: Type) -> Option<Type> {
            if ty.is_int() {
                Some(ty)
            } else {
                None
            }
        }

        /// Pass `ty` through when it is an integer or boolean type.
        #[inline]
        fn ty_int_bool(&mut self, ty: Type) -> Option<Type> {
            (ty.is_int() || ty.is_bool()).then(|| ty)
        }

        /// Pass `ty` through for `F32` and `F64`.
        #[inline]
        fn ty_scalar_float(&mut self, ty: Type) -> Option<Type> {
            matches!(ty, F32 | F64).then(|| ty)
        }

        /// Pass `ty` through for `F32`, `F64`, or any vector type.
        #[inline]
        fn ty_float_or_vec(&mut self, ty: Type) -> Option<Type> {
            (matches!(ty, F32 | F64) || ty.is_vector()).then(|| ty)
        }

        /// Pass `ty` through when it is a vector with float lanes.
        fn ty_vector_float(&mut self, ty: Type) -> Option<Type> {
            (ty.is_vector() && ty.lane_type().is_float()).then(|| ty)
        }

        /// Pass `ty` through when it is a vector with non-float lanes.
        #[inline]
        fn ty_vector_not_float(&mut self, ty: Type) -> Option<Type> {
            (ty.is_vector() && !ty.lane_type().is_float()).then(|| ty)
        }

        /// Pass `ty` through when it is a 64-bit vector (same check as
        /// `ty_vec64`).
        #[inline]
        fn ty_vec64_ctor(&mut self, ty: Type) -> Option<Type> {
            (ty.is_vector() && ty.bits() == 64).then(|| ty)
        }

        /// Pass `ty` through when it is a 64-bit vector.
        #[inline]
        fn ty_vec64(&mut self, ty: Type) -> Option<Type> {
            (ty.is_vector() && ty.bits() == 64).then(|| ty)
        }

        /// Pass `ty` through when it is a 128-bit vector.
        #[inline]
        fn ty_vec128(&mut self, ty: Type) -> Option<Type> {
            (ty.is_vector() && ty.bits() == 128).then(|| ty)
        }

        /// Pass `ty` through when it is a dynamic vector whose
        /// `dynamic_to_fixed` counterpart is 64 bits wide.
        #[inline]
        fn ty_dyn_vec64(&mut self, ty: Type) -> Option<Type> {
            (ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 64).then(|| ty)
        }

        /// Pass `ty` through when it is a dynamic vector whose
        /// `dynamic_to_fixed` counterpart is 128 bits wide.
        #[inline]
        fn ty_dyn_vec128(&mut self, ty: Type) -> Option<Type> {
            (ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 128).then(|| ty)
        }

        /// Pass `ty` through when it is a 64-bit vector with integer lanes.
        #[inline]
        fn ty_vec64_int(&mut self, ty: Type) -> Option<Type> {
            (ty.is_vector() && ty.bits() == 64 && ty.lane_type().is_int()).then(|| ty)
        }

        /// Pass `ty` through when it is a 128-bit vector with integer lanes.
        #[inline]
        fn ty_vec128_int(&mut self, ty: Type) -> Option<Type> {
            (ty.is_vector() && ty.bits() == 128 && ty.lane_type().is_int()).then(|| ty)
        }

        /// Raw bits of an `Imm64`, cast to `u64`.
        #[inline]
        fn u64_from_imm64(&mut self, imm: Imm64) -> u64 {
            imm.bits() as u64
        }

        /// `u64::MAX` for `true`, `0` for `false`.
        #[inline]
        fn u64_from_bool(&mut self, b: bool) -> u64 {
            match b {
                true => u64::MAX,
                false => 0,
            }
        }

        /// `(lane_bits, lane_count)` when `ty` has more than one lane.
        #[inline]
        fn multi_lane(&mut self, ty: Type) -> Option<(u32, u32)> {
            (ty.lane_count() > 1).then(|| (ty.lane_bits(), ty.lane_count()))
        }

        /// `(lane_bits, min_lane_count)` when `ty` is a dynamic vector.
        #[inline]
        fn dynamic_lane(&mut self, ty: Type) -> Option<(u32, u32)> {
            ty.is_dynamic_vector()
                .then(|| (ty.lane_bits(), ty.min_lane_count()))
        }

        /// Lane width of a dynamic vector whose lane type satisfies
        /// `ty_has_int_representation`.
        #[inline]
        fn dynamic_int_lane(&mut self, ty: Type) -> Option<u32> {
            let accepted = ty.is_dynamic_vector()
                && crate::machinst::ty_has_int_representation(ty.lane_type());
            accepted.then(|| ty.lane_bits())
        }

        /// Lane width of a dynamic vector whose lane type satisfies
        /// `ty_has_float_or_vec_representation`.
        #[inline]
        fn dynamic_fp_lane(&mut self, ty: Type) -> Option<u32> {
            let accepted = ty.is_dynamic_vector()
                && crate::machinst::ty_has_float_or_vec_representation(ty.lane_type());
            accepted.then(|| ty.lane_bits())
        }

        /// Pass `ty` through when it is a dynamic vector with integer lanes
        /// and `min_bits() == 64`.
        #[inline]
        fn ty_dyn64_int(&mut self, ty: Type) -> Option<Type> {
            (ty.is_dynamic_vector() && ty.min_bits() == 64 && ty.lane_type().is_int())
                .then(|| ty)
        }

        /// Pass `ty` through when it is a dynamic vector with integer lanes
        /// and `min_bits() == 128`.
        #[inline]
        fn ty_dyn128_int(&mut self, ty: Type) -> Option<Type> {
            (ty.is_dynamic_vector() && ty.min_bits() == 128 && ty.lane_type().is_int())
                .then(|| ty)
        }

        /// Raw bits of an `Ieee32`, widened to `u64`.
        fn u64_from_ieee32(&mut self, val: Ieee32) -> u64 {
            u64::from(val.bits())
        }

        /// Raw bits of an `Ieee64`.
        fn u64_from_ieee64(&mut self, val: Ieee64) -> u64 {
            val.bits()
        }

        /// Identity conversion from `Uimm8` to `u8`.
        fn u8_from_uimm8(&mut self, val: Uimm8) -> u8 {
            val
        }

        /// Pass `ty` through unless it has exactly two 32-bit lanes.
        fn not_vec32x2(&mut self, ty: Type) -> Option<Type> {
            (ty.lane_bits() != 32 || ty.lane_count() != 2).then(|| ty)
        }

        /// Succeed unless `ty` is `I64X2`.
        fn not_i64x2(&mut self, ty: Type) -> Option<()> {
            (ty != I64X2).then(|| ())
        }

        /// The `IntegerDivisionByZero` trap code.
        fn trap_code_division_by_zero(&mut self) -> TrapCode {
            TrapCode::IntegerDivisionByZero
        }

        /// The `IntegerOverflow` trap code.
        fn trap_code_integer_overflow(&mut self) -> TrapCode {
            TrapCode::IntegerOverflow
        }

        /// The `BadConversionToInteger` trap code.
        fn trap_code_bad_conversion_to_integer(&mut self) -> TrapCode {
            TrapCode::BadConversionToInteger
        }

        /// Bits of `val` as a `u64`, or `None` when they are all zero.
        fn nonzero_u64_from_imm64(&mut self, val: Imm64) -> Option<u64> {
            let bits = val.bits();
            (bits != 0).then(|| bits as u64)
        }

        /// Wrapping 32-bit addition.
        #[inline]
        fn u32_add(&mut self, a: u32, b: u32) -> u32 {
            a.wrapping_add(b)
        }

        /// Add the operands as `i32`s; `None` on signed overflow.
        #[inline]
        fn s32_add_fallible(&mut self, a: u32, b: u32) -> Option<u32> {
            (a as i32).checked_add(b as i32).map(|sum| sum as u32)
        }

        /// Pass `x` through when it is non-negative as an `i32`.
        #[inline]
        fn u32_nonnegative(&mut self, x: u32) -> Option<u32> {
            ((x as i32) >= 0).then(|| x)
        }

        /// Succeed when `a <= b`.
        #[inline]
        fn u32_lteq(&mut self, a: u32, b: u32) -> Option<()> {
            (a <= b).then(|| ())
        }

        /// Succeed when `a <= b`.
        #[inline]
        fn u8_lteq(&mut self, a: u8, b: u8) -> Option<()> {
            (a <= b).then(|| ())
        }

        /// Succeed when `a < b`.
        #[inline]
        fn u8_lt(&mut self, a: u8, b: u8) -> Option<()> {
            (a < b).then(|| ())
        }

        /// Build an `Imm64` from the bit pattern of `x`.
        #[inline]
        fn imm64(&mut self, x: u64) -> Option<Imm64> {
            let signed = x as i64;
            Some(Imm64::new(signed))
        }

        /// Narrow an `Imm64` to 32 bits when its value fits in an `i32`;
        /// the result carries the `i32` bit pattern.
        #[inline]
        fn simm32(&mut self, x: Imm64) -> Option<u32> {
            let wide: i64 = x.into();
            let narrow: i32 = wide.try_into().ok()?;
            Some(narrow as u32)
        }

        /// Narrow an `Imm64` to `u8` when its value fits.
        #[inline]
        fn uimm8(&mut self, x: Imm64) -> Option<u8> {
            let wide: i64 = x.into();
            let narrow: u8 = wide.try_into().ok()?;
            Some(narrow)
        }

        /// Bit pattern of an `Offset32` as a `u32`.
        #[inline]
        fn offset32(&mut self, x: Offset32) -> Option<u32> {
            let signed: i32 = x.into();
            Some(signed as u32)
        }

        /// Bitwise AND of two bytes.
        #[inline]
        fn u8_and(&mut self, a: u8, b: u8) -> u8 {
            a & b
        }

        /// Lane type of `ty`.
        #[inline]
        fn lane_type(&mut self, ty: Type) -> Type {
            ty.lane_type()
        }

        /// Bit pattern of an `Offset32` as a `u32`.
        #[inline]
        fn offset32_to_u32(&mut self, offset: Offset32) -> u32 {
            let signed: i32 = offset.into();
            signed as u32
        }

        /// Build a `Range` from its two endpoints.
        fn range(&mut self, start: usize, end: usize) -> Range {
            (start, end)
        }

        /// Split a range into its first index and the remainder, or report
        /// it as empty when `start >= end`.
        fn range_view(&mut self, (start, end): Range) -> RangeView {
            if start < end {
                RangeView::NonEmpty {
                    index: start,
                    rest: (start + 1, end),
                }
            } else {
                RangeView::Empty
            }
        }

        /// The `MemFlags::trusted()` flag set.
        #[inline]
        fn mem_flags_trusted(&mut self) -> MemFlags {
            MemFlags::trusted()
        }

        /// Result of `IntCC::unsigned` on `x`.
        #[inline]
        fn intcc_unsigned(&mut self, x: &IntCC) -> IntCC {
            x.unsigned()
        }

        /// Pass `cc` through only when it is one of the signed ordering
        /// comparisons.
        #[inline]
        fn signed_cond_code(&mut self, cc: &condcodes::IntCC) -> Option<condcodes::IntCC> {
            match cc {
                IntCC::SignedLessThan
                | IntCC::SignedLessThanOrEqual
                | IntCC::SignedGreaterThan
                | IntCC::SignedGreaterThanOrEqual => Some(*cc),
                IntCC::Equal
                | IntCC::NotEqual
                | IntCC::UnsignedLessThan
                | IntCC::UnsignedLessThanOrEqual
                | IntCC::UnsignedGreaterThan
                | IntCC::UnsignedGreaterThanOrEqual => None,
            }
        }
    };
}
|
||||
@@ -97,12 +97,15 @@ mod constant_hash;
|
||||
mod context;
|
||||
mod dce;
|
||||
mod divconst_magic_numbers;
|
||||
mod egraph;
|
||||
mod fx;
|
||||
mod inst_predicates;
|
||||
mod isle_prelude;
|
||||
mod iterators;
|
||||
mod legalizer;
|
||||
mod licm;
|
||||
mod nan_canonicalization;
|
||||
mod opts;
|
||||
mod remove_constant_phis;
|
||||
mod result;
|
||||
mod scoped_hash_map;
|
||||
|
||||
@@ -10,6 +10,7 @@ use crate::ir::{Block, Function, Layout};
|
||||
use crate::packed_option::PackedOption;
|
||||
use crate::timing;
|
||||
use alloc::vec::Vec;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
|
||||
/// An opaque reference to a code loop.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
@@ -29,6 +30,48 @@ pub struct LoopAnalysis {
|
||||
/// Per-loop record stored in the loop analysis.
struct LoopData {
|
||||
/// Header block of the loop (what `loop_header` is compared against in
/// `is_loop_header`).
header: Block,
|
||||
/// Enclosing loop, if any; `assign_loop_levels` treats a loop with no
/// parent as sitting one level below the root.
parent: PackedOption<Loop>,
|
||||
/// Loop-nest level; stays `LoopLevel::invalid()` until
/// `assign_loop_levels` assigns it.
level: LoopLevel,
|
||||
}
|
||||
|
||||
/// A level in a loop nest.
///
/// Level 0 is the root (not inside any loop). The byte value `0xff` is
/// reserved as the "invalid" marker, so the deepest representable level is
/// `0xfe`; deeper nests saturate at that value.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LoopLevel(u8);

impl LoopLevel {
    /// Sentinel byte marking a level that has not been computed.
    const INVALID: u8 = 0xff;

    /// Get the root level (no loop).
    pub fn root() -> Self {
        Self(0)
    }

    /// Get the loop level.
    pub fn level(self) -> usize {
        self.0 as usize
    }

    /// Invalid loop level.
    pub fn invalid() -> Self {
        Self(Self::INVALID)
    }

    /// One loop level deeper.
    ///
    /// Saturates at the deepest valid level. The invalid level is returned
    /// unchanged: the comparison is `>=` rather than `==` so that `0xff + 1`
    /// is never evaluated (it would panic in debug builds and wrap around to
    /// the root level in release builds).
    pub fn inc(self) -> Self {
        if self.0 >= Self::INVALID - 1 {
            self
        } else {
            Self(self.0 + 1)
        }
    }

    /// A clamped loop level from a larger-width (usize) depth.
    ///
    /// Depths beyond the deepest valid level are clamped to it, so the
    /// result is never the invalid level.
    pub fn clamped(level: usize) -> Self {
        let deepest = (Self::INVALID as usize) - 1;
        Self(
            u8::try_from(std::cmp::min(level, deepest))
                .expect("Clamped value must always convert"),
        )
    }
}

impl std::default::Default for LoopLevel {
    /// The default level is the invalid marker, i.e. "not yet computed".
    fn default() -> Self {
        LoopLevel::invalid()
    }
}
|
||||
|
||||
impl LoopData {
|
||||
@@ -37,6 +80,7 @@ impl LoopData {
|
||||
Self {
|
||||
header,
|
||||
parent: parent.into(),
|
||||
level: LoopLevel::invalid(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -71,6 +115,17 @@ impl LoopAnalysis {
|
||||
self.loops[lp].parent.expand()
|
||||
}
|
||||
|
||||
/// Return the innermost loop for a given block.
|
||||
pub fn innermost_loop(&self, block: Block) -> Option<Loop> {
|
||||
self.block_loop_map[block].expand()
|
||||
}
|
||||
|
||||
/// Determine if a Block is a loop header. If so, return the loop.
|
||||
pub fn is_loop_header(&self, block: Block) -> Option<Loop> {
|
||||
self.innermost_loop(block)
|
||||
.filter(|&lp| self.loop_header(lp) == block)
|
||||
}
|
||||
|
||||
/// Determine if a Block belongs to a loop by running a finger along the loop tree.
|
||||
///
|
||||
/// Returns `true` if `block` is in loop `lp`.
|
||||
@@ -96,6 +151,12 @@ impl LoopAnalysis {
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Returns the loop-nest level of a given block.
|
||||
pub fn loop_level(&self, block: Block) -> LoopLevel {
|
||||
self.innermost_loop(block)
|
||||
.map_or(LoopLevel(0), |lp| self.loops[lp].level)
|
||||
}
|
||||
}
|
||||
|
||||
impl LoopAnalysis {
|
||||
@@ -107,6 +168,7 @@ impl LoopAnalysis {
|
||||
self.block_loop_map.resize(func.dfg.num_blocks());
|
||||
self.find_loop_headers(cfg, domtree, &func.layout);
|
||||
self.discover_loop_blocks(cfg, domtree, &func.layout);
|
||||
self.assign_loop_levels();
|
||||
self.valid = true;
|
||||
}
|
||||
|
||||
@@ -228,6 +290,28 @@ impl LoopAnalysis {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn assign_loop_levels(&mut self) {
|
||||
let mut stack: SmallVec<[Loop; 8]> = smallvec![];
|
||||
for lp in self.loops.keys() {
|
||||
if self.loops[lp].level == LoopLevel::invalid() {
|
||||
stack.push(lp);
|
||||
while let Some(&lp) = stack.last() {
|
||||
if let Some(parent) = self.loops[lp].parent.into() {
|
||||
if self.loops[parent].level != LoopLevel::invalid() {
|
||||
self.loops[lp].level = self.loops[parent].level.inc();
|
||||
stack.pop();
|
||||
} else {
|
||||
stack.push(parent);
|
||||
}
|
||||
} else {
|
||||
self.loops[lp].level = LoopLevel::root().inc();
|
||||
stack.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -286,6 +370,10 @@ mod tests {
|
||||
assert_eq!(loop_analysis.is_in_loop(block2, loops[0]), true);
|
||||
assert_eq!(loop_analysis.is_in_loop(block3, loops[0]), true);
|
||||
assert_eq!(loop_analysis.is_in_loop(block0, loops[1]), false);
|
||||
assert_eq!(loop_analysis.loop_level(block0).level(), 1);
|
||||
assert_eq!(loop_analysis.loop_level(block1).level(), 2);
|
||||
assert_eq!(loop_analysis.loop_level(block2).level(), 2);
|
||||
assert_eq!(loop_analysis.loop_level(block3).level(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -345,5 +433,11 @@ mod tests {
|
||||
assert_eq!(loop_analysis.is_in_loop(block3, loops[2]), true);
|
||||
assert_eq!(loop_analysis.is_in_loop(block4, loops[2]), true);
|
||||
assert_eq!(loop_analysis.is_in_loop(block5, loops[0]), true);
|
||||
assert_eq!(loop_analysis.loop_level(block0).level(), 1);
|
||||
assert_eq!(loop_analysis.loop_level(block1).level(), 2);
|
||||
assert_eq!(loop_analysis.loop_level(block2).level(), 2);
|
||||
assert_eq!(loop_analysis.loop_level(block3).level(), 2);
|
||||
assert_eq!(loop_analysis.loop_level(block4).level(), 2);
|
||||
assert_eq!(loop_analysis.loop_level(block5).level(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ use regalloc2::{self, MachineEnv};
|
||||
/// for binary emission.
|
||||
pub fn compile<B: LowerBackend + TargetIsa>(
|
||||
f: &Function,
|
||||
flags: crate::settings::Flags,
|
||||
b: &B,
|
||||
abi: Callee<<<B as LowerBackend>::MInst as MachInst>::ABIMachineSpec>,
|
||||
machine_env: &MachineEnv,
|
||||
@@ -23,7 +24,7 @@ pub fn compile<B: LowerBackend + TargetIsa>(
|
||||
let block_order = BlockLoweringOrder::new(f);
|
||||
|
||||
// Build the lowering context.
|
||||
let lower = crate::machinst::Lower::new(f, abi, emit_info, block_order, sigs)?;
|
||||
let lower = crate::machinst::Lower::new(f, flags, abi, emit_info, block_order, sigs)?;
|
||||
|
||||
// Lower the IR.
|
||||
let vcode = {
|
||||
|
||||
@@ -41,13 +41,9 @@ pub enum RangeView {
|
||||
/// ...` for each backend. These methods are shared amongst all backends.
|
||||
#[macro_export]
|
||||
#[doc(hidden)]
|
||||
macro_rules! isle_prelude_methods {
|
||||
macro_rules! isle_lower_prelude_methods {
|
||||
() => {
|
||||
/// We don't have a way of making a `()` value in isle directly.
|
||||
#[inline]
|
||||
fn unit(&mut self) -> Unit {
|
||||
()
|
||||
}
|
||||
isle_common_prelude_methods!();
|
||||
|
||||
#[inline]
|
||||
fn same_value(&mut self, a: Value, b: Value) -> Option<Value> {
|
||||
@@ -175,309 +171,6 @@ macro_rules! isle_prelude_methods {
|
||||
regs.regs().len()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u8_as_u32(&mut self, x: u8) -> Option<u32> {
|
||||
Some(x.into())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u8_as_u64(&mut self, x: u8) -> Option<u64> {
|
||||
Some(x.into())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u16_as_u64(&mut self, x: u16) -> Option<u64> {
|
||||
Some(x.into())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u32_as_u64(&mut self, x: u32) -> Option<u64> {
|
||||
Some(x.into())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn i64_as_u64(&mut self, x: i64) -> Option<u64> {
|
||||
Some(x as u64)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u64_add(&mut self, x: u64, y: u64) -> Option<u64> {
|
||||
Some(x.wrapping_add(y))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u64_sub(&mut self, x: u64, y: u64) -> Option<u64> {
|
||||
Some(x.wrapping_sub(y))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u64_and(&mut self, x: u64, y: u64) -> Option<u64> {
|
||||
Some(x & y)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u64_is_zero(&mut self, value: u64) -> bool {
|
||||
0 == value
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_bits(&mut self, ty: Type) -> Option<u8> {
|
||||
use std::convert::TryInto;
|
||||
Some(ty.bits().try_into().unwrap())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_bits_u16(&mut self, ty: Type) -> u16 {
|
||||
ty.bits().try_into().unwrap()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_bits_u64(&mut self, ty: Type) -> u64 {
|
||||
ty.bits() as u64
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_bytes(&mut self, ty: Type) -> u16 {
|
||||
u16::try_from(ty.bytes()).unwrap()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_mask(&mut self, ty: Type) -> u64 {
|
||||
match ty.bits() {
|
||||
1 => 1,
|
||||
8 => 0xff,
|
||||
16 => 0xffff,
|
||||
32 => 0xffff_ffff,
|
||||
64 => 0xffff_ffff_ffff_ffff,
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Matches `ty` only when it is at most 16 bits wide.
///
/// Dynamic vector types are always rejected, exactly as `fits_in_32` and
/// `fits_in_64` do, so the three width predicates agree on which types
/// they accept.
#[inline]
fn fits_in_16(&mut self, ty: Type) -> Option<Type> {
    if ty.bits() <= 16 && !ty.is_dynamic_vector() {
        Some(ty)
    } else {
        None
    }
}
|
||||
|
||||
#[inline]
|
||||
fn fits_in_32(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.bits() <= 32 && !ty.is_dynamic_vector() {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn lane_fits_in_32(&mut self, ty: Type) -> Option<Type> {
|
||||
if !ty.is_vector() && !ty.is_dynamic_vector() {
|
||||
None
|
||||
} else if ty.lane_type().bits() <= 32 {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn fits_in_64(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.bits() <= 64 && !ty.is_dynamic_vector() {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_int_bool_ref_scalar_64(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.bits() <= 64 && !ty.is_float() && !ty.is_vector() {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_32(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.bits() == 32 {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_64(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.bits() == 64 {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_32_or_64(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.bits() == 32 || ty.bits() == 64 {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_8_or_16(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.bits() == 8 || ty.bits() == 16 {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn int_bool_fits_in_32(&mut self, ty: Type) -> Option<Type> {
|
||||
match ty {
|
||||
I8 | I16 | I32 | B8 | B16 | B32 => Some(ty),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_int_bool_64(&mut self, ty: Type) -> Option<Type> {
|
||||
match ty {
|
||||
I64 | B64 => Some(ty),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_int_bool_ref_64(&mut self, ty: Type) -> Option<Type> {
|
||||
match ty {
|
||||
I64 | B64 | R64 => Some(ty),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_int_bool_128(&mut self, ty: Type) -> Option<Type> {
|
||||
match ty {
|
||||
I128 | B128 => Some(ty),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_int(&mut self, ty: Type) -> Option<Type> {
|
||||
ty.is_int().then(|| ty)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_int_bool(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.is_int() || ty.is_bool() {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_scalar_float(&mut self, ty: Type) -> Option<Type> {
|
||||
match ty {
|
||||
F32 | F64 => Some(ty),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_float_or_vec(&mut self, ty: Type) -> Option<Type> {
|
||||
match ty {
|
||||
F32 | F64 => Some(ty),
|
||||
ty if ty.is_vector() => Some(ty),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn ty_vector_float(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.is_vector() && ty.lane_type().is_float() {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_vector_not_float(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.is_vector() && !ty.lane_type().is_float() {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_vec64_ctor(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.is_vector() && ty.bits() == 64 {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_vec64(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.is_vector() && ty.bits() == 64 {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_vec128(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.is_vector() && ty.bits() == 128 {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_dyn_vec64(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 64 {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_dyn_vec128(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 128 {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_vec64_int(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.is_vector() && ty.bits() == 64 && ty.lane_type().is_int() {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_vec128_int(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.is_vector() && ty.bits() == 128 && ty.lane_type().is_int() {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn value_list_slice(&mut self, list: ValueList) -> ValueSlice {
|
||||
(list, 0)
|
||||
@@ -521,20 +214,6 @@ macro_rules! isle_prelude_methods {
|
||||
r.to_reg()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u64_from_imm64(&mut self, imm: Imm64) -> u64 {
|
||||
imm.bits() as u64
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u64_from_bool(&mut self, b: bool) -> u64 {
|
||||
if b {
|
||||
u64::MAX
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn inst_results(&mut self, inst: Inst) -> ValueSlice {
|
||||
(self.lower_ctx.dfg().inst_results_list(inst), 0)
|
||||
@@ -555,80 +234,11 @@ macro_rules! isle_prelude_methods {
|
||||
self.lower_ctx.dfg().value_type(val)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn multi_lane(&mut self, ty: Type) -> Option<(u32, u32)> {
|
||||
if ty.lane_count() > 1 {
|
||||
Some((ty.lane_bits(), ty.lane_count()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn dynamic_lane(&mut self, ty: Type) -> Option<(u32, u32)> {
|
||||
if ty.is_dynamic_vector() {
|
||||
Some((ty.lane_bits(), ty.min_lane_count()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn dynamic_int_lane(&mut self, ty: Type) -> Option<u32> {
|
||||
if ty.is_dynamic_vector() && crate::machinst::ty_has_int_representation(ty.lane_type())
|
||||
{
|
||||
Some(ty.lane_bits())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn dynamic_fp_lane(&mut self, ty: Type) -> Option<u32> {
|
||||
if ty.is_dynamic_vector()
|
||||
&& crate::machinst::ty_has_float_or_vec_representation(ty.lane_type())
|
||||
{
|
||||
Some(ty.lane_bits())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_dyn64_int(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.is_dynamic_vector() && ty.min_bits() == 64 && ty.lane_type().is_int() {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_dyn128_int(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.is_dynamic_vector() && ty.min_bits() == 128 && ty.lane_type().is_int() {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn def_inst(&mut self, val: Value) -> Option<Inst> {
|
||||
self.lower_ctx.dfg().value_def(val).inst()
|
||||
}
|
||||
|
||||
fn u64_from_ieee32(&mut self, val: Ieee32) -> u64 {
|
||||
val.bits().into()
|
||||
}
|
||||
|
||||
fn u64_from_ieee64(&mut self, val: Ieee64) -> u64 {
|
||||
val.bits()
|
||||
}
|
||||
|
||||
fn u8_from_uimm8(&mut self, val: Uimm8) -> u8 {
|
||||
val
|
||||
}
|
||||
|
||||
fn zero_value(&mut self, value: Value) -> Option<Value> {
|
||||
let insn = self.def_inst(value);
|
||||
if insn.is_some() {
|
||||
@@ -682,34 +292,6 @@ macro_rules! isle_prelude_methods {
|
||||
}
|
||||
}
|
||||
|
||||
fn not_vec32x2(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.lane_bits() == 32 && ty.lane_count() == 2 {
|
||||
None
|
||||
} else {
|
||||
Some(ty)
|
||||
}
|
||||
}
|
||||
|
||||
fn not_i64x2(&mut self, ty: Type) -> Option<()> {
|
||||
if ty == I64X2 {
|
||||
None
|
||||
} else {
|
||||
Some(())
|
||||
}
|
||||
}
|
||||
|
||||
fn trap_code_division_by_zero(&mut self) -> TrapCode {
|
||||
TrapCode::IntegerDivisionByZero
|
||||
}
|
||||
|
||||
fn trap_code_integer_overflow(&mut self) -> TrapCode {
|
||||
TrapCode::IntegerOverflow
|
||||
}
|
||||
|
||||
fn trap_code_bad_conversion_to_integer(&mut self) -> TrapCode {
|
||||
TrapCode::BadConversionToInteger
|
||||
}
|
||||
|
||||
fn avoid_div_traps(&mut self, _: Type) -> Option<()> {
|
||||
if self.flags.avoid_div_traps() {
|
||||
Some(())
|
||||
@@ -820,79 +402,6 @@ macro_rules! isle_prelude_methods {
|
||||
Some(u128::from_le_bytes(bytes.try_into().ok()?))
|
||||
}
|
||||
|
||||
fn nonzero_u64_from_imm64(&mut self, val: Imm64) -> Option<u64> {
|
||||
match val.bits() {
|
||||
0 => None,
|
||||
n => Some(n as u64),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u32_add(&mut self, a: u32, b: u32) -> u32 {
|
||||
a.wrapping_add(b)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn s32_add_fallible(&mut self, a: u32, b: u32) -> Option<u32> {
|
||||
let a = a as i32;
|
||||
let b = b as i32;
|
||||
a.checked_add(b).map(|sum| sum as u32)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u32_nonnegative(&mut self, x: u32) -> Option<u32> {
|
||||
if (x as i32) >= 0 {
|
||||
Some(x)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u32_lteq(&mut self, a: u32, b: u32) -> Option<()> {
|
||||
if a <= b {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simm32(&mut self, x: Imm64) -> Option<u32> {
|
||||
let x64: i64 = x.into();
|
||||
let x32: i32 = x64.try_into().ok()?;
|
||||
Some(x32 as u32)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn uimm8(&mut self, x: Imm64) -> Option<u8> {
|
||||
let x64: i64 = x.into();
|
||||
let x8: u8 = x64.try_into().ok()?;
|
||||
Some(x8)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn offset32(&mut self, x: Offset32) -> Option<u32> {
|
||||
let x: i32 = x.into();
|
||||
Some(x as u32)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u8_and(&mut self, a: u8, b: u8) -> u8 {
|
||||
a & b
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn lane_type(&mut self, ty: Type) -> Type {
|
||||
ty.lane_type()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn offset32_to_u32(&mut self, offset: Offset32) -> u32 {
|
||||
let offset: i32 = offset.into();
|
||||
offset as u32
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn emit_u64_le_const(&mut self, value: u64) -> VCodeConstant {
|
||||
let data = VCodeConstantData::U64(value.to_le_bytes());
|
||||
@@ -913,21 +422,6 @@ macro_rules! isle_prelude_methods {
|
||||
))
|
||||
}
|
||||
|
||||
fn range(&mut self, start: usize, end: usize) -> Range {
|
||||
(start, end)
|
||||
}
|
||||
|
||||
fn range_view(&mut self, (start, end): Range) -> RangeView {
|
||||
if start >= end {
|
||||
RangeView::Empty
|
||||
} else {
|
||||
RangeView::NonEmpty {
|
||||
index: start,
|
||||
rest: (start + 1, end),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn retval(&mut self, i: usize) -> WritableValueRegs {
|
||||
self.lower_ctx.retval(i)
|
||||
}
|
||||
@@ -1067,11 +561,6 @@ macro_rules! isle_prelude_methods {
|
||||
self.lower_ctx.sink_inst(inst);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn mem_flags_trusted(&mut self) -> MemFlags {
|
||||
MemFlags::trusted()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn preg_to_reg(&mut self, preg: PReg) -> Reg {
|
||||
preg.into()
|
||||
@@ -1081,27 +570,6 @@ macro_rules! isle_prelude_methods {
|
||||
fn gen_move(&mut self, ty: Type, dst: WritableReg, src: Reg) -> MInst {
|
||||
MInst::gen_move(dst, src, ty)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn intcc_unsigned(&mut self, x: &IntCC) -> IntCC {
|
||||
x.unsigned()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn signed_cond_code(&mut self, cc: &condcodes::IntCC) -> Option<condcodes::IntCC> {
|
||||
match cc {
|
||||
IntCC::Equal
|
||||
| IntCC::UnsignedGreaterThanOrEqual
|
||||
| IntCC::UnsignedGreaterThan
|
||||
| IntCC::UnsignedLessThanOrEqual
|
||||
| IntCC::UnsignedLessThan
|
||||
| IntCC::NotEqual => None,
|
||||
IntCC::SignedGreaterThanOrEqual
|
||||
| IntCC::SignedGreaterThan
|
||||
| IntCC::SignedLessThanOrEqual
|
||||
| IntCC::SignedLessThan => Some(*cc),
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -147,6 +147,9 @@ pub struct Lower<'func, I: VCodeInst> {
|
||||
/// The function to lower.
|
||||
f: &'func Function,
|
||||
|
||||
/// Machine-independent flags.
|
||||
flags: crate::settings::Flags,
|
||||
|
||||
/// Lowered machine instructions.
|
||||
vcode: VCodeBuilder<I>,
|
||||
|
||||
@@ -345,6 +348,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
/// Prepare a new lowering context for the given IR function.
|
||||
pub fn new(
|
||||
f: &'func Function,
|
||||
flags: crate::settings::Flags,
|
||||
abi: Callee<I::ABIMachineSpec>,
|
||||
emit_info: I::Info,
|
||||
block_order: BlockLoweringOrder,
|
||||
@@ -433,6 +437,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
|
||||
Ok(Lower {
|
||||
f,
|
||||
flags,
|
||||
vcode,
|
||||
value_regs,
|
||||
retval_regs,
|
||||
@@ -1265,26 +1270,30 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
assert!(!self.inst_sunk.contains(&inst));
|
||||
}
|
||||
|
||||
// If the value is a constant, then (re)materialize it at each use. This
|
||||
// lowers register pressure.
|
||||
if let Some(c) = self
|
||||
.f
|
||||
.dfg
|
||||
.value_def(val)
|
||||
.inst()
|
||||
.and_then(|inst| self.get_constant(inst))
|
||||
{
|
||||
let regs = self.alloc_tmp(ty);
|
||||
trace!(" -> regs {:?}", regs);
|
||||
assert!(regs.is_valid());
|
||||
// If the value is a constant, then (re)materialize it at each
|
||||
// use. This lowers register pressure. (Only do this if we are
|
||||
// not using egraph-based compilation; the egraph framework
|
||||
// more efficiently rematerializes constants where needed.)
|
||||
if !self.flags.use_egraphs() {
|
||||
if let Some(c) = self
|
||||
.f
|
||||
.dfg
|
||||
.value_def(val)
|
||||
.inst()
|
||||
.and_then(|inst| self.get_constant(inst))
|
||||
{
|
||||
let regs = self.alloc_tmp(ty);
|
||||
trace!(" -> regs {:?}", regs);
|
||||
assert!(regs.is_valid());
|
||||
|
||||
let insts = I::gen_constant(regs, c.into(), ty, |ty| {
|
||||
self.alloc_tmp(ty).only_reg().unwrap()
|
||||
});
|
||||
for inst in insts {
|
||||
self.emit(inst);
|
||||
let insts = I::gen_constant(regs, c.into(), ty, |ty| {
|
||||
self.alloc_tmp(ty).only_reg().unwrap()
|
||||
});
|
||||
for inst in insts {
|
||||
self.emit(inst);
|
||||
}
|
||||
return non_writable_value_regs(regs);
|
||||
}
|
||||
return non_writable_value_regs(regs);
|
||||
}
|
||||
|
||||
let mut regs = self.value_regs[val];
|
||||
|
||||
297
cranelift/codegen/src/opts.rs
Normal file
297
cranelift/codegen/src/opts.rs
Normal file
@@ -0,0 +1,297 @@
|
||||
//! Optimization driver using ISLE rewrite rules on an egraph.
|
||||
|
||||
use crate::egraph::Analysis;
|
||||
use crate::egraph::FuncEGraph;
|
||||
use crate::egraph::MemoryState;
|
||||
pub use crate::egraph::{Node, NodeCtx};
|
||||
use crate::ir::condcodes;
|
||||
pub use crate::ir::condcodes::{FloatCC, IntCC};
|
||||
pub use crate::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64, Uimm8};
|
||||
pub use crate::ir::types::*;
|
||||
pub use crate::ir::{
|
||||
dynamic_to_fixed, AtomicRmwOp, Block, Constant, DynamicStackSlot, FuncRef, GlobalValue, Heap,
|
||||
Immediate, InstructionImms, JumpTable, MemFlags, Opcode, StackSlot, Table, TrapCode, Type,
|
||||
Value,
|
||||
};
|
||||
use crate::isle_common_prelude_methods;
|
||||
use crate::machinst::isle::*;
|
||||
use crate::trace;
|
||||
pub use cranelift_egraph::{Id, NewOrExisting, NodeIter};
|
||||
use cranelift_entity::{EntityList, EntityRef};
|
||||
use smallvec::SmallVec;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
pub type IdArray = EntityList<Id>;
|
||||
#[allow(dead_code)]
|
||||
pub type Unit = ();
|
||||
pub type Range = (usize, usize);
|
||||
|
||||
pub type ConstructorVec<T> = SmallVec<[T; 8]>;
|
||||
|
||||
mod generated_code;
|
||||
use generated_code::ContextIter;
|
||||
|
||||
struct IsleContext<'a, 'b> {
|
||||
egraph: &'a mut FuncEGraph<'b>,
|
||||
}
|
||||
|
||||
const REWRITE_LIMIT: usize = 5;
|
||||
|
||||
pub fn optimize_eclass<'a>(id: Id, egraph: &mut FuncEGraph<'a>) -> Id {
|
||||
trace!("running rules on eclass {}", id.index());
|
||||
egraph.stats.rewrite_rule_invoked += 1;
|
||||
|
||||
if egraph.rewrite_depth > REWRITE_LIMIT {
|
||||
egraph.stats.rewrite_depth_limit += 1;
|
||||
return id;
|
||||
}
|
||||
egraph.rewrite_depth += 1;
|
||||
|
||||
// Find all possible rewrites and union them in, returning the
|
||||
// union.
|
||||
let mut ctx = IsleContext { egraph };
|
||||
let optimized_ids = generated_code::constructor_simplify(&mut ctx, id);
|
||||
let mut union_id = id;
|
||||
if let Some(mut ids) = optimized_ids {
|
||||
while let Some(new_id) = ids.next(&mut ctx) {
|
||||
if ctx.egraph.subsume_ids.contains(&new_id) {
|
||||
trace!(" -> eclass {} subsumes {}", new_id, id);
|
||||
ctx.egraph.stats.node_subsume += 1;
|
||||
// Merge in the unionfind so canonicalization still
|
||||
// works, but take *only* the subsuming ID, and break
|
||||
// now.
|
||||
ctx.egraph.egraph.unionfind.union(union_id, new_id);
|
||||
union_id = new_id;
|
||||
break;
|
||||
}
|
||||
ctx.egraph.stats.node_union += 1;
|
||||
let old_union_id = union_id;
|
||||
union_id = ctx
|
||||
.egraph
|
||||
.egraph
|
||||
.union(&ctx.egraph.node_ctx, union_id, new_id);
|
||||
trace!(
|
||||
" -> union eclass {} with {} to get {}",
|
||||
new_id,
|
||||
old_union_id,
|
||||
union_id
|
||||
);
|
||||
}
|
||||
}
|
||||
trace!(" -> optimize {} got {}", id, union_id);
|
||||
ctx.egraph.rewrite_depth -= 1;
|
||||
union_id
|
||||
}
|
||||
|
||||
pub(crate) fn store_to_load<'a>(id: Id, egraph: &mut FuncEGraph<'a>) -> Id {
|
||||
// Note that we only examine the latest enode in the eclass: opts
|
||||
// are invoked for every new enode added to an eclass, so
|
||||
// traversing the whole eclass would be redundant.
|
||||
let load_key = egraph.egraph.classes[id].get_node().unwrap();
|
||||
if let Node::Load {
|
||||
op:
|
||||
InstructionImms::Load {
|
||||
opcode: Opcode::Load,
|
||||
offset: load_offset,
|
||||
..
|
||||
},
|
||||
ty: load_ty,
|
||||
addr: load_addr,
|
||||
mem_state: MemoryState::Store(store_inst),
|
||||
..
|
||||
} = load_key.node(&egraph.egraph.nodes)
|
||||
{
|
||||
trace!(" -> got load op for id {}", id);
|
||||
if let Some((store_ty, store_id)) = egraph.store_nodes.get(&store_inst) {
|
||||
trace!(" -> got store id: {} ty: {}", store_id, store_ty);
|
||||
let store_key = egraph.egraph.classes[*store_id].get_node().unwrap();
|
||||
if let Node::Inst {
|
||||
op:
|
||||
InstructionImms::Store {
|
||||
opcode: Opcode::Store,
|
||||
offset: store_offset,
|
||||
..
|
||||
},
|
||||
args: store_args,
|
||||
..
|
||||
} = store_key.node(&egraph.egraph.nodes)
|
||||
{
|
||||
let store_args = store_args.as_slice(&egraph.node_ctx.args);
|
||||
let store_data = store_args[0];
|
||||
let store_addr = store_args[1];
|
||||
if *load_offset == *store_offset
|
||||
&& *load_ty == *store_ty
|
||||
&& egraph.egraph.unionfind.equiv_id_mut(*load_addr, store_addr)
|
||||
{
|
||||
trace!(" -> same offset, type, address; forwarding");
|
||||
egraph.stats.store_to_load_forward += 1;
|
||||
return store_data;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
id
|
||||
}
|
||||
|
||||
struct NodesEtorIter<'a, 'b>
|
||||
where
|
||||
'b: 'a,
|
||||
{
|
||||
root: Id,
|
||||
iter: NodeIter<NodeCtx, Analysis>,
|
||||
_phantom1: PhantomData<&'a ()>,
|
||||
_phantom2: PhantomData<&'b ()>,
|
||||
}
|
||||
|
||||
impl<'a, 'b> generated_code::ContextIter for NodesEtorIter<'a, 'b>
|
||||
where
|
||||
'b: 'a,
|
||||
{
|
||||
type Context = IsleContext<'a, 'b>;
|
||||
type Output = (Type, InstructionImms, IdArray);
|
||||
|
||||
fn next(&mut self, ctx: &mut IsleContext<'a, 'b>) -> Option<Self::Output> {
|
||||
while let Some(node) = self.iter.next(&ctx.egraph.egraph) {
|
||||
trace!("iter from root {}: node {:?}", self.root, node);
|
||||
match node {
|
||||
Node::Pure { op, args, types }
|
||||
| Node::Inst {
|
||||
op, args, types, ..
|
||||
} if types.len() == 1 => {
|
||||
let ty = types.as_slice(&ctx.egraph.node_ctx.types)[0];
|
||||
return Some((ty, op.clone(), args.clone()));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'b> generated_code::Context for IsleContext<'a, 'b> {
|
||||
isle_common_prelude_methods!();
|
||||
|
||||
fn eclass_type(&mut self, eclass: Id) -> Option<Type> {
|
||||
let mut iter = self.egraph.egraph.enodes(eclass);
|
||||
while let Some(node) = iter.next(&self.egraph.egraph) {
|
||||
match node {
|
||||
&Node::Pure { types, .. } | &Node::Inst { types, .. } if types.len() == 1 => {
|
||||
return Some(types.as_slice(&self.egraph.node_ctx.types)[0]);
|
||||
}
|
||||
&Node::Load { ty, .. } => return Some(ty),
|
||||
&Node::Result { ty, .. } => return Some(ty),
|
||||
&Node::Param { ty, .. } => return Some(ty),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn at_loop_level(&mut self, eclass: Id) -> (u8, Id) {
|
||||
(
|
||||
self.egraph.egraph.analysis_value(eclass).loop_level.level() as u8,
|
||||
eclass,
|
||||
)
|
||||
}
|
||||
|
||||
type enodes_etor_iter = NodesEtorIter<'a, 'b>;
|
||||
|
||||
fn enodes_etor(&mut self, eclass: Id) -> Option<NodesEtorIter<'a, 'b>> {
|
||||
Some(NodesEtorIter {
|
||||
root: eclass,
|
||||
iter: self.egraph.egraph.enodes(eclass),
|
||||
_phantom1: PhantomData,
|
||||
_phantom2: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
fn pure_enode_ctor(&mut self, ty: Type, op: &InstructionImms, args: IdArray) -> Id {
|
||||
let types = self.egraph.node_ctx.types.single(ty);
|
||||
let types = types.freeze(&mut self.egraph.node_ctx.types);
|
||||
let op = op.clone();
|
||||
match self
|
||||
.egraph
|
||||
.egraph
|
||||
.add(Node::Pure { op, args, types }, &mut self.egraph.node_ctx)
|
||||
{
|
||||
NewOrExisting::New(id) => {
|
||||
self.egraph.stats.node_created += 1;
|
||||
self.egraph.stats.node_pure += 1;
|
||||
self.egraph.stats.node_ctor_created += 1;
|
||||
optimize_eclass(id, self.egraph)
|
||||
}
|
||||
NewOrExisting::Existing(id) => {
|
||||
self.egraph.stats.node_ctor_deduped += 1;
|
||||
id
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn id_array_0_etor(&mut self, arg0: IdArray) -> Option<()> {
|
||||
let values = arg0.as_slice(&self.egraph.node_ctx.args);
|
||||
if values.len() == 0 {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn id_array_0_ctor(&mut self) -> IdArray {
|
||||
EntityList::default()
|
||||
}
|
||||
|
||||
fn id_array_1_etor(&mut self, arg0: IdArray) -> Option<Id> {
|
||||
let values = arg0.as_slice(&self.egraph.node_ctx.args);
|
||||
if values.len() == 1 {
|
||||
Some(values[0])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn id_array_1_ctor(&mut self, arg0: Id) -> IdArray {
|
||||
EntityList::from_iter([arg0].into_iter(), &mut self.egraph.node_ctx.args)
|
||||
}
|
||||
|
||||
fn id_array_2_etor(&mut self, arg0: IdArray) -> Option<(Id, Id)> {
|
||||
let values = arg0.as_slice(&self.egraph.node_ctx.args);
|
||||
if values.len() == 2 {
|
||||
Some((values[0], values[1]))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn id_array_2_ctor(&mut self, arg0: Id, arg1: Id) -> IdArray {
|
||||
EntityList::from_iter([arg0, arg1].into_iter(), &mut self.egraph.node_ctx.args)
|
||||
}
|
||||
|
||||
fn id_array_3_etor(&mut self, arg0: IdArray) -> Option<(Id, Id, Id)> {
|
||||
let values = arg0.as_slice(&self.egraph.node_ctx.args);
|
||||
if values.len() == 3 {
|
||||
Some((values[0], values[1], values[2]))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn id_array_3_ctor(&mut self, arg0: Id, arg1: Id, arg2: Id) -> IdArray {
|
||||
EntityList::from_iter(
|
||||
[arg0, arg1, arg2].into_iter(),
|
||||
&mut self.egraph.node_ctx.args,
|
||||
)
|
||||
}
|
||||
|
||||
fn remat(&mut self, id: Id) -> Id {
|
||||
trace!("remat: {}", id);
|
||||
self.egraph.remat_ids.insert(id);
|
||||
id
|
||||
}
|
||||
|
||||
fn subsume(&mut self, id: Id) -> Id {
|
||||
trace!("subsume: {}", id);
|
||||
self.egraph.subsume_ids.insert(id);
|
||||
id
|
||||
}
|
||||
}
|
||||
207
cranelift/codegen/src/opts/algebraic.isle
Normal file
207
cranelift/codegen/src/opts/algebraic.isle
Normal file
@@ -0,0 +1,207 @@
|
||||
;; Algebraic optimizations.
|
||||
|
||||
;; Rules here are allowed to rewrite pure expressions arbitrarily,
|
||||
;; using the same inputs as the original, or fewer. In other words, we
|
||||
;; cannot pull a new eclass id out of thin air and refer to it, other
|
||||
;; than a piece of the input or a new node that we construct; but we
|
||||
;; can freely rewrite e.g. `x+y-y` to `x`.
|
||||
|
||||
;; uextend/sextend of a constant.
|
||||
;; Zero-extension of a 32-bit constant. Bits 32..63 of the narrow immediate
;; are cleared explicitly instead of being assumed zero, so an immediate held
;; in sign-extended form (e.g. -1) still yields 0xffff_ffff.
(rule (simplify (uextend $I64 (iconst $I32 (u64_from_imm64 imm))))
      (iconst $I64 (imm64 (u64_and imm 0xffff_ffff))))
|
||||
(rule (simplify (sextend $I64 (iconst $I32 (u64_from_imm64 imm))))
|
||||
(iconst $I64 (imm64 (u64_sextend_u32 imm))))
|
||||
|
||||
;; x+0 == 0+x == x.
|
||||
(rule (simplify (iadd ty
|
||||
x
|
||||
(iconst ty (u64_from_imm64 0))))
|
||||
(subsume x))
|
||||
(rule (simplify (iadd ty
|
||||
(iconst ty (u64_from_imm64 0))
|
||||
x))
|
||||
(subsume x))
|
||||
;; x-0 == x.
|
||||
(rule (simplify (isub ty
|
||||
x
|
||||
(iconst ty (u64_from_imm64 0))))
|
||||
(subsume x))
|
||||
;; 0-x == (ineg x).
|
||||
(rule (simplify (isub ty
|
||||
(iconst ty (u64_from_imm64 0))
|
||||
x))
|
||||
(ineg ty x))
|
||||
|
||||
;; x*1 == 1*x == x.
|
||||
(rule (simplify (imul ty
|
||||
x
|
||||
(iconst ty (u64_from_imm64 1))))
|
||||
(subsume x))
|
||||
(rule (simplify (imul ty
|
||||
(iconst ty (u64_from_imm64 1))
|
||||
x))
|
||||
(subsume x))
|
||||
|
||||
;; x*0 == 0*x == 0.
|
||||
(rule (simplify (imul ty
|
||||
x
|
||||
(iconst ty (u64_from_imm64 0))))
|
||||
(iconst ty (imm64 0)))
|
||||
(rule (simplify (imul ty
|
||||
(iconst ty (u64_from_imm64 0))
|
||||
x))
|
||||
(iconst ty (imm64 0)))
|
||||
|
||||
;; x/1 == x.
|
||||
(rule (simplify (sdiv ty
|
||||
x
|
||||
(iconst ty (u64_from_imm64 1))))
|
||||
(subsume x))
|
||||
(rule (simplify (udiv ty
|
||||
x
|
||||
(iconst ty (u64_from_imm64 1))))
|
||||
(subsume x))
|
||||
|
||||
;; x>>0 == x<<0 == x rotr 0 == x rotl 0 == x.
|
||||
(rule (simplify (ishl ty
|
||||
x
|
||||
(iconst ty (u64_from_imm64 0))))
|
||||
(subsume x))
|
||||
(rule (simplify (ushr ty
|
||||
x
|
||||
(iconst ty (u64_from_imm64 0))))
|
||||
(subsume x))
|
||||
(rule (simplify (sshr ty
|
||||
x
|
||||
(iconst ty (u64_from_imm64 0))))
|
||||
(subsume x))
|
||||
(rule (simplify (rotr ty
|
||||
x
|
||||
(iconst ty (u64_from_imm64 0))))
|
||||
(subsume x))
|
||||
(rule (simplify (rotl ty
|
||||
x
|
||||
(iconst ty (u64_from_imm64 0))))
|
||||
(subsume x))
|
||||
|
||||
;; x | 0 == 0 | x == x | x == x.
|
||||
(rule (simplify (bor ty
|
||||
x
|
||||
(iconst ty (u64_from_imm64 0))))
|
||||
(subsume x))
|
||||
(rule (simplify (bor ty
|
||||
(iconst ty (u64_from_imm64 0))
|
||||
x))
|
||||
(subsume x))
|
||||
(rule (simplify (bor ty x x))
|
||||
(subsume x))
|
||||
|
||||
;; x ^ 0 == 0 ^ x == x.
|
||||
(rule (simplify (bxor ty
|
||||
x
|
||||
(iconst ty (u64_from_imm64 0))))
|
||||
(subsume x))
|
||||
(rule (simplify (bxor ty
|
||||
(iconst ty (u64_from_imm64 0))
|
||||
x))
|
||||
(subsume x))
|
||||
|
||||
;; x ^ x == 0.
|
||||
(rule (simplify (bxor ty x x))
|
||||
(subsume (iconst ty (imm64 0))))
|
||||
|
||||
;; x ^ not(x) == not(x) ^ x == -1.
|
||||
(rule (simplify (bxor $I32 x (bnot $I32 x))) (subsume (iconst $I32 (imm64 0xffff_ffff))))
|
||||
(rule (simplify (bxor $I32 (bnot $I32 x) x)) (subsume (iconst $I32 (imm64 0xffff_ffff))))
|
||||
(rule (simplify (bxor $I64 x (bnot $I64 x))) (subsume (iconst $I64 (imm64 0xffff_ffff_ffff_ffff))))
|
||||
(rule (simplify (bxor $I64 (bnot $I64 x) x)) (subsume (iconst $I64 (imm64 0xffff_ffff_ffff_ffff))))
|
||||
|
||||
;; x & -1 == -1 & x == x & x == x.
|
||||
;; x & x == x: the input is always at least as good as the `band`, so it
;; subsumes it, matching how `x | x` is handled.
(rule (simplify (band ty x x)) (subsume x))
|
||||
(rule (simplify (band $I32 x (iconst $I32 (u64_from_imm64 0xffff_ffff)))) (subsume x))
|
||||
(rule (simplify (band $I32 (iconst $I32 (u64_from_imm64 0xffff_ffff)) x)) (subsume x))
|
||||
(rule (simplify (band $I64 x (iconst $I64 (u64_from_imm64 0xffff_ffff_ffff_ffff)))) (subsume x))
|
||||
(rule (simplify (band $I64 (iconst $I64 (u64_from_imm64 0xffff_ffff_ffff_ffff)) x)) (subsume x))
|
||||
|
||||
;; x & 0 == 0 & x == 0.
|
||||
(rule (simplify (band ty x (iconst ty (u64_from_imm64 0)))) (iconst ty (imm64 0)))
|
||||
(rule (simplify (band ty (iconst ty (u64_from_imm64 0)) x)) (iconst ty (imm64 0)))
|
||||
|
||||
;; not(not(x)) == x.
|
||||
(rule (simplify (bnot ty (bnot ty x))) (subsume x))
|
||||
|
||||
;; DeMorgan's rule (two versions):
|
||||
;; bnot(bor(x, y)) == band(bnot(x), bnot(y))
|
||||
(rule (simplify (bnot ty (bor ty x y)))
|
||||
(band ty (bnot ty x) (bnot ty y)))
|
||||
;; bnot(band(x, y)) == bor(bnot(x), bnot(y))
;; The inner `band` is matched at the same type `ty` as the outer `bnot`,
;; mirroring the `bor` version of the rule.
(rule (simplify (bnot ty (band ty x y)))
      (bor ty (bnot ty x) (bnot ty y)))
|
||||
|
||||
;; x*2 == 2*x == x+x.
|
||||
(rule (simplify (imul ty x (iconst _ (simm32 2))))
|
||||
(iadd ty x x))
|
||||
(rule (simplify (imul ty (iconst _ (simm32 2)) x))
|
||||
(iadd ty x x))
|
||||
|
||||
;; x<<32>>32: uextend/sextend 32->64.
|
||||
(rule (simplify (ushr $I64 (ishl $I64 (uextend $I64 x @ (eclass_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32))))
|
||||
(uextend $I64 x))
|
||||
|
||||
(rule (simplify (sshr $I64 (ishl $I64 (uextend $I64 x @ (eclass_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32))))
|
||||
(sextend $I64 x))
|
||||
|
||||
;; TODO: strength reduction: mul/div to shifts
|
||||
;; TODO: div/rem by constants -> magic multiplications
|
||||
|
||||
;; Reassociate when it benefits LICM.
|
||||
(rule (simplify (iadd ty (iadd ty x y) z))
|
||||
(if-let (at_loop_level lx _) x)
|
||||
(if-let (at_loop_level ly _) y)
|
||||
(if-let (at_loop_level lz _) z)
|
||||
(if (u8_lt lx ly))
|
||||
(if (u8_lt lz ly))
|
||||
(iadd ty (iadd ty x z) y))
|
||||
(rule (simplify (iadd ty (iadd ty x y) z))
|
||||
(if-let (at_loop_level lx _) x)
|
||||
(if-let (at_loop_level ly _) y)
|
||||
(if-let (at_loop_level lz _) z)
|
||||
(if (u8_lt ly lx))
|
||||
(if (u8_lt lz lx))
|
||||
(iadd ty (iadd ty y z) x))
|
||||
|
||||
;; Select's selector input doesn't need bint; remove the redundant op.
|
||||
(rule (simplify (select ty (bint _ b) x y))
|
||||
(subsume (select ty b x y)))
|
||||
|
||||
;; Rematerialize ALU-op-with-imm and iconsts in each block where they're
|
||||
;; used. This is neutral (add-with-imm) or positive (iconst) for
|
||||
;; register pressure, and these ops are very cheap.
|
||||
(rule (simplify x @ (iadd _ (iconst _ _) _))
|
||||
(remat x))
|
||||
(rule (simplify x @ (iadd _ _ (iconst _ _)))
|
||||
(remat x))
|
||||
(rule (simplify x @ (isub _ (iconst _ _) _))
|
||||
(remat x))
|
||||
(rule (simplify x @ (isub _ _ (iconst _ _)))
|
||||
(remat x))
|
||||
(rule (simplify x @ (band _ (iconst _ _) _))
|
||||
(remat x))
|
||||
(rule (simplify x @ (band _ _ (iconst _ _)))
|
||||
(remat x))
|
||||
(rule (simplify x @ (bor _ (iconst _ _) _))
|
||||
(remat x))
|
||||
(rule (simplify x @ (bor _ _ (iconst _ _)))
|
||||
(remat x))
|
||||
(rule (simplify x @ (bxor _ (iconst _ _) _))
|
||||
(remat x))
|
||||
(rule (simplify x @ (bxor _ _ (iconst _ _)))
|
||||
(remat x))
|
||||
(rule (simplify x @ (bnot _ _))
|
||||
(remat x))
|
||||
(rule (simplify x @ (iconst _ _))
|
||||
(remat x))
|
||||
(rule (simplify x @ (f32const _ _))
|
||||
(remat x))
|
||||
(rule (simplify x @ (f64const _ _))
|
||||
(remat x))
|
||||
134
cranelift/codegen/src/opts/cprop.isle
Normal file
134
cranelift/codegen/src/opts/cprop.isle
Normal file
@@ -0,0 +1,134 @@
|
||||
;; Constant propagation.
|
||||
|
||||
(rule (simplify
|
||||
(iadd (fits_in_64 ty)
|
||||
(iconst ty (u64_from_imm64 k1))
|
||||
(iconst ty (u64_from_imm64 k2))))
|
||||
(subsume (iconst ty (imm64 (u64_add k1 k2)))))
|
||||
|
||||
(rule (simplify
|
||||
(isub (fits_in_64 ty)
|
||||
(iconst ty (u64_from_imm64 k1))
|
||||
(iconst ty (u64_from_imm64 k2))))
|
||||
(subsume (iconst ty (imm64 (u64_sub k1 k2)))))
|
||||
|
||||
(rule (simplify
|
||||
(imul (fits_in_64 ty)
|
||||
(iconst ty (u64_from_imm64 k1))
|
||||
(iconst ty (u64_from_imm64 k2))))
|
||||
(subsume (iconst ty (imm64 (u64_mul k1 k2)))))
|
||||
|
||||
;; Fold a signed division of two `iconst` operands (read as `u64` via
;; `u64_from_imm64`) into a single `iconst`. Unlike the iadd/isub/imul folds,
;; the quotient is bound with `if-let`, so this rule only applies when
;; `u64_sdiv` actually produces a value `d`.
;; NOTE(review): presumably `u64_sdiv` declines to produce a result for a zero
;; divisor (and possibly for signed overflow), leaving the original `sdiv` in
;; place -- confirm against its Rust implementation.
(rule (simplify
|
||||
(sdiv (fits_in_64 ty)
|
||||
(iconst ty (u64_from_imm64 k1))
|
||||
(iconst ty (u64_from_imm64 k2))))
|
||||
(if-let d (u64_sdiv k1 k2))
|
||||
(subsume (iconst ty (imm64 d))))
|
||||
|
||||
;; Fold an unsigned division of two `iconst` operands (read as `u64` via
;; `u64_from_imm64`) into a single `iconst`. Unlike the iadd/isub/imul folds,
;; the quotient is bound with `if-let`, so this rule only applies when
;; `u64_udiv` actually produces a value `d`.
;; NOTE(review): presumably `u64_udiv` declines to produce a result for a zero
;; divisor, leaving the original `udiv` in place -- confirm against its Rust
;; implementation.
(rule (simplify
|
||||
(udiv (fits_in_64 ty)
|
||||
(iconst ty (u64_from_imm64 k1))
|
||||
(iconst ty (u64_from_imm64 k2))))
|
||||
(if-let d (u64_udiv k1 k2))
|
||||
(subsume (iconst ty (imm64 d))))
|
||||
|
||||
(rule (simplify
|
||||
(bor (fits_in_64 ty)
|
||||
(iconst ty (u64_from_imm64 k1))
|
||||
(iconst ty (u64_from_imm64 k2))))
|
||||
(subsume (iconst ty (imm64 (u64_or k1 k2)))))
|
||||
|
||||
(rule (simplify
|
||||
(band (fits_in_64 ty)
|
||||
(iconst ty (u64_from_imm64 k1))
|
||||
(iconst ty (u64_from_imm64 k2))))
|
||||
(subsume (iconst ty (imm64 (u64_and k1 k2)))))
|
||||
|
||||
(rule (simplify
|
||||
(bxor (fits_in_64 ty)
|
||||
(iconst ty (u64_from_imm64 k1))
|
||||
(iconst ty (u64_from_imm64 k2))))
|
||||
(subsume (iconst ty (imm64 (u64_xor k1 k2)))))
|
||||
|
||||
(rule (simplify
|
||||
(bnot (fits_in_64 ty)
|
||||
(iconst ty (u64_from_imm64 k))))
|
||||
(subsume (iconst ty (imm64 (u64_not k)))))
|
||||
|
||||
;; Canonicalize via commutativity: push immediates to the right.
|
||||
;;
|
||||
;; (op k x) --> (op x k)
|
||||
|
||||
(rule (simplify
|
||||
(iadd ty k @ (iconst ty _) x))
|
||||
(iadd ty x k))
|
||||
;; sub is not commutative, but we can flip the args and negate the
|
||||
;; whole thing.
|
||||
(rule (simplify
|
||||
(isub ty k @ (iconst ty _) x))
|
||||
(ineg ty (isub ty x k)))
|
||||
(rule (simplify
|
||||
(imul ty k @ (iconst ty _) x))
|
||||
(imul ty x k))
|
||||
|
||||
(rule (simplify
|
||||
(bor ty k @ (iconst ty _) x))
|
||||
(bor ty x k))
|
||||
(rule (simplify
|
||||
(band ty k @ (iconst ty _) x))
|
||||
(band ty x k))
|
||||
(rule (simplify
|
||||
(bxor ty k @ (iconst ty _) x))
|
||||
(bxor ty x k))
|
||||
|
||||
;; Canonicalize via associativity: reassociate to a right-heavy tree
|
||||
;; for constants.
|
||||
;;
|
||||
;; (op (op x k) k) --> (op x (op k k))
|
||||
|
||||
(rule (simplify
|
||||
(iadd ty (iadd ty x k1 @ (iconst ty _)) k2 @ (iconst ty _)))
|
||||
(iadd ty x (iadd ty k1 k2)))
|
||||
;; sub is not directly associative, but we can flip a sub to an add to
|
||||
;; make it work:
|
||||
;; - (sub (sub x k1) k2) -> (sub x (add k1 k2))
|
||||
;; - (sub (sub k1 x) k2) -> (sub (sub k1 k2) x)
|
||||
;; - (sub (add x k1) k2) -> (sub x (sub k2 k1))
|
||||
;; - (add (sub x k1) k2) -> (add x (sub k2 k1))
|
||||
;; - (add (sub k1 x) k2) -> (sub (add k1 k2) x)
|
||||
(rule (simplify (isub ty
|
||||
(isub ty x (iconst ty (u64_from_imm64 k1)))
|
||||
(iconst ty (u64_from_imm64 k2))))
|
||||
(isub ty x (iconst ty (imm64 (u64_add k1 k2)))))
|
||||
(rule (simplify (isub ty
|
||||
(isub ty (iconst ty (u64_from_imm64 k1)) x)
|
||||
(iconst ty (u64_from_imm64 k2))))
|
||||
(isub ty (iconst ty (imm64 (u64_sub k1 k2))) x))
|
||||
;; (sub (add x k1) k2) -> (sub x (sub k2 k1))
;;
;; (x + k1) - k2 == x - (k2 - k1), so the folded constant must be `k2 - k1`.
;; Folding `k1 - k2` instead would produce x - (k1 - k2) == x - k1 + k2, which
;; is a different value whenever k1 != k2 (e.g. x=10, k1=3, k2=1: the correct
;; result is 12, the mis-folded one is 8).
(rule (simplify (isub ty
                      (iadd ty x (iconst ty (u64_from_imm64 k1)))
                      (iconst ty (u64_from_imm64 k2))))
      (isub ty x (iconst ty (imm64 (u64_sub k2 k1)))))
|
||||
(rule (simplify (iadd ty
|
||||
(isub ty x (iconst ty (u64_from_imm64 k1)))
|
||||
(iconst ty (u64_from_imm64 k2))))
|
||||
(iadd ty x (iconst ty (imm64 (u64_sub k2 k1)))))
|
||||
(rule (simplify (iadd ty
|
||||
(isub ty (iconst ty (u64_from_imm64 k1)) x)
|
||||
(iconst ty (u64_from_imm64 k2))))
|
||||
(isub ty (iconst ty (imm64 (u64_add k1 k2))) x))
|
||||
|
||||
(rule (simplify
|
||||
(imul ty (imul ty x k1 @ (iconst ty _)) k2 @ (iconst ty _)))
|
||||
(imul ty x (imul ty k1 k2)))
|
||||
(rule (simplify
|
||||
(bor ty (bor ty x k1 @ (iconst ty _)) k2 @ (iconst ty _)))
|
||||
(bor ty x (bor ty k1 k2)))
|
||||
(rule (simplify
|
||||
(band ty (band ty x k1 @ (iconst ty _)) k2 @ (iconst ty _)))
|
||||
(band ty x (band ty k1 k2)))
|
||||
(rule (simplify
|
||||
(bxor ty (bxor ty x k1 @ (iconst ty _)) k2 @ (iconst ty _)))
|
||||
(bxor ty x (bxor ty k1 k2)))
|
||||
|
||||
;; TODO: fadd, fsub, fmul, fdiv, fneg, fabs
|
||||
|
||||
11
cranelift/codegen/src/opts/generated_code.rs
Normal file
11
cranelift/codegen/src/opts/generated_code.rs
Normal file
@@ -0,0 +1,11 @@
|
||||
//! Wrapper environment for generated code from optimization rules in ISLE.
|
||||
|
||||
// See https://github.com/rust-lang/rust/issues/47995: we cannot use `#![...]` attributes inside of
|
||||
// the generated ISLE source below because we include!() it. We must include!() it because its path
|
||||
// depends on an environment variable; and also because of this, we can't do the `#[path = "..."]
|
||||
// mod generated_code;` trick either.
|
||||
#![allow(dead_code, unreachable_code, unreachable_patterns)]
|
||||
#![allow(unused_imports, unused_variables, non_snake_case, unused_mut)]
|
||||
#![allow(irrefutable_let_patterns, non_camel_case_types)]
|
||||
|
||||
include!(concat!(env!("ISLE_DIR"), "/isle_opt.rs"));
|
||||
@@ -31,24 +31,7 @@
|
||||
(type isize (primitive isize))
|
||||
|
||||
;; `cranelift-entity`-based identifiers.
|
||||
(type Inst (primitive Inst))
|
||||
(type Type (primitive Type))
|
||||
(type Value (primitive Value))
|
||||
|
||||
;; ISLE representation of `&[Value]`.
|
||||
(type ValueSlice (primitive ValueSlice))
|
||||
|
||||
;; ISLE representation of `Vec<u8>`
|
||||
(type VecMask extern (enum))
|
||||
|
||||
(type ValueList (primitive ValueList))
|
||||
(type ValueRegs (primitive ValueRegs))
|
||||
(type WritableValueRegs (primitive WritableValueRegs))
|
||||
|
||||
;; Instruction lowering result: a vector of `ValueRegs`.
|
||||
(type InstOutput (primitive InstOutput))
|
||||
;; (Mutable) builder to incrementally construct an `InstOutput`.
|
||||
(type InstOutputBuilder extern (enum))
|
||||
|
||||
(decl u32_add (u32 u32) u32)
|
||||
(extern constructor u32_add u32_add)
|
||||
@@ -72,6 +55,16 @@
|
||||
(decl pure u32_lteq (u32 u32) Unit)
|
||||
(extern constructor u32_lteq u32_lteq)
|
||||
|
||||
;; Pure/fallible constructor that tests if one u8 is less than or
|
||||
;; equal to another.
|
||||
(decl pure u8_lteq (u8 u8) Unit)
|
||||
(extern constructor u8_lteq u8_lteq)
|
||||
|
||||
;; Pure/fallible constructor that tests if one u8 is strictly less
|
||||
;; than another.
|
||||
(decl pure u8_lt (u8 u8) Unit)
|
||||
(extern constructor u8_lt u8_lt)
|
||||
|
||||
;; Get a signed 32-bit immediate in a u32 from an Imm64, if possible.
|
||||
(decl simm32 (u32) Imm64)
|
||||
(extern extractor simm32 simm32)
|
||||
@@ -83,143 +76,6 @@
|
||||
(decl u8_and (u8 u8) u8)
|
||||
(extern constructor u8_and u8_and)
|
||||
|
||||
;;;; Registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(type Reg (primitive Reg))
|
||||
(type WritableReg (primitive WritableReg))
|
||||
(type OptionWritableReg (primitive OptionWritableReg))
|
||||
(type VecReg extern (enum))
|
||||
(type VecWritableReg extern (enum))
|
||||
(type PReg (primitive PReg))
|
||||
|
||||
;; Construct a `ValueRegs` of one register.
|
||||
(decl value_reg (Reg) ValueRegs)
|
||||
(extern constructor value_reg value_reg)
|
||||
|
||||
;; Construct a `ValueRegs` of two registers.
|
||||
(decl value_regs (Reg Reg) ValueRegs)
|
||||
(extern constructor value_regs value_regs)
|
||||
|
||||
;; Construct an empty `ValueRegs` containing only invalid register sentinels.
|
||||
(decl value_regs_invalid () ValueRegs)
|
||||
(extern constructor value_regs_invalid value_regs_invalid)
|
||||
|
||||
;; Construct an empty `InstOutput`.
|
||||
(decl output_none () InstOutput)
|
||||
(extern constructor output_none output_none)
|
||||
|
||||
;; Construct a single-element `InstOutput`.
|
||||
(decl output (ValueRegs) InstOutput)
|
||||
(extern constructor output output)
|
||||
|
||||
;; Construct a two-element `InstOutput`.
|
||||
(decl output_pair (ValueRegs ValueRegs) InstOutput)
|
||||
(extern constructor output_pair output_pair)
|
||||
|
||||
;; Construct a single-element `InstOutput` from a single register.
|
||||
(decl output_reg (Reg) InstOutput)
|
||||
(rule (output_reg reg) (output (value_reg reg)))
|
||||
|
||||
;; Construct a single-element `InstOutput` from a value.
|
||||
(decl output_value (Value) InstOutput)
|
||||
(rule (output_value val) (output (put_in_regs val)))
|
||||
|
||||
;; Initially empty `InstOutput` builder.
|
||||
(decl output_builder_new () InstOutputBuilder)
|
||||
(extern constructor output_builder_new output_builder_new)
|
||||
|
||||
;; Append a `ValueRegs` to an `InstOutput` under construction.
|
||||
(decl output_builder_push (InstOutputBuilder ValueRegs) Unit)
|
||||
(extern constructor output_builder_push output_builder_push)
|
||||
|
||||
;; Finish building an `InstOutput` incrementally.
|
||||
(decl output_builder_finish (InstOutputBuilder) InstOutput)
|
||||
(extern constructor output_builder_finish output_builder_finish)
|
||||
|
||||
;; Get a temporary register for writing.
|
||||
(decl temp_writable_reg (Type) WritableReg)
|
||||
(extern constructor temp_writable_reg temp_writable_reg)
|
||||
|
||||
;; Get a temporary register for reading.
|
||||
(decl temp_reg (Type) Reg)
|
||||
(rule (temp_reg ty)
|
||||
(writable_reg_to_reg (temp_writable_reg ty)))
|
||||
|
||||
(decl is_valid_reg (bool) Reg)
|
||||
(extern extractor infallible is_valid_reg is_valid_reg)
|
||||
|
||||
;; Get or match the invalid register.
|
||||
(decl invalid_reg () Reg)
|
||||
(extern constructor invalid_reg invalid_reg)
|
||||
(extractor (invalid_reg) (is_valid_reg $false))
|
||||
|
||||
;; Match any register but the invalid register.
|
||||
(decl valid_reg (Reg) Reg)
|
||||
(extractor (valid_reg reg) (and (is_valid_reg $true) reg))
|
||||
|
||||
;; Mark this value as used, to ensure that it gets lowered.
|
||||
(decl mark_value_used (Value) Unit)
|
||||
(extern constructor mark_value_used mark_value_used)
|
||||
|
||||
;; Put the given value into a register.
|
||||
;;
|
||||
;; Asserts that the value fits into a single register, and doesn't require
|
||||
;; multiple registers for its representation (like `i128` on x64 for example).
|
||||
;;
|
||||
;; As a side effect, this marks the value as used.
|
||||
(decl put_in_reg (Value) Reg)
|
||||
(extern constructor put_in_reg put_in_reg)
|
||||
|
||||
;; Put the given value into one or more registers.
|
||||
;;
|
||||
;; As a side effect, this marks the value as used.
|
||||
(decl put_in_regs (Value) ValueRegs)
|
||||
(extern constructor put_in_regs put_in_regs)
|
||||
|
||||
;; If the given reg is a real register, cause the value in reg to be in a virtual
|
||||
;; reg, by copying it into a new virtual reg.
|
||||
(decl ensure_in_vreg (Reg Type) Reg)
|
||||
(extern constructor ensure_in_vreg ensure_in_vreg)
|
||||
|
||||
;; Get the `n`th register inside a `ValueRegs`.
|
||||
(decl value_regs_get (ValueRegs usize) Reg)
|
||||
(extern constructor value_regs_get value_regs_get)
|
||||
|
||||
;; Get the number of registers in a `ValueRegs`.
|
||||
(decl value_regs_len (ValueRegs) usize)
|
||||
(extern constructor value_regs_len value_regs_len)
|
||||
|
||||
;; Get a range for the number of regs in a `ValueRegs`.
|
||||
(decl value_regs_range (ValueRegs) Range)
|
||||
(rule (value_regs_range regs) (range 0 (value_regs_len regs)))
|
||||
|
||||
;; Put the value into one or more registers and return the first register.
|
||||
;;
|
||||
;; Unlike `put_in_reg`, this does not assert that the value fits in a single
|
||||
;; register. This is useful for things like a `i128` shift amount, where we mask
|
||||
;; the shift amount to the bit width of the value being shifted, and so the high
|
||||
;; half of the `i128` won't ever be used.
|
||||
;;
|
||||
;; As a side effect, this marks that value as used.
|
||||
(decl lo_reg (Value) Reg)
|
||||
(rule (lo_reg val)
|
||||
(let ((regs ValueRegs (put_in_regs val)))
|
||||
(value_regs_get regs 0)))
|
||||
|
||||
;; Convert a `PReg` into a `Reg`
|
||||
(decl preg_to_reg (PReg) Reg)
|
||||
(extern constructor preg_to_reg preg_to_reg)
|
||||
|
||||
;;;; Common Mach Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(type MachLabel (primitive MachLabel))
|
||||
(type ValueLabel (primitive ValueLabel))
|
||||
(type UnwindInst (primitive UnwindInst))
|
||||
(type ExternalName (primitive ExternalName))
|
||||
(type BoxExternalName (primitive BoxExternalName))
|
||||
(type RelocDistance (primitive RelocDistance))
|
||||
(type VecArgPair extern (enum))
|
||||
|
||||
;;;; Primitive Type Conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl pure u8_as_u32 (u8) u32)
|
||||
@@ -245,9 +101,30 @@
|
||||
(decl pure u64_sub (u64 u64) u64)
|
||||
(extern constructor u64_sub u64_sub)
|
||||
|
||||
(decl pure u64_mul (u64 u64) u64)
|
||||
(extern constructor u64_mul u64_mul)
|
||||
|
||||
(decl pure u64_sdiv (u64 u64) u64)
|
||||
(extern constructor u64_sdiv u64_sdiv)
|
||||
|
||||
(decl pure u64_udiv (u64 u64) u64)
|
||||
(extern constructor u64_udiv u64_udiv)
|
||||
|
||||
(decl pure u64_and (u64 u64) u64)
|
||||
(extern constructor u64_and u64_and)
|
||||
|
||||
(decl pure u64_or (u64 u64) u64)
|
||||
(extern constructor u64_or u64_or)
|
||||
|
||||
(decl pure u64_xor (u64 u64) u64)
|
||||
(extern constructor u64_xor u64_xor)
|
||||
|
||||
(decl pure u64_not (u64) u64)
|
||||
(extern constructor u64_not u64_not)
|
||||
|
||||
(decl pure u64_sextend_u32 (u64) u64)
|
||||
(extern constructor u64_sextend_u32 u64_sextend_u32)
|
||||
|
||||
(decl u64_is_zero (bool) u64)
|
||||
(extern extractor infallible u64_is_zero u64_is_zero)
|
||||
|
||||
@@ -443,46 +320,6 @@
|
||||
(decl not_i64x2 () Type)
|
||||
(extern extractor not_i64x2 not_i64x2)
|
||||
|
||||
;; Extractor to get a `ValueSlice` out of a `ValueList`.
|
||||
(decl value_list_slice (ValueSlice) ValueList)
|
||||
(extern extractor infallible value_list_slice value_list_slice)
|
||||
|
||||
;; Extractor to test whether a `ValueSlice` is empty.
|
||||
(decl value_slice_empty () ValueSlice)
|
||||
(extern extractor value_slice_empty value_slice_empty)
|
||||
|
||||
;; Extractor to split a `ValueSlice` into its first element plus a tail.
|
||||
(decl value_slice_unwrap (Value ValueSlice) ValueSlice)
|
||||
(extern extractor value_slice_unwrap value_slice_unwrap)
|
||||
|
||||
;; Return the length of a `ValueSlice`.
|
||||
(decl value_slice_len (ValueSlice) usize)
|
||||
(extern constructor value_slice_len value_slice_len)
|
||||
|
||||
;; Return any element of a `ValueSlice`.
|
||||
(decl value_slice_get (ValueSlice usize) Value)
|
||||
(extern constructor value_slice_get value_slice_get)
|
||||
|
||||
;; Extractor to get the first element from a value list, along with its tail as
|
||||
;; a `ValueSlice`.
|
||||
(decl unwrap_head_value_list_1 (Value ValueSlice) ValueList)
|
||||
(extractor (unwrap_head_value_list_1 head tail)
|
||||
(value_list_slice (value_slice_unwrap head tail)))
|
||||
|
||||
;; Extractor to get the first two elements from a value list, along with its
|
||||
;; tail as a `ValueSlice`.
|
||||
(decl unwrap_head_value_list_2 (Value Value ValueSlice) ValueList)
|
||||
(extractor (unwrap_head_value_list_2 head1 head2 tail)
|
||||
(value_list_slice (value_slice_unwrap head1 (value_slice_unwrap head2 tail))))
|
||||
|
||||
;; Constructor to test whether two values are the same.
|
||||
(decl pure same_value (Value Value) Value)
|
||||
(extern constructor same_value same_value)
|
||||
|
||||
;; Turn a `Writable<Reg>` into a `Reg` via `Writable::to_reg`.
|
||||
(decl writable_reg_to_reg (WritableReg) Reg)
|
||||
(extern constructor writable_reg_to_reg writable_reg_to_reg)
|
||||
|
||||
;; Extract a `u8` from an `Uimm8`.
|
||||
(decl u8_from_uimm8 (u8) Uimm8)
|
||||
(extern extractor infallible u8_from_uimm8 u8_from_uimm8)
|
||||
@@ -499,6 +336,10 @@
|
||||
(decl nonzero_u64_from_imm64 (u64) Imm64)
|
||||
(extern extractor nonzero_u64_from_imm64 nonzero_u64_from_imm64)
|
||||
|
||||
;; Create a new Imm64.
|
||||
(decl pure imm64 (u64) Imm64)
|
||||
(extern constructor imm64 imm64)
|
||||
|
||||
;; Extract a `u64` from an `Ieee32`.
|
||||
(decl u64_from_ieee32 (u64) Ieee32)
|
||||
(extern extractor infallible u64_from_ieee32 u64_from_ieee32)
|
||||
@@ -507,34 +348,6 @@
|
||||
(decl u64_from_ieee64 (u64) Ieee64)
|
||||
(extern extractor infallible u64_from_ieee64 u64_from_ieee64)
|
||||
|
||||
;; Extract the result values for the given instruction.
|
||||
(decl inst_results (ValueSlice) Inst)
|
||||
(extern extractor infallible inst_results inst_results)
|
||||
|
||||
;; Extract the first result value of the given instruction.
|
||||
(decl first_result (Value) Inst)
|
||||
(extern extractor first_result first_result)
|
||||
|
||||
;; Extract the `InstructionData` for an `Inst`.
|
||||
(decl inst_data (InstructionData) Inst)
|
||||
(extern extractor infallible inst_data inst_data)
|
||||
|
||||
;; Extract the type of a `Value`.
|
||||
(decl value_type (Type) Value)
|
||||
(extern extractor infallible value_type value_type)
|
||||
|
||||
;; Extract the type of the instruction's first result.
|
||||
(decl result_type (Type) Inst)
|
||||
(extractor (result_type ty)
|
||||
(first_result (value_type ty)))
|
||||
|
||||
;; Extract the type of the instruction's first result and pass along the
|
||||
;; instruction as well.
|
||||
(decl has_type (Type Inst) Inst)
|
||||
(extractor (has_type ty inst)
|
||||
(and (result_type ty)
|
||||
inst))
|
||||
|
||||
;; Match a multi-lane type, extracting (# bits per lane, # lanes) from the given
|
||||
;; type. Will only match when there is more than one lane.
|
||||
(decl multi_lane (u32 u32) Type)
|
||||
@@ -565,27 +378,10 @@
|
||||
(decl ty_dyn128_int (Type) Type)
|
||||
(extern extractor ty_dyn128_int ty_dyn128_int)
|
||||
|
||||
;; Match the instruction that defines the given value, if any.
|
||||
(decl def_inst (Inst) Value)
|
||||
(extern extractor def_inst def_inst)
|
||||
|
||||
;; Extract a constant `u64` from a value defined by an `iconst`.
|
||||
(decl u64_from_iconst (u64) Value)
|
||||
(extractor (u64_from_iconst x)
|
||||
(def_inst (iconst (u64_from_imm64 x))))
|
||||
|
||||
;; Convert an `Offset32` to a primitive number.
|
||||
(decl offset32_to_u32 (Offset32) u32)
|
||||
(extern constructor offset32_to_u32 offset32_to_u32)
|
||||
|
||||
;; Match any zero value for iconst, f32const, f64const, vconst and splat.
|
||||
(decl pure zero_value (Value) Value)
|
||||
(extern constructor zero_value zero_value)
|
||||
|
||||
;; Match a sinkable instruction from a value operand.
|
||||
(decl pure is_sinkable_inst (Value) Inst)
|
||||
(extern constructor is_sinkable_inst is_sinkable_inst)
|
||||
|
||||
;; This is a direct import of `IntCC::unsigned`.
|
||||
;; Get the corresponding IntCC with the signed component removed.
|
||||
;; For conditions without a signed component, this is a no-op.
|
||||
@@ -596,283 +392,6 @@
|
||||
(decl pure signed_cond_code (IntCC) IntCC)
|
||||
(extern constructor signed_cond_code signed_cond_code)
|
||||
|
||||
;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Emit an instruction.
|
||||
;;
|
||||
;; This is low-level and side-effectful; it should only be used as an
|
||||
;; implementation detail by helpers that preserve the SSA facade themselves.
|
||||
|
||||
(decl emit (MInst) Unit)
|
||||
(extern constructor emit emit)
|
||||
|
||||
;; Sink an instruction.
|
||||
;;
|
||||
;; This is a side-effectful operation that notifies the context that the
|
||||
;; instruction has been sunk into another instruction, and no longer needs to
|
||||
;; be lowered.
|
||||
(decl sink_inst (Inst) Unit)
|
||||
(extern constructor sink_inst sink_inst)
|
||||
|
||||
;; Constant pool emission.
|
||||
|
||||
(type VCodeConstant (primitive VCodeConstant))
|
||||
|
||||
;; Add a u64 little-endian constant to the in-memory constant pool and
|
||||
;; return a VCodeConstant index that refers to it. This is
|
||||
;; side-effecting but idempotent (constants are deduplicated).
|
||||
(decl emit_u64_le_const (u64) VCodeConstant)
|
||||
(extern constructor emit_u64_le_const emit_u64_le_const)
|
||||
|
||||
;; Add a u128 little-endian constant to the in-memory constant pool and
|
||||
;; return a VCodeConstant index that refers to it. This is
|
||||
;; side-effecting but idempotent (constants are deduplicated).
|
||||
(decl emit_u128_le_const (u128) VCodeConstant)
|
||||
(extern constructor emit_u128_le_const emit_u128_le_const)
|
||||
|
||||
;; Fetch the VCodeConstant associated with a Constant.
|
||||
(decl const_to_vconst (Constant) VCodeConstant)
|
||||
(extern constructor const_to_vconst const_to_vconst)
|
||||
|
||||
;;;; Helpers for Side-Effectful Instructions Without Results ;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(type SideEffectNoResult (enum
|
||||
(Inst (inst MInst))
|
||||
(Inst2 (inst1 MInst)
|
||||
(inst2 MInst))
|
||||
(Inst3 (inst1 MInst)
|
||||
(inst2 MInst)
|
||||
(inst3 MInst))))
|
||||
|
||||
;; Create an empty `InstOutput`, but do emit the given side-effectful
|
||||
;; instruction.
|
||||
(decl side_effect (SideEffectNoResult) InstOutput)
|
||||
(rule (side_effect (SideEffectNoResult.Inst inst))
|
||||
(let ((_ Unit (emit inst)))
|
||||
(output_none)))
|
||||
(rule (side_effect (SideEffectNoResult.Inst2 inst1 inst2))
|
||||
(let ((_ Unit (emit inst1))
|
||||
(_ Unit (emit inst2)))
|
||||
(output_none)))
|
||||
(rule (side_effect (SideEffectNoResult.Inst3 inst1 inst2 inst3))
|
||||
(let ((_ Unit (emit inst1))
|
||||
(_ Unit (emit inst2))
|
||||
(_ Unit (emit inst3)))
|
||||
(output_none)))
|
||||
|
||||
(decl side_effect_concat (SideEffectNoResult SideEffectNoResult) SideEffectNoResult)
|
||||
(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst inst2))
|
||||
(SideEffectNoResult.Inst2 inst1 inst2))
|
||||
(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst2 inst2 inst3))
|
||||
(SideEffectNoResult.Inst3 inst1 inst2 inst3))
|
||||
(rule (side_effect_concat (SideEffectNoResult.Inst2 inst1 inst2) (SideEffectNoResult.Inst inst3))
|
||||
(SideEffectNoResult.Inst3 inst1 inst2 inst3))
|
||||
|
||||
;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Newtype wrapper around `MInst` for instructions that are used for their
|
||||
;; effect on flags.
|
||||
;;
|
||||
;; Variant determines how result is given when combined with a
|
||||
;; ConsumesFlags. See `with_flags` below for more.
|
||||
(type ProducesFlags (enum
|
||||
;; For cases where the flags have been produced by another
|
||||
;; instruction, and we have out-of-band reasons to know
|
||||
;; that they won't be clobbered by the time we depend on
|
||||
;; them.
|
||||
(AlreadyExistingFlags)
|
||||
(ProducesFlagsSideEffect (inst MInst))
|
||||
(ProducesFlagsTwiceSideEffect (inst1 MInst) (inst2 MInst))
|
||||
;; Not directly combinable with a ConsumesFlags;
|
||||
;; used in s390x and unwrapped directly by `trapif`.
|
||||
(ProducesFlagsReturnsReg (inst MInst) (result Reg))
|
||||
(ProducesFlagsReturnsResultWithConsumer (inst MInst) (result Reg))))
|
||||
|
||||
;; Chain another producer to a `ProducesFlags`.
|
||||
(decl produces_flags_append (ProducesFlags MInst) ProducesFlags)
|
||||
(rule (produces_flags_append (ProducesFlags.ProducesFlagsSideEffect inst1) inst2)
|
||||
(ProducesFlags.ProducesFlagsTwiceSideEffect inst1 inst2))
|
||||
|
||||
;; Newtype wrapper around `MInst` for instructions that consume flags.
|
||||
;;
|
||||
;; Variant determines how result is given when combined with a
|
||||
;; ProducesFlags. See `with_flags` below for more.
|
||||
(type ConsumesFlags (enum
|
||||
(ConsumesFlagsSideEffect (inst MInst))
|
||||
(ConsumesFlagsSideEffect2 (inst1 MInst) (inst2 MInst))
|
||||
(ConsumesFlagsReturnsResultWithProducer (inst MInst) (result Reg))
|
||||
(ConsumesFlagsReturnsReg (inst MInst) (result Reg))
|
||||
(ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst)
|
||||
(inst2 MInst)
|
||||
(result ValueRegs))
|
||||
(ConsumesFlagsFourTimesReturnsValueRegs (inst1 MInst)
|
||||
(inst2 MInst)
|
||||
(inst3 MInst)
|
||||
(inst4 MInst)
|
||||
(result ValueRegs))))
|
||||
|
||||
|
||||
|
||||
;; Get the produced register out of a ProducesFlags.
|
||||
(decl produces_flags_get_reg (ProducesFlags) Reg)
|
||||
(rule (produces_flags_get_reg (ProducesFlags.ProducesFlagsReturnsReg _ reg)) reg)
|
||||
|
||||
;; Modify a ProducesFlags to use it only for its side-effect, ignoring
|
||||
;; its result.
|
||||
(decl produces_flags_ignore (ProducesFlags) ProducesFlags)
|
||||
(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsReg inst _))
|
||||
(ProducesFlags.ProducesFlagsSideEffect inst))
|
||||
(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsResultWithConsumer inst _))
|
||||
(ProducesFlags.ProducesFlagsSideEffect inst))
|
||||
|
||||
;; Helper for combining two flags-consumer instructions that return a
|
||||
;; single Reg, giving a ConsumesFlags that returns both values in a
|
||||
;; ValueRegs.
|
||||
(decl consumes_flags_concat (ConsumesFlags ConsumesFlags) ConsumesFlags)
|
||||
(rule (consumes_flags_concat (ConsumesFlags.ConsumesFlagsReturnsReg inst1 reg1)
|
||||
(ConsumesFlags.ConsumesFlagsReturnsReg inst2 reg2))
|
||||
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
|
||||
inst1
|
||||
inst2
|
||||
(value_regs reg1 reg2)))
|
||||
(rule (consumes_flags_concat
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect inst1)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect inst2))
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect2 inst1 inst2))
|
||||
|
||||
;; Combine flags-producing and -consuming instructions together, ensuring that
|
||||
;; they are emitted back-to-back and no other instructions can be emitted
|
||||
;; between them and potentially clobber the flags.
|
||||
;;
|
||||
;; Returns a `ValueRegs` according to the specific combination of ProducesFlags and ConsumesFlags modes:
|
||||
;; - SideEffect + ReturnsReg --> ValueReg with one Reg from consumer
|
||||
;; - SideEffect + ReturnsValueRegs --> ValueReg as given from consumer
|
||||
;; - ReturnsResultWithConsumer + ReturnsResultWithProducer --> ValueReg with low part from producer, high part from consumer
|
||||
;;
|
||||
;; See `with_flags_reg` below for a variant that extracts out just the lower Reg.
|
||||
(decl with_flags (ProducesFlags ConsumesFlags) ValueRegs)
|
||||
|
||||
(rule (with_flags (ProducesFlags.ProducesFlagsReturnsResultWithConsumer producer_inst producer_result)
|
||||
(ConsumesFlags.ConsumesFlagsReturnsResultWithProducer consumer_inst consumer_result))
|
||||
(let ((_x Unit (emit producer_inst))
|
||||
(_y Unit (emit consumer_inst)))
|
||||
(value_regs producer_result consumer_result)))
|
||||
|
||||
(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
|
||||
(ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result))
|
||||
(let ((_x Unit (emit producer_inst))
|
||||
(_y Unit (emit consumer_inst)))
|
||||
(value_reg consumer_result)))
|
||||
|
||||
(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
|
||||
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1
|
||||
consumer_inst_2
|
||||
consumer_result))
|
||||
;; We must emit these instructions in order as the creator of
|
||||
;; the ConsumesFlags may be relying on dataflow dependencies
|
||||
;; amongst them.
|
||||
(let ((_x Unit (emit producer_inst))
|
||||
(_y Unit (emit consumer_inst_1))
|
||||
(_z Unit (emit consumer_inst_2)))
|
||||
consumer_result))
|
||||
|
||||
(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
|
||||
(ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1
|
||||
consumer_inst_2
|
||||
consumer_inst_3
|
||||
consumer_inst_4
|
||||
consumer_result))
|
||||
;; We must emit these instructions in order as the creator of
|
||||
;; the ConsumesFlags may be relying on dataflow dependencies
|
||||
;; amongst them.
|
||||
(let ((_x Unit (emit producer_inst))
|
||||
(_y Unit (emit consumer_inst_1))
|
||||
(_z Unit (emit consumer_inst_2))
|
||||
(_w Unit (emit consumer_inst_3))
|
||||
(_v Unit (emit consumer_inst_4)))
|
||||
consumer_result))
|
||||
|
||||
(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2)
|
||||
(ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result))
|
||||
(let ((_ Unit (emit producer_inst1))
|
||||
(_ Unit (emit producer_inst2))
|
||||
(_ Unit (emit consumer_inst)))
|
||||
(value_reg consumer_result)))
|
||||
|
||||
(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2)
|
||||
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1
|
||||
consumer_inst_2
|
||||
consumer_result))
|
||||
;; We must emit these instructions in order as the creator of
|
||||
;; the ConsumesFlags may be relying on dataflow dependencies
|
||||
;; amongst them.
|
||||
(let ((_ Unit (emit producer_inst1))
|
||||
(_ Unit (emit producer_inst2))
|
||||
(_ Unit (emit consumer_inst_1))
|
||||
(_ Unit (emit consumer_inst_2)))
|
||||
consumer_result))
|
||||
|
||||
(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2)
|
||||
(ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1
|
||||
consumer_inst_2
|
||||
consumer_inst_3
|
||||
consumer_inst_4
|
||||
consumer_result))
|
||||
;; We must emit these instructions in order as the creator of
|
||||
;; the ConsumesFlags may be relying on dataflow dependencies
|
||||
;; amongst them.
|
||||
(let ((_ Unit (emit producer_inst1))
|
||||
(_ Unit (emit producer_inst2))
|
||||
(_ Unit (emit consumer_inst_1))
|
||||
(_ Unit (emit consumer_inst_2))
|
||||
(_ Unit (emit consumer_inst_3))
|
||||
(_ Unit (emit consumer_inst_4)))
|
||||
consumer_result))
|
||||
|
||||
(decl with_flags_reg (ProducesFlags ConsumesFlags) Reg)
|
||||
(rule (with_flags_reg p c)
|
||||
(let ((v ValueRegs (with_flags p c)))
|
||||
(value_regs_get v 0)))
|
||||
|
||||
;; Indicate that the current state of the flags register from the instruction
|
||||
;; that produces this Value is relied on.
|
||||
(decl flags_to_producesflags (Value) ProducesFlags)
|
||||
(rule (flags_to_producesflags val)
|
||||
(let ((_ Unit (mark_value_used val)))
|
||||
(ProducesFlags.AlreadyExistingFlags)))
|
||||
|
||||
;; Combine a flags-producing instruction and a flags-consuming instruction that
|
||||
;; produces no results.
|
||||
;;
|
||||
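;; This function handles the following combinations:
;; - AlreadyExistingFlags + ConsumesFlagsSideEffect
;; - AlreadyExistingFlags + ConsumesFlagsSideEffect2
;; - ProducesFlagsSideEffect + ConsumesFlagsSideEffect
;; - ProducesFlagsSideEffect + ConsumesFlagsSideEffect2
;; - ProducesFlagsTwiceSideEffect + ConsumesFlagsSideEffect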
|
||||
(decl with_flags_side_effect (ProducesFlags ConsumesFlags) SideEffectNoResult)
|
||||
|
||||
(rule (with_flags_side_effect
|
||||
(ProducesFlags.AlreadyExistingFlags)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect c))
|
||||
(SideEffectNoResult.Inst c))
|
||||
|
||||
(rule (with_flags_side_effect
|
||||
(ProducesFlags.AlreadyExistingFlags)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2))
|
||||
(SideEffectNoResult.Inst2 c1 c2))
|
||||
|
||||
(rule (with_flags_side_effect
|
||||
(ProducesFlags.ProducesFlagsSideEffect p)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect c))
|
||||
(SideEffectNoResult.Inst2 p c))
|
||||
|
||||
(rule (with_flags_side_effect
|
||||
(ProducesFlags.ProducesFlagsSideEffect p)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2))
|
||||
(SideEffectNoResult.Inst3 p c1 c2))
|
||||
|
||||
(rule (with_flags_side_effect
|
||||
(ProducesFlags.ProducesFlagsTwiceSideEffect p1 p2)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect c))
|
||||
(SideEffectNoResult.Inst3 p1 p2 c))
|
||||
|
||||
;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl trap_code_division_by_zero () TrapCode)
|
||||
@@ -884,70 +403,6 @@
|
||||
(decl trap_code_bad_conversion_to_integer () TrapCode)
|
||||
(extern constructor trap_code_bad_conversion_to_integer trap_code_bad_conversion_to_integer)
|
||||
|
||||
;;;; Helpers for accessing compilation flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl avoid_div_traps () Type)
|
||||
(extern extractor avoid_div_traps avoid_div_traps)
|
||||
|
||||
;; This definition should be kept up to date with the values defined in
|
||||
;; cranelift/codegen/meta/src/shared/settings.rs
|
||||
(type TlsModel extern (enum (None) (ElfGd) (Macho) (Coff)))
|
||||
|
||||
(decl tls_model (TlsModel) Type)
|
||||
(extern extractor infallible tls_model tls_model)
|
||||
|
||||
(decl pure tls_model_is_elf_gd () Unit)
|
||||
(extern constructor tls_model_is_elf_gd tls_model_is_elf_gd)
|
||||
|
||||
(decl pure tls_model_is_macho () Unit)
|
||||
(extern constructor tls_model_is_macho tls_model_is_macho)
|
||||
|
||||
(decl pure tls_model_is_coff () Unit)
|
||||
(extern constructor tls_model_is_coff tls_model_is_coff)
|
||||
|
||||
(decl pure preserve_frame_pointers () Unit)
|
||||
(extern constructor preserve_frame_pointers preserve_frame_pointers)
|
||||
|
||||
;;;; Helpers for accessing instruction data ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Accessor for `FuncRef`.
|
||||
|
||||
(decl func_ref_data (SigRef ExternalName RelocDistance) FuncRef)
|
||||
(extern extractor infallible func_ref_data func_ref_data)
|
||||
|
||||
;; Accessor for `GlobalValue`.
|
||||
|
||||
(decl symbol_value_data (ExternalName RelocDistance i64) GlobalValue)
|
||||
(extern extractor symbol_value_data symbol_value_data)
|
||||
|
||||
(decl box_external_name (ExternalName) BoxExternalName)
|
||||
(extern constructor box_external_name box_external_name)
|
||||
|
||||
;; Accessor for `RelocDistance`.
|
||||
|
||||
(decl reloc_distance_near () RelocDistance)
|
||||
(extern extractor reloc_distance_near reloc_distance_near)
|
||||
|
||||
;; Accessor for `Immediate` as u128.
|
||||
|
||||
(decl u128_from_immediate (u128) Immediate)
|
||||
(extern extractor u128_from_immediate u128_from_immediate)
|
||||
|
||||
;; Accessor for `Immediate` as a vector of u8 values.
|
||||
|
||||
(decl vec_mask_from_immediate (VecMask) Immediate)
|
||||
(extern extractor vec_mask_from_immediate vec_mask_from_immediate)
|
||||
|
||||
;; Accessor for `Constant` as u128.
|
||||
|
||||
(decl u128_from_constant (u128) Constant)
|
||||
(extern extractor u128_from_constant u128_from_constant)
|
||||
|
||||
;; Accessor for `Constant` as u64.
|
||||
|
||||
(decl u64_from_constant (u64) Constant)
|
||||
(extern extractor u64_from_constant u64_from_constant)
|
||||
|
||||
;;;; Helpers for tail recursion loops ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; A range of integers to loop through.
|
||||
@@ -976,156 +431,7 @@
|
||||
(decl range_unwrap (usize Range) Range)
|
||||
(extractor (range_unwrap index rest) (range_view (RangeView.NonEmpty index rest)))
|
||||
|
||||
;;;; Helpers for generating returns ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; The (writable) register(s) that will contain the n'th return value.
|
||||
(decl retval (usize) WritableValueRegs)
|
||||
(extern constructor retval retval)
|
||||
|
||||
;; Extractor to check for the special case that a `WritableValueRegs`
|
||||
;; contains only a single register.
|
||||
(decl only_writable_reg (WritableReg) WritableValueRegs)
|
||||
(extern extractor only_writable_reg only_writable_reg)
|
||||
|
||||
;; Get the `n`th register inside a `WritableValueRegs`.
|
||||
(decl writable_regs_get (WritableValueRegs usize) WritableReg)
|
||||
(extern constructor writable_regs_get writable_regs_get)
|
||||
|
||||
;;;; Helpers for generating calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Type to hold information about a function call signature.
|
||||
(type Sig extern (enum))
|
||||
|
||||
;; Information on how to pass one argument or return value.
|
||||
(type ABIArg extern (enum))
|
||||
|
||||
;; Information on how to pass a single slot of one argument or return value.
|
||||
(type ABIArgSlot extern
|
||||
(enum
|
||||
(Reg
|
||||
(reg RealReg)
|
||||
(ty Type)
|
||||
(extension ArgumentExtension))
|
||||
(Stack
|
||||
(offset i64)
|
||||
(ty Type)
|
||||
(extension ArgumentExtension))))
|
||||
|
||||
|
||||
;; Physical register that may hold an argument or return value.
|
||||
(type RealReg (primitive RealReg))
|
||||
|
||||
;; Instruction on whether and how to extend an argument value.
|
||||
(type ArgumentExtension extern
|
||||
(enum
|
||||
(None)
|
||||
(Uext)
|
||||
(Sext)))
|
||||
|
||||
|
||||
;; Get the number of arguments expected.
|
||||
(decl abi_num_args (Sig) usize)
|
||||
(extern constructor abi_num_args abi_num_args)
|
||||
|
||||
;; Get information specifying how to pass one argument.
|
||||
(decl abi_get_arg (Sig usize) ABIArg)
|
||||
(extern constructor abi_get_arg abi_get_arg)
|
||||
|
||||
;; Get the number of return values expected.
|
||||
(decl abi_num_rets (Sig) usize)
|
||||
(extern constructor abi_num_rets abi_num_rets)
|
||||
|
||||
;; Get information specifying how to pass one return value.
|
||||
(decl abi_get_ret (Sig usize) ABIArg)
|
||||
(extern constructor abi_get_ret abi_get_ret)
|
||||
|
||||
;; Get information specifying how to pass the implicit pointer
|
||||
;; to the return-value area on the stack, if required.
|
||||
(decl abi_ret_arg (ABIArg) Sig)
|
||||
(extern extractor abi_ret_arg abi_ret_arg)
|
||||
|
||||
;; Succeeds if no implicit return-value area pointer is required.
|
||||
(decl abi_no_ret_arg () Sig)
|
||||
(extern extractor abi_no_ret_arg abi_no_ret_arg)
|
||||
|
||||
;; Size of the argument area.
|
||||
(decl abi_sized_stack_arg_space (Sig) i64)
|
||||
(extern constructor abi_sized_stack_arg_space abi_sized_stack_arg_space)
|
||||
|
||||
;; Size of the return-value area.
|
||||
(decl abi_sized_stack_ret_space (Sig) i64)
|
||||
(extern constructor abi_sized_stack_ret_space abi_sized_stack_ret_space)
|
||||
|
||||
;; StackSlot addr
|
||||
(decl abi_stackslot_addr (WritableReg StackSlot Offset32) MInst)
|
||||
(extern constructor abi_stackslot_addr abi_stackslot_addr)
|
||||
|
||||
;; DynamicStackSlot addr
|
||||
(decl abi_dynamic_stackslot_addr (WritableReg DynamicStackSlot) MInst)
|
||||
(extern constructor abi_dynamic_stackslot_addr abi_dynamic_stackslot_addr)
|
||||
|
||||
;; Extractor to detect the special case where an argument or
|
||||
;; return value only requires a single slot to be passed.
|
||||
(decl abi_arg_only_slot (ABIArgSlot) ABIArg)
|
||||
(extern extractor abi_arg_only_slot abi_arg_only_slot)
|
||||
|
||||
;; Extractor to detect the special case where a struct argument
|
||||
;; is explicitly passed by reference using a hidden pointer.
|
||||
(decl abi_arg_struct_pointer (ABIArgSlot i64 u64) ABIArg)
|
||||
(extern extractor abi_arg_struct_pointer abi_arg_struct_pointer)
|
||||
|
||||
;; Extractor to detect the special case where a non-struct argument
|
||||
;; is implicitly passed by reference using a hidden pointer.
|
||||
(decl abi_arg_implicit_pointer (ABIArgSlot i64 Type) ABIArg)
|
||||
(extern extractor abi_arg_implicit_pointer abi_arg_implicit_pointer)
|
||||
|
||||
;; Convert a real register number into a virtual register.
|
||||
(decl real_reg_to_reg (RealReg) Reg)
|
||||
(extern constructor real_reg_to_reg real_reg_to_reg)
|
||||
|
||||
;; Convert a real register number into a writable virtual register.
|
||||
(decl real_reg_to_writable_reg (RealReg) WritableReg)
|
||||
(extern constructor real_reg_to_writable_reg real_reg_to_writable_reg)
|
||||
|
||||
;; Generate a move between two registers.
|
||||
(decl gen_move (Type WritableReg Reg) MInst)
|
||||
(extern constructor gen_move gen_move)
|
||||
|
||||
;; Copy a return value to a set of registers.
|
||||
(decl copy_to_regs (WritableValueRegs Value) Unit)
|
||||
(rule (copy_to_regs dsts val @ (value_type ty))
|
||||
(let ((srcs ValueRegs (put_in_regs val)))
|
||||
(copy_to_regs_range ty (value_regs_range srcs) dsts srcs)))
|
||||
|
||||
;; Helper for `copy_to_regs` that uses a range to index into the reg/value
|
||||
;; vectors. For the empty range it emits nothing and returns unit.
|
||||
(decl copy_to_regs_range (Type Range WritableValueRegs ValueRegs) Unit)
|
||||
|
||||
(rule (copy_to_regs_range ty (range_empty) dsts srcs)
|
||||
(unit))
|
||||
|
||||
(rule (copy_to_regs_range ty (range_unwrap head tail) dsts srcs)
|
||||
(let ((dst WritableReg (writable_regs_get dsts head))
|
||||
(src Reg (value_regs_get srcs head))
|
||||
(_ Unit (emit (gen_move ty dst src))))
|
||||
(copy_to_regs_range ty tail dsts srcs)))
|
||||
|
||||
;; Lower a return by walking the given `Range` of indices.
;;
;; For each index, the value at that index in the `ValueSlice` is copied (via
;; `copy_to_regs`) into the registers given by `retval` for the same index,
;; and the rest of the range is then processed recursively. Once the range is
;; empty, the result is an empty `InstOutput`.
(decl lower_return (Range ValueSlice) InstOutput)
|
||||
(rule (lower_return (range_empty) _) (output_none))
|
||||
(rule (lower_return (range_unwrap head tail) args)
|
||||
(let ((_ Unit (copy_to_regs (retval head) (value_slice_get args head))))
|
||||
(lower_return tail args)))
|
||||
|
||||
;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(convert Inst Value def_inst)
|
||||
(convert Reg ValueRegs value_reg)
|
||||
(convert Value Reg put_in_reg)
|
||||
(convert Value ValueRegs put_in_regs)
|
||||
(convert WritableReg Reg writable_reg_to_reg)
|
||||
(convert ValueRegs InstOutput output)
|
||||
(convert Reg InstOutput output_reg)
|
||||
(convert Value InstOutput output_value)
|
||||
(convert Offset32 u32 offset32_to_u32)
|
||||
(convert ExternalName BoxExternalName box_external_name)
|
||||
(convert PReg Reg preg_to_reg)
|
||||
|
||||
|
||||
740
cranelift/codegen/src/prelude_lower.isle
Normal file
740
cranelift/codegen/src/prelude_lower.isle
Normal file
@@ -0,0 +1,740 @@
|
||||
;; Prelude definitions specific to lowering environments (backends) in
|
||||
;; ISLE.
|
||||
|
||||
;;;; Primitive and External Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `cranelift-entity`-based identifiers.
|
||||
(type Inst (primitive Inst))
|
||||
(type Value (primitive Value))
|
||||
|
||||
;; ISLE representation of `&[Value]`.
|
||||
(type ValueSlice (primitive ValueSlice))
|
||||
|
||||
;; ISLE representation of `Vec<u8>`
|
||||
(type VecMask extern (enum))
|
||||
|
||||
(type ValueList (primitive ValueList))
|
||||
(type ValueRegs (primitive ValueRegs))
|
||||
(type WritableValueRegs (primitive WritableValueRegs))
|
||||
|
||||
;; Instruction lowering result: a vector of `ValueRegs`.
|
||||
(type InstOutput (primitive InstOutput))
|
||||
;; (Mutable) builder to incrementally construct an `InstOutput`.
|
||||
(type InstOutputBuilder extern (enum))
|
||||
|
||||
;;;; Registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(type Reg (primitive Reg))
|
||||
(type WritableReg (primitive WritableReg))
|
||||
(type OptionWritableReg (primitive OptionWritableReg))
|
||||
(type VecReg extern (enum))
|
||||
(type VecWritableReg extern (enum))
|
||||
(type PReg (primitive PReg))
|
||||
|
||||
;; Construct a `ValueRegs` of one register.
|
||||
(decl value_reg (Reg) ValueRegs)
|
||||
(extern constructor value_reg value_reg)
|
||||
|
||||
;; Construct a `ValueRegs` of two registers.
|
||||
(decl value_regs (Reg Reg) ValueRegs)
|
||||
(extern constructor value_regs value_regs)
|
||||
|
||||
;; Construct an empty `ValueRegs` containing only invalid register sentinels.
|
||||
(decl value_regs_invalid () ValueRegs)
|
||||
(extern constructor value_regs_invalid value_regs_invalid)
|
||||
|
||||
;; Construct an empty `InstOutput`.
|
||||
(decl output_none () InstOutput)
|
||||
(extern constructor output_none output_none)
|
||||
|
||||
;; Construct a single-element `InstOutput`.
|
||||
(decl output (ValueRegs) InstOutput)
|
||||
(extern constructor output output)
|
||||
|
||||
;; Construct a two-element `InstOutput`.
|
||||
(decl output_pair (ValueRegs ValueRegs) InstOutput)
|
||||
(extern constructor output_pair output_pair)
|
||||
|
||||
;; Construct a single-element `InstOutput` from a single register.
|
||||
(decl output_reg (Reg) InstOutput)
|
||||
(rule (output_reg reg) (output (value_reg reg)))
|
||||
|
||||
;; Construct a single-element `InstOutput` from a value.
|
||||
(decl output_value (Value) InstOutput)
|
||||
(rule (output_value val) (output (put_in_regs val)))
|
||||
|
||||
;; Initially empty `InstOutput` builder.
|
||||
(decl output_builder_new () InstOutputBuilder)
|
||||
(extern constructor output_builder_new output_builder_new)
|
||||
|
||||
;; Append a `ValueRegs` to an `InstOutput` under construction.
|
||||
(decl output_builder_push (InstOutputBuilder ValueRegs) Unit)
|
||||
(extern constructor output_builder_push output_builder_push)
|
||||
|
||||
;; Finish building an `InstOutput` incrementally.
|
||||
(decl output_builder_finish (InstOutputBuilder) InstOutput)
|
||||
(extern constructor output_builder_finish output_builder_finish)
|
||||
|
||||
;; Get a temporary register for writing.
|
||||
(decl temp_writable_reg (Type) WritableReg)
|
||||
(extern constructor temp_writable_reg temp_writable_reg)
|
||||
|
||||
;; Get a temporary register for reading.
|
||||
(decl temp_reg (Type) Reg)
|
||||
(rule (temp_reg ty)
|
||||
(writable_reg_to_reg (temp_writable_reg ty)))
|
||||
|
||||
;; Infallible extractor that yields a `bool` for a `Reg`. The `invalid_reg`
;; and `valid_reg` extractors are built on it by matching the result against
;; `$false` and `$true` respectively.
(decl is_valid_reg (bool) Reg)
|
||||
(extern extractor infallible is_valid_reg is_valid_reg)
|
||||
|
||||
;; Get or match the invalid register.
|
||||
(decl invalid_reg () Reg)
|
||||
(extern constructor invalid_reg invalid_reg)
|
||||
(extractor (invalid_reg) (is_valid_reg $false))
|
||||
|
||||
;; Match any register but the invalid register.
|
||||
(decl valid_reg (Reg) Reg)
|
||||
(extractor (valid_reg reg) (and (is_valid_reg $true) reg))
|
||||
|
||||
;; Mark this value as used, to ensure that it gets lowered.
|
||||
(decl mark_value_used (Value) Unit)
|
||||
(extern constructor mark_value_used mark_value_used)
|
||||
|
||||
;; Put the given value into a register.
|
||||
;;
|
||||
;; Asserts that the value fits into a single register, and doesn't require
|
||||
;; multiple registers for its representation (like `i128` on x64 for example).
|
||||
;;
|
||||
;; As a side effect, this marks the value as used.
|
||||
(decl put_in_reg (Value) Reg)
|
||||
(extern constructor put_in_reg put_in_reg)
|
||||
|
||||
;; Put the given value into one or more registers.
|
||||
;;
|
||||
;; As a side effect, this marks the value as used.
|
||||
(decl put_in_regs (Value) ValueRegs)
|
||||
(extern constructor put_in_regs put_in_regs)
|
||||
|
||||
;; If the given reg is a real register, cause the value in reg to be in a virtual
|
||||
;; reg, by copying it into a new virtual reg.
|
||||
(decl ensure_in_vreg (Reg Type) Reg)
|
||||
(extern constructor ensure_in_vreg ensure_in_vreg)
|
||||
|
||||
;; Get the `n`th register inside a `ValueRegs`.
|
||||
(decl value_regs_get (ValueRegs usize) Reg)
|
||||
(extern constructor value_regs_get value_regs_get)
|
||||
|
||||
;; Get the number of registers in a `ValueRegs`.
|
||||
(decl value_regs_len (ValueRegs) usize)
|
||||
(extern constructor value_regs_len value_regs_len)
|
||||
|
||||
;; Get a range for the number of regs in a `ValueRegs`.
|
||||
(decl value_regs_range (ValueRegs) Range)
|
||||
(rule (value_regs_range regs) (range 0 (value_regs_len regs)))
|
||||
|
||||
;; Put the value into one or more registers and return the first register.
|
||||
;;
|
||||
;; Unlike `put_in_reg`, this does not assert that the value fits in a single
|
||||
;; register. This is useful for things like a `i128` shift amount, where we mask
|
||||
;; the shift amount to the bit width of the value being shifted, and so the high
|
||||
;; half of the `i128` won't ever be used.
|
||||
;;
|
||||
;; As a side effect, this marks that value as used.
|
||||
(decl lo_reg (Value) Reg)
|
||||
(rule (lo_reg val)
|
||||
(let ((regs ValueRegs (put_in_regs val)))
|
||||
(value_regs_get regs 0)))
|
||||
|
||||
;; Convert a `PReg` into a `Reg`.
|
||||
(decl preg_to_reg (PReg) Reg)
|
||||
(extern constructor preg_to_reg preg_to_reg)
|
||||
|
||||
;;;; Common Mach Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(type MachLabel (primitive MachLabel))
|
||||
(type ValueLabel (primitive ValueLabel))
|
||||
(type UnwindInst (primitive UnwindInst))
|
||||
(type ExternalName (primitive ExternalName))
|
||||
(type BoxExternalName (primitive BoxExternalName))
|
||||
(type RelocDistance (primitive RelocDistance))
|
||||
(type VecArgPair extern (enum))
|
||||
|
||||
;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Extractor to get a `ValueSlice` out of a `ValueList`.
|
||||
(decl value_list_slice (ValueSlice) ValueList)
|
||||
(extern extractor infallible value_list_slice value_list_slice)
|
||||
|
||||
;; Extractor to test whether a `ValueSlice` is empty.
|
||||
(decl value_slice_empty () ValueSlice)
|
||||
(extern extractor value_slice_empty value_slice_empty)
|
||||
|
||||
;; Extractor to split a `ValueSlice` into its first element plus a tail.
|
||||
(decl value_slice_unwrap (Value ValueSlice) ValueSlice)
|
||||
(extern extractor value_slice_unwrap value_slice_unwrap)
|
||||
|
||||
;; Return the length of a `ValueSlice`.
|
||||
(decl value_slice_len (ValueSlice) usize)
|
||||
(extern constructor value_slice_len value_slice_len)
|
||||
|
||||
;; Return any element of a `ValueSlice`.
|
||||
(decl value_slice_get (ValueSlice usize) Value)
|
||||
(extern constructor value_slice_get value_slice_get)
|
||||
|
||||
;; Extractor to get the first element from a value list, along with its tail as
|
||||
;; a `ValueSlice`.
|
||||
(decl unwrap_head_value_list_1 (Value ValueSlice) ValueList)
|
||||
(extractor (unwrap_head_value_list_1 head tail)
|
||||
(value_list_slice (value_slice_unwrap head tail)))
|
||||
|
||||
;; Extractor to get the first two elements from a value list, along with its
|
||||
;; tail as a `ValueSlice`.
|
||||
(decl unwrap_head_value_list_2 (Value Value ValueSlice) ValueList)
|
||||
(extractor (unwrap_head_value_list_2 head1 head2 tail)
|
||||
(value_list_slice (value_slice_unwrap head1 (value_slice_unwrap head2 tail))))
|
||||
|
||||
;; Constructor to test whether two values are the same.
|
||||
(decl pure same_value (Value Value) Value)
|
||||
(extern constructor same_value same_value)
|
||||
|
||||
;; Turn a `Writable<Reg>` into a `Reg` via `Writable::to_reg`.
|
||||
(decl writable_reg_to_reg (WritableReg) Reg)
|
||||
(extern constructor writable_reg_to_reg writable_reg_to_reg)
|
||||
|
||||
;; Extract the result values for the given instruction.
|
||||
(decl inst_results (ValueSlice) Inst)
|
||||
(extern extractor infallible inst_results inst_results)
|
||||
|
||||
;; Extract the first result value of the given instruction.
|
||||
(decl first_result (Value) Inst)
|
||||
(extern extractor first_result first_result)
|
||||
|
||||
;; Extract the `InstructionData` for an `Inst`.
|
||||
(decl inst_data (InstructionData) Inst)
|
||||
(extern extractor infallible inst_data inst_data)
|
||||
|
||||
;; Extract the type of a `Value`.
|
||||
(decl value_type (Type) Value)
|
||||
(extern extractor infallible value_type value_type)
|
||||
|
||||
;; Extract the type of the instruction's first result.
|
||||
(decl result_type (Type) Inst)
|
||||
(extractor (result_type ty)
|
||||
(first_result (value_type ty)))
|
||||
|
||||
;; Extract the type of the instruction's first result and pass along the
|
||||
;; instruction as well.
|
||||
(decl has_type (Type Inst) Inst)
|
||||
(extractor (has_type ty inst)
|
||||
(and (result_type ty)
|
||||
inst))
|
||||
|
||||
;; Match the instruction that defines the given value, if any.
|
||||
(decl def_inst (Inst) Value)
|
||||
(extern extractor def_inst def_inst)
|
||||
|
||||
;; Extract a constant `u64` from a value defined by an `iconst`.
|
||||
(decl u64_from_iconst (u64) Value)
|
||||
(extractor (u64_from_iconst x)
|
||||
(def_inst (iconst (u64_from_imm64 x))))
|
||||
|
||||
;; Match any zero value for iconst, fconst32, fconst64, vconst and splat.
|
||||
(decl pure zero_value (Value) Value)
|
||||
(extern constructor zero_value zero_value)
|
||||
|
||||
;; Match a sinkable instruction from a value operand.
|
||||
(decl pure is_sinkable_inst (Value) Inst)
|
||||
(extern constructor is_sinkable_inst is_sinkable_inst)
|
||||
|
||||
;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Emit an instruction.
|
||||
;;
|
||||
;; This is low-level and side-effectful; it should only be used as an
|
||||
;; implementation detail by helpers that preserve the SSA facade themselves.
|
||||
|
||||
(decl emit (MInst) Unit)
|
||||
(extern constructor emit emit)
|
||||
|
||||
;; Sink an instruction.
|
||||
;;
|
||||
;; This is a side-effectful operation that notifies the context that the
|
||||
;; instruction has been sunk into another instruction, and no longer needs to
|
||||
;; be lowered.
|
||||
(decl sink_inst (Inst) Unit)
|
||||
(extern constructor sink_inst sink_inst)
|
||||
|
||||
;; Constant pool emission.
|
||||
|
||||
(type VCodeConstant (primitive VCodeConstant))
|
||||
|
||||
;; Add a u64 little-endian constant to the in-memory constant pool and
|
||||
;; return a VCodeConstant index that refers to it. This is
|
||||
;; side-effecting but idempotent (constants are deduplicated).
|
||||
(decl emit_u64_le_const (u64) VCodeConstant)
|
||||
(extern constructor emit_u64_le_const emit_u64_le_const)
|
||||
|
||||
;; Add a u128 little-endian constant to the in-memory constant pool and
|
||||
;; return a VCodeConstant index that refers to it. This is
|
||||
;; side-effecting but idempotent (constants are deduplicated).
|
||||
(decl emit_u128_le_const (u128) VCodeConstant)
|
||||
(extern constructor emit_u128_le_const emit_u128_le_const)
|
||||
|
||||
;; Fetch the VCodeConstant associated with a Constant.
|
||||
(decl const_to_vconst (Constant) VCodeConstant)
|
||||
(extern constructor const_to_vconst const_to_vconst)
|
||||
|
||||
;;;; Helpers for Side-Effectful Instructions Without Results ;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; A group of one, two, or three `MInst`s that produce no result value.
;; `side_effect` emits the contained instructions in field order and yields
;; an empty `InstOutput`.
(type SideEffectNoResult (enum
|
||||
(Inst (inst MInst))
|
||||
(Inst2 (inst1 MInst)
|
||||
(inst2 MInst))
|
||||
(Inst3 (inst1 MInst)
|
||||
(inst2 MInst)
|
||||
(inst3 MInst))))
|
||||
|
||||
;; Create an empty `InstOutput`, but do emit the given side-effectful
|
||||
;; instruction.
|
||||
(decl side_effect (SideEffectNoResult) InstOutput)
|
||||
(rule (side_effect (SideEffectNoResult.Inst inst))
|
||||
(let ((_ Unit (emit inst)))
|
||||
(output_none)))
|
||||
(rule (side_effect (SideEffectNoResult.Inst2 inst1 inst2))
|
||||
(let ((_ Unit (emit inst1))
|
||||
(_ Unit (emit inst2)))
|
||||
(output_none)))
|
||||
(rule (side_effect (SideEffectNoResult.Inst3 inst1 inst2 inst3))
|
||||
(let ((_ Unit (emit inst1))
|
||||
(_ Unit (emit inst2))
|
||||
(_ Unit (emit inst3)))
|
||||
(output_none)))
|
||||
|
||||
;; Concatenate two `SideEffectNoResult`s into a single one that carries the
;; instructions of the first followed by those of the second.
;;
;; Rules exist only for combinations totalling at most three instructions
;; (`Inst` + `Inst`, `Inst` + `Inst2`, `Inst2` + `Inst`), since `Inst3` is the
;; largest variant.
(decl side_effect_concat (SideEffectNoResult SideEffectNoResult) SideEffectNoResult)
|
||||
(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst inst2))
|
||||
(SideEffectNoResult.Inst2 inst1 inst2))
|
||||
(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst2 inst2 inst3))
|
||||
(SideEffectNoResult.Inst3 inst1 inst2 inst3))
|
||||
(rule (side_effect_concat (SideEffectNoResult.Inst2 inst1 inst2) (SideEffectNoResult.Inst inst3))
|
||||
(SideEffectNoResult.Inst3 inst1 inst2 inst3))
|
||||
|
||||
;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Newtype wrapper around `MInst` for instructions that are used for their
|
||||
;; effect on flags.
|
||||
;;
|
||||
;; Variant determines how result is given when combined with a
|
||||
;; ConsumesFlags. See `with_flags` below for more.
|
||||
(type ProducesFlags (enum
|
||||
;; For cases where the flags have been produced by another
|
||||
;; instruction, and we have out-of-band reasons to know
|
||||
;; that they won't be clobbered by the time we depend on
|
||||
;; them.
|
||||
(AlreadyExistingFlags)
|
||||
(ProducesFlagsSideEffect (inst MInst))
|
||||
(ProducesFlagsTwiceSideEffect (inst1 MInst) (inst2 MInst))
|
||||
;; Not directly combinable with a ConsumesFlags;
|
||||
;; used in s390x and unwrapped directly by `trapif`.
|
||||
(ProducesFlagsReturnsReg (inst MInst) (result Reg))
|
||||
(ProducesFlagsReturnsResultWithConsumer (inst MInst) (result Reg))))
|
||||
|
||||
;; Chain another producer to a `ProducesFlags`.
;;
;; The only rule visible here accepts a `ProducesFlagsSideEffect` and returns
;; a `ProducesFlagsTwiceSideEffect` holding the original instruction followed
;; by the appended one.
|
||||
(decl produces_flags_append (ProducesFlags MInst) ProducesFlags)
|
||||
(rule (produces_flags_append (ProducesFlags.ProducesFlagsSideEffect inst1) inst2)
|
||||
(ProducesFlags.ProducesFlagsTwiceSideEffect inst1 inst2))
|
||||
|
||||
;; Newtype wrapper around `MInst` for instructions that consume flags.
|
||||
;;
|
||||
;; Variant determines how result is given when combined with a
|
||||
;; ProducesFlags. See `with_flags` below for more.
|
||||
(type ConsumesFlags (enum
|
||||
(ConsumesFlagsSideEffect (inst MInst))
|
||||
(ConsumesFlagsSideEffect2 (inst1 MInst) (inst2 MInst))
|
||||
(ConsumesFlagsReturnsResultWithProducer (inst MInst) (result Reg))
|
||||
(ConsumesFlagsReturnsReg (inst MInst) (result Reg))
|
||||
(ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst)
|
||||
(inst2 MInst)
|
||||
(result ValueRegs))
|
||||
(ConsumesFlagsFourTimesReturnsValueRegs (inst1 MInst)
|
||||
(inst2 MInst)
|
||||
(inst3 MInst)
|
||||
(inst4 MInst)
|
||||
(result ValueRegs))))
|
||||
|
||||
|
||||
|
||||
;; Get the produced register out of a ProducesFlags.
|
||||
(decl produces_flags_get_reg (ProducesFlags) Reg)
|
||||
(rule (produces_flags_get_reg (ProducesFlags.ProducesFlagsReturnsReg _ reg)) reg)
|
||||
|
||||
;; Modify a ProducesFlags to use it only for its side-effect, ignoring
|
||||
;; its result.
|
||||
(decl produces_flags_ignore (ProducesFlags) ProducesFlags)
|
||||
(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsReg inst _))
|
||||
(ProducesFlags.ProducesFlagsSideEffect inst))
|
||||
(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsResultWithConsumer inst _))
|
||||
(ProducesFlags.ProducesFlagsSideEffect inst))
|
||||
|
||||
;; Helper for combining two flags-consumer instructions that return a
|
||||
;; single Reg, giving a ConsumesFlags that returns both values in a
|
||||
;; ValueRegs.
;;
;; Two `ConsumesFlagsSideEffect` consumers are likewise merged, giving a
;; single `ConsumesFlagsSideEffect2` holding both instructions in order.
|
||||
(decl consumes_flags_concat (ConsumesFlags ConsumesFlags) ConsumesFlags)
|
||||
(rule (consumes_flags_concat (ConsumesFlags.ConsumesFlagsReturnsReg inst1 reg1)
|
||||
(ConsumesFlags.ConsumesFlagsReturnsReg inst2 reg2))
|
||||
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
|
||||
inst1
|
||||
inst2
|
||||
(value_regs reg1 reg2)))
|
||||
(rule (consumes_flags_concat
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect inst1)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect inst2))
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect2 inst1 inst2))
|
||||
|
||||
;; Combine flags-producing and -consuming instructions together, ensuring that
|
||||
;; they are emitted back-to-back and no other instructions can be emitted
|
||||
;; between them and potentially clobber the flags.
|
||||
;;
|
||||
;; Returns a `ValueRegs` according to the specific combination of ProducesFlags and ConsumesFlags modes:
|
||||
;; - SideEffect + ReturnsReg --> ValueReg with one Reg from consumer
|
||||
;; - SideEffect + ReturnsValueRegs --> ValueReg as given from consumer
|
||||
;; - ReturnsResultWithConsumer + ReturnsResultWithProducer --> ValueRegs with low part from producer, high part from consumer
|
||||
;;
|
||||
;; See `with_flags_reg` below for a variant that extracts out just the lower Reg.
|
||||
(decl with_flags (ProducesFlags ConsumesFlags) ValueRegs)
|
||||
|
||||
(rule (with_flags (ProducesFlags.ProducesFlagsReturnsResultWithConsumer producer_inst producer_result)
|
||||
(ConsumesFlags.ConsumesFlagsReturnsResultWithProducer consumer_inst consumer_result))
|
||||
(let ((_x Unit (emit producer_inst))
|
||||
(_y Unit (emit consumer_inst)))
|
||||
(value_regs producer_result consumer_result)))
|
||||
|
||||
(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
|
||||
(ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result))
|
||||
(let ((_x Unit (emit producer_inst))
|
||||
(_y Unit (emit consumer_inst)))
|
||||
(value_reg consumer_result)))
|
||||
|
||||
(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
|
||||
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1
|
||||
consumer_inst_2
|
||||
consumer_result))
|
||||
;; We must emit these instructions in order as the creator of
|
||||
;; the ConsumesFlags may be relying on dataflow dependencies
|
||||
;; amongst them.
|
||||
(let ((_x Unit (emit producer_inst))
|
||||
(_y Unit (emit consumer_inst_1))
|
||||
(_z Unit (emit consumer_inst_2)))
|
||||
consumer_result))
|
||||
|
||||
(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
|
||||
(ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1
|
||||
consumer_inst_2
|
||||
consumer_inst_3
|
||||
consumer_inst_4
|
||||
consumer_result))
|
||||
;; We must emit these instructions in order as the creator of
|
||||
;; the ConsumesFlags may be relying on dataflow dependencies
|
||||
;; amongst them.
|
||||
(let ((_x Unit (emit producer_inst))
|
||||
(_y Unit (emit consumer_inst_1))
|
||||
(_z Unit (emit consumer_inst_2))
|
||||
(_w Unit (emit consumer_inst_3))
|
||||
(_v Unit (emit consumer_inst_4)))
|
||||
consumer_result))
|
||||
|
||||
(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2)
|
||||
(ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result))
|
||||
(let ((_ Unit (emit producer_inst1))
|
||||
(_ Unit (emit producer_inst2))
|
||||
(_ Unit (emit consumer_inst)))
|
||||
(value_reg consumer_result)))
|
||||
|
||||
(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2)
|
||||
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1
|
||||
consumer_inst_2
|
||||
consumer_result))
|
||||
;; We must emit these instructions in order as the creator of
|
||||
;; the ConsumesFlags may be relying on dataflow dependencies
|
||||
;; amongst them.
|
||||
(let ((_ Unit (emit producer_inst1))
|
||||
(_ Unit (emit producer_inst2))
|
||||
(_ Unit (emit consumer_inst_1))
|
||||
(_ Unit (emit consumer_inst_2)))
|
||||
consumer_result))
|
||||
|
||||
(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2)
|
||||
(ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1
|
||||
consumer_inst_2
|
||||
consumer_inst_3
|
||||
consumer_inst_4
|
||||
consumer_result))
|
||||
;; We must emit these instructions in order as the creator of
|
||||
;; the ConsumesFlags may be relying on dataflow dependencies
|
||||
;; amongst them.
|
||||
(let ((_ Unit (emit producer_inst1))
|
||||
(_ Unit (emit producer_inst2))
|
||||
(_ Unit (emit consumer_inst_1))
|
||||
(_ Unit (emit consumer_inst_2))
|
||||
(_ Unit (emit consumer_inst_3))
|
||||
(_ Unit (emit consumer_inst_4)))
|
||||
consumer_result))
|
||||
|
||||
;; Variant of `with_flags` that returns only the first register (index 0) of
;; the `ValueRegs` produced by combining the given producer and consumer.
(decl with_flags_reg (ProducesFlags ConsumesFlags) Reg)
|
||||
(rule (with_flags_reg p c)
|
||||
(let ((v ValueRegs (with_flags p c)))
|
||||
(value_regs_get v 0)))
|
||||
|
||||
;; Indicate that the current state of the flags register from the instruction
|
||||
;; that produces this Value is relied on.
|
||||
(decl flags_to_producesflags (Value) ProducesFlags)
|
||||
(rule (flags_to_producesflags val)
|
||||
(let ((_ Unit (mark_value_used val)))
|
||||
(ProducesFlags.AlreadyExistingFlags)))
|
||||
|
||||
;; Combine a flags-producing instruction and a flags-consuming instruction that
|
||||
;; produces no results.
|
||||
;;
|
||||
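;; This function handles the following combinations:
|
||||
;; - AlreadyExistingFlags + ConsumesFlagsSideEffect
;; - AlreadyExistingFlags + ConsumesFlagsSideEffect2
;; - ProducesFlagsSideEffect + ConsumesFlagsSideEffect
;; - ProducesFlagsSideEffect + ConsumesFlagsSideEffect2
;; - ProducesFlagsTwiceSideEffect + ConsumesFlagsSideEffect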
|
||||
(decl with_flags_side_effect (ProducesFlags ConsumesFlags) SideEffectNoResult)
|
||||
|
||||
(rule (with_flags_side_effect
|
||||
(ProducesFlags.AlreadyExistingFlags)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect c))
|
||||
(SideEffectNoResult.Inst c))
|
||||
|
||||
(rule (with_flags_side_effect
|
||||
(ProducesFlags.AlreadyExistingFlags)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2))
|
||||
(SideEffectNoResult.Inst2 c1 c2))
|
||||
|
||||
(rule (with_flags_side_effect
|
||||
(ProducesFlags.ProducesFlagsSideEffect p)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect c))
|
||||
(SideEffectNoResult.Inst2 p c))
|
||||
|
||||
(rule (with_flags_side_effect
|
||||
(ProducesFlags.ProducesFlagsSideEffect p)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2))
|
||||
(SideEffectNoResult.Inst3 p c1 c2))
|
||||
|
||||
(rule (with_flags_side_effect
|
||||
(ProducesFlags.ProducesFlagsTwiceSideEffect p1 p2)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect c))
|
||||
(SideEffectNoResult.Inst3 p1 p2 c))
|
||||
|
||||
;;;; Helpers for accessing compilation flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl avoid_div_traps () Type)
|
||||
(extern extractor avoid_div_traps avoid_div_traps)
|
||||
|
||||
;; This definition should be kept up to date with the values defined in
|
||||
;; cranelift/codegen/meta/src/shared/settings.rs
|
||||
(type TlsModel extern (enum (None) (ElfGd) (Macho) (Coff)))
|
||||
|
||||
(decl tls_model (TlsModel) Type)
|
||||
(extern extractor infallible tls_model tls_model)
|
||||
|
||||
(decl pure tls_model_is_elf_gd () Unit)
|
||||
(extern constructor tls_model_is_elf_gd tls_model_is_elf_gd)
|
||||
|
||||
(decl pure tls_model_is_macho () Unit)
|
||||
(extern constructor tls_model_is_macho tls_model_is_macho)
|
||||
|
||||
(decl pure tls_model_is_coff () Unit)
|
||||
(extern constructor tls_model_is_coff tls_model_is_coff)
|
||||
|
||||
(decl pure preserve_frame_pointers () Unit)
|
||||
(extern constructor preserve_frame_pointers preserve_frame_pointers)
|
||||
|
||||
;;;; Helpers for accessing instruction data ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl box_external_name (ExternalName) BoxExternalName)
|
||||
(extern constructor box_external_name box_external_name)
|
||||
|
||||
;; Accessor for `FuncRef`.
|
||||
|
||||
(decl func_ref_data (SigRef ExternalName RelocDistance) FuncRef)
|
||||
(extern extractor infallible func_ref_data func_ref_data)
|
||||
|
||||
;; Accessor for `GlobalValue`.
|
||||
|
||||
(decl symbol_value_data (ExternalName RelocDistance i64) GlobalValue)
|
||||
(extern extractor symbol_value_data symbol_value_data)
|
||||
|
||||
;; Accessor for `RelocDistance`.
|
||||
|
||||
(decl reloc_distance_near () RelocDistance)
|
||||
(extern extractor reloc_distance_near reloc_distance_near)
|
||||
|
||||
;; Accessor for `Immediate` as a vector of u8 values.
|
||||
|
||||
(decl vec_mask_from_immediate (VecMask) Immediate)
|
||||
(extern extractor vec_mask_from_immediate vec_mask_from_immediate)
|
||||
|
||||
;; Accessor for `Immediate` as u128.
|
||||
|
||||
(decl u128_from_immediate (u128) Immediate)
|
||||
(extern extractor u128_from_immediate u128_from_immediate)
|
||||
|
||||
;; Accessor for `Constant` as u128.
|
||||
|
||||
(decl u128_from_constant (u128) Constant)
|
||||
(extern extractor u128_from_constant u128_from_constant)
|
||||
|
||||
;; Accessor for `Constant` as u64.
|
||||
|
||||
(decl u64_from_constant (u64) Constant)
|
||||
(extern extractor u64_from_constant u64_from_constant)
|
||||
|
||||
;;;; Helpers for generating returns ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; The (writable) register(s) that will contain the n'th return value.
|
||||
(decl retval (usize) WritableValueRegs)
|
||||
(extern constructor retval retval)
|
||||
|
||||
;; Extractor to check for the special case that a `WritableValueRegs`
|
||||
;; contains only a single register.
|
||||
(decl only_writable_reg (WritableReg) WritableValueRegs)
|
||||
(extern extractor only_writable_reg only_writable_reg)
|
||||
|
||||
;; Get the `n`th register inside a `WritableValueRegs`.
|
||||
(decl writable_regs_get (WritableValueRegs usize) WritableReg)
|
||||
(extern constructor writable_regs_get writable_regs_get)
|
||||
|
||||
;;;; Helpers for generating calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Type to hold information about a function call signature.
|
||||
(type Sig extern (enum))
|
||||
|
||||
;; Information on how to pass one argument or return value.
|
||||
(type ABIArg extern (enum))
|
||||
|
||||
;; Information on how to pass a single slot of one argument or return value.
|
||||
(type ABIArgSlot extern
|
||||
(enum
|
||||
(Reg
|
||||
(reg RealReg)
|
||||
(ty Type)
|
||||
(extension ArgumentExtension))
|
||||
(Stack
|
||||
(offset i64)
|
||||
(ty Type)
|
||||
(extension ArgumentExtension))))
|
||||
|
||||
;; Physical register that may hold an argument or return value.
|
||||
(type RealReg (primitive RealReg))
|
||||
|
||||
;; Instruction on whether and how to extend an argument value.
|
||||
(type ArgumentExtension extern
|
||||
(enum
|
||||
(None)
|
||||
(Uext)
|
||||
(Sext)))
|
||||
|
||||
;; Get the number of arguments expected.
|
||||
(decl abi_num_args (Sig) usize)
|
||||
(extern constructor abi_num_args abi_num_args)
|
||||
|
||||
;; Get information specifying how to pass one argument.
|
||||
(decl abi_get_arg (Sig usize) ABIArg)
|
||||
(extern constructor abi_get_arg abi_get_arg)
|
||||
|
||||
;; Get the number of return values expected.
|
||||
(decl abi_num_rets (Sig) usize)
|
||||
(extern constructor abi_num_rets abi_num_rets)
|
||||
|
||||
;; Get information specifying how to pass one return value.
|
||||
(decl abi_get_ret (Sig usize) ABIArg)
|
||||
(extern constructor abi_get_ret abi_get_ret)
|
||||
|
||||
;; Get information specifying how to pass the implicit pointer
|
||||
;; to the return-value area on the stack, if required.
|
||||
(decl abi_ret_arg (ABIArg) Sig)
|
||||
(extern extractor abi_ret_arg abi_ret_arg)
|
||||
|
||||
;; Succeeds if no implicit return-value area pointer is required.
|
||||
(decl abi_no_ret_arg () Sig)
|
||||
(extern extractor abi_no_ret_arg abi_no_ret_arg)
|
||||
|
||||
;; Size of the argument area.
|
||||
(decl abi_sized_stack_arg_space (Sig) i64)
|
||||
(extern constructor abi_sized_stack_arg_space abi_sized_stack_arg_space)
|
||||
|
||||
;; Size of the return-value area.
|
||||
(decl abi_sized_stack_ret_space (Sig) i64)
|
||||
(extern constructor abi_sized_stack_ret_space abi_sized_stack_ret_space)
|
||||
|
||||
;; StackSlot addr
|
||||
(decl abi_stackslot_addr (WritableReg StackSlot Offset32) MInst)
|
||||
(extern constructor abi_stackslot_addr abi_stackslot_addr)
|
||||
|
||||
;; DynamicStackSlot addr
|
||||
(decl abi_dynamic_stackslot_addr (WritableReg DynamicStackSlot) MInst)
|
||||
(extern constructor abi_dynamic_stackslot_addr abi_dynamic_stackslot_addr)
|
||||
|
||||
;; Extractor to detect the special case where an argument or
|
||||
;; return value only requires a single slot to be passed.
|
||||
(decl abi_arg_only_slot (ABIArgSlot) ABIArg)
|
||||
(extern extractor abi_arg_only_slot abi_arg_only_slot)
|
||||
|
||||
;; Extractor to detect the special case where a struct argument
|
||||
;; is explicitly passed by reference using a hidden pointer.
|
||||
(decl abi_arg_struct_pointer (ABIArgSlot i64 u64) ABIArg)
|
||||
(extern extractor abi_arg_struct_pointer abi_arg_struct_pointer)
|
||||
|
||||
;; Extractor to detect the special case where a non-struct argument
|
||||
;; is implicitly passed by reference using a hidden pointer.
|
||||
(decl abi_arg_implicit_pointer (ABIArgSlot i64 Type) ABIArg)
|
||||
(extern extractor abi_arg_implicit_pointer abi_arg_implicit_pointer)
|
||||
|
||||
;; Convert a real register number into a virtual register.
|
||||
(decl real_reg_to_reg (RealReg) Reg)
|
||||
(extern constructor real_reg_to_reg real_reg_to_reg)
|
||||
|
||||
;; Convert a real register number into a writable virtual register.
|
||||
(decl real_reg_to_writable_reg (RealReg) WritableReg)
|
||||
(extern constructor real_reg_to_writable_reg real_reg_to_writable_reg)
|
||||
|
||||
;; Generate a move between two registers.
|
||||
(decl gen_move (Type WritableReg Reg) MInst)
|
||||
(extern constructor gen_move gen_move)
|
||||
|
||||
;; Copy a return value to a set of registers.
|
||||
(decl copy_to_regs (WritableValueRegs Value) Unit)
|
||||
(rule (copy_to_regs dsts val @ (value_type ty))
|
||||
(let ((srcs ValueRegs (put_in_regs val)))
|
||||
(copy_to_regs_range ty (value_regs_range srcs) dsts srcs)))
|
||||
|
||||
;; Helper for `copy_to_regs` that uses a range to index into the reg/value
|
||||
;; vectors. Returns `(unit)` once the range is empty.
|
||||
(decl copy_to_regs_range (Type Range WritableValueRegs ValueRegs) Unit)
|
||||
|
||||
(rule (copy_to_regs_range ty (range_empty) dsts srcs)
|
||||
(unit))
|
||||
|
||||
(rule (copy_to_regs_range ty (range_unwrap head tail) dsts srcs)
|
||||
(let ((dst WritableReg (writable_regs_get dsts head))
|
||||
(src Reg (value_regs_get srcs head))
|
||||
(_ Unit (emit (gen_move ty dst src))))
|
||||
(copy_to_regs_range ty tail dsts srcs)))
|
||||
|
||||
|
||||
(decl lower_return (Range ValueSlice) InstOutput)
|
||||
(rule (lower_return (range_empty) _) (output_none))
|
||||
(rule (lower_return (range_unwrap head tail) args)
|
||||
(let ((_ Unit (copy_to_regs (retval head) (value_slice_get args head))))
|
||||
(lower_return tail args)))
|
||||
|
||||
;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(convert Inst Value def_inst)
|
||||
(convert Reg ValueRegs value_reg)
|
||||
(convert Value Reg put_in_reg)
|
||||
(convert Value ValueRegs put_in_regs)
|
||||
(convert WritableReg Reg writable_reg_to_reg)
|
||||
(convert ValueRegs InstOutput output)
|
||||
(convert Reg InstOutput output_reg)
|
||||
(convert Value InstOutput output_value)
|
||||
(convert ExternalName BoxExternalName box_external_name)
|
||||
(convert PReg Reg preg_to_reg)
|
||||
61
cranelift/codegen/src/prelude_opt.isle
Normal file
61
cranelift/codegen/src/prelude_opt.isle
Normal file
@@ -0,0 +1,61 @@
|
||||
;; Prelude definitions specific to the mid-end.
|
||||
|
||||
;;;;; eclass and enode access ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; An eclass ID.
|
||||
(type Id (primitive Id))
|
||||
|
||||
;; What is the type of an eclass (if a single type)?
|
||||
(decl eclass_type (Type) Id)
|
||||
(extern extractor eclass_type eclass_type)
|
||||
|
||||
;; Helper to wrap an Id-matching pattern and extract type.
|
||||
(decl has_type (Type Id) Id)
|
||||
(extractor (has_type ty id)
|
||||
(and (eclass_type ty)
|
||||
id))
|
||||
|
||||
;; Extract any node(s) for the given eclass ID.
|
||||
(decl multi enodes (Type InstructionImms IdArray) Id)
|
||||
(extern extractor enodes enodes_etor)
|
||||
|
||||
;; Construct a pure node, returning a new (or deduplicated
|
||||
;; already-existing) eclass ID.
|
||||
(decl pure_enode (Type InstructionImms IdArray) Id)
|
||||
(extern constructor pure_enode pure_enode_ctor)
|
||||
|
||||
;; Type of an Id slice (for args).
|
||||
(type IdArray (primitive IdArray))
|
||||
|
||||
(decl id_array_0 () IdArray)
|
||||
(extern constructor id_array_0 id_array_0_ctor)
|
||||
(extern extractor id_array_0 id_array_0_etor)
|
||||
(decl id_array_1 (Id) IdArray)
|
||||
(extern constructor id_array_1 id_array_1_ctor)
|
||||
(extern extractor id_array_1 id_array_1_etor)
|
||||
(decl id_array_2 (Id Id) IdArray)
|
||||
(extern constructor id_array_2 id_array_2_ctor)
|
||||
(extern extractor id_array_2 id_array_2_etor)
|
||||
(decl id_array_3 (Id Id Id) IdArray)
|
||||
(extern constructor id_array_3 id_array_3_ctor)
|
||||
(extern extractor id_array_3 id_array_3_etor)
|
||||
|
||||
;; Extractor to get the min loop-level of an eclass.
|
||||
(decl at_loop_level (u8 Id) Id)
|
||||
(extern extractor infallible at_loop_level at_loop_level)
|
||||
|
||||
;;;;; optimization toplevel ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; The main matcher rule invoked by the toplevel driver.
|
||||
(decl multi simplify (Id) Id)
|
||||
|
||||
;; Mark a node as requiring remat when used in a different block.
|
||||
(decl remat (Id) Id)
|
||||
(extern constructor remat remat)
|
||||
|
||||
;; Mark a node as subsuming whatever else it's rewritten from -- this
|
||||
;; is definitely preferable, not just a possible option. Useful for,
|
||||
;; e.g., constant propagation where we arrive at a definite "final
|
||||
;; answer".
|
||||
(decl subsume (Id) Id)
|
||||
(extern constructor subsume subsume)
|
||||
@@ -6,25 +6,22 @@
|
||||
|
||||
use crate::fx::FxHashMap;
|
||||
use core::hash::Hash;
|
||||
use core::mem;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
|
||||
#[cfg(not(feature = "std"))]
|
||||
use crate::fx::FxHasher;
|
||||
#[cfg(not(feature = "std"))]
|
||||
type Hasher = core::hash::BuildHasherDefault<FxHasher>;
|
||||
|
||||
struct Val<K, V> {
|
||||
struct Val<V> {
|
||||
value: V,
|
||||
next_key: Option<K>,
|
||||
depth: usize,
|
||||
level: u32,
|
||||
generation: u32,
|
||||
}
|
||||
|
||||
/// A view into an occupied entry in a `ScopedHashMap`. It is part of the `Entry` enum.
|
||||
pub struct OccupiedEntry<'a, K: 'a, V: 'a> {
|
||||
#[cfg(feature = "std")]
|
||||
entry: super::hash_map::OccupiedEntry<'a, K, Val<K, V>>,
|
||||
#[cfg(not(feature = "std"))]
|
||||
entry: super::hash_map::OccupiedEntry<'a, K, Val<K, V>, Hasher>,
|
||||
entry: super::hash_map::OccupiedEntry<'a, K, Val<V>>,
|
||||
}
|
||||
|
||||
impl<'a, K, V> OccupiedEntry<'a, K, V> {
|
||||
@@ -36,22 +33,34 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> {
|
||||
|
||||
/// A view into a vacant entry in a `ScopedHashMap`. It is part of the `Entry` enum.
|
||||
pub struct VacantEntry<'a, K: 'a, V: 'a> {
|
||||
#[cfg(feature = "std")]
|
||||
entry: super::hash_map::VacantEntry<'a, K, Val<K, V>>,
|
||||
#[cfg(not(feature = "std"))]
|
||||
entry: super::hash_map::VacantEntry<'a, K, Val<K, V>, Hasher>,
|
||||
next_key: Option<K>,
|
||||
depth: usize,
|
||||
entry: InsertLoc<'a, K, V>,
|
||||
depth: u32,
|
||||
generation: u32,
|
||||
}
|
||||
|
||||
impl<'a, K: Hash, V> VacantEntry<'a, K, V> {
|
||||
/// Where to insert from a `VacantEntry`. May be vacant or occupied in
|
||||
/// the underlying map because of lazy (generation-based) deletion.
|
||||
enum InsertLoc<'a, K: 'a, V: 'a> {
|
||||
Vacant(super::hash_map::VacantEntry<'a, K, Val<V>>),
|
||||
Occupied(super::hash_map::OccupiedEntry<'a, K, Val<V>>),
|
||||
}
|
||||
|
||||
impl<'a, K, V> VacantEntry<'a, K, V> {
|
||||
/// Sets the value of the entry with the `VacantEntry`'s key.
|
||||
pub fn insert(self, value: V) {
|
||||
self.entry.insert(Val {
|
||||
let val = Val {
|
||||
value,
|
||||
next_key: self.next_key,
|
||||
depth: self.depth,
|
||||
});
|
||||
level: self.depth,
|
||||
generation: self.generation,
|
||||
};
|
||||
match self.entry {
|
||||
InsertLoc::Vacant(v) => {
|
||||
v.insert(val);
|
||||
}
|
||||
InsertLoc::Occupied(mut o) => {
|
||||
o.insert(val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,9 +78,9 @@ pub enum Entry<'a, K: 'a, V: 'a> {
|
||||
/// Shadowing, where one scope has entries with the same keys as a containing scope,
|
||||
/// is not supported in this implementation.
|
||||
pub struct ScopedHashMap<K, V> {
|
||||
map: FxHashMap<K, Val<K, V>>,
|
||||
last_insert: Option<K>,
|
||||
current_depth: usize,
|
||||
map: FxHashMap<K, Val<V>>,
|
||||
generation_by_depth: SmallVec<[u32; 8]>,
|
||||
generation: u32,
|
||||
}
|
||||
|
||||
impl<K, V> ScopedHashMap<K, V>
|
||||
@@ -82,52 +91,115 @@ where
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
map: FxHashMap(),
|
||||
last_insert: None,
|
||||
current_depth: 0,
|
||||
generation: 0,
|
||||
generation_by_depth: smallvec![0],
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates an empty `ScopedHashMap` with some pre-allocated capacity.
|
||||
pub fn with_capacity(cap: usize) -> Self {
|
||||
let mut map = FxHashMap::default();
|
||||
map.reserve(cap);
|
||||
Self {
|
||||
map,
|
||||
generation: 0,
|
||||
generation_by_depth: smallvec![0],
|
||||
}
|
||||
}
|
||||
|
||||
/// Similar to `FxHashMap::entry`, gets the given key's corresponding entry in the map for
|
||||
/// in-place manipulation.
|
||||
pub fn entry(&mut self, key: K) -> Entry<K, V> {
|
||||
pub fn entry<'a>(&'a mut self, key: K) -> Entry<'a, K, V> {
|
||||
self.entry_with_depth(key, self.depth())
|
||||
}
|
||||
|
||||
/// Get the entry, setting the scope depth at which to insert.
|
||||
pub fn entry_with_depth<'a>(&'a mut self, key: K, depth: usize) -> Entry<'a, K, V> {
|
||||
debug_assert!(depth <= self.generation_by_depth.len());
|
||||
let generation = self.generation_by_depth[depth];
|
||||
let depth = depth as u32;
|
||||
use super::hash_map::Entry::*;
|
||||
match self.map.entry(key) {
|
||||
Occupied(entry) => Entry::Occupied(OccupiedEntry { entry }),
|
||||
Vacant(entry) => {
|
||||
let clone_key = entry.key().clone();
|
||||
Entry::Vacant(VacantEntry {
|
||||
entry,
|
||||
next_key: mem::replace(&mut self.last_insert, Some(clone_key)),
|
||||
depth: self.current_depth,
|
||||
})
|
||||
Occupied(entry) => {
|
||||
let entry_generation = entry.get().generation;
|
||||
let entry_depth = entry.get().level as usize;
|
||||
if self.generation_by_depth.get(entry_depth).cloned() == Some(entry_generation) {
|
||||
Entry::Occupied(OccupiedEntry { entry })
|
||||
} else {
|
||||
Entry::Vacant(VacantEntry {
|
||||
entry: InsertLoc::Occupied(entry),
|
||||
depth,
|
||||
generation,
|
||||
})
|
||||
}
|
||||
}
|
||||
Vacant(entry) => Entry::Vacant(VacantEntry {
|
||||
entry: InsertLoc::Vacant(entry),
|
||||
depth,
|
||||
generation,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a value from a key, if present.
|
||||
pub fn get<'a>(&'a self, key: &K) -> Option<&'a V> {
|
||||
self.map
|
||||
.get(key)
|
||||
.filter(|entry| {
|
||||
let level = entry.level as usize;
|
||||
self.generation_by_depth.get(level).cloned() == Some(entry.generation)
|
||||
})
|
||||
.map(|entry| &entry.value)
|
||||
}
|
||||
|
||||
/// Insert a key-value pair if absent. No-op if already exists.
|
||||
pub fn insert_if_absent(&mut self, key: K, value: V) {
|
||||
self.insert_if_absent_with_depth(key, value, self.depth());
|
||||
}
|
||||
|
||||
/// Insert a key-value pair if absent, using the given depth for
|
||||
/// the insertion. No-op if already exists.
|
||||
pub fn insert_if_absent_with_depth(&mut self, key: K, value: V, depth: usize) {
|
||||
match self.entry_with_depth(key, depth) {
|
||||
Entry::Vacant(v) => {
|
||||
v.insert(value);
|
||||
}
|
||||
Entry::Occupied(_) => {
|
||||
// Nothing.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Enter a new scope.
|
||||
pub fn increment_depth(&mut self) {
|
||||
// Increment the depth.
|
||||
self.current_depth = self.current_depth.checked_add(1).unwrap();
|
||||
self.generation_by_depth.push(self.generation);
|
||||
}
|
||||
|
||||
/// Exit the current scope.
|
||||
pub fn decrement_depth(&mut self) {
|
||||
// Remove all elements inserted at the current depth.
|
||||
while let Some(key) = self.last_insert.clone() {
|
||||
use crate::hash_map::Entry::*;
|
||||
match self.map.entry(key) {
|
||||
Occupied(entry) => {
|
||||
if entry.get().depth != self.current_depth {
|
||||
break;
|
||||
}
|
||||
self.last_insert = entry.remove_entry().1.next_key;
|
||||
}
|
||||
Vacant(_) => panic!(),
|
||||
}
|
||||
}
|
||||
self.generation += 1;
|
||||
self.generation_by_depth.pop();
|
||||
}
|
||||
|
||||
// Decrement the depth.
|
||||
self.current_depth = self.current_depth.checked_sub(1).unwrap();
|
||||
/// Return the current scope depth.
|
||||
pub fn depth(&self) -> usize {
|
||||
self.generation_by_depth
|
||||
.len()
|
||||
.checked_sub(1)
|
||||
.expect("generation_by_depth cannot be empty")
|
||||
}
|
||||
|
||||
/// Remote an entry.
|
||||
pub fn remove(&mut self, key: &K) -> Option<V> {
|
||||
self.map.remove(key).and_then(|val| {
|
||||
let entry_generation = val.generation;
|
||||
let entry_depth = val.level as usize;
|
||||
if self.generation_by_depth.get(entry_depth).cloned() == Some(entry_generation) {
|
||||
Some(val.value)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -230,4 +302,22 @@ mod tests {
|
||||
Entry::Vacant(entry) => entry.insert(3),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn insert_arbitrary_depth() {
|
||||
let mut map: ScopedHashMap<i32, i32> = ScopedHashMap::new();
|
||||
map.insert_if_absent(1, 2);
|
||||
assert_eq!(map.get(&1), Some(&2));
|
||||
map.increment_depth();
|
||||
assert_eq!(map.get(&1), Some(&2));
|
||||
map.insert_if_absent(3, 4);
|
||||
assert_eq!(map.get(&3), Some(&4));
|
||||
map.decrement_depth();
|
||||
assert_eq!(map.get(&3), None);
|
||||
map.increment_depth();
|
||||
map.insert_if_absent_with_depth(3, 4, 0);
|
||||
assert_eq!(map.get(&3), Some(&4));
|
||||
map.decrement_depth();
|
||||
assert_eq!(map.get(&3), Some(&4));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -529,6 +529,7 @@ probestack_strategy = "outline"
|
||||
regalloc_checker = false
|
||||
regalloc_verbose_logs = false
|
||||
enable_alias_analysis = true
|
||||
use_egraphs = false
|
||||
enable_verifier = true
|
||||
is_pic = false
|
||||
use_colocated_libcalls = false
|
||||
|
||||
@@ -87,15 +87,17 @@
|
||||
//!
|
||||
//! ## Data Structure and Example
|
||||
//!
|
||||
//! Each eclass id refers to a table entry that can be one of:
|
||||
//! Each eclass id refers to a table entry ("eclass node", which is
|
||||
//! different than an "enode") that can be one of:
|
||||
//!
|
||||
//! - A single enode;
|
||||
//! - An enode and an earlier eclass id it is appended to;
|
||||
//! - An enode and an earlier eclass id it is appended to (a "child"
|
||||
//! eclass node);
|
||||
//! - A "union node" with two earlier eclass ids.
|
||||
//!
|
||||
//! Building the aegraph consists solely of adding new entries to the
|
||||
//! end of this table. An enode in any given entry can only refer to
|
||||
//! earlier eclass ids.
|
||||
//! end of this table of eclass nodes. An enode referenced from any
|
||||
//! given eclass node can only refer to earlier eclass ids.
|
||||
//!
|
||||
//! For example, consider the following eclass table:
|
||||
//!
|
||||
@@ -218,7 +220,7 @@
|
||||
//! POPL 2021. <https://dl.acm.org/doi/10.1145/3434304>
|
||||
|
||||
use cranelift_entity::PrimaryMap;
|
||||
use cranelift_entity::{entity_impl, packed_option::ReservedValue};
|
||||
use cranelift_entity::{entity_impl, packed_option::ReservedValue, SecondaryMap};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::fmt::Debug;
|
||||
use std::hash::Hash;
|
||||
@@ -256,6 +258,20 @@ pub trait Language: CtxEq<Self::Node, Self::Node> + CtxHash<Self::Node> {
|
||||
fn needs_dedup(&self, node: &Self::Node) -> bool;
|
||||
}
|
||||
|
||||
/// A trait that allows the aegraph to compute a property of each
|
||||
/// node as it is created.
|
||||
pub trait Analysis {
|
||||
type L: Language;
|
||||
type Value: Clone + Default;
|
||||
fn for_node(
|
||||
&self,
|
||||
ctx: &Self::L,
|
||||
n: &<Self::L as Language>::Node,
|
||||
values: &SecondaryMap<Id, Self::Value>,
|
||||
) -> Self::Value;
|
||||
fn meet(&self, ctx: &Self::L, v1: &Self::Value, v2: &Self::Value) -> Self::Value;
|
||||
}
|
||||
|
||||
/// Conditionally-compiled trace-log macro. (Borrowed from
|
||||
/// `cranelift-codegen`; it's not worth factoring out a common
|
||||
/// subcrate for this.)
|
||||
@@ -269,18 +285,20 @@ macro_rules! trace {
|
||||
}
|
||||
|
||||
/// An egraph.
|
||||
pub struct EGraph<L: Language> {
|
||||
pub struct EGraph<L: Language, A: Analysis<L = L>> {
|
||||
/// Node-allocation arena.
|
||||
pub nodes: Vec<L::Node>,
|
||||
/// Hash-consing map from Nodes to eclass IDs.
|
||||
node_map: CtxHashMap<NodeKey, Id>,
|
||||
/// Eclass definitions. Each eclass consists of an enode, and
|
||||
/// parent pointer to the rest of the eclass.
|
||||
/// child pointer to the rest of the eclass.
|
||||
pub classes: PrimaryMap<Id, EClass>,
|
||||
/// Union-find for canonical ID generation. This lets us name an
|
||||
/// eclass with a canonical ID that is the same for all
|
||||
/// generations of the class.
|
||||
pub unionfind: UnionFind,
|
||||
/// Analysis and per-node state.
|
||||
pub analysis: Option<(A, SecondaryMap<Id, A::Value>)>,
|
||||
}
|
||||
|
||||
/// A reference to a node.
|
||||
@@ -298,7 +316,7 @@ impl NodeKey {
|
||||
|
||||
/// Get the node for this NodeKey, given the `nodes` from the
|
||||
/// appropriate `EGraph`.
|
||||
pub fn node<'a, L: Language>(&self, nodes: &'a [L::Node]) -> &'a L::Node {
|
||||
pub fn node<'a, N>(&self, nodes: &'a [N]) -> &'a N {
|
||||
&nodes[self.index as usize]
|
||||
}
|
||||
|
||||
@@ -311,35 +329,35 @@ impl NodeKey {
|
||||
}
|
||||
}
|
||||
|
||||
struct NodeKeyCtx<'a, L: Language> {
|
||||
struct NodeKeyCtx<'a, 'b, L: Language> {
|
||||
nodes: &'a [L::Node],
|
||||
node_ctx: &'a L,
|
||||
node_ctx: &'b L,
|
||||
}
|
||||
|
||||
impl<'ctx, L: Language> CtxEq<NodeKey, NodeKey> for NodeKeyCtx<'ctx, L> {
|
||||
impl<'a, 'b, L: Language> CtxEq<NodeKey, NodeKey> for NodeKeyCtx<'a, 'b, L> {
|
||||
fn ctx_eq(&self, a: &NodeKey, b: &NodeKey, uf: &mut UnionFind) -> bool {
|
||||
let a = a.node::<L>(self.nodes);
|
||||
let b = b.node::<L>(self.nodes);
|
||||
let a = a.node(self.nodes);
|
||||
let b = b.node(self.nodes);
|
||||
self.node_ctx.ctx_eq(a, b, uf)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'ctx, L: Language> CtxHash<NodeKey> for NodeKeyCtx<'ctx, L> {
|
||||
impl<'a, 'b, L: Language> CtxHash<NodeKey> for NodeKeyCtx<'a, 'b, L> {
|
||||
fn ctx_hash(&self, value: &NodeKey, uf: &mut UnionFind) -> u64 {
|
||||
self.node_ctx.ctx_hash(value.node::<L>(self.nodes), uf)
|
||||
self.node_ctx.ctx_hash(value.node(self.nodes), uf)
|
||||
}
|
||||
}
|
||||
|
||||
/// An EClass entry. Contains either a single new enode and a parent
|
||||
/// eclass (i.e., adds one new enode), or unions two parent eclasses
|
||||
/// An EClass entry. Contains either a single new enode and a child
|
||||
/// eclass (i.e., adds one new enode), or unions two child eclasses
|
||||
/// together.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct EClass {
|
||||
// formats:
|
||||
//
|
||||
// 00 | unused (31 bits) | NodeKey (31 bits)
|
||||
// 01 | eclass_parent (31 bits) | NodeKey (31 bits)
|
||||
// 10 | eclass_parent_1 (31 bits) | eclass_parent_id_2 (31 bits)
|
||||
// 00 | unused (31 bits) | NodeKey (31 bits)
|
||||
// 01 | eclass_child (31 bits) | NodeKey (31 bits)
|
||||
// 10 | eclass_child_1 (31 bits) | eclass_child_id_2 (31 bits)
|
||||
bits: u64,
|
||||
}
|
||||
|
||||
@@ -352,47 +370,47 @@ impl EClass {
|
||||
}
|
||||
}
|
||||
|
||||
fn node_and_parent(node: NodeKey, eclass_parent: Id) -> EClass {
|
||||
fn node_and_child(node: NodeKey, eclass_child: Id) -> EClass {
|
||||
let node_idx = node.bits() as u64;
|
||||
debug_assert!(node_idx < (1 << 31));
|
||||
debug_assert!(eclass_parent != Id::invalid());
|
||||
let parent = eclass_parent.0 as u64;
|
||||
debug_assert!(parent < (1 << 31));
|
||||
debug_assert!(eclass_child != Id::invalid());
|
||||
let child = eclass_child.0 as u64;
|
||||
debug_assert!(child < (1 << 31));
|
||||
EClass {
|
||||
bits: (0b01 << 62) | (parent << 31) | node_idx,
|
||||
bits: (0b01 << 62) | (child << 31) | node_idx,
|
||||
}
|
||||
}
|
||||
|
||||
fn union(parent1: Id, parent2: Id) -> EClass {
|
||||
debug_assert!(parent1 != Id::invalid());
|
||||
let parent1 = parent1.0 as u64;
|
||||
debug_assert!(parent1 < (1 << 31));
|
||||
fn union(child1: Id, child2: Id) -> EClass {
|
||||
debug_assert!(child1 != Id::invalid());
|
||||
let child1 = child1.0 as u64;
|
||||
debug_assert!(child1 < (1 << 31));
|
||||
|
||||
debug_assert!(parent2 != Id::invalid());
|
||||
let parent2 = parent2.0 as u64;
|
||||
debug_assert!(parent2 < (1 << 31));
|
||||
debug_assert!(child2 != Id::invalid());
|
||||
let child2 = child2.0 as u64;
|
||||
debug_assert!(child2 < (1 << 31));
|
||||
|
||||
EClass {
|
||||
bits: (0b10 << 62) | (parent1 << 31) | parent2,
|
||||
bits: (0b10 << 62) | (child1 << 31) | child2,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the node, if any, from a node-only or node-and-parent
|
||||
/// Get the node, if any, from a node-only or node-and-child
|
||||
/// eclass.
|
||||
pub fn get_node(&self) -> Option<NodeKey> {
|
||||
self.as_node()
|
||||
.or_else(|| self.as_node_and_parent().map(|(node, _)| node))
|
||||
.or_else(|| self.as_node_and_child().map(|(node, _)| node))
|
||||
}
|
||||
|
||||
/// Get the first parent, if any.
|
||||
pub fn parent1(&self) -> Option<Id> {
|
||||
self.as_node_and_parent()
|
||||
/// Get the first child, if any.
|
||||
pub fn child1(&self) -> Option<Id> {
|
||||
self.as_node_and_child()
|
||||
.map(|(_, p1)| p1)
|
||||
.or(self.as_union().map(|(p1, _)| p1))
|
||||
}
|
||||
|
||||
/// Get the second parent, if any.
|
||||
pub fn parent2(&self) -> Option<Id> {
|
||||
/// Get the second child, if any.
|
||||
pub fn child2(&self) -> Option<Id> {
|
||||
self.as_union().map(|(_, p2)| p2)
|
||||
}
|
||||
|
||||
@@ -406,25 +424,25 @@ impl EClass {
|
||||
}
|
||||
}
|
||||
|
||||
/// If this EClass is one new enode and a parent, return the node
|
||||
/// and parent ID.
|
||||
pub fn as_node_and_parent(&self) -> Option<(NodeKey, Id)> {
|
||||
/// If this EClass is one new enode and a child, return the node
|
||||
/// and child ID.
|
||||
pub fn as_node_and_child(&self) -> Option<(NodeKey, Id)> {
|
||||
if (self.bits >> 62) == 0b01 {
|
||||
let node_idx = (self.bits & ((1 << 31) - 1)) as u32;
|
||||
let parent = ((self.bits >> 31) & ((1 << 31) - 1)) as u32;
|
||||
Some((NodeKey::from_bits(node_idx), Id::from_bits(parent)))
|
||||
let child = ((self.bits >> 31) & ((1 << 31) - 1)) as u32;
|
||||
Some((NodeKey::from_bits(node_idx), Id::from_bits(child)))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// If this EClass is the union variety, return the two parent
|
||||
/// If this EClass is the union variety, return the two child
|
||||
/// EClasses. Both are guaranteed not to be `Id::invalid()`.
|
||||
pub fn as_union(&self) -> Option<(Id, Id)> {
|
||||
if (self.bits >> 62) == 0b10 {
|
||||
let parent1 = ((self.bits >> 31) & ((1 << 31) - 1)) as u32;
|
||||
let parent2 = (self.bits & ((1 << 31) - 1)) as u32;
|
||||
Some((Id::from_bits(parent1), Id::from_bits(parent2)))
|
||||
let child1 = ((self.bits >> 31) & ((1 << 31) - 1)) as u32;
|
||||
let child2 = (self.bits & ((1 << 31) - 1)) as u32;
|
||||
Some((Id::from_bits(child1), Id::from_bits(child2)))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
@@ -449,27 +467,31 @@ impl<T> NewOrExisting<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<L: Language> EGraph<L>
|
||||
impl<L: Language, A: Analysis<L = L>> EGraph<L, A>
|
||||
where
|
||||
L::Node: 'static,
|
||||
{
|
||||
/// Create a new aegraph.
|
||||
pub fn new() -> Self {
|
||||
pub fn new(analysis: Option<A>) -> Self {
|
||||
let analysis = analysis.map(|a| (a, SecondaryMap::new()));
|
||||
Self {
|
||||
nodes: vec![],
|
||||
node_map: CtxHashMap::new(),
|
||||
classes: PrimaryMap::new(),
|
||||
unionfind: UnionFind::new(),
|
||||
analysis,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new aegraph with the given capacity.
|
||||
pub fn with_capacity(nodes: usize) -> Self {
|
||||
pub fn with_capacity(nodes: usize, analysis: Option<A>) -> Self {
|
||||
let analysis = analysis.map(|a| (a, SecondaryMap::with_capacity(nodes)));
|
||||
Self {
|
||||
nodes: Vec::with_capacity(nodes),
|
||||
node_map: CtxHashMap::with_capacity(nodes),
|
||||
classes: PrimaryMap::with_capacity(nodes),
|
||||
unionfind: UnionFind::with_capacity(nodes),
|
||||
analysis,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -506,6 +528,10 @@ where
|
||||
// Add to interning map with a NodeKey referring to the eclass.
|
||||
v.insert(eclass_id);
|
||||
|
||||
// Update analysis.
|
||||
let node_ctx = ctx.node_ctx;
|
||||
self.update_analysis(node_ctx, eclass_id);
|
||||
|
||||
NewOrExisting::New(eclass_id)
|
||||
}
|
||||
}
|
||||
@@ -520,7 +546,7 @@ where
|
||||
/// property (args must have lower eclass Ids than the eclass
|
||||
/// containing the node with those args). Returns the Id of the
|
||||
/// merged eclass.
|
||||
pub fn union(&mut self, a: Id, b: Id) -> Id {
|
||||
pub fn union(&mut self, ctx: &L, a: Id, b: Id) -> Id {
|
||||
assert_ne!(a, Id::invalid());
|
||||
assert_ne!(b, Id::invalid());
|
||||
let (a, b) = (std::cmp::max(a, b), std::cmp::min(a, b));
|
||||
@@ -532,16 +558,17 @@ where
|
||||
|
||||
self.unionfind.union(a, b);
|
||||
|
||||
// If the younger eclass has no parent, we can link it
|
||||
// If the younger eclass has no child, we can link it
|
||||
// directly and return that eclass. Otherwise, we create a new
|
||||
// union eclass.
|
||||
if let Some(node) = self.classes[a].as_node() {
|
||||
trace!(
|
||||
" -> id {} is one-node eclass; making into node-and-parent with id {}",
|
||||
" -> id {} is one-node eclass; making into node-and-child with id {}",
|
||||
a,
|
||||
b
|
||||
);
|
||||
self.classes[a] = EClass::node_and_parent(node, b);
|
||||
self.classes[a] = EClass::node_and_child(node, b);
|
||||
self.update_analysis(ctx, a);
|
||||
return a;
|
||||
}
|
||||
|
||||
@@ -549,6 +576,7 @@ where
|
||||
self.unionfind.add(u);
|
||||
self.unionfind.union(u, b);
|
||||
trace!(" -> union id {} and id {} into id {}", a, b, u);
|
||||
self.update_analysis(ctx, u);
|
||||
u
|
||||
}
|
||||
|
||||
@@ -569,12 +597,41 @@ where
|
||||
}
|
||||
|
||||
/// Get the enodes for a given eclass.
|
||||
pub fn enodes(&self, eclass: Id) -> NodeIter<L> {
|
||||
pub fn enodes(&self, eclass: Id) -> NodeIter<L, A> {
|
||||
NodeIter {
|
||||
stack: smallvec![eclass],
|
||||
_phantom: PhantomData,
|
||||
_phantom1: PhantomData,
|
||||
_phantom2: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
/// Update analysis for a given eclass node.
|
||||
fn update_analysis(&mut self, ctx: &L, eclass: Id) {
|
||||
if let Some((analysis, state)) = self.analysis.as_mut() {
|
||||
let eclass_data = self.classes[eclass];
|
||||
let value = if let Some(node_key) = eclass_data.as_node() {
|
||||
let node = node_key.node(&self.nodes);
|
||||
analysis.for_node(ctx, node, state)
|
||||
} else if let Some((node_key, child)) = eclass_data.as_node_and_child() {
|
||||
let node = node_key.node(&self.nodes);
|
||||
let value = analysis.for_node(ctx, node, state);
|
||||
let child_value = &state[child];
|
||||
analysis.meet(ctx, &value, child_value)
|
||||
} else if let Some((c1, c2)) = eclass_data.as_union() {
|
||||
let c1 = &state[c1];
|
||||
let c2 = &state[c2];
|
||||
analysis.meet(ctx, c1, c2)
|
||||
} else {
|
||||
panic!("Invalid eclass node: {:?}", eclass_data);
|
||||
};
|
||||
state[eclass] = value;
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the analysis value for a given eclass. Panics if no analysis is present.
|
||||
pub fn analysis_value(&self, eclass: Id) -> &A::Value {
|
||||
&self.analysis.as_ref().unwrap().1[eclass]
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over all nodes in an eclass.
|
||||
@@ -582,27 +639,28 @@ where
|
||||
/// Because eclasses are immutable once created, this does *not* need
|
||||
/// to hold an open borrow on the egraph; it is free to add new nodes,
|
||||
/// while our existing Ids will remain valid.
|
||||
pub struct NodeIter<L: Language> {
|
||||
pub struct NodeIter<L: Language, A: Analysis<L = L>> {
|
||||
stack: SmallVec<[Id; 8]>,
|
||||
_phantom: PhantomData<L>,
|
||||
_phantom1: PhantomData<L>,
|
||||
_phantom2: PhantomData<A>,
|
||||
}
|
||||
|
||||
impl<L: Language> NodeIter<L> {
|
||||
pub fn next<'a>(&mut self, egraph: &'a EGraph<L>) -> Option<&'a L::Node> {
|
||||
impl<L: Language, A: Analysis<L = L>> NodeIter<L, A> {
|
||||
pub fn next<'a>(&mut self, egraph: &'a EGraph<L, A>) -> Option<&'a L::Node> {
|
||||
while let Some(next) = self.stack.pop() {
|
||||
let eclass = egraph.classes[next];
|
||||
if let Some(node) = eclass.as_node() {
|
||||
return Some(&egraph.nodes[node.index as usize]);
|
||||
} else if let Some((node, parent)) = eclass.as_node_and_parent() {
|
||||
if parent != Id::invalid() {
|
||||
self.stack.push(parent);
|
||||
} else if let Some((node, child)) = eclass.as_node_and_child() {
|
||||
if child != Id::invalid() {
|
||||
self.stack.push(child);
|
||||
}
|
||||
return Some(&egraph.nodes[node.index as usize]);
|
||||
} else if let Some((parent1, parent2)) = eclass.as_union() {
|
||||
debug_assert!(parent1 != Id::invalid());
|
||||
debug_assert!(parent2 != Id::invalid());
|
||||
self.stack.push(parent2);
|
||||
self.stack.push(parent1);
|
||||
} else if let Some((child1, child2)) = eclass.as_union() {
|
||||
debug_assert!(child1 != Id::invalid());
|
||||
debug_assert!(child2 != Id::invalid());
|
||||
self.stack.push(child2);
|
||||
self.stack.push(child1);
|
||||
continue;
|
||||
} else {
|
||||
unreachable!("Invalid eclass format");
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
use crate::{trace, Id};
|
||||
use cranelift_entity::SecondaryMap;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
/// A union-find data structure. The data structure can allocate
|
||||
/// `Id`s, indicating eclasses, and can merge eclasses together.
|
||||
@@ -67,4 +68,18 @@ impl UnionFind {
|
||||
trace!("union: {}, {}", a, b);
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine if two `Id`s are equivalent, after
|
||||
/// canonicalizing. Update union-find data structure during our
|
||||
/// canonicalization to make future lookups faster.
|
||||
pub fn equiv_id_mut(&mut self, a: Id, b: Id) -> bool {
|
||||
self.find_and_update(a) == self.find_and_update(b)
|
||||
}
|
||||
|
||||
/// Hash an `Id` after canonicalizing it. Update union-find data
|
||||
/// structure to make future lookups/hashing faster.
|
||||
pub fn hash_id_mut<H: Hasher>(&mut self, hash: &mut H, id: Id) {
|
||||
let id = self.find_and_update(id);
|
||||
id.hash(hash);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -143,6 +143,24 @@ impl<T: EntityRef + ReservedValue> ListPool<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new list pool with the given capacity for data pre-allocated.
|
||||
pub fn with_capacity(len: usize) -> Self {
|
||||
Self {
|
||||
data: Vec::with_capacity(len),
|
||||
free: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the capacity of this pool. This will be somewhat higher
|
||||
/// than the total length of lists that can be stored without
|
||||
/// reallocating, because of internal metadata overheads. It is
|
||||
/// mostly useful to allow another pool to be allocated that is
|
||||
/// likely to hold data transferred from this one without the need
|
||||
/// to grow.
|
||||
pub fn capacity(&self) -> usize {
|
||||
self.data.capacity()
|
||||
}
|
||||
|
||||
/// Clear the pool, forgetting about all lists that use it.
|
||||
///
|
||||
/// This invalidates any existing entity lists that used this pool to allocate memory.
|
||||
|
||||
13
cranelift/filetests/filetests/egraph/algebraic.clif
Normal file
13
cranelift/filetests/filetests/egraph/algebraic.clif
Normal file
@@ -0,0 +1,13 @@
|
||||
test optimize
|
||||
set opt_level=none
|
||||
set use_egraphs=true
|
||||
target x86_64
|
||||
|
||||
function %f(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = iconst.i32 2
|
||||
v2 = imul v0, v1
|
||||
; check: v1 = iadd v0, v0
|
||||
; nextln: return v1
|
||||
return v2
|
||||
}
|
||||
22
cranelift/filetests/filetests/egraph/alias_analysis.clif
Normal file
22
cranelift/filetests/filetests/egraph/alias_analysis.clif
Normal file
@@ -0,0 +1,22 @@
|
||||
test optimize
|
||||
set opt_level=none
|
||||
set use_egraphs=true
|
||||
target x86_64
|
||||
|
||||
function %f(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = iconst.i64 0
|
||||
v2 = bor.i64 v0, v1
|
||||
v3 = load.i64 heap v0
|
||||
v4 = load.i64 heap v2
|
||||
v5 = band.i64 v3, v4
|
||||
store.i64 v0, v5
|
||||
v6 = load.i64 v3
|
||||
v7 = load.i64 v6
|
||||
return v7
|
||||
}
|
||||
|
||||
; check: v1 = load.i64 heap v0
|
||||
; nextln: store v0, v1
|
||||
; nextln: v2 = load.i64 v0
|
||||
; nextln: return v2
|
||||
29
cranelift/filetests/filetests/egraph/basic-gvn.clif
Normal file
29
cranelift/filetests/filetests/egraph/basic-gvn.clif
Normal file
@@ -0,0 +1,29 @@
|
||||
test optimize
|
||||
set opt_level=none
|
||||
set use_egraphs=true
|
||||
target x86_64
|
||||
|
||||
function %f(i32, i32) -> i32 {
|
||||
block0(v0: i32, v1: i32):
|
||||
v2 = iadd v0, v1
|
||||
brnz v2, block1(v0)
|
||||
jump block2(v1)
|
||||
|
||||
block1(v3: i32):
|
||||
v4 = iadd v0, v1
|
||||
v5 = iadd v4, v3
|
||||
return v5
|
||||
|
||||
block2(v6: i32):
|
||||
return v6
|
||||
}
|
||||
|
||||
;; Check that the `iadd` for `v4` is subsumed by `v2`:
|
||||
|
||||
; check: block0(v0: i32, v1: i32):
|
||||
; nextln: v2 = iadd v0, v1
|
||||
; check: block1:
|
||||
; nextln: v3 = iadd.i32 v2, v0
|
||||
; nextln: return v3
|
||||
; check: block2:
|
||||
; nextln: return v1
|
||||
40
cranelift/filetests/filetests/egraph/licm.clif
Normal file
40
cranelift/filetests/filetests/egraph/licm.clif
Normal file
@@ -0,0 +1,40 @@
|
||||
test optimize
|
||||
set opt_level=none
|
||||
set use_egraphs=true
|
||||
target x86_64
|
||||
|
||||
function %f(i32, i32) -> i32 {
|
||||
block0(v0: i32, v1: i32):
|
||||
jump block1(v0)
|
||||
|
||||
block1(v2: i32):
|
||||
v3 = iconst.i32 1
|
||||
v4 = iadd.i32 v1, v3
|
||||
v5 = iconst.i32 40
|
||||
v6 = icmp eq v2, v5
|
||||
v7 = iconst.i32 1
|
||||
v8 = iadd.i32 v2, v7
|
||||
brnz v6, block2(v4)
|
||||
jump block1(v8)
|
||||
|
||||
block2(v9: i32):
|
||||
return v9
|
||||
}
|
||||
|
||||
; check: block0(v0: i32, v1: i32):
|
||||
; nextln: jump block1(v0)
|
||||
|
||||
; check: block1(v2: i32):
|
||||
;; constants are not lifted; they are rematerialized in each block where used
|
||||
; nextln: v3 = iconst.i32 40
|
||||
; nextln: v4 = icmp eq v2, v3
|
||||
; nextln: v5 = iconst.i32 1
|
||||
; nextln: v6 = iadd v2, v5
|
||||
; nextln: brnz v4, block2
|
||||
; nextln: jump block1(v6)
|
||||
|
||||
; check: block2:
|
||||
; nextln: v7 = iconst.i32 1
|
||||
; nextln: v8 = iadd.i32 v1, v7
|
||||
; nextln: return v8
|
||||
|
||||
21
cranelift/filetests/filetests/egraph/misc.clif
Normal file
21
cranelift/filetests/filetests/egraph/misc.clif
Normal file
@@ -0,0 +1,21 @@
|
||||
test optimize
|
||||
set opt_level=none
|
||||
set use_egraphs=true
|
||||
target x86_64
|
||||
|
||||
function %stack_load(i64) -> i64 {
|
||||
ss0 = explicit_slot 8
|
||||
|
||||
block0(v0: i64):
|
||||
stack_store.i64 v0, ss0
|
||||
v1 = stack_load.i64 ss0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: function %stack_load(i64) -> i64 fast {
|
||||
; nextln: ss0 = explicit_slot 8
|
||||
; check: block0(v0: i64):
|
||||
; nextln: v1 = stack_addr.i64 ss0
|
||||
; nextln: store notrap aligned v0, v1
|
||||
; nextln: return v0
|
||||
; nextln: }
|
||||
24
cranelift/filetests/filetests/egraph/multivalue.clif
Normal file
24
cranelift/filetests/filetests/egraph/multivalue.clif
Normal file
@@ -0,0 +1,24 @@
|
||||
test compile precise-output
|
||||
set use_egraphs=true
|
||||
target x86_64
|
||||
|
||||
;; We want to make sure that this compiles successfully, so we are properly
|
||||
;; handling multi-value operator nodes.
|
||||
|
||||
function u0:359(i64) -> i8, i8 system_v {
|
||||
sig0 = (i64) -> i8, i8 system_v
|
||||
fn0 = colocated u0:521 sig0
|
||||
|
||||
block0(v0: i64):
|
||||
v3, v4 = call fn0(v0)
|
||||
return v3, v4
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; call User(userextname0)
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
23
cranelift/filetests/filetests/egraph/not_a_load.clif
Normal file
23
cranelift/filetests/filetests/egraph/not_a_load.clif
Normal file
@@ -0,0 +1,23 @@
|
||||
test compile precise-output
|
||||
set use_egraphs=true
|
||||
target x86_64
|
||||
|
||||
;; `atomic_rmw` is not a load, but it reports `true` to `.can_load()`. We want
|
||||
;; to make sure the alias analysis machinery doesn't break when we have these odd
|
||||
;; memory ops in the IR.
|
||||
|
||||
function u0:1302(i64) -> i64 system_v {
|
||||
block0(v0: i64):
|
||||
v9 = atomic_rmw.i64 add v0, v0
|
||||
return v0
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; atomically { 64_bits_at_[%r9]) Add= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }
|
||||
; movq %rdi, %rax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
35
cranelift/filetests/filetests/egraph/remat.clif
Normal file
35
cranelift/filetests/filetests/egraph/remat.clif
Normal file
@@ -0,0 +1,35 @@
|
||||
test optimize
|
||||
set opt_level=none
|
||||
set use_egraphs=true
|
||||
target x86_64
|
||||
|
||||
function %f(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = iconst.i32 42
|
||||
v2 = iadd.i32 v0, v1
|
||||
brnz v2, block1
|
||||
jump block2
|
||||
|
||||
block1:
|
||||
v3 = iconst.i32 84
|
||||
v4 = iadd.i32 v2, v3
|
||||
return v4
|
||||
|
||||
block2:
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: block0(v0: i32):
|
||||
; nextln: v1 = iconst.i32 42
|
||||
; nextln: v2 = iadd v0, v1
|
||||
; nextln: brnz v2, block1
|
||||
; nextln: jump block2
|
||||
; check: block1:
|
||||
; nextln: v5 = iconst.i32 126
|
||||
; nextln: v6 = iadd.i32 v0, v5
|
||||
; nextln: return v6
|
||||
; check: block2:
|
||||
; nextln: v3 = iconst.i32 42
|
||||
; nextln: v4 = iadd.i32 v0, v3
|
||||
; nextln: return v4
|
||||
|
||||
@@ -45,6 +45,7 @@ mod test_domtree;
|
||||
mod test_interpret;
|
||||
mod test_legalizer;
|
||||
mod test_licm;
|
||||
mod test_optimize;
|
||||
mod test_preopt;
|
||||
mod test_print_cfg;
|
||||
mod test_run;
|
||||
@@ -120,6 +121,7 @@ fn new_subtest(parsed: &TestCommand) -> anyhow::Result<Box<dyn subtest::SubTest>
|
||||
"interpret" => test_interpret::subtest(parsed),
|
||||
"legalizer" => test_legalizer::subtest(parsed),
|
||||
"licm" => test_licm::subtest(parsed),
|
||||
"optimize" => test_optimize::subtest(parsed),
|
||||
"preopt" => test_preopt::subtest(parsed),
|
||||
"print-cfg" => test_print_cfg::subtest(parsed),
|
||||
"run" => test_run::subtest(parsed),
|
||||
|
||||
47
cranelift/filetests/src/test_optimize.rs
Normal file
47
cranelift/filetests/src/test_optimize.rs
Normal file
@@ -0,0 +1,47 @@
|
||||
//! Test command for testing the optimization phases.
|
||||
//!
|
||||
//! The `optimize` test command runs each function through the
|
||||
//! optimization passes, but not lowering or regalloc. The output for
|
||||
//! filecheck purposes is the resulting CLIF.
|
||||
//!
|
||||
//! Some legalization may be ISA-specific, so this requires an ISA
|
||||
//! (for now).
|
||||
|
||||
use crate::subtest::{run_filecheck, Context, SubTest};
|
||||
use anyhow::Result;
|
||||
use cranelift_codegen::ir;
|
||||
use cranelift_reader::TestCommand;
|
||||
use std::borrow::Cow;
|
||||
|
||||
struct TestOptimize;
|
||||
|
||||
pub fn subtest(parsed: &TestCommand) -> Result<Box<dyn SubTest>> {
|
||||
assert_eq!(parsed.command, "optimize");
|
||||
Ok(Box::new(TestOptimize))
|
||||
}
|
||||
|
||||
impl SubTest for TestOptimize {
|
||||
fn name(&self) -> &'static str {
|
||||
"optimize"
|
||||
}
|
||||
|
||||
fn is_mutating(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn needs_isa(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn run(&self, func: Cow<ir::Function>, context: &Context) -> Result<()> {
|
||||
let isa = context.isa.expect("optimize needs an ISA");
|
||||
let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
|
||||
|
||||
comp_ctx
|
||||
.optimize(isa)
|
||||
.map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, e))?;
|
||||
|
||||
let clif = format!("{:?}", comp_ctx.func);
|
||||
run_filecheck(&clif, context)
|
||||
}
|
||||
}
|
||||
@@ -108,6 +108,18 @@ fn check_overlaps(env: &TermEnv) -> Errors {
|
||||
let mut by_term = HashMap::new();
|
||||
for rule in env.rules.iter() {
|
||||
if let sema::Pattern::Term(_, tid, ref vars) = rule.lhs {
|
||||
let is_multi_ctor = match &env.terms[tid.index()].kind {
|
||||
&TermKind::Decl { multi, .. } => multi,
|
||||
_ => false,
|
||||
};
|
||||
if is_multi_ctor {
|
||||
// Rules for multi-constructors are not checked for
|
||||
// overlap: the ctor returns *every* match, not just
|
||||
// the first or highest-priority one, so overlap does
|
||||
// not actually affect the results.
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut binds = Vec::new();
|
||||
let rule = RulePatterns {
|
||||
rule,
|
||||
|
||||
@@ -393,6 +393,7 @@ impl Engine {
|
||||
| "machine_code_cfg_info"
|
||||
| "tls_model" // wasmtime doesn't use tls right now
|
||||
| "opt_level" // opt level doesn't change semantics
|
||||
| "use_egraphs" // optimizing with egraphs doesn't change semantics
|
||||
| "enable_alias_analysis" // alias analysis-based opts don't change semantics
|
||||
| "probestack_func_adjusts_sp" // probestack above asserted disabled
|
||||
| "probestack_size_log2" // probestack above asserted disabled
|
||||
|
||||
@@ -24,8 +24,8 @@ const CRATES_TO_PUBLISH: &[&str] = &[
|
||||
"cranelift-bforest",
|
||||
"cranelift-codegen-shared",
|
||||
"cranelift-codegen-meta",
|
||||
"cranelift-codegen",
|
||||
"cranelift-egraph",
|
||||
"cranelift-codegen",
|
||||
"cranelift-reader",
|
||||
"cranelift-serde",
|
||||
"cranelift-module",
|
||||
@@ -88,6 +88,7 @@ const PUBLIC_CRATES: &[&str] = &[
|
||||
"cranelift-bforest",
|
||||
"cranelift-codegen-shared",
|
||||
"cranelift-codegen-meta",
|
||||
"cranelift-egraph",
|
||||
"cranelift-codegen",
|
||||
"cranelift-reader",
|
||||
"cranelift-serde",
|
||||
|
||||
Reference in New Issue
Block a user