egraph-based midend: draw the rest of the owl (productionized). (#4953)

* egraph-based midend: draw the rest of the owl.

* Rename `egg` submodule of cranelift-codegen to `egraph`.

* Apply some feedback from @jsharp during code walkthrough.

* Remove recursion from find_best_node by doing a single pass.

Rather than recursively computing the lowest-cost node for a given
eclass and memoizing the answer at each eclass node, we can do a single
forward pass; because every eclass node refers only to earlier nodes,
this is sufficient. The behavior may slightly differ from the earlier
behavior because we cannot short-circuit costs to zero once a node is
elaborated; but in practice this should not matter.

* Make elaboration non-recursive.

Use an explicit stack instead (with `ElabStackEntry` entries,
alongside a result stack).

* Make elaboration traversal of the domtree non-recursive/stack-safe.

* Work analysis logic in Cranelift-side egraph glue into a general analysis framework in cranelift-egraph.

* Apply static recursion limit to rule application.

* Fix aarch64 wrt dynamic-vector support -- broken rebase.

* Topo-sort cranelift-egraph before cranelift-codegen in publish script, like the comment instructs me to!

* Fix multi-result call testcase.

* Include `cranelift-egraph` in `PUBLISHED_CRATES`.

* Fix atomic_rmw: not really a load.

* Remove now-unnecessary PartialOrd/Ord derivations.

* Address some code-review comments.

* Review feedback.

* Review feedback.

* No overlap in mid-end rules, because we are defining a multi-constructor.

* rustfmt

* Review feedback.

* Review feedback.

* Review feedback.

* Review feedback.

* Remove redundant `mut`.

* Add comment noting what rules can do.

* Review feedback.

* Clarify comment wording.

* Update `has_memory_fence_semantics`.

* Apply @jameysharp's improved loop-level computation.

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Fix suggestion commit.

* Fix off-by-one in new loop-nest analysis.

* Review feedback.

* Review feedback.

* Review feedback.

* Use `Default`, not `std::default::Default`, as per @fitzgen

Co-authored-by: Nick Fitzgerald <fitzgen@gmail.com>

* Apply @fitzgen's comment elaboration to a doc-comment.

Co-authored-by: Nick Fitzgerald <fitzgen@gmail.com>

* Add stat for hitting the rewrite-depth limit.

* Some code motion in split prelude to make the diff a little clearer wrt `main`.

* Take @jameysharp's suggested `try_into()` usage for blockparam indices.

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Take @jameysharp's suggestion to avoid double-match on load op.

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Fix suggestion (add import).

* Review feedback.

* Fix stack_load handling.

* Remove redundant can_store case.

* Take @jameysharp's suggested improvement to FuncEGraph::build() logic

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Tweaks to FuncEGraph::build() on top of suggestion.

* Take @jameysharp's suggested clarified condition

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Clean up after suggestion (unused variable).

* Fix loop analysis.

* loop level asserts

* Revert constant-space loop analysis -- edge cases were incorrect, so let's go with the simple thing for now.

* Take @jameysharp's suggestion re: result_tys

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Fix up after suggestion

* Take @jameysharp's suggestion to use fold rather than reduce

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Fixup after suggestion

* Take @jameysharp's suggestion to remove elaborate_eclass_use's return value.

* Clarifying comment in terminator insts.

Co-authored-by: Jamey Sharp <jamey@minilop.net>
Co-authored-by: Nick Fitzgerald <fitzgen@gmail.com>
This commit is contained in:
Chris Fallin
2022-10-11 18:15:53 -07:00
committed by GitHub
parent e2f1ced0b6
commit 2be12a5167
59 changed files with 5125 additions and 1580 deletions

1
Cargo.lock generated
View File

@@ -537,6 +537,7 @@ dependencies = [
"cranelift-bforest",
"cranelift-codegen-meta",
"cranelift-codegen-shared",
"cranelift-egraph",
"cranelift-entity",
"cranelift-isle",
"criterion",

View File

@@ -129,6 +129,7 @@ wasmtime-fuzzing = { path = "crates/fuzzing" }
cranelift-wasm = { path = "cranelift/wasm", version = "0.90.0" }
cranelift-codegen = { path = "cranelift/codegen", version = "0.90.0" }
cranelift-egraph = { path = "cranelift/egraph", version = "0.90.0" }
cranelift-frontend = { path = "cranelift/frontend", version = "0.90.0" }
cranelift-entity = { path = "cranelift/entity", version = "0.90.0" }
cranelift-native = { path = "cranelift/native", version = "0.90.0" }

View File

@@ -18,6 +18,7 @@ bumpalo = "3"
cranelift-codegen-shared = { path = "./shared", version = "0.90.0" }
cranelift-entity = { workspace = true }
cranelift-bforest = { workspace = true }
cranelift-egraph = { workspace = true }
hashbrown = { workspace = true, optional = true }
target-lexicon = { workspace = true }
log = { workspace = true }

View File

@@ -177,9 +177,19 @@ fn get_isle_compilations(
) -> Result<IsleCompilations, std::io::Error> {
let cur_dir = std::env::current_dir()?;
let clif_isle = out_dir.join("clif.isle");
// Preludes.
let clif_lower_isle = out_dir.join("clif_lower.isle");
let clif_opt_isle = out_dir.join("clif_opt.isle");
let prelude_isle =
make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("prelude.isle"));
let prelude_opt_isle =
make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("prelude_opt.isle"));
let prelude_lower_isle =
make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("prelude_lower.isle"));
// Directory for mid-end optimizations.
let src_opts = make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("opts"));
// Directories for lowering backends.
let src_isa_x64 =
make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("isa").join("x64"));
let src_isa_aarch64 =
@@ -204,47 +214,62 @@ fn get_isle_compilations(
// `cranelift/codegen/src/isa/*/lower/isle/generated_code.rs`!
Ok(IsleCompilations {
items: vec![
// The mid-end optimization rules.
IsleCompilation {
output: out_dir.join("isle_opt.rs"),
inputs: vec![
prelude_isle.clone(),
prelude_opt_isle.clone(),
src_opts.join("algebraic.isle"),
src_opts.join("cprop.isle"),
],
untracked_inputs: vec![clif_opt_isle.clone()],
},
// The x86-64 instruction selector.
IsleCompilation {
output: out_dir.join("isle_x64.rs"),
inputs: vec![
prelude_isle.clone(),
prelude_lower_isle.clone(),
src_isa_x64.join("inst.isle"),
src_isa_x64.join("lower.isle"),
],
untracked_inputs: vec![clif_isle.clone()],
untracked_inputs: vec![clif_lower_isle.clone()],
},
// The aarch64 instruction selector.
IsleCompilation {
output: out_dir.join("isle_aarch64.rs"),
inputs: vec![
prelude_isle.clone(),
prelude_lower_isle.clone(),
src_isa_aarch64.join("inst.isle"),
src_isa_aarch64.join("inst_neon.isle"),
src_isa_aarch64.join("lower.isle"),
src_isa_aarch64.join("lower_dynamic_neon.isle"),
],
untracked_inputs: vec![clif_isle.clone()],
untracked_inputs: vec![clif_lower_isle.clone()],
},
// The s390x instruction selector.
IsleCompilation {
output: out_dir.join("isle_s390x.rs"),
inputs: vec![
prelude_isle.clone(),
prelude_lower_isle.clone(),
src_isa_s390x.join("inst.isle"),
src_isa_s390x.join("lower.isle"),
],
untracked_inputs: vec![clif_isle.clone()],
untracked_inputs: vec![clif_lower_isle.clone()],
},
// The risc-v instruction selector.
IsleCompilation {
output: out_dir.join("isle_riscv64.rs"),
inputs: vec![
prelude_isle.clone(),
prelude_lower_isle.clone(),
src_isa_risc_v.join("inst.isle"),
src_isa_risc_v.join("lower.isle"),
],
untracked_inputs: vec![clif_isle.clone()],
untracked_inputs: vec![clif_lower_isle.clone()],
},
],
})

View File

@@ -60,36 +60,52 @@ fn gen_formats(formats: &[&InstructionFormat], fmt: &mut Formatter) {
fmt.empty_line();
}
/// Generate the InstructionData enum.
/// Generate the InstructionData and InstructionImms enums.
///
/// Every variant must contain an `opcode` field. The size of `InstructionData` should be kept at
/// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a
/// `ValueList` to store the additional information out of line.
///
/// `InstructionImms` stores everything about an instruction except for the arguments: in other
/// words, the `Opcode` and any immediates or other parameters. `InstructionData` stores this, plus
/// the SSA `Value` arguments.
fn gen_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) {
fmt.line("#[derive(Clone, Debug, PartialEq, Hash)]");
fmt.line(r#"#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]"#);
fmt.line("#[allow(missing_docs)]");
fmt.line("pub enum InstructionData {");
fmt.indent(|fmt| {
for format in formats {
fmtln!(fmt, "{} {{", format.name);
fmt.indent(|fmt| {
fmt.line("opcode: Opcode,");
if format.has_value_list {
fmt.line("args: ValueList,");
} else if format.num_value_operands == 1 {
fmt.line("arg: Value,");
} else if format.num_value_operands > 0 {
fmtln!(fmt, "args: [Value; {}],", format.num_value_operands);
}
for field in &format.imm_fields {
fmtln!(fmt, "{}: {},", field.member, field.kind.rust_type);
}
});
fmtln!(fmt, "},");
for (name, include_args) in &[("InstructionData", true), ("InstructionImms", false)] {
fmt.line("#[derive(Clone, Debug, PartialEq, Hash)]");
if !include_args {
// `InstructionImms` gets some extra derives: it acts like
// a sort of extended opcode and we want to allow for
// hashconsing via Eq. `Copy` also turns out to be useful.
fmt.line("#[derive(Copy, Eq)]");
}
});
fmt.line("}");
fmt.line(r#"#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]"#);
fmt.line("#[allow(missing_docs)]");
// generate `enum InstructionData` or `enum InstructionImms`.
// (This comment exists so one can grep for `enum InstructionData`!)
fmtln!(fmt, "pub enum {} {{", name);
fmt.indent(|fmt| {
for format in formats {
fmtln!(fmt, "{} {{", format.name);
fmt.indent(|fmt| {
fmt.line("opcode: Opcode,");
if *include_args {
if format.has_value_list {
fmt.line("args: ValueList,");
} else if format.num_value_operands == 1 {
fmt.line("arg: Value,");
} else if format.num_value_operands > 0 {
fmtln!(fmt, "args: [Value; {}],", format.num_value_operands);
}
}
for field in &format.imm_fields {
fmtln!(fmt, "{}: {},", field.member, field.kind.rust_type);
}
});
fmtln!(fmt, "},");
}
});
fmt.line("}");
}
}
fn gen_arguments_method(formats: &[&InstructionFormat], fmt: &mut Formatter, is_mut: bool) {
@@ -150,6 +166,122 @@ fn gen_arguments_method(formats: &[&InstructionFormat], fmt: &mut Formatter, is_
fmtln!(fmt, "}");
}
/// Generate the conversion from `InstructionData` to `InstructionImms`, stripping out the
/// `Value`s.
fn gen_instruction_data_to_instruction_imms(formats: &[&InstructionFormat], fmt: &mut Formatter) {
// Emits `impl From<&InstructionData> for InstructionImms` with one match
// arm per instruction format.
fmt.line("impl std::convert::From<&InstructionData> for InstructionImms {");
fmt.indent(|fmt| {
fmt.doc_comment("Convert an `InstructionData` into an `InstructionImms`.");
fmt.line("fn from(data: &InstructionData) -> InstructionImms {");
fmt.indent(|fmt| {
fmt.line("match data {");
fmt.indent(|fmt| {
for format in formats {
// Pattern: destructure the opcode and every immediate field.
// The trailing `..` swallows the value operands (`arg`/`args`),
// which are exactly what this conversion strips out.
fmtln!(fmt, "InstructionData::{} {{", format.name);
fmt.indent(|fmt| {
fmt.line("opcode,");
for field in &format.imm_fields {
fmtln!(fmt, "{},", field.member);
}
fmt.line("..");
});
// Arm body: rebuild the same-format `InstructionImms` variant from
// the captured opcode and immediates.
fmtln!(fmt, "}} => InstructionImms::{} {{", format.name);
fmt.indent(|fmt| {
fmt.line("opcode: *opcode,");
for field in &format.imm_fields {
// `.clone()` because the match arm binds immediates by reference.
fmtln!(fmt, "{}: {}.clone(),", field.member, field.member);
}
});
fmt.line("},");
}
});
fmt.line("}");
});
fmt.line("}");
});
fmt.line("}");
fmt.empty_line();
}
/// Generate the conversion from `InstructionImms` to `InstructionData`, adding the
/// `Value`s.
fn gen_instruction_imms_to_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) {
// Emits an inherent `with_args` method on `InstructionImms` that combines
// the opcode/immediates with a caller-provided slice of `Value` args to
// produce a full `InstructionData`. Inverse of the `From<&InstructionData>`
// conversion, which strips the args.
fmt.line("impl InstructionImms {");
fmt.indent(|fmt| {
fmt.doc_comment("Convert an `InstructionImms` into an `InstructionData` by adding args.");
fmt.line(
"pub fn with_args(&self, values: &[Value], value_list: &mut ValueListPool) -> InstructionData {",
);
fmt.indent(|fmt| {
fmt.line("match self {");
fmt.indent(|fmt| {
for format in formats {
// Pattern: destructure opcode plus all immediate fields of this format.
fmtln!(fmt, "InstructionImms::{} {{", format.name);
fmt.indent(|fmt| {
fmt.line("opcode,");
for field in &format.imm_fields {
fmtln!(fmt, "{},", field.member);
}
});
fmt.line("} => {");
if format.has_value_list {
// For value-list formats, materialize the `ValueList` (allocated
// out of `value_list`) first so the struct literal below can
// simply say `args,`.
fmtln!(fmt, "let args = ValueList::from_slice(values, value_list);");
}
fmt.indent(|fmt| {
fmtln!(fmt, "InstructionData::{} {{", format.name);
fmt.indent(|fmt| {
fmt.line("opcode: *opcode,");
for field in &format.imm_fields {
// `.clone()` because the match arm binds immediates by reference.
fmtln!(fmt, "{}: {}.clone(),", field.member, field.member);
}
// The value-operand field differs per format: a `ValueList`, a
// single `arg`, or a fixed-size `[Value; N]` array filled from
// the front of `values`.
if format.has_value_list {
fmtln!(fmt, "args,");
} else if format.num_value_operands == 1 {
fmtln!(fmt, "arg: values[0],");
} else if format.num_value_operands > 0 {
let mut args = vec![];
for i in 0..format.num_value_operands {
args.push(format!("values[{}]", i));
}
fmtln!(fmt, "args: [{}],", args.join(", "));
}
});
fmt.line("}");
});
fmt.line("},");
}
});
fmt.line("}");
});
fmt.line("}");
});
fmt.line("}");
fmt.empty_line();
}
/// Generate the inherent `impl InstructionImms` block containing the
/// `opcode` accessor.
///
/// The emitted method matches on every instruction format variant and
/// returns the common `opcode` field.
fn gen_instruction_imms_impl(formats: &[&InstructionFormat], fmt: &mut Formatter) {
    fmt.line("impl InstructionImms {");
    fmt.indent(|fmt| {
        fmt.doc_comment("Get the opcode of this instruction.");
        fmt.line("pub fn opcode(&self) -> Opcode {");
        fmt.indent(|fmt| {
            // One arm per format: `Self::Fmt { opcode, .. } => opcode`.
            let mut opcode_match = Match::new("*self");
            for format in formats {
                let pattern = format!("Self::{}", format.name);
                opcode_match.arm(pattern, vec!["opcode", ".."], String::from("opcode"));
            }
            fmt.add_match(opcode_match);
        });
        fmt.line("}");
    });
    fmt.line("}");
    fmt.empty_line();
}
/// Generate the boring parts of the InstructionData implementation.
///
/// These methods in `impl InstructionData` can be generated automatically from the instruction
@@ -1070,7 +1202,12 @@ fn gen_inst_builder(inst: &Instruction, format: &InstructionFormat, fmt: &mut Fo
fmtln!(fmt, "}")
}
fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt: &mut Formatter) {
fn gen_common_isle(
formats: &[&InstructionFormat],
instructions: &AllInstructions,
fmt: &mut Formatter,
is_lower: bool,
) {
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Write;
@@ -1123,40 +1260,46 @@ fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt:
gen_isle_enum(name, variants, fmt)
}
// Generate all of the value arrays we need for `InstructionData` as well as
// the constructors and extractors for them.
fmt.line(";;;; Value Arrays ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;");
fmt.empty_line();
let value_array_arities: BTreeSet<_> = formats
.iter()
.filter(|f| f.typevar_operand.is_some() && !f.has_value_list && f.num_value_operands != 1)
.map(|f| f.num_value_operands)
.collect();
for n in value_array_arities {
fmtln!(fmt, ";; ISLE representation of `[Value; {}]`.", n);
fmtln!(fmt, "(type ValueArray{} extern (enum))", n);
if is_lower {
// Generate all of the value arrays we need for `InstructionData` as well as
// the constructors and extractors for them.
fmt.line(
";;;; Value Arrays ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;",
);
fmt.empty_line();
let value_array_arities: BTreeSet<_> = formats
.iter()
.filter(|f| {
f.typevar_operand.is_some() && !f.has_value_list && f.num_value_operands != 1
})
.map(|f| f.num_value_operands)
.collect();
for n in value_array_arities {
fmtln!(fmt, ";; ISLE representation of `[Value; {}]`.", n);
fmtln!(fmt, "(type ValueArray{} extern (enum))", n);
fmt.empty_line();
fmtln!(
fmt,
"(decl value_array_{} ({}) ValueArray{})",
n,
(0..n).map(|_| "Value").collect::<Vec<_>>().join(" "),
n
);
fmtln!(
fmt,
"(extern constructor value_array_{} pack_value_array_{})",
n,
n
);
fmtln!(
fmt,
"(extern extractor infallible value_array_{} unpack_value_array_{})",
n,
n
);
fmt.empty_line();
fmtln!(
fmt,
"(decl value_array_{} ({}) ValueArray{})",
n,
(0..n).map(|_| "Value").collect::<Vec<_>>().join(" "),
n
);
fmtln!(
fmt,
"(extern constructor value_array_{} pack_value_array_{})",
n,
n
);
fmtln!(
fmt,
"(extern extractor infallible value_array_{} unpack_value_array_{})",
n,
n
);
fmt.empty_line();
}
}
// Generate the extern type declaration for `Opcode`.
@@ -1175,21 +1318,33 @@ fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt:
fmt.line(")");
fmt.empty_line();
// Generate the extern type declaration for `InstructionData`.
fmt.line(";;;; `InstructionData` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;");
// Generate the extern type declaration for `InstructionData`
// (lowering) or `InstructionImms` (opt).
let inst_data_name = if is_lower {
"InstructionData"
} else {
"InstructionImms"
};
fmtln!(
fmt,
";;;; `{}` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;",
inst_data_name
);
fmt.empty_line();
fmt.line("(type InstructionData extern");
fmtln!(fmt, "(type {} extern", inst_data_name);
fmt.indent(|fmt| {
fmt.line("(enum");
fmt.indent(|fmt| {
for format in formats {
let mut s = format!("({} (opcode Opcode)", format.name);
if format.has_value_list {
s.push_str(" (args ValueList)");
} else if format.num_value_operands == 1 {
s.push_str(" (arg Value)");
} else if format.num_value_operands > 1 {
write!(&mut s, " (args ValueArray{})", format.num_value_operands).unwrap();
if is_lower {
if format.has_value_list {
s.push_str(" (args ValueList)");
} else if format.num_value_operands == 1 {
s.push_str(" (arg Value)");
} else if format.num_value_operands > 1 {
write!(&mut s, " (args ValueArray{})", format.num_value_operands).unwrap();
}
}
for field in &format.imm_fields {
write!(
@@ -1210,85 +1365,157 @@ fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt:
fmt.empty_line();
// Generate the helper extractors for each opcode's full instruction.
//
// TODO: if/when we port our peephole optimization passes to ISLE we will
// want helper constructors as well.
fmt.line(";;;; Extracting Opcode, Operands, and Immediates from `InstructionData` ;;;;;;;;");
fmtln!(
fmt,
";;;; Extracting Opcode, Operands, and Immediates from `{}` ;;;;;;;;",
inst_data_name
);
fmt.empty_line();
let ret_ty = if is_lower { "Inst" } else { "Id" };
for inst in instructions {
if !is_lower && inst.format.has_value_list {
continue;
}
fmtln!(
fmt,
"(decl {} ({}) Inst)",
"(decl {} ({}{}) {})",
inst.name,
if is_lower { "" } else { "Type " },
inst.operands_in
.iter()
.map(|o| {
let ty = o.kind.rust_type;
if ty == "&[Value]" {
"ValueSlice"
if is_lower {
if ty == "&[Value]" {
"ValueSlice"
} else {
ty.rsplit("::").next().unwrap()
}
} else {
ty.rsplit("::").next().unwrap()
if ty == "&[Value]" {
panic!("value slice in mid-end extractor");
} else if ty == "Value" || ty == "ir::Value" {
"Id"
} else {
ty.rsplit("::").next().unwrap()
}
}
})
.collect::<Vec<_>>()
.join(" ")
.join(" "),
ret_ty
);
fmtln!(fmt, "(extractor");
fmt.indent(|fmt| {
fmtln!(
fmt,
"({} {})",
"({} {}{})",
inst.name,
if is_lower { "" } else { "ty " },
inst.operands_in
.iter()
.map(|o| { o.name })
.collect::<Vec<_>>()
.join(" ")
);
let mut s = format!(
"(inst_data (InstructionData.{} (Opcode.{})",
inst.format.name, inst.camel_name
);
// Value and varargs operands.
if inst.format.has_value_list {
// The instruction format uses a value list, but the
// instruction itself might have not only a `&[Value]`
// varargs operand, but also one or more `Value` operands as
// well. If this is the case, then we need to read them off
// the front of the `ValueList`.
let values: Vec<_> = inst
.operands_in
.iter()
.filter(|o| o.is_value())
.map(|o| o.name)
.collect();
let varargs = inst
.operands_in
.iter()
.find(|o| o.is_varargs())
.unwrap()
.name;
if values.is_empty() {
write!(&mut s, " (value_list_slice {})", varargs).unwrap();
} else {
if is_lower {
let mut s = format!(
"(inst_data (InstructionData.{} (Opcode.{})",
inst.format.name, inst.camel_name
);
// Value and varargs operands.
if inst.format.has_value_list {
// The instruction format uses a value list, but the
// instruction itself might have not only a `&[Value]`
// varargs operand, but also one or more `Value` operands as
// well. If this is the case, then we need to read them off
// the front of the `ValueList`.
let values: Vec<_> = inst
.operands_in
.iter()
.filter(|o| o.is_value())
.map(|o| o.name)
.collect();
let varargs = inst
.operands_in
.iter()
.find(|o| o.is_varargs())
.unwrap()
.name;
if values.is_empty() {
write!(&mut s, " (value_list_slice {})", varargs).unwrap();
} else {
write!(
&mut s,
" (unwrap_head_value_list_{} {} {})",
values.len(),
values.join(" "),
varargs
)
.unwrap();
}
} else if inst.format.num_value_operands == 1 {
write!(
&mut s,
" (unwrap_head_value_list_{} {} {})",
values.len(),
values.join(" "),
varargs
" {}",
inst.operands_in.iter().find(|o| o.is_value()).unwrap().name
)
.unwrap();
} else if inst.format.num_value_operands > 1 {
let values = inst
.operands_in
.iter()
.filter(|o| o.is_value())
.map(|o| o.name)
.collect::<Vec<_>>();
assert_eq!(values.len(), inst.format.num_value_operands);
let values = values.join(" ");
write!(
&mut s,
" (value_array_{} {})",
inst.format.num_value_operands, values,
)
.unwrap();
}
} else if inst.format.num_value_operands == 1 {
write!(
&mut s,
" {}",
inst.operands_in.iter().find(|o| o.is_value()).unwrap().name
)
.unwrap();
} else if inst.format.num_value_operands > 1 {
// Immediates.
let imm_operands: Vec<_> = inst
.operands_in
.iter()
.filter(|o| !o.is_value() && !o.is_varargs())
.collect();
assert_eq!(imm_operands.len(), inst.format.imm_fields.len());
for op in imm_operands {
write!(&mut s, " {}", op.name).unwrap();
}
s.push_str("))");
fmt.line(&s);
} else {
// Mid-end case.
let mut s = format!(
"(enodes ty (InstructionImms.{} (Opcode.{})",
inst.format.name, inst.camel_name
);
// Immediates.
let imm_operands: Vec<_> = inst
.operands_in
.iter()
.filter(|o| !o.is_value() && !o.is_varargs())
.collect();
assert_eq!(imm_operands.len(), inst.format.imm_fields.len());
for op in imm_operands {
write!(&mut s, " {}", op.name).unwrap();
}
// End of `InstructionImms`.
s.push_str(")");
// Second arg to `enode`: value args.
assert!(!inst.operands_in.iter().any(|op| op.is_varargs()));
let values = inst
.operands_in
.iter()
@@ -1299,31 +1526,83 @@ fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt:
let values = values.join(" ");
write!(
&mut s,
" (value_array_{} {})",
" (id_array_{} {})",
inst.format.num_value_operands, values,
)
.unwrap();
}
// Immediates.
let imm_operands: Vec<_> = inst
.operands_in
.iter()
.filter(|o| !o.is_value() && !o.is_varargs())
.collect();
assert_eq!(imm_operands.len(), inst.format.imm_fields.len());
for op in imm_operands {
write!(&mut s, " {}", op.name).unwrap();
s.push_str(")");
fmt.line(&s);
}
s.push_str("))");
fmt.line(&s);
});
fmt.line(")");
// Generate a constructor if this is the mid-end prelude.
if !is_lower {
fmtln!(
fmt,
"(rule ({} ty {})",
inst.name,
inst.operands_in
.iter()
.map(|o| o.name)
.collect::<Vec<_>>()
.join(" ")
);
fmt.indent(|fmt| {
let mut s = format!(
"(pure_enode ty (InstructionImms.{} (Opcode.{})",
inst.format.name, inst.camel_name
);
for o in inst
.operands_in
.iter()
.filter(|o| !o.is_value() && !o.is_varargs())
{
write!(&mut s, " {}", o.name).unwrap();
}
s.push_str(")");
let values = inst
.operands_in
.iter()
.filter(|o| o.is_value())
.map(|o| o.name)
.collect::<Vec<_>>();
let values = values.join(" ");
write!(
&mut s,
" (id_array_{} {})",
inst.format.num_value_operands, values
)
.unwrap();
s.push_str(")");
fmt.line(&s);
});
fmt.line(")");
}
fmt.empty_line();
}
}
/// Emit the mid-end ("opt") flavor of the generated ISLE bindings.
///
/// Thin wrapper over `gen_common_isle` with the lowering mode disabled:
/// the mid-end bindings are generated in terms of `InstructionImms`
/// rather than `InstructionData`.
fn gen_opt_isle(
    formats: &[&InstructionFormat],
    instructions: &AllInstructions,
    fmt: &mut Formatter,
) {
    let is_lower = false;
    gen_common_isle(formats, instructions, fmt, is_lower);
}
/// Emit the lowering-backend flavor of the generated ISLE bindings.
///
/// Thin wrapper over `gen_common_isle` with the lowering mode enabled:
/// the lowering bindings are generated in terms of `InstructionData`,
/// including value operands.
fn gen_lower_isle(
    formats: &[&InstructionFormat],
    instructions: &AllInstructions,
    fmt: &mut Formatter,
) {
    let is_lower = true;
    gen_common_isle(formats, instructions, fmt, is_lower);
}
/// Generate an `enum` immediate in ISLE.
fn gen_isle_enum(name: &str, mut variants: Vec<&str>, fmt: &mut Formatter) {
variants.sort();
@@ -1388,7 +1667,8 @@ pub(crate) fn generate(
all_inst: &AllInstructions,
opcode_filename: &str,
inst_builder_filename: &str,
isle_filename: &str,
isle_opt_filename: &str,
isle_lower_filename: &str,
out_dir: &str,
isle_dir: &str,
) -> Result<(), error::Error> {
@@ -1398,16 +1678,24 @@ pub(crate) fn generate(
gen_instruction_data(&formats, &mut fmt);
fmt.empty_line();
gen_instruction_data_impl(&formats, &mut fmt);
gen_instruction_data_to_instruction_imms(&formats, &mut fmt);
gen_instruction_imms_impl(&formats, &mut fmt);
gen_instruction_imms_to_instruction_data(&formats, &mut fmt);
fmt.empty_line();
gen_opcodes(all_inst, &mut fmt);
fmt.empty_line();
gen_type_constraints(all_inst, &mut fmt);
fmt.update_file(opcode_filename, out_dir)?;
// ISLE DSL.
// ISLE DSL: mid-end ("opt") generated bindings.
let mut fmt = Formatter::new();
gen_isle(&formats, all_inst, &mut fmt);
fmt.update_file(isle_filename, isle_dir)?;
gen_opt_isle(&formats, all_inst, &mut fmt);
fmt.update_file(isle_opt_filename, isle_dir)?;
// ISLE DSL: lowering generated bindings.
let mut fmt = Formatter::new();
gen_lower_isle(&formats, all_inst, &mut fmt);
fmt.update_file(isle_lower_filename, isle_dir)?;
// Instruction builder.
let mut fmt = Formatter::new();

View File

@@ -47,7 +47,8 @@ pub fn generate(isas: &[isa::Isa], out_dir: &str, isle_dir: &str) -> Result<(),
&shared_defs.all_instructions,
"opcodes.rs",
"inst_builder.rs",
"clif.isle",
"clif_opt.isle",
"clif_lower.isle",
&out_dir,
isle_dir,
)?;

View File

@@ -53,6 +53,17 @@ pub(crate) fn define() -> SettingGroup {
true,
);
settings.add_bool(
"use_egraphs",
"Enable egraph-based optimization.",
r#"
This enables an optimization phase that converts CLIF to an egraph (equivalence graph)
representation, performs various rewrites, and then converts it back. This can result in
better optimization, but is currently considered experimental.
"#,
false,
);
settings.add_bool(
"enable_verifier",
"Run the Cranelift IR verifier at strategic times during compilation.",

View File

@@ -12,6 +12,7 @@
use crate::alias_analysis::AliasAnalysis;
use crate::dce::do_dce;
use crate::dominator_tree::DominatorTree;
use crate::egraph::FuncEGraph;
use crate::flowgraph::ControlFlowGraph;
use crate::ir::Function;
use crate::isa::TargetIsa;
@@ -104,15 +105,20 @@ impl Context {
/// Compile the function, and emit machine code into a `Vec<u8>`.
///
/// Run the function through all the passes necessary to generate code for the target ISA
/// represented by `isa`, as well as the final step of emitting machine code into a
/// `Vec<u8>`. The machine code is not relocated. Instead, any relocations can be obtained
/// from `compiled_code()`.
/// Run the function through all the passes necessary to generate
/// code for the target ISA represented by `isa`, as well as the
/// final step of emitting machine code into a `Vec<u8>`. The
/// machine code is not relocated. Instead, any relocations can be
/// obtained from `compiled_code()`.
///
/// Performs any optimizations that are enabled, unless
/// `optimize()` was already invoked.
///
/// This function calls `compile`, taking care to resize `mem` as
/// needed, so it provides a safe interface.
/// needed.
///
/// Returns information about the function's code and read-only data.
/// Returns information about the function's code and read-only
/// data.
pub fn compile_and_emit(
&mut self,
isa: &dyn TargetIsa,
@@ -131,15 +137,26 @@ impl Context {
self.verify_if(isa)?;
self.optimize(isa)?;
isa.compile_function(&self.func, self.want_disasm)
}
/// Optimize the function, performing all compilation steps up to
/// but not including machine-code lowering and register
/// allocation.
///
/// Public only for testing purposes.
pub fn optimize(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
let opt_level = isa.flags().opt_level();
log::trace!(
"Compiling (opt level {:?}):\n{}",
"Optimizing (opt level {:?}):\n{}",
opt_level,
self.func.display()
);
self.compute_cfg();
if opt_level != OptLevel::None {
if !isa.flags().use_egraphs() && opt_level != OptLevel::None {
self.preopt(isa)?;
}
if isa.flags().enable_nan_canonicalization() {
@@ -147,7 +164,8 @@ impl Context {
}
self.legalize(isa)?;
if opt_level != OptLevel::None {
if !isa.flags().use_egraphs() && opt_level != OptLevel::None {
self.compute_domtree();
self.compute_loop_analysis();
self.licm(isa)?;
@@ -156,18 +174,29 @@ impl Context {
self.compute_domtree();
self.eliminate_unreachable_code(isa)?;
if opt_level != OptLevel::None {
if isa.flags().use_egraphs() || opt_level != OptLevel::None {
self.dce(isa)?;
}
self.remove_constant_phis(isa)?;
if opt_level != OptLevel::None && isa.flags().enable_alias_analysis() {
if isa.flags().use_egraphs() {
log::debug!(
"About to optimize with egraph phase:\n{}",
self.func.display()
);
self.compute_loop_analysis();
let mut eg = FuncEGraph::new(&self.func, &self.domtree, &self.loop_analysis, &self.cfg);
eg.elaborate(&mut self.func);
log::debug!("After egraph optimization:\n{}", self.func.display());
log::info!("egraph stats: {:?}", eg.stats);
} else if opt_level != OptLevel::None && isa.flags().enable_alias_analysis() {
self.replace_redundant_loads()?;
self.simple_gvn(isa)?;
}
isa.compile_function(&self.func, self.want_disasm)
Ok(())
}
/// Compile the function.

View File

@@ -0,0 +1,414 @@
//! Egraph-based mid-end optimization framework.
use crate::dominator_tree::DominatorTree;
use crate::flowgraph::ControlFlowGraph;
use crate::loop_analysis::{LoopAnalysis, LoopLevel};
use crate::trace;
use crate::{
fx::{FxHashMap, FxHashSet},
inst_predicates::has_side_effect,
ir::{Block, Function, Inst, InstructionData, InstructionImms, Opcode, Type},
};
use alloc::vec::Vec;
use core::ops::Range;
use cranelift_egraph::{EGraph, Id, Language, NewOrExisting};
use cranelift_entity::EntityList;
use cranelift_entity::SecondaryMap;
mod domtree;
mod elaborate;
mod node;
mod stores;
use elaborate::Elaborator;
pub use node::{Node, NodeCtx};
pub use stores::{AliasAnalysis, MemoryState};
/// Egraph state for one function during the mid-end optimization phase:
/// the egraph itself plus the per-block bookkeeping needed to rewrite
/// and then elaborate back into a `Function`.
pub struct FuncEGraph<'a> {
/// Dominator tree, used for elaboration pass.
domtree: &'a DominatorTree,
/// Loop analysis results, used for built-in LICM during elaboration.
loop_analysis: &'a LoopAnalysis,
/// Last-store tracker for integrated alias analysis during egraph build.
alias_analysis: AliasAnalysis,
/// The egraph itself.
pub(crate) egraph: EGraph<NodeCtx, Analysis>,
/// "node context", containing arenas for node data.
pub(crate) node_ctx: NodeCtx,
/// Ranges in `side_effect_ids` for sequences of side-effecting
/// eclasses per block.
side_effects: SecondaryMap<Block, Range<u32>>,
/// Flat storage backing the per-block ranges in `side_effects`.
side_effect_ids: Vec<Id>,
/// Map from store instructions to their nodes; used for store-to-load forwarding.
pub(crate) store_nodes: FxHashMap<Inst, (Type, Id)>,
/// Ranges in `blockparam_ids_tys` for sequences of blockparam
/// eclass IDs and types per block.
blockparams: SecondaryMap<Block, Range<u32>>,
/// Flat storage backing the per-block ranges in `blockparams`.
blockparam_ids_tys: Vec<(Id, Type)>,
/// Which canonical node IDs do we want to rematerialize in each
/// block where they're used?
pub(crate) remat_ids: FxHashSet<Id>,
/// Which canonical node IDs have an enode whose value subsumes
/// all others it's unioned with?
pub(crate) subsume_ids: FxHashSet<Id>,
/// Statistics recorded during the process of building,
/// optimizing, and lowering out of this egraph.
pub(crate) stats: Stats,
/// Current rewrite-recursion depth. Used to enforce a finite
/// limit on rewrite rule application so that we don't get stuck
/// in an infinite chain.
pub(crate) rewrite_depth: usize,
}
/// Diagnostic counters collected while building and rewriting the egraph
/// and while elaborating back out of it; reported via logging after the
/// egraph phase.
#[derive(Clone, Debug, Default)]
pub(crate) struct Stats {
// Enode-creation counters, bumped as nodes are added during `build`
// (broken down by node kind; see the increment sites in this module).
pub(crate) node_created: u64,
pub(crate) node_param: u64,
pub(crate) node_result: u64,
pub(crate) node_pure: u64,
pub(crate) node_inst: u64,
pub(crate) node_load: u64,
// Hash-consing/deduplication counters.
pub(crate) node_dedup_query: u64,
pub(crate) node_dedup_hit: u64,
pub(crate) node_dedup_miss: u64,
pub(crate) node_ctor_created: u64,
pub(crate) node_ctor_deduped: u64,
// Eclass union and subsume events.
pub(crate) node_union: u64,
pub(crate) node_subsume: u64,
// Store-map inserts and side-effecting nodes seen during build.
pub(crate) store_map_insert: u64,
pub(crate) side_effect_nodes: u64,
// Rewrite-rule application counters; `rewrite_depth_limit` counts how
// often the static recursion limit on rule application was hit.
pub(crate) rewrite_rule_invoked: u64,
pub(crate) rewrite_depth_limit: u64,
// Successful store-to-load forwardings via the integrated alias analysis.
pub(crate) store_to_load_forward: u64,
// Elaboration-phase counters (node visits, memoization hits/misses,
// LICM hoists, and per-function instruction counts before/after).
pub(crate) elaborate_visit_node: u64,
pub(crate) elaborate_memoize_hit: u64,
pub(crate) elaborate_memoize_miss: u64,
pub(crate) elaborate_memoize_miss_remat: u64,
pub(crate) elaborate_licm_hoist: u64,
pub(crate) elaborate_func: u64,
pub(crate) elaborate_func_pre_insts: u64,
pub(crate) elaborate_func_post_insts: u64,
}
impl<'a> FuncEGraph<'a> {
/// Build a `FuncEGraph` for the given function. The dominator tree
/// must already be computed: it drives scheduling when we lower back
/// out of the egraph. Alias analysis is computed here from the CFG.
pub fn new(
    func: &Function,
    domtree: &'a DominatorTree,
    loop_analysis: &'a LoopAnalysis,
    cfg: &ControlFlowGraph,
) -> FuncEGraph<'a> {
    // Size heuristic: roughly two egraph nodes per SSA value.
    let estimated_nodes = func.dfg.num_values() * 2;
    let alias_analysis = AliasAnalysis::new(func, cfg);
    let mut func_egraph = Self {
        domtree,
        loop_analysis,
        alias_analysis,
        egraph: EGraph::with_capacity(estimated_nodes, Some(Analysis)),
        node_ctx: NodeCtx::with_capacity_for_dfg(&func.dfg),
        side_effects: SecondaryMap::default(),
        side_effect_ids: vec![],
        store_nodes: FxHashMap::default(),
        blockparams: SecondaryMap::default(),
        blockparam_ids_tys: vec![],
        remat_ids: FxHashSet::default(),
        subsume_ids: FxHashSet::default(),
        stats: Default::default(),
        rewrite_depth: 0,
    };
    func_egraph.build(func);
    func_egraph
}
/// Translate `func` into the egraph: for each block in RPO, create a
/// `Param` node per blockparam and one node per instruction
/// (classifying each as `Pure`, `Load`, or side-effecting `Inst`),
/// applying rewrite rules eagerly to new pure nodes and recording
/// side-effecting roots per block for later elaboration.
fn build(&mut self, func: &Function) {
    // Mapping of SSA `Value` to eclass ID.
    let mut value_to_id = FxHashMap::default();
    // For each block in RPO, create an enode for block entry, for
    // each block param, and for each instruction.
    for &block in self.domtree.cfg_postorder().iter().rev() {
        let loop_level = self.loop_analysis.loop_level(block);
        let blockparam_start =
            u32::try_from(self.blockparam_ids_tys.len()).expect("Overflow in blockparam count");
        for (i, &value) in func.dfg.block_params(block).iter().enumerate() {
            let ty = func.dfg.value_type(value);
            let param = self
                .egraph
                .add(
                    Node::Param {
                        block,
                        index: i
                            .try_into()
                            .expect("blockparam index should fit in Node::Param"),
                        ty,
                        loop_level,
                    },
                    &mut self.node_ctx,
                )
                .get();
            value_to_id.insert(value, param);
            self.blockparam_ids_tys.push((param, ty));
            self.stats.node_created += 1;
            self.stats.node_param += 1;
        }
        let blockparam_end =
            u32::try_from(self.blockparam_ids_tys.len()).expect("Overflow in blockparam count");
        self.blockparams[block] = blockparam_start..blockparam_end;
        let side_effect_start =
            u32::try_from(self.side_effect_ids.len()).expect("Overflow in side-effect count");
        for inst in func.layout.block_insts(block) {
            // Build args from SSA values. RPO order guarantees every
            // (alias-resolved) def is already in `value_to_id`.
            let args = EntityList::from_iter(
                func.dfg.inst_args(inst).iter().map(|&arg| {
                    let arg = func.dfg.resolve_aliases(arg);
                    *value_to_id
                        .get(&arg)
                        .expect("Must have seen def before this use")
                }),
                &mut self.node_ctx.args,
            );
            let results = func.dfg.inst_results(inst);
            let types = self
                .node_ctx
                .types
                .from_iter(results.iter().map(|&val| func.dfg.value_type(val)));
            let types = types.freeze(&mut self.node_ctx.types);
            let load_mem_state = self.alias_analysis.get_state_for_load(inst);
            // A readonly, non-trapping load can be treated as a pure
            // node: it has no observable side effects or dependence on
            // mutable memory state.
            let is_readonly_load = match func.dfg[inst] {
                InstructionData::Load {
                    opcode: Opcode::Load,
                    flags,
                    ..
                } => flags.readonly() && flags.notrap(),
                _ => false,
            };
            // Create the egraph node.
            let op = InstructionImms::from(&func.dfg[inst]);
            let opcode = op.opcode();
            let srcloc = func.srclocs[inst];
            let node = if is_readonly_load {
                self.stats.node_created += 1;
                self.stats.node_pure += 1;
                Node::Pure { op, args, types }
            } else if let Some(load_mem_state) = load_mem_state {
                let addr = args.as_slice(&self.node_ctx.args)[0];
                let ty = types.as_slice(&self.node_ctx.types)[0];
                trace!("load at inst {} has mem state {:?}", inst, load_mem_state);
                self.stats.node_created += 1;
                self.stats.node_load += 1;
                Node::Load {
                    op,
                    ty,
                    inst,
                    addr,
                    mem_state: load_mem_state,
                    srcloc,
                }
            } else if has_side_effect(func, inst) || opcode.can_load() {
                self.stats.node_created += 1;
                self.stats.node_inst += 1;
                Node::Inst {
                    op,
                    inst,
                    args,
                    types,
                    srcloc,
                    loop_level,
                }
            } else {
                self.stats.node_created += 1;
                self.stats.node_pure += 1;
                Node::Pure { op, args, types }
            };
            // Record dedup stats for node kinds that participate in
            // dedup; `add` below may return an existing eclass.
            let dedup_needed = self.node_ctx.needs_dedup(&node);
            let is_pure = matches!(node, Node::Pure { .. });
            let mut id = self.egraph.add(node, &mut self.node_ctx);
            if dedup_needed {
                self.stats.node_dedup_query += 1;
                match id {
                    NewOrExisting::New(_) => {
                        self.stats.node_dedup_miss += 1;
                    }
                    NewOrExisting::Existing(_) => {
                        self.stats.node_dedup_hit += 1;
                    }
                }
            }
            if opcode == Opcode::Store {
                // Record stores (type of stored data and node id) for
                // store-to-load forwarding below.
                let store_data_ty = func.dfg.value_type(func.dfg.inst_args(inst)[0]);
                self.store_nodes.insert(inst, (store_data_ty, id.get()));
                self.stats.store_map_insert += 1;
            }
            // Loads that did not already merge into an existing
            // load: try to forward from a store (store-to-load
            // forwarding).
            if let NewOrExisting::New(new_id) = id {
                if load_mem_state.is_some() {
                    let opt_id = crate::opts::store_to_load(new_id, self);
                    trace!("store_to_load: {} -> {}", new_id, opt_id);
                    if opt_id != new_id {
                        id = NewOrExisting::Existing(opt_id);
                    }
                }
            }
            // Now either optimize (for new pure nodes), or add to
            // the side-effecting list (for all other new nodes).
            let id = match id {
                NewOrExisting::Existing(id) => id,
                NewOrExisting::New(id) if is_pure => {
                    // Apply all optimization rules immediately; the
                    // aegraph (acyclic egraph) works best when we do
                    // this so all uses pick up the eclass with all
                    // possible enodes.
                    crate::opts::optimize_eclass(id, self)
                }
                NewOrExisting::New(id) => {
                    self.side_effect_ids.push(id);
                    self.stats.side_effect_nodes += 1;
                    id
                }
            };
            // Create results and save in Value->Id map. Multi-result
            // instructions get one `Result` projection node per result.
            match results {
                &[] => {}
                &[one_result] => {
                    trace!("build: value {} -> id {}", one_result, id);
                    value_to_id.insert(one_result, id);
                }
                many_results => {
                    debug_assert!(many_results.len() > 1);
                    for (i, &result) in many_results.iter().enumerate() {
                        let ty = func.dfg.value_type(result);
                        let projection = self
                            .egraph
                            .add(
                                Node::Result {
                                    value: id,
                                    result: i,
                                    ty,
                                },
                                &mut self.node_ctx,
                            )
                            .get();
                        self.stats.node_created += 1;
                        self.stats.node_result += 1;
                        trace!("build: value {} -> id {}", result, projection);
                        value_to_id.insert(result, projection);
                    }
                }
            }
        }
        let side_effect_end =
            u32::try_from(self.side_effect_ids.len()).expect("Overflow in side-effect count");
        let side_effect_range = side_effect_start..side_effect_end;
        self.side_effects[block] = side_effect_range;
    }
}
/// Scoped elaboration: lower the egraph back into `func`'s body by
/// choosing a concrete placement for every operation.
///
/// This works in concert with the domtree: we perform a preorder
/// traversal, maintaining a scoped Id-to-`Value` map whose scopes
/// mirror domtree levels. In each block we walk the side-effecting
/// eclasses in order, generating each root's argument eclasses on
/// demand before emitting the root itself.
///
/// An eclass already present in the scoped map is reused directly;
/// otherwise its extracted enode is emitted after its args. Values
/// are thus computed "as late as possible", but once emitted they are
/// memoized and visible for the rest of the block and in all
/// dominated blocks — which subsumes GVN.
pub fn elaborate(&mut self, func: &mut Function) {
    let mut elab = Elaborator::new(
        func,
        self.domtree,
        self.loop_analysis,
        &self.egraph,
        &self.node_ctx,
        &self.remat_ids,
        &mut self.stats,
    );
    elab.elaborate(
        |block| {
            // Blockparam (Id, Type) pairs for `block`.
            let r = self.blockparams[block].clone();
            &self.blockparam_ids_tys[r.start as usize..r.end as usize]
        },
        |block| {
            // Side-effecting root eclasses for `block`, in order.
            let r = self.side_effects[block].clone();
            &self.side_effect_ids[r.start as usize..r.end as usize]
        },
    );
}
}
/// State for egraph analysis that computes all needed properties.
/// Currently stateless: the only analysis value computed is the loop
/// level (see `AnalysisValue`).
pub(crate) struct Analysis;
/// Analysis results for each eclass id.
#[derive(Clone, Debug)]
pub(crate) struct AnalysisValue {
    /// The deepest loop level at which this eclass's value is
    /// (transitively) computed; consumed when costing nodes
    /// (`Cost::at_level`) during extraction.
    pub(crate) loop_level: LoopLevel,
}
impl Default for AnalysisValue {
fn default() -> Self {
Self {
loop_level: LoopLevel::root(),
}
}
}
impl cranelift_egraph::Analysis for Analysis {
    type L = NodeCtx;
    type Value = AnalysisValue;

    /// Compute the analysis value for one node: a node's loop level
    /// is inherited from its operands (pure nodes take the deepest
    /// operand level; loads/results follow their input), while insts
    /// and params carry the level of their defining block.
    fn for_node(
        &self,
        ctx: &NodeCtx,
        n: &Node,
        values: &SecondaryMap<Id, AnalysisValue>,
    ) -> AnalysisValue {
        let loop_level = match n {
            &Node::Pure { ref args, .. } => {
                // Deepest level among all args; root if there are none.
                let mut level = LoopLevel::root();
                for &arg in args.as_slice(&ctx.args) {
                    level = std::cmp::max(level, values[arg].loop_level);
                }
                level
            }
            &Node::Load { addr, .. } => values[addr].loop_level,
            &Node::Result { value, .. } => values[value].loop_level,
            &Node::Inst { loop_level, .. } | &Node::Param { loop_level, .. } => loop_level,
        };
        AnalysisValue { loop_level }
    }

    /// Merge two analysis values for a union: keep the deeper loop level.
    fn meet(&self, _ctx: &NodeCtx, v1: &AnalysisValue, v2: &AnalysisValue) -> AnalysisValue {
        let loop_level = if v1.loop_level >= v2.loop_level {
            v1.loop_level
        } else {
            v2.loop_level
        };
        AnalysisValue { loop_level }
    }
}

View File — end of previous file; the following hunk is a new file
(extended domtree with traversal support):
@@ -0,0 +1,69 @@
//! Extended domtree with various traversal support.
use crate::dominator_tree::DominatorTree;
use crate::ir::{Block, Function};
use cranelift_entity::{packed_option::PackedOption, SecondaryMap};
/// A dominator tree augmented with explicit child links, so that it
/// can be traversed top-down (parent-to-children).
#[derive(Clone, Debug)]
pub(crate) struct DomTreeWithChildren {
    /// Per-block first-child/next-sibling links.
    nodes: SecondaryMap<Block, DomTreeNode>,
    /// The tree root: the function's entry block.
    root: Block,
}
/// Child/sibling links for one block: `children` heads a singly-linked
/// list of domtree children, chained through each child's `next`.
#[derive(Clone, Copy, Debug, Default)]
struct DomTreeNode {
    /// First domtree child of this block, if any.
    children: PackedOption<Block>,
    /// Next sibling in the parent's child list, if any.
    next: PackedOption<Block>,
}
impl DomTreeWithChildren {
    /// Build the child-link representation from a computed
    /// `DominatorTree`: each block with an immediate dominator is
    /// prepended onto its idom's singly-linked child list.
    pub(crate) fn new(func: &Function, domtree: &DominatorTree) -> DomTreeWithChildren {
        let mut nodes: SecondaryMap<Block, DomTreeNode> =
            SecondaryMap::with_capacity(func.dfg.num_blocks());
        for block in func.layout.blocks() {
            // Blocks with no idom (the entry block, and any block the
            // domtree does not cover) contribute no child edge.
            let idom_inst = match domtree.idom(block) {
                Some(idom_inst) => idom_inst,
                None => continue,
            };
            let idom = func
                .layout
                .inst_block(idom_inst)
                .expect("Dominating instruction should be part of a block");
            // Push `block` onto the front of `idom`'s child list.
            nodes[block].next = nodes[idom].children;
            nodes[idom].children = block.into();
        }
        // State the invariant rather than panicking with a bare
        // `unwrap()`: a function laid out for compilation must have an
        // entry block to serve as the domtree root.
        let root = func
            .layout
            .entry_block()
            .expect("Function must have an entry block to build a domtree");
        Self { nodes, root }
    }

    /// The domtree root: the function's entry block.
    pub(crate) fn root(&self) -> Block {
        self.root
    }

    /// Iterate over the immediate domtree children of `block`.
    pub(crate) fn children<'a>(&'a self, block: Block) -> DomTreeChildIter<'a> {
        let block = self.nodes[block].children;
        DomTreeChildIter {
            domtree: self,
            block,
        }
    }
}
/// Iterator over the immediate domtree children of one block; walks
/// the singly-linked sibling list.
pub(crate) struct DomTreeChildIter<'a> {
    domtree: &'a DomTreeWithChildren,
    /// Next child to yield, if any.
    block: PackedOption<Block>,
}
impl<'a> Iterator for DomTreeChildIter<'a> {
    type Item = Block;

    /// Yield the current child (if any) and advance to its sibling.
    fn next(&mut self) -> Option<Block> {
        let current = self.block.expand()?;
        self.block = self.domtree.nodes[current].next;
        Some(current)
    }
}

View File — end of previous file; the following hunk is a new file
(the elaboration phase, lowering the egraph back to CFG instructions):
@@ -0,0 +1,612 @@
//! Elaboration phase: lowers EGraph back to sequences of operations
//! in CFG nodes.
use super::domtree::DomTreeWithChildren;
use super::node::{op_cost, Cost, Node, NodeCtx};
use super::Analysis;
use super::Stats;
use crate::dominator_tree::DominatorTree;
use crate::fx::FxHashSet;
use crate::ir::{Block, Function, Inst, Opcode, RelSourceLoc, Type, Value, ValueList};
use crate::loop_analysis::LoopAnalysis;
use crate::scoped_hash_map::ScopedHashMap;
use crate::trace;
use alloc::vec::Vec;
use cranelift_egraph::{EGraph, Id, Language, NodeKey};
use cranelift_entity::{packed_option::PackedOption, SecondaryMap};
use smallvec::{smallvec, SmallVec};
use std::ops::Add;
type LoopDepth = u32;
/// The elaborator: walks the domtree, picks the lowest-cost enode per
/// eclass, and emits instructions back into `func`.
pub(crate) struct Elaborator<'a> {
    /// The function whose body is being rebuilt in place.
    func: &'a mut Function,
    domtree: &'a DominatorTree,
    loop_analysis: &'a LoopAnalysis,
    /// Shared type/arg pools for nodes in the egraph.
    node_ctx: &'a NodeCtx,
    egraph: &'a EGraph<NodeCtx, Analysis>,
    /// Scoped map from eclass Id to its elaborated value; scopes
    /// correspond to domtree levels, so a memoized value is visible
    /// for the rest of its block and in all dominated blocks.
    id_to_value: ScopedHashMap<Id, IdValue>,
    /// Lowest known cost, and the eclass node achieving it, for every
    /// eclass Id; filled in by `compute_best_nodes`.
    id_to_best_cost_and_node: SecondaryMap<Id, (Cost, Id)>,
    /// Stack of blocks and loops in current elaboration path.
    loop_stack: SmallVec<[LoopStackEntry; 8]>,
    /// The block we are currently emitting instructions into.
    cur_block: Option<Block>,
    /// The first terminator-group instruction emitted per block, if
    /// any; later non-branch insts are inserted *before* it (see
    /// `add_node`).
    first_branch: SecondaryMap<Block, PackedOption<Inst>>,
    /// Eclass Ids that should be rematerialized in each block where
    /// they are used rather than reused across blocks.
    remat_ids: &'a FxHashSet<Id>,
    /// Explicitly-unrolled value elaboration stack.
    elab_stack: Vec<ElabStackEntry>,
    /// Results for completed `elab_stack` entries.
    elab_result_stack: Vec<IdValue>,
    /// Explicitly-unrolled block elaboration stack.
    block_stack: Vec<BlockStackEntry>,
    stats: &'a mut Stats,
}
/// Tracking state for one loop enclosing the current point of the
/// domtree traversal.
#[derive(Clone, Debug)]
struct LoopStackEntry {
    /// The hoist point: a block that immediately dominates this
    /// loop. May not be an immediate predecessor, but will be a valid
    /// point to place all loop-invariant ops: they must depend only
    /// on inputs that dominate the loop, so are available at (the end
    /// of) this block.
    hoist_block: Block,
    /// The depth in the scope map at which defs hoisted to
    /// `hoist_block` are inserted (the hoist block's own scope).
    scope_depth: u32,
}
/// One entry in the explicitly-unrolled value elaboration stack;
/// replaces what would otherwise be recursion through enode args.
#[derive(Clone, Debug)]
enum ElabStackEntry {
    /// Next action is to resolve this id into a node and elaborate
    /// args.
    Start { id: Id },
    /// Args have been pushed; waiting for results.
    PendingNode {
        /// Canonical Id of the eclass; memoization key in `id_to_value`.
        canonical: Id,
        /// The chosen (best-cost) node to emit for this eclass.
        node_key: NodeKey,
        /// Whether this value is forced to rematerialize here rather
        /// than being hoisted or reused across blocks.
        remat: bool,
        /// How many results to consume from the result stack.
        num_args: usize,
    },
    /// Waiting for a result to return one projected value of a
    /// multi-value result.
    PendingProjection { canonical: Id, index: usize },
}
/// One entry in the explicit domtree-traversal stack.
#[derive(Clone, Debug)]
enum BlockStackEntry {
    /// Open a scope and elaborate `block`, whose immediate dominator
    /// is `idom` (`None` for the root).
    Elaborate { block: Block, idom: Option<Block> },
    /// Close the scope opened by the matching `Elaborate`.
    Pop,
}
/// The elaborated form of an eclass: the CLIF value(s) it produced,
/// plus where they were defined.
#[derive(Clone, Debug)]
enum IdValue {
    /// A single value.
    Value {
        /// Loop depth at the point of definition.
        depth: LoopDepth,
        /// Block in which the value was defined.
        block: Block,
        value: Value,
    },
    /// Multiple results, as a list of CLIF values.
    Values {
        depth: LoopDepth,
        block: Block,
        values: ValueList,
    },
}
impl IdValue {
    /// The block in which this value (or value list) was defined.
    fn block(&self) -> Block {
        match *self {
            IdValue::Value { block, .. } | IdValue::Values { block, .. } => block,
        }
    }
}
impl<'a> Elaborator<'a> {
/// Construct an elaborator over the given egraph, ready to rebuild
/// `func`'s body.
pub(crate) fn new(
    func: &'a mut Function,
    domtree: &'a DominatorTree,
    loop_analysis: &'a LoopAnalysis,
    egraph: &'a EGraph<NodeCtx, Analysis>,
    node_ctx: &'a NodeCtx,
    remat_ids: &'a FxHashSet<Id>,
    stats: &'a mut Stats,
) -> Self {
    let block_count = func.dfg.num_blocks();
    let class_count = egraph.classes.len();
    // Seed every eclass at (infinite cost, invalid node); the single
    // forward pass in `compute_best_nodes` fills in the real minima.
    let mut best = SecondaryMap::with_default((Cost::infinity(), Id::invalid()));
    best.resize(class_count);
    Self {
        func,
        domtree,
        loop_analysis,
        egraph,
        node_ctx,
        id_to_value: ScopedHashMap::with_capacity(class_count),
        id_to_best_cost_and_node: best,
        loop_stack: smallvec![],
        cur_block: None,
        first_branch: SecondaryMap::with_capacity(block_count),
        remat_ids,
        elab_stack: vec![],
        elab_result_stack: vec![],
        block_stack: vec![],
        stats,
    }
}
/// The number of loops enclosing the current elaboration point (one
/// `loop_stack` entry per loop on the path).
fn cur_loop_depth(&self) -> LoopDepth {
    let n_loops = self.loop_stack.len();
    n_loops as LoopDepth
}
/// Open `block` for elaboration: push a loop-stack entry if it is a
/// loop header, make it the current insertion block, and materialize
/// its CLIF block params, seeding the scoped Id-to-Value map.
fn start_block(&mut self, idom: Option<Block>, block: Block, block_params: &[(Id, Type)]) {
    trace!(
        "start_block: block {:?} with idom {:?} at loop depth {} scope depth {}",
        block,
        idom,
        self.cur_loop_depth(),
        self.id_to_value.depth()
    );
    // Note that if the *entry* block is a loop header, we will
    // not make note of the loop here because it will not have an
    // immediate dominator. We must disallow this case because we
    // will skip adding the `LoopStackEntry` here but our
    // `LoopAnalysis` will otherwise still make note of this loop
    // and loop depths will not match.
    if let Some(idom) = idom {
        if self.loop_analysis.is_loop_header(block).is_some() {
            self.loop_stack.push(LoopStackEntry {
                // Any code hoisted out of this loop will have code
                // placed in `idom`, and will have def mappings
                // inserted in to the scoped hashmap at that block's
                // level.
                hoist_block: idom,
                scope_depth: (self.id_to_value.depth() - 1) as u32,
            });
            trace!(
                " -> loop header, pushing; depth now {}",
                self.loop_stack.len()
            );
        }
    } else {
        debug_assert!(
            self.loop_analysis.is_loop_header(block).is_none(),
            "Entry block (domtree root) cannot be a loop header!"
        );
    }
    self.cur_block = Some(block);
    // Create the CLIF blockparams and record each one under its
    // egraph Id so uses resolve to these values.
    for &(id, ty) in block_params {
        let value = self.func.dfg.append_block_param(block, ty);
        trace!(" -> block param id {:?} value {:?}", id, value);
        self.id_to_value.insert_if_absent(
            id,
            IdValue::Value {
                depth: self.cur_loop_depth(),
                block,
                value,
            },
        );
    }
}
/// Emit a CLIF instruction for `node` with the already-elaborated
/// argument values `args`, place it in `to_block`, and return its
/// result list. Terminator-group instructions are appended at the
/// end of the block; any other instruction emitted after the block's
/// first branch is inserted *before* that branch (see comment below).
/// Panics if called on a `Param` or `Result` node.
fn add_node(&mut self, node: &Node, args: &[Value], to_block: Block) -> ValueList {
    let (instdata, result_tys) = match node {
        Node::Pure { op, types, .. } | Node::Inst { op, types, .. } => (
            op.with_args(args, &mut self.func.dfg.value_lists),
            types.as_slice(&self.node_ctx.types),
        ),
        Node::Load { op, ty, .. } => (
            op.with_args(args, &mut self.func.dfg.value_lists),
            std::slice::from_ref(ty),
        ),
        _ => panic!("Cannot `add_node()` on block param or projection"),
    };
    // Only insts and loads carry a source location to preserve.
    let srcloc = match node {
        Node::Inst { srcloc, .. } | Node::Load { srcloc, .. } => *srcloc,
        _ => RelSourceLoc::default(),
    };
    let opcode = instdata.opcode();
    // Is this instruction either an actual terminator (an
    // instruction that must end the block), or at least in the
    // group of branches at the end (including conditional
    // branches that may be followed by an actual terminator)? We
    // call this the "terminator group", and we record the first
    // inst in this group (`first_branch` below) so that we do not
    // insert instructions needed only by args of later
    // instructions in the terminator group in the middle of the
    // terminator group.
    //
    // E.g., for the original sequence
    //   v1 = op ...
    //   brnz vCond, block1
    //   jump block2(v1)
    //
    // elaboration would naively produce
    //
    //   brnz vCond, block1
    //   v1 = op ...
    //   jump block2(v1)
    //
    // but we use the `first_branch` mechanism below to ensure
    // that once we've emitted at least one branch, all other
    // elaborated insts have to go before that. So we emit brnz
    // first, then as we elaborate the jump, we find we need the
    // `op`; we `insert_inst` it *before* the brnz (which is the
    // `first_branch`).
    let is_terminator_group_inst =
        opcode.is_branch() || opcode.is_return() || opcode == Opcode::Trap;
    let inst = self.func.dfg.make_inst(instdata);
    self.func.srclocs[inst] = srcloc;
    for &ty in result_tys {
        self.func.dfg.append_result(inst, ty);
    }
    if is_terminator_group_inst {
        self.func.layout.append_inst(inst, to_block);
        if self.first_branch[to_block].is_none() {
            self.first_branch[to_block] = Some(inst).into();
        }
    } else if let Some(branch) = self.first_branch[to_block].into() {
        // A branch already exists in this block: keep the terminator
        // group contiguous by inserting before it.
        self.func.layout.insert_inst(inst, branch);
    } else {
        self.func.layout.append_inst(inst, to_block);
    }
    self.func.dfg.inst_results_list(inst)
}
/// Compute, for every eclass, its lowest-cost node and the eclass Id
/// that holds that node, in a single forward pass: each eclass entry
/// refers only to earlier Ids (union children and node args), so an
/// in-order scan sees all inputs before they are needed.
fn compute_best_nodes(&mut self) {
    let best = &mut self.id_to_best_cost_and_node;
    for (eclass_id, eclass) in &self.egraph.classes {
        trace!("computing best for eclass {:?}", eclass_id);
        // Start from the best of any union'd child classes...
        if let Some(child1) = eclass.child1() {
            trace!(" -> child {:?}", child1);
            best[eclass_id] = best[child1];
        }
        if let Some(child2) = eclass.child2() {
            trace!(" -> child {:?}", child2);
            if best[child2].0 < best[eclass_id].0 {
                best[eclass_id] = best[child2];
            }
        }
        // ...then consider this eclass's own node, if it has one.
        if let Some(node_key) = eclass.get_node() {
            let node = node_key.node(&self.egraph.nodes);
            trace!(" -> eclass {:?}: node {:?}", eclass_id, node);
            let (cost, id) = match node {
                // Non-pure nodes are assigned zero cost: they will
                // be emitted regardless, so extraction should always
                // prefer them when present.
                Node::Param { .. }
                | Node::Inst { .. }
                | Node::Load { .. }
                | Node::Result { .. } => (Cost::zero(), eclass_id),
                Node::Pure { op, .. } => {
                    // Cost of a pure node: op cost scaled by loop
                    // level, plus the best cost of each arg.
                    let args_cost = self
                        .node_ctx
                        .children(node)
                        .iter()
                        .map(|&arg_id| {
                            trace!(" -> arg {:?}", arg_id);
                            best[arg_id].0
                        })
                        // Can't use `.sum()` for `Cost` types; do
                        // an explicit reduce instead.
                        .fold(Cost::zero(), Cost::add);
                    let level = self.egraph.analysis_value(eclass_id).loop_level;
                    let cost = op_cost(op).at_level(level) + args_cost;
                    (cost, eclass_id)
                }
            };
            if cost < best[eclass_id].0 {
                best[eclass_id] = (cost, id);
            }
        }
        debug_assert_ne!(best[eclass_id].0, Cost::infinity());
        debug_assert_ne!(best[eclass_id].1, Id::invalid());
        trace!("best for eclass {:?}: {:?}", eclass_id, best[eclass_id]);
    }
}
/// Elaborate one eclass use as a root: resolves the eclass (and,
/// transitively, its args) into emitted instructions. Exactly one
/// result is produced on the result stack and then discarded — roots
/// are elaborated for their side effects, not their value.
fn elaborate_eclass_use(&mut self, id: Id) {
    self.elab_stack.push(ElabStackEntry::Start { id });
    self.process_elab_stack();
    debug_assert_eq!(self.elab_result_stack.len(), 1);
    self.elab_result_stack.clear();
}
/// Drive the explicit elaboration stack until empty. A `Start` entry
/// either resolves to a memoized value, expands to a
/// `PendingProjection` (for multi-result projections), or expands to
/// a `PendingNode` plus `Start` entries for each arg; the `Pending*`
/// entries then consume their inputs from `elab_result_stack` and
/// push their own result there.
fn process_elab_stack(&mut self) {
    while let Some(entry) = self.elab_stack.last() {
        match entry {
            &ElabStackEntry::Start { id } => {
                // We always replace the Start entry, so pop it now.
                self.elab_stack.pop();
                self.stats.elaborate_visit_node += 1;
                let canonical = self.egraph.canonical_id(id);
                trace!("elaborate: id {}", id);
                let remat = if let Some(val) = self.id_to_value.get(&canonical) {
                    // Look at the defined block, and determine whether this
                    // node kind allows rematerialization if the value comes
                    // from another block. If so, ignore the hit and recompute
                    // below.
                    let remat = val.block() != self.cur_block.unwrap()
                        && self.remat_ids.contains(&canonical);
                    if !remat {
                        trace!("elaborate: id {} -> {:?}", id, val);
                        self.stats.elaborate_memoize_hit += 1;
                        self.elab_result_stack.push(val.clone());
                        continue;
                    }
                    trace!("elaborate: id {} -> remat", id);
                    self.stats.elaborate_memoize_miss_remat += 1;
                    // The op is pure at this point, so it is always valid to
                    // remove from this map.
                    self.id_to_value.remove(&canonical);
                    true
                } else {
                    self.remat_ids.contains(&canonical)
                };
                self.stats.elaborate_memoize_miss += 1;
                // Get the best option; we use `id` (latest id) here so we
                // have a full view of the eclass.
                let (_, best_node_eclass) = self.id_to_best_cost_and_node[id];
                debug_assert_ne!(best_node_eclass, Id::invalid());
                trace!(
                    "elaborate: id {} -> best {} -> eclass node {:?}",
                    id,
                    best_node_eclass,
                    self.egraph.classes[best_node_eclass]
                );
                let node_key = self.egraph.classes[best_node_eclass].get_node().unwrap();
                let node = node_key.node(&self.egraph.nodes);
                trace!(" -> enode {:?}", node);
                // Is the node a block param? We should never get here if so
                // (they are inserted when first visiting the block).
                if matches!(node, Node::Param { .. }) {
                    unreachable!("Param nodes should already be inserted");
                }
                // Is the node a result projection? If so, resolve
                // the value we are projecting a part of, then
                // eventually return here (saving state with a
                // PendingProjection).
                if let Node::Result { value, result, .. } = node {
                    trace!(" -> result; pushing arg value {}", value);
                    self.elab_stack.push(ElabStackEntry::PendingProjection {
                        index: *result,
                        canonical,
                    });
                    self.elab_stack.push(ElabStackEntry::Start { id: *value });
                    continue;
                }
                // We're going to need to emit this
                // operator. First, enqueue all args to be
                // elaborated. Push state to receive the results
                // and later elab this node.
                let num_args = self.node_ctx.children(&node).len();
                self.elab_stack.push(ElabStackEntry::PendingNode {
                    canonical,
                    node_key,
                    remat,
                    num_args,
                });
                // Push args in reverse order so we process the
                // first arg first.
                for &arg_id in self.node_ctx.children(&node).iter().rev() {
                    self.elab_stack.push(ElabStackEntry::Start { id: arg_id });
                }
            }
            &ElabStackEntry::PendingNode {
                canonical,
                node_key,
                remat,
                num_args,
            } => {
                self.elab_stack.pop();
                let node = node_key.node(&self.egraph.nodes);
                // We should have all args resolved at this point.
                let arg_idx = self.elab_result_stack.len() - num_args;
                let args = &self.elab_result_stack[arg_idx..];
                // Gather the individual output-CLIF `Value`s.
                let arg_values: SmallVec<[Value; 8]> = args
                    .iter()
                    .map(|idvalue| match idvalue {
                        IdValue::Value { value, .. } => *value,
                        IdValue::Values { .. } => {
                            panic!("enode depends directly on multi-value result")
                        }
                    })
                    .collect();
                // Compute max loop depth across the args: if all args
                // are defined outside the current loop, a pure node
                // can be hoisted (LICM) below.
                let max_loop_depth = args
                    .iter()
                    .map(|idvalue| match idvalue {
                        IdValue::Value { depth, .. } => *depth,
                        IdValue::Values { .. } => unreachable!(),
                    })
                    .max()
                    .unwrap_or(0);
                // Remove args from result stack.
                self.elab_result_stack.truncate(arg_idx);
                // Determine the location at which we emit it. This is the
                // current block *unless* we hoist above a loop when all args
                // are loop-invariant (and this op is pure).
                let (loop_depth, scope_depth, block) = if node.is_non_pure() {
                    // Non-pure op: always at the current location.
                    (
                        self.cur_loop_depth(),
                        self.id_to_value.depth(),
                        self.cur_block.unwrap(),
                    )
                } else if max_loop_depth == self.cur_loop_depth() || remat {
                    // Pure op, but depends on some value at the current loop
                    // depth, or remat forces it here: as above.
                    (
                        self.cur_loop_depth(),
                        self.id_to_value.depth(),
                        self.cur_block.unwrap(),
                    )
                } else {
                    // Pure op, and does not depend on any args at current
                    // loop depth: hoist out of loop.
                    self.stats.elaborate_licm_hoist += 1;
                    let data = &self.loop_stack[max_loop_depth as usize];
                    (max_loop_depth, data.scope_depth as usize, data.hoist_block)
                };
                // Loop scopes are a subset of all scopes.
                debug_assert!(scope_depth >= loop_depth as usize);
                // This is an actual operation; emit the node in sequence now.
                let results = self.add_node(node, &arg_values[..], block);
                let results_slice = results.as_slice(&self.func.dfg.value_lists);
                // Build the result and memoize in the id-to-value map.
                let result = if results_slice.len() == 1 {
                    IdValue::Value {
                        depth: loop_depth,
                        block,
                        value: results_slice[0],
                    }
                } else {
                    IdValue::Values {
                        depth: loop_depth,
                        block,
                        values: results,
                    }
                };
                self.id_to_value.insert_if_absent_with_depth(
                    canonical,
                    result.clone(),
                    scope_depth,
                );
                // Push onto the elab-results stack.
                self.elab_result_stack.push(result)
            }
            &ElabStackEntry::PendingProjection { index, canonical } => {
                self.elab_stack.pop();
                // Grab the input from the elab-result stack.
                let value = self.elab_result_stack.pop().expect("Should have result");
                let (depth, block, values) = match value {
                    IdValue::Values {
                        depth,
                        block,
                        values,
                        ..
                    } => (depth, block, values),
                    IdValue::Value { .. } => {
                        unreachable!("Projection nodes should not be used on single results");
                    }
                };
                let values = values.as_slice(&self.func.dfg.value_lists);
                let value = IdValue::Value {
                    depth,
                    block,
                    value: values[index],
                };
                self.id_to_value.insert_if_absent(canonical, value.clone());
                self.elab_result_stack.push(value);
            }
        }
    }
}
/// Elaborate a single block: open its scope and params, then process
/// each side-effecting root eclass in original program order.
fn elaborate_block<'b, PF: Fn(Block) -> &'b [(Id, Type)], SEF: Fn(Block) -> &'b [Id]>(
    &mut self,
    idom: Option<Block>,
    block: Block,
    block_params_fn: &PF,
    block_side_effects_fn: &SEF,
) {
    self.start_block(idom, block, block_params_fn(block));
    for &root in block_side_effects_fn(block) {
        self.elaborate_eclass_use(root);
    }
}
/// Traverse the domtree in preorder using an explicit stack (so deep
/// trees cannot overflow the machine stack): an `Elaborate` entry
/// opens a scope and elaborates one block; its matching `Pop` entry
/// closes the scope and pops any loop that ends at that depth.
fn elaborate_domtree<'b, PF: Fn(Block) -> &'b [(Id, Type)], SEF: Fn(Block) -> &'b [Id]>(
    &mut self,
    block_params_fn: &PF,
    block_side_effects_fn: &SEF,
    domtree: &DomTreeWithChildren,
) {
    let root = domtree.root();
    self.block_stack.push(BlockStackEntry::Elaborate {
        block: root,
        idom: None,
    });
    while let Some(top) = self.block_stack.pop() {
        match top {
            BlockStackEntry::Elaborate { block, idom } => {
                self.block_stack.push(BlockStackEntry::Pop);
                self.id_to_value.increment_depth();
                self.elaborate_block(idom, block, block_params_fn, block_side_effects_fn);
                // Push children. We are doing a preorder
                // traversal so we do this after processing this
                // block above.
                let block_stack_end = self.block_stack.len();
                for child in domtree.children(block) {
                    self.block_stack.push(BlockStackEntry::Elaborate {
                        block: child,
                        idom: Some(block),
                    });
                }
                // Reverse what we just pushed so we elaborate in
                // original block order. (The domtree iter is a
                // single-ended iter over a singly-linked list so
                // we can't `.rev()` above.)
                self.block_stack[block_stack_end..].reverse();
            }
            BlockStackEntry::Pop => {
                self.id_to_value.decrement_depth();
                // If the innermost loop's scope ends at the depth we
                // just left, we are exiting that loop.
                if let Some(innermost_loop) = self.loop_stack.last() {
                    if innermost_loop.scope_depth as usize == self.id_to_value.depth() {
                        self.loop_stack.pop();
                    }
                }
            }
        }
    }
}
/// Remove all instruction data from the function so that elaboration
/// can rebuild the body from scratch.
fn clear_func_body(&mut self) {
    // Clear all instructions and args/results from the DFG. We
    // rebuild them entirely during elaboration. (TODO: reuse the
    // existing inst for the *first* copy of a given node.)
    self.func.dfg.clear_insts();
    // Clear the instructions in every block, but leave the list
    // of blocks and their layout unmodified.
    self.func.layout.clear_insts();
    // Source locations are re-recorded as instructions are re-emitted.
    self.func.srclocs.clear();
}
/// Top-level entry point: clear the old function body, extract the
/// best node per eclass, then re-emit instructions by walking the
/// domtree. The two closures provide, per block, its egraph
/// blockparams and its side-effecting root eclasses.
pub(crate) fn elaborate<'b, PF: Fn(Block) -> &'b [(Id, Type)], SEF: Fn(Block) -> &'b [Id]>(
    &mut self,
    block_params_fn: PF,
    block_side_effects_fn: SEF,
) {
    self.stats.elaborate_func += 1;
    self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64;
    let domtree_children = DomTreeWithChildren::new(self.func, self.domtree);
    self.clear_func_body();
    self.compute_best_nodes();
    self.elaborate_domtree(&block_params_fn, &block_side_effects_fn, &domtree_children);
    self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64;
}
}

View File — end of previous file; the following hunk is a new file
(node definitions for the egraph representation):
@@ -0,0 +1,376 @@
//! Node definition for EGraph representation.
use super::MemoryState;
use crate::ir::{Block, DataFlowGraph, Inst, InstructionImms, Opcode, RelSourceLoc, Type};
use crate::loop_analysis::LoopLevel;
use cranelift_egraph::{BumpArena, BumpSlice, CtxEq, CtxHash, Id, Language, UnionFind};
use cranelift_entity::{EntityList, ListPool};
use std::hash::{Hash, Hasher};
/// A node in the egraph: a blockparam or result projection (reference
/// points), or an operation classified as pure, side-effecting, or a
/// (dedupable) load.
#[derive(Debug)]
pub enum Node {
    /// A blockparam. Effectively an input/root; does not refer to
    /// predecessors' branch arguments, because this would create
    /// cycles.
    Param {
        /// CLIF block this param comes from.
        block: Block,
        /// Index of blockparam within block.
        index: u32,
        /// Type of the value.
        ty: Type,
        /// The loop level of this Param.
        loop_level: LoopLevel,
    },
    /// A CLIF instruction that is pure (has no side-effects). Not
    /// tied to any location; we will compute a set of locations at
    /// which to compute this node during lowering back out of the
    /// egraph.
    Pure {
        /// The instruction data, without SSA values.
        op: InstructionImms,
        /// eclass arguments to the operator.
        args: EntityList<Id>,
        /// Types of results.
        types: BumpSlice<Type>,
    },
    /// A CLIF instruction that has side-effects or is otherwise not
    /// representable by `Pure`.
    Inst {
        /// The instruction data, without SSA values.
        op: InstructionImms,
        /// eclass arguments to the operator.
        args: EntityList<Id>,
        /// Types of results.
        types: BumpSlice<Type>,
        /// The index of the original instruction. We include this so
        /// that the `Inst`s are not deduplicated: every instance is a
        /// logically separate and unique side-effect. However,
        /// because we clear the DataFlowGraph before elaboration,
        /// this `Inst` is *not* valid to fetch any details from the
        /// original instruction.
        inst: Inst,
        /// The source location to preserve.
        srcloc: RelSourceLoc,
        /// The loop level of this Inst.
        loop_level: LoopLevel,
    },
    /// A projection of one result of an `Inst` or `Pure`.
    Result {
        /// `Inst` or `Pure` node.
        value: Id,
        /// Index of the result we want.
        result: usize,
        /// Type of the value.
        ty: Type,
    },
    /// A load instruction. Nominally a side-effecting `Inst` (and
    /// included in the list of side-effecting roots so it will always
    /// be elaborated), but represented as a distinct kind of node so
    /// that we can leverage deduplication to do
    /// redundant-load-elimination for free (and make store-to-load
    /// forwarding much easier).
    Load {
        // -- identity depends on:
        /// The original load operation. Must have one argument, the
        /// address.
        op: InstructionImms,
        /// The type of the load result.
        ty: Type,
        /// Address argument. Actual address has an offset, which is
        /// included in `op` (and thus already considered as part of
        /// the key).
        addr: Id,
        /// The abstract memory state that this load accesses.
        mem_state: MemoryState,
        // -- not included in dedup key:
        /// The `Inst` we will use for a trap location for this
        /// load. Excluded from Eq/Hash so that loads that are
        /// identical except for the specific instance will dedup on
        /// top of each other.
        inst: Inst,
        /// Source location, for traps. Not included in Eq/Hash.
        srcloc: RelSourceLoc,
    },
}
impl Node {
    /// Is this node non-pure — i.e., a side-effecting `Inst`, or a
    /// `Load` (which depends on memory state)? Such nodes are not
    /// freely movable during elaboration.
    pub(crate) fn is_non_pure(&self) -> bool {
        // Idiomatic boolean pattern test; behavior unchanged from the
        // explicit `match` form.
        matches!(self, Node::Inst { .. } | Node::Load { .. })
    }
}
/// Shared pools for type and id lists in nodes: nodes store compact
/// handles (`BumpSlice`/`EntityList`) into these pools rather than
/// owning their lists.
pub struct NodeCtx {
    /// Arena for result-type arrays.
    pub types: BumpArena<Type>,
    /// Arena for arg eclass-ID lists.
    pub args: ListPool<Id>,
}
impl NodeCtx {
    /// Build a `NodeCtx` whose pools are pre-sized from the given
    /// DFG: one type slot per SSA value, and arg-list capacity equal
    /// to the DFG's own value-list capacity.
    pub(crate) fn with_capacity_for_dfg(dfg: &DataFlowGraph) -> Self {
        Self {
            types: BumpArena::arena_with_capacity(dfg.num_values()),
            args: ListPool::with_capacity(dfg.value_lists.capacity()),
        }
    }
}
impl NodeCtx {
    /// Compare two eclass-argument lists for equality, resolving each
    /// Id pair through the union-find so unioned classes compare equal.
    fn ids_eq(&self, a: &EntityList<Id>, b: &EntityList<Id>, uf: &mut UnionFind) -> bool {
        let xs = a.as_slice(&self.args);
        let ys = b.as_slice(&self.args);
        if xs.len() != ys.len() {
            return false;
        }
        xs.iter().zip(ys.iter()).all(|(&x, &y)| uf.equiv_id_mut(x, y))
    }

    /// Hash an eclass-argument list, hashing each Id through the
    /// union-find for consistency with `ids_eq`.
    fn hash_ids<H: Hasher>(&self, a: &EntityList<Id>, hash: &mut H, uf: &mut UnionFind) {
        for &id in a.as_slice(&self.args) {
            uf.hash_id_mut(hash, id);
        }
    }
}
impl CtxEq<Node, Node> for NodeCtx {
    /// Structural equality for nodes, with eclass Ids resolved through
    /// the union-find. Fields deliberately excluded from a node kind's
    /// identity are noted inline below.
    fn ctx_eq(&self, a: &Node, b: &Node, uf: &mut UnionFind) -> bool {
        match (a, b) {
            (
                &Node::Param {
                    block,
                    index,
                    ty,
                    loop_level: _,
                },
                &Node::Param {
                    block: other_block,
                    index: other_index,
                    ty: other_ty,
                    loop_level: _,
                },
            ) => block == other_block && index == other_index && ty == other_ty,
            (
                &Node::Result { value, result, ty },
                &Node::Result {
                    value: other_value,
                    result: other_result,
                    ty: other_ty,
                },
            ) => uf.equiv_id_mut(value, other_value) && result == other_result && ty == other_ty,
            (
                &Node::Pure {
                    ref op,
                    ref args,
                    ref types,
                },
                &Node::Pure {
                    op: ref other_op,
                    args: ref other_args,
                    types: ref other_types,
                },
            ) => {
                *op == *other_op
                    && self.ids_eq(args, other_args, uf)
                    && types.as_slice(&self.types) == other_types.as_slice(&self.types)
            }
            // `Inst` nodes are identified by their original `Inst`
            // index (each is a unique side-effect), so comparing
            // `inst` plus args suffices here.
            (
                &Node::Inst { inst, ref args, .. },
                &Node::Inst {
                    inst: other_inst,
                    args: ref other_args,
                    ..
                },
            ) => inst == other_inst && self.ids_eq(args, other_args, uf),
            (
                &Node::Load {
                    ref op,
                    ty,
                    addr,
                    mem_state,
                    ..
                },
                &Node::Load {
                    op: ref other_op,
                    ty: other_ty,
                    addr: other_addr,
                    mem_state: other_mem_state,
                    // Explicitly exclude: `inst` and `srcloc`. We
                    // want loads to merge if identical in
                    // opcode/offset, address expression, and last
                    // store (this does implicit
                    // redundant-load-elimination.)
                    //
                    // Note however that we *do* include `ty` (the
                    // type) and match on that: we otherwise would
                    // have no way of disambiguating loads of
                    // different widths to the same address.
                    ..
                },
            ) => {
                op == other_op
                    && ty == other_ty
                    && uf.equiv_id_mut(addr, other_addr)
                    && mem_state == other_mem_state
            }
            // Nodes of different kinds are never equal.
            _ => false,
        }
    }
}
impl CtxHash<Node> for NodeCtx {
    /// Hash a node consistently with `ctx_eq` above: every field
    /// excluded from equality is also excluded here. (A field *may*
    /// be compared for equality but skipped for hashing — e.g.
    /// `Param::ty` — since hashing only needs to be consistent with
    /// equality, not complete.)
    fn ctx_hash(&self, value: &Node, uf: &mut UnionFind) -> u64 {
        let mut state = crate::fx::FxHasher::default();
        // Distinguish variants first.
        std::mem::discriminant(value).hash(&mut state);
        match value {
            &Node::Param {
                block,
                index,
                ty: _,
                loop_level: _,
            } => {
                block.hash(&mut state);
                index.hash(&mut state);
            }
            &Node::Result {
                value,
                result,
                ty: _,
            } => {
                // Hash the producing eclass via its canonical
                // union-find representative.
                uf.hash_id_mut(&mut state, value);
                result.hash(&mut state);
            }
            &Node::Pure {
                ref op,
                ref args,
                types: _,
            } => {
                op.hash(&mut state);
                self.hash_ids(args, &mut state, uf);
                // Don't hash `types`: it requires an indirection
                // (hence cache misses), and result type *should* be
                // fully determined by op and args.
            }
            &Node::Inst { inst, ref args, .. } => {
                inst.hash(&mut state);
                self.hash_ids(args, &mut state, uf);
            }
            &Node::Load {
                ref op,
                ty,
                addr,
                mem_state,
                ..
            } => {
                op.hash(&mut state);
                ty.hash(&mut state);
                uf.hash_id_mut(&mut state, addr);
                mem_state.hash(&mut state);
            }
        }
        state.finish()
    }
}
/// A heuristic cost of computing a node, used when extracting the
/// cheapest representative node from each eclass. All arithmetic
/// saturates within `u32`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct Cost(u32);
impl Cost {
    /// Scale this cost for a given loop nest: each level of nesting
    /// (capped at two levels) multiplies the cost by 1024, so code
    /// in deeper loops is considered much more expensive.
    pub(crate) fn at_level(&self, loop_level: LoopLevel) -> Cost {
        let depth = loop_level.level().min(2);
        let scale = 1u32 << (10 * depth);
        Cost(self.0.saturating_mul(scale)).finite()
    }
    /// The maximum representable cost, treated as infinite.
    pub(crate) fn infinity() -> Cost {
        // 2^32 - 1 is, uh, pretty close to infinite... (we use `Cost`
        // only for heuristics and always saturate so this suffices!)
        Cost(u32::MAX)
    }
    /// The zero cost.
    pub(crate) fn zero() -> Cost {
        Cost(0)
    }
    /// Clamp this cost at a "finite" value. Can be used in
    /// conjunction with saturating ops to avoid saturating into
    /// `infinity()`.
    fn finite(self) -> Cost {
        Cost(self.0.min(u32::MAX - 1))
    }
}
impl Default for Cost {
    /// The default cost is zero.
    fn default() -> Cost {
        Cost::zero()
    }
}
impl std::ops::Add<Cost> for Cost {
type Output = Cost;
fn add(self, other: Cost) -> Cost {
Cost(self.0.saturating_add(other.0)).finite()
}
}
pub(crate) fn op_cost(op: &InstructionImms) -> Cost {
match op.opcode() {
// Constants.
Opcode::Iconst | Opcode::F32const | Opcode::F64const | Opcode::Bconst => Cost(0),
// Extends/reduces.
Opcode::Bextend
| Opcode::Breduce
| Opcode::Uextend
| Opcode::Sextend
| Opcode::Ireduce
| Opcode::Iconcat
| Opcode::Isplit => Cost(1),
// "Simple" arithmetic.
Opcode::Iadd
| Opcode::Isub
| Opcode::Band
| Opcode::BandNot
| Opcode::Bor
| Opcode::BorNot
| Opcode::Bxor
| Opcode::BxorNot
| Opcode::Bnot => Cost(2),
// Everything else.
_ => Cost(3),
}
}
impl Language for NodeCtx {
    type Node = Node;

    /// The child eclass IDs of a node: the arg list for pure and
    /// side-effecting ops, the address expression for loads, the
    /// producing value for multi-result projections, and nothing for
    /// block params.
    fn children<'a>(&'a self, node: &'a Node) -> &'a [Id] {
        match node {
            Node::Param { .. } => &[],
            Node::Pure { args, .. } | Node::Inst { args, .. } => args.as_slice(&self.args),
            Node::Load { addr, .. } => std::slice::from_ref(addr),
            Node::Result { value, .. } => std::slice::from_ref(value),
        }
    }

    /// Mutable view of the same children, allowing callers to
    /// rewrite child IDs in place.
    fn children_mut<'a>(&'a mut self, node: &'a mut Node) -> &'a mut [Id] {
        match node {
            Node::Param { .. } => &mut [],
            Node::Pure { args, .. } | Node::Inst { args, .. } => args.as_mut_slice(&mut self.args),
            Node::Load { addr, .. } => std::slice::from_mut(addr),
            Node::Result { value, .. } => std::slice::from_mut(value),
        }
    }

    /// Whether nodes of this kind participate in hash-consing
    /// (deduplication): only pure ops and loads are deduplicated.
    fn needs_dedup(&self, node: &Node) -> bool {
        match node {
            Node::Pure { .. } | Node::Load { .. } => true,
            _ => false,
        }
    }
}

View File

@@ -0,0 +1,266 @@
//! Last-store tracking via alias analysis.
//!
//! We partition memory state into several *disjoint pieces* of
//! "abstract state". There are a finite number of such pieces:
//! currently, we call them "heap", "table", "vmctx", and "other". Any
//! given address in memory belongs to exactly one disjoint piece.
//!
//! One never tracks which piece a concrete address belongs to at
//! runtime; this is a purely static concept. Instead, all
//! memory-accessing instructions (loads and stores) are labeled with
//! one of these four categories in the `MemFlags`. It is forbidden
//! for a load or store to access memory under one category and a
//! later load or store to access the same memory under a different
//! category. This is ensured to be true by construction during
//! frontend translation into CLIF and during legalization.
//!
//! Given that this non-aliasing property is ensured by the producer
//! of CLIF, we can compute a *may-alias* property: one load or store
//! may-alias another load or store if both access the same category
//! of abstract state.
//!
//! The "last store" pass helps to compute this aliasing: we perform a
//! fixpoint analysis to track the last instruction that *might have*
//! written to a given part of abstract state. We also track the block
//! containing this store.
//!
//! We can't say for sure that the "last store" *did* actually write
//! that state, but we know for sure that no instruction *later* than
//! it (up to the current instruction) did. However, we can get a
//! must-alias property from this: if at a given load or store, we
//! look backward to the "last store", *AND* we find that it has
//! exactly the same address expression and value type, then we know
//! that the current instruction's access *must* be to the same memory
//! location.
//!
//! To get this must-alias property, we leverage the node
//! hashconsing. We design the Eq/Hash (node identity relation
//! definition) of the `Node` struct so that all loads with (i) the
//! same "last store", and (ii) the same address expression, and (iii)
//! the same opcode-and-offset, will deduplicate (the first will be
//! computed, and the later ones will use the same value). Furthermore
//! we have an optimization that rewrites a load into the stored value
//! of the last store *if* the last store has the same address
//! expression and constant offset.
//!
//! This gives us two optimizations, "redundant load elimination" and
//! "store-to-load forwarding".
//!
//! In theory we could also do *dead-store elimination*, where if a
//! store overwrites a value earlier written by another store, *and*
//! if no other load/store to the abstract state category occurred,
//! *and* no other trapping instruction occurred (at which point we
//! need an up-to-date memory state because post-trap-termination
//! memory state can be observed), *and* we can prove the original
//! store could not have trapped, then we can eliminate the original
//! store. Because this is so complex, and the conditions for doing it
//! correctly when post-trap state must be correct likely reduce the
//! potential benefit, we don't yet do this.
use crate::flowgraph::ControlFlowGraph;
use crate::fx::{FxHashMap, FxHashSet};
use crate::inst_predicates::has_memory_fence_semantics;
use crate::ir::{Block, Function, Inst, InstructionData, MemFlags, Opcode};
use crate::trace;
use cranelift_entity::SecondaryMap;
use smallvec::{smallvec, SmallVec};
/// For a given program point, the vector of last-store instruction
/// indices for each disjoint category of abstract state.
///
/// The four fields correspond one-to-one to the four disjoint
/// categories named by `MemFlags` (see `for_flags`).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
struct LastStores {
    /// Last observed memory state for the "heap" category.
    heap: MemoryState,
    /// Last observed memory state for the "table" category.
    table: MemoryState,
    /// Last observed memory state for the "vmctx" category.
    vmctx: MemoryState,
    /// Last observed memory state for all other memory.
    other: MemoryState,
}
/// State of memory seen by a load.
///
/// Two loads can only deduplicate (and thus be merged by the egraph)
/// if they observe *equal* `MemoryState`s, so this type derives `Eq`
/// and `Hash` and is stored in each `Node::Load`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub enum MemoryState {
    /// State at function entry: nothing is known (but it is one
    /// consistent value, so two loads from "entry" state at the same
    /// address will still provide the same result).
    #[default]
    Entry,
    /// State just after a store by the given instruction. The
    /// instruction is a store from which we can forward.
    Store(Inst),
    /// State just before the given instruction. Used for abstract
    /// value merges at merge-points when we cannot name a single
    /// producing site.
    BeforeInst(Inst),
    /// State just after the given instruction. Used when the
    /// instruction may update the associated state, but is not a
    /// store whose value we can cleanly forward. (E.g., perhaps a
    /// barrier of some sort.)
    AfterInst(Inst),
}
impl LastStores {
    /// Conservatively invalidate all four categories: after `inst`,
    /// nothing from earlier stores may be forwarded.
    fn clobber_all(&mut self, inst: Inst) {
        let clobbered = MemoryState::AfterInst(inst);
        self.heap = clobbered;
        self.table = clobbered;
        self.vmctx = clobbered;
        self.other = clobbered;
    }

    /// Step the abstract state forward across one instruction.
    fn update(&mut self, func: &Function, inst: Inst) {
        let opcode = func.dfg[inst].opcode();
        if has_memory_fence_semantics(opcode) {
            // Fence-like instructions clobber every category.
            self.clobber_all(inst);
        } else if opcode.can_store() {
            match func.dfg[inst].memflags() {
                // A store with flags updates exactly one disjoint
                // category, and its value is a candidate for
                // store-to-load forwarding.
                Some(memflags) => *self.for_flags(memflags) = MemoryState::Store(inst),
                // A store-like instruction without flags could touch
                // anything; clobber all categories.
                None => self.clobber_all(inst),
            }
        }
        // All other instructions leave memory state unchanged.
    }

    /// Select the state slot for the disjoint category named by the
    /// given memory flags.
    fn for_flags(&mut self, memflags: MemFlags) -> &mut MemoryState {
        if memflags.heap() {
            &mut self.heap
        } else if memflags.table() {
            &mut self.table
        } else if memflags.vmctx() {
            &mut self.vmctx
        } else {
            &mut self.other
        }
    }

    /// Meet (merge) a predecessor's state into this one at a
    /// control-flow join located at `loc`: categories that disagree
    /// collapse to the abstract `BeforeInst(loc)` state.
    fn meet_from(&mut self, other: &LastStores, loc: Inst) {
        let meet = |a: MemoryState, b: MemoryState| {
            if a == b {
                a
            } else {
                MemoryState::BeforeInst(loc)
            }
        };
        self.heap = meet(self.heap, other.heap);
        self.table = meet(self.table, other.table);
        self.vmctx = meet(self.vmctx, other.vmctx);
        self.other = meet(self.other, other.other);
    }
}
/// An alias-analysis pass.
///
/// Holds the result of the analysis: for each plain load, the
/// `MemoryState` it observes. Query via `get_state_for_load`.
pub struct AliasAnalysis {
    /// Last-store instruction (or none) for a given load. Use a hash map
    /// instead of a `SecondaryMap` because this is sparse.
    load_mem_state: FxHashMap<Inst, MemoryState>,
}
impl AliasAnalysis {
    /// Perform an alias analysis pass: compute, for every plain load
    /// in the function, the last-store memory state it observes.
    pub fn new(func: &Function, cfg: &ControlFlowGraph) -> AliasAnalysis {
        // Consistency fix: use the crate-local `trace!` macro (as the
        // rest of this module does) rather than `log::trace!`.
        trace!("alias analysis: input is:\n{:?}", func);
        let block_input = Self::compute_block_input_states(func, cfg);
        let load_mem_state = Self::compute_load_last_stores(func, block_input);
        AliasAnalysis { load_mem_state }
    }

    /// Compute the `LastStores` state at the entry of every reachable
    /// block via a forward fixpoint over the CFG. Unreachable blocks
    /// keep `None` as their input state.
    fn compute_block_input_states(
        func: &Function,
        cfg: &ControlFlowGraph,
    ) -> SecondaryMap<Block, Option<LastStores>> {
        let mut block_input = SecondaryMap::with_capacity(func.dfg.num_blocks());
        // Worklist of blocks whose input state changed; the hash set
        // mirrors the stack so a block is never enqueued twice.
        let mut worklist: SmallVec<[Block; 8]> = smallvec![];
        let mut worklist_set = FxHashSet::default();
        let entry = func.layout.entry_block().unwrap();
        worklist.push(entry);
        worklist_set.insert(entry);
        block_input[entry] = Some(LastStores::default());
        while let Some(block) = worklist.pop() {
            worklist_set.remove(&block);
            let state = block_input[block].clone().unwrap();
            trace!("alias analysis: input to {} is {:?}", block, state);
            // Propagate the state across every instruction in the
            // block to obtain the block-exit state.
            let state = func
                .layout
                .block_insts(block)
                .fold(state, |mut state, inst| {
                    state.update(func, inst);
                    trace!("after {}: state is {:?}", inst, state);
                    state
                });
            // Meet the exit state into each successor's input state,
            // and re-enqueue any successor whose input changed.
            for succ in cfg.succ_iter(block) {
                let succ_first_inst = func.layout.first_inst(succ).unwrap();
                let succ_state = &mut block_input[succ];
                let old = succ_state.clone();
                if let Some(succ_state) = succ_state.as_mut() {
                    succ_state.meet_from(&state, succ_first_inst);
                } else {
                    // First time we reach this successor: its input
                    // is simply our exit state.
                    *succ_state = Some(state);
                };
                let updated = *succ_state != old;
                if updated && worklist_set.insert(succ) {
                    worklist.push(succ);
                }
            }
        }
        block_input
    }

    /// Scan each block once more (with the converged block-input
    /// states) and record the `MemoryState` seen by every plain load.
    fn compute_load_last_stores(
        func: &Function,
        block_input: SecondaryMap<Block, Option<LastStores>>,
    ) -> FxHashMap<Inst, MemoryState> {
        let mut load_mem_state = FxHashMap::default();
        for block in func.layout.blocks() {
            let mut state = block_input[block].clone().unwrap();
            for inst in func.layout.block_insts(block) {
                trace!(
                    "alias analysis: scanning at {} with state {:?} ({:?})",
                    inst,
                    state,
                    func.dfg[inst],
                );
                // N.B.: we match `Load` specifically, and not any
                // other kinds of loads (or any opcode such that
                // `opcode.can_load()` returns true), because some
                // "can load" instructions actually have very
                // different semantics (are not just a load of a
                // particularly-typed value). For example, atomic
                // (load/store, RMW, CAS) instructions "can load" but
                // definitely should not participate in store-to-load
                // forwarding or redundant-load elimination. Our goal
                // here is to provide a `MemoryState` just for plain
                // old loads whose semantics we can completely reason
                // about.
                if let InstructionData::Load {
                    opcode: Opcode::Load,
                    flags,
                    ..
                } = func.dfg[inst]
                {
                    let mem_state = *state.for_flags(flags);
                    trace!(
                        "alias analysis: at {}: load with mem_state {:?}",
                        inst,
                        mem_state,
                    );
                    load_mem_state.insert(inst, mem_state);
                }
                // Advance the state past this instruction either way.
                state.update(func, inst);
            }
        }
        load_mem_state
    }

    /// Get the state seen by a load, if any.
    pub fn get_state_for_load(&self, inst: Inst) -> Option<MemoryState> {
        self.load_mem_state.get(&inst).copied()
    }
}

View File

@@ -11,6 +11,7 @@ pub fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) ->
}
/// Test whether the given opcode is unsafe to even consider as side-effect-free.
#[inline(always)]
fn trivially_has_side_effects(opcode: Opcode) -> bool {
opcode.is_call()
|| opcode.is_branch()
@@ -24,6 +25,7 @@ fn trivially_has_side_effects(opcode: Opcode) -> bool {
/// Load instructions without the `notrap` flag are defined to trap when
/// operating on inaccessible memory, so we can't treat them as side-effect-free even if the loaded
/// value is unused.
#[inline(always)]
fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool {
if !opcode.can_load() {
return false;
@@ -37,6 +39,7 @@ fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool
/// Does the given instruction have any side-effect that would preclude it from being removed when
/// its value is unused?
#[inline(always)]
pub fn has_side_effect(func: &Function, inst: Inst) -> bool {
let data = &func.dfg[inst];
let opcode = data.opcode();
@@ -123,8 +126,10 @@ pub fn has_memory_fence_semantics(op: Opcode) -> bool {
| Opcode::AtomicCas
| Opcode::AtomicLoad
| Opcode::AtomicStore
| Opcode::Fence => true,
| Opcode::Fence
| Opcode::Debugtrap => true,
Opcode::Call | Opcode::CallIndirect => true,
op if op.can_trap() => true,
_ => false,
}
}

View File

@@ -120,6 +120,23 @@ impl DataFlowGraph {
self.immediates.clear();
}
/// Clear all instructions, but keep blocks and other metadata
/// (signatures, constants, immediates). Everything to do with
/// `Value`s is cleared, including block params and debug info.
///
/// Used during egraph-based optimization to clear out the pre-opt
/// body so that we can regenerate it from the egraph.
pub(crate) fn clear_insts(&mut self) {
    // Drop all instruction data and their result lists.
    self.insts.clear();
    self.results.clear();
    // Value lists and the values themselves are regenerated from
    // scratch, so clear their pools entirely.
    self.value_lists.clear();
    self.values.clear();
    // Debug value-labels refer to `Value`s, which no longer exist.
    self.values_labels = None;
    // Block parameters are `Value`s too; reset each block's list
    // while keeping the blocks themselves.
    for block in self.blocks.values_mut() {
        block.params = ValueList::new();
    }
}
/// Get the total number of instructions created in this function, whether they are currently
/// inserted in the layout or not.
///

View File

@@ -189,7 +189,7 @@ pub struct FunctionStencil {
///
/// Track the original source location for each instruction. The source locations are not
/// interpreted by Cranelift, only preserved.
srclocs: SourceLocs,
pub srclocs: SourceLocs,
/// An optional global value which represents an expression evaluating to
/// the stack limit for this function. This `GlobalValue` will be

View File

@@ -61,6 +61,18 @@ impl Layout {
self.last_block = None;
}
/// Clear instructions from every block, but keep the blocks.
///
/// Used by the egraph-based optimization to clear out the
/// function body but keep the CFG skeleton.
pub(crate) fn clear_insts(&mut self) {
    // Forget the per-instruction layout data entirely.
    self.insts.clear();
    // Keep every block, but empty its instruction list by resetting
    // the first/last links to the packed `None`.
    for block in self.blocks.values_mut() {
        block.first_inst = None.into();
        block.last_inst = None.into();
    }
}
/// Returns the capacity of the `BlockData` map.
pub fn block_capacity(&self) -> usize {
self.blocks.capacity()

View File

@@ -48,7 +48,7 @@ pub use crate::ir::function::{DisplayFunctionAnnotations, Function};
pub use crate::ir::globalvalue::GlobalValueData;
pub use crate::ir::heap::{HeapData, HeapStyle};
pub use crate::ir::instructions::{
InstructionData, Opcode, ValueList, ValueListPool, VariableArgs,
InstructionData, InstructionImms, Opcode, ValueList, ValueListPool, VariableArgs,
};
pub use crate::ir::jumptable::JumpTableData;
pub use crate::ir::known_symbol::KnownSymbol;

View File

@@ -14,7 +14,7 @@ use serde::{Deserialize, Serialize};
///
/// The default source location uses the all-ones bit pattern `!0`. It is used for instructions
/// that can't be given a real source location.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct SourceLoc(u32);

View File

@@ -34,6 +34,7 @@ use crate::{
abi::ArgPair, ty_bits, InsnOutput, Lower, MachInst, VCodeConstant, VCodeConstantData,
},
};
use crate::{isle_common_prelude_methods, isle_lower_prelude_methods};
use regalloc2::PReg;
use std::boxed::Box;
use std::convert::TryFrom;
@@ -96,7 +97,7 @@ impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
}
impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
isle_prelude_methods!();
isle_lower_prelude_methods!();
isle_prelude_caller_methods!(crate::isa::aarch64::abi::AArch64MachineDeps, AArch64Caller);
fn sign_return_address_disabled(&mut self) -> Option<()> {

View File

@@ -41,10 +41,25 @@ pub(crate) fn lower_insn_to_regs(
match op {
Opcode::Iconst | Opcode::Bconst | Opcode::Null => implemented_in_isle(ctx),
Opcode::F32const | Opcode::F64const => unreachable!(
"Should never see constant ops at top level lowering entry
point, as constants are rematerialized at use-sites"
),
Opcode::F32const => {
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let val = ctx.get_constant(insn).unwrap();
for inst in
Inst::load_fp_constant32(rd, val as u32, |ty| ctx.alloc_tmp(ty).only_reg().unwrap())
{
ctx.emit(inst);
}
}
Opcode::F64const => {
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let val = ctx.get_constant(insn).unwrap();
for inst in
Inst::load_fp_constant64(rd, val, |ty| ctx.alloc_tmp(ty).only_reg().unwrap())
{
ctx.emit(inst);
}
}
Opcode::GetFramePointer | Opcode::GetStackPointer | Opcode::GetReturnAddress => {
implemented_in_isle(ctx)

View File

@@ -62,7 +62,15 @@ impl AArch64Backend {
let emit_info = EmitInfo::new(flags.clone());
let sigs = SigSet::new::<abi::AArch64MachineDeps>(func, &self.flags)?;
let abi = abi::AArch64Callee::new(func, self, &self.isa_flags, &sigs)?;
compile::compile::<AArch64Backend>(func, self, abi, &self.machine_env, emit_info, sigs)
compile::compile::<AArch64Backend>(
func,
flags,
self,
abi,
&self.machine_env,
emit_info,
sigs,
)
}
}

View File

@@ -5,17 +5,14 @@
pub mod generated_code;
use generated_code::{Context, MInst};
use target_lexicon::Triple;
// Types that the generated ISLE code uses via `use super::*`.
use super::{writable_zero_reg, zero_reg};
use std::vec::Vec;
use crate::isa::riscv64::abi::Riscv64ABICaller;
use crate::isa::riscv64::settings::Flags as IsaFlags;
use crate::machinst::Reg;
use crate::machinst::{isle::*, MachInst, SmallInstVec};
use crate::settings::Flags;
use crate::machinst::{VCodeConstant, VCodeConstantData};
use crate::settings::Flags;
use crate::{
ir::{
immediates::*, types::*, AtomicRmwOp, ExternalName, Inst, InstructionData, MemFlags,
@@ -24,13 +21,12 @@ use crate::{
isa::riscv64::inst::*,
machinst::{ArgPair, InsnOutput, Lower},
};
use crate::{isle_common_prelude_methods, isle_lower_prelude_methods};
use regalloc2::PReg;
use crate::isa::riscv64::abi::Riscv64ABICaller;
use std::boxed::Box;
use std::convert::TryFrom;
use crate::machinst::Reg;
use std::vec::Vec;
use target_lexicon::Triple;
type BoxCallInfo = Box<CallInfo>;
type BoxCallIndInfo = Box<CallIndInfo>;
@@ -64,7 +60,7 @@ impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
}
impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
isle_prelude_methods!();
isle_lower_prelude_methods!();
isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICaller);
fn vec_writable_to_regs(&mut self, val: &VecWritableReg) -> ValueRegs {

View File

@@ -62,7 +62,7 @@ impl Riscv64Backend {
let emit_info = EmitInfo::new(flags.clone(), self.isa_flags.clone());
let sigs = SigSet::new::<abi::Riscv64MachineDeps>(func, &self.flags)?;
let abi = abi::Riscv64Callee::new(func, self, &self.isa_flags, &sigs)?;
compile::compile::<Riscv64Backend>(func, self, abi, &self.mach_env, emit_info, sigs)
compile::compile::<Riscv64Backend>(func, flags, self, abi, &self.mach_env, emit_info, sigs)
}
}

View File

@@ -24,6 +24,7 @@ use crate::{
machinst::abi::ABIMachineSpec,
machinst::{ArgPair, InsnOutput, Lower, MachInst, VCodeConstant, VCodeConstantData},
};
use crate::{isle_common_prelude_methods, isle_lower_prelude_methods};
use regalloc2::PReg;
use smallvec::{smallvec, SmallVec};
use std::boxed::Box;
@@ -88,7 +89,7 @@ pub(crate) fn lower_branch(
}
impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
isle_prelude_methods!();
isle_lower_prelude_methods!();
fn abi_sig(&mut self, sig_ref: SigRef) -> Sig {
self.lower_ctx.sigs().abi_sig_for_sig_ref(sig_ref)

View File

@@ -60,7 +60,15 @@ impl S390xBackend {
let emit_info = EmitInfo::new(self.isa_flags.clone());
let sigs = SigSet::new::<abi::S390xMachineDeps>(func, &self.flags)?;
let abi = abi::S390xCallee::new(func, self, &self.isa_flags, &sigs)?;
compile::compile::<S390xBackend>(func, self, abi, &self.machine_env, emit_info, sigs)
compile::compile::<S390xBackend>(
func,
self.flags.clone(),
self,
abi,
&self.machine_env,
emit_info,
sigs,
)
}
}

View File

@@ -7,6 +7,7 @@ use crate::{
ir::AtomicRmwOp,
machinst::{InputSourceInst, Reg, Writable},
};
use crate::{isle_common_prelude_methods, isle_lower_prelude_methods};
use generated_code::{Context, MInst, RegisterClass};
// Types that the generated ISLE code uses via `use super::*`.
@@ -92,7 +93,7 @@ pub(crate) fn lower_branch(
}
impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
isle_prelude_methods!();
isle_lower_prelude_methods!();
isle_prelude_caller_methods!(X64ABIMachineSpec, X64Caller);
#[inline]

View File

@@ -55,7 +55,7 @@ impl X64Backend {
let emit_info = EmitInfo::new(flags.clone(), self.x64_flags.clone());
let sigs = SigSet::new::<abi::X64ABIMachineSpec>(func, &self.flags)?;
let abi = abi::X64Callee::new(&func, self, &self.x64_flags, &sigs)?;
compile::compile::<Self>(&func, self, abi, &self.reg_env, emit_info, sigs)
compile::compile::<Self>(&func, flags, self, abi, &self.reg_env, emit_info, sigs)
}
}

View File

@@ -0,0 +1,604 @@
//! Shared ISLE prelude implementation for optimization (mid-end) and
//! lowering (backend) ISLE environments.
/// Helper macro to define methods in `prelude.isle` within `impl Context for
/// ...` for each backend. These methods are shared amongst all backends.
#[macro_export]
#[doc(hidden)]
macro_rules! isle_common_prelude_methods {
() => {
/// We don't have a way of making a `()` value in isle directly.
#[inline]
fn unit(&mut self) -> Unit {
()
}
#[inline]
fn u8_as_u32(&mut self, x: u8) -> Option<u32> {
Some(x.into())
}
#[inline]
fn u8_as_u64(&mut self, x: u8) -> Option<u64> {
Some(x.into())
}
#[inline]
fn u16_as_u64(&mut self, x: u16) -> Option<u64> {
Some(x.into())
}
#[inline]
fn u32_as_u64(&mut self, x: u32) -> Option<u64> {
Some(x.into())
}
#[inline]
fn i64_as_u64(&mut self, x: i64) -> Option<u64> {
Some(x as u64)
}
#[inline]
fn u64_add(&mut self, x: u64, y: u64) -> Option<u64> {
Some(x.wrapping_add(y))
}
#[inline]
fn u64_sub(&mut self, x: u64, y: u64) -> Option<u64> {
Some(x.wrapping_sub(y))
}
#[inline]
fn u64_mul(&mut self, x: u64, y: u64) -> Option<u64> {
Some(x.wrapping_mul(y))
}
#[inline]
fn u64_sdiv(&mut self, x: u64, y: u64) -> Option<u64> {
let x = x as i64;
let y = y as i64;
x.checked_div(y).map(|d| d as u64)
}
#[inline]
fn u64_udiv(&mut self, x: u64, y: u64) -> Option<u64> {
x.checked_div(y)
}
#[inline]
fn u64_and(&mut self, x: u64, y: u64) -> Option<u64> {
Some(x & y)
}
#[inline]
fn u64_or(&mut self, x: u64, y: u64) -> Option<u64> {
Some(x | y)
}
#[inline]
fn u64_xor(&mut self, x: u64, y: u64) -> Option<u64> {
Some(x ^ y)
}
#[inline]
fn u64_not(&mut self, x: u64) -> Option<u64> {
Some(!x)
}
#[inline]
fn u64_is_zero(&mut self, value: u64) -> bool {
0 == value
}
#[inline]
fn u64_sextend_u32(&mut self, x: u64) -> Option<u64> {
Some(x as u32 as i32 as i64 as u64)
}
#[inline]
fn ty_bits(&mut self, ty: Type) -> Option<u8> {
use std::convert::TryInto;
Some(ty.bits().try_into().unwrap())
}
#[inline]
fn ty_bits_u16(&mut self, ty: Type) -> u16 {
ty.bits() as u16
}
#[inline]
fn ty_bits_u64(&mut self, ty: Type) -> u64 {
ty.bits() as u64
}
#[inline]
fn ty_bytes(&mut self, ty: Type) -> u16 {
u16::try_from(ty.bytes()).unwrap()
}
#[inline]
fn ty_mask(&mut self, ty: Type) -> u64 {
match ty.bits() {
1 => 1,
8 => 0xff,
16 => 0xffff,
32 => 0xffff_ffff,
64 => 0xffff_ffff_ffff_ffff,
_ => unimplemented!(),
}
}
fn fits_in_16(&mut self, ty: Type) -> Option<Type> {
if ty.bits() <= 16 && !ty.is_dynamic_vector() {
Some(ty)
} else {
None
}
}
#[inline]
fn fits_in_32(&mut self, ty: Type) -> Option<Type> {
if ty.bits() <= 32 && !ty.is_dynamic_vector() {
Some(ty)
} else {
None
}
}
#[inline]
fn lane_fits_in_32(&mut self, ty: Type) -> Option<Type> {
if !ty.is_vector() && !ty.is_dynamic_vector() {
None
} else if ty.lane_type().bits() <= 32 {
Some(ty)
} else {
None
}
}
#[inline]
fn fits_in_64(&mut self, ty: Type) -> Option<Type> {
if ty.bits() <= 64 && !ty.is_dynamic_vector() {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_int_bool_ref_scalar_64(&mut self, ty: Type) -> Option<Type> {
if ty.bits() <= 64 && !ty.is_float() && !ty.is_vector() {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_32(&mut self, ty: Type) -> Option<Type> {
if ty.bits() == 32 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_64(&mut self, ty: Type) -> Option<Type> {
if ty.bits() == 64 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_32_or_64(&mut self, ty: Type) -> Option<Type> {
if ty.bits() == 32 || ty.bits() == 64 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_8_or_16(&mut self, ty: Type) -> Option<Type> {
if ty.bits() == 8 || ty.bits() == 16 {
Some(ty)
} else {
None
}
}
#[inline]
fn int_bool_fits_in_32(&mut self, ty: Type) -> Option<Type> {
match ty {
I8 | I16 | I32 | B8 | B16 | B32 => Some(ty),
_ => None,
}
}
#[inline]
fn ty_int_bool_64(&mut self, ty: Type) -> Option<Type> {
match ty {
I64 | B64 => Some(ty),
_ => None,
}
}
#[inline]
fn ty_int_bool_ref_64(&mut self, ty: Type) -> Option<Type> {
match ty {
I64 | B64 | R64 => Some(ty),
_ => None,
}
}
#[inline]
fn ty_int_bool_128(&mut self, ty: Type) -> Option<Type> {
match ty {
I128 | B128 => Some(ty),
_ => None,
}
}
#[inline]
fn ty_int(&mut self, ty: Type) -> Option<Type> {
ty.is_int().then(|| ty)
}
#[inline]
fn ty_int_bool(&mut self, ty: Type) -> Option<Type> {
if ty.is_int() || ty.is_bool() {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_scalar_float(&mut self, ty: Type) -> Option<Type> {
match ty {
F32 | F64 => Some(ty),
_ => None,
}
}
#[inline]
fn ty_float_or_vec(&mut self, ty: Type) -> Option<Type> {
match ty {
F32 | F64 => Some(ty),
ty if ty.is_vector() => Some(ty),
_ => None,
}
}
fn ty_vector_float(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && ty.lane_type().is_float() {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_vector_not_float(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && !ty.lane_type().is_float() {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_vec64_ctor(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && ty.bits() == 64 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_vec64(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && ty.bits() == 64 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_vec128(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && ty.bits() == 128 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_dyn_vec64(&mut self, ty: Type) -> Option<Type> {
if ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 64 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_dyn_vec128(&mut self, ty: Type) -> Option<Type> {
if ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 128 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_vec64_int(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && ty.bits() == 64 && ty.lane_type().is_int() {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_vec128_int(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && ty.bits() == 128 && ty.lane_type().is_int() {
Some(ty)
} else {
None
}
}
#[inline]
fn u64_from_imm64(&mut self, imm: Imm64) -> u64 {
imm.bits() as u64
}
#[inline]
fn u64_from_bool(&mut self, b: bool) -> u64 {
if b {
u64::MAX
} else {
0
}
}
#[inline]
fn multi_lane(&mut self, ty: Type) -> Option<(u32, u32)> {
if ty.lane_count() > 1 {
Some((ty.lane_bits(), ty.lane_count()))
} else {
None
}
}
#[inline]
fn dynamic_lane(&mut self, ty: Type) -> Option<(u32, u32)> {
if ty.is_dynamic_vector() {
Some((ty.lane_bits(), ty.min_lane_count()))
} else {
None
}
}
#[inline]
fn dynamic_int_lane(&mut self, ty: Type) -> Option<u32> {
if ty.is_dynamic_vector() && crate::machinst::ty_has_int_representation(ty.lane_type())
{
Some(ty.lane_bits())
} else {
None
}
}
#[inline]
fn dynamic_fp_lane(&mut self, ty: Type) -> Option<u32> {
if ty.is_dynamic_vector()
&& crate::machinst::ty_has_float_or_vec_representation(ty.lane_type())
{
Some(ty.lane_bits())
} else {
None
}
}
#[inline]
fn ty_dyn64_int(&mut self, ty: Type) -> Option<Type> {
if ty.is_dynamic_vector() && ty.min_bits() == 64 && ty.lane_type().is_int() {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_dyn128_int(&mut self, ty: Type) -> Option<Type> {
if ty.is_dynamic_vector() && ty.min_bits() == 128 && ty.lane_type().is_int() {
Some(ty)
} else {
None
}
}
fn u64_from_ieee32(&mut self, val: Ieee32) -> u64 {
val.bits().into()
}
fn u64_from_ieee64(&mut self, val: Ieee64) -> u64 {
val.bits()
}
fn u8_from_uimm8(&mut self, val: Uimm8) -> u8 {
val
}
fn not_vec32x2(&mut self, ty: Type) -> Option<Type> {
if ty.lane_bits() == 32 && ty.lane_count() == 2 {
None
} else {
Some(ty)
}
}
fn not_i64x2(&mut self, ty: Type) -> Option<()> {
if ty == I64X2 {
None
} else {
Some(())
}
}
fn trap_code_division_by_zero(&mut self) -> TrapCode {
TrapCode::IntegerDivisionByZero
}
fn trap_code_integer_overflow(&mut self) -> TrapCode {
TrapCode::IntegerOverflow
}
fn trap_code_bad_conversion_to_integer(&mut self) -> TrapCode {
TrapCode::BadConversionToInteger
}
fn nonzero_u64_from_imm64(&mut self, val: Imm64) -> Option<u64> {
match val.bits() {
0 => None,
n => Some(n as u64),
}
}
#[inline]
fn u32_add(&mut self, a: u32, b: u32) -> u32 {
a.wrapping_add(b)
}
#[inline]
fn s32_add_fallible(&mut self, a: u32, b: u32) -> Option<u32> {
let a = a as i32;
let b = b as i32;
a.checked_add(b).map(|sum| sum as u32)
}
#[inline]
fn u32_nonnegative(&mut self, x: u32) -> Option<u32> {
if (x as i32) >= 0 {
Some(x)
} else {
None
}
}
#[inline]
fn u32_lteq(&mut self, a: u32, b: u32) -> Option<()> {
if a <= b {
Some(())
} else {
None
}
}
#[inline]
fn u8_lteq(&mut self, a: u8, b: u8) -> Option<()> {
if a <= b {
Some(())
} else {
None
}
}
#[inline]
fn u8_lt(&mut self, a: u8, b: u8) -> Option<()> {
if a < b {
Some(())
} else {
None
}
}
#[inline]
fn imm64(&mut self, x: u64) -> Option<Imm64> {
Some(Imm64::new(x as i64))
}
#[inline]
fn simm32(&mut self, x: Imm64) -> Option<u32> {
let x64: i64 = x.into();
let x32: i32 = x64.try_into().ok()?;
Some(x32 as u32)
}
#[inline]
fn uimm8(&mut self, x: Imm64) -> Option<u8> {
let x64: i64 = x.into();
let x8: u8 = x64.try_into().ok()?;
Some(x8)
}
#[inline]
fn offset32(&mut self, x: Offset32) -> Option<u32> {
let x: i32 = x.into();
Some(x as u32)
}
#[inline]
fn u8_and(&mut self, a: u8, b: u8) -> u8 {
a & b
}
#[inline]
fn lane_type(&mut self, ty: Type) -> Type {
ty.lane_type()
}
#[inline]
fn offset32_to_u32(&mut self, offset: Offset32) -> u32 {
let offset: i32 = offset.into();
offset as u32
}
fn range(&mut self, start: usize, end: usize) -> Range {
(start, end)
}
/// Destructure a `Range` one step: `Empty` when exhausted, otherwise
/// the first index plus the remaining sub-range.
fn range_view(&mut self, (start, end): Range) -> RangeView {
    if start < end {
        RangeView::NonEmpty {
            index: start,
            rest: (start + 1, end),
        }
    } else {
        RangeView::Empty
    }
}
/// Construct fully trusted `MemFlags` (see `MemFlags::trusted()` for
/// the exact semantics).
#[inline]
fn mem_flags_trusted(&mut self) -> MemFlags {
    MemFlags::trusted()
}
/// Map an integer condition code to its unsigned counterpart
/// (delegates to `IntCC::unsigned`).
#[inline]
fn intcc_unsigned(&mut self, x: &IntCC) -> IntCC {
    x.unsigned()
}
/// Match only the signed integer condition codes, returning the code
/// unchanged; all unsigned and equality codes fail the match.
#[inline]
fn signed_cond_code(&mut self, cc: &condcodes::IntCC) -> Option<condcodes::IntCC> {
    match cc {
        IntCC::SignedGreaterThanOrEqual
        | IntCC::SignedGreaterThan
        | IntCC::SignedLessThanOrEqual
        | IntCC::SignedLessThan => Some(*cc),
        _ => None,
    }
}
};
}

View File

@@ -97,12 +97,15 @@ mod constant_hash;
mod context;
mod dce;
mod divconst_magic_numbers;
mod egraph;
mod fx;
mod inst_predicates;
mod isle_prelude;
mod iterators;
mod legalizer;
mod licm;
mod nan_canonicalization;
mod opts;
mod remove_constant_phis;
mod result;
mod scoped_hash_map;

View File

@@ -10,6 +10,7 @@ use crate::ir::{Block, Function, Layout};
use crate::packed_option::PackedOption;
use crate::timing;
use alloc::vec::Vec;
use smallvec::{smallvec, SmallVec};
/// An opaque reference to a code loop.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
@@ -29,6 +30,48 @@ pub struct LoopAnalysis {
struct LoopData {
    /// The loop's header block.
    header: Block,
    /// The enclosing loop, if any (none for an outermost loop).
    parent: PackedOption<Loop>,
    /// Nesting depth of this loop; starts as `LoopLevel::invalid()`
    /// and is filled in by `assign_loop_levels`.
    level: LoopLevel,
}
/// A level in a loop nest.
///
/// Level 0 means "outside any loop"; each enclosing loop adds one.
/// The value saturates at 254, and 0xff is reserved as the
/// "not yet computed" sentinel.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LoopLevel(u8);

impl LoopLevel {
    /// Sentinel for a level that has not been computed yet.
    const INVALID: u8 = 0xff;

    /// Get the root level (no loop).
    pub fn root() -> Self {
        Self(0)
    }

    /// Get the loop level.
    pub fn level(self) -> usize {
        usize::from(self.0)
    }

    /// Invalid loop level.
    pub fn invalid() -> Self {
        Self(Self::INVALID)
    }

    /// One loop level deeper, saturating just below the invalid
    /// sentinel value.
    pub fn inc(self) -> Self {
        match self.0 {
            n if n == Self::INVALID - 1 => self,
            n => Self(n + 1),
        }
    }

    /// A clamped loop level from a larger-width (usize) depth.
    pub fn clamped(level: usize) -> Self {
        let capped = std::cmp::min(level, usize::from(Self::INVALID) - 1);
        Self(u8::try_from(capped).expect("Clamped value must always convert"))
    }
}

impl Default for LoopLevel {
    fn default() -> Self {
        Self::invalid()
    }
}
impl LoopData {
@@ -37,6 +80,7 @@ impl LoopData {
Self {
header,
parent: parent.into(),
level: LoopLevel::invalid(),
}
}
}
@@ -71,6 +115,17 @@ impl LoopAnalysis {
self.loops[lp].parent.expand()
}
/// Return the innermost loop for a given block.
///
/// Returns `None` if the block is not contained in any loop.
pub fn innermost_loop(&self, block: Block) -> Option<Loop> {
    self.block_loop_map[block].expand()
}
/// Determine if a Block is a loop header. If so, return the loop.
///
/// A block is a header exactly when it is the header block of its
/// own innermost loop.
pub fn is_loop_header(&self, block: Block) -> Option<Loop> {
    match self.innermost_loop(block) {
        Some(lp) if self.loop_header(lp) == block => Some(lp),
        _ => None,
    }
}
/// Determine if a Block belongs to a loop by running a finger along the loop tree.
///
/// Returns `true` if `block` is in loop `lp`.
@@ -96,6 +151,12 @@ impl LoopAnalysis {
}
false
}
/// Returns the loop-nest level of a given block.
///
/// Blocks outside any loop sit at the root level (0).
pub fn loop_level(&self, block: Block) -> LoopLevel {
    match self.innermost_loop(block) {
        Some(lp) => self.loops[lp].level,
        None => LoopLevel(0),
    }
}
}
impl LoopAnalysis {
@@ -107,6 +168,7 @@ impl LoopAnalysis {
self.block_loop_map.resize(func.dfg.num_blocks());
self.find_loop_headers(cfg, domtree, &func.layout);
self.discover_loop_blocks(cfg, domtree, &func.layout);
self.assign_loop_levels();
self.valid = true;
}
@@ -228,6 +290,28 @@ impl LoopAnalysis {
}
}
}
/// Compute the nesting level of every discovered loop, without
/// recursion: for each loop whose level is still unassigned, walk up
/// the parent chain using an explicit stack, then assign levels as
/// the stack unwinds. An outermost loop gets level 1 (one deeper
/// than `LoopLevel::root()`, which denotes "no loop").
fn assign_loop_levels(&mut self) {
    let mut stack: SmallVec<[Loop; 8]> = smallvec![];
    for lp in self.loops.keys() {
        if self.loops[lp].level == LoopLevel::invalid() {
            stack.push(lp);
            while let Some(&lp) = stack.last() {
                if let Some(parent) = self.loops[lp].parent.into() {
                    if self.loops[parent].level != LoopLevel::invalid() {
                        // Parent's level is known: this loop is one deeper.
                        self.loops[lp].level = self.loops[parent].level.inc();
                        stack.pop();
                    } else {
                        // Resolve the parent first; `lp` stays on the
                        // stack and is revisited afterward.
                        stack.push(parent);
                    }
                } else {
                    // No parent: outermost loop, level 1.
                    self.loops[lp].level = LoopLevel::root().inc();
                    stack.pop();
                }
            }
        }
    }
}
}
#[cfg(test)]
@@ -286,6 +370,10 @@ mod tests {
assert_eq!(loop_analysis.is_in_loop(block2, loops[0]), true);
assert_eq!(loop_analysis.is_in_loop(block3, loops[0]), true);
assert_eq!(loop_analysis.is_in_loop(block0, loops[1]), false);
assert_eq!(loop_analysis.loop_level(block0).level(), 1);
assert_eq!(loop_analysis.loop_level(block1).level(), 2);
assert_eq!(loop_analysis.loop_level(block2).level(), 2);
assert_eq!(loop_analysis.loop_level(block3).level(), 1);
}
#[test]
@@ -345,5 +433,11 @@ mod tests {
assert_eq!(loop_analysis.is_in_loop(block3, loops[2]), true);
assert_eq!(loop_analysis.is_in_loop(block4, loops[2]), true);
assert_eq!(loop_analysis.is_in_loop(block5, loops[0]), true);
assert_eq!(loop_analysis.loop_level(block0).level(), 1);
assert_eq!(loop_analysis.loop_level(block1).level(), 2);
assert_eq!(loop_analysis.loop_level(block2).level(), 2);
assert_eq!(loop_analysis.loop_level(block3).level(), 2);
assert_eq!(loop_analysis.loop_level(block4).level(), 2);
assert_eq!(loop_analysis.loop_level(block5).level(), 1);
}
}

View File

@@ -13,6 +13,7 @@ use regalloc2::{self, MachineEnv};
/// for binary emission.
pub fn compile<B: LowerBackend + TargetIsa>(
f: &Function,
flags: crate::settings::Flags,
b: &B,
abi: Callee<<<B as LowerBackend>::MInst as MachInst>::ABIMachineSpec>,
machine_env: &MachineEnv,
@@ -23,7 +24,7 @@ pub fn compile<B: LowerBackend + TargetIsa>(
let block_order = BlockLoweringOrder::new(f);
// Build the lowering context.
let lower = crate::machinst::Lower::new(f, abi, emit_info, block_order, sigs)?;
let lower = crate::machinst::Lower::new(f, flags, abi, emit_info, block_order, sigs)?;
// Lower the IR.
let vcode = {

View File

@@ -41,13 +41,9 @@ pub enum RangeView {
/// ...` for each backend. These methods are shared amongst all backends.
#[macro_export]
#[doc(hidden)]
macro_rules! isle_prelude_methods {
macro_rules! isle_lower_prelude_methods {
() => {
/// We don't have a way of making a `()` value in isle directly.
#[inline]
fn unit(&mut self) -> Unit {
()
}
isle_common_prelude_methods!();
#[inline]
fn same_value(&mut self, a: Value, b: Value) -> Option<Value> {
@@ -175,309 +171,6 @@ macro_rules! isle_prelude_methods {
regs.regs().len()
}
#[inline]
fn u8_as_u32(&mut self, x: u8) -> Option<u32> {
Some(x.into())
}
#[inline]
fn u8_as_u64(&mut self, x: u8) -> Option<u64> {
Some(x.into())
}
#[inline]
fn u16_as_u64(&mut self, x: u16) -> Option<u64> {
Some(x.into())
}
#[inline]
fn u32_as_u64(&mut self, x: u32) -> Option<u64> {
Some(x.into())
}
#[inline]
fn i64_as_u64(&mut self, x: i64) -> Option<u64> {
Some(x as u64)
}
#[inline]
fn u64_add(&mut self, x: u64, y: u64) -> Option<u64> {
Some(x.wrapping_add(y))
}
#[inline]
fn u64_sub(&mut self, x: u64, y: u64) -> Option<u64> {
Some(x.wrapping_sub(y))
}
#[inline]
fn u64_and(&mut self, x: u64, y: u64) -> Option<u64> {
Some(x & y)
}
#[inline]
fn u64_is_zero(&mut self, value: u64) -> bool {
0 == value
}
#[inline]
fn ty_bits(&mut self, ty: Type) -> Option<u8> {
use std::convert::TryInto;
Some(ty.bits().try_into().unwrap())
}
#[inline]
fn ty_bits_u16(&mut self, ty: Type) -> u16 {
ty.bits().try_into().unwrap()
}
#[inline]
fn ty_bits_u64(&mut self, ty: Type) -> u64 {
ty.bits() as u64
}
#[inline]
fn ty_bytes(&mut self, ty: Type) -> u16 {
u16::try_from(ty.bytes()).unwrap()
}
#[inline]
fn ty_mask(&mut self, ty: Type) -> u64 {
match ty.bits() {
1 => 1,
8 => 0xff,
16 => 0xffff,
32 => 0xffff_ffff,
64 => 0xffff_ffff_ffff_ffff,
_ => unimplemented!(),
}
}
fn fits_in_16(&mut self, ty: Type) -> Option<Type> {
if ty.bits() <= 16 {
Some(ty)
} else {
None
}
}
#[inline]
fn fits_in_32(&mut self, ty: Type) -> Option<Type> {
if ty.bits() <= 32 && !ty.is_dynamic_vector() {
Some(ty)
} else {
None
}
}
#[inline]
fn lane_fits_in_32(&mut self, ty: Type) -> Option<Type> {
if !ty.is_vector() && !ty.is_dynamic_vector() {
None
} else if ty.lane_type().bits() <= 32 {
Some(ty)
} else {
None
}
}
#[inline]
fn fits_in_64(&mut self, ty: Type) -> Option<Type> {
if ty.bits() <= 64 && !ty.is_dynamic_vector() {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_int_bool_ref_scalar_64(&mut self, ty: Type) -> Option<Type> {
if ty.bits() <= 64 && !ty.is_float() && !ty.is_vector() {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_32(&mut self, ty: Type) -> Option<Type> {
if ty.bits() == 32 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_64(&mut self, ty: Type) -> Option<Type> {
if ty.bits() == 64 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_32_or_64(&mut self, ty: Type) -> Option<Type> {
if ty.bits() == 32 || ty.bits() == 64 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_8_or_16(&mut self, ty: Type) -> Option<Type> {
if ty.bits() == 8 || ty.bits() == 16 {
Some(ty)
} else {
None
}
}
#[inline]
fn int_bool_fits_in_32(&mut self, ty: Type) -> Option<Type> {
match ty {
I8 | I16 | I32 | B8 | B16 | B32 => Some(ty),
_ => None,
}
}
#[inline]
fn ty_int_bool_64(&mut self, ty: Type) -> Option<Type> {
match ty {
I64 | B64 => Some(ty),
_ => None,
}
}
#[inline]
fn ty_int_bool_ref_64(&mut self, ty: Type) -> Option<Type> {
match ty {
I64 | B64 | R64 => Some(ty),
_ => None,
}
}
#[inline]
fn ty_int_bool_128(&mut self, ty: Type) -> Option<Type> {
match ty {
I128 | B128 => Some(ty),
_ => None,
}
}
#[inline]
fn ty_int(&mut self, ty: Type) -> Option<Type> {
ty.is_int().then(|| ty)
}
#[inline]
fn ty_int_bool(&mut self, ty: Type) -> Option<Type> {
if ty.is_int() || ty.is_bool() {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_scalar_float(&mut self, ty: Type) -> Option<Type> {
match ty {
F32 | F64 => Some(ty),
_ => None,
}
}
#[inline]
fn ty_float_or_vec(&mut self, ty: Type) -> Option<Type> {
match ty {
F32 | F64 => Some(ty),
ty if ty.is_vector() => Some(ty),
_ => None,
}
}
fn ty_vector_float(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && ty.lane_type().is_float() {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_vector_not_float(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && !ty.lane_type().is_float() {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_vec64_ctor(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && ty.bits() == 64 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_vec64(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && ty.bits() == 64 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_vec128(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && ty.bits() == 128 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_dyn_vec64(&mut self, ty: Type) -> Option<Type> {
if ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 64 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_dyn_vec128(&mut self, ty: Type) -> Option<Type> {
if ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 128 {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_vec64_int(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && ty.bits() == 64 && ty.lane_type().is_int() {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_vec128_int(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && ty.bits() == 128 && ty.lane_type().is_int() {
Some(ty)
} else {
None
}
}
#[inline]
fn value_list_slice(&mut self, list: ValueList) -> ValueSlice {
(list, 0)
@@ -521,20 +214,6 @@ macro_rules! isle_prelude_methods {
r.to_reg()
}
#[inline]
fn u64_from_imm64(&mut self, imm: Imm64) -> u64 {
imm.bits() as u64
}
#[inline]
fn u64_from_bool(&mut self, b: bool) -> u64 {
if b {
u64::MAX
} else {
0
}
}
#[inline]
fn inst_results(&mut self, inst: Inst) -> ValueSlice {
(self.lower_ctx.dfg().inst_results_list(inst), 0)
@@ -555,80 +234,11 @@ macro_rules! isle_prelude_methods {
self.lower_ctx.dfg().value_type(val)
}
#[inline]
fn multi_lane(&mut self, ty: Type) -> Option<(u32, u32)> {
if ty.lane_count() > 1 {
Some((ty.lane_bits(), ty.lane_count()))
} else {
None
}
}
#[inline]
fn dynamic_lane(&mut self, ty: Type) -> Option<(u32, u32)> {
if ty.is_dynamic_vector() {
Some((ty.lane_bits(), ty.min_lane_count()))
} else {
None
}
}
#[inline]
fn dynamic_int_lane(&mut self, ty: Type) -> Option<u32> {
if ty.is_dynamic_vector() && crate::machinst::ty_has_int_representation(ty.lane_type())
{
Some(ty.lane_bits())
} else {
None
}
}
#[inline]
fn dynamic_fp_lane(&mut self, ty: Type) -> Option<u32> {
if ty.is_dynamic_vector()
&& crate::machinst::ty_has_float_or_vec_representation(ty.lane_type())
{
Some(ty.lane_bits())
} else {
None
}
}
#[inline]
fn ty_dyn64_int(&mut self, ty: Type) -> Option<Type> {
if ty.is_dynamic_vector() && ty.min_bits() == 64 && ty.lane_type().is_int() {
Some(ty)
} else {
None
}
}
#[inline]
fn ty_dyn128_int(&mut self, ty: Type) -> Option<Type> {
if ty.is_dynamic_vector() && ty.min_bits() == 128 && ty.lane_type().is_int() {
Some(ty)
} else {
None
}
}
#[inline]
fn def_inst(&mut self, val: Value) -> Option<Inst> {
self.lower_ctx.dfg().value_def(val).inst()
}
fn u64_from_ieee32(&mut self, val: Ieee32) -> u64 {
val.bits().into()
}
fn u64_from_ieee64(&mut self, val: Ieee64) -> u64 {
val.bits()
}
fn u8_from_uimm8(&mut self, val: Uimm8) -> u8 {
val
}
fn zero_value(&mut self, value: Value) -> Option<Value> {
let insn = self.def_inst(value);
if insn.is_some() {
@@ -682,34 +292,6 @@ macro_rules! isle_prelude_methods {
}
}
fn not_vec32x2(&mut self, ty: Type) -> Option<Type> {
if ty.lane_bits() == 32 && ty.lane_count() == 2 {
None
} else {
Some(ty)
}
}
fn not_i64x2(&mut self, ty: Type) -> Option<()> {
if ty == I64X2 {
None
} else {
Some(())
}
}
fn trap_code_division_by_zero(&mut self) -> TrapCode {
TrapCode::IntegerDivisionByZero
}
fn trap_code_integer_overflow(&mut self) -> TrapCode {
TrapCode::IntegerOverflow
}
fn trap_code_bad_conversion_to_integer(&mut self) -> TrapCode {
TrapCode::BadConversionToInteger
}
fn avoid_div_traps(&mut self, _: Type) -> Option<()> {
if self.flags.avoid_div_traps() {
Some(())
@@ -820,79 +402,6 @@ macro_rules! isle_prelude_methods {
Some(u128::from_le_bytes(bytes.try_into().ok()?))
}
fn nonzero_u64_from_imm64(&mut self, val: Imm64) -> Option<u64> {
match val.bits() {
0 => None,
n => Some(n as u64),
}
}
#[inline]
fn u32_add(&mut self, a: u32, b: u32) -> u32 {
a.wrapping_add(b)
}
#[inline]
fn s32_add_fallible(&mut self, a: u32, b: u32) -> Option<u32> {
let a = a as i32;
let b = b as i32;
a.checked_add(b).map(|sum| sum as u32)
}
#[inline]
fn u32_nonnegative(&mut self, x: u32) -> Option<u32> {
if (x as i32) >= 0 {
Some(x)
} else {
None
}
}
#[inline]
fn u32_lteq(&mut self, a: u32, b: u32) -> Option<()> {
if a <= b {
Some(())
} else {
None
}
}
#[inline]
fn simm32(&mut self, x: Imm64) -> Option<u32> {
let x64: i64 = x.into();
let x32: i32 = x64.try_into().ok()?;
Some(x32 as u32)
}
#[inline]
fn uimm8(&mut self, x: Imm64) -> Option<u8> {
let x64: i64 = x.into();
let x8: u8 = x64.try_into().ok()?;
Some(x8)
}
#[inline]
fn offset32(&mut self, x: Offset32) -> Option<u32> {
let x: i32 = x.into();
Some(x as u32)
}
#[inline]
fn u8_and(&mut self, a: u8, b: u8) -> u8 {
a & b
}
#[inline]
fn lane_type(&mut self, ty: Type) -> Type {
ty.lane_type()
}
#[inline]
fn offset32_to_u32(&mut self, offset: Offset32) -> u32 {
let offset: i32 = offset.into();
offset as u32
}
#[inline]
fn emit_u64_le_const(&mut self, value: u64) -> VCodeConstant {
let data = VCodeConstantData::U64(value.to_le_bytes());
@@ -913,21 +422,6 @@ macro_rules! isle_prelude_methods {
))
}
fn range(&mut self, start: usize, end: usize) -> Range {
(start, end)
}
fn range_view(&mut self, (start, end): Range) -> RangeView {
if start >= end {
RangeView::Empty
} else {
RangeView::NonEmpty {
index: start,
rest: (start + 1, end),
}
}
}
fn retval(&mut self, i: usize) -> WritableValueRegs {
self.lower_ctx.retval(i)
}
@@ -1067,11 +561,6 @@ macro_rules! isle_prelude_methods {
self.lower_ctx.sink_inst(inst);
}
#[inline]
fn mem_flags_trusted(&mut self) -> MemFlags {
MemFlags::trusted()
}
#[inline]
fn preg_to_reg(&mut self, preg: PReg) -> Reg {
preg.into()
@@ -1081,27 +570,6 @@ macro_rules! isle_prelude_methods {
fn gen_move(&mut self, ty: Type, dst: WritableReg, src: Reg) -> MInst {
MInst::gen_move(dst, src, ty)
}
#[inline]
fn intcc_unsigned(&mut self, x: &IntCC) -> IntCC {
x.unsigned()
}
#[inline]
fn signed_cond_code(&mut self, cc: &condcodes::IntCC) -> Option<condcodes::IntCC> {
match cc {
IntCC::Equal
| IntCC::UnsignedGreaterThanOrEqual
| IntCC::UnsignedGreaterThan
| IntCC::UnsignedLessThanOrEqual
| IntCC::UnsignedLessThan
| IntCC::NotEqual => None,
IntCC::SignedGreaterThanOrEqual
| IntCC::SignedGreaterThan
| IntCC::SignedLessThanOrEqual
| IntCC::SignedLessThan => Some(*cc),
}
}
};
}

View File

@@ -147,6 +147,9 @@ pub struct Lower<'func, I: VCodeInst> {
/// The function to lower.
f: &'func Function,
/// Machine-independent flags.
flags: crate::settings::Flags,
/// Lowered machine instructions.
vcode: VCodeBuilder<I>,
@@ -345,6 +348,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
/// Prepare a new lowering context for the given IR function.
pub fn new(
f: &'func Function,
flags: crate::settings::Flags,
abi: Callee<I::ABIMachineSpec>,
emit_info: I::Info,
block_order: BlockLoweringOrder,
@@ -433,6 +437,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
Ok(Lower {
f,
flags,
vcode,
value_regs,
retval_regs,
@@ -1265,26 +1270,30 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
assert!(!self.inst_sunk.contains(&inst));
}
// If the value is a constant, then (re)materialize it at each use. This
// lowers register pressure.
if let Some(c) = self
.f
.dfg
.value_def(val)
.inst()
.and_then(|inst| self.get_constant(inst))
{
let regs = self.alloc_tmp(ty);
trace!(" -> regs {:?}", regs);
assert!(regs.is_valid());
// If the value is a constant, then (re)materialize it at each
// use. This lowers register pressure. (Only do this if we are
// not using egraph-based compilation; the egraph framework
// more efficiently rematerializes constants where needed.)
if !self.flags.use_egraphs() {
if let Some(c) = self
.f
.dfg
.value_def(val)
.inst()
.and_then(|inst| self.get_constant(inst))
{
let regs = self.alloc_tmp(ty);
trace!(" -> regs {:?}", regs);
assert!(regs.is_valid());
let insts = I::gen_constant(regs, c.into(), ty, |ty| {
self.alloc_tmp(ty).only_reg().unwrap()
});
for inst in insts {
self.emit(inst);
let insts = I::gen_constant(regs, c.into(), ty, |ty| {
self.alloc_tmp(ty).only_reg().unwrap()
});
for inst in insts {
self.emit(inst);
}
return non_writable_value_regs(regs);
}
return non_writable_value_regs(regs);
}
let mut regs = self.value_regs[val];

View File

@@ -0,0 +1,297 @@
//! Optimization driver using ISLE rewrite rules on an egraph.
use crate::egraph::Analysis;
use crate::egraph::FuncEGraph;
use crate::egraph::MemoryState;
pub use crate::egraph::{Node, NodeCtx};
use crate::ir::condcodes;
pub use crate::ir::condcodes::{FloatCC, IntCC};
pub use crate::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64, Uimm8};
pub use crate::ir::types::*;
pub use crate::ir::{
dynamic_to_fixed, AtomicRmwOp, Block, Constant, DynamicStackSlot, FuncRef, GlobalValue, Heap,
Immediate, InstructionImms, JumpTable, MemFlags, Opcode, StackSlot, Table, TrapCode, Type,
Value,
};
use crate::isle_common_prelude_methods;
use crate::machinst::isle::*;
use crate::trace;
pub use cranelift_egraph::{Id, NewOrExisting, NodeIter};
use cranelift_entity::{EntityList, EntityRef};
use smallvec::SmallVec;
use std::marker::PhantomData;
pub type IdArray = EntityList<Id>;
#[allow(dead_code)]
pub type Unit = ();
pub type Range = (usize, usize);
pub type ConstructorVec<T> = SmallVec<[T; 8]>;
mod generated_code;
use generated_code::ContextIter;
/// Context passed to the ISLE-generated mid-end rule code; wraps a
/// mutable borrow of the per-function egraph state.
struct IsleContext<'a, 'b> {
    egraph: &'a mut FuncEGraph<'b>,
}
/// Maximum depth of nested rule application: constructing a rewritten
/// node can itself re-invoke the rules, and this bounds that
/// recursion (see the depth check below).
const REWRITE_LIMIT: usize = 5;

/// Run the ISLE simplification rules on one eclass, unioning every
/// rewritten form into it, and return the resulting eclass id. A
/// rewrite marked as "subsuming" replaces the eclass outright
/// (only the subsuming id is returned, and iteration stops).
pub fn optimize_eclass<'a>(id: Id, egraph: &mut FuncEGraph<'a>) -> Id {
    trace!("running rules on eclass {}", id.index());
    egraph.stats.rewrite_rule_invoked += 1;
    // Bound nested rule application: past the limit, return the
    // eclass unoptimized.
    if egraph.rewrite_depth > REWRITE_LIMIT {
        egraph.stats.rewrite_depth_limit += 1;
        return id;
    }
    egraph.rewrite_depth += 1;
    // Find all possible rewrites and union them in, returning the
    // union.
    let mut ctx = IsleContext { egraph };
    let optimized_ids = generated_code::constructor_simplify(&mut ctx, id);
    let mut union_id = id;
    if let Some(mut ids) = optimized_ids {
        while let Some(new_id) = ids.next(&mut ctx) {
            if ctx.egraph.subsume_ids.contains(&new_id) {
                trace!(" -> eclass {} subsumes {}", new_id, id);
                ctx.egraph.stats.node_subsume += 1;
                // Merge in the unionfind so canonicalization still
                // works, but take *only* the subsuming ID, and break
                // now.
                ctx.egraph.egraph.unionfind.union(union_id, new_id);
                union_id = new_id;
                break;
            }
            ctx.egraph.stats.node_union += 1;
            let old_union_id = union_id;
            union_id = ctx
                .egraph
                .egraph
                .union(&ctx.egraph.node_ctx, union_id, new_id);
            trace!(
                " -> union eclass {} with {} to get {}",
                new_id,
                old_union_id,
                union_id
            );
        }
    }
    trace!(" -> optimize {} got {}", id, union_id);
    ctx.egraph.rewrite_depth -= 1;
    union_id
}
/// Store-to-load forwarding: when a load's `MemoryState` pins it to a
/// single prior store, and that store has the same offset, type, and
/// an equivalent address eclass, return the stored data's eclass id
/// instead of the load. Otherwise return `id` unchanged.
pub(crate) fn store_to_load<'a>(id: Id, egraph: &mut FuncEGraph<'a>) -> Id {
    // Note that we only examine the latest enode in the eclass: opts
    // are invoked for every new enode added to an eclass, so
    // traversing the whole eclass would be redundant.
    let load_key = egraph.egraph.classes[id].get_node().unwrap();
    if let Node::Load {
        op:
            InstructionImms::Load {
                opcode: Opcode::Load,
                offset: load_offset,
                ..
            },
        ty: load_ty,
        addr: load_addr,
        // Only loads whose memory state is "exactly after this one
        // store" are candidates.
        mem_state: MemoryState::Store(store_inst),
        ..
    } = load_key.node(&egraph.egraph.nodes)
    {
        trace!(" -> got load op for id {}", id);
        if let Some((store_ty, store_id)) = egraph.store_nodes.get(&store_inst) {
            trace!(" -> got store id: {} ty: {}", store_id, store_ty);
            let store_key = egraph.egraph.classes[*store_id].get_node().unwrap();
            if let Node::Inst {
                op:
                    InstructionImms::Store {
                        opcode: Opcode::Store,
                        offset: store_offset,
                        ..
                    },
                args: store_args,
                ..
            } = store_key.node(&egraph.egraph.nodes)
            {
                // Store operands are `(data, addr)`.
                let store_args = store_args.as_slice(&egraph.node_ctx.args);
                let store_data = store_args[0];
                let store_addr = store_args[1];
                // Addresses are compared as eclasses via the
                // union-find, not merely syntactically.
                if *load_offset == *store_offset
                    && *load_ty == *store_ty
                    && egraph.egraph.unionfind.equiv_id_mut(*load_addr, store_addr)
                {
                    trace!(" -> same offset, type, address; forwarding");
                    egraph.stats.store_to_load_forward += 1;
                    return store_data;
                }
            }
        }
    }
    id
}
/// Iterator state backing the `enodes` multi-extractor: yields each
/// enode of an eclass (filtered in `next` to single-result
/// `Pure`/`Inst` nodes).
struct NodesEtorIter<'a, 'b>
where
    'b: 'a,
{
    /// The eclass being iterated (used for tracing).
    root: Id,
    /// Underlying egraph enode iterator.
    iter: NodeIter<NodeCtx, Analysis>,
    // Tie this iterator to the `IsleContext` lifetimes without
    // holding a borrow across `next` calls.
    _phantom1: PhantomData<&'a ()>,
    _phantom2: PhantomData<&'b ()>,
}
impl<'a, 'b> generated_code::ContextIter for NodesEtorIter<'a, 'b>
where
    'b: 'a,
{
    type Context = IsleContext<'a, 'b>;
    type Output = (Type, InstructionImms, IdArray);

    /// Yield the next enode of the eclass as `(type, op, args)`,
    /// skipping anything that is not a `Pure`/`Inst` node with
    /// exactly one result type.
    fn next(&mut self, ctx: &mut IsleContext<'a, 'b>) -> Option<Self::Output> {
        while let Some(node) = self.iter.next(&ctx.egraph.egraph) {
            trace!("iter from root {}: node {:?}", self.root, node);
            match node {
                // Only single-result instruction-like nodes are
                // visible to the rewrite rules.
                Node::Pure { op, args, types }
                | Node::Inst {
                    op, args, types, ..
                } if types.len() == 1 => {
                    let ty = types.as_slice(&ctx.egraph.node_ctx.types)[0];
                    return Some((ty, op.clone(), args.clone()));
                }
                _ => {}
            }
        }
        None
    }
}
// ISLE `Context` implementation for the mid-end optimizer: supplies
// the extractors and constructors that the rules in `opts/*.isle`
// call, backed by the function's egraph.
impl<'a, 'b> generated_code::Context for IsleContext<'a, 'b> {
    // Helpers (imm64, u64_add, fits_in_64, ...) shared with the
    // lowering prelude.
    isle_common_prelude_methods!();

    /// Return the single result type of an eclass, if one can be
    /// determined: the type of the first enode found that carries
    /// exactly one result type.
    fn eclass_type(&mut self, eclass: Id) -> Option<Type> {
        let mut iter = self.egraph.egraph.enodes(eclass);
        while let Some(node) = iter.next(&self.egraph.egraph) {
            match node {
                &Node::Pure { types, .. } | &Node::Inst { types, .. } if types.len() == 1 => {
                    return Some(types.as_slice(&self.egraph.node_ctx.types)[0]);
                }
                &Node::Load { ty, .. } => return Some(ty),
                &Node::Result { ty, .. } => return Some(ty),
                &Node::Param { ty, .. } => return Some(ty),
                _ => {}
            }
        }
        None
    }

    /// Pair an eclass with its loop-nest level from the analysis;
    /// used by reassociation rules to favor hoisting.
    fn at_loop_level(&mut self, eclass: Id) -> (u8, Id) {
        (
            // Levels are clamped well below 256 (see `LoopLevel`), so
            // the cast cannot truncate.
            self.egraph.egraph.analysis_value(eclass).loop_level.level() as u8,
            eclass,
        )
    }

    type enodes_etor_iter = NodesEtorIter<'a, 'b>;

    /// Multi-extractor: iterate over all (single-result) enodes of an
    /// eclass.
    fn enodes_etor(&mut self, eclass: Id) -> Option<NodesEtorIter<'a, 'b>> {
        Some(NodesEtorIter {
            root: eclass,
            iter: self.egraph.egraph.enodes(eclass),
            _phantom1: PhantomData,
            _phantom2: PhantomData,
        })
    }

    /// Construct (or dedupe) a pure enode. A newly created node is
    /// immediately optimized via `optimize_eclass` (bounded by
    /// `REWRITE_LIMIT`); an existing node is returned as-is.
    fn pure_enode_ctor(&mut self, ty: Type, op: &InstructionImms, args: IdArray) -> Id {
        let types = self.egraph.node_ctx.types.single(ty);
        let types = types.freeze(&mut self.egraph.node_ctx.types);
        let op = op.clone();
        match self
            .egraph
            .egraph
            .add(Node::Pure { op, args, types }, &mut self.egraph.node_ctx)
        {
            NewOrExisting::New(id) => {
                self.egraph.stats.node_created += 1;
                self.egraph.stats.node_pure += 1;
                self.egraph.stats.node_ctor_created += 1;
                optimize_eclass(id, self.egraph)
            }
            NewOrExisting::Existing(id) => {
                self.egraph.stats.node_ctor_deduped += 1;
                id
            }
        }
    }

    /// Match an empty argument list.
    fn id_array_0_etor(&mut self, arg0: IdArray) -> Option<()> {
        let values = arg0.as_slice(&self.egraph.node_ctx.args);
        if values.len() == 0 {
            Some(())
        } else {
            None
        }
    }

    /// Build an empty argument list.
    fn id_array_0_ctor(&mut self) -> IdArray {
        EntityList::default()
    }

    /// Match a one-element argument list.
    fn id_array_1_etor(&mut self, arg0: IdArray) -> Option<Id> {
        let values = arg0.as_slice(&self.egraph.node_ctx.args);
        if values.len() == 1 {
            Some(values[0])
        } else {
            None
        }
    }

    /// Build a one-element argument list.
    fn id_array_1_ctor(&mut self, arg0: Id) -> IdArray {
        EntityList::from_iter([arg0].into_iter(), &mut self.egraph.node_ctx.args)
    }

    /// Match a two-element argument list.
    fn id_array_2_etor(&mut self, arg0: IdArray) -> Option<(Id, Id)> {
        let values = arg0.as_slice(&self.egraph.node_ctx.args);
        if values.len() == 2 {
            Some((values[0], values[1]))
        } else {
            None
        }
    }

    /// Build a two-element argument list.
    fn id_array_2_ctor(&mut self, arg0: Id, arg1: Id) -> IdArray {
        EntityList::from_iter([arg0, arg1].into_iter(), &mut self.egraph.node_ctx.args)
    }

    /// Match a three-element argument list.
    fn id_array_3_etor(&mut self, arg0: IdArray) -> Option<(Id, Id, Id)> {
        let values = arg0.as_slice(&self.egraph.node_ctx.args);
        if values.len() == 3 {
            Some((values[0], values[1], values[2]))
        } else {
            None
        }
    }

    /// Build a three-element argument list.
    fn id_array_3_ctor(&mut self, arg0: Id, arg1: Id, arg2: Id) -> IdArray {
        EntityList::from_iter(
            [arg0, arg1, arg2].into_iter(),
            &mut self.egraph.node_ctx.args,
        )
    }

    /// Mark an eclass for per-use rematerialization; returns the id
    /// unchanged.
    fn remat(&mut self, id: Id) -> Id {
        trace!("remat: {}", id);
        self.egraph.remat_ids.insert(id);
        id
    }

    /// Mark an eclass as subsuming its rewrite source (see
    /// `optimize_eclass`); returns the id unchanged.
    fn subsume(&mut self, id: Id) -> Id {
        trace!("subsume: {}", id);
        self.egraph.subsume_ids.insert(id);
        id
    }
}

View File

@@ -0,0 +1,207 @@
;; Algebraic optimizations.
;; Rules here are allowed to rewrite pure expressions arbitrarily,
;; using the same inputs as the original, or fewer. In other words, we
;; cannot pull a new eclass id out of thin air and refer to it, other
;; than a piece of the input or a new node that we construct; but we
;; can freely rewrite e.g. `x+y-y` to `x`.
;; uextend/sextend of a constant.
(rule (simplify (uextend $I64 (iconst $I32 imm)))
(iconst $I64 imm))
(rule (simplify (sextend $I64 (iconst $I32 (u64_from_imm64 imm))))
(iconst $I64 (imm64 (u64_sextend_u32 imm))))
;; x+0 == 0+x == x.
(rule (simplify (iadd ty
x
(iconst ty (u64_from_imm64 0))))
(subsume x))
(rule (simplify (iadd ty
(iconst ty (u64_from_imm64 0))
x))
(subsume x))
;; x-0 == x.
(rule (simplify (isub ty
x
(iconst ty (u64_from_imm64 0))))
(subsume x))
;; 0-x == (ineg x).
(rule (simplify (isub ty
(iconst ty (u64_from_imm64 0))
x))
(ineg ty x))
;; x*1 == 1*x == x.
(rule (simplify (imul ty
x
(iconst ty (u64_from_imm64 1))))
(subsume x))
(rule (simplify (imul ty
(iconst ty (u64_from_imm64 1))
x))
(subsume x))
;; x*0 == 0*x == 0.
(rule (simplify (imul ty
x
(iconst ty (u64_from_imm64 0))))
(iconst ty (imm64 0)))
(rule (simplify (imul ty
(iconst ty (u64_from_imm64 0))
x))
(iconst ty (imm64 0)))
;; x/1 == x.
(rule (simplify (sdiv ty
x
(iconst ty (u64_from_imm64 1))))
(subsume x))
(rule (simplify (udiv ty
x
(iconst ty (u64_from_imm64 1))))
(subsume x))
;; x>>0 == x<<0 == x rotr 0 == x rotl 0 == x.
(rule (simplify (ishl ty
x
(iconst ty (u64_from_imm64 0))))
(subsume x))
(rule (simplify (ushr ty
x
(iconst ty (u64_from_imm64 0))))
(subsume x))
(rule (simplify (sshr ty
x
(iconst ty (u64_from_imm64 0))))
(subsume x))
(rule (simplify (rotr ty
x
(iconst ty (u64_from_imm64 0))))
(subsume x))
(rule (simplify (rotl ty
x
(iconst ty (u64_from_imm64 0))))
(subsume x))
;; x | 0 == 0 | x == x | x == x.
(rule (simplify (bor ty
x
(iconst ty (u64_from_imm64 0))))
(subsume x))
(rule (simplify (bor ty
(iconst ty (u64_from_imm64 0))
x))
(subsume x))
(rule (simplify (bor ty x x))
(subsume x))
;; x ^ 0 == 0 ^ x == x.
(rule (simplify (bxor ty
x
(iconst ty (u64_from_imm64 0))))
(subsume x))
(rule (simplify (bxor ty
(iconst ty (u64_from_imm64 0))
x))
(subsume x))
;; x ^ x == 0.
(rule (simplify (bxor ty x x))
(subsume (iconst ty (imm64 0))))
;; x ^ not(x) == not(x) ^ x == -1.
(rule (simplify (bxor $I32 x (bnot $I32 x))) (subsume (iconst $I32 (imm64 0xffff_ffff))))
(rule (simplify (bxor $I32 (bnot $I32 x) x)) (subsume (iconst $I32 (imm64 0xffff_ffff))))
(rule (simplify (bxor $I64 x (bnot $I64 x))) (subsume (iconst $I64 (imm64 0xffff_ffff_ffff_ffff))))
(rule (simplify (bxor $I64 (bnot $I64 x) x)) (subsume (iconst $I64 (imm64 0xffff_ffff_ffff_ffff))))
;; x & -1 == -1 & x == x & x == x.
(rule (simplify (band ty x x)) x)
(rule (simplify (band $I32 x (iconst $I32 (u64_from_imm64 0xffff_ffff)))) (subsume x))
(rule (simplify (band $I32 (iconst $I32 (u64_from_imm64 0xffff_ffff)) x)) (subsume x))
(rule (simplify (band $I64 x (iconst $I64 (u64_from_imm64 0xffff_ffff_ffff_ffff)))) (subsume x))
(rule (simplify (band $I64 (iconst $I64 (u64_from_imm64 0xffff_ffff_ffff_ffff)) x)) (subsume x))
;; x & 0 == 0 & x == 0.
(rule (simplify (band ty x (iconst ty (u64_from_imm64 0)))) (iconst ty (imm64 0)))
(rule (simplify (band ty (iconst ty (u64_from_imm64 0)) x)) (iconst ty (imm64 0)))
;; not(not(x)) == x.
(rule (simplify (bnot ty (bnot ty x))) (subsume x))
;; DeMorgan's rule (two versions):
;; bnot(bor(x, y)) == band(bnot(x), bnot(y))
(rule (simplify (bnot ty (bor ty x y)))
(band ty (bnot ty x) (bnot ty y)))
;; bnot(band(x, y)) == bor(bnot(x), bnot(y))
;; Bind the inner `band`'s type as `ty` (matching the sibling
;; bnot-of-bor rule above); previously it was bound as an unused `t`,
;; which left the inner type unconstrained.
(rule (simplify (bnot ty (band ty x y)))
      (bor ty (bnot ty x) (bnot ty y)))
;; x*2 == 2*x == x+x.
(rule (simplify (imul ty x (iconst _ (simm32 2))))
(iadd ty x x))
(rule (simplify (imul ty (iconst _ (simm32 2)) x))
(iadd ty x x))
;; x<<32>>32: uextend/sextend 32->64.
(rule (simplify (ushr $I64 (ishl $I64 (uextend $I64 x @ (eclass_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32))))
(uextend $I64 x))
(rule (simplify (sshr $I64 (ishl $I64 (uextend $I64 x @ (eclass_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32))))
(sextend $I64 x))
;; TODO: strength reduction: mul/div to shifts
;; TODO: div/rem by constants -> magic multiplications
;; Reassociate when it benefits LICM.
(rule (simplify (iadd ty (iadd ty x y) z))
(if-let (at_loop_level lx _) x)
(if-let (at_loop_level ly _) y)
(if-let (at_loop_level lz _) z)
(if (u8_lt lx ly))
(if (u8_lt lz ly))
(iadd ty (iadd ty x z) y))
(rule (simplify (iadd ty (iadd ty x y) z))
(if-let (at_loop_level lx _) x)
(if-let (at_loop_level ly _) y)
(if-let (at_loop_level lz _) z)
(if (u8_lt ly lx))
(if (u8_lt lz lx))
(iadd ty (iadd ty y z) x))
;; Select's selector input doesn't need bint; remove the redundant op.
(rule (simplify (select ty (bint _ b) x y))
(subsume (select ty b x y)))
;; Rematerialize ALU-op-with-imm and iconsts in each block where they're
;; used. This is neutral (add-with-imm) or positive (iconst) for
;; register pressure, and these ops are very cheap.
(rule (simplify x @ (iadd _ (iconst _ _) _))
(remat x))
(rule (simplify x @ (iadd _ _ (iconst _ _)))
(remat x))
(rule (simplify x @ (isub _ (iconst _ _) _))
(remat x))
(rule (simplify x @ (isub _ _ (iconst _ _)))
(remat x))
(rule (simplify x @ (band _ (iconst _ _) _))
(remat x))
(rule (simplify x @ (band _ _ (iconst _ _)))
(remat x))
(rule (simplify x @ (bor _ (iconst _ _) _))
(remat x))
(rule (simplify x @ (bor _ _ (iconst _ _)))
(remat x))
(rule (simplify x @ (bxor _ (iconst _ _) _))
(remat x))
(rule (simplify x @ (bxor _ _ (iconst _ _)))
(remat x))
(rule (simplify x @ (bnot _ _))
(remat x))
(rule (simplify x @ (iconst _ _))
(remat x))
(rule (simplify x @ (f32const _ _))
(remat x))
(rule (simplify x @ (f64const _ _))
(remat x))

View File

@@ -0,0 +1,134 @@
;; Constant propagation.
;;
;; Each rule folds an ALU op whose operands are both `iconst`s into a
;; single `iconst`, and `subsume`s so the folded form replaces the op.
;; `fits_in_64` restricts these folds to scalar types of <= 64 bits.
;;
;; NOTE(review): the add/sub/mul/or/and/xor/not folds compute in full
;; 64-bit width and do not mask the result down to `ty`'s bit width
;; before rebuilding the `iconst`; for types narrower than 64 bits this
;; can yield an immediate with bits set above the type width -- TODO
;; confirm downstream consumers mask/normalize such immediates.
(rule (simplify
(iadd (fits_in_64 ty)
(iconst ty (u64_from_imm64 k1))
(iconst ty (u64_from_imm64 k2))))
(subsume (iconst ty (imm64 (u64_add k1 k2)))))
(rule (simplify
(isub (fits_in_64 ty)
(iconst ty (u64_from_imm64 k1))
(iconst ty (u64_from_imm64 k2))))
(subsume (iconst ty (imm64 (u64_sub k1 k2)))))
(rule (simplify
(imul (fits_in_64 ty)
(iconst ty (u64_from_imm64 k1))
(iconst ty (u64_from_imm64 k2))))
(subsume (iconst ty (imm64 (u64_mul k1 k2)))))
;; Division folds are fallible: the `if-let` only binds when the extern
;; `u64_sdiv`/`u64_udiv` succeeds (presumably it fails on division by
;; zero and on signed-overflow cases -- confirm in the extern impls), so
;; trapping divisions are left un-folded.
(rule (simplify
(sdiv (fits_in_64 ty)
(iconst ty (u64_from_imm64 k1))
(iconst ty (u64_from_imm64 k2))))
(if-let d (u64_sdiv k1 k2))
(subsume (iconst ty (imm64 d))))
(rule (simplify
(udiv (fits_in_64 ty)
(iconst ty (u64_from_imm64 k1))
(iconst ty (u64_from_imm64 k2))))
(if-let d (u64_udiv k1 k2))
(subsume (iconst ty (imm64 d))))
(rule (simplify
(bor (fits_in_64 ty)
(iconst ty (u64_from_imm64 k1))
(iconst ty (u64_from_imm64 k2))))
(subsume (iconst ty (imm64 (u64_or k1 k2)))))
(rule (simplify
(band (fits_in_64 ty)
(iconst ty (u64_from_imm64 k1))
(iconst ty (u64_from_imm64 k2))))
(subsume (iconst ty (imm64 (u64_and k1 k2)))))
(rule (simplify
(bxor (fits_in_64 ty)
(iconst ty (u64_from_imm64 k1))
(iconst ty (u64_from_imm64 k2))))
(subsume (iconst ty (imm64 (u64_xor k1 k2)))))
(rule (simplify
(bnot (fits_in_64 ty)
(iconst ty (u64_from_imm64 k))))
(subsume (iconst ty (imm64 (u64_not k)))))
;; Canonicalize via commutativity: push immediates to the right.
;;
;; (op k x) --> (op x k)
;;
;; Keeping constants on the right gives the folding/associativity rules
;; below a single canonical shape to match against.
(rule (simplify
(iadd ty k @ (iconst ty _) x))
(iadd ty x k))
;; sub is not commutative, but we can flip the args and negate the
;; whole thing: k - x == -(x - k).
(rule (simplify
(isub ty k @ (iconst ty _) x))
(ineg ty (isub ty x k)))
(rule (simplify
(imul ty k @ (iconst ty _) x))
(imul ty x k))
(rule (simplify
(bor ty k @ (iconst ty _) x))
(bor ty x k))
(rule (simplify
(band ty k @ (iconst ty _) x))
(band ty x k))
(rule (simplify
(bxor ty k @ (iconst ty _) x))
(bxor ty x k))
;; Canonicalize via associativity: reassociate to a right-heavy tree
;; for constants.
;;
;; (op (op x k) k) --> (op x (op k k))
;;
;; The inner (op k k) is then itself foldable by the constant-
;; propagation rules above, collapsing chains of constant ops.
(rule (simplify
(iadd ty (iadd ty x k1 @ (iconst ty _)) k2 @ (iconst ty _)))
(iadd ty x (iadd ty k1 k2)))
;; sub is not directly associative, but we can flip a sub to an add to
;; make it work:
;; - (sub (sub x k1) k2) -> (sub x (add k1 k2))
;; - (sub (sub k1 x) k2) -> (sub (sub k1 k2) x)
;; - (sub (add x k1) k2) -> (sub x (sub k2 k1))
;; - (add (sub x k1) k2) -> (add x (sub k2 k1))
;; - (add (sub k1 x) k2) -> (sub (add k1 k2) x)
(rule (simplify (isub ty
(isub ty x (iconst ty (u64_from_imm64 k1)))
(iconst ty (u64_from_imm64 k2))))
(isub ty x (iconst ty (imm64 (u64_add k1 k2)))))
(rule (simplify (isub ty
(isub ty (iconst ty (u64_from_imm64 k1)) x)
(iconst ty (u64_from_imm64 k2))))
(isub ty (iconst ty (imm64 (u64_sub k1 k2))) x))
(rule (simplify (isub ty
(iadd ty x (iconst ty (u64_from_imm64 k1)))
(iconst ty (u64_from_imm64 k2))))
(isub ty x (iconst ty (imm64 (u64_sub k2 k1)))))
(rule (simplify (iadd ty
(isub ty x (iconst ty (u64_from_imm64 k1)))
(iconst ty (u64_from_imm64 k2))))
(iadd ty x (iconst ty (imm64 (u64_sub k2 k1)))))
(rule (simplify (iadd ty
(isub ty (iconst ty (u64_from_imm64 k1)) x)
(iconst ty (u64_from_imm64 k2))))
(isub ty (iconst ty (imm64 (u64_add k1 k2))) x))
;; mul/or/and/xor are directly associative; same right-heavy rotation.
(rule (simplify
(imul ty (imul ty x k1 @ (iconst ty _)) k2 @ (iconst ty _)))
(imul ty x (imul ty k1 k2)))
(rule (simplify
(bor ty (bor ty x k1 @ (iconst ty _)) k2 @ (iconst ty _)))
(bor ty x (bor ty k1 k2)))
(rule (simplify
(band ty (band ty x k1 @ (iconst ty _)) k2 @ (iconst ty _)))
(band ty x (band ty k1 k2)))
(rule (simplify
(bxor ty (bxor ty x k1 @ (iconst ty _)) k2 @ (iconst ty _)))
(bxor ty x (bxor ty k1 k2)))
;; TODO: fadd, fsub, fmul, fdiv, fneg, fabs

View File

@@ -0,0 +1,11 @@
//! Wrapper environment for generated code from optimization rules in ISLE.
// See https://github.com/rust-lang/rust/issues/47995: we cannot use `#![...]` attributes inside of
// the generated ISLE source below because we include!() it. We must include!() it because its path
// depends on an environment variable; and also because of this, we can't do the `#[path = "..."]
// mod generated_code;` trick either.
//
// The generated code trips many lints that are harmless for machine-generated
// output, so silence them for this whole module instead.
#![allow(dead_code, unreachable_code, unreachable_patterns)]
#![allow(unused_imports, unused_variables, non_snake_case, unused_mut)]
#![allow(irrefutable_let_patterns, non_camel_case_types)]
// Pull in the ISLE-generated optimization rules; `ISLE_DIR` is set by the
// build script to the generated-source output directory.
include!(concat!(env!("ISLE_DIR"), "/isle_opt.rs"));

View File

@@ -31,24 +31,7 @@
(type isize (primitive isize))
;; `cranelift-entity`-based identifiers.
(type Inst (primitive Inst))
(type Type (primitive Type))
(type Value (primitive Value))
;; ISLE representation of `&[Value]`.
(type ValueSlice (primitive ValueSlice))
;; ISLE representation of `Vec<u8>`
(type VecMask extern (enum))
(type ValueList (primitive ValueList))
(type ValueRegs (primitive ValueRegs))
(type WritableValueRegs (primitive WritableValueRegs))
;; Instruction lowering result: a vector of `ValueRegs`.
(type InstOutput (primitive InstOutput))
;; (Mutable) builder to incrementally construct an `InstOutput`.
(type InstOutputBuilder extern (enum))
;; Add two `u32`s (implemented by an external Rust constructor).
(decl u32_add (u32 u32) u32)
(extern constructor u32_add u32_add)
@@ -72,6 +55,16 @@
;; Pure/fallible constructor that tests if one u32 is less than or
;; equal to another.
(decl pure u32_lteq (u32 u32) Unit)
(extern constructor u32_lteq u32_lteq)
;; Pure/fallible constructor that tests if one u8 is less than or
;; equal to another.
(decl pure u8_lteq (u8 u8) Unit)
(extern constructor u8_lteq u8_lteq)
;; Pure/fallible constructor that tests if one u8 is strictly less
;; than another.
(decl pure u8_lt (u8 u8) Unit)
(extern constructor u8_lt u8_lt)
;; Get a signed 32-bit immediate in an u32 from an Imm64, if possible.
(decl simm32 (u32) Imm64)
(extern extractor simm32 simm32)
@@ -83,143 +76,6 @@
;; Bitwise AND of two `u8` values.
(decl u8_and (u8 u8) u8)
(extern constructor u8_and u8_and)
;;;; Registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(type Reg (primitive Reg))
(type WritableReg (primitive WritableReg))
(type OptionWritableReg (primitive OptionWritableReg))
(type VecReg extern (enum))
(type VecWritableReg extern (enum))
(type PReg (primitive PReg))
;; Construct a `ValueRegs` of one register.
(decl value_reg (Reg) ValueRegs)
(extern constructor value_reg value_reg)
;; Construct a `ValueRegs` of two registers.
(decl value_regs (Reg Reg) ValueRegs)
(extern constructor value_regs value_regs)
;; Construct an empty `ValueRegs` containing only invalid register sentinels.
(decl value_regs_invalid () ValueRegs)
(extern constructor value_regs_invalid value_regs_invalid)
;; Construct an empty `InstOutput`.
(decl output_none () InstOutput)
(extern constructor output_none output_none)
;; Construct a single-element `InstOutput`.
(decl output (ValueRegs) InstOutput)
(extern constructor output output)
;; Construct a two-element `InstOutput`.
(decl output_pair (ValueRegs ValueRegs) InstOutput)
(extern constructor output_pair output_pair)
;; Construct a single-element `InstOutput` from a single register.
(decl output_reg (Reg) InstOutput)
(rule (output_reg reg) (output (value_reg reg)))
;; Construct a single-element `InstOutput` from a value.
(decl output_value (Value) InstOutput)
(rule (output_value val) (output (put_in_regs val)))
;; Initially empty `InstOutput` builder.
(decl output_builder_new () InstOutputBuilder)
(extern constructor output_builder_new output_builder_new)
;; Append a `ValueRegs` to an `InstOutput` under construction.
(decl output_builder_push (InstOutputBuilder ValueRegs) Unit)
(extern constructor output_builder_push output_builder_push)
;; Finish building an `InstOutput` incrementally.
(decl output_builder_finish (InstOutputBuilder) InstOutput)
(extern constructor output_builder_finish output_builder_finish)
;; Get a temporary register for writing.
(decl temp_writable_reg (Type) WritableReg)
(extern constructor temp_writable_reg temp_writable_reg)
;; Get a temporary register for reading.
(decl temp_reg (Type) Reg)
(rule (temp_reg ty)
(writable_reg_to_reg (temp_writable_reg ty)))
;; Infallible extractor exposing whether a register is valid as a bool.
(decl is_valid_reg (bool) Reg)
(extern extractor infallible is_valid_reg is_valid_reg)
;; Get or match the invalid register.
(decl invalid_reg () Reg)
(extern constructor invalid_reg invalid_reg)
(extractor (invalid_reg) (is_valid_reg $false))
;; Match any register but the invalid register.
(decl valid_reg (Reg) Reg)
(extractor (valid_reg reg) (and (is_valid_reg $true) reg))
;; Mark this value as used, to ensure that it gets lowered.
(decl mark_value_used (Value) Unit)
(extern constructor mark_value_used mark_value_used)
;; Put the given value into a register.
;;
;; Asserts that the value fits into a single register, and doesn't require
;; multiple registers for its representation (like `i128` on x64 for example).
;;
;; As a side effect, this marks the value as used.
(decl put_in_reg (Value) Reg)
(extern constructor put_in_reg put_in_reg)
;; Put the given value into one or more registers.
;;
;; As a side effect, this marks the value as used.
(decl put_in_regs (Value) ValueRegs)
(extern constructor put_in_regs put_in_regs)
;; If the given reg is a real register, cause the value in reg to be in a virtual
;; reg, by copying it into a new virtual reg.
(decl ensure_in_vreg (Reg Type) Reg)
(extern constructor ensure_in_vreg ensure_in_vreg)
;; Get the `n`th register inside a `ValueRegs`.
(decl value_regs_get (ValueRegs usize) Reg)
(extern constructor value_regs_get value_regs_get)
;; Get the number of registers in a `ValueRegs`.
(decl value_regs_len (ValueRegs) usize)
(extern constructor value_regs_len value_regs_len)
;; Get a range for the number of regs in a `ValueRegs`.
(decl value_regs_range (ValueRegs) Range)
(rule (value_regs_range regs) (range 0 (value_regs_len regs)))
;; Put the value into one or more registers and return the first register.
;;
;; Unlike `put_in_reg`, this does not assert that the value fits in a single
;; register. This is useful for things like a `i128` shift amount, where we mask
;; the shift amount to the bit width of the value being shifted, and so the high
;; half of the `i128` won't ever be used.
;;
;; As a side effect, this marks that value as used.
(decl lo_reg (Value) Reg)
(rule (lo_reg val)
(let ((regs ValueRegs (put_in_regs val)))
(value_regs_get regs 0)))
;; Convert a `PReg` into a `Reg`
(decl preg_to_reg (PReg) Reg)
(extern constructor preg_to_reg preg_to_reg)
;;;; Common Mach Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(type MachLabel (primitive MachLabel))
(type ValueLabel (primitive ValueLabel))
(type UnwindInst (primitive UnwindInst))
(type ExternalName (primitive ExternalName))
(type BoxExternalName (primitive BoxExternalName))
(type RelocDistance (primitive RelocDistance))
(type VecArgPair extern (enum))
;;;; Primitive Type Conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Widen a `u8` to a `u32`.
(decl pure u8_as_u32 (u8) u32)
@@ -245,9 +101,30 @@
;; 64-bit arithmetic/bitwise helpers used by the mid-end constant-folding
;; rules. All are pure external constructors; the arithmetic ones are
;; presumably wrapping (two's-complement) -- confirm in the extern impls.
(decl pure u64_sub (u64 u64) u64)
(extern constructor u64_sub u64_sub)
(decl pure u64_mul (u64 u64) u64)
(extern constructor u64_mul u64_mul)
;; Fallible: fails (rule does not fire) when the division is undefined,
;; e.g. a zero divisor -- see the constant-folding rules' `if-let` use.
(decl pure u64_sdiv (u64 u64) u64)
(extern constructor u64_sdiv u64_sdiv)
(decl pure u64_udiv (u64 u64) u64)
(extern constructor u64_udiv u64_udiv)
(decl pure u64_and (u64 u64) u64)
(extern constructor u64_and u64_and)
(decl pure u64_or (u64 u64) u64)
(extern constructor u64_or u64_or)
(decl pure u64_xor (u64 u64) u64)
(extern constructor u64_xor u64_xor)
(decl pure u64_not (u64) u64)
(extern constructor u64_not u64_not)
;; Sign-extend the low 32 bits of a `u64` to the full 64 bits.
(decl pure u64_sextend_u32 (u64) u64)
(extern constructor u64_sextend_u32 u64_sextend_u32)
;; Infallible extractor exposing whether a `u64` is zero as a bool.
(decl u64_is_zero (bool) u64)
(extern extractor infallible u64_is_zero u64_is_zero)
@@ -443,46 +320,6 @@
;; Matches any type except i64x2.
(decl not_i64x2 () Type)
(extern extractor not_i64x2 not_i64x2)
;; Extractor to get a `ValueSlice` out of a `ValueList`.
(decl value_list_slice (ValueSlice) ValueList)
(extern extractor infallible value_list_slice value_list_slice)
;; Extractor to test whether a `ValueSlice` is empty.
(decl value_slice_empty () ValueSlice)
(extern extractor value_slice_empty value_slice_empty)
;; Extractor to split a `ValueSlice` into its first element plus a tail.
(decl value_slice_unwrap (Value ValueSlice) ValueSlice)
(extern extractor value_slice_unwrap value_slice_unwrap)
;; Return the length of a `ValueSlice`.
(decl value_slice_len (ValueSlice) usize)
(extern constructor value_slice_len value_slice_len)
;; Return any element of a `ValueSlice`.
(decl value_slice_get (ValueSlice usize) Value)
(extern constructor value_slice_get value_slice_get)
;; Extractor to get the first element from a value list, along with its tail as
;; a `ValueSlice`.
(decl unwrap_head_value_list_1 (Value ValueSlice) ValueList)
(extractor (unwrap_head_value_list_1 head tail)
(value_list_slice (value_slice_unwrap head tail)))
;; Extractor to get the first two elements from a value list, along with its
;; tail as a `ValueSlice`.
(decl unwrap_head_value_list_2 (Value Value ValueSlice) ValueList)
(extractor (unwrap_head_value_list_2 head1 head2 tail)
(value_list_slice (value_slice_unwrap head1 (value_slice_unwrap head2 tail))))
;; Constructor to test whether two values are same.
(decl pure same_value (Value Value) Value)
(extern constructor same_value same_value)
;; Turn a `Writable<Reg>` into a `Reg` via `Writable::to_reg`.
(decl writable_reg_to_reg (WritableReg) Reg)
(extern constructor writable_reg_to_reg writable_reg_to_reg)
;; Extract a `u8` from an `Uimm8`.
(decl u8_from_uimm8 (u8) Uimm8)
(extern extractor infallible u8_from_uimm8 u8_from_uimm8)
@@ -499,6 +336,10 @@
;; Extract a non-zero `u64` from an `Imm64`; fails to match on zero.
(decl nonzero_u64_from_imm64 (u64) Imm64)
(extern extractor nonzero_u64_from_imm64 nonzero_u64_from_imm64)
;; Create a new Imm64.
(decl pure imm64 (u64) Imm64)
(extern constructor imm64 imm64)
;; Extract a `u64` from an `Ieee32`.
(decl u64_from_ieee32 (u64) Ieee32)
(extern extractor infallible u64_from_ieee32 u64_from_ieee32)
@@ -507,34 +348,6 @@
;; Extract a `u64` from an `Ieee64`.
(decl u64_from_ieee64 (u64) Ieee64)
(extern extractor infallible u64_from_ieee64 u64_from_ieee64)
;; Extract the result values for the given instruction.
(decl inst_results (ValueSlice) Inst)
(extern extractor infallible inst_results inst_results)
;; Extract the first result value of the given instruction.
(decl first_result (Value) Inst)
(extern extractor first_result first_result)
;; Extract the `InstructionData` for an `Inst`.
(decl inst_data (InstructionData) Inst)
(extern extractor infallible inst_data inst_data)
;; Extract the type of a `Value`.
(decl value_type (Type) Value)
(extern extractor infallible value_type value_type)
;; Extract the type of the instruction's first result.
(decl result_type (Type) Inst)
(extractor (result_type ty)
(first_result (value_type ty)))
;; Extract the type of the instruction's first result and pass along the
;; instruction as well.
(decl has_type (Type Inst) Inst)
(extractor (has_type ty inst)
(and (result_type ty)
inst))
;; Match a multi-lane type, extracting (# bits per lane, # lanes) from the given
;; type. Will only match when there is more than one lane.
(decl multi_lane (u32 u32) Type)
@@ -565,27 +378,10 @@
;; Match a dynamic-vector integer type of total size 128 bits.
(decl ty_dyn128_int (Type) Type)
(extern extractor ty_dyn128_int ty_dyn128_int)
;; Match the instruction that defines the given value, if any.
(decl def_inst (Inst) Value)
(extern extractor def_inst def_inst)
;; Extract a constant `u64` from a value defined by an `iconst`.
(decl u64_from_iconst (u64) Value)
(extractor (u64_from_iconst x)
(def_inst (iconst (u64_from_imm64 x))))
;; Convert an `Offset32` to a primitive number.
(decl offset32_to_u32 (Offset32) u32)
(extern constructor offset32_to_u32 offset32_to_u32)
;; Match any zero value for iconst, fconst32, fconst64, vconst and splat.
(decl pure zero_value (Value) Value)
(extern constructor zero_value zero_value)
;; Match a sinkable instruction from a value operand.
(decl pure is_sinkable_inst (Value) Inst)
(extern constructor is_sinkable_inst is_sinkable_inst)
;; This is a direct import of `IntCC::unsigned`.
;; Get the corresponding IntCC with the signed component removed.
;; For conditions without a signed component, this is a no-op.
@@ -596,283 +392,6 @@
;; Get the corresponding IntCC with the unsigned component removed;
;; no-op for conditions without an unsigned component.
(decl pure signed_cond_code (IntCC) IntCC)
(extern constructor signed_cond_code signed_cond_code)
;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emit an instruction.
;;
;; This is low-level and side-effectful; it should only be used as an
;; implementation detail by helpers that preserve the SSA facade themselves.
(decl emit (MInst) Unit)
(extern constructor emit emit)
;; Sink an instruction.
;;
;; This is a side-effectful operation that notifies the context that the
;; instruction has been sunk into another instruction, and no longer needs to
;; be lowered.
(decl sink_inst (Inst) Unit)
(extern constructor sink_inst sink_inst)
;; Constant pool emission.
(type VCodeConstant (primitive VCodeConstant))
;; Add a u64 little-endian constant to the in-memory constant pool and
;; return a VCodeConstant index that refers to it. This is
;; side-effecting but idempotent (constants are deduplicated).
(decl emit_u64_le_const (u64) VCodeConstant)
(extern constructor emit_u64_le_const emit_u64_le_const)
;; Add a u128 little-endian constant to the in-memory constant pool and
;; return a VCodeConstant index that refers to it. This is
;; side-effecting but idempotent (constants are deduplicated).
(decl emit_u128_le_const (u128) VCodeConstant)
(extern constructor emit_u128_le_const emit_u128_le_const)
;; Fetch the VCodeConstant associated with a Constant.
(decl const_to_vconst (Constant) VCodeConstant)
(extern constructor const_to_vconst const_to_vconst)
;;;; Helpers for Side-Effectful Instructions Without Results ;;;;;;;;;;;;;;;;;;;
;; Bundle of one to three machine instructions emitted purely for their
;; side effects (no result registers).
(type SideEffectNoResult (enum
(Inst (inst MInst))
(Inst2 (inst1 MInst)
(inst2 MInst))
(Inst3 (inst1 MInst)
(inst2 MInst)
(inst3 MInst))))
;; Create an empty `InstOutput`, but do emit the given side-effectful
;; instruction.
(decl side_effect (SideEffectNoResult) InstOutput)
(rule (side_effect (SideEffectNoResult.Inst inst))
(let ((_ Unit (emit inst)))
(output_none)))
(rule (side_effect (SideEffectNoResult.Inst2 inst1 inst2))
(let ((_ Unit (emit inst1))
(_ Unit (emit inst2)))
(output_none)))
(rule (side_effect (SideEffectNoResult.Inst3 inst1 inst2 inst3))
(let ((_ Unit (emit inst1))
(_ Unit (emit inst2))
(_ Unit (emit inst3)))
(output_none)))
;; Concatenate two side-effect bundles; only combinations totalling at
;; most three instructions are supported (no Inst2+Inst2 etc.).
(decl side_effect_concat (SideEffectNoResult SideEffectNoResult) SideEffectNoResult)
(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst inst2))
(SideEffectNoResult.Inst2 inst1 inst2))
(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst2 inst2 inst3))
(SideEffectNoResult.Inst3 inst1 inst2 inst3))
(rule (side_effect_concat (SideEffectNoResult.Inst2 inst1 inst2) (SideEffectNoResult.Inst inst3))
(SideEffectNoResult.Inst3 inst1 inst2 inst3))
;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Newtype wrapper around `MInst` for instructions that are used for their
;; effect on flags.
;;
;; Variant determines how result is given when combined with a
;; ConsumesFlags. See `with_flags` below for more.
(type ProducesFlags (enum
;; For cases where the flags have been produced by another
;; instruction, and we have out-of-band reasons to know
;; that they won't be clobbered by the time we depend on
;; them.
(AlreadyExistingFlags)
(ProducesFlagsSideEffect (inst MInst))
(ProducesFlagsTwiceSideEffect (inst1 MInst) (inst2 MInst))
;; Not directly combinable with a ConsumesFlags;
;; used in s390x and unwrapped directly by `trapif`.
(ProducesFlagsReturnsReg (inst MInst) (result Reg))
(ProducesFlagsReturnsResultWithConsumer (inst MInst) (result Reg))))
;; Chain another producer to a `ProducesFlags`.
;; Only the single-instruction side-effect variant can be extended.
(decl produces_flags_append (ProducesFlags MInst) ProducesFlags)
(rule (produces_flags_append (ProducesFlags.ProducesFlagsSideEffect inst1) inst2)
(ProducesFlags.ProducesFlagsTwiceSideEffect inst1 inst2))
;; Newtype wrapper around `MInst` for instructions that consume flags.
;;
;; Variant determines how result is given when combined with a
;; ProducesFlags. See `with_flags` below for more.
(type ConsumesFlags (enum
(ConsumesFlagsSideEffect (inst MInst))
(ConsumesFlagsSideEffect2 (inst1 MInst) (inst2 MInst))
(ConsumesFlagsReturnsResultWithProducer (inst MInst) (result Reg))
(ConsumesFlagsReturnsReg (inst MInst) (result Reg))
(ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst)
(inst2 MInst)
(result ValueRegs))
(ConsumesFlagsFourTimesReturnsValueRegs (inst1 MInst)
(inst2 MInst)
(inst3 MInst)
(inst4 MInst)
(result ValueRegs))))
;; Get the produced register out of a ProducesFlags.
(decl produces_flags_get_reg (ProducesFlags) Reg)
(rule (produces_flags_get_reg (ProducesFlags.ProducesFlagsReturnsReg _ reg)) reg)
;; Modify a ProducesFlags to use it only for its side-effect, ignoring
;; its result.
(decl produces_flags_ignore (ProducesFlags) ProducesFlags)
(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsReg inst _))
(ProducesFlags.ProducesFlagsSideEffect inst))
(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsResultWithConsumer inst _))
(ProducesFlags.ProducesFlagsSideEffect inst))
;; Helper for combining two flags-consumer instructions that return a
;; single Reg, giving a ConsumesFlags that returns both values in a
;; ValueRegs.
(decl consumes_flags_concat (ConsumesFlags ConsumesFlags) ConsumesFlags)
(rule (consumes_flags_concat (ConsumesFlags.ConsumesFlagsReturnsReg inst1 reg1)
(ConsumesFlags.ConsumesFlagsReturnsReg inst2 reg2))
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
inst1
inst2
(value_regs reg1 reg2)))
(rule (consumes_flags_concat
(ConsumesFlags.ConsumesFlagsSideEffect inst1)
(ConsumesFlags.ConsumesFlagsSideEffect inst2))
(ConsumesFlags.ConsumesFlagsSideEffect2 inst1 inst2))
;; Combine flags-producing and -consuming instructions together, ensuring that
;; they are emitted back-to-back and no other instructions can be emitted
;; between them and potentially clobber the flags.
;;
;; Returns a `ValueRegs` according to the specific combination of ProducesFlags and ConsumesFlags modes:
;; - SideEffect + ReturnsReg --> ValueReg with one Reg from consumer
;; - SideEffect + ReturnsValueRegs --> ValueReg as given from consumer
;; - ReturnsResultWithProducer + ReturnsResultWithConsumer --> ValueReg with low part from producer, high part from consumer
;;
;; See `with_flags_reg` below for a variant that extracts out just the lower Reg.
(decl with_flags (ProducesFlags ConsumesFlags) ValueRegs)
(rule (with_flags (ProducesFlags.ProducesFlagsReturnsResultWithConsumer producer_inst producer_result)
(ConsumesFlags.ConsumesFlagsReturnsResultWithProducer consumer_inst consumer_result))
(let ((_x Unit (emit producer_inst))
(_y Unit (emit consumer_inst)))
(value_regs producer_result consumer_result)))
(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
(ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result))
(let ((_x Unit (emit producer_inst))
(_y Unit (emit consumer_inst)))
(value_reg consumer_result)))
(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1
consumer_inst_2
consumer_result))
;; We must emit these instructions in order as the creator of
;; the ConsumesFlags may be relying on dataflow dependencies
;; amongst them.
(let ((_x Unit (emit producer_inst))
(_y Unit (emit consumer_inst_1))
(_z Unit (emit consumer_inst_2)))
consumer_result))
(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
(ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1
consumer_inst_2
consumer_inst_3
consumer_inst_4
consumer_result))
;; We must emit these instructions in order as the creator of
;; the ConsumesFlags may be relying on dataflow dependencies
;; amongst them.
(let ((_x Unit (emit producer_inst))
(_y Unit (emit consumer_inst_1))
(_z Unit (emit consumer_inst_2))
(_w Unit (emit consumer_inst_3))
(_v Unit (emit consumer_inst_4)))
consumer_result))
(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2)
(ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result))
(let ((_ Unit (emit producer_inst1))
(_ Unit (emit producer_inst2))
(_ Unit (emit consumer_inst)))
(value_reg consumer_result)))
(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2)
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1
consumer_inst_2
consumer_result))
;; We must emit these instructions in order as the creator of
;; the ConsumesFlags may be relying on dataflow dependencies
;; amongst them.
(let ((_ Unit (emit producer_inst1))
(_ Unit (emit producer_inst2))
(_ Unit (emit consumer_inst_1))
(_ Unit (emit consumer_inst_2)))
consumer_result))
(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2)
(ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1
consumer_inst_2
consumer_inst_3
consumer_inst_4
consumer_result))
;; We must emit these instructions in order as the creator of
;; the ConsumesFlags may be relying on dataflow dependencies
;; amongst them.
(let ((_ Unit (emit producer_inst1))
(_ Unit (emit producer_inst2))
(_ Unit (emit consumer_inst_1))
(_ Unit (emit consumer_inst_2))
(_ Unit (emit consumer_inst_3))
(_ Unit (emit consumer_inst_4)))
consumer_result))
;; As `with_flags`, but returns only the first (lowest) register of the
;; combined result.
(decl with_flags_reg (ProducesFlags ConsumesFlags) Reg)
(rule (with_flags_reg p c)
(let ((v ValueRegs (with_flags p c)))
(value_regs_get v 0)))
;; Indicate that the current state of the flags register from the instruction
;; that produces this Value is relied on.
(decl flags_to_producesflags (Value) ProducesFlags)
(rule (flags_to_producesflags val)
(let ((_ Unit (mark_value_used val)))
(ProducesFlags.AlreadyExistingFlags)))
;; Combine a flags-producing instruction and a flags-consuming instruction that
;; produces no results.
;;
;; This function handles the following case only:
;; - ProducesFlagsSideEffect + ConsumesFlagsSideEffect
(decl with_flags_side_effect (ProducesFlags ConsumesFlags) SideEffectNoResult)
(rule (with_flags_side_effect
(ProducesFlags.AlreadyExistingFlags)
(ConsumesFlags.ConsumesFlagsSideEffect c))
(SideEffectNoResult.Inst c))
(rule (with_flags_side_effect
(ProducesFlags.AlreadyExistingFlags)
(ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2))
(SideEffectNoResult.Inst2 c1 c2))
(rule (with_flags_side_effect
(ProducesFlags.ProducesFlagsSideEffect p)
(ConsumesFlags.ConsumesFlagsSideEffect c))
(SideEffectNoResult.Inst2 p c))
(rule (with_flags_side_effect
(ProducesFlags.ProducesFlagsSideEffect p)
(ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2))
(SideEffectNoResult.Inst3 p c1 c2))
(rule (with_flags_side_effect
(ProducesFlags.ProducesFlagsTwiceSideEffect p1 p2)
(ConsumesFlags.ConsumesFlagsSideEffect c))
(SideEffectNoResult.Inst3 p1 p2 c))
;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl trap_code_division_by_zero () TrapCode)
@@ -884,70 +403,6 @@
(decl trap_code_bad_conversion_to_integer () TrapCode)
(extern constructor trap_code_bad_conversion_to_integer trap_code_bad_conversion_to_integer)
;;;; Helpers for accessing compilation flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Matches when the `avoid_div_traps` setting is enabled.
(decl avoid_div_traps () Type)
(extern extractor avoid_div_traps avoid_div_traps)
;; This definition should be kept up to date with the values defined in
;; cranelift/codegen/meta/src/shared/settings.rs
(type TlsModel extern (enum (None) (ElfGd) (Macho) (Coff)))
(decl tls_model (TlsModel) Type)
(extern extractor infallible tls_model tls_model)
(decl pure tls_model_is_elf_gd () Unit)
(extern constructor tls_model_is_elf_gd tls_model_is_elf_gd)
(decl pure tls_model_is_macho () Unit)
(extern constructor tls_model_is_macho tls_model_is_macho)
(decl pure tls_model_is_coff () Unit)
(extern constructor tls_model_is_coff tls_model_is_coff)
(decl pure preserve_frame_pointers () Unit)
(extern constructor preserve_frame_pointers preserve_frame_pointers)
;;;; Helpers for accessing instruction data ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Accessor for `FuncRef`.
(decl func_ref_data (SigRef ExternalName RelocDistance) FuncRef)
(extern extractor infallible func_ref_data func_ref_data)
;; Accessor for `GlobalValue`.
(decl symbol_value_data (ExternalName RelocDistance i64) GlobalValue)
(extern extractor symbol_value_data symbol_value_data)
;; Box an `ExternalName`.
(decl box_external_name (ExternalName) BoxExternalName)
(extern constructor box_external_name box_external_name)
;; Accessor for `RelocDistance`.
(decl reloc_distance_near () RelocDistance)
(extern extractor reloc_distance_near reloc_distance_near)
;; Accessor for `Immediate` as u128.
(decl u128_from_immediate (u128) Immediate)
(extern extractor u128_from_immediate u128_from_immediate)
;; Accessor for `Immediate` as a vector of u8 values.
(decl vec_mask_from_immediate (VecMask) Immediate)
(extern extractor vec_mask_from_immediate vec_mask_from_immediate)
;; Accessor for `Constant` as u128.
(decl u128_from_constant (u128) Constant)
(extern extractor u128_from_constant u128_from_constant)
;; Accessor for `Constant` as u64.
(decl u64_from_constant (u64) Constant)
(extern extractor u64_from_constant u64_from_constant)
;;;; Helpers for tail recursion loops ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; A range of integers to loop through.
@@ -976,156 +431,7 @@
;; Extractor to split a non-empty `Range` into its first index and rest.
(decl range_unwrap (usize Range) Range)
(extractor (range_unwrap index rest) (range_view (RangeView.NonEmpty index rest)))
;;;; Helpers for generating returns ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The (writable) register(s) that will contain the n'th return value.
(decl retval (usize) WritableValueRegs)
(extern constructor retval retval)
;; Extractor to check for the special case that a `WritableValueRegs`
;; contains only a single register.
(decl only_writable_reg (WritableReg) WritableValueRegs)
(extern extractor only_writable_reg only_writable_reg)
;; Get the `n`th register inside a `WritableValueRegs`.
(decl writable_regs_get (WritableValueRegs usize) WritableReg)
(extern constructor writable_regs_get writable_regs_get)
;;;; Helpers for generating calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Type to hold information about a function call signature.
(type Sig extern (enum))
;; Information how to pass one argument or return value.
(type ABIArg extern (enum))
;; Information how to pass a single slot of one argument or return value.
(type ABIArgSlot extern
(enum
(Reg
(reg RealReg)
(ty Type)
(extension ArgumentExtension))
(Stack
(offset i64)
(ty Type)
(extension ArgumentExtension))))
;; Physical register that may hold an argument or return value.
(type RealReg (primitive RealReg))
;; Instruction on whether and how to extend an argument value.
(type ArgumentExtension extern
(enum
(None)
(Uext)
(Sext)))
;; Get the number of arguments expected.
(decl abi_num_args (Sig) usize)
(extern constructor abi_num_args abi_num_args)
;; Get information specifying how to pass one argument.
(decl abi_get_arg (Sig usize) ABIArg)
(extern constructor abi_get_arg abi_get_arg)
;; Get the number of return values expected.
(decl abi_num_rets (Sig) usize)
(extern constructor abi_num_rets abi_num_rets)
;; Get information specifying how to pass one return value.
(decl abi_get_ret (Sig usize) ABIArg)
(extern constructor abi_get_ret abi_get_ret)
;; Get information specifying how to pass the implicit pointer
;; to the return-value area on the stack, if required.
(decl abi_ret_arg (ABIArg) Sig)
(extern extractor abi_ret_arg abi_ret_arg)
;; Succeeds if no implicit return-value area pointer is required.
(decl abi_no_ret_arg () Sig)
(extern extractor abi_no_ret_arg abi_no_ret_arg)
;; Size of the argument area.
(decl abi_sized_stack_arg_space (Sig) i64)
(extern constructor abi_sized_stack_arg_space abi_sized_stack_arg_space)
;; Size of the return-value area.
(decl abi_sized_stack_ret_space (Sig) i64)
(extern constructor abi_sized_stack_ret_space abi_sized_stack_ret_space)
;; StackSlot addr
(decl abi_stackslot_addr (WritableReg StackSlot Offset32) MInst)
(extern constructor abi_stackslot_addr abi_stackslot_addr)
;; DynamicStackSlot addr
(decl abi_dynamic_stackslot_addr (WritableReg DynamicStackSlot) MInst)
(extern constructor abi_dynamic_stackslot_addr abi_dynamic_stackslot_addr)
;; Extractor to detect the special case where an argument or
;; return value only requires a single slot to be passed.
(decl abi_arg_only_slot (ABIArgSlot) ABIArg)
(extern extractor abi_arg_only_slot abi_arg_only_slot)
;; Extractor to detect the special case where a struct argument
;; is explicitly passed by reference using a hidden pointer.
(decl abi_arg_struct_pointer (ABIArgSlot i64 u64) ABIArg)
(extern extractor abi_arg_struct_pointer abi_arg_struct_pointer)
;; Extractor to detect the special case where a non-struct argument
;; is implicitly passed by reference using a hidden pointer.
(decl abi_arg_implicit_pointer (ABIArgSlot i64 Type) ABIArg)
(extern extractor abi_arg_implicit_pointer abi_arg_implicit_pointer)
;; Convert a real register number into a virtual register.
(decl real_reg_to_reg (RealReg) Reg)
(extern constructor real_reg_to_reg real_reg_to_reg)
;; Convert a real register number into a writable virtual register.
(decl real_reg_to_writable_reg (RealReg) WritableReg)
(extern constructor real_reg_to_writable_reg real_reg_to_writable_reg)
;; Generate a move between two registers.
(decl gen_move (Type WritableReg Reg) MInst)
(extern constructor gen_move gen_move)
;; Copy a return value to a set of registers.
(decl copy_to_regs (WritableValueRegs Value) Unit)
(rule (copy_to_regs dsts val @ (value_type ty))
(let ((srcs ValueRegs (put_in_regs val)))
(copy_to_regs_range ty (value_regs_range srcs) dsts srcs)))
;; Helper for `copy_to_regs` that uses a range to index into the reg/value
;; vectors. Fails for the empty range.
;; Tail-recursive: emits one `gen_move` per register index in the range.
(decl copy_to_regs_range (Type Range WritableValueRegs ValueRegs) Unit)
(rule (copy_to_regs_range ty (range_empty) dsts srcs)
(unit))
(rule (copy_to_regs_range ty (range_unwrap head tail) dsts srcs)
(let ((dst WritableReg (writable_regs_get dsts head))
(src Reg (value_regs_get srcs head))
(_ Unit (emit (gen_move ty dst src))))
(copy_to_regs_range ty tail dsts srcs)))
;; Lower all return values in the given range, copying each into its
;; designated return register(s) via `copy_to_regs`.
(decl lower_return (Range ValueSlice) InstOutput)
(rule (lower_return (range_empty) _) (output_none))
(rule (lower_return (range_unwrap head tail) args)
(let ((_ Unit (copy_to_regs (retval head) (value_slice_get args head))))
(lower_return tail args)))
;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Implicit term coercions applied by the ISLE compiler when a term of
;; the first type appears where the second is expected.
(convert Inst Value def_inst)
(convert Reg ValueRegs value_reg)
(convert Value Reg put_in_reg)
(convert Value ValueRegs put_in_regs)
(convert WritableReg Reg writable_reg_to_reg)
(convert ValueRegs InstOutput output)
(convert Reg InstOutput output_reg)
(convert Value InstOutput output_value)
(convert Offset32 u32 offset32_to_u32)
(convert ExternalName BoxExternalName box_external_name)
(convert PReg Reg preg_to_reg)

View File

@@ -0,0 +1,740 @@
;; Prelude definitions specific to lowering environments (backends) in
;; ISLE.
;;;; Primitive and External Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `cranelift-entity`-based identifiers.
(type Inst (primitive Inst))
(type Value (primitive Value))
;; ISLE representation of `&[Value]`.
(type ValueSlice (primitive ValueSlice))
;; ISLE representation of `Vec<u8>`
(type VecMask extern (enum))
(type ValueList (primitive ValueList))
(type ValueRegs (primitive ValueRegs))
(type WritableValueRegs (primitive WritableValueRegs))
;; Instruction lowering result: a vector of `ValueRegs`.
(type InstOutput (primitive InstOutput))
;; (Mutable) builder to incrementally construct an `InstOutput`.
(type InstOutputBuilder extern (enum))
;;;; Registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(type Reg (primitive Reg))
(type WritableReg (primitive WritableReg))
(type OptionWritableReg (primitive OptionWritableReg))
(type VecReg extern (enum))
(type VecWritableReg extern (enum))
(type PReg (primitive PReg))
;; Construct a `ValueRegs` of one register.
(decl value_reg (Reg) ValueRegs)
(extern constructor value_reg value_reg)
;; Construct a `ValueRegs` of two registers.
(decl value_regs (Reg Reg) ValueRegs)
(extern constructor value_regs value_regs)
;; Construct an empty `ValueRegs` containing only invalid register sentinels.
(decl value_regs_invalid () ValueRegs)
(extern constructor value_regs_invalid value_regs_invalid)
;; Construct an empty `InstOutput`.
(decl output_none () InstOutput)
(extern constructor output_none output_none)
;; Construct a single-element `InstOutput`.
(decl output (ValueRegs) InstOutput)
(extern constructor output output)
;; Construct a two-element `InstOutput`.
(decl output_pair (ValueRegs ValueRegs) InstOutput)
(extern constructor output_pair output_pair)
;; Construct a single-element `InstOutput` from a single register.
(decl output_reg (Reg) InstOutput)
(rule (output_reg reg) (output (value_reg reg)))
;; Construct a single-element `InstOutput` from a value.
(decl output_value (Value) InstOutput)
(rule (output_value val) (output (put_in_regs val)))
;; Initially empty `InstOutput` builder.
(decl output_builder_new () InstOutputBuilder)
(extern constructor output_builder_new output_builder_new)
;; Append a `ValueRegs` to an `InstOutput` under construction.
(decl output_builder_push (InstOutputBuilder ValueRegs) Unit)
(extern constructor output_builder_push output_builder_push)
;; Finish building an `InstOutput` incrementally.
(decl output_builder_finish (InstOutputBuilder) InstOutput)
(extern constructor output_builder_finish output_builder_finish)
;; Get a temporary register for writing.
(decl temp_writable_reg (Type) WritableReg)
(extern constructor temp_writable_reg temp_writable_reg)
;; Get a temporary register for reading.
(decl temp_reg (Type) Reg)
(rule (temp_reg ty)
(writable_reg_to_reg (temp_writable_reg ty)))
;; Infallible extractor yielding whether a `Reg` is valid (i.e., not the
;; invalid-register sentinel). Used to build the `invalid_reg` and
;; `valid_reg` extractors below.
(decl is_valid_reg (bool) Reg)
(extern extractor infallible is_valid_reg is_valid_reg)
;; Get or match the invalid register.
(decl invalid_reg () Reg)
(extern constructor invalid_reg invalid_reg)
(extractor (invalid_reg) (is_valid_reg $false))
;; Match any register but the invalid register.
(decl valid_reg (Reg) Reg)
(extractor (valid_reg reg) (and (is_valid_reg $true) reg))
;; Mark this value as used, to ensure that it gets lowered.
(decl mark_value_used (Value) Unit)
(extern constructor mark_value_used mark_value_used)
;; Put the given value into a register.
;;
;; Asserts that the value fits into a single register, and doesn't require
;; multiple registers for its representation (like `i128` on x64 for example).
;;
;; As a side effect, this marks the value as used.
(decl put_in_reg (Value) Reg)
(extern constructor put_in_reg put_in_reg)
;; Put the given value into one or more registers.
;;
;; As a side effect, this marks the value as used.
(decl put_in_regs (Value) ValueRegs)
(extern constructor put_in_regs put_in_regs)
;; If the given reg is a real register, cause the value in reg to be in a virtual
;; reg, by copying it into a new virtual reg.
(decl ensure_in_vreg (Reg Type) Reg)
(extern constructor ensure_in_vreg ensure_in_vreg)
;; Get the `n`th register inside a `ValueRegs`.
(decl value_regs_get (ValueRegs usize) Reg)
(extern constructor value_regs_get value_regs_get)
;; Get the number of registers in a `ValueRegs`.
(decl value_regs_len (ValueRegs) usize)
(extern constructor value_regs_len value_regs_len)
;; Get a range for the number of regs in a `ValueRegs`.
(decl value_regs_range (ValueRegs) Range)
(rule (value_regs_range regs) (range 0 (value_regs_len regs)))
;; Put the value into one or more registers and return the first register.
;;
;; Unlike `put_in_reg`, this does not assert that the value fits in a single
;; register. This is useful for things like a `i128` shift amount, where we mask
;; the shift amount to the bit width of the value being shifted, and so the high
;; half of the `i128` won't ever be used.
;;
;; As a side effect, this marks the value as used.
(decl lo_reg (Value) Reg)
(rule (lo_reg val)
(let ((regs ValueRegs (put_in_regs val)))
(value_regs_get regs 0)))
;; Convert a `PReg` into a `Reg`.
(decl preg_to_reg (PReg) Reg)
(extern constructor preg_to_reg preg_to_reg)
;;;; Common Mach Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(type MachLabel (primitive MachLabel))
(type ValueLabel (primitive ValueLabel))
(type UnwindInst (primitive UnwindInst))
(type ExternalName (primitive ExternalName))
(type BoxExternalName (primitive BoxExternalName))
(type RelocDistance (primitive RelocDistance))
(type VecArgPair extern (enum))
;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Extractor to get a `ValueSlice` out of a `ValueList`.
(decl value_list_slice (ValueSlice) ValueList)
(extern extractor infallible value_list_slice value_list_slice)
;; Extractor to test whether a `ValueSlice` is empty.
(decl value_slice_empty () ValueSlice)
(extern extractor value_slice_empty value_slice_empty)
;; Extractor to split a `ValueSlice` into its first element plus a tail.
(decl value_slice_unwrap (Value ValueSlice) ValueSlice)
(extern extractor value_slice_unwrap value_slice_unwrap)
;; Return the length of a `ValueSlice`.
(decl value_slice_len (ValueSlice) usize)
(extern constructor value_slice_len value_slice_len)
;; Return any element of a `ValueSlice`.
(decl value_slice_get (ValueSlice usize) Value)
(extern constructor value_slice_get value_slice_get)
;; Extractor to get the first element from a value list, along with its tail as
;; a `ValueSlice`. Fails (does not match) when the list is empty, since
;; `value_slice_unwrap` is a fallible extractor.
(decl unwrap_head_value_list_1 (Value ValueSlice) ValueList)
(extractor (unwrap_head_value_list_1 head tail)
(value_list_slice (value_slice_unwrap head tail)))
;; Extractor to get the first two elements from a value list, along with its
;; tail as a `ValueSlice`. Fails when the list has fewer than two elements.
(decl unwrap_head_value_list_2 (Value Value ValueSlice) ValueList)
(extractor (unwrap_head_value_list_2 head1 head2 tail)
(value_list_slice (value_slice_unwrap head1 (value_slice_unwrap head2 tail))))
;; Constructor to test whether two values are the same.
(decl pure same_value (Value Value) Value)
(extern constructor same_value same_value)
;; Turn a `Writable<Reg>` into a `Reg` via `Writable::to_reg`.
(decl writable_reg_to_reg (WritableReg) Reg)
(extern constructor writable_reg_to_reg writable_reg_to_reg)
;; Extract the result values for the given instruction.
(decl inst_results (ValueSlice) Inst)
(extern extractor infallible inst_results inst_results)
;; Extract the first result value of the given instruction.
(decl first_result (Value) Inst)
(extern extractor first_result first_result)
;; Extract the `InstructionData` for an `Inst`.
(decl inst_data (InstructionData) Inst)
(extern extractor infallible inst_data inst_data)
;; Extract the type of a `Value`.
(decl value_type (Type) Value)
(extern extractor infallible value_type value_type)
;; Extract the type of the instruction's first result.
(decl result_type (Type) Inst)
(extractor (result_type ty)
(first_result (value_type ty)))
;; Extract the type of the instruction's first result and pass along the
;; instruction as well.
(decl has_type (Type Inst) Inst)
(extractor (has_type ty inst)
(and (result_type ty)
inst))
;; Match the instruction that defines the given value, if any.
(decl def_inst (Inst) Value)
(extern extractor def_inst def_inst)
;; Extract a constant `u64` from a value defined by an `iconst`.
(decl u64_from_iconst (u64) Value)
(extractor (u64_from_iconst x)
(def_inst (iconst (u64_from_imm64 x))))
;; Match any zero value for iconst, f32const, f64const, vconst and splat.
(decl pure zero_value (Value) Value)
(extern constructor zero_value zero_value)
;; Match a sinkable instruction from a value operand.
(decl pure is_sinkable_inst (Value) Inst)
(extern constructor is_sinkable_inst is_sinkable_inst)
;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emit an instruction.
;;
;; This is low-level and side-effectful; it should only be used as an
;; implementation detail by helpers that preserve the SSA facade themselves.
(decl emit (MInst) Unit)
(extern constructor emit emit)
;; Sink an instruction.
;;
;; This is a side-effectful operation that notifies the context that the
;; instruction has been sunk into another instruction, and no longer needs to
;; be lowered.
(decl sink_inst (Inst) Unit)
(extern constructor sink_inst sink_inst)
;; Constant pool emission.
(type VCodeConstant (primitive VCodeConstant))
;; Add a u64 little-endian constant to the in-memory constant pool and
;; return a VCodeConstant index that refers to it. This is
;; side-effecting but idempotent (constants are deduplicated).
(decl emit_u64_le_const (u64) VCodeConstant)
(extern constructor emit_u64_le_const emit_u64_le_const)
;; Add a u128 little-endian constant to the in-memory constant pool and
;; return a VCodeConstant index that refers to it. This is
;; side-effecting but idempotent (constants are deduplicated).
(decl emit_u128_le_const (u128) VCodeConstant)
(extern constructor emit_u128_le_const emit_u128_le_const)
;; Fetch the VCodeConstant associated with a Constant.
(decl const_to_vconst (Constant) VCodeConstant)
(extern constructor const_to_vconst const_to_vconst)
;;;; Helpers for Side-Effectful Instructions Without Results ;;;;;;;;;;;;;;;;;;;
(type SideEffectNoResult (enum
(Inst (inst MInst))
(Inst2 (inst1 MInst)
(inst2 MInst))
(Inst3 (inst1 MInst)
(inst2 MInst)
(inst3 MInst))))
;; Create an empty `InstOutput`, but do emit the given side-effectful
;; instruction(s). One rule per `SideEffectNoResult` arity; instructions
;; are emitted in order.
(decl side_effect (SideEffectNoResult) InstOutput)
(rule (side_effect (SideEffectNoResult.Inst inst))
(let ((_ Unit (emit inst)))
(output_none)))
(rule (side_effect (SideEffectNoResult.Inst2 inst1 inst2))
(let ((_ Unit (emit inst1))
(_ Unit (emit inst2)))
(output_none)))
(rule (side_effect (SideEffectNoResult.Inst3 inst1 inst2 inst3))
(let ((_ Unit (emit inst1))
(_ Unit (emit inst2))
(_ Unit (emit inst3)))
(output_none)))
;; Concatenate two side-effect bundles into one, preserving order.
;;
;; NOTE(review): only combinations totalling at most three instructions
;; have rules here (Inst+Inst, Inst+Inst2, Inst2+Inst); combinations
;; involving `Inst3` or Inst2+Inst2 would fail to match -- presumably
;; callers never produce them. Confirm if new callers are added.
(decl side_effect_concat (SideEffectNoResult SideEffectNoResult) SideEffectNoResult)
(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst inst2))
(SideEffectNoResult.Inst2 inst1 inst2))
(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst2 inst2 inst3))
(SideEffectNoResult.Inst3 inst1 inst2 inst3))
(rule (side_effect_concat (SideEffectNoResult.Inst2 inst1 inst2) (SideEffectNoResult.Inst inst3))
(SideEffectNoResult.Inst3 inst1 inst2 inst3))
;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Newtype wrapper around `MInst` for instructions that are used for their
;; effect on flags.
;;
;; Variant determines how result is given when combined with a
;; ConsumesFlags. See `with_flags` below for more.
(type ProducesFlags (enum
;; For cases where the flags have been produced by another
;; instruction, and we have out-of-band reasons to know
;; that they won't be clobbered by the time we depend on
;; them.
(AlreadyExistingFlags)
(ProducesFlagsSideEffect (inst MInst))
(ProducesFlagsTwiceSideEffect (inst1 MInst) (inst2 MInst))
;; Not directly combinable with a ConsumesFlags;
;; used in s390x and unwrapped directly by `trapif`.
(ProducesFlagsReturnsReg (inst MInst) (result Reg))
(ProducesFlagsReturnsResultWithConsumer (inst MInst) (result Reg))))
;; Chain another producer to a `ProducesFlags`.
(decl produces_flags_append (ProducesFlags MInst) ProducesFlags)
(rule (produces_flags_append (ProducesFlags.ProducesFlagsSideEffect inst1) inst2)
(ProducesFlags.ProducesFlagsTwiceSideEffect inst1 inst2))
;; Newtype wrapper around `MInst` for instructions that consume flags.
;;
;; Variant determines how result is given when combined with a
;; ProducesFlags. See `with_flags` below for more.
(type ConsumesFlags (enum
(ConsumesFlagsSideEffect (inst MInst))
(ConsumesFlagsSideEffect2 (inst1 MInst) (inst2 MInst))
(ConsumesFlagsReturnsResultWithProducer (inst MInst) (result Reg))
(ConsumesFlagsReturnsReg (inst MInst) (result Reg))
(ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst)
(inst2 MInst)
(result ValueRegs))
(ConsumesFlagsFourTimesReturnsValueRegs (inst1 MInst)
(inst2 MInst)
(inst3 MInst)
(inst4 MInst)
(result ValueRegs))))
;; Get the produced register out of a ProducesFlags.
(decl produces_flags_get_reg (ProducesFlags) Reg)
(rule (produces_flags_get_reg (ProducesFlags.ProducesFlagsReturnsReg _ reg)) reg)
;; Modify a ProducesFlags to use it only for its side-effect, ignoring
;; its result.
(decl produces_flags_ignore (ProducesFlags) ProducesFlags)
(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsReg inst _))
(ProducesFlags.ProducesFlagsSideEffect inst))
(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsResultWithConsumer inst _))
(ProducesFlags.ProducesFlagsSideEffect inst))
;; Helper for combining two flags-consumer instructions that return a
;; single Reg, giving a ConsumesFlags that returns both values in a
;; ValueRegs.
(decl consumes_flags_concat (ConsumesFlags ConsumesFlags) ConsumesFlags)
(rule (consumes_flags_concat (ConsumesFlags.ConsumesFlagsReturnsReg inst1 reg1)
(ConsumesFlags.ConsumesFlagsReturnsReg inst2 reg2))
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
inst1
inst2
(value_regs reg1 reg2)))
(rule (consumes_flags_concat
(ConsumesFlags.ConsumesFlagsSideEffect inst1)
(ConsumesFlags.ConsumesFlagsSideEffect inst2))
(ConsumesFlags.ConsumesFlagsSideEffect2 inst1 inst2))
;; Combine flags-producing and -consuming instructions together, ensuring that
;; they are emitted back-to-back and no other instructions can be emitted
;; between them and potentially clobber the flags.
;;
;; Returns a `ValueRegs` according to the specific combination of ProducesFlags and ConsumesFlags modes:
;; - SideEffect + ReturnsReg --> ValueReg with one Reg from consumer
;; - SideEffect + ReturnsValueRegs --> ValueReg as given from consumer
;; - ReturnsResultWithProducer + ReturnsResultWithConsumer --> ValueReg with low part from producer, high part from consumer
;;
;; See `with_flags_reg` below for a variant that extracts out just the lower Reg.
(decl with_flags (ProducesFlags ConsumesFlags) ValueRegs)
(rule (with_flags (ProducesFlags.ProducesFlagsReturnsResultWithConsumer producer_inst producer_result)
(ConsumesFlags.ConsumesFlagsReturnsResultWithProducer consumer_inst consumer_result))
(let ((_x Unit (emit producer_inst))
(_y Unit (emit consumer_inst)))
(value_regs producer_result consumer_result)))
(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
(ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result))
(let ((_x Unit (emit producer_inst))
(_y Unit (emit consumer_inst)))
(value_reg consumer_result)))
(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1
consumer_inst_2
consumer_result))
;; We must emit these instructions in order as the creator of
;; the ConsumesFlags may be relying on dataflow dependencies
;; amongst them.
(let ((_x Unit (emit producer_inst))
(_y Unit (emit consumer_inst_1))
(_z Unit (emit consumer_inst_2)))
consumer_result))
(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
(ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1
consumer_inst_2
consumer_inst_3
consumer_inst_4
consumer_result))
;; We must emit these instructions in order as the creator of
;; the ConsumesFlags may be relying on dataflow dependencies
;; amongst them.
(let ((_x Unit (emit producer_inst))
(_y Unit (emit consumer_inst_1))
(_z Unit (emit consumer_inst_2))
(_w Unit (emit consumer_inst_3))
(_v Unit (emit consumer_inst_4)))
consumer_result))
(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2)
(ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result))
(let ((_ Unit (emit producer_inst1))
(_ Unit (emit producer_inst2))
(_ Unit (emit consumer_inst)))
(value_reg consumer_result)))
(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2)
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1
consumer_inst_2
consumer_result))
;; We must emit these instructions in order as the creator of
;; the ConsumesFlags may be relying on dataflow dependencies
;; amongst them.
(let ((_ Unit (emit producer_inst1))
(_ Unit (emit producer_inst2))
(_ Unit (emit consumer_inst_1))
(_ Unit (emit consumer_inst_2)))
consumer_result))
(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2)
(ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1
consumer_inst_2
consumer_inst_3
consumer_inst_4
consumer_result))
;; We must emit these instructions in order as the creator of
;; the ConsumesFlags may be relying on dataflow dependencies
;; amongst them.
(let ((_ Unit (emit producer_inst1))
(_ Unit (emit producer_inst2))
(_ Unit (emit consumer_inst_1))
(_ Unit (emit consumer_inst_2))
(_ Unit (emit consumer_inst_3))
(_ Unit (emit consumer_inst_4)))
consumer_result))
;; Like `with_flags`, but return only the first (lowest) register of the
;; combined producer/consumer result.
(decl with_flags_reg (ProducesFlags ConsumesFlags) Reg)
(rule (with_flags_reg producer consumer)
(let ((combined ValueRegs (with_flags producer consumer)))
(value_regs_get combined 0)))
;; Indicate that the current state of the flags register from the instruction
;; that produces this Value is relied on.
(decl flags_to_producesflags (Value) ProducesFlags)
(rule (flags_to_producesflags val)
(let ((_ Unit (mark_value_used val)))
(ProducesFlags.AlreadyExistingFlags)))
;; Combine a flags-producing instruction and a flags-consuming instruction that
;; produces no results.
;;
;; This function handles the following case only:
;; - ProducesFlagsSideEffect + ConsumesFlagsSideEffect
(decl with_flags_side_effect (ProducesFlags ConsumesFlags) SideEffectNoResult)
(rule (with_flags_side_effect
(ProducesFlags.AlreadyExistingFlags)
(ConsumesFlags.ConsumesFlagsSideEffect c))
(SideEffectNoResult.Inst c))
(rule (with_flags_side_effect
(ProducesFlags.AlreadyExistingFlags)
(ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2))
(SideEffectNoResult.Inst2 c1 c2))
(rule (with_flags_side_effect
(ProducesFlags.ProducesFlagsSideEffect p)
(ConsumesFlags.ConsumesFlagsSideEffect c))
(SideEffectNoResult.Inst2 p c))
(rule (with_flags_side_effect
(ProducesFlags.ProducesFlagsSideEffect p)
(ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2))
(SideEffectNoResult.Inst3 p c1 c2))
(rule (with_flags_side_effect
(ProducesFlags.ProducesFlagsTwiceSideEffect p1 p2)
(ConsumesFlags.ConsumesFlagsSideEffect c))
(SideEffectNoResult.Inst3 p1 p2 c))
;;;; Helpers for accessing compilation flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl avoid_div_traps () Type)
(extern extractor avoid_div_traps avoid_div_traps)
;; This definition should be kept up to date with the values defined in
;; cranelift/codegen/meta/src/shared/settings.rs
(type TlsModel extern (enum (None) (ElfGd) (Macho) (Coff)))
(decl tls_model (TlsModel) Type)
(extern extractor infallible tls_model tls_model)
(decl pure tls_model_is_elf_gd () Unit)
(extern constructor tls_model_is_elf_gd tls_model_is_elf_gd)
(decl pure tls_model_is_macho () Unit)
(extern constructor tls_model_is_macho tls_model_is_macho)
(decl pure tls_model_is_coff () Unit)
(extern constructor tls_model_is_coff tls_model_is_coff)
(decl pure preserve_frame_pointers () Unit)
(extern constructor preserve_frame_pointers preserve_frame_pointers)
;;;; Helpers for accessing instruction data ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl box_external_name (ExternalName) BoxExternalName)
(extern constructor box_external_name box_external_name)
;; Accessor for `FuncRef`.
(decl func_ref_data (SigRef ExternalName RelocDistance) FuncRef)
(extern extractor infallible func_ref_data func_ref_data)
;; Accessor for `GlobalValue`.
(decl symbol_value_data (ExternalName RelocDistance i64) GlobalValue)
(extern extractor symbol_value_data symbol_value_data)
;; Accessor for `RelocDistance`.
(decl reloc_distance_near () RelocDistance)
(extern extractor reloc_distance_near reloc_distance_near)
;; Accessor for `Immediate` as a vector of u8 values.
(decl vec_mask_from_immediate (VecMask) Immediate)
(extern extractor vec_mask_from_immediate vec_mask_from_immediate)
;; Accessor for `Immediate` as u128.
(decl u128_from_immediate (u128) Immediate)
(extern extractor u128_from_immediate u128_from_immediate)
;; Accessor for `Constant` as u128.
(decl u128_from_constant (u128) Constant)
(extern extractor u128_from_constant u128_from_constant)
;; Accessor for `Constant` as u64.
(decl u64_from_constant (u64) Constant)
(extern extractor u64_from_constant u64_from_constant)
;;;; Helpers for generating returns ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The (writable) register(s) that will contain the n'th return value.
(decl retval (usize) WritableValueRegs)
(extern constructor retval retval)
;; Extractor to check for the special case that a `WritableValueRegs`
;; contains only a single register.
(decl only_writable_reg (WritableReg) WritableValueRegs)
(extern extractor only_writable_reg only_writable_reg)
;; Get the `n`th register inside a `WritableValueRegs`.
(decl writable_regs_get (WritableValueRegs usize) WritableReg)
(extern constructor writable_regs_get writable_regs_get)
;;;; Helpers for generating calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Type to hold information about a function call signature.
(type Sig extern (enum))
;; Information how to pass one argument or return value.
(type ABIArg extern (enum))
;; Information how to pass a single slot of one argument or return value.
(type ABIArgSlot extern
(enum
(Reg
(reg RealReg)
(ty Type)
(extension ArgumentExtension))
(Stack
(offset i64)
(ty Type)
(extension ArgumentExtension))))
;; Physical register that may hold an argument or return value.
(type RealReg (primitive RealReg))
;; Instruction on whether and how to extend an argument value.
(type ArgumentExtension extern
(enum
(None)
(Uext)
(Sext)))
;; Get the number of arguments expected.
(decl abi_num_args (Sig) usize)
(extern constructor abi_num_args abi_num_args)
;; Get information specifying how to pass one argument.
(decl abi_get_arg (Sig usize) ABIArg)
(extern constructor abi_get_arg abi_get_arg)
;; Get the number of return values expected.
(decl abi_num_rets (Sig) usize)
(extern constructor abi_num_rets abi_num_rets)
;; Get information specifying how to pass one return value.
(decl abi_get_ret (Sig usize) ABIArg)
(extern constructor abi_get_ret abi_get_ret)
;; Get information specifying how to pass the implicit pointer
;; to the return-value area on the stack, if required.
(decl abi_ret_arg (ABIArg) Sig)
(extern extractor abi_ret_arg abi_ret_arg)
;; Succeeds if no implicit return-value area pointer is required.
(decl abi_no_ret_arg () Sig)
(extern extractor abi_no_ret_arg abi_no_ret_arg)
;; Size of the argument area.
(decl abi_sized_stack_arg_space (Sig) i64)
(extern constructor abi_sized_stack_arg_space abi_sized_stack_arg_space)
;; Size of the return-value area.
(decl abi_sized_stack_ret_space (Sig) i64)
(extern constructor abi_sized_stack_ret_space abi_sized_stack_ret_space)
;; StackSlot addr
(decl abi_stackslot_addr (WritableReg StackSlot Offset32) MInst)
(extern constructor abi_stackslot_addr abi_stackslot_addr)
;; DynamicStackSlot addr
(decl abi_dynamic_stackslot_addr (WritableReg DynamicStackSlot) MInst)
(extern constructor abi_dynamic_stackslot_addr abi_dynamic_stackslot_addr)
;; Extractor to detect the special case where an argument or
;; return value only requires a single slot to be passed.
(decl abi_arg_only_slot (ABIArgSlot) ABIArg)
(extern extractor abi_arg_only_slot abi_arg_only_slot)
;; Extractor to detect the special case where a struct argument
;; is explicitly passed by reference using a hidden pointer.
(decl abi_arg_struct_pointer (ABIArgSlot i64 u64) ABIArg)
(extern extractor abi_arg_struct_pointer abi_arg_struct_pointer)
;; Extractor to detect the special case where a non-struct argument
;; is implicitly passed by reference using a hidden pointer.
(decl abi_arg_implicit_pointer (ABIArgSlot i64 Type) ABIArg)
(extern extractor abi_arg_implicit_pointer abi_arg_implicit_pointer)
;; Convert a real register number into a virtual register.
(decl real_reg_to_reg (RealReg) Reg)
(extern constructor real_reg_to_reg real_reg_to_reg)
;; Convert a real register number into a writable virtual register.
(decl real_reg_to_writable_reg (RealReg) WritableReg)
(extern constructor real_reg_to_writable_reg real_reg_to_writable_reg)
;; Generate a move between two registers.
(decl gen_move (Type WritableReg Reg) MInst)
(extern constructor gen_move gen_move)
;; Copy a return value to a set of registers.
;;
;; `dsts` holds the destination (return-value) register(s); the source
;; registers come from lowering `val` via `put_in_regs`. One move is
;; emitted per register slot.
(decl copy_to_regs (WritableValueRegs Value) Unit)
(rule (copy_to_regs dsts val @ (value_type ty))
(let ((srcs ValueRegs (put_in_regs val)))
(copy_to_regs_range ty (value_regs_range srcs) dsts srcs)))
;; Helper for `copy_to_regs` that uses a range to index into the reg/value
;; vectors. The empty range is the base case and emits nothing.
(decl copy_to_regs_range (Type Range WritableValueRegs ValueRegs) Unit)
;; Base case: nothing left to copy.
(rule (copy_to_regs_range ty (range_empty) dsts srcs)
(unit))
;; Recursive case: move the `head`-indexed slot, then recurse on `tail`.
(rule (copy_to_regs_range ty (range_unwrap head tail) dsts srcs)
(let ((dst WritableReg (writable_regs_get dsts head))
(src Reg (value_regs_get srcs head))
(_ Unit (emit (gen_move ty dst src))))
(copy_to_regs_range ty tail dsts srcs)))
;; Lower a `return`: copy each returned value (indexed by the given range
;; into `args`) into its ABI-assigned return-value register(s).
(decl lower_return (Range ValueSlice) InstOutput)
(rule (lower_return (range_empty) _) (output_none))
(rule (lower_return (range_unwrap head tail) args)
(let ((_ Unit (copy_to_regs (retval head) (value_slice_get args head))))
(lower_return tail args)))
;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(convert Inst Value def_inst)
(convert Reg ValueRegs value_reg)
(convert Value Reg put_in_reg)
(convert Value ValueRegs put_in_regs)
(convert WritableReg Reg writable_reg_to_reg)
(convert ValueRegs InstOutput output)
(convert Reg InstOutput output_reg)
(convert Value InstOutput output_value)
(convert ExternalName BoxExternalName box_external_name)
(convert PReg Reg preg_to_reg)

View File

@@ -0,0 +1,61 @@
;; Prelude definitions specific to the mid-end.
;;;;; eclass and enode access ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; An eclass ID.
(type Id (primitive Id))
;; What is the type of an eclass (if a single type)?
(decl eclass_type (Type) Id)
(extern extractor eclass_type eclass_type)
;; Helper to wrap an Id-matching pattern and extract type.
(decl has_type (Type Id) Id)
(extractor (has_type ty id)
(and (eclass_type ty)
id))
;; Extract any node(s) for the given eclass ID.
(decl multi enodes (Type InstructionImms IdArray) Id)
(extern extractor enodes enodes_etor)
;; Construct a pure node, returning a new (or deduplicated
;; already-existing) eclass ID.
(decl pure_enode (Type InstructionImms IdArray) Id)
(extern constructor pure_enode pure_enode_ctor)
;; Type of an Id slice (for args).
(type IdArray (primitive IdArray))
(decl id_array_0 () IdArray)
(extern constructor id_array_0 id_array_0_ctor)
(extern extractor id_array_0 id_array_0_etor)
(decl id_array_1 (Id) IdArray)
(extern constructor id_array_1 id_array_1_ctor)
(extern extractor id_array_1 id_array_1_etor)
(decl id_array_2 (Id Id) IdArray)
(extern constructor id_array_2 id_array_2_ctor)
(extern extractor id_array_2 id_array_2_etor)
(decl id_array_3 (Id Id Id) IdArray)
(extern constructor id_array_3 id_array_3_ctor)
(extern extractor id_array_3 id_array_3_etor)
;; Extractor to get the min loop-level of an eclass.
(decl at_loop_level (u8 Id) Id)
(extern extractor infallible at_loop_level at_loop_level)
;;;;; optimization toplevel ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The main matcher rule invoked by the toplevel driver.
(decl multi simplify (Id) Id)
;; Mark a node as requiring remat when used in a different block.
(decl remat (Id) Id)
(extern constructor remat remat)
;; Mark a node as subsuming whatever else it's rewritten from -- this
;; is definitely preferable, not just a possible option. Useful for,
;; e.g., constant propagation where we arrive at a definite "final
;; answer".
(decl subsume (Id) Id)
(extern constructor subsume subsume)

View File

@@ -6,25 +6,22 @@
use crate::fx::FxHashMap;
use core::hash::Hash;
use core::mem;
use smallvec::{smallvec, SmallVec};
#[cfg(not(feature = "std"))]
use crate::fx::FxHasher;
#[cfg(not(feature = "std"))]
type Hasher = core::hash::BuildHasherDefault<FxHasher>;
struct Val<K, V> {
struct Val<V> {
value: V,
next_key: Option<K>,
depth: usize,
level: u32,
generation: u32,
}
/// A view into an occupied entry in a `ScopedHashMap`. It is part of the `Entry` enum.
pub struct OccupiedEntry<'a, K: 'a, V: 'a> {
#[cfg(feature = "std")]
entry: super::hash_map::OccupiedEntry<'a, K, Val<K, V>>,
#[cfg(not(feature = "std"))]
entry: super::hash_map::OccupiedEntry<'a, K, Val<K, V>, Hasher>,
entry: super::hash_map::OccupiedEntry<'a, K, Val<V>>,
}
impl<'a, K, V> OccupiedEntry<'a, K, V> {
@@ -36,22 +33,34 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> {
/// A view into a vacant entry in a `ScopedHashMap`. It is part of the `Entry` enum.
pub struct VacantEntry<'a, K: 'a, V: 'a> {
#[cfg(feature = "std")]
entry: super::hash_map::VacantEntry<'a, K, Val<K, V>>,
#[cfg(not(feature = "std"))]
entry: super::hash_map::VacantEntry<'a, K, Val<K, V>, Hasher>,
next_key: Option<K>,
depth: usize,
entry: InsertLoc<'a, K, V>,
depth: u32,
generation: u32,
}
impl<'a, K: Hash, V> VacantEntry<'a, K, V> {
/// Where to insert from a `VacantEntry`. May be vacant or occupied in
/// the underlying map because of lazy (generation-based) deletion.
enum InsertLoc<'a, K: 'a, V: 'a> {
Vacant(super::hash_map::VacantEntry<'a, K, Val<V>>),
Occupied(super::hash_map::OccupiedEntry<'a, K, Val<V>>),
}
impl<'a, K, V> VacantEntry<'a, K, V> {
/// Sets the value of the entry with the `VacantEntry`'s key.
pub fn insert(self, value: V) {
self.entry.insert(Val {
let val = Val {
value,
next_key: self.next_key,
depth: self.depth,
});
level: self.depth,
generation: self.generation,
};
match self.entry {
InsertLoc::Vacant(v) => {
v.insert(val);
}
InsertLoc::Occupied(mut o) => {
o.insert(val);
}
}
}
}
@@ -69,9 +78,9 @@ pub enum Entry<'a, K: 'a, V: 'a> {
/// Shadowing, where one scope has entries with the same keys as a containing scope,
/// is not supported in this implementation.
pub struct ScopedHashMap<K, V> {
map: FxHashMap<K, Val<K, V>>,
last_insert: Option<K>,
current_depth: usize,
map: FxHashMap<K, Val<V>>,
generation_by_depth: SmallVec<[u32; 8]>,
generation: u32,
}
impl<K, V> ScopedHashMap<K, V>
@@ -82,52 +91,115 @@ where
pub fn new() -> Self {
Self {
map: FxHashMap(),
last_insert: None,
current_depth: 0,
generation: 0,
generation_by_depth: smallvec![0],
}
}
/// Creates an empty `ScopedHashMap` with some pre-allocated capacity.
///
/// The generation bookkeeping starts the same as `new()`: one depth
/// level (the outermost scope) at generation 0.
pub fn with_capacity(cap: usize) -> Self {
let mut map = FxHashMap::default();
// Reserve up front so early inserts don't repeatedly rehash/grow.
map.reserve(cap);
Self {
map,
generation: 0,
generation_by_depth: smallvec![0],
}
}
/// Similar to `FxHashMap::entry`, gets the given key's corresponding entry in the map for
/// in-place manipulation.
pub fn entry(&mut self, key: K) -> Entry<K, V> {
pub fn entry<'a>(&'a mut self, key: K) -> Entry<'a, K, V> {
self.entry_with_depth(key, self.depth())
}
/// Get the entry, setting the scope depth at which to insert.
pub fn entry_with_depth<'a>(&'a mut self, key: K, depth: usize) -> Entry<'a, K, V> {
debug_assert!(depth <= self.generation_by_depth.len());
let generation = self.generation_by_depth[depth];
let depth = depth as u32;
use super::hash_map::Entry::*;
match self.map.entry(key) {
Occupied(entry) => Entry::Occupied(OccupiedEntry { entry }),
Vacant(entry) => {
let clone_key = entry.key().clone();
Entry::Vacant(VacantEntry {
entry,
next_key: mem::replace(&mut self.last_insert, Some(clone_key)),
depth: self.current_depth,
})
Occupied(entry) => {
let entry_generation = entry.get().generation;
let entry_depth = entry.get().level as usize;
if self.generation_by_depth.get(entry_depth).cloned() == Some(entry_generation) {
Entry::Occupied(OccupiedEntry { entry })
} else {
Entry::Vacant(VacantEntry {
entry: InsertLoc::Occupied(entry),
depth,
generation,
})
}
}
Vacant(entry) => Entry::Vacant(VacantEntry {
entry: InsertLoc::Vacant(entry),
depth,
generation,
}),
}
}
/// Get a value from a key, if present and still live.
pub fn get<'a>(&'a self, key: &K) -> Option<&'a V> {
    let entry = self.map.get(key)?;
    // An entry is only logically present if the generation recorded
    // at its insertion depth still matches the current generation for
    // that depth; otherwise it was lazily deleted when its scope was
    // exited.
    let depth = entry.level as usize;
    if self.generation_by_depth.get(depth).copied() == Some(entry.generation) {
        Some(&entry.value)
    } else {
        None
    }
}
/// Insert a key-value pair at the current scope depth, if the key is
/// absent. No-op if a live entry already exists.
pub fn insert_if_absent(&mut self, key: K, value: V) {
    let current = self.depth();
    self.insert_if_absent_with_depth(key, value, current);
}
/// Insert a key-value pair if absent, using the given depth for
/// the insertion. No-op if a live entry already exists.
pub fn insert_if_absent_with_depth(&mut self, key: K, value: V, depth: usize) {
    // A live occupied entry means the key is already present; only a
    // (possibly lazily-deleted) vacant slot gets the new value.
    if let Entry::Vacant(v) = self.entry_with_depth(key, depth) {
        v.insert(value);
    }
}
/// Enter a new scope.
pub fn increment_depth(&mut self) {
// Increment the depth.
self.current_depth = self.current_depth.checked_add(1).unwrap();
self.generation_by_depth.push(self.generation);
}
/// Exit the current scope.
pub fn decrement_depth(&mut self) {
// Remove all elements inserted at the current depth.
while let Some(key) = self.last_insert.clone() {
use crate::hash_map::Entry::*;
match self.map.entry(key) {
Occupied(entry) => {
if entry.get().depth != self.current_depth {
break;
}
self.last_insert = entry.remove_entry().1.next_key;
}
Vacant(_) => panic!(),
}
}
self.generation += 1;
self.generation_by_depth.pop();
}
// Decrement the depth.
self.current_depth = self.current_depth.checked_sub(1).unwrap();
/// Return the current scope depth.
pub fn depth(&self) -> usize {
    // There is always at least one scope (depth 0), so the per-depth
    // generation stack is never empty; depth is its length minus one.
    match self.generation_by_depth.len().checked_sub(1) {
        Some(depth) => depth,
        None => panic!("generation_by_depth cannot be empty"),
    }
}
/// Remove an entry, returning its value if it was present and live.
///
/// Stale entries (left behind by lazy, generation-based deletion when
/// a scope was exited) are physically removed here but reported as
/// absent (`None`).
pub fn remove(&mut self, key: &K) -> Option<V> {
    self.map.remove(key).and_then(|val| {
        // Only report the value if the entry's recorded generation
        // still matches the current generation for its depth.
        let entry_generation = val.generation;
        let entry_depth = val.level as usize;
        if self.generation_by_depth.get(entry_depth).cloned() == Some(entry_generation) {
            Some(val.value)
        } else {
            None
        }
    })
}
}
@@ -230,4 +302,22 @@ mod tests {
Entry::Vacant(entry) => entry.insert(3),
}
}
// Exercise insertion at both the current scope depth and an explicit
// outer depth, checking the lazy (generation-based) deletion semantics.
#[test]
fn insert_arbitrary_depth() {
    let mut map: ScopedHashMap<i32, i32> = ScopedHashMap::new();
    // Insert at depth 0; immediately visible.
    map.insert_if_absent(1, 2);
    assert_eq!(map.get(&1), Some(&2));
    map.increment_depth();
    // Entries from outer scopes remain visible in inner scopes.
    assert_eq!(map.get(&1), Some(&2));
    map.insert_if_absent(3, 4);
    assert_eq!(map.get(&3), Some(&4));
    map.decrement_depth();
    // Exiting the scope logically deletes entries inserted within it.
    assert_eq!(map.get(&3), None);
    map.increment_depth();
    // Insert at the outer depth (0) while inside a nested scope...
    map.insert_if_absent_with_depth(3, 4, 0);
    assert_eq!(map.get(&3), Some(&4));
    map.decrement_depth();
    // ...so it survives exiting the nested scope.
    assert_eq!(map.get(&3), Some(&4));
}
}

View File

@@ -529,6 +529,7 @@ probestack_strategy = "outline"
regalloc_checker = false
regalloc_verbose_logs = false
enable_alias_analysis = true
use_egraphs = false
enable_verifier = true
is_pic = false
use_colocated_libcalls = false

View File

@@ -87,15 +87,17 @@
//!
//! ## Data Structure and Example
//!
//! Each eclass id refers to a table entry that can be one of:
//! Each eclass id refers to a table entry ("eclass node", which is
//! different from an "enode") that can be one of:
//!
//! - A single enode;
//! - An enode and an earlier eclass id it is appended to;
//! - An enode and an earlier eclass id it is appended to (a "child"
//! eclass node);
//! - A "union node" with two earlier eclass ids.
//!
//! Building the aegraph consists solely of adding new entries to the
//! end of this table. An enode in any given entry can only refer to
//! earlier eclass ids.
//! end of this table of eclass nodes. An enode referenced from any
//! given eclass node can only refer to earlier eclass ids.
//!
//! For example, consider the following eclass table:
//!
@@ -218,7 +220,7 @@
//! POPL 2021. <https://dl.acm.org/doi/10.1145/3434304>
use cranelift_entity::PrimaryMap;
use cranelift_entity::{entity_impl, packed_option::ReservedValue};
use cranelift_entity::{entity_impl, packed_option::ReservedValue, SecondaryMap};
use smallvec::{smallvec, SmallVec};
use std::fmt::Debug;
use std::hash::Hash;
@@ -256,6 +258,20 @@ pub trait Language: CtxEq<Self::Node, Self::Node> + CtxHash<Self::Node> {
fn needs_dedup(&self, node: &Self::Node) -> bool;
}
/// A trait that allows the aegraph to compute a property of each
/// node as it is created.
pub trait Analysis {
    /// The language (node type) over which this analysis operates.
    type L: Language;
    /// The per-eclass analysis value. `Default` provides the value
    /// for ids that have not yet been computed.
    type Value: Clone + Default;
    /// Compute the analysis value for a single enode, given the
    /// already-computed values of earlier eclass ids (available in
    /// `values`).
    fn for_node(
        &self,
        ctx: &Self::L,
        n: &<Self::L as Language>::Node,
        values: &SecondaryMap<Id, Self::Value>,
    ) -> Self::Value;
    /// Combine two analysis values for the same eclass into a single
    /// value (e.g., when eclasses are unioned or an enode is appended
    /// to an existing eclass).
    fn meet(&self, ctx: &Self::L, v1: &Self::Value, v2: &Self::Value) -> Self::Value;
}
/// Conditionally-compiled trace-log macro. (Borrowed from
/// `cranelift-codegen`; it's not worth factoring out a common
/// subcrate for this.)
@@ -269,18 +285,20 @@ macro_rules! trace {
}
/// An egraph.
pub struct EGraph<L: Language> {
pub struct EGraph<L: Language, A: Analysis<L = L>> {
/// Node-allocation arena.
pub nodes: Vec<L::Node>,
/// Hash-consing map from Nodes to eclass IDs.
node_map: CtxHashMap<NodeKey, Id>,
/// Eclass definitions. Each eclass consists of an enode, and
/// parent pointer to the rest of the eclass.
/// child pointer to the rest of the eclass.
pub classes: PrimaryMap<Id, EClass>,
/// Union-find for canonical ID generation. This lets us name an
/// eclass with a canonical ID that is the same for all
/// generations of the class.
pub unionfind: UnionFind,
/// Analysis and per-node state.
pub analysis: Option<(A, SecondaryMap<Id, A::Value>)>,
}
/// A reference to a node.
@@ -298,7 +316,7 @@ impl NodeKey {
/// Get the node for this NodeKey, given the `nodes` from the
/// appropriate `EGraph`.
pub fn node<'a, L: Language>(&self, nodes: &'a [L::Node]) -> &'a L::Node {
pub fn node<'a, N>(&self, nodes: &'a [N]) -> &'a N {
&nodes[self.index as usize]
}
@@ -311,35 +329,35 @@ impl NodeKey {
}
}
struct NodeKeyCtx<'a, L: Language> {
struct NodeKeyCtx<'a, 'b, L: Language> {
nodes: &'a [L::Node],
node_ctx: &'a L,
node_ctx: &'b L,
}
impl<'ctx, L: Language> CtxEq<NodeKey, NodeKey> for NodeKeyCtx<'ctx, L> {
impl<'a, 'b, L: Language> CtxEq<NodeKey, NodeKey> for NodeKeyCtx<'a, 'b, L> {
fn ctx_eq(&self, a: &NodeKey, b: &NodeKey, uf: &mut UnionFind) -> bool {
let a = a.node::<L>(self.nodes);
let b = b.node::<L>(self.nodes);
let a = a.node(self.nodes);
let b = b.node(self.nodes);
self.node_ctx.ctx_eq(a, b, uf)
}
}
impl<'ctx, L: Language> CtxHash<NodeKey> for NodeKeyCtx<'ctx, L> {
impl<'a, 'b, L: Language> CtxHash<NodeKey> for NodeKeyCtx<'a, 'b, L> {
fn ctx_hash(&self, value: &NodeKey, uf: &mut UnionFind) -> u64 {
self.node_ctx.ctx_hash(value.node::<L>(self.nodes), uf)
self.node_ctx.ctx_hash(value.node(self.nodes), uf)
}
}
/// An EClass entry. Contains either a single new enode and a parent
/// eclass (i.e., adds one new enode), or unions two parent eclasses
/// An EClass entry. Contains either a single new enode and a child
/// eclass (i.e., adds one new enode), or unions two child eclasses
/// together.
#[derive(Debug, Clone, Copy)]
pub struct EClass {
// formats:
//
// 00 | unused (31 bits) | NodeKey (31 bits)
// 01 | eclass_parent (31 bits) | NodeKey (31 bits)
// 10 | eclass_parent_1 (31 bits) | eclass_parent_id_2 (31 bits)
// 00 | unused (31 bits) | NodeKey (31 bits)
// 01 | eclass_child (31 bits) | NodeKey (31 bits)
// 10 | eclass_child_1 (31 bits) | eclass_child_id_2 (31 bits)
bits: u64,
}
@@ -352,47 +370,47 @@ impl EClass {
}
}
fn node_and_parent(node: NodeKey, eclass_parent: Id) -> EClass {
fn node_and_child(node: NodeKey, eclass_child: Id) -> EClass {
let node_idx = node.bits() as u64;
debug_assert!(node_idx < (1 << 31));
debug_assert!(eclass_parent != Id::invalid());
let parent = eclass_parent.0 as u64;
debug_assert!(parent < (1 << 31));
debug_assert!(eclass_child != Id::invalid());
let child = eclass_child.0 as u64;
debug_assert!(child < (1 << 31));
EClass {
bits: (0b01 << 62) | (parent << 31) | node_idx,
bits: (0b01 << 62) | (child << 31) | node_idx,
}
}
fn union(parent1: Id, parent2: Id) -> EClass {
debug_assert!(parent1 != Id::invalid());
let parent1 = parent1.0 as u64;
debug_assert!(parent1 < (1 << 31));
fn union(child1: Id, child2: Id) -> EClass {
debug_assert!(child1 != Id::invalid());
let child1 = child1.0 as u64;
debug_assert!(child1 < (1 << 31));
debug_assert!(parent2 != Id::invalid());
let parent2 = parent2.0 as u64;
debug_assert!(parent2 < (1 << 31));
debug_assert!(child2 != Id::invalid());
let child2 = child2.0 as u64;
debug_assert!(child2 < (1 << 31));
EClass {
bits: (0b10 << 62) | (parent1 << 31) | parent2,
bits: (0b10 << 62) | (child1 << 31) | child2,
}
}
/// Get the node, if any, from a node-only or node-and-parent
/// Get the node, if any, from a node-only or node-and-child
/// eclass.
pub fn get_node(&self) -> Option<NodeKey> {
self.as_node()
.or_else(|| self.as_node_and_parent().map(|(node, _)| node))
.or_else(|| self.as_node_and_child().map(|(node, _)| node))
}
/// Get the first parent, if any.
pub fn parent1(&self) -> Option<Id> {
self.as_node_and_parent()
/// Get the first child, if any.
pub fn child1(&self) -> Option<Id> {
self.as_node_and_child()
.map(|(_, p1)| p1)
.or(self.as_union().map(|(p1, _)| p1))
}
/// Get the second parent, if any.
pub fn parent2(&self) -> Option<Id> {
/// Get the second child, if any.
pub fn child2(&self) -> Option<Id> {
self.as_union().map(|(_, p2)| p2)
}
@@ -406,25 +424,25 @@ impl EClass {
}
}
/// If this EClass is one new enode and a parent, return the node
/// and parent ID.
pub fn as_node_and_parent(&self) -> Option<(NodeKey, Id)> {
/// If this EClass is one new enode and a child, return the node
/// and child ID.
pub fn as_node_and_child(&self) -> Option<(NodeKey, Id)> {
if (self.bits >> 62) == 0b01 {
let node_idx = (self.bits & ((1 << 31) - 1)) as u32;
let parent = ((self.bits >> 31) & ((1 << 31) - 1)) as u32;
Some((NodeKey::from_bits(node_idx), Id::from_bits(parent)))
let child = ((self.bits >> 31) & ((1 << 31) - 1)) as u32;
Some((NodeKey::from_bits(node_idx), Id::from_bits(child)))
} else {
None
}
}
/// If this EClass is the union variety, return the two parent
/// If this EClass is the union variety, return the two child
/// EClasses. Both are guaranteed not to be `Id::invalid()`.
pub fn as_union(&self) -> Option<(Id, Id)> {
if (self.bits >> 62) == 0b10 {
let parent1 = ((self.bits >> 31) & ((1 << 31) - 1)) as u32;
let parent2 = (self.bits & ((1 << 31) - 1)) as u32;
Some((Id::from_bits(parent1), Id::from_bits(parent2)))
let child1 = ((self.bits >> 31) & ((1 << 31) - 1)) as u32;
let child2 = (self.bits & ((1 << 31) - 1)) as u32;
Some((Id::from_bits(child1), Id::from_bits(child2)))
} else {
None
}
@@ -449,27 +467,31 @@ impl<T> NewOrExisting<T> {
}
}
impl<L: Language> EGraph<L>
impl<L: Language, A: Analysis<L = L>> EGraph<L, A>
where
L::Node: 'static,
{
/// Create a new aegraph.
pub fn new() -> Self {
pub fn new(analysis: Option<A>) -> Self {
let analysis = analysis.map(|a| (a, SecondaryMap::new()));
Self {
nodes: vec![],
node_map: CtxHashMap::new(),
classes: PrimaryMap::new(),
unionfind: UnionFind::new(),
analysis,
}
}
/// Create a new aegraph with the given capacity.
pub fn with_capacity(nodes: usize) -> Self {
pub fn with_capacity(nodes: usize, analysis: Option<A>) -> Self {
let analysis = analysis.map(|a| (a, SecondaryMap::with_capacity(nodes)));
Self {
nodes: Vec::with_capacity(nodes),
node_map: CtxHashMap::with_capacity(nodes),
classes: PrimaryMap::with_capacity(nodes),
unionfind: UnionFind::with_capacity(nodes),
analysis,
}
}
@@ -506,6 +528,10 @@ where
// Add to interning map with a NodeKey referring to the eclass.
v.insert(eclass_id);
// Update analysis.
let node_ctx = ctx.node_ctx;
self.update_analysis(node_ctx, eclass_id);
NewOrExisting::New(eclass_id)
}
}
@@ -520,7 +546,7 @@ where
/// property (args must have lower eclass Ids than the eclass
/// containing the node with those args). Returns the Id of the
/// merged eclass.
pub fn union(&mut self, a: Id, b: Id) -> Id {
pub fn union(&mut self, ctx: &L, a: Id, b: Id) -> Id {
assert_ne!(a, Id::invalid());
assert_ne!(b, Id::invalid());
let (a, b) = (std::cmp::max(a, b), std::cmp::min(a, b));
@@ -532,16 +558,17 @@ where
self.unionfind.union(a, b);
// If the younger eclass has no parent, we can link it
// If the younger eclass has no child, we can link it
// directly and return that eclass. Otherwise, we create a new
// union eclass.
if let Some(node) = self.classes[a].as_node() {
trace!(
" -> id {} is one-node eclass; making into node-and-parent with id {}",
" -> id {} is one-node eclass; making into node-and-child with id {}",
a,
b
);
self.classes[a] = EClass::node_and_parent(node, b);
self.classes[a] = EClass::node_and_child(node, b);
self.update_analysis(ctx, a);
return a;
}
@@ -549,6 +576,7 @@ where
self.unionfind.add(u);
self.unionfind.union(u, b);
trace!(" -> union id {} and id {} into id {}", a, b, u);
self.update_analysis(ctx, u);
u
}
@@ -569,12 +597,41 @@ where
}
/// Get the enodes for a given eclass.
pub fn enodes(&self, eclass: Id) -> NodeIter<L> {
pub fn enodes(&self, eclass: Id) -> NodeIter<L, A> {
NodeIter {
stack: smallvec![eclass],
_phantom: PhantomData,
_phantom1: PhantomData,
_phantom2: PhantomData,
}
}
/// Update the analysis value stored for the eclass node `eclass`.
///
/// No-op when the egraph was constructed without an analysis.
fn update_analysis(&mut self, ctx: &L, eclass: Id) {
    let (analysis, state) = match self.analysis.as_mut() {
        Some(pair) => pair,
        None => return,
    };
    let eclass_data = self.classes[eclass];
    // Dispatch on the three eclass-node formats; exactly one matches.
    let value = if let Some((child1, child2)) = eclass_data.as_union() {
        // A union node contributes no new enode: just meet the two
        // children's values.
        analysis.meet(ctx, &state[child1], &state[child2])
    } else if let Some((node_key, child)) = eclass_data.as_node_and_child() {
        // A new enode appended to an existing eclass: meet the node's
        // own value with the child eclass's accumulated value.
        let node_value = analysis.for_node(ctx, node_key.node(&self.nodes), state);
        analysis.meet(ctx, &node_value, &state[child])
    } else if let Some(node_key) = eclass_data.as_node() {
        analysis.for_node(ctx, node_key.node(&self.nodes), state)
    } else {
        panic!("Invalid eclass node: {:?}", eclass_data);
    };
    state[eclass] = value;
}
/// Get the analysis value for a given eclass. Panics if no analysis is present.
pub fn analysis_value(&self, eclass: Id) -> &A::Value {
    let (_, values) = self.analysis.as_ref().unwrap();
    &values[eclass]
}
}
/// An iterator over all nodes in an eclass.
@@ -582,27 +639,28 @@ where
/// Because eclasses are immutable once created, this does *not* need
/// to hold an open borrow on the egraph; it is free to add new nodes,
/// while our existing Ids will remain valid.
pub struct NodeIter<L: Language> {
pub struct NodeIter<L: Language, A: Analysis<L = L>> {
stack: SmallVec<[Id; 8]>,
_phantom: PhantomData<L>,
_phantom1: PhantomData<L>,
_phantom2: PhantomData<A>,
}
impl<L: Language> NodeIter<L> {
pub fn next<'a>(&mut self, egraph: &'a EGraph<L>) -> Option<&'a L::Node> {
impl<L: Language, A: Analysis<L = L>> NodeIter<L, A> {
pub fn next<'a>(&mut self, egraph: &'a EGraph<L, A>) -> Option<&'a L::Node> {
while let Some(next) = self.stack.pop() {
let eclass = egraph.classes[next];
if let Some(node) = eclass.as_node() {
return Some(&egraph.nodes[node.index as usize]);
} else if let Some((node, parent)) = eclass.as_node_and_parent() {
if parent != Id::invalid() {
self.stack.push(parent);
} else if let Some((node, child)) = eclass.as_node_and_child() {
if child != Id::invalid() {
self.stack.push(child);
}
return Some(&egraph.nodes[node.index as usize]);
} else if let Some((parent1, parent2)) = eclass.as_union() {
debug_assert!(parent1 != Id::invalid());
debug_assert!(parent2 != Id::invalid());
self.stack.push(parent2);
self.stack.push(parent1);
} else if let Some((child1, child2)) = eclass.as_union() {
debug_assert!(child1 != Id::invalid());
debug_assert!(child2 != Id::invalid());
self.stack.push(child2);
self.stack.push(child1);
continue;
} else {
unreachable!("Invalid eclass format");

View File

@@ -2,6 +2,7 @@
use crate::{trace, Id};
use cranelift_entity::SecondaryMap;
use std::hash::{Hash, Hasher};
/// A union-find data structure. The data structure can allocate
/// `Id`s, indicating eclasses, and can merge eclasses together.
@@ -67,4 +68,18 @@ impl UnionFind {
trace!("union: {}, {}", a, b);
}
}
/// Determine if two `Id`s are equivalent, after
/// canonicalizing. Updates the union-find data structure during
/// canonicalization to make future lookups faster.
pub fn equiv_id_mut(&mut self, a: Id, b: Id) -> bool {
    let canon_a = self.find_and_update(a);
    let canon_b = self.find_and_update(b);
    canon_a == canon_b
}
/// Hash an `Id` after canonicalizing it. Updates the union-find data
/// structure to make future lookups/hashing faster.
pub fn hash_id_mut<H: Hasher>(&mut self, hash: &mut H, id: Id) {
    self.find_and_update(id).hash(hash);
}
}

View File

@@ -143,6 +143,24 @@ impl<T: EntityRef + ReservedValue> ListPool<T> {
}
}
/// Create a new list pool with the given capacity for data pre-allocated.
pub fn with_capacity(len: usize) -> Self {
    // Pre-size only the data arena; the free list starts empty and
    // grows as lists are deallocated.
    let data = Vec::with_capacity(len);
    Self {
        data,
        free: Vec::new(),
    }
}
/// Get the capacity of this pool. This will be somewhat higher
/// than the total length of lists that can be stored without
/// reallocating, because of internal metadata overheads. It is
/// mostly useful to allow another pool to be allocated that is
/// likely to hold data transferred from this one without the need
/// to grow.
pub fn capacity(&self) -> usize {
    // Capacity of the underlying storage vector.
    self.data.capacity()
}
/// Clear the pool, forgetting about all lists that use it.
///
/// This invalidates any existing entity lists that used this pool to allocate memory.

View File

@@ -0,0 +1,13 @@
test optimize
set opt_level=none
set use_egraphs=true
target x86_64
function %f(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = imul v0, v1
; check: v1 = iadd v0, v0
; nextln: return v1
return v2
}

View File

@@ -0,0 +1,22 @@
test optimize
set opt_level=none
set use_egraphs=true
target x86_64
function %f(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = bor.i64 v0, v1
v3 = load.i64 heap v0
v4 = load.i64 heap v2
v5 = band.i64 v3, v4
store.i64 v0, v5
v6 = load.i64 v3
v7 = load.i64 v6
return v7
}
; check: v1 = load.i64 heap v0
; nextln: store v0, v1
; nextln: v2 = load.i64 v0
; nextln: return v2

View File

@@ -0,0 +1,29 @@
test optimize
set opt_level=none
set use_egraphs=true
target x86_64
function %f(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = iadd v0, v1
brnz v2, block1(v0)
jump block2(v1)
block1(v3: i32):
v4 = iadd v0, v1
v5 = iadd v4, v3
return v5
block2(v6: i32):
return v6
}
;; Check that the `iadd` for `v4` is subsumed by `v2`:
; check: block0(v0: i32, v1: i32):
; nextln: v2 = iadd v0, v1
; check: block1:
; nextln: v3 = iadd.i32 v2, v0
; nextln: return v3
; check: block2:
; nextln: return v1

View File

@@ -0,0 +1,40 @@
test optimize
set opt_level=none
set use_egraphs=true
target x86_64
function %f(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
jump block1(v0)
block1(v2: i32):
v3 = iconst.i32 1
v4 = iadd.i32 v1, v3
v5 = iconst.i32 40
v6 = icmp eq v2, v5
v7 = iconst.i32 1
v8 = iadd.i32 v2, v7
brnz v6, block2(v4)
jump block1(v8)
block2(v9: i32):
return v9
}
; check: block0(v0: i32, v1: i32):
; nextln: jump block1(v0)
; check: block1(v2: i32):
;; constants are not lifted; they are rematerialized in each block where used
; nextln: v3 = iconst.i32 40
; nextln: v4 = icmp eq v2, v3
; nextln: v5 = iconst.i32 1
; nextln: v6 = iadd v2, v5
; nextln: brnz v4, block2
; nextln: jump block1(v6)
; check: block2:
; nextln: v7 = iconst.i32 1
; nextln: v8 = iadd.i32 v1, v7
; nextln: return v8

View File

@@ -0,0 +1,21 @@
test optimize
set opt_level=none
set use_egraphs=true
target x86_64
function %stack_load(i64) -> i64 {
ss0 = explicit_slot 8
block0(v0: i64):
stack_store.i64 v0, ss0
v1 = stack_load.i64 ss0
return v1
}
; check: function %stack_load(i64) -> i64 fast {
; nextln: ss0 = explicit_slot 8
; check: block0(v0: i64):
; nextln: v1 = stack_addr.i64 ss0
; nextln: store notrap aligned v0, v1
; nextln: return v0
; nextln: }

View File

@@ -0,0 +1,24 @@
test compile precise-output
set use_egraphs=true
target x86_64
;; We want to make sure that this compiles successfully, so we are properly
;; handling multi-value operator nodes.
function u0:359(i64) -> i8, i8 system_v {
sig0 = (i64) -> i8, i8 system_v
fn0 = colocated u0:521 sig0
block0(v0: i64):
v3, v4 = call fn0(v0)
return v3, v4
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; call User(userextname0)
; movq %rbp, %rsp
; popq %rbp
; ret

View File

@@ -0,0 +1,23 @@
test compile precise-output
set use_egraphs=true
target x86_64
;; `atomic_rmw` is not a load, but it reports `true` to `.can_load()`. We want
;; to make sure the alias analysis machinery doesn't break when we have these odd
;; memory ops in the IR.
function u0:1302(i64) -> i64 system_v {
block0(v0: i64):
v9 = atomic_rmw.i64 add v0, v0
return v0
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; atomically { 64_bits_at_[%r9]) Add= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

View File

@@ -0,0 +1,35 @@
test optimize
set opt_level=none
set use_egraphs=true
target x86_64
function %f(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 42
v2 = iadd.i32 v0, v1
brnz v2, block1
jump block2
block1:
v3 = iconst.i32 84
v4 = iadd.i32 v2, v3
return v4
block2:
return v2
}
; check: block0(v0: i32):
; nextln: v1 = iconst.i32 42
; nextln: v2 = iadd v0, v1
; nextln: brnz v2, block1
; nextln: jump block2
; check: block1:
; nextln: v5 = iconst.i32 126
; nextln: v6 = iadd.i32 v0, v5
; nextln: return v6
; check: block2:
; nextln: v3 = iconst.i32 42
; nextln: v4 = iadd.i32 v0, v3
; nextln: return v4

View File

@@ -45,6 +45,7 @@ mod test_domtree;
mod test_interpret;
mod test_legalizer;
mod test_licm;
mod test_optimize;
mod test_preopt;
mod test_print_cfg;
mod test_run;
@@ -120,6 +121,7 @@ fn new_subtest(parsed: &TestCommand) -> anyhow::Result<Box<dyn subtest::SubTest>
"interpret" => test_interpret::subtest(parsed),
"legalizer" => test_legalizer::subtest(parsed),
"licm" => test_licm::subtest(parsed),
"optimize" => test_optimize::subtest(parsed),
"preopt" => test_preopt::subtest(parsed),
"print-cfg" => test_print_cfg::subtest(parsed),
"run" => test_run::subtest(parsed),

View File

@@ -0,0 +1,47 @@
//! Test command for testing the optimization phases.
//!
//! The `optimize` test command runs each function through the
//! optimization passes, but not lowering or regalloc. The output for
//! filecheck purposes is the resulting CLIF.
//!
//! Some legalization may be ISA-specific, so this requires an ISA
//! (for now).
use crate::subtest::{run_filecheck, Context, SubTest};
use anyhow::Result;
use cranelift_codegen::ir;
use cranelift_reader::TestCommand;
use std::borrow::Cow;
/// Subtest implementation for the `optimize` test command.
struct TestOptimize;

/// Build the `optimize` subtest from a parsed test command.
pub fn subtest(parsed: &TestCommand) -> Result<Box<dyn SubTest>> {
    assert_eq!(parsed.command, "optimize");
    Ok(Box::new(TestOptimize))
}
impl SubTest for TestOptimize {
    fn name(&self) -> &'static str {
        "optimize"
    }

    // The optimization passes rewrite the function in place.
    fn is_mutating(&self) -> bool {
        true
    }

    // Some legalization may be ISA-specific, so an ISA is required
    // (see the module-level docs).
    fn needs_isa(&self) -> bool {
        true
    }

    /// Run the optimization phases (but not lowering or regalloc) on
    /// `func`, then filecheck the resulting CLIF text.
    fn run(&self, func: Cow<ir::Function>, context: &Context) -> Result<()> {
        let isa = context.isa.expect("optimize needs an ISA");
        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
        comp_ctx
            .optimize(isa)
            // Render any error against the function body for a
            // readable report.
            .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, e))?;
        let clif = format!("{:?}", comp_ctx.func);
        run_filecheck(&clif, context)
    }
}

View File

@@ -108,6 +108,18 @@ fn check_overlaps(env: &TermEnv) -> Errors {
let mut by_term = HashMap::new();
for rule in env.rules.iter() {
if let sema::Pattern::Term(_, tid, ref vars) = rule.lhs {
let is_multi_ctor = match &env.terms[tid.index()].kind {
&TermKind::Decl { multi, .. } => multi,
_ => false,
};
if is_multi_ctor {
// Rules for multi-constructors are not checked for
// overlap: the ctor returns *every* match, not just
// the first or highest-priority one, so overlap does
// not actually affect the results.
continue;
}
let mut binds = Vec::new();
let rule = RulePatterns {
rule,

View File

@@ -393,6 +393,7 @@ impl Engine {
| "machine_code_cfg_info"
| "tls_model" // wasmtime doesn't use tls right now
| "opt_level" // opt level doesn't change semantics
| "use_egraphs" // optimizing with egraphs doesn't change semantics
| "enable_alias_analysis" // alias analysis-based opts don't change semantics
| "probestack_func_adjusts_sp" // probestack above asserted disabled
| "probestack_size_log2" // probestack above asserted disabled

View File

@@ -24,8 +24,8 @@ const CRATES_TO_PUBLISH: &[&str] = &[
"cranelift-bforest",
"cranelift-codegen-shared",
"cranelift-codegen-meta",
"cranelift-codegen",
"cranelift-egraph",
"cranelift-codegen",
"cranelift-reader",
"cranelift-serde",
"cranelift-module",
@@ -88,6 +88,7 @@ const PUBLIC_CRATES: &[&str] = &[
"cranelift-bforest",
"cranelift-codegen-shared",
"cranelift-codegen-meta",
"cranelift-egraph",
"cranelift-codegen",
"cranelift-reader",
"cranelift-serde",