From 2be12a5167cc5a4eb6ffe11530e35507db583708 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 11 Oct 2022 18:15:53 -0700 Subject: [PATCH] egraph-based midend: draw the rest of the owl (productionized). (#4953) * egraph-based midend: draw the rest of the owl. * Rename `egg` submodule of cranelift-codegen to `egraph`. * Apply some feedback from @jsharp during code walkthrough. * Remove recursion from find_best_node by doing a single pass. Rather than recursively computing the lowest-cost node for a given eclass and memoizing the answer at each eclass node, we can do a single forward pass; because every eclass node refers only to earlier nodes, this is sufficient. The behavior may slightly differ from the earlier behavior because we cannot short-circuit costs to zero once a node is elaborated; but in practice this should not matter. * Make elaboration non-recursive. Use an explicit stack instead (with `ElabStackEntry` entries, alongside a result stack). * Make elaboration traversal of the domtree non-recursive/stack-safe. * Work analysis logic in Cranelift-side egraph glue into a general analysis framework in cranelift-egraph. * Apply static recursion limit to rule application. * Fix aarch64 wrt dynamic-vector support -- broken rebase. * Topo-sort cranelift-egraph before cranelift-codegen in publish script, like the comment instructs me to! * Fix multi-result call testcase. * Include `cranelift-egraph` in `PUBLISHED_CRATES`. * Fix atomic_rmw: not really a load. * Remove now-unnecessary PartialOrd/Ord derivations. * Address some code-review comments. * Review feedback. * Review feedback. * No overlap in mid-end rules, because we are defining a multi-constructor. * rustfmt * Review feedback. * Review feedback. * Review feedback. * Review feedback. * Remove redundant `mut`. * Add comment noting what rules can do. * Review feedback. * Clarify comment wording. * Update `has_memory_fence_semantics`. * Apply @jameysharp's improved loop-level computation. 
Co-authored-by: Jamey Sharp * Fix suggestion commit. * Fix off-by-one in new loop-nest analysis. * Review feedback. * Review feedback. * Review feedback. * Use `Default`, not `std::default::Default`, as per @fitzgen Co-authored-by: Nick Fitzgerald * Apply @fitzgen's comment elaboration to a doc-comment. Co-authored-by: Nick Fitzgerald * Add stat for hitting the rewrite-depth limit. * Some code motion in split prelude to make the diff a little clearer wrt `main`. * Take @jameysharp's suggested `try_into()` usage for blockparam indices. Co-authored-by: Jamey Sharp * Take @jameysharp's suggestion to avoid double-match on load op. Co-authored-by: Jamey Sharp * Fix suggestion (add import). * Review feedback. * Fix stack_load handling. * Remove redundant can_store case. * Take @jameysharp's suggested improvement to FuncEGraph::build() logic Co-authored-by: Jamey Sharp * Tweaks to FuncEGraph::build() on top of suggestion. * Take @jameysharp's suggested clarified condition Co-authored-by: Jamey Sharp * Clean up after suggestion (unused variable). * Fix loop analysis. * loop level asserts * Revert constant-space loop analysis -- edge cases were incorrect, so let's go with the simple thing for now. * Take @jameysharp's suggestion re: result_tys Co-authored-by: Jamey Sharp * Fix up after suggestion * Take @jameysharp's suggestion to use fold rather than reduce Co-authored-by: Jamey Sharp * Fixup after suggestion * Take @jameysharp's suggestion to remove elaborate_eclass_use's return value. * Clarifying comment in terminator insts. 
Co-authored-by: Jamey Sharp Co-authored-by: Nick Fitzgerald --- Cargo.lock | 1 + Cargo.toml | 1 + cranelift/codegen/Cargo.toml | 1 + cranelift/codegen/build.rs | 35 +- cranelift/codegen/meta/src/gen_inst.rs | 552 ++++++++++--- cranelift/codegen/meta/src/lib.rs | 3 +- cranelift/codegen/meta/src/shared/settings.rs | 11 + cranelift/codegen/src/context.rs | 53 +- cranelift/codegen/src/egraph.rs | 414 ++++++++++ cranelift/codegen/src/egraph/domtree.rs | 69 ++ cranelift/codegen/src/egraph/elaborate.rs | 612 ++++++++++++++ cranelift/codegen/src/egraph/node.rs | 376 +++++++++ cranelift/codegen/src/egraph/stores.rs | 266 ++++++ cranelift/codegen/src/inst_predicates.rs | 7 +- cranelift/codegen/src/ir/dfg.rs | 17 + cranelift/codegen/src/ir/function.rs | 2 +- cranelift/codegen/src/ir/layout.rs | 12 + cranelift/codegen/src/ir/mod.rs | 2 +- cranelift/codegen/src/ir/sourceloc.rs | 2 +- .../codegen/src/isa/aarch64/lower/isle.rs | 3 +- .../codegen/src/isa/aarch64/lower_inst.rs | 23 +- cranelift/codegen/src/isa/aarch64/mod.rs | 10 +- .../codegen/src/isa/riscv64/lower/isle.rs | 18 +- cranelift/codegen/src/isa/riscv64/mod.rs | 2 +- cranelift/codegen/src/isa/s390x/lower/isle.rs | 3 +- cranelift/codegen/src/isa/s390x/mod.rs | 10 +- cranelift/codegen/src/isa/x64/lower/isle.rs | 3 +- cranelift/codegen/src/isa/x64/mod.rs | 2 +- cranelift/codegen/src/isle_prelude.rs | 604 ++++++++++++++ cranelift/codegen/src/lib.rs | 3 + cranelift/codegen/src/loop_analysis.rs | 94 +++ cranelift/codegen/src/machinst/compile.rs | 3 +- cranelift/codegen/src/machinst/isle.rs | 536 +----------- cranelift/codegen/src/machinst/lower.rs | 45 +- cranelift/codegen/src/opts.rs | 297 +++++++ cranelift/codegen/src/opts/algebraic.isle | 207 +++++ cranelift/codegen/src/opts/cprop.isle | 134 +++ cranelift/codegen/src/opts/generated_code.rs | 11 + cranelift/codegen/src/prelude.isle | 766 +----------------- cranelift/codegen/src/prelude_lower.isle | 740 +++++++++++++++++ cranelift/codegen/src/prelude_opt.isle | 61 ++ 
cranelift/codegen/src/scoped_hash_map.rs | 190 +++-- cranelift/codegen/src/settings.rs | 1 + cranelift/egraph/src/lib.rs | 198 +++-- cranelift/egraph/src/unionfind.rs | 15 + cranelift/entity/src/list.rs | 18 + .../filetests/filetests/egraph/algebraic.clif | 13 + .../filetests/egraph/alias_analysis.clif | 22 + .../filetests/filetests/egraph/basic-gvn.clif | 29 + .../filetests/filetests/egraph/licm.clif | 40 + .../filetests/filetests/egraph/misc.clif | 21 + .../filetests/egraph/multivalue.clif | 24 + .../filetests/egraph/not_a_load.clif | 23 + .../filetests/filetests/egraph/remat.clif | 35 + cranelift/filetests/src/lib.rs | 2 + cranelift/filetests/src/test_optimize.rs | 47 ++ cranelift/isle/isle/src/overlap.rs | 12 + crates/wasmtime/src/engine.rs | 1 + scripts/publish.rs | 3 +- 59 files changed, 5125 insertions(+), 1580 deletions(-) create mode 100644 cranelift/codegen/src/egraph.rs create mode 100644 cranelift/codegen/src/egraph/domtree.rs create mode 100644 cranelift/codegen/src/egraph/elaborate.rs create mode 100644 cranelift/codegen/src/egraph/node.rs create mode 100644 cranelift/codegen/src/egraph/stores.rs create mode 100644 cranelift/codegen/src/isle_prelude.rs create mode 100644 cranelift/codegen/src/opts.rs create mode 100644 cranelift/codegen/src/opts/algebraic.isle create mode 100644 cranelift/codegen/src/opts/cprop.isle create mode 100644 cranelift/codegen/src/opts/generated_code.rs create mode 100644 cranelift/codegen/src/prelude_lower.isle create mode 100644 cranelift/codegen/src/prelude_opt.isle create mode 100644 cranelift/filetests/filetests/egraph/algebraic.clif create mode 100644 cranelift/filetests/filetests/egraph/alias_analysis.clif create mode 100644 cranelift/filetests/filetests/egraph/basic-gvn.clif create mode 100644 cranelift/filetests/filetests/egraph/licm.clif create mode 100644 cranelift/filetests/filetests/egraph/misc.clif create mode 100644 cranelift/filetests/filetests/egraph/multivalue.clif create mode 100644 
cranelift/filetests/filetests/egraph/not_a_load.clif create mode 100644 cranelift/filetests/filetests/egraph/remat.clif create mode 100644 cranelift/filetests/src/test_optimize.rs diff --git a/Cargo.lock b/Cargo.lock index 5cfb6a64a1..48b84a3b16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -537,6 +537,7 @@ dependencies = [ "cranelift-bforest", "cranelift-codegen-meta", "cranelift-codegen-shared", + "cranelift-egraph", "cranelift-entity", "cranelift-isle", "criterion", diff --git a/Cargo.toml b/Cargo.toml index f6da78e7a0..5762dc02a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -129,6 +129,7 @@ wasmtime-fuzzing = { path = "crates/fuzzing" } cranelift-wasm = { path = "cranelift/wasm", version = "0.90.0" } cranelift-codegen = { path = "cranelift/codegen", version = "0.90.0" } +cranelift-egraph = { path = "cranelift/egraph", version = "0.90.0" } cranelift-frontend = { path = "cranelift/frontend", version = "0.90.0" } cranelift-entity = { path = "cranelift/entity", version = "0.90.0" } cranelift-native = { path = "cranelift/native", version = "0.90.0" } diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 86e2808d14..9d170e0364 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -18,6 +18,7 @@ bumpalo = "3" cranelift-codegen-shared = { path = "./shared", version = "0.90.0" } cranelift-entity = { workspace = true } cranelift-bforest = { workspace = true } +cranelift-egraph = { workspace = true } hashbrown = { workspace = true, optional = true } target-lexicon = { workspace = true } log = { workspace = true } diff --git a/cranelift/codegen/build.rs b/cranelift/codegen/build.rs index f81950277e..085b5f0151 100644 --- a/cranelift/codegen/build.rs +++ b/cranelift/codegen/build.rs @@ -177,9 +177,19 @@ fn get_isle_compilations( ) -> Result { let cur_dir = std::env::current_dir()?; - let clif_isle = out_dir.join("clif.isle"); + // Preludes. 
+ let clif_lower_isle = out_dir.join("clif_lower.isle"); + let clif_opt_isle = out_dir.join("clif_opt.isle"); let prelude_isle = make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("prelude.isle")); + let prelude_opt_isle = + make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("prelude_opt.isle")); + let prelude_lower_isle = + make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("prelude_lower.isle")); + + // Directory for mid-end optimizations. + let src_opts = make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("opts")); + // Directories for lowering backends. let src_isa_x64 = make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("isa").join("x64")); let src_isa_aarch64 = @@ -204,47 +214,62 @@ fn get_isle_compilations( // `cranelift/codegen/src/isa/*/lower/isle/generated_code.rs`! Ok(IsleCompilations { items: vec![ + // The mid-end optimization rules. + IsleCompilation { + output: out_dir.join("isle_opt.rs"), + inputs: vec![ + prelude_isle.clone(), + prelude_opt_isle.clone(), + src_opts.join("algebraic.isle"), + src_opts.join("cprop.isle"), + ], + untracked_inputs: vec![clif_opt_isle.clone()], + }, // The x86-64 instruction selector. IsleCompilation { output: out_dir.join("isle_x64.rs"), inputs: vec![ prelude_isle.clone(), + prelude_lower_isle.clone(), src_isa_x64.join("inst.isle"), src_isa_x64.join("lower.isle"), ], - untracked_inputs: vec![clif_isle.clone()], + untracked_inputs: vec![clif_lower_isle.clone()], }, // The aarch64 instruction selector. IsleCompilation { output: out_dir.join("isle_aarch64.rs"), inputs: vec![ prelude_isle.clone(), + prelude_lower_isle.clone(), src_isa_aarch64.join("inst.isle"), src_isa_aarch64.join("inst_neon.isle"), src_isa_aarch64.join("lower.isle"), src_isa_aarch64.join("lower_dynamic_neon.isle"), ], - untracked_inputs: vec![clif_isle.clone()], + untracked_inputs: vec![clif_lower_isle.clone()], }, // The s390x instruction selector. 
IsleCompilation { output: out_dir.join("isle_s390x.rs"), inputs: vec![ prelude_isle.clone(), + prelude_lower_isle.clone(), src_isa_s390x.join("inst.isle"), src_isa_s390x.join("lower.isle"), ], - untracked_inputs: vec![clif_isle.clone()], + untracked_inputs: vec![clif_lower_isle.clone()], }, // The risc-v instruction selector. IsleCompilation { output: out_dir.join("isle_riscv64.rs"), inputs: vec![ prelude_isle.clone(), + prelude_lower_isle.clone(), src_isa_risc_v.join("inst.isle"), src_isa_risc_v.join("lower.isle"), ], - untracked_inputs: vec![clif_isle.clone()], + untracked_inputs: vec![clif_lower_isle.clone()], }, ], }) diff --git a/cranelift/codegen/meta/src/gen_inst.rs b/cranelift/codegen/meta/src/gen_inst.rs index 1d2fefc2b3..3ff454d46f 100644 --- a/cranelift/codegen/meta/src/gen_inst.rs +++ b/cranelift/codegen/meta/src/gen_inst.rs @@ -60,36 +60,52 @@ fn gen_formats(formats: &[&InstructionFormat], fmt: &mut Formatter) { fmt.empty_line(); } -/// Generate the InstructionData enum. +/// Generate the InstructionData and InstructionImms enums. /// /// Every variant must contain an `opcode` field. The size of `InstructionData` should be kept at /// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a /// `ValueList` to store the additional information out of line. +/// +/// `InstructionImms` stores everything about an instruction except for the arguments: in other +/// words, the `Opcode` and any immediates or other parameters. `InstructionData` stores this, plus +/// the SSA `Value` arguments. 
fn gen_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) { - fmt.line("#[derive(Clone, Debug, PartialEq, Hash)]"); - fmt.line(r#"#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]"#); - fmt.line("#[allow(missing_docs)]"); - fmt.line("pub enum InstructionData {"); - fmt.indent(|fmt| { - for format in formats { - fmtln!(fmt, "{} {{", format.name); - fmt.indent(|fmt| { - fmt.line("opcode: Opcode,"); - if format.has_value_list { - fmt.line("args: ValueList,"); - } else if format.num_value_operands == 1 { - fmt.line("arg: Value,"); - } else if format.num_value_operands > 0 { - fmtln!(fmt, "args: [Value; {}],", format.num_value_operands); - } - for field in &format.imm_fields { - fmtln!(fmt, "{}: {},", field.member, field.kind.rust_type); - } - }); - fmtln!(fmt, "},"); + for (name, include_args) in &[("InstructionData", true), ("InstructionImms", false)] { + fmt.line("#[derive(Clone, Debug, PartialEq, Hash)]"); + if !include_args { + // `InstructionImms` gets some extra derives: it acts like + // a sort of extended opcode and we want to allow for + // hashconsing via Eq. `Copy` also turns out to be useful. + fmt.line("#[derive(Copy, Eq)]"); } - }); - fmt.line("}"); + fmt.line(r#"#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]"#); + fmt.line("#[allow(missing_docs)]"); + // generate `enum InstructionData` or `enum InstructionImms`. + // (This comment exists so one can grep for `enum InstructionData`!) 
+ fmtln!(fmt, "pub enum {} {{", name); + fmt.indent(|fmt| { + for format in formats { + fmtln!(fmt, "{} {{", format.name); + fmt.indent(|fmt| { + fmt.line("opcode: Opcode,"); + if *include_args { + if format.has_value_list { + fmt.line("args: ValueList,"); + } else if format.num_value_operands == 1 { + fmt.line("arg: Value,"); + } else if format.num_value_operands > 0 { + fmtln!(fmt, "args: [Value; {}],", format.num_value_operands); + } + } + for field in &format.imm_fields { + fmtln!(fmt, "{}: {},", field.member, field.kind.rust_type); + } + }); + fmtln!(fmt, "},"); + } + }); + fmt.line("}"); + } } fn gen_arguments_method(formats: &[&InstructionFormat], fmt: &mut Formatter, is_mut: bool) { @@ -150,6 +166,122 @@ fn gen_arguments_method(formats: &[&InstructionFormat], fmt: &mut Formatter, is_ fmtln!(fmt, "}"); } +/// Generate the conversion from `InstructionData` to `InstructionImms`, stripping out the +/// `Value`s. +fn gen_instruction_data_to_instruction_imms(formats: &[&InstructionFormat], fmt: &mut Formatter) { + fmt.line("impl std::convert::From<&InstructionData> for InstructionImms {"); + fmt.indent(|fmt| { + fmt.doc_comment("Convert an `InstructionData` into an `InstructionImms`."); + fmt.line("fn from(data: &InstructionData) -> InstructionImms {"); + fmt.indent(|fmt| { + fmt.line("match data {"); + fmt.indent(|fmt| { + for format in formats { + fmtln!(fmt, "InstructionData::{} {{", format.name); + fmt.indent(|fmt| { + fmt.line("opcode,"); + for field in &format.imm_fields { + fmtln!(fmt, "{},", field.member); + } + fmt.line(".."); + }); + fmtln!(fmt, "}} => InstructionImms::{} {{", format.name); + fmt.indent(|fmt| { + fmt.line("opcode: *opcode,"); + for field in &format.imm_fields { + fmtln!(fmt, "{}: {}.clone(),", field.member, field.member); + } + }); + fmt.line("},"); + } + }); + fmt.line("}"); + }); + fmt.line("}"); + }); + fmt.line("}"); + fmt.empty_line(); +} + +/// Generate the conversion from `InstructionImms` to `InstructionData`, adding the +/// 
`Value`s. +fn gen_instruction_imms_to_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) { + fmt.line("impl InstructionImms {"); + fmt.indent(|fmt| { + fmt.doc_comment("Convert an `InstructionImms` into an `InstructionData` by adding args."); + fmt.line( + "pub fn with_args(&self, values: &[Value], value_list: &mut ValueListPool) -> InstructionData {", + ); + fmt.indent(|fmt| { + fmt.line("match self {"); + fmt.indent(|fmt| { + for format in formats { + fmtln!(fmt, "InstructionImms::{} {{", format.name); + fmt.indent(|fmt| { + fmt.line("opcode,"); + for field in &format.imm_fields { + fmtln!(fmt, "{},", field.member); + } + }); + fmt.line("} => {"); + if format.has_value_list { + fmtln!(fmt, "let args = ValueList::from_slice(values, value_list);"); + } + fmt.indent(|fmt| { + fmtln!(fmt, "InstructionData::{} {{", format.name); + fmt.indent(|fmt| { + fmt.line("opcode: *opcode,"); + for field in &format.imm_fields { + fmtln!(fmt, "{}: {}.clone(),", field.member, field.member); + } + if format.has_value_list { + fmtln!(fmt, "args,"); + } else if format.num_value_operands == 1 { + fmtln!(fmt, "arg: values[0],"); + } else if format.num_value_operands > 0 { + let mut args = vec![]; + for i in 0..format.num_value_operands { + args.push(format!("values[{}]", i)); + } + fmtln!(fmt, "args: [{}],", args.join(", ")); + } + }); + fmt.line("}"); + }); + fmt.line("},"); + } + }); + fmt.line("}"); + }); + fmt.line("}"); + }); + fmt.line("}"); + fmt.empty_line(); +} + +/// Generate the `opcode` method on InstructionImms. 
+fn gen_instruction_imms_impl(formats: &[&InstructionFormat], fmt: &mut Formatter) { + fmt.line("impl InstructionImms {"); + fmt.indent(|fmt| { + fmt.doc_comment("Get the opcode of this instruction."); + fmt.line("pub fn opcode(&self) -> Opcode {"); + fmt.indent(|fmt| { + let mut m = Match::new("*self"); + for format in formats { + m.arm( + format!("Self::{}", format.name), + vec!["opcode", ".."], + "opcode".to_string(), + ); + } + fmt.add_match(m); + }); + fmt.line("}"); + }); + fmt.line("}"); + fmt.empty_line(); +} + /// Generate the boring parts of the InstructionData implementation. /// /// These methods in `impl InstructionData` can be generated automatically from the instruction @@ -1070,7 +1202,12 @@ fn gen_inst_builder(inst: &Instruction, format: &InstructionFormat, fmt: &mut Fo fmtln!(fmt, "}") } -fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt: &mut Formatter) { +fn gen_common_isle( + formats: &[&InstructionFormat], + instructions: &AllInstructions, + fmt: &mut Formatter, + is_lower: bool, +) { use std::collections::{BTreeMap, BTreeSet}; use std::fmt::Write; @@ -1123,40 +1260,46 @@ fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt: gen_isle_enum(name, variants, fmt) } - // Generate all of the value arrays we need for `InstructionData` as well as - // the constructors and extractors for them. - fmt.line(";;;; Value Arrays ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;"); - fmt.empty_line(); - let value_array_arities: BTreeSet<_> = formats - .iter() - .filter(|f| f.typevar_operand.is_some() && !f.has_value_list && f.num_value_operands != 1) - .map(|f| f.num_value_operands) - .collect(); - for n in value_array_arities { - fmtln!(fmt, ";; ISLE representation of `[Value; {}]`.", n); - fmtln!(fmt, "(type ValueArray{} extern (enum))", n); + if is_lower { + // Generate all of the value arrays we need for `InstructionData` as well as + // the constructors and extractors for them. 
+ fmt.line( + ";;;; Value Arrays ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;", + ); fmt.empty_line(); + let value_array_arities: BTreeSet<_> = formats + .iter() + .filter(|f| { + f.typevar_operand.is_some() && !f.has_value_list && f.num_value_operands != 1 + }) + .map(|f| f.num_value_operands) + .collect(); + for n in value_array_arities { + fmtln!(fmt, ";; ISLE representation of `[Value; {}]`.", n); + fmtln!(fmt, "(type ValueArray{} extern (enum))", n); + fmt.empty_line(); - fmtln!( - fmt, - "(decl value_array_{} ({}) ValueArray{})", - n, - (0..n).map(|_| "Value").collect::>().join(" "), - n - ); - fmtln!( - fmt, - "(extern constructor value_array_{} pack_value_array_{})", - n, - n - ); - fmtln!( - fmt, - "(extern extractor infallible value_array_{} unpack_value_array_{})", - n, - n - ); - fmt.empty_line(); + fmtln!( + fmt, + "(decl value_array_{} ({}) ValueArray{})", + n, + (0..n).map(|_| "Value").collect::>().join(" "), + n + ); + fmtln!( + fmt, + "(extern constructor value_array_{} pack_value_array_{})", + n, + n + ); + fmtln!( + fmt, + "(extern extractor infallible value_array_{} unpack_value_array_{})", + n, + n + ); + fmt.empty_line(); + } } // Generate the extern type declaration for `Opcode`. @@ -1175,21 +1318,33 @@ fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt: fmt.line(")"); fmt.empty_line(); - // Generate the extern type declaration for `InstructionData`. - fmt.line(";;;; `InstructionData` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;"); + // Generate the extern type declaration for `InstructionData` + // (lowering) or `InstructionImms` (opt). 
+ let inst_data_name = if is_lower { + "InstructionData" + } else { + "InstructionImms" + }; + fmtln!( + fmt, + ";;;; `{}` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;", + inst_data_name + ); fmt.empty_line(); - fmt.line("(type InstructionData extern"); + fmtln!(fmt, "(type {} extern", inst_data_name); fmt.indent(|fmt| { fmt.line("(enum"); fmt.indent(|fmt| { for format in formats { let mut s = format!("({} (opcode Opcode)", format.name); - if format.has_value_list { - s.push_str(" (args ValueList)"); - } else if format.num_value_operands == 1 { - s.push_str(" (arg Value)"); - } else if format.num_value_operands > 1 { - write!(&mut s, " (args ValueArray{})", format.num_value_operands).unwrap(); + if is_lower { + if format.has_value_list { + s.push_str(" (args ValueList)"); + } else if format.num_value_operands == 1 { + s.push_str(" (arg Value)"); + } else if format.num_value_operands > 1 { + write!(&mut s, " (args ValueArray{})", format.num_value_operands).unwrap(); + } } for field in &format.imm_fields { write!( @@ -1210,85 +1365,157 @@ fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt: fmt.empty_line(); // Generate the helper extractors for each opcode's full instruction. - // - // TODO: if/when we port our peephole optimization passes to ISLE we will - // want helper constructors as well. 
- fmt.line(";;;; Extracting Opcode, Operands, and Immediates from `InstructionData` ;;;;;;;;"); + fmtln!( + fmt, + ";;;; Extracting Opcode, Operands, and Immediates from `{}` ;;;;;;;;", + inst_data_name + ); fmt.empty_line(); + let ret_ty = if is_lower { "Inst" } else { "Id" }; for inst in instructions { + if !is_lower && inst.format.has_value_list { + continue; + } + fmtln!( fmt, - "(decl {} ({}) Inst)", + "(decl {} ({}{}) {})", inst.name, + if is_lower { "" } else { "Type " }, inst.operands_in .iter() .map(|o| { let ty = o.kind.rust_type; - if ty == "&[Value]" { - "ValueSlice" + if is_lower { + if ty == "&[Value]" { + "ValueSlice" + } else { + ty.rsplit("::").next().unwrap() + } } else { - ty.rsplit("::").next().unwrap() + if ty == "&[Value]" { + panic!("value slice in mid-end extractor"); + } else if ty == "Value" || ty == "ir::Value" { + "Id" + } else { + ty.rsplit("::").next().unwrap() + } } }) .collect::>() - .join(" ") + .join(" "), + ret_ty ); fmtln!(fmt, "(extractor"); fmt.indent(|fmt| { fmtln!( fmt, - "({} {})", + "({} {}{})", inst.name, + if is_lower { "" } else { "ty " }, inst.operands_in .iter() .map(|o| { o.name }) .collect::>() .join(" ") ); - let mut s = format!( - "(inst_data (InstructionData.{} (Opcode.{})", - inst.format.name, inst.camel_name - ); - // Value and varargs operands. - if inst.format.has_value_list { - // The instruction format uses a value list, but the - // instruction itself might have not only a `&[Value]` - // varargs operand, but also one or more `Value` operands as - // well. If this is the case, then we need to read them off - // the front of the `ValueList`. 
- let values: Vec<_> = inst - .operands_in - .iter() - .filter(|o| o.is_value()) - .map(|o| o.name) - .collect(); - let varargs = inst - .operands_in - .iter() - .find(|o| o.is_varargs()) - .unwrap() - .name; - if values.is_empty() { - write!(&mut s, " (value_list_slice {})", varargs).unwrap(); - } else { + if is_lower { + let mut s = format!( + "(inst_data (InstructionData.{} (Opcode.{})", + inst.format.name, inst.camel_name + ); + + // Value and varargs operands. + if inst.format.has_value_list { + // The instruction format uses a value list, but the + // instruction itself might have not only a `&[Value]` + // varargs operand, but also one or more `Value` operands as + // well. If this is the case, then we need to read them off + // the front of the `ValueList`. + let values: Vec<_> = inst + .operands_in + .iter() + .filter(|o| o.is_value()) + .map(|o| o.name) + .collect(); + let varargs = inst + .operands_in + .iter() + .find(|o| o.is_varargs()) + .unwrap() + .name; + if values.is_empty() { + write!(&mut s, " (value_list_slice {})", varargs).unwrap(); + } else { + write!( + &mut s, + " (unwrap_head_value_list_{} {} {})", + values.len(), + values.join(" "), + varargs + ) + .unwrap(); + } + } else if inst.format.num_value_operands == 1 { write!( &mut s, - " (unwrap_head_value_list_{} {} {})", - values.len(), - values.join(" "), - varargs + " {}", + inst.operands_in.iter().find(|o| o.is_value()).unwrap().name + ) + .unwrap(); + } else if inst.format.num_value_operands > 1 { + let values = inst + .operands_in + .iter() + .filter(|o| o.is_value()) + .map(|o| o.name) + .collect::>(); + assert_eq!(values.len(), inst.format.num_value_operands); + let values = values.join(" "); + write!( + &mut s, + " (value_array_{} {})", + inst.format.num_value_operands, values, ) .unwrap(); } - } else if inst.format.num_value_operands == 1 { - write!( - &mut s, - " {}", - inst.operands_in.iter().find(|o| o.is_value()).unwrap().name - ) - .unwrap(); - } else if 
inst.format.num_value_operands > 1 { + + // Immediates. + let imm_operands: Vec<_> = inst + .operands_in + .iter() + .filter(|o| !o.is_value() && !o.is_varargs()) + .collect(); + assert_eq!(imm_operands.len(), inst.format.imm_fields.len()); + for op in imm_operands { + write!(&mut s, " {}", op.name).unwrap(); + } + + s.push_str("))"); + fmt.line(&s); + } else { + // Mid-end case. + let mut s = format!( + "(enodes ty (InstructionImms.{} (Opcode.{})", + inst.format.name, inst.camel_name + ); + + // Immediates. + let imm_operands: Vec<_> = inst + .operands_in + .iter() + .filter(|o| !o.is_value() && !o.is_varargs()) + .collect(); + assert_eq!(imm_operands.len(), inst.format.imm_fields.len()); + for op in imm_operands { + write!(&mut s, " {}", op.name).unwrap(); + } + // End of `InstructionImms`. + s.push_str(")"); + + // Second arg to `enode`: value args. + assert!(!inst.operands_in.iter().any(|op| op.is_varargs())); let values = inst .operands_in .iter() @@ -1299,31 +1526,83 @@ fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt: let values = values.join(" "); write!( &mut s, - " (value_array_{} {})", + " (id_array_{} {})", inst.format.num_value_operands, values, ) .unwrap(); - } - // Immediates. - let imm_operands: Vec<_> = inst - .operands_in - .iter() - .filter(|o| !o.is_value() && !o.is_varargs()) - .collect(); - assert_eq!(imm_operands.len(), inst.format.imm_fields.len()); - for op in imm_operands { - write!(&mut s, " {}", op.name).unwrap(); + s.push_str(")"); + fmt.line(&s); } - - s.push_str("))"); - fmt.line(&s); }); fmt.line(")"); + + // Generate a constructor if this is the mid-end prelude. 
+ if !is_lower { + fmtln!( + fmt, + "(rule ({} ty {})", + inst.name, + inst.operands_in + .iter() + .map(|o| o.name) + .collect::>() + .join(" ") + ); + fmt.indent(|fmt| { + let mut s = format!( + "(pure_enode ty (InstructionImms.{} (Opcode.{})", + inst.format.name, inst.camel_name + ); + + for o in inst + .operands_in + .iter() + .filter(|o| !o.is_value() && !o.is_varargs()) + { + write!(&mut s, " {}", o.name).unwrap(); + } + s.push_str(")"); + + let values = inst + .operands_in + .iter() + .filter(|o| o.is_value()) + .map(|o| o.name) + .collect::>(); + let values = values.join(" "); + write!( + &mut s, + " (id_array_{} {})", + inst.format.num_value_operands, values + ) + .unwrap(); + s.push_str(")"); + fmt.line(&s); + }); + fmt.line(")"); + } + fmt.empty_line(); } } +fn gen_opt_isle( + formats: &[&InstructionFormat], + instructions: &AllInstructions, + fmt: &mut Formatter, +) { + gen_common_isle(formats, instructions, fmt, /* is_lower = */ false); +} + +fn gen_lower_isle( + formats: &[&InstructionFormat], + instructions: &AllInstructions, + fmt: &mut Formatter, +) { + gen_common_isle(formats, instructions, fmt, /* is_lower = */ true); +} + /// Generate an `enum` immediate in ISLE. 
fn gen_isle_enum(name: &str, mut variants: Vec<&str>, fmt: &mut Formatter) { variants.sort(); @@ -1388,7 +1667,8 @@ pub(crate) fn generate( all_inst: &AllInstructions, opcode_filename: &str, inst_builder_filename: &str, - isle_filename: &str, + isle_opt_filename: &str, + isle_lower_filename: &str, out_dir: &str, isle_dir: &str, ) -> Result<(), error::Error> { @@ -1398,16 +1678,24 @@ pub(crate) fn generate( gen_instruction_data(&formats, &mut fmt); fmt.empty_line(); gen_instruction_data_impl(&formats, &mut fmt); + gen_instruction_data_to_instruction_imms(&formats, &mut fmt); + gen_instruction_imms_impl(&formats, &mut fmt); + gen_instruction_imms_to_instruction_data(&formats, &mut fmt); fmt.empty_line(); gen_opcodes(all_inst, &mut fmt); fmt.empty_line(); gen_type_constraints(all_inst, &mut fmt); fmt.update_file(opcode_filename, out_dir)?; - // ISLE DSL. + // ISLE DSL: mid-end ("opt") generated bindings. let mut fmt = Formatter::new(); - gen_isle(&formats, all_inst, &mut fmt); - fmt.update_file(isle_filename, isle_dir)?; + gen_opt_isle(&formats, all_inst, &mut fmt); + fmt.update_file(isle_opt_filename, isle_dir)?; + + // ISLE DSL: lowering generated bindings. + let mut fmt = Formatter::new(); + gen_lower_isle(&formats, all_inst, &mut fmt); + fmt.update_file(isle_lower_filename, isle_dir)?; // Instruction builder. 
let mut fmt = Formatter::new(); diff --git a/cranelift/codegen/meta/src/lib.rs b/cranelift/codegen/meta/src/lib.rs index 8b525acabf..764283927f 100644 --- a/cranelift/codegen/meta/src/lib.rs +++ b/cranelift/codegen/meta/src/lib.rs @@ -47,7 +47,8 @@ pub fn generate(isas: &[isa::Isa], out_dir: &str, isle_dir: &str) -> Result<(), &shared_defs.all_instructions, "opcodes.rs", "inst_builder.rs", - "clif.isle", + "clif_opt.isle", + "clif_lower.isle", &out_dir, isle_dir, )?; diff --git a/cranelift/codegen/meta/src/shared/settings.rs b/cranelift/codegen/meta/src/shared/settings.rs index 9b4b3656ec..6b9c241c7d 100644 --- a/cranelift/codegen/meta/src/shared/settings.rs +++ b/cranelift/codegen/meta/src/shared/settings.rs @@ -53,6 +53,17 @@ pub(crate) fn define() -> SettingGroup { true, ); + settings.add_bool( + "use_egraphs", + "Enable egraph-based optimization.", + r#" + This enables an optimization phase that converts CLIF to an egraph (equivalence graph) + representation, performs various rewrites, and then converts it back. This can result in + better optimization, but is currently considered experimental. + "#, + false, + ); + settings.add_bool( "enable_verifier", "Run the Cranelift IR verifier at strategic times during compilation.", diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index b136d220df..01724612f2 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -12,6 +12,7 @@ use crate::alias_analysis::AliasAnalysis; use crate::dce::do_dce; use crate::dominator_tree::DominatorTree; +use crate::egraph::FuncEGraph; use crate::flowgraph::ControlFlowGraph; use crate::ir::Function; use crate::isa::TargetIsa; @@ -104,15 +105,20 @@ impl Context { /// Compile the function, and emit machine code into a `Vec`. /// - /// Run the function through all the passes necessary to generate code for the target ISA - /// represented by `isa`, as well as the final step of emitting machine code into a - /// `Vec`. 
The machine code is not relocated. Instead, any relocations can be obtained - /// from `compiled_code()`. + /// Run the function through all the passes necessary to generate + /// code for the target ISA represented by `isa`, as well as the + /// final step of emitting machine code into a `Vec`. The + /// machine code is not relocated. Instead, any relocations can be + /// obtained from `compiled_code()`. + /// + /// Performs any optimizations that are enabled, unless + /// `optimize()` was already invoked. /// /// This function calls `compile`, taking care to resize `mem` as - /// needed, so it provides a safe interface. + /// needed. /// - /// Returns information about the function's code and read-only data. + /// Returns information about the function's code and read-only + /// data. pub fn compile_and_emit( &mut self, isa: &dyn TargetIsa, @@ -131,15 +137,26 @@ impl Context { self.verify_if(isa)?; + self.optimize(isa)?; + + isa.compile_function(&self.func, self.want_disasm) + } + + /// Optimize the function, performing all compilation steps up to + /// but not including machine-code lowering and register + /// allocation. + /// + /// Public only for testing purposes. 
+ pub fn optimize(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { let opt_level = isa.flags().opt_level(); log::trace!( - "Compiling (opt level {:?}):\n{}", + "Optimizing (opt level {:?}):\n{}", opt_level, self.func.display() ); self.compute_cfg(); - if opt_level != OptLevel::None { + if !isa.flags().use_egraphs() && opt_level != OptLevel::None { self.preopt(isa)?; } if isa.flags().enable_nan_canonicalization() { @@ -147,7 +164,8 @@ impl Context { } self.legalize(isa)?; - if opt_level != OptLevel::None { + + if !isa.flags().use_egraphs() && opt_level != OptLevel::None { self.compute_domtree(); self.compute_loop_analysis(); self.licm(isa)?; @@ -156,18 +174,29 @@ impl Context { self.compute_domtree(); self.eliminate_unreachable_code(isa)?; - if opt_level != OptLevel::None { + + if isa.flags().use_egraphs() || opt_level != OptLevel::None { self.dce(isa)?; } self.remove_constant_phis(isa)?; - if opt_level != OptLevel::None && isa.flags().enable_alias_analysis() { + if isa.flags().use_egraphs() { + log::debug!( + "About to optimize with egraph phase:\n{}", + self.func.display() + ); + self.compute_loop_analysis(); + let mut eg = FuncEGraph::new(&self.func, &self.domtree, &self.loop_analysis, &self.cfg); + eg.elaborate(&mut self.func); + log::debug!("After egraph optimization:\n{}", self.func.display()); + log::info!("egraph stats: {:?}", eg.stats); + } else if opt_level != OptLevel::None && isa.flags().enable_alias_analysis() { self.replace_redundant_loads()?; self.simple_gvn(isa)?; } - isa.compile_function(&self.func, self.want_disasm) + Ok(()) } /// Compile the function. diff --git a/cranelift/codegen/src/egraph.rs b/cranelift/codegen/src/egraph.rs new file mode 100644 index 0000000000..f9216596a7 --- /dev/null +++ b/cranelift/codegen/src/egraph.rs @@ -0,0 +1,414 @@ +//! Egraph-based mid-end optimization framework. 
+ +use crate::dominator_tree::DominatorTree; +use crate::flowgraph::ControlFlowGraph; +use crate::loop_analysis::{LoopAnalysis, LoopLevel}; +use crate::trace; +use crate::{ + fx::{FxHashMap, FxHashSet}, + inst_predicates::has_side_effect, + ir::{Block, Function, Inst, InstructionData, InstructionImms, Opcode, Type}, +}; +use alloc::vec::Vec; +use core::ops::Range; +use cranelift_egraph::{EGraph, Id, Language, NewOrExisting}; +use cranelift_entity::EntityList; +use cranelift_entity::SecondaryMap; + +mod domtree; +mod elaborate; +mod node; +mod stores; + +use elaborate::Elaborator; +pub use node::{Node, NodeCtx}; +pub use stores::{AliasAnalysis, MemoryState}; + +pub struct FuncEGraph<'a> { + /// Dominator tree, used for elaboration pass. + domtree: &'a DominatorTree, + /// Loop analysis results, used for built-in LICM during elaboration. + loop_analysis: &'a LoopAnalysis, + /// Last-store tracker for integrated alias analysis during egraph build. + alias_analysis: AliasAnalysis, + /// The egraph itself. + pub(crate) egraph: EGraph<NodeCtx, Analysis>, + /// "node context", containing arenas for node data. + pub(crate) node_ctx: NodeCtx, + /// Ranges in `side_effect_ids` for sequences of side-effecting + /// eclasses per block. + side_effects: SecondaryMap<Block, Range<u32>>, + side_effect_ids: Vec<Id>, + /// Map from store instructions to their nodes; used for store-to-load forwarding. + pub(crate) store_nodes: FxHashMap<Inst, (Type, Id)>, + /// Ranges in `blockparam_ids_tys` for sequences of blockparam + /// eclass IDs and types per block. + blockparams: SecondaryMap<Block, Range<u32>>, + blockparam_ids_tys: Vec<(Id, Type)>, + /// Which canonical node IDs do we want to rematerialize in each + /// block where they're used? + pub(crate) remat_ids: FxHashSet<Id>, + /// Which canonical node IDs have an enode whose value subsumes + /// all others it's unioned with? + pub(crate) subsume_ids: FxHashSet<Id>, + /// Statistics recorded during the process of building, + /// optimizing, and lowering out of this egraph.
+ pub(crate) stats: Stats, + /// Current rewrite-recursion depth. Used to enforce a finite + /// limit on rewrite rule application so that we don't get stuck + /// in an infinite chain. + pub(crate) rewrite_depth: usize, +} + +#[derive(Clone, Debug, Default)] +pub(crate) struct Stats { + pub(crate) node_created: u64, + pub(crate) node_param: u64, + pub(crate) node_result: u64, + pub(crate) node_pure: u64, + pub(crate) node_inst: u64, + pub(crate) node_load: u64, + pub(crate) node_dedup_query: u64, + pub(crate) node_dedup_hit: u64, + pub(crate) node_dedup_miss: u64, + pub(crate) node_ctor_created: u64, + pub(crate) node_ctor_deduped: u64, + pub(crate) node_union: u64, + pub(crate) node_subsume: u64, + pub(crate) store_map_insert: u64, + pub(crate) side_effect_nodes: u64, + pub(crate) rewrite_rule_invoked: u64, + pub(crate) rewrite_depth_limit: u64, + pub(crate) store_to_load_forward: u64, + pub(crate) elaborate_visit_node: u64, + pub(crate) elaborate_memoize_hit: u64, + pub(crate) elaborate_memoize_miss: u64, + pub(crate) elaborate_memoize_miss_remat: u64, + pub(crate) elaborate_licm_hoist: u64, + pub(crate) elaborate_func: u64, + pub(crate) elaborate_func_pre_insts: u64, + pub(crate) elaborate_func_post_insts: u64, +} + +impl<'a> FuncEGraph<'a> { + /// Create a new EGraph for the given function. Requires the + /// domtree to be precomputed as well; the domtree is used for + /// scheduling when lowering out of the egraph. 
+ pub fn new( + func: &Function, + domtree: &'a DominatorTree, + loop_analysis: &'a LoopAnalysis, + cfg: &ControlFlowGraph, + ) -> FuncEGraph<'a> { + let node_count_estimate = func.dfg.num_values() * 2; + let alias_analysis = AliasAnalysis::new(func, cfg); + let mut this = Self { + domtree, + loop_analysis, + alias_analysis, + egraph: EGraph::with_capacity(node_count_estimate, Some(Analysis)), + node_ctx: NodeCtx::with_capacity_for_dfg(&func.dfg), + side_effects: SecondaryMap::default(), + side_effect_ids: vec![], + store_nodes: FxHashMap::default(), + blockparams: SecondaryMap::default(), + blockparam_ids_tys: vec![], + remat_ids: FxHashSet::default(), + subsume_ids: FxHashSet::default(), + stats: Default::default(), + rewrite_depth: 0, + }; + this.build(func); + this + } + + fn build(&mut self, func: &Function) { + // Mapping of SSA `Value` to eclass ID. + let mut value_to_id = FxHashMap::default(); + + // For each block in RPO, create an enode for block entry, for + // each block param, and for each instruction. 
+ for &block in self.domtree.cfg_postorder().iter().rev() { + let loop_level = self.loop_analysis.loop_level(block); + let blockparam_start = + u32::try_from(self.blockparam_ids_tys.len()).expect("Overflow in blockparam count"); + for (i, &value) in func.dfg.block_params(block).iter().enumerate() { + let ty = func.dfg.value_type(value); + let param = self + .egraph + .add( + Node::Param { + block, + index: i + .try_into() + .expect("blockparam index should fit in Node::Param"), + ty, + loop_level, + }, + &mut self.node_ctx, + ) + .get(); + value_to_id.insert(value, param); + self.blockparam_ids_tys.push((param, ty)); + self.stats.node_created += 1; + self.stats.node_param += 1; + } + let blockparam_end = + u32::try_from(self.blockparam_ids_tys.len()).expect("Overflow in blockparam count"); + self.blockparams[block] = blockparam_start..blockparam_end; + + let side_effect_start = + u32::try_from(self.side_effect_ids.len()).expect("Overflow in side-effect count"); + for inst in func.layout.block_insts(block) { + // Build args from SSA values. + let args = EntityList::from_iter( + func.dfg.inst_args(inst).iter().map(|&arg| { + let arg = func.dfg.resolve_aliases(arg); + *value_to_id + .get(&arg) + .expect("Must have seen def before this use") + }), + &mut self.node_ctx.args, + ); + + let results = func.dfg.inst_results(inst); + + let types = self + .node_ctx + .types + .from_iter(results.iter().map(|&val| func.dfg.value_type(val))); + let types = types.freeze(&mut self.node_ctx.types); + + let load_mem_state = self.alias_analysis.get_state_for_load(inst); + let is_readonly_load = match func.dfg[inst] { + InstructionData::Load { + opcode: Opcode::Load, + flags, + .. + } => flags.readonly() && flags.notrap(), + _ => false, + }; + + // Create the egraph node. 
+ let op = InstructionImms::from(&func.dfg[inst]); + let opcode = op.opcode(); + let srcloc = func.srclocs[inst]; + + let node = if is_readonly_load { + self.stats.node_created += 1; + self.stats.node_pure += 1; + Node::Pure { op, args, types } + } else if let Some(load_mem_state) = load_mem_state { + let addr = args.as_slice(&self.node_ctx.args)[0]; + let ty = types.as_slice(&self.node_ctx.types)[0]; + trace!("load at inst {} has mem state {:?}", inst, load_mem_state); + self.stats.node_created += 1; + self.stats.node_load += 1; + Node::Load { + op, + ty, + inst, + addr, + mem_state: load_mem_state, + srcloc, + } + } else if has_side_effect(func, inst) || opcode.can_load() { + self.stats.node_created += 1; + self.stats.node_inst += 1; + Node::Inst { + op, + inst, + args, + types, + srcloc, + loop_level, + } + } else { + self.stats.node_created += 1; + self.stats.node_pure += 1; + Node::Pure { op, args, types } + }; + let dedup_needed = self.node_ctx.needs_dedup(&node); + let is_pure = matches!(node, Node::Pure { .. }); + + let mut id = self.egraph.add(node, &mut self.node_ctx); + + if dedup_needed { + self.stats.node_dedup_query += 1; + match id { + NewOrExisting::New(_) => { + self.stats.node_dedup_miss += 1; + } + NewOrExisting::Existing(_) => { + self.stats.node_dedup_hit += 1; + } + } + } + + if opcode == Opcode::Store { + let store_data_ty = func.dfg.value_type(func.dfg.inst_args(inst)[0]); + self.store_nodes.insert(inst, (store_data_ty, id.get())); + self.stats.store_map_insert += 1; + } + + // Loads that did not already merge into an existing + // load: try to forward from a store (store-to-load + // forwarding). 
+ if let NewOrExisting::New(new_id) = id { + if load_mem_state.is_some() { + let opt_id = crate::opts::store_to_load(new_id, self); + trace!("store_to_load: {} -> {}", new_id, opt_id); + if opt_id != new_id { + id = NewOrExisting::Existing(opt_id); + } + } + } + + // Now either optimize (for new pure nodes), or add to + // the side-effecting list (for all other new nodes). + let id = match id { + NewOrExisting::Existing(id) => id, + NewOrExisting::New(id) if is_pure => { + // Apply all optimization rules immediately; the + // aegraph (acyclic egraph) works best when we do + // this so all uses pick up the eclass with all + // possible enodes. + crate::opts::optimize_eclass(id, self) + } + NewOrExisting::New(id) => { + self.side_effect_ids.push(id); + self.stats.side_effect_nodes += 1; + id + } + }; + + // Create results and save in Value->Id map. + match results { + &[] => {} + &[one_result] => { + trace!("build: value {} -> id {}", one_result, id); + value_to_id.insert(one_result, id); + } + many_results => { + debug_assert!(many_results.len() > 1); + for (i, &result) in many_results.iter().enumerate() { + let ty = func.dfg.value_type(result); + let projection = self + .egraph + .add( + Node::Result { + value: id, + result: i, + ty, + }, + &mut self.node_ctx, + ) + .get(); + self.stats.node_created += 1; + self.stats.node_result += 1; + trace!("build: value {} -> id {}", result, projection); + value_to_id.insert(result, projection); + } + } + } + } + + let side_effect_end = + u32::try_from(self.side_effect_ids.len()).expect("Overflow in side-effect count"); + let side_effect_range = side_effect_start..side_effect_end; + self.side_effects[block] = side_effect_range; + } + } + + /// Scoped elaboration: compute a final ordering of op computation + /// for each block and replace the given Func body. + /// + /// This works in concert with the domtree. We do a preorder + /// traversal of the domtree, tracking a scoped map from Id to + /// (new) Value. 
The map's scopes correspond to levels in the + /// domtree. + /// + /// At each block, we iterate forward over the side-effecting + /// eclasses, and recursively generate their arg eclasses, then + /// emit the ops themselves. + /// + /// To use an eclass in a given block, we first look it up in the + /// scoped map, and get the Value if already present. If not, we + /// need to generate it. We emit the extracted enode for this + /// eclass after recursively generating its args. Eclasses are + /// thus computed "as late as possible", but then memoized into + /// the Id-to-Value map and available to all dominated blocks and + /// for the rest of this block. (This subsumes GVN.) + pub fn elaborate(&mut self, func: &mut Function) { + let mut elab = Elaborator::new( + func, + self.domtree, + self.loop_analysis, + &self.egraph, + &self.node_ctx, + &self.remat_ids, + &mut self.stats, + ); + elab.elaborate( + |block| { + let blockparam_range = self.blockparams[block].clone(); + &self.blockparam_ids_tys + [blockparam_range.start as usize..blockparam_range.end as usize] + }, + |block| { + let side_effect_range = self.side_effects[block].clone(); + &self.side_effect_ids + [side_effect_range.start as usize..side_effect_range.end as usize] + }, + ); + } +} + +/// State for egraph analysis that computes all needed properties. +pub(crate) struct Analysis; + +/// Analysis results for each eclass id. +#[derive(Clone, Debug)] +pub(crate) struct AnalysisValue { + pub(crate) loop_level: LoopLevel, +} + +impl Default for AnalysisValue { + fn default() -> Self { + Self { + loop_level: LoopLevel::root(), + } + } +} + +impl cranelift_egraph::Analysis for Analysis { + type L = NodeCtx; + type Value = AnalysisValue; + + fn for_node( + &self, + ctx: &NodeCtx, + n: &Node, + values: &SecondaryMap, + ) -> AnalysisValue { + let loop_level = match n { + &Node::Pure { ref args, .. 
} => args + .as_slice(&ctx.args) + .iter() + .map(|&arg| values[arg].loop_level) + .max() + .unwrap_or(LoopLevel::root()), + &Node::Load { addr, .. } => values[addr].loop_level, + &Node::Result { value, .. } => values[value].loop_level, + &Node::Inst { loop_level, .. } | &Node::Param { loop_level, .. } => loop_level, + }; + + AnalysisValue { loop_level } + } + + fn meet(&self, _ctx: &NodeCtx, v1: &AnalysisValue, v2: &AnalysisValue) -> AnalysisValue { + AnalysisValue { + loop_level: std::cmp::max(v1.loop_level, v2.loop_level), + } + } +} diff --git a/cranelift/codegen/src/egraph/domtree.rs b/cranelift/codegen/src/egraph/domtree.rs new file mode 100644 index 0000000000..f0af89e2a2 --- /dev/null +++ b/cranelift/codegen/src/egraph/domtree.rs @@ -0,0 +1,69 @@ +//! Extended domtree with various traversal support. + +use crate::dominator_tree::DominatorTree; +use crate::ir::{Block, Function}; +use cranelift_entity::{packed_option::PackedOption, SecondaryMap}; + +#[derive(Clone, Debug)] +pub(crate) struct DomTreeWithChildren { + nodes: SecondaryMap, + root: Block, +} + +#[derive(Clone, Copy, Debug, Default)] +struct DomTreeNode { + children: PackedOption, + next: PackedOption, +} + +impl DomTreeWithChildren { + pub(crate) fn new(func: &Function, domtree: &DominatorTree) -> DomTreeWithChildren { + let mut nodes: SecondaryMap = + SecondaryMap::with_capacity(func.dfg.num_blocks()); + + for block in func.layout.blocks() { + let idom_inst = match domtree.idom(block) { + Some(idom_inst) => idom_inst, + None => continue, + }; + let idom = func + .layout + .inst_block(idom_inst) + .expect("Dominating instruction should be part of a block"); + + nodes[block].next = nodes[idom].children; + nodes[idom].children = block.into(); + } + + let root = func.layout.entry_block().unwrap(); + + Self { nodes, root } + } + + pub(crate) fn root(&self) -> Block { + self.root + } + + pub(crate) fn children<'a>(&'a self, block: Block) -> DomTreeChildIter<'a> { + let block = 
self.nodes[block].children; + DomTreeChildIter { + domtree: self, + block, + } + } +} + +pub(crate) struct DomTreeChildIter<'a> { + domtree: &'a DomTreeWithChildren, + block: PackedOption<Block>, +} + +impl<'a> Iterator for DomTreeChildIter<'a> { + type Item = Block; + fn next(&mut self) -> Option<Block> { + self.block.expand().map(|block| { + self.block = self.domtree.nodes[block].next; + block + }) + } +} diff --git a/cranelift/codegen/src/egraph/elaborate.rs b/cranelift/codegen/src/egraph/elaborate.rs new file mode 100644 index 0000000000..47e6f40c32 --- /dev/null +++ b/cranelift/codegen/src/egraph/elaborate.rs @@ -0,0 +1,612 @@ +//! Elaboration phase: lowers EGraph back to sequences of operations +//! in CFG nodes. + +use super::domtree::DomTreeWithChildren; +use super::node::{op_cost, Cost, Node, NodeCtx}; +use super::Analysis; +use super::Stats; +use crate::dominator_tree::DominatorTree; +use crate::fx::FxHashSet; +use crate::ir::{Block, Function, Inst, Opcode, RelSourceLoc, Type, Value, ValueList}; +use crate::loop_analysis::LoopAnalysis; +use crate::scoped_hash_map::ScopedHashMap; +use crate::trace; +use alloc::vec::Vec; +use cranelift_egraph::{EGraph, Id, Language, NodeKey}; +use cranelift_entity::{packed_option::PackedOption, SecondaryMap}; +use smallvec::{smallvec, SmallVec}; +use std::ops::Add; + +type LoopDepth = u32; + +pub(crate) struct Elaborator<'a> { + func: &'a mut Function, + domtree: &'a DominatorTree, + loop_analysis: &'a LoopAnalysis, + node_ctx: &'a NodeCtx, + egraph: &'a EGraph<NodeCtx, Analysis>, + id_to_value: ScopedHashMap<Id, IdValue>, + id_to_best_cost_and_node: SecondaryMap<Id, (Cost, Id)>, + /// Stack of blocks and loops in current elaboration path. + loop_stack: SmallVec<[LoopStackEntry; 8]>, + cur_block: Option<Block>, + first_branch: SecondaryMap<Block, PackedOption<Inst>>, + remat_ids: &'a FxHashSet<Id>, + /// Explicitly-unrolled value elaboration stack. + elab_stack: Vec<ElabStackEntry>, + elab_result_stack: Vec<IdValue>, + /// Explicitly-unrolled block elaboration stack.
+ block_stack: Vec, + stats: &'a mut Stats, +} + +#[derive(Clone, Debug)] +struct LoopStackEntry { + /// The hoist point: a block that immediately dominates this + /// loop. May not be an immediate predecessor, but will be a valid + /// point to place all loop-invariant ops: they must depend only + /// on inputs that dominate the loop, so are available at (the end + /// of) this block. + hoist_block: Block, + /// The depth in the scope map. + scope_depth: u32, +} + +#[derive(Clone, Debug)] +enum ElabStackEntry { + /// Next action is to resolve this id into a node and elaborate + /// args. + Start { id: Id }, + /// Args have been pushed; waiting for results. + PendingNode { + canonical: Id, + node_key: NodeKey, + remat: bool, + num_args: usize, + }, + /// Waiting for a result to return one projected value of a + /// multi-value result. + PendingProjection { canonical: Id, index: usize }, +} + +#[derive(Clone, Debug)] +enum BlockStackEntry { + Elaborate { block: Block, idom: Option }, + Pop, +} + +#[derive(Clone, Debug)] +enum IdValue { + /// A single value. + Value { + depth: LoopDepth, + block: Block, + value: Value, + }, + /// Multiple results; indices in `node_args`. + Values { + depth: LoopDepth, + block: Block, + values: ValueList, + }, +} + +impl IdValue { + fn block(&self) -> Block { + match self { + IdValue::Value { block, .. } | IdValue::Values { block, .. 
} => *block, + } + } +} + +impl<'a> Elaborator<'a> { + pub(crate) fn new( + func: &'a mut Function, + domtree: &'a DominatorTree, + loop_analysis: &'a LoopAnalysis, + egraph: &'a EGraph, + node_ctx: &'a NodeCtx, + remat_ids: &'a FxHashSet, + stats: &'a mut Stats, + ) -> Self { + let num_blocks = func.dfg.num_blocks(); + let mut id_to_best_cost_and_node = + SecondaryMap::with_default((Cost::infinity(), Id::invalid())); + id_to_best_cost_and_node.resize(egraph.classes.len()); + Self { + func, + domtree, + loop_analysis, + egraph, + node_ctx, + id_to_value: ScopedHashMap::with_capacity(egraph.classes.len()), + id_to_best_cost_and_node, + loop_stack: smallvec![], + cur_block: None, + first_branch: SecondaryMap::with_capacity(num_blocks), + remat_ids, + elab_stack: vec![], + elab_result_stack: vec![], + block_stack: vec![], + stats, + } + } + + fn cur_loop_depth(&self) -> LoopDepth { + self.loop_stack.len() as LoopDepth + } + + fn start_block(&mut self, idom: Option, block: Block, block_params: &[(Id, Type)]) { + trace!( + "start_block: block {:?} with idom {:?} at loop depth {} scope depth {}", + block, + idom, + self.cur_loop_depth(), + self.id_to_value.depth() + ); + + // Note that if the *entry* block is a loop header, we will + // not make note of the loop here because it will not have an + // immediate dominator. We must disallow this case because we + // will skip adding the `LoopStackEntry` here but our + // `LoopAnalysis` will otherwise still make note of this loop + // and loop depths will not match. + if let Some(idom) = idom { + if self.loop_analysis.is_loop_header(block).is_some() { + self.loop_stack.push(LoopStackEntry { + // Any code hoisted out of this loop will have code + // placed in `idom`, and will have def mappings + // inserted in to the scoped hashmap at that block's + // level. 
+ hoist_block: idom, + scope_depth: (self.id_to_value.depth() - 1) as u32, + }); + trace!( + " -> loop header, pushing; depth now {}", + self.loop_stack.len() + ); + } + } else { + debug_assert!( + self.loop_analysis.is_loop_header(block).is_none(), + "Entry block (domtree root) cannot be a loop header!" + ); + } + + self.cur_block = Some(block); + for &(id, ty) in block_params { + let value = self.func.dfg.append_block_param(block, ty); + trace!(" -> block param id {:?} value {:?}", id, value); + self.id_to_value.insert_if_absent( + id, + IdValue::Value { + depth: self.cur_loop_depth(), + block, + value, + }, + ); + } + } + + fn add_node(&mut self, node: &Node, args: &[Value], to_block: Block) -> ValueList { + let (instdata, result_tys) = match node { + Node::Pure { op, types, .. } | Node::Inst { op, types, .. } => ( + op.with_args(args, &mut self.func.dfg.value_lists), + types.as_slice(&self.node_ctx.types), + ), + Node::Load { op, ty, .. } => ( + op.with_args(args, &mut self.func.dfg.value_lists), + std::slice::from_ref(ty), + ), + _ => panic!("Cannot `add_node()` on block param or projection"), + }; + let srcloc = match node { + Node::Inst { srcloc, .. } | Node::Load { srcloc, .. } => *srcloc, + _ => RelSourceLoc::default(), + }; + let opcode = instdata.opcode(); + // Is this instruction either an actual terminator (an + // instruction that must end the block), or at least in the + // group of branches at the end (including conditional + // branches that may be followed by an actual terminator)? We + // call this the "terminator group", and we record the first + // inst in this group (`first_branch` below) so that we do not + // insert instructions needed only by args of later + // instructions in the terminator group in the middle of the + // terminator group. + // + // E.g., for the original sequence + // v1 = op ... + // brnz vCond, block1 + // jump block2(v1) + // + // elaboration would naively produce + // + // brnz vCond, block1 + // v1 = op ... 
+ // jump block2(v1) + // + // but we use the `first_branch` mechanism below to ensure + // that once we've emitted at least one branch, all other + // elaborated insts have to go before that. So we emit brnz + // first, then as we elaborate the jump, we find we need the + // `op`; we `insert_inst` it *before* the brnz (which is the + // `first_branch`). + let is_terminator_group_inst = + opcode.is_branch() || opcode.is_return() || opcode == Opcode::Trap; + let inst = self.func.dfg.make_inst(instdata); + self.func.srclocs[inst] = srcloc; + + for &ty in result_tys { + self.func.dfg.append_result(inst, ty); + } + + if is_terminator_group_inst { + self.func.layout.append_inst(inst, to_block); + if self.first_branch[to_block].is_none() { + self.first_branch[to_block] = Some(inst).into(); + } + } else if let Some(branch) = self.first_branch[to_block].into() { + self.func.layout.insert_inst(inst, branch); + } else { + self.func.layout.append_inst(inst, to_block); + } + self.func.dfg.inst_results_list(inst) + } + + fn compute_best_nodes(&mut self) { + let best = &mut self.id_to_best_cost_and_node; + for (eclass_id, eclass) in &self.egraph.classes { + trace!("computing best for eclass {:?}", eclass_id); + if let Some(child1) = eclass.child1() { + trace!(" -> child {:?}", child1); + best[eclass_id] = best[child1]; + } + if let Some(child2) = eclass.child2() { + trace!(" -> child {:?}", child2); + if best[child2].0 < best[eclass_id].0 { + best[eclass_id] = best[child2]; + } + } + if let Some(node_key) = eclass.get_node() { + let node = node_key.node(&self.egraph.nodes); + trace!(" -> eclass {:?}: node {:?}", eclass_id, node); + let (cost, id) = match node { + Node::Param { .. } + | Node::Inst { .. } + | Node::Load { .. } + | Node::Result { .. } => (Cost::zero(), eclass_id), + Node::Pure { op, .. 
} => { + let args_cost = self + .node_ctx + .children(node) + .iter() + .map(|&arg_id| { + trace!(" -> arg {:?}", arg_id); + best[arg_id].0 + }) + // Can't use `.sum()` for `Cost` types; do + // an explicit reduce instead. + .fold(Cost::zero(), Cost::add); + let level = self.egraph.analysis_value(eclass_id).loop_level; + let cost = op_cost(op).at_level(level) + args_cost; + (cost, eclass_id) + } + }; + + if cost < best[eclass_id].0 { + best[eclass_id] = (cost, id); + } + } + debug_assert_ne!(best[eclass_id].0, Cost::infinity()); + debug_assert_ne!(best[eclass_id].1, Id::invalid()); + trace!("best for eclass {:?}: {:?}", eclass_id, best[eclass_id]); + } + } + + fn elaborate_eclass_use(&mut self, id: Id) { + self.elab_stack.push(ElabStackEntry::Start { id }); + self.process_elab_stack(); + debug_assert_eq!(self.elab_result_stack.len(), 1); + self.elab_result_stack.clear(); + } + + fn process_elab_stack(&mut self) { + while let Some(entry) = self.elab_stack.last() { + match entry { + &ElabStackEntry::Start { id } => { + // We always replace the Start entry, so pop it now. + self.elab_stack.pop(); + + self.stats.elaborate_visit_node += 1; + let canonical = self.egraph.canonical_id(id); + trace!("elaborate: id {}", id); + + let remat = if let Some(val) = self.id_to_value.get(&canonical) { + // Look at the defined block, and determine whether this + // node kind allows rematerialization if the value comes + // from another block. If so, ignore the hit and recompute + // below. + let remat = val.block() != self.cur_block.unwrap() + && self.remat_ids.contains(&canonical); + if !remat { + trace!("elaborate: id {} -> {:?}", id, val); + self.stats.elaborate_memoize_hit += 1; + self.elab_result_stack.push(val.clone()); + continue; + } + trace!("elaborate: id {} -> remat", id); + self.stats.elaborate_memoize_miss_remat += 1; + // The op is pure at this point, so it is always valid to + // remove from this map. 
+ self.id_to_value.remove(&canonical); + true + } else { + self.remat_ids.contains(&canonical) + }; + self.stats.elaborate_memoize_miss += 1; + + // Get the best option; we use `id` (latest id) here so we + // have a full view of the eclass. + let (_, best_node_eclass) = self.id_to_best_cost_and_node[id]; + debug_assert_ne!(best_node_eclass, Id::invalid()); + + trace!( + "elaborate: id {} -> best {} -> eclass node {:?}", + id, + best_node_eclass, + self.egraph.classes[best_node_eclass] + ); + let node_key = self.egraph.classes[best_node_eclass].get_node().unwrap(); + let node = node_key.node(&self.egraph.nodes); + trace!(" -> enode {:?}", node); + + // Is the node a block param? We should never get here if so + // (they are inserted when first visiting the block). + if matches!(node, Node::Param { .. }) { + unreachable!("Param nodes should already be inserted"); + } + + // Is the node a result projection? If so, resolve + // the value we are projecting a part of, then + // eventually return here (saving state with a + // PendingProjection). + if let Node::Result { value, result, .. } = node { + trace!(" -> result; pushing arg value {}", value); + self.elab_stack.push(ElabStackEntry::PendingProjection { + index: *result, + canonical, + }); + self.elab_stack.push(ElabStackEntry::Start { id: *value }); + continue; + } + + // We're going to need to emit this + // operator. First, enqueue all args to be + // elaborated. Push state to receive the results + // and later elab this node. + let num_args = self.node_ctx.children(&node).len(); + self.elab_stack.push(ElabStackEntry::PendingNode { + canonical, + node_key, + remat, + num_args, + }); + // Push args in reverse order so we process the + // first arg first. 
+ for &arg_id in self.node_ctx.children(&node).iter().rev() { + self.elab_stack.push(ElabStackEntry::Start { id: arg_id }); + } + } + + &ElabStackEntry::PendingNode { + canonical, + node_key, + remat, + num_args, + } => { + self.elab_stack.pop(); + + let node = node_key.node(&self.egraph.nodes); + + // We should have all args resolved at this point. + let arg_idx = self.elab_result_stack.len() - num_args; + let args = &self.elab_result_stack[arg_idx..]; + + // Gather the individual output-CLIF `Value`s. + let arg_values: SmallVec<[Value; 8]> = args + .iter() + .map(|idvalue| match idvalue { + IdValue::Value { value, .. } => *value, + IdValue::Values { .. } => { + panic!("enode depends directly on multi-value result") + } + }) + .collect(); + + // Compute max loop depth. + let max_loop_depth = args + .iter() + .map(|idvalue| match idvalue { + IdValue::Value { depth, .. } => *depth, + IdValue::Values { .. } => unreachable!(), + }) + .max() + .unwrap_or(0); + + // Remove args from result stack. + self.elab_result_stack.truncate(arg_idx); + + // Determine the location at which we emit it. This is the + // current block *unless* we hoist above a loop when all args + // are loop-invariant (and this op is pure). + let (loop_depth, scope_depth, block) = if node.is_non_pure() { + // Non-pure op: always at the current location. + ( + self.cur_loop_depth(), + self.id_to_value.depth(), + self.cur_block.unwrap(), + ) + } else if max_loop_depth == self.cur_loop_depth() || remat { + // Pure op, but depends on some value at the current loop + // depth, or remat forces it here: as above. + ( + self.cur_loop_depth(), + self.id_to_value.depth(), + self.cur_block.unwrap(), + ) + } else { + // Pure op, and does not depend on any args at current + // loop depth: hoist out of loop. + self.stats.elaborate_licm_hoist += 1; + let data = &self.loop_stack[max_loop_depth as usize]; + (max_loop_depth, data.scope_depth as usize, data.hoist_block) + }; + // Loop scopes are a subset of all scopes. 
+ debug_assert!(scope_depth >= loop_depth as usize); + + // This is an actual operation; emit the node in sequence now. + let results = self.add_node(node, &arg_values[..], block); + let results_slice = results.as_slice(&self.func.dfg.value_lists); + + // Build the result and memoize in the id-to-value map. + let result = if results_slice.len() == 1 { + IdValue::Value { + depth: loop_depth, + block, + value: results_slice[0], + } + } else { + IdValue::Values { + depth: loop_depth, + block, + values: results, + } + }; + + self.id_to_value.insert_if_absent_with_depth( + canonical, + result.clone(), + scope_depth, + ); + + // Push onto the elab-results stack. + self.elab_result_stack.push(result) + } + &ElabStackEntry::PendingProjection { index, canonical } => { + self.elab_stack.pop(); + + // Grab the input from the elab-result stack. + let value = self.elab_result_stack.pop().expect("Should have result"); + + let (depth, block, values) = match value { + IdValue::Values { + depth, + block, + values, + .. + } => (depth, block, values), + IdValue::Value { .. 
} => { + unreachable!("Projection nodes should not be used on single results"); + } + }; + let values = values.as_slice(&self.func.dfg.value_lists); + let value = IdValue::Value { + depth, + block, + value: values[index], + }; + self.id_to_value.insert_if_absent(canonical, value.clone()); + + self.elab_result_stack.push(value); + } + } + } + } + + fn elaborate_block<'b, PF: Fn(Block) -> &'b [(Id, Type)], SEF: Fn(Block) -> &'b [Id]>( + &mut self, + idom: Option, + block: Block, + block_params_fn: &PF, + block_side_effects_fn: &SEF, + ) { + let blockparam_ids_tys = (block_params_fn)(block); + self.start_block(idom, block, blockparam_ids_tys); + for &id in (block_side_effects_fn)(block) { + self.elaborate_eclass_use(id); + } + } + + fn elaborate_domtree<'b, PF: Fn(Block) -> &'b [(Id, Type)], SEF: Fn(Block) -> &'b [Id]>( + &mut self, + block_params_fn: &PF, + block_side_effects_fn: &SEF, + domtree: &DomTreeWithChildren, + ) { + let root = domtree.root(); + self.block_stack.push(BlockStackEntry::Elaborate { + block: root, + idom: None, + }); + while let Some(top) = self.block_stack.pop() { + match top { + BlockStackEntry::Elaborate { block, idom } => { + self.block_stack.push(BlockStackEntry::Pop); + self.id_to_value.increment_depth(); + + self.elaborate_block(idom, block, block_params_fn, block_side_effects_fn); + + // Push children. We are doing a preorder + // traversal so we do this after processing this + // block above. + let block_stack_end = self.block_stack.len(); + for child in domtree.children(block) { + self.block_stack.push(BlockStackEntry::Elaborate { + block: child, + idom: Some(block), + }); + } + // Reverse what we just pushed so we elaborate in + // original block order. (The domtree iter is a + // single-ended iter over a singly-linked list so + // we can't `.rev()` above.) 
+ self.block_stack[block_stack_end..].reverse(); + } + BlockStackEntry::Pop => { + self.id_to_value.decrement_depth(); + if let Some(innermost_loop) = self.loop_stack.last() { + if innermost_loop.scope_depth as usize == self.id_to_value.depth() { + self.loop_stack.pop(); + } + } + } + } + } + } + + fn clear_func_body(&mut self) { + // Clear all instructions and args/results from the DFG. We + // rebuild them entirely during elaboration. (TODO: reuse the + // existing inst for the *first* copy of a given node.) + self.func.dfg.clear_insts(); + // Clear the instructions in every block, but leave the list + // of blocks and their layout unmodified. + self.func.layout.clear_insts(); + self.func.srclocs.clear(); + } + + pub(crate) fn elaborate<'b, PF: Fn(Block) -> &'b [(Id, Type)], SEF: Fn(Block) -> &'b [Id]>( + &mut self, + block_params_fn: PF, + block_side_effects_fn: SEF, + ) { + let domtree = DomTreeWithChildren::new(self.func, self.domtree); + self.stats.elaborate_func += 1; + self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64; + self.clear_func_body(); + self.compute_best_nodes(); + self.elaborate_domtree(&block_params_fn, &block_side_effects_fn, &domtree); + self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64; + } +} diff --git a/cranelift/codegen/src/egraph/node.rs b/cranelift/codegen/src/egraph/node.rs new file mode 100644 index 0000000000..2e8ea42b2f --- /dev/null +++ b/cranelift/codegen/src/egraph/node.rs @@ -0,0 +1,376 @@ +//! Node definition for EGraph representation. + +use super::MemoryState; +use crate::ir::{Block, DataFlowGraph, Inst, InstructionImms, Opcode, RelSourceLoc, Type}; +use crate::loop_analysis::LoopLevel; +use cranelift_egraph::{BumpArena, BumpSlice, CtxEq, CtxHash, Id, Language, UnionFind}; +use cranelift_entity::{EntityList, ListPool}; +use std::hash::{Hash, Hasher}; + +#[derive(Debug)] +pub enum Node { + /// A blockparam. 
Effectively an input/root; does not refer to + /// predecessors' branch arguments, because this would create + /// cycles. + Param { + /// CLIF block this param comes from. + block: Block, + /// Index of blockparam within block. + index: u32, + /// Type of the value. + ty: Type, + /// The loop level of this Param. + loop_level: LoopLevel, + }, + /// A CLIF instruction that is pure (has no side-effects). Not + /// tied to any location; we will compute a set of locations at + /// which to compute this node during lowering back out of the + /// egraph. + Pure { + /// The instruction data, without SSA values. + op: InstructionImms, + /// eclass arguments to the operator. + args: EntityList, + /// Types of results. + types: BumpSlice, + }, + /// A CLIF instruction that has side-effects or is otherwise not + /// representable by `Pure`. + Inst { + /// The instruction data, without SSA values. + op: InstructionImms, + /// eclass arguments to the operator. + args: EntityList, + /// Types of results. + types: BumpSlice, + /// The index of the original instruction. We include this so + /// that the `Inst`s are not deduplicated: every instance is a + /// logically separate and unique side-effect. However, + /// because we clear the DataFlowGraph before elaboration, + /// this `Inst` is *not* valid to fetch any details from the + /// original instruction. + inst: Inst, + /// The source location to preserve. + srcloc: RelSourceLoc, + /// The loop level of this Inst. + loop_level: LoopLevel, + }, + /// A projection of one result of an `Inst` or `Pure`. + Result { + /// `Inst` or `Pure` node. + value: Id, + /// Index of the result we want. + result: usize, + /// Type of the value. + ty: Type, + }, + + /// A load instruction. 
Nominally a side-effecting `Inst` (and + /// included in the list of side-effecting roots so it will always + /// be elaborated), but represented as a distinct kind of node so + /// that we can leverage deduplication to do + /// redundant-load-elimination for free (and make store-to-load + /// forwarding much easier). + Load { + // -- identity depends on: + /// The original load operation. Must have one argument, the + /// address. + op: InstructionImms, + /// The type of the load result. + ty: Type, + /// Address argument. Actual address has an offset, which is + /// included in `op` (and thus already considered as part of + /// the key). + addr: Id, + /// The abstract memory state that this load accesses. + mem_state: MemoryState, + + // -- not included in dedup key: + /// The `Inst` we will use for a trap location for this + /// load. Excluded from Eq/Hash so that loads that are + /// identical except for the specific instance will dedup on + /// top of each other. + inst: Inst, + /// Source location, for traps. Not included in Eq/Hash. + srcloc: RelSourceLoc, + }, +} + +impl Node { + pub(crate) fn is_non_pure(&self) -> bool { + match self { + Node::Inst { .. } | Node::Load { .. } => true, + _ => false, + } + } +} + +/// Shared pools for type and id lists in nodes. +pub struct NodeCtx { + /// Arena for result-type arrays. + pub types: BumpArena, + /// Arena for arg eclass-ID lists. 
+ pub args: ListPool, +} + +impl NodeCtx { + pub(crate) fn with_capacity_for_dfg(dfg: &DataFlowGraph) -> Self { + let n_types = dfg.num_values(); + let n_args = dfg.value_lists.capacity(); + Self { + types: BumpArena::arena_with_capacity(n_types), + args: ListPool::with_capacity(n_args), + } + } +} + +impl NodeCtx { + fn ids_eq(&self, a: &EntityList, b: &EntityList, uf: &mut UnionFind) -> bool { + let a = a.as_slice(&self.args); + let b = b.as_slice(&self.args); + a.len() == b.len() && a.iter().zip(b.iter()).all(|(&a, &b)| uf.equiv_id_mut(a, b)) + } + + fn hash_ids(&self, a: &EntityList, hash: &mut H, uf: &mut UnionFind) { + let a = a.as_slice(&self.args); + for &id in a { + uf.hash_id_mut(hash, id); + } + } +} + +impl CtxEq for NodeCtx { + fn ctx_eq(&self, a: &Node, b: &Node, uf: &mut UnionFind) -> bool { + match (a, b) { + ( + &Node::Param { + block, + index, + ty, + loop_level: _, + }, + &Node::Param { + block: other_block, + index: other_index, + ty: other_ty, + loop_level: _, + }, + ) => block == other_block && index == other_index && ty == other_ty, + ( + &Node::Result { value, result, ty }, + &Node::Result { + value: other_value, + result: other_result, + ty: other_ty, + }, + ) => uf.equiv_id_mut(value, other_value) && result == other_result && ty == other_ty, + ( + &Node::Pure { + ref op, + ref args, + ref types, + }, + &Node::Pure { + op: ref other_op, + args: ref other_args, + types: ref other_types, + }, + ) => { + *op == *other_op + && self.ids_eq(args, other_args, uf) + && types.as_slice(&self.types) == other_types.as_slice(&self.types) + } + ( + &Node::Inst { inst, ref args, .. }, + &Node::Inst { + inst: other_inst, + args: ref other_args, + .. + }, + ) => inst == other_inst && self.ids_eq(args, other_args, uf), + ( + &Node::Load { + ref op, + ty, + addr, + mem_state, + .. + }, + &Node::Load { + op: ref other_op, + ty: other_ty, + addr: other_addr, + mem_state: other_mem_state, + // Explicitly exclude: `inst` and `srcloc`. 
We + // want loads to merge if identical in + // opcode/offset, address expression, and last + // store (this does implicit + // redundant-load-elimination.) + // + // Note however that we *do* include `ty` (the + // type) and match on that: we otherwise would + // have no way of disambiguating loads of + // different widths to the same address. + .. + }, + ) => { + op == other_op + && ty == other_ty + && uf.equiv_id_mut(addr, other_addr) + && mem_state == other_mem_state + } + _ => false, + } + } +} + +impl CtxHash for NodeCtx { + fn ctx_hash(&self, value: &Node, uf: &mut UnionFind) -> u64 { + let mut state = crate::fx::FxHasher::default(); + std::mem::discriminant(value).hash(&mut state); + match value { + &Node::Param { + block, + index, + ty: _, + loop_level: _, + } => { + block.hash(&mut state); + index.hash(&mut state); + } + &Node::Result { + value, + result, + ty: _, + } => { + uf.hash_id_mut(&mut state, value); + result.hash(&mut state); + } + &Node::Pure { + ref op, + ref args, + types: _, + } => { + op.hash(&mut state); + self.hash_ids(args, &mut state, uf); + // Don't hash `types`: it requires an indirection + // (hence cache misses), and result type *should* be + // fully determined by op and args. + } + &Node::Inst { inst, ref args, .. } => { + inst.hash(&mut state); + self.hash_ids(args, &mut state, uf); + } + &Node::Load { + ref op, + ty, + addr, + mem_state, + .. + } => { + op.hash(&mut state); + ty.hash(&mut state); + uf.hash_id_mut(&mut state, addr); + mem_state.hash(&mut state); + } + } + + state.finish() + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) struct Cost(u32); +impl Cost { + pub(crate) fn at_level(&self, loop_level: LoopLevel) -> Cost { + let loop_level = std::cmp::min(2, loop_level.level()); + let multiplier = 1u32 << ((10 * loop_level) as u32); + Cost(self.0.saturating_mul(multiplier)).finite() + } + + pub(crate) fn infinity() -> Cost { + // 2^32 - 1 is, uh, pretty close to infinite... 
(we use `Cost` + // only for heuristics and always saturate so this suffices!) + Cost(u32::MAX) + } + + pub(crate) fn zero() -> Cost { + Cost(0) + } + + /// Clamp this cost at a "finite" value. Can be used in + /// conjunction with saturating ops to avoid saturating into + /// `infinity()`. + fn finite(self) -> Cost { + Cost(std::cmp::min(u32::MAX - 1, self.0)) + } +} + +impl std::default::Default for Cost { + fn default() -> Cost { + Cost::zero() + } +} + +impl std::ops::Add for Cost { + type Output = Cost; + fn add(self, other: Cost) -> Cost { + Cost(self.0.saturating_add(other.0)).finite() + } +} + +pub(crate) fn op_cost(op: &InstructionImms) -> Cost { + match op.opcode() { + // Constants. + Opcode::Iconst | Opcode::F32const | Opcode::F64const | Opcode::Bconst => Cost(0), + // Extends/reduces. + Opcode::Bextend + | Opcode::Breduce + | Opcode::Uextend + | Opcode::Sextend + | Opcode::Ireduce + | Opcode::Iconcat + | Opcode::Isplit => Cost(1), + // "Simple" arithmetic. + Opcode::Iadd + | Opcode::Isub + | Opcode::Band + | Opcode::BandNot + | Opcode::Bor + | Opcode::BorNot + | Opcode::Bxor + | Opcode::BxorNot + | Opcode::Bnot => Cost(2), + // Everything else. + _ => Cost(3), + } +} + +impl Language for NodeCtx { + type Node = Node; + + fn children<'a>(&'a self, node: &'a Node) -> &'a [Id] { + match node { + Node::Param { .. } => &[], + Node::Pure { args, .. } | Node::Inst { args, .. } => args.as_slice(&self.args), + Node::Load { addr, .. } => std::slice::from_ref(addr), + Node::Result { value, .. } => std::slice::from_ref(value), + } + } + + fn children_mut<'a>(&'a mut self, node: &'a mut Node) -> &'a mut [Id] { + match node { + Node::Param { .. } => &mut [], + Node::Pure { args, .. } | Node::Inst { args, .. } => args.as_mut_slice(&mut self.args), + Node::Load { addr, .. } => std::slice::from_mut(addr), + Node::Result { value, .. } => std::slice::from_mut(value), + } + } + + fn needs_dedup(&self, node: &Node) -> bool { + match node { + Node::Pure { .. 
} | Node::Load { .. } => true, + _ => false, + } + } +} diff --git a/cranelift/codegen/src/egraph/stores.rs b/cranelift/codegen/src/egraph/stores.rs new file mode 100644 index 0000000000..8ca3bd6671 --- /dev/null +++ b/cranelift/codegen/src/egraph/stores.rs @@ -0,0 +1,266 @@ +//! Last-store tracking via alias analysis. +//! +//! We partition memory state into several *disjoint pieces* of +//! "abstract state". There are a finite number of such pieces: +//! currently, we call them "heap", "table", "vmctx", and "other". Any +//! given address in memory belongs to exactly one disjoint piece. +//! +//! One never tracks which piece a concrete address belongs to at +//! runtime; this is a purely static concept. Instead, all +//! memory-accessing instructions (loads and stores) are labeled with +//! one of these four categories in the `MemFlags`. It is forbidden +//! for a load or store to access memory under one category and a +//! later load or store to access the same memory under a different +//! category. This is ensured to be true by construction during +//! frontend translation into CLIF and during legalization. +//! +//! Given that this non-aliasing property is ensured by the producer +//! of CLIF, we can compute a *may-alias* property: one load or store +//! may-alias another load or store if both access the same category +//! of abstract state. +//! +//! The "last store" pass helps to compute this aliasing: we perform a +//! fixpoint analysis to track the last instruction that *might have* +//! written to a given part of abstract state. We also track the block +//! containing this store. +//! +//! We can't say for sure that the "last store" *did* actually write +//! that state, but we know for sure that no instruction *later* than +//! it (up to the current instruction) did. However, we can get a +//! must-alias property from this: if at a given load or store, we +//! look backward to the "last store", *AND* we find that it has +//! 
exactly the same address expression and value type, then we know +//! that the current instruction's access *must* be to the same memory +//! location. +//! +//! To get this must-alias property, we leverage the node +//! hashconsing. We design the Eq/Hash (node identity relation +//! definition) of the `Node` struct so that all loads with (i) the +//! same "last store", and (ii) the same address expression, and (iii) +//! the same opcode-and-offset, will deduplicate (the first will be +//! computed, and the later ones will use the same value). Furthermore +//! we have an optimization that rewrites a load into the stored value +//! of the last store *if* the last store has the same address +//! expression and constant offset. +//! +//! This gives us two optimizations, "redundant load elimination" and +//! "store-to-load forwarding". +//! +//! In theory we could also do *dead-store elimination*, where if a +//! store overwrites a value earlier written by another store, *and* +//! if no other load/store to the abstract state category occurred, +//! *and* no other trapping instruction occurred (at which point we +//! need an up-to-date memory state because post-trap-termination +//! memory state can be observed), *and* we can prove the original +//! store could not have trapped, then we can eliminate the original +//! store. Because this is so complex, and the conditions for doing it +//! correctly when post-trap state must be correct likely reduce the +//! potential benefit, we don't yet do this. + +use crate::flowgraph::ControlFlowGraph; +use crate::fx::{FxHashMap, FxHashSet}; +use crate::inst_predicates::has_memory_fence_semantics; +use crate::ir::{Block, Function, Inst, InstructionData, MemFlags, Opcode}; +use crate::trace; +use cranelift_entity::SecondaryMap; +use smallvec::{smallvec, SmallVec}; + +/// For a given program point, the vector of last-store instruction +/// indices for each disjoint category of abstract state. 
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +struct LastStores { + heap: MemoryState, + table: MemoryState, + vmctx: MemoryState, + other: MemoryState, +} + +/// State of memory seen by a load. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +pub enum MemoryState { + /// State at function entry: nothing is known (but it is one + /// consistent value, so two loads from "entry" state at the same + /// address will still provide the same result). + #[default] + Entry, + /// State just after a store by the given instruction. The + /// instruction is a store from which we can forward. + Store(Inst), + /// State just before the given instruction. Used for abstract + /// value merges at merge-points when we cannot name a single + /// producing site. + BeforeInst(Inst), + /// State just after the given instruction. Used when the + /// instruction may update the associated state, but is not a + /// store whose value we can cleanly forward. (E.g., perhaps a + /// barrier of some sort.) 
+ AfterInst(Inst), +} + +impl LastStores { + fn update(&mut self, func: &Function, inst: Inst) { + let opcode = func.dfg[inst].opcode(); + if has_memory_fence_semantics(opcode) { + self.heap = MemoryState::AfterInst(inst); + self.table = MemoryState::AfterInst(inst); + self.vmctx = MemoryState::AfterInst(inst); + self.other = MemoryState::AfterInst(inst); + } else if opcode.can_store() { + if let Some(memflags) = func.dfg[inst].memflags() { + *self.for_flags(memflags) = MemoryState::Store(inst); + } else { + self.heap = MemoryState::AfterInst(inst); + self.table = MemoryState::AfterInst(inst); + self.vmctx = MemoryState::AfterInst(inst); + self.other = MemoryState::AfterInst(inst); + } + } + } + + fn for_flags(&mut self, memflags: MemFlags) -> &mut MemoryState { + if memflags.heap() { + &mut self.heap + } else if memflags.table() { + &mut self.table + } else if memflags.vmctx() { + &mut self.vmctx + } else { + &mut self.other + } + } + + fn meet_from(&mut self, other: &LastStores, loc: Inst) { + let meet = |a: MemoryState, b: MemoryState| -> MemoryState { + match (a, b) { + (a, b) if a == b => a, + _ => MemoryState::BeforeInst(loc), + } + }; + + self.heap = meet(self.heap, other.heap); + self.table = meet(self.table, other.table); + self.vmctx = meet(self.vmctx, other.vmctx); + self.other = meet(self.other, other.other); + } +} + +/// An alias-analysis pass. +pub struct AliasAnalysis { + /// Last-store instruction (or none) for a given load. Use a hash map + /// instead of a `SecondaryMap` because this is sparse. + load_mem_state: FxHashMap, +} + +impl AliasAnalysis { + /// Perform an alias analysis pass. 
+ pub fn new(func: &Function, cfg: &ControlFlowGraph) -> AliasAnalysis { + log::trace!("alias analysis: input is:\n{:?}", func); + let block_input = Self::compute_block_input_states(func, cfg); + let load_mem_state = Self::compute_load_last_stores(func, block_input); + AliasAnalysis { load_mem_state } + } + + fn compute_block_input_states( + func: &Function, + cfg: &ControlFlowGraph, + ) -> SecondaryMap> { + let mut block_input = SecondaryMap::with_capacity(func.dfg.num_blocks()); + let mut worklist: SmallVec<[Block; 8]> = smallvec![]; + let mut worklist_set = FxHashSet::default(); + let entry = func.layout.entry_block().unwrap(); + worklist.push(entry); + worklist_set.insert(entry); + block_input[entry] = Some(LastStores::default()); + + while let Some(block) = worklist.pop() { + worklist_set.remove(&block); + let state = block_input[block].clone().unwrap(); + + trace!("alias analysis: input to {} is {:?}", block, state); + + let state = func + .layout + .block_insts(block) + .fold(state, |mut state, inst| { + state.update(func, inst); + trace!("after {}: state is {:?}", inst, state); + state + }); + + for succ in cfg.succ_iter(block) { + let succ_first_inst = func.layout.first_inst(succ).unwrap(); + let succ_state = &mut block_input[succ]; + let old = succ_state.clone(); + if let Some(succ_state) = succ_state.as_mut() { + succ_state.meet_from(&state, succ_first_inst); + } else { + *succ_state = Some(state); + }; + let updated = *succ_state != old; + + if updated && worklist_set.insert(succ) { + worklist.push(succ); + } + } + } + + block_input + } + + fn compute_load_last_stores( + func: &Function, + block_input: SecondaryMap>, + ) -> FxHashMap { + let mut load_mem_state = FxHashMap::default(); + + for block in func.layout.blocks() { + let mut state = block_input[block].clone().unwrap(); + + for inst in func.layout.block_insts(block) { + trace!( + "alias analysis: scanning at {} with state {:?} ({:?})", + inst, + state, + func.dfg[inst], + ); + + // N.B.: we match 
`Load` specifically, and not any + // other kinds of loads (or any opcode such that + // `opcode.can_load()` returns true), because some + // "can load" instructions actually have very + // different semantics (are not just a load of a + // particularly-typed value). For example, atomic + // (load/store, RMW, CAS) instructions "can load" but + // definitely should not participate in store-to-load + // forwarding or redundant-load elimination. Our goal + // here is to provide a `MemoryState` just for plain + // old loads whose semantics we can completely reason + // about. + if let InstructionData::Load { + opcode: Opcode::Load, + flags, + .. + } = func.dfg[inst] + { + let mem_state = *state.for_flags(flags); + trace!( + "alias analysis: at {}: load with mem_state {:?}", + inst, + mem_state, + ); + + load_mem_state.insert(inst, mem_state); + } + + state.update(func, inst); + } + } + + load_mem_state + } + + /// Get the state seen by a load, if any. + pub fn get_state_for_load(&self, inst: Inst) -> Option { + self.load_mem_state.get(&inst).copied() + } +} diff --git a/cranelift/codegen/src/inst_predicates.rs b/cranelift/codegen/src/inst_predicates.rs index 6b6d76a6b2..cc16f9ac6d 100644 --- a/cranelift/codegen/src/inst_predicates.rs +++ b/cranelift/codegen/src/inst_predicates.rs @@ -11,6 +11,7 @@ pub fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> } /// Test whether the given opcode is unsafe to even consider as side-effect-free. +#[inline(always)] fn trivially_has_side_effects(opcode: Opcode) -> bool { opcode.is_call() || opcode.is_branch() @@ -24,6 +25,7 @@ fn trivially_has_side_effects(opcode: Opcode) -> bool { /// Load instructions without the `notrap` flag are defined to trap when /// operating on inaccessible memory, so we can't treat them as side-effect-free even if the loaded /// value is unused. 
+#[inline(always)] fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool { if !opcode.can_load() { return false; @@ -37,6 +39,7 @@ fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool /// Does the given instruction have any side-effect that would preclude it from being removed when /// its value is unused? +#[inline(always)] pub fn has_side_effect(func: &Function, inst: Inst) -> bool { let data = &func.dfg[inst]; let opcode = data.opcode(); @@ -123,8 +126,10 @@ pub fn has_memory_fence_semantics(op: Opcode) -> bool { | Opcode::AtomicCas | Opcode::AtomicLoad | Opcode::AtomicStore - | Opcode::Fence => true, + | Opcode::Fence + | Opcode::Debugtrap => true, Opcode::Call | Opcode::CallIndirect => true, + op if op.can_trap() => true, _ => false, } } diff --git a/cranelift/codegen/src/ir/dfg.rs b/cranelift/codegen/src/ir/dfg.rs index 930816b291..1da10598c8 100644 --- a/cranelift/codegen/src/ir/dfg.rs +++ b/cranelift/codegen/src/ir/dfg.rs @@ -120,6 +120,23 @@ impl DataFlowGraph { self.immediates.clear(); } + /// Clear all instructions, but keep blocks and other metadata + /// (signatures, constants, immediates). Everything to do with + /// `Value`s is cleared, including block params and debug info. + /// + /// Used during egraph-based optimization to clear out the pre-opt + /// body so that we can regenerate it from the egraph. + pub(crate) fn clear_insts(&mut self) { + self.insts.clear(); + self.results.clear(); + self.value_lists.clear(); + self.values.clear(); + self.values_labels = None; + for block in self.blocks.values_mut() { + block.params = ValueList::new(); + } + } + /// Get the total number of instructions created in this function, whether they are currently /// inserted in the layout or not. 
/// diff --git a/cranelift/codegen/src/ir/function.rs b/cranelift/codegen/src/ir/function.rs index 5ed3cb553f..4fdcfdde64 100644 --- a/cranelift/codegen/src/ir/function.rs +++ b/cranelift/codegen/src/ir/function.rs @@ -189,7 +189,7 @@ pub struct FunctionStencil { /// /// Track the original source location for each instruction. The source locations are not /// interpreted by Cranelift, only preserved. - srclocs: SourceLocs, + pub srclocs: SourceLocs, /// An optional global value which represents an expression evaluating to /// the stack limit for this function. This `GlobalValue` will be diff --git a/cranelift/codegen/src/ir/layout.rs b/cranelift/codegen/src/ir/layout.rs index 7162c848c5..819c332d45 100644 --- a/cranelift/codegen/src/ir/layout.rs +++ b/cranelift/codegen/src/ir/layout.rs @@ -61,6 +61,18 @@ impl Layout { self.last_block = None; } + /// Clear instructions from every block, but keep the blocks. + /// + /// Used by the egraph-based optimization to clear out the + /// function body but keep the CFG skeleton. + pub(crate) fn clear_insts(&mut self) { + self.insts.clear(); + for block in self.blocks.values_mut() { + block.first_inst = None.into(); + block.last_inst = None.into(); + } + } + /// Returns the capacity of the `BlockData` map. 
pub fn block_capacity(&self) -> usize { self.blocks.capacity() diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs index 8ba18987da..52a4b60373 100644 --- a/cranelift/codegen/src/ir/mod.rs +++ b/cranelift/codegen/src/ir/mod.rs @@ -48,7 +48,7 @@ pub use crate::ir::function::{DisplayFunctionAnnotations, Function}; pub use crate::ir::globalvalue::GlobalValueData; pub use crate::ir::heap::{HeapData, HeapStyle}; pub use crate::ir::instructions::{ - InstructionData, Opcode, ValueList, ValueListPool, VariableArgs, + InstructionData, InstructionImms, Opcode, ValueList, ValueListPool, VariableArgs, }; pub use crate::ir::jumptable::JumpTableData; pub use crate::ir::known_symbol::KnownSymbol; diff --git a/cranelift/codegen/src/ir/sourceloc.rs b/cranelift/codegen/src/ir/sourceloc.rs index 57e7a4160c..21f7da5ab5 100644 --- a/cranelift/codegen/src/ir/sourceloc.rs +++ b/cranelift/codegen/src/ir/sourceloc.rs @@ -14,7 +14,7 @@ use serde::{Deserialize, Serialize}; /// /// The default source location uses the all-ones bit pattern `!0`. It is used for instructions /// that can't be given a real source location. 
-#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct SourceLoc(u32); diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index 5308280134..ccd7411679 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -34,6 +34,7 @@ use crate::{ abi::ArgPair, ty_bits, InsnOutput, Lower, MachInst, VCodeConstant, VCodeConstantData, }, }; +use crate::{isle_common_prelude_methods, isle_lower_prelude_methods}; use regalloc2::PReg; use std::boxed::Box; use std::convert::TryFrom; @@ -96,7 +97,7 @@ impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { } impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { - isle_prelude_methods!(); + isle_lower_prelude_methods!(); isle_prelude_caller_methods!(crate::isa::aarch64::abi::AArch64MachineDeps, AArch64Caller); fn sign_return_address_disabled(&mut self) -> Option<()> { diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index cb875e5f73..808c79f716 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -41,10 +41,25 @@ pub(crate) fn lower_insn_to_regs( match op { Opcode::Iconst | Opcode::Bconst | Opcode::Null => implemented_in_isle(ctx), - Opcode::F32const | Opcode::F64const => unreachable!( - "Should never see constant ops at top level lowering entry - point, as constants are rematerialized at use-sites" - ), + Opcode::F32const => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let val = ctx.get_constant(insn).unwrap(); + for inst in + Inst::load_fp_constant32(rd, val as u32, |ty| ctx.alloc_tmp(ty).only_reg().unwrap()) + { + ctx.emit(inst); + } + } + + Opcode::F64const => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let val 
= ctx.get_constant(insn).unwrap(); + for inst in + Inst::load_fp_constant64(rd, val, |ty| ctx.alloc_tmp(ty).only_reg().unwrap()) + { + ctx.emit(inst); + } + } Opcode::GetFramePointer | Opcode::GetStackPointer | Opcode::GetReturnAddress => { implemented_in_isle(ctx) diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index a132c470df..1a0a7c9972 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -62,7 +62,15 @@ impl AArch64Backend { let emit_info = EmitInfo::new(flags.clone()); let sigs = SigSet::new::(func, &self.flags)?; let abi = abi::AArch64Callee::new(func, self, &self.isa_flags, &sigs)?; - compile::compile::(func, self, abi, &self.machine_env, emit_info, sigs) + compile::compile::( + func, + flags, + self, + abi, + &self.machine_env, + emit_info, + sigs, + ) } } diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index 056b499c69..c9038daab7 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -5,17 +5,14 @@ pub mod generated_code; use generated_code::{Context, MInst}; -use target_lexicon::Triple; - // Types that the generated ISLE code uses via `use super::*`. 
use super::{writable_zero_reg, zero_reg}; -use std::vec::Vec; - +use crate::isa::riscv64::abi::Riscv64ABICaller; use crate::isa::riscv64::settings::Flags as IsaFlags; +use crate::machinst::Reg; use crate::machinst::{isle::*, MachInst, SmallInstVec}; -use crate::settings::Flags; - use crate::machinst::{VCodeConstant, VCodeConstantData}; +use crate::settings::Flags; use crate::{ ir::{ immediates::*, types::*, AtomicRmwOp, ExternalName, Inst, InstructionData, MemFlags, @@ -24,13 +21,12 @@ use crate::{ isa::riscv64::inst::*, machinst::{ArgPair, InsnOutput, Lower}, }; +use crate::{isle_common_prelude_methods, isle_lower_prelude_methods}; use regalloc2::PReg; - -use crate::isa::riscv64::abi::Riscv64ABICaller; use std::boxed::Box; use std::convert::TryFrom; - -use crate::machinst::Reg; +use std::vec::Vec; +use target_lexicon::Triple; type BoxCallInfo = Box; type BoxCallIndInfo = Box; @@ -64,7 +60,7 @@ impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { } impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { - isle_prelude_methods!(); + isle_lower_prelude_methods!(); isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICaller); fn vec_writable_to_regs(&mut self, val: &VecWritableReg) -> ValueRegs { diff --git a/cranelift/codegen/src/isa/riscv64/mod.rs b/cranelift/codegen/src/isa/riscv64/mod.rs index 3632ed1d8c..d47d2ec618 100644 --- a/cranelift/codegen/src/isa/riscv64/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/mod.rs @@ -62,7 +62,7 @@ impl Riscv64Backend { let emit_info = EmitInfo::new(flags.clone(), self.isa_flags.clone()); let sigs = SigSet::new::(func, &self.flags)?; let abi = abi::Riscv64Callee::new(func, self, &self.isa_flags, &sigs)?; - compile::compile::(func, self, abi, &self.mach_env, emit_info, sigs) + compile::compile::(func, flags, self, abi, &self.mach_env, emit_info, sigs) } } diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index a2f4838836..b41ae258f0 100644 
--- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -24,6 +24,7 @@ use crate::{ machinst::abi::ABIMachineSpec, machinst::{ArgPair, InsnOutput, Lower, MachInst, VCodeConstant, VCodeConstantData}, }; +use crate::{isle_common_prelude_methods, isle_lower_prelude_methods}; use regalloc2::PReg; use smallvec::{smallvec, SmallVec}; use std::boxed::Box; @@ -88,7 +89,7 @@ pub(crate) fn lower_branch( } impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { - isle_prelude_methods!(); + isle_lower_prelude_methods!(); fn abi_sig(&mut self, sig_ref: SigRef) -> Sig { self.lower_ctx.sigs().abi_sig_for_sig_ref(sig_ref) diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs index 877a7374f0..a5b85f6d6f 100644 --- a/cranelift/codegen/src/isa/s390x/mod.rs +++ b/cranelift/codegen/src/isa/s390x/mod.rs @@ -60,7 +60,15 @@ impl S390xBackend { let emit_info = EmitInfo::new(self.isa_flags.clone()); let sigs = SigSet::new::(func, &self.flags)?; let abi = abi::S390xCallee::new(func, self, &self.isa_flags, &sigs)?; - compile::compile::(func, self, abi, &self.machine_env, emit_info, sigs) + compile::compile::( + func, + self.flags.clone(), + self, + abi, + &self.machine_env, + emit_info, + sigs, + ) } } diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index e7df356074..2e624b039c 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -7,6 +7,7 @@ use crate::{ ir::AtomicRmwOp, machinst::{InputSourceInst, Reg, Writable}, }; +use crate::{isle_common_prelude_methods, isle_lower_prelude_methods}; use generated_code::{Context, MInst, RegisterClass}; // Types that the generated ISLE code uses via `use super::*`. 
@@ -92,7 +93,7 @@ pub(crate) fn lower_branch( } impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { - isle_prelude_methods!(); + isle_lower_prelude_methods!(); isle_prelude_caller_methods!(X64ABIMachineSpec, X64Caller); #[inline] diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index ddb3523578..3d20183fdb 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -55,7 +55,7 @@ impl X64Backend { let emit_info = EmitInfo::new(flags.clone(), self.x64_flags.clone()); let sigs = SigSet::new::(func, &self.flags)?; let abi = abi::X64Callee::new(&func, self, &self.x64_flags, &sigs)?; - compile::compile::(&func, self, abi, &self.reg_env, emit_info, sigs) + compile::compile::(&func, flags, self, abi, &self.reg_env, emit_info, sigs) } } diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs new file mode 100644 index 0000000000..6339f3efb3 --- /dev/null +++ b/cranelift/codegen/src/isle_prelude.rs @@ -0,0 +1,604 @@ +//! Shared ISLE prelude implementation for optimization (mid-end) and +//! lowering (backend) ISLE environments. + +/// Helper macro to define methods in `prelude.isle` within `impl Context for +/// ...` for each backend. These methods are shared amongst all backends. +#[macro_export] +#[doc(hidden)] +macro_rules! isle_common_prelude_methods { + () => { + /// We don't have a way of making a `()` value in isle directly. 
+ #[inline] + fn unit(&mut self) -> Unit { + () + } + + #[inline] + fn u8_as_u32(&mut self, x: u8) -> Option { + Some(x.into()) + } + + #[inline] + fn u8_as_u64(&mut self, x: u8) -> Option { + Some(x.into()) + } + + #[inline] + fn u16_as_u64(&mut self, x: u16) -> Option { + Some(x.into()) + } + + #[inline] + fn u32_as_u64(&mut self, x: u32) -> Option { + Some(x.into()) + } + + #[inline] + fn i64_as_u64(&mut self, x: i64) -> Option { + Some(x as u64) + } + + #[inline] + fn u64_add(&mut self, x: u64, y: u64) -> Option { + Some(x.wrapping_add(y)) + } + + #[inline] + fn u64_sub(&mut self, x: u64, y: u64) -> Option { + Some(x.wrapping_sub(y)) + } + + #[inline] + fn u64_mul(&mut self, x: u64, y: u64) -> Option { + Some(x.wrapping_mul(y)) + } + + #[inline] + fn u64_sdiv(&mut self, x: u64, y: u64) -> Option { + let x = x as i64; + let y = y as i64; + x.checked_div(y).map(|d| d as u64) + } + + #[inline] + fn u64_udiv(&mut self, x: u64, y: u64) -> Option { + x.checked_div(y) + } + + #[inline] + fn u64_and(&mut self, x: u64, y: u64) -> Option { + Some(x & y) + } + + #[inline] + fn u64_or(&mut self, x: u64, y: u64) -> Option { + Some(x | y) + } + + #[inline] + fn u64_xor(&mut self, x: u64, y: u64) -> Option { + Some(x ^ y) + } + + #[inline] + fn u64_not(&mut self, x: u64) -> Option { + Some(!x) + } + + #[inline] + fn u64_is_zero(&mut self, value: u64) -> bool { + 0 == value + } + + #[inline] + fn u64_sextend_u32(&mut self, x: u64) -> Option { + Some(x as u32 as i32 as i64 as u64) + } + + #[inline] + fn ty_bits(&mut self, ty: Type) -> Option { + use std::convert::TryInto; + Some(ty.bits().try_into().unwrap()) + } + + #[inline] + fn ty_bits_u16(&mut self, ty: Type) -> u16 { + ty.bits() as u16 + } + + #[inline] + fn ty_bits_u64(&mut self, ty: Type) -> u64 { + ty.bits() as u64 + } + + #[inline] + fn ty_bytes(&mut self, ty: Type) -> u16 { + u16::try_from(ty.bytes()).unwrap() + } + + #[inline] + fn ty_mask(&mut self, ty: Type) -> u64 { + match ty.bits() { + 1 => 1, + 8 => 0xff, + 16 
=> 0xffff, + 32 => 0xffff_ffff, + 64 => 0xffff_ffff_ffff_ffff, + _ => unimplemented!(), + } + } + + fn fits_in_16(&mut self, ty: Type) -> Option { + if ty.bits() <= 16 && !ty.is_dynamic_vector() { + Some(ty) + } else { + None + } + } + + #[inline] + fn fits_in_32(&mut self, ty: Type) -> Option { + if ty.bits() <= 32 && !ty.is_dynamic_vector() { + Some(ty) + } else { + None + } + } + + #[inline] + fn lane_fits_in_32(&mut self, ty: Type) -> Option { + if !ty.is_vector() && !ty.is_dynamic_vector() { + None + } else if ty.lane_type().bits() <= 32 { + Some(ty) + } else { + None + } + } + + #[inline] + fn fits_in_64(&mut self, ty: Type) -> Option { + if ty.bits() <= 64 && !ty.is_dynamic_vector() { + Some(ty) + } else { + None + } + } + + #[inline] + fn ty_int_bool_ref_scalar_64(&mut self, ty: Type) -> Option { + if ty.bits() <= 64 && !ty.is_float() && !ty.is_vector() { + Some(ty) + } else { + None + } + } + + #[inline] + fn ty_32(&mut self, ty: Type) -> Option { + if ty.bits() == 32 { + Some(ty) + } else { + None + } + } + + #[inline] + fn ty_64(&mut self, ty: Type) -> Option { + if ty.bits() == 64 { + Some(ty) + } else { + None + } + } + + #[inline] + fn ty_32_or_64(&mut self, ty: Type) -> Option { + if ty.bits() == 32 || ty.bits() == 64 { + Some(ty) + } else { + None + } + } + + #[inline] + fn ty_8_or_16(&mut self, ty: Type) -> Option { + if ty.bits() == 8 || ty.bits() == 16 { + Some(ty) + } else { + None + } + } + + #[inline] + fn int_bool_fits_in_32(&mut self, ty: Type) -> Option { + match ty { + I8 | I16 | I32 | B8 | B16 | B32 => Some(ty), + _ => None, + } + } + + #[inline] + fn ty_int_bool_64(&mut self, ty: Type) -> Option { + match ty { + I64 | B64 => Some(ty), + _ => None, + } + } + + #[inline] + fn ty_int_bool_ref_64(&mut self, ty: Type) -> Option { + match ty { + I64 | B64 | R64 => Some(ty), + _ => None, + } + } + + #[inline] + fn ty_int_bool_128(&mut self, ty: Type) -> Option { + match ty { + I128 | B128 => Some(ty), + _ => None, + } + } + + #[inline] + fn 
ty_int(&mut self, ty: Type) -> Option { + ty.is_int().then(|| ty) + } + + #[inline] + fn ty_int_bool(&mut self, ty: Type) -> Option { + if ty.is_int() || ty.is_bool() { + Some(ty) + } else { + None + } + } + + #[inline] + fn ty_scalar_float(&mut self, ty: Type) -> Option { + match ty { + F32 | F64 => Some(ty), + _ => None, + } + } + + #[inline] + fn ty_float_or_vec(&mut self, ty: Type) -> Option { + match ty { + F32 | F64 => Some(ty), + ty if ty.is_vector() => Some(ty), + _ => None, + } + } + + fn ty_vector_float(&mut self, ty: Type) -> Option { + if ty.is_vector() && ty.lane_type().is_float() { + Some(ty) + } else { + None + } + } + + #[inline] + fn ty_vector_not_float(&mut self, ty: Type) -> Option { + if ty.is_vector() && !ty.lane_type().is_float() { + Some(ty) + } else { + None + } + } + + #[inline] + fn ty_vec64_ctor(&mut self, ty: Type) -> Option { + if ty.is_vector() && ty.bits() == 64 { + Some(ty) + } else { + None + } + } + + #[inline] + fn ty_vec64(&mut self, ty: Type) -> Option { + if ty.is_vector() && ty.bits() == 64 { + Some(ty) + } else { + None + } + } + + #[inline] + fn ty_vec128(&mut self, ty: Type) -> Option { + if ty.is_vector() && ty.bits() == 128 { + Some(ty) + } else { + None + } + } + + #[inline] + fn ty_dyn_vec64(&mut self, ty: Type) -> Option { + if ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 64 { + Some(ty) + } else { + None + } + } + + #[inline] + fn ty_dyn_vec128(&mut self, ty: Type) -> Option { + if ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 128 { + Some(ty) + } else { + None + } + } + + #[inline] + fn ty_vec64_int(&mut self, ty: Type) -> Option { + if ty.is_vector() && ty.bits() == 64 && ty.lane_type().is_int() { + Some(ty) + } else { + None + } + } + + #[inline] + fn ty_vec128_int(&mut self, ty: Type) -> Option { + if ty.is_vector() && ty.bits() == 128 && ty.lane_type().is_int() { + Some(ty) + } else { + None + } + } + + #[inline] + fn u64_from_imm64(&mut self, imm: Imm64) -> u64 { + imm.bits() as u64 + } + 
+ #[inline] + fn u64_from_bool(&mut self, b: bool) -> u64 { + if b { + u64::MAX + } else { + 0 + } + } + + #[inline] + fn multi_lane(&mut self, ty: Type) -> Option<(u32, u32)> { + if ty.lane_count() > 1 { + Some((ty.lane_bits(), ty.lane_count())) + } else { + None + } + } + + #[inline] + fn dynamic_lane(&mut self, ty: Type) -> Option<(u32, u32)> { + if ty.is_dynamic_vector() { + Some((ty.lane_bits(), ty.min_lane_count())) + } else { + None + } + } + + #[inline] + fn dynamic_int_lane(&mut self, ty: Type) -> Option { + if ty.is_dynamic_vector() && crate::machinst::ty_has_int_representation(ty.lane_type()) + { + Some(ty.lane_bits()) + } else { + None + } + } + + #[inline] + fn dynamic_fp_lane(&mut self, ty: Type) -> Option { + if ty.is_dynamic_vector() + && crate::machinst::ty_has_float_or_vec_representation(ty.lane_type()) + { + Some(ty.lane_bits()) + } else { + None + } + } + + #[inline] + fn ty_dyn64_int(&mut self, ty: Type) -> Option { + if ty.is_dynamic_vector() && ty.min_bits() == 64 && ty.lane_type().is_int() { + Some(ty) + } else { + None + } + } + + #[inline] + fn ty_dyn128_int(&mut self, ty: Type) -> Option { + if ty.is_dynamic_vector() && ty.min_bits() == 128 && ty.lane_type().is_int() { + Some(ty) + } else { + None + } + } + + fn u64_from_ieee32(&mut self, val: Ieee32) -> u64 { + val.bits().into() + } + + fn u64_from_ieee64(&mut self, val: Ieee64) -> u64 { + val.bits() + } + + fn u8_from_uimm8(&mut self, val: Uimm8) -> u8 { + val + } + + fn not_vec32x2(&mut self, ty: Type) -> Option { + if ty.lane_bits() == 32 && ty.lane_count() == 2 { + None + } else { + Some(ty) + } + } + + fn not_i64x2(&mut self, ty: Type) -> Option<()> { + if ty == I64X2 { + None + } else { + Some(()) + } + } + + fn trap_code_division_by_zero(&mut self) -> TrapCode { + TrapCode::IntegerDivisionByZero + } + + fn trap_code_integer_overflow(&mut self) -> TrapCode { + TrapCode::IntegerOverflow + } + + fn trap_code_bad_conversion_to_integer(&mut self) -> TrapCode { + 
TrapCode::BadConversionToInteger + } + + fn nonzero_u64_from_imm64(&mut self, val: Imm64) -> Option { + match val.bits() { + 0 => None, + n => Some(n as u64), + } + } + + #[inline] + fn u32_add(&mut self, a: u32, b: u32) -> u32 { + a.wrapping_add(b) + } + + #[inline] + fn s32_add_fallible(&mut self, a: u32, b: u32) -> Option { + let a = a as i32; + let b = b as i32; + a.checked_add(b).map(|sum| sum as u32) + } + + #[inline] + fn u32_nonnegative(&mut self, x: u32) -> Option { + if (x as i32) >= 0 { + Some(x) + } else { + None + } + } + + #[inline] + fn u32_lteq(&mut self, a: u32, b: u32) -> Option<()> { + if a <= b { + Some(()) + } else { + None + } + } + + #[inline] + fn u8_lteq(&mut self, a: u8, b: u8) -> Option<()> { + if a <= b { + Some(()) + } else { + None + } + } + + #[inline] + fn u8_lt(&mut self, a: u8, b: u8) -> Option<()> { + if a < b { + Some(()) + } else { + None + } + } + + #[inline] + fn imm64(&mut self, x: u64) -> Option { + Some(Imm64::new(x as i64)) + } + + #[inline] + fn simm32(&mut self, x: Imm64) -> Option { + let x64: i64 = x.into(); + let x32: i32 = x64.try_into().ok()?; + Some(x32 as u32) + } + + #[inline] + fn uimm8(&mut self, x: Imm64) -> Option { + let x64: i64 = x.into(); + let x8: u8 = x64.try_into().ok()?; + Some(x8) + } + + #[inline] + fn offset32(&mut self, x: Offset32) -> Option { + let x: i32 = x.into(); + Some(x as u32) + } + + #[inline] + fn u8_and(&mut self, a: u8, b: u8) -> u8 { + a & b + } + + #[inline] + fn lane_type(&mut self, ty: Type) -> Type { + ty.lane_type() + } + + #[inline] + fn offset32_to_u32(&mut self, offset: Offset32) -> u32 { + let offset: i32 = offset.into(); + offset as u32 + } + + fn range(&mut self, start: usize, end: usize) -> Range { + (start, end) + } + + fn range_view(&mut self, (start, end): Range) -> RangeView { + if start >= end { + RangeView::Empty + } else { + RangeView::NonEmpty { + index: start, + rest: (start + 1, end), + } + } + } + + #[inline] + fn mem_flags_trusted(&mut self) -> MemFlags { + 
MemFlags::trusted() + } + + #[inline] + fn intcc_unsigned(&mut self, x: &IntCC) -> IntCC { + x.unsigned() + } + + #[inline] + fn signed_cond_code(&mut self, cc: &condcodes::IntCC) -> Option { + match cc { + IntCC::Equal + | IntCC::UnsignedGreaterThanOrEqual + | IntCC::UnsignedGreaterThan + | IntCC::UnsignedLessThanOrEqual + | IntCC::UnsignedLessThan + | IntCC::NotEqual => None, + IntCC::SignedGreaterThanOrEqual + | IntCC::SignedGreaterThan + | IntCC::SignedLessThanOrEqual + | IntCC::SignedLessThan => Some(*cc), + } + } + }; +} diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index e244fbafc1..3c2ffad5c3 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -97,12 +97,15 @@ mod constant_hash; mod context; mod dce; mod divconst_magic_numbers; +mod egraph; mod fx; mod inst_predicates; +mod isle_prelude; mod iterators; mod legalizer; mod licm; mod nan_canonicalization; +mod opts; mod remove_constant_phis; mod result; mod scoped_hash_map; diff --git a/cranelift/codegen/src/loop_analysis.rs b/cranelift/codegen/src/loop_analysis.rs index 0e8715ae91..be6d5e588e 100644 --- a/cranelift/codegen/src/loop_analysis.rs +++ b/cranelift/codegen/src/loop_analysis.rs @@ -10,6 +10,7 @@ use crate::ir::{Block, Function, Layout}; use crate::packed_option::PackedOption; use crate::timing; use alloc::vec::Vec; +use smallvec::{smallvec, SmallVec}; /// A opaque reference to a code loop. #[derive(Copy, Clone, PartialEq, Eq, Hash)] @@ -29,6 +30,48 @@ pub struct LoopAnalysis { struct LoopData { header: Block, parent: PackedOption, + level: LoopLevel, +} + +/// A level in a loop nest. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct LoopLevel(u8); +impl LoopLevel { + const INVALID: u8 = 0xff; + + /// Get the root level (no loop). + pub fn root() -> Self { + Self(0) + } + /// Get the loop level. + pub fn level(self) -> usize { + self.0 as usize + } + /// Invalid loop level. 
+ pub fn invalid() -> Self { + Self(Self::INVALID) + } + /// One loop level deeper. + pub fn inc(self) -> Self { + if self.0 == (Self::INVALID - 1) { + self + } else { + Self(self.0 + 1) + } + } + /// A clamped loop level from a larger-width (usize) depth. + pub fn clamped(level: usize) -> Self { + Self( + u8::try_from(std::cmp::min(level, (Self::INVALID as usize) - 1)) + .expect("Clamped value must always convert"), + ) + } +} + +impl std::default::Default for LoopLevel { + fn default() -> Self { + LoopLevel::invalid() + } } impl LoopData { @@ -37,6 +80,7 @@ impl LoopData { Self { header, parent: parent.into(), + level: LoopLevel::invalid(), } } } @@ -71,6 +115,17 @@ impl LoopAnalysis { self.loops[lp].parent.expand() } + /// Return the innermost loop for a given block. + pub fn innermost_loop(&self, block: Block) -> Option { + self.block_loop_map[block].expand() + } + + /// Determine if a Block is a loop header. If so, return the loop. + pub fn is_loop_header(&self, block: Block) -> Option { + self.innermost_loop(block) + .filter(|&lp| self.loop_header(lp) == block) + } + /// Determine if a Block belongs to a loop by running a finger along the loop tree. /// /// Returns `true` if `block` is in loop `lp`. @@ -96,6 +151,12 @@ impl LoopAnalysis { } false } + + /// Returns the loop-nest level of a given block. 
+ pub fn loop_level(&self, block: Block) -> LoopLevel { + self.innermost_loop(block) + .map_or(LoopLevel(0), |lp| self.loops[lp].level) + } } impl LoopAnalysis { @@ -107,6 +168,7 @@ impl LoopAnalysis { self.block_loop_map.resize(func.dfg.num_blocks()); self.find_loop_headers(cfg, domtree, &func.layout); self.discover_loop_blocks(cfg, domtree, &func.layout); + self.assign_loop_levels(); self.valid = true; } @@ -228,6 +290,28 @@ impl LoopAnalysis { } } } + + fn assign_loop_levels(&mut self) { + let mut stack: SmallVec<[Loop; 8]> = smallvec![]; + for lp in self.loops.keys() { + if self.loops[lp].level == LoopLevel::invalid() { + stack.push(lp); + while let Some(&lp) = stack.last() { + if let Some(parent) = self.loops[lp].parent.into() { + if self.loops[parent].level != LoopLevel::invalid() { + self.loops[lp].level = self.loops[parent].level.inc(); + stack.pop(); + } else { + stack.push(parent); + } + } else { + self.loops[lp].level = LoopLevel::root().inc(); + stack.pop(); + } + } + } + } + } } #[cfg(test)] @@ -286,6 +370,10 @@ mod tests { assert_eq!(loop_analysis.is_in_loop(block2, loops[0]), true); assert_eq!(loop_analysis.is_in_loop(block3, loops[0]), true); assert_eq!(loop_analysis.is_in_loop(block0, loops[1]), false); + assert_eq!(loop_analysis.loop_level(block0).level(), 1); + assert_eq!(loop_analysis.loop_level(block1).level(), 2); + assert_eq!(loop_analysis.loop_level(block2).level(), 2); + assert_eq!(loop_analysis.loop_level(block3).level(), 1); } #[test] @@ -345,5 +433,11 @@ mod tests { assert_eq!(loop_analysis.is_in_loop(block3, loops[2]), true); assert_eq!(loop_analysis.is_in_loop(block4, loops[2]), true); assert_eq!(loop_analysis.is_in_loop(block5, loops[0]), true); + assert_eq!(loop_analysis.loop_level(block0).level(), 1); + assert_eq!(loop_analysis.loop_level(block1).level(), 2); + assert_eq!(loop_analysis.loop_level(block2).level(), 2); + assert_eq!(loop_analysis.loop_level(block3).level(), 2); + assert_eq!(loop_analysis.loop_level(block4).level(), 2); 
+ assert_eq!(loop_analysis.loop_level(block5).level(), 1); } } diff --git a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs index d64d054d38..a140842a90 100644 --- a/cranelift/codegen/src/machinst/compile.rs +++ b/cranelift/codegen/src/machinst/compile.rs @@ -13,6 +13,7 @@ use regalloc2::{self, MachineEnv}; /// for binary emission. pub fn compile( f: &Function, + flags: crate::settings::Flags, b: &B, abi: Callee<<::MInst as MachInst>::ABIMachineSpec>, machine_env: &MachineEnv, @@ -23,7 +24,7 @@ pub fn compile( let block_order = BlockLoweringOrder::new(f); // Build the lowering context. - let lower = crate::machinst::Lower::new(f, abi, emit_info, block_order, sigs)?; + let lower = crate::machinst::Lower::new(f, flags, abi, emit_info, block_order, sigs)?; // Lower the IR. let vcode = { diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 8781d8372d..e82b7152eb 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -41,13 +41,9 @@ pub enum RangeView { /// ...` for each backend. These methods are shared amongst all backends. #[macro_export] #[doc(hidden)] -macro_rules! isle_prelude_methods { +macro_rules! isle_lower_prelude_methods { () => { - /// We don't have a way of making a `()` value in isle directly. - #[inline] - fn unit(&mut self) -> Unit { - () - } + isle_common_prelude_methods!(); #[inline] fn same_value(&mut self, a: Value, b: Value) -> Option { @@ -175,309 +171,6 @@ macro_rules! 
isle_prelude_methods { regs.regs().len() } - #[inline] - fn u8_as_u32(&mut self, x: u8) -> Option { - Some(x.into()) - } - - #[inline] - fn u8_as_u64(&mut self, x: u8) -> Option { - Some(x.into()) - } - - #[inline] - fn u16_as_u64(&mut self, x: u16) -> Option { - Some(x.into()) - } - - #[inline] - fn u32_as_u64(&mut self, x: u32) -> Option { - Some(x.into()) - } - - #[inline] - fn i64_as_u64(&mut self, x: i64) -> Option { - Some(x as u64) - } - - #[inline] - fn u64_add(&mut self, x: u64, y: u64) -> Option { - Some(x.wrapping_add(y)) - } - - #[inline] - fn u64_sub(&mut self, x: u64, y: u64) -> Option { - Some(x.wrapping_sub(y)) - } - - #[inline] - fn u64_and(&mut self, x: u64, y: u64) -> Option { - Some(x & y) - } - - #[inline] - fn u64_is_zero(&mut self, value: u64) -> bool { - 0 == value - } - - #[inline] - fn ty_bits(&mut self, ty: Type) -> Option { - use std::convert::TryInto; - Some(ty.bits().try_into().unwrap()) - } - - #[inline] - fn ty_bits_u16(&mut self, ty: Type) -> u16 { - ty.bits().try_into().unwrap() - } - - #[inline] - fn ty_bits_u64(&mut self, ty: Type) -> u64 { - ty.bits() as u64 - } - - #[inline] - fn ty_bytes(&mut self, ty: Type) -> u16 { - u16::try_from(ty.bytes()).unwrap() - } - - #[inline] - fn ty_mask(&mut self, ty: Type) -> u64 { - match ty.bits() { - 1 => 1, - 8 => 0xff, - 16 => 0xffff, - 32 => 0xffff_ffff, - 64 => 0xffff_ffff_ffff_ffff, - _ => unimplemented!(), - } - } - - fn fits_in_16(&mut self, ty: Type) -> Option { - if ty.bits() <= 16 { - Some(ty) - } else { - None - } - } - - #[inline] - fn fits_in_32(&mut self, ty: Type) -> Option { - if ty.bits() <= 32 && !ty.is_dynamic_vector() { - Some(ty) - } else { - None - } - } - - #[inline] - fn lane_fits_in_32(&mut self, ty: Type) -> Option { - if !ty.is_vector() && !ty.is_dynamic_vector() { - None - } else if ty.lane_type().bits() <= 32 { - Some(ty) - } else { - None - } - } - - #[inline] - fn fits_in_64(&mut self, ty: Type) -> Option { - if ty.bits() <= 64 && !ty.is_dynamic_vector() { - 
Some(ty) - } else { - None - } - } - - #[inline] - fn ty_int_bool_ref_scalar_64(&mut self, ty: Type) -> Option { - if ty.bits() <= 64 && !ty.is_float() && !ty.is_vector() { - Some(ty) - } else { - None - } - } - - #[inline] - fn ty_32(&mut self, ty: Type) -> Option { - if ty.bits() == 32 { - Some(ty) - } else { - None - } - } - - #[inline] - fn ty_64(&mut self, ty: Type) -> Option { - if ty.bits() == 64 { - Some(ty) - } else { - None - } - } - - #[inline] - fn ty_32_or_64(&mut self, ty: Type) -> Option { - if ty.bits() == 32 || ty.bits() == 64 { - Some(ty) - } else { - None - } - } - - #[inline] - fn ty_8_or_16(&mut self, ty: Type) -> Option { - if ty.bits() == 8 || ty.bits() == 16 { - Some(ty) - } else { - None - } - } - - #[inline] - fn int_bool_fits_in_32(&mut self, ty: Type) -> Option { - match ty { - I8 | I16 | I32 | B8 | B16 | B32 => Some(ty), - _ => None, - } - } - - #[inline] - fn ty_int_bool_64(&mut self, ty: Type) -> Option { - match ty { - I64 | B64 => Some(ty), - _ => None, - } - } - - #[inline] - fn ty_int_bool_ref_64(&mut self, ty: Type) -> Option { - match ty { - I64 | B64 | R64 => Some(ty), - _ => None, - } - } - - #[inline] - fn ty_int_bool_128(&mut self, ty: Type) -> Option { - match ty { - I128 | B128 => Some(ty), - _ => None, - } - } - - #[inline] - fn ty_int(&mut self, ty: Type) -> Option { - ty.is_int().then(|| ty) - } - - #[inline] - fn ty_int_bool(&mut self, ty: Type) -> Option { - if ty.is_int() || ty.is_bool() { - Some(ty) - } else { - None - } - } - - #[inline] - fn ty_scalar_float(&mut self, ty: Type) -> Option { - match ty { - F32 | F64 => Some(ty), - _ => None, - } - } - - #[inline] - fn ty_float_or_vec(&mut self, ty: Type) -> Option { - match ty { - F32 | F64 => Some(ty), - ty if ty.is_vector() => Some(ty), - _ => None, - } - } - - fn ty_vector_float(&mut self, ty: Type) -> Option { - if ty.is_vector() && ty.lane_type().is_float() { - Some(ty) - } else { - None - } - } - - #[inline] - fn ty_vector_not_float(&mut self, ty: Type) -> 
Option { - if ty.is_vector() && !ty.lane_type().is_float() { - Some(ty) - } else { - None - } - } - - #[inline] - fn ty_vec64_ctor(&mut self, ty: Type) -> Option { - if ty.is_vector() && ty.bits() == 64 { - Some(ty) - } else { - None - } - } - - #[inline] - fn ty_vec64(&mut self, ty: Type) -> Option { - if ty.is_vector() && ty.bits() == 64 { - Some(ty) - } else { - None - } - } - - #[inline] - fn ty_vec128(&mut self, ty: Type) -> Option { - if ty.is_vector() && ty.bits() == 128 { - Some(ty) - } else { - None - } - } - - #[inline] - fn ty_dyn_vec64(&mut self, ty: Type) -> Option { - if ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 64 { - Some(ty) - } else { - None - } - } - - #[inline] - fn ty_dyn_vec128(&mut self, ty: Type) -> Option { - if ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 128 { - Some(ty) - } else { - None - } - } - - #[inline] - fn ty_vec64_int(&mut self, ty: Type) -> Option { - if ty.is_vector() && ty.bits() == 64 && ty.lane_type().is_int() { - Some(ty) - } else { - None - } - } - - #[inline] - fn ty_vec128_int(&mut self, ty: Type) -> Option { - if ty.is_vector() && ty.bits() == 128 && ty.lane_type().is_int() { - Some(ty) - } else { - None - } - } - #[inline] fn value_list_slice(&mut self, list: ValueList) -> ValueSlice { (list, 0) @@ -521,20 +214,6 @@ macro_rules! isle_prelude_methods { r.to_reg() } - #[inline] - fn u64_from_imm64(&mut self, imm: Imm64) -> u64 { - imm.bits() as u64 - } - - #[inline] - fn u64_from_bool(&mut self, b: bool) -> u64 { - if b { - u64::MAX - } else { - 0 - } - } - #[inline] fn inst_results(&mut self, inst: Inst) -> ValueSlice { (self.lower_ctx.dfg().inst_results_list(inst), 0) @@ -555,80 +234,11 @@ macro_rules! 
isle_prelude_methods { self.lower_ctx.dfg().value_type(val) } - #[inline] - fn multi_lane(&mut self, ty: Type) -> Option<(u32, u32)> { - if ty.lane_count() > 1 { - Some((ty.lane_bits(), ty.lane_count())) - } else { - None - } - } - - #[inline] - fn dynamic_lane(&mut self, ty: Type) -> Option<(u32, u32)> { - if ty.is_dynamic_vector() { - Some((ty.lane_bits(), ty.min_lane_count())) - } else { - None - } - } - - #[inline] - fn dynamic_int_lane(&mut self, ty: Type) -> Option { - if ty.is_dynamic_vector() && crate::machinst::ty_has_int_representation(ty.lane_type()) - { - Some(ty.lane_bits()) - } else { - None - } - } - - #[inline] - fn dynamic_fp_lane(&mut self, ty: Type) -> Option { - if ty.is_dynamic_vector() - && crate::machinst::ty_has_float_or_vec_representation(ty.lane_type()) - { - Some(ty.lane_bits()) - } else { - None - } - } - - #[inline] - fn ty_dyn64_int(&mut self, ty: Type) -> Option { - if ty.is_dynamic_vector() && ty.min_bits() == 64 && ty.lane_type().is_int() { - Some(ty) - } else { - None - } - } - - #[inline] - fn ty_dyn128_int(&mut self, ty: Type) -> Option { - if ty.is_dynamic_vector() && ty.min_bits() == 128 && ty.lane_type().is_int() { - Some(ty) - } else { - None - } - } - #[inline] fn def_inst(&mut self, val: Value) -> Option { self.lower_ctx.dfg().value_def(val).inst() } - fn u64_from_ieee32(&mut self, val: Ieee32) -> u64 { - val.bits().into() - } - - fn u64_from_ieee64(&mut self, val: Ieee64) -> u64 { - val.bits() - } - - fn u8_from_uimm8(&mut self, val: Uimm8) -> u8 { - val - } - fn zero_value(&mut self, value: Value) -> Option { let insn = self.def_inst(value); if insn.is_some() { @@ -682,34 +292,6 @@ macro_rules! 
isle_prelude_methods { } } - fn not_vec32x2(&mut self, ty: Type) -> Option { - if ty.lane_bits() == 32 && ty.lane_count() == 2 { - None - } else { - Some(ty) - } - } - - fn not_i64x2(&mut self, ty: Type) -> Option<()> { - if ty == I64X2 { - None - } else { - Some(()) - } - } - - fn trap_code_division_by_zero(&mut self) -> TrapCode { - TrapCode::IntegerDivisionByZero - } - - fn trap_code_integer_overflow(&mut self) -> TrapCode { - TrapCode::IntegerOverflow - } - - fn trap_code_bad_conversion_to_integer(&mut self) -> TrapCode { - TrapCode::BadConversionToInteger - } - fn avoid_div_traps(&mut self, _: Type) -> Option<()> { if self.flags.avoid_div_traps() { Some(()) @@ -820,79 +402,6 @@ macro_rules! isle_prelude_methods { Some(u128::from_le_bytes(bytes.try_into().ok()?)) } - fn nonzero_u64_from_imm64(&mut self, val: Imm64) -> Option { - match val.bits() { - 0 => None, - n => Some(n as u64), - } - } - - #[inline] - fn u32_add(&mut self, a: u32, b: u32) -> u32 { - a.wrapping_add(b) - } - - #[inline] - fn s32_add_fallible(&mut self, a: u32, b: u32) -> Option { - let a = a as i32; - let b = b as i32; - a.checked_add(b).map(|sum| sum as u32) - } - - #[inline] - fn u32_nonnegative(&mut self, x: u32) -> Option { - if (x as i32) >= 0 { - Some(x) - } else { - None - } - } - - #[inline] - fn u32_lteq(&mut self, a: u32, b: u32) -> Option<()> { - if a <= b { - Some(()) - } else { - None - } - } - - #[inline] - fn simm32(&mut self, x: Imm64) -> Option { - let x64: i64 = x.into(); - let x32: i32 = x64.try_into().ok()?; - Some(x32 as u32) - } - - #[inline] - fn uimm8(&mut self, x: Imm64) -> Option { - let x64: i64 = x.into(); - let x8: u8 = x64.try_into().ok()?; - Some(x8) - } - - #[inline] - fn offset32(&mut self, x: Offset32) -> Option { - let x: i32 = x.into(); - Some(x as u32) - } - - #[inline] - fn u8_and(&mut self, a: u8, b: u8) -> u8 { - a & b - } - - #[inline] - fn lane_type(&mut self, ty: Type) -> Type { - ty.lane_type() - } - - #[inline] - fn offset32_to_u32(&mut self, 
offset: Offset32) -> u32 { - let offset: i32 = offset.into(); - offset as u32 - } - #[inline] fn emit_u64_le_const(&mut self, value: u64) -> VCodeConstant { let data = VCodeConstantData::U64(value.to_le_bytes()); @@ -913,21 +422,6 @@ macro_rules! isle_prelude_methods { )) } - fn range(&mut self, start: usize, end: usize) -> Range { - (start, end) - } - - fn range_view(&mut self, (start, end): Range) -> RangeView { - if start >= end { - RangeView::Empty - } else { - RangeView::NonEmpty { - index: start, - rest: (start + 1, end), - } - } - } - fn retval(&mut self, i: usize) -> WritableValueRegs { self.lower_ctx.retval(i) } @@ -1067,11 +561,6 @@ macro_rules! isle_prelude_methods { self.lower_ctx.sink_inst(inst); } - #[inline] - fn mem_flags_trusted(&mut self) -> MemFlags { - MemFlags::trusted() - } - #[inline] fn preg_to_reg(&mut self, preg: PReg) -> Reg { preg.into() @@ -1081,27 +570,6 @@ macro_rules! isle_prelude_methods { fn gen_move(&mut self, ty: Type, dst: WritableReg, src: Reg) -> MInst { MInst::gen_move(dst, src, ty) } - - #[inline] - fn intcc_unsigned(&mut self, x: &IntCC) -> IntCC { - x.unsigned() - } - - #[inline] - fn signed_cond_code(&mut self, cc: &condcodes::IntCC) -> Option { - match cc { - IntCC::Equal - | IntCC::UnsignedGreaterThanOrEqual - | IntCC::UnsignedGreaterThan - | IntCC::UnsignedLessThanOrEqual - | IntCC::UnsignedLessThan - | IntCC::NotEqual => None, - IntCC::SignedGreaterThanOrEqual - | IntCC::SignedGreaterThan - | IntCC::SignedLessThanOrEqual - | IntCC::SignedLessThan => Some(*cc), - } - } }; } diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index 63b8c23276..bccf40e116 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -147,6 +147,9 @@ pub struct Lower<'func, I: VCodeInst> { /// The function to lower. f: &'func Function, + /// Machine-independent flags. + flags: crate::settings::Flags, + /// Lowered machine instructions. 
vcode: VCodeBuilder, @@ -345,6 +348,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { /// Prepare a new lowering context for the given IR function. pub fn new( f: &'func Function, + flags: crate::settings::Flags, abi: Callee, emit_info: I::Info, block_order: BlockLoweringOrder, @@ -433,6 +437,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { Ok(Lower { f, + flags, vcode, value_regs, retval_regs, @@ -1265,26 +1270,30 @@ impl<'func, I: VCodeInst> Lower<'func, I> { assert!(!self.inst_sunk.contains(&inst)); } - // If the value is a constant, then (re)materialize it at each use. This - // lowers register pressure. - if let Some(c) = self - .f - .dfg - .value_def(val) - .inst() - .and_then(|inst| self.get_constant(inst)) - { - let regs = self.alloc_tmp(ty); - trace!(" -> regs {:?}", regs); - assert!(regs.is_valid()); + // If the value is a constant, then (re)materialize it at each + // use. This lowers register pressure. (Only do this if we are + // not using egraph-based compilation; the egraph framework + // more efficiently rematerializes constants where needed.) + if !self.flags.use_egraphs() { + if let Some(c) = self + .f + .dfg + .value_def(val) + .inst() + .and_then(|inst| self.get_constant(inst)) + { + let regs = self.alloc_tmp(ty); + trace!(" -> regs {:?}", regs); + assert!(regs.is_valid()); - let insts = I::gen_constant(regs, c.into(), ty, |ty| { - self.alloc_tmp(ty).only_reg().unwrap() - }); - for inst in insts { - self.emit(inst); + let insts = I::gen_constant(regs, c.into(), ty, |ty| { + self.alloc_tmp(ty).only_reg().unwrap() + }); + for inst in insts { + self.emit(inst); + } + return non_writable_value_regs(regs); } - return non_writable_value_regs(regs); } let mut regs = self.value_regs[val]; diff --git a/cranelift/codegen/src/opts.rs b/cranelift/codegen/src/opts.rs new file mode 100644 index 0000000000..61a2a3aebf --- /dev/null +++ b/cranelift/codegen/src/opts.rs @@ -0,0 +1,297 @@ +//! Optimization driver using ISLE rewrite rules on an egraph. 
+ +use crate::egraph::Analysis; +use crate::egraph::FuncEGraph; +use crate::egraph::MemoryState; +pub use crate::egraph::{Node, NodeCtx}; +use crate::ir::condcodes; +pub use crate::ir::condcodes::{FloatCC, IntCC}; +pub use crate::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64, Uimm8}; +pub use crate::ir::types::*; +pub use crate::ir::{ + dynamic_to_fixed, AtomicRmwOp, Block, Constant, DynamicStackSlot, FuncRef, GlobalValue, Heap, + Immediate, InstructionImms, JumpTable, MemFlags, Opcode, StackSlot, Table, TrapCode, Type, + Value, +}; +use crate::isle_common_prelude_methods; +use crate::machinst::isle::*; +use crate::trace; +pub use cranelift_egraph::{Id, NewOrExisting, NodeIter}; +use cranelift_entity::{EntityList, EntityRef}; +use smallvec::SmallVec; +use std::marker::PhantomData; + +pub type IdArray = EntityList; +#[allow(dead_code)] +pub type Unit = (); +pub type Range = (usize, usize); + +pub type ConstructorVec = SmallVec<[T; 8]>; + +mod generated_code; +use generated_code::ContextIter; + +struct IsleContext<'a, 'b> { + egraph: &'a mut FuncEGraph<'b>, +} + +const REWRITE_LIMIT: usize = 5; + +pub fn optimize_eclass<'a>(id: Id, egraph: &mut FuncEGraph<'a>) -> Id { + trace!("running rules on eclass {}", id.index()); + egraph.stats.rewrite_rule_invoked += 1; + + if egraph.rewrite_depth > REWRITE_LIMIT { + egraph.stats.rewrite_depth_limit += 1; + return id; + } + egraph.rewrite_depth += 1; + + // Find all possible rewrites and union them in, returning the + // union. + let mut ctx = IsleContext { egraph }; + let optimized_ids = generated_code::constructor_simplify(&mut ctx, id); + let mut union_id = id; + if let Some(mut ids) = optimized_ids { + while let Some(new_id) = ids.next(&mut ctx) { + if ctx.egraph.subsume_ids.contains(&new_id) { + trace!(" -> eclass {} subsumes {}", new_id, id); + ctx.egraph.stats.node_subsume += 1; + // Merge in the unionfind so canonicalization still + // works, but take *only* the subsuming ID, and break + // now. 
+ ctx.egraph.egraph.unionfind.union(union_id, new_id); + union_id = new_id; + break; + } + ctx.egraph.stats.node_union += 1; + let old_union_id = union_id; + union_id = ctx + .egraph + .egraph + .union(&ctx.egraph.node_ctx, union_id, new_id); + trace!( + " -> union eclass {} with {} to get {}", + new_id, + old_union_id, + union_id + ); + } + } + trace!(" -> optimize {} got {}", id, union_id); + ctx.egraph.rewrite_depth -= 1; + union_id +} + +pub(crate) fn store_to_load<'a>(id: Id, egraph: &mut FuncEGraph<'a>) -> Id { + // Note that we only examine the latest enode in the eclass: opts + // are invoked for every new enode added to an eclass, so + // traversing the whole eclass would be redundant. + let load_key = egraph.egraph.classes[id].get_node().unwrap(); + if let Node::Load { + op: + InstructionImms::Load { + opcode: Opcode::Load, + offset: load_offset, + .. + }, + ty: load_ty, + addr: load_addr, + mem_state: MemoryState::Store(store_inst), + .. + } = load_key.node(&egraph.egraph.nodes) + { + trace!(" -> got load op for id {}", id); + if let Some((store_ty, store_id)) = egraph.store_nodes.get(&store_inst) { + trace!(" -> got store id: {} ty: {}", store_id, store_ty); + let store_key = egraph.egraph.classes[*store_id].get_node().unwrap(); + if let Node::Inst { + op: + InstructionImms::Store { + opcode: Opcode::Store, + offset: store_offset, + .. + }, + args: store_args, + .. 
+ } = store_key.node(&egraph.egraph.nodes) + { + let store_args = store_args.as_slice(&egraph.node_ctx.args); + let store_data = store_args[0]; + let store_addr = store_args[1]; + if *load_offset == *store_offset + && *load_ty == *store_ty + && egraph.egraph.unionfind.equiv_id_mut(*load_addr, store_addr) + { + trace!(" -> same offset, type, address; forwarding"); + egraph.stats.store_to_load_forward += 1; + return store_data; + } + } + } + } + + id +} + +struct NodesEtorIter<'a, 'b> +where + 'b: 'a, +{ + root: Id, + iter: NodeIter, + _phantom1: PhantomData<&'a ()>, + _phantom2: PhantomData<&'b ()>, +} + +impl<'a, 'b> generated_code::ContextIter for NodesEtorIter<'a, 'b> +where + 'b: 'a, +{ + type Context = IsleContext<'a, 'b>; + type Output = (Type, InstructionImms, IdArray); + + fn next(&mut self, ctx: &mut IsleContext<'a, 'b>) -> Option { + while let Some(node) = self.iter.next(&ctx.egraph.egraph) { + trace!("iter from root {}: node {:?}", self.root, node); + match node { + Node::Pure { op, args, types } + | Node::Inst { + op, args, types, .. + } if types.len() == 1 => { + let ty = types.as_slice(&ctx.egraph.node_ctx.types)[0]; + return Some((ty, op.clone(), args.clone())); + } + _ => {} + } + } + None + } +} + +impl<'a, 'b> generated_code::Context for IsleContext<'a, 'b> { + isle_common_prelude_methods!(); + + fn eclass_type(&mut self, eclass: Id) -> Option { + let mut iter = self.egraph.egraph.enodes(eclass); + while let Some(node) = iter.next(&self.egraph.egraph) { + match node { + &Node::Pure { types, .. } | &Node::Inst { types, .. } if types.len() == 1 => { + return Some(types.as_slice(&self.egraph.node_ctx.types)[0]); + } + &Node::Load { ty, .. } => return Some(ty), + &Node::Result { ty, .. } => return Some(ty), + &Node::Param { ty, .. 
} => return Some(ty), + _ => {} + } + } + None + } + + fn at_loop_level(&mut self, eclass: Id) -> (u8, Id) { + ( + self.egraph.egraph.analysis_value(eclass).loop_level.level() as u8, + eclass, + ) + } + + type enodes_etor_iter = NodesEtorIter<'a, 'b>; + + fn enodes_etor(&mut self, eclass: Id) -> Option> { + Some(NodesEtorIter { + root: eclass, + iter: self.egraph.egraph.enodes(eclass), + _phantom1: PhantomData, + _phantom2: PhantomData, + }) + } + + fn pure_enode_ctor(&mut self, ty: Type, op: &InstructionImms, args: IdArray) -> Id { + let types = self.egraph.node_ctx.types.single(ty); + let types = types.freeze(&mut self.egraph.node_ctx.types); + let op = op.clone(); + match self + .egraph + .egraph + .add(Node::Pure { op, args, types }, &mut self.egraph.node_ctx) + { + NewOrExisting::New(id) => { + self.egraph.stats.node_created += 1; + self.egraph.stats.node_pure += 1; + self.egraph.stats.node_ctor_created += 1; + optimize_eclass(id, self.egraph) + } + NewOrExisting::Existing(id) => { + self.egraph.stats.node_ctor_deduped += 1; + id + } + } + } + + fn id_array_0_etor(&mut self, arg0: IdArray) -> Option<()> { + let values = arg0.as_slice(&self.egraph.node_ctx.args); + if values.len() == 0 { + Some(()) + } else { + None + } + } + + fn id_array_0_ctor(&mut self) -> IdArray { + EntityList::default() + } + + fn id_array_1_etor(&mut self, arg0: IdArray) -> Option { + let values = arg0.as_slice(&self.egraph.node_ctx.args); + if values.len() == 1 { + Some(values[0]) + } else { + None + } + } + + fn id_array_1_ctor(&mut self, arg0: Id) -> IdArray { + EntityList::from_iter([arg0].into_iter(), &mut self.egraph.node_ctx.args) + } + + fn id_array_2_etor(&mut self, arg0: IdArray) -> Option<(Id, Id)> { + let values = arg0.as_slice(&self.egraph.node_ctx.args); + if values.len() == 2 { + Some((values[0], values[1])) + } else { + None + } + } + + fn id_array_2_ctor(&mut self, arg0: Id, arg1: Id) -> IdArray { + EntityList::from_iter([arg0, arg1].into_iter(), &mut 
self.egraph.node_ctx.args) + } + + fn id_array_3_etor(&mut self, arg0: IdArray) -> Option<(Id, Id, Id)> { + let values = arg0.as_slice(&self.egraph.node_ctx.args); + if values.len() == 3 { + Some((values[0], values[1], values[2])) + } else { + None + } + } + + fn id_array_3_ctor(&mut self, arg0: Id, arg1: Id, arg2: Id) -> IdArray { + EntityList::from_iter( + [arg0, arg1, arg2].into_iter(), + &mut self.egraph.node_ctx.args, + ) + } + + fn remat(&mut self, id: Id) -> Id { + trace!("remat: {}", id); + self.egraph.remat_ids.insert(id); + id + } + + fn subsume(&mut self, id: Id) -> Id { + trace!("subsume: {}", id); + self.egraph.subsume_ids.insert(id); + id + } +} diff --git a/cranelift/codegen/src/opts/algebraic.isle b/cranelift/codegen/src/opts/algebraic.isle new file mode 100644 index 0000000000..4669fc3491 --- /dev/null +++ b/cranelift/codegen/src/opts/algebraic.isle @@ -0,0 +1,207 @@ +;; Algebraic optimizations. + +;; Rules here are allowed to rewrite pure expressions arbitrarily, +;; using the same inputs as the original, or fewer. In other words, we +;; cannot pull a new eclass id out of thin air and refer to it, other +;; than a piece of the input or a new node that we construct; but we +;; can freely rewrite e.g. `x+y-y` to `x`. + +;; uextend/sextend of a constant. +(rule (simplify (uextend $I64 (iconst $I32 imm))) + (iconst $I64 imm)) +(rule (simplify (sextend $I64 (iconst $I32 (u64_from_imm64 imm)))) + (iconst $I64 (imm64 (u64_sextend_u32 imm)))) + +;; x+0 == 0+x == x. +(rule (simplify (iadd ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) +(rule (simplify (iadd ty + (iconst ty (u64_from_imm64 0)) + x)) + (subsume x)) +;; x-0 == x. +(rule (simplify (isub ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) +;; 0-x == (ineg x). +(rule (simplify (isub ty + (iconst ty (u64_from_imm64 0)) + x)) + (ineg ty x)) + +;; x*1 == 1*x == x. 
+(rule (simplify (imul ty + x + (iconst ty (u64_from_imm64 1)))) + (subsume x)) +(rule (simplify (imul ty + (iconst ty (u64_from_imm64 1)) + x)) + (subsume x)) + +;; x*0 == 0*x == 0. +(rule (simplify (imul ty + x + (iconst ty (u64_from_imm64 0)))) + (iconst ty (imm64 0))) +(rule (simplify (imul ty + (iconst ty (u64_from_imm64 0)) + x)) + (iconst ty (imm64 0))) + +;; x/1 == x. +(rule (simplify (sdiv ty + x + (iconst ty (u64_from_imm64 1)))) + (subsume x)) +(rule (simplify (udiv ty + x + (iconst ty (u64_from_imm64 1)))) + (subsume x)) + +;; x>>0 == x<<0 == x rotr 0 == x rotl 0 == x. +(rule (simplify (ishl ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) +(rule (simplify (ushr ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) +(rule (simplify (sshr ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) +(rule (simplify (rotr ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) +(rule (simplify (rotl ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) + +;; x | 0 == 0 | x == x | x == x. +(rule (simplify (bor ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) +(rule (simplify (bor ty + (iconst ty (u64_from_imm64 0)) + x)) + (subsume x)) +(rule (simplify (bor ty x x)) + (subsume x)) + +;; x ^ 0 == 0 ^ x == x. +(rule (simplify (bxor ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) +(rule (simplify (bxor ty + (iconst ty (u64_from_imm64 0)) + x)) + (subsume x)) + +;; x ^ x == 0. +(rule (simplify (bxor ty x x)) + (subsume (iconst ty (imm64 0)))) + +;; x ^ not(x) == not(x) ^ x == -1. +(rule (simplify (bxor $I32 x (bnot $I32 x))) (subsume (iconst $I32 (imm64 0xffff_ffff)))) +(rule (simplify (bxor $I32 (bnot $I32 x) x)) (subsume (iconst $I32 (imm64 0xffff_ffff)))) +(rule (simplify (bxor $I64 x (bnot $I64 x))) (subsume (iconst $I64 (imm64 0xffff_ffff_ffff_ffff)))) +(rule (simplify (bxor $I64 (bnot $I64 x) x)) (subsume (iconst $I64 (imm64 0xffff_ffff_ffff_ffff)))) + +;; x & -1 == -1 & x == x & x == x.
+(rule (simplify (band ty x x)) x) +(rule (simplify (band $I32 x (iconst $I32 (u64_from_imm64 0xffff_ffff)))) (subsume x)) +(rule (simplify (band $I32 (iconst $I32 (u64_from_imm64 0xffff_ffff)) x)) (subsume x)) +(rule (simplify (band $I64 x (iconst $I64 (u64_from_imm64 0xffff_ffff_ffff_ffff)))) (subsume x)) +(rule (simplify (band $I64 (iconst $I64 (u64_from_imm64 0xffff_ffff_ffff_ffff)) x)) (subsume x)) + +;; x & 0 == 0 & x == 0. +(rule (simplify (band ty x (iconst ty (u64_from_imm64 0)))) (iconst ty (imm64 0))) +(rule (simplify (band ty (iconst ty (u64_from_imm64 0)) x)) (iconst ty (imm64 0))) + +;; not(not(x)) == x. +(rule (simplify (bnot ty (bnot ty x))) (subsume x)) + +;; DeMorgan's rule (two versions): +;; bnot(bor(x, y)) == band(bnot(x), bnot(y)) +(rule (simplify (bnot ty (bor ty x y))) + (band ty (bnot ty x) (bnot ty y))) +;; bnot(band(x, y)) == bor(bnot(x), bnot(y)) +(rule (simplify (bnot ty (band t x y))) + (bor ty (bnot ty x) (bnot ty y))) + +;; x*2 == 2*x == x+x. +(rule (simplify (imul ty x (iconst _ (simm32 2)))) + (iadd ty x x)) +(rule (simplify (imul ty (iconst _ (simm32 2)) x)) + (iadd ty x x)) + +;; x<<32>>32: uextend/sextend 32->64. +(rule (simplify (ushr $I64 (ishl $I64 (uextend $I64 x @ (eclass_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32)))) + (uextend $I64 x)) + +(rule (simplify (sshr $I64 (ishl $I64 (uextend $I64 x @ (eclass_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32)))) + (sextend $I64 x)) + +;; TODO: strength reduction: mul/div to shifts +;; TODO: div/rem by constants -> magic multiplications + +;; Reassociate when it benefits LICM. 
+(rule (simplify (iadd ty (iadd ty x y) z)) + (if-let (at_loop_level lx _) x) + (if-let (at_loop_level ly _) y) + (if-let (at_loop_level lz _) z) + (if (u8_lt lx ly)) + (if (u8_lt lz ly)) + (iadd ty (iadd ty x z) y)) +(rule (simplify (iadd ty (iadd ty x y) z)) + (if-let (at_loop_level lx _) x) + (if-let (at_loop_level ly _) y) + (if-let (at_loop_level lz _) z) + (if (u8_lt ly lx)) + (if (u8_lt lz lx)) + (iadd ty (iadd ty y z) x)) + +;; Select's selector input doesn't need bint; remove the redundant op. +(rule (simplify (select ty (bint _ b) x y)) + (subsume (select ty b x y))) + +;; Rematerialize ALU-op-with-imm and iconsts in each block where they're +;; used. This is neutral (add-with-imm) or positive (iconst) for +;; register pressure, and these ops are very cheap. +(rule (simplify x @ (iadd _ (iconst _ _) _)) + (remat x)) +(rule (simplify x @ (iadd _ _ (iconst _ _))) + (remat x)) +(rule (simplify x @ (isub _ (iconst _ _) _)) + (remat x)) +(rule (simplify x @ (isub _ _ (iconst _ _))) + (remat x)) +(rule (simplify x @ (band _ (iconst _ _) _)) + (remat x)) +(rule (simplify x @ (band _ _ (iconst _ _))) + (remat x)) +(rule (simplify x @ (bor _ (iconst _ _) _)) + (remat x)) +(rule (simplify x @ (bor _ _ (iconst _ _))) + (remat x)) +(rule (simplify x @ (bxor _ (iconst _ _) _)) + (remat x)) +(rule (simplify x @ (bxor _ _ (iconst _ _))) + (remat x)) +(rule (simplify x @ (bnot _ _)) + (remat x)) +(rule (simplify x @ (iconst _ _)) + (remat x)) +(rule (simplify x @ (f32const _ _)) + (remat x)) +(rule (simplify x @ (f64const _ _)) + (remat x)) diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle new file mode 100644 index 0000000000..e3573bcc3a --- /dev/null +++ b/cranelift/codegen/src/opts/cprop.isle @@ -0,0 +1,134 @@ +;; Constant propagation. 
+ +(rule (simplify + (iadd (fits_in_64 ty) + (iconst ty (u64_from_imm64 k1)) + (iconst ty (u64_from_imm64 k2)))) + (subsume (iconst ty (imm64 (u64_add k1 k2))))) + +(rule (simplify + (isub (fits_in_64 ty) + (iconst ty (u64_from_imm64 k1)) + (iconst ty (u64_from_imm64 k2)))) + (subsume (iconst ty (imm64 (u64_sub k1 k2))))) + +(rule (simplify + (imul (fits_in_64 ty) + (iconst ty (u64_from_imm64 k1)) + (iconst ty (u64_from_imm64 k2)))) + (subsume (iconst ty (imm64 (u64_mul k1 k2))))) + +(rule (simplify + (sdiv (fits_in_64 ty) + (iconst ty (u64_from_imm64 k1)) + (iconst ty (u64_from_imm64 k2)))) + (if-let d (u64_sdiv k1 k2)) + (subsume (iconst ty (imm64 d)))) + +(rule (simplify + (udiv (fits_in_64 ty) + (iconst ty (u64_from_imm64 k1)) + (iconst ty (u64_from_imm64 k2)))) + (if-let d (u64_udiv k1 k2)) + (subsume (iconst ty (imm64 d)))) + +(rule (simplify + (bor (fits_in_64 ty) + (iconst ty (u64_from_imm64 k1)) + (iconst ty (u64_from_imm64 k2)))) + (subsume (iconst ty (imm64 (u64_or k1 k2))))) + +(rule (simplify + (band (fits_in_64 ty) + (iconst ty (u64_from_imm64 k1)) + (iconst ty (u64_from_imm64 k2)))) + (subsume (iconst ty (imm64 (u64_and k1 k2))))) + +(rule (simplify + (bxor (fits_in_64 ty) + (iconst ty (u64_from_imm64 k1)) + (iconst ty (u64_from_imm64 k2)))) + (subsume (iconst ty (imm64 (u64_xor k1 k2))))) + +(rule (simplify + (bnot (fits_in_64 ty) + (iconst ty (u64_from_imm64 k)))) + (subsume (iconst ty (imm64 (u64_not k))))) + +;; Canonicalize via commutativity: push immediates to the right. +;; +;; (op k x) --> (op x k) + +(rule (simplify + (iadd ty k @ (iconst ty _) x)) + (iadd ty x k)) +;; sub is not commutative, but we can flip the args and negate the +;; whole thing. 
+(rule (simplify + (isub ty k @ (iconst ty _) x)) + (ineg ty (isub ty x k))) +(rule (simplify + (imul ty k @ (iconst ty _) x)) + (imul ty x k)) + +(rule (simplify + (bor ty k @ (iconst ty _) x)) + (bor ty x k)) +(rule (simplify + (band ty k @ (iconst ty _) x)) + (band ty x k)) +(rule (simplify + (bxor ty k @ (iconst ty _) x)) + (bxor ty x k)) + +;; Canonicalize via associativity: reassociate to a right-heavy tree +;; for constants. +;; +;; (op (op x k) k) --> (op x (op k k)) + +(rule (simplify + (iadd ty (iadd ty x k1 @ (iconst ty _)) k2 @ (iconst ty _))) + (iadd ty x (iadd ty k1 k2))) +;; sub is not directly associative, but we can flip a sub to an add to +;; make it work: +;; - (sub (sub x k1) k2) -> (sub x (add k1 k2)) +;; - (sub (sub k1 x) k2) -> (sub (sub k1 k2) x) +;; - (sub (add x k1) k2) -> (sub x (sub k2 k1)) +;; - (add (sub x k1) k2) -> (add x (sub k2 k1)) +;; - (add (sub k1 x) k2) -> (sub (add k1 k2) x) +(rule (simplify (isub ty + (isub ty x (iconst ty (u64_from_imm64 k1))) + (iconst ty (u64_from_imm64 k2)))) + (isub ty x (iconst ty (imm64 (u64_add k1 k2))))) +(rule (simplify (isub ty + (isub ty (iconst ty (u64_from_imm64 k1)) x) + (iconst ty (u64_from_imm64 k2)))) + (isub ty (iconst ty (imm64 (u64_sub k1 k2))) x)) +(rule (simplify (isub ty + (iadd ty x (iconst ty (u64_from_imm64 k1))) + (iconst ty (u64_from_imm64 k2)))) + (isub ty x (iconst ty (imm64 (u64_sub k1 k2))))) +(rule (simplify (iadd ty + (isub ty x (iconst ty (u64_from_imm64 k1))) + (iconst ty (u64_from_imm64 k2)))) + (iadd ty x (iconst ty (imm64 (u64_sub k2 k1))))) +(rule (simplify (iadd ty + (isub ty (iconst ty (u64_from_imm64 k1)) x) + (iconst ty (u64_from_imm64 k2)))) + (isub ty (iconst ty (imm64 (u64_add k1 k2))) x)) + +(rule (simplify + (imul ty (imul ty x k1 @ (iconst ty _)) k2 @ (iconst ty _))) + (imul ty x (imul ty k1 k2))) +(rule (simplify + (bor ty (bor ty x k1 @ (iconst ty _)) k2 @ (iconst ty _))) + (bor ty x (bor ty k1 k2))) +(rule (simplify + (band ty (band ty x k1 @ (iconst ty 
_)) k2 @ (iconst ty _))) + (band ty x (band ty k1 k2))) +(rule (simplify + (bxor ty (bxor ty x k1 @ (iconst ty _)) k2 @ (iconst ty _))) + (bxor ty x (bxor ty k1 k2))) + +;; TODO: fadd, fsub, fmul, fdiv, fneg, fabs + diff --git a/cranelift/codegen/src/opts/generated_code.rs b/cranelift/codegen/src/opts/generated_code.rs new file mode 100644 index 0000000000..b196e10509 --- /dev/null +++ b/cranelift/codegen/src/opts/generated_code.rs @@ -0,0 +1,11 @@ +//! Wrapper environment for generated code from optimization rules in ISLE. + +// See https://github.com/rust-lang/rust/issues/47995: we cannot use `#![...]` attributes inside of +// the generated ISLE source below because we include!() it. We must include!() it because its path +// depends on an environment variable; and also because of this, we can't do the `#[path = "..."] +// mod generated_code;` trick either. +#![allow(dead_code, unreachable_code, unreachable_patterns)] +#![allow(unused_imports, unused_variables, non_snake_case, unused_mut)] +#![allow(irrefutable_let_patterns, non_camel_case_types)] + +include!(concat!(env!("ISLE_DIR"), "/isle_opt.rs")); diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 9c4f59ef87..de2be1588c 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -31,24 +31,7 @@ (type isize (primitive isize)) ;; `cranelift-entity`-based identifiers. -(type Inst (primitive Inst)) (type Type (primitive Type)) -(type Value (primitive Value)) - -;; ISLE representation of `&[Value]`. -(type ValueSlice (primitive ValueSlice)) - -;; ISLE representation of `Vec` -(type VecMask extern (enum)) - -(type ValueList (primitive ValueList)) -(type ValueRegs (primitive ValueRegs)) -(type WritableValueRegs (primitive WritableValueRegs)) - -;; Instruction lowering result: a vector of `ValueRegs`. -(type InstOutput (primitive InstOutput)) -;; (Mutable) builder to incrementally construct an `InstOutput`. 
-(type InstOutputBuilder extern (enum)) (decl u32_add (u32 u32) u32) (extern constructor u32_add u32_add) @@ -72,6 +55,16 @@ (decl pure u32_lteq (u32 u32) Unit) (extern constructor u32_lteq u32_lteq) +;; Pure/fallible constructor that tests if one u8 is less than or +;; equal to another. +(decl pure u8_lteq (u8 u8) Unit) +(extern constructor u8_lteq u8_lteq) + +;; Pure/fallible constructor that tests if one u8 is strictly less +;; than another. +(decl pure u8_lt (u8 u8) Unit) +(extern constructor u8_lt u8_lt) + ;; Get a signed 32-bit immediate in an u32 from an Imm64, if possible. (decl simm32 (u32) Imm64) (extern extractor simm32 simm32) @@ -83,143 +76,6 @@ (decl u8_and (u8 u8) u8) (extern constructor u8_and u8_and) -;;;; Registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(type Reg (primitive Reg)) -(type WritableReg (primitive WritableReg)) -(type OptionWritableReg (primitive OptionWritableReg)) -(type VecReg extern (enum)) -(type VecWritableReg extern (enum)) -(type PReg (primitive PReg)) - -;; Construct a `ValueRegs` of one register. -(decl value_reg (Reg) ValueRegs) -(extern constructor value_reg value_reg) - -;; Construct a `ValueRegs` of two registers. -(decl value_regs (Reg Reg) ValueRegs) -(extern constructor value_regs value_regs) - -;; Construct an empty `ValueRegs` containing only invalid register sentinels. -(decl value_regs_invalid () ValueRegs) -(extern constructor value_regs_invalid value_regs_invalid) - -;; Construct an empty `InstOutput`. -(decl output_none () InstOutput) -(extern constructor output_none output_none) - -;; Construct a single-element `InstOutput`. -(decl output (ValueRegs) InstOutput) -(extern constructor output output) - -;; Construct a two-element `InstOutput`. -(decl output_pair (ValueRegs ValueRegs) InstOutput) -(extern constructor output_pair output_pair) - -;; Construct a single-element `InstOutput` from a single register. 
-(decl output_reg (Reg) InstOutput) -(rule (output_reg reg) (output (value_reg reg))) - -;; Construct a single-element `InstOutput` from a value. -(decl output_value (Value) InstOutput) -(rule (output_value val) (output (put_in_regs val))) - -;; Initially empty `InstOutput` builder. -(decl output_builder_new () InstOutputBuilder) -(extern constructor output_builder_new output_builder_new) - -;; Append a `ValueRegs` to an `InstOutput` under construction. -(decl output_builder_push (InstOutputBuilder ValueRegs) Unit) -(extern constructor output_builder_push output_builder_push) - -;; Finish building an `InstOutput` incrementally. -(decl output_builder_finish (InstOutputBuilder) InstOutput) -(extern constructor output_builder_finish output_builder_finish) - -;; Get a temporary register for writing. -(decl temp_writable_reg (Type) WritableReg) -(extern constructor temp_writable_reg temp_writable_reg) - -;; Get a temporary register for reading. -(decl temp_reg (Type) Reg) -(rule (temp_reg ty) - (writable_reg_to_reg (temp_writable_reg ty))) - -(decl is_valid_reg (bool) Reg) -(extern extractor infallible is_valid_reg is_valid_reg) - -;; Get or match the invalid register. -(decl invalid_reg () Reg) -(extern constructor invalid_reg invalid_reg) -(extractor (invalid_reg) (is_valid_reg $false)) - -;; Match any register but the invalid register. -(decl valid_reg (Reg) Reg) -(extractor (valid_reg reg) (and (is_valid_reg $true) reg)) - -;; Mark this value as used, to ensure that it gets lowered. -(decl mark_value_used (Value) Unit) -(extern constructor mark_value_used mark_value_used) - -;; Put the given value into a register. -;; -;; Asserts that the value fits into a single register, and doesn't require -;; multiple registers for its representation (like `i128` on x64 for example). -;; -;; As a side effect, this marks the value as used. -(decl put_in_reg (Value) Reg) -(extern constructor put_in_reg put_in_reg) - -;; Put the given value into one or more registers. 
-;; -;; As a side effect, this marks the value as used. -(decl put_in_regs (Value) ValueRegs) -(extern constructor put_in_regs put_in_regs) - -;; If the given reg is a real register, cause the value in reg to be in a virtual -;; reg, by copying it into a new virtual reg. -(decl ensure_in_vreg (Reg Type) Reg) -(extern constructor ensure_in_vreg ensure_in_vreg) - -;; Get the `n`th register inside a `ValueRegs`. -(decl value_regs_get (ValueRegs usize) Reg) -(extern constructor value_regs_get value_regs_get) - -;; Get the number of registers in a `ValueRegs`. -(decl value_regs_len (ValueRegs) usize) -(extern constructor value_regs_len value_regs_len) - -;; Get a range for the number of regs in a `ValueRegs`. -(decl value_regs_range (ValueRegs) Range) -(rule (value_regs_range regs) (range 0 (value_regs_len regs))) - -;; Put the value into one or more registers and return the first register. -;; -;; Unlike `put_in_reg`, this does not assert that the value fits in a single -;; register. This is useful for things like a `i128` shift amount, where we mask -;; the shift amount to the bit width of the value being shifted, and so the high -;; half of the `i128` won't ever be used. -;; -;; As a side efect, this marks that value as used. 
-(decl lo_reg (Value) Reg) -(rule (lo_reg val) - (let ((regs ValueRegs (put_in_regs val))) - (value_regs_get regs 0))) - -;; Convert a `PReg` into a `Reg` -(decl preg_to_reg (PReg) Reg) -(extern constructor preg_to_reg preg_to_reg) - -;;;; Common Mach Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(type MachLabel (primitive MachLabel)) -(type ValueLabel (primitive ValueLabel)) -(type UnwindInst (primitive UnwindInst)) -(type ExternalName (primitive ExternalName)) -(type BoxExternalName (primitive BoxExternalName)) -(type RelocDistance (primitive RelocDistance)) -(type VecArgPair extern (enum)) - ;;;; Primitive Type Conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl pure u8_as_u32 (u8) u32) @@ -245,9 +101,30 @@ (decl pure u64_sub (u64 u64) u64) (extern constructor u64_sub u64_sub) +(decl pure u64_mul (u64 u64) u64) +(extern constructor u64_mul u64_mul) + +(decl pure u64_sdiv (u64 u64) u64) +(extern constructor u64_sdiv u64_sdiv) + +(decl pure u64_udiv (u64 u64) u64) +(extern constructor u64_udiv u64_udiv) + (decl pure u64_and (u64 u64) u64) (extern constructor u64_and u64_and) +(decl pure u64_or (u64 u64) u64) +(extern constructor u64_or u64_or) + +(decl pure u64_xor (u64 u64) u64) +(extern constructor u64_xor u64_xor) + +(decl pure u64_not (u64) u64) +(extern constructor u64_not u64_not) + +(decl pure u64_sextend_u32 (u64) u64) +(extern constructor u64_sextend_u32 u64_sextend_u32) + (decl u64_is_zero (bool) u64) (extern extractor infallible u64_is_zero u64_is_zero) @@ -443,46 +320,6 @@ (decl not_i64x2 () Type) (extern extractor not_i64x2 not_i64x2) -;; Extractor to get a `ValueSlice` out of a `ValueList`. -(decl value_list_slice (ValueSlice) ValueList) -(extern extractor infallible value_list_slice value_list_slice) - -;; Extractor to test whether a `ValueSlice` is empty. 
-(decl value_slice_empty () ValueSlice) -(extern extractor value_slice_empty value_slice_empty) - -;; Extractor to split a `ValueSlice` into its first element plus a tail. -(decl value_slice_unwrap (Value ValueSlice) ValueSlice) -(extern extractor value_slice_unwrap value_slice_unwrap) - -;; Return the length of a `ValueSlice`. -(decl value_slice_len (ValueSlice) usize) -(extern constructor value_slice_len value_slice_len) - -;; Return any element of a `ValueSlice`. -(decl value_slice_get (ValueSlice usize) Value) -(extern constructor value_slice_get value_slice_get) - -;; Extractor to get the first element from a value list, along with its tail as -;; a `ValueSlice`. -(decl unwrap_head_value_list_1 (Value ValueSlice) ValueList) -(extractor (unwrap_head_value_list_1 head tail) - (value_list_slice (value_slice_unwrap head tail))) - -;; Extractor to get the first two elements from a value list, along with its -;; tail as a `ValueSlice`. -(decl unwrap_head_value_list_2 (Value Value ValueSlice) ValueList) -(extractor (unwrap_head_value_list_2 head1 head2 tail) - (value_list_slice (value_slice_unwrap head1 (value_slice_unwrap head2 tail)))) - -;; Constructor to test whether two values are same. -(decl pure same_value (Value Value) Value) -(extern constructor same_value same_value) - -;; Turn a `Writable` into a `Reg` via `Writable::to_reg`. -(decl writable_reg_to_reg (WritableReg) Reg) -(extern constructor writable_reg_to_reg writable_reg_to_reg) - ;; Extract a `u8` from an `Uimm8`. (decl u8_from_uimm8 (u8) Uimm8) (extern extractor infallible u8_from_uimm8 u8_from_uimm8) @@ -499,6 +336,10 @@ (decl nonzero_u64_from_imm64 (u64) Imm64) (extern extractor nonzero_u64_from_imm64 nonzero_u64_from_imm64) +;; Create a new Imm64. +(decl pure imm64 (u64) Imm64) +(extern constructor imm64 imm64) + ;; Extract a `u64` from an `Ieee32`. 
(decl u64_from_ieee32 (u64) Ieee32) (extern extractor infallible u64_from_ieee32 u64_from_ieee32) @@ -507,34 +348,6 @@ (decl u64_from_ieee64 (u64) Ieee64) (extern extractor infallible u64_from_ieee64 u64_from_ieee64) -;; Extract the result values for the given instruction. -(decl inst_results (ValueSlice) Inst) -(extern extractor infallible inst_results inst_results) - -;; Extract the first result value of the given instruction. -(decl first_result (Value) Inst) -(extern extractor first_result first_result) - -;; Extract the `InstructionData` for an `Inst`. -(decl inst_data (InstructionData) Inst) -(extern extractor infallible inst_data inst_data) - -;; Extract the type of a `Value`. -(decl value_type (Type) Value) -(extern extractor infallible value_type value_type) - -;; Extract the type of the instruction's first result. -(decl result_type (Type) Inst) -(extractor (result_type ty) - (first_result (value_type ty))) - -;; Extract the type of the instruction's first result and pass along the -;; instruction as well. -(decl has_type (Type Inst) Inst) -(extractor (has_type ty inst) - (and (result_type ty) - inst)) - ;; Match a multi-lane type, extracting (# bits per lane, # lanes) from the given ;; type. Will only match when there is more than one lane. (decl multi_lane (u32 u32) Type) @@ -565,27 +378,10 @@ (decl ty_dyn128_int (Type) Type) (extern extractor ty_dyn128_int ty_dyn128_int) -;; Match the instruction that defines the given value, if any. -(decl def_inst (Inst) Value) -(extern extractor def_inst def_inst) - -;; Extract a constant `u64` from a value defined by an `iconst`. -(decl u64_from_iconst (u64) Value) -(extractor (u64_from_iconst x) - (def_inst (iconst (u64_from_imm64 x)))) - ;; Convert an `Offset32` to a primitive number. (decl offset32_to_u32 (Offset32) u32) (extern constructor offset32_to_u32 offset32_to_u32) -;; Match any zero value for iconst, fconst32, fconst64, vconst and splat. 
-(decl pure zero_value (Value) Value) -(extern constructor zero_value zero_value) - -;; Match a sinkable instruction from a value operand. -(decl pure is_sinkable_inst (Value) Inst) -(extern constructor is_sinkable_inst is_sinkable_inst) - ;; This is a direct import of `IntCC::unsigned`. ;; Get the corresponding IntCC with the signed component removed. ;; For conditions without a signed component, this is a no-op. @@ -596,283 +392,6 @@ (decl pure signed_cond_code (IntCC) IntCC) (extern constructor signed_cond_code signed_cond_code) -;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Emit an instruction. -;; -;; This is low-level and side-effectful; it should only be used as an -;; implementation detail by helpers that preserve the SSA facade themselves. - -(decl emit (MInst) Unit) -(extern constructor emit emit) - -;; Sink an instruction. -;; -;; This is a side-effectful operation that notifies the context that the -;; instruction has been sunk into another instruction, and no longer needs to -;; be lowered. -(decl sink_inst (Inst) Unit) -(extern constructor sink_inst sink_inst) - -;; Constant pool emission. - -(type VCodeConstant (primitive VCodeConstant)) - -;; Add a u64 little-endian constant to the in-memory constant pool and -;; return a VCodeConstant index that refers to it. This is -;; side-effecting but idempotent (constants are deduplicated). -(decl emit_u64_le_const (u64) VCodeConstant) -(extern constructor emit_u64_le_const emit_u64_le_const) - -;; Add a u128 little-endian constant to the in-memory constant pool and -;; return a VCodeConstant index that refers to it. This is -;; side-effecting but idempotent (constants are deduplicated). -(decl emit_u128_le_const (u128) VCodeConstant) -(extern constructor emit_u128_le_const emit_u128_le_const) - -;; Fetch the VCodeConstant associated with a Constant. 
-(decl const_to_vconst (Constant) VCodeConstant) -(extern constructor const_to_vconst const_to_vconst) - -;;;; Helpers for Side-Effectful Instructions Without Results ;;;;;;;;;;;;;;;;;;; - -(type SideEffectNoResult (enum - (Inst (inst MInst)) - (Inst2 (inst1 MInst) - (inst2 MInst)) - (Inst3 (inst1 MInst) - (inst2 MInst) - (inst3 MInst)))) - -;; Create an empty `InstOutput`, but do emit the given side-effectful -;; instruction. -(decl side_effect (SideEffectNoResult) InstOutput) -(rule (side_effect (SideEffectNoResult.Inst inst)) - (let ((_ Unit (emit inst))) - (output_none))) -(rule (side_effect (SideEffectNoResult.Inst2 inst1 inst2)) - (let ((_ Unit (emit inst1)) - (_ Unit (emit inst2))) - (output_none))) -(rule (side_effect (SideEffectNoResult.Inst3 inst1 inst2 inst3)) - (let ((_ Unit (emit inst1)) - (_ Unit (emit inst2)) - (_ Unit (emit inst3))) - (output_none))) - -(decl side_effect_concat (SideEffectNoResult SideEffectNoResult) SideEffectNoResult) -(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst inst2)) - (SideEffectNoResult.Inst2 inst1 inst2)) -(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst2 inst2 inst3)) - (SideEffectNoResult.Inst3 inst1 inst2 inst3)) -(rule (side_effect_concat (SideEffectNoResult.Inst2 inst1 inst2) (SideEffectNoResult.Inst inst3)) - (SideEffectNoResult.Inst3 inst1 inst2 inst3)) - -;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Newtype wrapper around `MInst` for instructions that are used for their -;; effect on flags. -;; -;; Variant determines how result is given when combined with a -;; ConsumesFlags. See `with_flags` below for more. -(type ProducesFlags (enum - ;; For cases where the flags have been produced by another - ;; instruction, and we have out-of-band reasons to know - ;; that they won't be clobbered by the time we depend on - ;; them. 
- (AlreadyExistingFlags) - (ProducesFlagsSideEffect (inst MInst)) - (ProducesFlagsTwiceSideEffect (inst1 MInst) (inst2 MInst)) - ;; Not directly combinable with a ConsumesFlags; - ;; used in s390x and unwrapped directly by `trapif`. - (ProducesFlagsReturnsReg (inst MInst) (result Reg)) - (ProducesFlagsReturnsResultWithConsumer (inst MInst) (result Reg)))) - -;; Chain another producer to a `ProducesFlags`. -(decl produces_flags_append (ProducesFlags MInst) ProducesFlags) -(rule (produces_flags_append (ProducesFlags.ProducesFlagsSideEffect inst1) inst2) - (ProducesFlags.ProducesFlagsTwiceSideEffect inst1 inst2)) - -;; Newtype wrapper around `MInst` for instructions that consume flags. -;; -;; Variant determines how result is given when combined with a -;; ProducesFlags. See `with_flags` below for more. -(type ConsumesFlags (enum - (ConsumesFlagsSideEffect (inst MInst)) - (ConsumesFlagsSideEffect2 (inst1 MInst) (inst2 MInst)) - (ConsumesFlagsReturnsResultWithProducer (inst MInst) (result Reg)) - (ConsumesFlagsReturnsReg (inst MInst) (result Reg)) - (ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst) - (inst2 MInst) - (result ValueRegs)) - (ConsumesFlagsFourTimesReturnsValueRegs (inst1 MInst) - (inst2 MInst) - (inst3 MInst) - (inst4 MInst) - (result ValueRegs)))) - - - -;; Get the produced register out of a ProducesFlags. -(decl produces_flags_get_reg (ProducesFlags) Reg) -(rule (produces_flags_get_reg (ProducesFlags.ProducesFlagsReturnsReg _ reg)) reg) - -;; Modify a ProducesFlags to use it only for its side-effect, ignoring -;; its result. 
-(decl produces_flags_ignore (ProducesFlags) ProducesFlags) -(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsReg inst _)) - (ProducesFlags.ProducesFlagsSideEffect inst)) -(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsResultWithConsumer inst _)) - (ProducesFlags.ProducesFlagsSideEffect inst)) - -;; Helper for combining two flags-consumer instructions that return a -;; single Reg, giving a ConsumesFlags that returns both values in a -;; ValueRegs. -(decl consumes_flags_concat (ConsumesFlags ConsumesFlags) ConsumesFlags) -(rule (consumes_flags_concat (ConsumesFlags.ConsumesFlagsReturnsReg inst1 reg1) - (ConsumesFlags.ConsumesFlagsReturnsReg inst2 reg2)) - (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs - inst1 - inst2 - (value_regs reg1 reg2))) -(rule (consumes_flags_concat - (ConsumesFlags.ConsumesFlagsSideEffect inst1) - (ConsumesFlags.ConsumesFlagsSideEffect inst2)) - (ConsumesFlags.ConsumesFlagsSideEffect2 inst1 inst2)) - -;; Combine flags-producing and -consuming instructions together, ensuring that -;; they are emitted back-to-back and no other instructions can be emitted -;; between them and potentially clobber the flags. -;; -;; Returns a `ValueRegs` according to the specific combination of ProducesFlags and ConsumesFlags modes: -;; - SideEffect + ReturnsReg --> ValueReg with one Reg from consumer -;; - SideEffect + ReturnsValueRegs --> ValueReg as given from consumer -;; - ReturnsResultWithProducer + ReturnsResultWithConsumer --> ValueReg with low part from producer, high part from consumer -;; -;; See `with_flags_reg` below for a variant that extracts out just the lower Reg. 
-(decl with_flags (ProducesFlags ConsumesFlags) ValueRegs) - -(rule (with_flags (ProducesFlags.ProducesFlagsReturnsResultWithConsumer producer_inst producer_result) - (ConsumesFlags.ConsumesFlagsReturnsResultWithProducer consumer_inst consumer_result)) - (let ((_x Unit (emit producer_inst)) - (_y Unit (emit consumer_inst))) - (value_regs producer_result consumer_result))) - -(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst) - (ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result)) - (let ((_x Unit (emit producer_inst)) - (_y Unit (emit consumer_inst))) - (value_reg consumer_result))) - -(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst) - (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1 - consumer_inst_2 - consumer_result)) - ;; We must emit these instructions in order as the creator of - ;; the ConsumesFlags may be relying on dataflow dependencies - ;; amongst them. - (let ((_x Unit (emit producer_inst)) - (_y Unit (emit consumer_inst_1)) - (_z Unit (emit consumer_inst_2))) - consumer_result)) - -(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst) - (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1 - consumer_inst_2 - consumer_inst_3 - consumer_inst_4 - consumer_result)) - ;; We must emit these instructions in order as the creator of - ;; the ConsumesFlags may be relying on dataflow dependencies - ;; amongst them. 
- (let ((_x Unit (emit producer_inst)) - (_y Unit (emit consumer_inst_1)) - (_z Unit (emit consumer_inst_2)) - (_w Unit (emit consumer_inst_3)) - (_v Unit (emit consumer_inst_4))) - consumer_result)) - -(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2) - (ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result)) - (let ((_ Unit (emit producer_inst1)) - (_ Unit (emit producer_inst2)) - (_ Unit (emit consumer_inst))) - (value_reg consumer_result))) - -(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2) - (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1 - consumer_inst_2 - consumer_result)) - ;; We must emit these instructions in order as the creator of - ;; the ConsumesFlags may be relying on dataflow dependencies - ;; amongst them. - (let ((_ Unit (emit producer_inst1)) - (_ Unit (emit producer_inst2)) - (_ Unit (emit consumer_inst_1)) - (_ Unit (emit consumer_inst_2))) - consumer_result)) - -(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2) - (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1 - consumer_inst_2 - consumer_inst_3 - consumer_inst_4 - consumer_result)) - ;; We must emit these instructions in order as the creator of - ;; the ConsumesFlags may be relying on dataflow dependencies - ;; amongst them. - (let ((_ Unit (emit producer_inst1)) - (_ Unit (emit producer_inst2)) - (_ Unit (emit consumer_inst_1)) - (_ Unit (emit consumer_inst_2)) - (_ Unit (emit consumer_inst_3)) - (_ Unit (emit consumer_inst_4))) - consumer_result)) - -(decl with_flags_reg (ProducesFlags ConsumesFlags) Reg) -(rule (with_flags_reg p c) - (let ((v ValueRegs (with_flags p c))) - (value_regs_get v 0))) - -;; Indicate that the current state of the flags register from the instruction -;; that produces this Value is relied on. 
-(decl flags_to_producesflags (Value) ProducesFlags) -(rule (flags_to_producesflags val) - (let ((_ Unit (mark_value_used val))) - (ProducesFlags.AlreadyExistingFlags))) - -;; Combine a flags-producing instruction and a flags-consuming instruction that -;; produces no results. -;; -;; This function handles the following case only: -;; - ProducesFlagsSideEffect + ConsumesFlagsSideEffect -(decl with_flags_side_effect (ProducesFlags ConsumesFlags) SideEffectNoResult) - -(rule (with_flags_side_effect - (ProducesFlags.AlreadyExistingFlags) - (ConsumesFlags.ConsumesFlagsSideEffect c)) - (SideEffectNoResult.Inst c)) - -(rule (with_flags_side_effect - (ProducesFlags.AlreadyExistingFlags) - (ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2)) - (SideEffectNoResult.Inst2 c1 c2)) - -(rule (with_flags_side_effect - (ProducesFlags.ProducesFlagsSideEffect p) - (ConsumesFlags.ConsumesFlagsSideEffect c)) - (SideEffectNoResult.Inst2 p c)) - -(rule (with_flags_side_effect - (ProducesFlags.ProducesFlagsSideEffect p) - (ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2)) - (SideEffectNoResult.Inst3 p c1 c2)) - -(rule (with_flags_side_effect - (ProducesFlags.ProducesFlagsTwiceSideEffect p1 p2) - (ConsumesFlags.ConsumesFlagsSideEffect c)) - (SideEffectNoResult.Inst3 p1 p2 c)) - ;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl trap_code_division_by_zero () TrapCode) @@ -884,70 +403,6 @@ (decl trap_code_bad_conversion_to_integer () TrapCode) (extern constructor trap_code_bad_conversion_to_integer trap_code_bad_conversion_to_integer) -;;;; Helpers for accessing compilation flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(decl avoid_div_traps () Type) -(extern extractor avoid_div_traps avoid_div_traps) - -;; This definition should be kept up to date with the values defined in -;; cranelift/codegen/meta/src/shared/settings.rs -(type TlsModel extern (enum (None) (ElfGd) (Macho) (Coff))) - -(decl tls_model (TlsModel) Type) -(extern extractor infallible tls_model 
tls_model) - -(decl pure tls_model_is_elf_gd () Unit) -(extern constructor tls_model_is_elf_gd tls_model_is_elf_gd) - -(decl pure tls_model_is_macho () Unit) -(extern constructor tls_model_is_macho tls_model_is_macho) - -(decl pure tls_model_is_coff () Unit) -(extern constructor tls_model_is_coff tls_model_is_coff) - -(decl pure preserve_frame_pointers () Unit) -(extern constructor preserve_frame_pointers preserve_frame_pointers) - -;;;; Helpers for accessing instruction data ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Accessor for `FuncRef`. - -(decl func_ref_data (SigRef ExternalName RelocDistance) FuncRef) -(extern extractor infallible func_ref_data func_ref_data) - -;; Accessor for `GobalValue`. - -(decl symbol_value_data (ExternalName RelocDistance i64) GlobalValue) -(extern extractor symbol_value_data symbol_value_data) - -(decl box_external_name (ExternalName) BoxExternalName) -(extern constructor box_external_name box_external_name) - -;; Accessor for `RelocDistance`. - -(decl reloc_distance_near () RelocDistance) -(extern extractor reloc_distance_near reloc_distance_near) - -;; Accessor for `Immediate` as u128. - -(decl u128_from_immediate (u128) Immediate) -(extern extractor u128_from_immediate u128_from_immediate) - -;; Accessor for `Immediate` as a vector of u8 values. - -(decl vec_mask_from_immediate (VecMask) Immediate) -(extern extractor vec_mask_from_immediate vec_mask_from_immediate) - -;; Accessor for `Constant` as u128. - -(decl u128_from_constant (u128) Constant) -(extern extractor u128_from_constant u128_from_constant) - -;; Accessor for `Constant` as u64. - -(decl u64_from_constant (u64) Constant) -(extern extractor u64_from_constant u64_from_constant) - ;;;; Helpers for tail recursion loops ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; A range of integers to loop through. 
@@ -976,156 +431,7 @@ (decl range_unwrap (usize Range) Range) (extractor (range_unwrap index rest) (range_view (RangeView.NonEmpty index rest))) -;;;; Helpers for generating returns ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; The (writable) register(s) that will contain the n'th return value. -(decl retval (usize) WritableValueRegs) -(extern constructor retval retval) - -;; Extractor to check for the special case that a `WritableValueRegs` -;; contains only a single register. -(decl only_writable_reg (WritableReg) WritableValueRegs) -(extern extractor only_writable_reg only_writable_reg) - -;; Get the `n`th register inside a `WritableValueRegs`. -(decl writable_regs_get (WritableValueRegs usize) WritableReg) -(extern constructor writable_regs_get writable_regs_get) - -;;;; Helpers for generating calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Type to hold information about a function call signature. -(type Sig extern (enum)) - -;; Information how to pass one argument or return value. -(type ABIArg extern (enum)) - -;; Information how to pass a single slot of one argument or return value. -(type ABIArgSlot extern - (enum - (Reg - (reg RealReg) - (ty Type) - (extension ArgumentExtension)) - (Stack - (offset i64) - (ty Type) - (extension ArgumentExtension)))) - - -;; Physical register that may hold an argument or return value. -(type RealReg (primitive RealReg)) - -;; Instruction on whether and how to extend an argument value. -(type ArgumentExtension extern - (enum - (None) - (Uext) - (Sext))) - - -;; Get the number of arguments expected. -(decl abi_num_args (Sig) usize) -(extern constructor abi_num_args abi_num_args) - -;; Get information specifying how to pass one argument. -(decl abi_get_arg (Sig usize) ABIArg) -(extern constructor abi_get_arg abi_get_arg) - -;; Get the number of return values expected. -(decl abi_num_rets (Sig) usize) -(extern constructor abi_num_rets abi_num_rets) - -;; Get information specifying how to pass one return value. 
-(decl abi_get_ret (Sig usize) ABIArg) -(extern constructor abi_get_ret abi_get_ret) - -;; Get information specifying how to pass the implicit pointer -;; to the return-value area on the stack, if required. -(decl abi_ret_arg (ABIArg) Sig) -(extern extractor abi_ret_arg abi_ret_arg) - -;; Succeeds if no implicit return-value area pointer is required. -(decl abi_no_ret_arg () Sig) -(extern extractor abi_no_ret_arg abi_no_ret_arg) - -;; Size of the argument area. -(decl abi_sized_stack_arg_space (Sig) i64) -(extern constructor abi_sized_stack_arg_space abi_sized_stack_arg_space) - -;; Size of the return-value area. -(decl abi_sized_stack_ret_space (Sig) i64) -(extern constructor abi_sized_stack_ret_space abi_sized_stack_ret_space) - -;; StackSlot addr -(decl abi_stackslot_addr (WritableReg StackSlot Offset32) MInst) -(extern constructor abi_stackslot_addr abi_stackslot_addr) - -;; DynamicStackSlot addr -(decl abi_dynamic_stackslot_addr (WritableReg DynamicStackSlot) MInst) -(extern constructor abi_dynamic_stackslot_addr abi_dynamic_stackslot_addr) - -;; Extractor to detect the special case where an argument or -;; return value only requires a single slot to be passed. -(decl abi_arg_only_slot (ABIArgSlot) ABIArg) -(extern extractor abi_arg_only_slot abi_arg_only_slot) - -;; Extractor to detect the special case where a struct argument -;; is explicitly passed by reference using a hidden pointer. -(decl abi_arg_struct_pointer (ABIArgSlot i64 u64) ABIArg) -(extern extractor abi_arg_struct_pointer abi_arg_struct_pointer) - -;; Extractor to detect the special case where a non-struct argument -;; is implicitly passed by reference using a hidden pointer. -(decl abi_arg_implicit_pointer (ABIArgSlot i64 Type) ABIArg) -(extern extractor abi_arg_implicit_pointer abi_arg_implicit_pointer) - -;; Convert a real register number into a virtual register. 
-(decl real_reg_to_reg (RealReg) Reg) -(extern constructor real_reg_to_reg real_reg_to_reg) - -;; Convert a real register number into a writable virtual register. -(decl real_reg_to_writable_reg (RealReg) WritableReg) -(extern constructor real_reg_to_writable_reg real_reg_to_writable_reg) - -;; Generate a move between two registers. -(decl gen_move (Type WritableReg Reg) MInst) -(extern constructor gen_move gen_move) - -;; Copy a return value to a set of registers. -(decl copy_to_regs (WritableValueRegs Value) Unit) -(rule (copy_to_regs dsts val @ (value_type ty)) - (let ((srcs ValueRegs (put_in_regs val))) - (copy_to_regs_range ty (value_regs_range srcs) dsts srcs))) - -;; Helper for `copy_to_regs` that uses a range to index into the reg/value -;; vectors. Fails for the empty range. -(decl copy_to_regs_range (Type Range WritableValueRegs ValueRegs) Unit) - -(rule (copy_to_regs_range ty (range_empty) dsts srcs) - (unit)) - -(rule (copy_to_regs_range ty (range_unwrap head tail) dsts srcs) - (let ((dst WritableReg (writable_regs_get dsts head)) - (src Reg (value_regs_get srcs head)) - (_ Unit (emit (gen_move ty dst src)))) - (copy_to_regs_range ty tail dsts srcs))) - -(decl lower_return (Range ValueSlice) InstOutput) -(rule (lower_return (range_empty) _) (output_none)) -(rule (lower_return (range_unwrap head tail) args) - (let ((_ Unit (copy_to_regs (retval head) (value_slice_get args head)))) - (lower_return tail args))) - ;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(convert Inst Value def_inst) -(convert Reg ValueRegs value_reg) -(convert Value Reg put_in_reg) -(convert Value ValueRegs put_in_regs) -(convert WritableReg Reg writable_reg_to_reg) -(convert ValueRegs InstOutput output) -(convert Reg InstOutput output_reg) -(convert Value InstOutput output_value) (convert Offset32 u32 offset32_to_u32) -(convert ExternalName BoxExternalName box_external_name) -(convert PReg Reg preg_to_reg) + diff --git 
a/cranelift/codegen/src/prelude_lower.isle b/cranelift/codegen/src/prelude_lower.isle new file mode 100644 index 0000000000..f71286dba1 --- /dev/null +++ b/cranelift/codegen/src/prelude_lower.isle @@ -0,0 +1,740 @@ +;; Prelude definitions specific to lowering environments (backends) in +;; ISLE. + +;;;; Primitive and External Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; `cranelift-entity`-based identifiers. +(type Inst (primitive Inst)) +(type Value (primitive Value)) + +;; ISLE representation of `&[Value]`. +(type ValueSlice (primitive ValueSlice)) + +;; ISLE representation of `Vec<u8>` +(type VecMask extern (enum)) + +(type ValueList (primitive ValueList)) +(type ValueRegs (primitive ValueRegs)) +(type WritableValueRegs (primitive WritableValueRegs)) + +;; Instruction lowering result: a vector of `ValueRegs`. +(type InstOutput (primitive InstOutput)) +;; (Mutable) builder to incrementally construct an `InstOutput`. +(type InstOutputBuilder extern (enum)) + +;;;; Registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(type Reg (primitive Reg)) +(type WritableReg (primitive WritableReg)) +(type OptionWritableReg (primitive OptionWritableReg)) +(type VecReg extern (enum)) +(type VecWritableReg extern (enum)) +(type PReg (primitive PReg)) + +;; Construct a `ValueRegs` of one register. +(decl value_reg (Reg) ValueRegs) +(extern constructor value_reg value_reg) + +;; Construct a `ValueRegs` of two registers. +(decl value_regs (Reg Reg) ValueRegs) +(extern constructor value_regs value_regs) + +;; Construct an empty `ValueRegs` containing only invalid register sentinels. +(decl value_regs_invalid () ValueRegs) +(extern constructor value_regs_invalid value_regs_invalid) + +;; Construct an empty `InstOutput`. +(decl output_none () InstOutput) +(extern constructor output_none output_none) + +;; Construct a single-element `InstOutput`. 
+(decl output (ValueRegs) InstOutput) +(extern constructor output output) + +;; Construct a two-element `InstOutput`. +(decl output_pair (ValueRegs ValueRegs) InstOutput) +(extern constructor output_pair output_pair) + +;; Construct a single-element `InstOutput` from a single register. +(decl output_reg (Reg) InstOutput) +(rule (output_reg reg) (output (value_reg reg))) + +;; Construct a single-element `InstOutput` from a value. +(decl output_value (Value) InstOutput) +(rule (output_value val) (output (put_in_regs val))) + +;; Initially empty `InstOutput` builder. +(decl output_builder_new () InstOutputBuilder) +(extern constructor output_builder_new output_builder_new) + +;; Append a `ValueRegs` to an `InstOutput` under construction. +(decl output_builder_push (InstOutputBuilder ValueRegs) Unit) +(extern constructor output_builder_push output_builder_push) + +;; Finish building an `InstOutput` incrementally. +(decl output_builder_finish (InstOutputBuilder) InstOutput) +(extern constructor output_builder_finish output_builder_finish) + +;; Get a temporary register for writing. +(decl temp_writable_reg (Type) WritableReg) +(extern constructor temp_writable_reg temp_writable_reg) + +;; Get a temporary register for reading. +(decl temp_reg (Type) Reg) +(rule (temp_reg ty) + (writable_reg_to_reg (temp_writable_reg ty))) + +(decl is_valid_reg (bool) Reg) +(extern extractor infallible is_valid_reg is_valid_reg) + +;; Get or match the invalid register. +(decl invalid_reg () Reg) +(extern constructor invalid_reg invalid_reg) +(extractor (invalid_reg) (is_valid_reg $false)) + +;; Match any register but the invalid register. +(decl valid_reg (Reg) Reg) +(extractor (valid_reg reg) (and (is_valid_reg $true) reg)) + +;; Mark this value as used, to ensure that it gets lowered. +(decl mark_value_used (Value) Unit) +(extern constructor mark_value_used mark_value_used) + +;; Put the given value into a register. 
+;; +;; Asserts that the value fits into a single register, and doesn't require +;; multiple registers for its representation (like `i128` on x64 for example). +;; +;; As a side effect, this marks the value as used. +(decl put_in_reg (Value) Reg) +(extern constructor put_in_reg put_in_reg) + +;; Put the given value into one or more registers. +;; +;; As a side effect, this marks the value as used. +(decl put_in_regs (Value) ValueRegs) +(extern constructor put_in_regs put_in_regs) + +;; If the given reg is a real register, cause the value in reg to be in a virtual +;; reg, by copying it into a new virtual reg. +(decl ensure_in_vreg (Reg Type) Reg) +(extern constructor ensure_in_vreg ensure_in_vreg) + +;; Get the `n`th register inside a `ValueRegs`. +(decl value_regs_get (ValueRegs usize) Reg) +(extern constructor value_regs_get value_regs_get) + +;; Get the number of registers in a `ValueRegs`. +(decl value_regs_len (ValueRegs) usize) +(extern constructor value_regs_len value_regs_len) + +;; Get a range for the number of regs in a `ValueRegs`. +(decl value_regs_range (ValueRegs) Range) +(rule (value_regs_range regs) (range 0 (value_regs_len regs))) + +;; Put the value into one or more registers and return the first register. +;; +;; Unlike `put_in_reg`, this does not assert that the value fits in a single +;; register. This is useful for things like a `i128` shift amount, where we mask +;; the shift amount to the bit width of the value being shifted, and so the high +;; half of the `i128` won't ever be used. +;; +;; As a side effect, this marks that value as used. +(decl lo_reg (Value) Reg) +(rule (lo_reg val) + (let ((regs ValueRegs (put_in_regs val))) + (value_regs_get regs 0))) + +;; Convert a `PReg` into a `Reg`. 
+(decl preg_to_reg (PReg) Reg) +(extern constructor preg_to_reg preg_to_reg) + +;;;; Common Mach Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(type MachLabel (primitive MachLabel)) +(type ValueLabel (primitive ValueLabel)) +(type UnwindInst (primitive UnwindInst)) +(type ExternalName (primitive ExternalName)) +(type BoxExternalName (primitive BoxExternalName)) +(type RelocDistance (primitive RelocDistance)) +(type VecArgPair extern (enum)) + +;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Extractor to get a `ValueSlice` out of a `ValueList`. +(decl value_list_slice (ValueSlice) ValueList) +(extern extractor infallible value_list_slice value_list_slice) + +;; Extractor to test whether a `ValueSlice` is empty. +(decl value_slice_empty () ValueSlice) +(extern extractor value_slice_empty value_slice_empty) + +;; Extractor to split a `ValueSlice` into its first element plus a tail. +(decl value_slice_unwrap (Value ValueSlice) ValueSlice) +(extern extractor value_slice_unwrap value_slice_unwrap) + +;; Return the length of a `ValueSlice`. +(decl value_slice_len (ValueSlice) usize) +(extern constructor value_slice_len value_slice_len) + +;; Return any element of a `ValueSlice`. +(decl value_slice_get (ValueSlice usize) Value) +(extern constructor value_slice_get value_slice_get) + +;; Extractor to get the first element from a value list, along with its tail as +;; a `ValueSlice`. +(decl unwrap_head_value_list_1 (Value ValueSlice) ValueList) +(extractor (unwrap_head_value_list_1 head tail) + (value_list_slice (value_slice_unwrap head tail))) + +;; Extractor to get the first two elements from a value list, along with its +;; tail as a `ValueSlice`. +(decl unwrap_head_value_list_2 (Value Value ValueSlice) ValueList) +(extractor (unwrap_head_value_list_2 head1 head2 tail) + (value_list_slice (value_slice_unwrap head1 (value_slice_unwrap head2 tail)))) + +;; Constructor to test whether two values are the same. 
+(decl pure same_value (Value Value) Value) +(extern constructor same_value same_value) + +;; Turn a `Writable<Reg>` into a `Reg` via `Writable::to_reg`. +(decl writable_reg_to_reg (WritableReg) Reg) +(extern constructor writable_reg_to_reg writable_reg_to_reg) + +;; Extract the result values for the given instruction. +(decl inst_results (ValueSlice) Inst) +(extern extractor infallible inst_results inst_results) + +;; Extract the first result value of the given instruction. +(decl first_result (Value) Inst) +(extern extractor first_result first_result) + +;; Extract the `InstructionData` for an `Inst`. +(decl inst_data (InstructionData) Inst) +(extern extractor infallible inst_data inst_data) + +;; Extract the type of a `Value`. +(decl value_type (Type) Value) +(extern extractor infallible value_type value_type) + +;; Extract the type of the instruction's first result. +(decl result_type (Type) Inst) +(extractor (result_type ty) + (first_result (value_type ty))) + +;; Extract the type of the instruction's first result and pass along the +;; instruction as well. +(decl has_type (Type Inst) Inst) +(extractor (has_type ty inst) + (and (result_type ty) + inst)) + +;; Match the instruction that defines the given value, if any. +(decl def_inst (Inst) Value) +(extern extractor def_inst def_inst) + +;; Extract a constant `u64` from a value defined by an `iconst`. +(decl u64_from_iconst (u64) Value) +(extractor (u64_from_iconst x) + (def_inst (iconst (u64_from_imm64 x)))) + +;; Match any zero value for iconst, fconst32, fconst64, vconst and splat. +(decl pure zero_value (Value) Value) +(extern constructor zero_value zero_value) + +;; Match a sinkable instruction from a value operand. +(decl pure is_sinkable_inst (Value) Inst) +(extern constructor is_sinkable_inst is_sinkable_inst) + +;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Emit an instruction. 
+;; +;; This is low-level and side-effectful; it should only be used as an +;; implementation detail by helpers that preserve the SSA facade themselves. + +(decl emit (MInst) Unit) +(extern constructor emit emit) + +;; Sink an instruction. +;; +;; This is a side-effectful operation that notifies the context that the +;; instruction has been sunk into another instruction, and no longer needs to +;; be lowered. +(decl sink_inst (Inst) Unit) +(extern constructor sink_inst sink_inst) + +;; Constant pool emission. + +(type VCodeConstant (primitive VCodeConstant)) + +;; Add a u64 little-endian constant to the in-memory constant pool and +;; return a VCodeConstant index that refers to it. This is +;; side-effecting but idempotent (constants are deduplicated). +(decl emit_u64_le_const (u64) VCodeConstant) +(extern constructor emit_u64_le_const emit_u64_le_const) + +;; Add a u128 little-endian constant to the in-memory constant pool and +;; return a VCodeConstant index that refers to it. This is +;; side-effecting but idempotent (constants are deduplicated). +(decl emit_u128_le_const (u128) VCodeConstant) +(extern constructor emit_u128_le_const emit_u128_le_const) + +;; Fetch the VCodeConstant associated with a Constant. +(decl const_to_vconst (Constant) VCodeConstant) +(extern constructor const_to_vconst const_to_vconst) + +;;;; Helpers for Side-Effectful Instructions Without Results ;;;;;;;;;;;;;;;;;;; + +(type SideEffectNoResult (enum + (Inst (inst MInst)) + (Inst2 (inst1 MInst) + (inst2 MInst)) + (Inst3 (inst1 MInst) + (inst2 MInst) + (inst3 MInst)))) + +;; Create an empty `InstOutput`, but do emit the given side-effectful +;; instruction. 
+(decl side_effect (SideEffectNoResult) InstOutput) +(rule (side_effect (SideEffectNoResult.Inst inst)) + (let ((_ Unit (emit inst))) + (output_none))) +(rule (side_effect (SideEffectNoResult.Inst2 inst1 inst2)) + (let ((_ Unit (emit inst1)) + (_ Unit (emit inst2))) + (output_none))) +(rule (side_effect (SideEffectNoResult.Inst3 inst1 inst2 inst3)) + (let ((_ Unit (emit inst1)) + (_ Unit (emit inst2)) + (_ Unit (emit inst3))) + (output_none))) + +(decl side_effect_concat (SideEffectNoResult SideEffectNoResult) SideEffectNoResult) +(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst inst2)) + (SideEffectNoResult.Inst2 inst1 inst2)) +(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst2 inst2 inst3)) + (SideEffectNoResult.Inst3 inst1 inst2 inst3)) +(rule (side_effect_concat (SideEffectNoResult.Inst2 inst1 inst2) (SideEffectNoResult.Inst inst3)) + (SideEffectNoResult.Inst3 inst1 inst2 inst3)) + +;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Newtype wrapper around `MInst` for instructions that are used for their +;; effect on flags. +;; +;; Variant determines how result is given when combined with a +;; ConsumesFlags. See `with_flags` below for more. +(type ProducesFlags (enum + ;; For cases where the flags have been produced by another + ;; instruction, and we have out-of-band reasons to know + ;; that they won't be clobbered by the time we depend on + ;; them. + (AlreadyExistingFlags) + (ProducesFlagsSideEffect (inst MInst)) + (ProducesFlagsTwiceSideEffect (inst1 MInst) (inst2 MInst)) + ;; Not directly combinable with a ConsumesFlags; + ;; used in s390x and unwrapped directly by `trapif`. + (ProducesFlagsReturnsReg (inst MInst) (result Reg)) + (ProducesFlagsReturnsResultWithConsumer (inst MInst) (result Reg)))) + +;; Chain another producer to a `ProducesFlags`. 
+(decl produces_flags_append (ProducesFlags MInst) ProducesFlags) +(rule (produces_flags_append (ProducesFlags.ProducesFlagsSideEffect inst1) inst2) + (ProducesFlags.ProducesFlagsTwiceSideEffect inst1 inst2)) + +;; Newtype wrapper around `MInst` for instructions that consume flags. +;; +;; Variant determines how result is given when combined with a +;; ProducesFlags. See `with_flags` below for more. +(type ConsumesFlags (enum + (ConsumesFlagsSideEffect (inst MInst)) + (ConsumesFlagsSideEffect2 (inst1 MInst) (inst2 MInst)) + (ConsumesFlagsReturnsResultWithProducer (inst MInst) (result Reg)) + (ConsumesFlagsReturnsReg (inst MInst) (result Reg)) + (ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst) + (inst2 MInst) + (result ValueRegs)) + (ConsumesFlagsFourTimesReturnsValueRegs (inst1 MInst) + (inst2 MInst) + (inst3 MInst) + (inst4 MInst) + (result ValueRegs)))) + + + +;; Get the produced register out of a ProducesFlags. +(decl produces_flags_get_reg (ProducesFlags) Reg) +(rule (produces_flags_get_reg (ProducesFlags.ProducesFlagsReturnsReg _ reg)) reg) + +;; Modify a ProducesFlags to use it only for its side-effect, ignoring +;; its result. +(decl produces_flags_ignore (ProducesFlags) ProducesFlags) +(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsReg inst _)) + (ProducesFlags.ProducesFlagsSideEffect inst)) +(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsResultWithConsumer inst _)) + (ProducesFlags.ProducesFlagsSideEffect inst)) + +;; Helper for combining two flags-consumer instructions that return a +;; single Reg, giving a ConsumesFlags that returns both values in a +;; ValueRegs. 
+(decl consumes_flags_concat (ConsumesFlags ConsumesFlags) ConsumesFlags) +(rule (consumes_flags_concat (ConsumesFlags.ConsumesFlagsReturnsReg inst1 reg1) + (ConsumesFlags.ConsumesFlagsReturnsReg inst2 reg2)) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs + inst1 + inst2 + (value_regs reg1 reg2))) +(rule (consumes_flags_concat + (ConsumesFlags.ConsumesFlagsSideEffect inst1) + (ConsumesFlags.ConsumesFlagsSideEffect inst2)) + (ConsumesFlags.ConsumesFlagsSideEffect2 inst1 inst2)) + +;; Combine flags-producing and -consuming instructions together, ensuring that +;; they are emitted back-to-back and no other instructions can be emitted +;; between them and potentially clobber the flags. +;; +;; Returns a `ValueRegs` according to the specific combination of ProducesFlags and ConsumesFlags modes: +;; - SideEffect + ReturnsReg --> ValueReg with one Reg from consumer +;; - SideEffect + ReturnsValueRegs --> ValueReg as given from consumer +;; - ReturnsResultWithProducer + ReturnsResultWithConsumer --> ValueReg with low part from producer, high part from consumer +;; +;; See `with_flags_reg` below for a variant that extracts out just the lower Reg. 
+(decl with_flags (ProducesFlags ConsumesFlags) ValueRegs) + +(rule (with_flags (ProducesFlags.ProducesFlagsReturnsResultWithConsumer producer_inst producer_result) + (ConsumesFlags.ConsumesFlagsReturnsResultWithProducer consumer_inst consumer_result)) + (let ((_x Unit (emit producer_inst)) + (_y Unit (emit consumer_inst))) + (value_regs producer_result consumer_result))) + +(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst) + (ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result)) + (let ((_x Unit (emit producer_inst)) + (_y Unit (emit consumer_inst))) + (value_reg consumer_result))) + +(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1 + consumer_inst_2 + consumer_result)) + ;; We must emit these instructions in order as the creator of + ;; the ConsumesFlags may be relying on dataflow dependencies + ;; amongst them. + (let ((_x Unit (emit producer_inst)) + (_y Unit (emit consumer_inst_1)) + (_z Unit (emit consumer_inst_2))) + consumer_result)) + +(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1 + consumer_inst_2 + consumer_inst_3 + consumer_inst_4 + consumer_result)) + ;; We must emit these instructions in order as the creator of + ;; the ConsumesFlags may be relying on dataflow dependencies + ;; amongst them. 
+ (let ((_x Unit (emit producer_inst)) + (_y Unit (emit consumer_inst_1)) + (_z Unit (emit consumer_inst_2)) + (_w Unit (emit consumer_inst_3)) + (_v Unit (emit consumer_inst_4))) + consumer_result)) + +(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2) + (ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result)) + (let ((_ Unit (emit producer_inst1)) + (_ Unit (emit producer_inst2)) + (_ Unit (emit consumer_inst))) + (value_reg consumer_result))) + +(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1 + consumer_inst_2 + consumer_result)) + ;; We must emit these instructions in order as the creator of + ;; the ConsumesFlags may be relying on dataflow dependencies + ;; amongst them. + (let ((_ Unit (emit producer_inst1)) + (_ Unit (emit producer_inst2)) + (_ Unit (emit consumer_inst_1)) + (_ Unit (emit consumer_inst_2))) + consumer_result)) + +(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1 + consumer_inst_2 + consumer_inst_3 + consumer_inst_4 + consumer_result)) + ;; We must emit these instructions in order as the creator of + ;; the ConsumesFlags may be relying on dataflow dependencies + ;; amongst them. + (let ((_ Unit (emit producer_inst1)) + (_ Unit (emit producer_inst2)) + (_ Unit (emit consumer_inst_1)) + (_ Unit (emit consumer_inst_2)) + (_ Unit (emit consumer_inst_3)) + (_ Unit (emit consumer_inst_4))) + consumer_result)) + +(decl with_flags_reg (ProducesFlags ConsumesFlags) Reg) +(rule (with_flags_reg p c) + (let ((v ValueRegs (with_flags p c))) + (value_regs_get v 0))) + +;; Indicate that the current state of the flags register from the instruction +;; that produces this Value is relied on. 
+(decl flags_to_producesflags (Value) ProducesFlags) +(rule (flags_to_producesflags val) + (let ((_ Unit (mark_value_used val))) + (ProducesFlags.AlreadyExistingFlags))) + +;; Combine a flags-producing instruction and a flags-consuming instruction that +;; produces no results. +;; +;; This function handles the following case only: +;; - ProducesFlagsSideEffect + ConsumesFlagsSideEffect +(decl with_flags_side_effect (ProducesFlags ConsumesFlags) SideEffectNoResult) + +(rule (with_flags_side_effect + (ProducesFlags.AlreadyExistingFlags) + (ConsumesFlags.ConsumesFlagsSideEffect c)) + (SideEffectNoResult.Inst c)) + +(rule (with_flags_side_effect + (ProducesFlags.AlreadyExistingFlags) + (ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2)) + (SideEffectNoResult.Inst2 c1 c2)) + +(rule (with_flags_side_effect + (ProducesFlags.ProducesFlagsSideEffect p) + (ConsumesFlags.ConsumesFlagsSideEffect c)) + (SideEffectNoResult.Inst2 p c)) + +(rule (with_flags_side_effect + (ProducesFlags.ProducesFlagsSideEffect p) + (ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2)) + (SideEffectNoResult.Inst3 p c1 c2)) + +(rule (with_flags_side_effect + (ProducesFlags.ProducesFlagsTwiceSideEffect p1 p2) + (ConsumesFlags.ConsumesFlagsSideEffect c)) + (SideEffectNoResult.Inst3 p1 p2 c)) + +;;;; Helpers for accessing compilation flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl avoid_div_traps () Type) +(extern extractor avoid_div_traps avoid_div_traps) + +;; This definition should be kept up to date with the values defined in +;; cranelift/codegen/meta/src/shared/settings.rs +(type TlsModel extern (enum (None) (ElfGd) (Macho) (Coff))) + +(decl tls_model (TlsModel) Type) +(extern extractor infallible tls_model tls_model) + +(decl pure tls_model_is_elf_gd () Unit) +(extern constructor tls_model_is_elf_gd tls_model_is_elf_gd) + +(decl pure tls_model_is_macho () Unit) +(extern constructor tls_model_is_macho tls_model_is_macho) + +(decl pure tls_model_is_coff () Unit) +(extern constructor tls_model_is_coff 
tls_model_is_coff) + +(decl pure preserve_frame_pointers () Unit) +(extern constructor preserve_frame_pointers preserve_frame_pointers) + +;;;; Helpers for accessing instruction data ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl box_external_name (ExternalName) BoxExternalName) +(extern constructor box_external_name box_external_name) + +;; Accessor for `FuncRef`. + +(decl func_ref_data (SigRef ExternalName RelocDistance) FuncRef) +(extern extractor infallible func_ref_data func_ref_data) + +;; Accessor for `GlobalValue`. + +(decl symbol_value_data (ExternalName RelocDistance i64) GlobalValue) +(extern extractor symbol_value_data symbol_value_data) + +;; Accessor for `RelocDistance`. + +(decl reloc_distance_near () RelocDistance) +(extern extractor reloc_distance_near reloc_distance_near) + +;; Accessor for `Immediate` as a vector of u8 values. + +(decl vec_mask_from_immediate (VecMask) Immediate) +(extern extractor vec_mask_from_immediate vec_mask_from_immediate) + +;; Accessor for `Immediate` as u128. + +(decl u128_from_immediate (u128) Immediate) +(extern extractor u128_from_immediate u128_from_immediate) + +;; Accessor for `Constant` as u128. + +(decl u128_from_constant (u128) Constant) +(extern extractor u128_from_constant u128_from_constant) + +;; Accessor for `Constant` as u64. + +(decl u64_from_constant (u64) Constant) +(extern extractor u64_from_constant u64_from_constant) + +;;;; Helpers for generating returns ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The (writable) register(s) that will contain the n'th return value. +(decl retval (usize) WritableValueRegs) +(extern constructor retval retval) + +;; Extractor to check for the special case that a `WritableValueRegs` +;; contains only a single register. +(decl only_writable_reg (WritableReg) WritableValueRegs) +(extern extractor only_writable_reg only_writable_reg) + +;; Get the `n`th register inside a `WritableValueRegs`. 
+(decl writable_regs_get (WritableValueRegs usize) WritableReg) +(extern constructor writable_regs_get writable_regs_get) + +;;;; Helpers for generating calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Type to hold information about a function call signature. +(type Sig extern (enum)) + +;; Information how to pass one argument or return value. +(type ABIArg extern (enum)) + +;; Information how to pass a single slot of one argument or return value. +(type ABIArgSlot extern + (enum + (Reg + (reg RealReg) + (ty Type) + (extension ArgumentExtension)) + (Stack + (offset i64) + (ty Type) + (extension ArgumentExtension)))) + +;; Physical register that may hold an argument or return value. +(type RealReg (primitive RealReg)) + +;; Instruction on whether and how to extend an argument value. +(type ArgumentExtension extern + (enum + (None) + (Uext) + (Sext))) + +;; Get the number of arguments expected. +(decl abi_num_args (Sig) usize) +(extern constructor abi_num_args abi_num_args) + +;; Get information specifying how to pass one argument. +(decl abi_get_arg (Sig usize) ABIArg) +(extern constructor abi_get_arg abi_get_arg) + +;; Get the number of return values expected. +(decl abi_num_rets (Sig) usize) +(extern constructor abi_num_rets abi_num_rets) + +;; Get information specifying how to pass one return value. +(decl abi_get_ret (Sig usize) ABIArg) +(extern constructor abi_get_ret abi_get_ret) + +;; Get information specifying how to pass the implicit pointer +;; to the return-value area on the stack, if required. +(decl abi_ret_arg (ABIArg) Sig) +(extern extractor abi_ret_arg abi_ret_arg) + +;; Succeeds if no implicit return-value area pointer is required. +(decl abi_no_ret_arg () Sig) +(extern extractor abi_no_ret_arg abi_no_ret_arg) + +;; Size of the argument area. +(decl abi_sized_stack_arg_space (Sig) i64) +(extern constructor abi_sized_stack_arg_space abi_sized_stack_arg_space) + +;; Size of the return-value area. 
+(decl abi_sized_stack_ret_space (Sig) i64) +(extern constructor abi_sized_stack_ret_space abi_sized_stack_ret_space) + +;; StackSlot addr +(decl abi_stackslot_addr (WritableReg StackSlot Offset32) MInst) +(extern constructor abi_stackslot_addr abi_stackslot_addr) + +;; DynamicStackSlot addr +(decl abi_dynamic_stackslot_addr (WritableReg DynamicStackSlot) MInst) +(extern constructor abi_dynamic_stackslot_addr abi_dynamic_stackslot_addr) + +;; Extractor to detect the special case where an argument or +;; return value only requires a single slot to be passed. +(decl abi_arg_only_slot (ABIArgSlot) ABIArg) +(extern extractor abi_arg_only_slot abi_arg_only_slot) + +;; Extractor to detect the special case where a struct argument +;; is explicitly passed by reference using a hidden pointer. +(decl abi_arg_struct_pointer (ABIArgSlot i64 u64) ABIArg) +(extern extractor abi_arg_struct_pointer abi_arg_struct_pointer) + +;; Extractor to detect the special case where a non-struct argument +;; is implicitly passed by reference using a hidden pointer. +(decl abi_arg_implicit_pointer (ABIArgSlot i64 Type) ABIArg) +(extern extractor abi_arg_implicit_pointer abi_arg_implicit_pointer) + +;; Convert a real register number into a virtual register. +(decl real_reg_to_reg (RealReg) Reg) +(extern constructor real_reg_to_reg real_reg_to_reg) + +;; Convert a real register number into a writable virtual register. +(decl real_reg_to_writable_reg (RealReg) WritableReg) +(extern constructor real_reg_to_writable_reg real_reg_to_writable_reg) + +;; Generate a move between two registers. +(decl gen_move (Type WritableReg Reg) MInst) +(extern constructor gen_move gen_move) + +;; Copy a return value to a set of registers. 
+(decl copy_to_regs (WritableValueRegs Value) Unit) +(rule (copy_to_regs dsts val @ (value_type ty)) + (let ((srcs ValueRegs (put_in_regs val))) + (copy_to_regs_range ty (value_regs_range srcs) dsts srcs))) + +;; Helper for `copy_to_regs` that uses a range to index into the reg/value +;; vectors. Fails for the empty range. +(decl copy_to_regs_range (Type Range WritableValueRegs ValueRegs) Unit) + +(rule (copy_to_regs_range ty (range_empty) dsts srcs) + (unit)) + +(rule (copy_to_regs_range ty (range_unwrap head tail) dsts srcs) + (let ((dst WritableReg (writable_regs_get dsts head)) + (src Reg (value_regs_get srcs head)) + (_ Unit (emit (gen_move ty dst src)))) + (copy_to_regs_range ty tail dsts srcs))) + + +(decl lower_return (Range ValueSlice) InstOutput) +(rule (lower_return (range_empty) _) (output_none)) +(rule (lower_return (range_unwrap head tail) args) + (let ((_ Unit (copy_to_regs (retval head) (value_slice_get args head)))) + (lower_return tail args))) + +;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(convert Inst Value def_inst) +(convert Reg ValueRegs value_reg) +(convert Value Reg put_in_reg) +(convert Value ValueRegs put_in_regs) +(convert WritableReg Reg writable_reg_to_reg) +(convert ValueRegs InstOutput output) +(convert Reg InstOutput output_reg) +(convert Value InstOutput output_value) +(convert ExternalName BoxExternalName box_external_name) +(convert PReg Reg preg_to_reg) diff --git a/cranelift/codegen/src/prelude_opt.isle b/cranelift/codegen/src/prelude_opt.isle new file mode 100644 index 0000000000..46baaddd13 --- /dev/null +++ b/cranelift/codegen/src/prelude_opt.isle @@ -0,0 +1,61 @@ +;; Prelude definitions specific to the mid-end. + +;;;;; eclass and enode access ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; An eclass ID. +(type Id (primitive Id)) + +;; What is the type of an eclass (if a single type)? 
+(decl eclass_type (Type) Id) +(extern extractor eclass_type eclass_type) + +;; Helper to wrap an Id-matching pattern and extract type. +(decl has_type (Type Id) Id) +(extractor (has_type ty id) + (and (eclass_type ty) + id)) + +;; Extract any node(s) for the given eclass ID. +(decl multi enodes (Type InstructionImms IdArray) Id) +(extern extractor enodes enodes_etor) + +;; Construct a pure node, returning a new (or deduplicated +;; already-existing) eclass ID. +(decl pure_enode (Type InstructionImms IdArray) Id) +(extern constructor pure_enode pure_enode_ctor) + +;; Type of an Id slice (for args). +(type IdArray (primitive IdArray)) + +(decl id_array_0 () IdArray) +(extern constructor id_array_0 id_array_0_ctor) +(extern extractor id_array_0 id_array_0_etor) +(decl id_array_1 (Id) IdArray) +(extern constructor id_array_1 id_array_1_ctor) +(extern extractor id_array_1 id_array_1_etor) +(decl id_array_2 (Id Id) IdArray) +(extern constructor id_array_2 id_array_2_ctor) +(extern extractor id_array_2 id_array_2_etor) +(decl id_array_3 (Id Id Id) IdArray) +(extern constructor id_array_3 id_array_3_ctor) +(extern extractor id_array_3 id_array_3_etor) + +;; Extractor to get the min loop-level of an eclass. +(decl at_loop_level (u8 Id) Id) +(extern extractor infallible at_loop_level at_loop_level) + +;;;;; optimization toplevel ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The main matcher rule invoked by the toplevel driver. +(decl multi simplify (Id) Id) + +;; Mark a node as requiring remat when used in a different block. +(decl remat (Id) Id) +(extern constructor remat remat) + +;; Mark a node as subsuming whatever else it's rewritten from -- this +;; is definitely preferable, not just a possible option. Useful for, +;; e.g., constant propagation where we arrive at a definite "final +;; answer". 
+(decl subsume (Id) Id) +(extern constructor subsume subsume) diff --git a/cranelift/codegen/src/scoped_hash_map.rs b/cranelift/codegen/src/scoped_hash_map.rs index 809d22132a..1b8fde1a54 100644 --- a/cranelift/codegen/src/scoped_hash_map.rs +++ b/cranelift/codegen/src/scoped_hash_map.rs @@ -6,25 +6,22 @@ use crate::fx::FxHashMap; use core::hash::Hash; -use core::mem; +use smallvec::{smallvec, SmallVec}; #[cfg(not(feature = "std"))] use crate::fx::FxHasher; #[cfg(not(feature = "std"))] type Hasher = core::hash::BuildHasherDefault; -struct Val { +struct Val { value: V, - next_key: Option, - depth: usize, + level: u32, + generation: u32, } /// A view into an occupied entry in a `ScopedHashMap`. It is part of the `Entry` enum. pub struct OccupiedEntry<'a, K: 'a, V: 'a> { - #[cfg(feature = "std")] - entry: super::hash_map::OccupiedEntry<'a, K, Val>, - #[cfg(not(feature = "std"))] - entry: super::hash_map::OccupiedEntry<'a, K, Val, Hasher>, + entry: super::hash_map::OccupiedEntry<'a, K, Val>, } impl<'a, K, V> OccupiedEntry<'a, K, V> { @@ -36,22 +33,34 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { /// A view into a vacant entry in a `ScopedHashMap`. It is part of the `Entry` enum. pub struct VacantEntry<'a, K: 'a, V: 'a> { - #[cfg(feature = "std")] - entry: super::hash_map::VacantEntry<'a, K, Val>, - #[cfg(not(feature = "std"))] - entry: super::hash_map::VacantEntry<'a, K, Val, Hasher>, - next_key: Option, - depth: usize, + entry: InsertLoc<'a, K, V>, + depth: u32, + generation: u32, } -impl<'a, K: Hash, V> VacantEntry<'a, K, V> { +/// Where to insert from a `VacantEntry`. May be vacant or occupied in +/// the underlying map because of lazy (generation-based) deletion. +enum InsertLoc<'a, K: 'a, V: 'a> { + Vacant(super::hash_map::VacantEntry<'a, K, Val>), + Occupied(super::hash_map::OccupiedEntry<'a, K, Val>), +} + +impl<'a, K, V> VacantEntry<'a, K, V> { /// Sets the value of the entry with the `VacantEntry`'s key. 
pub fn insert(self, value: V) { - self.entry.insert(Val { + let val = Val { value, - next_key: self.next_key, - depth: self.depth, - }); + level: self.depth, + generation: self.generation, + }; + match self.entry { + InsertLoc::Vacant(v) => { + v.insert(val); + } + InsertLoc::Occupied(mut o) => { + o.insert(val); + } + } } } @@ -69,9 +78,9 @@ pub enum Entry<'a, K: 'a, V: 'a> { /// Shadowing, where one scope has entries with the same keys as a containing scope, /// is not supported in this implementation. pub struct ScopedHashMap { - map: FxHashMap>, - last_insert: Option, - current_depth: usize, + map: FxHashMap>, + generation_by_depth: SmallVec<[u32; 8]>, + generation: u32, } impl ScopedHashMap @@ -82,52 +91,115 @@ where pub fn new() -> Self { Self { map: FxHashMap(), - last_insert: None, - current_depth: 0, + generation: 0, + generation_by_depth: smallvec![0], + } + } + + /// Creates an empty `ScopedHashMap` with some pre-allocated capacity. + pub fn with_capacity(cap: usize) -> Self { + let mut map = FxHashMap::default(); + map.reserve(cap); + Self { + map, + generation: 0, + generation_by_depth: smallvec![0], } } /// Similar to `FxHashMap::entry`, gets the given key's corresponding entry in the map for /// in-place manipulation. - pub fn entry(&mut self, key: K) -> Entry { + pub fn entry<'a>(&'a mut self, key: K) -> Entry<'a, K, V> { + self.entry_with_depth(key, self.depth()) + } + + /// Get the entry, setting the scope depth at which to insert. 
+ pub fn entry_with_depth<'a>(&'a mut self, key: K, depth: usize) -> Entry<'a, K, V> { + debug_assert!(depth <= self.generation_by_depth.len()); + let generation = self.generation_by_depth[depth]; + let depth = depth as u32; use super::hash_map::Entry::*; match self.map.entry(key) { - Occupied(entry) => Entry::Occupied(OccupiedEntry { entry }), - Vacant(entry) => { - let clone_key = entry.key().clone(); - Entry::Vacant(VacantEntry { - entry, - next_key: mem::replace(&mut self.last_insert, Some(clone_key)), - depth: self.current_depth, - }) + Occupied(entry) => { + let entry_generation = entry.get().generation; + let entry_depth = entry.get().level as usize; + if self.generation_by_depth.get(entry_depth).cloned() == Some(entry_generation) { + Entry::Occupied(OccupiedEntry { entry }) + } else { + Entry::Vacant(VacantEntry { + entry: InsertLoc::Occupied(entry), + depth, + generation, + }) + } + } + Vacant(entry) => Entry::Vacant(VacantEntry { + entry: InsertLoc::Vacant(entry), + depth, + generation, + }), + } + } + + /// Get a value from a key, if present. + pub fn get<'a>(&'a self, key: &K) -> Option<&'a V> { + self.map + .get(key) + .filter(|entry| { + let level = entry.level as usize; + self.generation_by_depth.get(level).cloned() == Some(entry.generation) + }) + .map(|entry| &entry.value) + } + + /// Insert a key-value pair if absent. No-op if already exists. + pub fn insert_if_absent(&mut self, key: K, value: V) { + self.insert_if_absent_with_depth(key, value, self.depth()); + } + + /// Insert a key-value pair if absent, using the given depth for + /// the insertion. No-op if already exists. + pub fn insert_if_absent_with_depth(&mut self, key: K, value: V, depth: usize) { + match self.entry_with_depth(key, depth) { + Entry::Vacant(v) => { + v.insert(value); + } + Entry::Occupied(_) => { + // Nothing. } } } /// Enter a new scope. pub fn increment_depth(&mut self) { - // Increment the depth. 
-        self.current_depth = self.current_depth.checked_add(1).unwrap();
+        self.generation_by_depth.push(self.generation);
     }
 
     /// Exit the current scope.
     pub fn decrement_depth(&mut self) {
-        // Remove all elements inserted at the current depth.
-        while let Some(key) = self.last_insert.clone() {
-            use crate::hash_map::Entry::*;
-            match self.map.entry(key) {
-                Occupied(entry) => {
-                    if entry.get().depth != self.current_depth {
-                        break;
-                    }
-                    self.last_insert = entry.remove_entry().1.next_key;
-                }
-                Vacant(_) => panic!(),
-            }
-        }
+        self.generation += 1;
+        self.generation_by_depth.pop();
+    }
 
-        // Decrement the depth.
-        self.current_depth = self.current_depth.checked_sub(1).unwrap();
+    /// Return the current scope depth.
+    pub fn depth(&self) -> usize {
+        self.generation_by_depth
+            .len()
+            .checked_sub(1)
+            .expect("generation_by_depth cannot be empty")
+    }
+
+    /// Remove an entry.
+    pub fn remove(&mut self, key: &K) -> Option {
+        self.map.remove(key).and_then(|val| {
+            let entry_generation = val.generation;
+            let entry_depth = val.level as usize;
+            if self.generation_by_depth.get(entry_depth).cloned() == Some(entry_generation) {
+                Some(val.value)
+            } else {
+                None
+            }
+        })
     }
 }
 
@@ -230,4 +302,22 @@ mod tests {
             Entry::Vacant(entry) => entry.insert(3),
         }
     }
+
+    #[test]
+    fn insert_arbitrary_depth() {
+        let mut map: ScopedHashMap = ScopedHashMap::new();
+        map.insert_if_absent(1, 2);
+        assert_eq!(map.get(&1), Some(&2));
+        map.increment_depth();
+        assert_eq!(map.get(&1), Some(&2));
+        map.insert_if_absent(3, 4);
+        assert_eq!(map.get(&3), Some(&4));
+        map.decrement_depth();
+        assert_eq!(map.get(&3), None);
+        map.increment_depth();
+        map.insert_if_absent_with_depth(3, 4, 0);
+        assert_eq!(map.get(&3), Some(&4));
+        map.decrement_depth();
+        assert_eq!(map.get(&3), Some(&4));
+    }
 }
 diff --git a/cranelift/codegen/src/settings.rs b/cranelift/codegen/src/settings.rs
 index 29ad916097..0a214dfbde 100644
 --- a/cranelift/codegen/src/settings.rs
 +++ b/cranelift/codegen/src/settings.rs
 @@ -529,6 
+529,7 @@ probestack_strategy = "outline" regalloc_checker = false regalloc_verbose_logs = false enable_alias_analysis = true +use_egraphs = false enable_verifier = true is_pic = false use_colocated_libcalls = false diff --git a/cranelift/egraph/src/lib.rs b/cranelift/egraph/src/lib.rs index 81006be2b8..af0f1729fe 100644 --- a/cranelift/egraph/src/lib.rs +++ b/cranelift/egraph/src/lib.rs @@ -87,15 +87,17 @@ //! //! ## Data Structure and Example //! -//! Each eclass id refers to a table entry that can be one of: +//! Each eclass id refers to a table entry ("eclass node", which is +//! different than an "enode") that can be one of: //! //! - A single enode; -//! - An enode and an earlier eclass id it is appended to; +//! - An enode and an earlier eclass id it is appended to (a "child" +//! eclass node); //! - A "union node" with two earlier eclass ids. //! //! Building the aegraph consists solely of adding new entries to the -//! end of this table. An enode in any given entry can only refer to -//! earlier eclass ids. +//! end of this table of eclass nodes. An enode referenced from any +//! given eclass node can only refer to earlier eclass ids. //! //! For example, consider the following eclass table: //! @@ -218,7 +220,7 @@ //! POPL 2021. use cranelift_entity::PrimaryMap; -use cranelift_entity::{entity_impl, packed_option::ReservedValue}; +use cranelift_entity::{entity_impl, packed_option::ReservedValue, SecondaryMap}; use smallvec::{smallvec, SmallVec}; use std::fmt::Debug; use std::hash::Hash; @@ -256,6 +258,20 @@ pub trait Language: CtxEq + CtxHash { fn needs_dedup(&self, node: &Self::Node) -> bool; } +/// A trait that allows the aegraph to compute a property of each +/// node as it is created. 
+pub trait Analysis { + type L: Language; + type Value: Clone + Default; + fn for_node( + &self, + ctx: &Self::L, + n: &::Node, + values: &SecondaryMap, + ) -> Self::Value; + fn meet(&self, ctx: &Self::L, v1: &Self::Value, v2: &Self::Value) -> Self::Value; +} + /// Conditionally-compiled trace-log macro. (Borrowed from /// `cranelift-codegen`; it's not worth factoring out a common /// subcrate for this.) @@ -269,18 +285,20 @@ macro_rules! trace { } /// An egraph. -pub struct EGraph { +pub struct EGraph> { /// Node-allocation arena. pub nodes: Vec, /// Hash-consing map from Nodes to eclass IDs. node_map: CtxHashMap, /// Eclass definitions. Each eclass consists of an enode, and - /// parent pointer to the rest of the eclass. + /// child pointer to the rest of the eclass. pub classes: PrimaryMap, /// Union-find for canonical ID generation. This lets us name an /// eclass with a canonical ID that is the same for all /// generations of the class. pub unionfind: UnionFind, + /// Analysis and per-node state. + pub analysis: Option<(A, SecondaryMap)>, } /// A reference to a node. @@ -298,7 +316,7 @@ impl NodeKey { /// Get the node for this NodeKey, given the `nodes` from the /// appropriate `EGraph`. 
- pub fn node<'a, L: Language>(&self, nodes: &'a [L::Node]) -> &'a L::Node { + pub fn node<'a, N>(&self, nodes: &'a [N]) -> &'a N { &nodes[self.index as usize] } @@ -311,35 +329,35 @@ impl NodeKey { } } -struct NodeKeyCtx<'a, L: Language> { +struct NodeKeyCtx<'a, 'b, L: Language> { nodes: &'a [L::Node], - node_ctx: &'a L, + node_ctx: &'b L, } -impl<'ctx, L: Language> CtxEq for NodeKeyCtx<'ctx, L> { +impl<'a, 'b, L: Language> CtxEq for NodeKeyCtx<'a, 'b, L> { fn ctx_eq(&self, a: &NodeKey, b: &NodeKey, uf: &mut UnionFind) -> bool { - let a = a.node::(self.nodes); - let b = b.node::(self.nodes); + let a = a.node(self.nodes); + let b = b.node(self.nodes); self.node_ctx.ctx_eq(a, b, uf) } } -impl<'ctx, L: Language> CtxHash for NodeKeyCtx<'ctx, L> { +impl<'a, 'b, L: Language> CtxHash for NodeKeyCtx<'a, 'b, L> { fn ctx_hash(&self, value: &NodeKey, uf: &mut UnionFind) -> u64 { - self.node_ctx.ctx_hash(value.node::(self.nodes), uf) + self.node_ctx.ctx_hash(value.node(self.nodes), uf) } } -/// An EClass entry. Contains either a single new enode and a parent -/// eclass (i.e., adds one new enode), or unions two parent eclasses +/// An EClass entry. Contains either a single new enode and a child +/// eclass (i.e., adds one new enode), or unions two child eclasses /// together. 
#[derive(Debug, Clone, Copy)] pub struct EClass { // formats: // - // 00 | unused (31 bits) | NodeKey (31 bits) - // 01 | eclass_parent (31 bits) | NodeKey (31 bits) - // 10 | eclass_parent_1 (31 bits) | eclass_parent_id_2 (31 bits) + // 00 | unused (31 bits) | NodeKey (31 bits) + // 01 | eclass_child (31 bits) | NodeKey (31 bits) + // 10 | eclass_child_1 (31 bits) | eclass_child_id_2 (31 bits) bits: u64, } @@ -352,47 +370,47 @@ impl EClass { } } - fn node_and_parent(node: NodeKey, eclass_parent: Id) -> EClass { + fn node_and_child(node: NodeKey, eclass_child: Id) -> EClass { let node_idx = node.bits() as u64; debug_assert!(node_idx < (1 << 31)); - debug_assert!(eclass_parent != Id::invalid()); - let parent = eclass_parent.0 as u64; - debug_assert!(parent < (1 << 31)); + debug_assert!(eclass_child != Id::invalid()); + let child = eclass_child.0 as u64; + debug_assert!(child < (1 << 31)); EClass { - bits: (0b01 << 62) | (parent << 31) | node_idx, + bits: (0b01 << 62) | (child << 31) | node_idx, } } - fn union(parent1: Id, parent2: Id) -> EClass { - debug_assert!(parent1 != Id::invalid()); - let parent1 = parent1.0 as u64; - debug_assert!(parent1 < (1 << 31)); + fn union(child1: Id, child2: Id) -> EClass { + debug_assert!(child1 != Id::invalid()); + let child1 = child1.0 as u64; + debug_assert!(child1 < (1 << 31)); - debug_assert!(parent2 != Id::invalid()); - let parent2 = parent2.0 as u64; - debug_assert!(parent2 < (1 << 31)); + debug_assert!(child2 != Id::invalid()); + let child2 = child2.0 as u64; + debug_assert!(child2 < (1 << 31)); EClass { - bits: (0b10 << 62) | (parent1 << 31) | parent2, + bits: (0b10 << 62) | (child1 << 31) | child2, } } - /// Get the node, if any, from a node-only or node-and-parent + /// Get the node, if any, from a node-only or node-and-child /// eclass. 
pub fn get_node(&self) -> Option { self.as_node() - .or_else(|| self.as_node_and_parent().map(|(node, _)| node)) + .or_else(|| self.as_node_and_child().map(|(node, _)| node)) } - /// Get the first parent, if any. - pub fn parent1(&self) -> Option { - self.as_node_and_parent() + /// Get the first child, if any. + pub fn child1(&self) -> Option { + self.as_node_and_child() .map(|(_, p1)| p1) .or(self.as_union().map(|(p1, _)| p1)) } - /// Get the second parent, if any. - pub fn parent2(&self) -> Option { + /// Get the second child, if any. + pub fn child2(&self) -> Option { self.as_union().map(|(_, p2)| p2) } @@ -406,25 +424,25 @@ impl EClass { } } - /// If this EClass is one new enode and a parent, return the node - /// and parent ID. - pub fn as_node_and_parent(&self) -> Option<(NodeKey, Id)> { + /// If this EClass is one new enode and a child, return the node + /// and child ID. + pub fn as_node_and_child(&self) -> Option<(NodeKey, Id)> { if (self.bits >> 62) == 0b01 { let node_idx = (self.bits & ((1 << 31) - 1)) as u32; - let parent = ((self.bits >> 31) & ((1 << 31) - 1)) as u32; - Some((NodeKey::from_bits(node_idx), Id::from_bits(parent))) + let child = ((self.bits >> 31) & ((1 << 31) - 1)) as u32; + Some((NodeKey::from_bits(node_idx), Id::from_bits(child))) } else { None } } - /// If this EClass is the union variety, return the two parent + /// If this EClass is the union variety, return the two child /// EClasses. Both are guaranteed not to be `Id::invalid()`. 
pub fn as_union(&self) -> Option<(Id, Id)> { if (self.bits >> 62) == 0b10 { - let parent1 = ((self.bits >> 31) & ((1 << 31) - 1)) as u32; - let parent2 = (self.bits & ((1 << 31) - 1)) as u32; - Some((Id::from_bits(parent1), Id::from_bits(parent2))) + let child1 = ((self.bits >> 31) & ((1 << 31) - 1)) as u32; + let child2 = (self.bits & ((1 << 31) - 1)) as u32; + Some((Id::from_bits(child1), Id::from_bits(child2))) } else { None } @@ -449,27 +467,31 @@ impl NewOrExisting { } } -impl EGraph +impl> EGraph where L::Node: 'static, { /// Create a new aegraph. - pub fn new() -> Self { + pub fn new(analysis: Option) -> Self { + let analysis = analysis.map(|a| (a, SecondaryMap::new())); Self { nodes: vec![], node_map: CtxHashMap::new(), classes: PrimaryMap::new(), unionfind: UnionFind::new(), + analysis, } } /// Create a new aegraph with the given capacity. - pub fn with_capacity(nodes: usize) -> Self { + pub fn with_capacity(nodes: usize, analysis: Option) -> Self { + let analysis = analysis.map(|a| (a, SecondaryMap::with_capacity(nodes))); Self { nodes: Vec::with_capacity(nodes), node_map: CtxHashMap::with_capacity(nodes), classes: PrimaryMap::with_capacity(nodes), unionfind: UnionFind::with_capacity(nodes), + analysis, } } @@ -506,6 +528,10 @@ where // Add to interning map with a NodeKey referring to the eclass. v.insert(eclass_id); + // Update analysis. + let node_ctx = ctx.node_ctx; + self.update_analysis(node_ctx, eclass_id); + NewOrExisting::New(eclass_id) } } @@ -520,7 +546,7 @@ where /// property (args must have lower eclass Ids than the eclass /// containing the node with those args). Returns the Id of the /// merged eclass. 
- pub fn union(&mut self, a: Id, b: Id) -> Id { + pub fn union(&mut self, ctx: &L, a: Id, b: Id) -> Id { assert_ne!(a, Id::invalid()); assert_ne!(b, Id::invalid()); let (a, b) = (std::cmp::max(a, b), std::cmp::min(a, b)); @@ -532,16 +558,17 @@ where self.unionfind.union(a, b); - // If the younger eclass has no parent, we can link it + // If the younger eclass has no child, we can link it // directly and return that eclass. Otherwise, we create a new // union eclass. if let Some(node) = self.classes[a].as_node() { trace!( - " -> id {} is one-node eclass; making into node-and-parent with id {}", + " -> id {} is one-node eclass; making into node-and-child with id {}", a, b ); - self.classes[a] = EClass::node_and_parent(node, b); + self.classes[a] = EClass::node_and_child(node, b); + self.update_analysis(ctx, a); return a; } @@ -549,6 +576,7 @@ where self.unionfind.add(u); self.unionfind.union(u, b); trace!(" -> union id {} and id {} into id {}", a, b, u); + self.update_analysis(ctx, u); u } @@ -569,12 +597,41 @@ where } /// Get the enodes for a given eclass. - pub fn enodes(&self, eclass: Id) -> NodeIter { + pub fn enodes(&self, eclass: Id) -> NodeIter { NodeIter { stack: smallvec![eclass], - _phantom: PhantomData, + _phantom1: PhantomData, + _phantom2: PhantomData, } } + + /// Update analysis for a given eclass node. 
+ fn update_analysis(&mut self, ctx: &L, eclass: Id) { + if let Some((analysis, state)) = self.analysis.as_mut() { + let eclass_data = self.classes[eclass]; + let value = if let Some(node_key) = eclass_data.as_node() { + let node = node_key.node(&self.nodes); + analysis.for_node(ctx, node, state) + } else if let Some((node_key, child)) = eclass_data.as_node_and_child() { + let node = node_key.node(&self.nodes); + let value = analysis.for_node(ctx, node, state); + let child_value = &state[child]; + analysis.meet(ctx, &value, child_value) + } else if let Some((c1, c2)) = eclass_data.as_union() { + let c1 = &state[c1]; + let c2 = &state[c2]; + analysis.meet(ctx, c1, c2) + } else { + panic!("Invalid eclass node: {:?}", eclass_data); + }; + state[eclass] = value; + } + } + + /// Get the analysis value for a given eclass. Panics if no analysis is present. + pub fn analysis_value(&self, eclass: Id) -> &A::Value { + &self.analysis.as_ref().unwrap().1[eclass] + } } /// An iterator over all nodes in an eclass. @@ -582,27 +639,28 @@ where /// Because eclasses are immutable once created, this does *not* need /// to hold an open borrow on the egraph; it is free to add new nodes, /// while our existing Ids will remain valid. 
-pub struct NodeIter { +pub struct NodeIter> { stack: SmallVec<[Id; 8]>, - _phantom: PhantomData, + _phantom1: PhantomData, + _phantom2: PhantomData, } -impl NodeIter { - pub fn next<'a>(&mut self, egraph: &'a EGraph) -> Option<&'a L::Node> { +impl> NodeIter { + pub fn next<'a>(&mut self, egraph: &'a EGraph) -> Option<&'a L::Node> { while let Some(next) = self.stack.pop() { let eclass = egraph.classes[next]; if let Some(node) = eclass.as_node() { return Some(&egraph.nodes[node.index as usize]); - } else if let Some((node, parent)) = eclass.as_node_and_parent() { - if parent != Id::invalid() { - self.stack.push(parent); + } else if let Some((node, child)) = eclass.as_node_and_child() { + if child != Id::invalid() { + self.stack.push(child); } return Some(&egraph.nodes[node.index as usize]); - } else if let Some((parent1, parent2)) = eclass.as_union() { - debug_assert!(parent1 != Id::invalid()); - debug_assert!(parent2 != Id::invalid()); - self.stack.push(parent2); - self.stack.push(parent1); + } else if let Some((child1, child2)) = eclass.as_union() { + debug_assert!(child1 != Id::invalid()); + debug_assert!(child2 != Id::invalid()); + self.stack.push(child2); + self.stack.push(child1); continue; } else { unreachable!("Invalid eclass format"); diff --git a/cranelift/egraph/src/unionfind.rs b/cranelift/egraph/src/unionfind.rs index 70106e2896..dd90fc8c23 100644 --- a/cranelift/egraph/src/unionfind.rs +++ b/cranelift/egraph/src/unionfind.rs @@ -2,6 +2,7 @@ use crate::{trace, Id}; use cranelift_entity::SecondaryMap; +use std::hash::{Hash, Hasher}; /// A union-find data structure. The data structure can allocate /// `Id`s, indicating eclasses, and can merge eclasses together. @@ -67,4 +68,18 @@ impl UnionFind { trace!("union: {}, {}", a, b); } } + + /// Determine if two `Id`s are equivalent, after + /// canonicalizing. Update union-find data structure during our + /// canonicalization to make future lookups faster. 
+ pub fn equiv_id_mut(&mut self, a: Id, b: Id) -> bool { + self.find_and_update(a) == self.find_and_update(b) + } + + /// Hash an `Id` after canonicalizing it. Update union-find data + /// structure to make future lookups/hashing faster. + pub fn hash_id_mut(&mut self, hash: &mut H, id: Id) { + let id = self.find_and_update(id); + id.hash(hash); + } } diff --git a/cranelift/entity/src/list.rs b/cranelift/entity/src/list.rs index 3434380ac4..659b94a9bc 100644 --- a/cranelift/entity/src/list.rs +++ b/cranelift/entity/src/list.rs @@ -143,6 +143,24 @@ impl ListPool { } } + /// Create a new list pool with the given capacity for data pre-allocated. + pub fn with_capacity(len: usize) -> Self { + Self { + data: Vec::with_capacity(len), + free: Vec::new(), + } + } + + /// Get the capacity of this pool. This will be somewhat higher + /// than the total length of lists that can be stored without + /// reallocating, because of internal metadata overheads. It is + /// mostly useful to allow another pool to be allocated that is + /// likely to hold data transferred from this one without the need + /// to grow. + pub fn capacity(&self) -> usize { + self.data.capacity() + } + /// Clear the pool, forgetting about all lists that use it. /// /// This invalidates any existing entity lists that used this pool to allocate memory. 
diff --git a/cranelift/filetests/filetests/egraph/algebraic.clif b/cranelift/filetests/filetests/egraph/algebraic.clif new file mode 100644 index 0000000000..51ae57c2ce --- /dev/null +++ b/cranelift/filetests/filetests/egraph/algebraic.clif @@ -0,0 +1,13 @@ +test optimize +set opt_level=none +set use_egraphs=true +target x86_64 + +function %f(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = imul v0, v1 + ; check: v1 = iadd v0, v0 + ; nextln: return v1 + return v2 +} diff --git a/cranelift/filetests/filetests/egraph/alias_analysis.clif b/cranelift/filetests/filetests/egraph/alias_analysis.clif new file mode 100644 index 0000000000..340455dfad --- /dev/null +++ b/cranelift/filetests/filetests/egraph/alias_analysis.clif @@ -0,0 +1,22 @@ +test optimize +set opt_level=none +set use_egraphs=true +target x86_64 + +function %f(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 0 + v2 = bor.i64 v0, v1 + v3 = load.i64 heap v0 + v4 = load.i64 heap v2 + v5 = band.i64 v3, v4 + store.i64 v0, v5 + v6 = load.i64 v3 + v7 = load.i64 v6 + return v7 +} + +; check: v1 = load.i64 heap v0 +; nextln: store v0, v1 +; nextln: v2 = load.i64 v0 +; nextln: return v2 diff --git a/cranelift/filetests/filetests/egraph/basic-gvn.clif b/cranelift/filetests/filetests/egraph/basic-gvn.clif new file mode 100644 index 0000000000..d8023f0ac9 --- /dev/null +++ b/cranelift/filetests/filetests/egraph/basic-gvn.clif @@ -0,0 +1,29 @@ +test optimize +set opt_level=none +set use_egraphs=true +target x86_64 + +function %f(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = iadd v0, v1 + brnz v2, block1(v0) + jump block2(v1) + +block1(v3: i32): + v4 = iadd v0, v1 + v5 = iadd v4, v3 + return v5 + +block2(v6: i32): + return v6 +} + +;; Check that the `iadd` for `v4` is subsumed by `v2`: + +; check: block0(v0: i32, v1: i32): +; nextln: v2 = iadd v0, v1 +; check: block1: +; nextln: v3 = iadd.i32 v2, v0 +; nextln: return v3 +; check: block2: +; nextln: return v1 diff --git 
a/cranelift/filetests/filetests/egraph/licm.clif b/cranelift/filetests/filetests/egraph/licm.clif new file mode 100644 index 0000000000..233763d9e6 --- /dev/null +++ b/cranelift/filetests/filetests/egraph/licm.clif @@ -0,0 +1,40 @@ +test optimize +set opt_level=none +set use_egraphs=true +target x86_64 + +function %f(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + jump block1(v0) + +block1(v2: i32): + v3 = iconst.i32 1 + v4 = iadd.i32 v1, v3 + v5 = iconst.i32 40 + v6 = icmp eq v2, v5 + v7 = iconst.i32 1 + v8 = iadd.i32 v2, v7 + brnz v6, block2(v4) + jump block1(v8) + +block2(v9: i32): + return v9 +} + +; check: block0(v0: i32, v1: i32): +; nextln: jump block1(v0) + +; check: block1(v2: i32): +;; constants are not lifted; they are rematerialized in each block where used +; nextln: v3 = iconst.i32 40 +; nextln: v4 = icmp eq v2, v3 +; nextln: v5 = iconst.i32 1 +; nextln: v6 = iadd v2, v5 +; nextln: brnz v4, block2 +; nextln: jump block1(v6) + +; check: block2: +; nextln: v7 = iconst.i32 1 +; nextln: v8 = iadd.i32 v1, v7 +; nextln: return v8 + diff --git a/cranelift/filetests/filetests/egraph/misc.clif b/cranelift/filetests/filetests/egraph/misc.clif new file mode 100644 index 0000000000..33b4c88197 --- /dev/null +++ b/cranelift/filetests/filetests/egraph/misc.clif @@ -0,0 +1,21 @@ +test optimize +set opt_level=none +set use_egraphs=true +target x86_64 + +function %stack_load(i64) -> i64 { + ss0 = explicit_slot 8 + +block0(v0: i64): + stack_store.i64 v0, ss0 + v1 = stack_load.i64 ss0 + return v1 +} + +; check: function %stack_load(i64) -> i64 fast { +; nextln: ss0 = explicit_slot 8 +; check: block0(v0: i64): +; nextln: v1 = stack_addr.i64 ss0 +; nextln: store notrap aligned v0, v1 +; nextln: return v0 +; nextln: } diff --git a/cranelift/filetests/filetests/egraph/multivalue.clif b/cranelift/filetests/filetests/egraph/multivalue.clif new file mode 100644 index 0000000000..f2e2e11472 --- /dev/null +++ b/cranelift/filetests/filetests/egraph/multivalue.clif @@ -0,0 +1,24 
@@ +test compile precise-output +set use_egraphs=true +target x86_64 + +;; We want to make sure that this compiles successfully, so we are properly +;; handling multi-value operator nodes. + +function u0:359(i64) -> i8, i8 system_v { + sig0 = (i64) -> i8, i8 system_v + fn0 = colocated u0:521 sig0 + + block0(v0: i64): + v3, v4 = call fn0(v0) + return v3, v4 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; call User(userextname0) +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/egraph/not_a_load.clif b/cranelift/filetests/filetests/egraph/not_a_load.clif new file mode 100644 index 0000000000..fde8c2d0e6 --- /dev/null +++ b/cranelift/filetests/filetests/egraph/not_a_load.clif @@ -0,0 +1,23 @@ +test compile precise-output +set use_egraphs=true +target x86_64 + +;; `atomic_rmw` is not a load, but it reports `true` to `.can_load()`. We want +;; to make sure the alias analysis machinery doesn't break when we have these odd +;; memory ops in the IR. + +function u0:1302(i64) -> i64 system_v { + block0(v0: i64): + v9 = atomic_rmw.i64 add v0, v0 + return v0 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; atomically { 64_bits_at_[%r9]) Add= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash } +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/egraph/remat.clif b/cranelift/filetests/filetests/egraph/remat.clif new file mode 100644 index 0000000000..0df7db6141 --- /dev/null +++ b/cranelift/filetests/filetests/egraph/remat.clif @@ -0,0 +1,35 @@ +test optimize +set opt_level=none +set use_egraphs=true +target x86_64 + +function %f(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 42 + v2 = iadd.i32 v0, v1 + brnz v2, block1 + jump block2 + +block1: + v3 = iconst.i32 84 + v4 = iadd.i32 v2, v3 + return v4 + +block2: + return v2 +} + +; check: block0(v0: i32): +; nextln: v1 = iconst.i32 42 +; nextln: v2 = iadd v0, v1 +; nextln: brnz v2, block1 +; nextln: jump block2 +; check: block1: +; 
nextln: v5 = iconst.i32 126 +; nextln: v6 = iadd.i32 v0, v5 +; nextln: return v6 +; check: block2: +; nextln: v3 = iconst.i32 42 +; nextln: v4 = iadd.i32 v0, v3 +; nextln: return v4 + diff --git a/cranelift/filetests/src/lib.rs b/cranelift/filetests/src/lib.rs index 9ea7431672..8bff0eb29d 100644 --- a/cranelift/filetests/src/lib.rs +++ b/cranelift/filetests/src/lib.rs @@ -45,6 +45,7 @@ mod test_domtree; mod test_interpret; mod test_legalizer; mod test_licm; +mod test_optimize; mod test_preopt; mod test_print_cfg; mod test_run; @@ -120,6 +121,7 @@ fn new_subtest(parsed: &TestCommand) -> anyhow::Result "interpret" => test_interpret::subtest(parsed), "legalizer" => test_legalizer::subtest(parsed), "licm" => test_licm::subtest(parsed), + "optimize" => test_optimize::subtest(parsed), "preopt" => test_preopt::subtest(parsed), "print-cfg" => test_print_cfg::subtest(parsed), "run" => test_run::subtest(parsed), diff --git a/cranelift/filetests/src/test_optimize.rs b/cranelift/filetests/src/test_optimize.rs new file mode 100644 index 0000000000..dfab6a1c4a --- /dev/null +++ b/cranelift/filetests/src/test_optimize.rs @@ -0,0 +1,47 @@ +//! Test command for testing the optimization phases. +//! +//! The `optimize` test command runs each function through the +//! optimization passes, but not lowering or regalloc. The output for +//! filecheck purposes is the resulting CLIF. +//! +//! Some legalization may be ISA-specific, so this requires an ISA +//! (for now). 
+ +use crate::subtest::{run_filecheck, Context, SubTest}; +use anyhow::Result; +use cranelift_codegen::ir; +use cranelift_reader::TestCommand; +use std::borrow::Cow; + +struct TestOptimize; + +pub fn subtest(parsed: &TestCommand) -> Result> { + assert_eq!(parsed.command, "optimize"); + Ok(Box::new(TestOptimize)) +} + +impl SubTest for TestOptimize { + fn name(&self) -> &'static str { + "optimize" + } + + fn is_mutating(&self) -> bool { + true + } + + fn needs_isa(&self) -> bool { + true + } + + fn run(&self, func: Cow, context: &Context) -> Result<()> { + let isa = context.isa.expect("optimize needs an ISA"); + let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); + + comp_ctx + .optimize(isa) + .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, e))?; + + let clif = format!("{:?}", comp_ctx.func); + run_filecheck(&clif, context) + } +} diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index c7b1302346..9703edd089 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -108,6 +108,18 @@ fn check_overlaps(env: &TermEnv) -> Errors { let mut by_term = HashMap::new(); for rule in env.rules.iter() { if let sema::Pattern::Term(_, tid, ref vars) = rule.lhs { + let is_multi_ctor = match &env.terms[tid.index()].kind { + &TermKind::Decl { multi, .. } => multi, + _ => false, + }; + if is_multi_ctor { + // Rules for multi-constructors are not checked for + // overlap: the ctor returns *every* match, not just + // the first or highest-priority one, so overlap does + // not actually affect the results. 
+ continue; + } + let mut binds = Vec::new(); let rule = RulePatterns { rule, diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index c25dba62b4..3d36dd849a 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -393,6 +393,7 @@ impl Engine { | "machine_code_cfg_info" | "tls_model" // wasmtime doesn't use tls right now | "opt_level" // opt level doesn't change semantics + | "use_egraphs" // optimizing with egraphs doesn't change semantics | "enable_alias_analysis" // alias analysis-based opts don't change semantics | "probestack_func_adjusts_sp" // probestack above asserted disabled | "probestack_size_log2" // probestack above asserted disabled diff --git a/scripts/publish.rs b/scripts/publish.rs index 600ab1372d..e417653735 100644 --- a/scripts/publish.rs +++ b/scripts/publish.rs @@ -24,8 +24,8 @@ const CRATES_TO_PUBLISH: &[&str] = &[ "cranelift-bforest", "cranelift-codegen-shared", "cranelift-codegen-meta", - "cranelift-codegen", "cranelift-egraph", + "cranelift-codegen", "cranelift-reader", "cranelift-serde", "cranelift-module", @@ -88,6 +88,7 @@ const PUBLIC_CRATES: &[&str] = &[ "cranelift-bforest", "cranelift-codegen-shared", "cranelift-codegen-meta", + "cranelift-egraph", "cranelift-codegen", "cranelift-reader", "cranelift-serde",