diff --git a/Cargo.lock b/Cargo.lock index ac6545cd1f..a91b1554dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -554,7 +554,6 @@ dependencies = [ "cranelift-bforest", "cranelift-codegen-meta", "cranelift-codegen-shared", - "cranelift-egraph", "cranelift-entity", "cranelift-isle", "criterion", @@ -580,18 +579,6 @@ dependencies = [ name = "cranelift-codegen-shared" version = "0.92.0" -[[package]] -name = "cranelift-egraph" -version = "0.92.0" -dependencies = [ - "cranelift-entity", - "fxhash", - "hashbrown", - "indexmap", - "log", - "smallvec", -] - [[package]] name = "cranelift-entity" version = "0.92.0" diff --git a/Cargo.toml b/Cargo.toml index 7f9cf233a7..e70b034024 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -78,7 +78,6 @@ opt-level = 0 resolver = '2' members = [ "cranelift", - "cranelift/egraph", "cranelift/isle/fuzz", "cranelift/isle/islec", "cranelift/serde", @@ -137,7 +136,6 @@ wasmtime-wit-bindgen = { path = "crates/wit-bindgen", version = "=5.0.0" } cranelift-wasm = { path = "cranelift/wasm", version = "0.92.0" } cranelift-codegen = { path = "cranelift/codegen", version = "0.92.0" } -cranelift-egraph = { path = "cranelift/egraph", version = "0.92.0" } cranelift-frontend = { path = "cranelift/frontend", version = "0.92.0" } cranelift-entity = { path = "cranelift/entity", version = "0.92.0" } cranelift-native = { path = "cranelift/native", version = "0.92.0" } diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 65818b9074..580af31d22 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -18,8 +18,7 @@ bumpalo = "3" cranelift-codegen-shared = { path = "./shared", version = "0.92.0" } cranelift-entity = { workspace = true } cranelift-bforest = { workspace = true } -cranelift-egraph = { workspace = true } -hashbrown = { workspace = true, optional = true } +hashbrown = { workspace = true } target-lexicon = { workspace = true } log = { workspace = true } serde = { version = "1.0.94", features = ["derive"], 
optional = true } @@ -42,16 +41,18 @@ cranelift-codegen-meta = { path = "meta", version = "0.92.0" } cranelift-isle = { path = "../isle/isle", version = "=0.92.0" } [features] -default = ["std", "unwind"] +default = ["std", "unwind", "trace-log"] # The "std" feature enables use of libstd. The "core" feature enables use # of some minimal std-like replacement libraries. At least one of these two # features need to be enabled. std = [] -# The "core" features enables use of "hashbrown" since core doesn't have -# a HashMap implementation, and a workaround for Cargo #4866. -core = ["hashbrown"] +# The "core" feature used to enable a hashmap workaround, but is now +# deprecated (we (i) always use hashbrown, and (ii) don't support a +# no_std build anymore). The feature remains for backward +# compatibility as a no-op. +core = [] # This enables some additional functions useful for writing tests, but which # can significantly increase the size of the library. diff --git a/cranelift/codegen/meta/src/gen_inst.rs b/cranelift/codegen/meta/src/gen_inst.rs index e0adf5827e..5e9f0abfef 100644 --- a/cranelift/codegen/meta/src/gen_inst.rs +++ b/cranelift/codegen/meta/src/gen_inst.rs @@ -60,51 +60,36 @@ fn gen_formats(formats: &[&InstructionFormat], fmt: &mut Formatter) { fmt.empty_line(); } -/// Generate the InstructionData and InstructionImms enums. +/// Generate the InstructionData enum. /// /// Every variant must contain an `opcode` field. The size of `InstructionData` should be kept at /// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a /// `ValueList` to store the additional information out of line. -/// -/// `InstructionImms` stores everything about an instruction except for the arguments: in other -/// words, the `Opcode` and any immediates or other parameters. `InstructionData` stores this, plus -/// the SSA `Value` arguments. 
fn gen_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) { - for (name, include_args) in &[("InstructionData", true), ("InstructionImms", false)] { - fmt.line("#[derive(Copy, Clone, Debug, PartialEq, Hash)]"); - if !include_args { - // `InstructionImms` gets some extra derives: it acts like a sort of - // extended opcode and we want to allow for hashconsing via `Eq`. - fmt.line("#[derive(Eq)]"); + fmt.line("#[derive(Copy, Clone, Debug, PartialEq, Hash)]"); + fmt.line(r#"#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]"#); + fmt.line("#[allow(missing_docs)]"); + fmtln!(fmt, "pub enum InstructionData {"); + fmt.indent(|fmt| { + for format in formats { + fmtln!(fmt, "{} {{", format.name); + fmt.indent(|fmt| { + fmt.line("opcode: Opcode,"); + if format.has_value_list { + fmt.line("args: ValueList,"); + } else if format.num_value_operands == 1 { + fmt.line("arg: Value,"); + } else if format.num_value_operands > 0 { + fmtln!(fmt, "args: [Value; {}],", format.num_value_operands); + } + for field in &format.imm_fields { + fmtln!(fmt, "{}: {},", field.member, field.kind.rust_type); + } + }); + fmtln!(fmt, "},"); } - fmt.line(r#"#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]"#); - fmt.line("#[allow(missing_docs)]"); - // Generate `enum InstructionData` or `enum InstructionImms`. (This - // comment exists so one can grep for `enum InstructionData`!) 
- fmtln!(fmt, "pub enum {} {{", name); - fmt.indent(|fmt| { - for format in formats { - fmtln!(fmt, "{} {{", format.name); - fmt.indent(|fmt| { - fmt.line("opcode: Opcode,"); - if *include_args { - if format.has_value_list { - fmt.line("args: ValueList,"); - } else if format.num_value_operands == 1 { - fmt.line("arg: Value,"); - } else if format.num_value_operands > 0 { - fmtln!(fmt, "args: [Value; {}],", format.num_value_operands); - } - } - for field in &format.imm_fields { - fmtln!(fmt, "{}: {},", field.member, field.kind.rust_type); - } - }); - fmtln!(fmt, "},"); - } - }); - fmt.line("}"); - } + }); + fmt.line("}"); } fn gen_arguments_method(formats: &[&InstructionFormat], fmt: &mut Formatter, is_mut: bool) { @@ -165,122 +150,6 @@ fn gen_arguments_method(formats: &[&InstructionFormat], fmt: &mut Formatter, is_ fmtln!(fmt, "}"); } -/// Generate the conversion from `InstructionData` to `InstructionImms`, stripping out the -/// `Value`s. -fn gen_instruction_data_to_instruction_imms(formats: &[&InstructionFormat], fmt: &mut Formatter) { - fmt.line("impl std::convert::From<&InstructionData> for InstructionImms {"); - fmt.indent(|fmt| { - fmt.doc_comment("Convert an `InstructionData` into an `InstructionImms`."); - fmt.line("fn from(data: &InstructionData) -> InstructionImms {"); - fmt.indent(|fmt| { - fmt.line("match data {"); - fmt.indent(|fmt| { - for format in formats { - fmtln!(fmt, "InstructionData::{} {{", format.name); - fmt.indent(|fmt| { - fmt.line("opcode,"); - for field in &format.imm_fields { - fmtln!(fmt, "{},", field.member); - } - fmt.line(".."); - }); - fmtln!(fmt, "}} => InstructionImms::{} {{", format.name); - fmt.indent(|fmt| { - fmt.line("opcode: *opcode,"); - for field in &format.imm_fields { - fmtln!(fmt, "{}: {}.clone(),", field.member, field.member); - } - }); - fmt.line("},"); - } - }); - fmt.line("}"); - }); - fmt.line("}"); - }); - fmt.line("}"); - fmt.empty_line(); -} - -/// Generate the conversion from `InstructionImms` to 
`InstructionData`, adding the -/// `Value`s. -fn gen_instruction_imms_to_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) { - fmt.line("impl InstructionImms {"); - fmt.indent(|fmt| { - fmt.doc_comment("Convert an `InstructionImms` into an `InstructionData` by adding args."); - fmt.line( - "pub fn with_args(&self, values: &[Value], value_list: &mut ValueListPool) -> InstructionData {", - ); - fmt.indent(|fmt| { - fmt.line("match self {"); - fmt.indent(|fmt| { - for format in formats { - fmtln!(fmt, "InstructionImms::{} {{", format.name); - fmt.indent(|fmt| { - fmt.line("opcode,"); - for field in &format.imm_fields { - fmtln!(fmt, "{},", field.member); - } - }); - fmt.line("} => {"); - if format.has_value_list { - fmtln!(fmt, "let args = ValueList::from_slice(values, value_list);"); - } - fmt.indent(|fmt| { - fmtln!(fmt, "InstructionData::{} {{", format.name); - fmt.indent(|fmt| { - fmt.line("opcode: *opcode,"); - for field in &format.imm_fields { - fmtln!(fmt, "{}: {}.clone(),", field.member, field.member); - } - if format.has_value_list { - fmtln!(fmt, "args,"); - } else if format.num_value_operands == 1 { - fmtln!(fmt, "arg: values[0],"); - } else if format.num_value_operands > 0 { - let mut args = vec![]; - for i in 0..format.num_value_operands { - args.push(format!("values[{}]", i)); - } - fmtln!(fmt, "args: [{}],", args.join(", ")); - } - }); - fmt.line("}"); - }); - fmt.line("},"); - } - }); - fmt.line("}"); - }); - fmt.line("}"); - }); - fmt.line("}"); - fmt.empty_line(); -} - -/// Generate the `opcode` method on InstructionImms. 
-fn gen_instruction_imms_impl(formats: &[&InstructionFormat], fmt: &mut Formatter) { - fmt.line("impl InstructionImms {"); - fmt.indent(|fmt| { - fmt.doc_comment("Get the opcode of this instruction."); - fmt.line("pub fn opcode(&self) -> Opcode {"); - fmt.indent(|fmt| { - let mut m = Match::new("*self"); - for format in formats { - m.arm( - format!("Self::{}", format.name), - vec!["opcode", ".."], - "opcode".to_string(), - ); - } - fmt.add_match(m); - }); - fmt.line("}"); - }); - fmt.line("}"); - fmt.empty_line(); -} - /// Generate the boring parts of the InstructionData implementation. /// /// These methods in `impl InstructionData` can be generated automatically from the instruction @@ -401,8 +270,12 @@ fn gen_instruction_data_impl(formats: &[&InstructionFormat], fmt: &mut Formatter This operation requires a reference to a `ValueListPool` to determine if the contents of any `ValueLists` are equal. + + This operation takes a closure that is allowed to map each + argument value to some other value before the instructions + are compared. This allows various forms of canonicalization. 
"#); - fmt.line("pub fn eq(&self, other: &Self, pool: &ir::ValueListPool) -> bool {"); + fmt.line("pub fn eq Value>(&self, other: &Self, pool: &ir::ValueListPool, mapper: F) -> bool {"); fmt.indent(|fmt| { fmt.line("if ::core::mem::discriminant(self) != ::core::mem::discriminant(other) {"); fmt.indent(|fmt| { @@ -418,13 +291,13 @@ fn gen_instruction_data_impl(formats: &[&InstructionFormat], fmt: &mut Formatter let args_eq = if format.has_value_list { members.push("args"); - Some("args1.as_slice(pool) == args2.as_slice(pool)") + Some("args1.as_slice(pool).iter().zip(args2.as_slice(pool).iter()).all(|(a, b)| mapper(*a) == mapper(*b))") } else if format.num_value_operands == 1 { members.push("arg"); - Some("arg1 == arg2") + Some("mapper(*arg1) == mapper(*arg2)") } else if format.num_value_operands > 0 { members.push("args"); - Some("args1 == args2") + Some("args1.iter().zip(args2.iter()).all(|(a, b)| mapper(*a) == mapper(*b))") } else { None }; @@ -459,8 +332,12 @@ fn gen_instruction_data_impl(formats: &[&InstructionFormat], fmt: &mut Formatter This operation requires a reference to a `ValueListPool` to hash the contents of any `ValueLists`. + + This operation takes a closure that is allowed to map each + argument value to some other value before it is hashed. This + allows various forms of canonicalization. 
"#); - fmt.line("pub fn hash(&self, state: &mut H, pool: &ir::ValueListPool) {"); + fmt.line("pub fn hash Value>(&self, state: &mut H, pool: &ir::ValueListPool, mapper: F) {"); fmt.indent(|fmt| { fmt.line("match *self {"); fmt.indent(|fmt| { @@ -468,17 +345,17 @@ fn gen_instruction_data_impl(formats: &[&InstructionFormat], fmt: &mut Formatter let name = format!("Self::{}", format.name); let mut members = vec!["opcode"]; - let args = if format.has_value_list { + let (args, len) = if format.has_value_list { members.push("ref args"); - "args.as_slice(pool)" + ("args.as_slice(pool)", "args.len(pool)") } else if format.num_value_operands == 1 { members.push("ref arg"); - "arg" - } else if format.num_value_operands > 0{ + ("std::slice::from_ref(arg)", "1") + } else if format.num_value_operands > 0 { members.push("ref args"); - "args" + ("args", "args.len()") } else { - "&()" + ("&[]", "0") }; for field in &format.imm_fields { @@ -493,7 +370,13 @@ fn gen_instruction_data_impl(formats: &[&InstructionFormat], fmt: &mut Formatter for field in &format.imm_fields { fmtln!(fmt, "::core::hash::Hash::hash(&{}, state);", field.member); } - fmtln!(fmt, "::core::hash::Hash::hash({}, state);", args); + fmtln!(fmt, "::core::hash::Hash::hash(&{}, state);", len); + fmtln!(fmt, "for &arg in {} {{", args); + fmt.indent(|fmt| { + fmtln!(fmt, "let arg = mapper(arg);"); + fmtln!(fmt, "::core::hash::Hash::hash(&arg, state);"); + }); + fmtln!(fmt, "}"); }); fmtln!(fmt, "}"); } @@ -1264,46 +1147,40 @@ fn gen_common_isle( gen_isle_enum(name, variants, fmt) } - if isle_target == IsleTarget::Lower { - // Generate all of the value arrays we need for `InstructionData` as well as - // the constructors and extractors for them. - fmt.line( - ";;;; Value Arrays ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;", + // Generate all of the value arrays we need for `InstructionData` as well as + // the constructors and extractors for them. 
+ fmt.line(";;;; Value Arrays ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;"); + fmt.empty_line(); + let value_array_arities: BTreeSet<_> = formats + .iter() + .filter(|f| f.typevar_operand.is_some() && !f.has_value_list && f.num_value_operands != 1) + .map(|f| f.num_value_operands) + .collect(); + for n in value_array_arities { + fmtln!(fmt, ";; ISLE representation of `[Value; {}]`.", n); + fmtln!(fmt, "(type ValueArray{} extern (enum))", n); + fmt.empty_line(); + + fmtln!( + fmt, + "(decl value_array_{} ({}) ValueArray{})", + n, + (0..n).map(|_| "Value").collect::>().join(" "), + n + ); + fmtln!( + fmt, + "(extern constructor value_array_{} pack_value_array_{})", + n, + n + ); + fmtln!( + fmt, + "(extern extractor infallible value_array_{} unpack_value_array_{})", + n, + n ); fmt.empty_line(); - let value_array_arities: BTreeSet<_> = formats - .iter() - .filter(|f| { - f.typevar_operand.is_some() && !f.has_value_list && f.num_value_operands != 1 - }) - .map(|f| f.num_value_operands) - .collect(); - for n in value_array_arities { - fmtln!(fmt, ";; ISLE representation of `[Value; {}]`.", n); - fmtln!(fmt, "(type ValueArray{} extern (enum))", n); - fmt.empty_line(); - - fmtln!( - fmt, - "(decl value_array_{} ({}) ValueArray{})", - n, - (0..n).map(|_| "Value").collect::>().join(" "), - n - ); - fmtln!( - fmt, - "(extern constructor value_array_{} pack_value_array_{})", - n, - n - ); - fmtln!( - fmt, - "(extern extractor infallible value_array_{} unpack_value_array_{})", - n, - n - ); - fmt.empty_line(); - } } // Generate the extern type declaration for `Opcode`. @@ -1322,32 +1199,24 @@ fn gen_common_isle( fmt.line(")"); fmt.empty_line(); - // Generate the extern type declaration for `InstructionData` - // (lowering) or `InstructionImms` (opt). - let inst_data_name = match isle_target { - IsleTarget::Lower => "InstructionData", - IsleTarget::Opt => "InstructionImms", - }; + // Generate the extern type declaration for `InstructionData`. 
fmtln!( fmt, - ";;;; `{}` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;", - inst_data_name + ";;;; `InstructionData` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;", ); fmt.empty_line(); - fmtln!(fmt, "(type {} extern", inst_data_name); + fmtln!(fmt, "(type InstructionData extern"); fmt.indent(|fmt| { fmt.line("(enum"); fmt.indent(|fmt| { for format in formats { let mut s = format!("({} (opcode Opcode)", format.name); - if isle_target == IsleTarget::Lower { - if format.has_value_list { - s.push_str(" (args ValueList)"); - } else if format.num_value_operands == 1 { - s.push_str(" (arg Value)"); - } else if format.num_value_operands > 1 { - write!(&mut s, " (args ValueArray{})", format.num_value_operands).unwrap(); - } + if format.has_value_list { + s.push_str(" (args ValueList)"); + } else if format.num_value_operands == 1 { + s.push_str(" (arg Value)"); + } else if format.num_value_operands > 1 { + write!(&mut s, " (args ValueArray{})", format.num_value_operands).unwrap(); } for field in &format.imm_fields { write!( @@ -1370,13 +1239,12 @@ fn gen_common_isle( // Generate the helper extractors for each opcode's full instruction. 
fmtln!( fmt, - ";;;; Extracting Opcode, Operands, and Immediates from `{}` ;;;;;;;;", - inst_data_name + ";;;; Extracting Opcode, Operands, and Immediates from `InstructionData` ;;;;;;;;", ); fmt.empty_line(); let ret_ty = match isle_target { IsleTarget::Lower => "Inst", - IsleTarget::Opt => "Id", + IsleTarget::Opt => "Value", }; for inst in instructions { if isle_target == IsleTarget::Opt && inst.format.has_value_list { @@ -1395,23 +1263,10 @@ fn gen_common_isle( .iter() .map(|o| { let ty = o.kind.rust_type; - match isle_target { - IsleTarget::Lower => { - if ty == "&[Value]" { - "ValueSlice" - } else { - ty.rsplit("::").next().unwrap() - } - } - IsleTarget::Opt => { - if ty == "&[Value]" { - panic!("value slice in mid-end extractor"); - } else if ty == "Value" || ty == "ir::Value" { - "Id" - } else { - ty.rsplit("::").next().unwrap() - } - } + if ty == "&[Value]" { + "ValueSlice" + } else { + ty.rsplit("::").next().unwrap() } }) .collect::>() @@ -1435,102 +1290,55 @@ fn gen_common_isle( .join(" ") ); - if isle_target == IsleTarget::Lower { - let mut s = format!( - "(inst_data (InstructionData.{} (Opcode.{})", - inst.format.name, inst.camel_name - ); + let mut s = format!( + "(inst_data{} (InstructionData.{} (Opcode.{})", + match isle_target { + IsleTarget::Lower => "", + IsleTarget::Opt => " ty", + }, + inst.format.name, + inst.camel_name + ); - // Value and varargs operands. - if inst.format.has_value_list { - // The instruction format uses a value list, but the - // instruction itself might have not only a `&[Value]` - // varargs operand, but also one or more `Value` operands as - // well. If this is the case, then we need to read them off - // the front of the `ValueList`. 
- let values: Vec<_> = inst - .operands_in - .iter() - .filter(|o| o.is_value()) - .map(|o| o.name) - .collect(); - let varargs = inst - .operands_in - .iter() - .find(|o| o.is_varargs()) - .unwrap() - .name; - if values.is_empty() { - write!(&mut s, " (value_list_slice {})", varargs).unwrap(); - } else { - write!( - &mut s, - " (unwrap_head_value_list_{} {} {})", - values.len(), - values.join(" "), - varargs - ) - .unwrap(); - } - } else if inst.format.num_value_operands == 1 { + // Value and varargs operands. + if inst.format.has_value_list { + // The instruction format uses a value list, but the + // instruction itself might have not only a `&[Value]` + // varargs operand, but also one or more `Value` operands as + // well. If this is the case, then we need to read them off + // the front of the `ValueList`. + let values: Vec<_> = inst + .operands_in + .iter() + .filter(|o| o.is_value()) + .map(|o| o.name) + .collect(); + let varargs = inst + .operands_in + .iter() + .find(|o| o.is_varargs()) + .unwrap() + .name; + if values.is_empty() { + write!(&mut s, " (value_list_slice {})", varargs).unwrap(); + } else { write!( &mut s, - " {}", - inst.operands_in.iter().find(|o| o.is_value()).unwrap().name - ) - .unwrap(); - } else if inst.format.num_value_operands > 1 { - let values = inst - .operands_in - .iter() - .filter(|o| o.is_value()) - .map(|o| o.name) - .collect::>(); - assert_eq!(values.len(), inst.format.num_value_operands); - let values = values.join(" "); - write!( - &mut s, - " (value_array_{} {})", - inst.format.num_value_operands, values, + " (unwrap_head_value_list_{} {} {})", + values.len(), + values.join(" "), + varargs ) .unwrap(); } - - // Immediates. 
- let imm_operands: Vec<_> = inst - .operands_in - .iter() - .filter(|o| !o.is_value() && !o.is_varargs()) - .collect(); - assert_eq!(imm_operands.len(), inst.format.imm_fields.len()); - for op in imm_operands { - write!(&mut s, " {}", op.name).unwrap(); - } - - s.push_str("))"); - fmt.line(&s); - } else { - // Mid-end case. - let mut s = format!( - "(enodes ty (InstructionImms.{} (Opcode.{})", - inst.format.name, inst.camel_name - ); - - // Immediates. - let imm_operands: Vec<_> = inst - .operands_in - .iter() - .filter(|o| !o.is_value() && !o.is_varargs()) - .collect(); - assert_eq!(imm_operands.len(), inst.format.imm_fields.len()); - for op in imm_operands { - write!(&mut s, " {}", op.name).unwrap(); - } - // End of `InstructionImms`. - s.push_str(")"); - - // Second arg to `enode`: value args. - assert!(!inst.operands_in.iter().any(|op| op.is_varargs())); + } else if inst.format.num_value_operands == 1 { + write!( + &mut s, + " {}", + inst.operands_in.iter().find(|o| o.is_value()).unwrap().name + ) + .unwrap(); + } else if inst.format.num_value_operands > 1 { let values = inst .operands_in .iter() @@ -1541,14 +1349,25 @@ fn gen_common_isle( let values = values.join(" "); write!( &mut s, - " (id_array_{} {})", + " (value_array_{} {})", inst.format.num_value_operands, values, ) .unwrap(); - - s.push_str(")"); - fmt.line(&s); } + + // Immediates. + let imm_operands: Vec<_> = inst + .operands_in + .iter() + .filter(|o| !o.is_value() && !o.is_varargs()) + .collect(); + assert_eq!(imm_operands.len(), inst.format.imm_fields.len()); + for op in imm_operands { + write!(&mut s, " {}", op.name).unwrap(); + } + + s.push_str("))"); + fmt.line(&s); }); fmt.line(")"); @@ -1566,10 +1385,53 @@ fn gen_common_isle( ); fmt.indent(|fmt| { let mut s = format!( - "(pure_enode ty (InstructionImms.{} (Opcode.{})", + "(make_inst ty (InstructionData.{} (Opcode.{})", inst.format.name, inst.camel_name ); + // Handle values. 
Note that we skip generating + // constructors for any instructions with variadic + // value lists. This is fine for the mid-end because + // in practice only calls and branches (for branch + // args) use this functionality, and neither can + // really be optimized or rewritten in the mid-end + // (currently). + // + // As a consequence, we only have to handle the + // one-`Value` case, in which the `Value` is directly + // in the `InstructionData`, and the multiple-`Value` + // case, in which the `Value`s are in a + // statically-sized array (e.g. `[Value; 2]` for a + // binary op). + assert!(!inst.format.has_value_list); + if inst.format.num_value_operands == 1 { + write!( + &mut s, + " {}", + inst.operands_in.iter().find(|o| o.is_value()).unwrap().name + ) + .unwrap(); + } else if inst.format.num_value_operands > 1 { + // As above, get all bindings together, and pass + // to a sub-term; here we use a constructor to + // build the value array. + let values = inst + .operands_in + .iter() + .filter(|o| o.is_value()) + .map(|o| o.name) + .collect::>(); + assert_eq!(values.len(), inst.format.num_value_operands); + let values = values.join(" "); + write!( + &mut s, + " (value_array_{}_ctor {})", + inst.format.num_value_operands, values + ) + .unwrap(); + } + + // Immediates (non-value args). 
for o in inst .operands_in .iter() @@ -1577,22 +1439,7 @@ fn gen_common_isle( { write!(&mut s, " {}", o.name).unwrap(); } - s.push_str(")"); - - let values = inst - .operands_in - .iter() - .filter(|o| o.is_value()) - .map(|o| o.name) - .collect::>(); - let values = values.join(" "); - write!( - &mut s, - " (id_array_{} {})", - inst.format.num_value_operands, values - ) - .unwrap(); - s.push_str(")"); + s.push_str("))"); fmt.line(&s); }); fmt.line(")"); @@ -1693,9 +1540,6 @@ pub(crate) fn generate( gen_instruction_data(&formats, &mut fmt); fmt.empty_line(); gen_instruction_data_impl(&formats, &mut fmt); - gen_instruction_data_to_instruction_imms(&formats, &mut fmt); - gen_instruction_imms_impl(&formats, &mut fmt); - gen_instruction_imms_to_instruction_data(&formats, &mut fmt); fmt.empty_line(); gen_opcodes(all_inst, &mut fmt); fmt.empty_line(); diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index 8a5fed38eb..8d705a8809 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -12,7 +12,7 @@ use crate::alias_analysis::AliasAnalysis; use crate::dce::do_dce; use crate::dominator_tree::DominatorTree; -use crate::egraph::FuncEGraph; +use crate::egraph::EgraphPass; use crate::flowgraph::ControlFlowGraph; use crate::ir::Function; use crate::isa::TargetIsa; @@ -26,6 +26,7 @@ use crate::result::{CodegenResult, CompileResult}; use crate::settings::{FlagsOrIsa, OptLevel}; use crate::simple_gvn::do_simple_gvn; use crate::simple_preopt::do_preopt; +use crate::trace; use crate::unreachable_code::eliminate_unreachable_code; use crate::verifier::{verify_context, VerifierErrors, VerifierResult}; use crate::{timing, CompileError}; @@ -191,15 +192,7 @@ impl Context { self.remove_constant_phis(isa)?; if isa.flags().use_egraphs() { - log::debug!( - "About to optimize with egraph phase:\n{}", - self.func.display() - ); - self.compute_loop_analysis(); - let mut eg = FuncEGraph::new(&self.func, &self.domtree, 
&self.loop_analysis, &self.cfg); - eg.elaborate(&mut self.func); - log::debug!("After egraph optimization:\n{}", self.func.display()); - log::info!("egraph stats: {:?}", eg.stats); + self.egraph_pass()?; } else if opt_level != OptLevel::None && isa.flags().enable_alias_analysis() { self.replace_redundant_loads()?; self.simple_gvn(isa)?; @@ -379,4 +372,24 @@ impl Context { do_souper_harvest(&self.func, out); Ok(()) } + + /// Run optimizations via the egraph infrastructure. + pub fn egraph_pass(&mut self) -> CodegenResult<()> { + trace!( + "About to optimize with egraph phase:\n{}", + self.func.display() + ); + self.compute_loop_analysis(); + let mut alias_analysis = AliasAnalysis::new(&self.func, &self.domtree); + let mut pass = EgraphPass::new( + &mut self.func, + &self.domtree, + &self.loop_analysis, + &mut alias_analysis, + ); + pass.run(); + log::info!("egraph stats: {:?}", pass.stats); + trace!("After egraph optimization:\n{}", self.func.display()); + Ok(()) + } } diff --git a/cranelift/codegen/src/ctxhash.rs b/cranelift/codegen/src/ctxhash.rs new file mode 100644 index 0000000000..e172d46c12 --- /dev/null +++ b/cranelift/codegen/src/ctxhash.rs @@ -0,0 +1,168 @@ +//! A hashmap with "external hashing": nodes are hashed or compared for +//! equality only with some external context provided on lookup/insert. +//! This allows very memory-efficient data structures where +//! node-internal data references some other storage (e.g., offsets into +//! an array or pool of shared data). + +use hashbrown::raw::RawTable; +use std::hash::{Hash, Hasher}; + +/// Trait that allows for equality comparison given some external +/// context. +/// +/// Note that this trait is implemented by the *context*, rather than +/// the item type, for somewhat complex lifetime reasons (lack of GATs +/// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on +/// the value type). 
+pub trait CtxEq { + /// Determine whether `a` and `b` are equal, given the context in + /// `self`. + fn ctx_eq(&self, a: &V1, b: &V2) -> bool; +} + +/// Trait that allows for hashing given some external context. +pub trait CtxHash: CtxEq { + /// Compute the hash of `value`, given the context in `self`, + /// feeding it into the hasher `state`. + fn ctx_hash(&self, state: &mut H, value: &Value); +} + +/// A null-comparator context type for underlying value types that +/// already have `Eq` and `Hash`. +#[derive(Default)] +pub struct NullCtx; + +impl CtxEq for NullCtx { + fn ctx_eq(&self, a: &V, b: &V) -> bool { + a.eq(b) + } +} +impl CtxHash for NullCtx { + fn ctx_hash(&self, state: &mut H, value: &V) { + value.hash(state); + } +} + +/// A bucket in the hash table. +/// +/// Some performance-related design notes: we cache the hashcode for +/// speed, as this often buys a few percent speed in +/// interning-table-heavy workloads. We only keep the low 32 bits of +/// the hashcode, for memory efficiency: in common use, `K` and `V` +/// are often 32 bits also, and a 12-byte bucket is measurably better +/// than a 16-byte bucket. +struct BucketData { + hash: u32, + k: K, + v: V, +} + +/// A HashMap that takes external context for all operations. +pub struct CtxHashMap { + raw: RawTable>, +} + +impl CtxHashMap { + /// Create an empty hashmap with pre-allocated space for the given + /// capacity. + pub fn with_capacity(capacity: usize) -> Self { + Self { + raw: RawTable::with_capacity(capacity), + } + } +} + +fn compute_hash(ctx: &Ctx, k: &K) -> u32 +where + Ctx: CtxHash, +{ + let mut hasher = crate::fx::FxHasher::default(); + ctx.ctx_hash(&mut hasher, k); + hasher.finish() as u32 +} + +impl CtxHashMap { + /// Insert a new key-value pair, returning the old value associated + /// with this key (if any).
+ pub fn insert(&mut self, k: K, v: V, ctx: &Ctx) -> Option + where + Ctx: CtxEq + CtxHash, + { + let hash = compute_hash(ctx, &k); + match self.raw.find(hash as u64, |bucket| { + hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k) + }) { + Some(bucket) => { + let data = unsafe { bucket.as_mut() }; + Some(std::mem::replace(&mut data.v, v)) + } + None => { + let data = BucketData { hash, k, v }; + self.raw + .insert_entry(hash as u64, data, |bucket| bucket.hash as u64); + None + } + } + } + + /// Look up a key, returning a borrow of the value if present. + pub fn get<'a, Q, Ctx>(&'a self, k: &Q, ctx: &Ctx) -> Option<&'a V> + where + Ctx: CtxEq + CtxHash + CtxHash, + { + let hash = compute_hash(ctx, k); + self.raw + .find(hash as u64, |bucket| { + hash == bucket.hash && ctx.ctx_eq(&bucket.k, k) + }) + .map(|bucket| { + let data = unsafe { bucket.as_ref() }; + &data.v + }) + } +} + +#[cfg(test)] +mod test { + use super::*; + use std::hash::Hash; + + #[derive(Clone, Copy, Debug)] + struct Key { + index: u32, + } + struct Ctx { + vals: &'static [&'static str], + } + impl CtxEq for Ctx { + fn ctx_eq(&self, a: &Key, b: &Key) -> bool { + self.vals[a.index as usize].eq(self.vals[b.index as usize]) + } + } + impl CtxHash for Ctx { + fn ctx_hash(&self, state: &mut H, value: &Key) { + self.vals[value.index as usize].hash(state); + } + } + + #[test] + fn test_basic() { + let ctx = Ctx { + vals: &["a", "b", "a"], + }; + + let k0 = Key { index: 0 }; + let k1 = Key { index: 1 }; + let k2 = Key { index: 2 }; + + assert!(ctx.ctx_eq(&k0, &k2)); + assert!(!ctx.ctx_eq(&k0, &k1)); + assert!(!ctx.ctx_eq(&k2, &k1)); + + let mut map: CtxHashMap = CtxHashMap::with_capacity(4); + assert_eq!(map.insert(k0, 42, &ctx), None); + assert_eq!(map.insert(k2, 84, &ctx), Some(42)); + assert_eq!(map.get(&k1, &ctx), None); + assert_eq!(*map.get(&k0, &ctx).unwrap(), 84); + } +} diff --git a/cranelift/codegen/src/egraph.rs b/cranelift/codegen/src/egraph.rs index d8d625671b..69870d556e 100644 --- 
a/cranelift/codegen/src/egraph.rs +++ b/cranelift/codegen/src/egraph.rs @@ -1,342 +1,462 @@ -//! Egraph-based mid-end optimization framework. +//! Support for egraphs represented in the DataFlowGraph. +use crate::alias_analysis::{AliasAnalysis, LastStores}; +use crate::ctxhash::{CtxEq, CtxHash, CtxHashMap}; +use crate::cursor::{Cursor, CursorPosition, FuncCursor}; use crate::dominator_tree::DominatorTree; -use crate::egraph::stores::PackedMemoryState; -use crate::flowgraph::ControlFlowGraph; -use crate::loop_analysis::{LoopAnalysis, LoopLevel}; -use crate::trace; -use crate::{ - fx::{FxHashMap, FxHashSet}, - inst_predicates::has_side_effect, - ir::{Block, Function, Inst, InstructionData, InstructionImms, Opcode, Type}, +use crate::egraph::domtree::DomTreeWithChildren; +use crate::egraph::elaborate::Elaborator; +use crate::fx::FxHashSet; +use crate::inst_predicates::is_pure_for_egraph; +use crate::ir::{ + DataFlowGraph, Function, Inst, InstructionData, Type, Value, ValueDef, ValueListPool, }; -use alloc::vec::Vec; -use core::ops::Range; -use cranelift_egraph::{EGraph, Id, Language, NewOrExisting}; -use cranelift_entity::EntityList; +use crate::loop_analysis::LoopAnalysis; +use crate::opts::generated_code::ContextIter; +use crate::opts::IsleContext; +use crate::trace; +use crate::unionfind::UnionFind; +use cranelift_entity::packed_option::ReservedValue; use cranelift_entity::SecondaryMap; +use std::hash::Hasher; +mod cost; mod domtree; mod elaborate; -mod node; -mod stores; -use elaborate::Elaborator; -pub use node::{Node, NodeCtx}; -pub use stores::{AliasAnalysis, MemoryState}; - -pub struct FuncEGraph<'a> { +/// Pass over a Function that does the whole aegraph thing. +/// +/// - Removes non-skeleton nodes from the Layout. +/// - Performs a GVN-and-rule-application pass over all Values +/// reachable from the skeleton, potentially creating new Union +/// nodes (i.e., an aegraph) so that some values have multiple +/// representations. 
+/// - Does "extraction" on the aegraph: selects the best value out of +/// the tree-of-Union nodes for each used value. +/// - Does "scoped elaboration" on the aegraph: chooses one or more +/// locations for pure nodes to become instructions again in the +/// layout, as forced by the skeleton. +/// +/// At the beginning and end of this pass, the CLIF should be in a +/// state that passes the verifier and, additionally, has no Union +/// nodes. During the pass, Union nodes may exist, and instructions in +/// the layout may refer to results of instructions that are not +/// placed in the layout. +pub struct EgraphPass<'a> { + /// The function we're operating on. + func: &'a mut Function, /// Dominator tree, used for elaboration pass. domtree: &'a DominatorTree, - /// Loop analysis results, used for built-in LICM during elaboration. + /// Alias analysis, used during optimization. + alias_analysis: &'a mut AliasAnalysis<'a>, + /// "Domtree with children": like `domtree`, but with an explicit + /// list of children, rather than just parent pointers. + domtree_children: DomTreeWithChildren, + /// Loop analysis results, used for built-in LICM during + /// elaboration. loop_analysis: &'a LoopAnalysis, - /// Last-store tracker for integrated alias analysis during egraph build. - alias_analysis: AliasAnalysis, - /// The egraph itself. - pub(crate) egraph: EGraph, - /// "node context", containing arenas for node data. - pub(crate) node_ctx: NodeCtx, - /// Ranges in `side_effect_ids` for sequences of side-effecting - /// eclasses per block. - side_effects: SecondaryMap>, - side_effect_ids: Vec, - /// Map from store instructions to their nodes; used for store-to-load forwarding. - pub(crate) store_nodes: FxHashMap, - /// Ranges in `blockparam_ids_tys` for sequences of blockparam - /// eclass IDs and types per block. 
- blockparams: SecondaryMap>, - blockparam_ids_tys: Vec<(Id, Type)>, - /// Which canonical node IDs do we want to rematerialize in each + /// Which canonical Values do we want to rematerialize in each /// block where they're used? - pub(crate) remat_ids: FxHashSet, - /// Which canonical node IDs have an enode whose value subsumes - /// all others it's unioned with? - pub(crate) subsume_ids: FxHashSet, - /// Statistics recorded during the process of building, - /// optimizing, and lowering out of this egraph. + /// + /// (A canonical Value is the *oldest* Value in an eclass, + /// i.e. tree of union value-nodes). + remat_values: FxHashSet, + /// Stats collected while we run this pass. pub(crate) stats: Stats, - /// Current rewrite-recursion depth. Used to enforce a finite - /// limit on rewrite rule application so that we don't get stuck - /// in an infinite chain. + /// Union-find that maps all members of a Union tree (eclass) back + /// to the *oldest* (lowest-numbered) `Value`. + eclasses: UnionFind, +} + +/// Context passed through node insertion and optimization. 
+pub(crate) struct OptimizeCtx<'opt, 'analysis> +where + 'analysis: 'opt, +{ + // Borrowed from EgraphPass: + pub(crate) func: &'opt mut Function, + pub(crate) value_to_opt_value: &'opt mut SecondaryMap, + pub(crate) gvn_map: &'opt mut CtxHashMap<(Type, InstructionData), Value>, + pub(crate) eclasses: &'opt mut UnionFind, + pub(crate) remat_values: &'opt mut FxHashSet, + pub(crate) stats: &'opt mut Stats, + pub(crate) alias_analysis: &'opt mut AliasAnalysis<'analysis>, + pub(crate) alias_analysis_state: &'opt mut LastStores, + // Held locally during optimization of one node (recursively): pub(crate) rewrite_depth: usize, + pub(crate) subsume_values: FxHashSet, } -#[derive(Clone, Debug, Default)] -pub(crate) struct Stats { - pub(crate) node_created: u64, - pub(crate) node_param: u64, - pub(crate) node_result: u64, - pub(crate) node_pure: u64, - pub(crate) node_inst: u64, - pub(crate) node_load: u64, - pub(crate) node_dedup_query: u64, - pub(crate) node_dedup_hit: u64, - pub(crate) node_dedup_miss: u64, - pub(crate) node_ctor_created: u64, - pub(crate) node_ctor_deduped: u64, - pub(crate) node_union: u64, - pub(crate) node_subsume: u64, - pub(crate) store_map_insert: u64, - pub(crate) side_effect_nodes: u64, - pub(crate) rewrite_rule_invoked: u64, - pub(crate) rewrite_depth_limit: u64, - pub(crate) store_to_load_forward: u64, - pub(crate) elaborate_visit_node: u64, - pub(crate) elaborate_memoize_hit: u64, - pub(crate) elaborate_memoize_miss: u64, - pub(crate) elaborate_memoize_miss_remat: u64, - pub(crate) elaborate_licm_hoist: u64, - pub(crate) elaborate_func: u64, - pub(crate) elaborate_func_pre_insts: u64, - pub(crate) elaborate_func_post_insts: u64, +/// For passing to `insert_pure_enode`. Sometimes the enode already +/// exists as an Inst (from the original CLIF), and sometimes we're in +/// the middle of creating it and want to avoid inserting it if +/// possible until we know we need it. 
+pub(crate) enum NewOrExistingInst { + New(InstructionData, Type), + Existing(Inst), } -impl<'a> FuncEGraph<'a> { - /// Create a new EGraph for the given function. Requires the - /// domtree to be precomputed as well; the domtree is used for - /// scheduling when lowering out of the egraph. - pub fn new( - func: &Function, - domtree: &'a DominatorTree, - loop_analysis: &'a LoopAnalysis, - cfg: &ControlFlowGraph, - ) -> FuncEGraph<'a> { - let num_values = func.dfg.num_values(); - let num_blocks = func.dfg.num_blocks(); - let node_count_estimate = num_values * 2; - let alias_analysis = AliasAnalysis::new(func, cfg); - let mut this = Self { - domtree, - loop_analysis, - alias_analysis, - egraph: EGraph::with_capacity(node_count_estimate, Some(Analysis)), - node_ctx: NodeCtx::with_capacity_for_dfg(&func.dfg), - side_effects: SecondaryMap::with_capacity(num_blocks), - side_effect_ids: Vec::with_capacity(node_count_estimate), - store_nodes: FxHashMap::default(), - blockparams: SecondaryMap::with_capacity(num_blocks), - blockparam_ids_tys: Vec::with_capacity(num_blocks * 10), - remat_ids: FxHashSet::default(), - subsume_ids: FxHashSet::default(), - stats: Default::default(), - rewrite_depth: 0, +impl NewOrExistingInst { + fn get_inst_key<'a>(&'a self, dfg: &'a DataFlowGraph) -> (Type, InstructionData) { + match self { + NewOrExistingInst::New(data, ty) => (*ty, *data), + NewOrExistingInst::Existing(inst) => { + let ty = dfg.ctrl_typevar(*inst); + (ty, dfg[*inst].clone()) + } + } + } +} + +impl<'opt, 'analysis> OptimizeCtx<'opt, 'analysis> +where + 'analysis: 'opt, +{ + /// Optimization of a single instruction. + /// + /// This does a few things: + /// - Looks up the instruction in the GVN deduplication map. If we + /// already have the same instruction somewhere else, with the + /// same args, then we can alias the original instruction's + /// results and omit this instruction entirely. 
+ /// - Note that we do this canonicalization based on the + /// instruction with its arguments as *canonical* eclass IDs, + /// that is, the oldest (smallest index) `Value` reachable in + /// the tree-of-unions (whole eclass). This ensures that we + /// properly canonicalize newer nodes that use newer "versions" + /// of a value that are still equal to the older versions. + /// - If the instruction is "new" (not deduplicated), then apply + /// optimization rules: + /// - All of the mid-end rules written in ISLE. + /// - Store-to-load forwarding. + /// - Update the value-to-opt-value map, and update the eclass + /// union-find, if we rewrote the value to different form(s). + pub(crate) fn insert_pure_enode(&mut self, inst: NewOrExistingInst) -> Value { + // Create the external context for looking up and updating the + // GVN map. This is necessary so that instructions themselves + // do not have to carry all the references or data for a full + // `Eq` or `Hash` impl. + let gvn_context = GVNContext { + union_find: self.eclasses, + value_lists: &self.func.dfg.value_lists, }; - this.store_nodes.reserve(func.dfg.num_values() / 8); - this.remat_ids.reserve(func.dfg.num_values() / 4); - this.subsume_ids.reserve(func.dfg.num_values() / 4); - this.build(func); - this + + self.stats.pure_inst += 1; + if let NewOrExistingInst::New(..) = inst { + self.stats.new_inst += 1; + } + + // Does this instruction already exist? If so, add entries to + // the value-map to rewrite uses of its results to the results + // of the original (existing) instruction. If not, optimize + // the new instruction. 
+ if let Some(&orig_result) = self + .gvn_map + .get(&inst.get_inst_key(&self.func.dfg), &gvn_context) + { + self.stats.pure_inst_deduped += 1; + if let NewOrExistingInst::Existing(inst) = inst { + debug_assert_eq!(self.func.dfg.inst_results(inst).len(), 1); + let result = self.func.dfg.first_result(inst); + self.value_to_opt_value[result] = orig_result; + self.eclasses.union(result, orig_result); + self.stats.union += 1; + result + } else { + orig_result + } + } else { + // Now actually insert the InstructionData and attach + // result value (exactly one). + let (inst, result, ty) = match inst { + NewOrExistingInst::New(data, typevar) => { + let inst = self.func.dfg.make_inst(data); + // TODO: reuse return value? + self.func.dfg.make_inst_results(inst, typevar); + let result = self.func.dfg.first_result(inst); + // Add to eclass unionfind. + self.eclasses.add(result); + // New inst. We need to do the analysis of its result. + (inst, result, typevar) + } + NewOrExistingInst::Existing(inst) => { + let result = self.func.dfg.first_result(inst); + let ty = self.func.dfg.ctrl_typevar(inst); + (inst, result, ty) + } + }; + + let opt_value = self.optimize_pure_enode(inst); + let gvn_context = GVNContext { + union_find: self.eclasses, + value_lists: &self.func.dfg.value_lists, + }; + self.gvn_map + .insert((ty, self.func.dfg[inst].clone()), opt_value, &gvn_context); + self.value_to_opt_value[result] = opt_value; + opt_value + } } - fn build(&mut self, func: &Function) { - // Mapping of SSA `Value` to eclass ID. - let mut value_to_id = FxHashMap::default(); + /// Optimizes an enode by applying any matching mid-end rewrite + /// rules (or store-to-load forwarding, which is a special case), + /// unioning together all possible optimized (or rewritten) forms + /// of this expression into an eclass and returning the `Value` + /// that represents that eclass. + fn optimize_pure_enode(&mut self, inst: Inst) -> Value { + // A pure node always has exactly one result. 
+ let orig_value = self.func.dfg.first_result(inst); - // For each block in RPO, create an enode for block entry, for - // each block param, and for each instruction. - for &block in self.domtree.cfg_postorder().iter().rev() { - let loop_level = self.loop_analysis.loop_level(block); - let blockparam_start = - u32::try_from(self.blockparam_ids_tys.len()).expect("Overflow in blockparam count"); - for (i, &value) in func.dfg.block_params(block).iter().enumerate() { - let ty = func.dfg.value_type(value); - let param = self - .egraph - .add( - Node::Param { - block, - index: i - .try_into() - .expect("blockparam index should fit in Node::Param"), - ty, - loop_level, - }, - &mut self.node_ctx, - ) - .get(); - value_to_id.insert(value, param); - self.blockparam_ids_tys.push((param, ty)); - self.stats.node_created += 1; - self.stats.node_param += 1; - } - let blockparam_end = - u32::try_from(self.blockparam_ids_tys.len()).expect("Overflow in blockparam count"); - self.blockparams[block] = blockparam_start..blockparam_end; + let mut isle_ctx = IsleContext { ctx: self }; - let side_effect_start = - u32::try_from(self.side_effect_ids.len()).expect("Overflow in side-effect count"); - for inst in func.layout.block_insts(block) { - // Build args from SSA values. - let args = EntityList::from_iter( - func.dfg.inst_args(inst).iter().map(|&arg| { - let arg = func.dfg.resolve_aliases(arg); - *value_to_id - .get(&arg) - .expect("Must have seen def before this use") - }), - &mut self.node_ctx.args, + // Limit rewrite depth. When we apply optimization rules, they + // may create new nodes (values) and those are, recursively, + // optimized eagerly as soon as they are created. So we may + // have more than one ISLE invocation on the stack. (This is + // necessary so that as the toplevel builds the + // right-hand-side expression bottom-up, it uses the "latest" + // optimized values for all the constituent parts.) 
To avoid + // infinite or problematic recursion, we bound the rewrite + // depth to a small constant here. + const REWRITE_LIMIT: usize = 5; + if isle_ctx.ctx.rewrite_depth > REWRITE_LIMIT { + isle_ctx.ctx.stats.rewrite_depth_limit += 1; + return orig_value; + } + isle_ctx.ctx.rewrite_depth += 1; + + // Invoke the ISLE toplevel constructor, getting all new + // values produced as equivalents to this value. + trace!("Calling into ISLE with original value {}", orig_value); + isle_ctx.ctx.stats.rewrite_rule_invoked += 1; + let optimized_values = + crate::opts::generated_code::constructor_simplify(&mut isle_ctx, orig_value); + + // Create a union of all new values with the original (or + // maybe just one new value marked as "subsuming" the + // original, if present.) + let mut union_value = orig_value; + if let Some(mut optimized_values) = optimized_values { + while let Some(optimized_value) = optimized_values.next(&mut isle_ctx) { + trace!( + "Returned from ISLE for {}, got {:?}", + orig_value, + optimized_value ); + if optimized_value == orig_value { + trace!(" -> same as orig value; skipping"); + continue; + } + if isle_ctx.ctx.subsume_values.contains(&optimized_value) { + // Merge in the unionfind so canonicalization + // still works, but take *only* the subsuming + // value, and break now. 
+ isle_ctx.ctx.eclasses.union(optimized_value, union_value); + union_value = optimized_value; + break; + } - let results = func.dfg.inst_results(inst); - let ty = if results.len() == 1 { - func.dfg.value_type(results[0]) + let old_union_value = union_value; + union_value = isle_ctx + .ctx + .func + .dfg + .union(old_union_value, optimized_value); + isle_ctx.ctx.stats.union += 1; + trace!(" -> union: now {}", union_value); + isle_ctx.ctx.eclasses.add(union_value); + isle_ctx + .ctx + .eclasses + .union(old_union_value, optimized_value); + isle_ctx.ctx.eclasses.union(old_union_value, union_value); + } + } + + isle_ctx.ctx.rewrite_depth -= 1; + + union_value + } + + /// Optimize a "skeleton" instruction, possibly removing + /// it. Returns `true` if the instruction should be removed from + /// the layout. + fn optimize_skeleton_inst(&mut self, inst: Inst) -> bool { + self.stats.skeleton_inst += 1; + // Not pure, but may still be a load or store: + // process it to see if we can optimize it. + if let Some(new_result) = + self.alias_analysis + .process_inst(self.func, self.alias_analysis_state, inst) + { + self.stats.alias_analysis_removed += 1; + let result = self.func.dfg.first_result(inst); + self.value_to_opt_value[result] = new_result; + true + } else { + // Set all results to identity-map to themselves + // in the value-to-opt-value map. + for &result in self.func.dfg.inst_results(inst) { + self.value_to_opt_value[result] = result; + self.eclasses.add(result); + } + false + } + } +} + +impl<'a> EgraphPass<'a> { + /// Create a new EgraphPass. 
+ pub fn new( + func: &'a mut Function, + domtree: &'a DominatorTree, + loop_analysis: &'a LoopAnalysis, + alias_analysis: &'a mut AliasAnalysis<'a>, + ) -> Self { + let num_values = func.dfg.num_values(); + let domtree_children = DomTreeWithChildren::new(func, domtree); + Self { + func, + domtree, + domtree_children, + loop_analysis, + alias_analysis, + stats: Stats::default(), + eclasses: UnionFind::with_capacity(num_values), + remat_values: FxHashSet::default(), + } + } + + /// Run the process. + pub fn run(&mut self) { + self.remove_pure_and_optimize(); + + trace!("egraph built:\n{}\n", self.func.display()); + if cfg!(feature = "trace-log") { + for (value, def) in self.func.dfg.values_and_defs() { + trace!(" -> {} = {:?}", value, def); + match def { + ValueDef::Result(i, 0) => { + trace!(" -> {} = {:?}", i, self.func.dfg[i]); + } + _ => {} + } + } + } + trace!("stats: {:?}", self.stats); + self.elaborate(); + } + + /// Remove pure nodes from the `Layout` of the function, ensuring + /// that only the "side-effect skeleton" remains, and also + /// optimize the pure nodes. This is the first step of + /// egraph-based processing and turns the pure CFG-based CLIF into + /// a CFG skeleton with a sea of (optimized) nodes tying it + /// together. + /// + /// As we walk through the code, we eagerly apply optimization + /// rules; at any given point we have a "latest version" of an + /// eclass of possible representations for a `Value` in the + /// original program, which is itself a `Value` at the root of a + /// union-tree. We keep a map from the original values to these + /// optimized values. When we encounter any instruction (pure or + /// side-effecting skeleton) we rewrite its arguments to capture + /// the "latest" optimized forms of these values. 
(We need to do + /// this as part of this pass, and not later using a finished map, + /// because the eclass can continue to be updated and we need to + /// only refer to its subset that exists at this stage, to + /// maintain acyclicity.) + fn remove_pure_and_optimize(&mut self) { + let mut cursor = FuncCursor::new(self.func); + let mut value_to_opt_value: SecondaryMap = + SecondaryMap::with_default(Value::reserved_value()); + let mut gvn_map: CtxHashMap<(Type, InstructionData), Value> = + CtxHashMap::with_capacity(cursor.func.dfg.num_values()); + + // In domtree preorder, visit blocks. (TODO: factor out an + // iterator from this and elaborator.) + let root = self.domtree_children.root(); + let mut block_stack = vec![root]; + while let Some(block) = block_stack.pop() { + // We popped this block; push children + // immediately, then process this block. + block_stack.extend(self.domtree_children.children(block)); + + trace!("Processing block {}", block); + cursor.set_position(CursorPosition::Before(block)); + + let mut alias_analysis_state = self.alias_analysis.block_starting_state(block); + + for &param in cursor.func.dfg.block_params(block) { + trace!("creating initial singleton eclass for blockparam {}", param); + self.eclasses.add(param); + value_to_opt_value[param] = param; + } + while let Some(inst) = cursor.next_inst() { + trace!("Processing inst {}", inst); + + // While we're passing over all insts, create initial + // singleton eclasses for all result and blockparam + // values. Also do initial analysis of all inst + // results. + for &result in cursor.func.dfg.inst_results(inst) { + trace!("creating initial singleton eclass for {}", result); + self.eclasses.add(result); + } + + // Rewrite args of *all* instructions using the + // value-to-opt-value map.
+ cursor.func.dfg.resolve_aliases_in_arguments(inst); + for arg in cursor.func.dfg.inst_args_mut(inst) { + let new_value = value_to_opt_value[*arg]; + trace!("rewriting arg {} of inst {} to {}", arg, inst, new_value); + debug_assert_ne!(new_value, Value::reserved_value()); + *arg = new_value; + } + + // Build a context for optimization, with borrows of + // state. We can't invoke a method on `self` because + // we've borrowed `self.func` mutably (as + // `cursor.func`) so we pull apart the pieces instead + // here. + let mut ctx = OptimizeCtx { + func: cursor.func, + value_to_opt_value: &mut value_to_opt_value, + gvn_map: &mut gvn_map, + eclasses: &mut self.eclasses, + rewrite_depth: 0, + subsume_values: FxHashSet::default(), + remat_values: &mut self.remat_values, + stats: &mut self.stats, + alias_analysis: self.alias_analysis, + alias_analysis_state: &mut alias_analysis_state, + }; + + if is_pure_for_egraph(ctx.func, inst) { + // Insert into GVN map and optimize any new nodes + // inserted (recursively performing this work for + // any nodes the optimization rules produce). + let inst = NewOrExistingInst::Existing(inst); + ctx.insert_pure_enode(inst); + // We've now rewritten all uses, or will when we + // see them, and the instruction exists as a pure + // enode in the eclass, so we can remove it. + cursor.remove_inst_and_step_back(); } else { - crate::ir::types::INVALID - }; - - let load_mem_state = self.alias_analysis.get_state_for_load(inst); - let is_readonly_load = match func.dfg[inst] { - InstructionData::Load { - opcode: Opcode::Load, - flags, - .. - } => flags.readonly() && flags.notrap(), - _ => false, - }; - - // Create the egraph node. 
- let op = InstructionImms::from(&func.dfg[inst]); - let opcode = op.opcode(); - let srcloc = func.srclocs[inst]; - let arity = u16::try_from(results.len()) - .expect("More than 2^16 results from an instruction"); - - let node = if is_readonly_load { - self.stats.node_created += 1; - self.stats.node_pure += 1; - Node::Pure { - op, - args, - ty, - arity, - } - } else if let Some(load_mem_state) = load_mem_state { - let addr = args.as_slice(&self.node_ctx.args)[0]; - trace!("load at inst {} has mem state {:?}", inst, load_mem_state); - self.stats.node_created += 1; - self.stats.node_load += 1; - Node::Load { - op, - ty, - addr, - mem_state: load_mem_state, - srcloc, - } - } else if has_side_effect(func, inst) || opcode.can_load() { - self.stats.node_created += 1; - self.stats.node_inst += 1; - Node::Inst { - op, - args, - ty, - arity, - srcloc, - loop_level, - } - } else { - self.stats.node_created += 1; - self.stats.node_pure += 1; - Node::Pure { - op, - args, - ty, - arity, - } - }; - let dedup_needed = self.node_ctx.needs_dedup(&node); - let is_pure = matches!(node, Node::Pure { .. }); - - let mut id = self.egraph.add(node, &mut self.node_ctx); - - if dedup_needed { - self.stats.node_dedup_query += 1; - match id { - NewOrExisting::New(_) => { - self.stats.node_dedup_miss += 1; - } - NewOrExisting::Existing(_) => { - self.stats.node_dedup_hit += 1; - } - } - } - - if opcode == Opcode::Store { - let store_data_ty = func.dfg.value_type(func.dfg.inst_args(inst)[0]); - self.store_nodes.insert(inst, (store_data_ty, id.get())); - self.stats.store_map_insert += 1; - } - - // Loads that did not already merge into an existing - // load: try to forward from a store (store-to-load - // forwarding). 
- if let NewOrExisting::New(new_id) = id { - if load_mem_state.is_some() { - let opt_id = crate::opts::store_to_load(new_id, self); - trace!("store_to_load: {} -> {}", new_id, opt_id); - if opt_id != new_id { - id = NewOrExisting::Existing(opt_id); - } - } - } - - // Now either optimize (for new pure nodes), or add to - // the side-effecting list (for all other new nodes). - let id = match id { - NewOrExisting::Existing(id) => id, - NewOrExisting::New(id) if is_pure => { - // Apply all optimization rules immediately; the - // aegraph (acyclic egraph) works best when we do - // this so all uses pick up the eclass with all - // possible enodes. - crate::opts::optimize_eclass(id, self) - } - NewOrExisting::New(id) => { - self.side_effect_ids.push(id); - self.stats.side_effect_nodes += 1; - id - } - }; - - // Create results and save in Value->Id map. - match results { - &[] => {} - &[one_result] => { - trace!("build: value {} -> id {}", one_result, id); - value_to_id.insert(one_result, id); - } - many_results => { - debug_assert!(many_results.len() > 1); - for (i, &result) in many_results.iter().enumerate() { - let ty = func.dfg.value_type(result); - let projection = self - .egraph - .add( - Node::Result { - value: id, - result: i, - ty, - }, - &mut self.node_ctx, - ) - .get(); - self.stats.node_created += 1; - self.stats.node_result += 1; - trace!("build: value {} -> id {}", result, projection); - value_to_id.insert(result, projection); - } + if ctx.optimize_skeleton_inst(inst) { + cursor.remove_inst_and_step_back(); } } } - - let side_effect_end = - u32::try_from(self.side_effect_ids.len()).expect("Overflow in side-effect count"); - let side_effect_range = side_effect_start..side_effect_end; - self.side_effects[block] = side_effect_range; } } /// Scoped elaboration: compute a final ordering of op computation - /// for each block and replace the given Func body. + /// for each block and update the given Func body. 
After this + /// runs, the function body is back into the state where every + /// Inst with a used result is placed in the layout (possibly + /// duplicated, if our code-motion logic decides this is the best + /// option). /// /// This works in concert with the domtree. We do a preorder /// traversal of the domtree, tracking a scoped map from Id to @@ -354,76 +474,95 @@ impl<'a> FuncEGraph<'a> { /// thus computed "as late as possible", but then memoized into /// the Id-to-Value map and available to all dominated blocks and /// for the rest of this block. (This subsumes GVN.) - pub fn elaborate(&mut self, func: &mut Function) { - let mut elab = Elaborator::new( - func, + fn elaborate(&mut self) { + let mut elaborator = Elaborator::new( + self.func, self.domtree, + &self.domtree_children, self.loop_analysis, - &self.egraph, - &self.node_ctx, - &self.remat_ids, + &mut self.remat_values, + &mut self.eclasses, &mut self.stats, ); - elab.elaborate( - |block| { - let blockparam_range = self.blockparams[block].clone(); - &self.blockparam_ids_tys - [blockparam_range.start as usize..blockparam_range.end as usize] - }, - |block| { - let side_effect_range = self.side_effects[block].clone(); - &self.side_effect_ids - [side_effect_range.start as usize..side_effect_range.end as usize] - }, - ); + elaborator.elaborate(); + + self.check_post_egraph(); } -} -/// State for egraph analysis that computes all needed properties. -pub(crate) struct Analysis; - -/// Analysis results for each eclass id. -#[derive(Clone, Debug)] -pub(crate) struct AnalysisValue { - pub(crate) loop_level: LoopLevel, -} - -impl Default for AnalysisValue { - fn default() -> Self { - Self { - loop_level: LoopLevel::root(), + #[cfg(debug_assertions)] + fn check_post_egraph(&self) { + // Verify that no union nodes are reachable from inst args, + // and that all inst args' defining instructions are in the + // layout.
+ for block in self.func.layout.blocks() { + for inst in self.func.layout.block_insts(block) { + for &arg in self.func.dfg.inst_args(inst) { + match self.func.dfg.value_def(arg) { + ValueDef::Result(i, _) => { + debug_assert!(self.func.layout.inst_block(i).is_some()); + } + ValueDef::Union(..) => { + panic!("egraph union node {} still reachable at {}!", arg, inst); + } + _ => {} + } + } + } } } + + #[cfg(not(debug_assertions))] + fn check_post_egraph(&self) {} } -impl cranelift_egraph::Analysis for Analysis { - type L = NodeCtx; - type Value = AnalysisValue; +/// Implementation of external-context equality and hashing on +/// InstructionData. This allows us to deduplicate instructions given +/// some context that lets us see its value lists and the mapping from +/// any value to "canonical value" (in an eclass). +struct GVNContext<'a> { + value_lists: &'a ValueListPool, + union_find: &'a UnionFind, +} - fn for_node( +impl<'a> CtxEq<(Type, InstructionData), (Type, InstructionData)> for GVNContext<'a> { + fn ctx_eq( &self, - ctx: &NodeCtx, - n: &Node, - values: &SecondaryMap, - ) -> AnalysisValue { - let loop_level = match n { - &Node::Pure { ref args, .. } => args - .as_slice(&ctx.args) - .iter() - .map(|&arg| values[arg].loop_level) - .max() - .unwrap_or(LoopLevel::root()), - &Node::Load { addr, .. } => values[addr].loop_level, - &Node::Result { value, .. } => values[value].loop_level, - &Node::Inst { loop_level, .. } | &Node::Param { loop_level, .. 
} => loop_level, - }; - - AnalysisValue { loop_level } - } - - fn meet(&self, _ctx: &NodeCtx, v1: &AnalysisValue, v2: &AnalysisValue) -> AnalysisValue { - AnalysisValue { - loop_level: std::cmp::max(v1.loop_level, v2.loop_level), - } + (a_ty, a_inst): &(Type, InstructionData), + (b_ty, b_inst): &(Type, InstructionData), + ) -> bool { + a_ty == b_ty + && a_inst.eq(b_inst, self.value_lists, |value| { + self.union_find.find(value) + }) } } + +impl<'a> CtxHash<(Type, InstructionData)> for GVNContext<'a> { + fn ctx_hash(&self, state: &mut H, (ty, inst): &(Type, InstructionData)) { + std::hash::Hash::hash(&ty, state); + inst.hash(state, self.value_lists, |value| self.union_find.find(value)); + } +} + +/// Statistics collected during egraph-based processing. +#[derive(Clone, Debug, Default)] +pub(crate) struct Stats { + pub(crate) pure_inst: u64, + pub(crate) pure_inst_deduped: u64, + pub(crate) skeleton_inst: u64, + pub(crate) alias_analysis_removed: u64, + pub(crate) new_inst: u64, + pub(crate) union: u64, + pub(crate) subsume: u64, + pub(crate) remat: u64, + pub(crate) rewrite_rule_invoked: u64, + pub(crate) rewrite_depth_limit: u64, + pub(crate) elaborate_visit_node: u64, + pub(crate) elaborate_memoize_hit: u64, + pub(crate) elaborate_memoize_miss: u64, + pub(crate) elaborate_memoize_miss_remat: u64, + pub(crate) elaborate_licm_hoist: u64, + pub(crate) elaborate_func: u64, + pub(crate) elaborate_func_pre_insts: u64, + pub(crate) elaborate_func_post_insts: u64, +} diff --git a/cranelift/codegen/src/egraph/cost.rs b/cranelift/codegen/src/egraph/cost.rs new file mode 100644 index 0000000000..8a9f852818 --- /dev/null +++ b/cranelift/codegen/src/egraph/cost.rs @@ -0,0 +1,97 @@ +//! Cost functions for egraph representation. + +use crate::ir::Opcode; + +/// A cost of computing some value in the program. +/// +/// Costs are measured in an arbitrary union that we represent in a +/// `u32`. 
The ordering is meant to be meaningful, but the value of a +/// single unit is arbitrary (and "not to scale"). We use a collection +/// of heuristics to try to make this approximation at least usable. +/// +/// We start by defining costs for each opcode (see `pure_op_cost` +/// below). The cost of computing some value, initially, is the cost +/// of its opcode, plus the cost of computing its inputs. +/// +/// We then adjust the cost according to loop nests: for each +/// loop-nest level, we multiply by 1024. Because we only have 32 +/// bits, we limit this scaling to a loop-level of two (i.e., multiply +/// by 2^20 ~= 1M). +/// +/// Arithmetic on costs is always saturating: we don't want to wrap +/// around and return to a tiny cost when adding the costs of two very +/// expensive operations. It is better to approximate and lose some +/// precision than to lose the ordering by wrapping. +/// +/// Finally, we reserve the highest value, `u32::MAX`, as a sentinel +/// that means "infinite". This is separate from the finite costs and +/// not reachable by doing arithmetic on them (even when overflowing) +/// -- we saturate just *below* infinity. (This is done by the +/// `finite()` method.) An infinite cost is used to represent a value +/// that cannot be computed, or otherwise serve as a sentinel when +/// performing search for the lowest-cost representation of a value. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) struct Cost(u32); +impl Cost { + pub(crate) fn at_level(&self, loop_level: usize) -> Cost { + let loop_level = std::cmp::min(2, loop_level); + let multiplier = 1u32 << ((10 * loop_level) as u32); + Cost(self.0.saturating_mul(multiplier)).finite() + } + + pub(crate) fn infinity() -> Cost { + // 2^32 - 1 is, uh, pretty close to infinite... (we use `Cost` + // only for heuristics and always saturate so this suffices!) + Cost(u32::MAX) + } + + pub(crate) fn zero() -> Cost { + Cost(0) + } + + /// Clamp this cost at a "finite" value. 
Can be used in + /// conjunction with saturating ops to avoid saturating into + /// `infinity()`. + fn finite(self) -> Cost { + Cost(std::cmp::min(u32::MAX - 1, self.0)) + } +} + +impl std::default::Default for Cost { + fn default() -> Cost { + Cost::zero() + } +} + +impl std::ops::Add for Cost { + type Output = Cost; + fn add(self, other: Cost) -> Cost { + Cost(self.0.saturating_add(other.0)).finite() + } +} + +/// Return the cost of a *pure* opcode. Caller is responsible for +/// checking that the opcode came from an instruction that satisfies +/// `inst_predicates::is_pure_for_egraph()`. +pub(crate) fn pure_op_cost(op: Opcode) -> Cost { + match op { + // Constants. + Opcode::Iconst | Opcode::F32const | Opcode::F64const => Cost(0), + // Extends/reduces. + Opcode::Uextend | Opcode::Sextend | Opcode::Ireduce | Opcode::Iconcat | Opcode::Isplit => { + Cost(1) + } + // "Simple" arithmetic. + Opcode::Iadd + | Opcode::Isub + | Opcode::Band + | Opcode::BandNot + | Opcode::Bor + | Opcode::BorNot + | Opcode::Bxor + | Opcode::BxorNot + | Opcode::Bnot => Cost(2), + // Everything else (pure.) + _ => Cost(3), + } +} diff --git a/cranelift/codegen/src/egraph/elaborate.rs b/cranelift/codegen/src/egraph/elaborate.rs index ee465def22..2ca59bdb03 100644 --- a/cranelift/codegen/src/egraph/elaborate.rs +++ b/cranelift/codegen/src/egraph/elaborate.rs @@ -1,47 +1,78 @@ //! Elaboration phase: lowers EGraph back to sequences of operations //! in CFG nodes. 
+use super::cost::{pure_op_cost, Cost}; use super::domtree::DomTreeWithChildren; -use super::node::{op_cost, Cost, Node, NodeCtx}; -use super::Analysis; use super::Stats; use crate::dominator_tree::DominatorTree; use crate::fx::FxHashSet; -use crate::ir::{Block, Function, Inst, Opcode, RelSourceLoc, Type, Value, ValueList}; -use crate::loop_analysis::LoopAnalysis; +use crate::ir::ValueDef; +use crate::ir::{Block, Function, Inst, Value}; +use crate::loop_analysis::{Loop, LoopAnalysis, LoopLevel}; use crate::scoped_hash_map::ScopedHashMap; use crate::trace; +use crate::unionfind::UnionFind; use alloc::vec::Vec; -use cranelift_egraph::{EGraph, Id, Language, NodeKey}; -use cranelift_entity::{packed_option::PackedOption, SecondaryMap}; +use cranelift_entity::{packed_option::ReservedValue, SecondaryMap}; use smallvec::{smallvec, SmallVec}; use std::ops::Add; -type LoopDepth = u32; - pub(crate) struct Elaborator<'a> { func: &'a mut Function, domtree: &'a DominatorTree, + domtree_children: &'a DomTreeWithChildren, loop_analysis: &'a LoopAnalysis, - node_ctx: &'a NodeCtx, - egraph: &'a EGraph, - id_to_value: ScopedHashMap, - id_to_best_cost_and_node: SecondaryMap, + eclasses: &'a mut UnionFind, + /// Map from Value that is produced by a pure Inst (and was thus + /// not in the side-effecting skeleton) to the value produced by + /// an elaborated inst (placed in the layout) to whose results we + /// refer in the final code. + /// + /// The first time we use some result of an instruction during + /// elaboration, we can place it and insert an identity map (inst + /// results to that same inst's results) in this scoped + /// map. Within that block and its dom-tree children, that mapping + /// is visible and we can continue to use it. This allows us to + /// avoid cloning the instruction. However, if we pop that scope + /// and use it somewhere else as well, we will need to + /// duplicate. 
We detect this case by checking, when a value that + /// we want is not present in this map, whether the producing inst + /// is already placed in the Layout. If so, we duplicate, and + /// insert non-identity mappings from the original inst's results + /// to the cloned inst's results. + value_to_elaborated_value: ScopedHashMap, + /// Map from Value to the best (lowest-cost) Value in its eclass + /// (tree of union value-nodes). + value_to_best_value: SecondaryMap, /// Stack of blocks and loops in current elaboration path. loop_stack: SmallVec<[LoopStackEntry; 8]>, - cur_block: Option, - first_branch: SecondaryMap>, - remat_ids: &'a FxHashSet, + /// The current block into which we are elaborating. + cur_block: Block, + /// Values that opt rules have indicated should be rematerialized + /// in every block they are used (e.g., immediates or other + /// "cheap-to-compute" ops). + remat_values: &'a FxHashSet, /// Explicitly-unrolled value elaboration stack. elab_stack: Vec, - elab_result_stack: Vec, + /// Results from the elab stack. + elab_result_stack: Vec, /// Explicitly-unrolled block elaboration stack. block_stack: Vec, + /// Stats for various events during egraph processing, to help + /// with optimization of this infrastructure. stats: &'a mut Stats, } +#[derive(Clone, Copy, Debug)] +struct ElaboratedValue { + in_block: Block, + value: Value, +} + #[derive(Clone, Debug)] struct LoopStackEntry { + /// The loop identifier. + lp: Loop, /// The hoist point: a block that immediately dominates this /// loop. May not be an immediate predecessor, but will be a valid /// point to place all loop-invariant ops: they must depend only @@ -54,22 +85,20 @@ struct LoopStackEntry { #[derive(Clone, Debug)] enum ElabStackEntry { - /// Next action is to resolve this id into a node and elaborate - /// args. 
- Start { id: Id }, + /// Next action is to resolve this value into an elaborated inst + /// (placed into the layout) that produces the value, and + /// recursively elaborate the insts that produce its args. + /// + /// Any inserted ops should be inserted before `before`, which is + /// the instruction demanding this value. + Start { value: Value, before: Inst }, /// Args have been pushed; waiting for results. - PendingNode { - canonical: Id, - node_key: NodeKey, - remat: bool, + PendingInst { + inst: Inst, + result_idx: usize, num_args: usize, - }, - /// Waiting for a result to return one projected value of a - /// multi-value result. - PendingProjection { - canonical: Id, - index: usize, - ty: Type, + remat: bool, + before: Inst, }, } @@ -79,56 +108,31 @@ enum BlockStackEntry { Pop, } -#[derive(Clone, Debug)] -enum IdValue { - /// A single value. - Value { - depth: LoopDepth, - block: Block, - value: Value, - }, - /// Multiple results; indices in `node_args`. - Values { - depth: LoopDepth, - block: Block, - values: ValueList, - }, -} - -impl IdValue { - fn block(&self) -> Block { - match self { - IdValue::Value { block, .. } | IdValue::Values { block, .. 
} => *block, - } - } -} - impl<'a> Elaborator<'a> { pub(crate) fn new( func: &'a mut Function, domtree: &'a DominatorTree, + domtree_children: &'a DomTreeWithChildren, loop_analysis: &'a LoopAnalysis, - egraph: &'a EGraph, - node_ctx: &'a NodeCtx, - remat_ids: &'a FxHashSet, + remat_values: &'a FxHashSet, + eclasses: &'a mut UnionFind, stats: &'a mut Stats, ) -> Self { - let num_blocks = func.dfg.num_blocks(); - let mut id_to_best_cost_and_node = - SecondaryMap::with_default((Cost::infinity(), Id::invalid())); - id_to_best_cost_and_node.resize(egraph.classes.len()); + let num_values = func.dfg.num_values(); + let mut value_to_best_value = + SecondaryMap::with_default((Cost::infinity(), Value::reserved_value())); + value_to_best_value.resize(num_values); Self { func, domtree, + domtree_children, loop_analysis, - egraph, - node_ctx, - id_to_value: ScopedHashMap::with_capacity(egraph.classes.len()), - id_to_best_cost_and_node, + eclasses, + value_to_elaborated_value: ScopedHashMap::with_capacity(num_values), + value_to_best_value, loop_stack: smallvec![], - cur_block: None, - first_branch: SecondaryMap::with_capacity(num_blocks), - remat_ids, + cur_block: Block::reserved_value(), + remat_values, elab_stack: vec![], elab_result_stack: vec![], block_stack: vec![], @@ -136,19 +140,23 @@ impl<'a> Elaborator<'a> { } } - fn cur_loop_depth(&self) -> LoopDepth { - self.loop_stack.len() as LoopDepth - } - - fn start_block(&mut self, idom: Option, block: Block, block_params: &[(Id, Type)]) { + fn start_block(&mut self, idom: Option, block: Block) { trace!( - "start_block: block {:?} with idom {:?} at loop depth {} scope depth {}", + "start_block: block {:?} with idom {:?} at loop depth {:?} scope depth {}", block, idom, - self.cur_loop_depth(), - self.id_to_value.depth() + self.loop_stack.len(), + self.value_to_elaborated_value.depth() ); + // Pop any loop levels we're no longer in. 
+ while let Some(inner_loop) = self.loop_stack.last() { + if self.loop_analysis.is_in_loop(block, inner_loop.lp) { + break; + } + self.loop_stack.pop(); + } + // Note that if the *entry* block is a loop header, we will // not make note of the loop here because it will not have an // immediate dominator. We must disallow this case because we @@ -156,14 +164,15 @@ impl<'a> Elaborator<'a> { // `LoopAnalysis` will otherwise still make note of this loop // and loop depths will not match. if let Some(idom) = idom { - if self.loop_analysis.is_loop_header(block).is_some() { + if let Some(lp) = self.loop_analysis.is_loop_header(block) { self.loop_stack.push(LoopStackEntry { + lp, // Any code hoisted out of this loop will have code // placed in `idom`, and will have def mappings // inserted in to the scoped hashmap at that block's // level. hoist_block: idom, - scope_depth: (self.id_to_value.depth() - 1) as u32, + scope_depth: (self.value_to_elaborated_value.depth() - 1) as u32, }); trace!( " -> loop header, pushing; depth now {}", @@ -177,391 +186,457 @@ impl<'a> Elaborator<'a> { ); } - self.cur_block = Some(block); - for &(id, ty) in block_params { - let value = self.func.dfg.append_block_param(block, ty); - trace!(" -> block param id {:?} value {:?}", id, value); - self.id_to_value.insert_if_absent( - id, - IdValue::Value { - depth: self.cur_loop_depth(), - block, - value, - }, - ); - } + trace!("block {}: loop stack is {:?}", block, self.loop_stack); + + self.cur_block = block; } - fn add_node(&mut self, node: &Node, args: &[Value], to_block: Block) -> ValueList { - let (instdata, result_ty, arity) = match node { - Node::Pure { op, ty, arity, .. } | Node::Inst { op, ty, arity, .. } => ( - op.with_args(args, &mut self.func.dfg.value_lists), - *ty, - *arity, - ), - Node::Load { op, ty, .. 
} => { - (op.with_args(args, &mut self.func.dfg.value_lists), *ty, 1) - } - _ => panic!("Cannot `add_node()` on block param or projection"), - }; - let srcloc = match node { - Node::Inst { srcloc, .. } | Node::Load { srcloc, .. } => *srcloc, - _ => RelSourceLoc::default(), - }; - let opcode = instdata.opcode(); - // Is this instruction either an actual terminator (an - // instruction that must end the block), or at least in the - // group of branches at the end (including conditional - // branches that may be followed by an actual terminator)? We - // call this the "terminator group", and we record the first - // inst in this group (`first_branch` below) so that we do not - // insert instructions needed only by args of later - // instructions in the terminator group in the middle of the - // terminator group. - // - // E.g., for the original sequence - // v1 = op ... - // brnz vCond, block1 - // jump block2(v1) - // - // elaboration would naively produce - // - // brnz vCond, block1 - // v1 = op ... - // jump block2(v1) - // - // but we use the `first_branch` mechanism below to ensure - // that once we've emitted at least one branch, all other - // elaborated insts have to go before that. So we emit brnz - // first, then as we elaborate the jump, we find we need the - // `op`; we `insert_inst` it *before* the brnz (which is the - // `first_branch`). 
- let is_terminator_group_inst = - opcode.is_branch() || opcode.is_return() || opcode == Opcode::Trap; - let inst = self.func.dfg.make_inst(instdata); - self.func.srclocs[inst] = srcloc; - - if arity == 1 { - self.func.dfg.append_result(inst, result_ty); - } else { - for _ in 0..arity { - self.func.dfg.append_result(inst, crate::ir::types::INVALID); - } - } - - if is_terminator_group_inst { - self.func.layout.append_inst(inst, to_block); - if self.first_branch[to_block].is_none() { - self.first_branch[to_block] = Some(inst).into(); - } - } else if let Some(branch) = self.first_branch[to_block].into() { - self.func.layout.insert_inst(inst, branch); - } else { - self.func.layout.append_inst(inst, to_block); - } - self.func.dfg.inst_results_list(inst) - } - - fn compute_best_nodes(&mut self) { - let best = &mut self.id_to_best_cost_and_node; - for (eclass_id, eclass) in &self.egraph.classes { - trace!("computing best for eclass {:?}", eclass_id); - if let Some(child1) = eclass.child1() { - trace!(" -> child {:?}", child1); - best[eclass_id] = best[child1]; - } - if let Some(child2) = eclass.child2() { - trace!(" -> child {:?}", child2); - if best[child2].0 < best[eclass_id].0 { - best[eclass_id] = best[child2]; + fn compute_best_values(&mut self) { + let best = &mut self.value_to_best_value; + for (value, def) in self.func.dfg.values_and_defs() { + trace!("computing best for value {:?} def {:?}", value, def); + match def { + ValueDef::Union(x, y) => { + // Pick the best of the two options based on + // min-cost. This works because each element of `best` + // is a `(cost, value)` tuple; `cost` comes first so + // the natural comparison works based on cost, and + // breaks ties based on value number. 
+ trace!(" -> best of {:?} and {:?}", best[x], best[y]); + best[value] = std::cmp::min(best[x], best[y]); + trace!(" -> {:?}", best[value]); } - } - if let Some(node_key) = eclass.get_node() { - let node = node_key.node(&self.egraph.nodes); - trace!(" -> eclass {:?}: node {:?}", eclass_id, node); - let (cost, id) = match node { - Node::Param { .. } - | Node::Inst { .. } - | Node::Load { .. } - | Node::Result { .. } => (Cost::zero(), eclass_id), - Node::Pure { op, .. } => { - let args_cost = self - .node_ctx - .children(node) + ValueDef::Param(_, _) => { + best[value] = (Cost::zero(), value); + } + // If the Inst is inserted into the layout (which is, + // at this point, only the side-effecting skeleton), + // then it must be computed and thus we give it zero + // cost. + ValueDef::Result(inst, _) if self.func.layout.inst_block(inst).is_some() => { + best[value] = (Cost::zero(), value); + } + ValueDef::Result(inst, _) => { + trace!(" -> value {}: result, computing cost", value); + let inst_data = &self.func.dfg[inst]; + let loop_level = self + .func + .layout + .inst_block(inst) + .map(|block| self.loop_analysis.loop_level(block)) + .unwrap_or(LoopLevel::root()); + // N.B.: at this point we know that the opcode is + // pure, so `pure_op_cost`'s precondition is + // satisfied. + let cost = pure_op_cost(inst_data.opcode()).at_level(loop_level.level()) + + self + .func + .dfg + .inst_args(inst) .iter() - .map(|&arg_id| { - trace!(" -> arg {:?}", arg_id); - best[arg_id].0 - }) + .map(|value| best[*value].0) // Can't use `.sum()` for `Cost` types; do // an explicit reduce instead. 
.fold(Cost::zero(), Cost::add); - let level = self.egraph.analysis_value(eclass_id).loop_level; - let cost = op_cost(op).at_level(level) + args_cost; - (cost, eclass_id) - } - }; - - if cost < best[eclass_id].0 { - best[eclass_id] = (cost, id); + best[value] = (cost, value); } - } - debug_assert_ne!(best[eclass_id].0, Cost::infinity()); - debug_assert_ne!(best[eclass_id].1, Id::invalid()); - trace!("best for eclass {:?}: {:?}", eclass_id, best[eclass_id]); + }; + debug_assert_ne!(best[value].0, Cost::infinity()); + debug_assert_ne!(best[value].1, Value::reserved_value()); + trace!("best for eclass {:?}: {:?}", value, best[value]); } } - fn elaborate_eclass_use(&mut self, id: Id) { - self.elab_stack.push(ElabStackEntry::Start { id }); + /// Elaborate use of an eclass, inserting any needed new + /// instructions before the given inst `before`. Should only be + /// given values corresponding to results of instructions or + /// blockparams. + fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue { + debug_assert_ne!(value, Value::reserved_value()); + + // Kick off the process by requesting this result + // value. + self.elab_stack + .push(ElabStackEntry::Start { value, before }); + + // Now run the explicit-stack recursion until we reach + // the root. self.process_elab_stack(); debug_assert_eq!(self.elab_result_stack.len(), 1); - self.elab_result_stack.clear(); + self.elab_result_stack.pop().unwrap() } fn process_elab_stack(&mut self) { while let Some(entry) = self.elab_stack.last() { match entry { - &ElabStackEntry::Start { id } => { + &ElabStackEntry::Start { value, before } => { // We always replace the Start entry, so pop it now. 
self.elab_stack.pop(); - self.stats.elaborate_visit_node += 1; - let canonical = self.egraph.canonical_id(id); - trace!("elaborate: id {}", id); + debug_assert_ne!(value, Value::reserved_value()); + let value = self.func.dfg.resolve_aliases(value); - let remat = if let Some(val) = self.id_to_value.get(&canonical) { - // Look at the defined block, and determine whether this - // node kind allows rematerialization if the value comes - // from another block. If so, ignore the hit and recompute - // below. - let remat = val.block() != self.cur_block.unwrap() - && self.remat_ids.contains(&canonical); + self.stats.elaborate_visit_node += 1; + let canonical_value = self.eclasses.find(value); + debug_assert_ne!(canonical_value, Value::reserved_value()); + trace!( + "elaborate: value {} canonical {} before {}", + value, + canonical_value, + before + ); + + let remat = if let Some(elab_val) = + self.value_to_elaborated_value.get(&canonical_value) + { + // Value is available. Look at the defined + // block, and determine whether this node kind + // allows rematerialization if the value comes + // from another block. If so, ignore the hit + // and recompute below. + let remat = elab_val.in_block != self.cur_block + && self.remat_values.contains(&canonical_value); if !remat { - trace!("elaborate: id {} -> {:?}", id, val); + trace!("elaborate: value {} -> {:?}", value, elab_val); self.stats.elaborate_memoize_hit += 1; - self.elab_result_stack.push(val.clone()); + self.elab_result_stack.push(*elab_val); continue; } - trace!("elaborate: id {} -> remat", id); + trace!("elaborate: value {} -> remat", canonical_value); self.stats.elaborate_memoize_miss_remat += 1; // The op is pure at this point, so it is always valid to // remove from this map. 
- self.id_to_value.remove(&canonical); + self.value_to_elaborated_value.remove(&canonical_value); true } else { - self.remat_ids.contains(&canonical) + // Value not available; but still look up + // whether it's been flagged for remat because + // this affects placement. + let remat = self.remat_values.contains(&canonical_value); + trace!(" -> not present in map; remat = {}", remat); + remat }; self.stats.elaborate_memoize_miss += 1; - // Get the best option; we use `id` (latest id) here so we - // have a full view of the eclass. - let (_, best_node_eclass) = self.id_to_best_cost_and_node[id]; - debug_assert_ne!(best_node_eclass, Id::invalid()); + // Get the best option; we use `value` (latest + // value) here so we have a full view of the + // eclass. + trace!("looking up best value for {}", value); + let (_, best_value) = self.value_to_best_value[value]; + debug_assert_ne!(best_value, Value::reserved_value()); + trace!("elaborate: value {} -> best {}", value, best_value,); + + // Now resolve the value to its definition to see + // how we can compute it. + let (inst, result_idx) = match self.func.dfg.value_def(best_value) { + ValueDef::Result(inst, result_idx) => { + trace!( + " -> value {} is result {} of {}", + best_value, + result_idx, + inst + ); + (inst, result_idx) + } + ValueDef::Param(_, _) => { + // We don't need to do anything to compute + // this value; just push its result on the + // result stack (blockparams are already + // available). 
+ trace!(" -> value {} is a blockparam", best_value); + self.elab_result_stack.push(ElaboratedValue { + in_block: self.cur_block, + value: best_value, + }); + continue; + } + ValueDef::Union(_, _) => { + panic!("Should never have a Union value as the best value"); + } + }; trace!( - "elaborate: id {} -> best {} -> eclass node {:?}", - id, - best_node_eclass, - self.egraph.classes[best_node_eclass] + " -> result {} of inst {:?}", + result_idx, + self.func.dfg[inst] ); - let node_key = self.egraph.classes[best_node_eclass].get_node().unwrap(); - let node = node_key.node(&self.egraph.nodes); - trace!(" -> enode {:?}", node); - // Is the node a block param? We should never get here if so - // (they are inserted when first visiting the block). - if matches!(node, Node::Param { .. }) { - unreachable!("Param nodes should already be inserted"); - } - - // Is the node a result projection? If so, resolve - // the value we are projecting a part of, then - // eventually return here (saving state with a - // PendingProjection). - if let Node::Result { - value, result, ty, .. - } = node - { - trace!(" -> result; pushing arg value {}", value); - self.elab_stack.push(ElabStackEntry::PendingProjection { - index: *result, - canonical, - ty: *ty, - }); - self.elab_stack.push(ElabStackEntry::Start { id: *value }); - continue; - } - - // We're going to need to emit this - // operator. First, enqueue all args to be + // We're going to need to use this instruction + // result, placing the instruction into the + // layout. First, enqueue all args to be // elaborated. Push state to receive the results - // and later elab this node. - let num_args = self.node_ctx.children(&node).len(); - self.elab_stack.push(ElabStackEntry::PendingNode { - canonical, - node_key, - remat, + // and later elab this inst. 
+ let args = self.func.dfg.inst_args(inst); + let num_args = args.len(); + self.elab_stack.push(ElabStackEntry::PendingInst { + inst, + result_idx, num_args, + remat, + before, }); // Push args in reverse order so we process the // first arg first. - for &arg_id in self.node_ctx.children(&node).iter().rev() { - self.elab_stack.push(ElabStackEntry::Start { id: arg_id }); + for &arg in args.iter().rev() { + debug_assert_ne!(arg, Value::reserved_value()); + self.elab_stack + .push(ElabStackEntry::Start { value: arg, before }); } } - &ElabStackEntry::PendingNode { - canonical, - node_key, - remat, + &ElabStackEntry::PendingInst { + inst, + result_idx, num_args, + remat, + before, } => { self.elab_stack.pop(); - let node = node_key.node(&self.egraph.nodes); - - // We should have all args resolved at this point. - let arg_idx = self.elab_result_stack.len() - num_args; - let args = &self.elab_result_stack[arg_idx..]; - - // Gather the individual output-CLIF `Value`s. - let arg_values: SmallVec<[Value; 8]> = args - .iter() - .map(|idvalue| match idvalue { - IdValue::Value { value, .. } => *value, - IdValue::Values { .. } => { - panic!("enode depends directly on multi-value result") - } - }) - .collect(); - - // Compute max loop depth. - let max_loop_depth = args - .iter() - .map(|idvalue| match idvalue { - IdValue::Value { depth, .. } => *depth, - IdValue::Values { .. } => unreachable!(), - }) - .max() - .unwrap_or(0); - - // Remove args from result stack. - self.elab_result_stack.truncate(arg_idx); - - // Determine the location at which we emit it. This is the - // current block *unless* we hoist above a loop when all args - // are loop-invariant (and this op is pure). - let (loop_depth, scope_depth, block) = if node.is_non_pure() { - // Non-pure op: always at the current location. 
- ( - self.cur_loop_depth(), - self.id_to_value.depth(), - self.cur_block.unwrap(), - ) - } else if max_loop_depth == self.cur_loop_depth() || remat { - // Pure op, but depends on some value at the current loop - // depth, or remat forces it here: as above. - ( - self.cur_loop_depth(), - self.id_to_value.depth(), - self.cur_block.unwrap(), - ) - } else { - // Pure op, and does not depend on any args at current - // loop depth: hoist out of loop. - self.stats.elaborate_licm_hoist += 1; - let data = &self.loop_stack[max_loop_depth as usize]; - (max_loop_depth, data.scope_depth as usize, data.hoist_block) - }; - // Loop scopes are a subset of all scopes. - debug_assert!(scope_depth >= loop_depth as usize); - - // This is an actual operation; emit the node in sequence now. - let results = self.add_node(node, &arg_values[..], block); - let results_slice = results.as_slice(&self.func.dfg.value_lists); - - // Build the result and memoize in the id-to-value map. - let result = if results_slice.len() == 1 { - IdValue::Value { - depth: loop_depth, - block, - value: results_slice[0], - } - } else { - IdValue::Values { - depth: loop_depth, - block, - values: results, - } - }; - - self.id_to_value.insert_if_absent_with_depth( - canonical, - result.clone(), - scope_depth, + trace!( + "PendingInst: {} result {} args {} remat {} before {}", + inst, + result_idx, + num_args, + remat, + before ); - // Push onto the elab-results stack. - self.elab_result_stack.push(result) - } - &ElabStackEntry::PendingProjection { - ty, - index, - canonical, - } => { - self.elab_stack.pop(); + // We should have all args resolved at this + // point. Grab them and drain them out, removing + // them. + let arg_idx = self.elab_result_stack.len() - num_args; + let arg_values = &self.elab_result_stack[arg_idx..]; - // Grab the input from the elab-result stack. - let value = self.elab_result_stack.pop().expect("Should have result"); + // Compute max loop depth. 
+ let loop_hoist_level = arg_values + .iter() + .map(|&value| { + // Find the outermost loop level at which + // the value's defining block *is not* a + // member. This is the loop-nest level + // whose hoist-block we hoist to. + let hoist_level = self + .loop_stack + .iter() + .position(|loop_entry| { + !self.loop_analysis.is_in_loop(value.in_block, loop_entry.lp) + }) + .unwrap_or(self.loop_stack.len()); + trace!( + " -> arg: elab_value {:?} hoist level {:?}", + value, + hoist_level + ); + hoist_level + }) + .max() + .unwrap_or(self.loop_stack.len()); + trace!( + " -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}", + loop_hoist_level, + self.loop_stack.len(), + self.loop_stack, + ); - let (depth, block, values) = match value { - IdValue::Values { - depth, - block, - values, - .. - } => (depth, block, values), - IdValue::Value { .. } => { - unreachable!("Projection nodes should not be used on single results"); + // We know that this is a pure inst, because + // non-pure roots have already been placed in the + // value-to-elab'd-value map and are never subject + // to remat, so they will not reach this stage of + // processing. + // + // We now must determine the location at which we + // place the instruction. This is the current + // block *unless* we hoist above a loop when all + // args are loop-invariant (and this op is pure). + let (scope_depth, before, insert_block) = + if loop_hoist_level == self.loop_stack.len() || remat { + // Depends on some value at the current + // loop depth, or remat forces it here: + // place it at the current location. + ( + self.value_to_elaborated_value.depth(), + before, + self.func.layout.inst_block(before).unwrap(), + ) + } else { + // Does not depend on any args at current + // loop depth: hoist out of loop. + self.stats.elaborate_licm_hoist += 1; + let data = &self.loop_stack[loop_hoist_level]; + // `data.hoist_block` should dominate `before`'s block. 
+ let before_block = self.func.layout.inst_block(before).unwrap(); + debug_assert!(self.domtree.dominates( + data.hoist_block, + before_block, + &self.func.layout + )); + // Determine the instruction at which we + // insert in `data.hoist_block`. + let before = self + .func + .layout + .canonical_branch_inst(&self.func.dfg, data.hoist_block) + .unwrap(); + (data.scope_depth as usize, before, data.hoist_block) + }; + + trace!( + " -> decided to place: before {} insert_block {}", + before, + insert_block + ); + + // Now we need to place `inst` at the computed + // location (just before `before`). Note that + // `inst` may already have been placed somewhere + // else, because a pure node may be elaborated at + // more than one place. In this case, we need to + // duplicate the instruction (and return the + // `Value`s for that duplicated instance + // instead). + trace!("need inst {} before {}", inst, before); + let inst = if self.func.layout.inst_block(inst).is_some() { + // Clone the inst! + let new_inst = self.func.dfg.clone_inst(inst); + trace!( + " -> inst {} already has a location; cloned to {}", + inst, + new_inst + ); + // Create mappings in the + // value-to-elab'd-value map from original + // results to cloned results. 
+ for (&result, &new_result) in self + .func + .dfg + .inst_results(inst) + .iter() + .zip(self.func.dfg.inst_results(new_inst).iter()) + { + let elab_value = ElaboratedValue { + value: new_result, + in_block: insert_block, + }; + self.value_to_elaborated_value.insert_if_absent_with_depth( + result, + elab_value, + scope_depth, + ); + + self.eclasses.add(new_result); + self.eclasses.union(result, new_result); + self.value_to_best_value[new_result] = self.value_to_best_value[result]; + + trace!( + " -> cloned inst has new result {} for orig {}", + new_result, + result + ); } + new_inst + } else { + trace!(" -> no location; using original inst"); + // Create identity mappings from result values + // to themselves in this scope, since we're + // using the original inst. + for &result in self.func.dfg.inst_results(inst) { + let elab_value = ElaboratedValue { + value: result, + in_block: insert_block, + }; + self.value_to_elaborated_value.insert_if_absent_with_depth( + result, + elab_value, + scope_depth, + ); + trace!(" -> inserting identity mapping for {}", result); + } + inst }; - let values = values.as_slice(&self.func.dfg.value_lists); - let value = values[index]; - self.func.dfg.fill_in_value_type(value, ty); - let value = IdValue::Value { - depth, - block, - value, - }; - self.id_to_value.insert_if_absent(canonical, value.clone()); + // Place the inst just before `before`. + self.func.layout.insert_inst(inst, before); - self.elab_result_stack.push(value); + // Update the inst's arguments. + let args_dest = self.func.dfg.inst_args_mut(inst); + for (dest, val) in args_dest.iter_mut().zip(arg_values.iter()) { + *dest = val.value; + } + + // Now that we've consumed the arg values, pop + // them off the stack. + self.elab_result_stack.truncate(arg_idx); + + // Push the requested result index of the + // instruction onto the elab-results stack. 
+ self.elab_result_stack.push(ElaboratedValue { + in_block: insert_block, + value: self.func.dfg.inst_results(inst)[result_idx], + }); } } } } - fn elaborate_block<'b, PF: Fn(Block) -> &'b [(Id, Type)], SEF: Fn(Block) -> &'b [Id]>( - &mut self, - idom: Option, - block: Block, - block_params_fn: &PF, - block_side_effects_fn: &SEF, - ) { - let blockparam_ids_tys = (block_params_fn)(block); - self.start_block(idom, block, blockparam_ids_tys); - for &id in (block_side_effects_fn)(block) { - self.elaborate_eclass_use(id); + fn elaborate_block(&mut self, idom: Option, block: Block) { + trace!("elaborate_block: block {}", block); + self.start_block(idom, block); + + // Iterate over the side-effecting skeleton using the linked + // list in Layout. We will insert instructions that are + // elaborated *before* `inst`, so we can always use its + // next-link to continue the iteration. + let mut next_inst = self.func.layout.first_inst(block); + let mut first_branch = None; + while let Some(inst) = next_inst { + trace!( + "elaborating inst {} with results {:?}", + inst, + self.func.dfg.inst_results(inst) + ); + // Record the first branch we see in the block; all + // elaboration for args of *any* branch must be inserted + // before the *first* branch, because the branch group + // must remain contiguous at the end of the block. + if self.func.dfg[inst].opcode().is_branch() && first_branch == None { + first_branch = Some(inst); + } + + // Determine where elaboration inserts insts. + let before = first_branch.unwrap_or(inst); + trace!(" -> inserting before {}", before); + + // For each arg of the inst, elaborate its value. + for i in 0..self.func.dfg.inst_args(inst).len() { + // Don't borrow across the below. + let arg = self.func.dfg.inst_args(inst)[i]; + trace!(" -> arg {}", arg); + // Elaborate the arg, placing any newly-inserted insts + // before `before`. Get the updated value, which may + // be different than the original. 
+ let arg = self.elaborate_eclass_use(arg, before); + trace!(" -> rewrote arg to {:?}", arg); + self.func.dfg.inst_args_mut(inst)[i] = arg.value; + } + + // We need to put the results of this instruction in the + // map now. + for &result in self.func.dfg.inst_results(inst) { + trace!(" -> result {}", result); + self.value_to_elaborated_value.insert_if_absent( + result, + ElaboratedValue { + in_block: block, + value: result, + }, + ); + } + + next_inst = self.func.layout.next_inst(inst); } } - fn elaborate_domtree<'b, PF: Fn(Block) -> &'b [(Id, Type)], SEF: Fn(Block) -> &'b [Id]>( - &mut self, - block_params_fn: &PF, - block_side_effects_fn: &SEF, - domtree: &DomTreeWithChildren, - ) { + fn elaborate_domtree(&mut self, domtree: &DomTreeWithChildren) { let root = domtree.root(); self.block_stack.push(BlockStackEntry::Elaborate { block: root, @@ -571,9 +646,9 @@ impl<'a> Elaborator<'a> { match top { BlockStackEntry::Elaborate { block, idom } => { self.block_stack.push(BlockStackEntry::Pop); - self.id_to_value.increment_depth(); + self.value_to_elaborated_value.increment_depth(); - self.elaborate_block(idom, block, block_params_fn, block_side_effects_fn); + self.elaborate_block(idom, block); // Push children. We are doing a preorder // traversal so we do this after processing this @@ -592,39 +667,17 @@ impl<'a> Elaborator<'a> { self.block_stack[block_stack_end..].reverse(); } BlockStackEntry::Pop => { - self.id_to_value.decrement_depth(); - if let Some(innermost_loop) = self.loop_stack.last() { - if innermost_loop.scope_depth as usize == self.id_to_value.depth() { - self.loop_stack.pop(); - } - } + self.value_to_elaborated_value.decrement_depth(); } } } } - fn clear_func_body(&mut self) { - // Clear all instructions and args/results from the DFG. We - // rebuild them entirely during elaboration. (TODO: reuse the - // existing inst for the *first* copy of a given node.) 
- self.func.dfg.clear_insts(); - // Clear the instructions in every block, but leave the list - // of blocks and their layout unmodified. - self.func.layout.clear_insts(); - self.func.srclocs.clear(); - } - - pub(crate) fn elaborate<'b, PF: Fn(Block) -> &'b [(Id, Type)], SEF: Fn(Block) -> &'b [Id]>( - &mut self, - block_params_fn: PF, - block_side_effects_fn: SEF, - ) { - let domtree = DomTreeWithChildren::new(self.func, self.domtree); + pub(crate) fn elaborate(&mut self) { self.stats.elaborate_func += 1; self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64; - self.clear_func_body(); - self.compute_best_nodes(); - self.elaborate_domtree(&block_params_fn, &block_side_effects_fn, &domtree); + self.compute_best_values(); + self.elaborate_domtree(&self.domtree_children); self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64; } } diff --git a/cranelift/codegen/src/egraph/node.rs b/cranelift/codegen/src/egraph/node.rs deleted file mode 100644 index 01d8e4128c..0000000000 --- a/cranelift/codegen/src/egraph/node.rs +++ /dev/null @@ -1,366 +0,0 @@ -//! Node definition for EGraph representation. - -use super::PackedMemoryState; -use crate::ir::{Block, DataFlowGraph, InstructionImms, Opcode, RelSourceLoc, Type}; -use crate::loop_analysis::LoopLevel; -use cranelift_egraph::{CtxEq, CtxHash, Id, Language, UnionFind}; -use cranelift_entity::{EntityList, ListPool}; -use std::hash::{Hash, Hasher}; - -#[derive(Debug)] -pub enum Node { - /// A blockparam. Effectively an input/root; does not refer to - /// predecessors' branch arguments, because this would create - /// cycles. - Param { - /// CLIF block this param comes from. - block: Block, - /// Index of blockparam within block. - index: u32, - /// Type of the value. - ty: Type, - /// The loop level of this Param. - loop_level: LoopLevel, - }, - /// A CLIF instruction that is pure (has no side-effects). 
Not - /// tied to any location; we will compute a set of locations at - /// which to compute this node during lowering back out of the - /// egraph. - Pure { - /// The instruction data, without SSA values. - op: InstructionImms, - /// eclass arguments to the operator. - args: EntityList, - /// Type of result, if one. - ty: Type, - /// Number of results. - arity: u16, - }, - /// A CLIF instruction that has side-effects or is otherwise not - /// representable by `Pure`. - Inst { - /// The instruction data, without SSA values. - op: InstructionImms, - /// eclass arguments to the operator. - args: EntityList, - /// Type of result, if one. - ty: Type, - /// Number of results. - arity: u16, - /// The source location to preserve. - srcloc: RelSourceLoc, - /// The loop level of this Inst. - loop_level: LoopLevel, - }, - /// A projection of one result of an `Inst` or `Pure`. - Result { - /// `Inst` or `Pure` node. - value: Id, - /// Index of the result we want. - result: usize, - /// Type of the value. - ty: Type, - }, - - /// A load instruction. Nominally a side-effecting `Inst` (and - /// included in the list of side-effecting roots so it will always - /// be elaborated), but represented as a distinct kind of node so - /// that we can leverage deduplication to do - /// redundant-load-elimination for free (and make store-to-load - /// forwarding much easier). - Load { - // -- identity depends on: - /// The original load operation. Must have one argument, the - /// address. - op: InstructionImms, - /// The type of the load result. - ty: Type, - /// Address argument. Actual address has an offset, which is - /// included in `op` (and thus already considered as part of - /// the key). - addr: Id, - /// The abstract memory state that this load accesses. - mem_state: PackedMemoryState, - - // -- not included in dedup key: - /// Source location, for traps. Not included in Eq/Hash. 
- srcloc: RelSourceLoc, - }, -} - -impl Node { - pub(crate) fn is_non_pure(&self) -> bool { - match self { - Node::Inst { .. } | Node::Load { .. } => true, - _ => false, - } - } -} - -/// Shared pools for type and id lists in nodes. -pub struct NodeCtx { - /// Arena for arg eclass-ID lists. - pub args: ListPool, -} - -impl NodeCtx { - pub(crate) fn with_capacity_for_dfg(dfg: &DataFlowGraph) -> Self { - let n_args = dfg.value_lists.capacity(); - Self { - args: ListPool::with_capacity(n_args), - } - } -} - -impl NodeCtx { - fn ids_eq(&self, a: &EntityList, b: &EntityList, uf: &mut UnionFind) -> bool { - let a = a.as_slice(&self.args); - let b = b.as_slice(&self.args); - a.len() == b.len() && a.iter().zip(b.iter()).all(|(&a, &b)| uf.equiv_id_mut(a, b)) - } - - fn hash_ids(&self, a: &EntityList, hash: &mut H, uf: &mut UnionFind) { - let a = a.as_slice(&self.args); - for &id in a { - uf.hash_id_mut(hash, id); - } - } -} - -impl CtxEq for NodeCtx { - fn ctx_eq(&self, a: &Node, b: &Node, uf: &mut UnionFind) -> bool { - match (a, b) { - ( - &Node::Param { - block, - index, - ty, - loop_level: _, - }, - &Node::Param { - block: other_block, - index: other_index, - ty: other_ty, - loop_level: _, - }, - ) => block == other_block && index == other_index && ty == other_ty, - ( - &Node::Result { value, result, ty }, - &Node::Result { - value: other_value, - result: other_result, - ty: other_ty, - }, - ) => uf.equiv_id_mut(value, other_value) && result == other_result && ty == other_ty, - ( - &Node::Pure { - ref op, - ref args, - ty, - arity: _, - }, - &Node::Pure { - op: ref other_op, - args: ref other_args, - ty: other_ty, - arity: _, - }, - ) => *op == *other_op && self.ids_eq(args, other_args, uf) && ty == other_ty, - ( - &Node::Inst { ref args, .. }, - &Node::Inst { - args: ref other_args, - .. - }, - ) => self.ids_eq(args, other_args, uf), - ( - &Node::Load { - ref op, - ty, - addr, - mem_state, - .. 
- }, - &Node::Load { - op: ref other_op, - ty: other_ty, - addr: other_addr, - mem_state: other_mem_state, - // Explicitly exclude: `inst` and `srcloc`. We - // want loads to merge if identical in - // opcode/offset, address expression, and last - // store (this does implicit - // redundant-load-elimination.) - // - // Note however that we *do* include `ty` (the - // type) and match on that: we otherwise would - // have no way of disambiguating loads of - // different widths to the same address. - .. - }, - ) => { - op == other_op - && ty == other_ty - && uf.equiv_id_mut(addr, other_addr) - && mem_state == other_mem_state - } - _ => false, - } - } -} - -impl CtxHash for NodeCtx { - fn ctx_hash(&self, value: &Node, uf: &mut UnionFind) -> u64 { - let mut state = crate::fx::FxHasher::default(); - std::mem::discriminant(value).hash(&mut state); - match value { - &Node::Param { - block, - index, - ty: _, - loop_level: _, - } => { - block.hash(&mut state); - index.hash(&mut state); - } - &Node::Result { - value, - result, - ty: _, - } => { - uf.hash_id_mut(&mut state, value); - result.hash(&mut state); - } - &Node::Pure { - ref op, - ref args, - ty, - arity: _, - } => { - op.hash(&mut state); - self.hash_ids(args, &mut state, uf); - ty.hash(&mut state); - } - &Node::Inst { ref args, .. } => { - self.hash_ids(args, &mut state, uf); - } - &Node::Load { - ref op, - ty, - addr, - mem_state, - .. - } => { - op.hash(&mut state); - ty.hash(&mut state); - uf.hash_id_mut(&mut state, addr); - mem_state.hash(&mut state); - } - } - - state.finish() - } -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub(crate) struct Cost(u32); -impl Cost { - pub(crate) fn at_level(&self, loop_level: LoopLevel) -> Cost { - let loop_level = std::cmp::min(2, loop_level.level()); - let multiplier = 1u32 << ((10 * loop_level) as u32); - Cost(self.0.saturating_mul(multiplier)).finite() - } - - pub(crate) fn infinity() -> Cost { - // 2^32 - 1 is, uh, pretty close to infinite... 
(we use `Cost` - // only for heuristics and always saturate so this suffices!) - Cost(u32::MAX) - } - - pub(crate) fn zero() -> Cost { - Cost(0) - } - - /// Clamp this cost at a "finite" value. Can be used in - /// conjunction with saturating ops to avoid saturating into - /// `infinity()`. - fn finite(self) -> Cost { - Cost(std::cmp::min(u32::MAX - 1, self.0)) - } -} - -impl std::default::Default for Cost { - fn default() -> Cost { - Cost::zero() - } -} - -impl std::ops::Add for Cost { - type Output = Cost; - fn add(self, other: Cost) -> Cost { - Cost(self.0.saturating_add(other.0)).finite() - } -} - -pub(crate) fn op_cost(op: &InstructionImms) -> Cost { - match op.opcode() { - // Constants. - Opcode::Iconst | Opcode::F32const | Opcode::F64const => Cost(0), - // Extends/reduces. - Opcode::Uextend | Opcode::Sextend | Opcode::Ireduce | Opcode::Iconcat | Opcode::Isplit => { - Cost(1) - } - // "Simple" arithmetic. - Opcode::Iadd - | Opcode::Isub - | Opcode::Band - | Opcode::BandNot - | Opcode::Bor - | Opcode::BorNot - | Opcode::Bxor - | Opcode::BxorNot - | Opcode::Bnot => Cost(2), - // Everything else. - _ => Cost(3), - } -} - -impl Language for NodeCtx { - type Node = Node; - - fn children<'a>(&'a self, node: &'a Node) -> &'a [Id] { - match node { - Node::Param { .. } => &[], - Node::Pure { args, .. } | Node::Inst { args, .. } => args.as_slice(&self.args), - Node::Load { addr, .. } => std::slice::from_ref(addr), - Node::Result { value, .. } => std::slice::from_ref(value), - } - } - - fn children_mut<'a>(&'a mut self, node: &'a mut Node) -> &'a mut [Id] { - match node { - Node::Param { .. } => &mut [], - Node::Pure { args, .. } | Node::Inst { args, .. } => args.as_mut_slice(&mut self.args), - Node::Load { addr, .. } => std::slice::from_mut(addr), - Node::Result { value, .. } => std::slice::from_mut(value), - } - } - - fn needs_dedup(&self, node: &Node) -> bool { - match node { - Node::Pure { .. } | Node::Load { .. 
} => true, - _ => false, - } - } -} - -#[cfg(test)] -mod test { - #[test] - #[cfg(target_pointer_width = "64")] - fn node_size() { - use super::*; - assert_eq!(std::mem::size_of::(), 16); - assert_eq!(std::mem::size_of::(), 32); - } -} diff --git a/cranelift/codegen/src/egraph/stores.rs b/cranelift/codegen/src/egraph/stores.rs deleted file mode 100644 index 9746eba159..0000000000 --- a/cranelift/codegen/src/egraph/stores.rs +++ /dev/null @@ -1,293 +0,0 @@ -//! Last-store tracking via alias analysis. -//! -//! We partition memory state into several *disjoint pieces* of -//! "abstract state". There are a finite number of such pieces: -//! currently, we call them "heap", "table", "vmctx", and "other". Any -//! given address in memory belongs to exactly one disjoint piece. -//! -//! One never tracks which piece a concrete address belongs to at -//! runtime; this is a purely static concept. Instead, all -//! memory-accessing instructions (loads and stores) are labeled with -//! one of these four categories in the `MemFlags`. It is forbidden -//! for a load or store to access memory under one category and a -//! later load or store to access the same memory under a different -//! category. This is ensured to be true by construction during -//! frontend translation into CLIF and during legalization. -//! -//! Given that this non-aliasing property is ensured by the producer -//! of CLIF, we can compute a *may-alias* property: one load or store -//! may-alias another load or store if both access the same category -//! of abstract state. -//! -//! The "last store" pass helps to compute this aliasing: we perform a -//! fixpoint analysis to track the last instruction that *might have* -//! written to a given part of abstract state. We also track the block -//! containing this store. -//! -//! We can't say for sure that the "last store" *did* actually write -//! that state, but we know for sure that no instruction *later* than -//! it (up to the current instruction) did. 
However, we can get a -//! must-alias property from this: if at a given load or store, we -//! look backward to the "last store", *AND* we find that it has -//! exactly the same address expression and value type, then we know -//! that the current instruction's access *must* be to the same memory -//! location. -//! -//! To get this must-alias property, we leverage the node -//! hashconsing. We design the Eq/Hash (node identity relation -//! definition) of the `Node` struct so that all loads with (i) the -//! same "last store", and (ii) the same address expression, and (iii) -//! the same opcode-and-offset, will deduplicate (the first will be -//! computed, and the later ones will use the same value). Furthermore -//! we have an optimization that rewrites a load into the stored value -//! of the last store *if* the last store has the same address -//! expression and constant offset. -//! -//! This gives us two optimizations, "redundant load elimination" and -//! "store-to-load forwarding". -//! -//! In theory we could also do *dead-store elimination*, where if a -//! store overwrites a value earlier written by another store, *and* -//! if no other load/store to the abstract state category occurred, -//! *and* no other trapping instruction occurred (at which point we -//! need an up-to-date memory state because post-trap-termination -//! memory state can be observed), *and* we can prove the original -//! store could not have trapped, then we can eliminate the original -//! store. Because this is so complex, and the conditions for doing it -//! correctly when post-trap state must be correct likely reduce the -//! potential benefit, we don't yet do this. 
- -use crate::flowgraph::ControlFlowGraph; -use crate::fx::{FxHashMap, FxHashSet}; -use crate::inst_predicates::has_memory_fence_semantics; -use crate::ir::{Block, Function, Inst, InstructionData, MemFlags, Opcode}; -use crate::trace; -use cranelift_entity::{EntityRef, SecondaryMap}; -use smallvec::{smallvec, SmallVec}; - -/// For a given program point, the vector of last-store instruction -/// indices for each disjoint category of abstract state. -#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] -struct LastStores { - heap: MemoryState, - table: MemoryState, - vmctx: MemoryState, - other: MemoryState, -} - -/// State of memory seen by a load. -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] -pub enum MemoryState { - /// State at function entry: nothing is known (but it is one - /// consistent value, so two loads from "entry" state at the same - /// address will still provide the same result). - #[default] - Entry, - /// State just after a store by the given instruction. The - /// instruction is a store from which we can forward. - Store(Inst), - /// State just before the given instruction. Used for abstract - /// value merges at merge-points when we cannot name a single - /// producing site. - BeforeInst(Inst), - /// State just after the given instruction. Used when the - /// instruction may update the associated state, but is not a - /// store whose value we can cleanly forward. (E.g., perhaps a - /// barrier of some sort.) - AfterInst(Inst), -} - -/// Memory state index, packed into a u32. 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct PackedMemoryState(u32); - -impl From for PackedMemoryState { - fn from(state: MemoryState) -> Self { - match state { - MemoryState::Entry => Self(0), - MemoryState::Store(i) => Self(1 | (i.index() as u32) << 2), - MemoryState::BeforeInst(i) => Self(2 | (i.index() as u32) << 2), - MemoryState::AfterInst(i) => Self(3 | (i.index() as u32) << 2), - } - } -} - -impl PackedMemoryState { - /// Does this memory state refer to a specific store instruction? - pub fn as_store(&self) -> Option { - if self.0 & 3 == 1 { - Some(Inst::from_bits(self.0 >> 2)) - } else { - None - } - } -} - -impl LastStores { - fn update(&mut self, func: &Function, inst: Inst) { - let opcode = func.dfg[inst].opcode(); - if has_memory_fence_semantics(opcode) { - self.heap = MemoryState::AfterInst(inst); - self.table = MemoryState::AfterInst(inst); - self.vmctx = MemoryState::AfterInst(inst); - self.other = MemoryState::AfterInst(inst); - } else if opcode.can_store() { - if let Some(memflags) = func.dfg[inst].memflags() { - *self.for_flags(memflags) = MemoryState::Store(inst); - } else { - self.heap = MemoryState::AfterInst(inst); - self.table = MemoryState::AfterInst(inst); - self.vmctx = MemoryState::AfterInst(inst); - self.other = MemoryState::AfterInst(inst); - } - } - } - - fn for_flags(&mut self, memflags: MemFlags) -> &mut MemoryState { - if memflags.heap() { - &mut self.heap - } else if memflags.table() { - &mut self.table - } else if memflags.vmctx() { - &mut self.vmctx - } else { - &mut self.other - } - } - - fn meet_from(&mut self, other: &LastStores, loc: Inst) { - let meet = |a: MemoryState, b: MemoryState| -> MemoryState { - match (a, b) { - (a, b) if a == b => a, - _ => MemoryState::BeforeInst(loc), - } - }; - - self.heap = meet(self.heap, other.heap); - self.table = meet(self.table, other.table); - self.vmctx = meet(self.vmctx, other.vmctx); - self.other = meet(self.other, other.other); - } -} - -/// An 
alias-analysis pass. -pub struct AliasAnalysis { - /// Last-store instruction (or none) for a given load. Use a hash map - /// instead of a `SecondaryMap` because this is sparse. - load_mem_state: FxHashMap, -} - -impl AliasAnalysis { - /// Perform an alias analysis pass. - pub fn new(func: &Function, cfg: &ControlFlowGraph) -> AliasAnalysis { - log::trace!("alias analysis: input is:\n{:?}", func); - let block_input = Self::compute_block_input_states(func, cfg); - let load_mem_state = Self::compute_load_last_stores(func, block_input); - AliasAnalysis { load_mem_state } - } - - fn compute_block_input_states( - func: &Function, - cfg: &ControlFlowGraph, - ) -> SecondaryMap> { - let mut block_input = SecondaryMap::with_capacity(func.dfg.num_blocks()); - let mut worklist: SmallVec<[Block; 16]> = smallvec![]; - let mut worklist_set = FxHashSet::default(); - let entry = func.layout.entry_block().unwrap(); - worklist.push(entry); - worklist_set.insert(entry); - block_input[entry] = Some(LastStores::default()); - - while let Some(block) = worklist.pop() { - worklist_set.remove(&block); - let state = block_input[block].clone().unwrap(); - - trace!("alias analysis: input to {} is {:?}", block, state); - - let state = func - .layout - .block_insts(block) - .fold(state, |mut state, inst| { - state.update(func, inst); - trace!("after {}: state is {:?}", inst, state); - state - }); - - for succ in cfg.succ_iter(block) { - let succ_first_inst = func.layout.first_inst(succ).unwrap(); - let succ_state = &mut block_input[succ]; - let old = succ_state.clone(); - if let Some(succ_state) = succ_state.as_mut() { - succ_state.meet_from(&state, succ_first_inst); - } else { - *succ_state = Some(state); - }; - let updated = *succ_state != old; - - if updated && worklist_set.insert(succ) { - worklist.push(succ); - } - } - } - - block_input - } - - fn compute_load_last_stores( - func: &Function, - block_input: SecondaryMap>, - ) -> FxHashMap { - let mut load_mem_state = FxHashMap::default(); 
- load_mem_state.reserve(func.dfg.num_insts() / 8); - - for block in func.layout.blocks() { - let mut state = block_input[block].clone().unwrap(); - - for inst in func.layout.block_insts(block) { - trace!( - "alias analysis: scanning at {} with state {:?} ({:?})", - inst, - state, - func.dfg[inst], - ); - - // N.B.: we match `Load` specifically, and not any - // other kinds of loads (or any opcode such that - // `opcode.can_load()` returns true), because some - // "can load" instructions actually have very - // different semantics (are not just a load of a - // particularly-typed value). For example, atomic - // (load/store, RMW, CAS) instructions "can load" but - // definitely should not participate in store-to-load - // forwarding or redundant-load elimination. Our goal - // here is to provide a `MemoryState` just for plain - // old loads whose semantics we can completely reason - // about. - if let InstructionData::Load { - opcode: Opcode::Load, - flags, - .. - } = func.dfg[inst] - { - let mem_state = *state.for_flags(flags); - trace!( - "alias analysis: at {}: load with mem_state {:?}", - inst, - mem_state, - ); - - load_mem_state.insert(inst, mem_state.into()); - } - - state.update(func, inst); - } - } - - load_mem_state - } - - /// Get the state seen by a load, if any. - pub fn get_state_for_load(&self, inst: Inst) -> Option { - self.load_mem_state.get(&inst).copied() - } -} diff --git a/cranelift/codegen/src/inst_predicates.rs b/cranelift/codegen/src/inst_predicates.rs index 76245722f5..b67f110137 100644 --- a/cranelift/codegen/src/inst_predicates.rs +++ b/cranelift/codegen/src/inst_predicates.rs @@ -45,6 +45,35 @@ pub fn has_side_effect(func: &Function, inst: Inst) -> bool { trivially_has_side_effects(opcode) || is_load_with_defined_trapping(opcode, data) } +/// Does the given instruction behave as a "pure" node with respect to +/// aegraph semantics? 
+/// +/// - Actual pure nodes (arithmetic, etc) +/// - Loads with both the `readonly` and `notrap` flags set +pub fn is_pure_for_egraph(func: &Function, inst: Inst) -> bool { + let is_readonly_load = match func.dfg[inst] { + InstructionData::Load { + opcode: Opcode::Load, + flags, + .. + } => flags.readonly() && flags.notrap(), + _ => false, + }; + // Multi-value results do not play nicely with much of the egraph + // infrastructure. They are in practice used only for multi-return + // calls and some other odd instructions (e.g. iadd_cout) which, + // for now, we can afford to leave in place as opaque + // side-effecting ops. So if more than one result, then the inst + // is "not pure". Similarly, ops with zero results can be used + // only for their side-effects, so are never pure. (Or if they + // are, we can always trivially eliminate them with no effect.) + let has_one_result = func.dfg.inst_results(inst).len() == 1; + + let op = func.dfg[inst].opcode(); + + has_one_result && (is_readonly_load || (!op.can_load() && !trivially_has_side_effects(op))) +} + /// Does the given instruction have any side-effect as per [has_side_effect], or else is a load, /// but not the get_pinned_reg opcode? pub fn has_lowering_side_effect(func: &Function, inst: Inst) -> bool { diff --git a/cranelift/codegen/src/ir/dfg.rs b/cranelift/codegen/src/ir/dfg.rs index 48eb4fa910..877fc2cb83 100644 --- a/cranelift/codegen/src/ir/dfg.rs +++ b/cranelift/codegen/src/ir/dfg.rs @@ -125,23 +125,6 @@ impl DataFlowGraph { self.immediates.clear(); } - /// Clear all instructions, but keep blocks and other metadata - /// (signatures, constants, immediates). Everything to do with - /// `Value`s is cleared, including block params and debug info. - /// - /// Used during egraph-based optimization to clear out the pre-opt - /// body so that we can regenerate it from the egraph.
- pub(crate) fn clear_insts(&mut self) { - self.insts.clear(); - self.results.clear(); - self.value_lists.clear(); - self.values.clear(); - self.values_labels = None; - for block in self.blocks.values_mut() { - block.params = ValueList::new(); - } - } - /// Get the total number of instructions created in this function, whether they are currently /// inserted in the layout or not. /// @@ -173,6 +156,11 @@ impl DataFlowGraph { self.values.len() } + /// Get an iterator over all values and their definitions. + pub fn values_and_defs(&self) -> impl Iterator<Item = (Value, ValueDef)> + '_ { + self.values().map(|value| (value, self.value_def(value))) + } + /// Starts collection of debug information. pub fn collect_debug_info(&mut self) { if self.values_labels.is_none() { @@ -279,12 +267,6 @@ impl DataFlowGraph { self.values[v].ty() } - /// Fill in the type of a value, only if currently invalid (as a placeholder). - pub(crate) fn fill_in_value_type(&mut self, v: Value, ty: Type) { - debug_assert!(self.values[v].ty().is_invalid() || self.values[v].ty() == ty); - self.values[v].set_type(ty); - } - /// Get the definition of a value. /// /// This is either the instruction that defined it or the Block that has the value as an @@ -298,6 +280,7 @@ impl DataFlowGraph { // detect alias loops without overrunning the stack. self.value_def(self.resolve_aliases(original)) } + ValueData::Union { x, y, .. } => ValueDef::Union(x, y), } } @@ -313,6 +296,7 @@ impl DataFlowGraph { Inst { inst, num, .. } => Some(&v) == self.inst_results(inst).get(num as usize), Param { block, num, .. } => Some(&v) == self.block_params(block).get(num as usize), Alias { .. } => false, + Union { .. } => false, } } @@ -422,6 +406,8 @@ pub enum ValueDef { Result(Inst, usize), /// Value is the n'th parameter to a block. Param(Block, usize), + /// Value is a union of two other values.
+ Union(Value, Value), } impl ValueDef { @@ -458,6 +444,7 @@ impl ValueDef { pub fn num(self) -> usize { match self { Self::Result(_, n) | Self::Param(_, n) => n, + Self::Union(_, _) => 0, } } } @@ -476,6 +463,11 @@ enum ValueData { /// An alias value can't be linked as an instruction result or block parameter. It is used as a /// placeholder when the original instruction or block has been rewritten or modified. Alias { ty: Type, original: Value }, + + /// Union is a "fork" in representation: the value can be + /// represented as either of the values named here. This is used + /// for aegraph (acyclic egraph) representation in the DFG. + Union { ty: Type, x: Value, y: Value }, } /// Bit-packed version of ValueData, for efficiency. @@ -483,40 +475,71 @@ enum ValueData { /// Layout: /// /// ```plain -/// | tag:2 | type:14 | num:16 | index:32 | +/// | tag:2 | type:14 | x:24 | y:24 | +/// +/// Inst 00 ty inst output inst index +/// Param 01 ty blockparam num block index +/// Alias 10 ty 0 value index +/// Union 11 ty first value second value /// ``` #[derive(Clone, Copy, Debug, PartialEq, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] struct ValueDataPacked(u64); +/// Encodes a value in 0..2^32 into 0..2^n, where n is less than 32 +/// (and is given by `bits`), by translating 2^32-1 (0xffffffff) into the +/// reserved code 2^n-1 and debug-asserting that any other value is below 2^n-1. +fn encode_narrow_field(x: u32, bits: u8) -> u32 { + if x == 0xffff_ffff { + (1 << bits) - 1 + } else { + debug_assert!(x < (1 << bits) - 1); + x + } +} + +/// The inverse of the above `encode_narrow_field`: unpacks 2^n-1 into +/// 2^32-1.
+fn decode_narrow_field(x: u32, bits: u8) -> u32 { + if x == (1 << bits) - 1 { + 0xffff_ffff + } else { + x + } +} + impl ValueDataPacked { - const INDEX_SHIFT: u64 = 0; - const INDEX_BITS: u64 = 32; - const NUM_SHIFT: u64 = Self::INDEX_SHIFT + Self::INDEX_BITS; - const NUM_BITS: u64 = 16; - const TYPE_SHIFT: u64 = Self::NUM_SHIFT + Self::NUM_BITS; - const TYPE_BITS: u64 = 14; - const TAG_SHIFT: u64 = Self::TYPE_SHIFT + Self::TYPE_BITS; - const TAG_BITS: u64 = 2; + const Y_SHIFT: u8 = 0; + const Y_BITS: u8 = 24; + const X_SHIFT: u8 = Self::Y_SHIFT + Self::Y_BITS; + const X_BITS: u8 = 24; + const TYPE_SHIFT: u8 = Self::X_SHIFT + Self::X_BITS; + const TYPE_BITS: u8 = 14; + const TAG_SHIFT: u8 = Self::TYPE_SHIFT + Self::TYPE_BITS; + const TAG_BITS: u8 = 2; - const TAG_INST: u64 = 1; - const TAG_PARAM: u64 = 2; - const TAG_ALIAS: u64 = 3; + const TAG_INST: u64 = 0; + const TAG_PARAM: u64 = 1; + const TAG_ALIAS: u64 = 2; + const TAG_UNION: u64 = 3; - fn make(tag: u64, ty: Type, num: u16, index: u32) -> ValueDataPacked { + fn make(tag: u64, ty: Type, x: u32, y: u32) -> ValueDataPacked { debug_assert!(tag < (1 << Self::TAG_BITS)); debug_assert!(ty.repr() < (1 << Self::TYPE_BITS)); + let x = encode_narrow_field(x, Self::X_BITS); + let y = encode_narrow_field(y, Self::Y_BITS); + ValueDataPacked( (tag << Self::TAG_SHIFT) | ((ty.repr() as u64) << Self::TYPE_SHIFT) - | ((num as u64) << Self::NUM_SHIFT) - | ((index as u64) << Self::INDEX_SHIFT), + | ((x as u64) << Self::X_SHIFT) + | ((y as u64) << Self::Y_SHIFT), ) } #[inline(always)] - fn field(self, shift: u64, bits: u64) -> u64 { + fn field(self, shift: u8, bits: u8) -> u64 { (self.0 >> shift) & ((1 << bits) - 1) } @@ -537,14 +560,17 @@ impl From for ValueDataPacked { fn from(data: ValueData) -> Self { match data { ValueData::Inst { ty, num, inst } => { - Self::make(Self::TAG_INST, ty, num, inst.as_bits()) + Self::make(Self::TAG_INST, ty, num.into(), inst.as_bits()) } ValueData::Param { ty, num, block } => { - 
Self::make(Self::TAG_PARAM, ty, num, block.as_bits()) + Self::make(Self::TAG_PARAM, ty, num.into(), block.as_bits()) } ValueData::Alias { ty, original } => { Self::make(Self::TAG_ALIAS, ty, 0, original.as_bits()) } + ValueData::Union { ty, x, y } => { + Self::make(Self::TAG_UNION, ty, x.as_bits(), y.as_bits()) + } } } } @@ -552,25 +578,33 @@ impl From<ValueData> for ValueDataPacked { impl From<ValueDataPacked> for ValueData { fn from(data: ValueDataPacked) -> Self { let tag = data.field(ValueDataPacked::TAG_SHIFT, ValueDataPacked::TAG_BITS); - let ty = data.field(ValueDataPacked::TYPE_SHIFT, ValueDataPacked::TYPE_BITS) as u16; - let num = data.field(ValueDataPacked::NUM_SHIFT, ValueDataPacked::NUM_BITS) as u16; - let index = data.field(ValueDataPacked::INDEX_SHIFT, ValueDataPacked::INDEX_BITS) as u32; + let ty = u16::try_from(data.field(ValueDataPacked::TYPE_SHIFT, ValueDataPacked::TYPE_BITS)) + .expect("Mask should ensure result fits in a u16"); + let x = u32::try_from(data.field(ValueDataPacked::X_SHIFT, ValueDataPacked::X_BITS)) + .expect("Mask should ensure result fits in a u32"); + let y = u32::try_from(data.field(ValueDataPacked::Y_SHIFT, ValueDataPacked::Y_BITS)) + .expect("Mask should ensure result fits in a u32"); let ty = Type::from_repr(ty); match tag { ValueDataPacked::TAG_INST => ValueData::Inst { ty, - num, - inst: Inst::from_bits(index), + num: u16::try_from(x).expect("Inst result num should fit in u16"), + inst: Inst::from_bits(decode_narrow_field(y, ValueDataPacked::Y_BITS)), }, ValueDataPacked::TAG_PARAM => ValueData::Param { ty, - num, - block: Block::from_bits(index), + num: u16::try_from(x).expect("Blockparam index should fit in u16"), + block: Block::from_bits(decode_narrow_field(y, ValueDataPacked::Y_BITS)), }, ValueDataPacked::TAG_ALIAS => ValueData::Alias { ty, - original: Value::from_bits(index), + original: Value::from_bits(decode_narrow_field(y, ValueDataPacked::Y_BITS)), + }, + ValueDataPacked::TAG_UNION => ValueData::Union { + ty, + x:
Value::from_bits(decode_narrow_field(x, ValueDataPacked::X_BITS)), + y: Value::from_bits(decode_narrow_field(y, ValueDataPacked::Y_BITS)), }, _ => panic!("Invalid tag {} in ValueDataPacked 0x{:x}", tag, data.0), } @@ -582,8 +616,11 @@ impl From for ValueData { impl DataFlowGraph { /// Create a new instruction. /// - /// The type of the first result is indicated by `data.ty`. If the instruction produces - /// multiple results, also call `make_inst_results` to allocate value table entries. + /// The type of the first result is indicated by `data.ty`. If the + /// instruction produces multiple results, also call + /// `make_inst_results` to allocate value table entries. (It is + /// always safe to call `make_inst_results`, regardless of how + /// many results the instruction has.) pub fn make_inst(&mut self, data: InstructionData) -> Inst { let n = self.num_insts() + 1; self.results.resize(n); @@ -608,6 +645,7 @@ impl DataFlowGraph { match self.value_def(value) { ir::ValueDef::Result(inst, _) => self.display_inst(inst), ir::ValueDef::Param(_, _) => panic!("value is not defined by an instruction"), + ir::ValueDef::Union(_, _) => panic!("value is a union of two other values"), } } @@ -823,6 +861,19 @@ impl DataFlowGraph { self.insts[inst].put_value_list(branch_values) } + /// Clone an instruction, attaching new result `Value`s and + /// returning them. + pub fn clone_inst(&mut self, inst: Inst) -> Inst { + // First, add a clone of the InstructionData. + let inst_data = self[inst].clone(); + let new_inst = self.make_inst(inst_data); + // Get the controlling type variable. + let ctrl_typevar = self.ctrl_typevar(inst); + // Create new result values. + self.make_inst_results(new_inst, ctrl_typevar); + new_inst + } + /// Get the first result of an instruction. /// /// This function panics if the instruction doesn't have any result. @@ -847,6 +898,14 @@ impl DataFlowGraph { self.results[inst] } + /// Create a union of two values. 
+ pub fn union(&mut self, x: Value, y: Value) -> Value { + // Get the type. + let ty = self.value_type(x); + debug_assert_eq!(ty, self.value_type(y)); + self.make_value(ValueData::Union { ty, x, y }) + } + /// Get the call signature of a direct or indirect call instruction. /// Returns `None` if `inst` is not a call instruction. pub fn call_signature(&self, inst: Inst) -> Option { diff --git a/cranelift/codegen/src/ir/layout.rs b/cranelift/codegen/src/ir/layout.rs index 819c332d45..7162c848c5 100644 --- a/cranelift/codegen/src/ir/layout.rs +++ b/cranelift/codegen/src/ir/layout.rs @@ -61,18 +61,6 @@ impl Layout { self.last_block = None; } - /// Clear instructions from every block, but keep the blocks. - /// - /// Used by the egraph-based optimization to clear out the - /// function body but keep the CFG skeleton. - pub(crate) fn clear_insts(&mut self) { - self.insts.clear(); - for block in self.blocks.values_mut() { - block.first_inst = None.into(); - block.last_inst = None.into(); - } - } - /// Returns the capacity of the `BlockData` map. 
pub fn block_capacity(&self) -> usize { self.blocks.capacity() diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs index 23f952738e..3858a56917 100644 --- a/cranelift/codegen/src/ir/mod.rs +++ b/cranelift/codegen/src/ir/mod.rs @@ -48,7 +48,7 @@ pub use crate::ir::function::{DisplayFunctionAnnotations, Function}; pub use crate::ir::globalvalue::GlobalValueData; pub use crate::ir::heap::{HeapData, HeapStyle}; pub use crate::ir::instructions::{ - InstructionData, InstructionImms, Opcode, ValueList, ValueListPool, VariableArgs, + InstructionData, Opcode, ValueList, ValueListPool, VariableArgs, }; pub use crate::ir::jumptable::JumpTableData; pub use crate::ir::known_symbol::KnownSymbol; diff --git a/cranelift/codegen/src/ir/progpoint.rs b/cranelift/codegen/src/ir/progpoint.rs index 0152949e7a..39c4d98fbe 100644 --- a/cranelift/codegen/src/ir/progpoint.rs +++ b/cranelift/codegen/src/ir/progpoint.rs @@ -37,6 +37,7 @@ impl From for ProgramPoint { match def { ValueDef::Result(inst, _) => inst.into(), ValueDef::Param(block, _) => block.into(), + ValueDef::Union(_, _) => panic!("Union does not have a single program point"), } } } @@ -78,6 +79,7 @@ impl From for ExpandedProgramPoint { match def { ValueDef::Result(inst, _) => inst.into(), ValueDef::Param(block, _) => block.into(), + ValueDef::Union(_, _) => panic!("Union does not have a single program point"), } } } diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index bdee39cbd8..97460f9a1a 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -585,5 +585,27 @@ macro_rules! 
isle_common_prelude_methods { | IntCC::SignedLessThan => Some(*cc), } } + + #[inline] + fn unpack_value_array_2(&mut self, arr: &ValueArray2) -> (Value, Value) { + let [a, b] = *arr; + (a, b) + } + + #[inline] + fn pack_value_array_2(&mut self, a: Value, b: Value) -> ValueArray2 { + [a, b] + } + + #[inline] + fn unpack_value_array_3(&mut self, arr: &ValueArray3) -> (Value, Value, Value) { + let [a, b, c] = *arr; + (a, b, c) + } + + #[inline] + fn pack_value_array_3(&mut self, a: Value, b: Value, c: Value) -> ValueArray3 { + [a, b, c] + } }; } diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 3c2ffad5c3..621b9b572f 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -95,6 +95,7 @@ mod alias_analysis; mod bitset; mod constant_hash; mod context; +mod ctxhash; mod dce; mod divconst_magic_numbers; mod egraph; @@ -111,6 +112,7 @@ mod result; mod scoped_hash_map; mod simple_gvn; mod simple_preopt; +mod unionfind; mod unreachable_code; mod value_label; diff --git a/cranelift/codegen/src/loop_analysis.rs b/cranelift/codegen/src/loop_analysis.rs index be6d5e588e..f93e6ce87e 100644 --- a/cranelift/codegen/src/loop_analysis.rs +++ b/cranelift/codegen/src/loop_analysis.rs @@ -37,7 +37,7 @@ struct LoopData { #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct LoopLevel(u8); impl LoopLevel { - const INVALID: u8 = 0xff; + const INVALID: u8 = u8::MAX; /// Get the root level (no loop). pub fn root() -> Self { diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index ab1bc3bb7a..ee1c2e1214 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -56,25 +56,8 @@ macro_rules! 
isle_lower_prelude_methods { } #[inline] - fn unpack_value_array_2(&mut self, arr: &ValueArray2) -> (Value, Value) { - let [a, b] = *arr; - (a, b) - } - - #[inline] - fn pack_value_array_2(&mut self, a: Value, b: Value) -> ValueArray2 { - [a, b] - } - - #[inline] - fn unpack_value_array_3(&mut self, arr: &ValueArray3) -> (Value, Value, Value) { - let [a, b, c] = *arr; - (a, b, c) - } - - #[inline] - fn pack_value_array_3(&mut self, a: Value, b: Value, c: Value) -> ValueArray3 { - [a, b, c] + fn value_type(&mut self, val: Value) -> Type { + self.lower_ctx.dfg().value_type(val) } #[inline] @@ -230,11 +213,6 @@ macro_rules! isle_lower_prelude_methods { self.lower_ctx.dfg()[inst] } - #[inline] - fn value_type(&mut self, val: Value) -> Type { - self.lower_ctx.dfg().value_type(val) - } - #[inline] fn def_inst(&mut self, val: Value) -> Option { self.lower_ctx.dfg().value_def(val).inst() diff --git a/cranelift/codegen/src/opts.rs b/cranelift/codegen/src/opts.rs index 7e77370e29..8f0ca0c9ee 100644 --- a/cranelift/codegen/src/opts.rs +++ b/cranelift/codegen/src/opts.rs @@ -1,308 +1,131 @@ //! Optimization driver using ISLE rewrite rules on an egraph. 
-use crate::egraph::Analysis; -use crate::egraph::FuncEGraph; -pub use crate::egraph::{Node, NodeCtx}; +use crate::egraph::{NewOrExistingInst, OptimizeCtx}; use crate::ir::condcodes; pub use crate::ir::condcodes::{FloatCC, IntCC}; +use crate::ir::dfg::ValueDef; pub use crate::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64, Uimm8}; pub use crate::ir::types::*; pub use crate::ir::{ - dynamic_to_fixed, AtomicRmwOp, Block, Constant, DynamicStackSlot, FuncRef, GlobalValue, Heap, - HeapImm, Immediate, InstructionImms, JumpTable, MemFlags, Opcode, StackSlot, Table, TrapCode, - Type, Value, + dynamic_to_fixed, AtomicRmwOp, Block, Constant, DataFlowGraph, DynamicStackSlot, FuncRef, + GlobalValue, Heap, HeapImm, Immediate, InstructionData, JumpTable, MemFlags, Opcode, StackSlot, + Table, TrapCode, Type, Value, }; use crate::isle_common_prelude_methods; use crate::machinst::isle::*; use crate::trace; -pub use cranelift_egraph::{Id, NewOrExisting, NodeIter}; -use cranelift_entity::{EntityList, EntityRef}; -use smallvec::SmallVec; +use cranelift_entity::packed_option::ReservedValue; +use smallvec::{smallvec, SmallVec}; use std::marker::PhantomData; -pub type IdArray = EntityList; #[allow(dead_code)] pub type Unit = (); pub type Range = (usize, usize); +pub type ValueArray2 = [Value; 2]; +pub type ValueArray3 = [Value; 3]; pub type ConstructorVec = SmallVec<[T; 8]>; -mod generated_code; +pub(crate) mod generated_code; use generated_code::ContextIter; -struct IsleContext<'a, 'b> { - egraph: &'a mut FuncEGraph<'b>, +pub(crate) struct IsleContext<'a, 'b, 'c> { + pub(crate) ctx: &'a mut OptimizeCtx<'b, 'c>, } -const REWRITE_LIMIT: usize = 5; - -pub fn optimize_eclass<'a>(id: Id, egraph: &mut FuncEGraph<'a>) -> Id { - trace!("running rules on eclass {}", id.index()); - egraph.stats.rewrite_rule_invoked += 1; - - if egraph.rewrite_depth > REWRITE_LIMIT { - egraph.stats.rewrite_depth_limit += 1; - return id; - } - egraph.rewrite_depth += 1; - - // Find all possible 
rewrites and union them in, returning the - // union. - let mut ctx = IsleContext { egraph }; - let optimized_ids = generated_code::constructor_simplify(&mut ctx, id); - let mut union_id = id; - if let Some(mut ids) = optimized_ids { - while let Some(new_id) = ids.next(&mut ctx) { - if ctx.egraph.subsume_ids.contains(&new_id) { - trace!(" -> eclass {} subsumes {}", new_id, id); - ctx.egraph.stats.node_subsume += 1; - // Merge in the unionfind so canonicalization still - // works, but take *only* the subsuming ID, and break - // now. - ctx.egraph.egraph.unionfind.union(union_id, new_id); - union_id = new_id; - break; - } - ctx.egraph.stats.node_union += 1; - let old_union_id = union_id; - union_id = ctx - .egraph - .egraph - .union(&ctx.egraph.node_ctx, union_id, new_id); - trace!( - " -> union eclass {} with {} to get {}", - new_id, - old_union_id, - union_id - ); - } - } - trace!(" -> optimize {} got {}", id, union_id); - ctx.egraph.rewrite_depth -= 1; - union_id -} - -pub(crate) fn store_to_load<'a>(id: Id, egraph: &mut FuncEGraph<'a>) -> Id { - // Note that we only examine the latest enode in the eclass: opts - // are invoked for every new enode added to an eclass, so - // traversing the whole eclass would be redundant. - let load_key = egraph.egraph.classes[id].get_node().unwrap(); - if let Node::Load { - op: - InstructionImms::Load { - opcode: Opcode::Load, - offset: load_offset, - .. - }, - ty: load_ty, - addr: load_addr, - mem_state, - .. - } = load_key.node(&egraph.egraph.nodes) - { - if let Some(store_inst) = mem_state.as_store() { - trace!(" -> got load op for id {}", id); - if let Some((store_ty, store_id)) = egraph.store_nodes.get(&store_inst) { - trace!(" -> got store id: {} ty: {}", store_id, store_ty); - let store_key = egraph.egraph.classes[*store_id].get_node().unwrap(); - if let Node::Inst { - op: - InstructionImms::Store { - opcode: Opcode::Store, - offset: store_offset, - .. - }, - args: store_args, - .. 
- } = store_key.node(&egraph.egraph.nodes) - { - let store_args = store_args.as_slice(&egraph.node_ctx.args); - let store_data = store_args[0]; - let store_addr = store_args[1]; - if *load_offset == *store_offset - && *load_ty == *store_ty - && egraph.egraph.unionfind.equiv_id_mut(*load_addr, store_addr) - { - trace!(" -> same offset, type, address; forwarding"); - egraph.stats.store_to_load_forward += 1; - return store_data; - } - } - } - } - } - - id -} - -struct NodesEtorIter<'a, 'b> -where - 'b: 'a, -{ - root: Id, - iter: NodeIter, +pub(crate) struct InstDataEtorIter<'a, 'b, 'c> { + stack: SmallVec<[Value; 8]>, _phantom1: PhantomData<&'a ()>, _phantom2: PhantomData<&'b ()>, + _phantom3: PhantomData<&'c ()>, } - -impl<'a, 'b> generated_code::ContextIter for NodesEtorIter<'a, 'b> -where - 'b: 'a, -{ - type Context = IsleContext<'a, 'b>; - type Output = (Type, InstructionImms, IdArray); - - fn next(&mut self, ctx: &mut IsleContext<'a, 'b>) -> Option { - while let Some(node) = self.iter.next(&ctx.egraph.egraph) { - trace!("iter from root {}: node {:?}", self.root, node); - match node { - Node::Pure { - op, - args, - ty, - arity, - } - | Node::Inst { - op, - args, - ty, - arity, - .. - } if *arity == 1 => { - return Some((*ty, op.clone(), args.clone())); - } - _ => {} - } - } - None - } -} - -impl<'a, 'b> generated_code::Context for IsleContext<'a, 'b> { - isle_common_prelude_methods!(); - - fn eclass_type(&mut self, eclass: Id) -> Option { - let mut iter = self.egraph.egraph.enodes(eclass); - while let Some(node) = iter.next(&self.egraph.egraph) { - match node { - &Node::Pure { ty, arity, .. } | &Node::Inst { ty, arity, .. } if arity == 1 => { - return Some(ty); - } - &Node::Load { ty, .. } => return Some(ty), - &Node::Result { ty, .. } => return Some(ty), - &Node::Param { ty, .. 
} => return Some(ty), - _ => {} - } - } - None - } - - fn at_loop_level(&mut self, eclass: Id) -> (u8, Id) { - ( - self.egraph.egraph.analysis_value(eclass).loop_level.level() as u8, - eclass, - ) - } - - type enodes_etor_iter = NodesEtorIter<'a, 'b>; - - fn enodes_etor(&mut self, eclass: Id) -> Option> { - Some(NodesEtorIter { - root: eclass, - iter: self.egraph.egraph.enodes(eclass), +impl<'a, 'b, 'c> InstDataEtorIter<'a, 'b, 'c> { + fn new(root: Value) -> Self { + debug_assert_ne!(root, Value::reserved_value()); + Self { + stack: smallvec![root], _phantom1: PhantomData, _phantom2: PhantomData, - }) - } - - fn pure_enode_ctor(&mut self, ty: Type, op: &InstructionImms, args: IdArray) -> Id { - let op = op.clone(); - match self.egraph.egraph.add( - Node::Pure { - op, - args, - ty, - arity: 1, - }, - &mut self.egraph.node_ctx, - ) { - NewOrExisting::New(id) => { - self.egraph.stats.node_created += 1; - self.egraph.stats.node_pure += 1; - self.egraph.stats.node_ctor_created += 1; - optimize_eclass(id, self.egraph) - } - NewOrExisting::Existing(id) => { - self.egraph.stats.node_ctor_deduped += 1; - id - } + _phantom3: PhantomData, } } - - fn id_array_0_etor(&mut self, arg0: IdArray) -> Option<()> { - let values = arg0.as_slice(&self.egraph.node_ctx.args); - if values.len() == 0 { - Some(()) - } else { - None - } - } - - fn id_array_0_ctor(&mut self) -> IdArray { - EntityList::default() - } - - fn id_array_1_etor(&mut self, arg0: IdArray) -> Option { - let values = arg0.as_slice(&self.egraph.node_ctx.args); - if values.len() == 1 { - Some(values[0]) - } else { - None - } - } - - fn id_array_1_ctor(&mut self, arg0: Id) -> IdArray { - EntityList::from_iter([arg0].into_iter(), &mut self.egraph.node_ctx.args) - } - - fn id_array_2_etor(&mut self, arg0: IdArray) -> Option<(Id, Id)> { - let values = arg0.as_slice(&self.egraph.node_ctx.args); - if values.len() == 2 { - Some((values[0], values[1])) - } else { - None - } - } - - fn id_array_2_ctor(&mut self, arg0: Id, arg1: Id) 
-> IdArray { - EntityList::from_iter([arg0, arg1].into_iter(), &mut self.egraph.node_ctx.args) - } - - fn id_array_3_etor(&mut self, arg0: IdArray) -> Option<(Id, Id, Id)> { - let values = arg0.as_slice(&self.egraph.node_ctx.args); - if values.len() == 3 { - Some((values[0], values[1], values[2])) - } else { - None - } - } - - fn id_array_3_ctor(&mut self, arg0: Id, arg1: Id, arg2: Id) -> IdArray { - EntityList::from_iter( - [arg0, arg1, arg2].into_iter(), - &mut self.egraph.node_ctx.args, - ) - } - - fn remat(&mut self, id: Id) -> Id { - trace!("remat: {}", id); - self.egraph.remat_ids.insert(id); - id - } - - fn subsume(&mut self, id: Id) -> Id { - trace!("subsume: {}", id); - self.egraph.subsume_ids.insert(id); - id - } +} + +impl<'a, 'b, 'c> ContextIter for InstDataEtorIter<'a, 'b, 'c> +where + 'b: 'a, + 'c: 'b, +{ + type Context = IsleContext<'a, 'b, 'c>; + type Output = (Type, InstructionData); + + fn next(&mut self, ctx: &mut IsleContext<'a, 'b, 'c>) -> Option { + while let Some(value) = self.stack.pop() { + debug_assert_ne!(value, Value::reserved_value()); + let value = ctx.ctx.func.dfg.resolve_aliases(value); + trace!("iter: value {:?}", value); + match ctx.ctx.func.dfg.value_def(value) { + ValueDef::Union(x, y) => { + debug_assert_ne!(x, Value::reserved_value()); + debug_assert_ne!(y, Value::reserved_value()); + trace!(" -> {}, {}", x, y); + self.stack.push(x); + self.stack.push(y); + continue; + } + ValueDef::Result(inst, _) if ctx.ctx.func.dfg.inst_results(inst).len() == 1 => { + let ty = ctx.ctx.func.dfg.value_type(value); + trace!(" -> value of type {}", ty); + return Some((ty, ctx.ctx.func.dfg[inst].clone())); + } + _ => {} + } + } + None + } +} + +impl<'a, 'b, 'c> generated_code::Context for IsleContext<'a, 'b, 'c> { + isle_common_prelude_methods!(); + + type inst_data_etor_iter = InstDataEtorIter<'a, 'b, 'c>; + + fn inst_data_etor(&mut self, eclass: Value) -> Option> { + Some(InstDataEtorIter::new(eclass)) + } + + fn make_inst_ctor(&mut self, ty: 
Type, op: &InstructionData) -> Value { + let value = self + .ctx + .insert_pure_enode(NewOrExistingInst::New(op.clone(), ty)); + trace!("make_inst_ctor: {:?} -> {}", op, value); + value + } + + fn value_array_2_ctor(&mut self, arg0: Value, arg1: Value) -> ValueArray2 { + [arg0, arg1] + } + + fn value_array_3_ctor(&mut self, arg0: Value, arg1: Value, arg2: Value) -> ValueArray3 { + [arg0, arg1, arg2] + } + + #[inline] + fn value_type(&mut self, val: Value) -> Type { + self.ctx.func.dfg.value_type(val) + } + + fn remat(&mut self, value: Value) -> Value { + trace!("remat: {}", value); + self.ctx.remat_values.insert(value); + self.ctx.stats.remat += 1; + value + } + + fn subsume(&mut self, value: Value) -> Value { + trace!("subsume: {}", value); + self.ctx.subsume_values.insert(value); + self.ctx.stats.subsume += 1; + value + } } diff --git a/cranelift/codegen/src/opts/algebraic.isle b/cranelift/codegen/src/opts/algebraic.isle index 9a75b3d6b3..caed553ba7 100644 --- a/cranelift/codegen/src/opts/algebraic.isle +++ b/cranelift/codegen/src/opts/algebraic.isle @@ -145,31 +145,15 @@ (iadd ty x x)) ;; x<<32>>32: uextend/sextend 32->64. -(rule (simplify (ushr $I64 (ishl $I64 (uextend $I64 x @ (eclass_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32)))) +(rule (simplify (ushr $I64 (ishl $I64 (uextend $I64 x @ (value_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32)))) (uextend $I64 x)) -(rule (simplify (sshr $I64 (ishl $I64 (uextend $I64 x @ (eclass_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32)))) +(rule (simplify (sshr $I64 (ishl $I64 (uextend $I64 x @ (value_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32)))) (sextend $I64 x)) ;; TODO: strength reduction: mul/div to shifts ;; TODO: div/rem by constants -> magic multiplications -;; Reassociate when it benefits LICM. 
-(rule (simplify (iadd ty (iadd ty x y) z)) - (if-let (at_loop_level lx _) x) - (if-let (at_loop_level ly _) y) - (if-let (at_loop_level lz _) z) - (if (u8_lt lx ly)) - (if (u8_lt lz ly)) - (iadd ty (iadd ty x z) y)) -(rule (simplify (iadd ty (iadd ty x y) z)) - (if-let (at_loop_level lx _) x) - (if-let (at_loop_level ly _) y) - (if-let (at_loop_level lz _) z) - (if (u8_lt ly lx)) - (if (u8_lt lz lx)) - (iadd ty (iadd ty y z) x)) - ;; Rematerialize ALU-op-with-imm and iconsts in each block where they're ;; used. This is neutral (add-with-imm) or positive (iconst) for ;; register pressure, and these ops are very cheap. diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle index e3573bcc3a..ef4e8c28fd 100644 --- a/cranelift/codegen/src/opts/cprop.isle +++ b/cranelift/codegen/src/opts/cprop.isle @@ -107,7 +107,7 @@ (rule (simplify (isub ty (iadd ty x (iconst ty (u64_from_imm64 k1))) (iconst ty (u64_from_imm64 k2)))) - (isub ty x (iconst ty (imm64 (u64_sub k1 k2))))) + (isub ty x (iconst ty (imm64 (u64_sub k2 k1))))) (rule (simplify (iadd ty (isub ty x (iconst ty (u64_from_imm64 k1))) (iconst ty (u64_from_imm64 k2)))) diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index ca9a307d28..68d06ddf24 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -32,6 +32,15 @@ ;; `cranelift-entity`-based identifiers. (type Type (primitive Type)) +(type Value (primitive Value)) +(type ValueList (primitive ValueList)) + +;; ISLE representation of `&[Value]`. +(type ValueSlice (primitive ValueSlice)) + +;; Extract the type of a `Value`. 
+(decl value_type (Type) Value) +(extern extractor infallible value_type value_type) (decl u32_add (u32 u32) u32) (extern constructor u32_add u32_add) diff --git a/cranelift/codegen/src/prelude_lower.isle b/cranelift/codegen/src/prelude_lower.isle index 597ddb0043..b35ca78c28 100644 --- a/cranelift/codegen/src/prelude_lower.isle +++ b/cranelift/codegen/src/prelude_lower.isle @@ -5,15 +5,10 @@ ;; `cranelift-entity`-based identifiers. (type Inst (primitive Inst)) -(type Value (primitive Value)) - -;; ISLE representation of `&[Value]`. -(type ValueSlice (primitive ValueSlice)) ;; ISLE representation of `Vec` (type VecMask extern (enum)) -(type ValueList (primitive ValueList)) (type ValueRegs (primitive ValueRegs)) (type WritableValueRegs (primitive WritableValueRegs)) @@ -214,10 +209,6 @@ (decl inst_data (InstructionData) Inst) (extern extractor infallible inst_data inst_data) -;; Extract the type of a `Value`. -(decl value_type (Type) Value) -(extern extractor infallible value_type value_type) - ;; Extract the type of the instruction's first result. (decl result_type (Type) Inst) (extractor (result_type ty) diff --git a/cranelift/codegen/src/prelude_opt.isle b/cranelift/codegen/src/prelude_opt.isle index 46baaddd13..d3fc0d1bb4 100644 --- a/cranelift/codegen/src/prelude_opt.isle +++ b/cranelift/codegen/src/prelude_opt.isle @@ -2,60 +2,33 @@ ;;;;; eclass and enode access ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; An eclass ID. -(type Id (primitive Id)) - -;; What is the type of an eclass (if a single type)? -(decl eclass_type (Type) Id) -(extern extractor eclass_type eclass_type) - -;; Helper to wrap an Id-matching pattern and extract type. -(decl has_type (Type Id) Id) -(extractor (has_type ty id) - (and (eclass_type ty) - id)) - ;; Extract any node(s) for the given eclass ID. 
-(decl multi enodes (Type InstructionImms IdArray) Id) -(extern extractor enodes enodes_etor) +(decl multi inst_data (Type InstructionData) Value) +(extern extractor inst_data inst_data_etor) ;; Construct a pure node, returning a new (or deduplicated ;; already-existing) eclass ID. -(decl pure_enode (Type InstructionImms IdArray) Id) -(extern constructor pure_enode pure_enode_ctor) +(decl make_inst (Type InstructionData) Value) +(extern constructor make_inst make_inst_ctor) -;; Type of an Id slice (for args). -(type IdArray (primitive IdArray)) - -(decl id_array_0 () IdArray) -(extern constructor id_array_0 id_array_0_ctor) -(extern extractor id_array_0 id_array_0_etor) -(decl id_array_1 (Id) IdArray) -(extern constructor id_array_1 id_array_1_ctor) -(extern extractor id_array_1 id_array_1_etor) -(decl id_array_2 (Id Id) IdArray) -(extern constructor id_array_2 id_array_2_ctor) -(extern extractor id_array_2 id_array_2_etor) -(decl id_array_3 (Id Id Id) IdArray) -(extern constructor id_array_3 id_array_3_ctor) -(extern extractor id_array_3 id_array_3_etor) - -;; Extractor to get the min loop-level of an eclass. -(decl at_loop_level (u8 Id) Id) -(extern extractor infallible at_loop_level at_loop_level) +;; Constructors for value arrays. +(decl value_array_2_ctor (Value Value) ValueArray2) +(extern constructor value_array_2_ctor value_array_2_ctor) +(decl value_array_3_ctor (Value Value Value) ValueArray3) +(extern constructor value_array_3_ctor value_array_3_ctor) ;;;;; optimization toplevel ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; The main matcher rule invoked by the toplevel driver. -(decl multi simplify (Id) Id) +(decl multi simplify (Value) Value) ;; Mark a node as requiring remat when used in a different block. -(decl remat (Id) Id) +(decl remat (Value) Value) (extern constructor remat remat) ;; Mark a node as subsuming whatever else it's rewritten from -- this ;; is definitely preferable, not just a possible option. 
Useful for, ;; e.g., constant propagation where we arrive at a definite "final ;; answer". -(decl subsume (Id) Id) +(decl subsume (Value) Value) (extern constructor subsume subsume) diff --git a/cranelift/codegen/src/simple_gvn.rs b/cranelift/codegen/src/simple_gvn.rs index 327ff8f4bd..87dc4d1a96 100644 --- a/cranelift/codegen/src/simple_gvn.rs +++ b/cranelift/codegen/src/simple_gvn.rs @@ -39,14 +39,14 @@ struct HashKey<'a, 'f: 'a> { impl<'a, 'f: 'a> Hash for HashKey<'a, 'f> { fn hash(&self, state: &mut H) { let pool = &self.pos.borrow().func.dfg.value_lists; - self.inst.hash(state, pool); + self.inst.hash(state, pool, |value| value); self.ty.hash(state); } } impl<'a, 'f: 'a> PartialEq for HashKey<'a, 'f> { fn eq(&self, other: &Self) -> bool { let pool = &self.pos.borrow().func.dfg.value_lists; - self.inst.eq(&other.inst, pool) && self.ty == other.ty + self.inst.eq(&other.inst, pool, |value| value) && self.ty == other.ty } } impl<'a, 'f: 'a> Eq for HashKey<'a, 'f> {} diff --git a/cranelift/codegen/src/unionfind.rs b/cranelift/codegen/src/unionfind.rs new file mode 100644 index 0000000000..b6c534aa5f --- /dev/null +++ b/cranelift/codegen/src/unionfind.rs @@ -0,0 +1,74 @@ +//! Simple union-find data structure. + +use crate::trace; +use cranelift_entity::{packed_option::ReservedValue, EntityRef, SecondaryMap}; +use std::hash::Hash; + +/// A union-find data structure. The data structure can allocate +/// `Id`s, indicating eclasses, and can merge eclasses together. +#[derive(Clone, Debug, PartialEq)] +pub struct UnionFind { + parent: SecondaryMap>, +} + +#[derive(Clone, Debug, PartialEq)] +struct Val(Idx); +impl Default for Val { + fn default() -> Self { + Self(Idx::reserved_value()) + } +} + +impl UnionFind { + /// Create a new `UnionFind` with the given capacity. + pub fn with_capacity(cap: usize) -> Self { + UnionFind { + parent: SecondaryMap::with_capacity(cap), + } + } + + /// Add an `Idx` to the `UnionFind`, with its own equivalence class + /// initially. 
All `Idx`s must be added before being queried or + /// unioned. + pub fn add(&mut self, id: Idx) { + debug_assert!(id != Idx::reserved_value()); + self.parent[id] = Val(id); + } + + /// Find the canonical `Idx` of a given `Idx`. + pub fn find(&self, mut node: Idx) -> Idx { + while node != self.parent[node].0 { + node = self.parent[node].0; + } + node + } + + /// Find the canonical `Idx` of a given `Idx`, updating the data + /// structure in the process so that future queries for this `Idx` + /// (and others in its chain up to the root of the equivalence + /// class) will be faster. + pub fn find_and_update(&mut self, mut node: Idx) -> Idx { + // "Path splitting" mutating find (Tarjan and Van Leeuwen). + debug_assert!(node != Idx::reserved_value()); + while node != self.parent[node].0 { + let next = self.parent[self.parent[node].0].0; + debug_assert!(next != Idx::reserved_value()); + self.parent[node] = Val(next); + node = next; + } + debug_assert!(node != Idx::reserved_value()); + node + } + + /// Merge the equivalence classes of the two `Idx`s. + pub fn union(&mut self, a: Idx, b: Idx) { + let a = self.find_and_update(a); + let b = self.find_and_update(b); + let (a, b) = (std::cmp::min(a, b), std::cmp::max(a, b)); + if a != b { + // Always canonicalize toward lower IDs. + self.parent[b] = Val(a); + trace!("union: {}, {}", a, b); + } + } +} diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs index 535fe0d99e..b19523b7ef 100644 --- a/cranelift/codegen/src/verifier/mod.rs +++ b/cranelift/codegen/src/verifier/mod.rs @@ -1041,6 +1041,10 @@ impl<'a> Verifier<'a> { )); } } + ValueDef::Union(_, _) => { + // Nothing: union nodes themselves have no location, + // so we cannot check any dominance properties. 
+ } } Ok(()) } @@ -1070,6 +1074,11 @@ impl<'a> Verifier<'a> { self.context(loc_inst), format!("instruction result {} is not defined by the instruction", v), )), + ValueDef::Union(_, _) => errors.fatal(( + loc_inst, + self.context(loc_inst), + format!("instruction result {} is a union node", v), + )), } } diff --git a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs index 58d73e4ba4..552cc8549b 100644 --- a/cranelift/codegen/src/write.rs +++ b/cranelift/codegen/src/write.rs @@ -298,6 +298,7 @@ fn type_suffix(func: &Function, inst: Inst) -> Option { let def_block = match func.dfg.value_def(ctrl_var) { ValueDef::Result(instr, _) => func.layout.inst_block(instr), ValueDef::Param(block, _) => Some(block), + ValueDef::Union(..) => None, }; if def_block.is_some() && def_block == func.layout.inst_block(inst) { return None; diff --git a/cranelift/egraph/Cargo.toml b/cranelift/egraph/Cargo.toml deleted file mode 100644 index 7d591a187f..0000000000 --- a/cranelift/egraph/Cargo.toml +++ /dev/null @@ -1,24 +0,0 @@ -[package] -authors = ["The Cranelift Project Developers"] -name = "cranelift-egraph" -version = "0.92.0" -description = "acyclic-egraph (aegraph) implementation for Cranelift" -license = "Apache-2.0 WITH LLVM-exception" -documentation = "https://docs.rs/cranelift-egraph" -repository = "https://github.com/bytecodealliance/wasmtime" -edition = "2021" - -[dependencies] -cranelift-entity = { workspace = true } -log = { workspace = true } -smallvec = { workspace = true } -indexmap = { version = "1.9.1" } -hashbrown = { version = "0.12.2", features = ["raw"] } -fxhash = "0.2.1" - -[features] -default = [] - -# Enable detailed trace-level debug logging. Excluded by default to -# omit the dynamic overhead of checking the logging level. -trace-log = [] diff --git a/cranelift/egraph/src/bumpvec.rs b/cranelift/egraph/src/bumpvec.rs deleted file mode 100644 index 7c8d210cb9..0000000000 --- a/cranelift/egraph/src/bumpvec.rs +++ /dev/null @@ -1,524 +0,0 @@ -//! 
Vectors allocated in arenas, with small per-vector overhead. - -use std::marker::PhantomData; -use std::mem::MaybeUninit; -use std::ops::Range; - -/// A vector of `T` stored within a `BumpArena`. -/// -/// This is something like a normal `Vec`, except that all accesses -/// and updates require a separate borrow of the `BumpArena`. This, in -/// turn, makes the Vec itself very compact: only three `u32`s (12 -/// bytes). The `BumpSlice` variant is only two `u32`s (8 bytes) and -/// is sufficient to reconstruct a slice, but not grow the vector. -/// -/// The `BumpVec` does *not* implement `Clone` or `Copy`; it -/// represents unique ownership of a range of indices in the arena. If -/// dropped, those indices will be unavailable until the arena is -/// freed. This is "fine" (it is normally how arena allocation -/// works). To explicitly free and make available for some -/// allocations, a very rudimentary reuse mechanism exists via -/// `BumpVec::free(arena)`. (The allocation path opportunistically -/// checks the first range on the freelist, and can carve off a piece -/// of it if larger than needed, but it does not attempt to traverse -/// the entire freelist; this is a compromise between bump-allocation -/// speed and memory efficiency, which also influences speed through -/// cached-memory reuse.) -/// -/// The type `T` should not have a `Drop` implementation. This -/// typically means that it does not own any boxed memory, -/// sub-collections, or other resources. This is important for the -/// efficiency of the data structure (otherwise, to call `Drop` impls, -/// the arena needs to track which indices are live or dead; the -/// BumpVec itself cannot do the drop because it does not retain a -/// reference to the arena). Note that placing a `T` with a `Drop` -/// impl in the arena is still *safe*, because leaking (that is, never -/// calling `Drop::drop()`) is safe. It is merely less efficient, and -/// so should be avoided if possible. 
-#[derive(Debug)] -pub struct BumpVec { - base: u32, - len: u32, - cap: u32, - _phantom: PhantomData, -} - -/// A slice in an arena: like a `BumpVec`, but has a fixed size that -/// cannot grow. The size of this struct is one 32-bit word smaller -/// than `BumpVec`. It is copyable/cloneable because it will never be -/// freed. -#[derive(Debug, Clone, Copy)] -pub struct BumpSlice { - base: u32, - len: u32, - _phantom: PhantomData, -} - -#[derive(Default)] -pub struct BumpArena { - vec: Vec>, - freelist: Vec>, -} - -impl BumpArena { - /// Create a new arena into which one can allocate `BumpVec`s. - pub fn new() -> Self { - Self { - vec: vec![], - freelist: vec![], - } - } - - /// Create a new arena, pre-allocating space for `cap` total `T` - /// elements. - pub fn arena_with_capacity(cap: usize) -> Self { - Self { - vec: Vec::with_capacity(cap), - freelist: Vec::with_capacity(cap / 16), - } - } - - /// Create a new `BumpVec` with the given pre-allocated capacity - /// and zero length. - pub fn vec_with_capacity(&mut self, cap: usize) -> BumpVec { - let cap = u32::try_from(cap).unwrap(); - if let Some(range) = self.maybe_freelist_alloc(cap) { - BumpVec { - base: range.start, - len: 0, - cap, - _phantom: PhantomData, - } - } else { - let base = self.vec.len() as u32; - for _ in 0..cap { - self.vec.push(MaybeUninit::uninit()); - } - BumpVec { - base, - len: 0, - cap, - _phantom: PhantomData, - } - } - } - - /// Create a new `BumpVec` with a single element. The capacity is - /// also only one element; growing the vector further will require - /// a reallocation. - pub fn single(&mut self, t: T) -> BumpVec { - let mut vec = self.vec_with_capacity(1); - unsafe { - self.write_into_index(vec.base, t); - } - vec.len = 1; - vec - } - - /// Create a new `BumpVec` with the sequence from an iterator. 
- pub fn from_iter>(&mut self, i: I) -> BumpVec { - let base = self.vec.len() as u32; - self.vec.extend(i.map(|item| MaybeUninit::new(item))); - let len = self.vec.len() as u32 - base; - BumpVec { - base, - len, - cap: len, - _phantom: PhantomData, - } - } - - /// Append two `BumpVec`s, returning a new one. Consumes both - /// vectors. This will use the capacity at the end of `a` if - /// possible to move `b`'s elements into place; otherwise it will - /// need to allocate new space. - pub fn append(&mut self, a: BumpVec, b: BumpVec) -> BumpVec { - if (a.cap - a.len) >= b.len { - self.append_into_cap(a, b) - } else { - self.append_into_new(a, b) - } - } - - /// Helper: read the `T` out of a given arena index. After - /// reading, that index becomes uninitialized. - unsafe fn read_out_of_index(&self, index: u32) -> T { - // Note that we don't actually *track* uninitialized status - // (and this is fine because we will never `Drop` and we never - // allow a `BumpVec` to refer to an uninitialized index, so - // the bits are effectively dead). We simply read the bits out - // and return them. - self.vec[index as usize].as_ptr().read() - } - - /// Helper: write a `T` into the given arena index. Index must - /// have been uninitialized previously. - unsafe fn write_into_index(&mut self, index: u32, t: T) { - self.vec[index as usize].as_mut_ptr().write(t); - } - - /// Helper: move a `T` from one index to another. Old index - /// becomes uninitialized and new index must have previously been - /// uninitialized. - unsafe fn move_item(&mut self, from: u32, to: u32) { - let item = self.read_out_of_index(from); - self.write_into_index(to, item); - } - - /// Helper: push a `T` onto the end of the arena, growing its - /// storage. The `T` to push is read out of another index, and - /// that index subsequently becomes uninitialized. 
- unsafe fn push_item(&mut self, from: u32) -> u32 { - let index = self.vec.len() as u32; - let item = self.read_out_of_index(from); - self.vec.push(MaybeUninit::new(item)); - index - } - - /// Helper: append `b` into the capacity at the end of `a`. - fn append_into_cap(&mut self, mut a: BumpVec, b: BumpVec) -> BumpVec { - debug_assert!(a.cap - a.len >= b.len); - for i in 0..b.len { - // Safety: initially, the indices in `b` are initialized; - // the indices in `a`'s cap, beyond its length, are - // uninitialized. We move the initialized contents from - // `b` to the tail beyond `a`, and we consume `b` (so it - // no longer exists), and we update `a`'s length to cover - // the initialized contents in their new location. - unsafe { - self.move_item(b.base + i, a.base + a.len + i); - } - } - a.len += b.len; - b.free(self); - a - } - - /// Helper: return a range of indices that are available - /// (uninitialized) according to the freelist for `len` elements, - /// if possible. - fn maybe_freelist_alloc(&mut self, len: u32) -> Option> { - if let Some(entry) = self.freelist.last_mut() { - if entry.len() >= len as usize { - let base = entry.start; - entry.start += len; - if entry.start == entry.end { - self.freelist.pop(); - } - return Some(base..(base + len)); - } - } - None - } - - /// Helper: append `a` and `b` into a completely new allocation. - fn append_into_new(&mut self, a: BumpVec, b: BumpVec) -> BumpVec { - // New capacity: round up to a power of two. - let len = a.len + b.len; - let cap = round_up_power_of_two(len); - - if let Some(range) = self.maybe_freelist_alloc(cap) { - for i in 0..a.len { - // Safety: the indices in `a` must be initialized. We read - // out the item and copy it to a new index; the old index - // is no longer covered by a BumpVec, because we consume - // `a`. - unsafe { - self.move_item(a.base + i, range.start + i); - } - } - for i in 0..b.len { - // Safety: the indices in `b` must be initialized. 
We read - // out the item and copy it to a new index; the old index - // is no longer covered by a BumpVec, because we consume - // `b`. - unsafe { - self.move_item(b.base + i, range.start + a.len + i); - } - } - - a.free(self); - b.free(self); - - BumpVec { - base: range.start, - len, - cap, - _phantom: PhantomData, - } - } else { - self.vec.reserve(cap as usize); - let base = self.vec.len() as u32; - for i in 0..a.len { - // Safety: the indices in `a` must be initialized. We read - // out the item and copy it to a new index; the old index - // is no longer covered by a BumpVec, because we consume - // `a`. - unsafe { - self.push_item(a.base + i); - } - } - for i in 0..b.len { - // Safety: the indices in `b` must be initialized. We read - // out the item and copy it to a new index; the old index - // is no longer covered by a BumpVec, because we consume - // `b`. - unsafe { - self.push_item(b.base + i); - } - } - let len = self.vec.len() as u32 - base; - - for _ in len..cap { - self.vec.push(MaybeUninit::uninit()); - } - - a.free(self); - b.free(self); - - BumpVec { - base, - len, - cap, - _phantom: PhantomData, - } - } - } - - /// Returns the size of the backing `Vec`. - pub fn size(&self) -> usize { - self.vec.len() - } -} - -fn round_up_power_of_two(x: u32) -> u32 { - debug_assert!(x > 0); - debug_assert!(x < 0x8000_0000); - let log2 = 32 - (x - 1).leading_zeros(); - 1 << log2 -} - -impl BumpVec { - /// Returns a slice view of this `BumpVec`, given a borrow of the - /// arena. - pub fn as_slice<'a>(&'a self, arena: &'a BumpArena) -> &'a [T] { - let maybe_uninit_slice = - &arena.vec[(self.base as usize)..((self.base + self.len) as usize)]; - // Safety: the index range we represent must be initialized. - unsafe { std::mem::transmute(maybe_uninit_slice) } - } - - /// Returns a mutable slice view of this `BumpVec`, given a - /// mutable borrow of the arena. 
- pub fn as_mut_slice<'a>(&'a mut self, arena: &'a mut BumpArena) -> &'a mut [T] { - let maybe_uninit_slice = - &mut arena.vec[(self.base as usize)..((self.base + self.len) as usize)]; - // Safety: the index range we represent must be initialized. - unsafe { std::mem::transmute(maybe_uninit_slice) } - } - - /// Returns the length of this vector. Does not require access to - /// the arena. - pub fn len(&self) -> usize { - self.len as usize - } - - /// Returns the capacity of this vector. Does not require access - /// to the arena. - pub fn cap(&self) -> usize { - self.cap as usize - } - - /// Reserve `extra_len` capacity at the end of the vector, - /// reallocating if necessary. - pub fn reserve(&mut self, extra_len: usize, arena: &mut BumpArena) { - let extra_len = u32::try_from(extra_len).unwrap(); - if self.cap - self.len < extra_len { - if self.base + self.cap == arena.vec.len() as u32 { - for _ in 0..extra_len { - arena.vec.push(MaybeUninit::uninit()); - } - self.cap += extra_len; - } else { - let new_cap = self.cap + extra_len; - let new = arena.vec_with_capacity(new_cap as usize); - unsafe { - for i in 0..self.len { - arena.move_item(self.base + i, new.base + i); - } - } - self.base = new.base; - self.cap = new.cap; - } - } - } - - /// Push an item, growing the capacity if needed. - pub fn push(&mut self, t: T, arena: &mut BumpArena) { - if self.cap > self.len { - unsafe { - arena.write_into_index(self.base + self.len, t); - } - self.len += 1; - } else if (self.base + self.cap) as usize == arena.vec.len() { - arena.vec.push(MaybeUninit::new(t)); - self.cap += 1; - self.len += 1; - } else { - let new_cap = round_up_power_of_two(self.cap + 1); - let extra = new_cap - self.cap; - self.reserve(extra as usize, arena); - unsafe { - arena.write_into_index(self.base + self.len, t); - } - self.len += 1; - } - } - - /// Clone, if `T` is cloneable. 
- pub fn clone(&self, arena: &mut BumpArena) -> BumpVec - where - T: Clone, - { - let mut new = arena.vec_with_capacity(self.len as usize); - for i in 0..self.len { - let item = self.as_slice(arena)[i as usize].clone(); - new.push(item, arena); - } - new - } - - /// Truncate the length to a smaller-or-equal length. - pub fn truncate(&mut self, len: usize) { - let len = len as u32; - assert!(len <= self.len); - self.len = len; - } - - /// Consume the BumpVec and return its indices to a free pool in - /// the arena. - pub fn free(self, arena: &mut BumpArena) { - arena.freelist.push(self.base..(self.base + self.cap)); - } - - /// Freeze the capacity of this BumpVec, turning it into a slice, - /// for a smaller struct (8 bytes rather than 12). Once this - /// exists, it is copyable, because the slice will never be freed. - pub fn freeze(self, arena: &mut BumpArena) -> BumpSlice { - if self.cap > self.len { - arena - .freelist - .push((self.base + self.len)..(self.base + self.cap)); - } - BumpSlice { - base: self.base, - len: self.len, - _phantom: PhantomData, - } - } -} - -impl BumpSlice { - /// Returns a slice view of the `BumpSlice`, given a borrow of the - /// arena. - pub fn as_slice<'a>(&'a self, arena: &'a BumpArena) -> &'a [T] { - let maybe_uninit_slice = - &arena.vec[(self.base as usize)..((self.base + self.len) as usize)]; - // Safety: the index range we represent must be initialized. - unsafe { std::mem::transmute(maybe_uninit_slice) } - } - - /// Returns a mutable slice view of the `BumpSlice`, given a - /// mutable borrow of the arena. - pub fn as_mut_slice<'a>(&'a mut self, arena: &'a mut BumpArena) -> &'a mut [T] { - let maybe_uninit_slice = - &mut arena.vec[(self.base as usize)..((self.base + self.len) as usize)]; - // Safety: the index range we represent must be initialized. - unsafe { std::mem::transmute(maybe_uninit_slice) } - } - - /// Returns the length of the `BumpSlice`. 
- pub fn len(&self) -> usize { - self.len as usize - } -} - -impl std::default::Default for BumpVec { - fn default() -> Self { - BumpVec { - base: 0, - len: 0, - cap: 0, - _phantom: PhantomData, - } - } -} - -impl std::default::Default for BumpSlice { - fn default() -> Self { - BumpSlice { - base: 0, - len: 0, - _phantom: PhantomData, - } - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_round_up() { - assert_eq!(1, round_up_power_of_two(1)); - assert_eq!(2, round_up_power_of_two(2)); - assert_eq!(4, round_up_power_of_two(3)); - assert_eq!(4, round_up_power_of_two(4)); - assert_eq!(32, round_up_power_of_two(24)); - assert_eq!(0x8000_0000, round_up_power_of_two(0x7fff_ffff)); - } - - #[test] - fn test_basic() { - let mut arena: BumpArena = BumpArena::new(); - - let a = arena.single(1); - let b = arena.single(2); - let c = arena.single(3); - let ab = arena.append(a, b); - assert_eq!(ab.as_slice(&arena), &[1, 2]); - assert_eq!(ab.cap(), 2); - let abc = arena.append(ab, c); - assert_eq!(abc.len(), 3); - assert_eq!(abc.cap(), 4); - assert_eq!(abc.as_slice(&arena), &[1, 2, 3]); - assert_eq!(arena.size(), 9); - let mut d = arena.single(4); - // Should have reused the freelist. - assert_eq!(arena.size(), 9); - assert_eq!(d.len(), 1); - assert_eq!(d.cap(), 1); - assert_eq!(d.as_slice(&arena), &[4]); - d.as_mut_slice(&mut arena)[0] = 5; - assert_eq!(d.as_slice(&arena), &[5]); - abc.free(&mut arena); - let d2 = d.clone(&mut arena); - let dd = arena.append(d, d2); - // Should have reused the freelist. 
- assert_eq!(arena.size(), 9); - assert_eq!(dd.as_slice(&arena), &[5, 5]); - let mut e = arena.from_iter([10, 11, 12].into_iter()); - e.push(13, &mut arena); - assert_eq!(arena.size(), 13); - e.reserve(4, &mut arena); - assert_eq!(arena.size(), 17); - let _f = arena.from_iter([1, 2, 3, 4, 5, 6, 7, 8].into_iter()); - assert_eq!(arena.size(), 25); - e.reserve(8, &mut arena); - assert_eq!(e.cap(), 16); - assert_eq!(e.as_slice(&arena), &[10, 11, 12, 13]); - // `e` must have been copied now that `f` is at the end of the - // arena. - assert_eq!(arena.size(), 41); - } -} diff --git a/cranelift/egraph/src/ctxhash.rs b/cranelift/egraph/src/ctxhash.rs deleted file mode 100644 index f70086a68c..0000000000 --- a/cranelift/egraph/src/ctxhash.rs +++ /dev/null @@ -1,281 +0,0 @@ -//! A hashmap with "external hashing": nodes are hashed or compared for -//! equality only with some external context provided on lookup/insert. -//! This allows very memory-efficient data structures where -//! node-internal data references some other storage (e.g., offsets into -//! an array or pool of shared data). - -use super::unionfind::UnionFind; -use hashbrown::raw::{Bucket, RawTable}; -use std::hash::{Hash, Hasher}; -use std::marker::PhantomData; - -/// Trait that allows for equality comparison given some external -/// context. -/// -/// Note that this trait is implemented by the *context*, rather than -/// the item type, for somewhat complex lifetime reasons (lack of GATs -/// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on -/// the value type). -/// -/// Furthermore, the `ctx_eq` method includes a `UnionFind` parameter, -/// because in practice we require this and a borrow to it cannot be -/// included in the context type without GATs (similarly to above). -pub trait CtxEq { - /// Determine whether `a` and `b` are equal, given the context in - /// `self` and the union-find data structure `uf`. 
- fn ctx_eq(&self, a: &V1, b: &V2, uf: &mut UnionFind) -> bool; -} - -/// Trait that allows for hashing given some external context. -pub trait CtxHash: CtxEq { - /// Compute the hash of `value`, given the context in `self` and - /// the union-find data structure `uf`. - fn ctx_hash(&self, value: &Value, uf: &mut UnionFind) -> u64; -} - -/// A null-comparator context type for underlying value types that -/// already have `Eq` and `Hash`. -#[derive(Default)] -pub struct NullCtx; - -impl CtxEq for NullCtx { - fn ctx_eq(&self, a: &V, b: &V, _: &mut UnionFind) -> bool { - a.eq(b) - } -} -impl CtxHash for NullCtx { - fn ctx_hash(&self, value: &V, _: &mut UnionFind) -> u64 { - let mut state = fxhash::FxHasher::default(); - value.hash(&mut state); - state.finish() - } -} - -/// A bucket in the hash table. -/// -/// Some performance-related design notes: we cache the hashcode for -/// speed, as this often buys a few percent speed in -/// interning-table-heavy workloads. We only keep the low 32 bits of -/// the hashcode, for memory efficiency: in common use, `K` and `V` -/// are often 32 bits also, and a 12-byte bucket is measurably better -/// than a 16-byte bucket. -struct BucketData { - hash: u32, - k: K, - v: V, -} - -/// A HashMap that takes external context for all operations. -pub struct CtxHashMap { - raw: RawTable>, -} - -impl CtxHashMap { - /// Create an empty hashmap. - pub fn new() -> Self { - Self { - raw: RawTable::new(), - } - } - - /// Create an empty hashmap with pre-allocated space for the given - /// capacity. - pub fn with_capacity(capacity: usize) -> Self { - Self { - raw: RawTable::with_capacity(capacity), - } - } -} - -impl CtxHashMap { - /// Insert a new key-value pair, returning the old value associated - /// with this key (if any). 
- pub fn insert + CtxHash>( - &mut self, - k: K, - v: V, - ctx: &Ctx, - uf: &mut UnionFind, - ) -> Option { - let hash = ctx.ctx_hash(&k, uf) as u32; - match self.raw.find(hash as u64, |bucket| { - hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k, uf) - }) { - Some(bucket) => { - let data = unsafe { bucket.as_mut() }; - Some(std::mem::replace(&mut data.v, v)) - } - None => { - let data = BucketData { hash, k, v }; - self.raw - .insert_entry(hash as u64, data, |bucket| bucket.hash as u64); - None - } - } - } - - /// Look up a key, returning a borrow of the value if present. - pub fn get<'a, Q, Ctx: CtxEq + CtxHash + CtxHash>( - &'a self, - k: &Q, - ctx: &Ctx, - uf: &mut UnionFind, - ) -> Option<&'a V> { - let hash = ctx.ctx_hash(k, uf) as u32; - self.raw - .find(hash as u64, |bucket| { - hash == bucket.hash && ctx.ctx_eq(&bucket.k, k, uf) - }) - .map(|bucket| { - let data = unsafe { bucket.as_ref() }; - &data.v - }) - } - - /// Return an Entry cursor on a given bucket for a key, allowing - /// for fetching the current value or inserting a new one. - #[inline(always)] - pub fn entry<'a, Ctx: CtxEq + CtxHash>( - &'a mut self, - k: K, - ctx: &'a Ctx, - uf: &mut UnionFind, - ) -> Entry<'a, K, V> { - let hash = ctx.ctx_hash(&k, uf) as u32; - match self.raw.find(hash as u64, |bucket| { - hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k, uf) - }) { - Some(bucket) => Entry::Occupied(OccupiedEntry { - bucket, - _phantom: PhantomData, - }), - None => Entry::Vacant(VacantEntry { - raw: &mut self.raw, - hash, - key: k, - }), - } - } -} - -/// An entry in the hashmap. -pub enum Entry<'a, K: 'a, V> { - Occupied(OccupiedEntry<'a, K, V>), - Vacant(VacantEntry<'a, K, V>), -} - -/// An occupied entry. -pub struct OccupiedEntry<'a, K, V> { - bucket: Bucket>, - _phantom: PhantomData<&'a ()>, -} - -impl<'a, K: 'a, V> OccupiedEntry<'a, K, V> { - /// Get the value. - pub fn get(&self) -> &'a V { - let bucket = unsafe { self.bucket.as_ref() }; - &bucket.v - } -} - -/// A vacant entry. 
-pub struct VacantEntry<'a, K, V> { - raw: &'a mut RawTable>, - hash: u32, - key: K, -} - -impl<'a, K, V> VacantEntry<'a, K, V> { - /// Insert a value. - pub fn insert(self, v: V) -> &'a V { - let bucket = self.raw.insert( - self.hash as u64, - BucketData { - hash: self.hash, - k: self.key, - v, - }, - |bucket| bucket.hash as u64, - ); - let data = unsafe { bucket.as_ref() }; - &data.v - } -} - -#[cfg(test)] -mod test { - use super::*; - use std::hash::Hash; - - #[derive(Clone, Copy, Debug)] - struct Key { - index: u32, - } - struct Ctx { - vals: &'static [&'static str], - } - impl CtxEq for Ctx { - fn ctx_eq(&self, a: &Key, b: &Key, _: &mut UnionFind) -> bool { - self.vals[a.index as usize].eq(self.vals[b.index as usize]) - } - } - impl CtxHash for Ctx { - fn ctx_hash(&self, value: &Key, _: &mut UnionFind) -> u64 { - let mut state = fxhash::FxHasher::default(); - self.vals[value.index as usize].hash(&mut state); - state.finish() - } - } - - #[test] - fn test_basic() { - let ctx = Ctx { - vals: &["a", "b", "a"], - }; - let mut uf = UnionFind::new(); - - let k0 = Key { index: 0 }; - let k1 = Key { index: 1 }; - let k2 = Key { index: 2 }; - - assert!(ctx.ctx_eq(&k0, &k2, &mut uf)); - assert!(!ctx.ctx_eq(&k0, &k1, &mut uf)); - assert!(!ctx.ctx_eq(&k2, &k1, &mut uf)); - - let mut map: CtxHashMap = CtxHashMap::new(); - assert_eq!(map.insert(k0, 42, &ctx, &mut uf), None); - assert_eq!(map.insert(k2, 84, &ctx, &mut uf), Some(42)); - assert_eq!(map.get(&k1, &ctx, &mut uf), None); - assert_eq!(*map.get(&k0, &ctx, &mut uf).unwrap(), 84); - } - - #[test] - fn test_entry() { - let mut ctx = Ctx { - vals: &["a", "b", "a"], - }; - let mut uf = UnionFind::new(); - - let k0 = Key { index: 0 }; - let k1 = Key { index: 1 }; - let k2 = Key { index: 2 }; - - let mut map: CtxHashMap = CtxHashMap::new(); - match map.entry(k0, &mut ctx, &mut uf) { - Entry::Vacant(v) => { - v.insert(1); - } - _ => panic!(), - } - match map.entry(k1, &mut ctx, &mut uf) { - Entry::Vacant(_) => {} - 
Entry::Occupied(_) => panic!(), - } - match map.entry(k2, &mut ctx, &mut uf) { - Entry::Occupied(o) => { - assert_eq!(*o.get(), 1); - } - _ => panic!(), - } - } -} diff --git a/cranelift/egraph/src/lib.rs b/cranelift/egraph/src/lib.rs deleted file mode 100644 index e94416c2f2..0000000000 --- a/cranelift/egraph/src/lib.rs +++ /dev/null @@ -1,666 +0,0 @@ -//! # ægraph (aegraph, or acyclic e-graph) implementation. -//! -//! An aegraph is a form of e-graph. We will first describe the -//! e-graph, then the aegraph as a slightly less powerful but highly -//! optimized variant of it. -//! -//! The main goal of this library is to be explicitly memory-efficient -//! and light on allocations. We need to be as fast and as small as -//! possible in order to minimize impact on compile time in a -//! production compiler. -//! -//! ## The e-graph -//! -//! An e-graph, or equivalence graph, is a kind of node-based -//! intermediate representation (IR) data structure that consists of -//! *eclasses* and *enodes*. An eclass contains one or more enodes; -//! semantically an eclass is like a value, and an enode is one way to -//! compute that value. If several enodes are in one eclass, the data -//! structure is asserting that any of these enodes, if evaluated, -//! would produce the value. -//! -//! An e-graph also contains a deduplicating hash-map of nodes, so if -//! the user creates the same e-node more than once, they get the same -//! e-class ID. -//! -//! In the usual use-case, an e-graph is used to build a sea-of-nodes -//! IR for a function body or other expression-based code, and then -//! *rewrite rules* are applied to the e-graph. Each rewrite -//! potentially introduces a new e-node that is equivalent to an -//! existing e-node, and then unions the two e-nodes' classes -//! together. -//! -//! In the trivial case this results in an e-class containing a series -//! of e-nodes that are newly added -- all known forms of an -//! 
expression -- but Note how if a rewrite rule rewrites into an -//! existing e-node (discovered via deduplication), rewriting can -//! result in unioning of two e-classes that have existed for some -//! time. -//! -//! An e-graph's enodes refer to *classes* for their arguments, rather -//! than other nodes directly. This is key to the ability of an -//! e-graph to canonicalize: when two e-classes that are already used -//! as arguments by other e-nodes are unioned, all e-nodes that refer -//! to those e-classes are themselves re-canonicalized. This can -//! result in "cascading" unioning of eclasses, in a process that -//! discovers the transitive implications of all individual -//! equalities. This process is known as "equality saturation". -//! -//! ## The acyclic e-graph (aegraph) -//! -//! An e-graph is powerful, but it can also be expensive to build and -//! saturate: there are often many different forms an expression can -//! take (because many different rewrites are possible), and cascading -//! canonicalization requires heavyweight data structure bookkeeping -//! that is expensive to maintain. -//! -//! This crate introduces the aegraph: an acyclic e-graph. This data -//! structure stores an e-class as an *immutable persistent data -//! structure*. An id can refer to some *level* of an eclass: a -//! snapshot of the nodes in the eclass at one point in time. The -//! nodes referred to by this id never change, though the eclass may -//! grow later. -//! -//! A *union* is also an operation that creates a new eclass id: the -//! original eclass IDs refer to the original eclass contents, while -//! the id resulting from the `union()` operation refers to an eclass -//! that has all nodes. -//! -//! In order to allow for adequate canonicalization, an enode normally -//! stores the *latest* eclass id for each argument, but computes -//! hashes and equality using a *canonical* eclass id. We define such -//! 
a canonical id with a union-find data structure, just as for a -//! traditional e-graph. It is normally the lowest id referring to -//! part of the eclass. -//! -//! The persistent/immutable nature of this data structure yields one -//! extremely important property: it is acyclic! This simplifies -//! operation greatly: -//! -//! - When "elaborating" out of the e-graph back to linearized code, -//! so that we can generate machine code, we do not need to break -//! cycles. A given enode cannot indirectly refer back to itself. -//! -//! - When applying rewrite rules, the nodes visible from a given id -//! for an eclass never change. This means that we only need to -//! apply rewrite rules at that node id *once*. -//! -//! ## Data Structure and Example -//! -//! Each eclass id refers to a table entry ("eclass node", which is -//! different than an "enode") that can be one of: -//! -//! - A single enode; -//! - An enode and an earlier eclass id it is appended to (a "child" -//! eclass node); -//! - A "union node" with two earlier eclass ids. -//! -//! Building the aegraph consists solely of adding new entries to the -//! end of this table of eclass nodes. An enode referenced from any -//! given eclass node can only refer to earlier eclass ids. -//! -//! For example, consider the following eclass table: -//! -//! ```plain -//! -//! eclass/enode table -//! -//! eclass1 iconst(1) -//! eclass2 blockparam(block0, 0) -//! eclass3 iadd(eclass1, eclass2) -//! ``` -//! -//! This represents the expression `iadd(blockparam(block0, 0), -//! iconst(1))` (as the sole enode for eclass3). -//! -//! Now, say that as we further build the function body, we add -//! another enode `iadd(eclass3, iconst(1))`. The `iconst(1)` will be -//! deduplicated to `eclass1`, and the toplevel `iadd` will become its -//! own new eclass (`eclass4`). -//! -//! ```plain -//! eclass4 iadd(eclass3, eclass1) -//! ``` -//! -//! Now we apply our body of rewrite rules, and these results can -//! 
combine `x + 1 + 1` into `x + 2`; so we get: -//! -//! ```plain -//! eclass5 iconst(2) -//! eclass6 union(iadd(eclass2, eclass5), eclass4) -//! ``` -//! -//! Note that we added the nodes for the new expression, and then we -//! union'd it with the earlier `eclass4`. Logically this represents a -//! single eclass that contains two nodes -- the `x + 1 + 1` and `x + -//! 2` representations -- and the *latest* id for the eclass, -//! `eclass6`, can reach all nodes in the eclass (here the node stored -//! in `eclass6` and the earlier one in `elcass4`). -//! -//! ## aegraph vs. egraph -//! -//! Where does an aegraph fall short of an e-graph -- or in other -//! words, why maintain the data structures to allow for full -//! (re)canonicalization at all, with e.g. parent pointers to -//! recursively update parents? -//! -//! This question deserves further study, but right now, it appears -//! that the difference is limited to a case like the following: -//! -//! - expression E1 is interned into the aegraph. -//! - expression E2 is interned into the aegraph. It uses E1 as an -//! argument to one or more operators, and so refers to the -//! (currently) latest id for E1. -//! - expression E3 is interned into the aegraph. A rewrite rule fires -//! that unions E3 with E1. -//! -//! In an e-graph, the last action would trigger a re-canonicalization -//! of all "parents" (users) of E1; so E2 would be re-canonicalized -//! using an id that represents the union of E1 and E3. At -//! code-generation time, E2 could choose to use a value computed by -//! either E1's or E3's operator. In an aegraph, this is not the case: -//! E2's e-class and e-nodes are immutable once created, so E2 refers -//! only to E1's representation of the value (a "slice" of the whole -//! e-class). -//! -//! While at first this sounds quite limiting, there actually appears -//! to be a nice mutually-beneficial interaction with the immediate -//! application of rewrite rules: by applying all rewrites we know -//! 
about right when E1 is interned, E2 can refer to the best version -//! when it is created. The above scenario only leads to a missed -//! optimization if: -//! -//! - a rewrite rule exists from E3 to E1, but not E1 to E3; and -//! - E3 is *cheaper* than E1. -//! -//! Or in other words, this only matters if there is a rewrite rule -//! that rewrites into a more expensive direction. This is unlikely -//! for the sorts of rewrite rules we plan to write; it may matter -//! more if many possible equalities are expressed, such as -//! associativity, commutativity, etc. -//! -//! Note that the above represents the best of our understanding, but -//! there may be cases we have missed; a more complete examination of -//! this question would involve building a full equality saturation -//! loop on top of the (a)egraph in this crate, and testing with many -//! benchmarks to see if it makes any difference. -//! -//! ## Rewrite Rules (FLAX: Fast Localized Aegraph eXpansion) -//! -//! The most common use of an e-graph or aegraph is to serve as the IR -//! for a compiler. In this use-case, we usually wish to transform the -//! program using a body of rewrite rules that represent valid -//! transformations (equivalent and hopefully simpler ways of -//! computing results). An aegraph supports applying rules in a fairly -//! straightforward way: whenever a new eclass entry is added to the -//! table, we invoke a toplevel "apply all rewrite rules" entry -//! point. This entry point creates new nodes as needed, and when -//! done, unions the rewritten nodes with the original. We thus -//! *immediately* expand a new value into all of its representations. -//! -//! This immediate expansion stands in contrast to a traditional -//! "equality saturation" e-egraph system, in which it is usually best -//! to apply rules in batches and then fix up the -//! canonicalization. This approach was introduced in the `egg` -//! e-graph engine [^1]. We call our system FLAX (because flax is an -//! 
alternative to egg): Fast Localized Aegraph eXpansion. -//! -//! The reason that this is possible in an aegraph but not -//! (efficiently, at least) in a traditional e-graph is that the data -//! structure nodes are immutable once created: an eclass id will -//! always refer to a fixed set of enodes. There is no -//! recanonicalizing of eclass arguments as they union; but also this -//! is not usually necessary, because args will have already been -//! processed and eagerly rewritten as well. In other words, eager -//! rewriting and the immutable data structure mutually allow each -//! other to be practical; both work together. -//! -//! [^1]: M Willsey, C Nandi, Y R Wang, O Flatt, Z Tatlock, P -//! Panchekha. "egg: Fast and Flexible Equality Saturation." In -//! POPL 2021. - -use cranelift_entity::PrimaryMap; -use cranelift_entity::{entity_impl, packed_option::ReservedValue, SecondaryMap}; -use smallvec::{smallvec, SmallVec}; -use std::fmt::Debug; -use std::hash::Hash; -use std::marker::PhantomData; - -mod bumpvec; -mod ctxhash; -mod unionfind; - -pub use bumpvec::{BumpArena, BumpSlice, BumpVec}; -pub use ctxhash::{CtxEq, CtxHash, CtxHashMap, Entry}; -pub use unionfind::UnionFind; - -/// An eclass ID. -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct Id(u32); -entity_impl!(Id, "eclass"); - -impl Id { - pub fn invalid() -> Id { - Self::reserved_value() - } -} -impl std::default::Default for Id { - fn default() -> Self { - Self::invalid() - } -} - -/// A trait implemented by all "languages" (types that can be enodes). -pub trait Language: CtxEq + CtxHash { - type Node: Debug; - fn children<'a>(&'a self, node: &'a Self::Node) -> &'a [Id]; - fn children_mut<'a>(&'a mut self, ctx: &'a mut Self::Node) -> &'a mut [Id]; - fn needs_dedup(&self, node: &Self::Node) -> bool; -} - -/// A trait that allows the aegraph to compute a property of each -/// node as it is created. 
-pub trait Analysis { - type L: Language; - type Value: Clone + Default; - fn for_node( - &self, - ctx: &Self::L, - n: &::Node, - values: &SecondaryMap, - ) -> Self::Value; - fn meet(&self, ctx: &Self::L, v1: &Self::Value, v2: &Self::Value) -> Self::Value; -} - -/// Conditionally-compiled trace-log macro. (Borrowed from -/// `cranelift-codegen`; it's not worth factoring out a common -/// subcrate for this.) -#[macro_export] -macro_rules! trace { - ($($tt:tt)*) => { - if cfg!(feature = "trace-log") { - ::log::trace!($($tt)*); - } - }; -} - -/// An egraph. -pub struct EGraph> { - /// Node-allocation arena. - pub nodes: Vec, - /// Hash-consing map from Nodes to eclass IDs. - node_map: CtxHashMap, - /// Eclass definitions. Each eclass consists of an enode, and - /// child pointer to the rest of the eclass. - pub classes: PrimaryMap, - /// Union-find for canonical ID generation. This lets us name an - /// eclass with a canonical ID that is the same for all - /// generations of the class. - pub unionfind: UnionFind, - /// Analysis and per-node state. - pub analysis: Option<(A, SecondaryMap)>, -} - -/// A reference to a node. -#[derive(Clone, Copy, Debug)] -pub struct NodeKey { - index: u32, -} - -impl NodeKey { - fn from_node_idx(node_idx: usize) -> NodeKey { - NodeKey { - index: u32::try_from(node_idx).unwrap(), - } - } - - /// Get the node for this NodeKey, given the `nodes` from the - /// appropriate `EGraph`. 
- pub fn node<'a, N>(&self, nodes: &'a [N]) -> &'a N { - &nodes[self.index as usize] - } - - fn bits(self) -> u32 { - self.index - } - - fn from_bits(bits: u32) -> Self { - NodeKey { index: bits } - } -} - -struct NodeKeyCtx<'a, 'b, L: Language> { - nodes: &'a [L::Node], - node_ctx: &'b L, -} - -impl<'a, 'b, L: Language> CtxEq for NodeKeyCtx<'a, 'b, L> { - fn ctx_eq(&self, a: &NodeKey, b: &NodeKey, uf: &mut UnionFind) -> bool { - let a = a.node(self.nodes); - let b = b.node(self.nodes); - self.node_ctx.ctx_eq(a, b, uf) - } -} - -impl<'a, 'b, L: Language> CtxHash for NodeKeyCtx<'a, 'b, L> { - fn ctx_hash(&self, value: &NodeKey, uf: &mut UnionFind) -> u64 { - self.node_ctx.ctx_hash(value.node(self.nodes), uf) - } -} - -/// An EClass entry. Contains either a single new enode and a child -/// eclass (i.e., adds one new enode), or unions two child eclasses -/// together. -#[derive(Debug, Clone, Copy)] -pub struct EClass { - // formats: - // - // 00 | unused (31 bits) | NodeKey (31 bits) - // 01 | eclass_child (31 bits) | NodeKey (31 bits) - // 10 | eclass_child_1 (31 bits) | eclass_child_id_2 (31 bits) - bits: u64, -} - -impl EClass { - fn node(node: NodeKey) -> EClass { - let node_idx = node.bits() as u64; - debug_assert!(node_idx < (1 << 31)); - EClass { - bits: (0b00 << 62) | node_idx, - } - } - - fn node_and_child(node: NodeKey, eclass_child: Id) -> EClass { - let node_idx = node.bits() as u64; - debug_assert!(node_idx < (1 << 31)); - debug_assert!(eclass_child != Id::invalid()); - let child = eclass_child.0 as u64; - debug_assert!(child < (1 << 31)); - EClass { - bits: (0b01 << 62) | (child << 31) | node_idx, - } - } - - fn union(child1: Id, child2: Id) -> EClass { - debug_assert!(child1 != Id::invalid()); - let child1 = child1.0 as u64; - debug_assert!(child1 < (1 << 31)); - - debug_assert!(child2 != Id::invalid()); - let child2 = child2.0 as u64; - debug_assert!(child2 < (1 << 31)); - - EClass { - bits: (0b10 << 62) | (child1 << 31) | child2, - } - } - - /// Get 
the node, if any, from a node-only or node-and-child - /// eclass. - pub fn get_node(&self) -> Option { - self.as_node() - .or_else(|| self.as_node_and_child().map(|(node, _)| node)) - } - - /// Get the first child, if any. - pub fn child1(&self) -> Option { - self.as_node_and_child() - .map(|(_, p1)| p1) - .or(self.as_union().map(|(p1, _)| p1)) - } - - /// Get the second child, if any. - pub fn child2(&self) -> Option { - self.as_union().map(|(_, p2)| p2) - } - - /// If this EClass is just a lone enode, return it. - pub fn as_node(&self) -> Option { - if (self.bits >> 62) == 0b00 { - let node_idx = (self.bits & ((1 << 31) - 1)) as u32; - Some(NodeKey::from_bits(node_idx)) - } else { - None - } - } - - /// If this EClass is one new enode and a child, return the node - /// and child ID. - pub fn as_node_and_child(&self) -> Option<(NodeKey, Id)> { - if (self.bits >> 62) == 0b01 { - let node_idx = (self.bits & ((1 << 31) - 1)) as u32; - let child = ((self.bits >> 31) & ((1 << 31) - 1)) as u32; - Some((NodeKey::from_bits(node_idx), Id::from_bits(child))) - } else { - None - } - } - - /// If this EClass is the union variety, return the two child - /// EClasses. Both are guaranteed not to be `Id::invalid()`. - pub fn as_union(&self) -> Option<(Id, Id)> { - if (self.bits >> 62) == 0b10 { - let child1 = ((self.bits >> 31) & ((1 << 31) - 1)) as u32; - let child2 = (self.bits & ((1 << 31) - 1)) as u32; - Some((Id::from_bits(child1), Id::from_bits(child2))) - } else { - None - } - } -} - -/// A new or existing `T` when adding to a deduplicated set or data -/// structure, like an egraph. -#[derive(Clone, Copy, Debug)] -pub enum NewOrExisting { - New(T), - Existing(T), -} - -impl NewOrExisting { - /// Get the underlying value. - pub fn get(self) -> T { - match self { - NewOrExisting::New(t) => t, - NewOrExisting::Existing(t) => t, - } - } -} - -impl> EGraph -where - L::Node: 'static, -{ - /// Create a new aegraph. 
- pub fn new(analysis: Option) -> Self { - let analysis = analysis.map(|a| (a, SecondaryMap::new())); - Self { - nodes: vec![], - node_map: CtxHashMap::new(), - classes: PrimaryMap::new(), - unionfind: UnionFind::new(), - analysis, - } - } - - /// Create a new aegraph with the given capacity. - pub fn with_capacity(nodes: usize, analysis: Option) -> Self { - let analysis = analysis.map(|a| (a, SecondaryMap::with_capacity(nodes))); - Self { - nodes: Vec::with_capacity(nodes), - node_map: CtxHashMap::with_capacity(nodes), - classes: PrimaryMap::with_capacity(nodes), - unionfind: UnionFind::with_capacity(nodes), - analysis, - } - } - - /// Add a new node. - pub fn add(&mut self, node: L::Node, node_ctx: &L) -> NewOrExisting { - // Push the node. We can then build a NodeKey that refers to - // it and look for an existing interned copy. If one exists, - // we can pop the pushed node and return the existing Id. - let node_idx = self.nodes.len(); - trace!("adding node: {:?}", node); - let needs_dedup = node_ctx.needs_dedup(&node); - self.nodes.push(node); - - let key = NodeKey::from_node_idx(node_idx); - if needs_dedup { - let ctx = NodeKeyCtx { - nodes: &self.nodes[..], - node_ctx, - }; - - match self.node_map.entry(key, &ctx, &mut self.unionfind) { - Entry::Occupied(o) => { - let eclass_id = *o.get(); - self.nodes.pop(); - trace!(" -> existing id {}", eclass_id); - NewOrExisting::Existing(eclass_id) - } - Entry::Vacant(v) => { - // We're creating a new eclass now. - let eclass_id = self.classes.push(EClass::node(key)); - trace!(" -> new node and eclass: {}", eclass_id); - self.unionfind.add(eclass_id); - - // Add to interning map with a NodeKey referring to the eclass. - v.insert(eclass_id); - - // Update analysis. 
- let node_ctx = ctx.node_ctx; - self.update_analysis_new(node_ctx, eclass_id, key); - - NewOrExisting::New(eclass_id) - } - } - } else { - let eclass_id = self.classes.push(EClass::node(key)); - self.unionfind.add(eclass_id); - NewOrExisting::New(eclass_id) - } - } - - /// Merge one eclass into another, maintaining the acyclic - /// property (args must have lower eclass Ids than the eclass - /// containing the node with those args). Returns the Id of the - /// merged eclass. - pub fn union(&mut self, ctx: &L, a: Id, b: Id) -> Id { - assert_ne!(a, Id::invalid()); - assert_ne!(b, Id::invalid()); - let (a, b) = (std::cmp::max(a, b), std::cmp::min(a, b)); - trace!("union: id {} and id {}", a, b); - if a == b { - trace!(" -> no-op"); - return a; - } - - self.unionfind.union(a, b); - - // If the younger eclass has no child, we can link it - // directly and return that eclass. Otherwise, we create a new - // union eclass. - if let Some(node) = self.classes[a].as_node() { - trace!( - " -> id {} is one-node eclass; making into node-and-child with id {}", - a, - b - ); - self.classes[a] = EClass::node_and_child(node, b); - self.update_analysis_union(ctx, a, a, b); - return a; - } - - let u = self.classes.push(EClass::union(a, b)); - self.unionfind.add(u); - self.unionfind.union(u, b); - trace!(" -> union id {} and id {} into id {}", a, b, u); - self.update_analysis_union(ctx, u, a, b); - u - } - - /// Get the canonical ID for an eclass. This may be an older - /// generation, so will not be able to see all enodes in the - /// eclass; but it will allow us to unambiguously refer to an - /// eclass, even across merging. - pub fn canonical_id_mut(&mut self, eclass: Id) -> Id { - self.unionfind.find_and_update(eclass) - } - - /// Get the canonical ID for an eclass. This may be an older - /// generation, so will not be able to see all enodes in the - /// eclass; but it will allow us to unambiguously refer to an - /// eclass, even across merging. 
- pub fn canonical_id(&self, eclass: Id) -> Id { - self.unionfind.find(eclass) - } - - /// Get the enodes for a given eclass. - pub fn enodes(&self, eclass: Id) -> NodeIter { - NodeIter { - stack: smallvec![eclass], - _phantom1: PhantomData, - _phantom2: PhantomData, - } - } - - /// Update analysis for a given eclass node (new-enode case). - fn update_analysis_new(&mut self, ctx: &L, eclass: Id, node: NodeKey) { - if let Some((analysis, state)) = self.analysis.as_mut() { - let node = node.node(&self.nodes); - state[eclass] = analysis.for_node(ctx, node, state); - } - } - - /// Update analysis for a given eclass node (union case). - fn update_analysis_union(&mut self, ctx: &L, eclass: Id, a: Id, b: Id) { - if let Some((analysis, state)) = self.analysis.as_mut() { - let a = &state[a]; - let b = &state[b]; - state[eclass] = analysis.meet(ctx, a, b); - } - } - - /// Get the analysis value for a given eclass. Panics if no analysis is present. - pub fn analysis_value(&self, eclass: Id) -> &A::Value { - &self.analysis.as_ref().unwrap().1[eclass] - } -} - -/// An iterator over all nodes in an eclass. -/// -/// Because eclasses are immutable once created, this does *not* need -/// to hold an open borrow on the egraph; it is free to add new nodes, -/// while our existing Ids will remain valid. 
-pub struct NodeIter> { - stack: SmallVec<[Id; 8]>, - _phantom1: PhantomData, - _phantom2: PhantomData, -} - -impl> NodeIter { - #[inline(always)] - pub fn next<'a>(&mut self, egraph: &'a EGraph) -> Option<&'a L::Node> { - while let Some(next) = self.stack.pop() { - let eclass = egraph.classes[next]; - if let Some(node) = eclass.as_node() { - return Some(&egraph.nodes[node.index as usize]); - } else if let Some((node, child)) = eclass.as_node_and_child() { - if child != Id::invalid() { - self.stack.push(child); - } - return Some(&egraph.nodes[node.index as usize]); - } else if let Some((child1, child2)) = eclass.as_union() { - debug_assert!(child1 != Id::invalid()); - debug_assert!(child2 != Id::invalid()); - self.stack.push(child2); - self.stack.push(child1); - continue; - } else { - unreachable!("Invalid eclass format"); - } - } - None - } -} diff --git a/cranelift/egraph/src/unionfind.rs b/cranelift/egraph/src/unionfind.rs deleted file mode 100644 index dd90fc8c23..0000000000 --- a/cranelift/egraph/src/unionfind.rs +++ /dev/null @@ -1,85 +0,0 @@ -//! Simple union-find data structure. - -use crate::{trace, Id}; -use cranelift_entity::SecondaryMap; -use std::hash::{Hash, Hasher}; - -/// A union-find data structure. The data structure can allocate -/// `Id`s, indicating eclasses, and can merge eclasses together. -#[derive(Clone, Debug)] -pub struct UnionFind { - parent: SecondaryMap, -} - -impl UnionFind { - /// Create a new `UnionFind`. - pub fn new() -> Self { - UnionFind { - parent: SecondaryMap::new(), - } - } - - /// Create a new `UnionFind` with the given capacity. - pub fn with_capacity(cap: usize) -> Self { - UnionFind { - parent: SecondaryMap::with_capacity(cap), - } - } - - /// Add an `Id` to the `UnionFind`, with its own equivalence class - /// initially. All `Id`s must be added before being queried or - /// unioned. - pub fn add(&mut self, id: Id) { - self.parent[id] = id; - } - - /// Find the canonical `Id` of a given `Id`. 
- pub fn find(&self, mut node: Id) -> Id { - while node != self.parent[node] { - node = self.parent[node]; - } - node - } - - /// Find the canonical `Id` of a given `Id`, updating the data - /// structure in the process so that future queries for this `Id` - /// (and others in its chain up to the root of the equivalence - /// class) will be faster. - pub fn find_and_update(&mut self, mut node: Id) -> Id { - // "Path splitting" mutating find (Tarjan and Van Leeuwen). - let orig = node; - while node != self.parent[node] { - let next = self.parent[self.parent[node]]; - self.parent[node] = next; - node = next; - } - trace!("find_and_update: {} -> {}", orig, node); - node - } - - /// Merge the equivalence classes of the two `Id`s. - pub fn union(&mut self, a: Id, b: Id) { - let a = self.find_and_update(a); - let b = self.find_and_update(b); - let (a, b) = (std::cmp::min(a, b), std::cmp::max(a, b)); - if a != b { - // Always canonicalize toward lower IDs. - self.parent[b] = a; - trace!("union: {}, {}", a, b); - } - } - - /// Determine if two `Id`s are equivalent, after - /// canonicalizing. Update union-find data structure during our - /// canonicalization to make future lookups faster. - pub fn equiv_id_mut(&mut self, a: Id, b: Id) -> bool { - self.find_and_update(a) == self.find_and_update(b) - } - - /// Hash an `Id` after canonicalizing it. Update union-find data - /// structure to make future lookups/hashing faster. 
- pub fn hash_id_mut(&mut self, hash: &mut H, id: Id) { - let id = self.find_and_update(id); - id.hash(hash); - } -} diff --git a/cranelift/filetests/filetests/egraph/algebraic.clif b/cranelift/filetests/filetests/egraph/algebraic.clif index 348dbf7212..6eaa6fcda9 100644 --- a/cranelift/filetests/filetests/egraph/algebraic.clif +++ b/cranelift/filetests/filetests/egraph/algebraic.clif @@ -7,8 +7,8 @@ function %f0(i32) -> i32 { block0(v0: i32): v1 = iconst.i32 2 v2 = imul v0, v1 - ; check: v1 = iadd v0, v0 - ; nextln: return v1 + ; check: v3 = iadd v0, v0 + ; check: return v3 return v2 } @@ -17,6 +17,6 @@ block0: v0 = iconst.i32 0xffff_ffff_9876_5432 v1 = uextend.i64 v0 return v1 - ; check: v0 = iconst.i64 0x9876_5432 - ; nextln: return v0 ; v0 = 0x9876_5432 + ; check: v2 = iconst.i64 0x9876_5432 + ; check: return v2 ; v2 = 0x9876_5432 } diff --git a/cranelift/filetests/filetests/egraph/alias_analysis.clif b/cranelift/filetests/filetests/egraph/alias_analysis.clif index 340455dfad..ce78431469 100644 --- a/cranelift/filetests/filetests/egraph/alias_analysis.clif +++ b/cranelift/filetests/filetests/egraph/alias_analysis.clif @@ -16,7 +16,7 @@ block0(v0: i64): return v7 } -; check: v1 = load.i64 heap v0 -; nextln: store v0, v1 -; nextln: v2 = load.i64 v0 -; nextln: return v2 +; check: v3 = load.i64 heap v0 +; check: store v0, v3 +; check: v7 = load.i64 v0 +; check: return v7 diff --git a/cranelift/filetests/filetests/egraph/basic-gvn.clif b/cranelift/filetests/filetests/egraph/basic-gvn.clif index d8023f0ac9..7b38786228 100644 --- a/cranelift/filetests/filetests/egraph/basic-gvn.clif +++ b/cranelift/filetests/filetests/egraph/basic-gvn.clif @@ -21,9 +21,9 @@ block2(v6: i32): ;; Check that the `iadd` for `v4` is subsumed by `v2`: ; check: block0(v0: i32, v1: i32): -; nextln: v2 = iadd v0, v1 +; check: v2 = iadd v0, v1 ; check: block1: -; nextln: v3 = iadd.i32 v2, v0 -; nextln: return v3 +; check: v5 = iadd.i32 v2, v0 +; nextln: return v5 ; check: block2: ; nextln: 
return v1 diff --git a/cranelift/filetests/filetests/egraph/licm.clif b/cranelift/filetests/filetests/egraph/licm.clif index 233763d9e6..a6f4585567 100644 --- a/cranelift/filetests/filetests/egraph/licm.clif +++ b/cranelift/filetests/filetests/egraph/licm.clif @@ -26,15 +26,15 @@ block2(v9: i32): ; check: block1(v2: i32): ;; constants are not lifted; they are rematerialized in each block where used -; nextln: v3 = iconst.i32 40 -; nextln: v4 = icmp eq v2, v3 -; nextln: v5 = iconst.i32 1 -; nextln: v6 = iadd v2, v5 -; nextln: brnz v4, block2 -; nextln: jump block1(v6) +; check: v5 = iconst.i32 40 +; check: v6 = icmp eq v2, v5 +; check: v3 = iconst.i32 1 +; check: v8 = iadd v2, v3 +; check: brnz v6, block2 +; check: jump block1(v8) ; check: block2: -; nextln: v7 = iconst.i32 1 -; nextln: v8 = iadd.i32 v1, v7 -; nextln: return v8 +; check: v10 = iconst.i32 1 +; check: v4 = iadd.i32 v1, v10 +; check: return v4 diff --git a/cranelift/filetests/filetests/egraph/misc.clif b/cranelift/filetests/filetests/egraph/misc.clif index 33b4c88197..668c643cd5 100644 --- a/cranelift/filetests/filetests/egraph/misc.clif +++ b/cranelift/filetests/filetests/egraph/misc.clif @@ -15,7 +15,7 @@ block0(v0: i64): ; check: function %stack_load(i64) -> i64 fast { ; nextln: ss0 = explicit_slot 8 ; check: block0(v0: i64): -; nextln: v1 = stack_addr.i64 ss0 -; nextln: store notrap aligned v0, v1 +; nextln: v2 = stack_addr.i64 ss0 +; nextln: store notrap aligned v0, v2 ; nextln: return v0 ; nextln: } diff --git a/cranelift/filetests/filetests/egraph/remat.clif b/cranelift/filetests/filetests/egraph/remat.clif index 0df7db6141..69289b7cdf 100644 --- a/cranelift/filetests/filetests/egraph/remat.clif +++ b/cranelift/filetests/filetests/egraph/remat.clif @@ -20,16 +20,16 @@ block2: } ; check: block0(v0: i32): -; nextln: v1 = iconst.i32 42 -; nextln: v2 = iadd v0, v1 -; nextln: brnz v2, block1 -; nextln: jump block2 +; check: v1 = iconst.i32 42 +; check: v2 = iadd v0, v1 +; check: brnz v2, block1 +; 
check: jump block2 ; check: block1: -; nextln: v5 = iconst.i32 126 -; nextln: v6 = iadd.i32 v0, v5 -; nextln: return v6 +; check: v11 = iconst.i32 126 +; check: v13 = iadd.i32 v0, v11 +; check: return v13 ; check: block2: -; nextln: v3 = iconst.i32 42 -; nextln: v4 = iadd.i32 v0, v3 -; nextln: return v4 +; check: v15 = iconst.i32 42 +; check: v16 = iadd.i32 v0, v15 +; check: return v16 diff --git a/cranelift/preopt/src/constant_folding.rs b/cranelift/preopt/src/constant_folding.rs index 1faf22e927..59432255cc 100644 --- a/cranelift/preopt/src/constant_folding.rs +++ b/cranelift/preopt/src/constant_folding.rs @@ -67,6 +67,7 @@ fn resolve_value_to_imm(dfg: &ir::DataFlowGraph, value: ir::Value) -> Option inst, ValueDef::Param(_, _) => return None, + ValueDef::Union(_, _) => return None, }; use self::ir::{InstructionData::*, Opcode::*};